# Codeforces problemset downloading
Codeforces: https://codeforces.com/  
Codeforces API: https://codeforces.com/apiHelp

In [1]:
import requests
import pandas as pd
import numpy as np
import json

## Request

In [2]:
response = requests.get("https://codeforces.com/api/problemset.problems")
problemset_json = response.content.decode("utf-8")
problemset = json.loads(problemset_json)["result"]

## Transform

In [3]:
all_tags = set()
for problem in problemset["problems"]:
    tags = problem.get("tags", [])
    for tag in tags:
        all_tags.add(tag)

all_tags = sorted(list(all_tags))
print(all_tags)

['*special', '2-sat', 'binary search', 'bitmasks', 'brute force', 'chinese remainder theorem', 'combinatorics', 'constructive algorithms', 'data structures', 'dfs and similar', 'divide and conquer', 'dp', 'dsu', 'expression parsing', 'fft', 'flows', 'games', 'geometry', 'graph matchings', 'graphs', 'greedy', 'hashing', 'implementation', 'interactive', 'math', 'matrices', 'meet-in-the-middle', 'number theory', 'probabilities', 'schedules', 'shortest paths', 'sortings', 'string suffix structures', 'strings', 'ternary search', 'trees', 'two pointers']


In [4]:
problem_list = []
for problem in problemset["problems"]:
    contest_id = problem.get("contestId", "")
    # problemset_name = problem.get("problemsetName", "")
    index = problem.get("index", "")
    name = problem.get("name", "")
    # type_ = problem.get('type', "")
    points = problem.get('points', "")
    rating = problem.get('rating', "")
    tags = problem.get('tags', [])

    tags_num = [0] * len(all_tags)
    for tag in tags:
        tags_num[all_tags.index(tag)] = 1
    
    problem = [contest_id, index, name, points, rating] \
        + tags_num
    problem_list.append(problem)

In [5]:
# I assume that there is exacty one problemStatistics element for each problem
for idx, stats in enumerate(problemset["problemStatistics"]):
    problem_list[idx].insert(6, stats.get("solvedCount", ""))

## Save

In [6]:
columns = ["contest_id", "index", "name", "points", "rating", "solved_count"] \
    + all_tags
dataframe = pd.DataFrame(problem_list, columns=columns)
dataframe = dataframe.iloc[::-1].reset_index(drop=True)
dataframe

Unnamed: 0,contest_id,index,name,points,rating,solved_count,*special,2-sat,binary search,bitmasks,...,number theory,probabilities,schedules,shortest paths,sortings,string suffix structures,strings,ternary search,trees,two pointers
0,1,A,Theatre Square,,1000,0,117144,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,B,Spreadsheet,,1600,0,13168,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,C,Ancient Berland Circus,,2100,0,3628,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2,A,Winner,,1500,0,12176,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2,B,The least round way,,2000,0,5644,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6296,1401,B,Ternary Sequence,1000,,0,12332,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6297,1401,C,Mere Array,1500,,0,10641,0,0,0,...,1,0,0,0,1,0,0,0,0,0
6298,1401,D,Maximum Distributed Tree,1750,,0,4172,0,0,0,...,1,0,0,0,1,0,0,0,1,0
6299,1401,E,Divide Square,2500,,0,864,0,1,0,...,0,0,0,0,1,0,0,0,0,0


In [7]:
dataframe.to_csv("codeforces.tsv", index=None, sep="\t")