# Codeforces Problem Recommender

### Import The Required Libraries

In [140]:
import math
import torch
from torch import nn, Tensor
from torchtext.vocab import vocab
from torch.utils.data import DataLoader, Dataset
from torch.nn.utils.rnn import pad_sequence
from collections import Counter
import pandas as pd
import numpy as np
import requests

### Enter Your Codeforces Username Here

In [141]:
target_user_handle = 'shlokagrawal'

### Retrieving User Information from Codeforces API

In [142]:
url = "https://codeforces.com/api/user.info?handles=" + target_user_handle

# Make a GET request to the API endpoint
response = requests.get(url)
if response.status_code == 200:
    data = response.json()

    if 'status' in data and data['status'] == 'OK':
        user_info = data['result'][0]  
        print(f"Handle: {user_info['handle']}")
        rating = user_info['rating']
        print(f"Rating: {rating}")
    else:
        print("API request was not successful.")
else:
    print(f"Request failed with status code: {response.status_code}")

Handle: shlokagrawal
Rating: 1911


### Importing The Required CSV Files

In [143]:
if rating < 1200:
    path = "data/newbie_to_pupil/"
elif rating >= 1200 and rating < 1400:
    path = "data/pupil_to_specialist/"
elif rating >= 1400 and rating < 1600:
    path = "data/specialist_to_expert/"
elif rating >= 1600 and rating < 1900:
    path = "data/expert_to_cm/"
elif rating >= 1900 and rating < 2100:
    path = "data/cm_to_m/"
elif rating >= 2100 and rating < 2300:
    path = "data/m_to_im/"
elif rating >= 2300 and rating < 2400:
    path = "data/im_to_gm/"
elif rating >= 2400 and rating < 2600:
    path = "data/gm_to_igm/"
elif rating >= 2600 and rating < 3000:
    path = "data/igm_to_lgm/"
else:
    print("Congrats on being a Legendary Grand Master!")
    path = "data/igm_to_lgm/"

model_path = "models" + path[4:-1] + ".pth"

interactions = pd.read_csv(path + "user_problem.csv")
user_tags = pd.read_csv(path + "user_tags.csv")
user_ratings = pd.read_csv(path + "user_ratings.csv")

### Pre-Processing

In [144]:
interactions.drop(columns=['problem_rating', 'problem_tags'], inplace=True)
user_ratings.drop(columns=['undefined'], inplace=True)
user_tags.rename(columns={'0user_handle': 'user_id'}, inplace=True)
user_ratings.rename(columns={'0user_handle': 'user_id'}, inplace=True)
interactions.rename(columns={'user_handle': 'user_id'}, inplace=True)

In [145]:
interactions_copy = interactions.copy()

In [146]:
problems_per_user = interactions.groupby('user_id').problem_id.count()
users_per_problem = interactions.groupby('problem_id').user_id.count()

print(f"Total No. of users: {len(interactions.user_id.unique())}")
print(f"Total No. of problems: {len(interactions.problem_id.unique())}")
print("\n")

print(f"Max no. of problems per user: {problems_per_user.max()}")
print(f"Min no. of problems per user: {problems_per_user.min()}")
print(f"Median no. of problems per user: {problems_per_user.median()}")
print("\n")

print(f"Max no. of users per problem: {users_per_problem.max()}")
print(f"Min no. of users per problem: {users_per_problem.min()}")
print(f"Median no. of users per problem: {users_per_problem.median()}")

Total No. of users: 185
Total No. of problems: 7607


Max no. of problems per user: 738
Min no. of problems per user: 62
Median no. of problems per user: 111.0


Max no. of users per problem: 24
Min no. of users per problem: 1
Median no. of users per problem: 3.0


In [147]:
user_tags.fillna(0, inplace=True)
user_ratings.fillna(0, inplace=True)

In [148]:
user_tags

Unnamed: 0,*special,user_id,2-sat,binary search,bitmasks,brute force,chinese remainder theorem,combinatorics,constructive algorithms,data structures,...,number theory,probabilities,schedules,shortest paths,sortings,string suffix structures,strings,ternary search,trees,two pointers
0,0.0,maspy,0.0,32.0,11.0,56,0.0,6.0,45,34,...,15.0,0.0,0.0,12.0,55,0.0,24.0,0.0,18.0,21.0
1,0.0,wsyear,0.0,25.0,15.0,47,0.0,18.0,61,46,...,17.0,3.0,0.0,3.0,24,0.0,11.0,2.0,24.0,15.0
2,0.0,LXH-cat,3.0,51.0,23.0,84,0.0,51.0,118,94,...,43.0,18.0,0.0,13.0,54,4.0,24.0,4.0,46.0,29.0
3,1.0,skittles1412,0.0,3.0,1.0,5,0.0,0.0,8,1,...,0.0,0.0,0.0,0.0,4,0.0,8.0,0.0,1.0,1.0
4,0.0,PurpleCrayon,4.0,30.0,28.0,77,0.0,18.0,101,68,...,36.0,4.0,1.0,9.0,60,2.0,41.0,3.0,38.0,26.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
180,0.0,Osmabnlden,0.0,28.0,15.0,51,2.0,25.0,49,29,...,30.0,2.0,0.0,2.0,30,0.0,26.0,2.0,12.0,18.0
181,4.0,Carmel_Ab1,1.0,100.0,49.0,226,2.0,40.0,181,104,...,99.0,6.0,0.0,15.0,158,3.0,106.0,6.0,28.0,60.0
182,26.0,valavshonok,0.0,42.0,23.0,106,0.0,29.0,86,40,...,39.0,8.0,5.0,10.0,72,0.0,69.0,1.0,10.0,30.0
183,0.0,Erinyes,2.0,33.0,20.0,54,2.0,8.0,77,26,...,18.0,2.0,1.0,9.0,42,0.0,11.0,4.0,14.0,21.0


In [149]:
user_ratings

Unnamed: 0,user_id,1000,1100,1200,1300,1400,1500,1600,1700,1800,...,2800,2900,3000,3100,3200,3300,3400,3500,800,900
0,maspy,13.0,13.0,23.0,23,14.0,24.0,17.0,26.0,19.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,43,16.0
1,wsyear,15.0,6.0,17.0,8,8.0,8.0,11.0,15.0,12.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,33,5.0
2,LXH-cat,16.0,13.0,17.0,12,14.0,15.0,33.0,33.0,27.0,...,6.0,4.0,7.0,2.0,0.0,2.0,0.0,1.0,47,4.0
3,skittles1412,3.0,1.0,3.0,1,2.0,2.0,4.0,2.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16,1.0
4,PurpleCrayon,31.0,22.0,27.0,31,32.0,38.0,29.0,32.0,23.0,...,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,77,27.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
180,Osmabnlden,11.0,13.0,10.0,12,14.0,11.0,13.0,18.0,18.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,46,11.0
181,Carmel_Ab1,81.0,77.0,80.0,104,65.0,110.0,57.0,57.0,37.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,242,74.0
182,valavshonok,37.0,43.0,19.0,64,76.0,22.0,22.0,7.0,8.0,...,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,282,22.0
183,Erinyes,5.0,7.0,11.0,10,8.0,4.0,8.0,6.0,5.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40,10.0


In [150]:
interactions["problem_id"] = interactions["problem_id"].apply(lambda x: f"problem_{x}")
interactions["user_id"] = interactions["user_id"].apply(lambda x: f"user_{x}")
user_tags["user_id"] = user_tags["user_id"].apply(lambda x: f"user_{x}")
user_ratings["user_id"] = user_ratings["user_id"].apply(lambda x: f"user_{x}")

### Creating Vocabulary

In [151]:
np.random.seed(42)
# Generating a list of unique problem ids
problem_ids = interactions.problem_id.unique()

# Counter is used to feed problems to movive_vocab
problem_counter = Counter(problem_ids)

# Genarting vocabulary
problem_vocab = vocab(problem_counter, specials=['<unk>'])

# For indexing input ids
problem_vocab_stoi = problem_vocab.get_stoi()

# problem to title mapping dictionary
# problem_title_dict = dict(zip(problems.problem_id, problems.title))

# Similarly generating a vocabulary for user ids
user_ids = interactions.user_id.unique()
user_counter = Counter(user_ids)
user_vocab = vocab(user_counter, specials=['<unk>'])
user_vocab_stoi = user_vocab.get_stoi()

### GRU Model Definition

In [152]:
class GRUModel(nn.Module):
    def __init__(self, ntoken: int, nuser: int, d_model: int, d_hid: int, nlayers: int, dropout: float = 0.5):
        super().__init__()
        self.model_type = 'GRU'

        # Embedding layers
        self.problem_embedding = nn.Embedding(ntoken, d_model)
        self.user_embedding = nn.Embedding(nuser, d_model)
        self.feature_embedding = nn.Embedding(1000, d_model)

        # GRU layers
        self.GRU = nn.GRU(d_model, d_hid, nlayers, batch_first=True, dropout=dropout)

        self.d_model = d_model

        # Linear layer to map the GRU output to problem vocabulary
        self.linear = nn.Linear(67*d_hid, ntoken)
#         self.linear = nn.Sequential(
#             nn.Linear(31*d_hid,31*d_hid//2),
#             nn.Linear(31*d_hid//2, 31*d_hid//4),
#             nn.Dropout(0.2),
#             nn.Linear(31*d_hid//4, 31*d_hid//8),
#             nn.Linear(31*d_hid//8, ntoken)
#         )

        self.init_weights()

    def init_weights(self) -> None:
        # Initializing the weights of the embedding and linear layers
        initrange = 0.1
        self.problem_embedding.weight.data.uniform_(-initrange, initrange)
        self.user_embedding.weight.data.uniform_(-initrange, initrange)
        self.feature_embedding.weight.data.uniform_(-initrange, initrange)
        self.linear.bias.data.zero_()
        self.linear.weight.data.uniform_(-initrange, initrange)

    def forward(self, src: Tensor, user: Tensor, other_features_batch) -> Tensor:
        # Embedding problem ids and user id
        
#         print("user shape",user.shape)
        problem_embed = self.problem_embedding(src)* math.sqrt(self.d_model)
        user_embed = self.user_embedding(user)* math.sqrt(self.d_model)
        
        # print("problem_embed shape:", problem_embed.shape)
#         print("user_embed shape:", user_embed.shape)

        # Pass the combined embeddings through GRU layers
        GRU_output, _ = self.GRU(problem_embed)
        
#         print("GRU_output", GRU_output.shape)
        
        other_features_embed = []
        for i, (feature_name, feature_tensor) in enumerate(other_features_batch.items()):
#             print(f"Processing other feature '{feature_name}'")
#             print("Feature tensor shape:", feature_tensor.shape)
            other_feature_embedding = self.feature_embedding(feature_tensor)* math.sqrt(self.d_model)
            other_features_embed.append(other_feature_embedding)
#             print("Other feature embedding shape:", other_feature_embedding.shape)
        
        
        other_features_embed = torch.cat(other_features_embed, dim=-1)
        other_features_embed = other_features_embed.expand(-1, GRU_output.size(1), -1)

        user_embed = user_embed.expand(-1, GRU_output.size(1), -1)
#         print("user_embed after expansion", user_embed.shape)

        output = torch.cat((GRU_output, user_embed, other_features_embed), dim=-1)

        # print("hello", output.shape)

        # Apply linear layer to obtain the output logits
        output = self.linear(output)

        return output


### Defining Hyperparameters and Loading the Model

In [153]:
ntokens = len(problem_vocab)  # size of vocabulary
nusers = len(user_vocab)
d_model = 128  # embedding dimension (maybe 512?)
d_hid = 128  # dimension of the GRU hidden states
nlayers = 2  # number of GRU layers
dropout = 0.2  # dropout probability

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GRUModel(ntokens, nusers, d_model, d_hid, nlayers, dropout).to(device)

# Load the model's state dictionary
model.load_state_dict(torch.load(model_path))

<All keys matched successfully>

### Code for Generating Recommendations

In [154]:
def generate_recommendation(data, k=5):
    model.eval()
    user_id = data[0]
    problem_sequence = data[1]
    input_sequence = problem_sequence[:-1]
    # Tokenize and numerically encode the user id and problem sequence
    user_tensor = torch.tensor(user_vocab_stoi[user_id])
    problem_tensor = torch.tensor([[problem_vocab_stoi[problem_id]] for problem_id in input_sequence])
    # Shape: [1, 1]
    user_tensor = user_tensor.unsqueeze(0).to(device)
    user_tensor = user_tensor.view(user_tensor.shape[0], 1)

    # Shape: [1, seq_length]
    problem_tensor = problem_tensor.unsqueeze(0).to(device)[0]
    problem_tensor = problem_tensor.view(1, problem_tensor.shape[0])
    
    other_features_batch={}
    
    for i, feature in enumerate(data[2:]):
            feature_name = f'feature_{i+1}'
            other_features_batch[feature_name] = torch.tensor(feature).int()
            other_features_batch[feature_name] = other_features_batch[feature_name].unsqueeze(0).to(device)
            other_features_batch[feature_name] = other_features_batch[feature_name].view(other_features_batch[feature_name].shape[0], 1)
    
#     print("user_tensor", user_tensor.shape)
#     print("problem_tensor", problem_tensor.shape)

    # Pass the tensors through the model
    with torch.no_grad():
        predictions = model(problem_tensor, user_tensor, other_features_batch)

    # The output is a probability distribution over the next problem.
    # Topk to get most probable problems
    values, indices = predictions.topk(k + len(input_sequence), dim=-1)

    # Eliminate already watched problems
    indices = [indice for indice in indices[-1, :][0] if indice not in problem_tensor][:k]
    predicted_problems = [problem_vocab.get_itos()[problem] for problem in indices]
    return predicted_problems

### Getting User Features from Codeforces API

In [155]:
# Define the URL of the API endpoint you want to call
url = "https://codeforces.com/api/user.status?handle=" + target_user_handle

# Make a GET request to the API endpoint
response = requests.get(url)
data = response.json()

target_tags = {}
target_ratings = {}

for submission in data['result']:
    if submission['verdict'] == 'OK':
        for tag in submission['problem']['tags']:
            target_tags[tag] = target_tags.get(tag, 0) + 1
        if 'rating' in submission['problem']:
            target_ratings[submission['problem']['rating']] = target_ratings.get(submission['problem']['rating'], 0) + 1

user_ratings.drop(columns=['user_id'], inplace=True)
user_tags.drop(columns=['user_id'], inplace=True)

all_tags = user_tags.columns.tolist()
all_ratings = user_ratings.columns.tolist()
features = []

for tag in all_tags:
    if tag != 'user_id' and tag in target_tags:
        features.append(target_tags[tag])
    else:
        features.append(0)

for rating in all_ratings:
    if rating != 'user_id' and int(rating) in target_ratings:
        features.append(target_ratings[int(rating)])
    else:
        features.append(0)

sequence_length = 20
problems = set(interactions_copy.problem_id.unique())
count = 0
stack = []
for submission in data['result']:
    if submission['verdict'] == "OK" and count < sequence_length:
        key = str(submission['problem']['contestId']) + ':' + str(submission['problem']['index'])
        encoding = 'problem_' + key
        if key in problems and encoding not in stack:
            stack.append(encoding)
            count += 1
    if count >= sequence_length:
        break
stack = stack[::-1]
print(stack)

['problem_1928:E', 'problem_1932:G', 'problem_1923:E', 'problem_1938:H', 'problem_1938:C', 'problem_1938:G', 'problem_1938:J', 'problem_1938:E', 'problem_1938:F', 'problem_1948:E', 'problem_1943:C', 'problem_1943:B', 'problem_1743:F', 'problem_1550:D', 'problem_1949:I', 'problem_1949:C', 'problem_1949:B', 'problem_102056:I', 'problem_1942:D', 'problem_1942:E']


In [156]:
input = ['<unk>', stack] + features
input = np.array(input, dtype=object)
input

array(['<unk>',
       list(['problem_1928:E', 'problem_1932:G', 'problem_1923:E', 'problem_1938:H', 'problem_1938:C', 'problem_1938:G', 'problem_1938:J', 'problem_1938:E', 'problem_1938:F', 'problem_1948:E', 'problem_1943:C', 'problem_1943:B', 'problem_1743:F', 'problem_1550:D', 'problem_1949:I', 'problem_1949:C', 'problem_1949:B', 'problem_102056:I', 'problem_1942:D', 'problem_1942:E']),
       0, 2, 118, 57, 173, 4, 70, 163, 205, 95, 20, 199, 41, 1, 0, 4, 24,
       14, 4, 98, 315, 22, 227, 16, 310, 12, 3, 95, 15, 0, 24, 133, 1, 59,
       7, 86, 70, 29, 26, 37, 34, 45, 44, 53, 60, 52, 65, 54, 68, 53, 55,
       26, 11, 12, 1, 0, 0, 0, 0, 0, 0, 0, 0, 73, 18], dtype=object)

### Generating Recommendations

In [157]:
recos = '\n-'.join(generate_recommendation(input))

print(f"Recomendations:\n-{recos}")

Recomendations:
-problem_103470:M
-problem_1339:D
-problem_104021:A
-problem_1635:B
-problem_1649:C
