In [None]:
import requests
import pprint
import pandas as pd
import numpy as np
from collections import defaultdict
from tqdm import tqdm
import random
import networkx as nx
import matplotlib.pyplot as plt
from typing import List
from itertools import combinations

## Loading and Sampling Dataset (Diego Canez)

In [None]:
# Load the (handle, problemID) table from the CSV file in the working directory.
df = pd.read_csv('userProblemDONE.csv')

In [None]:
def filter_handles(df: pd.DataFrame, handles=[]) -> pd.DataFrame:
  """Return only the rows of `df` whose 'handle' value appears in `handles`.

  The input frame is not modified; with the default (empty) `handles` the
  result has no rows.
  """
  keep_mask = df['handle'].isin(handles)
  return df.loc[keep_mask]

def sample_random_users(df: pd.DataFrame, N:int = 5, seed=None) -> List[str]:
  """Pick up to `N` distinct user handles at random from `df['handle']`.

  Args:
    df: frame with a 'handle' column.
    N: number of handles to draw. When fewer than `N` distinct handles
      exist, all of them are returned instead of raising.
    seed: optional seed for a private random generator, so the sample can be
      reproduced. When None, the module-level `random` state is used.

  Returns:
    A list of handles (not a DataFrame), in random order.
  """
  handles = list(df['handle'].unique())
  # A dedicated generator keeps a seeded draw from disturbing global state.
  rng = random if seed is None else random.Random(seed)
  return rng.sample(handles, min(N, len(handles)))

In [None]:
# Keep only the rows belonging to 5 randomly chosen users.
# NOTE: this rebinds `df`, so re-running the cell samples from the already
# filtered frame rather than from the full CSV.
df = filter_handles(df, sample_random_users(df, 5))

In [None]:
# Show the sampled (handle, problemID) rows as plain text.
print(df)

                  handle problemID
29                Cvangi     1552B
30                Cvangi     1512C
31                Cvangi     1499B
32                Cvangi     1157B
55     I-Love-Phuong-Anh     1506B
...                  ...       ...
48505            cjxj233      776D
48506            cjxj233      916E
48657             Cvangi     1455A
48658             Cvangi     1238B
48726            agrippa    1249B1

[989 rows x 2 columns]


## Creation of the Graph (Jorge Rebosio)

In [None]:
def add_edges(G: nx.Graph, user_problems: List[str]):
  """Connect every pair of problems in `user_problems` inside `G`.

  A pair seen for the first time gets an edge with weight 1; a pair that is
  already connected has its 'weight' attribute incremented by 1. `G` is
  modified in place and nothing is returned.
  """
  for pair in combinations(user_problems, 2):
    if not G.has_edge(*pair):
      G.add_edge(*pair, weight=1)
      continue
    G.edges[pair]['weight'] += 1

def create_graph(df: pd.DataFrame) -> nx.Graph:
  """Build the problem graph from a frame with 'handle' and 'problemID' columns.

  Rows are grouped by 'handle' and each group's 'problemID' values are passed
  to `add_edges`, which links the problems pairwise and accumulates weights.
  """
  graph = nx.Graph()
  for _, user_rows in df.groupby('handle', as_index=False):
    add_edges(graph, user_rows["problemID"].values)
  return graph

### Plotting Subgraph

In [None]:
# Draw the subgraph induced by the first N nodes of G (N defaults to 10)
def draw_subgraph(G: nx.Graph, N = 10, edge_attribute='weight'):
  """Plot the subgraph of `G` induced by its first `N` nodes.

  Nodes are taken in the order `G.nodes` yields them. The subgraph is laid out
  with a spring layout and each edge is labelled with its `edge_attribute`
  value before the figure is shown.
  """
  first_nodes = list(G.nodes)[:N]
  sub = G.subgraph(first_nodes)
  layout = nx.spring_layout(sub)
  nx.draw(sub, layout)
  labels = nx.get_edge_attributes(sub, edge_attribute)
  nx.draw_networkx_edge_labels(sub, layout, edge_labels=labels)
  plt.show()

## Building similarity matrix (Diego Canez)

### Similarity Metrics

#### Edge Weights (EW)
$$
EW(x, y) = A_{x, y}
$$

In [None]:
def edge_weights(G: nx.Graph, attr='weight'):
    """Build the Edge Weights similarity: EW[x][y] is the `attr` value of edge (x, y).

    Args:
        G: undirected graph whose edges carry the `attr` attribute.
        attr: name of the edge attribute to read (default 'weight').

    Returns:
        A nested defaultdict where `EW[x][y] == EW[y][x]` for every edge and
        missing pairs read as 0.
    """
    EW = defaultdict(lambda: defaultdict(int))
    for (x, y) in tqdm(G.edges()):
        # Store the scalar attribute, not the whole edge-attribute dict, so
        # that similarities can be compared numerically.
        weight = G[x][y][attr]
        # G.edges() yields each undirected edge once; fill both directions so
        # lookups do not depend on the order the endpoints were stored in.
        EW[x][y] = weight
        EW[y][x] = weight
    return EW

#### Weighted Common Neighbors (WCN)
$$
WCN(x, y) = \sum_{z \in N(x) \cap N(y)} \left( A_{x, z} + A_{z, y} \right)
$$

In [None]:
def weighted_common_neighbors(G: nx.Graph, attr='weight'):
  """Build the Weighted Common Neighbors similarity for the edges of `G`.

  For each edge (x, y) and each common neighbor z of x and y, the value
  `G[x][z][attr] + G[z][y][attr]` is added to both WCN[x][y] and WCN[y][x].

  Args:
    G: undirected graph whose edges carry the `attr` attribute.
    attr: name of the edge attribute to read (default 'weight').

  Returns:
    A nested defaultdict; pairs that were never updated read as 0. Only pairs
    that are already joined by an edge in `G` are scored.
  """
  WCN = defaultdict(lambda: defaultdict(int))
  for (x, y) in tqdm(G.edges()):
    for z in nx.common_neighbors(G, x, y):
      val = G[x][z][attr] + G[z][y][attr]
      # Each undirected edge is visited once, so write both directions.
      WCN[x][y] += val
      WCN[y][x] += val
  return WCN

### Choosing a Similarity Matrix

In [None]:
# Build the problem graph first: no earlier cell defines `G`, so on a fresh
# kernel the similarity step would fail with a NameError without this line.
G = create_graph(df)
# Use raw edge weights as the similarity matrix for the recommender.
sim = edge_weights(G)

100%|██████████| 11175608/11175608 [00:26<00:00, 426031.43it/s]


## Produce Recommendations (Maria Lovaton)

Source: https://doi.org/10.1007/978-3-319-61030-6_7

In [None]:
from functools import lru_cache

# Codeforces API endpoint requested by get_user_problems to list a user's submissions.
URL_USER_SUBMISSIONS = 'https://codeforces.com/api/user.status'


def get_all_problems(similarity=None):
    """Return every problem that has a row in the similarity matrix.

    Args:
        similarity: mapping keyed by problem id. When omitted, the
            notebook-level `sim` is used.

    Returns:
        A DataFrame with a single 'problem' column, one row per key.

    Not memoised: the matrix is an unhashable dict, and caching a frame
    derived from a global would go stale once `sim` is rebuilt.
    """
    if similarity is None:
        similarity = sim
    return pd.DataFrame(list(similarity.keys()), columns=['problem'])

@lru_cache(maxsize = 128)
def get_user_problems(user_handle):
    """Fetch the problems a Codeforces user attempted, with the verdict obtained.

    Args:
        user_handle: Codeforces handle to query.

    Returns:
        A DataFrame with columns 'problem' (contest id + index, e.g. '1552B')
        and 'verdict' ('OK' or 'WRONG_ANSWER'), one row per distinct
        (problem, verdict) pair. The frame is cached per handle, so callers
        must not modify it in place.

    Raises:
        ValueError: if the API response does not report status 'OK'.
    """
    # `count` (not `to`) is the user.status parameter that limits the number
    # of submissions returned; the timeout keeps the cell from hanging.
    res = requests.get(URL_USER_SUBMISSIONS,
                       params={'handle': user_handle, 'from': 1,
                       'count': 10000},
                       timeout=30)
    payload = res.json()
    if payload.get('status') != 'OK':
        raise ValueError(f"Codeforces API error for handle {user_handle!r}: "
                         f"{payload.get('comment')}")
    submissions = pd.DataFrame.from_dict(payload['result'])
    if submissions.empty or 'verdict' not in submissions.columns:
        return pd.DataFrame(columns=['problem', 'verdict'])
    # Parenthesis-free form of "verdict is OK or WRONG_ANSWER"; the original
    # `a == x | b == y` applied `|` before `==`.
    attempted_mask = submissions['verdict'].isin(['OK', 'WRONG_ANSWER'])
    # Keep 'verdict' so get_solved_problems can still filter on it.
    attempted = submissions.loc[attempted_mask, ['problem', 'verdict']].copy()
    attempted['problem'] = attempted['problem'].apply(
        lambda p: str(p['contestId']) + p['index'])
    # Deduplicate on both columns so an accepted submission is never discarded
    # in favour of a wrong answer on the same problem.
    attempted = attempted.drop_duplicates(subset=['problem', 'verdict'])
    return attempted.reset_index(drop=True)

@lru_cache(maxsize = 128)
def get_solved_problems(user_handle):
    """Return the problems `user_handle` has an accepted ('OK') submission for.

    Returns:
        A DataFrame with a single 'problem' column.
    """
    attempts = get_user_problems(user_handle)
    solved = attempts.loc[attempts['verdict'] == 'OK', ['problem']]
    return solved.reset_index(drop=True)

def get_pending_problems(user_handle, sim):
    """Return the problems present in `sim` that `user_handle` has not solved.

    Args:
        user_handle: Codeforces handle to query.
        sim: similarity matrix keyed by problem id.

    Returns:
        A DataFrame with a single 'problem' column.

    Not memoised: `sim` is a dict and cannot be hashed by lru_cache.
    """
    all_problems = get_all_problems(sim)
    solved_problems = get_solved_problems(user_handle)
    is_solved = all_problems['problem'].isin(solved_problems['problem'])
    return all_problems[~is_solved].reset_index(drop=True)

In [None]:
def recommend(user_handle, sim):
  """Rank a user's unsolved problems by similarity to the problems they solved.

  Each pending problem is scored with the highest similarity it has to any
  solved problem; pending problems with no link to a solved one score 0.

  Args:
    user_handle: Codeforces handle to recommend for.
    sim: nested mapping where sim[a][b] is the similarity of problems a and b.

  Returns:
    A list of (problem, similarity) tuples sorted by similarity, highest first.
  """
  solved_problems = get_solved_problems(user_handle)
  pending_problems = get_pending_problems(user_handle, sim)
  # Seed every pending problem with 0 so it still appears in the ranking.
  P = {pj: 0 for pj in pending_problems['problem']}
  # Iterate the 'problem' column: looping over the frame itself would yield
  # column names instead of problem ids.
  for pi in solved_problems['problem']:
    # .get() avoids inserting empty rows into a defaultdict-backed matrix.
    neighbours = sim.get(pi)
    if not neighbours:
      continue
    # Walk only pi's row rather than probing every pending problem.
    for pj, score in neighbours.items():
      if pj in P and P[pj] < score:
        P[pj] = score
  # (problem, similarity) pairs, best match first.
  return sorted(P.items(), key=lambda item: item[1], reverse=True)

In [None]:
# Example run: recommendations for the handle 'dgcnz' using the chosen matrix.
recommend('dgcnz', sim)