In [None]:
import matplotlib.pyplot as plt
from tqdm import tqdm
import json 
import scipy
import pickle
import numpy as np
import pandas as pd
from multiprocessing import Pool

In [None]:
# Evaluation year; used to build the co-citation and recommendation file names.
year = 2017

# Recommendation methods to evaluate; each has its own recommendations file.
methods = [
    'combsage',
    'graphsage',
    'specter',
]

In [None]:
# Load the co-citation file for `year`: one JSON document per line, all merged
# into a single dict. Each top-level key becomes a column of `co_cit_df`
# (later cells use the columns as query ids).
data = {}
skipped_lines = 0
with open(f'co_citations/{year}.json', 'r') as infile:
    # Iterate the file lazily instead of materialising every line at once.
    for json_line in infile:
        if not json_line.strip():
            continue  # blank lines carry no record
        try:
            data.update(json.loads(json_line))
        except (ValueError, TypeError):
            # json.JSONDecodeError is a ValueError; dict.update raises
            # ValueError/TypeError for a non-mapping payload. Malformed lines
            # are still skipped (best effort), but unrelated errors such as
            # KeyboardInterrupt are no longer swallowed.
            skipped_lines += 1

if skipped_lines:
    print(f'skipped {skipped_lines} unparseable line(s) in co_citations/{year}.json')

co_cit_df = pd.DataFrame(data)
# Free the intermediate dict; only the DataFrame is used from here on.
del data

In [None]:
def auc(q):
    """Pairwise AUC of the recommendation scores for query ``q``.

    Rows of the module-level ``co_cit_df`` whose value in column ``q`` is
    non-NaN are treated as relevant, NaN rows as irrelevant. The result is the
    fraction of (irrelevant, relevant) pairs in which the irrelevant item has a
    strictly lower score in the module-level ``recs[q]``; ties do not count as
    correctly ordered.

    Parameters
    ----------
    q : hashable
        Column label of ``co_cit_df`` and key of ``recs``.

    Returns
    -------
    dict or None
        ``{q: auc}``, or ``None`` when the query has no relevant or no
        irrelevant rows (the ratio is undefined in both cases).

    Raises
    ------
    KeyError
        If ``q`` or one of the row labels is missing from ``recs``.
    """
    column = co_cit_df[q]
    missing = column.isna()
    irrel = list(column[missing].index)
    rel = list(column[~missing].index)
    # No pairs can be formed without both groups; this also avoids dividing by zero.
    if not rel or not irrel:
        return None

    rec_q = recs[q]
    rel_scores = np.array([rec_q[r] for r in rel], dtype=float)
    irrel_scores = np.array([rec_q[i] for i in irrel], dtype=float)

    # A NaN score never satisfies `<`, so NaN relevant scores contribute no
    # pairs; drop them before sorting (the denominator keeps using len(rel)).
    rel_scores = np.sort(rel_scores[~np.isnan(rel_scores)])

    # For each irrelevant score s, searchsorted(side='right') gives the number
    # of relevant scores <= s; the remainder are strictly greater than s.
    not_greater = np.searchsorted(rel_scores, irrel_scores, side='right')
    score = int((rel_scores.size - not_greater).sum())

    return {q: score / (len(irrel) * len(rel))}

In [None]:
# For every method: load its recommendation scores, compute the per-query AUC
# in a pool of worker processes, and dump the list of results to auc/<method>.json.
for method in methods:

    print('reading recommendations for ',method)
    recs_path = f'recommendations/total/{method}_{year}.json'
    with open(recs_path, 'r') as infile:
        # `recs` has to stay a module-level name: `auc` reads it as a global.
        # NOTE(review): the workers presumably inherit it via fork -- confirm
        # on platforms that use the spawn start method.
        recs = json.load(infile)

    queries = list(co_cit_df.columns)

    with Pool(10) as p:
        # imap keeps the results in query order; tqdm only reports progress.
        progress = tqdm(p.imap(auc, queries), total=len(queries))
        r = list(progress)

    auc_path = f'auc/{method}.json'
    with open(auc_path, 'w+') as outfile:
        json.dump(r, outfile)

In [None]:
# Reload the per-query AUC results written to auc/<method>.json, keyed by method.
aucs = {}
for method in methods:
    # Plain read mode: the file is only read, so it must not require write access.
    with open(f'auc/{method}.json', 'r') as infile:
        aucs[method] = json.load(infile)

In [None]:
# Turn each method's list of {query: auc} records into a Series indexed by
# query id. Falsy entries (queries for which no AUC was produced) are dropped.
for method in methods:
    if isinstance(aucs[method], pd.Series):
        # Already converted: keeps the cell safe to re-run.
        continue
    aucs[method] = pd.Series(
        {
            query: value
            for entry in aucs[method] if entry
            for query, value in entry.items()
        },
        dtype=float,
    )

In [None]:
# One column of AUC values per method, labelled with the method name.
per_method = [aucs[name] for name in methods]
auc_df = pd.concat(per_method, axis=1, keys=methods)

In [None]:
# Column-wise mean AUC (one value per method), printed as a LaTeX table.
mean_auc = auc_df.mean()
print(mean_auc.to_latex())

In [None]:
from sklearn.metrics import ndcg_score, average_precision_score

In [None]:
# For every method: compute the mean reciprocal rank and the mean nDCG over
# all queries and write both to metrics/<method>.json.
for method in methods:

    print('reading recommendations for ',method)
    with open(f'recommendations/total/{method}_{year}.json','r') as infile:
        recs = json.load(infile)

    queries = list(co_cit_df.columns)

    rr, ndcg_per_query = [], []
    for q in tqdm(queries):
        y_score = pd.Series(recs[q])

        # nDCG: binarise the co-citation values (NaN -> 0, positive -> 1) and
        # keep only the items that have both a label and a score.
        y_true = co_cit_df[q].fillna(0)
        y_true[y_true>0] = 1
        y_df = pd.concat([y_true,y_score], axis = 1).dropna()
        y_df.columns = ['true','pred']
        ndcg_per_query.append(ndcg_score([y_df.true],[y_df.pred]))

        # Reciprocal rank of the best-ranked relevant item (rank 1 = highest score).
        rel = list(co_cit_df[q].dropna().index)
        rank_s = y_score.rank(ascending = False)
        # reindex instead of label indexing: a relevant item without a score
        # becomes NaN rather than raising KeyError, matching the dropna()
        # tolerance of the nDCG computation above. A query with no scored
        # relevant item yields NaN, which Series.mean() skips below.
        rr.append(1 / rank_s.reindex(rel).min())

    mrr = pd.Series(rr).mean()
    ndcg = pd.Series(ndcg_per_query).mean()

    m = {'mrr':mrr, 'ndcg':ndcg}

    with open(f'metrics/{method}.json', 'w+') as outfile:
        json.dump(m,outfile)