In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import sys

# Put the directory three levels above the current working directory on the
# import search path (presumably the project root that holds `src` -- confirm).
MAIN_MODULE_PATH = os.path.join(os.getcwd(), *(('..',) * 3))
sys.path.append(MAIN_MODULE_PATH)

In [None]:
from collections import Counter, defaultdict

import numpy as np
import scipy
import pandas as pd
import seaborn as sns
from sklearn.metrics import ndcg_score

from src.dataset.prepare import prepare_interactions
# from src.defaults import KNN_MODEL_PATH
from src.metrics.surprise_metrics import precision_recall_at_k, ndcg_at_k

In [None]:
# Unpack the two objects returned by the project helper: `train` is later fed
# to Surprise's Dataset.load_from_df, and `test` is iterated row by row when
# the fitted models are evaluated.
train, test = prepare_interactions()

# KNN and baseline models (Surprise)

In [None]:
def evaluate_model(algo, test, k=10, threshold=0.5):
    """Score a fitted recommender on the rows of ``test``.

    Parameters
    ----------
    algo
        Fitted algorithm exposing ``predict(uid, iid, r_ui)``. Its class name
        and, when present, the ``name`` entry of its ``sim_options`` mapping
        are used to label the result.
    test
        Object supporting ``itertuples()`` whose rows expose ``full_name``
        (passed as the user id), ``repo_requirements`` (passed as the item id)
        and ``rating`` (passed as the true rating).
    k : int, default 10
        Cut-off forwarded to ``precision_recall_at_k`` and ``ndcg_at_k``.
    threshold : float, default 0.5
        Relevance threshold forwarded to the same two metric helpers.

    Returns
    -------
    dict
        Keys ``model``, ``similarity_measure`` (``None`` when the algorithm
        has no named similarity), ``precision``, ``recall`` and ``ndcg``.
        ``precision`` and ``recall`` are the plain means of the per-key values
        returned by ``precision_recall_at_k``.

    Raises
    ------
    ZeroDivisionError
        If the metric helper returns empty mappings (e.g. ``test`` is empty).
    """
    predictions = [
        algo.predict(row.full_name, row.repo_requirements, row.rating)
        for row in test.itertuples()
    ]

    precisions, recalls = precision_recall_at_k(predictions, k=k, threshold=threshold)
    ndcg = ndcg_at_k(predictions, k=k, threshold=threshold)
    precision = sum(precisions.values()) / len(precisions)
    recall = sum(recalls.values()) / len(recalls)

    # Label the row from the algorithm object itself instead of from loop
    # variables in the notebook namespace: those are undefined outside the
    # loop and stale for algorithms that take no similarity measure.
    sim_options = getattr(algo, 'sim_options', None) or {}

    return {
        'model': type(algo).__name__,
        'similarity_measure': sim_options.get('name'),
        'precision': precision,
        'recall': recall,
        'ndcg': ndcg,
    }

In [None]:
from surprise import KNNBasic, KNNWithMeans, KNNWithZScore, KNNBaseline, NormalPredictor, BaselineOnly
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate, KFold, train_test_split

# Wrap the training interactions in a Surprise dataset (ratings declared on a
# 0-1 scale) and train on all of it; no fold is held out at this point.
rating_reader = Reader(rating_scale=(0, 1))
data = Dataset.load_from_df(train, rating_reader)
trainset = data.build_full_trainset()

results = []

# Neighbourhood models: each KNN variant is fitted once per similarity measure.
# The loop variable names are kept as they are because they live in the
# notebook namespace, where other cells can read them.
for model in (KNNBasic, KNNWithMeans, KNNWithZScore, KNNBaseline):
    for sim_measure_name in ('cosine', 'msd', 'pearson'):
        algo = model(sim_options={'name': sim_measure_name})
        algo.fit(trainset)
        scores = evaluate_model(algo, test)
        results.append(scores)

# Reference predictors, constructed without any similarity options.
for model in (NormalPredictor, BaselineOnly):
    algo = model()
    algo.fit(trainset)
    scores = evaluate_model(algo, test)
    results.append(scores)

# One row per evaluated configuration, highest NDCG first.
results = pd.DataFrame(results)
results = results.sort_values(by='ndcg', ascending=False)

In [None]:
# `results` is sorted by NDCG in descending order, so the first row is the
# best-scoring configuration.
results.iloc[0]

In [None]:
from surprise import dump

# KNN_MODEL_PATH is not defined anywhere else in the notebook (its import in
# the imports cell is commented out), so import it here; otherwise this cell
# fails with NameError on a fresh kernel.
# NOTE(review): confirm that src.defaults still exports KNN_MODEL_PATH.
from src.defaults import KNN_MODEL_PATH

# Refit one fixed configuration (KNNBasic with Pearson similarity) on the full
# training set and persist the fitted algorithm to disk.
algo = KNNBasic(sim_options={'name': 'pearson'})
algo.fit(trainset)

dump.dump(KNN_MODEL_PATH, algo=algo)

In [None]:
# Display the `test` object returned by prepare_interactions().
test