In [None]:
import sys
sys.path.append('../src/')

import numpy as np
import pandas as pd
from scipy import sparse as sp
from tqdm.notebook import tqdm
import json

from utils import (
    ProductEncoder,
    make_coo_row,
    normalized_average_precision,
    get_shard_path
)


In [None]:
# Construct the shared ProductEncoder from the raw products CSV. It is passed to
# make_coo_row when building rows and its toPid method is later used to turn
# recommended indices back into product ids.
# NOTE(review): the exact id <-> index mapping lives in utils.ProductEncoder — confirm there.
product_encoder = ProductEncoder("../data/raw/products.csv")

In [None]:
# Build the training matrix: every JSON line of shards 0-7 is converted with
# make_coo_row (presumably one sparse row per record — confirm in utils) and
# the resulting rows are stacked vertically.
rows = []
for i in range(8):
    # `with` closes each shard file as soon as it has been consumed instead of
    # leaving the handle open until garbage collection.
    with open(get_shard_path(i)) as shard_file:
        for js in tqdm(json.loads(s) for s in shard_file):
            rows.append(make_coo_row(js['transaction_history'], product_encoder))
train_mat = sp.vstack(rows)

In [None]:
import implicit

# Fit each candidate nearest-neighbour recommender on the transposed training
# matrix, score it on every record of shard 15 with
# normalized_average_precision, and print the mean score next to the model's tag.
for model, tag in [
    # NOTE(review): this entry is labelled "tfidf200" but is built with K=30 —
    # confirm which of the two is intended.
    (implicit.nearest_neighbours.TFIDFRecommender(K=30), "tfidf200"),
    (implicit.nearest_neighbours.TFIDFRecommender(K=2), "tfidf2"),
    # Each (model, K) configuration is listed once; a repeated K=2 cosine entry
    # would only refit and re-evaluate the same configuration.
    (implicit.nearest_neighbours.CosineRecommender(K=1), "cosine1"),
    (implicit.nearest_neighbours.CosineRecommender(K=2), "cosine2"),
    (implicit.nearest_neighbours.CosineRecommender(K=3), "cosine3"),
    (implicit.nearest_neighbours.CosineRecommender(K=5), "cosine5"),
    (implicit.nearest_neighbours.CosineRecommender(K=10), "cosine10"),
    (implicit.nearest_neighbours.CosineRecommender(K=20), "cosine20"),
    (implicit.nearest_neighbours.CosineRecommender(K=30), "cosine30"),
    (implicit.nearest_neighbours.CosineRecommender(K=50), "cosine50"),
]:
    scores = []
    model.fit(train_mat.T)
    # `with` closes the validation shard after each model's evaluation pass
    # rather than leaking one open handle per model.
    with open(get_shard_path(15)) as shard_file:
        for js in tqdm(json.loads(s) for s in shard_file):
            # The record's history is converted on its own and passed as
            # user_items, so it is addressed as userid=0
            # (assumes make_coo_row yields a single-row matrix — TODO confirm).
            row = make_coo_row(js['transaction_history'], product_encoder).tocsr()
            raw_recs = model.recommend(
                userid=0,
                user_items=row,
                N=30,
                filter_already_liked_items=False,
                recalculate_user=True,
            )
            # NOTE(review): unpacking (idx, score) pairs assumes recommend()
            # returns a list of tuples — verify against the installed implicit version.
            recommended_items = product_encoder.toPid([idx for (idx, score) in raw_recs])
            gt_items = js["target"][0]["product_ids"]
            ap = normalized_average_precision(gt_items, recommended_items)
            scores.append(ap)
    # Average the per-record scores; `mean_ap` must be defined before it is printed.
    mean_ap = np.mean(scores)
    print(tag, mean_ap)