In [25]:
import mkl
import pandas as pd
import numpy as np
from lenskit import batch, util, topn
from lenskit.algorithms import Recommender
from lenskit.algorithms.als import ImplicitMF
import pyarrow 
import os
import sys
import subprocess
import dask.dataframe as dd
import time
import datetime

mkl.set_num_threads(1)

def average_precision(rec, gt):
    '''
    rec : recommendation list for one user
    gt : ground truth for one user
    '''
    score = 0.0
    num_hits = 0.0

    for i,item in enumerate(rec):
        if item in gt:
            num_hits += 1.0
            score += num_hits / (i+1.0)
    
    return score / num_hits if num_hits != 0.0 else 0.0

def evaluation(rec, gt):
    '''
    rec : dataframe
    gt : dataframe
    '''
    df = pd.merge(rec, gt, how='inner', on='user')
    score = [average_precision(df.item_x[i], df.item_y[i]) for i in range(len(df))]
    return np.array(score).mean()

print("start:", datetime.datetime.now())

train_path = "asl-train_small.parquet"
val_path = "asl-val_small.parquet"
test_path = "asl-test_gt.parquet"
train_dd = dd.read_parquet(train_path, engine = 'pyarrow')
val_dd = dd.read_parquet(val_path, engine='pyarrow')
test_dd = dd.read_parquet(test_path, engine='pyarrow')
print('read success')
train = train_dd.compute().rename(columns={'user_id':'user', 'track_id':'item', 'score':'rating'})
train = train.groupby('user')['item'].agg(list).reset_index()
val = val_dd.compute().rename(columns={'user_id':'user', 'track_id':'item', 'score':'rating'})
test = test_dd.compute().rename(columns={'user_id':'user', 'gt':'item'})

print('train:', train.shape)
print('val:', val.shape)
print('test:', test.shape)

''' TRAIN MODEL'''
model = ImplicitMF(features=3, iterations=3, reg=1, weight=10, use_ratings=True)
recommender = Recommender.adapt(model)
s = time.time()
recommender.fit(train)
e = time.time()
print(f'train time: {e-s}')

''' GENERATE RECOMMENDATIONS FOR USERS IN TRAIN DATA'''
s2 = time.time()
recommendations = batch.recommend(recommender, train.user.unique(), 100)
e2 = time.time()
print(f'inference time: {e2-s2}')
recommendations = recommendations.groupby('user')['item'].agg(list).reset_index()
recommendations.to_csv("recommendations.csv")
val = val.groupby('user')['item'].agg(list).reset_index()
print(recommendations.head())

'''Evaluation'''
print(f'MAP train: {evaluation(recommendations, train)}')
print(f'MAP val: {evaluation(recommendations, val)}')
print(f'MAP test: {evaluation(recommendations, test)}')
print("end:", datetime.datetime.now())

MAP train: 0.0
MAP val: 0.0049411345233550275
MAP test: 0.0006169501309728373
