In [1]:
# Notebook setup: imports, import-path and BLAS configuration, and logging.
# The statement order in this cell matters; see the notes on the individual lines.
import logging
import warnings
# Suppress every FutureWarning for the rest of the session.
warnings.simplefilter(action='ignore', category=FutureWarning)
import sys
import os
# Must run before the `recommenders` import at the bottom of this cell.
sys.path.append(os.path.abspath("../../.."))  # Adds the project root to sys.path
# Set before numpy is imported.
# NOTE(review): presumably pins OpenBLAS to one thread because implicit warns about
# multithreaded BLAS slowing ALS down - confirm.
os.environ['OPENBLAS_NUM_THREADS'] = '1'
import numpy as np 
import math
from scipy.sparse import coo_matrix
from scipy.sparse import csr_matrix
import implicit
from implicit.nearest_neighbours import bm25_weight
from sklearn.model_selection import train_test_split
# NOTE(review): math, coo_matrix and train_test_split are not used by any cell visible here.

# Configure logging to display INFO level messages
logging.basicConfig(level=logging.INFO)

# Create a logger
# Uses the name "implicit"; the "INFO:implicit:Final training loss" line printed
# during model fitting is emitted under this logger name.
log = logging.getLogger("implicit")

# Star import: save_interaction_values, get_data and load_csv_list are used below and
# presumably come from this module - TODO replace with explicit imports once confirmed.
from recommenders.datasets.shared import *

In [12]:
# Location of the raw interactions workbook, relative to this notebook.
INTERACTIONS_XLSX = "../../../../pa-data/Interactions.xlsx"

# NOTE(review): presumably converts the workbook into the files that get_data() and
# load_csv_list() read in the following cells - confirm against recommenders.datasets.shared.
save_interaction_values(INTERACTIONS_XLSX)

In [3]:
# Y and R are (product, user) matrices; get_data() prints their shapes when it runs.
Y, R, num_products = get_data()

# Convert to CSR and apply BM25 weighting in a single step.
sparse_product_user = bm25_weight(csr_matrix(Y), K1=100, B=0.8)

# Most functions in implicit expect a (user, product) sparse matrix rather than
# (product, user), so keep a transposed CSR copy for fitting and recommending.
sparse_user_product = sparse_product_user.transpose().tocsr()

# Product names: first column of every row in Products.csv.
product_names = np.array([row[0] for row in load_csv_list('Products.csv')])

# User ids: the first row of Users.csv.
user_ids = load_csv_list('Users.csv')[0]

Y (117, 4) R (117, 4)
num_products 117
num_users 4


In [None]:
# https://github.com/benfred/implicit/issues/281
# Fit an ALS model on the BM25-weighted (user, product) matrix.
#
# ALS starts from randomly initialised factors, so without a fixed seed every
# Restart & Run All produces different scores and rankings. Pinning random_state
# makes the fit repeatable (multi-threaded runs may still differ in the last
# floating-point digits).
ALS_SEED = 42

model = implicit.als.AlternatingLeastSquares(
    factors=100,
    regularization=0.1,
    alpha=1.0,
    iterations=128,
    calculate_training_loss=True,  # reported through the "implicit" logger
    random_state=ALS_SEED,
)
model.fit(sparse_user_product, show_progress=True)

  0%|          | 0/128 [00:00<?, ?it/s]

INFO:implicit:Final training loss 0.0101


In [None]:
# user_ids holds strings, so the numeric id is converted before the lookup.
user_id = str(55284)
user_index = user_ids.index(user_id)

# Top-10 products for this user. Already-liked products are deliberately kept
# so their model scores can be compared with the original interaction values.
indexes, scores = model.recommend(
    user_index,
    sparse_user_product[user_index],
    N=10,
    filter_already_liked_items=False,
)

In [22]:
import numpy as np
import pandas as pd

# Y is (product, user), so this column holds the user's raw value for every product.
original_scores = Y[:, user_index]

# Side-by-side view of the model score and the raw interaction value for each
# recommended product, plus whether the user already interacted with it.
pd.DataFrame({
    "product": product_names[indexes],
    "score": scores,
    "original_score": original_scores[indexes],
    # np.isin replaces np.in1d, which is deprecated since NumPy 2.0; for the 1-D
    # `indexes` array the result is identical. `.indices` of the single-row CSR
    # slice lists the product columns stored for this user.
    "already_liked": np.isin(indexes, sparse_user_product[user_index].indices),
})

Unnamed: 0,product,score,original_score,already_liked
0,Vill 074376,0.950983,0.683233,True
1,EN 4512042,0.948742,0.442119,True
2,HT1A770,0.948496,0.425515,True
3,RUBI 01958,0.948495,0.425515,True
4,STHT0-05926,0.945273,0.285231,True
5,SENCO AX10EAAP,0.945272,0.285231,True
6,MAX TW1061,0.945272,0.285231,True
7,71 02 200,0.943119,0.233527,True
8,6-PHT150,0.943118,0.233527,True
9,WBS 1214526,0.941874,0.211304,True


In [23]:
# Row index (into product_names) of the product whose neighbours we want to inspect.
query_product_index = 100

# Nearest products in the learned factor space, using the library's default count.
indexes, scores = model.similar_items(query_product_index)

similar_products = product_names[indexes]
pd.DataFrame({"product": similar_products, "score": scores})

Unnamed: 0,product,score
0,VILL 055320,1.0
1,FMST82961-1,0.999994
2,FME1250K,0.999994
3,WBS 1214526,0.999994
4,EN 4511502,0.999994
5,UN120/1MS 6-22/17,0.999994
6,SENCO AX10EAAP,0.999994
7,F 561512,0.999994
8,3E 1130864,0.999993
9,STHT0-05926,0.999993
