In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the pre-built train interaction matrix from disk. Pickle support is
# explicitly disabled, so only plain array data is accepted from the file.
TRAIN_MATRIX_PATH = 'Data/train_rating_matrix.npy'
train_interactions_matrix = np.load(TRAIN_MATRIX_PATH, allow_pickle=False)
# Bare last expression: the (truncated) array repr becomes the cell output.
train_interactions_matrix

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0]], dtype=int8)

In [3]:
from lightfm.data import Dataset

# Register one integer id per matrix row and one per matrix column, so the
# LightFM Dataset mapping has the same dimensions as the interaction matrix.
n_rows, n_cols = train_interactions_matrix.shape[0], train_interactions_matrix.shape[1]
row_ids = np.arange(n_rows)
col_ids = np.arange(n_cols)

dataset = Dataset()
dataset.fit(row_ids, col_ids)

In [4]:
# Read back the interaction-matrix dimensions known to the Dataset object.
interactions_shape = dataset.interactions_shape()
num_users, num_items = interactions_shape
print(f'Num users: {num_users}, num_items {num_items}.')

Num users: 138493, num_items 26744.


In [5]:
from scipy.sparse import coo_matrix

# Build a sparse COOrdinate-format matrix from the dense train array; this
# sparse object is what the model-fitting and evaluation cells consume.
coo_interactions_matrix = coo_matrix(
    train_interactions_matrix,
)

In [6]:
# Load the held-out (test) interaction matrix; pickle support stays disabled.
TEST_MATRIX_PATH = 'Data/test_rating_matrix.npy'
test_interactions_matrix = np.load(TEST_MATRIX_PATH, allow_pickle=False)

In [7]:
# Sparse COOrdinate-format version of the test array, mirroring what was done
# for the train matrix.
test_coo_interactions_matrix = coo_matrix(
    test_interactions_matrix,
)

In [8]:
# Display the sparse test matrix's repr (shape, dtype and number of stored
# elements) as the cell output.
test_coo_interactions_matrix

<138493x26744 sparse matrix of type '<class 'numpy.int8'>'
	with 2002349 stored elements in COOrdinate format>

In [9]:
from lightfm import LightFM

# Hyperparameters of the WARP-loss model, gathered in one place; random_state
# is set explicitly to 42.
warp_hyperparams = dict(
    loss='warp',
    learning_schedule='adagrad',
    max_sampled=10,
    learning_rate=0.05,
    random_state=42,
)
model = LightFM(**warp_hyperparams)

# fit() stays the last expression so the fitted model's repr remains the cell
# output; verbose=True shows the per-epoch progress bar.
model.fit(
    interactions=coo_interactions_matrix,
    epochs=50,
    num_threads=14,
    verbose=True,
)

Epoch: 100%|██████████| 50/50 [02:22<00:00,  2.85s/it]


<lightfm.lightfm.LightFM at 0x72c6c39a16d0>

In [10]:
%%time
# Score every item index for the single user index 45, then print the index of
# the best-scoring item together with its score.
all_item_ids = np.arange(num_items)
repeated_user_ids = np.full(num_items, 45)  # one user id per (user, item) pair
prediction = model.predict(repeated_user_ids, all_item_ids)
print(prediction.argmax(), prediction.max())

11 2.882569
CPU times: user 0 ns, sys: 5.99 ms, total: 5.99 ms
Wall time: 8.7 ms


In [11]:
# Sanity check: print the shapes of the train and test sparse matrices so they
# can be compared side by side.
labelled_matrices = (
    ("Training Interactions Shape:", coo_interactions_matrix),
    ("Test Interactions Shape:", test_coo_interactions_matrix),
)
for label, matrix in labelled_matrices:
    print(label, matrix.shape)


Training Interactions Shape: (138493, 26744)
Test Interactions Shape: (138493, 26744)


In [12]:
from lightfm.evaluation import auc_score

# AUC on the held-out interactions. The train interactions are passed as well
# so that known train positives are left out of the ranking, consistent with
# the precision/recall and reciprocal-rank cells of this notebook.
test_auc = auc_score(
    model=model,
    test_interactions=test_coo_interactions_matrix,
    train_interactions=coo_interactions_matrix,
    num_threads=12,
)
# This is a *test* metric; the historical name `train_auc` is kept only as an
# alias because the following cell reads it.
train_auc = test_auc


In [13]:
# Average the array returned by auc_score and report it with two decimals.
# NOTE: despite its name, `train_auc` was computed on the test interactions.
mean_auc = train_auc.mean()
print(f"test_auc = {mean_auc:.2f}")

test_auc = 0.99


In [14]:
from lightfm.evaluation import precision_at_k, recall_at_k

# Both metrics share the same evaluation setup: top-50 cut-off, scored on the
# test interactions, with the train interactions supplied alongside.
top_k_eval_kwargs = dict(
    test_interactions=test_coo_interactions_matrix,
    train_interactions=coo_interactions_matrix,
    k=50,
    num_threads=12,
)
precision = precision_at_k(model, **top_k_eval_kwargs).mean()
recall = recall_at_k(model, **top_k_eval_kwargs).mean()
print(f"precision = {precision:.2f} \t recall = {recall:.2f}")

precision = 0.10 	 recall = 0.41


In [15]:
# Import under an alias so the metric function is not overwritten by its own
# result (the original rebound `reciprocal_rank` from function to float).
from lightfm.evaluation import reciprocal_rank as reciprocal_rank_score

# Mean of the reciprocal-rank values over the test interactions, with the
# train interactions supplied alongside.
mrr = reciprocal_rank_score(
    model,
    test_interactions=test_coo_interactions_matrix,
    train_interactions=coo_interactions_matrix,
    num_threads=12,
).mean()
# The next cell displays `reciprocal_rank`, so keep that name bound to the value.
reciprocal_rank = mrr

In [16]:
# Display the mean reciprocal rank computed in the previous cell
# (at this point `reciprocal_rank` holds the number, not the function).
reciprocal_rank

0.41726044

## Experiment: LightFM random train/test split

In [17]:
# Load the full (unsplit) interaction matrix; pickle support stays disabled.
FULL_MATRIX_PATH = 'Data/interaction_matrix.npy'
interaction_matrix = np.load(FULL_MATRIX_PATH, allow_pickle=False)

In [18]:
# Sparse COOrdinate-format version of the full interaction array, which is the
# input handed to the random splitter.
scipy_coo_matrix = coo_matrix(
    interaction_matrix,
)

In [19]:
from lightfm.cross_validation import random_train_test_split

# Randomly split the interactions, with test_percentage=0.2 and
# random_state=42 passed to the splitter.
split_pair = random_train_test_split(
    scipy_coo_matrix,
    test_percentage=0.2,
    random_state=42,
)
train, test = split_pair

In [20]:
# Display the shapes of both halves of the random split as a tuple.
train.shape, test.shape

((138493, 26744), (138493, 26744))

In [21]:
from lightfm import LightFM

# Same WARP hyperparameters as the first experiment, this time trained on the
# `train` half of the random split.
# NOTE: this rebinds `model`, replacing the model fitted earlier in the notebook.
warp_hyperparams = dict(
    loss='warp',
    learning_schedule='adagrad',
    max_sampled=10,
    learning_rate=0.05,
    random_state=42,
)
model = LightFM(**warp_hyperparams)

# fit() stays the last expression so the fitted model's repr remains the cell
# output; verbose=True shows the per-epoch progress bar.
model.fit(
    interactions=train,
    epochs=50,
    num_threads=14,
    verbose=True,
)

Epoch: 100%|██████████| 50/50 [02:26<00:00,  2.94s/it]


<lightfm.lightfm.LightFM at 0x72c5bb79cd90>

In [22]:
# Sanity check for THIS experiment: report the shapes of the `train` / `test`
# matrices produced by random_train_test_split. The original cell printed the
# shapes of the first experiment's matrices instead (copy-paste leftover).
print("Training Interactions Shape:", train.shape)
print("Test Interactions Shape:", test.shape)


Training Interactions Shape: (138493, 26744)
Test Interactions Shape: (138493, 26744)


In [23]:
from lightfm.evaluation import auc_score

# AUC on the held-out half of the random split. The train half is passed as
# well so that known train positives are left out of the ranking, consistent
# with the precision/recall and reciprocal-rank cells of this notebook.
test_auc = auc_score(
    model=model,
    test_interactions=test,
    train_interactions=train,
    num_threads=12,
)
# This is a *test* metric; the historical name `train_auc` is kept only as an
# alias because the following cell reads it.
train_auc = test_auc


In [24]:
# Average the array returned by auc_score and report it with two decimals.
# NOTE: despite its name, `train_auc` was computed on the test interactions.
mean_auc = train_auc.mean()
print(f"test_auc = {mean_auc:.2f}")

test_auc = 0.99


In [25]:
from lightfm.evaluation import precision_at_k, recall_at_k

# Both metrics share the same evaluation setup: top-50 cut-off, scored on the
# `test` half of the split, with the `train` half supplied alongside.
top_k_eval_kwargs = dict(
    test_interactions=test,
    train_interactions=train,
    k=50,
    num_threads=12,
)
precision = precision_at_k(model, **top_k_eval_kwargs).mean()
recall = recall_at_k(model, **top_k_eval_kwargs).mean()
print(f"precision = {precision:.2f} \t recall = {recall:.2f}")

precision = 0.10 	 recall = 0.41


In [26]:
# Import under an alias so the metric function is not overwritten by its own
# result (the original rebound `reciprocal_rank` from function to float).
from lightfm.evaluation import reciprocal_rank as reciprocal_rank_score

# Mean of the reciprocal-rank values over the `test` half of the split, with
# the `train` half supplied alongside.
mrr = reciprocal_rank_score(
    model,
    test_interactions=test,
    train_interactions=train,
    num_threads=12,
).mean()
# The next cell displays `reciprocal_rank`, so keep that name bound to the value.
reciprocal_rank = mrr

In [27]:
# Display the mean reciprocal rank computed in the previous cell
# (at this point `reciprocal_rank` holds the number, not the function).
reciprocal_rank

0.41122517