In [5]:
import pandas as pd
import numpy as np
from lightfm import LightFM
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score
from lightfm.evaluation import recall_at_k
from lightfm.cross_validation import random_train_test_split
from lightfm.data import Dataset
from scipy.sparse import csr_matrix




In [6]:
readers = pd.read_csv("../data/readers.csv")
readers = readers.rename(columns={"id":"user_id", "art_id":"nzz_id"})
readers.head()

Unnamed: 0,user_id,nzz_id
0,1,ld.154103
1,1,ld.142559
2,1,1.18331199
3,1,ld.144819
4,1,ld.1293110


In [7]:
print(readers.shape)
readers = readers.drop_duplicates()
print(readers.shape)

(27855, 2)
(27855, 2)


In [8]:
read_counts = readers["user_id"].value_counts(sort=True)
read_counts = read_counts.rename_axis("user_id").reset_index(name="read_count")
# Biorę pod uwagę tylko użytkowników, którzy przeczytali minimum 5 artykułów
min_read_count = 5
read_counts = read_counts[read_counts["read_count"] > min_read_count]

readers = readers[readers["user_id"].isin(read_counts["user_id"])]
readers

Unnamed: 0,user_id,nzz_id
10,3,1.18306956
11,3,ld.140509
12,3,ld.151615
13,3,ld.152374
14,3,ld.148811
...,...,...
27850,1000,ld.153608
27851,1000,ld.137667
27852,1000,ld.1289804
27853,1000,ld.154369


In [9]:
dataset = Dataset()
dataset.fit(readers["user_id"].to_numpy(), readers["nzz_id"].to_numpy())

matrix_interactions, matrix_weights = dataset.build_interactions(readers.to_numpy())

In [20]:
type(matrix_interactions)

scipy.sparse.coo.coo_matrix

In [10]:
train, test = random_train_test_split(matrix_interactions)

In [11]:
model = LightFM(learning_rate=0.05, loss='warp')
model.fit(train, epochs=1000)

<lightfm.lightfm.LightFM at 0x184d2ba8760>

In [12]:
train_precision = precision_at_k(model, train, k=5).mean()
test_precision = precision_at_k(model, test, k=5).mean()

train_recall = recall_at_k(model, train, k=5).mean()
test_recall = recall_at_k(model, test, k=5).mean()

train_auc = auc_score(model, train).mean()
test_auc = auc_score(model, test).mean()

print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))
print('Recall: train %.2f, test %.2f.' % (train_recall, test_recall))
print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))

Precision: train 0.09, test 0.01.
Recall: train 0.02, test 0.01.
AUC: train 1.00, test 0.60.


In [13]:
model.predict(1, [1,2,3,4,5,6,7,8])

array([-6.752632 , -7.953919 , -7.7626963, -6.402547 , -7.9321218,
       -9.509697 , -7.598825 , -9.399061 ], dtype=float32)

In [14]:
import implicit

In [15]:
base_model = implicit.als.AlternatingLeastSquares()
base_model.fit(train)

100%|██████████| 15/15 [00:03<00:00,  4.36it/s]


In [16]:
base_model.recommend(1, test)

TypeError: 'coo_matrix' object is not subscriptable