In [2]:
import pandas as pd
import numpy as np
from lightfm import LightFM

## 1. Data Pre-Proceesing

### A- User Interactions

In [5]:
ratings_df = pd.read_csv("Data/archive/rating.csv")

In [13]:
ratings_df["userId"] = ratings_df["userId"].astype("int32")
ratings_df["movieId"] = ratings_df["movieId"].astype("int32")
ratings_df["rating"] = ratings_df["rating"].astype("float32")

In [8]:
ratings_df.drop(["timestamp"], axis=1, inplace=True)
ratings_df.head(2)

Unnamed: 0,userId,movieId,rating
0,1,2,3.5
1,1,29,3.5


In [14]:
ratings_df.values

array([[1.00000e+00, 2.00000e+00, 3.50000e+00],
       [1.00000e+00, 2.90000e+01, 3.50000e+00],
       [1.00000e+00, 3.20000e+01, 3.50000e+00],
       ...,
       [1.38493e+05, 6.96440e+04, 3.00000e+00],
       [1.38493e+05, 7.02860e+04, 5.00000e+00],
       [1.38493e+05, 7.16190e+04, 2.50000e+00]])

---

## 2. Dataset preparation

In [17]:
from lightfm.data import Dataset

dataset = Dataset()

In [18]:
dataset.fit(ratings_df["userId"], ratings_df["movieId"])

In [20]:
interactions, weights = dataset.build_interactions(ratings_df.values)

In [22]:
from lightfm.cross_validation import random_train_test_split

train_interactions, test_interactions = random_train_test_split(
    interactions, test_percentage=0.2, random_state=42
)
train_weights, test_weights = random_train_test_split(
    weights, test_percentage=0.2, random_state=42
)

In [24]:
train_interactions, test_interactions

(<138493x26744 sparse matrix of type '<class 'numpy.int32'>'
 	with 16000210 stored elements in COOrdinate format>,
 <138493x26744 sparse matrix of type '<class 'numpy.int32'>'
 	with 4000053 stored elements in COOrdinate format>)

In [25]:
train_weights, test_weights

(<138493x26744 sparse matrix of type '<class 'numpy.float32'>'
 	with 16000210 stored elements in COOrdinate format>,
 <138493x26744 sparse matrix of type '<class 'numpy.float32'>'
 	with 4000053 stored elements in COOrdinate format>)

---

## 3. Model

In [35]:
model = LightFM(loss="warp", learning_schedule="adagrad", random_state=42)
model.fit(
    interactions=train_interactions,
    sample_weight=train_weights,
    epochs=50,
    num_threads=14,
    verbose=True,
)

Epoch: 100%|██████████| 75/75 [08:33<00:00,  6.84s/it]


<lightfm.lightfm.LightFM at 0x73beeae409d0>

---

## 4. Evaulation

In [36]:
from lightfm.evaluation import precision_at_k, recall_at_k, auc_score

train_roc_auc = auc_score(model, train_interactions, num_threads=14).mean()
test_roc_auc = auc_score(model, test_interactions, num_threads=14).mean()

train_precision = precision_at_k(model, train_interactions, num_threads=14).mean()
test_precision = precision_at_k(model, test_interactions,train_interactions=train_interactions,k=15, num_threads=14).mean()

train_recall = recall_at_k(model, train_interactions, num_threads=14).mean()
test_recall = recall_at_k(model, test_interactions,train_interactions=train_interactions,k=15, num_threads=14).mean()

In [37]:
print(f"Train ROC AUC: {train_roc_auc:.2f}\tTest ROC AUC: {test_roc_auc:.2f}")
print(f"Train Precision: {train_precision:.2f}\tTest Precision: {test_precision:.2f}")
print(f"Train Recall: {train_recall:.2f}\tTest Recall: {test_recall:.2f}")

Train ROC AUC: 0.99	Test ROC AUC: 0.98
Train Precision: 0.49	Test Precision: 0.25
Train Recall: 0.09	Test Recall: 0.19
