In [34]:
import numpy as np
import pandas as pd
from scipy.spatial.distance import cosine
from sklearn.metrics import mean_squared_error

In [12]:
train = pd.read_csv("Recommendations-train.csv")
test  = pd.read_csv("Recommendations-test.csv")

In [13]:
train.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,451,25,5.0,854089175
1,206,17,5.0,850763267
2,69,4027,5.0,1021647538
3,148,5618,3.0,1482548682
4,407,76093,4.0,1424349580


In [14]:
test.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,5,36,4.0,847435292
1,8,339,3.0,839463509
2,8,432,1.0,839463702
3,8,440,3.0,839463728
4,9,2012,4.0,1044657237


# Recommendations with LightFM

In [4]:
# Install lightfm
# pip install lightfm
# alternative, to avoid install problems
#pip install git+https://github.com/daviddavo/lightfm

In [26]:
from lightfm import LightFM
#from lightfm.datasets import fetch_movielens
from lightfm.evaluation import precision_at_k

In [16]:
from lightfm.data import Dataset

In [17]:
# Identify unique users and movies 
dataset = Dataset()
dataset.fit(
    users=train['userId'].unique(), 
    items=train['movieId'].unique()
)

In [18]:
train_interactions, train_weights = dataset.build_interactions(
    [(u, i, r) for u, i, r in zip(
        train_df['userId'],
        train_df['movieId'],
        train_df['rating']
    )]
)

In [19]:
test_interactions, test_weights = dataset.build_interactions(
    [(u, i, r) for u, i, r in zip(
        test_df['userId'],
        test_df['movieId'],
        test_df['rating']
    )]
)

In [22]:
model = LightFM(loss='warp', no_components=50, learning_rate=0.05, random_state=42)

In [23]:
# 4. Initialize and train the model
model = LightFM(loss='warp', no_components=30)

model.fit(interactions, sample_weight=weights, epochs=20, num_threads=2)

<lightfm.lightfm.LightFM at 0x7304e7a547a0>

In [30]:
user_map, _, item_map, _ = dataset.mapping()

In [31]:
y_true = []
y_pred = []

In [32]:
for _, row in test.iterrows():
    u = user_map[row['userId']]
    i = item_map[row['movieId']]
    
    pred = model.predict(u, np.array([i]))[0]
    
    y_true.append(row['rating'])
    y_pred.append(pred)

In [38]:
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
print(f"Test RMSE: {rmse:.4f}")

Test RMSE: 4.3441


In [42]:
preddf = pd.DataFrame(y_true)
preddf.columns = ['True']
preddf.head()

Unnamed: 0,True
0,4.0
1,3.0
2,1.0
3,3.0
4,4.0


In [43]:
preddf['pred'] = y_pred

In [44]:
preddf.head()

Unnamed: 0,True,pred
0,4.0,-1.765883
1,3.0,0.125452
2,1.0,0.548526
3,3.0,-2.142364
4,4.0,-1.500574
