In [290]:
import pandas as pd
import numpy as np
from lightfm import LightFM

## 1. Data Pre-Proceesing

### A- User Interactions

In [351]:
ratings_df = pd.read_csv("Data/rating_user_IQR.csv")

In [352]:
ratings_df["userId"] = ratings_df["userId"].astype("int32")
ratings_df["movieId"] = ratings_df["movieId"].astype("int32")
ratings_df["rating"] = ratings_df["rating"].astype("float32")

In [353]:
ratings_df.head(2)

Unnamed: 0,userId,movieId,rating
0,1,2,3.5
1,1,29,3.5


In [354]:
ratings_df['rating'] = ratings_df['rating'].apply(lambda x: 1 if x >= 4 else 0)

In [355]:
ratings_df.values

array([[     1,      2,      0],
       [     1,     29,      0],
       [     1,     32,      0],
       ...,
       [138493,  69644,      0],
       [138493,  70286,      1],
       [138493,  71619,      0]])

### B- Movie Features

In [356]:
movies_df = pd.read_csv("Data/movie_user_IQR.csv")

In [357]:
movies_df.head(2)

Unnamed: 0,movieId,title,genres,year,Action,Adventure,Animation,Children,Comedy,Crime,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story,Adventure|Animation|Children|Comedy|Fantasy,1995.0,0,1,1,1,1,0,...,0,0,0,0,0,0,0,0,0,0
1,2,Jumanji,Adventure|Children|Fantasy,1995.0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [358]:
movies_df.columns

Index(['movieId', 'title', 'genres', 'year', 'Action', 'Adventure',
       'Animation', 'Children', 'Comedy', 'Crime', 'Documentary', 'Drama',
       'Fantasy', 'Film-Noir', 'Horror', 'IMAX', 'Musical', 'Mystery',
       'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western'],
      dtype='object')

In [359]:
movies_df.drop(
    #["title", "avg_movie_rating", "movie_youth_rate", "movie_popularity_rate"],
    ["title", "genres","year"],
    inplace=True,
    axis=1,
)

In [360]:
movies_df.head(2)

Unnamed: 0,movieId,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1,2,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0


In [361]:
genres = movies_df.columns.drop("movieId")

In [362]:
movies_df["genres"]=movies_df[genres].apply(lambda row: [col for col in genres if row[col] == 1], axis=1)
movies_df.head(2)

Unnamed: 0,movieId,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,...,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western,genres
0,1,0,1,1,1,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,"[Adventure, Animation, Children, Comedy, Fantasy]"
1,2,0,1,0,1,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,"[Adventure, Children, Fantasy]"


In [363]:
movie_features_matrix=list(zip(movies_df.movieId, movies_df.genres))
movie_features_matrix

[(1, ['Adventure', 'Animation', 'Children', 'Comedy', 'Fantasy']),
 (2, ['Adventure', 'Children', 'Fantasy']),
 (3, ['Comedy', 'Romance']),
 (4, ['Comedy', 'Drama', 'Romance']),
 (5, ['Comedy']),
 (6, ['Action', 'Crime', 'Thriller']),
 (7, ['Comedy', 'Romance']),
 (8, ['Adventure', 'Children']),
 (9, ['Action']),
 (10, ['Action', 'Adventure', 'Thriller']),
 (11, ['Comedy', 'Drama', 'Romance']),
 (12, ['Comedy', 'Horror']),
 (13, ['Adventure', 'Animation', 'Children']),
 (14, ['Drama']),
 (15, ['Action', 'Adventure', 'Romance']),
 (16, ['Crime', 'Drama']),
 (17, ['Drama', 'Romance']),
 (18, ['Comedy']),
 (19, ['Comedy']),
 (20, ['Action', 'Comedy', 'Crime', 'Drama', 'Thriller']),
 (21, ['Comedy', 'Crime', 'Thriller']),
 (22, ['Crime', 'Drama', 'Horror', 'Mystery', 'Thriller']),
 (23, ['Action', 'Crime', 'Thriller']),
 (24, ['Drama', 'Sci-Fi']),
 (25, ['Drama', 'Romance']),
 (26, ['Drama']),
 (27, ['Children', 'Drama']),
 (28, ['Drama', 'Romance']),
 (29, ['Adventure', 'Drama', 'Fantasy'

## C. User Features

In [364]:
user_genres = ratings_df.merge(movies_df.drop(['genres'],axis=1),on="movieId", how="left").drop(["movieId","rating"],axis=1)

In [365]:
user_genres.head(2)

Unnamed: 0,userId,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1,1,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,0,0


In [366]:
user_genres=user_genres.groupby("userId").sum()

In [369]:
user_genres

Unnamed: 0_level_0,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1,66,73,10,19,41,21,0,43,69,0,45,2,3,18,11,40,42,9,4
3,61,50,4,10,52,21,1,58,20,1,32,0,6,11,16,93,50,6,3
7,61,59,7,17,122,17,0,130,23,4,11,1,9,16,112,68,45,17,9
11,223,163,66,71,145,58,7,149,84,2,78,27,15,30,59,229,171,14,2
14,33,72,47,72,140,12,0,89,44,0,1,4,31,8,76,35,18,5,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
138483,53,30,9,16,91,37,1,164,19,2,8,3,10,14,68,20,64,11,6
138484,52,27,3,6,47,33,1,68,9,0,8,2,3,11,29,19,58,8,5
138486,63,51,14,24,72,18,1,50,40,0,79,6,9,17,18,41,83,4,1
138490,5,4,0,3,32,18,1,144,3,2,1,1,5,12,28,3,20,7,1


In [370]:
user_genres = user_genres.div(user_genres.sum(axis=1), axis=0)
user_genres.head(2)

Unnamed: 0_level_0,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1,0.127907,0.141473,0.01938,0.036822,0.079457,0.040698,0.0,0.083333,0.133721,0.0,0.087209,0.003876,0.005814,0.034884,0.021318,0.077519,0.081395,0.017442,0.007752
3,0.123232,0.10101,0.008081,0.020202,0.105051,0.042424,0.00202,0.117172,0.040404,0.00202,0.064646,0.0,0.012121,0.022222,0.032323,0.187879,0.10101,0.012121,0.006061


In [371]:
user_genres = user_genres.round(2)
user_genres.head(2)

Unnamed: 0_level_0,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1,0.13,0.14,0.02,0.04,0.08,0.04,0.0,0.08,0.13,0.0,0.09,0.0,0.01,0.03,0.02,0.08,0.08,0.02,0.01
3,0.12,0.1,0.01,0.02,0.11,0.04,0.0,0.12,0.04,0.0,0.06,0.0,0.01,0.02,0.03,0.19,0.1,0.01,0.01


In [372]:
user_features = list(zip(user_genres.index, user_genres.to_dict(orient="records")))
user_features

[(1,
  {'Action': 0.13,
   'Adventure': 0.14,
   'Animation': 0.02,
   'Children': 0.04,
   'Comedy': 0.08,
   'Crime': 0.04,
   'Documentary': 0.0,
   'Drama': 0.08,
   'Fantasy': 0.13,
   'Film-Noir': 0.0,
   'Horror': 0.09,
   'IMAX': 0.0,
   'Musical': 0.01,
   'Mystery': 0.03,
   'Romance': 0.02,
   'Sci-Fi': 0.08,
   'Thriller': 0.08,
   'War': 0.02,
   'Western': 0.01}),
 (3,
  {'Action': 0.12,
   'Adventure': 0.1,
   'Animation': 0.01,
   'Children': 0.02,
   'Comedy': 0.11,
   'Crime': 0.04,
   'Documentary': 0.0,
   'Drama': 0.12,
   'Fantasy': 0.04,
   'Film-Noir': 0.0,
   'Horror': 0.06,
   'IMAX': 0.0,
   'Musical': 0.01,
   'Mystery': 0.02,
   'Romance': 0.03,
   'Sci-Fi': 0.19,
   'Thriller': 0.1,
   'War': 0.01,
   'Western': 0.01}),
 (7,
  {'Action': 0.08,
   'Adventure': 0.08,
   'Animation': 0.01,
   'Children': 0.02,
   'Comedy': 0.17,
   'Crime': 0.02,
   'Documentary': 0.0,
   'Drama': 0.18,
   'Fantasy': 0.03,
   'Film-Noir': 0.01,
   'Horror': 0.02,
   'IMAX': 0

---

## 2. Dataset preparation

In [373]:
from lightfm.data import Dataset
dataset = Dataset()

In [374]:
dataset.fit(
    users=ratings_df["userId"],
    items=ratings_df["movieId"],
    item_features=genres,
    user_features=genres,
)

In [375]:
dataset.model_dimensions()

(38124, 19677)

In [376]:
dataset.item_features_shape()

(19658, 19677)

In [377]:
dataset.user_features_shape()

(38105, 38124)

### A- Interactions

In [378]:
interactions, weights = dataset.build_interactions(ratings_df.values)

In [379]:
from lightfm.cross_validation import random_train_test_split

train_interactions, test_interactions = random_train_test_split(
    interactions, test_percentage=0.7, random_state=42
)
train_weights, test_weights = random_train_test_split(
    weights, test_percentage=0.7, random_state=42
)

In [380]:
train_interactions, test_interactions

(<38105x19658 sparse matrix of type '<class 'numpy.int32'>'
 	with 2988700 stored elements in COOrdinate format>,
 <38105x19658 sparse matrix of type '<class 'numpy.int32'>'
 	with 6973636 stored elements in COOrdinate format>)

In [381]:
train_weights, test_weights

(<38105x19658 sparse matrix of type '<class 'numpy.float32'>'
 	with 2988700 stored elements in COOrdinate format>,
 <38105x19658 sparse matrix of type '<class 'numpy.float32'>'
 	with 6973636 stored elements in COOrdinate format>)

### B- Movies Features

In [382]:
processed_movie_features = dataset.build_item_features(movie_features_matrix)

## C- User Features

In [383]:
processed_user_features = dataset.build_user_features(user_features)

---

## 3. Model

In [384]:
model = LightFM(loss="warp", learning_schedule="adagrad",no_components=100,random_state=42)
model.fit(
    interactions=train_interactions,
    sample_weight=train_weights,
    item_features=processed_movie_features,
    user_features=processed_user_features,
    epochs=50,
    num_threads=14,
    verbose=True,
)

Epoch: 100%|██████████| 50/50 [13:20<00:00, 16.01s/it]


<lightfm.lightfm.LightFM at 0x7bd4c8ceb550>

---

## 4. Evaulation

In [385]:
from lightfm.evaluation import precision_at_k, recall_at_k, auc_score

train_roc_auc = auc_score(
    model,
    train_interactions,
    item_features=processed_movie_features,
    user_features=processed_user_features,
    num_threads=14,
).mean()
test_roc_auc = auc_score(
    model,
    test_interactions,
    item_features=processed_movie_features,
    user_features=processed_user_features,
    num_threads=14,
).mean()

train_precision = precision_at_k(
    model,
    train_interactions,
    item_features=processed_movie_features,
    user_features=processed_user_features,
    num_threads=14,
).mean()
test_precision = precision_at_k(
    model,
    test_interactions,
    train_interactions=train_interactions,
    item_features=processed_movie_features,
    user_features=processed_user_features,
    k=15,
    num_threads=14,
).mean()

train_recall = recall_at_k(
    model,
    train_interactions,
    item_features=processed_movie_features,
    user_features=processed_user_features,
    num_threads=14,
).mean()
test_recall = recall_at_k(
    model,
    test_interactions,
    train_interactions=train_interactions,
    k=15,
    item_features=processed_movie_features,
    user_features=processed_user_features,
    num_threads=14,
).mean()

In [386]:
print(f"Train ROC AUC: {train_roc_auc:.2f}\tTest ROC AUC: {test_roc_auc:.2f}")
print(f"Train Precision: {train_precision:.2f}\tTest Precision: {test_precision:.2f}")
print(f"Train Recall: {train_recall:.2f}\tTest Recall: {test_recall:.2f}")

Train ROC AUC: 0.97	Test ROC AUC: 0.96
Train Precision: 0.25	Test Precision: 0.69
Train Recall: 0.04	Test Recall: 0.06


---

## Cross Validation

In [387]:
import numpy as np
import scipy.sparse as sp

def _shuffle(uids, iids, data, random_state):
    shuffle_indices = np.arange(len(uids))
    random_state.shuffle(shuffle_indices)
    return uids[shuffle_indices], iids[shuffle_indices], data[shuffle_indices]

def inverse_cross_validation_split(interactions, n_splits=5, random_state=None):
    """
    Split interactions into `n_splits` test sets and `n-1` train sets for inverse cross-validation.
    
    This function creates `n_splits` folds of the dataset, with each fold being used as the training set,
    and the remaining `n_splits-1` folds combined to form the test set.

    Parameters
    ----------
    interactions: scipy sparse matrix
        The interactions to split.
    n_splits: int, optional
        Number of folds. Must be at least 2.
    random_state: int or numpy.random.RandomState, optional
        Random seed used to initialize the numpy.random.RandomState number generator.
        Accepts an instance of numpy.random.RandomState for backwards compatibility.

    Yields
    ------
    (train, test): (scipy.sparse.COOMatrix, scipy.sparse.COOMatrix)
        A generator yielding `n_splits` tuples of (train data, test data).
    """
    
    if not sp.issparse(interactions):
        raise ValueError("Interactions must be a scipy.sparse matrix.")
    
    if not isinstance(random_state, np.random.RandomState):
        random_state = np.random.RandomState(seed=random_state)
    
    interactions = interactions.tocoo()
    shape = interactions.shape
    uids, iids, data = interactions.row, interactions.col, interactions.data
    
    uids, iids, data = _shuffle(uids, iids, data, random_state)
    
    fold_size = len(uids) // n_splits
    
    for i in range(n_splits):
        train_start = i * fold_size
        train_end = (i + 1) * fold_size if i < n_splits - 1 else len(uids)
        
        train_idx = slice(train_start, train_end)
        test_idx = np.concatenate([np.arange(0, train_start), np.arange(train_end, len(uids))])
        
        train = sp.coo_matrix(
            (data[train_idx], (uids[train_idx], iids[train_idx])),
            shape=shape,
            dtype=interactions.dtype,
        )
        test = sp.coo_matrix(
            (data[test_idx], (uids[test_idx], iids[test_idx])),
            shape=shape,
            dtype=interactions.dtype,
        )
        
        yield train, test


In [388]:
from lightfm.evaluation import precision_at_k, recall_at_k, auc_score

def test_model(model,train_interactions,test_interactions,processed_movie_features,processed_user_features):
    train_roc_auc = auc_score(
        model,
        train_interactions,
        item_features=processed_movie_features,
        user_features=processed_user_features,
        num_threads=14,
    ).mean()
    test_roc_auc = auc_score(
        model,
        test_interactions,
        item_features=processed_movie_features,
        user_features=processed_user_features,
        num_threads=14,
    ).mean()

    train_precision = precision_at_k(
        model,
        train_interactions,
        item_features=processed_movie_features,
        user_features=processed_user_features,
        num_threads=14,
    ).mean()
    test_precision = precision_at_k(
        model,
        test_interactions,
        train_interactions=train_interactions,
        item_features=processed_movie_features,
        user_features=processed_user_features,
        k=15,
        num_threads=14,
    ).mean()

    train_recall = recall_at_k(
        model,
        train_interactions,
        item_features=processed_movie_features,
        user_features=processed_user_features,
        num_threads=14,
    ).mean()
    test_recall = recall_at_k(
        model,
        test_interactions,
        train_interactions=train_interactions,
        k=15,
        item_features=processed_movie_features,
        user_features=processed_user_features,
        num_threads=14,
    ).mean()
    print(f"Train ROC AUC: {train_roc_auc:.2f}\tTest ROC AUC: {test_roc_auc:.2f}")
    print(f"Train Precision: {train_precision:.2f}\tTest Precision: {test_precision:.2f}")
    print(f"Train Recall: {train_recall:.2f}\tTest Recall: {test_recall:.2f}")
    return train_roc_auc,test_roc_auc,train_precision,test_precision,train_recall,test_recall

In [389]:
from scipy.sparse import coo_matrix
# Assuming you have a sparse interaction matrix named 'interactions'
n_splits = 4
random_state = 42
results={}
model = LightFM(loss="warp", learning_schedule="adagrad", random_state=random_state)
for i, (train, test) in enumerate(inverse_cross_validation_split(interactions, n_splits=n_splits, random_state=random_state)):
    print(f"Fold {i + 1}")
    model.fit(
        train,
        epochs=50,
        num_threads=14,
        verbose=True,
        item_features=processed_movie_features,
        user_features=processed_user_features,
    )
    results[i+1]=test_model(model,train,test,processed_movie_features,processed_user_features)
    print("Train set shape:", train.shape, "Test set shape:", test.shape)


Fold 1


Epoch: 100%|██████████| 50/50 [03:50<00:00,  4.61s/it]


Train ROC AUC: 0.97	Test ROC AUC: 0.97
Train Precision: 0.19	Test Precision: 0.67
Train Recall: 0.03	Test Recall: 0.06
Train set shape: (38105, 19658) Test set shape: (38105, 19658)
Fold 2


Epoch: 100%|██████████| 50/50 [04:04<00:00,  4.89s/it]


Train ROC AUC: 0.97	Test ROC AUC: 0.97
Train Precision: 0.19	Test Precision: 0.67
Train Recall: 0.03	Test Recall: 0.06
Train set shape: (38105, 19658) Test set shape: (38105, 19658)
Fold 3


Epoch: 100%|██████████| 50/50 [04:48<00:00,  5.77s/it]


Train ROC AUC: 0.97	Test ROC AUC: 0.97
Train Precision: 0.19	Test Precision: 0.67
Train Recall: 0.03	Test Recall: 0.06
Train set shape: (38105, 19658) Test set shape: (38105, 19658)
Fold 4


Epoch: 100%|██████████| 50/50 [04:52<00:00,  5.85s/it]


Train ROC AUC: 0.97	Test ROC AUC: 0.97
Train Precision: 0.19	Test Precision: 0.67
Train Recall: 0.03	Test Recall: 0.06
Train set shape: (38105, 19658) Test set shape: (38105, 19658)


---

## Prediction

In [390]:
items_mapping = {v: k for k, v in dataset.mapping()[2].items()}
items_mapping

{0: 2,
 1: 29,
 2: 32,
 3: 47,
 4: 50,
 5: 112,
 6: 151,
 7: 223,
 8: 253,
 9: 260,
 10: 293,
 11: 296,
 12: 318,
 13: 337,
 14: 367,
 15: 541,
 16: 589,
 17: 593,
 18: 653,
 19: 919,
 20: 924,
 21: 1009,
 22: 1036,
 23: 1079,
 24: 1080,
 25: 1089,
 26: 1090,
 27: 1097,
 28: 1136,
 29: 1193,
 30: 1196,
 31: 1198,
 32: 1200,
 33: 1201,
 34: 1208,
 35: 1214,
 36: 1215,
 37: 1217,
 38: 1219,
 39: 1222,
 40: 1240,
 41: 1243,
 42: 1246,
 43: 1249,
 44: 1258,
 45: 1259,
 46: 1261,
 47: 1262,
 48: 1266,
 49: 1278,
 50: 1291,
 51: 1304,
 52: 1321,
 53: 1333,
 54: 1348,
 55: 1350,
 56: 1358,
 57: 1370,
 58: 1374,
 59: 1387,
 60: 1525,
 61: 1584,
 62: 1750,
 63: 1848,
 64: 1920,
 65: 1967,
 66: 1994,
 67: 1997,
 68: 2021,
 69: 2100,
 70: 2118,
 71: 2138,
 72: 2140,
 73: 2143,
 74: 2173,
 75: 2174,
 76: 2193,
 77: 2194,
 78: 2253,
 79: 2288,
 80: 2291,
 81: 2542,
 82: 2628,
 83: 2644,
 84: 2648,
 85: 2664,
 86: 2683,
 87: 2692,
 88: 2716,
 89: 2761,
 90: 2762,
 91: 2804,
 92: 2872,
 93: 2918,
 94

In [391]:
original_ratings = pd.read_csv("Data/Data_LightFm/ratings.csv")

In [392]:
movies_df = pd.read_csv("Data/archive/movie.csv")

In [393]:
def make_pred (user_id, model,movies_df,processed_movie_features,processed_user_features,nb_preds=10):
    n_items = dataset.item_features_shape()[0]
    mapped_user_id = dataset.mapping()[0][user_id]
    watched_movies = ratings_df[ratings_df["userId"]==user_id]["movieId"].values
    mapped_watched_movies = [dataset.mapping()[2][movie] for movie in watched_movies]
    non_watched_movies = np.setdiff1d(np.arange(n_items), mapped_watched_movies)
    prediction = model.predict(user_ids=mapped_user_id,item_ids=non_watched_movies,item_features=processed_movie_features,user_features=processed_user_features)
    predicted_movies = prediction.argsort()[::-1] [:nb_preds]
    results = [items_mapping[item] for item in predicted_movies]
    recommended_movies = pd.DataFrame({"movieId":results,"title":movies_df[movies_df["movieId"].isin(results)]["title"],"genres":movies_df[movies_df["movieId"].isin(results)]["genres"],"Score":sorted(prediction)[::-1][:nb_preds]})
    return recommended_movies

In [394]:
def get_watched_movies(user_id):
    mapped_user_id = dataset.mapping()[0][user_id]
    watched_movies = interactions.tocsr()[mapped_user_id]
    watched_movies.indices
    mapped_watched_movies = [items_mapping[movie_id] for movie_id in watched_movies.indices]
    rated_watched_movies = pd.DataFrame({"movieId":mapped_watched_movies,"title":movies_df[movies_df.movieId.isin(mapped_watched_movies)].title}).merge(ratings_df[ratings_df.userId==user_id],on="movieId",how="left")
    return original_ratings[original_ratings.userId==user_id].merge(movies_df,on="movieId",how="left").sort_values("rating",ascending=False).set_index("movieId").loc[rated_watched_movies.sort_values("rating",ascending=False)["movieId"]]

In [395]:
def user_pred(user_id,processed_movie_features,processed_user_features,nb_preds=10):
    print("Watched Movies by user ",user_id)
    display(get_watched_movies(user_id)[:nb_preds])
    print("Recommended Movies for user ",user_id)
    display(make_pred(user_id,model,movies_df,processed_movie_features,processed_user_features,nb_preds))

In [396]:
user_id = 1
user_pred(user_id,processed_movie_features=processed_movie_features,processed_user_features=processed_user_features,nb_preds=10,)

Watched Movies by user  1


Unnamed: 0_level_0,userId,rating,title,genres
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
31696,1,4.0,Constantine (2005),Action|Fantasy|Horror|Thriller
2100,1,4.0,Splash (1984),Comedy|Fantasy|Romance
2288,1,4.0,"Thing, The (1982)",Action|Horror|Sci-Fi|Thriller
5171,1,4.0,"Time Machine, The (2002)",Action|Adventure|Sci-Fi
5540,1,4.0,Clash of the Titans (1981),Action|Adventure|Fantasy|Romance
2193,1,4.0,Willow (1988),Action|Adventure|Fantasy
2174,1,4.0,Beetlejuice (1988),Comedy|Fantasy
2173,1,4.0,"Navigator: A Mediaeval Odyssey, The (1988)",Adventure|Fantasy|Sci-Fi
2143,1,4.0,Legend (1985),Adventure|Fantasy|Romance
2140,1,4.0,"Dark Crystal, The (1982)",Adventure|Fantasy


Recommended Movies for user  1


Unnamed: 0,movieId,title,genres,Score
1,3039,Jumanji (1995),Adventure|Children|Fantasy,-196.185822
1193,2,Psycho (1960),Crime|Horror,-196.258942
1238,1219,Unforgiven (1992),Drama|Western,-196.284225
1810,1894,Six Days Seven Nights (1998),Adventure|Comedy|Romance,-196.288116
1848,7438,"Great Ziegfeld, The (1936)",Drama|Musical,-196.293198
2111,1932,Dirty Work (1998),Comedy,-196.350403
2952,46578,Trading Places (1983),Comedy,-196.376724
3839,2195,"Invisible Man, The (1933)",Horror|Sci-Fi,-196.386124
7312,3932,Kill Bill: Vol. 2 (2004),Action|Drama|Thriller,-196.417328
11119,1266,Little Miss Sunshine (2006),Adventure|Comedy|Drama,-196.434158
