# Movie Recommender

In [1]:
import pandas as pd
import numpy as np

from scipy.sparse import coo_matrix
from lightfm import LightFM
from lightfm.data import Dataset

from config.settings import DB_PATH
import data_processing.load_sqlite as load_sql



## Load and Prepare the Data using LightFM

In [2]:
df = load_sql.fetch_user_movie_ratings(tmdb_only=True)
df.head()

Unnamed: 0,movie_id,user_id,rating,timestamp
0,4584,1,4.0,944249077
1,451,1,1.0,944250228
2,902,1,2.0,943230976
3,37557,1,5.0,944249077
4,63,1,5.0,943228858


In [3]:
# Convert ratings to implicit feedback (1 = liked, 0 = not liked)
df['rating'] = (df['rating'] >= 4).astype(int)

# Create dataset object
dataset = Dataset()
dataset.fit(users=df['user_id'].unique(), items=df['movie_id'].unique())

# Build interactions matrix
(interactions, weights) = dataset.build_interactions(zip(df['user_id'], df['movie_id'], df['rating']))

## Add Movie Metadata

In [4]:
movies = load_sql.fetch_one_hot_genres()
movies.head()

Unnamed: 0,movie_id,genre_Adventure,genre_Fantasy,genre_Animation,genre_Drama,genre_Horror,genre_Action,genre_Comedy,genre_History,genre_Western,genre_Thriller,genre_Crime,genre_Documentary,genre_Science Fiction,genre_Mystery,genre_Music,genre_Romance,genre_Family,genre_War,genre_TV Movie
0,2,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0
1,3,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0
2,5,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0
3,6,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0
4,8,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0


In [5]:
# Convert one-hot genre encoding into a list of features per movie
movies['features'] = movies.drop(columns=['movie_id']).apply(lambda x: list(x[x == 1].index), axis=1)

# Fit dataset with movie features
dataset.fit_partial(items=movies['movie_id'].unique(), item_features=movies['features'].explode().unique())

# Build item features matrix
item_features = dataset.build_item_features(zip(movies['movie_id'], movies['features']))

## Train the LightFM Model

In [6]:
model = LightFM(loss='warp')  # 'warp' works well for implicit feedback
model.fit(interactions, item_features=item_features, epochs=5, num_threads=4)

<lightfm.lightfm.LightFM at 0x1453be970>

## Recommend!

In [7]:
def recommend_movies(model, user_id, dataset, item_features, n=10):
    # Get item mapping (movie IDs to their index in the matrix)
    movie_ids = list(dataset.mapping()[2].keys())
    
    # Ensure that movie_ids corresponds to the correct rows in item_features
    movie_index_map = dataset.mapping()[2]  # This is the dictionary mapping movie IDs to the matrix index
    
    # Predict scores for each movie
    scores = model.predict(user_id, np.array([movie_index_map[movie_id] for movie_id in movie_ids]), item_features=item_features)
    
    # Sort movies by score
    top_movies = [movie_ids[i] for i in np.argsort(-scores)[:n]]

    return top_movies

In [8]:
user_id = 1
recommend_movies(model, user_id, dataset, item_features)

[13, 14, 568, 4584, 278, 424, 687, 489, 680, 289]

### Examples

In [9]:
user_id = 1
for mv in recommend_movies(model, user_id, dataset, item_features):
    print(load_sql.fetch_movie_title(mv))

Forrest Gump (1994)
American Beauty (1999)
Apollo 13 (1995)
Sense and Sensibility (1995)
The Shawshank Redemption (1994)
Schindler's List (1993)
Dead Man Walking (1995)
Good Will Hunting (1997)
Pulp Fiction (1994)
Casablanca (1943)


In [10]:
user_id = 2
for mv in recommend_movies(model, user_id, dataset, item_features):
    print(load_sql.fetch_movie_title(mv))

The Matrix (1999)
Star Wars (1977)
Jurassic Park (1993)
Forrest Gump (1994)
Pulp Fiction (1994)
The Empire Strikes Back (1980)
Return of the Jedi (1983)
Raiders of the Lost Ark (1981)
Independence Day (1996)
Back to the Future (1985)
