In [1]:
# Load IPython's autoreload extension so edits to local modules (e.g. ../lib/ml.py)
# are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: re-import modules automatically before each cell is executed.
%autoreload 2

In [2]:
# Import necessary libraries
import pickle
import sys
import warnings

import pandas as pd
from sklearn.model_selection import train_test_split

sys.path.append('../lib')  # Import the lib directory to access the functions

import ml  # Import the custom functions for machine learning

In [3]:
# Notebook-wide display and warning configuration
pd.options.display.max_columns = None  # no cap on the number of DataFrame columns shown
# Blanket filter: every warning category is silenced from this point on.
# NOTE(review): this also hides pandas/sklearn deprecation warnings — consider narrowing it.
warnings.simplefilter('ignore')

In [4]:
# 1. Read the raw CSV inputs (paths are relative to the notebook's directory)
df_interactions, df_ratings, df_favorites = map(
    pd.read_csv,
    ('../data/interactions.csv', '../data/ratings.csv', '../data/favorites.csv'),
)
# The first CSV column becomes the index of df_movies.
# NOTE(review): the next step renames the reset index to 'poster', so this column
# presumably holds poster URLs rather than a movie id — verify against the file.
df_movies = pd.read_csv('../data/imdb_clean.csv', index_col=0)

In [5]:
# 2. Rebuild df_movies so it is keyed by a positional movie_id while keeping the poster column
df_movies = (
    df_movies
    # Move the loaded index into a regular column; an unnamed index comes out as 'index'.
    .reset_index()
    # 'index' -> 'poster' keeps the old index values; 'rating' -> 'movie_rating'
    # (presumably to avoid clashing with the user 'rating' column used later — verify in ml).
    .rename(columns={'index': 'poster', 'rating': 'movie_rating'})
    # After the reset the index is the row position, which becomes the movie id.
    .assign(movie_id=lambda frame: frame.index)
    # Keep only the columns used downstream, in a fixed order.
    .loc[:, ['movie_id', 'poster', 'title', 'year', 'duration', 'genre', 'movie_rating', 'director', 'cast', 'description', 'emotions']]
    .set_index('movie_id')
)

In [6]:
# 3. Prepare the data for collaborative filtering
# Combine the interaction, rating, favorite and movie frames into one user/movie frame.
# The merge logic lives in ml.preprocess_data (not visible here); later cells read the
# 'rating', 'duration' and 'is_favorite' columns of the result.
df_user_movie = ml.preprocess_data(df_interactions, df_ratings, df_favorites, df_movies)

In [7]:
# 4. Fit the SVD collaborative-filtering model on the full user/movie frame.
# ml.train_svd_model returns the model together with a results object that is printed below.
svd_model, svd_results = ml.train_svd_model(df_user_movie)
print("SVD Model Results:", svd_results, sep="\n")

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    2.6536  2.4846  2.3316  2.3601  2.6227  2.4905  0.1314  
MAE (testset)     2.1458  2.0579  1.8662  1.9538  2.1558  2.0359  0.1118  
Fit time          0.05    0.04    0.03    0.03    0.04    0.04    0.01    
Test time         0.00    0.00    0.00    0.00    0.00    0.00    0.00    
SVD Model Results:
{'test_rmse': array([2.65364129, 2.48456596, 2.33159783, 2.3601265 , 2.62268275]), 'test_mae': array([2.14575383, 2.05785036, 1.86621032, 1.95379829, 2.15580671]), 'fit_time': (0.04588913917541504, 0.04399895668029785, 0.03099846839904785, 0.028412818908691406, 0.04300880432128906), 'test_time': (0.004002094268798828, 0.0029973983764648438, 0.0020008087158203125, 0.0030028820037841797, 0.00499415397644043)}


In [8]:
# 4.1. Keep only ratings strictly between 2 and 9.
# Comparisons against NaN are False, so rows without a rating are dropped as well.
df_filtered = df_user_movie.loc[
    lambda frame: frame['rating'].gt(2) & frame['rating'].lt(9)
]

# 4.2. Refit the SVD model on the filtered ratings.
# NOTE(review): this rebinds svd_model / svd_results, so the model pickled further down is
# the one fitted on df_filtered — confirm that is intended. Error metrics on the narrowed
# rating range are presumably not directly comparable with the unfiltered run.
svd_model, svd_results = ml.train_svd_model(df_filtered)
print("SVD Model Results:", svd_results, sep="\n")

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.5826  1.6507  1.6778  1.5152  1.6235  1.6100  0.0569  
MAE (testset)     1.3416  1.4099  1.4208  1.2988  1.3811  1.3705  0.0451  
Fit time          0.02    0.01    0.04    0.06    0.06    0.04    0.02    
Test time         0.00    0.00    0.00    0.00    0.00    0.00    0.00    
SVD Model Results:
{'test_rmse': array([1.58260688, 1.65072175, 1.67784841, 1.51516518, 1.62346385]), 'test_mae': array([1.3416173 , 1.40990175, 1.42081027, 1.29881262, 1.38114002]), 'fit_time': (0.017239809036254883, 0.011998414993286133, 0.04200172424316406, 0.06399846076965332, 0.06290888786315918), 'test_time': (0.001996278762817383, 0.002002716064453125, 0.002007722854614258, 0.001998424530029297, 0.001997709274291992)}


In [9]:
# 5. Assemble a feature matrix and target for the favorite classifier.
# NOTE(review): X and y are not read by any later cell visible here — ml.train_random_forest
# receives df_user_movie directly and returns its own X_test / y_test. Confirm they are still needed.
X = df_user_movie.loc[:, ['duration', 'rating']].fillna(value=0)  # missing values are replaced by 0
y = df_user_movie.loc[:, 'is_favorite']  # target: whether the movie was marked as favorite

In [11]:
# 6. Fit the RandomForest favorite classifier.
# ml.train_random_forest returns the model, four metrics and the held-out X_test / y_test
# that the following cells reuse.
(
    rf_model,
    accuracy,
    precision,
    recall,
    f1,
    X_test,
    y_test,
) = ml.train_random_forest(df_user_movie)
print(
    "RandomForest Results - "
    f"Accuracy: {accuracy}, Precision: {precision}, "
    f"Recall: {recall}, F1: {f1}"
)

RandomForest Results - Accuracy: 0.9750025199072674, Precision: 1.0, Recall: 0.5059760956175299, F1: 0.671957671957672


In [12]:
# 7. Score the fitted classifier again on the held-out split returned by the training step
y_pred = rf_model.predict(X_test)
accuracy, precision, recall, f1 = ml.evaluate_classification_model(y_test, y_pred)
print(
    "Updated Classification Metrics - "
    f"Accuracy: {accuracy}, Precision: {precision}, "
    f"Recall: {recall}, F1: {f1}"
)

Updated Classification Metrics - Accuracy: 0.9750025199072674, Precision: 1.0, Recall: 0.5059760956175299, F1: 0.671957671957672


In [13]:
# 8. Confusion matrix for the favorite classifier on the held-out split.
# Predictions are recomputed here with the same call as in the previous step.
y_pred = rf_model.predict(X_test)
cm = ml.evaluate_classification(y_test, y_pred)
print("Confusion Matrix:", cm, sep="\n")

Confusion Matrix:
[[9419    0]
 [ 248  254]]


In [14]:
# Persist both trained models to the notebook's working directory.
# `pickle` is imported in the import cell at the top of the notebook.
# NOTE: only unpickle these files from a trusted location — loading a pickle can
# execute arbitrary code.

# SVD model trained (the most recent binding of svd_model)
with open('svd_model.pkl', 'wb') as file:
    pickle.dump(svd_model, file)

# RandomForest model trained
with open('rf_model.pkl', 'wb') as file:
    pickle.dump(rf_model, file)

In [7]:
# Display the merged user/movie frame for inspection.
# NOTE(review): this cell's recorded execution count is out of sequence with the cells
# above it, so the output shown may be stale — re-run the notebook top to bottom to confirm.
df_user_movie

Unnamed: 0,user_id,movie_id,emotion,interaction_type,date,poster,title,year,duration,genre,movie_rating,director,cast,description,emotions,rating,is_favorite
0,917,6056,Relaxed,shown,2024-01-19 11:22:29.324201,https://m.media-amazon.com/images/M/MV5BOWNkZj...,Tralala,1945,0.469680,"['Drama', 'Musical']",5.9,Arnaud Larrieu,"['Jean-Marie Larrieu', 'Mathieu Amalric', 'Jos...",A 48-year-old singer/songwriter in search of t...,"['Happy', 'Down', 'Sweet', 'Relaxed']",,False
1,743,6333,Happy,shown,2024-01-11 11:22:29.343197,https://m.media-amazon.com/images/M/MV5BMTM1OD...,Bin-jip,1994,-0.970235,"['Crime', 'Drama', 'Romance']",7.9,Kim Ki-duk,"['Lee Seung-yun', 'Jae Hee', 'Hyuk-ho Kwon', '...",A transient young man breaks into empty homes ...,"['Scared', 'Happy', 'Down', 'Excited', 'Sweet']",,False
2,1,6542,Excited,view,2024-05-27 11:22:29.375745,https://m.media-amazon.com/images/M/MV5BYTQzZj...,Rings,2017,-0.340272,"['Drama', 'Horror', 'Mystery']",4.5,F. Javier Gutiérrez,"['Matilda Anna Ingrid Lutz', 'Alex Roe', 'John...",A young woman finds herself on the receiving e...,"['Scared', 'Down', 'Sweet', 'Excited']",,False
3,392,3429,Relaxed,shown,2023-12-23 11:22:29.385743,https://m.media-amazon.com/images/M/MV5BOWY1NG...,Dennis the Menace,2001,-0.700251,"['Comedy', 'Family']",5.7,Nick Castle,"['Walter Matthau', 'Mason Gamble', 'Joan Plowr...",When his parents must go out of town on busine...,"['Happy', 'Relaxed']",,False
4,105,5767,Inspired,shown,2023-12-07 11:22:29.397109,https://m.media-amazon.com/images/M/MV5BMDNhZG...,Evita,2002,1.144640,"['Biography', 'Drama', 'History']",6.3,Alan Parker,"['Madonna', 'Jonathan Pryce', 'Antonio Bandera...",The hit musical based on the life ofEva Perón(...,"['Inspired', 'Down', 'Sweet']",,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49599,928,745,Excited,shown,2024-05-10 11:29:36.534233,https://m.media-amazon.com/images/M/MV5BM2E3N2...,Savageland,2005,-1.240219,"['Crime', 'Horror', 'Thriller']",6.0,Phil Guidry,"['Simon Herbert', 'David Whelan', 'Heather Moo...",When a small town near the Arizona-Mexico bord...,"['Scared', 'Excited']",,False
49600,258,6229,Happy,shown,2024-09-29 11:29:36.541243,https://m.media-amazon.com/images/M/MV5BMTMyNT...,Fever Pitch,2002,-0.250278,"['Comedy', 'Drama', 'Romance']",6.2,Bobby Farrelly,"['Peter Farrelly', 'Drew Barrymore', 'Jimmy Fa...",Lindsay is stuck in the middle of her relation...,"['Happy', 'Down', 'Sweet', 'Relaxed']",,False
49601,363,7719,Excited,shown,2024-04-24 11:29:36.551180,https://m.media-amazon.com/images/M/MV5BMTc3ND...,Chaos,2023,-0.160283,"['Action', 'Crime', 'Drama']",6.4,Tony Giglio,"['Jason Statham', 'Ryan Phillippe', 'Wesley Sn...","Two cops, a rookie and a grizzled vet, pursue ...","['Scared', 'Down', 'Sweet', 'Excited']",,False
49602,506,3282,Relaxed,shown,2024-06-07 11:29:36.557177,https://m.media-amazon.com/images/M/MV5BM2Q3NW...,The Chronicles of Narnia: Prince Caspian,2022,1.819600,"['Action', 'Adventure', 'Family']",6.5,Andrew Adamson,"['Ben Barnes', 'Skandar Keynes', 'Georgie Henl...","The Pevensie siblings return to Narnia, where ...","['Happy', 'Excited', 'Relaxed']",,False
