# **Modelo de Recomendación**

In [1]:
# Importando librerías necesarias
import pandas as pd
import matplotlib as plt

%matplotlib inline

# Utlizaremos el modelo SVD
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

In [2]:
# Load the titles catalogue into a DataFrame
titles_path = 'datasets/full_titles.csv'
df_t = pd.read_csv(titles_path)



In [3]:
# Preview the first two rows of the titles frame
df_t.head(2)

Unnamed: 0,id,type,title,director,cast,country,date_added,release_year,rating,duration_int,duration_type,listed_in,description,platform,score
0,as1,movie,the grand seduction,don mckellar,"brendan gleeson, taylor kitsch, gordon pinsent",canada,2021-03-30,2014,g,113,min,"comedy, drama",a small fishing village must procure a local d...,amazon,3.47
1,as2,movie,take care good night,girish joshi,"mahesh manjrekar, abhay mahajan, sachin khedekar",india,2021-03-30,2018,13+,110,min,"drama, international",a metro family decides to fight a cyber crimin...,amazon,3.55


In [4]:
# Load the user ratings; both names are bound to the same DataFrame object
df_data_scores = pd.read_csv('datasets/full_scores.csv')
df_s = df_data_scores

In [5]:
# Preview the first two rows of the ratings frame
df_s.head(2)

Unnamed: 0,userId,score,timestamp,movieId
0,1,1.0,2015-03-09,as680
1,1,4.5,2015-03-09,ns2186


In [6]:
# Rename 'listed_in' -> 'gender' and 'id' -> 'movieId' in a single pass
column_aliases = {'listed_in': 'gender', 'id': 'movieId'}
df_t = df_t.rename(columns=column_aliases)


In [7]:
# Keep only the columns of interest and renumber the rows from zero
keep_cols = ['movieId', 'title', 'gender', 'platform']
df_t = df_t.loc[:, keep_cols].reset_index(drop=True)

df_t.head()

Unnamed: 0,movieId,title,gender,platform
0,as1,the grand seduction,"comedy, drama",amazon
1,as2,take care good night,"drama, international",amazon
2,as3,secrets of deception,"action, drama, suspense",amazon
3,as4,pink: staying true,documentary,amazon
4,as5,monster maker,"drama, fantasy",amazon


**Preparando el modelo**

In [8]:
# Maximum number of rating rows handed to the model
N_filas = 100000 

# Reader describing the rating scale as 1 to 5
reader = Reader(rating_scale=(1, 5))

# Take the first N_filas rows of the three columns Surprise expects
# (user, item, rating) and wrap them in a Surprise Dataset.
# NOTE(review): this keeps the *first* rows only; df_s looks ordered by userId,
# so users that appear later in the file would be unknown to the model — confirm.
rating_columns = ['userId', 'movieId', 'score']
ratings_subset = df_s[rating_columns].head(N_filas)
data = Dataset.load_from_df(ratings_subset, reader)



In [9]:
# Hold out 25% of the ratings for testing; the fixed seed makes the split
# identical on every run instead of changing after each kernel restart
train, test = train_test_split(data, test_size=0.25, random_state=42)

In [10]:
# Train the SVD model; the fixed seed makes the fitted model, and therefore
# every estimate it produces, the same on every run

model = SVD(random_state=42)

model.fit(train)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x28c166c3520>

In [11]:
# Score the held-out test split with the fitted model

predictions = model.test(test)


In [12]:
# Inspect one prediction from the test-set results (position 5)
predictions[5]

Prediction(uid=463, iid='ds1146', r_ui=3.0, est=2.8452671822303524, details={'was_impossible': False})

In [13]:
# Try the model on an arbitrary user/title pair (user 8, title 'hs12')

model.predict(8,'hs12')


Prediction(uid=8, iid='hs12', r_ui=None, est=3.3823620795244995, details={'was_impossible': False})

**Predicción para un usuario (seleccionado al azar)**

In [14]:
# Pick one user and list the titles they rated at or above the chosen score
user = 112655
rating = 4  # minimum score for a title to count as liked (4 stars and up)

liked_mask = df_s['userId'].eq(user) & df_s['score'].ge(rating)
df_user = (
    df_s.loc[liked_mask]
    .reset_index(drop=True)
    # Left join keeps every rating row even when the title is missing from df_t
    .merge(df_t[['movieId', 'title']], on='movieId', how='left')
)

df_user

Unnamed: 0,userId,score,timestamp,movieId,title
0,112655,4.0,2009-11-28,as8333,cinderella
1,112655,4.0,2010-04-24,hs2181,declared
2,112655,4.5,2011-01-26,as552,the lucy show - vol. 2
3,112655,4.5,2009-11-28,as8894,something like a business
4,112655,4.5,2009-08-11,as7834,these old broads
...,...,...,...,...,...
98,112655,4.5,2011-01-26,as4305,we were children
99,112655,5.0,2010-09-04,hs1810,toilet-bound hanako-kun
100,112655,4.0,2011-01-26,ns4010,jeff dunham: minding the monsters
101,112655,4.5,2011-02-22,ns7689,p se pm tak


In [15]:
# Candidate pool: an independent copy of (at most) the first 23000 catalogue rows
recommend_user = df_t.head(23000).copy()
print(recommend_user.shape)
recommend_user.head()

(22998, 4)


Unnamed: 0,movieId,title,gender,platform
0,as1,the grand seduction,"comedy, drama",amazon
1,as2,take care good night,"drama, international",amazon
2,as3,secrets of deception,"action, drama, suspense",amazon
3,as4,pink: staying true,documentary,amazon
4,as5,monster maker,"drama, fantasy",amazon


In [16]:
# Collect every rating row of the selected user, i.e. the titles already seen

is_selected_user = df_s['userId'].eq(user)
viewed_by_user = df_s[is_selected_user]
print(viewed_by_user.shape)
viewed_by_user.head()

(162, 4)


Unnamed: 0,userId,score,timestamp,movieId
9923315,112655,3.0,2009-08-11,ns8181
9923316,112655,4.0,2009-11-28,as8333
9923317,112655,4.0,2010-04-24,hs2181
9923318,112655,4.5,2011-01-26,as552
9923319,112655,4.5,2009-11-28,as8894


In [17]:
# Remove from the candidate pool every title the user has already rated

already_seen = recommend_user['movieId'].isin(viewed_by_user['movieId'])
recommend_user = recommend_user[~already_seen]

recommend_user.shape

(22836, 4)

In [18]:
# Estimate the score the selected user would give to each remaining candidate.
# assign() returns a new frame instead of writing a column into the
# boolean-filtered frame, which avoids pandas' SettingWithCopyWarning and
# keeps the cell safe to re-run.

recommend_user = recommend_user.assign(
    Estimate_Score=recommend_user['movieId'].apply(lambda x: model.predict(user, x).est)
)


In [29]:
# Rank the candidates from highest to lowest estimated score, then show the top five
recommend_user = recommend_user.sort_values(by='Estimate_Score', ascending=False)

recommend_user.head(n=5)

Unnamed: 0,movieId,title,gender,platform,Estimate_Score
12852,hs1735,jesus camp,documentaries,hulu,4.013338
11296,hs179,the simpsons,"adult animation, comedy, sitcom",hulu,3.996909
14549,ns359,the net,thrillers,netflix,3.965745
10359,ds692,henry hugglemonster,"animation, fantasy, kids",disney,3.960712
19031,ns4841,bad genius,"dramas, international movies, thrillers",netflix,3.953876


**Predicción para un usuario (seleccionado al azar) y un título (seleccionado al azar)**

In [24]:
def movie_recommend(userId, movieId, threshold=3.5):
    """Decide whether a title should be recommended to a user.

    The score is estimated with the notebook-level ``model`` object, so that
    name must be bound to a fitted model before this function is called.

    Args:
        userId: User id, passed to ``model.predict`` unchanged.
        movieId: Title id; converted to ``str`` before the prediction.
        threshold: Minimum estimated score for the title to be recommended.
            Defaults to 3.5, the cut-off this function originally hard-coded.

    Returns:
        tuple: ``(label, estimate)`` where ``label`` is ``"Recomendada"`` when
        ``estimate >= threshold`` and ``"No recomendada"`` otherwise, and
        ``estimate`` is the ``est`` attribute of the prediction.
    """
    estimate = model.predict(userId, str(movieId)).est
    label = "Recomendada" if estimate >= threshold else "No recomendada"
    return label, estimate

In [25]:
# Try the helper with one user id / title id pair
user_id, movie_id = 8, 'as8'

recommend = movie_recommend(user_id, movie_id)

print("La película es:", recommend)


La película es: ('No recomendada', 3.0466503223513737)


**Evaluación y Optimización del Modelo**

In [26]:
# Evaluate the model: RMSE over the test-set predictions
from surprise import accuracy  # NOTE(review): `accuracy` is already imported in the first cell

accuracy.rmse(predictions)

RMSE: 0.9516


0.9516229138745846

In [27]:
# Model tuning: 3-fold cross-validated RMSE for several numbers of latent factors
from surprise.model_selection import cross_validate
import numpy as np

rmse_test_means = []
factores = [1,2,4,8,16,32,64,128]

for factor in factores:
    print(factor)
    # Use a dedicated name for each candidate so the trained `model` that the
    # prediction cells and movie_recommend() rely on is not overwritten
    candidate = SVD(n_factors=factor)
    cv = cross_validate(candidate, data, measures=['RMSE'], cv = 3, verbose=True)
    # Record the mean test RMSE across the folds for this factor count
    rmse_test_means.append(np.mean(cv['test_rmse']))

1
Evaluating RMSE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9459  0.9523  0.9506  0.9496  0.0027  
Fit time          0.44    0.40    0.40    0.41    0.02    
Test time         0.27    0.17    0.18    0.21    0.04    
2
Evaluating RMSE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9435  0.9484  0.9546  0.9488  0.0046  
Fit time          0.39    0.40    0.40    0.40    0.01    
Test time         0.28    0.28    0.19    0.25    0.04    
4
Evaluating RMSE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9524  0.9499  0.9470  0.9497  0.0022  
Fit time          0.40    0.41    0.42    0.41    0.01    
Test time         0.18    0.28    0.28    0.24    0.05    
8
Evaluating RMSE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9543  0.9

**GRADIO. Interfaz gráfica para el modelo**