# **Modelo de Recomendación**

In [7]:
# Importando librerías necesarias
import pandas as pd
import matplotlib as plt

%matplotlib inline

# Utlizaremos el modelo SVD
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

In [8]:
# Generando los df
df_t = pd.read_csv('datasets/full_titles.csv')



In [9]:
df_t.head(2)

Unnamed: 0,id,type,title,director,cast,country,date_added,release_year,rating,duration_int,duration_type,listed_in,description,platform,score
0,as1,movie,the grand seduction,don mckellar,"brendan gleeson, taylor kitsch, gordon pinsent",canada,2021-03-30,2014,g,113,min,"comedy, drama",a small fishing village must procure a local d...,amazon,3.47
1,as2,movie,take care good night,girish joshi,"mahesh manjrekar, abhay mahajan, sachin khedekar",india,2021-03-30,2018,13+,110,min,"drama, international",a metro family decides to fight a cyber crimin...,amazon,3.55


In [10]:
df_s = df_data_scores  = pd.read_csv('datasets/full_scores.csv')

In [11]:
df_s.head(2)

Unnamed: 0,userId,score,timestamp,movieId
0,1,1.0,2015-03-09,as680
1,1,4.5,2015-03-09,ns2186


In [12]:
# Renombrando las columnas 'listed_in' y 'id'
df_t = df_t.rename(columns={'listed_in': 'gender'})

df_t = df_t.rename(columns={'id': 'movieId'})


In [13]:
# Generando nuevamente el df_t con las columnas de interés
df_t = df_t[['movieId', 'title', 'gender', 'platform',]]

# Reestablenciendo los índices
df_t = df_t.reset_index(drop=True)

df_t.head()

Unnamed: 0,movieId,title,gender,platform
0,as1,the grand seduction,"comedy, drama",amazon
1,as2,take care good night,"drama, international",amazon
2,as3,secrets of deception,"action, drama, suspense",amazon
3,as4,pink: staying true,documentary,amazon
4,as5,monster maker,"drama, fantasy",amazon


**Preparando el modelo**

In [14]:
# Limitando el dataset a N_filas
N_filas = 100000 

# Creando un objeto Reader que interpreta los datos de calificación en una escala de 1 a 5.
reader = Reader(rating_scale=(1, 5))

# Conviertiendo el DataFrame en un formato que puede ser procesado por el modelo.
data = Dataset.load_from_df(df_s[['userId', 'movieId', 'score']][:N_filas], reader)



In [15]:
# Separando los datos en train y test
train, test = train_test_split(data, test_size=0.25)

In [16]:
# Entrenando el modelo SVD

model = SVD()

model.fit(train)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1cf22fd7d00>

In [17]:
# Prediciendo con el modelo

predictions = model.test(test)


In [18]:
# Probando el modelo de predicción
predictions[5]

Prediction(uid=201, iid='ns7143', r_ui=2.0, est=3.6448485973964995, details={'was_impossible': False})

In [19]:
# Probando el modelo de predicción con datos al azar

model.predict(8,'hs12')


Prediction(uid=8, iid='hs12', r_ui=None, est=3.169038841347522, details={'was_impossible': False})

**Predicción para un usuario (seleccionado al azar)**

In [20]:
# Prediciendo en forma completa con un usuario al azar y recomendando
user  = 112655 
rating = 4   # Tomamos películas a las que haya calificado con 4 o 5 estrellas
df_user = df_s[(df_s.userId == user) & (df_s.score >= rating)]
df_user = df_user.reset_index(drop=True)

df_user = pd.merge(df_user, df_t[['movieId', 'title']], left_on='movieId', right_on='movieId', how='left')

df_user

Unnamed: 0,userId,score,timestamp,movieId,title
0,112655,4.0,2009-11-28,as8333,cinderella
1,112655,4.0,2010-04-24,hs2181,declared
2,112655,4.5,2011-01-26,as552,the lucy show - vol. 2
3,112655,4.5,2009-11-28,as8894,something like a business
4,112655,4.5,2009-08-11,as7834,these old broads
...,...,...,...,...,...
98,112655,4.5,2011-01-26,as4305,we were children
99,112655,5.0,2010-09-04,hs1810,toilet-bound hanako-kun
100,112655,4.0,2011-01-26,ns4010,jeff dunham: minding the monsters
101,112655,4.5,2011-02-22,ns7689,p se pm tak


In [21]:
recommend_user = df_t.iloc[:23000].copy()
print(recommend_user.shape)
recommend_user.head()

(22998, 4)


Unnamed: 0,movieId,title,gender,platform
0,as1,the grand seduction,"comedy, drama",amazon
1,as2,take care good night,"drama, international",amazon
2,as3,secrets of deception,"action, drama, suspense",amazon
3,as4,pink: staying true,documentary,amazon
4,as5,monster maker,"drama, fantasy",amazon


In [22]:
# Extrayendo los títulos que el usuario ya ha visto

viewed_by_user = df_s[df_s['userId'] == user]
print(viewed_by_user.shape)
viewed_by_user.head()

(162, 4)


Unnamed: 0,userId,score,timestamp,movieId
9913839,112655,3.0,2009-08-11,ns8181
9913840,112655,4.0,2009-11-28,as8333
9913841,112655,4.0,2010-04-24,hs2181
9913842,112655,4.5,2011-01-26,as552
9913843,112655,4.5,2009-11-28,as8894


In [23]:
# Eliminando los títulos ya vistos por el usuario

recommend_user = recommend_user[~recommend_user['movieId'].isin(viewed_by_user['movieId'])]

recommend_user.shape

(22836, 4)

In [24]:
# Efectuando la recomendación

recommend_user['Estimate_Score'] = recommend_user['movieId'].apply(lambda x: model.predict(user, x).est)


In [25]:
recommend_user = recommend_user.sort_values('Estimate_Score', ascending=False)

recommend_user.head(5)

Unnamed: 0,movieId,title,gender,platform,Estimate_Score
8231,as8232,the wheel of time,"adventure, drama, fantasy",amazon,3.991992
9391,as9392,vallanzasca angel of evil,"action, drama",amazon,3.980652
2005,as2006,jonas kaufmann - a global star in private,documentary,amazon,3.978767
12852,hs1735,jesus camp,documentaries,hulu,3.97682
16984,ns2794,the english game,"british tv shows, international tv shows, tv d...",netflix,3.96408


**Predicción para un usuario (seleccionado al azar) y un título (seleccionado al azar)**

In [34]:
def movie_recommend(userId, movieId):
    # Prediciendo la calificación otorgada por el usuario al título seleccionado
    prediction = model.predict(userId, str(movieId))

    # Si el resultado de la predicción mayor o igual a 3.5, se recomienda el título
    if prediction.est >= 3.5:
        return "Recomendada", prediction.est
    else:
        return "No Recomendada", prediction.est

In [35]:
# Probando la función

# id usuario
user_id = 8
# id película
movie_id = 'as8'

recommend = movie_recommend(user_id, movie_id)

print("La película es:", recommend)


La película es: ('No Recomendada', 3.083196501699244)


**Evaluación y Optimización del Modelo**

In [36]:
# Evaluando el modelo
from surprise import accuracy

accuracy.rmse(predictions)

RMSE: 0.9663


0.966276276961709

In [37]:
# Optimizando el modelo 
from surprise.model_selection import cross_validate
import numpy as np

rmse_test_means = []
factores = [1,2,4,8,16,32,64,128]

for factor in factores:
    print(factor)
    model = SVD(n_factors=factor)
    cv = cross_validate(model, data, measures=['RMSE'], cv = 3, verbose=True)
    rmse_test_means.append(np.mean(cv['test_rmse']))

1
Evaluating RMSE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9505  0.9497  0.9471  0.9491  0.0014  
Fit time          0.90    0.68    0.52    0.70    0.16    
Test time         0.46    0.31    0.40    0.39    0.06    
2
Evaluating RMSE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9511  0.9494  0.9493  0.9499  0.0008  
Fit time          0.53    0.66    0.76    0.65    0.10    
Test time         0.53    0.30    0.33    0.39    0.10    
4
Evaluating RMSE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9499  0.9513  0.9481  0.9498  0.0013  
Fit time          0.78    0.60    0.68    0.69    0.08    
Test time         0.54    0.27    0.33    0.38    0.12    
8
Evaluating RMSE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9588  0.9

**GRADIO. Interfaz gráfica para el modelo**

In [38]:
# Creando una función para extraer el titulo de la película o serie
def get_title(movieId):
    return df_t[df_t.movieId == movieId].title.iloc[0].title()

In [39]:
# Probando la función creada
get_title('ds108')


'Miraculous: Tales Of Ladybug & Cat Noir'

In [40]:
# Fusionando la función  "get_movie_recommendation"  declarada anteriomente con la actual 'get_title' 

def title_recommended(userId, movieId):
    recommended, score = movie_recommend(userId, movieId)
    title = get_title(movieId)
    return (recommended, score, title)

In [41]:
# Probando la nueva función
title_recommended(8, 'ds108')

('No Recomendada',
 2.8522571450131338,
 'Miraculous: Tales Of Ladybug & Cat Noir')

In [43]:
# Importando libreria Gradio para realizar la interface gráfica
import gradio as gr

In [44]:
# Creando la interface

title = str("Sistema de Recomendación de Series y Películas")

with gr.Blocks(title=title) as demo:
    user_id = gr.inputs.Number(label='Id de Usuario')
    movie = gr.Textbox(label='Id de Serie o Película')
    recommendation  = gr.Button('Ver Recomendación')
    title = gr.Textbox(label='Título')
    output = gr.Textbox(label= '¿Recomendada?')
    score = gr.Textbox(label='Ratio de Recomendación')
    recommendation.click(fn = title_recommended, inputs=[user_id,movie], outputs=[output, score,title])
    
demo.launch(share = True)





Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://4ea5ee6e9e9d42f2ed.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces




Traceback (most recent call last):
  File "c:\Users\consul_05\AppData\Local\Programs\Python\Python310\lib\site-packages\gradio\routes.py", line 394, in run_predict
    output = await app.get_blocks().process_api(
  File "c:\Users\consul_05\AppData\Local\Programs\Python\Python310\lib\site-packages\gradio\blocks.py", line 1075, in process_api
    result = await self.call_function(
  File "c:\Users\consul_05\AppData\Local\Programs\Python\Python310\lib\site-packages\gradio\blocks.py", line 884, in call_function
    prediction = await anyio.to_thread.run_sync(
  File "c:\Users\consul_05\AppData\Local\Programs\Python\Python310\lib\site-packages\anyio\to_thread.py", line 31, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
  File "c:\Users\consul_05\AppData\Local\Programs\Python\Python310\lib\site-packages\anyio\_backends\_asyncio.py", line 937, in run_sync_in_worker_thread
    return await future
  File "c:\Users\consul_05\AppData\Local\Programs\Python\Python310\lib\sit