<a href="https://colab.research.google.com/github/edwinmgallego/Best-README-Template/blob/main/avd_121_Mineria_de_datos_clase03.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# 📌 1. Importar librerías necesarias
!pip install surprise  # Instalamos la librería si no está instalada

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from surprise import Dataset, Reader, SVD
from surprise.model_selection import cross_validate

# 📌 2. Cargar el dataset de Netflix
df = pd.read_csv("netflix_titles.csv")
df = df.dropna(subset=["cast", "rating", "director"])  # Eliminamos valores nulos
df["date_added"] = pd.to_datetime(df["date_added"].str.strip(), format='mixed')
  # Convertimos fechas
df["popularity"] = df.groupby("title")["show_id"].transform("count")
df["popularity"] = (df["popularity"] - df["popularity"].min()) / (df["popularity"].max() - df["popularity"].min())

# 📌 3. Preparar los datos para Surprise
reader = Reader(rating_scale=(0, 1))
data = Dataset.load_from_df(df[["show_id", "title", "popularity"]], reader)
trainset = data.build_full_trainset()

# 📌 4. Crear y entrenar el modelo SVD
model = SVD()
model.fit(trainset)

# 📌 5. Evaluación del modelo
cross_validate(model, data, measures=["RMSE", "MAE"], cv=5, verbose=True)

# 📌 6. Función para recomendar películas
def recomendar_peliculas(usuario_id, df, modelo, n_recomendaciones=5):
    peliculas_no_vistas = df[["show_id", "title"]].drop_duplicates()
    peliculas_no_vistas["predicted_rating"] = peliculas_no_vistas["show_id"].apply(lambda x: modelo.predict(usuario_id, x).est)
    recomendaciones = peliculas_no_vistas.sort_values("predicted_rating", ascending=False).head(n_recomendaciones)
    return recomendaciones

# 📌 7. Generar recomendaciones para un usuario ficticio (ID = 1001)
recomendaciones = recomendar_peliculas(1001, df, model)
print("🎬 Recomendaciones para el usuario 1001:")
print(recomendaciones)


Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    nan     nan     nan     nan     nan     nan     nan     
MAE (testset)     nan     nan     nan     nan     nan     nan     nan     
Fit time          0.09    0.09    0.09    0.09    0.09    0.09    0.00    
Test time         0.01    0.00    0.00    0.01    0.01    0.01    0.00    
🎬 Recomendaciones para el usuario 1001:
     show_id                   title  predicted_rating
2         s3               Ganglands                 1
6148   s6149     American Warfighter                 1
6173   s6174  Antariksha Ke Rakhwale                 1
6172   s6173    Antar: Son of Shadad                 1
6171   s6172    Ant-Man and the Wasp                 1
