In [9]:
#############################################
### 1. LIBRERÍAS NECESARIAS
#############################################
from google.colab import drive
import numpy as np
import pandas as pd
import joblib
from sklearn.metrics.pairwise import cosine_similarity
from sklearn import neighbors
from ipywidgets import interact
import os

In [14]:
############################################
### 2. CARGAR DATOS PREPROCESADOS
#############################################

# Mount Google Drive so the project folders are reachable from the Colab runtime.
drive.mount('/content/drive')

# Folder holding the preprocessed artefacts; it is created when it does not exist yet.
# Adjust this path to wherever your 'salidas' folder lives.
output_path = '/content/drive/My Drive/cod/A3_marketing/salidas'
os.makedirs(output_path, exist_ok=True)

# Load the preprocessed data from Google Drive.
# NOTE(review): joblib.load unpickles the file, so only load artefacts you trust.
ruta_df_scaled = f"{output_path}/df_scaled.joblib"
df_scaled = joblib.load(ruta_df_scaled)

# Load the ratings table.
# Adjust this path to wherever your ratings.csv file lives.
ratings_path = '/content/drive/My Drive/cod/A3_marketing/data/ratings.csv'
ratings = pd.read_csv(ratings_path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [15]:
#############################################
### 3. MODELO DE RECOMENDACIÓN - CONTENIDO (SIMILITUD COSENO)
#############################################

# Feature columns fed to the content-based models: every column of df_scaled
# except 'movieId' and 'title'.
columnas_modelo = df_scaled.columns.difference(['movieId', 'title'])

def recomendacion_pelicula(titulo):
    """Return the movies most cosine-similar to ``titulo``.

    Similarity is computed between the feature vector of the first row whose
    'title' equals ``titulo`` and every row of ``df_scaled``, using the
    columns listed in ``columnas_modelo``.

    Parameters
    ----------
    titulo : str
        Exact value of the 'title' column to look up.

    Returns
    -------
    pandas.DataFrame
        Up to 10 rows with columns 'title' and 'similitud', sorted by
        descending similarity. The queried row itself is never included.

    Raises
    ------
    IndexError
        If no row of ``df_scaled`` has the requested title.
    """
    # Work with the row *position* so the logic does not depend on the index
    # labels of df_scaled being unique or contiguous.
    coincidencias = np.flatnonzero((df_scaled['title'] == titulo).to_numpy())
    if coincidencias.size == 0:
        raise IndexError(f"Título '{titulo}' no encontrado.")
    pos_ref = coincidencias[0]

    matriz = df_scaled[columnas_modelo]
    # iloc[[pos]] keeps a 2-D (1 x n_features) shape, as cosine_similarity expects.
    similitudes = cosine_similarity(matriz.iloc[[pos_ref]], matriz)[0]

    # Only the title column is needed for the result; avoid copying every feature.
    df_resultado = df_scaled[['title']].assign(similitud=similitudes)

    # Remove the queried row explicitly. Skipping "the first row after sorting"
    # is wrong when another movie ties with similarity 1.0: the query could
    # stay in its own recommendations while a real match is dropped.
    es_otra = np.ones(len(df_resultado), dtype=bool)
    es_otra[pos_ref] = False
    df_resultado = df_resultado[es_otra].sort_values(
        by='similitud', ascending=False, kind='stable'
    )
    return df_resultado[['title', 'similitud']].head(10)

# Expose the recommender through a widget: passing the list of titles makes
# ipywidgets render a dropdown and call the function with the selected title.
interact(recomendacion_pelicula, titulo=df_scaled['title'].tolist())

interactive(children=(Dropdown(description='titulo', options=('Toy Story', 'Jumanji', 'Grumpier Old Men', 'Fat…

In [16]:
#############################################
### 4. RECOMENDACIÓN KNN BASADO EN CONTENIDO
#############################################

# Fit the KNN model on the feature columns and precompute, for every row,
# the positions of its 11 nearest rows under the Euclidean metric.
matriz_knn = df_scaled[columnas_modelo]
model_knn = neighbors.NearestNeighbors(n_neighbors=11, metric='euclidean').fit(matriz_knn)
# Only the neighbour positions are kept; the distances are not used.
idlist = model_knn.kneighbors(matriz_knn, return_distance=False)

def MovieRecommender(movie_name):
    """Return the titles of the precomputed nearest neighbours of ``movie_name``.

    ``idlist`` holds, for each row *position* of ``df_scaled``, the positions
    of its nearest rows. The first row whose 'title' equals ``movie_name`` is
    used as the query.

    Parameters
    ----------
    movie_name : str
        Exact value of the 'title' column to look up.

    Returns
    -------
    list
        Neighbour titles in the order given by ``idlist``, excluding the
        queried row; at most ``idlist.shape[1] - 1`` entries.

    Raises
    ------
    IndexError
        If no row of ``df_scaled`` has the requested title.
    """
    # idlist is positional, so resolve a *position* rather than an index label:
    # the two only coincide when df_scaled has a default 0..n-1 RangeIndex.
    posiciones = np.flatnonzero((df_scaled['title'] == movie_name).to_numpy())
    if posiciones.size == 0:
        raise IndexError(f"Título '{movie_name}' no encontrado.")
    pos = posiciones[0]

    titulos = df_scaled['title'].to_numpy()
    vecinos = idlist[pos]
    # Filter the query out explicitly: with identical feature vectors the
    # query is not guaranteed to be its own first neighbour. The slice keeps
    # the result length at k-1 even if the query is absent from the list.
    return [titulos[i] for i in vecinos if i != pos][:len(vecinos) - 1]

# Expose the KNN recommender through a widget: passing the list of titles makes
# ipywidgets render a dropdown and call the function with the selected title.
interact(MovieRecommender, movie_name=df_scaled['title'].tolist())

interactive(children=(Dropdown(description='movie_name', options=('Toy Story', 'Jumanji', 'Grumpier Old Men', …

In [17]:
#############################################
### 5. RECOMENDACIÓN BASADA EN ITEM (FILTRADO COLABORATIVO)
#############################################

# Build the movie x user rating matrix (rows: movieId, columns: userId);
# movie/user pairs without a rating are filled with 0.
movie_user_matrix = (
    ratings
    .pivot_table(index='movieId', columns='userId', values='rating')
    .fillna(0)
)

# Pairwise cosine similarity between movie rows, labelled by movieId on both axes.
item_similarity = cosine_similarity(movie_user_matrix)
item_similarity_df = pd.DataFrame(
    item_similarity,
    index=movie_user_matrix.index,
    columns=movie_user_matrix.index,
)

# Title -> movieId lookup (for a repeated title the last movieId wins).
pelicula_dict = {
    nombre: id_pelicula
    for nombre, id_pelicula in zip(df_scaled['title'], df_scaled['movieId'])
}

def recomendar_por_item(movie_id, n=10):
    """Recommend the movies whose rating patterns are most similar to ``movie_id``.

    Similarities are read from ``item_similarity_df`` (movieId x movieId) and
    titles from ``df_scaled``. Only movies present in ``df_scaled`` are
    candidates, so every returned row has a title.

    Parameters
    ----------
    movie_id : int
        Identifier of the reference movie.
    n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame or str
        A frame with columns 'movieId', 'title' and 'similitud', sorted by
        descending similarity, or a "not found" message string when
        ``movie_id`` is not in the similarity matrix.
    """
    if movie_id not in item_similarity_df.index:
        return f"movieId {movie_id} no encontrado."

    # One row per movie so the merge below cannot duplicate a recommendation.
    catalogo = df_scaled[['movieId', 'title']].drop_duplicates(subset='movieId')

    # Drop the reference movie by label instead of assuming it sorts first.
    similares = item_similarity_df[movie_id].drop(labels=movie_id)
    # Keep only movies that have a title; filtering before taking the top n
    # still yields n results when some rated movies are missing from df_scaled.
    similares = similares[similares.index.isin(catalogo['movieId'])]
    similares = similares.sort_values(ascending=False, kind='stable').head(max(n, 0))

    # Join on movieId so each score stays attached to its own movie. Assigning
    # the scores positionally onto rows in df_scaled order would mismatch them.
    recomendaciones = (
        similares.rename('similitud')
        .rename_axis('movieId')
        .reset_index()
        .merge(catalogo, on='movieId', how='inner')
    )
    return recomendaciones[['movieId', 'title', 'similitud']]

@interact
def ver_recomendaciones_contenido(titulo=list(pelicula_dict.keys())):
    """Display the item-based recommendations for the selected title.

    The list default makes ``interact`` offer every key of ``pelicula_dict``
    as a choice; the chosen title is translated to its movieId before the
    lookup.
    """
    display(recomendar_por_item(pelicula_dict[titulo]))

interactive(children=(Dropdown(description='titulo', options=('Toy Story', 'Jumanji', 'Grumpier Old Men', 'Fat…

In [18]:
#############################################
### 6. SISTEMA DE POPULARIDAD
#############################################

def recomendar_populares(df, top_n=10, min_ratings=None):
    """Rank movies by average rating, breaking ties by number of ratings.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'title', 'avg_rating' and 'num_ratings'.
    top_n : int, default 10
        Number of rows to return.
    min_ratings : number, optional
        When given, only rows with ``num_ratings >= min_ratings`` are ranked,
        so titles with very few ratings cannot lead the list on average
        alone. The threshold is compared with the column as stored.
        NOTE(review): in df_scaled the column looks standardized, so the
        threshold would be in scaled units — confirm before choosing a value.
        The default (None) keeps every row.

    Returns
    -------
    pandas.DataFrame
        The top ``top_n`` rows with columns 'title', 'avg_rating' and
        'num_ratings', both sort keys in descending order. ``df`` itself is
        not modified.
    """
    candidatos = df if min_ratings is None else df[df["num_ratings"] >= min_ratings]
    populares = candidatos.sort_values(by=["avg_rating", "num_ratings"], ascending=False)
    return populares[["title", "avg_rating", "num_ratings"]].head(top_n)

# Show the ranking for the loaded movie table using the default top_n.
recomendar_populares(df_scaled)

Unnamed: 0,title,avg_rating,num_ratings
317,Patton,2.755573,-0.262473
270,Once Upon a Time in the West (C'era una volta ...,2.633338,-0.621707
1032,Intouchables,2.486657,-0.197158
300,"Bridge on the River Kwai, The",2.322196,0.031445
87,"Shawshank Redemption, The",2.293937,7.901919
71,Like Water for Chocolate (Como agua para choco...,2.256158,-0.360446
633,"Hustler, The",2.256158,-0.589049
813,Nausicaä of the Valley of the Wind (Kaze no ta...,2.256158,-0.589049
1034,"Cabin in the Woods, The",2.256158,-0.589049
185,"Godfather, The",2.228572,4.113641
