In [None]:
# Instalación de librerías
!pip install lightfm
!pip install --quiet optuna

In [None]:
import pandas as pd
import numpy as np

from sklearn import neighbors
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from lightfm import LightFM
from lightfm.data import Dataset
from lightfm.evaluation import auc_score

import optuna

import sqlite3 as sql
import joblib

from ipywidgets import interact

import time

In [None]:
from google.colab import drive
import sys
import os

# Mount Google Drive inside the Colab runtime so the project files are reachable.
drive.mount('/content/drive')

# Personalized path to your project directory
path = '/content/drive/My Drive/cod/A3_marketing' # Replace with your actual path

# Add the path to sys.path
sys.path.append(path)
# Make the project directory the working directory; the relative paths used
# below ('data/...', 'salidas/...') are resolved against it.
os.chdir(path)

In [None]:
#######################################################################
#### 3 Content-based recommender system (KNN) #########################
#### Based on everything the user has watched #########################
#######################################################################

# Connection to the SQLite database queried below (`ratings`, `full_ratings`).
con = sql.connect('data/db_movies_c1')

##### Load the scaled data frame with dummy variables ###
# NOTE(review): joblib.load unpickles the file, so only load artifacts you trust.
movies_dum2 = joblib.load('salidas/movies_dum2.joblib')  # This file must have been generated beforehand, with the dummy columns

### Load the regular data frame that carries the movie titles
movies = pd.read_sql('SELECT * FROM full_ratings', con)

#### Select the user to build recommendations for ####
usuarios = pd.read_sql('SELECT DISTINCT userId as user_id FROM ratings', con)
user_id = 1  # manual example

def recomendar(user_id=list(usuarios['user_id'].value_counts().index)):
    """Recommend unseen movies that are closest to the user's content profile.

    The profile is the centroid (column-wise mean) of the feature rows of
    every movie the user has rated. The unseen movies nearest to that centroid
    under cosine distance are returned.

    Parameters
    ----------
    user_id : scalar
        Value matched against ``ratings.userId``. The default is the list of
        all known user ids; it exists only so that ``interact(recomendar)``
        renders a dropdown and is not a usable value for a direct call.

    Returns
    -------
    pandas.DataFrame
        Columns ``['title', 'movieId']`` with at most 11 rows, nearest first.
        Empty when the user has no rated movie in the catalogue or when no
        unseen movie is left to recommend.

    Notes
    -----
    Reads the module-level ``con``, ``movies_dum2`` and ``movies``. None of
    them is modified: the id/title columns are attached to a local copy, so
    repeated calls (e.g. from the widget) leave ``movies_dum2`` untouched.
    """
    # sqlite3 does not adapt NumPy scalars as integers; hand it a plain Python value.
    if isinstance(user_id, np.generic):
        user_id = user_id.item()

    # Only the ratings of the selected user.
    ratings = pd.read_sql('SELECT * FROM ratings WHERE userId = :user', con, params={'user': user_id})
    l_movies_r = ratings['movieId'].to_numpy()

    # Feature matrix without identifier columns (they may already be present
    # if an earlier run attached them to the global frame).
    features = movies_dum2.drop(columns=['movieId', 'title'], errors='ignore')
    # Local catalogue: features plus the columns needed to filter and display.
    # The assignment aligns on the index, exactly like the original setitem did.
    catalogo = features.assign(movieId=movies['movieId'], title=movies['title'])

    vistas = catalogo['movieId'].isin(l_movies_r)
    movies_r = features[vistas]      # movies already rated by the user
    movies_nr = catalogo[~vistas]    # candidate movies not yet rated

    # Without rated movies there is no centroid; without candidates there is
    # nothing to rank. Return an empty frame with the usual columns.
    if movies_r.empty or movies_nr.empty:
        return catalogo.iloc[0:0][['title', 'movieId']]

    # One-row frame holding the mean feature vector of the rated movies.
    centroide = movies_r.mean(axis=0).to_frame().T

    movies_nr_simple = movies_nr.drop(columns=['movieId', 'title'])

    # NearestNeighbors rejects n_neighbors larger than the number of fitted rows.
    n_vecinos = min(11, len(movies_nr_simple))
    model = neighbors.NearestNeighbors(n_neighbors=n_vecinos, metric='cosine')
    model.fit(movies_nr_simple)
    _, idlist = model.kneighbors(centroide)

    # idlist holds positions into movies_nr, ordered from nearest to farthest.
    return movies_nr.iloc[idlist[0]][['title', 'movieId']]

# Example: direct call for user 1. The returned frame is not the cell's last
# expression, so the notebook does not display it.
recomendar(1)
# Interactive version: ipywidgets builds a dropdown from the list used as the
# default value of `user_id` and re-runs `recomendar` on every selection.
interact(recomendar)

In [None]:
#######################################################################
#### 4 Recommender system with LightFM ################################
#######################################################################

# Load every rating from the database.
ratings = pd.read_sql('SELECT * FROM ratings', con)

# Two Dataset objects fitted on the same users/items, so train and test share
# identical internal id mappings and matrix shapes.
dataset_train = Dataset()
dataset_test = Dataset()

all_unique_users = ratings['userId'].unique()
all_unique_items = ratings['movieId'].unique()

dataset_train.fit(users=all_unique_users, items=all_unique_items)
dataset_test.fit(users=all_unique_users, items=all_unique_items)

# Random 80/20 split of the rating rows.
train_df, test_df = train_test_split(ratings, test_size=0.2, random_state=42)

# (user, item, weight) triples. itertuples avoids building one Series per row
# (as iterrows does) and keeps the integer ids as integers instead of
# upcasting them to float alongside the rating.
interaction_cols = ['userId', 'movieId', 'rating']

train_interactions_list = list(train_df[interaction_cols].itertuples(index=False, name=None))
train_interactions, train_weights = dataset_train.build_interactions(train_interactions_list)

test_interactions_list = list(test_df[interaction_cols].itertuples(index=False, name=None))
test_interactions, test_weights = dataset_test.build_interactions(test_interactions_list)

# Initial manual training; the ratings are used as per-interaction weights.
model = LightFM(loss='logistic', random_state=42)
model.fit(train_interactions, epochs=20, verbose=True, sample_weight=train_weights)

# AUC. For the test score the known train positives are passed so they are
# left out of each user's ranking; otherwise they compete with the held-out
# positives and drag the test AUC down.
# NOTE(review): LightFM is expected to reject train/test matrices that share
# (user, item) pairs - confirm `ratings` has no duplicated pairs.
train_auc = auc_score(model, train_interactions).mean()
test_auc = auc_score(model, test_interactions, train_interactions=train_interactions).mean()
print(f'AUC: train {train_auc:.2f}, test {test_auc:.2f}')

In [None]:
#######################################################################
#### 5 Ajuste de hiperparámetros con Optuna ###########################
#######################################################################

def objective(trial):
    """Optuna objective: train a LightFM model and return its mean test AUC.

    Search space (the names are the keys that end up in ``study.best_params``):
    ``no_components`` in [10, 100], ``learning_rate`` in [0.001, 0.1] on a log
    scale, ``loss`` in {'logistic', 'bpr', 'warp'} and ``epochs`` in [10, 50].

    Reads the module-level ``train_interactions``, ``train_weights`` and
    ``test_interactions``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the hyperparameters.

    Returns
    -------
    float
        Mean per-user AUC on the test interactions (to be maximised).

    Notes
    -----
    NOTE(review): the score is computed on the test split, so the best value
    reported by the study is optimistic; a separate validation split would be
    needed for an unbiased estimate.
    """
    n_components = trial.suggest_int('no_components', 10, 100)
    learning_rate = trial.suggest_float('learning_rate', 0.001, 0.1, log=True)
    loss = trial.suggest_categorical('loss', ['logistic', 'bpr', 'warp'])
    epochs = trial.suggest_int('epochs', 10, 50)

    model = LightFM(
        no_components=n_components,
        learning_rate=learning_rate,
        loss=loss,
        random_state=42
    )

    model.fit(train_interactions, epochs=epochs, verbose=False, sample_weight=train_weights)

    # Known train positives are excluded from the ranking so they do not
    # compete with the held-out positives and bias the AUC downwards.
    test_auc = auc_score(model, test_interactions, train_interactions=train_interactions).mean()
    # Hand Optuna a plain Python float rather than a NumPy scalar.
    return float(test_auc)

# TPE is Optuna's default sampler; it is created explicitly only to fix its
# seed, so the ten sampled configurations are the same on every run (the rest
# of the notebook already pins its randomness to 42).
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=10)

print(f"Best trial AUC: {study.best_value:.4f}")
print(f"Best Params: {study.best_params}")

In [None]:
#######################################################################
#### 6 Generar recomendaciones con modelo LightFM #####################
#######################################################################

def recommendation(model, data, original_user_id, conn, k):
    """Return the ``k`` highest-scoring movies for a user, best first.

    Parameters
    ----------
    model : object
        Fitted model exposing ``predict(user_ids, item_ids)`` (e.g. LightFM).
    data : object
        Object exposing ``mapping()`` whose first element maps original user
        ids to internal indices and whose third element maps original item
        ids to internal indices (e.g. a fitted ``lightfm.data.Dataset``).
    original_user_id : hashable
        User id as it appears in the source data.
    conn : DB-API connection
        Connection used to read ``movieId``/``title`` from ``full_ratings``.
    k : int
        Number of items to keep.

    Returns
    -------
    pandas.DataFrame
        Columns ``movieId`` and ``title``, ordered from highest to lowest
        predicted score. Items without a row in ``full_ratings`` are omitted,
        so fewer than ``k`` rows may be returned.

    Raises
    ------
    KeyError
        If ``original_user_id`` is not present in the user mapping.

    Notes
    -----
    NOTE(review): movies the user has already rated are not filtered out.
    """
    user_map, _, item_map, _ = data.mapping()

    # Internal index of the user.
    uid_index = user_map[original_user_id]

    # Keys and values are read from the same dict, so position p in one
    # corresponds to position p in the other (and in `scores`).
    original_ids = list(item_map.keys())
    internal_ids = np.fromiter(item_map.values(), dtype=np.int32, count=len(item_map))

    scores = model.predict(uid_index, internal_ids)

    # Positions of the k best scores, best first, translated back to original ids.
    top_positions = np.argsort(-scores)[:k]
    top_items = [original_ids[pos] for pos in top_positions]

    # Use the connection that was passed in, not a module-level one.
    full_movies = pd.read_sql("SELECT DISTINCT movieId, title FROM full_ratings", conn)
    recommended = full_movies[full_movies['movieId'].isin(top_items)]

    # Restore the score ranking, which the table-order filter above discards.
    rank = {movie_id: position for position, movie_id in enumerate(top_items)}
    recommended = (
        recommended
        .assign(_rank=recommended['movieId'].map(rank))
        .sort_values('_rank', kind='stable')
        .drop(columns='_rank')
    )

    return recommended

# Top-10 recommendations for user 1. `model` is the module-level name assigned
# in the LightFM training cell; the models created inside `objective` are
# local to that function, so the Optuna result is not what is used here.
recommendation(model, dataset_train, 1, con, 10)