In [8]:
import random
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim

from utils.recomendaciones import *
from utils.modelos import *

VAR_SEED = 42
VAR_TESTSET_SIZE = 0.20
VAR_DIR_DATA_CLEAN = '../data/cleaning'

random.seed(VAR_SEED)
np.random.seed(VAR_SEED)

df_dataset = pd.read_csv(f"{VAR_DIR_DATA_CLEAN}/mf_dataset.csv", sep=",", encoding="latin1")
df_catalogo = pd.read_csv(f"{VAR_DIR_DATA_CLEAN}/catalogo.csv", sep=",", encoding="latin1")
df_diagnostico = pd.read_csv(f"{VAR_DIR_DATA_CLEAN}/prueba_diagnostico.csv", sep=",", encoding="latin1")[['id_estudiante', 'score_a', 'score_p', 'score_d', 'score_s']]

df_users = pd.merge(df_dataset, df_diagnostico, on='id_estudiante', how='left')
df_users.fillna(0, inplace=True)
df_users = df_users.astype(int)
df_users = df_users.drop(labels=['id_estudiante'], axis=1)
df_items = df_catalogo[['h1', 'h2', 'h3', 'h4', 's1', 's2', 's3', 's4', 'k1', 'k2', 'k3', 'k4']]

In [None]:
# INICIALIZAR 
user_feature_size = len(df_users.columns)       # Contamos todas las características de los usuarios
item_feature_size = len(df_items.columns)       # Contamos todas las características de los ítems

embedding_size = 64                             # Tamaño del embedding

modelo_V1 = TwoTowerModelv1(user_feature_size, item_feature_size, embedding_size)
modelo_V1.train_model(df_users, df_items, epochs=30)



Epoch 1/30, Loss: 0.7039
Epoch 2/30, Loss: 0.6962
Epoch 3/30, Loss: 0.7024
Epoch 4/30, Loss: 0.6975
Epoch 5/30, Loss: 0.6944
Epoch 6/30, Loss: 0.6944
Epoch 7/30, Loss: 0.6972
Epoch 8/30, Loss: 0.6966
Epoch 9/30, Loss: 0.6953
Epoch 10/30, Loss: 0.6936
Epoch 11/30, Loss: 0.6939
Epoch 12/30, Loss: 0.6941
Epoch 13/30, Loss: 0.6949
Epoch 14/30, Loss: 0.6941
Epoch 15/30, Loss: 0.6944
Epoch 16/30, Loss: 0.6939
Epoch 17/30, Loss: 0.6934
Epoch 18/30, Loss: 0.6935
Epoch 19/30, Loss: 0.6938
Epoch 20/30, Loss: 0.6934
Epoch 21/30, Loss: 0.6936
Epoch 22/30, Loss: 0.6935
Epoch 23/30, Loss: 0.6934
Epoch 24/30, Loss: 0.6934
Epoch 25/30, Loss: 0.6934
Epoch 26/30, Loss: 0.6932
Epoch 27/30, Loss: 0.6934
Epoch 28/30, Loss: 0.6932
Epoch 29/30, Loss: 0.6933
Epoch 30/30, Loss: 0.6935


In [4]:
# Predicción: Obtener las probabilidades de recomendación para cada par usuario-ítem
predictions = modelo_V1.predict(df_users, df_items)
print("Predicciones:", predictions)

Predicciones: tensor([0.4955, 0.4860, 0.4834,  ..., 0.5126, 0.4959, 0.4905])


In [5]:
user_id_to_recommend = 0  # Cambia esto al ID del usuario que deseas recomendar

# Obtén los ejercicios recomendados en el formato correcto
ejercicicos_realizados = [
    int(idx[1:]) if idx.startswith('e') else idx
    for idx in df_dataset[df_dataset['id_estudiante'] == user_id_to_recommend]
    .iloc[:, 1:].columns[
        df_dataset[df_dataset['id_estudiante'] == user_id_to_recommend].iloc[:, 1:].values[0] == 1
    ].tolist()
]

df_catalogo[df_catalogo['id_ejercicio'].isin(ejercicicos_realizados)].sort_values(by=["complexity"], ascending=True)

Unnamed: 0,id_ejercicio,nombre,h1,h2,h3,h4,s1,s2,s3,s4,k1,k2,k3,k4,hito,skill,knowledge,complexity,complexity12
4,4,Nota Final,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,257
25,25,Ordenar tres nÃºmeros,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,257
17,17,Suma de los N primeros nÃºmeros naturales,0,0,0,1,0,0,0,0,0,0,1,0,1,0,2,2,258
23,23,Conversor de Decimal a Binario,0,0,0,1,0,0,0,0,0,0,1,0,1,0,2,2,258
3,3,NÃºmeros Primos,0,0,0,1,0,0,0,0,0,0,1,1,1,0,3,3,259
10,10,Descomponer un nÃºmero,0,0,0,1,0,0,0,0,0,0,1,1,1,0,3,3,259
18,18,Juego Adivina mi nÃºmero,0,0,0,1,0,0,0,0,0,0,1,1,1,0,3,3,259
26,26,Contestador AutomÃ¡tico,0,0,0,1,0,0,0,0,0,0,1,1,1,0,3,3,259
29,29,AprobaciÃ³n de CrÃ©ditos,0,0,0,1,0,0,0,0,0,0,1,1,1,0,3,3,259
42,42,NÃºmeros Amigos,0,0,0,1,0,0,0,0,0,0,1,1,1,0,3,3,259


In [7]:
reco = recomendaciones_TwoTowerModelv1(prediciones=predictions, id_usuario=user_id_to_recommend, matriz_factorizacion=df_dataset, df_ejercicios=df_catalogo)
print(f"Recomendaciones para el usuario {user_id_to_recommend}:")
reco.head(10).sort_values(by=["complexity"], ascending=True)

Recomendaciones para el usuario 0:


Unnamed: 0,id_ejercicio,nombre,h1,h2,h3,h4,s1,s2,s3,s4,k1,k2,k3,k4,hito,skill,knowledge,complexity,complexity12,prediccion
8,35,Factores Primos,0,0,0,1,0,0,0,1,0,0,1,1,1,1,3,19,275,0.503069
2,24,Clase Vector3D,0,0,1,1,0,0,0,1,1,1,1,1,3,1,15,31,799,0.508246
3,19,Cuenta Corriente en un Banco,0,0,1,1,0,0,1,0,1,0,1,1,3,2,11,43,811,0.506893
9,41,Clase FechaHora,0,0,1,1,0,0,1,0,1,1,1,1,3,2,15,47,815,0.502121
0,2,Subsecuencias de ADN,0,0,1,0,0,0,1,1,0,1,1,1,2,3,7,55,567,0.512431
4,8,Distancia Levenshtein,0,0,1,0,0,0,1,1,0,1,1,1,2,3,7,55,567,0.506088
7,43,Clase Usuario,0,0,1,1,0,0,1,1,1,1,1,1,3,3,15,63,831,0.503874
1,20,MultiplicaciÃ³n de Matrices,0,0,1,1,0,1,0,0,0,1,1,1,3,4,7,71,839,0.509631
6,48,Sopa de Letras,0,0,1,0,0,1,0,1,0,1,1,1,2,5,7,87,599,0.503934
5,5,Validador de Expresiones MatemÃ¡ticas,0,1,0,0,1,0,0,0,0,1,1,1,4,8,7,135,1159,0.504497


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import pickle
from pandas import DataFrame
from sklearn.metrics import precision_score, recall_score, average_precision_score


class TwoTowerModelv2(nn.Module):
    def __init__(self, df_users: DataFrame, df_items: DataFrame, embedding_size: int = 64):
        super(TwoTowerModelv2, self).__init__()

        self.dataframe_users = df_users
        self.dataframe_items = df_items
        self.embedding_size = embedding_size

        self.user_input_size = len(df_users.columns)
        self.item_input_size = len(df_items.columns)

        self.user_tower = nn.Sequential(
            nn.Linear(self.user_input_size, 128),
            nn.ReLU(),
            nn.Linear(128, embedding_size)
        )

        self.item_tower = nn.Sequential(
            nn.Linear(self.item_input_size, 128),
            nn.ReLU(),
            nn.Linear(128, embedding_size)
        )

        self.optimizer = optim.Adam(self.parameters(), lr=0.001)
        self.criterion = nn.BCELoss()

    def forward(self, user_input, item_input):
        user_embedding = self.user_tower(user_input)
        item_embedding = self.item_tower(item_input)
        score = torch.sum(user_embedding * item_embedding, dim=1)
        return torch.sigmoid(score)

    def train_model(self, df_users: DataFrame, df_items: DataFrame, epochs: int = 30):
        user_input = torch.tensor(df_users.values).float()
        item_input = torch.tensor(df_items.values).float()
        num_users = len(df_users)
        num_items = len(df_items)
        user_input_expanded = user_input.unsqueeze(1).expand(-1, num_items, -1).reshape(-1, user_input.size(1))
        item_input_expanded = item_input.repeat(num_users, 1)

        for epoch in range(epochs):
            self.optimizer.zero_grad()
            output = self(user_input_expanded, item_input_expanded)
            labels = torch.randint(0, 2, (len(output),)).float()
            loss = self.criterion(output, labels)
            loss.backward()
            self.optimizer.step()
            print(f"[+] Epoch {epoch+1}/{epochs} => Loss: {loss.item():.4f}")

    def predict(self, df_users: DataFrame, df_items: DataFrame):
        user_input = torch.tensor(df_users.values).float()
        item_input = torch.tensor(df_items.values).float()
        num_users = len(df_users)
        num_items = len(df_items)
        user_input_expanded = user_input.unsqueeze(1).expand(-1, num_items, -1).reshape(-1, user_input.size(1))
        item_input_expanded = item_input.repeat(num_users, 1)
        with torch.no_grad():
            predictions = self(user_input_expanded, item_input_expanded)
        return predictions

    def recomendaciones_usuario(self, predicciones, id_usuario: int, df_ejercicios: DataFrame, matriz_factorizacion: DataFrame, threshold: float = 0.01) -> DataFrame:
        if id_usuario not in matriz_factorizacion['id_estudiante'].values:
            print(f"Error: El usuario {id_usuario} no tiene interacciones previas. No se pueden hacer recomendaciones.")
            return DataFrame()

        predicciones_numpy = predicciones.numpy()
        num_usuarios = len(predicciones_numpy) // len(df_ejercicios)
        
        if id_usuario < 0 or id_usuario >= num_usuarios:
            raise ValueError("id_usuario fuera de rango")
        
        predicciones_usuario = predicciones_numpy[id_usuario::num_usuarios]

        recomendaciones_indices = np.where(predicciones_usuario > threshold)[0]
        recomendaciones_puntaje = predicciones_usuario[recomendaciones_indices]

        recomendaciones_ejercicios = df_ejercicios.iloc[recomendaciones_indices].copy()
        recomendaciones_ejercicios['prediccion'] = recomendaciones_puntaje

        ejercicios_interactuados = matriz_factorizacion[matriz_factorizacion['id_estudiante'] == id_usuario].iloc[:, 1:]
        ejercicios_interactuados_indices = ejercicios_interactuados.columns[ejercicios_interactuados.values[0] == 1].tolist()
        ejercicios_interactuados_indices = [idx[1:] if idx.startswith('e') else idx for idx in ejercicios_interactuados_indices]

        recomendaciones_ejercicios['id_ejercicio'] = recomendaciones_ejercicios['id_ejercicio'].astype(str)
        recomendaciones_ejercicios = recomendaciones_ejercicios[~recomendaciones_ejercicios['id_ejercicio'].isin(ejercicios_interactuados_indices)]
        recomendaciones_ejercicios = recomendaciones_ejercicios.sort_values(by='prediccion', ascending=False).reset_index(drop=True)

        return recomendaciones_ejercicios

    def save_model(self, filepath: str):
        """Guardar el modelo en un archivo"""
        with open(filepath, 'wb') as f:
            pickle.dump(self.state_dict(), f)

    def load_model(self, filepath: str):
        """Cargar el modelo desde un archivo"""
        with open(filepath, 'rb') as f:
            model_state_dict = pickle.load(f)
            self.load_state_dict(model_state_dict)

    def update_model(self, df_users: DataFrame, df_items: DataFrame, epochs: int = 10):
        """Actualizar el modelo entrenando con nuevos datos"""
        self.train_model(df_users, df_items, epochs)

    def evaluate_metrics(self, true_labels: torch.Tensor, predictions: torch.Tensor) -> dict:
        """Calcular las métricas de precisión, recall y MAP"""
        # Convierte las predicciones a valores binarios usando un umbral
        predicted_labels = (predictions > 0.5).float()

        precision = precision_score(true_labels.numpy(), predicted_labels.numpy())
        recall = recall_score(true_labels.numpy(), predicted_labels.numpy())
        map_score = average_precision_score(true_labels.numpy(), predictions.numpy())

        return {'precision': precision, 'recall': recall, 'MAP': map_score}
