In [None]:
import random
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim

from utils.recomendaciones import *
from utils.modelos import *

VAR_SEED = 42
VAR_TESTSET_SIZE = 0.20
VAR_DIR_DATA_CLEAN = '../data/cleaning'

random.seed(VAR_SEED)
np.random.seed(VAR_SEED)

df_dataset = pd.read_csv(f"{VAR_DIR_DATA_CLEAN}/mf_dataset.csv", sep=",", encoding="latin1")
df_catalogo = pd.read_csv(f"{VAR_DIR_DATA_CLEAN}/catalogo.csv", sep=",", encoding="latin1")
df_diagnostico = pd.read_csv(f"{VAR_DIR_DATA_CLEAN}/prueba_diagnostico.csv", sep=",", encoding="latin1")[['id_estudiante', 'score_a', 'score_p', 'score_d', 'score_s']]

ModuleNotFoundError: No module named 'utils'

In [None]:
df_users = pd.merge(df_dataset, df_diagnostico, on='id_estudiante', how='left')
df_users.fillna(0, inplace=True)
df_users = df_users.astype(int)
df_users = df_users.drop(labels=['id_estudiante'], axis=1)

df_items = df_catalogo[['h1', 'h2', 'h3', 'h4', 's1', 's2', 's3', 's4', 'k1', 'k2', 'k3', 'k4']]

In [None]:
# Definimos las dimensiones de entrada para los usuarios y los ejercicios
item_feature_size = len(df_items.columns) - 1  # Contamos todas las características de los ítems
user_feature_size = len(df_users.columns) - 1  # Contamos todas las características de los usuarios

embedding_size = 64  # Tamaño del embedding

In [16]:
df_users

Unnamed: 0,id_estudiante,e0,e1,e2,e3,e4,e5,e6,e7,e8,...,e47,e48,e49,e50,e51,e52,score_a,score_p,score_d,score_s
0,0,1,1,0,1,1,0,1,0,0,...,0,0,0,0,0,0,5,4,2,3
1,1,1,0,0,1,1,0,0,0,0,...,0,0,0,0,0,0,4,4,2,3
2,2,0,0,0,1,1,0,0,0,0,...,0,0,0,0,0,0,5,4,3,2
3,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,3,3,1,2
4,4,1,1,0,1,1,0,0,0,0,...,0,0,0,0,0,0,6,4,3,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
761,1301,1,0,0,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
762,1302,1,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
763,1303,1,0,0,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
764,1304,1,0,0,1,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Definimos la torre para los usuarios
class UserTower(nn.Module):
    def __init__(self, user_input_size, embedding_size):
        super(UserTower, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(user_input_size, 128),
            nn.ReLU(),
            nn.Linear(128, embedding_size)
        )
    
    def forward(self, x):
        return self.fc(x)

# Definimos la torre para los ejercicios
class ItemTower(nn.Module):
    def __init__(self, item_input_size, embedding_size):
        super(ItemTower, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(item_input_size, 128),
            nn.ReLU(),
            nn.Linear(128, embedding_size)
        )
    
    def forward(self, x):
        return self.fc(x)

# Definimos el modelo de dos torres
class TwoTowerModel(nn.Module):
    def __init__(self, user_input_size, item_input_size, embedding_size):
        super(TwoTowerModel, self).__init__()
        self.user_tower = UserTower(user_input_size, embedding_size)
        self.item_tower = ItemTower(item_input_size, embedding_size)
    
    def forward(self, user_input, item_input):
        # Obtener los embeddings para usuarios y ejercicios
        user_embedding = self.user_tower(user_input)
        item_embedding = self.item_tower(item_input)
        
        # Calcular la similitud como el producto punto entre los embeddings
        score = torch.sum(user_embedding * item_embedding, dim=1)
        
        return torch.sigmoid(score)
    
##################################################################


class TwoTowerModelv1(nn.Module):
    def __init__(self, user_input_size, item_input_size, embedding_size):
        super(TwoTowerModelv1, self).__init__()

        # Torre para los usuarios
        self.user_tower = nn.Sequential(
            nn.Linear(user_input_size, 128),
            nn.ReLU(),
            nn.Linear(128, embedding_size)
        )

        # Torre para los ejercicios
        self.item_tower = nn.Sequential(
            nn.Linear(item_input_size, 128),
            nn.ReLU(),
            nn.Linear(128, embedding_size)
        )

        # Optimizer y loss function
        self.optimizer = optim.Adam(self.parameters(), lr=0.001)
        self.criterion = nn.BCELoss()

    def forward(self, user_input, item_input):
        # Obtener los embeddings para usuarios y ejercicios
        user_embedding = self.user_tower(user_input)
        item_embedding = self.item_tower(item_input)
        
        # Calcular la similitud como el producto punto entre los embeddings
        score = torch.sum(user_embedding * item_embedding, dim=1)
        
        return torch.sigmoid(score)

    def train_model(self, df_users, df_items, epochs=30):
        # Extraer las características de los DataFrames y convertirlas en tensores
        item_input = torch.tensor(df_items.iloc[:, 1:].values).float()  # Datos de ejercicios
        user_input = torch.tensor(df_users.iloc[:, 1:].values).float()  # Datos de usuarios

        # Crear pares de usuario e ítem
        num_users = len(df_users)
        num_items = len(df_items)

        user_input_expanded = user_input.unsqueeze(1).expand(-1, num_items, -1).reshape(-1, user_input.size(1))  # Replicamos las características del usuario
        item_input_expanded = item_input.repeat(num_users, 1)  # Repetimos las características de los ítems

        # Entrenar el modelo
        for epoch in range(epochs):
            self.optimizer.zero_grad()

            # Pasar las entradas a través del modelo
            output = self(user_input_expanded, item_input_expanded)

            # Crear etiquetas aleatorias para los pares
            labels = torch.randint(0, 2, (len(output),)).float()  # Ajustar el tamaño de las etiquetas

            # Calcular la pérdida
            loss = self.criterion(output, labels)

            # Retropropagación
            loss.backward()

            # Actualizar los pesos
            self.optimizer.step()

            print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")

    def predict(self, df_users, df_items):
        # Extraer las características de los DataFrames y convertirlas en tensores
        item_input = torch.tensor(df_items.iloc[:, 1:].values).float()  # Datos de ejercicios
        user_input = torch.tensor(df_users.iloc[:, 1:].values).float()  # Datos de usuarios

        # Crear pares de usuario e ítem
        num_users = len(df_users)
        num_items = len(df_items)

        user_input_expanded = user_input.unsqueeze(1).expand(-1, num_items, -1).reshape(-1, user_input.size(1))  # Replicamos las características del usuario
        item_input_expanded = item_input.repeat(num_users, 1)  # Repetimos las características de los ítems

        # Realizar la predicción
        with torch.no_grad():
            predictions = self(user_input_expanded, item_input_expanded)

        return predictions

In [4]:
# Crear el modelo
model = TwoTowerModel(user_feature_size, item_feature_size, embedding_size)

# Definir el optimizador y la función de pérdida
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.BCELoss()

# Extraer las características de los DataFrames y convertirlas en tensores
item_input = torch.tensor(df_items.iloc[:, 1:].values).float()  # Datos de ejercicios
user_input = torch.tensor(df_users.iloc[:, 1:].values).float()  # Datos de usuarios

# Crear pares de usuario e ítem
num_users = len(df_users)
num_items = len(df_items)

# Expandir las características para crear combinaciones de usuarios e ítems
user_input_expanded = user_input.unsqueeze(1).expand(-1, num_items, -1).reshape(-1, user_feature_size)  # Replicamos las características del usuario
item_input_expanded = item_input.repeat(num_users, 1)  # Repetimos las características de los ítems

# Asegurarse de que las dimensiones son correctas
print(f"User input expanded shape: {user_input_expanded.shape}")
print(f"Item input expanded shape: {item_input_expanded.shape}")



User input expanded shape: torch.Size([40598, 57])
Item input expanded shape: torch.Size([40598, 12])


In [5]:
# Entrenar el modelo
epochs = 30
for epoch in range(epochs):
    optimizer.zero_grad()
    
    # Pasar las entradas a través del modelo
    output = model(user_input_expanded, item_input_expanded)
    
    # Calcular la pérdida (debes crear etiquetas correspondientes a los pares)
    labels = torch.randint(0, 2, (len(output),)).float()  # Ajustar el tamaño de las etiquetas
    loss = criterion(output, labels)
    
    # Retropropagación
    loss.backward()
    
    # Actualizar los pesos
    optimizer.step()
    
    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")

Epoch 1/30, Loss: 0.6994
Epoch 2/30, Loss: 0.6969
Epoch 3/30, Loss: 0.6967
Epoch 4/30, Loss: 0.6943
Epoch 5/30, Loss: 0.6936
Epoch 6/30, Loss: 0.6949
Epoch 7/30, Loss: 0.6959
Epoch 8/30, Loss: 0.6942
Epoch 9/30, Loss: 0.6937
Epoch 10/30, Loss: 0.6938
Epoch 11/30, Loss: 0.6939
Epoch 12/30, Loss: 0.6939
Epoch 13/30, Loss: 0.6941
Epoch 14/30, Loss: 0.6940
Epoch 15/30, Loss: 0.6934
Epoch 16/30, Loss: 0.6933
Epoch 17/30, Loss: 0.6932
Epoch 18/30, Loss: 0.6937
Epoch 19/30, Loss: 0.6937
Epoch 20/30, Loss: 0.6938
Epoch 21/30, Loss: 0.6936
Epoch 22/30, Loss: 0.6932
Epoch 23/30, Loss: 0.6935
Epoch 24/30, Loss: 0.6935
Epoch 25/30, Loss: 0.6934
Epoch 26/30, Loss: 0.6934
Epoch 27/30, Loss: 0.6933
Epoch 28/30, Loss: 0.6935
Epoch 29/30, Loss: 0.6934
Epoch 30/30, Loss: 0.6933


In [6]:
# Prueba: Predecir la probabilidad de recomendar un ejercicio a un usuario
with torch.no_grad():
    predictions = model(user_input_expanded, item_input_expanded)
    print("Predicciones:", predictions)

Predicciones: tensor([0.4958, 0.4932, 0.4991,  ..., 0.4949, 0.5013, 0.4941])


In [None]:
recomendacion_ejercicios

In [7]:
# Ejemplo de uso
user_id_to_recommend = 0  # Cambia esto al ID del usuario que deseas recomendar
print(f'Ejercicios Relizados por el estudiante: {user_id_to_recommend}')
print([idx[1:] if idx.startswith('e') else idx for idx in mf_dataset[mf_dataset['id_estudiante'] == user_id_to_recommend].iloc[:, 1:].columns[mf_dataset[mf_dataset['id_estudiante'] == user_id_to_recommend].iloc[:, 1:].values[0] == 1].tolist()])


Ejercicios Relizados por el estudiante: 0
['0', '1', '3', '4', '6', '10', '17', '18', '22', '23', '25', '26', '29', '42', '44']


In [9]:
reco = recomendacion_ejercicios(predictions, user_id_to_recommend, catalogo, mf_dataset)
print(f"Recomendaciones para el usuario {user_id_to_recommend}:")
reco.head(15)

Recomendaciones para el usuario 0:


Unnamed: 0,id_ejercicio,h1,h2,h3,h4,s1,s2,s3,s4,k1,k2,k3,k4,prediccion
0,24,0,0,1,1,0,0,0,1,1,1,1,1,0.516147
1,15,0,0,1,1,0,1,0,1,0,1,1,1,0.510478
2,37,0,0,1,0,0,0,1,0,0,1,1,1,0.510384
3,35,0,0,0,1,0,0,0,1,0,0,1,1,0.509115
4,27,0,0,1,1,0,0,1,0,0,1,1,1,0.507017
5,43,0,0,1,1,0,0,1,1,1,1,1,1,0.505785
6,34,0,0,1,1,0,0,0,0,1,1,0,0,0.5054
7,7,0,0,0,1,0,0,0,0,0,0,0,1,0.505325
8,47,0,1,0,0,1,1,0,0,0,1,1,1,0.504173
9,45,0,0,1,1,0,0,1,0,1,1,1,1,0.504173


In [None]:



# Definir parámetros y crear el modelo
user_feature_size = 10  # tamaño de las características del usuario
item_feature_size = 10  # tamaño de las características del ítem
embedding_size = 5  # tamaño de los embeddings

# Crear un modelo de TwoTower
# model = TwoTowerModelv1(user_feature_size, item_feature_size, embedding_size)

# Crear un DataFrame de ejemplo
num_users = 100
num_items = 50
user_data = np.random.rand(num_users, user_feature_size)
item_data = np.random.rand(num_items, item_feature_size)
df_users = pd.DataFrame(user_data, columns=[f"user_feature_{i}" for i in range(user_feature_size)])
df_items = pd.DataFrame(item_data, columns=[f"item_feature_{i}" for i in range(item_feature_size)])

# Entrenar el modelo
model.train_model(df_users, df_items, epochs=30)

# Predicción: Obtener las probabilidades de recomendación para cada par usuario-ítem
predictions = model.predict(df_users, df_items)
print("Predicciones:", predictions)


In [10]:
# Definir parámetros y crear el modelo
user_feature_size = 10  # tamaño de las características del usuario
item_feature_size = 10  # tamaño de las características del ítem
embedding_size = 5  # tamaño de los embeddings

# Crear un modelo de TwoTower
# model = TwoTowerModelv1(user_feature_size, item_feature_size, embedding_size)

# Crear un DataFrame de ejemplo
num_users = 100
num_items = 50
user_data = np.random.rand(num_users, user_feature_size)
item_data = np.random.rand(num_items, item_feature_size)
df_users = pd.DataFrame(user_data, columns=[f"user_feature_{i}" for i in range(user_feature_size)])
df_items = pd.DataFrame(item_data, columns=[f"item_feature_{i}" for i in range(item_feature_size)])


In [13]:
df_users

Unnamed: 0,user_feature_0,user_feature_1,user_feature_2,user_feature_3,user_feature_4,user_feature_5,user_feature_6,user_feature_7,user_feature_8,user_feature_9
0,0.374540,0.950714,0.731994,0.598658,0.156019,0.155995,0.058084,0.866176,0.601115,0.708073
1,0.020584,0.969910,0.832443,0.212339,0.181825,0.183405,0.304242,0.524756,0.431945,0.291229
2,0.611853,0.139494,0.292145,0.366362,0.456070,0.785176,0.199674,0.514234,0.592415,0.046450
3,0.607545,0.170524,0.065052,0.948886,0.965632,0.808397,0.304614,0.097672,0.684233,0.440152
4,0.122038,0.495177,0.034389,0.909320,0.258780,0.662522,0.311711,0.520068,0.546710,0.184854
...,...,...,...,...,...,...,...,...,...,...
95,0.600517,0.665037,0.175371,0.914412,0.418771,0.383139,0.518918,0.046966,0.166283,0.738034
96,0.082799,0.603152,0.245349,0.389296,0.288694,0.355673,0.719046,0.297122,0.566405,0.476050
97,0.663671,0.936830,0.732572,0.214940,0.031183,0.262264,0.595078,0.051426,0.496366,0.596843
98,0.334244,0.770912,0.106598,0.075138,0.728189,0.495491,0.688402,0.434827,0.246402,0.819102
