In [4]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Embedding, Input, Flatten, Dense, Concatenate, Dropout

In [3]:
# Load the pre-processed CSV that the rest of the notebook works on.
DATA_PATH = "../../data/input/recomendation/data_processed.csv"
df = pd.read_csv(DATA_PATH)

In [5]:
# Turn the raw identifiers into contiguous integer indices (0..k-1) so they
# can be used as Embedding lookups.
user_encoder, product_encoder = LabelEncoder(), LabelEncoder()

# The "user" index is intentionally derived from productId (stand-in user).
# NOTE(review): this makes userId_encoded identical to productId_encoded, so
# the user embedding carries no extra information - confirm this is intended
# and whether the dataset has a real user column that should be used instead.
df['userId_encoded'] = user_encoder.fit_transform(df['productId'])
df['productId_encoded'] = product_encoder.fit_transform(df['productId'])

# Model parameters: vocabulary sizes for the two embedding tables. Every
# fitted class appears in the encoded column, so this equals nunique().
num_users = len(user_encoder.classes_)
num_products = len(product_encoder.classes_)
embedding_size = 50  # Embedding dimension

# Hold out 30% of the rows; the fixed seed keeps the split reproducible.
train_data, test_data = train_test_split(
    df,
    test_size=0.3,
    random_state=42,
)


In [6]:
def build_nn_model(embedding_size=50, dropout_rate=0.3, n_users=None,
                   n_products=None, hidden_units=(128, 64)):
    """Build and compile an embedding-based rating regressor.

    Two integer id inputs (user, product) are each looked up in their own
    embedding table. The two embedding vectors are flattened, concatenated
    and fed through a stack of ReLU dense layers, each followed by dropout,
    ending in a single linear unit that predicts the rating.

    Args:
        embedding_size: Output dimension of both embedding tables.
        dropout_rate: Dropout rate applied after every hidden dense layer.
        n_users: Number of distinct user indices (``input_dim`` of the user
            embedding). When ``None`` the notebook-level ``num_users`` is used.
        n_products: Number of distinct product indices (``input_dim`` of the
            product embedding). When ``None`` the notebook-level
            ``num_products`` is used.
        hidden_units: Widths of the hidden dense layers, in order.

    Returns:
        A compiled ``Model`` taking ``[user_ids, product_ids]`` as inputs,
        trained with the Adam optimizer on MSE loss and reporting MAE.

    Raises:
        ValueError: If either vocabulary size is smaller than 1.
    """
    # Fall back to the notebook globals so existing no-argument calls still work.
    if n_users is None:
        n_users = num_users
    if n_products is None:
        n_products = num_products
    if n_users < 1 or n_products < 1:
        raise ValueError(
            f"Embedding vocabularies must be non-empty "
            f"(n_users={n_users}, n_products={n_products})"
        )

    # One scalar id per sample for each input.
    user_input = Input(shape=(1,))
    product_input = Input(shape=(1,))

    user_embedding = Embedding(input_dim=n_users, output_dim=embedding_size)(user_input)
    product_embedding = Embedding(input_dim=n_products, output_dim=embedding_size)(product_input)

    # Embedding output is (batch, 1, embedding_size); drop the length-1 axis.
    user_vector = Flatten()(user_embedding)
    product_vector = Flatten()(product_embedding)

    hidden = Concatenate()([user_vector, product_vector])

    # Hidden stack: Dense(ReLU) -> Dropout for every requested width.
    for units in hidden_units:
        hidden = Dense(units, activation='relu')(hidden)
        hidden = Dropout(dropout_rate)(hidden)

    output = Dense(1, activation='linear')(hidden)  # Rating prediction

    model = Model(inputs=[user_input, product_input], outputs=output)
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

# Build the model. The notebook-level `embedding_size` is passed explicitly so
# that changing it in the configuration cell actually affects the network
# (previously the builder's own default was always used).
nn_model = build_nn_model(embedding_size=embedding_size)
nn_model.summary()

In [7]:
# Model inputs are [user index, product index]; the target is the rating.
train_inputs = [train_data['userId_encoded'], train_data['productId_encoded']]
val_inputs = [test_data['userId_encoded'], test_data['productId_encoded']]

# Train for 10 epochs; the held-out split is used as validation data.
history = nn_model.fit(
    x=train_inputs,
    y=train_data['ratings'],
    validation_data=(val_inputs, test_data['ratings']),
    batch_size=128,
    epochs=10,
    verbose=1,
)


Epoch 1/10
[1m1441/2821[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m6:03[0m 263ms/step - loss: 2.1077 - mae: 0.9741

KeyboardInterrupt: 