In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.initializers import HeNormal, GlorotNormal
import tensorflow as tf

In [2]:
# load data
df = pd.read_csv('./data/ml-latest-small/ratings.csv')

In [3]:
# binary implicit feedback
# rating >= 3.0일 경우 positive로 간주

df['rating'] = df['rating'].apply(lambda x: 1 if x >=3.0 else 0)

# 전체 유저 수와 영화 수
d_users = df.userId.nunique()
d_movies = df.movieId.nunique()

# train test split
train, validation = train_test_split(df, test_size=0.2, random_state=42)

# user id indexing
unique_user_ids = df['userId'].unique()
userId_dict = {id: i for i, id in enumerate(unique_user_ids)}

# movie id indexing
unique_movie_ids = df['movieId'].unique()
movieId_dict = {id: i for i, id in enumerate(unique_movie_ids)}

# id mapping using indexed dictionary
train['userId'] = train['userId'].map(userId_dict)
train['movieId'] = train['movieId'].map(movieId_dict)

validation['userId'] = validation['userId'].map(userId_dict)
validation['movieId'] = validation['movieId'].map(movieId_dict)

# total count of unique users
num_unique_users = df['userId'].nunique() + 1

# total count of unique movies
num_unique_movies = df['movieId'].nunique() + 1

In [4]:
# user/item input layer
user_input = Input(shape=(1, ))
movie_input = Input(shape=(1, ))

# embedding dimension
n_latent_factors = 20

# define embedding layers
user_embedding = Embedding(num_unique_users, n_latent_factors, name='user_embedding')(user_input)
movie_embedding = Embedding(num_unique_movies, n_latent_factors, name='movie_embedding')(movie_input)

# embedding flatten
user_vector = Flatten()(user_embedding)
movie_vector = Flatten()(movie_embedding)

# concat of users and movie vectors
concat = Concatenate()([user_vector, movie_vector])

In [10]:
# result table
results_df = pd.DataFrame(columns=['initializer', 'activation', 'last_loss', 'last_val_loss'])

# initialization, activation
initializers = [HeNormal(), HeNormal(), GlorotNormal(), GlorotNormal()]
activations = ['relu', 'sigmoid', 'relu', 'sigmoid']

for init, act in zip(initializers, activations):
    print('\n')
    print('initializer: {}'.format(init.__class__.__name__))
    print('activation: {}'.format(act))
    print('\n')
    
    def create_mlp(hidden_layers, output):
        for i in range(len(hidden_layers)):
            if i == 0:
                hidden = Dense(hidden_layers[i], activation=act, kernel_initializer=init)(output)
            else:
                hidden = Dense(hidden_layers[i], activation=act, kernel_initializer=init)(hidden)
        output = Dense(1, activation='sigmoid', kernel_initializer=init)(hidden)
        return output
    
    output = create_mlp([64, 64], concat)
    
    model = Model(inputs=[user_input, movie_input], outputs=output)
    
    auc = tf.keras.metrics.AUC()
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy', auc])
    
    history = model.fit([train.userId, train.movieId], train.rating, epochs=3, verbose=2, validation_data=([validation.userId, validation.movieId], validation.rating))
    
    # DataFrame에 결과 append
    # results_df = results_df.append(
    #     {'initializer': init.__class__.__name__,
    #      'activation': act,
    #      'last_loss': history.history['loss'][-1],
    #      'last_val_loss': history.history['val_loss'][-1]},
    #     ignore_index=True
    # )
    append_df = pd.DataFrame(
        {'initializer': init.__class__.__name__,
         'activation': act,
         'last_loss': history.history['loss'][-1],
         'last_val_loss': history.history['val_loss'][-1]},
        index=[0]   # ensures correct shape if you're adding a single row
    )
    results_df = pd.concat([results_df, append_df], ignore_index=True)



initializer: HeNormal
activation: relu


Epoch 1/3




2521/2521 - 3s - 1ms/step - accuracy: 0.8991 - auc_3: 0.9256 - loss: 0.2480 - val_accuracy: 0.8102 - val_auc_3: 0.7645 - val_loss: 0.5004
Epoch 2/3
2521/2521 - 2s - 766us/step - accuracy: 0.9218 - auc_3: 0.9537 - loss: 0.1960 - val_accuracy: 0.8057 - val_auc_3: 0.7609 - val_loss: 0.5286
Epoch 3/3
2521/2521 - 2s - 754us/step - accuracy: 0.9322 - auc_3: 0.9640 - loss: 0.1725 - val_accuracy: 0.8073 - val_auc_3: 0.7548 - val_loss: 0.5959


initializer: HeNormal
activation: sigmoid


Epoch 1/3


  results_df = pd.concat([results_df, append_df], ignore_index=True)


2521/2521 - 3s - 1ms/step - accuracy: 0.8517 - auc_4: 0.8451 - loss: 0.3483 - val_accuracy: 0.8298 - val_auc_4: 0.7953 - val_loss: 0.4030
Epoch 2/3
2521/2521 - 2s - 774us/step - accuracy: 0.8673 - auc_4: 0.8794 - loss: 0.3126 - val_accuracy: 0.8285 - val_auc_4: 0.7941 - val_loss: 0.4123
Epoch 3/3
2521/2521 - 2s - 767us/step - accuracy: 0.8746 - auc_4: 0.8953 - loss: 0.2924 - val_accuracy: 0.8260 - val_auc_4: 0.7889 - val_loss: 0.4300


initializer: GlorotNormal
activation: relu


Epoch 1/3
2521/2521 - 3s - 1ms/step - accuracy: 0.9027 - auc_5: 0.9325 - loss: 0.2374 - val_accuracy: 0.8153 - val_auc_5: 0.7642 - val_loss: 0.5287
Epoch 2/3
2521/2521 - 2s - 741us/step - accuracy: 0.9270 - auc_5: 0.9596 - loss: 0.1838 - val_accuracy: 0.8146 - val_auc_5: 0.7587 - val_loss: 0.5524
Epoch 3/3
2521/2521 - 2s - 739us/step - accuracy: 0.9361 - auc_5: 0.9686 - loss: 0.1620 - val_accuracy: 0.8046 - val_auc_5: 0.7421 - val_loss: 0.6260


initializer: GlorotNormal
activation: sigmoid


Epoch 1/3
2521/25

In [11]:
history

<keras.src.callbacks.history.History at 0x1e6b79d3520>

In [12]:
# Print the dataframe
display(results_df)

Unnamed: 0,initializer,activation,last_loss,last_val_loss
0,HeNormal,relu,0.172472,0.595877
1,HeNormal,sigmoid,0.292443,0.429962
2,GlorotNormal,relu,0.161966,0.62595
3,GlorotNormal,sigmoid,0.291189,0.423215
