In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import TensorBoard
from sklearn.model_selection import train_test_split

from tqdm.keras import TqdmCallback
from sklearn.preprocessing import StandardScaler, MinMaxScaler

import datetime



In [3]:
# Load the pre-computed interaction, user and article matrices.
# `size` and `fillna_value` only select which pickle files are read: both are
# interpolated into the file names below and nothing else.
size = 'demo'
fillna_value = '0'

# Resolve all three file locations up front.
interaction_matrix_file_path = f'./files/pickle/interaction_matrix_{size}_{fillna_value}.pkl'
user_matrix_df_file_path = f'./files/pickle/user_matrix_{size}_{fillna_value}.pkl'
article_matrix_df_file_path = f'./files/pickle/article_matrix_{size}_{fillna_value}.pkl'

# Interaction matrix (presumably users x articles; the printed shapes are
# consistent with that reading -- confirm against the producer of the file).
interaction_matrix_df = pd.read_pickle(interaction_matrix_file_path)
print('Interaction matrix df shape:                      ',interaction_matrix_df.shape)

# Per-user and per-article embedding matrices.
user_matrix_df = pd.read_pickle(user_matrix_df_file_path)
article_matrix_df = pd.read_pickle(article_matrix_df_file_path)
print('User embedding df shape:                         ',user_matrix_df.shape)
print('Article embedding df shape:                      ',article_matrix_df.shape)

Interaction matrix df shape:                       (1590, 4247)
User embedding df shape:                          (1590, 300)
Article embedding df shape:                       (4247, 300)


In [4]:
# Convert the dataframes to numpy arrays
user_vectors = user_matrix_df.values
article_vectors = article_matrix_df.values
interaction_matrix = interaction_matrix_df.values

# The following cells look up user_vectors / article_vectors by the row and
# column positions of interaction_matrix, so the three arrays must agree in
# size. Fail loudly here instead of silently pairing the wrong vectors or
# raising an opaque IndexError later.
# NOTE(review): this only checks sizes; it is assumed that the row order of
# user_matrix_df / article_matrix_df matches the index / columns of
# interaction_matrix_df -- confirm against the code that wrote the pickles.
expected_interaction_shape = (user_vectors.shape[0], article_vectors.shape[0])
if interaction_matrix.shape != expected_interaction_shape:
    raise ValueError(
        f'Interaction matrix shape {interaction_matrix.shape} does not match '
        f'(n_users, n_articles) = {expected_interaction_shape}'
    )

In [5]:
# Locate every non-zero cell of the interaction matrix.
# The boolean mask is computed once and reused for both the index lookup and
# the value extraction; both walk the matrix in the same (row-major) order, so
# read_times[k] is the value at (user_idx[k], article_idx[k]).
nonzero_mask = interaction_matrix != 0
user_idx, article_idx = np.where(nonzero_mask)
read_times = interaction_matrix[nonzero_mask]

In [16]:
# Sanity check: one entry per non-zero cell of the interaction matrix.
read_times.shape

(212069,)

In [17]:
# Build one training example per non-zero interaction:
# features = [user vector | article vector], target = the interaction value.
user_features = user_vectors[user_idx]
article_features = article_vectors[article_idx]
X = np.hstack([user_features, article_features])
y = read_times


In [19]:
# Show the feature-matrix and target shapes, one per line.
print(X.shape, y.shape, sep='\n')

(212069, 600)
(212069,)


In [20]:
# Use only a small subset of the rows for testing.
#
# The rows of X follow the order returned by np.where, i.e. row-major over the
# interaction matrix, so X[:100] would contain only the interactions of the
# first user(s) and the user half of every feature row would be (nearly)
# constant. Draw the subset at random instead, with a fixed seed so the
# selection is repeatable, and never ask for more rows than exist.
n_test_rows = min(100, len(X))
subset_idx = np.random.default_rng(42).choice(len(X), size=n_test_rows, replace=False)
X = X[subset_idx]
y = y[subset_idx]

In [21]:
# Confirm the reduced feature-matrix and target shapes, one per line.
print(X.shape, y.shape, sep='\n')

(100, 600)
(100,)


In [23]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the shape of each split, one per line (same order as before:
# X_train, X_test, y_train, y_test).
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape, sep='\n')

(80, 600)
(20, 600)
(80,)
(20,)


In [25]:
# Seed TensorFlow's global random generator so that weight initialisation
# (and other TF-level randomness) starts from the same state on every run.
# NOTE(review): this does not by itself guarantee bit-identical results on
# every platform (e.g. non-deterministic GPU kernels).
tf.random.set_seed(42)

# Define the model: a small fully connected regressor that maps a concatenated
# [user vector | article vector] row to a single predicted value.
# The input width is taken from the training data instead of being hard-coded,
# so the cell keeps working if the embedding size changes.
n_features = X_train.shape[1]
model = Sequential([
    tf.keras.Input(shape=(n_features,)),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1)  # linear output for regression
])

# Compile the model.
# 'mse' is listed as a metric in addition to being the loss on purpose: the
# evaluation cell unpacks model.evaluate(...) into exactly (loss, mse).
model.compile(optimizer='adam', loss='mse', metrics=['mse'])

# Prepare TensorBoard callback; each run logs into its own timestamped folder.
log_dir = "files/logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train the model.
# The held-out split is passed as validation data only to monitor the loss
# after each epoch.
num_epochs = 2
history = model.fit(X_train, y_train, epochs=num_epochs, batch_size=16,
                    validation_data=(X_test, y_test),
                    callbacks=[tensorboard_callback, TqdmCallback(verbose=1)],
                    verbose=2)

0epoch [00:00, ?epoch/s]

0batch [00:00, ?batch/s]

Epoch 1/2


In [None]:
# Persist the trained network to disk, relative to the notebook's working
# directory.
model_output_path = 'recommendation_model.h5'
model.save(model_output_path)


In [None]:
# Score the trained model on both splits.
# The model was compiled with loss='mse' and a single 'mse' metric, so each
# evaluate() call yields a (loss, mse) pair.
train_metrics = model.evaluate(X_train, y_train, verbose=1)
train_loss, train_mse = train_metrics

test_metrics = model.evaluate(X_test, y_test, verbose=1)
test_loss, test_mse = test_metrics