In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Dense, Concatenate, Dot, Flatten
from tensorflow.keras.models import Model

In [None]:
# Read the MovieLens 20M ratings CSV into a DataFrame.
# NOTE(review): the path looks like a Colab Google Drive mount — adjust when running elsewhere.
data = pd.read_csv(filepath_or_buffer='/content/drive/MyDrive/Colab_Notebooks/rating.csv')

In [None]:
# Keep only the three columns the model consumes: who rated, what was rated, and the score
data = data.loc[:, ['userId', 'movieId', 'rating']]


In [None]:
# Discard every row that has a missing value in any of the kept columns
data = data.dropna(axis=0, how='any')

In [None]:
# Display (rows, columns) of the ratings table after the missing-value step
data.shape

(20000263, 3)

In [None]:
# How many rows a 1% subsample of the ratings table would contain
len(data) * 0.01

200002.63

In [None]:
# Work on a random subset of the ratings to keep training time manageable.
# A fixed seed makes the subset (and therefore the embedding sizes, training
# curves and RMSE reported below) reproducible across Restart & Run All.
# NOTE: this cell overwrites `data`; re-running it without reloading the CSV
# samples from the already-reduced frame.
SAMPLE_FRACTION = 0.05
RANDOM_SEED = 42
data = data.sample(n=int(data.shape[0] * SAMPLE_FRACTION), random_state=RANDOM_SEED)
data.shape

(1000013, 3)

In [None]:
# Print column dtypes, non-null counts and memory usage of the sampled frame
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1000013 entries, 16314683 to 19657567
Data columns (total 3 columns):
 #   Column   Non-Null Count    Dtype  
---  ------   --------------    -----  
 0   userId   1000013 non-null  int64  
 1   movieId  1000013 non-null  int64  
 2   rating   1000013 non-null  float64
dtypes: float64(1), int64(2)
memory usage: 30.5 MB


In [None]:
# Encode raw userId / movieId values as integer category codes so they can be
# used directly as row indices into the embedding tables below.
user_ids = data['userId'].astype('category').cat.codes.values
movie_ids = data['movieId'].astype('category').cat.codes.values

# Define the input layers: one integer code per example for each side
user_input = Input(shape=(1,), dtype=tf.int32, name='user_input')
item_input = Input(shape=(1,), dtype=tf.int32, name='item_input')

# Create the user and item embeddings: one 16-dimensional row per distinct ID
# present in `data` (matches the number of category codes produced above)
user_embedding = Embedding(input_dim=len(data['userId'].unique()), output_dim=16)(user_input)
item_embedding = Embedding(input_dim=len(data['movieId'].unique()), output_dim=16)(item_input)

# Flatten the embeddings from (batch, 1, 16) to (batch, 16)
user_embedding_flat = Flatten()(user_embedding)
item_embedding_flat = Flatten()(item_embedding)

# Concatenate the user and item embeddings
concatenated_embeddings = Concatenate()([user_embedding_flat, item_embedding_flat])

# Attention mechanism
# NOTE(review): Dense(1) yields a single score per example, so the softmax
# below normalises over exactly one element and always returns 1.0. The
# attention weight is therefore constant and `weighted_sum` equals the item
# embedding. The architecture is kept as written; revisit if a learned
# attention weight is intended.
attention_weights = Dense(1, activation='tanh')(concatenated_embeddings)
attention_weights = Flatten()(attention_weights)
attention_weights = tf.keras.activations.softmax(attention_weights)
attention_weights = tf.keras.layers.Reshape((-1, 1))(attention_weights)

# Multiply attention weights with item embeddings (contracts the length-1 axis)
weighted_sum = Dot(axes=(1, 1))([attention_weights, item_embedding])
weighted_sum = Flatten()(weighted_sum)

# Concatenate the user embedding and the weighted sum
output = Concatenate()([user_embedding_flat, weighted_sum])

# Final prediction layer: a single unbounded value (the predicted rating)
output = Dense(1, activation='linear')(output)

# Create the AFM model
model = Model(inputs=[user_input, item_input], outputs=output)

# Compile the model.
# This is a regression on continuous ratings, so track regression metrics.
# Binary accuracy / precision / recall threshold predictions as if they were
# class probabilities, which is meaningless for a linear rating output.
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=[
        tf.keras.metrics.RootMeanSquaredError(name='rmse'),
        tf.keras.metrics.MeanAbsoluteError(name='mae'),
    ],
)

# Print the model summary
model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 user_input (InputLayer)        [(None, 1)]          0           []                               
                                                                                                  
 item_input (InputLayer)        [(None, 1)]          0           []                               
                                                                                                  
 embedding (Embedding)          (None, 1, 16)        2006752     ['user_input[0][0]']             
                                                                                                  
 embedding_1 (Embedding)        (None, 1, 16)        245792      ['item_input[0][0]']             
                                                                                              

In [None]:
# Fit the model on the encoded (user, movie) pairs against their ratings.
# NOTE(review): this cell was labelled as the 1% run, but the sampling cell
# currently draws 5% of the rows — presumably the label dates from an earlier
# execution; confirm which fraction the recorded output corresponds to.
model.fit(
    x=[user_ids, movie_ids],
    y=data['rating'],
    epochs=25,
    batch_size=128,
)


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x7fe587137430>

In [None]:
# Fit again on the 5% sample.
# This calls fit on the same `model` object as the previous training cell, so
# when both cells run in order the weights continue from where that run ended
# (25 additional epochs) rather than starting from a fresh initialisation.
model.fit(
    x=[user_ids, movie_ids],
    y=data['rating'],
    epochs=25,
    batch_size=128,
)


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x7fe5516dcd90>

In [None]:
# Re-derive integer category codes for the IDs used in evaluation.
# NOTE(review): although named "test", these come from the same `data` frame
# the model was fitted on; no separate held-out split is created here.
test_user_ids, test_movie_ids = (
    data[id_column].astype('category').cat.codes.to_numpy()
    for id_column in ('userId', 'movieId')
)

In [None]:
# Score the fitted model: RMSE between the stored ratings and the model's predictions
from sklearn.metrics import mean_squared_error
import numpy as np

# NOTE(review): test_user_ids / test_movie_ids are built from the same `data`
# frame used for fitting, so the figure printed below is an in-sample error.
predictions = model.predict(x=[test_user_ids, test_movie_ids])

mse = mean_squared_error(y_true=data['rating'], y_pred=predictions)
rmse = np.sqrt(mse)
print(f"RMSE: {rmse}")

RMSE: 0.798328882313024


In [None]:
# Make predictions for specific raw MovieLens IDs.
# The model was trained on category codes (positions in the sorted set of IDs
# present in `data`), not on raw userId / movieId values. Raw IDs must be
# translated through the same mapping before they reach the embedding layers;
# feeding them directly would look up unrelated users and movies.
user_ids_to_predict = np.array([1])  # Example raw userId values to predict for
movie_ids_to_predict = np.array([2])  # Example raw movieId values to predict for

# Index.get_indexer returns the category code for each raw ID, or -1 if the ID
# does not occur in the sampled training data.
user_codes_to_predict = data['userId'].astype('category').cat.categories.get_indexer(user_ids_to_predict)
movie_codes_to_predict = data['movieId'].astype('category').cat.categories.get_indexer(movie_ids_to_predict)

# Only pairs whose user AND movie were seen during training have embeddings
known_pairs = (user_codes_to_predict >= 0) & (movie_codes_to_predict >= 0)
if not known_pairs.all():
    skipped = list(zip(user_ids_to_predict[~known_pairs].tolist(),
                       movie_ids_to_predict[~known_pairs].tolist()))
    print('Skipping (userId, movieId) pairs not present in the training sample:', skipped)

if known_pairs.any():
    predictions = model.predict([user_codes_to_predict[known_pairs],
                                 movie_codes_to_predict[known_pairs]])
else:
    # Nothing to score; keep the (n, 1) shape that model.predict would return
    predictions = np.empty((0, 1))
print('Predictions:', predictions)