## 11b. Keras deep learning for recommendations

In [1]:
# Point the working directory at the folder holding the MovieLens CSV files
import os

ml20m_dir = "C:\\Users\\datam\\OneDrive\\Desktop\\python\\ml-20m"  # change as needed
os.chdir(ml20m_dir)

# Display the current working directory to confirm the switch worked
os.getcwd()

'C:\\Users\\datam\\OneDrive\\Desktop\\python\\ml-20m'

In [2]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt

In [None]:
# 1. Load the raw CSV files (both paths are relative to the current working directory)
ratings_df = pd.read_csv(filepath_or_buffer='ratings.csv')  # edit the path if your file lives elsewhere
movies_df = pd.read_csv(filepath_or_buffer='movies.csv')    # edit the path if your file lives elsewhere

In [5]:
# Sanity check: (rows, columns) of the movies table that was just loaded
movies_df.shape

(27278, 3)

In [6]:
# Re-index the raw user and movie IDs as consecutive integers 0..n-1, numbered in
# order of first appearance, so they can be used directly as embedding indices.
user_ids = ratings_df['userId'].unique()
movie_ids = ratings_df['movieId'].unique()
n_users, n_movies = len(user_ids), len(movie_ids)

# Lookup tables: original ID -> new consecutive index
user_id_map = dict(zip(user_ids, range(n_users)))
movie_id_map = dict(zip(movie_ids, range(n_movies)))

# Overwrite the ID columns of ratings_df with the new indices
ratings_df['userId'] = ratings_df['userId'].map(user_id_map)
ratings_df['movieId'] = ratings_df['movieId'].map(movie_id_map)

In [7]:
# Hold out a share of the ratings as a test set; the fixed seed keeps the split repeatable
test_fraction = 0.2
split_seed = 42
train_data, test_data = train_test_split(
    ratings_df,
    test_size=test_fraction,
    random_state=split_seed,
)

In [8]:
# Model and training hyperparameters; adjust these as needed
embedding_size = 50  # length of each user/movie embedding vector
n_epochs = 1 # one epoch already takes a very long time on this dataset; use 10 if you can afford it
batch_size = 256  # batch size handed to model.fit

In [9]:
# Build the Keras model; adjust these parameters as needed
def build_recommender_model(n_users, n_movies, embedding_size,
                            hidden_units=(96, 64), dropout_rate=0.2):
    """Build and compile a two-input embedding network that predicts a rating.

    Each user index and movie index is looked up in its own embedding table,
    the two vectors are concatenated, and the result is passed through a
    stack of ReLU dense layers (each followed by dropout) ending in a single
    linear output unit.

    Parameters
    ----------
    n_users : int
        Number of rows in the user embedding table. User indices fed to the
        model must lie in ``[0, n_users)``.
    n_movies : int
        Number of rows in the movie embedding table. Movie indices fed to the
        model must lie in ``[0, n_movies)``.
    embedding_size : int
        Length of each user and movie embedding vector.
    hidden_units : sequence of int, optional
        Width of each hidden dense layer, in order. The default ``(96, 64)``
        reproduces the original architecture (128 was recommended for the
        first layer; 96 was chosen instead).
    dropout_rate : float, optional
        Dropout rate applied after every hidden dense layer. Default 0.2.

    Returns
    -------
    keras.Model
        Model taking ``[user_input, movie_input]`` and compiled with the Adam
        optimizer, mean-squared-error loss and an MAE metric.
    """
    # User input and embedding
    user_input = layers.Input(shape=(1,), name='user_input')
    user_embedding = layers.Embedding(n_users, embedding_size,
                                      name='user_embedding')(user_input)
    user_vec = layers.Flatten(name='flatten_users')(user_embedding)

    # Movie input and embedding
    movie_input = layers.Input(shape=(1,), name='movie_input')
    movie_embedding = layers.Embedding(n_movies, embedding_size,
                                       name='movie_embedding')(movie_input)
    movie_vec = layers.Flatten(name='flatten_movies')(movie_embedding)

    # Concatenate user and movie embeddings
    hidden = layers.Concatenate()([user_vec, movie_vec])

    # Hidden stack: Dense -> Dropout for every requested width. Layers are
    # created in the same order as the original hand-written stack.
    for units in hidden_units:
        hidden = layers.Dense(units, activation='relu')(hidden)
        hidden = layers.Dropout(dropout_rate)(hidden)

    # Single linear unit: the predicted rating is an unbounded real value
    output = layers.Dense(1, activation='linear')(hidden)

    # Create and compile model
    model = keras.Model(inputs=[user_input, movie_input], outputs=output)
    model.compile(optimizer='adam',
                  loss='mean_squared_error',
                  metrics=['mae'])

    return model

## Run the code below and go get yourself a meal, watch all seasons of Breaking Bad, or go on vacation.  

This is a LARGE dataset

In [11]:
# Instantiate the network and print its layer-by-layer summary
model = build_recommender_model(n_users, n_movies, embedding_size)
model.summary()

# Pull the model inputs and the target out of the training frame as NumPy arrays
train_users = train_data['userId'].to_numpy()
train_movies = train_data['movieId'].to_numpy()
train_ratings = train_data['rating'].to_numpy()

# Fit the model, holding back 10% of the training rows for validation;
# the returned History object is kept in `history`
history = model.fit(
    x=[train_users, train_movies],
    y=train_ratings,
    epochs=n_epochs,
    batch_size=batch_size,
    validation_split=0.1,
    verbose=1,
)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 user_input (InputLayer)        [(None, 1)]          0           []                               
                                                                                                  
 movie_input (InputLayer)       [(None, 1)]          0           []                               
                                                                                                  
 user_embedding (Embedding)     (None, 1, 50)        6924650     ['user_input[0][0]']             
                                                                                                  
 movie_embedding (Embedding)    (None, 1, 50)        1337200     ['movie_input[0][0]']            
                                                                                              

In [None]:
# Evaluate Model