In [16]:
import pandas as pd
import numpy as np

In [17]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Embedding, Flatten, Concatenate, Multiply, Dense, Dropout, Input

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [18]:
### Function for getting movie recommendations based on the highest predicted ratings
def get_top_recommendations_tf(user, model, user_encoder, item_encoder, id_to_name, df, n_recommendations=10):
    """Print the unrated movies with the highest predicted rating for one user.

    Parameters
    ----------
    user
        Raw user ID, compared against ``df['userid']``.
    model
        Object whose ``predict([user_codes, item_codes])`` returns one
        predicted rating per (user, item) pair.
    user_encoder, item_encoder
        Fitted encoders exposing ``transform`` (raw ID -> code);
        ``item_encoder`` must also expose ``inverse_transform``.
    id_to_name
        Mapping from raw movie ID to a display title. Movies missing from
        the mapping are printed by their raw ID instead of raising.
    df
        DataFrame with at least the columns ``userid`` and ``movieid``.
    n_recommendations
        Maximum number of movies to print.

    Returns
    -------
    None
        The recommendations are printed, not returned.

    Notes
    -----
    Errors raised by ``user_encoder.transform`` (for example for an ID the
    encoder was not fitted on) are not caught here and propagate to the caller.
    """
    # Encode the user ID
    encoded_user = user_encoder.transform([user])[0]

    # Encode every movie in the data and the subset this user already rated
    encoded_movies = item_encoder.transform(df['movieid'].unique())
    rated_movies = df.loc[df['userid'] == user, 'movieid'].unique()
    encoded_rated_movies = item_encoder.transform(rated_movies)

    # Candidates are the movies the user has not rated yet
    encoded_unrated_movies = np.setdiff1d(encoded_movies, encoded_rated_movies)

    if len(encoded_unrated_movies) == 0:
        # Nothing left to score: skip the model call instead of handing it an empty batch
        predicted_ratings = []
    else:
        unrated_movies = item_encoder.inverse_transform(encoded_unrated_movies)

        # Repeat the user code once per candidate movie so both inputs line up
        user_input = np.array([encoded_user] * len(encoded_unrated_movies))

        # Predict and clamp to the 0-5 rating scale
        predictions = model.predict([user_input, encoded_unrated_movies])
        predictions = np.clip(predictions, 0, 5).flatten()

        # Highest predicted rating first (stable sort keeps tie order)
        predicted_ratings = sorted(
            zip(unrated_movies, predictions),
            key=lambda pair: pair[1],
            reverse=True,
        )

    # Print the top N recommendations
    print(f'Top {n_recommendations} recommendations for User {user}:')
    print('-----')
    for movie_id, rating in predicted_ratings[:n_recommendations]:
        # Fall back to the raw ID when the title lookup has no entry for this movie
        movie_name = id_to_name.get(movie_id, movie_id)
        # Fixed-point formatting: round() on a float32 prints e.g. 4.547999858856201
        print(f'{movie_name} ({float(rating):.3f})')


In [19]:
#### Read in Data

## movies: lookup table from movie ID to title
movies = pd.read_csv('movies.csv', dtype={'movieid': str})
id_to_name = movies.set_index('movieid')['title'].to_dict()

## Class ratings
# IDs are read as strings so they share a dtype with the MovieLens IDs below
class_ratings = pd.read_csv('class_ratings_f24.csv', dtype={'userid': str, 'movieid': str})
user_ids = class_ratings['userid'].unique()
n_ratings = class_ratings.groupby('userid').size()
# unique() keeps first-appearance order while groupby().size() is sorted by key,
# so look the counts up by label rather than zipping positionally. A list
# (not a bare zip iterator) can be iterated more than once across cells.
users = list(zip(user_ids, n_ratings.reindex(user_ids)))

## MovieLens ratings
df = pd.read_csv('ratings_subset.csv', dtype={'userid': str, 'movieid': str, 'rating': float})

## combine class ratings with ratings from MovieLens
# ignore_index gives the combined frame a clean 0..n-1 index instead of duplicated labels
df = pd.concat([df, class_ratings], ignore_index=True)

In [20]:
## Prepare data for the TensorFlow model
## Each distinct user ID and movie ID is mapped to its own integer code,
## which is what the embedding layers index into.
user_encoder = LabelEncoder().fit(df['userid'])
item_encoder = LabelEncoder().fit(df['movieid'])

df['user'] = user_encoder.transform(df['userid'])
df['item'] = item_encoder.transform(df['movieid'])

In [21]:
## Create training and test data
# 80/20 split with a fixed seed so the same rows land in each set on every run
train_data, test_data = train_test_split(df, test_size=0.2, random_state=7)

# The model takes two parallel inputs: encoded users and encoded items
X_train = [train_data[column].values for column in ('user', 'item')]
X_test = [test_data[column].values for column in ('user', 'item')]

# Target: the observed rating for each (user, item) row
y_train = train_data['rating']
y_test = test_data['rating']

In [22]:
# Model input values: number of distinct users and movies in the combined ratings
num_users, num_items = (df[column].nunique() for column in ("userid", "movieid"))

# Length of each embedding vector (a hyperparameter)
latent_dim = 10

## Deep Learning Approaches to Recommender Systems
There are many deep learning approaches for building recommender systems; two common model choices are neural collaborative filtering (NCF) and deep matrix factorization (DMF).

### Neural Collaborative Filtering (NCF)
NCF combines traditional collaborative filtering techniques with neural networks to enhance the recommendation system's ability to capture complex user-item interactions. It expresses matrix factorization explicitly within the neural network framework, and then extends beyond matrix factorization by integrating a multi-layer perceptron (MLP) to learn the user-item interaction function.

### Deep Matrix Factorization (DMF)
DMF enhances traditional matrix factorization methods by incorporating neural networks. It focuses on improving the quality of the user and item embeddings and the interactions between them, and uses these better representations in the shared latent space to make more accurate predictions of user-item interactions. At its core, DMF uses embedding layers for users and items, similar to traditional matrix factorization, but it benefits from the non-linear transformations provided by subsequent neural network layers.


### Key Differences
* NCF generally introduces more complexity through its use of both linear (matrix factorization) and non-linear (MLP) components, whereas DMF focuses on deepening the matrix factorization approach with neural network layers.
* NCF explicitly models both linear and non-linear interactions between users and items, offering a broader scope in capturing the nuances of user preferences. DMF primarily enhances the latent feature interactions through deep learning, improving upon traditional matrix factorization without fundamentally altering its linear nature.
* NCF offers greater flexibility in modeling different types of interactions but at the cost of increased complexity. DMF maintains a focused enhancement of matrix factorization, potentially making it more accessible for those already familiar with matrix factorization techniques.

## NCF

In [23]:

# Inputs: one encoded user ID and one encoded item ID per example
user_input = Input(shape=(1,), name='user_input')
item_input = Input(shape=(1,), name='item_input')

# Embeddings: a latent_dim-sized vector per user and per item
user_embedding = Embedding(
    input_dim=num_users, output_dim=latent_dim, name='user_embedding'
)(user_input)
item_embedding = Embedding(
    input_dim=num_items, output_dim=latent_dim, name='item_embedding'
)(item_input)

# Flatten embeddings
user_vec = Flatten(name='flatten_user')(user_embedding)
item_vec = Flatten(name='flatten_item')(item_embedding)

# Matrix-factorization part: element-wise product of the two latent vectors
multiply_vec = Multiply(name='multiply')([user_vec, item_vec])

# Feed the raw user/item vectors together with their product into the MLP
concat_vec = Concatenate(name='concatenate')([user_vec, item_vec, multiply_vec])

# MLP part: two ReLU layers (128 then 64 units), each followed by 20% dropout
dense = concat_vec
for layer_no, units in enumerate((128, 64), start=1):
    dense = Dense(units, activation='relu', name=f'dense{layer_no}')(dense)
    dense = Dropout(0.2, name=f'dropout{layer_no}')(dense)

# Single linear output unit: the predicted rating
output = Dense(1, activation=None, name='output')(dense)

# Create and compile the model (regression on the rating, so MSE loss)
model = Model(inputs=[user_input, item_input], outputs=output)
model.compile(optimizer='adam', loss='mean_squared_error')

# Summary
#model.summary()



In [24]:
## Train the NCF model
n_epochs = 5
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=n_epochs,
    validation_split=0.2,  # fraction of the training data held out for validation
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [25]:
# Loss of the NCF model on the held-out test split (MSE, per the compile step)
model.evaluate(x=X_test, y=y_test)



0.6398596167564392

In [29]:
# Print the top-10 NCF recommendations for every user in the class ratings
for user in user_ids:
    get_top_recommendations_tf(
        user, model, user_encoder, item_encoder, id_to_name, df,
        n_recommendations=10,
    )
    print('\n')

Top 10 recommendations for User Caleb Christensen:
-----
Lord of the Rings: The Return of the King, The (2003) (4.547999858856201)
Mulholland Drive (2001) (4.2870001792907715)
Sin City (2005) (4.263999938964844)
Kill Bill: Vol. 1 (2003) (4.203000068664551)
Old Boy (2003) (4.172999858856201)
Donnie Darko (2001) (4.125)
Memento (2000) (4.110000133514404)
Kill Bill: Vol. 2 (2004) (4.10099983215332)
Spirited Away (Sen to Chihiro no kamikakushi) (2001) (4.09499979019165)
City of God (Cidade de Deus) (2002) (4.061999797821045)


Top 10 recommendations for User Emma Ouzts:
-----
The Martian (2015) (4.5289998054504395)
Pride & Prejudice (2005) (4.519999980926514)
Intouchables (2011) (4.513999938964844)
Blind Side, The  (2009) (4.498000144958496)
Spotlight (2015) (4.441999912261963)
Harry Potter and the Prisoner of Azkaban (2004) (4.409999847412109)
Finding Nemo (2003) (4.409999847412109)
Pirates of the Caribbean: The Curse of the Black Pearl (2003) (4.406000137329102)
Room (2015) (4.4060001373

## DMF

In [None]:
### Deep matrix factorization

# Inputs: one encoded user ID and one encoded item ID per example
user_input = Input(shape=(1,))
item_input = Input(shape=(1,))

# Embeddings: a latent_dim-sized vector per user and per item
user_embedding = Embedding(
    input_dim=num_users, output_dim=latent_dim, name='user_embedding'
)(user_input)
item_embedding = Embedding(
    input_dim=num_items, output_dim=latent_dim, name='item_embedding'
)(item_input)

user_latent = Flatten()(user_embedding)
item_latent = Flatten()(item_embedding)

# Unlike the NCF cell there is no separate element-wise-product branch:
# the two embeddings are concatenated and passed straight to the dense layers.
concat_latent = Concatenate()([user_latent, item_latent])

# Two ReLU layers (64 then 32 units), each followed by 50% dropout
dense = concat_latent
for units in (64, 32):
    dense = Dense(units, activation='relu')(dense)
    dense = Dropout(0.5)(dense)

# Single linear output unit: the predicted rating
predictions = Dense(1)(dense)

model_dmf = Model(inputs=[user_input, item_input], outputs=predictions)
model_dmf.compile(optimizer='adam', loss='mse')

model_dmf.summary()


In [None]:
# Train the DMF model
n_epochs = 5
history_dmf = model_dmf.fit(
    x=X_train,
    y=y_train,
    epochs=n_epochs,
    validation_split=0.2,  # fraction of the training data held out for validation
)

In [None]:
# Loss of the DMF model on the held-out test split (MSE, per the compile step)
model_dmf.evaluate(x=X_test, y=y_test)

In [None]:
# Top-10 DMF recommendations for a single user
# NOTE(review): the class user IDs printed earlier look like full names;
# confirm 'shannon' is an ID the user encoder was actually fitted on.
get_top_recommendations_tf(
    'shannon', model_dmf, user_encoder, item_encoder, id_to_name, df,
    n_recommendations=10,
)