## Import Libraries

In [36]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [37]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Embedding, Flatten, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam

In [38]:
from sklearn.utils import shuffle

## Load the Dataset

In [39]:
# Load the raw ratings table from the CSV that sits next to this notebook.
ratings = pd.read_csv(filepath_or_buffer='ratings.csv')

In [40]:
# Preview the first five rows to confirm the file parsed into the expected columns.
ratings.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,296,5.0,1147880044
1,1,306,3.5,1147868817
2,1,307,5.0,1147868828
3,1,665,5.0,1147878820
4,1,899,3.5,1147868510


In [41]:
# (number of rows, number of columns) of the ratings table as loaded.
ratings.shape

(25000095, 4)

In [42]:
# The timestamp is not used as a model feature, so drop it.
# errors='ignore' keeps the cell re-runnable: without it a second execution
# raises KeyError because the column has already been removed.
ratings = ratings.drop(columns=['timestamp'], errors='ignore')

In [43]:
# Confirm the timestamp column is gone.
ratings.head(n=5)

Unnamed: 0,userId,movieId,rating
0,1,296,5.0
1,1,306,3.5
2,1,307,5.0
3,1,665,5.0
4,1,899,3.5


Both `userId` and `movieId` must be remapped to contiguous 0-based integers (0 to n-1 for n users and 0 to m-1 for m movies), because an embedding layer uses each id directly as a row index into its weight table.

In [44]:
current_user = 0        # next dense id that has not been handed out yet
custom_user_map = {}    # original userId -> dense 0-based id


def map_userID(row):
    """Return the dense 0-based id for ``row['userId']``.

    Ids are allocated in order of first appearance: the first distinct
    userId seen gets 0, the next gets 1, and so on. The mapping is kept in
    the module-level ``custom_user_map`` and the next free id in
    ``current_user``, so repeated calls with the same userId return the
    same value.

    Parameters
    ----------
    row : mapping
        Anything indexable by the key ``'userId'`` (e.g. a DataFrame row).

    Returns
    -------
    int
        The dense id assigned to this user.
    """
    global current_user, custom_user_map
    raw_id = row['userId']
    if raw_id in custom_user_map:
        return custom_user_map[raw_id]
    # First sighting: hand out the next free id and advance the counter.
    dense_id = current_user
    custom_user_map[raw_id] = dense_id
    current_user = dense_id + 1
    return dense_id

# Vectorized remap: pd.factorize numbers the distinct userIds 0, 1, 2, ... in
# order of first appearance, which is exactly the numbering map_userID produces,
# but without one Python-level function call per row (the table has ~25M rows).
# NOTE: factorize encodes missing values as -1; userId is assumed to have none.
user_codes, user_uniques = pd.factorize(ratings['userId'])
ratings['new_userId'] = user_codes

# Leave the lookup table and counter in the state the row-wise pass would have
# produced, so map_userID / custom_user_map remain usable afterwards.
custom_user_map = dict(zip(user_uniques.tolist(), range(len(user_uniques))))
current_user = len(user_uniques)

In [45]:
current_movie = 0        # next dense id that has not been handed out yet
custom_movie_map = {}    # original movieId -> dense 0-based id


def map_movie(row):
    """Return the dense 0-based id for ``row['movieId']``.

    Ids are allocated in order of first appearance: the first distinct
    movieId seen gets 0, the next gets 1, and so on. The mapping is kept in
    the module-level ``custom_movie_map`` and the next free id in
    ``current_movie``, so repeated calls with the same movieId return the
    same value.

    Parameters
    ----------
    row : mapping
        Anything indexable by the key ``'movieId'`` (e.g. a DataFrame row).

    Returns
    -------
    int
        The dense id assigned to this movie.
    """
    global current_movie, custom_movie_map
    raw_id = row['movieId']
    if raw_id in custom_movie_map:
        return custom_movie_map[raw_id]
    # First sighting: hand out the next free id and advance the counter.
    dense_id = current_movie
    custom_movie_map[raw_id] = dense_id
    current_movie = dense_id + 1
    return dense_id

# Vectorized remap: pd.factorize numbers the distinct movieIds 0, 1, 2, ... in
# order of first appearance, which is exactly the numbering map_movie produces,
# but without one Python-level function call per row (the table has ~25M rows).
# NOTE: factorize encodes missing values as -1; movieId is assumed to have none.
movie_codes, movie_uniques = pd.factorize(ratings['movieId'])
ratings['new_movieId'] = movie_codes

# Leave the lookup table and counter in the state the row-wise pass would have
# produced, so map_movie / custom_movie_map remain usable afterwards.
custom_movie_map = dict(zip(movie_uniques.tolist(), range(len(movie_uniques))))
current_movie = len(movie_uniques)

In [46]:
# Check the new dense id columns side by side with the original ids.
ratings.head(n=5)

Unnamed: 0,userId,movieId,rating,new_userId,new_movieId
0,1,296,5.0,0,0
1,1,306,3.5,0,1
2,1,307,5.0,0,2
3,1,665,5.0,0,3
4,1,899,3.5,0,4


In [47]:
# Remove old userId and movieId; only the dense new_* ids are used from here on.
# errors='ignore' keeps the cell re-runnable: without it a second execution
# raises KeyError because the columns have already been removed.

ratings = ratings.drop(columns=['userId', 'movieId'], errors='ignore')

In [48]:
# Shuffle the DataFrame rows so that the positional train/test split further
# down does not simply follow the file's original row order.
# random_state=42 fixes the permutation so the split is reproducible.

ratings= shuffle(ratings, random_state=42)

In [61]:
# The index column should now show the rows in shuffled order.
ratings.head(n=5)

Unnamed: 0,rating,new_userId,new_movieId
15347762,3.5,99475,3024
16647840,4.0,107978,9645
23915192,3.0,155371,1092
10052313,4.0,65224,227
12214125,5.0,79160,964


In [63]:
# Total number of rating rows available for the train/test split.
len(ratings)

25000095

Now we can use embedding layers to map each user id and each movie id to a learned feature vector.

In [49]:
# Pull the three columns out as NumPy arrays; Keras is fed these directly.

new_userId = ratings['new_userId'].to_numpy()
new_movieId = ratings['new_movieId'].to_numpy()
rating = ratings['rating'].to_numpy()

In [50]:
# Size of each embedding table. An Embedding layer needs input_dim to be
# strictly greater than the largest index it will be given, i.e. max id + 1.
# Because the ids were remapped to dense 0-based codes this equals the number
# of distinct ids, and ndarray.max() is a single vectorized pass instead of
# building a Python set from ~25M elements.
len_user = int(new_userId.max()) + 1
len_movie = int(new_movieId.max()) + 1

# Width of the learned feature vector for every user and every movie.
embedding_dim = 10

In [51]:
# Each sample supplies a single integer id per input, hence shape=(1,).
input_user = Input(shape=(1,))
input_movie = Input(shape=(1,))

# Look up one trainable embedding_dim-wide vector per id.
# Each output tensor has shape (batch, 1, embedding_dim).
output_user = Embedding(input_dim=len_user, output_dim=embedding_dim)(input_user)
output_movie = Embedding(input_dim=len_movie, output_dim=embedding_dim)(input_movie)

`output_user` and `output_movie` each have shape (batch, 1, embedding_dim), so they need to be flattened to (batch, embedding_dim) before they can be fed as input to the ANN.

In [52]:
# Flatten both the Embeddings: collapse the length-1 axis so each tensor goes
# from (batch, 1, embedding_dim) to (batch, embedding_dim).
output_user = Flatten()(output_user)
output_movie = Flatten()(output_movie)

In [53]:
# Join the user and movie vectors along the feature axis, giving one
# (batch, 2 * embedding_dim) input for the dense layers.
combined_input = Concatenate(axis=-1)([output_user, output_movie])

In [54]:
# One wide ReLU hidden layer, then a single linear unit that regresses the rating.
combined_output = Dense(units=1024, activation='relu')(combined_input)
combined_output = Dense(units=1)(combined_output)

## Compile the Model

In [55]:
# Two-input model (user id, movie id) -> predicted rating, trained as a
# regression with mean squared error and SGD with momentum.
model = Model(inputs=[input_user, input_movie], outputs=combined_output)
model.compile(
    optimizer=SGD(learning_rate=0.08, momentum=0.9),
    loss='mse',
)

### Split the Dataset

In [64]:
# Positional split: the first 80% of the rows train the model and the
# remaining 20% are held out for evaluation.

train_length = int(0.8 * len(ratings))

train_user, test_user = new_userId[:train_length], new_userId[train_length:]
train_movie, test_movie = new_movieId[:train_length], new_movieId[train_length:]
train_rating, test_rating = rating[:train_length], rating[train_length:]

## Train the Model

In [65]:
# Train for 15 epochs; the held-out 20% is scored after every epoch and shows
# up as val_loss in the log. verbose=2 prints one summary line per epoch.
# r receives the return value of model.fit (the per-epoch training history).
r = model.fit(
    x=[train_user, train_movie],
    y=train_rating,
    validation_data=([test_user, test_movie], test_rating),
    epochs=15,
    batch_size=1024,
    verbose=2,
)

Epoch 1/15
19532/19532 - 594s - loss: 0.6619 - val_loss: 0.6720 - 594s/epoch - 30ms/step
Epoch 2/15
19532/19532 - 596s - loss: 0.6519 - val_loss: 0.6685 - 596s/epoch - 31ms/step
Epoch 3/15
19532/19532 - 539s - loss: 0.6378 - val_loss: 0.6515 - 539s/epoch - 28ms/step
Epoch 4/15
19532/19532 - 526s - loss: 0.6200 - val_loss: 0.6429 - 526s/epoch - 27ms/step
Epoch 5/15
19532/19532 - 572s - loss: 0.6061 - val_loss: 0.6365 - 572s/epoch - 29ms/step
Epoch 6/15
19532/19532 - 517s - loss: 0.5952 - val_loss: 0.6354 - 517s/epoch - 26ms/step
Epoch 7/15
19532/19532 - 527s - loss: 0.5857 - val_loss: 0.6323 - 527s/epoch - 27ms/step
Epoch 8/15
19532/19532 - 508s - loss: 0.5772 - val_loss: 0.6269 - 508s/epoch - 26ms/step
Epoch 9/15
19532/19532 - 518s - loss: 0.5693 - val_loss: 0.6231 - 518s/epoch - 27ms/step
Epoch 10/15
19532/19532 - 521s - loss: 0.5627 - val_loss: 0.6205 - 521s/epoch - 27ms/step
Epoch 11/15
19532/19532 - 523s - loss: 0.5572 - val_loss: 0.6197 - 523s/epoch - 27ms/step
Epoch 12/15
19532/1