# Matrix Factorization with Keras

In [25]:
import pandas as pd
import numpy as np
import matplotlib as plt

from collections import Counter
from sklearn.utils import shuffle

from keras.layers import Input, Embedding, Dot, Add, Flatten, Concatenate
from keras.layers import Dense, Dropout, Activation
from keras.models import Model
from keras.regularizers import l2
from keras.optimizers import SGD, Adam

## 1. Loading the data

In [7]:
# Read the complete ratings table from disk; the cell displays its row count.
df = pd.read_csv('data/rating.csv')
df.shape[0]

20000263

In [8]:
# Read the movie metadata table (movieId, title, genres) and preview it.
mo = pd.read_csv('data/movie.csv')
mo.head(5)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [9]:
# Join the ratings with the movie metadata on their shared movieId key.
# An inner join keeps only the ratings whose movieId also appears in `mo`.
df2 = df.merge(mo, on='movieId', how='inner')
df2.head(5)

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,2,3.5,2005-04-02 23:53:47,Jumanji (1995),Adventure|Children|Fantasy
1,5,2,3.0,1996-12-25 15:26:09,Jumanji (1995),Adventure|Children|Fantasy
2,13,2,3.0,1996-11-27 08:19:02,Jumanji (1995),Adventure|Children|Fantasy
3,29,2,3.0,1996-06-23 20:36:14,Jumanji (1995),Adventure|Children|Fantasy
4,34,2,3.0,1996-10-28 13:29:44,Jumanji (1995),Adventure|Children|Fantasy


In [10]:
# Discard the two columns that are not displayed or used below, then preview.
df2 = df2.drop(['timestamp', 'genres'], axis=1)
df2.head(5)

Unnamed: 0,userId,movieId,rating,title
0,1,2,3.5,Jumanji (1995)
1,5,2,3.0,Jumanji (1995)
2,13,2,3.0,Jumanji (1995)
3,29,2,3.0,Jumanji (1995)
4,34,2,3.0,Jumanji (1995)


In [11]:
# Shift every userId down by one so that, per the original author's note,
# the ids start from 0. Re-running this cell shifts the ids again.
df2['userId'] = df2['userId'] - 1
df2.head(5)

Unnamed: 0,userId,movieId,rating,title
0,0,2,3.5,Jumanji (1995)
1,4,2,3.0,Jumanji (1995)
2,12,2,3.0,Jumanji (1995)
3,28,2,3.0,Jumanji (1995)
4,33,2,3.0,Jumanji (1995)


## 2. Preprocessing 

### 2-1. Assigning movie index

In [12]:
# Collect the distinct movie ids that occur in the ratings table.
movie_set = set(df['movieId'].values)

In [13]:
# Give each distinct movieId a consecutive integer index (0, 1, 2, ...),
# numbered in whatever order the set is iterated.
movie_idx = {movie_id: position for position, movie_id in enumerate(movie_set)}

In [14]:
# Translate each rating's movieId into its compact index from the movie_idx dict.
# Series.map does the dictionary lookup on the whole column at once; the previous
# df.apply(..., axis = 1) called a Python lambda once per row, which is very slow
# on a table of this size. The resulting column is the same.
# Every movieId is a key of movie_idx (the dict was built from this same column),
# so the lookup never misses and no NaN values are introduced.
df['movie_idx'] = df['movieId'].map(movie_idx)

### 2-2. Splitting into train and test sets

In [15]:
# Preview the ratings table, now including the movie_idx column.
df.head(5)

Unnamed: 0,userId,movieId,rating,timestamp,movie_idx
0,1,2,3.5,2005-04-02 23:53:47,2
1,1,29,3.5,2005-04-02 23:31:16,29
2,1,32,3.5,2005-04-02 23:33:39,32
3,1,47,3.5,2005-04-02 23:32:07,47
4,1,50,3.5,2005-04-02 23:29:40,50


In [16]:
# Sizes of the embedding tables: one more than the largest raw id, so that
# every id value is a valid row index into its table.
# NOTE(review): the model inputs are built from the raw userId / movieId
# columns, which is why M is derived from movieId here; the compact movie_idx
# column computed earlier is not what sizes this table.
N = 1 + df['userId'].max()
M = 1 + df['movieId'].max()

print("The number of Users is ", N)
print("The number of Movies is ", M)

The number of Users is  138494
The number of Movies is  131263


In [17]:
# Row position that separates the first 80% of the rows from the remaining 20%.
cut = int(0.8*len(df))

# Shuffle the rows before slicing so the split does not follow file order.
# A fixed random_state makes the train/test split identical on every fresh run;
# without it each execution produced a different split and different metrics.
df = shuffle(df, random_state = 42)
tr = df.iloc[:cut]    # training rows
te = df.iloc[cut:]    # held-out rows

In [18]:
# Mean rating of the training rows only; both target vectors are centered on
# it, so the held-out targets are offset by the same training-set constant.
rating_avg = tr['rating'].mean()

# Each model takes two parallel inputs: [user ids, movie ids].
X_tr = [tr['userId'].values, tr['movieId'].values]
X_te = [te['userId'].values, te['movieId'].values]

# Targets are the ratings with the training mean subtracted.
y_tr = tr['rating'].values - rating_avg
y_te = te['rating'].values - rating_avg

## 3. Modeling

In [19]:
# Hyperparameters shared by the models below.
K = 10         # width of each user / movie embedding vector
reg = 0        # L2 regularization strength passed to l2(...) on the embeddings
epochs = 10    # number of training epochs given to model.fit

### 3-1. Baseline model

In [20]:
# Inputs: one id per example for the user and one for the movie.
u = Input(shape=(1,))
m = Input(shape=(1,))

# K-dimensional latent factors for users and movies, L2-regularized by `reg`.
u_embedding = Embedding(input_dim=N, output_dim=K, embeddings_regularizer=l2(reg))(u)
m_embedding = Embedding(input_dim=M, output_dim=K, embeddings_regularizer=l2(reg))(m)
# Inner product of the two factor vectors (contracted over the embedding axis).
x = Dot(axes=2)([u_embedding, m_embedding])

# Scalar bias per user and per movie, looked up with the same ids.
u_bias = Embedding(input_dim=N, output_dim=1, embeddings_regularizer=l2(reg))(u)
m_bias = Embedding(input_dim=M, output_dim=1, embeddings_regularizer=l2(reg))(m)

# Prediction = factor inner product + user bias + movie bias, flattened.
x = Add()([x, u_bias, m_bias])
x = Flatten()(x)

# Build the model and compile it with SGD + momentum on a mean-squared-error loss.
model = Model(inputs=[u, m], outputs=x)
model.compile(
    optimizer=SGD(lr=0.08, momentum=0.9),
    loss='mse',
    metrics=['mse'],
)

model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 1, 10)        1384940     input_3[0][0]                    
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 1, 10)        1312630     input_4[0][0]                    
__________________________________________________________________________________________________
dot_1 (Dot

### 3-2. Deeper model

In [21]:
# Inputs: one id per example for the user and one for the movie.
u = Input(shape=(1,))
m = Input(shape=(1,))

# K-dimensional embeddings (no regularizer in this variant).
u_embedding = Embedding(input_dim=N, output_dim=K)(u)
m_embedding = Embedding(input_dim=M, output_dim=K)(m)

# Flatten each embedding before joining them.
u_embedding = Flatten()(u_embedding)
m_embedding = Flatten()(m_embedding)

# Joint user+movie feature vector that the dense layers consume.
x = Concatenate()([u_embedding, m_embedding])

In [26]:
# Feed-forward head on top of the concatenated embeddings:
# one hidden layer of 400 units with ReLU, then a single linear output unit.
# Experiments the author left disabled here: BatchNormalization after each
# Dense layer, and an extra Dropout(0.5) -> Dense(100) -> ReLU stage.
# (BatchNormalization is not among the imported layers.)
x = Activation('relu')(Dense(400)(x))
x = Dense(1)(x)

# Build the model and compile it with SGD + momentum on a mean-squared-error loss.
model = Model(inputs=[u, m], outputs=x)
model.compile(
    optimizer=SGD(lr=0.08, momentum=0.9),
    loss='mse',
    metrics=['mse'],
)

model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
input_6 (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_5 (Embedding)         (None, 1, 10)        1384940     input_5[0][0]                    
__________________________________________________________________________________________________
embedding_6 (Embedding)         (None, 1, 10)        1312630     input_6[0][0]                    
__________________________________________________________________________________________________
flatten_2 

### 3-3. Residual model

In [29]:
# Inputs: one id per example for the user and one for the movie.
u = Input(shape=(1,))
m = Input(shape=(1,))

# Embeddings shared by both branches.
u_embedding = Embedding(input_dim=N, output_dim=K)(u)
m_embedding = Embedding(input_dim=M, output_dim=K)(m)

# Main branch: matrix factorization (inner product plus user and movie biases).
u_bias = Embedding(input_dim=N, output_dim=1)(u)
m_bias = Embedding(input_dim=M, output_dim=1)(m)

x = Dot(axes=2)([u_embedding, m_embedding])
x = Add()([x, u_bias, m_bias])
x = Flatten()(x)

# Side branch: a small dense network over the same (flattened) embeddings,
# with a 400-unit ELU hidden layer and one linear output. Dropout(.5) after
# the activation was left disabled by the author.
u_embedding = Flatten()(u_embedding)
m_embedding = Flatten()(m_embedding)
x2 = Concatenate()([u_embedding, m_embedding])
x2 = Activation('elu')(Dense(400)(x2))
x2 = Dense(1)(x2)

# Combine the two branches: final prediction is the sum of both outputs.
X = Add()([x, x2])

In [30]:
# Build the two-branch model and compile it with SGD + momentum on a
# mean-squared-error loss, also tracking MSE as a metric.
model = Model(inputs=[u, m], outputs=X)
model.compile(
    optimizer=SGD(lr=0.08, momentum=0.9),
    loss='mse',
    metrics=['mse'],
)

model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_11 (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
input_12 (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_15 (Embedding)        (None, 1, 10)        1384940     input_11[0][0]                   
__________________________________________________________________________________________________
embedding_16 (Embedding)        (None, 1, 10)        1312630     input_12[0][0]                   
__________________________________________________________________________________________________
flatten_11

## 4. Evaluation

In [None]:
# Train whichever `model` was compiled most recently, scoring the held-out
# split after every epoch. The returned History object is kept in `r`.
r = model.fit(
    X_tr, y_tr,
    batch_size=128,
    epochs=epochs,
    validation_data=(X_te, y_te),
)

In [None]:
# Plot the per-epoch training and held-out loss recorded by model.fit.
# The import cell binds `plt` to the top-level `matplotlib` package, which has
# no plot/legend/show functions, so this cell failed with AttributeError.
# Rebind `plt` to the pyplot interface before drawing.
import matplotlib.pyplot as plt

plt.plot(r.history['loss'], label = 'train loss')
plt.plot(r.history['val_loss'], label = 'test loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.show()

In [None]:
# Plot the per-epoch training and held-out MSE metric recorded by model.fit.
# The import cell binds `plt` to the top-level `matplotlib` package, which has
# no plot/legend/show functions; rebind it to the pyplot interface first.
import matplotlib.pyplot as plt

# Older Keras releases record metrics = ['mse'] under 'mean_squared_error',
# newer ones under the name exactly as passed ('mse'). Use whichever key exists.
mse_key = 'mean_squared_error' if 'mean_squared_error' in r.history else 'mse'

plt.plot(r.history[mse_key], label = 'train mse')
plt.plot(r.history['val_' + mse_key], label = 'test mse')
plt.xlabel('epoch')
plt.ylabel('mse')
plt.legend()
plt.show()