# Loading data

In [2]:
import pandas as pd
import scipy.optimize
from numba import jit
import numpy as np

In [3]:
users=pd.read_csv("users.dat",sep="::",header=None,index_col=0)

  """Entry point for launching an IPython kernel.


In [4]:
movies=pd.read_csv("movies.dat",sep="::",header=None,index_col=0)

  """Entry point for launching an IPython kernel.


In [5]:
ratings=pd.read_csv("ratings.dat",sep="::",header=None)

  """Entry point for launching an IPython kernel.


In [6]:
print(ratings.head())
print(users.head())
print(movies.head())

   0     1  2          3
0  1  1193  5  978300760
1  1   661  3  978302109
2  1   914  3  978301968
3  1  3408  4  978300275
4  1  2355  5  978824291
   1   2   3      4
0                  
1  F   1  10  48067
2  M  56  16  70072
3  M  25  15  55117
4  M  45   7  02460
5  M  25  20  55455
                                    1                             2
0                                                                  
1                    Toy Story (1995)   Animation|Children's|Comedy
2                      Jumanji (1995)  Adventure|Children's|Fantasy
3             Grumpier Old Men (1995)                Comedy|Romance
4            Waiting to Exhale (1995)                  Comedy|Drama
5  Father of the Bride Part II (1995)                        Comedy


# Split the data

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
X=ratings[[0,1]]
Y=ratings[2]

In [9]:
x_train,x_tv,y_train,y_tv=train_test_split(X,Y,test_size=0.2,random_state=888)

In [10]:
x_validation,x_test,y_validation,y_test=train_test_split(x_tv,y_tv,test_size=0.5)

In [11]:
s=max(x_train[0])
x_train.size

1600334

# Model building

## "Brute-force" Class

In [14]:
class recommendation_model:
    def __init__(self , x_train , y_train , K=5, r_lambda=0):
        #index = user/movie_id - 1 
        self.b_u=np.random.normal(0,1e-4,max(x_train[0]))#user bias
        self.b_i=np.random.normal(0,1e-4,max(x_train[1]))#item bias
        self.p_u=np.random.normal(0,1/max(1,np.sqrt(K)),[max(x_train[0]),K])# user taste
        self.p_i=np.random.normal(0,1/max(1,np.sqrt(K)),[max(x_train[1]),K])# item style
        self.x_train=x_train.values
        self.y_train=y_train.values
        self.mu=y_train.mean() # average
        self.r_lambda=r_lambda #regulation term
    
    @jit(parallel=True,fastmath = True)
    def predict(self,X):
        x_=np.array(X)
        y_=self.mu+self.b_u[x_[:,0]-1]+self.b_i[x_[:,1]-1]+np.sum((self.p_u[x_[:,0]-1]*self.p_i[x_[:,1]-1]),axis=1)
        return y_
    
    @jit(parallel=True,fastmath = True)
    def loss(self,X,Y):
        x_=np.array(X)
        y_=self.mu+self.b_u[x_[:,0]-1]+self.b_i[x_[:,1]-1]+np.sum((self.p_u[x_[:,0]-1]*self.p_i[x_[:,1]-1]),axis=1)
        e2=(Y-y_)**2
        return e2.mean()
    
    @jit(parallel=True,fastmath = True)
    def reg_loss(self,X,Y):
        x_=np.array(X)
        y_=self.mu+self.b_u[x_[:,0]-1]+self.b_i[x_[:,1]-1]+np.sum((self.p_u[x_[:,0]-1]*self.p_i[x_[:,1]-1]),axis=1)
        e2=(Y-y_)**2+0.5*self.r_lambda*np.sum((self.p_u[x_[:,0]-1]**2+self.p_i[x_[:,1]-1]**2),axis=1)
        #print(self.r_lambda*((self.p_u**2).sum()+(self.p_i**2).sum()))
        return e2.mean()
    
    @jit(parallel=True,fastmath = True,nogil=True)
    def fit(self,batch_size=100,learning_rate=0.1,epochs=20,n_show=1):
        for i in range(epochs):
            d_b_u=np.zeros_like(self.b_u)
            d_b_i=np.zeros_like(self.b_i)
            d_p_u=np.zeros_like(self.p_u)
            d_p_i=np.zeros_like(self.p_i)
            samples=np.random.choice(len(x_train),batch_size,replace=False)
            x_=self.x_train[samples]
            y_=self.y_train[samples]
            delta_y=y_-self.predict(x_)
            N=len(x_train)
           
            for j in range(batch_size):
                d_b_u[x_[j,0]-1]+=-delta_y[j]
                d_b_i[x_[j,1]-1]+=-delta_y[j]
                d_p_u[x_[j,0]-1]+=-self.p_i[x_[j,1]-1]*delta_y[j]+self.r_lambda*self.p_u[x_[j,0]-1]
                d_p_i[x_[j,1]-1]+=-self.p_u[x_[j,0]-1]*delta_y[j]+self.r_lambda*self.p_i[x_[j,1]-1]
            self.b_u-=learning_rate*d_b_u
            self.b_i-=learning_rate*d_b_i
            self.p_u-=learning_rate*d_p_u
            self.p_i-=learning_rate*d_p_i
            if (i+1)%n_show==0:
                print("batch_size:",batch_size,"epochs:",(i+1),"reg_loss:",self.reg_loss(self.x_train,self.y_train),"loss:",self.loss(self.x_train,self.y_train))
 




#model1=recommendation_model(x_train,y_train,10,r_lambda=0.1)


## Model selection, training and result

In [33]:
model3=recommendation_model(x_train,y_train,20,r_lambda=0.05)

%time model3.fit(batch_size=100,learning_rate=0.1 ,epochs=5000,n_show=100)


batch_size: 100 epochs: 100 reg_loss: 1.1471365802518025 loss: 1.0990765834742213
batch_size: 100 epochs: 200 reg_loss: 1.0826498358828385 loss: 1.036901913681903
batch_size: 100 epochs: 300 reg_loss: 1.0465873894866424 loss: 1.0025962805484587
batch_size: 100 epochs: 400 reg_loss: 1.0212923443119553 loss: 0.9789091521870878
batch_size: 100 epochs: 500 reg_loss: 1.0071867816270694 loss: 0.9660486284000551
batch_size: 100 epochs: 600 reg_loss: 0.9905795663832708 loss: 0.9506052995377677
batch_size: 100 epochs: 700 reg_loss: 0.9814125000812866 loss: 0.9422258477128274
batch_size: 100 epochs: 800 reg_loss: 0.9802510684870942 loss: 0.9418179513910383
batch_size: 100 epochs: 900 reg_loss: 0.9682420001638419 loss: 0.9304643790269749
batch_size: 100 epochs: 1000 reg_loss: 0.9605956984276111 loss: 0.9235788743609211
batch_size: 100 epochs: 1100 reg_loss: 0.9536465138146147 loss: 0.9172437242703015
batch_size: 100 epochs: 1200 reg_loss: 0.9548043322154652 loss: 0.9188369399200647
batch_size: 10

0.7562660254117667

In [196]:
%time model1.fit(batch_size=10000,learning_rate=0.0001 ,epochs=50000,n_show=1000)
model1.loss(x_validation,y_validation)

epochs: 1000 reg_loss: 0.7099770991523215 loss: 0.6586620173296177
epochs: 2000 reg_loss: 0.7099516428001302 loss: 0.658626928360047
epochs: 3000 reg_loss: 0.7099357887154836 loss: 0.6586082319367245
epochs: 4000 reg_loss: 0.7099206295717254 loss: 0.6585873181061028
epochs: 5000 reg_loss: 0.709896095681892 loss: 0.6585570897415286
epochs: 6000 reg_loss: 0.7098667049274535 loss: 0.6585176102379706
epochs: 7000 reg_loss: 0.7098123820839995 loss: 0.6584394299266967
epochs: 8000 reg_loss: 0.7098078936836509 loss: 0.6584244604878694
epochs: 9000 reg_loss: 0.7097991361436257 loss: 0.6584195902536865
epochs: 10000 reg_loss: 0.7097744151890429 loss: 0.6583740469146875
epochs: 11000 reg_loss: 0.7097459791431481 loss: 0.6583256962366428
epochs: 12000 reg_loss: 0.7097415420802768 loss: 0.6583099149724168
epochs: 13000 reg_loss: 0.709732168239454 loss: 0.658291960581764
epochs: 14000 reg_loss: 0.7097073275477422 loss: 0.6582608845907753
epochs: 15000 reg_loss: 0.7096856507020128 loss: 0.6582238241

0.7440090591742956

In [None]:
#K=10 lambda=0.1   validation=0.7440090591742956 model1 
#K=20 lambda=0.1  validation=0.7437028511035796 model2
#These two have been saved in the folder

# Keras Model

## Keras class

In [41]:
from keras import backend as K
from keras.engine.topology import Layer
from keras import regularizers
import keras

In [188]:
from keras.layers import Embedding
from keras.layers import Dot,Add,Input
from keras.models import Model
class keras_recommendation_model:
    def __init__(self , x_train , y_train , K=5, r_lambda=0):
        self.users=x_train[0].values
        self.movies=x_train[1].values
        self.y=y_train.values
        self.users_input=Input(shape=[1,],name='users')
        self.movies_input=Input(shape=[1,],name='movies')
        self.users_embedding=Embedding(7000,K,input_length=1
                                ,embeddings_initializer=keras.initializers.RandomNormal(stddev=1/np.sqrt(K))
                                ,embeddings_regularizer=keras.regularizers.l2(r_lambda),name='users_embedding')(self.users_input)
        self.movies_embedding=Embedding(10000,K,input_length=1
                                ,embeddings_initializer=keras.initializers.RandomNormal(stddev=1/np.sqrt(K))
                                ,embeddings_regularizer=keras.regularizers.l2(r_lambda),name='movies_embedding')(self.movies_input)
        self.users_bias=Embedding(7000,1,input_length=1
                                ,embeddings_initializer=keras.initializers.RandomNormal(stddev=0.0001)
                                 ,name='users_bias')(self.users_input)
        self.movies_bias=Embedding(10000,1,input_length=1,
                                 embeddings_initializer=keras.initializers.RandomNormal(stddev=0.0001)
                                  ,name='movies_bias')(self.movies_input)
        self.interaction=Dot(axes=-1,name="interaction")([self.users_embedding,self.movies_embedding])
        self.out=Add(name='y_')([self.users_bias,self.movies_bias,self.interaction])
        self.out_f=keras.layers.Flatten(name="output")(self.out)
        self.model=Model(inputs=[self.users_input,self.movies_input], outputs=self.out_f)
        self.model.compile(optimizer="sgd"
                           ,loss="mean_squared_error",metrics=['accuracy'])
        self.model.summary()
        keras.utils.plot_model(self.model,"cf_model.png",show_shapes=True)
    def fit(self,each_epochs=100,batch_size_=100,turns=10,lr=0.001):
        for i in range(turns):
            keras.backend.set_value(self.model.optimizer.lr,lr)
            result=self.model.fit([self.users,self.movies],self.y,epochs=each_epochs,
                                  batch_size=batch_size_,verbose=1)
            print(i*each_epochs,"epochs finished. Loss:",result.history)
    def predict(self,x):
        users=x[0].values
        movies=x[1].values
        return self.model.predict([users,movies])
    
                                                                                          

## Model selection, training and result

In [189]:
model5=keras_recommendation_model(x_train,y_train,20,r_lambda=0.001)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
users (InputLayer)              (None, 1)            0                                            
__________________________________________________________________________________________________
movies (InputLayer)             (None, 1)            0                                            
__________________________________________________________________________________________________
users_embedding (Embedding)     (None, 1, 20)        140000      users[0][0]                      
__________________________________________________________________________________________________
movies_embedding (Embedding)    (None, 1, 20)        200000      movies[0][0]                     
__________________________________________________________________________________________________
users_bias

In [199]:
%time model5.fit(10,1000,10,0.1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
0 epochs finished. Loss: {'loss': [0.8055700976617799, 0.8054698478723009, 0.8054033051177671, 0.8053565500006828, 0.8053204560638829, 0.8052932437484078, 0.8052721892346877, 0.8052550672633081, 0.8052412018354183, 0.8052298886841579], 'acc': [0.42919040632110983, 0.42921165187362104, 0.4292741393654636, 0.4292341475190779, 0.4292991333300115, 0.42927413889710614, 0.42936661948121757, 0.42928913572274663, 0.429395363911871, 0.4293678691113925]}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
10 epochs finished. Loss: {'loss': [0.8052200740774403, 0.8052114181711264, 0.8052054449472874, 0.8051985870914687, 0.8051936139484818, 0.8051887892395466, 0.8051840074147263, 0.805179826160147, 0.8051769409832854, 0.8051739596757053], 'acc': [0.4293591216014682, 0.42938286585073604, 0.42935412251132765, 0.4294678484735313, 0.4

Epoch 10/10
50 epochs finished. Loss: {'loss': [0.8051224408446912, 0.8051209156182599, 0.8051195196692958, 0.8051187776206052, 0.8051174355987876, 0.8051169899748344, 0.8051154921939656, 0.8051148519917053, 0.8051131052206002, 0.8051121555181043], 'acc': [0.4294578503739976, 0.4294815960677432, 0.42943910485940284, 0.42943285569587025, 0.42944410304605113, 0.42943910433022403, 0.42950909020708344, 0.4294266067172225, 0.4295528308740104, 0.4294715969818095]}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
60 epochs finished. Loss: {'loss': [0.8051111004141306, 0.8051102488564206, 0.805108739931735, 0.8051082163439707, 0.805108072573009, 0.8051058724817666, 0.8051056117091084, 0.80510371363049, 0.8051031819788576, 0.8051019606238057], 'acc': [0.4295328350197583, 0.4294815954050779, 0.4295628283820171, 0.42952408668723807, 0.4294665986933702, 0.4295365845789822, 0.4295003417520267, 0.4295290857662424, 0.4294865940929339, 0.42

In [140]:
from sklearn.metrics import mean_squared_error

In [198]:
mean_squared_error(y_validation,model5.predict(x_validation))

0.8283043232710366

Due to limited time my Keras model doesn't have enough time to train so the result is not as good as the "brute-force" model