## Import packages

In [1]:
import pandas as pd
from keras.callbacks import EarlyStopping, ModelCheckpoint
from CFModel import CFModel

Using Theano backend.


## Define constants


In [2]:
# Ratings file read with pandas (tab-separated) in the data-loading cell.
RATINGS_CSV_FILE = 'ml1m_ratings.csv'
# Path handed to the ModelCheckpoint callback during training.
MODEL_WEIGHTS_FILE = 'ml1m_weights.h5'
# Third argument to CFModel; presumably the number of latent factors per
# user/movie embedding -- TODO confirm against CFModel.
K_FACTORS = 20
# random_state used when shuffling the ratings, so the shuffle is repeatable.
RNG_SEED = 1446557

## Load MovieLens 1M data

In [3]:
# Read the tab-separated ratings file, keeping only the columns this notebook uses.
ratings = pd.read_csv(RATINGS_CSV_FILE,
                      sep='\t',
                      encoding='latin-1',
                      usecols=['userid', 'movieid', 'user_emb_id', 'movie_emb_id', 'rating'])
# Largest raw ids; these are the first two arguments given to CFModel later on.
# max() is unaffected by repeated values, so no drop_duplicates() pass is needed.
max_userid = ratings['userid'].max()
max_movieid = ratings['movieid'].max()
# A single %-formatted argument prints the same line under both the Python 2
# print statement and the Python 3 print function.
print('%d ratings loaded.' % len(ratings))

1000209 ratings loaded.


## Create training set

In [4]:
# Shuffle every row (frac=1.) with a fixed seed so the row order is repeatable.
shuffled_ratings = ratings.sample(frac=1., random_state=RNG_SEED)

# Parallel arrays: element i of each array comes from the same shuffled row.
Users = shuffled_ratings['user_emb_id'].values
Movies = shuffled_ratings['movie_emb_id'].values
Ratings = shuffled_ratings['rating'].values

# Summarise each array. A single %-formatted argument yields the same text under
# both the Python 2 print statement and the Python 3 print function.
for label, column in (('Users', Users), ('Movies', Movies), ('Ratings', Ratings)):
    print('%s: %s , shape = %s' % (label, column, column.shape))

Users: [4403 1819 2571 ...,  354 3510  831] , shape = (1000209,)
Movies: [2717 3692  149 ..., 2700 2920 3711] , shape = (1000209,)
Ratings: [5 4 4 ..., 2 4 4] , shape = (1000209,)


## Define model

In [5]:
# Instantiate CFModel from the two id maxima and K_FACTORS, then configure
# training with a mean-squared-error loss and the 'adamax' optimizer.
model = CFModel(max_userid, max_movieid, K_FACTORS)
model.compile(optimizer='adamax', loss='mse')

## Train model

In [6]:
# Stop early once val_loss has gone 2 epochs without improving, and write a
# checkpoint to MODEL_WEIGHTS_FILE only when the monitored quantity improves.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=2),
    ModelCheckpoint(MODEL_WEIGHTS_FILE, save_best_only=True),
]
# Inputs are the parallel user/movie id arrays; targets are the ratings.
# 10% of the samples are held out for validation; verbose=2 logs one line per epoch.
model.fit([Users, Movies],
          Ratings,
          nb_epoch=15,
          validation_split=.1,
          verbose=2,
          callbacks=callbacks)

Train on 900188 samples, validate on 100021 samples
Epoch 1/15
603s - loss: 11.2485 - val_loss: 5.5647
Epoch 2/15
615s - loss: 3.3028 - val_loss: 2.1138
Epoch 3/15
606s - loss: 1.6632 - val_loss: 1.3914
Epoch 4/15
640s - loss: 1.2292 - val_loss: 1.1320
Epoch 5/15
597s - loss: 1.0529 - val_loss: 1.0131
Epoch 6/15
594s - loss: 0.9665 - val_loss: 0.9502
Epoch 7/15
596s - loss: 0.9193 - val_loss: 0.9139
Epoch 8/15
594s - loss: 0.8915 - val_loss: 0.8922
Epoch 9/15
597s - loss: 0.8734 - val_loss: 0.8772
Epoch 10/15
610s - loss: 0.8603 - val_loss: 0.8663
Epoch 11/15
605s - loss: 0.8500 - val_loss: 0.8573
Epoch 12/15
605s - loss: 0.8411 - val_loss: 0.8490
Epoch 13/15
606s - loss: 0.8324 - val_loss: 0.8416
Epoch 14/15
596s - loss: 0.8243 - val_loss: 0.8342
Epoch 15/15
598s - loss: 0.8164 - val_loss: 0.8276


<keras.callbacks.History at 0x1199dc390>