# Movie ratings


In [5]:
import numpy as np
import csv
import math

def load_rating_data(file):
    """Parse a ratings CSV into three parallel NumPy arrays.

    Parameters
    ----------
    file : iterable of str
        An open text file (or any iterable of lines) whose header row names
        at least the columns ``userId``, ``movieId`` and ``rating``.

    Returns
    -------
    users, movies, ratings : np.ndarray
        Three 1-D arrays with one entry per data row. ``users`` and
        ``movies`` hold integer indices (the ids minus 1, i.e. 0-based);
        ``ratings`` holds the ratings as floats.

    Raises
    ------
    KeyError
        If one of the three required columns is missing from the header.
    ValueError
        If an id or a rating cannot be parsed as a number.
    """
    users = []
    movies = []
    ratings = []
    for row in csv.DictReader(file):
        # We subtract 1 from the ids to get (0-based) indices
        users.append(int(row["userId"]) - 1)
        movies.append(int(row["movieId"]) - 1)
        ratings.append(float(row["rating"]))
    # Explicit dtypes: without them a file with no data rows would produce
    # float64 "index" arrays, which NumPy refuses to use for indexing.
    return (
        np.array(users, dtype=int),
        np.array(movies, dtype=int),
        np.array(ratings, dtype=float),
    )

# Read both splits into parallel (users, movies, ratings) arrays.
# The csv module asks for files to be opened with newline="" so that it can
# handle line endings (and newlines embedded in quoted fields) itself.
with open("ratings-train.csv", newline="") as train_file:
    train_users, train_movies, train_ratings = load_rating_data(train_file)
with open("ratings-test.csv", newline="") as test_file:
    test_users, test_movies, test_ratings = load_rating_data(test_file)

# Last expression of the cell: show a preview of the training ratings
train_ratings

array([4., 4., 4., ..., 5., 5., 3.])

In [2]:
# Hyperparameters
k = 10  # the number of features (for each user/movie)
learning_rate = 0.005  # step size; the update functions below read this global

# Problem dimensions, derived from the training split
m = train_ratings.shape[0]  # the size of the training set
n_users = train_users.max() + 1  # the largest index, plus 1
n_movies = train_movies.max() + 1

def initialize(n_users, n_movies, k, target_mean=3.5):
    """Initialize a random model, and normalize it so that it has sensible mean and variance.

    Parameters
    ----------
    n_users, n_movies : int
        Number of rows of the user / movie feature matrices.
    k : int
        Number of features per user and per movie.
    target_mean : float, optional
        Average predicted rating the model should start out with
        (defaults to 3.5).

    Returns
    -------
    (user_features, movie_features) : tuple of np.ndarray
        Arrays of shape (n_users, k) and (n_movies, k), drawn from the
        global NumPy random state and then rescaled and shifted.
    """
    # (The normalization helps make sure we start out at a reasonable parameter scale, which speeds up training)
    user_features = np.random.normal(size=(n_users, k))
    movie_features = np.random.normal(size=(n_movies, k))
    # Predictions of the un-normalized model for all user/movie pairs
    # (the same product that predict() computes)
    raw_predictions = user_features @ movie_features.T

    # Dividing BOTH factor matrices by s divides every prediction by s**2.
    s = np.sqrt(2*raw_predictions.std()) # We want to start out with roughly unit variance
    # Adding b to every feature adds about k*b**2 to every prediction, so we
    # solve  raw_mean/s**2 + k*b**2 = target_mean  for b.
    b = np.sqrt((target_mean - raw_predictions.mean()/s**2)/k)
    user_features /= s
    user_features += b
    movie_features /= s
    movie_features += b

    return (user_features, movie_features)

def predict(model):
    """Return the predicted rating for every user/movie pair.

    `model` is a (user_features, movie_features) pair. Entry [u, v] of the
    result is the dot product of row u of user_features with row v of
    movie_features.
    """
    users_matrix, movies_matrix = model
    return np.matmul(users_matrix, movies_matrix.T)

def single_example_step(model, user, movie, rating, lr=None):
    """Update the model using the gradient at a single training example.

    The loss for one example is (user_features[user] . movie_features[movie] - rating)**2;
    both feature rows take one gradient-descent step on it, in place.

    Parameters
    ----------
    model : tuple
        (user_features, movie_features); the two rows involved are modified
        in place.
    user, movie : int
        Row indices into user_features and movie_features.
    rating : float
        The observed rating for this user/movie pair.
    lr : float, optional
        Step size. When omitted, the module-level `learning_rate` is used,
        so existing calls behave exactly as before.
    """
    if lr is None:
        # Fall back to the notebook-wide setting
        lr = learning_rate
    user_features, movie_features = model
    residual = np.dot(user_features[user], movie_features[movie]) - rating
    # Both gradients are computed BEFORE either row is changed, so each one
    # uses the other row's pre-update value.
    grad_users = 2 * residual * movie_features[movie] # the gradient for the user_features matrix
    grad_movies = 2 * residual * user_features[user] # the gradient for the movie_features matrix
    user_features[user] -= lr*grad_users
    movie_features[movie] -= lr*grad_movies

def train_sgd(model, epochs):
    """Train the model for a number of epochs via SGD (batch size=1).

    The training examples are shuffled once, up front; every epoch then
    visits them in that same shuffled order, calling `single_example_step`
    on each one. After every epoch the mean squared error on the training
    and test sets is printed.

    Parameters
    ----------
    model : tuple
        (user_features, movie_features); both arrays are updated in place.
    epochs : int
        Number of full passes over the training set.

    Reads the module-level arrays train_users / train_movies / train_ratings
    and test_users / test_movies / test_ratings. Returns None.
    """
    user_features, movie_features = model
    # It's good practice to shuffle your data before doing batch gradient descent,
    # so that each mini-batch performs like a random sample from the dataset.
    # The permutation length is taken from the data itself rather than from
    # the global `m`, so a stale `m` cannot truncate or break the shuffle.
    shuffle = np.random.permutation(len(train_ratings))
    shuffled_users = train_users[shuffle]
    shuffled_movies = train_movies[shuffle]
    shuffled_ratings = train_ratings[shuffle]
    for epoch in range(epochs):
        for user, movie, rating in zip(shuffled_users, shuffled_movies, shuffled_ratings):
            # update the model using the gradient at a single example
            single_example_step(model, user, movie, rating)
        # After each epoch, we'll evaluate our model. Only the rated
        # (user, movie) pairs are scored: a row-wise dot product per example,
        # instead of building the full n_users x n_movies prediction matrix
        # just to read a few of its entries.
        train_predicted = np.sum(user_features[train_users] * movie_features[train_movies], axis=1)
        test_predicted = np.sum(user_features[test_users] * movie_features[test_movies], axis=1)
        train_loss = np.mean((train_ratings - train_predicted)**2)
        test_loss = np.mean((test_ratings - test_predicted)**2)
        print("Loss after epoch #{} is: train/{} --- test/{}".format(epoch+1, train_loss, test_loss))

sgd_model = initialize(n_users, n_movies, k)
train_sgd(sgd_model, 10)

Loss after epoch #1 is: train/0.9309470853710554 --- test/1.0152979183971957
Loss after epoch #2 is: train/0.810306179543858 --- test/0.9261089929226151
Loss after epoch #3 is: train/0.7537901733160836 --- test/0.890295285531908
Loss after epoch #4 is: train/0.7184458564419317 --- test/0.8718827479159976
Loss after epoch #5 is: train/0.6929537191787872 --- test/0.8615704867638968
Loss after epoch #6 is: train/0.6728511267669026 --- test/0.855765687070861
Loss after epoch #7 is: train/0.655977862546536 --- test/0.8527788995695277
Loss after epoch #8 is: train/0.6411523007014377 --- test/0.8517123702420355
Loss after epoch #9 is: train/0.6276769862736207 --- test/0.8520442025910373
Loss after epoch #10 is: train/0.6151216164774579 --- test/0.8534468901020557


In [34]:
 def all_examples_step(model):
    """Update the model using the gradient averaged over all training examples.

    Parameters
    ----------
    model : tuple
        (user_features, movie_features); both arrays are updated in place
        with one gradient-descent step.

    Reads the module-level train_users / train_movies / train_ratings arrays
    and the module-level `learning_rate`. Returns None.
    """
    user_features, movie_features = model
    # Indexing with the index arrays COPIES one feature row per training
    # example, so everything below is computed from the pre-update model.
    example_user_rows = user_features[train_users]
    example_movie_rows = movie_features[train_movies]
    # Residual of each training example: predicted rating minus actual rating
    residuals = np.sum(example_user_rows * example_movie_rows, axis=1) - train_ratings
    scaled = 2 * residuals[:, np.newaxis]
    # Accumulate the per-example gradients into full-size gradient arrays.
    # np.add.at is unbuffered, so a user (or movie) that appears in many
    # examples receives the SUM of all its contributions; a plain
    # `grad[idx] += ...` would keep only one contribution per repeated index.
    grad_users = np.zeros_like(user_features)
    grad_movies = np.zeros_like(movie_features)
    np.add.at(grad_users, train_users, scaled * example_movie_rows)
    np.add.at(grad_movies, train_movies, scaled * example_user_rows)
    # Average over the actual number of training examples (not the global
    # `m`, which may be stale) and take the step.
    n_examples = len(train_ratings)
    user_features -= learning_rate/n_examples * grad_users # Update using the averaged gradients
    movie_features -= learning_rate/n_examples * grad_movies

    
def train_full(model, epochs):
    """Train the model for a number of epochs using gradients estimated from the entire training set.

    Each epoch performs exactly one `all_examples_step` update and then
    prints the mean squared error on the training and test sets.

    Parameters
    ----------
    model : tuple
        (user_features, movie_features); both arrays are updated in place.
    epochs : int
        Number of full-gradient updates to perform.

    Reads the module-level arrays train_users / train_movies / train_ratings
    and test_users / test_movies / test_ratings. Returns None.
    """
    user_features, movie_features = model
    for epoch in range(epochs):
        all_examples_step(model)
        # Score only the rated (user, movie) pairs: a row-wise dot product
        # per example, instead of building the full n_users x n_movies
        # prediction matrix just to read a few of its entries.
        train_predicted = np.sum(user_features[train_users] * movie_features[train_movies], axis=1)
        test_predicted = np.sum(user_features[test_users] * movie_features[test_movies], axis=1)
        train_loss = np.mean((train_ratings - train_predicted)**2)
        test_loss = np.mean((test_ratings - test_predicted)**2)
        print("Loss after epoch #{} is: train/{} --- test/{}".format(epoch+1, train_loss, test_loss))
        
# Since we are averaging very sparse gradients, the gradients will be small
# and we can use a large learning rate.
# NOTE: this rebinds the global `learning_rate` that the SGD step also reads;
# the SGD cells further down set it back to 0.005 before training.
learning_rate = 8.
full_model = initialize(n_users, n_movies, k)
# We only get a single update to the model from each epoch, so we'll need a lot more epochs
train_full(full_model, 100)



Loss after epoch #1 is: train/2.020499705837473 --- test/2.041398116822025
Loss after epoch #2 is: train/1.8452362869045502 --- test/1.865790746658124
Loss after epoch #3 is: train/1.7346698915775396 --- test/1.7596933304825082
Loss after epoch #4 is: train/1.6533089115962853 --- test/1.6795294299675327
Loss after epoch #5 is: train/1.588659848357695 --- test/1.6176778006070938
Loss after epoch #6 is: train/1.5349666322235342 --- test/1.565850032869927
Loss after epoch #7 is: train/1.4890800486591818 --- test/1.522309095276012
Loss after epoch #8 is: train/1.4490750668045365 --- test/1.484344056758278
Loss after epoch #9 is: train/1.413681801009058 --- test/1.4510888285122052
Loss after epoch #10 is: train/1.3820134039759004 --- test/1.4214149450622864
Loss after epoch #11 is: train/1.353422476735558 --- test/1.3947930631991257
Loss after epoch #12 is: train/1.3274193638069962 --- test/1.3706614024606327
Loss after epoch #13 is: train/1.303623058336582 --- test/1.3486759435000297
Loss 

In [39]:
# k=5
k = 5  # the number of features (for each user/movie)
learning_rate = 0.005  # back to the SGD step size (the full-batch cell set it to 8.)
m = train_ratings.shape[0]  # the size of the training set
n_users = train_users.max() + 1  # the largest index, plus 1
n_movies = train_movies.max() + 1

# Fresh random model, trained for 10 epochs of SGD
sgd_model = initialize(n_users, n_movies, k)
train_sgd(sgd_model, 10)

Loss after epoch #1 is: train/0.9796749884518493 --- test/1.0411660488539876
Loss after epoch #2 is: train/0.8437374393115624 --- test/0.925966987361918
Loss after epoch #3 is: train/0.784311191958209 --- test/0.8829530230630737
Loss after epoch #4 is: train/0.7489426708883228 --- test/0.8611252495609698
Loss after epoch #5 is: train/0.7245977046432523 --- test/0.8483312823137272
Loss after epoch #6 is: train/0.7063208459315726 --- test/0.8402340279084958
Loss after epoch #7 is: train/0.6917601707419518 --- test/0.8349122722847582
Loss after epoch #8 is: train/0.6796369806969069 --- test/0.8313865436232689
Loss after epoch #9 is: train/0.6691896456852302 --- test/0.8291039647521733
Loss after epoch #10 is: train/0.6599343294677924 --- test/0.8277265922024674


In [40]:
# k=15
k = 15  # the number of features (for each user/movie)
learning_rate = 0.005  # SGD step size
m = train_ratings.shape[0]  # the size of the training set
n_users = train_users.max() + 1  # the largest index, plus 1
n_movies = train_movies.max() + 1

# Fresh random model, trained for 10 epochs of SGD
sgd_model = initialize(n_users, n_movies, k)
train_sgd(sgd_model, 10)

Loss after epoch #1 is: train/0.9107490224332668 --- test/1.0244002481638164
Loss after epoch #2 is: train/0.7903246204202476 --- test/0.9343603225841822
Loss after epoch #3 is: train/0.7314271132558519 --- test/0.8973574367684758
Loss after epoch #4 is: train/0.6933162072776323 --- test/0.8787343859615363
Loss after epoch #5 is: train/0.6648578208137972 --- test/0.8689558249653597
Loss after epoch #6 is: train/0.6415847904793435 --- test/0.8642175541220632
Loss after epoch #7 is: train/0.6213306212223534 --- test/0.8626972005379843
Loss after epoch #8 is: train/0.602929240882611 --- test/0.8634097649480269
Loss after epoch #9 is: train/0.5857218599711521 --- test/0.8657695011093927
Loss after epoch #10 is: train/0.5693365174623799 --- test/0.8693973044897281


In [41]:
# k=30
k = 30  # the number of features (for each user/movie)
learning_rate = 0.005  # SGD step size
m = train_ratings.shape[0]  # the size of the training set
n_users = train_users.max() + 1  # the largest index, plus 1
n_movies = train_movies.max() + 1

# Fresh random model, trained for 10 epochs of SGD
sgd_model = initialize(n_users, n_movies, k)
train_sgd(sgd_model, 10)

Loss after epoch #1 is: train/0.8826911106673225 --- test/0.9798710565376084
Loss after epoch #2 is: train/0.7576263129044959 --- test/0.9152627330836701
Loss after epoch #3 is: train/0.6904738091783367 --- test/0.8908730561148943
Loss after epoch #4 is: train/0.6435845922399914 --- test/0.8806929549738497
Loss after epoch #5 is: train/0.6060401289782779 --- test/0.8775516029910546
Loss after epoch #6 is: train/0.5734216363750773 --- test/0.8786278871027476
Loss after epoch #7 is: train/0.5436757440573078 --- test/0.8825768676231919
Loss after epoch #8 is: train/0.5158304999765658 --- test/0.888649651986109
Loss after epoch #9 is: train/0.48947223113411986 --- test/0.8963541432566015
Loss after epoch #10 is: train/0.4644765471384368 --- test/0.9053089464739565


In [42]:
# k=10
k = 10  # the number of features (for each user/movie)
learning_rate = 0.005  # SGD step size
m = train_ratings.shape[0]  # the size of the training set
n_users = train_users.max() + 1  # the largest index, plus 1
n_movies = train_movies.max() + 1

# Fresh random model, trained for 10 epochs of SGD
sgd_model = initialize(n_users, n_movies, k)
train_sgd(sgd_model, 10)

Loss after epoch #1 is: train/0.9324304990798615 --- test/1.0401212085123999
Loss after epoch #2 is: train/0.8081840125315494 --- test/0.9383605117721585
Loss after epoch #3 is: train/0.7509945633018889 --- test/0.8979239486277207
Loss after epoch #4 is: train/0.7155744790345461 --- test/0.8769689255431026
Loss after epoch #5 is: train/0.690160819674249 --- test/0.8649548392672366
Loss after epoch #6 is: train/0.670201422555586 --- test/0.8578976502169289
Loss after epoch #7 is: train/0.653518526085807 --- test/0.8539350537810769
Loss after epoch #8 is: train/0.6389266811313314 --- test/0.8520744518431724
Loss after epoch #9 is: train/0.6257257595461857 --- test/0.8517360299984773
Loss after epoch #10 is: train/0.6134813502160514 --- test/0.8525555183149279
