In [1]:
import numpy as np
import pandas as pd
import random
import tensorflow as tf
from tensorflow.keras import Model

tf.enable_eager_execution()

In [2]:
data_dir = 'clean2.csv'
data = pd.read_csv(data_dir)
data.pop('Unnamed: 0') 
data.head()

Unnamed: 0,ProductId,UserId,Score
0,B001GVISJM,A18ECVX2RJ7HUE,4
1,B001GVISJM,A2MUGFV2TDQ47K,5
2,B001GVISJM,A2A9X58G2GTBLP,5
3,B001EO5QW8,A2G7B7FKP2O2PU,5
4,B001EO5QW8,AQLL2R1PPR46X,5


In [3]:
userIDs = {}
prodIDs = {}
interactions = []

for row in data.itertuples():
    prod, user, score = row[1], row[2], row[3]
    if not user in userIDs: userIDs[user] = len(userIDs)
    if not prod in prodIDs: prodIDs[prod] = len(prodIDs)
    interactions.append((prod, user, score))

## Latent Factor Model

In [4]:
# mean rating, just for initialization
mu = sum(data['Score']) / len(data)

In [19]:
# Gradient descent optimizer, experiment with learning rate
optimizer = tf.keras.optimizers.Adam(0.0001)

class LatentFactorModel(tf.keras.Model):
    def __init__(self, mu, K, lamb):
        super(LatentFactorModel, self).__init__()
        # Initialize to average
        self.alpha = tf.Variable(mu)
        # Initialize to small random values
        self.betaU = tf.Variable(tf.random.normal([len(userIDs)],stddev=0.001))
        self.betaI = tf.Variable(tf.random.normal([len(prodIDs)],stddev=0.001))
        self.gammaU = tf.Variable(tf.random.normal([len(userIDs),K],stddev=0.001))
        self.gammaI = tf.Variable(tf.random.normal([len(prodIDs),K],stddev=0.001))
        self.lamb = lamb

    # Prediction for a single instance (useful for evaluation)
    def predict(self, u, i):
        p = self.alpha + self.betaU[u] + self.betaI[i] +\
            tf.tensordot(self.gammaU[u], self.gammaI[i], 1)
        return p

    # Regularizer
    def reg(self):
        return self.lamb * tf.reduce_sum(self.betaU**2) +\
                           tf.reduce_sum(self.betaI**2) +\
                           tf.reduce_sum(self.gammaU**2) +\
                           tf.reduce_sum(self.gammaI**2)
    
    # Prediction for a sample of instances
    def predictSample(self, sampleU, sampleI):
        u = tf.convert_to_tensor(sampleU, dtype=tf.int32)
        i = tf.convert_to_tensor(sampleI, dtype=tf.int32)
        beta_u = tf.nn.embedding_lookup(self.betaU, u)
        beta_i = tf.nn.embedding_lookup(self.betaI, i)
        gamma_u = tf.nn.embedding_lookup(self.gammaU, u)
        gamma_i = tf.nn.embedding_lookup(self.gammaI, i)
        pred = self.alpha + beta_u + beta_i +\
               tf.reduce_sum(tf.multiply(gamma_u, gamma_i), 1)
        return pred
    
    # Loss
    def call(self, sampleU, sampleI, sampleR):
        pred = self.predictSample(sampleU, sampleI)
        r = tf.convert_to_tensor(sampleR, dtype=tf.float32)
        return tf.nn.l2_loss(pred - r) / len(sampleR)

In [20]:
# Experiment with number of factors and regularization rate
LFM = LatentFactorModel(mu, 5, 0.00001)

In [21]:
def train(interactions):
    Nsamples = 50000
    with tf.GradientTape() as tape:
        sampleU, sampleI, sampleR = [], [], []
        for _ in range(Nsamples):
            p,u,r = random.choice(interactions)
            sampleI.append(prodIDs[p])
            sampleU.append(userIDs[u])
            sampleR.append(r)

        loss = LFM(sampleU,sampleI,sampleR)
        loss += LFM.reg()
    gradients = tape.gradient(loss, LFM.trainable_variables)
    optimizer.apply_gradients((grad, var) for
                              (grad, var) in zip(gradients, LFM.trainable_variables)
                              if grad is not None)
    return loss.numpy()

In [22]:
# 10 iterations of gradient descent
for i in range(20):
    obj = train(interactions)
    print("iteration " + str(i) + ", objective = " + str(obj))

iteration 0, objective = 0.8316188
iteration 1, objective = 0.8109108
iteration 2, objective = 0.7862687
iteration 3, objective = 0.7666024
iteration 4, objective = 0.7766284
iteration 5, objective = 0.75968444
iteration 6, objective = 0.74327374
iteration 7, objective = 0.7366719
iteration 8, objective = 0.73471266
iteration 9, objective = 0.72575724
iteration 10, objective = 0.7192815
iteration 11, objective = 0.7164362
iteration 12, objective = 0.7090113
iteration 13, objective = 0.71427584
iteration 14, objective = 0.72207594
iteration 15, objective = 0.7163178
iteration 16, objective = 0.70892876
iteration 17, objective = 0.7065967
iteration 18, objective = 0.71722126
iteration 19, objective = 0.7037965


In [12]:
LFM.predict(userIDs[interactions[0][1]], prodIDs[interactions[0][0]]).numpy()

4.172021

In [13]:
baseline = [mu for _ in range(len(data))]
LFM_pred = [LFM.predict(userIDs[interactions[i][1]], prodIDs[interactions[i][0]]).numpy() for i in range(len(data))]

In [14]:
labels = list(data['Score'])

def MSE(predictions, labels):
    differences = [(x-y)**2 for x,y in zip(predictions,labels)]
    return sum(differences) / len(differences)

In [18]:
err_baseline = MSE(baseline, labels)
err_LFM = MSE(LFM_pred, labels)
print('For baseline =', err_baseline)
print('The MSE of rating estimation is', err_LFM)

For baseline = 1.4022396214695851
The MSE of rating estimation is 1.392199990352876
