# SVD model

### Clone the dataset and set the current working directory

In [0]:
# Clone the MovieLens data repository and make it the working directory, so that
# relative paths such as ./ml-100k/u.data resolve when the data is loaded later.
# NOTE(review): /content looks like a hosted-notebook (e.g. Colab) path - confirm
# before running this cell in another environment.
%cd /content
!git clone https://github.com/Brycexu727/movielens-dataset.git
%cd movielens-dataset

### Import necessary modules

In [0]:
from sklearn.model_selection import train_test_split
import tensorflow as tf
import pandas as pd
import numpy as np

### Set dataset and train/test split parameters

In [0]:
# Name of the dataset to load; data_process_svd recognises "ml-100k" and "ml-1m".
data_set_name = "ml-100k"
#data_set_name = "ml-1m"

# Fraction of the ratings held out for testing (forwarded to train_test_split as test_size).
train_test_ratio = 0.2 # 20% of the data for testing, 80% for training
#train_test_ratio = 0.5 # 50% of the data for testing, 50% for training

In [0]:
class SVD:
  """Biased matrix-factorisation rating predictor built with the TF1 graph API.

  The predicted rating of a (user, movie) pair is
  ``dot(user_vector, movie_vector) + user_bias + movie_bias + mean_rating``.
  The model is trained with Adam on mean squared error plus L2 penalties on
  the looked-up embeddings and biases.
  """

  def __init__(
      self,
      hidden_feature=20,
      batch_size=100,
      learning_rate=0.001,
      epoch_size=20
  ):
    """Store the hyper-parameters.

    Args:
      hidden_feature: Length of each user / movie latent vector.
      batch_size: Maximum number of ratings fed per optimisation step.
      learning_rate: Learning rate handed to the Adam optimiser.
      epoch_size: Number of passes over the training data.
    """
    self.hidden_feature = hidden_feature
    self.batch_size = batch_size
    self.learning_rate = learning_rate
    self.epoch_size = epoch_size

    # Short aliases for the TensorFlow callables used by train(); kept as
    # instance attributes so existing code that reads them keeps working.
    self.hd = tf.placeholder
    self.rd = tf.random_uniform
    self.var = tf.Variable
    self.zero = tf.zeros
    self.ebd = tf.nn.embedding_lookup
    self.reg = tf.contrib.layers.apply_regularization
    self.mul = tf.multiply
    self.reduce = tf.reduce_sum
    self.t = tf.train.AdamOptimizer
    self.mse = tf.losses.mean_squared_error
    self.l2 = tf.contrib.layers.l2_regularizer
    self.init = tf.compat.v1.global_variables_initializer

  @staticmethod
  def _batch_bounds(n_rows, batch_size):
    """Return ``(start, end)`` slice bounds that together cover all ``n_rows`` rows.

    The last pair is shorter than ``batch_size`` when ``n_rows`` is not a
    multiple of it, so no row is ever skipped.
    """
    return [
        (start, min(start + batch_size, n_rows))
        for start in range(0, n_rows, batch_size)
    ]

  def train(
      self,
      train_data,
      test_data,
      mean_rating,
      user_num,
      movie_num,
  ):
    """Train the model and print the test RMSE after every epoch.

    Args:
      train_data: 2-D array whose columns 0, 1 and 2 hold the user id, the
        movie id and the rating of each training example.
      test_data: 2-D array with the same column layout, used for evaluation.
      mean_rating: Constant offset added to every prediction.
      user_num: Largest user id that can be looked up (table has user_num + 1 rows).
      movie_num: Largest movie id that can be looked up (table has movie_num + 1 rows).
    """
    train_batches = self._batch_bounds(len(train_data), self.batch_size)
    test_batches = self._batch_bounds(len(test_data), self.batch_size)

    # Build into a private graph so that repeated calls do not keep adding
    # ops and variables to the process-wide default graph.
    with tf.Graph().as_default():
      # Leading dimension is None so the final, shorter batch can be fed too.
      userId = self.hd(tf.int32, [None])
      movieId = self.hd(tf.int32, [None])
      rate = self.hd(tf.float32, [None, 1])

      # Tables have one extra row because they are indexed by raw ids.
      user_ebd = self.var(self.rd([user_num + 1, self.hidden_feature], 0, 0.3),
                          trainable=True)
      movie_ebd = self.var(self.rd([movie_num + 1, self.hidden_feature], 0, 0.3),
                           trainable=True)
      user_input = self.ebd(user_ebd, userId)
      movie_input = self.ebd(movie_ebd, movieId)

      user_bias_ebd = self.var(self.zero([user_num + 1, 1]), trainable=True)
      movie_bias_ebd = self.var(self.zero([movie_num + 1, 1]), trainable=True)
      user_bias = self.ebd(user_bias_ebd, userId)
      movie_bias = self.ebd(movie_bias_ebd, movieId)

      # L2 penalties on the rows looked up for the current batch.
      r1 = self.l2(0.002)
      lamda1 = self.reg(r1, [user_input, movie_input])
      r2 = self.l2(0.002)
      lamda2 = self.reg(r2, [user_bias, movie_bias])

      product = self.reduce(self.mul(user_input, movie_input), axis=1, keepdims=True)

      # Built once and reused for both the loss and evaluation; rebuilding the
      # expression per sess.run call would add new ops to the graph every time.
      prediction = product + user_bias + movie_bias + mean_rating

      loss = self.mse(rate, prediction) + lamda1 + lamda2
      train_op = self.t(self.learning_rate).minimize(loss)
      init_op = self.init()

      # The context manager closes the session even if training raises.
      with tf.Session() as sess:
        sess.run(init_op)
        print('\nSVD training...')
        for epoch in range(self.epoch_size):
          for start, end in train_batches:
            sess.run(
                train_op,
                feed_dict={
                    userId: train_data[start:end, 0],
                    movieId: train_data[start:end, 1],
                    rate: train_data[start:end, 2].reshape(-1, 1)
                }
            )

          squared_error_sum = 0.0
          rating_count = 0
          for start, end in test_batches:
            res = sess.run(
                prediction,
                feed_dict={
                    userId: test_data[start:end, 0],
                    movieId: test_data[start:end, 1]
                }
            )
            ground_truth = test_data[start:end, 2].reshape(-1, 1)
            # Clamp predictions to the [1, 5] interval before scoring.
            clipped = np.clip(res, 1, 5)
            squared_error_sum += float(np.sum((ground_truth - clipped) ** 2))
            rating_count += ground_truth.shape[0]

          # Accumulating sum and count keeps the RMSE exact with a short last batch.
          if rating_count:
            rmse = np.sqrt(squared_error_sum / rating_count)
          else:
            rmse = float('nan')
          print('Epoch ' + str(epoch+1) + '/' + str(self.epoch_size))
          print('Test RMSE: %.3f' % rmse)

def data_process_svd(dataset_name, train_test_ratio=0.1, random_state=None):
  """Load a MovieLens ratings file and split it into train and test arrays.

  Args:
    dataset_name: Either "ml-100k" or "ml-1m".
    train_test_ratio: Passed to train_test_split as test_size.
    random_state: Optional seed passed to train_test_split so the split can
      be reproduced; None keeps the split random on every call.

  Returns:
    A tuple (train_data, test_data, mean_rating, user_num, movie_num) where
    the two data items are arrays with columns userId, movieId, rating,
    timestamp, mean_rating is the mean rating of the training rows, and
    user_num / movie_num are the largest user and movie ids in the file.

  Raises:
    ValueError: If dataset_name is not one of the supported names.
  """
  data_frame_title = ['userId', 'movieId', 'rating', 'timestamp']
  if dataset_name == "ml-100k":
    path = './ml-100k/u.data'
    read_options = {'sep': '\t'}
  elif dataset_name == "ml-1m":
    path = './ml-1m/ratings.dat'
    # A multi-character separator is only handled by the python parser engine;
    # naming it explicitly avoids the fallback warning.
    read_options = {'sep': '::', 'engine': 'python'}
  else:
    # Fail before touching any file instead of printing and then crashing
    # later on an unbound variable.
    raise ValueError(
        "unknown data set: %r (expected 'ml-100k' or 'ml-1m')" % (dataset_name,)
    )

  data_frame = pd.read_csv(path, names=data_frame_title, **read_options)

  # Use the largest id for both datasets: the ids are used directly as row
  # indices of the embedding tables, so a count of distinct ids would be too
  # small whenever the ids are not contiguous.
  user_num = int(data_frame.userId.max())
  movie_num = int(data_frame.movieId.max())

  train_data, test_data = train_test_split(
      data_frame,
      test_size=train_test_ratio,
      random_state=random_state
  )
  train_data = np.array(train_data)
  mean_rating = np.mean(train_data[:, 2])
  return train_data, np.array(test_data), mean_rating, user_num, movie_num

# Load and split the configured dataset.
(train_data, test_data, mean_rating, user_num, movie_num) = data_process_svd(
    data_set_name, train_test_ratio
)

# Train an SVD model with its default hyper-parameters; progress is printed per epoch.
s = SVD()
s.train(train_data, test_data, mean_rating, user_num, movie_num)

W0811 13:44:53.857661 140027953194880 lazy_loader.py:50] 
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

W0811 13:44:53.908244 140027953194880 deprecation.py:506] From <ipython-input-5-b5d52763a720>:60: calling reduce_sum_v1 (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version.
Instructions for updating:
keep_dims is deprecated, use keepdims instead
W0811 13:44:53.929272 140027953194880 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/losses/losses_impl.py:121: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future ver


SVD training...
Epoch 1/20
Test RMSE: 1.033
Epoch 2/20
Test RMSE: 0.998
Epoch 3/20
Test RMSE: 0.981
Epoch 4/20
Test RMSE: 0.970
Epoch 5/20
Test RMSE: 0.961
Epoch 6/20
Test RMSE: 0.953
