In [1]:
!pip install -q tensorflow-recommenders

[0m

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import deque
#from surprise import Dataset, NormalPredictor, Reader
#from surprise.model_selection import cross_validate
import os
from sklearn.model_selection import train_test_split
import pickle
#from surprise import accuracy
#from surprise import accuracy, Dataset, Reader, SVD, KNNBaseline
#from surprise.model_selection import PredefinedKFold
import pprint
import tempfile
from typing import Dict, Text
import tensorflow as tf
import tensorflow_recommenders as tfrs
from collections import defaultdict
import joblib

In [3]:
# Load the fold-0 train and test splits; the CSV files carry no header row,
# so the columns get default integer labels here.
train_rmse0, test_rmse0 = (
    pd.read_csv(csv_path, header=None)
    for csv_path in (
        'rmse/rmse_folds_actual/actual_rmse_train0.csv',
        'rmse/rmse_folds_actual/actual_rmse_test0.csv',
    )
)

In [4]:
# Preview the first rows of the fold-0 training split (columns still carry
# their default integer labels at this point).
train_rmse0.head()

Unnamed: 0,0,1,2
0,1590329,7627,3.0
1,576570,14606,4.0
2,1757599,9156,3.0
3,1048852,15107,4.0
4,2187348,14644,3.0


In [5]:
# Preview the first rows of the fold-0 test split (columns still carry
# their default integer labels at this point).
test_rmse0.head()

Unnamed: 0,0,1,2
0,573762,9438,4.0
1,1560019,5760,4.0
2,2120159,1542,1.0
3,1388182,12034,1.0
4,142077,15755,3.0


In [6]:
# Give both fold-0 frames the same descriptive column labels (train first, then test).
train_rmse0.columns = test_rmse0.columns = ['user_id', 'movie_id', 'rating']

In [7]:
# `df_rating` was not defined by any earlier cell, so this cell failed with a
# NameError on a fresh kernel. Build it from the fold-0 frames loaded above.
# NOTE(review): assumes the fold-0 train and test splits together contain every
# user/movie ID that appears in the other folds — confirm against how the folds
# were generated.
df_rating = pd.concat([train_rmse0, test_rmse0], ignore_index=True)

# Distinct IDs, used as the lookup vocabularies of the embedding model.
train_movie_ids = list(df_rating['movie_id'].unique())
train_user_ids = list(df_rating['user_id'].unique())

In [8]:
class DeepRecRmseModel(tf.keras.Model):
    """Predict a rating from a (user_id, movie_id) pair.

    Each ID is turned into a vocabulary index by an ``IntegerLookup`` layer
    (vocabularies come from the module-level ``train_user_ids`` and
    ``train_movie_ids``) and then embedded. The two embeddings are
    concatenated and fed through a small Dense stack that ends in a single
    linear unit.
    """

    def __init__(self):
        super().__init__()
        embed_dim = 16

        def id_tower(vocabulary):
            # The embedding table has one row more than the vocabulary
            # (presumably for the lookup's out-of-vocabulary index — see the
            # IntegerLookup documentation).
            return tf.keras.Sequential([
                tf.keras.layers.IntegerLookup(vocabulary=vocabulary, mask_token=None),
                tf.keras.layers.Embedding(len(vocabulary) + 1, embed_dim),
            ])

        self.user_embeddings = id_tower(train_user_ids)
        self.movie_embeddings = id_tower(train_movie_ids)

        # Two ReLU hidden layers followed by a single-unit linear output.
        hidden_layers = [tf.keras.layers.Dense(units, activation='relu') for units in (32, 16)]
        self.ratings = tf.keras.Sequential(hidden_layers + [tf.keras.layers.Dense(1)])

    def call(self, inputs):
        """Return the rating head's output for ``inputs = (user_id, movie_id)``."""
        user_id, movie_id = inputs
        embedded = [self.user_embeddings(user_id), self.movie_embeddings(movie_id)]
        return self.ratings(tf.concat(embedded, axis=1))
    
    

In [9]:
class NetflixRmseModel(tfrs.models.Model):
    """TFRS model that trains a ``DeepRecRmseModel`` with a ranking task.

    The task uses mean squared error as the loss and reports root mean
    squared error as a metric.
    """

    def __init__(self):
        super().__init__()
        self.rmse_model: tf.keras.Model = DeepRecRmseModel()
        self.task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
            loss=tf.keras.losses.MeanSquaredError(),
            metrics=[tf.keras.metrics.RootMeanSquaredError()],
        )

    def call(self, features: Dict[Text, tf.Tensor]) -> tf.Tensor:
        """Return predicted ratings for the ``user_id`` / ``movie_id`` entries of ``features``."""
        return self.rmse_model((features['user_id'], features['movie_id']))

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        """Compute the task loss for one batch.

        Args:
            features: mapping that contains ``user_id``, ``movie_id`` and ``rating``.
            training: accepted for interface compatibility; not used here.

        Returns:
            The value returned by the ranking task for the batch.

        Raises:
            KeyError: if ``features`` has no ``rating`` entry.
        """
        labels = features['rating']
        # Build a label-free copy instead of popping from the caller's dict, so
        # the batch passed in still contains 'rating' after this call.
        model_inputs = {key: value for key, value in features.items() if key != 'rating'}
        predicted_ratings = self(model_inputs)
        return self.task(labels=labels, predictions=predicted_ratings)

In [10]:
# For each of the 5 folds, train and persist two models:
#   model "a" (deep_model1) is fit on the fold's training split,
#   model "b" (deep_model2) is fit on the fold's test split.
for i in range(5):
    train_file1 = f'rmse/rmse_folds_actual/actual_rmse_train{i}.csv'
    # Fixed: this path previously pointed at the *train* file as well, so both
    # models were fit on identical data.
    test_file1 = f'rmse/rmse_folds_actual/actual_rmse_test{i}.csv'
    df_train1 = pd.read_csv(train_file1, header=None)
    df_test1 = pd.read_csv(test_file1, header=None)
    df_train1.columns = ['user_id', 'movie_id', 'rating']
    df_test1.columns = ['user_id', 'movie_id', 'rating']

    # Fresh, independently initialised models for every fold.
    deep_model1 = NetflixRmseModel()
    deep_model1.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

    deep_model2 = NetflixRmseModel()
    deep_model2.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

    df_train_tf1 = tf.data.Dataset.from_tensor_slices(dict(df_train1))
    df_test_tf1 = tf.data.Dataset.from_tensor_slices(dict(df_test1))

    # Keep only the three features the model reads.
    df_train_tf1 = df_train_tf1.map(lambda x: {'user_id': x['user_id'],
                                               'movie_id': x['movie_id'],
                                               'rating': x['rating']})

    df_test_tf1 = df_test_tf1.map(lambda x: {'user_id': x['user_id'],
                                             'movie_id': x['movie_id'],
                                             'rating': x['rating']})

    # Batch first, then cache, so the batched elements are reused across epochs.
    df_train_tf1 = df_train_tf1.batch(8192).cache()
    df_test_tf1 = df_test_tf1.batch(8192).cache()

    history_baseline1 = deep_model1.fit(df_train_tf1, epochs=10)
    joblib.dump(deep_model1, f'model_rmse{i}a.pkl')

    history_baseline2 = deep_model2.fit(df_test_tf1, epochs=10)
    joblib.dump(deep_model2, f'model_rmse{i}b.pkl')
    

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: ram://e6b447f6-1158-4d92-8097-c993575ae58b/assets


INFO:tensorflow:Assets written to: ram://e6b447f6-1158-4d92-8097-c993575ae58b/assets


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: ram://4ea7ae18-1144-49ed-ba10-59b5c0368286/assets


INFO:tensorflow:Assets written to: ram://4ea7ae18-1144-49ed-ba10-59b5c0368286/assets


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: ram://120017db-5c53-4618-8537-662ba8fad572/assets


INFO:tensorflow:Assets written to: ram://120017db-5c53-4618-8537-662ba8fad572/assets


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: ram://e72dadc5-3b22-4b50-a213-b0146a96040b/assets


INFO:tensorflow:Assets written to: ram://e72dadc5-3b22-4b50-a213-b0146a96040b/assets


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: ram://2dd0d2bb-c983-4318-a7ea-2dfc087a850e/assets


INFO:tensorflow:Assets written to: ram://2dd0d2bb-c983-4318-a7ea-2dfc087a850e/assets


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: ram://ee063bb6-9eab-4a6d-a615-dd27986d8b95/assets


INFO:tensorflow:Assets written to: ram://ee063bb6-9eab-4a6d-a615-dd27986d8b95/assets


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: ram://4229541d-d879-4841-8d10-6d3f97da7c99/assets


INFO:tensorflow:Assets written to: ram://4229541d-d879-4841-8d10-6d3f97da7c99/assets


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: ram://e0483399-860f-40db-858b-f6c9ccfa6573/assets


INFO:tensorflow:Assets written to: ram://e0483399-860f-40db-858b-f6c9ccfa6573/assets


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: ram://eed474f1-1343-48ef-ab00-eb80b2e2ca11/assets


INFO:tensorflow:Assets written to: ram://eed474f1-1343-48ef-ab00-eb80b2e2ca11/assets
