### Import Libraries

In [None]:
import numpy as np
import numpy.ma as ma
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from numpy import genfromtxt
from collections import defaultdict
import csv
import pickle

### Dataset

In [None]:
# Read the top-10 and by-genre CSV tables into DataFrames.
top10_df, bygenre_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "data1/content_top10_df.csv",
        "data1/content_bygenre_df.csv",
    )
)

In [None]:
# Show the table read from data1/content_top10_df.csv (notebook cell output).
top10_df

In [None]:
# Show the table read from data1/content_bygenre_df.csv (notebook cell output).
bygenre_df

In [None]:
# Numeric training matrices: one comma-separated CSV each for the item
# features, the user features and the target values.
item_train = genfromtxt('data1/content_item_train.csv', delimiter=',')
user_train = genfromtxt('data1/content_user_train.csv', delimiter=',')
y_train = genfromtxt('data1/content_y_train.csv', delimiter=',')

# Column names live in separate one-line header files; only the first CSV
# row of each file is used.
with open('data1/content_item_train_header.txt', newline='') as f:
    header_rows = list(csv.reader(f))
    movie_features = np.array(header_rows[0])
with open('data1/content_user_train_header.txt', newline='') as f:
    header_rows = list(csv.reader(f))
    user_features = np.array(header_rows[0])

item_vecs = genfromtxt('data1/content_item_vecs.csv', delimiter=',')

# movie_dict maps movie id -> {"title": ..., "genres": ...}.
# `count` ends up holding the number of rows read, header included.
movie_dict = defaultdict(dict)
count = 0
with open('data1/content_movie_list.csv', newline='') as csvfile:
    reader = csv.reader(csvfile, delimiter=',', quotechar='"')
    for count, line in enumerate(reader, start=1):
        if count == 1:
            # The first row is the column header, not a movie.
            continue
        movie_id = int(line[0])
        entry = movie_dict[movie_id]
        entry["title"] = line[1]
        entry["genres"] = line[2]

# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# this file from a trusted source.
with open('data1/content_user_to_genre.pickle', 'rb') as f:
    user_to_genre = pickle.load(f)

In [None]:
# Wrap the raw training matrices in DataFrames, using the names read from
# the header files as column labels.
movie_data_df = pd.DataFrame(data=item_train, columns=movie_features)
user_data_df = pd.DataFrame(data=user_train, columns=user_features)

In [None]:
# Drop rows that are exact duplicates of an earlier row, keeping the first
# occurrence, then show the result as the cell output.
user_data_df = user_data_df.drop_duplicates(keep="first")
user_data_df

In [None]:
# Drop rows that are exact duplicates of an earlier row, keeping the first
# occurrence, then show the result as the cell output.
movie_data_df = movie_data_df.drop_duplicates(keep="first")
movie_data_df

In [None]:
# Column offsets into the raw user / item matrices.
u_s = 3  # first user column fed to the model (columns before it are skipped)
i_s = 1  # first item column fed to the model (columns before it are skipped)
uvs = 3  # presumably the start of the user genre columns -- unused in this part of the notebook
ivs = 3  # presumably the start of the item genre columns -- unused in this part of the notebook

# Width of the model inputs: all columns except the skipped leading ones.
# Derived from u_s / i_s so the input layers always match the slices
# user_train[:, u_s:] and item_train[:, i_s:] used for training.
num_user_features = user_data_df.shape[1] - u_s
num_item_features = movie_data_df.shape[1] - i_s

# Report the size of the training matrix itself; movie_data_df has had its
# duplicate rows removed, so its length is not the number of training vectors.
print(f"Number of training vectors: {len(item_train)}")

### Training Data

In [None]:
# Keep references to the raw (unscaled, unsplit) arrays.
item_train_unscaled = item_train
user_train_unscaled = user_train
y_train_unscaled    = y_train

# Split BEFORE fitting any scaler so that no statistic of the held-out rows
# leaks into preprocessing. A single call shuffles all three arrays with the
# same permutation, which keeps item / user / target rows aligned.
(item_train, item_test,
 user_train, user_test,
 y_train, y_test) = train_test_split(
    item_train, user_train, y_train.reshape(-1, 1),
    train_size=0.80, shuffle=True, random_state=1,
)

# Fit each scaler on the training split only, then apply the same fitted
# transform to the test split.
scalerItem = StandardScaler()
item_train = scalerItem.fit_transform(item_train)
item_test = scalerItem.transform(item_test)

scalerUser = StandardScaler()
user_train = scalerUser.fit_transform(user_train)
user_test = scalerUser.transform(user_test)

# Targets are mapped to [-1, 1] (based on the training-split min/max).
scalerTarget = MinMaxScaler((-1, 1))
y_train = scalerTarget.fit_transform(y_train)
y_test = scalerTarget.transform(y_test)

print(f"movie/item training data shape: {item_train.shape}")
print(f"movie/item test data shape: {item_test.shape}")

### Model Training

In [None]:
# Two-tower model: a user network and an item network each map their feature
# vector to a `num_outputs`-dimensional embedding; the model output is the
# dot product of the two L2-normalised embeddings.
num_outputs = 32  # embedding width; must be equal for both towers (dot product)
tf.random.set_seed(1)
user_NN = tf.keras.models.Sequential([
  tf.keras.layers.Dense(256, activation='relu'),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dense(num_outputs),
])

item_NN = tf.keras.models.Sequential([
  tf.keras.layers.Dense(256, activation='relu'),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dense(num_outputs),
])

# `shape` must be a tuple: `(n)` is just the int `n`, whereas `(n,)` is the
# one-element tuple Keras expects for a flat feature vector.
input_user = tf.keras.layers.Input(shape=(num_user_features,))
vu = user_NN(input_user)
# NOTE(review): calling a raw TF op on a symbolic Keras tensor relies on the
# TF2 / Keras 2 functional API; confirm the target Keras version supports it.
vu = tf.linalg.l2_normalize(vu, axis=1)

input_item = tf.keras.layers.Input(shape=(num_item_features,))
vm = item_NN(input_item)
vm = tf.linalg.l2_normalize(vm, axis=1)

# Dot product along the embedding axis of the two unit-length vectors.
output = tf.keras.layers.Dot(axes=1)([vu, vm])

# Inputs are ordered [user, item]; callers must pass data in that order.
model = tf.keras.Model([input_user, input_item], output)

model.summary()

In [None]:
# Mean-squared-error loss optimised with Adam at a learning rate of 0.01.
tf.random.set_seed(1)
cost_fn = tf.keras.losses.MeanSquaredError()
opt = keras.optimizers.Adam(learning_rate=0.01)
model.compile(loss=cost_fn, optimizer=opt)

# Re-seed right before training, then fit for 30 epochs. The first u_s user
# columns and the first i_s item columns are excluded from the model inputs.
tf.random.set_seed(1)
train_inputs = [user_train[:, u_s:], item_train[:, i_s:]]
model.fit(x=train_inputs, y=y_train, epochs=30)

### New User

In [None]:
# Hand-built feature values for a new user.
# Leading fields: id, rating count, rating average.
new_user_id = 5000
new_rating_count = 3
new_rating_ave = 0.0

# One value per genre; only adventure and fantasy are non-zero.
new_action = 0.0
new_adventure = 5.0
new_animation = 0.0
new_childrens = 0.0
new_comedy = 0.0
new_crime = 0.0
new_documentary = 0.0
new_drama = 0.0
new_fantasy = 5.0
new_horror = 0.0
new_mystery = 0.0
new_romance = 0.0
new_scifi = 0.0
new_thriller = 0.0

# Genre values in the column order used by the user vector.
new_genre_ratings = [
    new_action, new_adventure, new_animation, new_childrens,
    new_comedy, new_crime, new_documentary,
    new_drama, new_fantasy, new_horror, new_mystery,
    new_romance, new_scifi, new_thriller,
]

# Single-row 2-D array: [id, rating count, rating average, genre values...].
user_vec = np.array(
    [[new_user_id, new_rating_count, new_rating_ave, *new_genre_ratings]]
)