In [1]:
import os
import tqdm
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
import tensorflow as tf
from tfcf.metrics import mae
from tfcf.metrics import rmse
from tfcf.datasets import ml1m
from tfcf.config import Config
from tfcf.models.svd import SVD
from sklearn.model_selection import train_test_split

dir_ = '../../data/'
file_name = 'normalized_to_rating_filter_track_5_user_100.csv'

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Note that x is a 2D numpy array, 
# x[i, :] contains the user-item pair, and y[i] is the corresponding rating.

df = pd.read_pickle(os.path.join(dir_, file_name[:-3] + 'pkl'))

x_train = np.loadtxt(os.path.join(dir_, 'train_x_' + file_name), delimiter=',')
y_train = np.loadtxt(os.path.join(dir_, 'train_y_' + file_name), delimiter=',')
print(len(x_train), len(y_train))

x_train, x_val = train_test_split(x_train, test_size=0.2, random_state= np.random)
y_train, y_val = train_test_split(y_train, test_size=0.2, random_state= np.random)
print(len(x_train), len(y_train), len(x_val), len(y_val))

x_test = np.loadtxt(os.path.join(dir_, 'test_x_' + file_name), delimiter=',')
y_test = np.loadtxt(os.path.join(dir_, 'test_y_' + file_name), delimiter=',')

561889 561889
449511 449511 112378 112378


In [3]:
x_train

array([[2.1600e+02, 3.0568e+04],
       [1.9800e+02, 2.3150e+04],
       [6.0000e+00, 7.7230e+03],
       ...,
       [1.8200e+02, 3.4798e+04],
       [4.0000e+00, 1.5350e+03],
       [8.8000e+01, 4.1283e+04]])

In [4]:
config = Config()
config.num_users = len(df['uid'].unique())
config.num_items = len(df['tid'].unique())
config.min_value = df['rating'].min()
config.max_value = df['rating'].max()

In [5]:
with tf.compat.v1.Session() as sess:
    # For SVD++ algorithm, if `dual` is True, then the dual term of items' 
    # implicit feedback will be added into the original SVD++ algorithm.
    # model = SVDPP(config, sess, dual=False)
    # model = SVDPP(config, sess, dual=True)
    model = SVD(config, sess)
    model.train(x_train, y_train, validation_data=(x_val, y_val), epochs=20, batch_size=1024)        
    y_pred = model.predict(x_test)
    print('rmse: {}, mae: {}'.format(rmse(y_test, y_pred), mae(y_test, y_pred)))


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
 96/439 [=====>........................] - ETA: 4s - rmse: 0.8966 - mae: 0.6303

KeyboardInterrupt: 

In [None]:
print(y_pred)

In [None]:
df = pd.DataFrame(y_pred, columns=['rating'])
df2 = pd.DataFrame(x_test, columns=['uid', 'tid'])
df2.insert(2, 'rating', y_pred, False) 
df2

In [10]:
df2.to_pickle(os.path.join(dir_, 'prediction_svd_top_N_' + file_name[:-3] + 'pkl'))