In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

# Load the raw ratings and the book metadata, then join them on ISBN.
rating = pd.read_csv('Ratings.csv')
book = pd.read_csv('Books.csv')
book_rating = rating.merge(book, on='ISBN')

# Metadata columns removed right after the join.
cols = ['YearOfPublication', 'Publisher', 'BookAuthor', 'ImageURLS', 'ImageURLM', 'ImageURLL']
book_rating = book_rating.drop(columns=cols)

# Number of ratings per book; only books with at least `threshold` ratings are kept.
ratings_per_book = book_rating.groupby(by=['Book'])['BookRating'].count()
rating_count = ratings_per_book.reset_index().rename(columns={'BookRating': 'RatingCount_book'})
rating_count = rating_count[['Book', 'RatingCount_book']]

threshold = 25
rating_count = rating_count[rating_count['RatingCount_book'] >= threshold]

# Attach the individual ratings of the retained books.
user_rating = rating_count.merge(book_rating, on='Book', how='left')

# Number of ratings per user; only users with at least `threshold` ratings are kept.
ratings_per_user = user_rating.groupby(by=['UserID'])['BookRating'].count()
user_count = ratings_per_user.reset_index().rename(columns={'BookRating': 'RatingCount_user'})
user_count = user_count[['UserID', 'RatingCount_user']]

threshold = 20
user_count = user_count[user_count['RatingCount_user'] >= threshold]

# Inner join keeps only the ratings made by the retained users.
combined = user_rating.merge(user_count, on='UserID', how='inner')

print('Number of unique books: ', combined['Book'].nunique())
print('Number of unique users: ', combined['UserID'].nunique())

In [None]:
# Rescale BookRating with MinMaxScaler (default feature range) and write the
# result back into `combined`.
scaler = MinMaxScaler()
combined['BookRating'] = combined['BookRating'].values.astype(float)

# fit_transform expects a 2-D array, hence the reshape to a single column.
rating_scaled = pd.DataFrame(scaler.fit_transform(combined['BookRating'].values.reshape(-1, 1)))

# Assign the raw values positionally. Assigning the DataFrame itself would align
# its fresh 0..n-1 index against `combined`'s index labels, which silently
# misplaces values (or yields NaN) whenever `combined` has a non-contiguous
# index, e.g. after drop_duplicates.
combined['BookRating'] = rating_scaled[0].values

In [None]:
# Keep a single row per (user, book) pair so every pivot cell is unique.
combined = combined.drop_duplicates(subset=['UserID', 'Book'])

# Users on rows, books on columns; pairs without a rating are filled with 0.
rating_table = combined.pivot(index='UserID', columns='Book', values='BookRating').fillna(0)

# Row/column labels are saved so matrix positions can be mapped back later.
users = rating_table.index.tolist()
books = rating_table.columns.tolist()
user_book_matrix = rating_table.values

In [None]:
# Rebind `tf` to the TensorFlow 1.x compatibility API (replacing the plain
# `import tensorflow as tf` from the first cell) and switch off 2.x behaviour;
# the cells below rely on tf.placeholder and tf.Session.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

In [None]:
# Layer widths: one input unit per distinct book, followed by two hidden sizes.
num_input = combined['Book'].nunique()
num_hidden_1 = 10
num_hidden_2 = 5

# Input placeholder: any number of rows, one column per book.
X = tf.placeholder(tf.float64, [None, num_input])


def _random_variable(shape):
    """Create a float64 tf.Variable initialised from tf.random_normal."""
    return tf.Variable(tf.random_normal(shape, dtype=tf.float64))


# Weight matrices, keyed by the names the encoder/decoder functions look up.
weights = {
    'encoder_h1': _random_variable([num_input, num_hidden_1]),
    'encoder_h2': _random_variable([num_hidden_1, num_hidden_2]),
    'decoder_h1': _random_variable([num_hidden_2, num_hidden_1]),
    'decoder_h2': _random_variable([num_hidden_1, num_input]),
}

# Bias vectors, one per layer output.
biases = {
    'encoder_b1': _random_variable([num_hidden_1]),
    'encoder_b2': _random_variable([num_hidden_2]),
    'decoder_b1': _random_variable([num_hidden_1]),
    'decoder_b2': _random_variable([num_input]),
}

In [None]:
def encoder(x):
    """Apply the two encoder layers to `x` and return the second layer's output.

    Each layer computes sigmoid(input @ W + b), with W and b taken from the
    module-level `weights` and `biases` dictionaries.
    """
    activation = x
    for weight_key, bias_key in (('encoder_h1', 'encoder_b1'), ('encoder_h2', 'encoder_b2')):
        pre_activation = tf.add(tf.matmul(activation, weights[weight_key]), biases[bias_key])
        activation = tf.nn.sigmoid(pre_activation)
    return activation

def decoder(x):
    """Apply the two decoder layers to `x` and return the second layer's output.

    Each layer computes sigmoid(input @ W + b), with W and b taken from the
    module-level `weights` and `biases` dictionaries.
    """
    activation = x
    for weight_key, bias_key in (('decoder_h1', 'decoder_b1'), ('decoder_h2', 'decoder_b2')):
        pre_activation = tf.add(tf.matmul(activation, weights[weight_key]), biases[bias_key])
        activation = tf.nn.sigmoid(pre_activation)
    return activation

In [None]:
# Wire the autoencoder: the placeholder goes through the encoder, then the decoder.
encoder_op = encoder(X)
y_pred = decoder_op = decoder(encoder_op)

# The reconstruction target is the input itself.
y_true = X

In [None]:
# Mean squared error between input and reconstruction, minimised with RMSProp.
loss = tf.losses.mean_squared_error(labels=y_true, predictions=y_pred)
optimizer = tf.train.RMSPropOptimizer(learning_rate=0.03).minimize(loss)

# Shape-less integer placeholders feeding a precision metric and its update op.
eval_x = tf.placeholder(dtype=tf.int32)
eval_y = tf.placeholder(dtype=tf.int32)
pre, pre_op = tf.metrics.precision(labels=eval_x, predictions=eval_y)

In [None]:
# Initialiser ops for global and local variables; the training cell runs both.
init, local_init = tf.global_variables_initializer(), tf.local_variables_initializer()

# Start from an empty frame; the training cell assigns the predictions to this name.
pred_data = pd.DataFrame()

In [None]:
# Train the autoencoder on the user x book matrix, predict a rating for every
# (user, book) pair, and keep each user's ten best-scoring unrated books.
from sklearn.metrics import classification_report

train_loss_results = []      # average batch loss, one entry per epoch
# No accuracy is computed for this reconstruction task; the name is kept
# (empty) only so that cells referring to it still find it defined.
train_accuracy_results = []
target_names = ['class 0', 'class 1', 'class 2']  # not used in this cell

with tf.Session() as session:
    epochs = 100
    batch_size = 35

    session.run(init)
    session.run(local_init)

    # At least one batch, so fewer than `batch_size` users does not make
    # np.array_split fail on a section count of zero.
    num_batches = max(1, user_book_matrix.shape[0] // batch_size)
    # Batches live in their own name: `user_book_matrix` stays an ndarray, so
    # the cell can be re-run even after an interrupted training run.
    batches = np.array_split(user_book_matrix, num_batches)

    for i in range(epochs):
        avg_cost = 0
        for batch in batches:
            _, batch_loss = session.run([optimizer, loss], feed_dict={X: batch})
            avg_cost += batch_loss

        avg_cost /= num_batches
        train_loss_results.append(avg_cost)
        print("epoch: {} Loss: {}".format(i + 1, avg_cost))

    # Reconstruct the full matrix with the trained network.
    preds = session.run(decoder_op, feed_dict={X: user_book_matrix})

# Long format: one row per (user, book) with the predicted rating. The frame is
# built from `preds` directly (DataFrame.append no longer exists in pandas 2.x),
# which also makes the cell idempotent.
pred_data = (
    pd.DataFrame(preds, index=users, columns=books)
    .stack()
    .rename_axis(['UserID', 'Book'])
    .reset_index(name='BookRating')
)

# Drop the pairs the user has already rated.
keys = ['UserID', 'Book']
index_1 = pred_data.set_index(keys).index
index_2 = combined.set_index(keys).index
top_ten_ranked = pred_data[~index_1.isin(index_2)]

# Rank by predicted rating (highest first) within each user, then keep ten.
# Sorting by 'Book' here would pick the alphabetically last titles instead.
top_ten_ranked = top_ten_ranked.sort_values(['UserID', 'BookRating'], ascending=[True, False])
top_ten_ranked = top_ten_ranked.groupby('UserID').head(10)

In [None]:
# Show the recommendations of one example user.
top_ten_ranked[top_ten_ranked['UserID'] == 278582]