In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as MSE

# Load the ratings file; only the userId, movieId and rating columns are used below.
ratings = pd.read_csv('ml-latest-small/ratings.csv', sep=",")
# Reshape into a dense user x movie matrix (rows: userId, columns: movieId).
# User/movie pairs without a rating become 0.
ratings_pivot = pd.pivot_table(ratings[['userId', 'movieId', 'rating']],
                               values='rating', index='userId', columns='movieId').fillna(0)
# Split the users (rows) 80/20 into train and test sets.
# A fixed random_state keeps the split, and therefore every metric reported
# further down, reproducible across kernel restarts.
X_train, X_test = train_test_split(ratings_pivot, train_size=0.8, random_state=42)


In [None]:
# Layer sizes. The input and output widths must equal the number of movie
# columns in the ratings matrix, so they are read from the data instead of
# being hard-coded; a different ratings snapshot then still builds a valid graph.
n_nodes_inpl = ratings_pivot.shape[1]
n_nodes_hl1  = 256
n_nodes_outl = n_nodes_inpl
# Each weight matrix has one extra row (+1). A constant column of ones is
# appended to the layer's input below, so that last row acts as the bias and
# no separate bias variables are needed.
hidden_1_layer_vals = {'weights':tf.Variable(tf.random_normal([n_nodes_inpl+1, n_nodes_hl1]))}
output_layer_vals = {'weights':tf.Variable(tf.random_normal([n_nodes_hl1+1, n_nodes_outl]))}

# One row per user, one column per movie.
input_layer = tf.placeholder('float', [None, n_nodes_inpl])

# Append a column of ones (sized to the batch at run time) to carry the bias.
input_layer_const = tf.fill([tf.shape(input_layer)[0], 1], 1.0)
input_layer_concat = tf.concat([input_layer, input_layer_const], 1)

# Hidden layer: affine transform followed by a sigmoid.
layer_1 = tf.nn.sigmoid(tf.matmul(input_layer_concat, hidden_1_layer_vals['weights']))

# Same bias trick for the output layer.
layer1_const = tf.fill( [tf.shape(layer_1)[0], 1], 1.0)
layer_concat =  tf.concat([layer_1, layer1_const], 1)

# Output layer is linear (no activation); it has the same width as the input.
output_layer = tf.matmul(layer_concat, output_layer_vals['weights'])
# Target values the output is compared against.
output_true = tf.placeholder('float', [None, n_nodes_outl])
# Loss: mean of the squared differences over every element of the batch.
meansq = tf.reduce_mean(tf.square(output_layer - output_true))

learn_rate = 0.1   # learning rate
optimizer = tf.train.AdagradOptimizer(learn_rate).minimize(meansq)

# Initialise the variables and start the session.
init = tf.global_variables_initializer()
sess = tf.Session()
# Dump the graph definition to ./graphs; the writer is closed right away
# because nothing else is logged through it.
writer = tf.summary.FileWriter('./graphs', tf.get_default_graph())
writer.close()
sess.run(init)


## Autoencoder with one hidden layer

![autoencoder with one layer](Images/autoencoders-1layer.png)

In [None]:
# Batch size and number of epochs for training.
batch_size = 100  # how many users to feed to the optimizer per step
hm_epochs = 200    # how many times to go through the entire training set
tot_users = X_train.shape[0]
# Run hm_epochs passes over the training users, batch_size rows at a time.
for epoch in range(hm_epochs):
    epoch_loss = 0    # running sum of the per-batch mean squared errors
    # Iterate over batch start offsets rather than a truncated batch count, so
    # the final (possibly shorter) batch is trained on too. Flooring
    # tot_users / batch_size silently skipped those trailing users in every epoch.
    for batch_start in range(0, tot_users, batch_size):
        epoch_x = X_train.iloc[batch_start : batch_start + batch_size]
        # Autoencoder: the same batch is fed as both input and target.
        _, c = sess.run([optimizer, meansq],feed_dict={input_layer: epoch_x, output_true: epoch_x})
        epoch_loss += c

    # After each epoch, reconstruct the full train and test sets and report their MSE.
    output_train = sess.run(output_layer, feed_dict={input_layer:X_train})
    output_test = sess.run(output_layer, feed_dict={input_layer:X_test})
    print('MSE train', MSE(output_train, X_train),'MSE test', MSE(output_test, X_test))
    print('Epoch', epoch, '/', hm_epochs, 'loss:',epoch_loss)


In [None]:
# Take the test-set row at position 99 as the example user.
sample_user = X_test.iloc[99]
# Run that single user through the network as a batch of one to get the
# predicted rating for every movie column.
sample_feed = {input_layer: [sample_user]}
sample_user_pred = sess.run(output_layer, feed_dict=sample_feed)