In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as MSE

In [2]:
rating = pd.read_csv('/Users/blakemyers/Desktop/data/ratings.csv', error_bad_lines=False, encoding='latin-1')

In [3]:
rating.head(3)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224


In [4]:
movie = pd.read_csv("/Users/blakemyers/Desktop/data/movies.csv", error_bad_lines=False, encoding='latin-1')

In [5]:
movie.head(3)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance


In [6]:
movie_rating = pd.merge(rating, movie, on = 'movieId')

In [7]:
movie_rating.head(3)

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,5,1,4.0,847434962,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,7,1,4.5,1106635946,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy


In [8]:
cols = ['timestamp']

In [9]:
movie_rating.drop(cols, axis=1, inplace=True)

In [10]:
movie_rating.head(3)

Unnamed: 0,userId,movieId,rating,title,genres
0,1,1,4.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,5,1,4.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,7,1,4.5,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy


In [11]:
numrate_movie = movie_rating.groupby("title")["rating"].count().reset_index()

In [12]:
numrate_movie.head(3)

Unnamed: 0,title,rating
0,'71 (2014),1
1,'Hellboy': The Seeds of Creation (2004),1
2,'Round Midnight (1986),2


In [13]:
numrate_movie.rename({"rating": "ratecount_movie"}, axis=1, inplace=True)

In [14]:
numrate_movie.head(3)

Unnamed: 0,title,ratecount_movie
0,'71 (2014),1
1,'Hellboy': The Seeds of Creation (2004),1
2,'Round Midnight (1986),2


In [15]:
numrate_movie = numrate_movie.query("ratecount_movie >= 20")

In [16]:
numrate_movie.head(3)

Unnamed: 0,title,ratecount_movie
8,(500) Days of Summer (2009),42
18,10 Things I Hate About You (1999),54
23,101 Dalmatians (1996),47


In [17]:
ratings20plus = pd.merge(numrate_movie, movie_rating, on = 'title', how = 'inner')

In [18]:
ratings20plus.head(3)

Unnamed: 0,title,ratecount_movie,userId,movieId,rating,genres
0,(500) Days of Summer (2009),42,15,69757,4.0,Comedy|Drama|Romance
1,(500) Days of Summer (2009),42,18,69757,4.0,Comedy|Drama|Romance
2,(500) Days of Summer (2009),42,22,69757,0.5,Comedy|Drama|Romance


In [19]:
numrate_user = ratings20plus.groupby("userId")["rating"].count().reset_index()

In [20]:
numrate_user.head(3)

Unnamed: 0,userId,rating
0,1,181
1,2,23
2,3,22


In [21]:
numrate_user.rename({"rating": "ratecount_user"}, axis=1, inplace=True)

In [22]:
numrate_user.head(3)

Unnamed: 0,userId,ratecount_user
0,1,181
1,2,23
2,3,22


In [23]:
numrate_user = numrate_user.query("ratecount_user >= 20")

In [24]:
ur20plus = pd.merge(ratings20plus, numrate_user, on = "userId", how = "inner")

In [25]:
ur20plus.head(5)

Unnamed: 0,title,ratecount_movie,userId,movieId,rating,genres,ratecount_user
0,(500) Days of Summer (2009),42,15,69757,4.0,Comedy|Drama|Romance,122
1,101 Dalmatians (One Hundred and One Dalmatians...,44,15,2085,1.5,Adventure|Animation|Children,122
2,28 Days Later (2002),58,15,6502,3.5,Action|Horror|Sci-Fi,122
3,A.I. Artificial Intelligence (2001),56,15,4370,4.0,Adventure|Drama|Sci-Fi,122
4,"Adjustment Bureau, The (2011)",21,15,84954,4.5,Romance|Sci-Fi|Thriller,122


In [26]:
# Next stages:
    # finish the final steps of preprocessing the data
    # build ML model using tensorflow
    # begin training ML model on data

In [27]:
ur20plus.head(3)

Unnamed: 0,title,ratecount_movie,userId,movieId,rating,genres,ratecount_user
0,(500) Days of Summer (2009),42,15,69757,4.0,Comedy|Drama|Romance,122
1,101 Dalmatians (One Hundred and One Dalmatians...,44,15,2085,1.5,Adventure|Animation|Children,122
2,28 Days Later (2002),58,15,6502,3.5,Action|Horror|Sci-Fi,122


In [28]:
ur20plus = ur20plus.append(pd.DataFrame([["A.I. Artificial Intelligence (2001)",1,9999999,4370,5,"genre",1]], columns =ur20plus.columns), ignore_index=True)

In [29]:
scaler = MinMaxScaler()
ur20plus['rating'] = ur20plus['rating'].values.astype(float)
rating_scaled = pd.DataFrame(scaler.fit_transform(ur20plus['rating'].values.reshape(-1,1)))
ur20plus['rating'] = rating_scaled

In [30]:
ur20plus = ur20plus.drop_duplicates(['userId', 'title'])
user_movie_matrix = ur20plus.pivot(index='userId', columns='title', values='rating')
user_movie_matrix.fillna(0, inplace=True)

In [31]:
user_movie_matrix.head()

title,(500) Days of Summer (2009),10 Things I Hate About You (1999),101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),13 Going on 30 (2004),"13th Warrior, The (1999)",1408 (2007),2001: A Space Odyssey (1968),2012 (2009),...,Young Frankenstein (1974),Young Guns (1988),Zack and Miri Make a Porno (2008),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zootopia (2016),eXistenZ (1999),xXx (2002),Â¡Three Amigos! (1986)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.777778,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.777778
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.555556,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [32]:
X_train, X_test = train_test_split(user_movie_matrix, train_size=0.8)

In [33]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

Instructions for updating:
non-resource variables are not supported in the long term


In [34]:
num_input = ur20plus['title'].nunique()
# Deciding how many nodes wach layer should have
n_nodes_inpl = num_input  
n_nodes_hl1  = 256  
n_nodes_outl = num_input  
# first hidden layer has 784*32 weights and 32 biases
hidden_1_layer_vals = {'weights':tf.Variable(tf.random_normal([n_nodes_inpl+1,n_nodes_hl1]))}
# first hidden layer has 784*32 weights and 32 biases
output_layer_vals = {'weights':tf.Variable(tf.random_normal([n_nodes_hl1+1,n_nodes_outl]))}

In [35]:
# user with 3706 ratings goes in
input_layer = tf.placeholder('float', [None, num_input])
# add a constant node to the first layer
# it needs to have the same shape as the input layer for me to be
# able to concatinate it later
input_layer_const = tf.fill( [tf.shape(input_layer)[0], 1] ,1.0  )
input_layer_concat =  tf.concat([input_layer, input_layer_const], 1)
# multiply output of input_layer wth a weight matrix 
layer_1 = tf.nn.sigmoid(tf.matmul(input_layer_concat,\
hidden_1_layer_vals['weights']))
# adding one bias node to the hidden layer
layer1_const = tf.fill( [tf.shape(layer_1)[0], 1] ,1.0  )
layer_concat =  tf.concat([layer_1, layer1_const], 1)
# multiply output of hidden with a weight matrix to get final output
output_layer = tf.matmul( layer_concat,output_layer_vals['weights'])
# output_true shall have the original shape for error calculations
output_true = tf.placeholder('float', [None, num_input])
# define our cost function
meansq =    tf.reduce_mean(tf.square(output_layer - output_true))
# define our optimizer
learn_rate = 0.1   # how fast the model should learn
optimizer = tf.train.AdagradOptimizer(learn_rate).minimize(meansq)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [36]:
# initialising variables and starting the session
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
# defining batch size, number of epochs and learning rate
batch_size = 100  # how many images to use together for training
hm_epochs =200    # how many times to go through the entire dataset
tot_images = X_train.shape[0] # total number of images

In [37]:
# running the model for a 200 epochs taking 100 users in batches
# total improvement is printed out after each epoch
for epoch in range(hm_epochs):
    epoch_loss = 0    # initializing error as 0
    
    for i in range(int(tot_images/batch_size)):
        epoch_x = X_train[ i*batch_size : (i+1)*batch_size ]
        _, c = sess.run([optimizer, meansq],\
               feed_dict={input_layer: epoch_x, \
               output_true: epoch_x})
        epoch_loss += c
        
    output_train = sess.run(output_layer,\
               feed_dict={input_layer:X_train})
    output_test = sess.run(output_layer,\
                   feed_dict={input_layer:X_test})
        
    print('MSE train', MSE(output_train, X_train),'MSE test', MSE(output_test, X_test))      
    print('Epoch', epoch, '/', hm_epochs, 'loss:',epoch_loss)

MSE train 86.24059155812355 MSE test 86.05552652149807
Epoch 0 / 200 loss: 397.7935562133789
MSE train 72.6642668727069 MSE test 72.75200727478028
Epoch 1 / 200 loss: 323.5645980834961
MSE train 63.62635496464935 MSE test 63.85698242554843
Epoch 2 / 200 loss: 276.64864349365234
MSE train 57.210926593434245 MSE test 57.55992331495236
Epoch 3 / 200 loss: 244.65919876098633
MSE train 52.44733057652548 MSE test 52.91401047378563
Epoch 4 / 200 loss: 221.51977157592773
MSE train 48.78456108024659 MSE test 49.36789130510473
Epoch 5 / 200 loss: 204.13033294677734
MSE train 45.88373329857348 MSE test 46.574407311512196
Epoch 6 / 200 loss: 190.61064529418945
MSE train 43.539111333341246 MSE test 44.31622686188256
Epoch 7 / 200 loss: 179.82915496826172
MSE train 41.6033905265686 MSE test 42.44800707062826
Epoch 8 / 200 loss: 171.0539093017578
MSE train 39.97502356931101 MSE test 40.87186186116251
Epoch 9 / 200 loss: 163.76220703125
MSE train 38.58592501477313 MSE test 39.52333689713321
Epoch 10 /

MSE train 13.13673973118258 MSE test 14.678400299416339
Epoch 87 / 200 loss: 51.9025239944458
MSE train 12.990042080715591 MSE test 14.53451650807695
Epoch 88 / 200 loss: 51.30659198760986
MSE train 12.8454725690568 MSE test 14.392676693636894
Epoch 89 / 200 loss: 50.718610763549805
MSE train 12.703079198025936 MSE test 14.25278547633989
Epoch 90 / 200 loss: 50.13915252685547
MSE train 12.562967523309466 MSE test 14.114724598903162
Epoch 91 / 200 loss: 49.56850242614746
MSE train 12.425524944407455 MSE test 13.978602551314179
Epoch 92 / 200 loss: 49.00768852233887
MSE train 12.290927554943192 MSE test 13.844608085327682
Epoch 93 / 200 loss: 48.45840358734131
MSE train 12.159116049259772 MSE test 13.712784231304958
Epoch 94 / 200 loss: 47.92093563079834
MSE train 12.030008036731145 MSE test 13.583105486549087
Epoch 95 / 200 loss: 47.395036697387695
MSE train 11.903324149225407 MSE test 13.4553923567726
Epoch 96 / 200 loss: 46.88009071350098
MSE train 11.778668031431163 MSE test 13.32933

MSE train 5.860709732736984 MSE test 7.259050320314119
Epoch 173 / 200 loss: 22.30119276046753
MSE train 5.819033832414275 MSE test 7.216188228278592
Epoch 174 / 200 loss: 22.133257389068604
MSE train 5.777861245844892 MSE test 7.173988199037769
Epoch 175 / 200 loss: 21.967336177825928
MSE train 5.737246708611544 MSE test 7.13244235949891
Epoch 176 / 200 loss: 21.803627967834473
MSE train 5.69723881817597 MSE test 7.091546183963361
Epoch 177 / 200 loss: 21.642327308654785
MSE train 5.657876804206143 MSE test 7.051291771370371
Epoch 178 / 200 loss: 21.483574867248535
MSE train 5.619186352467403 MSE test 7.011667400427529
Epoch 179 / 200 loss: 21.327467918395996
MSE train 5.5811666258678745 MSE test 6.9726557203574515
Epoch 180 / 200 loss: 21.17404270172119
MSE train 5.5437812419545915 MSE test 6.934228061199409
Epoch 181 / 200 loss: 21.02324628829956
MSE train 5.506964885290777 MSE test 6.896345742541242
Epoch 182 / 200 loss: 20.874892234802246
MSE train 5.470640787957771 MSE test 6.858

In [38]:
# ur20plus = ur20plus.append(pd.DataFrame([["A.I. Artificial Intelligence (2001)",1,9999999,4370,5,"genre",1]], columns =ur20plus.columns), ignore_index=True)

# scaler = MinMaxScaler()
# ur20plus['rating'] = ur20plus['rating'].values.astype(float)
# rating_scaled = pd.DataFrame(scaler.fit_transform(ur20plus['rating'].values.reshape(-1,1)))
# ur20plus['rating'] = rating_scaled

# ur20plus = ur20plus.drop_duplicates(['userId', 'title'])
# user_movie_matrix = ur20plus.pivot(index='userId', columns='title', values='rating')
# user_movie_matrix.fillna(0, inplace=True)

In [39]:
pred_data = pd.DataFrame()
preds = sess.run(output_layer, feed_dict={input_layer: user_movie_matrix})
pred_data = pred_data.append(pd.DataFrame(preds))
pred_data = pred_data.stack().reset_index(name='rating')
pred_data.columns = ['userId', 'title', 'rating']
users = user_movie_matrix.index.tolist()
movies = user_movie_matrix.columns.tolist()
pred_data['userId'] = pred_data['userId'].map(lambda value: users[value])
pred_data['title'] = pred_data['title'].map(lambda value: movies[value])
keys = ['userId', 'title']
index_1 = pred_data.set_index(keys).index
index_2 = ur20plus.set_index(keys).index
top_ten_ranked = pred_data[~index_1.isin(index_2)]
top_ten_ranked = top_ten_ranked.sort_values(['userId', 'rating'], ascending=[True, False])
top_ten_ranked = top_ten_ranked.groupby('userId').head(10)

In [40]:
top_ten_ranked.loc[top_ten_ranked['userId'] == 9999999]

Unnamed: 0,userId,title,rating
735141,9999999,Skyfall (2012),7.716952
734742,9999999,King Kong (1933),7.497094
734759,9999999,"Last King of Scotland, The (2006)",7.470444
735291,9999999,Toys (1992),7.462188
734701,9999999,Into the Wild (2007),7.319569
734992,9999999,Poltergeist (1982),7.074063
734612,9999999,Guardians of the Galaxy (2014),6.922525
734464,9999999,Ed Wood (1994),6.84463
735080,9999999,"Saint, The (1997)",6.807467
734295,9999999,Brokeback Mountain (2005),6.425684


In [41]:
ur20plus = ur20plus.append(pd.DataFrame([["Hulk (2003)",1,9999991,4370,5,"genre",1]], columns =ur20plus.columns), ignore_index=True)
scaler = MinMaxScaler()
ur20plus['rating'] = ur20plus['rating'].values.astype(float)
rating_scaled = pd.DataFrame(scaler.fit_transform(ur20plus['rating'].values.reshape(-1,1)))
ur20plus['rating'] = rating_scaled
ur20plus = ur20plus.drop_duplicates(['userId', 'title'])
user_movie_matrix = ur20plus.pivot(index='userId', columns='title', values='rating')
user_movie_matrix.fillna(0, inplace=True)

In [42]:
pred_data = pd.DataFrame()
preds = sess.run(output_layer, feed_dict={input_layer: user_movie_matrix})
pred_data = pred_data.append(pd.DataFrame(preds))
pred_data = pred_data.stack().reset_index(name='rating')
pred_data.columns = ['userId', 'title', 'rating']
users = user_movie_matrix.index.tolist()
movies = user_movie_matrix.columns.tolist()
pred_data['userId'] = pred_data['userId'].map(lambda value: users[value])
pred_data['title'] = pred_data['title'].map(lambda value: movies[value])
keys = ['userId', 'title']
index_1 = pred_data.set_index(keys).index
index_2 = ur20plus.set_index(keys).index
top_ten_ranked = pred_data[~index_1.isin(index_2)]
top_ten_ranked = top_ten_ranked.sort_values(['userId', 'rating'], ascending=[True, False])
top_ten_ranked = top_ten_ranked.groupby('userId').head(10)

In [43]:
top_ten_ranked.loc[top_ten_ranked['userId'] == 9999991]

Unnamed: 0,userId,title,rating
734475,9999991,Englishman Who Went Up a Hill But Came Down a ...,7.362302
734929,9999991,North by Northwest (1959),7.205393
734126,9999991,About Schmidt (2002),6.790761
734980,9999991,Pirates of the Caribbean: The Curse of the Bla...,6.749935
734357,9999991,Cocoon (1985),6.604098
735080,9999991,"Saint, The (1997)",6.315989
735245,9999991,The Hunger Games (2012),6.303861
734361,9999991,Conan the Barbarian (1982),6.275743
735009,9999991,Prometheus (2012),6.234092
734941,9999991,Office Space (1999),6.181133


In [44]:
user_movie_matrix.tail()

title,(500) Days of Summer (2009),10 Things I Hate About You (1999),101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),13 Going on 30 (2004),"13th Warrior, The (1999)",1408 (2007),2001: A Space Odyssey (1968),2012 (2009),...,Young Frankenstein (1974),Young Guns (1988),Zack and Miri Make a Porno (2008),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zootopia (2016),eXistenZ (1999),xXx (2002),Â¡Three Amigos! (1986)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,...,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.177778,0.133333,0.0
609,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
610,0.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.155556,0.177778,0.111111,...,0.0,0.0,0.133333,0.2,0.133333,0.155556,0.155556,0.0,0.066667,0.0
9999991,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9999999,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [45]:
ur20plus = ur20plus.append(pd.DataFrame([["Wild Wild West (1999)",1,9999995,900000000,5,"genre",1]], columns =ur20plus.columns), ignore_index=True)
scaler = MinMaxScaler()
ur20plus['rating'] = ur20plus['rating'].values.astype(float)
rating_scaled = pd.DataFrame(scaler.fit_transform(ur20plus['rating'].values.reshape(-1,1)))
ur20plus['rating'] = rating_scaled
ur20plus = ur20plus.drop_duplicates(['userId', 'title'])
user_movie_matrix = ur20plus.pivot(index='userId', columns='title', values='rating')
user_movie_matrix.fillna(0, inplace=True)
pred_data = pd.DataFrame()
preds = sess.run(output_layer, feed_dict={input_layer: user_movie_matrix})
pred_data = pred_data.append(pd.DataFrame(preds))
pred_data = pred_data.stack().reset_index(name='rating')
pred_data.columns = ['userId', 'title', 'rating']
users = user_movie_matrix.index.tolist()
movies = user_movie_matrix.columns.tolist()
pred_data['userId'] = pred_data['userId'].map(lambda value: users[value])
pred_data['title'] = pred_data['title'].map(lambda value: movies[value])
keys = ['userId', 'title']
index_1 = pred_data.set_index(keys).index
index_2 = ur20plus.set_index(keys).index
top_ten_ranked = pred_data[~index_1.isin(index_2)]
top_ten_ranked = top_ten_ranked.sort_values(['userId', 'rating'], ascending=[True, False])
top_ten_ranked = top_ten_ranked.groupby('userId').head(10)

In [46]:
top_ten_ranked.loc[top_ten_ranked['userId'] == 9999995]

Unnamed: 0,userId,title,rating
736106,9999995,M*A*S*H (a.k.a. MASH) (1970),8.204329
736332,9999995,"Return of the Pink Panther, The (1975)",7.79102
736009,9999995,JFK (1991),7.564932
736172,9999995,Monty Python's The Meaning of Life (1983),7.076991
736370,9999995,Runaway Bride (1999),6.899931
736150,9999995,Midnight in the Garden of Good and Evil (1997),6.889815
736015,9999995,"Jerk, The (1979)",6.611366
736137,9999995,Meet the Fockers (2004),6.414832
736684,9999995,Yes Man (2008),6.357397
735726,9999995,Disclosure (1994),6.298272


In [47]:
ur20plus = ur20plus.append(pd.DataFrame([["Jerk, The (1979)",1,9999996,900000000,5,"genre",1]], columns =ur20plus.columns), ignore_index=True)
scaler = MinMaxScaler()
ur20plus['rating'] = ur20plus['rating'].values.astype(float)
rating_scaled = pd.DataFrame(scaler.fit_transform(ur20plus['rating'].values.reshape(-1,1)))
ur20plus['rating'] = rating_scaled
ur20plus = ur20plus.drop_duplicates(['userId', 'title'])
user_movie_matrix = ur20plus.pivot(index='userId', columns='title', values='rating')
user_movie_matrix.fillna(0, inplace=True)
pred_data = pd.DataFrame()
preds = sess.run(output_layer, feed_dict={input_layer: user_movie_matrix})
pred_data = pred_data.append(pd.DataFrame(preds))
pred_data = pred_data.stack().reset_index(name='rating')
pred_data.columns = ['userId', 'title', 'rating']
users = user_movie_matrix.index.tolist()
movies = user_movie_matrix.columns.tolist()
pred_data['userId'] = pred_data['userId'].map(lambda value: users[value])
pred_data['title'] = pred_data['title'].map(lambda value: movies[value])
keys = ['userId', 'title']
index_1 = pred_data.set_index(keys).index
index_2 = ur20plus.set_index(keys).index
top_ten_ranked = pred_data[~index_1.isin(index_2)]
top_ten_ranked = top_ten_ranked.sort_values(['userId', 'rating'], ascending=[True, False])
top_ten_ranked = top_ten_ranked.groupby('userId').head(10)

In [48]:
top_ten_ranked.loc[top_ten_ranked['userId'] == 9999996]

Unnamed: 0,userId,title,rating
736700,9999996,12 Angry Men (1957),7.395582
737629,9999996,"Return of the Pink Panther, The (1975)",6.805464
737434,9999996,Meet the Fockers (2004),6.767052
736739,9999996,Alice in Wonderland (1951),6.749312
736951,9999996,Cocoon (1985),6.60778
737110,9999996,Finding Nemo (2003),6.279869
737957,9999996,White Men Can't Jump (1992),6.248486
737295,9999996,Into the Wild (2007),6.197792
737084,9999996,Executive Decision (1996),6.153769
737221,9999996,Harry Potter and the Order of the Phoenix (2007),6.092871
