In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Load the ratings table: whitespace-separated columns, no header row.
# The separator is a raw string so that `\s` reaches the regex engine as-is
# instead of relying on Python passing an invalid escape sequence through.
rating = pd.read_csv(
    'ratings.dat',
    sep=r'\s+',
    names=['user', 'item', 'rating', 'timestamp'],
)
# The timestamp column is not used by the model.
rating = rating.drop('timestamp', axis=1)
rating.head()

Unnamed: 0,user,item,rating
0,1,1193,5
1,1,661,3
2,1,914,3
3,1,3408,4
4,1,2355,5


In [3]:
# Count the distinct users and items present in the ratings table.
num_users = rating['user'].nunique()
num_items = rating['item'].nunique()
print('USER:{} ITEMS:{}'.format(num_users, num_items))

USER:6040 ITEMS:3706


In [4]:
# Rescale the rating column with a min-max scaler fitted on the ratings
# themselves, and write the scaled values back into `rating`.
r = rating['rating'].astype(float).values

min_max_scaler = MinMaxScaler()
# The scaler expects a 2-D (n_samples, 1) array, hence the added axis.
x_scaler = min_max_scaler.fit_transform(r[:, None])

df_normalized = pd.DataFrame(x_scaler)
rating['rating'] = df_normalized
rating.head()

Unnamed: 0,user,item,rating
0,1,1193,1.0
1,1,661,0.5
2,1,914,0.5
3,1,3408,0.75
4,1,2355,1.0


In [5]:
# Reshape the long (user, item, rating) table into a wide user x item table.
# (user, item) pairs that have no rating are filled with 0.
# NOTE(review): if the lowest rating was scaled to 0.0 upstream, it becomes
# indistinguishable from "not rated" here -- confirm this is intended.
matrix = rating.pivot(
    index='user',
    columns='item',
    values='rating',
).fillna(value=0)
matrix.head()

item,1,2,3,4,5,6,7,8,9,10,...,3943,3944,3945,3946,3947,3948,3949,3950,3951,3952
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Remember the row/column labels before dropping down to a bare array:
# position i in `users` / `items` is the label of row / column i.
users = matrix.index.tolist()
items = matrix.columns.tolist()
# `.values` replaces the deprecated `DataFrame.as_matrix()` (removed in
# pandas 1.0) and yields the same 2-D ndarray.
matrix = matrix.values
matrix.shape

(6040, 3706)

In [7]:
# Autoencoder layout: num_input -> num_hidden_1 -> num_hidden_2 -> num_hidden_1 -> num_input.
# Every layer size below is derived from these three values, so changing one
# of them resizes the whole network consistently.
num_input = num_items
num_hidden_1 = 10
num_hidden_2 = 5

# One row per user; each row is that user's full item-rating vector.
X = tf.placeholder(tf.float64, [None, num_input])

# Weight matrices, initialised from a normal distribution.
weights = {
    'encoder_h1': tf.Variable(tf.random_normal([num_input, num_hidden_1], dtype=tf.float64)),
    'encoder_h2': tf.Variable(tf.random_normal([num_hidden_1, num_hidden_2], dtype=tf.float64)),
    'decoder_h1': tf.Variable(tf.random_normal([num_hidden_2, num_hidden_1], dtype=tf.float64)),
    'decoder_h2': tf.Variable(tf.random_normal([num_hidden_1, num_input], dtype=tf.float64)),
}

# Bias vectors, one per layer, sized to that layer's output width.
biases = {
    'encoder_b1': tf.Variable(tf.random_normal([num_hidden_1], dtype=tf.float64)),
    'encoder_b2': tf.Variable(tf.random_normal([num_hidden_2], dtype=tf.float64)),
    'decoder_b1': tf.Variable(tf.random_normal([num_hidden_1], dtype=tf.float64)),
    'decoder_b2': tf.Variable(tf.random_normal([num_input], dtype=tf.float64)),
}
X

<tf.Tensor 'Placeholder:0' shape=(?, 3706) dtype=float64>

In [8]:
def encoder(x):
    """Map `x` through the two encoder layers and return the encoded tensor.

    Each layer is a matrix multiply with the layer's weights, plus the
    layer's bias, followed by a sigmoid activation.
    """
    hidden_pre = tf.add(tf.matmul(x, weights['encoder_h1']), biases['encoder_b1'])
    hidden = tf.nn.sigmoid(hidden_pre)

    code_pre = tf.add(tf.matmul(hidden, weights['encoder_h2']), biases['encoder_b2'])
    code = tf.nn.sigmoid(code_pre)
    return code

In [9]:
def decoder(x):
    """Map `x` through the two decoder layers and return the result.

    Each layer is a matrix multiply with the layer's weights, plus the
    layer's bias, followed by a sigmoid activation.
    """
    layer_keys = (
        ('decoder_h1', 'decoder_b1'),  # decoder hidden layer #1
        ('decoder_h2', 'decoder_b2'),  # decoder hidden layer #2
    )
    activation = x
    for weight_key, bias_key in layer_keys:
        pre_activation = tf.add(tf.matmul(activation, weights[weight_key]), biases[bias_key])
        activation = tf.nn.sigmoid(pre_activation)
    return activation

In [10]:
# Wire the autoencoder together: the network is trained to reproduce its
# own input, so the target is the placeholder itself.
y_true = X
decoder_op = decoder(encoder(X))
y_pred = decoder_op

In [11]:
# Display the static shape of the reconstruction tensor.
y_pred.shape

TensorShape([Dimension(None), Dimension(3706)])

In [11]:
# Reconstruction objective: mean squared error between input and output,
# minimised with RMSProp.
loss = tf.losses.mean_squared_error(labels=y_true, predictions=y_pred)
optimizer = tf.train.RMSPropOptimizer(learning_rate=0.03).minimize(loss)

# Empty frame that the prediction step fills in later.
predictions = pd.DataFrame()

# Streaming precision metric fed through two shape-less placeholders.
# Note the naming: `eval_x` is passed as the labels and `eval_y` as the
# predictions.
eval_x = tf.placeholder(tf.float64)
eval_y = tf.placeholder(tf.float64)
pre, pre_op = tf.metrics.precision(labels=eval_x, predictions=eval_y)

In [12]:
# Initialiser ops: one for global variables, one for local variables.
init, local_init = (
    tf.global_variables_initializer(),
    tf.local_variables_initializer(),
)

In [None]:

# Train the autoencoder on mini-batches of users, reconstruct the whole
# rating matrix, and print each user's 10 highest-scored items that the
# user has not already rated.
epochs = 3
batch_size = 250

# Integer division, clamped to at least one batch so that np.array_split is
# never asked for 0 sections when there are fewer rows than `batch_size`.
num_batches = max(1, matrix.shape[0] // batch_size)
# The batches get their own name: `matrix` stays the full 2-D array even if
# this cell is interrupted half-way through.
batches = np.array_split(matrix, num_batches)

with tf.Session() as session:
    session.run(init)
    session.run(local_init)

    for epoch in range(epochs):
        avg_cost = 0

        for batch in batches:
            _, batch_loss = session.run([optimizer, loss], feed_dict={X: batch})
            avg_cost += batch_loss

        avg_cost /= num_batches

        print("Epoch: {} Loss: {}".format(epoch + 1, avg_cost))

    print("Predictions...")

    # `preds` is a plain ndarray, so everything after this line can run
    # outside the session.
    preds = session.run(decoder_op, feed_dict={X: matrix})

# Build the long (user, item, rating) table from scratch on every run.
# Labelling rows/columns with `users`/`items` before stacking avoids both the
# removed `DataFrame.append` and a per-row Python lookup over every cell.
predictions = (
    pd.DataFrame(preds, index=users, columns=items)
    .stack()
    .reset_index(name='rating')
)
predictions.columns = ['user', 'item', 'rating']

# Drop every (user, item) pair that already appears in the ratings table.
keys = ['user', 'item']
already_rated = predictions.set_index(keys).index.isin(rating.set_index(keys).index)
recs = predictions[~already_rated]

# Per user: highest predicted score first, keep the top 10.
recs = recs.sort_values(['user', 'rating'], ascending=[True, False])
recs = recs.groupby('user').head(10)
print(recs)

    

Epoch: 1 Loss: 0.34307469924290973
Epoch: 2 Loss: 0.342529971152544
Epoch: 3 Loss: 0.34053149198492366
Predictions...
