In [1]:
import pandas as pd
import numpy as np
from collections import Counter
import tqdm
import math
from sklearn.metrics import mean_squared_error
import warnings
# warnings.filterwarnings('ignore')
import random
from random import randint

import tensorflow as tf

from CF.collaborative_filtering import locationRec

In [2]:
# Instantiate the project recommender and run its data pipeline; the pipeline
# prints the train/validation/test shapes shown in this cell's output.
PREPROCESSING_MODE = 2  # value forwarded to the pipeline's `preproccesing` option
recmodel = locationRec()
recmodel.datapipeline(preproccesing=PREPROCESSING_MODE)
# all_df = pd.concat([recmodel.train, recmodel.validate, recmodel.test], axis=0)

 train shape (4102336, 3)
 validation shape (1849592, 3)
 test shape (1941760, 3)
4712 21347


In [3]:
# Raw user and town columns of the training split, one list entry per training
# row (so both lists contain repeated values).
train_split = recmodel.train
users = train_split["user_nickname"].tolist()
items = train_split["town"].tolist()

In [16]:
# Count the distinct users and towns seen in the training split.
num_users, num_items = len(set(users)), len(set(items))
summary_line = "#Items: {}, #Users: {}".format(num_items, num_users)
print(summary_line)

#Items: 4712, #Users: 21347


In [17]:
# Reproducibility: seed every random number generator this notebook imports
# before any TensorFlow variable is created, so that the randomly initialised
# weights (and therefore the reported losses) can be reproduced on a re-run.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.set_random_seed(SEED)

# Network Parameters
# NOTE(review): the training loops below iterate over a hard-coded range(20)
# and never read `epochs` — confirm which value is intended.
epochs = 100
batch_size = 250        # target number of user rows per mini-batch
num_input = num_items   # num of items (width of one user row)
num_hidden_1 = 15       # 1st layer num features
num_hidden_2 = 10       # 2nd layer num features (the latent dim)

In [18]:
# Input placeholder: any number of rows, each `num_input` values wide.
X = tf.placeholder(dtype=tf.float64, shape=[None, num_input])

In [19]:
# Shapes of the trainable parameters, listed in creation order:
# num_input -> num_hidden_1 -> num_hidden_2 -> num_hidden_1 -> num_input.
_weight_shapes = [
    ('encoder_h1', [num_input, num_hidden_1]),
    ('encoder_h2', [num_hidden_1, num_hidden_2]),
    ('decoder_h1', [num_hidden_2, num_hidden_1]),
    ('decoder_h2', [num_hidden_1, num_input]),
]
_bias_shapes = [
    ('encoder_b1', [num_hidden_1]),
    ('encoder_b2', [num_hidden_2]),
    ('decoder_b1', [num_hidden_1]),
    ('decoder_b2', [num_input]),
]

# Every parameter is a float64 variable initialised from tf.random_normal.
weights = {
    key: tf.Variable(tf.random_normal(shape, dtype=tf.float64))
    for key, shape in _weight_shapes
}

biases = {
    key: tf.Variable(tf.random_normal(shape, dtype=tf.float64))
    for key, shape in _bias_shapes
}

In [20]:
def encoder(x):
    """Map input rows to the latent code through two sigmoid layers.

    Applies sigmoid(h @ W + b) twice, first with the 'encoder_h1'/'encoder_b1'
    parameters and then with 'encoder_h2'/'encoder_b2', and returns the
    output of the second layer.
    """
    hidden = x
    for w_key, b_key in (('encoder_h1', 'encoder_b1'), ('encoder_h2', 'encoder_b2')):
        pre_activation = tf.add(tf.matmul(hidden, weights[w_key]), biases[b_key])
        hidden = tf.nn.sigmoid(pre_activation)
    return hidden

# Building the decoder

def decoder(x):
    """Map a latent code back to item space through two sigmoid layers.

    Applies sigmoid(h @ W + b) twice, first with the 'decoder_h1'/'decoder_b1'
    parameters and then with 'decoder_h2'/'decoder_b2', and returns the
    output of the second layer (so every output value lies in (0, 1)).
    """
    hidden = x
    for w_key, b_key in (('decoder_h1', 'decoder_b1'), ('decoder_h2', 'decoder_b2')):
        pre_activation = tf.add(tf.matmul(hidden, weights[w_key]), biases[b_key])
        hidden = tf.nn.sigmoid(pre_activation)
    return hidden

In [21]:
# Wire the autoencoder: the input is encoded, decoded, and the reconstruction
# is compared against the input itself.
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)
y_true, y_pred = X, decoder_op

In [22]:
# Reconstruction objective: mean squared error between input and output,
# minimised with RMSProp at a learning rate of 0.1.
loss = tf.losses.mean_squared_error(labels=y_true, predictions=y_pred)
# Note: despite its name, `optimizer` holds the training op returned by minimize().
optimizer = tf.train.RMSPropOptimizer(learning_rate=0.1).minimize(loss)

# Empty frame that the prediction cell later fills.
predictions = pd.DataFrame()

In [23]:
# User-item matrix as a NumPy array; the next cell shows its shape as
# (21347, 4712), i.e. (number of users, number of items).
matrix = recmodel.user_item_df.values

In [24]:
# Sanity check: rows should match #Users and columns #Items printed earlier.
matrix.shape

(21347, 4712)

In [25]:
# Session used by the following cells; it is created without a context manager
# and is not closed anywhere in this notebook.
sess = tf.Session()

In [26]:
# Where checkpoints and TensorBoard events go, and the step counter used
# when writing them.
save_dir = "./model/cf_tf/"
i_global = 0

# Ops that assign the variables their initial values, plus the checkpoint saver.
init = tf.global_variables_initializer()
local_init = tf.local_variables_initializer()
saver = tf.train.Saver()

In [27]:
# Initialise global variables first, then local ones, in the shared session.
for init_op in (init, local_init):
    sess.run(init_op)

# Event writer for TensorBoard; also records the graph definition.
train_writer = tf.summary.FileWriter(save_dir, sess.graph)

In [28]:
# Split the user-item matrix into mini-batches of roughly `batch_size` rows.
#
# The rows are read from `recmodel.user_item_df` rather than from `matrix`:
# this cell rebinds `matrix` to a list, so reading `matrix.shape` here would
# raise AttributeError the second time the cell is executed.
user_item_matrix = recmodel.user_item_df.values

# Always at least one batch: np.array_split rejects 0 sections, which is what
# the integer division yields when there are fewer rows than `batch_size`.
num_batches = max(1, user_item_matrix.shape[0] // batch_size)

# `matrix` is (re)bound to the list of batches because the training loop
# iterates over `matrix` directly.
matrix = np.array_split(user_item_matrix, num_batches)

In [31]:
# `matrix` is now a list of mini-batches; this shows how many there are.
len(matrix)

85

In [None]:
# Train for 20 passes over the mini-batches in the shared session, reporting
# the mean batch loss of each pass.
for epoch in range(20):
    print(epoch)

    batch_losses = []
    for batch in matrix:
        # One optimisation step; keep the loss measured on this batch.
        _, batch_loss = sess.run([optimizer, loss], feed_dict={X: batch})
        batch_losses.append(batch_loss)

    avg_cost = sum(batch_losses) / num_batches

    print("Epoch: {} Loss: {}".format(epoch + 1, avg_cost))

0


In [None]:
# Train the autoencoder from freshly initialised variables inside a dedicated
# session, then write the final mean loss to TensorBoard and save a checkpoint.
# The session is closed when the `with` block exits, so afterwards the trained
# weights are only available through the saved checkpoint.
saver = tf.train.Saver()
init = tf.global_variables_initializer()
local_init = tf.local_variables_initializer()
save_dir = "./model/cf_tf/"
i_global = 0

# Build the mini-batches from the source frame instead of from `matrix`: an
# earlier cell rebinds `matrix` to a list of batches, which has no `.shape`.
user_item_matrix = recmodel.user_item_df.values
# At least one batch, because np.array_split rejects 0 sections.
num_batches = max(1, user_item_matrix.shape[0] // batch_size)
batches = np.array_split(user_item_matrix, num_batches)

with tf.Session() as session:
    session.run(init)
    session.run(local_init)
    train_writer = tf.summary.FileWriter(save_dir, session.graph)

    avg_cost = 0.0
    for i in range(20):
        avg_cost = 0.0

        for batch in batches:
            # One optimisation step; accumulate the loss measured on this batch.
            _, batch_loss = session.run([optimizer, loss], feed_dict={X: batch})
            avg_cost += batch_loss

        avg_cost /= num_batches

        print("Epoch: {} Loss: {}".format(i + 1, avg_cost))

    # Record the mean loss of the last epoch. It is converted to a plain float
    # because the loss comes back from session.run as a NumPy scalar.
    summary = tf.Summary(value=[
        tf.Summary.Value(tag="loss/test", simple_value=float(avg_cost)),
    ])
    train_writer.add_summary(summary, i_global)
    # Close the writer so the event is flushed to disk rather than left in
    # the writer's buffer.
    train_writer.close()

    # NOTE(review): `save_dir` ends with "/", so it is used as the checkpoint
    # filename prefix and the files are named "-<step>..." inside that
    # directory. Kept unchanged so existing checkpoint paths stay valid.
    saver.save(session, save_path=save_dir, global_step=i_global)

    

In [None]:
# Score every (user, town) pair with the trained autoencoder, keep the k best
# towns per user that are not already in the training data, and report
# precision@k against the held-out test split for a sample of users.
print("Predictions...")

k = 10  # number of recommendations kept (and evaluated) per user

# `matrix` is rebound to a list of batches by the training cells, so the full
# user-item matrix is read from its source frame again.
user_item_df = recmodel.user_item_df
full_matrix = user_item_df.values

# `sess` is the session opened and trained in the cells above. A session
# created with `with tf.Session() as session:` is already closed here.
preds = sess.run(decoder_op, feed_dict={X: full_matrix})

print(full_matrix.shape)
print(preds.shape)

# Label the scores with the row/column labels of the frame the matrix came
# from, so each score stays attached to the pair it was computed for. (The
# `users` / `items` lists hold one entry per training row, with repeats, and
# therefore cannot be used to translate matrix positions.)
# NOTE(review): assumes user_item_df is indexed by user_nickname and has one
# column per town — confirm against locationRec.
# NOTE(review): stack() materialises num_users x num_items rows.
keys = ['user_nickname', 'town']
predictions = pd.DataFrame(preds, index=user_item_df.index, columns=user_item_df.columns)
predictions = predictions.stack().reset_index(name='rating')
predictions.columns = keys + ['checkins']

print("Filtering out items in training set")

scored_pairs = predictions.set_index(keys).index
train_pairs = recmodel.user_item_network_training.CF_data.set_index(keys).index

recs = predictions[~scored_pairs.isin(train_pairs)]
recs = recs.sort_values(['user_nickname', 'checkins'], ascending=[True, False])
recs = recs.groupby('user_nickname').head(k)
recs.to_csv('recs.tsv', sep='\t', index=False, header=False)

# Evaluate against the held-out split: comparing against the training pairs
# that were just removed from `recs` could never produce a hit.
# NOTE(review): assumes recmodel.test has the same user_nickname / town
# columns as recmodel.train — confirm.
test = recmodel.test

print("Evaluating...")

# De-duplicate (keeping first-seen order) so ten different users are sampled.
eval_users = list(dict.fromkeys(users))[:10]

p = 0.0
for user in eval_users:
    test_list = test.loc[test['user_nickname'] == user, 'town'].unique().tolist()
    recs_list = recs.loc[recs['user_nickname'] == user, 'town'].head(k).tolist()

    # precision@k: share of the k recommendation slots filled by a town the
    # user actually has in the held-out data.
    relevant = set(test_list)
    hits = sum(1 for town in recs_list if town in relevant)
    pu = hits / float(k)
    p += pu

    print("Precision for user {}: {}".format(user, pu))
    print("User test:--\n{}".format(test_list))
    print("User recs:--\n{}".format(recs_list))
    print()

# Average over the users actually evaluated (guarding against an empty sample).
p /= max(len(eval_users), 1)

print("Precision@{}: {}".format(k, p))