
# TF-recomm


In [1]:
import time
from collections import deque
import socket
import sys
import numpy as np
import tensorflow as tf
from six import next
from tensorflow.core.framework import summary_pb2
from tensorflow.python.tools.inspect_checkpoint import print_tensors_in_checkpoint_file

import dataio
import ops

In [2]:
np.random.seed(13575)

BATCH_SIZE = 1000
USER_NUM = 6040
ITEM_NUM = 3952
DIM = 15
EPOCH_MAX = 100
DEVICE = "/cpu:0"

In [3]:
def clip(x):
    return np.clip(x, 1.0, 5.0)


def make_scalar_summary(name, val):
    return summary_pb2.Summary(value=[summary_pb2.Summary.Value(tag=name, simple_value=val)])

def get_data():
    df = dataio.read_process("./tmp/movielens/ml-1m/ratings.dat", sep="::")
    rows = len(df)
    df = df.iloc[np.random.permutation(rows)].reset_index(drop=True)
    split_index = int(rows * 0.9)
    df_train = df[0:split_index]
    df_test = df[split_index:].reset_index(drop=True)
    return df_train, df_test, rows

def get_movies():
    df = dataio.read_movies("./tmp/movielens/ml-1m/movies.dat", sep="::")
    rows = len(df)
    return df, rows

# Data

In [4]:
df_train, df_test, length = get_data()
df_movies,rows = get_movies()

Movies file length:
3883
Toy Story (1995)
1
Animation|Children's|Comedy


In [23]:
df_train.iloc[0:10]
df_train.describe()

Unnamed: 0,user,item,rate,st
count,900188.0,900188.0,900188.0,900188.0
mean,3022.600601,1864.635758,3.581352,972246100.0
std,1728.384446,1096.003424,1.117216,12153770.0
min,0.0,0.0,1.0,956703900.0
25%,1504.0,1029.0,3.0,965302700.0
50%,3068.0,1834.0,4.0,973021500.0
75%,4475.0,2769.0,4.0,975221200.0
max,6039.0,3951.0,5.0,1046455000.0


In [5]:
print ("Movies file length:")
print (len(df_movies))

Movies file length:
3883


In [6]:
df_movies.iloc[0:10]

Unnamed: 0,movie,title,tags
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy
5,6,Heat (1995),Action|Crime|Thriller
6,7,Sabrina (1995),Comedy|Romance
7,8,Tom and Huck (1995),Adventure|Children's
8,9,Sudden Death (1995),Action
9,10,GoldenEye (1995),Action|Adventure|Thriller


In [7]:
df_movies.iloc[0].title

'Toy Story (1995)'

# Network + train + test

In [8]:

samples_per_batch = len(df_train) // BATCH_SIZE

iter_train = dataio.ShuffleIterator([df_train["user"],
                                     df_train["item"],
                                    df_train["rate"]],
                                    batch_size=BATCH_SIZE)

iter_test = dataio.OneEpochIterator([df_test["user"],
                                     df_test["item"],
                                    df_test["rate"]],
                                    batch_size=-1)

user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
rate_batch = tf.placeholder(tf.float32, shape=[None])

infer, regularizer = ops.inference_svd(user_batch, item_batch, user_num=USER_NUM, item_num=ITEM_NUM, dim=DIM, device=DEVICE)
global_step = tf.contrib.framework.get_or_create_global_step()
_, train_op = ops.optimization(infer, regularizer, rate_batch, learning_rate=0.001, reg=0.05, device=DEVICE)
#zeros= tf.Variable(tf.zeros([1]),name="zeros")


In [9]:

def svd(train, test,length,moviefile, trainFl=False):
   


    init_op = tf.global_variables_initializer()
    saver=tf.train.Saver()
    with tf.Session() as sess:
        sess.run(init_op)
        summary_writer = tf.summary.FileWriter(logdir="./tmp/svd/log", graph=sess.graph)
        print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
        errors = deque(maxlen=samples_per_batch)
        start = time.time()

        if trainFl == True: 
            for i in range(EPOCH_MAX * samples_per_batch):
                users, items, rates = next(iter_train)
                _, pred_batch = sess.run([train_op, infer], feed_dict={user_batch: users,
                                                                    item_batch: items,
                                                                                                                        rate_batch: rates})
                pred_batch = clip(pred_batch)
                errors.append(np.power(pred_batch - rates, 2))
                if i % samples_per_batch == 0:
                    train_err = np.sqrt(np.mean(errors))
                    test_err2 = np.array([])
                    for users, items, rates in iter_test:
                        pred_batch = sess.run(infer, feed_dict={user_batch: users,
                                                                item_batch: items})
                        pred_batch = clip(pred_batch)
                        test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
                    end = time.time()
                    test_err = np.sqrt(np.mean(test_err2))
                    print("{:3d} {:f} {:f} {:f}(s)".format(i // samples_per_batch, train_err, test_err,
                                                        end - start))
                    train_err_summary = make_scalar_summary("training_error", train_err)
                    test_err_summary = make_scalar_summary("test_error", test_err)
                    summary_writer.add_summary(train_err_summary, i)
                    summary_writer.add_summary(test_err_summary, i)
                    start = end

            #meta_graph_def = tf.train.export_meta_graph(filename='/tmp/tfrecomm.meta')
            save_path=saver.save(sess,"./tmp/")
        else: 
            saver.restore(sess, "./tmp/")

In [10]:
svd(df_train, df_test, length,df_movies, trainFl=False) 
print("Done!")

epoch train_error val_error elapsed_time
INFO:tensorflow:Restoring parameters from ./tmp/
Done!


# EXECUTION

In [11]:
def printMM(topmovies):
    print (topmovies)
    for i,r in topmovies:
        print("{0:5} - {1:1.2f} - {2}" .format(i,  r, df_movies.iloc[i].title  ))

In [18]:
def test(train, test,length,moviefile, data, trainFl=False):
    init_op = tf.global_variables_initializer()
    saver=tf.train.Saver()
    with tf.Session() as sess:
        sess.run(init_op)
        saver.restore(sess, "./tmp/")

        movies=list(range(len(moviefile)))
        #print (movies)
        users=[1]
        pred_batch = sess.run(infer, feed_dict={user_batch: users,item_batch: movies})
        moviesrecomm=list(zip(movies,pred_batch))
        smovies=sorted (moviesrecomm,key=lambda x:x[1],reverse=True)

        print (" Top Movies ------------------------------------------------------------")
        topmovies= smovies[0:10]
        #print (topmovies)
        printMM(topmovies)
        
        #-----------------------------------------------------------------------------
        # give number between 1 - 5000
        del users[:]
        users.append(int(data))
        print (users)
        pred_batch = sess.run(infer, feed_dict={user_batch: users,item_batch: movies})
        moviesrecomm=list(zip(movies,pred_batch))
        smovies=sorted (moviesrecomm,key=lambda x:x[1],reverse=True)
        topmovies= smovies[0:10]
        printMM(topmovies)
        for item in topmovies:
            itopmovie=item[0]
            recommendedmovie=moviefile["title"][itopmovie]
            recommendedtags=moviefile["tags"][itopmovie]
    return

In [21]:
test(df_train, df_test, length,df_movies, 5000, trainFl=False) 

INFO:tensorflow:Restoring parameters from ./tmp/
 Top Movies ------------------------------------------------------------
[(571, 5.6326408), (1871, 4.8334017), (355, 4.8078384), (952, 4.7772546), (36, 4.7685165), (3091, 4.7542076), (526, 4.7319651), (317, 4.7229209), (810, 4.6377292), (213, 4.6228356)]
  571 - 5.63 - Little Rascals, The (1994)
 1871 - 4.83 - Gentleman's Agreement (1947)
  355 - 4.81 - I Like It Like That (1994)
  952 - 4.78 - Angel and the Badman (1947)
   36 - 4.77 - Across the Sea of Time (1995)
 3091 - 4.75 - Magnolia (1999)
  526 - 4.73 - Second Best (1994)
  317 - 4.72 - Suture (1993)
  810 - 4.64 - Crude Oasis, The (1995)
  213 - 4.62 - Before Sunrise (1995)
[5000]
[(2561, 5.8375769), (1116, 5.5035367), (3850, 5.3975534), (556, 5.3632798), (2511, 5.2982941), (1062, 5.2961969), (2062, 5.2393155), (597, 5.2248325), (1163, 5.199657), (754, 5.1572347)]
 2561 - 5.84 - Besieged (L' Assedio) (1998)
 1116 - 5.50 - Manon of the Spring (Manon des sources) (1986)
 3850 - 5.

# TO DO: 
* Read Users 
* Compare results for similar users 
* Show statistics