In [1]:
import os
# Switch the working directory to the project folder; later cells use paths
# relative to it (e.g. './neural_collaborative_filtering/Data/...').
# NOTE(review): hard-coded absolute path, presumably a mounted Google Drive
# folder on Colab; confirm it exists before running this cell.
os.chdir('/content/drive/MyDrive/NCF')

In [2]:
%%bash
# Fetch the reference NCF repository; later cells read the ml-1m files from
# its Data/ folder. Skip the clone when a checkout is already present so the
# cell can be re-run without `git clone` failing on an existing destination.
if [ ! -d neural_collaborative_filtering/.git ]; then
    git clone https://github.com/hexiangnan/neural_collaborative_filtering.git
fi

Cloning into 'neural_collaborative_filtering'...


In [31]:
import numpy as np
import pandas as pd
import tensorflow as tf
import os
import sys
import math


In [6]:
import time

In [None]:
# python GMF.py
# --dataset ml-1m
# --epochs 20
# --batch_size 256
# --num_factors 8
# --regs [0,0]
# --num_neg 4
# --lr 0.001
# --learner adam
# --verbose 1
# --out 1

In [15]:
# Hyper-parameters for the GMF run (same values as the reference
# `python GMF.py ...` invocation noted in this notebook).
dataset = 'ml-1m'
epochs = 20
batch_size = 256
num_factors = 8          # embedding width (used as the latent dimension)
regs = [0, 0]            # L2 strength for the [user, item] embeddings
num_negatives = 4
learning_rate = 0.001
learner = 'adam'
verbose = 1

In [5]:
# Evaluation and output settings.
model_out_file = 'Pretrain_GMF.h5'   # file name for the saved model
evaluation_threads = 1               # mp.cpu_count()
topK = 10                            # presumably the ranking cutoff used at evaluation time (verify)

In [7]:
import scipy.sparse as sp
import numpy as np

class Dataset(object):
    '''
    Loader for a dataset stored as three tab-separated text files that share
    a common path prefix:

    * ``<path>.train.rating``   -> ``trainMatrix``   (scipy dok_matrix, 1.0 = interaction)
    * ``<path>.test.rating``    -> ``testRatings``   (list of ``[user, item]``)
    * ``<path>.test.negative``  -> ``testNegatives`` (list of lists of item ids)

    ``num_users`` and ``num_items`` are taken from the shape of ``trainMatrix``.
    '''

    def __init__(self, path):
        '''
        Load the three files belonging to the dataset.

        path: common file prefix, i.e. the file names without the
              ``.train.rating`` / ``.test.rating`` / ``.test.negative`` suffix.

        Raises AssertionError if the test-rating file and the test-negative
        file do not contain the same number of rows.
        '''
        self.trainMatrix = self.load_rating_file_as_matrix(path + ".train.rating")
        self.testRatings = self.load_rating_file_as_list(path + ".test.rating")
        self.testNegatives = self.load_negative_file(path + ".test.negative")
        assert len(self.testRatings) == len(self.testNegatives)

        self.num_users, self.num_items = self.trainMatrix.shape

    @staticmethod
    def _iter_rows(filename):
        '''
        Yield the list of tab-separated fields of every non-blank line.

        Blank lines (for example an extra newline at the end of the file) are
        skipped instead of being passed on to ``int()`` and raising ValueError.
        '''
        with open(filename, "r") as f:
            for line in f:
                if line.strip():
                    yield line.rstrip("\n").split("\t")

    def load_rating_file_as_list(self, filename):
        '''
        Read a rating file and return ``[[user, item], ...]`` built from the
        first two columns of every row; any further columns are ignored.
        '''
        return [[int(fields[0]), int(fields[1])]
                for fields in self._iter_rows(filename)]

    def load_negative_file(self, filename):
        '''
        Read a negatives file and return one list of item ids per row.

        The first column of each row is ignored; every remaining column is
        parsed as an integer item id.
        '''
        return [[int(item) for item in fields[1:]]
                for fields in self._iter_rows(filename)]

    def load_rating_file_as_matrix(self, filename):
        '''
        Read a rating file and return it as a float32 dok matrix.

        Every row is ``user\\titem\\trating[\\t...]``. The matrix has shape
        ``(max_user_id + 1, max_item_id + 1)`` where the maxima are taken over
        all rows, and holds 1.0 for every (user, item) whose rating is > 0.
        '''
        # First pass: the matrix shape must be known before it is allocated.
        num_users, num_items = 0, 0
        for fields in self._iter_rows(filename):
            num_users = max(num_users, int(fields[0]))
            num_items = max(num_items, int(fields[1]))

        # Second pass: mark every positively rated (user, item) pair.
        mat = sp.dok_matrix((num_users + 1, num_items + 1), dtype=np.float32)
        for fields in self._iter_rows(filename):
            user, item, rating = int(fields[0]), int(fields[1]), float(fields[2])
            if rating > 0:
                mat[user, item] = 1.0
        return mat

In [18]:
# Loading data
t1 = time.time()

# NOTE(review): this rebinds `dataset` from the dataset-name string assigned in
# the hyper-parameter cell to the loaded Dataset object.
dataset = Dataset('./neural_collaborative_filtering/Data/ml-1m')
train = dataset.trainMatrix
testRatings = dataset.testRatings
testNegatives = dataset.testNegatives
num_users, num_items = train.shape

# Report the load time and the basic dataset statistics.
print(
    "Load data done [%.1f s]. #user=%d, #item=%d, #train=%d, #test=%d"
    % (time.time() - t1, num_users, num_items, train.nnz, len(testRatings))
)

Load data done [10.4 s]. #user=6040, #item=3706, #train=994169, #test=6040


In [None]:
# os.listdir('./neural_collaborative_filtering/Data/')

In [22]:
# Convert the sparse training matrix to a dense array for a quick visual check.
# This materialises the full (num_users, num_items) float32 array in memory.
train.toarray()

array([[1., 1., 1., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [32]:
# tf.keras.initializers.HeNormal()

In [None]:

import keras
from keras import backend as K
from keras import initializations
from keras.models import Sequential, Model, load_model, save_model
from keras.layers.core import Dense, Lambda, Activation
from keras.layers import Embedding, Input, Dense, merge, Reshape, Merge, Flatten
from keras.optimizers import Adagrad, Adam, SGD, RMSprop
from keras.regularizers import l2
from Dataset import Dataset
from evaluate import evaluate_model
from time import time
import multiprocessing as mp
import sys
import math
import argparse


In [35]:
# Sizes consumed by the model-building cell: the two embedding vocabulary
# sizes and the embedding width.
# NOTE: despite its name, `user_items` holds the number of items.
user_dim, user_items, latent_dim = num_users, num_items, num_factors

In [42]:
### GMF : Generalized Matrix Factorization

# Each sample supplies a single integer user id and a single integer item id.
user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='item_input')

# Embedding tables; `regs` holds the L2 strength for [user, item] embeddings.
MF_embeding_user = tf.keras.layers.Embedding(input_dim=user_dim,
                                             output_dim=latent_dim,
                                             name='user_embedding',
                                             embeddings_initializer='normal',
                                             embeddings_regularizer=tf.keras.regularizers.l2(regs[0]),
                                             input_length=1)


MF_embeding_item = tf.keras.layers.Embedding(input_dim=user_items,
                                             output_dim=latent_dim,
                                             name='item_embedding',
                                             embeddings_initializer='normal',
                                             embeddings_regularizer=tf.keras.regularizers.l2(regs[1]),
                                             input_length=1)

# Crucial to flatten an embedding vector!
# (None, 1, latent_dim) -> (None, latent_dim)
user_latent = tf.keras.layers.Flatten()(MF_embeding_user(user_input))
item_latent = tf.keras.layers.Flatten()(MF_embeding_item(item_input))

# Element-wise product of user and item embeddings (the old `merge(mode='mul')`).
# A Dot layer must not be used here: it would sum over the latent dimensions
# and hand the prediction layer a single scalar, so the layer could no longer
# learn one weight per latent dimension.
predict_vector = tf.keras.layers.Multiply()([user_latent, item_latent])


# Final prediction layer
#prediction = Lambda(lambda x: K.sigmoid(K.sum(x)), output_shape=(1,))(predict_vector)
prediction = tf.keras.layers.Dense(1, activation='sigmoid', kernel_initializer='lecun_uniform', name = 'prediction')(predict_vector)

model = tf.keras.Model(inputs=[user_input, item_input],
            outputs=prediction)

In [43]:
# Print the layers of the model with their output shapes and parameter counts.
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 user_input (InputLayer)        [(None, 1)]          0           []                               
                                                                                                  
 item_input (InputLayer)        [(None, 1)]          0           []                               
                                                                                                  
 user_embedding (Embedding)     (None, 1, 8)         48320       ['user_input[0][0]']             
                                                                                                  
 item_embedding (Embedding)     (None, 1, 8)         29648       ['item_input[0][0]']             
                                                                                              

In [None]:
def get_model(num_users, num_items, latent_dim, regs=[0,0]):
    '''
    Build the (uncompiled) GMF model with the tf.keras API.

    num_users:  number of rows of the user embedding table.
    num_items:  number of rows of the item embedding table.
    latent_dim: width of both embeddings.
    regs:       two L2 regularization strengths, applied to the user and the
                item embedding respectively. The default list is never mutated.

    Returns a tf.keras.Model taking [user_input, item_input] (one int32 id
    each per sample) and producing one sigmoid score per sample.
    '''
    # Input variables
    user_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='user_input')
    item_input = tf.keras.layers.Input(shape=(1,), dtype='int32', name='item_input')

    # NOTE(review): the previous code passed an `init_normal` helper that is
    # not defined in this notebook; 'normal' matches the inline model cell.
    # Confirm the intended standard deviation.
    MF_Embedding_User = tf.keras.layers.Embedding(input_dim=num_users,
                                                  output_dim=latent_dim,
                                                  name='user_embedding',
                                                  embeddings_initializer='normal',
                                                  embeddings_regularizer=tf.keras.regularizers.l2(regs[0]),
                                                  input_length=1)
    MF_Embedding_Item = tf.keras.layers.Embedding(input_dim=num_items,
                                                  output_dim=latent_dim,
                                                  name='item_embedding',
                                                  embeddings_initializer='normal',
                                                  embeddings_regularizer=tf.keras.regularizers.l2(regs[1]),
                                                  input_length=1)

    # Crucial to flatten an embedding vector!
    user_latent = tf.keras.layers.Flatten()(MF_Embedding_User(user_input))
    item_latent = tf.keras.layers.Flatten()(MF_Embedding_Item(item_input))

    # Element-wise product of user and item embeddings
    predict_vector = tf.keras.layers.Multiply()([user_latent, item_latent])

    # Final prediction layer
    #prediction = Lambda(lambda x: K.sigmoid(K.sum(x)), output_shape=(1,))(predict_vector)
    prediction = tf.keras.layers.Dense(1, activation='sigmoid',
                                       kernel_initializer='lecun_uniform',
                                       name='prediction')(predict_vector)

    model = tf.keras.Model(inputs=[user_input, item_input],
                           outputs=prediction)

    # Hand the model back to the caller; without this the function returns None.
    return model

In [None]:
# Build the GMF model from the dataset dimensions and the configured
# embedding width / regularization strengths.
model = get_model(num_users=num_users,
                  num_items=num_items,
                  latent_dim=num_factors,
                  regs=regs)