In [None]:
import cudf
import pandas as pd
import numpy as np
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
#from tensorflow.keras import layers

import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import *



from pathlib import Path
import matplotlib.pyplot as plt

from os.path import exists
import cv2, matplotlib.pyplot as plt
print('RAPIDS version',cudf.__version__)


# Read the first 20,000,000 transaction rows and store article ids as int32.
transactions_csv = '../input/h-and-m-personalized-fashion-recommendations/transactions_train.csv'
train = pd.read_csv(transactions_csv, nrows=20000000)
train['article_id'] = train['article_id'].astype('int32')
print(train.shape)
train.head()

In [None]:
# Transaction-count statistics per customer and per article.
def _print_entry_stats(counts, label):
    """Print how many groups have exactly one, and at most two, transactions.

    counts: result of ``groupby(...).count()``; its ``t_dat`` column holds the
            number of transactions of each group.
    label:  word used in the printed lines ("customer" or "article").
    """
    total = counts.shape[0]
    n_single = int((counts.t_dat == 1).sum())
    n_up_to_two = int((counts.t_dat < 3).sum())
    print("#" + label + " with only one entry: ", n_single, "(", "{:.2f}".format(100 * n_single / total), "%)")
    # The count shown here is the "< 3" count, matching the percentage next to it.
    print("#" + label + " with 1 or 2 entries: ", n_up_to_two, "(", "{:.2f}".format(100 * n_up_to_two / total), "%)")


df_customers = train.groupby(by='customer_id').count().sort_values(by='t_dat',ascending=False)
print("# Unique customer: ", df_customers.shape[0])
_print_entry_stats(df_customers, "customer")

print()
df_articles = train.groupby(by='article_id').count().sort_values(by='t_dat',ascending=False)
print("# Unique articles: ", df_articles.shape[0])
_print_entry_stats(df_articles, "article")
print()
print("TOP 5 products")
df_articles.head()

In [None]:
# Article table from the same competition folder; show its first rows.
articles_csv = '../input/h-and-m-personalized-fashion-recommendations/articles.csv'
items = pd.read_csv(articles_csv)
items.head()

In [None]:
# REDUCE DATASET: keep only the transactions of customers that have more than
# `min_times` rows in `train` (the filter is per customer, not per article).
min_times = 50
rows_per_customer = train.groupby(by='customer_id').count()
has_enough_rows = rows_per_customer.article_id > min_times
df_tmp = rows_per_customer.loc[has_enough_rows, ['article_id']].reset_index()
# inner merge on customer_id drops every customer that is absent from df_tmp
df = train.merge(df_tmp[['customer_id']], on='customer_id')
# rows before the filter, rows after the filter, number of customers kept
print(train.shape[0], df.shape[0], df_tmp.shape[0])
df.head()

In [None]:
# add output column: every row currently in `df` gets the label 1
# (rows labelled 0 are appended later from a shuffled copy of `df`)
df['output'] = 1 

In [None]:
# Prepare data: map raw ids to contiguous integer indices, then add negatives.
SEED = 42  # fixed seed so both shuffles below give the same result on every run

user_ids = df["customer_id"].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
user_encoded2user = {i: x for i, x in enumerate(user_ids)}
article_ids = df["article_id"].unique().tolist()
article2article_encoded = {x: i for i, x in enumerate(article_ids)}
article_encoded2article = {i: x for i, x in enumerate(article_ids)}
df["uid"] = df["customer_id"].map(user2user_encoded)
df["iid"] = df["article_id"].map(article2article_encoded)

num_users = len(user2user_encoded)
num_articles = len(article_encoded2article)

## ADD NEGATIVES
# Copy the positive rows and shuffle the user index so each article is paired
# with a (most likely) different user; the copies are labelled 0.
# NOTE(review): a shuffled (uid, iid) pair can coincide with a real purchase,
# and `customer_id` in the negative rows no longer matches `uid`.
df_neg = df.copy()
# .to_numpy() assigns by position, so the shuffle survives any index `df` has
# (assigning the shuffled Series directly would re-align it on the index).
df_neg['uid'] = df_neg['uid'].sample(frac=1, random_state=SEED).to_numpy()
df_neg['output'] = 0
df_final = pd.concat([df, df_neg], ignore_index=True)
df_final = df_final.sample(frac=1, random_state=SEED)
# drop the intermediates; only df_final is used from here on
del df
del df_neg

In [None]:
# Preview the shuffled frame that holds both the rows labelled 1 and the rows labelled 0.
df_final.head()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 5% of the labelled rows, with a fixed random_state.
model_columns = ["uid", "iid","price","sales_channel_id","output"]
X_train, X_test = train_test_split(df_final[model_columns], test_size=0.05, random_state=42)

for split_label, split_frame in (("Training data_set has ", X_train), ("Test data set has ", X_test)):
    print(split_label + str(split_frame.shape[0]) + " entries")
X_train.head()

# Deep Factorization Machines
Implementation of [Deep Factorization Machines](https://arxiv.org/abs/1703.04247) with keras  
![](https://bangdasun.github.io/images/deepfm.PNG)

## (1) Define Input Layers

In [None]:
def define_input_layers():
    """Create the three Keras inputs of the model.

    Returns a list ``[input_fea3, input_uid, input_mid]``: one numerical
    input followed by two single-level categorical inputs, each declared
    as ``Input((1,))``. No multi-level categorical input is created.
    """
    def scalar_input(layer_name):
        # one value per sample
        return Input((1,), name = layer_name)

    numerical = [scalar_input('input_fea3')]
    categorical = [scalar_input(layer_name) for layer_name in ('input_uid', 'input_mid')]
    return numerical + categorical

# Build one set of input tensors (reused by the example calls in later cells) and display it.
inputs = define_input_layers()
inputs

## (2) 1st order factorization machines

In [None]:
def Tensor_Mean_Pooling(name = 'mean_pooling', keepdims = False):
    """Return a Lambda layer that averages its input over axis 1.

    name:     name given to the Lambda layer.
    keepdims: forwarded to ``K.mean``.
    """
    def mean_over_axis_1(tensor):
        return K.mean(tensor, axis = 1, keepdims=keepdims)

    return Lambda(mean_over_axis_1, name = name)

def fm_1d(inputs, n_uid, n_mid):
    """First-order (linear) FM term.

    inputs: the three tensors ``[fea3_input, uid_input, mid_input]``.
    n_uid:  largest uid index; the uid embedding table has ``n_uid + 1`` rows.
    n_mid:  largest mid index; the mid embedding table has ``n_mid + 1`` rows.

    Returns the element-wise sum of one Dense(1) output for the numerical
    feature and one 1-dimensional embedding per id.
    """
    fea3_input, uid_input, mid_input = inputs

    # numerical feature -> a single linear unit
    # NOTE(review): the layer is named '..._fea4' although it consumes fea3
    linear_terms = [Dense(1, name = 'num_dense_1d_fea4')(fea3_input)]

    # one scalar weight per id
    id_features = (('cat_embed_1d_uid', n_uid, uid_input),
                   ('cat_embed_1d_mid', n_mid, mid_input))
    embedded = [Embedding(vocab + 1, 1, name = layer_name)(tensor)
                for layer_name, vocab, tensor in id_features]

    # bring the embedding outputs to the same (None, 1) shape as the Dense term
    linear_terms += [Reshape((1,))(term) for term in embedded]

    return Add(name = 'fm_1d_output')(linear_terms)

# Example call on the shared `inputs` with n_uid = n_mid = 10.
y_1d = fm_1d(inputs, 10, 10)

## (3) 2nd order factorization machines

In 2nd order FM, each feature is mapped to a tensor of shape (None, 1, k); these tensors are then stacked in the concat_embed_2d layer, which has shape (None, p, k).
Here k is the matrix factorization latent dimension and p is the number of features.

The calculation of the interaction terms can be simplified using
\begin{equation*} \sum_{i<j}{x_i x_j} = \frac{1}{2} \left(\left(\sum_{i}{x_i}\right)^2 - \sum_{i}{x_i^2}\right) \end{equation*}

Hence, the sum of 2nd order interactions = half of (square of the sum of concat_embed_2d - sum of the squared concat_embed_2d), with both sums taken over the p dimension; the resulting tensor has shape (None, k).

In [None]:
def fm_2d(inputs, n_uid, n_mid, k):
    """Second-order (pairwise interaction) FM term.

    inputs: the three tensors ``[fea3_input, uid_input, mid_input]``.
    n_uid:  largest uid index; the uid embedding table has ``n_uid + 1`` rows.
    n_mid:  largest mid index; the mid embedding table has ``n_mid + 1`` rows.
    k:      latent dimension shared by all features.

    Returns ``(y_fm_2d, embed_2d)``: the interaction term reshaped to
    (None, 1) and the stacked latent vectors (also fed to the deep part).
    """
    fea3_input, uid_input, mid_input = inputs

    # numerical feature: Dense(k) gives (None, k); add the feature axis -> (None, 1, k)
    fea3_latent = Dense(k, name = 'num_dense_2d_fea3')(fea3_input)
    fea3_latent = Reshape((1,k))(fea3_latent)

    # id features: each embedding of a (None, 1) input is already (None, 1, k)
    uid_latent = Embedding(n_uid + 1, k, name = 'cat_embed_2d_uid')(uid_input)
    mid_latent = Embedding(n_mid + 1, k, name = 'cat_embed_2d_mid')(mid_input)

    # stack the features along axis 1 => (None, 3, k)
    embed_2d = Concatenate(axis=1, name = 'concat_embed_2d')([fea3_latent, uid_latent, mid_latent])

    # sum_{i<j} v_i * v_j = 0.5 * ((sum_i v_i)^2 - sum_i v_i^2), per latent dimension
    sum_over_axis_1 = Lambda(lambda t: K.sum(t, axis = 1), name = 'sum_of_tensors')
    elementwise_square = Lambda(lambda t: K.square(t), name = 'square_of_tensors')

    summed = sum_over_axis_1(embed_2d)
    squared = elementwise_square(embed_2d)

    square_of_sum = Multiply()([summed, summed])
    sum_of_square = sum_over_axis_1(squared)

    pairwise = Subtract()([square_of_sum, sum_of_square])
    pairwise = Lambda(lambda t: t*0.5)(pairwise)

    # collapse the k latent dimensions into one value per sample
    y_fm_2d = Reshape((1,), name = 'fm_2d_output')(sum_over_axis_1(pairwise))

    return y_fm_2d, embed_2d

# Example call on the shared `inputs` with n_uid = n_mid = 10 and latent dimension k = 5.
y_fm2_d, embed_2d = fm_2d(inputs, 10, 10, 5)

## (4) deep part

In [None]:
def deep_part(embed_2d, dnn_dim, dnn_dr):
    """Feed-forward branch on top of the stacked latent vectors.

    embed_2d: 3D tensor of stacked latent vectors; flattened to 2D first.
    dnn_dim:  iterable of hidden-layer widths, one Dense(relu) layer each.
    dnn_dr:   dropout rate applied in front of every hidden layer.

    Returns the output of a final linear Dense(1) layer.
    """
    hidden = Flatten(name = 'flat_embed_2d')(embed_2d)
    for units in dnn_dim:
        # dropout is applied to the input of each hidden layer
        dropped = Dropout(dnn_dr)(hidden)
        hidden = Dense(units, activation='relu')(dropped)

    return Dense(1, activation='linear', name = 'deep_output')(hidden)

# Example call: two hidden layers of 16 units and a dropout rate of 0.5.
y_dnn = deep_part(embed_2d, [16, 16], 0.5)

## (5) Put all together

In [None]:
def deep_fm_model(n_uid, n_mid, k, dnn_dim, dnn_dr):
    """Assemble the DeepFM network from its three branches.

    n_uid, n_mid: largest uid / mid index (forwarded to fm_1d and fm_2d).
    k:            latent dimension of the second-order branch.
    dnn_dim:      hidden-layer widths of the deep branch.
    dnn_dr:       dropout rate of the deep branch.

    Returns four Keras models that share the same inputs, in the order
    (first-order FM, second-order FM, deep branch, full DeepFM). The full
    model concatenates the three branch outputs and applies a sigmoid Dense(1).
    """
    inputs = define_input_layers()

    first_order = fm_1d(inputs, n_uid, n_mid)
    second_order, latent = fm_2d(inputs, n_uid, n_mid, k)
    deep_out = deep_part(latent, dnn_dim, dnn_dr)

    # combine the FM parts and the deep part
    merged = Concatenate()([first_order, second_order, deep_out])
    prediction = Dense(1, activation='sigmoid', name = 'deepfm_output')(merged)

    branch_outputs = (first_order, second_order, deep_out, prediction)
    return tuple(Model(inputs, out) for out in branch_outputs)

In [None]:
def df2xy(df):
    """Split a frame into model inputs and labels.

    df: frame with the columns ``price``, ``uid``, ``iid`` and ``output``.

    Returns ``(x, y)``: ``x`` is the list [price, uid, iid] of value arrays
    and ``y`` is the value array of the ``output`` column.
    """
    feature_columns = ("price", "uid", "iid")
    x = [df[column].values for column in feature_columns]
    return x, df["output"].values

# Convert the training and hold-out frames into (inputs, labels) pairs with df2xy.
train_x, train_y = df2xy(X_train)
valid_x, valid_y = df2xy(X_test)

In [None]:
# Model hyper-parameters. n_uid / n_mid are the largest encoded indices; the
# embedding tables are built with one extra row (n + 1) inside fm_1d / fm_2d.
params = dict(
    n_uid=df_final.uid.max(),
    n_mid=df_final.iid.max(),
    k=20,
    dnn_dim=[32,32],
    dnn_dr=0.1,
)
print(params)
# NOTE(review): this rebinds the name `deep_fm_model` from the builder function
# to the returned model, so running this cell a second time will fail.
fm_model_1d, fm_model_2d, deep_model, deep_fm_model = deep_fm_model(**params)

In [None]:
from tensorflow.keras.callbacks import  EarlyStopping, ModelCheckpoint

# Compile the full DeepFM model: binary cross-entropy loss, accuracy metric, Adam.
bce_loss = tf.keras.losses.BinaryCrossentropy()
adam = keras.optimizers.Adam(learning_rate=0.01)
deep_fm_model.compile(loss=bce_loss, metrics = ['accuracy'], optimizer=adam)

# Callbacks are constructed here but are NOT handed to fit() below.
# NOTE(review): as written, training always runs all 30 epochs and no weights
# file is written; pass `callbacks=callbacks` to fit() to enable them.
early_stop = EarlyStopping(monitor='val_loss', patience=5)
model_ckp = ModelCheckpoint(
    filepath='deepfm_weights.h5',
    monitor='val_loss',
    save_weights_only=True,
    save_best_only=True,
)
callbacks = [model_ckp,early_stop]

# Train, evaluating on the hold-out split after every epoch.
fit_options = dict(epochs=30, batch_size=4096, validation_data=(valid_x, valid_y))
train_history = deep_fm_model.fit(train_x, train_y, **fit_options)

In [None]:
def plot_train_val_losses(history):
    """Plot the training and validation loss curves of a Keras fit.

    history: object whose ``history`` dict holds the per-epoch lists
             ``"loss"`` and ``"val_loss"`` (as returned by ``Model.fit``).
    """
    train_loss = history.history["loss"]
    val_loss = history.history["val_loss"]

    plt.plot(train_loss)
    plt.plot(val_loss)
    plt.title("model loss")
    plt.ylabel("loss")
    plt.xlabel("epoch")
    plt.legend(["train", "test"], loc="upper left")

    # Take the y-limits from BOTH curves so that neither of them is clipped
    # (using only min(train) / max(val) cuts off whichever curve lies outside).
    lowest = min(np.min(train_loss), np.min(val_loss))
    highest = max(np.max(train_loss), np.max(val_loss))
    plt.axis([0, len(train_loss), lowest, highest])
    plt.show()

In [None]:
# Show the loss curves recorded by the fit call.
plot_train_val_losses(train_history)