# VAE for collaborative filtering

This implemetation is inspired by the paper of G. Krishnan et al. "Variational Autoencoders for Collaborative Filtering" [1] an their implemetation [2]. It works with implicit feedback data.

In [127]:
import os

import bottleneck as bn
import pandas as pd

import tensorflow as tf
from tensorflow.contrib.layers import apply_regularization, l2_regularizer

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [128]:
from keras.losses import binary_crossentropy
from keras import backend as K
from keras.layers import Lambda, Input, Dense, Dropout
from keras.models import Model
from keras.callbacks import LambdaCallback, EarlyStopping, Callback
from keras.utils import plot_model
from tensorflow.metrics import recall_at_k

In [129]:
os.environ['KMP_DUPLICATE_LIB_OK']='True'

In [130]:
DATA_PATH_SMALL='./data/ml-1m/'
DATA_PATH_LARGE='./data/ml-20m/'
DATA_FILE_NAME='ratings.dat'

In [131]:
data=pd.read_csv(DATA_PATH_SMALL + DATA_FILE_NAME, sep='::',names=['userId','movieId','rating','timestamp'])

  """Entry point for launching an IPython kernel.


In [132]:
data.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


We don't need explicit data (e.g. specific ratings), so we will convert the rating to a binary property. Every rating greater or equal to 4 becomes 1 and each rating smaller than 4 becomes 0.

In [133]:
bin_data=data[data['rating'] >= 4]
bin_data=bin_data.assign(rating=1)

In [134]:
bin_data.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1193,1,978300760
3,1,3408,1,978300275
4,1,2355,1,978824291
6,1,1287,1,978302039
7,1,2804,1,978300719


**Create the utility matrix**

In [135]:
click_matrix=bin_data.pivot(index='movieId', columns='userId', values='rating')

In [136]:
click_matrix=click_matrix.fillna(0)

In [137]:
click_matrix.head()

userId,1,2,3,4,5,6,7,8,9,10,...,6031,6032,6033,6034,6035,6036,6037,6038,6039,6040
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,...,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


**Drop all rows and columns with less then 5 ratings. This is due to the cold start problem. Removing these entries will cause other rows/columns to fall under 5 ratings, but this is just a small portion.**

In [138]:
min_u=5
min_i=5

In [139]:
click_matrix.shape

(3533, 6038)

In [140]:
click_matrix=click_matrix.loc[(click_matrix.sum(axis=1) >= min_i), (click_matrix.sum(axis=0) >= min_u)]

In [141]:
click_matrix.head()

userId,1,2,3,4,5,6,7,8,9,10,...,6031,6032,6033,6034,6035,6036,6037,6038,6039,6040
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,...,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [142]:
(n_items,n_users)=click_matrix.shape
n_entries=click_matrix.values.sum()

sparsity=n_entries/(n_items*n_users)

print(f'There are {n_items} items and {n_users} users in the dataframe. The sparsity of the matrix is {sparsity: .4f}')

There are 3125 items and 6034 users in the dataframe. The sparsity of the matrix is  0.0305


**Shuffle the whole matrix**

In [143]:
click_matrix=click_matrix.reindex(np.random.RandomState(seed=42).permutation(click_matrix.index), axis='rows')
click_matrix=click_matrix.reindex(np.random.RandomState(seed=42).permutation(click_matrix.columns), axis='columns')
click_matrix.head()

userId,5323,3219,5123,1667,3661,5632,5143,2446,5257,1371,...,4430,5340,467,5740,3093,3774,5197,5232,5396,861
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1036,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2651,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
394,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2272,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1359,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Split train, validation and test sets

In [144]:
test_set_size=600
val_set_size=600
train_set_size=n_users-test_set_size-val_set_size

In [170]:
train_set=click_matrix.iloc[:,:train_set_size]
val_set=click_matrix.iloc[:,train_set_size:train_set_size+val_set_size]
test_set=click_matrix.iloc[:,train_set_size+val_set_size:]
print(train_set.shape)
print(val_set.shape)
print(test_set.shape)

(3125, 4834)
(3125, 600)
(3125, 600)


## Mask a part of the test and validation set, to evaluate our model

In [171]:
with_held_rate=0.2

mask_ones=pd.DataFrame(1, index=val_set.index.values[:int(np.ceil(len(val_set.index)*with_held_rate))], columns=val_set.columns)
mask_zeros=pd.DataFrame(0, index=val_set.index.values[int(np.ceil(len(val_set.index)*with_held_rate)):], columns=val_set.columns)

mask=pd.concat([mask_ones,mask_zeros])
mask.shape

(3125, 600)

In [172]:
min_item = 2

In [173]:
val_set_masked=val_set-mask
held_out_val=val_set_masked.replace(-1,0)
held_out_val=held_out_val.loc[:, (held_out_val.sum(axis=0) >= min_item)]

(3125, 600)

In [175]:
mask_ones=pd.DataFrame(1, index=test_set.index.values[:int(np.ceil(len(test_set.index)*with_held_rate))], columns=test_set.columns)
mask_zeros=pd.DataFrame(0, index=test_set.index.values[int(np.ceil(len(test_set.index)*with_held_rate)):], columns=test_set.columns)

mask=pd.concat([mask_ones,mask_zeros])
mask.shape

(3125, 600)

In [176]:
test_set_masked=test_set-mask
held_out_test=test_set_masked.replace(-1,0)
held_out_test=held_out_test.loc[:,(held_out_test.sum(axis=0) >= min_item)]

# Metric

We are using the **Recall@R** metric, which compares the top **R** recommended items with the held-out ratings to see how many of the recommended items are in the held-out set.

In [246]:
def Recall_at_k(X_pred, X_true_held_out, X_true_complet, k=20):
    
    if X_pred.shape[0] == 0 or k < 1:
        return -1
    
    actual_relevant_items = X_true_complet-X_true_held_out
    
    user_item_tuples = actual_relevant_items.eq(1).stack()
    relevant_items_tuples = user_item_tuples[user_item_tuples].index.values

    known_user_item_tuples = X_true_held_out.eq(1).stack()
    known_items_tuples = known_user_item_tuples[known_user_item_tuples].index.values

    u_recalls = []
    recall = 0
    skiped = 0
    
    for i in X_pred.index:
        nom=0
        
        user = X_pred.loc[i]
        user_actual_relevant_items = actual_relevant_items.loc[i]
        n_user_actual_relevant_items = user_actual_relevant_items.sum()
        
        if n_user_actual_relevant_items == 0:
            skiped += 1
            continue 
            
        denom = np.minimum(k,n_user_actual_relevant_items)

        
        known_items = [item[1] for item in known_items_tuples if item[0] == i]
        for item in known_items:
            user.drop(item, axis=0, inplace=True)
            
        user_sorted = user.sort_values(ascending=False)
        k_recommendations = user_sorted.iloc[:k]
        k_recommendations = k_recommendations.index
        
        relevant_items = [item[1] for item in relevant_items_tuples if item[0] == i]
        
        for rec in k_recommendations:
            if rec in relevant_items:
                nom += 1
        
        u_recall = float(nom)/denom
        u_recalls.append(u_recall)
        
            
        recall += u_recall
        
    return float(recall) / (X_pred.shape[0] - skiped)
    

In [25]:
# This code is from https://keras.io/examples/variational_autoencoder/
# reparameterization trick
# instead of sampling from Q(z|X), sample eps = N(0,I)
# z = z_mean + sqrt(var)*eps
def sampling(args):
    """Reparameterization trick by sampling from an isotropic unit Gaussian.

    # Arguments
        args (tensor): mean and log of variance of Q(z|X)

    # Returns
        z (tensor): sampled latent vector
    """

    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    # by default, random_normal has mean=0 and std=1.0
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon

## Annealing
**We anneal the regularization parameter beta over the course of the training. Concept from Liang et al. [1]**

In [26]:
# the total number of gradient updates for annealing
total_anneal_steps = 5000
anneal_multiplier = 40
# largest annealing parameter
anneal_cap = 0.2

In [67]:
beta = K.variable(0., dtype=tf.float32)
betas = []

In [68]:
def changeBeta(epoch,logs):
    if total_anneal_steps > 0:
        anneal = min(anneal_cap, anneal_multiplier * epoch / total_anneal_steps)
    else:
        anneal = anneal_cap
        
    betas.append((epoch,anneal))
    
    K.set_value(beta, anneal)
    
betaChanger = LambdaCallback(on_epoch_end=changeBeta)

**We define a custom loss function which consists of Kullback-Leiber Divergence and a reconstruction loss. The KL Divergence in the CF context takes the role of a regularization term which can be controled with beta [1].** 

In [52]:
# Custom loss for VAE
def vae_loss(y_true, y_pred):
    rec_loss = binary_crossentropy(y_true, y_pred)
    kl_loss = -0.5*K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    
    total_loss = K.mean(rec_loss + beta*kl_loss)
    
    return total_loss

## Model

In [252]:
# network parameters
input_shape = (n_items, )
dropout_rate = 0.5
intermediate_dim = 100
batch_size = 32
latent_dim = 25
epochs = 75

In [253]:
# build encoder model

# input layer
inputs = Input(shape=input_shape, name='encoder_input')

# dropout layer to help regularize
drop = Dropout(dropout_rate, input_shape=input_shape)(inputs)

# dense hidden layer
x = Dense(intermediate_dim, activation='relu')(drop)

# z_mean and z_log_var
z_mean = Dense(latent_dim, name='z_mean')(x)
z_log_var = Dense(latent_dim, name='z_log_var')(x)

# use reparameterization trick to push the sampling out as input
z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

# instantiate encoder model
encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
encoder.summary()


# build decoder model

# sampled latent layer
latent_inputs = Input(shape=(latent_dim,), name='z_sampling')

# dense hidden layer
x = Dense(intermediate_dim, activation='relu')(latent_inputs)

# output layer
outputs = Dense(n_items, activation='sigmoid')(x)

# instantiate decoder model
decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()


# instantiate VAE model
outputs = decoder(encoder(inputs)[2])
vae = Model(inputs, outputs, name='vae_mlp')

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input (InputLayer)      (None, 3125)         0                                            
__________________________________________________________________________________________________
dropout_7 (Dropout)             (None, 3125)         0           encoder_input[0][0]              
__________________________________________________________________________________________________
dense_19 (Dense)                (None, 100)          312600      dropout_7[0][0]                  
__________________________________________________________________________________________________
z_mean (Dense)                  (None, 25)           2525        dense_19[0][0]                   
__________________________________________________________________________________________________
z_log_var 

### Training

In [254]:
# Quick hack to get custom metric for evaluation
# class RecallMetric(Callback):
#    def on_train_begin(self, logs={}):
#        self._data = []
#
#    def on_epoch_end(self, batch, logs={}):
#        X_val, y_val = self.validation_data[0], self.validation_data[1]
#        y_predict = np.asarray(vae.predict(X_val))
#
#        recall = Recall_at_k(y_predict,y_val)
#
#        self._data.append({
#            f'batch-{batch}': np.mean(recall),
#        })
#        return
#
#    def get_data(self):
#        return self._data

In [255]:
vae.compile(optimizer='adam', loss=vae_loss)
earlyStopping = EarlyStopping(monitor='val_loss', patience=8)
vae.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_input (InputLayer)   (None, 3125)              0         
_________________________________________________________________
encoder (Model)              [(None, 25), (None, 25),  317650    
_________________________________________________________________
decoder (Model)              (None, 3125)              318225    
Total params: 635,875
Trainable params: 635,875
Non-trainable params: 0
_________________________________________________________________


In [256]:
recall=RecallMetric()

train_set_transposed = train_set.T
val_set_transposed = val_set.T
held_out_val_set_transposed = held_out_val.T

vae.fit(train_set_transposed,
                train_set_transposed,
                epochs=epochs,
                batch_size=batch_size,
                validation_data=(val_set_transposed, val_set_transposed),
                callbacks = [betaChanger, earlyStopping]) #, recall

vae.save_weights('vae_mlp_mnist.h5')

Train on 4834 samples, validate on 600 samples
Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
Epoch 24/75
Epoch 25/75
Epoch 26/75
Epoch 27/75
Epoch 28/75
Epoch 29/75
Epoch 30/75


In [257]:
# training_data=recall.get_data()
test_set_transposed = held_out_test.T
X_pred = vae.predict(test_set_transposed)
X_pred_indexed=pd.DataFrame(data=X_pred[:,:],index=test_set_transposed.index,columns=test_set_transposed.columns)

In [258]:
X_true=test_set.T.values

In [259]:
print(f'Recall@5 is {Recall_at_k(X_pred_indexed, test_set_transposed, X_true, 5)}')
print(f'Recall@20 is {Recall_at_k(X_pred_indexed, test_set_transposed, X_true, 20)}')
print(f'Recall@50 is {Recall_at_k(X_pred_indexed, test_set_transposed, X_true, 50)}')
print(f'Recall@100 is {Recall_at_k(X_pred_indexed, test_set_transposed, X_true, 100)}')

Recall@5 is 0.23603147835862892
Recall@20 is 0.23091490736063142
Recall@50 is 0.3305511498808608
Recall@100 is 0.41464095188012323


## Non-Negative Matrix Factorization (NMF)

**Here we use sklearns NMF to compare the performance of the vae approach for collaborative filtering.**

In [236]:
import scipy.sparse as sps
from sklearn.decomposition import NMF
from sklearn.model_selection import GridSearchCV
import itertools as it

In [227]:
train_set_MF = train_set_transposed.append(test_set_transposed)
train_set_MF.head()

movieId,1036,2651,394,2272,1359,2455,338,1020,3714,2085,...,377,1599,534,2749,2110,3911,1390,1432,1669,1093
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5323,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3219,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5123,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1667,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3661,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


**Simple Grid Search for hyperparameter optimization**

In [248]:
params = {'n_components':[25,50,75,100,125], 'alpha':[0.02,0.04,0.08,0.16,0.2]}

In [249]:
allNames = sorted(params)
combinations = it.product(*(params[Name] for Name in allNames))

In [250]:
model_scores = []
for combination in list(combinations):
    model = NMF(n_components=combination[1], init='random', random_state=0, alpha=combination[0])
    W = model.fit_transform(train_set_MF)
    H = model.components_
    
    X_pred_MF = np.matmul(W,H)
    X_pred_MF_indexed=pd.DataFrame(data=X_pred_MF[:,:],index=train_set_MF.index,columns=train_set_MF.columns)
    X_pred_MF_test=X_pred_MF_indexed.drop([user for user in train_set_transposed.index],axis='rows')
    
    rec = Recall_at_k(X_pred_MF_test, test_set_transposed, X_true, 20)
    model_scores.append((combination[0],combination[1],rec))
    


**Recall@20 Scores for the different models**

In [261]:
print(model_scores)

[(0.02, 25, 0.23449805178865588), (0.02, 50, 0.18054233513513362), (0.02, 75, 0.16327555590342835), (0.02, 100, 0.1537941439391666), (0.02, 125, 0.14868810511251487), (0.04, 25, 0.23461047449804318), (0.04, 50, 0.17640964615999746), (0.04, 75, 0.15919039782065061), (0.04, 100, 0.16097746912515715), (0.04, 125, 0.14886667248953794), (0.08, 25, 0.2350117088676208), (0.08, 50, 0.172944747377696), (0.08, 75, 0.16320482126538616), (0.08, 100, 0.14637689611298127), (0.08, 125, 0.15150337306347317), (0.16, 25, 0.23905909999264388), (0.16, 50, 0.18735283457141708), (0.16, 75, 0.1737032212417776), (0.16, 100, 0.16468879430834962), (0.16, 125, 0.1362655797146293), (0.2, 25, 0.2522697715620571), (0.2, 50, 0.20292844590141773), (0.2, 75, 0.17228912871569219), (0.2, 100, 0.16249865268811808), (0.2, 125, 0.14241564393459066)]


[(0.1, 50, 0.27221950496251063), (0.1, 75, 0.2719786784907962), (0.1, 100, 0.23936998398270187), (0.1, 125, 0.23593836395729662), (0.2, 50, 0.30776454523018787), (0.2, 75, 0.283389143878594), (0.2, 100, 0.25521517290729095), (0.2, 125, 0.22161018900936283), (0.4, 50, 0.28890859781005035), (0.4, 75, 0.2742941854765402), (0.4, 100, 0.245488050934729), (0.4, 125, 0.23098952425629882), (0.6, 50, 0.3020182569979104), (0.6, 75, 0.26810701604331394), (0.6, 100, 0.25252348383319817), (0.6, 125, 0.24501358018739602)]

## Sources

[1] Liang, D., Krishnan, R. G., Hoffman, M. D., & Jebara, T. (2018, April). Variational autoencoders for collaborative filtering. In Proceedings of the 2018 World Wide Web Conference (pp. 689-698). International World Wide Web Conferences Steering Committee.

[2] https://github.com/dawenl/vae_cf/

[3] https://keras.io/examples/variational_autoencoder/