### Load Packages

In [None]:
%matplotlib inline

import numpy as np
import pandas as pd
from tqdm import tqdm
import tensorflow as tf
import matplotlib.pyplot as plt
# Enable tqdm's pandas integration.
tqdm.pandas()
# Print numpy arrays with 5 digits of precision.
np.set_printoptions(precision=5)
# Parse the full major-version component (everything before the first dot)
# instead of only the first character, so a multi-digit major is not misread.
assert int(tf.__version__.split(".")[0]) == 2, "tensorflow 2.0 should be installed"

### Download Data

In [None]:
from tensorflow.keras.utils import get_file

# Base URL under which the three Last.fm CSV files are hosted.
ROOT_URL = "https://craftsangjae.s3.ap-northeast-2.amazonaws.com/data/"

# Fetch every CSV through get_file; each call yields the path that is read below.
play_path, artist_path, user_path = (
    get_file(csv_name, ROOT_URL + csv_name)
    for csv_name in ("lastfm_play.csv", "lastfm_artist.csv", "lastfm_user.csv")
)

# Load each fetched file into its own DataFrame.
play_df, artist_df, user_df = (
    pd.read_csv(csv_path)
    for csv_path in (play_path, artist_path, user_path)
)

## Neural Collaborative Filtering
---

![](https://imgur.com/2XCYGE8.png)

### Build Model

In [None]:
from tensorflow.keras.layers import Input, Concatenate
from tensorflow.keras.layers import Dense, Embedding
from tensorflow.keras.models import Model

def neural_collaborative_filtering(num_user, num_item, num_factor):
    """Build the NCF model: user/item embeddings followed by an MLP.

    The user and item ids are each embedded into ``num_factor`` dimensions,
    the two embeddings are concatenated, and the result is passed through
    three ReLU layers of width ``num_factor``, ``num_factor // 2`` and
    ``num_factor // 4`` before a single sigmoid output unit.

    Args:
        num_user: Number of rows in the user embedding table.
        num_item: Number of rows in the item embedding table.
        num_factor: Embedding size; also sets the hidden-layer widths.

    Returns:
        An uncompiled Keras ``Model`` named ``'NCF'`` whose inputs are the
        scalar-per-example tensors ``'user'`` and ``'item'`` and whose
        output is one sigmoid activation per example.

    Raises:
        ValueError: If ``num_factor`` is smaller than 4, because the
            narrowest hidden layer (``num_factor // 4``) would have no units.
    """
    if num_factor < 4:
        raise ValueError(
            "num_factor must be at least 4 so that every hidden layer has "
            f"at least one unit, got {num_factor}")

    # Ids are embedding indices, so declare them as integers rather than
    # relying on the default float input dtype (float32 cannot represent
    # every integer above 2**24 exactly).
    user_id = Input(shape=(), dtype='int32', name='user')
    item_id = Input(shape=(), dtype='int32', name='item')

    user_embedding = Embedding(num_user, num_factor)(user_id)
    item_embedding = Embedding(num_item, num_factor)(item_id)

    concat_embedding = Concatenate()([user_embedding, item_embedding])

    # MLP tower: the width halves at each successive layer.
    hidden1 = Dense(num_factor,      activation='relu')(concat_embedding)
    hidden2 = Dense(num_factor // 2, activation='relu')(hidden1)
    hidden3 = Dense(num_factor // 4, activation='relu')(hidden2)
    probs = Dense(1, activation='sigmoid')(hidden3)

    return Model([user_id, item_id], probs, name='NCF')

In [None]:
# Width of the user/item embeddings (also drives the hidden-layer sizes).
num_factor = 32

# Size each embedding table as (largest id seen + 1).
# NOTE(review): assumes ids are non-negative integers usable directly as
# embedding indices - confirm against the CSV schema.
num_user = play_df["user_id"].max() + 1
num_item = play_df["artist_id"].max() + 1

model = neural_collaborative_filtering(
    num_user=num_user,
    num_item=num_item,
    num_factor=num_factor,
)

### Compile Model

In [None]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import BinaryAccuracy

# Binary cross-entropy on the sigmoid output, optimised with Adam (lr = 1e-3),
# tracking binary accuracy as the only metric.
model.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss=BinaryCrossentropy(),
    metrics=[BinaryAccuracy()],
)

### Build Data Input Pipeline

In [None]:
def bootstrap_dataset(df, neg_ratio=3., batch_size=128, seed=None):
    """Build a shuffled, batched dataset of positive and sampled negative pairs.

    Every row of ``df`` becomes a positive example (label 1.0). Negatives
    (label 0.0) are made by resampling ``neg_ratio * len(df)`` rows with
    replacement and then shuffling their ``artist_id`` column, which breaks
    the original user/artist pairing.

    NOTE: negatives are not checked against the observed pairs, so a
    shuffled pair may coincide with a real positive and receive label 0.

    Args:
        df: DataFrame with at least ``user_id`` and ``artist_id`` columns.
        neg_ratio: Number of negative examples per positive example.
        batch_size: Number of examples per batch.
        seed: Optional integer seed. When given, the three random draws
            (negative resampling, artist shuffling, final shuffle) are
            reproducible. When ``None`` (default), pandas' default random
            state is used, as before this parameter existed.

    Returns:
        A ``tf.data.Dataset`` yielding ``({"user": ..., "item": ...}, label)``
        batches of up to ``batch_size`` examples.
    """
    # One shared generator so the three draws differ from each other while
    # the overall sequence stays reproducible for a given seed.
    rng = None if seed is None else np.random.RandomState(seed)

    pairs = df[['user_id', 'artist_id']]

    pos_df = pairs.copy()
    pos_df['label'] = 1.

    neg_df = pairs.sample(
        frac=neg_ratio, replace=True, random_state=rng).copy()
    # Shuffle the artists independently of the users to create random pairs.
    neg_df['artist_id'] = (
        neg_df['artist_id'].sample(frac=1., random_state=rng).values)
    neg_df['label'] = 0.

    # Interleave positives and negatives so batches contain both classes.
    merge_df = pd.concat([pos_df, neg_df]).sample(frac=1., random_state=rng)

    X = {
        "user": merge_df['user_id'].values,
        "item": merge_df['artist_id'].values
    }
    Y = merge_df['label'].values

    dataset = (
        tf.data.Dataset
        .from_tensor_slices((X, Y))
        .batch(batch_size))

    return dataset

### Train Model

In [None]:
num_epoch = 10
batch_size = 1024 * 16
# Negatives drawn per observed (user, artist) pair. The loop previously passed
# num_epoch in this position, which tied the sampling ratio to the number of
# epochs; 3. is bootstrap_dataset's own default.
neg_ratio = 3.
for i in range(num_epoch):
    print(f"{i+1}th epoch :")
    # Rebuild the dataset each epoch so a fresh set of negatives is sampled.
    dataset = bootstrap_dataset(play_df,
                                neg_ratio=neg_ratio,
                                batch_size=batch_size)
    model.fit(dataset)