In [3]:
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.layers import Input, Dense, Embedding, Flatten, Dropout, Activation
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
from tensorflow.keras import backend as K
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras import initializers
from tensorflow.keras.layers import add
from BaseModel import BaseModel
import numpy as np
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [9]:
from tensorflow.keras.activations import selu

In [11]:
# --- Network shape -----------------------------------------------------
# Activation callable (the `selu` function imported from tensorflow.keras).
activation = selu
# Layer widths, stored here as a *string* literal, not a Python list.
layers = '[512,256,512]'
dropout = 0.8
# NOTE(review): presumably a regularisation strength -- confirm where it is consumed.
reg = 0.01

# --- Optimisation ------------------------------------------------------
lr = 0.0001
batch = 64
epochs = 50

In [12]:
def data_preparation(self, interactions, user_item_matrix):
    '''
    Build the model inputs and the training target.

    Parameters
    ----------
    interactions : frame that is grouped by its 'user_id' column and whose
        'game' column holds the strings to tokenise.
    user_item_matrix : frame whose index lists the user ids (this order is
        kept) and whose values are used both as input and as target.

    Returns
    -------
    X : list ``[user_item_matrix.values, padded_tokens]``.
    y : ``user_item_matrix.values``.
    '''

    # Size of the hashing space handed to one_hot
    vocab_size = 100
    # Every token sequence is padded (at the end) to this length
    max_length = 50

    def split_str(val):
        '''
        Flatten a sequence of strings into one space-separated token string.
        '''
        return ' '.join(tok for name in val for tok in name.split(' '))

    # Keep users in the same order as the rows of the interaction matrix
    users_ids = list(user_item_matrix.index)

    # One row per user: the list of games, then the flattened token string
    games_per_user = interactions.groupby('user_id')['game'].apply(list)
    user_games = games_per_user.loc[users_ids].reset_index()
    user_games['tokens'] = user_games['game'].apply(split_str)

    # Integer-encode each token string, then pad to a common length
    encoded_tokens = [one_hot(doc, vocab_size) for doc in user_games.tokens]
    padded_tokens = pad_sequences(encoded_tokens, maxlen=max_length, padding='post')

    # The matrix values serve as the first input and as the target
    X = [user_item_matrix.values, padded_tokens]
    y = user_item_matrix.values

    return X, y


In [13]:
 def fit(self, X, y):
    '''
    Build, compile and train the model, then reload the saved weights.

    Parameters
    ----------
    X : model inputs; passed to ``self.build_model`` and to ``model.fit``.
    y : training targets passed to ``model.fit``.

    Returns
    -------
    (model, hist) : the trained model and the value returned by ``model.fit``.
    '''

    # Build model
    model = self.build_model(X)

    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by newer Keras optimizers.
    model.compile(optimizer=Adam(learning_rate=self.lr),
                  loss='mse')

    # Train, holding out the last 10% of the samples for validation
    hist = model.fit(x=X, y=y,
                     epochs=self.epochs,
                     batch_size=self.batch,
                     shuffle=True,
                     validation_split=0.1,
                     callbacks=self.callbacks_list())

    # Best weights
    # NOTE(review): presumably written to self.WEIGHT_MODEL by one of the
    # callbacks returned from self.callbacks_list() -- confirm.
    model.load_weights(self.WEIGHT_MODEL)
    self.model = model

    return model, hist

In [14]:
def predict(self, X):
    '''
    Run the stored model on X and zero the scores of already-rated entries.

    Parameters
    ----------
    X : model inputs; ``X[0]`` is compared element-wise against 0 to decide
        which predictions are kept.

    Returns
    -------
    The model predictions multiplied by the mask ``X[0] == 0``.
    '''
    scores = self.model.predict(X)

    # True where the first input holds a 0; every other position is
    # multiplied by False and therefore becomes 0 in the result.
    unseen = X[0] == 0

    return scores * unseen

In [15]:
def build_model(self, X):
    '''
    Assemble the two-input autoencoder used as collaborative-filter model.

    ``X`` unpacks into ``(users_items_matrix, content_info)``; only their
    second dimensions are read, to size the inputs and the output layer.
    ``self.layers`` is indexed and sliced as a sequence of layer widths whose
    middle element is the width of the latent space.

    Returns the (uncompiled) ``Model`` mapping both inputs to the
    reconstruction of the first one.
    '''

    users_items_matrix, content_info = X
    n_items = users_items_matrix.shape[1]
    n_tokens = content_info.shape[1]

    # Inputs
    input_layer = x = Input(shape=(n_items,), name='UserScore')
    input_content = Input(shape=(n_tokens,), name='Itemcontent')

    # Index of the middle (latent) entry of self.layers
    k = len(self.layers) // 2
    latent_units = self.layers[k]

    # Encoder: every width before the middle one, named EncLayer0, EncLayer1, ...
    for depth, units in enumerate(self.layers[:k]):
        x = Dense(units, activation=self.activation,
                  name='EncLayer{}'.format(depth))(x)

    # Latent space of the score branch
    x = Dense(latent_units, activation=self.activation,
              name='UserLatentSpace')(x)

    # Content branch: embed the token ids, flatten, project to the latent width
    x_content = Embedding(100, latent_units,
                          input_length=n_tokens)(input_content)
    x_content = Flatten()(x_content)
    x_content = Dense(latent_units, activation=self.activation,
                      name='ItemLatentSpace')(x_content)

    # Merge both branches by element-wise addition (not concatenation)
    x = add([x, x_content], name='LatentSpace')

    # Dropout
    x = Dropout(self.dropout)(x)

    # Decoder: widths after the middle one; names count down from k-1 so they
    # mirror the encoder numbering.
    for step, units in enumerate(self.layers[k + 1:], start=1):
        x = Dense(units, activation=self.activation,
                  name='DecLayer{}'.format(k - step))(x)

    # Output: one linear unit per column of the score matrix
    output_layer = Dense(n_items, activation='linear', name='UserScorePred')(x)

    # this model maps an input to its reconstruction
    return Model([input_layer, input_content], output_layer)

In [17]:
! pip install mlflow

Collecting mlflow
  Downloading mlflow-1.8.0-py3-none-any.whl (10.4 MB)
[K     |████████████████████████████████| 10.4 MB 196 kB/s eta 0:00:01
[?25hCollecting alembic
  Downloading alembic-1.4.2.tar.gz (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 135 kB/s eta 0:00:01
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h    Preparing wheel metadata ... [?25ldone
[?25hCollecting docker>=4.0.0
  Downloading docker-4.2.1-py2.py3-none-any.whl (143 kB)
[K     |████████████████████████████████| 143 kB 142 kB/s eta 0:00:01
[?25hCollecting prometheus-flask-exporter
  Downloading prometheus_flask_exporter-0.13.0.tar.gz (18 kB)
Collecting gunicorn; platform_system != "Windows"
  Downloading gunicorn-20.0.4-py2.py3-none-any.whl (77 kB)
[K     |████████████████████████████████| 77 kB 201 kB/s eta 0:00:01
Collecting gorilla
  Downloading gorilla-0.3.0-py2.py3-none-any.whl (11 kB)
Collecting querystring-parser
  Dow

In [18]:
# Launch the project in the current directory through the MLflow CLI, passing
# each hyperparameter as a -P key=value pair.
# NOTE(review): the recorded output shows this run failing with "Could not find
# main among entry points []" -- presumably the working directory has no
# MLproject file declaring a `main` entry point; confirm before re-running.
! mlflow run . \
          -P activation=selu \
          -P batch=64 \
          -P dropout=0.8 \
          -P epochs=50 \
          -P layers='[512,256,512]' \
          -P lr=0.0001 \
          -P name=auto_enc \
          -P reg=0.01

2020/06/11 17:01:43 ERROR mlflow.cli: === Could not find main among entry points [] or interpret main as a runnable script. Supported script file extensions: ['.py', '.sh'] ===
