In [None]:
!pip install autorec==0.0.2
url = 'https://raw.githubusercontent.com/datamllab/AutoRec/master/examples/example_datasets/criteo/train-10k.txt'


# AutoRec
*Click-through rate*

Description:
AutoRec specializes in searching for a model when you have no prior knowledge of which architecture to use. AutoRec selects different blocks in the interactor. It is especially useful for 1) users who need the optimal model after systematic exploration and 2) users who want to understand the intuition behind the searched model.


## A simple example below

**Step 0: Imports and Configurations**

First, handle the imports with the correct configurations set. Also include the logging settings here.


In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "7"
import logging
import tensorflow as tf
import numpy as np
from autorecsys.auto_search import Search
from autorecsys.pipeline import Input, DenseFeatureMapper, SparseFeatureMapper, HyperInteraction, CTRPredictionOptimizer
from autorecsys.recommender import CTRRecommender
from autorecsys.pipeline.preprocessor import CriteoPreprocessor


# logging setting
# Configure the root logger exactly once. ``logging.basicConfig`` is a no-op
# when the root logger already has handlers, so a second call with a different
# level (e.g. DEBUG) would be silently ignored; INFO is the effective level.
# To get DEBUG output, change ``level`` in this single call.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# Logger for this notebook/module, used for the validation and test reports.
logger = logging.getLogger(__name__)


**Step 1: Preprocess data**

CriteoPreprocessor() is a built-in preprocessor for the Criteo dataset.


In [None]:
# Step 1: Preprocess data
# The preprocessor's default arguments are set up for the Criteo example
# dataset; only the CSV location is supplied here.
criteo = CriteoPreprocessor(csv_path=url)
train_X, train_y, val_X, val_y, test_X, test_y = criteo.preprocess()

# Pull the numerical and categorical inputs out of each split through the
# preprocessor's accessors (train, then validation, then test).
train_X_numerical = criteo.get_x_numerical(train_X)
train_X_categorical = criteo.get_x_categorical(train_X)

val_X_numerical = criteo.get_x_numerical(val_X)
val_X_categorical = criteo.get_x_categorical(val_X)

test_X_numerical = criteo.get_x_numerical(test_X)
test_X_categorical = criteo.get_x_categorical(test_X)

# Field counts and hash size reported by the preprocessor; they size the
# input nodes and feature mappers in Step 2.1.
numerical_count = criteo.get_numerical_count()
categorical_count = criteo.get_categorical_count()
hash_size = criteo.get_hash_size()


**Step 2.0: Build the recommender**

Creates the pipeline

Models can be customized for a searchable recommender

**Step 2.1: Setup mappers to handle inputs**

This step builds the input nodes for your data. The Criteo dataset has both dense and sparse inputs.


In [None]:
# One input node per feature family: dense (numerical) and sparse (categorical).
dense_input_node = Input(shape=[numerical_count])  # shape=13
sparse_input_node = Input(shape=[categorical_count])  # shape=26

# Dense branch: construct the mapper, then apply it to the dense input node.
dense_mapper = DenseFeatureMapper(num_of_fields=numerical_count, embedding_dim=2)
dense_feat_emb = dense_mapper(dense_input_node)

# Sparse branch: same pattern, with the hash size taken from the preprocessor.
sparse_mapper = SparseFeatureMapper(
    num_of_fields=categorical_count,
    hash_size=hash_size,
    embedding_dim=2,
)
sparse_feat_emb = sparse_mapper(sparse_input_node)



**Step 2.2: Setup interactors to handle models**

This example uses a single type of interactor (`HyperInteraction`) three times. You can stack any other interactors of your choice.


In [None]:
# Bottom interactor over the sparse embeddings.
sparse_bottom_block = HyperInteraction(meta_interactor_num=2)
sparse_feat_bottom_output = sparse_bottom_block([sparse_feat_emb])

# Bottom interactor over the dense embeddings.
dense_bottom_block = HyperInteraction(meta_interactor_num=2)
dense_feat_bottom_output = dense_bottom_block([dense_feat_emb])

# Top interactor that takes both bottom outputs (sparse first, then dense).
top_block = HyperInteraction(meta_interactor_num=2)
hyper_output = top_block([sparse_feat_bottom_output, dense_feat_bottom_output])


**Step 2.3: Setup optimizer to handle the target task**


In [None]:
# Attach the CTR prediction head to the output of the top interactor.
ctr_optimizer = CTRPredictionOptimizer()
output = ctr_optimizer(hyper_output)

# Wrap the pipeline, from the two input nodes to the prediction output,
# in a recommender that the searcher can work with.
model = CTRRecommender(
    inputs=[dense_input_node, sparse_input_node],
    outputs=output,
)


**Step 3: Build the searcher**

This provides the search algorithm.

In [None]:
# Settings forwarded to the tuner: two trials, with 'overwrite' enabled.
tuner_settings = {'max_trials': 2, 'overwrite': True}

# Searcher that uses the 'random' tuner on the recommender defined above.
searcher = Search(model=model, tuner='random', tuner_params=tuner_settings)


**Step 4: Use the searcher to search the recommender**

Search for the best model and validate the accuracy of the model.

Inputs such as objective, batch_size, and callbacks can be customized for your own model.

In [None]:
# Stop a trial early once 'val_loss' fails to improve for one epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=1)

# Run the search on the training split, scoring trials on the validation split.
searcher.search(
    x=[train_X_numerical, train_X_categorical],
    y=train_y,
    x_val=[val_X_numerical, val_X_categorical],
    y_val=val_y,
    objective='val_BinaryCrossentropy',
    batch_size=10000,
    epochs=2,
    callbacks=[early_stopping],
)

# Evaluate the searcher on the validation split and report the result.
val_logloss = searcher.evaluate(
    x=[val_X_numerical, val_X_categorical],
    y_true=val_y,
)
logger.info('Validation Accuracy (logloss): {}'.format(val_logloss))


**Step 5: Evaluate the searched model**


In [None]:
# Evaluate the searcher on the held-out test split and report the result.
test_logloss = searcher.evaluate(
    x=[test_X_numerical, test_X_categorical],
    y_true=test_y,
)
logger.info('Test Accuracy (logloss): {}'.format(test_logloss))
