In [None]:
!pip install autorec

# Matrix Factorization (MF)
*Rating Prediction*

https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf

Description:
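A Factorization Machine (FM) is a supervised learning algorithm that models feature interactions in both dense and sparse data. The model captures all single and pairwise interactions by factorizing the interaction parameters; it can be computed in linear time and has a number of parameters that is linear in the number of features. Unlike models that are specialized for specific tasks, FM is a general-purpose predictor that works with any real-valued feature vector. Matrix Factorization (MF) corresponds to the special case in which the only input features are the user ID and the item ID, which is the model built in this notebook: user and item latent factors combined by an inner product.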


## A simple example below

**Step 0: Imports and Configurations**

First, handle the imports with the correct configurations set. Also include the logging settings here.


In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
import os
# Choose the visible GPU; this runs before the TensorFlow import that follows.
# setdefault keeps "5" as the fallback but respects a CUDA_VISIBLE_DEVICES
# value that the user has already exported.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "5")
import tensorflow as tf
import logging
from autorecsys.auto_search import Search
from autorecsys.pipeline import Input, LatentFactorMapper, RatingPredictionOptimizer
from autorecsys.pipeline.interactor import InnerProductInteraction
from autorecsys.pipeline.preprocessor import MovielensPreprocessor
from autorecsys.recommender import RPRecommender

# Logging setup: configure the root logger once, at INFO level.
# basicConfig does nothing when the root logger already has a handler, so a
# single call is all that can take effect here.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# Module-level logger used by the cells below to report metrics.
logger = logging.getLogger(__name__)


**Step 1: Preprocess data**

MovielensPreprocessor() is a built-in preprocessor for the Movielens 1M dataset.

Load the dataset, which in this case is set to the Movielens 1 million dataset, although options exist for a 10 million, latest, and Netflix dataset.

In [None]:
# Step 1: Preprocess data
movielens = MovielensPreprocessor()

# preprocess() returns the features and labels of the train / validation / test splits.
train_X, train_y, val_X, val_y, test_X, test_y = movielens.preprocess()

# Apply get_x_categorical to every split, in train -> val -> test order.
train_X_categorical, val_X_categorical, test_X_categorical = (
    movielens.get_x_categorical(split) for split in (train_X, val_X, test_X)
)

# The hash sizes are unpacked into user_num and item_num.
user_num, item_num = movielens.get_hash_size()


**Step 2.0: Build the recommender**

Creates the pipeline

Models can be customized for a searchable recommender

**Step 2.1: Setup mappers to handle inputs**

This step builds an input node for your data and then embeds each ID column into a latent space of the dimension of your choice. In this example, the embedding dimension is set to 64.


In [None]:
# Input node with two feature columns; column 0 is mapped with user_num ids
# and column 1 with item_num ids.
# NOTE(review): `input` shadows the Python builtin; the name is kept because
# the model-building cell reads it.
input = Input(shape=[2])

# One 64-dimensional latent-factor mapper per feature column, built in
# user -> item order.
user_emb, item_emb = (
    LatentFactorMapper(feat_column_id=column, id_num=id_count, embedding_dim=64)(input)
    for column, id_count in enumerate((user_num, item_num))
)


**Step 2.2: Setup interactors to handle models**

This example has only one interactor. Other interactors of your choice can be stacked as well.



In [None]:
# Feed the user and item embeddings to an inner-product interactor.
interaction = InnerProductInteraction()
output = interaction([user_emb, item_emb])


**Step 2.3: Setup optimizer to handle the target task**

In [None]:
# Attach the rating-prediction optimizer under its own name instead of
# rebinding `output`, so that re-running this cell does not stack a second
# optimizer on top of the previous one.
rating_output = RatingPredictionOptimizer()(output)

# Assemble the recommender from the input node and the optimizer output.
model = RPRecommender(inputs=input, outputs=rating_output)


**Step 3: Build the searcher**

This provides the search algorithm.


In [None]:
# Build the searcher around the recommender defined above.
# The call is now closed with its parenthesis; without it the cell is a SyntaxError.
searcher = Search(model=model,
                  tuner='greedy',  # random, greedy
                  tuner_params={"max_trials": 5, 'overwrite': True})


**Step 4: Use the searcher to search the recommender**

Search for the best model and validate its accuracy.

Inputs such as the objective, batch_size, and callbacks can be customized for your own model.

In [None]:
# Early-stopping callback watching the validation loss with a patience of one epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=1)

# Run the search on the training split, scoring candidates on the validation split.
searcher.search(
    x=[train_X_categorical],
    y=train_y,
    x_val=[val_X_categorical],
    y_val=val_y,
    objective='val_mse',
    batch_size=1024,
    epochs=10,
    callbacks=[early_stopping],
)

# NOTE(review): search() receives the features wrapped in a list while
# evaluate() receives the bare array — confirm both forms are intended.
val_mse = searcher.evaluate(x=val_X_categorical, y_true=val_y)
logger.info('Validation Accuracy (mse): {}'.format(val_mse))


**Step 5: Evaluate the searched model**



In [None]:
# Score the searched model on the held-out test split and log the result.
test_mse = searcher.evaluate(x=test_X_categorical, y_true=test_y)
logger.info('Test Accuracy (mse): {}'.format(test_mse))
