# Leaderboard Notebook

In this notebook we will execute one GreenGuard pipeline on the Yaw Failure
prediction problem, which is loaded in the first section below.

In [238]:
# Standard-library modules used for notebook-wide setup.
import gc
import logging
import warnings

# Configure INFO-level logging. The root logger's level is also set
# explicitly because basicConfig() does nothing when the root logger
# already has handlers attached.
logging.basicConfig(level=logging.INFO)
logging.getLogger().setLevel(logging.INFO)

# Silence every warning category for the rest of the session.
warnings.simplefilter(action="ignore")

# Make sure automatic garbage collection is switched on.
gc.enable()

In [239]:
from greenguard import get_pipelines

# Show the pipeline template names reported by GreenGuard; the template
# selected in the next cell is one of the names listed in this output.
get_pipelines()

['resample_3600s_unstack_double_24_lstm_timeseries_classifier',
 'resample_3600s_unstack_24_lstm_timeseries_classifier',
 'resample_600s_unstack_144_lstm_timeseries_classifier',
 'resample_600s_normalize_dfs_1d_xgb_classifier',
 'resample_600s_unstack_dfs_1d_xgb_classifier',
 'resample_600s_unstack_double_144_lstm_timeseries_classifier',
 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier']

In [240]:
# Name of the GreenGuard template handed to GreenGuardPipeline further down.
template = "resample_600s_unstack_double_144_lstm_timeseries_classifier"

# Overrides injected through `init_params`: the resampling rule and the
# window size of the cutoff-window-sequences primitive.
rule = "4h"
window_size = 42

## 1. Load the Data

The first step is to load the data that we are going to use.

We will be loading the demo data using the `load_demo` function from `greenguard.demo`.

In [241]:
from greenguard.demo import load_demo

In [242]:
# Unpack the two objects returned by load_demo(): the target times that are
# split into train/test below, and the readings passed to fit/predict/tune.
target_times, readings = load_demo()

In [243]:
# Display the dimensions of the target times.
target_times.shape

(353, 3)

In [244]:
# Display the dimensions of the readings.
readings.shape

(1313540, 4)

## 2. Split the data

In [245]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the target times for testing. The fixed random_state keeps
# the split identical from one run to the next.
train, test = train_test_split(
    target_times,
    test_size=0.33,
    random_state=0,
)

In [246]:
# Display the dimensions of both splits side by side.
train.shape, test.shape

((236, 3), (117, 3))

In [247]:
# Compare the mean of the `target` column in each split.
# NOTE(review): presumably `target` is a 0/1 label, making this the positive
# rate of each split — confirm against the demo data.
train.target.mean(), test.target.mean()

(0.3177966101694915, 0.26495726495726496)

## 3. Select a Template

In [248]:
# Overrides handed to GreenGuardPipeline through its `init_params` argument.
# The single dict maps a pipeline step to the arguments replaced for it:
# the resample rule and the window size of the cutoff-window-sequences step.
init_params = [
    {
        'pandas.DataFrame.resample#1': {'rule': rule},
        'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1': {
            'window_size': window_size,
        },
    },
]

In [249]:
from greenguard.pipeline import GreenGuardPipeline

# Build the pipeline from the selected template, using the F1 metric, three
# cross-validation splits, and the init-param overrides defined above.
pipeline = GreenGuardPipeline(template, metric='f1', cv_splits=3, init_params=init_params)

In [250]:
# Fit the pipeline on the training target times and the readings.
# NOTE(review): the recorded output of this cell shows a ValueError logged
# while fitting the DoubleLSTMTimeSeriesClassifier block (3-dimensional input
# expected, array of shape (236, 1) received), and the later cells have no
# recorded output. The root cause still needs to be investigated.
pipeline.fit(train, readings)

ERROR:mlblocks.mlpipeline:Exception caught fitting MLBlock keras.Sequential.DoubleLSTMTimeSeriesClassifier#1
Traceback (most recent call last):
  File "/home/usuario/.virtualenvs/GreenGuard/lib/python3.6/site-packages/mlblocks/mlpipeline.py", line 549, in _fit_block
    block.fit(**fit_args)
  File "/home/usuario/.virtualenvs/GreenGuard/lib/python3.6/site-packages/mlblocks/mlblock.py", line 302, in fit
    getattr(self.instance, self.fit_method)(**fit_kwargs)
  File "/home/usuario/.virtualenvs/GreenGuard/lib/python3.6/site-packages/mlprimitives/adapters/keras.py", line 111, in fit
    shuffle=self.shuffle)
  File "/home/usuario/.virtualenvs/GreenGuard/lib/python3.6/site-packages/keras/engine/training.py", line 1154, in fit
    batch_size=batch_size)
  File "/home/usuario/.virtualenvs/GreenGuard/lib/python3.6/site-packages/keras/engine/training.py", line 579, in _standardize_user_data
    exception_prefix='input')
  File "/home/usuario/.virtualenvs/GreenGuard/lib/python3.6/site-packages

ValueError: Error when checking input: expected lstm_7_input to have 3 dimensions, but got array with shape (236, 1)

In [None]:
# Predict for the held-out target times with the pipeline fitted above;
# this requires the preceding fit cell to have completed.
predictions = pipeline.predict(test, readings)

In [None]:
from sklearn.metrics import f1_score

# F1 score of the pipeline on the held-out split before any tuning is run;
# kept for the final comparison and shown as the cell output.
default_test_score = f1_score(y_true=test['target'], y_pred=predictions)
default_test_score

## 4. Find the best pipeline

In [None]:
# Run a full garbage-collection pass before starting the tuning session.
gc.collect()

In [None]:
# Create a tuning session for the pipeline on the training data.
session = pipeline.tune(train, readings)
# NOTE(review): presumably run(1) performs a single tuning iteration, so the
# score read next corresponds to the default hyperparameters — confirm
# against the GreenGuard documentation.
session.run(1)
# Keep the cross-validation score at this point for the final comparison.
default_cv_score = pipeline.cv_score
default_cv_score

In [None]:
# NOTE(review): presumably 49 further tuning iterations, 50 in total with the
# earlier run(1) — confirm against the GreenGuard documentation.
session.run(49)

In [None]:
# Display the pipeline's hyperparameters after the tuning iterations.
# NOTE(review): presumably the best ones found by the session — confirm.
pipeline.get_hyperparameters()

In [None]:
# Keep the cross-validation score after tuning for the final comparison.
tuned_cv_score = pipeline.cv_score
tuned_cv_score

## 5. Fit the pipeline

Once we are satisfied with the obtained cross validation score, we can proceed to call
the `fit` method passing again the same data elements.

This will fit the pipeline with all the training data available using the best hyperparameters
found during the tuning process:

In [None]:
# Refit on the full training split, now that the tuning session has run.
pipeline.fit(train, readings)

In [None]:
# Predict for the held-out target times again; this overwrites the
# `predictions` produced before tuning.
predictions = pipeline.predict(test, readings)

In [None]:
# Display the predictions made after tuning.
predictions

In [None]:
from sklearn.metrics import f1_score

# F1 score on the held-out split after tuning and refitting; kept for the
# final comparison and shown as the cell output.
tuned_test_score = f1_score(y_true=test['target'], y_pred=predictions)
tuned_test_score

In [None]:
# Side-by-side summary: test and CV scores before tuning, then CV and test
# scores after tuning.
default_test_score, default_cv_score, tuned_cv_score, tuned_test_score