In [1]:
import logging;

logging.basicConfig(level=logging.WARNING)
logging.getLogger().setLevel(level=logging.WARNING)

import warnings
warnings.simplefilter("ignore")

import gc
gc.enable()

In [2]:
from greenguard import get_pipelines

get_pipelines()

Using TensorFlow backend.


['resample_3600s_unstack_double_24_lstm_timeseries_classifier',
 'resample_3600s_unstack_24_lstm_timeseries_classifier',
 'resample_600s_unstack_144_lstm_timeseries_classifier',
 'resample_600s_normalize_dfs_1d_xgb_classifier',
 'resample_600s_unstack_dfs_1d_xgb_classifier',
 'resample_600s_unstack_double_144_lstm_timeseries_classifier',
 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier']

In [3]:
def score_template(template, metric, target_times, readings, tuning_iterations,
                   init_params=None, cost=False, test_size=0.25, cv_splits=3, random_state=0):
    
    scores = {}
    
    try:
    
        train, test = train_test_split(target_times, test_size=test_size, random_state=random_state)

        pipeline = GreenGuardPipeline(template, metric, cost=cost, cv_splits=cv_splits, init_params=init_params)

        #Computing the default test score
        pipeline.fit(train, readings)
        predictions = pipeline.predict(test, readings)

        scores['default_test'] = f1_score(test['target'], predictions)

        #Computing de default cross validation score
        gc.collect()
        session = pipeline.tune(train, readings)
        session.run(1)

        scores['default_cv'] = pipeline.cv_score

        #Computing the cross validation score with tuned hyperparameters
        session.run(tuning_iterations)    
        pipeline.get_hyperparameters()

        scores['tuned_cv'] = pipeline.cv_score

        #Computing the test score with tuned hyperparameters
        pipeline.fit(train, readings)
        predictions = pipeline.predict(test, readings)

        scores['tuned_test']= f1_score(test['target'], predictions)
    
    except:
        return scores
    else:
        return scores

In [4]:
#returns the init_params of lstm
import pandas as pd

def build_lstm_init_params(rule, window_size):
    window_size = int(pd.to_timedelta(window_size) / pd.to_timedelta(rule))
    return [{
        'pandas.DataFrame.resample#1': {
            'rule': rule,
        },
        'featuretools.dfs.json#1': {
            'window_size': window_size,
        }
    }]

In [5]:
#returns the init_params of the dfs
def build_dfs_init_params(rule, window_size):
    return [{
        'pandas.DataFrame.resample#1': {
            'rule': rule,
        },
        'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1': {
            'training_window': window_size,
        }
    }]

In [6]:
#evaluates the score of a pipeline with diferents window_size and rule    
def evaluate_template(template, window_rule_size, metric, tuning_iterations, cost=False,
                     test_size=0.25, cv_splits=3, random_state=0):
    scores_list = []
    
    INIT_PARAMS_BUILDERS = {
    'resample_600s_normalize_dfs_1d_xgb_classifier': build_dfs_init_params,
    'resample_600s_unstack_double_144_lstm_timeseries_classifier': build_lstm_init_params,
    }
    
    target_times, readings = load_demo()
    
    for x in window_rule_size:
        window_size = x[0]
        rule = x[1]
        
        init_params_builder = INIT_PARAMS_BUILDERS[template]
        init_params = init_params_builder(rule, window_size)
    
        scores = score_template(
            template=template,
            metric=f1_score, 
            target_times=target_times, 
            readings=readings, 
            tuning_iterations=50,    
            init_params=init_params, 
            cost=False, 
            test_size=0.25, 
            cv_splits=3,
            random_state=0)
        
        scores['template'] = template
        scores['window_size'] = window_size
        scores['rule'] = rule
        scores_list.append(scores)
   
    return scores_list

In [7]:
from greenguard.demo import load_demo
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from greenguard.pipeline import GreenGuardPipeline

In [8]:
window_rule_size = {
    ('30d','12h'),
    ('30d','1d')
}

In [9]:
#We evaluate the score of the pipeline with the following parameters
template = 'resample_600s_normalize_dfs_1d_xgb_classifier'

scores_list = evaluate_template(
            template=template, 
            window_rule_size=window_rule_size,
            metric=f1_score, 
            tuning_iterations=50, 
            cost=False,
            test_size=0.25, 
            cv_splits=3, 
            random_state=0)

Built 99 features
Elapsed: 00:45 | Progress: 100%|██████████
Elapsed: 00:16 | Progress: 100%|██████████
Built 99 features
Elapsed: 00:31 | Progress: 100%|██████████
Elapsed: 00:16 | Progress: 100%|██████████
Built 99 features
Elapsed: 00:33 | Progress: 100%|██████████
Elapsed: 00:15 | Progress: 100%|██████████
Built 99 features
Elapsed: 00:30 | Progress: 100%|██████████
Elapsed: 00:16 | Progress: 100%|██████████
Built 99 features
Elapsed: 00:50 | Progress: 100%|██████████
Elapsed: 00:17 | Progress: 100%|██████████
Built 99 features
Elapsed: 00:49 | Progress: 100%|██████████
Elapsed: 00:16 | Progress: 100%|██████████
Built 99 features
Elapsed: 00:33 | Progress: 100%|██████████
Elapsed: 00:16 | Progress: 100%|██████████
Built 99 features
Elapsed: 00:32 | Progress: 100%|██████████
Elapsed: 00:16 | Progress: 100%|██████████
Built 99 features
Elapsed: 00:34 | Progress: 100%|██████████
Elapsed: 00:15 | Progress: 100%|██████████
Built 99 features
Elapsed: 00:46 | Progress: 100%|██████████
Ela

In [10]:
#result
scores_list

[{'default_test': 0.6415094339622641,
  'default_cv': 0.6759173604687018,
  'tuned_cv': 0.6897163120567377,
  'tuned_test': 0.6792452830188679,
  'template': 'resample_600s_normalize_dfs_1d_xgb_classifier',
  'window_size': '30d',
  'rule': '12h'},
 {'default_test': 0.7058823529411765,
  'default_cv': 0.73645390070922,
  'tuned_cv': 0.7512338268640789,
  'tuned_test': 0.7450980392156863,
  'template': 'resample_600s_normalize_dfs_1d_xgb_classifier',
  'window_size': '30d',
  'rule': '1d'}]