In [1]:
import os
from datetime import datetime
from pathlib import Path
from time import perf_counter, time

import numpy as np
import pandas as pd
from tqdm import tqdm


# Make the parent of the start-up directory the working directory.
# `root` is only computed when it is not defined yet: on a re-run of this cell
# the working directory has already been moved, so recomputing it would climb
# one directory higher each time.
if "root" not in locals():
    current_path = Path.cwd()
    root = current_path.parent.absolute()
os.chdir(root)
    
from examples.example_utils import add_AR_cols
from examples.example_utils import create_binary_box
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from TinyAutoML.Models import *
from TinyAutoML import MetaPipeline

In [3]:
def make_benchmark(X, y, modelType, test_train_split=0.8, metrics="accuracy", pools=(None, None, None, None)):
    """Fit and time several search configurations of one model type.

    The data is split by position (no shuffling): the first
    ``round(len(y) * test_train_split)`` rows are used for fitting and the
    remaining rows for scoring. One ``MetaPipeline`` is built per
    (comprehensiveSearch, parameterTuning) configuration, each is fitted and
    timed in turn, and finally each is scored on the held-out rows.

    Parameters
    ----------
    X, y : sliceable sequences
        Features and target; both are sliced with ``[:cut]`` / ``[cut:]`` and
        ``len(y)`` determines the cut position.
    modelType : callable
        Called as ``modelType(comprehensiveSearch=..., parameterTuning=...,
        metrics=...)``; the result is wrapped in ``MetaPipeline``.
    test_train_split : float
        Fraction of the rows used for *training* (despite the name).
    metrics : str
        Forwarded unchanged to ``modelType``.
    pools : sequence or None
        ``pools[k]`` is passed as ``pool=`` to the ``fit`` call of the k-th
        configuration. Entries beyond the number of active configurations are
        ignored. ``None`` means "no pool for any configuration".

    Returns
    -------
    (times, models) : tuple of lists
        ``times[k]`` is the fit duration in seconds of ``models[k]``.
        Test scores are printed but not returned.

    Raises
    ------
    ValueError
        If ``pools`` has fewer entries than there are active configurations
        (checked before any fitting starts).
    """
    # (comprehensiveSearch, parameterTuning) for each benchmarked pipeline.
    search_configs = [
        (False, False),
        (False, True),
        (True, False),
        # (True, True) is currently disabled, so a fourth pools entry is unused.
    ]

    if pools is None:
        pools = (None,) * len(search_configs)
    if len(pools) < len(search_configs):
        # Fail before the (potentially long) fits rather than midway through them.
        raise ValueError(
            f"pools must provide at least {len(search_configs)} entries, got {len(pools)}"
        )

    print(modelType)
    cut = round(len(y) * test_train_split)

    X_train, X_test = X[:cut], X[cut:]
    y_train, y_test = y[:cut], y[cut:]

    # Build every pipeline up front so construction cost is excluded from the timings.
    models = [
        MetaPipeline(modelType(comprehensiveSearch=comprehensive, parameterTuning=tuning, metrics=metrics))
        for comprehensive, tuning in search_configs
    ]

    times = []
    for model, pool in zip(models, pools):
        # perf_counter is monotonic, so a system clock adjustment cannot skew a duration.
        started = perf_counter()
        model.fit(X_train, y_train, pool=pool)
        times.append(perf_counter() - started)
    print(times)

    scores = [model.score(X_test, y_test) for model in models]
    print(scores)

    return times, models

In [None]:
# Load the raw table and parse the date column.
df = pd.read_csv('examples/database.csv').assign(
    Date=lambda frame: pd.to_datetime(frame['Date'])
)

# Add the binary 'Box' target, index by date, then append the AR columns;
# rows with missing values are dropped after each step.
df = (
    create_binary_box(df, relative_threshold=0.05, box_length=5)
    .set_index(['Date'])
    .dropna()
)
df = add_AR_cols(df, 7).dropna()

# Keep rows up to and including 2020; 'Box' is the target, the rest are features.
X = df.drop(columns='Box').loc[:'2020']
y = df.loc[:'2020', 'Box']

models, times = [], []
pools = [None, None, None, None]
for position, candidate in enumerate((BestModel, OneRulerForAll)):
    elapsed, fitted = make_benchmark(X, y, candidate, pools=pools)
    times.append(elapsed)
    models.append(fitted)

    # After the first model type, hand its pools to the following run.
    if position == 0:
        pools = [pipeline.get_pool() for pipeline in fitted]