# Benchmarking with sktime

In [1]:
# Load IPython's autoreload extension and switch it to mode 2, so that edited
# modules are re-imported automatically before each cell is executed.
%load_ext autoreload
%autoreload 2

In [1]:
from sktime.benchmarking import Orchestrator
from sktime.benchmarking import Data
from sktime.benchmarking import Results
from sktime.benchmarking import Evaluator
from sktime.highlevel import TSCTask 
from sktime.highlevel import TSCStrategy

from sktime.classifiers.ensemble import TimeSeriesForestClassifier

from sktime.model_selection import PresplitFilesCV

import os
import pandas as pd
import numpy as np

### Set up orchestration
1. Initialise a `Data` object, which encapsulates all the necessary information about the data, including whether predefined train and test folds exist, and which iteratively loads the data during orchestration
2. Specify the tasks of interest, one for each dataset, together with the name of the target variable to be predicted
3. Specify the prediction strategies to evaluate 
4. Specify where to save results

In [2]:
# Input and output locations. Both paths are relative, so they are resolved
# against the working directory of the notebook kernel.
data_dir = 'sktime/datasets/data/'
results_dir = '../sktime-benchmarking-results/'

# Dataset names are taken from the entries of `data_dir`. Only sub-directories
# are kept, so that stray files (e.g. `.DS_Store` or `__init__.py`) are not
# mistaken for datasets.
# NOTE(review): assumes every dataset is stored in its own sub-directory of
# `data_dir` -- confirm against the layout expected by `Data`.
datasets = [name for name in os.listdir(data_dir)
            if os.path.isdir(os.path.join(data_dir, name))]
print('Datasets: ', datasets)
n_datasets = len(datasets)

# One classification task per dataset; in this example the target name is the
# same for all datasets.
tasks = [TSCTask(target='class_val') for _ in range(n_datasets)]

# Data input: the datasets come with predefined train/test files.
data = Data(data_dir=data_dir, names=datasets, train_test_exists=True)

# Results output.
results = Results(results_dir=results_dir)

# Cross-validation using the presplit files.
cv = PresplitFilesCV()

# Strategies to compare, as (name, strategy) pairs: two time series forests
# that differ only in the number of estimators.
strategies = [
    ('tsf10', TSCStrategy(TimeSeriesForestClassifier(n_estimators=10))),
    ('tsf20', TSCStrategy(TimeSeriesForestClassifier(n_estimators=20)))
]

Datasets:  ['ItalyPowerDemand', 'ArrowHead', 'GunPoint']


### Run benchmarking

In [3]:
# Wire the data source, results sink, tasks, strategies and CV scheme together.
orchestrator = Orchestrator(
    data=data,
    results=results,
    tasks=tasks,
    strategies=strategies,
    cv=cv,
)

# Run the benchmark; the flags ask for training-set predictions and the
# fitted strategies to be saved as well.
# NOTE(review): on success this rebinds `results` to the return value of
# `fit_predict`, replacing the object passed to the Orchestrator above, so
# re-running this cell would pass that return value back in -- confirm intended.
results = orchestrator.fit_predict(
    save_training_predictions=True,
    save_fitted_strategies=True,
)

Running strategies on ItalyPowerDemand


TypeError: argument of type 'NoneType' is not iterable

In [7]:
# Inspect the attributes currently stored on the `results` object.
vars(results)

{'results_dir': '../sktime-benchmarking-results/', 'dataset_names': None}

In [5]:
# List the files written for strategy 'tsf10' on the ItalyPowerDemand dataset.
os.listdir(
    os.path.join(results_dir, 'tsf10', 'ItalyPowerDemand')
)

['tsf100.joblib', 'train0.csv', 'test0.csv']

In [8]:
# Load the saved test-fold predictions of strategy 'tsf10' on ItalyPowerDemand.
# The file name is passed to os.path.join instead of being appended with a
# hard-coded '/' so that the whole path is built portably.
pd.read_csv(os.path.join(results_dir, 'tsf10', 'ItalyPowerDemand', 'test0.csv'))

Unnamed: 0,index,y_true,y_pred
0,67,2,2
1,68,2,2
2,69,2,2
3,70,2,2
4,71,2,2
5,72,1,1
6,73,2,2
7,74,2,2
8,75,2,2
9,76,2,2
