Cleaning
- first to truncate the data by merging it with the flight data. then removing those data where the flight data is relatively non-eventful (we call this the setup stage, where the subject wears all the apparatus and get ready)
- next is to perform FFT to remove the noise. based on equipment specs (normal range should be between 10-30 hz?)

There could be 2 source of features from the PPG raw data that was collected
- heart beats/min every 20seconds (this is calculated by taking average time interval between each peak , take the average over every 20s, then take 20s divided by average time interval (eg 1s between each peak))
- amptitude. but this could be due to how strong the device is pressed against the finger. So we disregard this

In [1]:
import re, os, time
import pandas as pd
import datetime
import numpy as np
from scipy import stats
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score
from matplotlib import pyplot as plt
# import plotData # helper function in starter code package


from sktime.transformations.panel.padder import PaddingTransformer
from sktime.classification.compose import ClassifierPipeline, ComposableTimeSeriesForestClassifier
from sktime.transformations.panel.summarize import RandomIntervalFeatureExtractor
from sklearn.tree import DecisionTreeClassifier
# only classifier in sktime that can process unequal length data
# https://github.com/sktime/sktime/issues/3649#issuecomment-1292459843
# from sktime.alignment.dtw_python import AlignerDTW   ## NOTE THAT THIS SOMEHOW AFFECT ALL PRINT OUTPUT. NOTHING WILL BE SHOWN FOR PRINT STATEMENT AFTER YOU RUN THIS
from sktime.classification.feature_based import RandomIntervalClassifier
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
from sktime.classification.dictionary_based import IndividualBOSS, ContractableBOSS
from sktime.classification.kernel_based import RocketClassifier
from sktime.classification.hybrid import HIVECOTEV1, HIVECOTEV2
from sktime.classification.shapelet_based import ShapeletTransformClassifier
from sktime.classification.sklearn import RotationForest

from sktime.dists_kernels.compose_from_align import DistFromAligner
from sktime.utils.slope_and_trend import _slope
from sklearn.pipeline import Pipeline
# https://www.sktime.org/en/stable/api_reference/auto_generated/sktime.transformations.panel.catch22.Catch22.html
from sktime.transformations.panel.catch22 import Catch22

from sktime.classification.interval_based import CanonicalIntervalForest,DrCIF,RandomIntervalSpectralEnsemble,SupervisedTimeSeriesForest,TimeSeriesForestClassifier

# identify classifiers that support unequal length
from sktime.registry import all_estimators

from sktime.classification.deep_learning.cnn import CNNClassifier
from sktime.classification.deep_learning.fcn import FCNClassifier

from sklearn.metrics import log_loss # cross-entropy loss

import pygad #genetic algorithm



In [2]:
data_root = "..\\cleanedDataPPG\\"

## Preparing data from Xue Yang

In [118]:
df_ppg = pd.read_csv(data_root+"eda_ppg_window_20s.csv")

In [119]:
df_combined_subject_20s = pd.DataFrame()
df_combined_subject_20s["subject"] = df_ppg["group"].apply(lambda x: x.split("-")[0])
df_combined_subject_20s["difficulty"] = df_ppg["group"].apply(lambda x: x.split("-")[1])
df_combined_subject_20s["run"] = df_ppg["file_name"].apply(lambda x: x.split("_")[-1].split(".")[0])

In [120]:
df_combined_subject_20s["normalised_pulse"] = ""
for i in range(len(df_combined_subject_20s)):
    temp_series = pd.DataFrame(df_ppg.drop(['file_name', 'level_idx', 'level', 'level_0', 'level_1', 'level_2','level_3', 'level_4', 'group_idx', 'group'], axis = 1).iloc[i].dropna())
    # somehow we need to convert to array, flatten then back to sereis again for sktime to read it properly
    temp_series = pd.Series(np.array(temp_series).flatten())
    df_combined_subject_20s.at[i,"normalised_pulse"] = temp_series

In [121]:
# save to pickle rather than csv to preserve the nested series inside the dataframe
df_combined_subject_20s.to_pickle(data_root+"df_combined_subject_20s.pkl", protocol=4)

### scaled

In [122]:
df_combined_subject_20s_scaled = df_combined_subject_20s.copy()

In [126]:
df_combined_subject_20s_scaled["normalised_pulse"] = ""
for i in range(len(df_combined_subject_20s_scaled)):
    temp_series = pd.DataFrame(df_ppg.drop(['file_name', 'level_idx', 'level', 'level_0', 'level_1', 'level_2','level_3', 'level_4', 'group_idx', 'group'], axis = 1).iloc[i].dropna())
    scaler = StandardScaler()
    temp_series = pd.Series(scaler.fit_transform(temp_series).flatten())
    df_combined_subject_20s_scaled.at[i,"normalised_pulse"] = temp_series

In [127]:
df_combined_subject_20s_scaled.to_pickle(data_root+"df_combined_subject_20s_scaled.pkl", protocol=4)

### minmax

In [129]:
df_combined_subject_20s_minmax = df_combined_subject_20s.copy()

In [130]:
df_combined_subject_20s_minmax["normalised_pulse"] = ""
for i in range(len(df_combined_subject_20s_minmax)):
    temp_series = pd.DataFrame(df_ppg.drop(['file_name', 'level_idx', 'level', 'level_0', 'level_1', 'level_2','level_3', 'level_4', 'group_idx', 'group'], axis = 1).iloc[i].dropna())
    scaler = MinMaxScaler()
    temp_series = pd.Series(scaler.fit_transform(temp_series).flatten())
    df_combined_subject_20s_minmax.at[i,"normalised_pulse"] = temp_series

In [131]:
df_combined_subject_20s_minmax.to_pickle(data_root+"df_combined_subject_20s_minmax.pkl", protocol=4)

# Modelling NO scaling

In [12]:
df_combined_subject_20s = pd.read_pickle(data_root+"df_combined_subject_20s.pkl")

In [13]:
%%time
# perform train test split according by subject
# split into 5 different folds for CV
from sklearn.model_selection import GroupKFold
X_train, X_test, y_train, y_test = [], [], [], []
gss = GroupKFold(n_splits=5)
for train, test in gss.split(df_combined_subject_20s["normalised_pulse"], df_combined_subject_20s["difficulty"], df_combined_subject_20s["subject"]):
  X_train.append(df_combined_subject_20s.loc[train,["normalised_pulse"]])
  X_test.append(df_combined_subject_20s.loc[test,["normalised_pulse"]])
  y_train.append(df_combined_subject_20s.loc[train,"difficulty"].astype("string"))
  y_test.append(df_combined_subject_20s.loc[test,"difficulty"].astype("string"))

CPU times: total: 0 ns
Wall time: 11.5 ms


In [41]:
model_result = {
  "classifier":[],
  "accuracy_score":[],
  "AUC_score":[],
  "F1_score":[],
  "runtime(s)":[],
}

def get_class(class_list, prob_list):
  idx = list(prob_list).index(max(prob_list))
  return class_list[idx]

def log_result(classifier_name, class_list, y_test, y_pred_proba, runtime):
  y_pred = []
  for y_list in y_pred_proba:
    y_pred.append(get_class(class_list, y_list))
  acc = accuracy_score(y_test, y_pred)
  auc = roc_auc_score(y_test, y_pred_proba, multi_class='ovr')
  f1 = f1_score(y_test, y_pred, average='micro')
  model_result["classifier"].append(classifier_name)
  model_result["accuracy_score"].append(acc)
  model_result["AUC_score"].append(auc)
  model_result["F1_score"].append(f1)
  model_result["runtime(s)"].append(runtime)

  display(pd.DataFrame(model_result))
  pd.DataFrame(model_result).to_csv(data_root+"ppg_split_pilot_result.csv")

In [42]:
def run_model(classifier_name,classifier, padding = True,  fold = 0):
  start = time.time()
  
  # set up pipeline
  if padding:
    clf = PaddingTransformer() * classifier()
  else:
    clf = classifier()
  
  clf.fit(X_train[fold], y_train[fold])
  y_pred_proba = clf.predict_proba(X_test[fold])
  end = time.time()

  log_result(classifier_name, clf.classes_, y_test[fold], y_pred_proba, end-start)

## Classification using catch22

Refer to respiration_split_pilot_pycaret.ipynb

## RandomIntervalClassifier
extract at random interval and perform Rotation forest with 200 trees

In [16]:
run_model(
  "RandomIntervalClassifier", 
  lambda: RandomIntervalClassifier(n_intervals=5, n_jobs=1, random_state = 42),
  padding = True,
  fold = 0
  )

Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.382699


## Decision Trees with mean, std, slope


In [17]:
steps = [
    ("padding",PaddingTransformer()),
    (
        "extract",
        RandomIntervalFeatureExtractor(
            n_intervals="sqrt", features=[np.mean, np.std, _slope]
        ),
    ),
    ("clf", DecisionTreeClassifier()),
]
time_series_tree = Pipeline(steps)

In [18]:
start = time.time()
time_series_tree.fit(X_train[0], y_train[0])
y_pred_proba = time_series_tree.predict_proba(X_test[0])
end = time.time()
log_result('RandomeIntervalDecisionTree',time_series_tree.classes_, y_test[0], y_pred_proba, end-start)

Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.382699
1,RandomeIntervalDecisionTree,0.375,0.583333,0.375,2.12486


## Individual Boss


In [20]:
run_model(
  "IndividualBOSS", 
  lambda: IndividualBOSS(),
  padding = True,
  fold = 0
  )

Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981


## ContractableBoss

In [21]:
run_model(
  "ContractableBOSS", 
  lambda: ContractableBOSS(n_parameter_samples=10, max_ensemble_size=3),
  padding = True,
  fold = 0
  )

Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575


## Random Interval Spectral Ensemble

In [22]:
run_model(
  "RandomIntervalSpectralEnsemble", 
  lambda: RandomIntervalSpectralEnsemble(n_estimators=50, random_state=42),
  padding = True,
  fold = 0
  )

Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865


## Supervised Time Series Forest (STSF)


In [23]:
run_model(
  "SupervisedTimeSeriesForest", 
  lambda: SupervisedTimeSeriesForest(n_estimators=50, random_state=42),
  padding = True,
  fold = 0
  )

Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725


## Canonical Interval Forest (CIF)

In [24]:
run_model(
  "CanonicalIntervalForest", 
  lambda: CanonicalIntervalForest(n_estimators=5, att_subsample_size=10, random_state=42),
  padding = True,
  fold = 0
  )

Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219


## Diverse Representation Canonical Interval Forest (DrCIF)

In [25]:
run_model(
  "DiverseRepresentationCanonicalIntervalForest", 
  lambda: DrCIF(n_estimators=5, att_subsample_size=10, random_state=42),
  padding = True,
  fold = 0
  )

Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597


## ShapeletTransformClassifier

In [26]:
run_model(
  "ShapeletTransformClassifier", 
  lambda: ShapeletTransformClassifier(
    estimator=RotationForest(n_estimators=3),
    n_shapelet_samples=100,
    max_shapelets=10,
    batch_size=20,
    ),
  padding = True,
  fold = 0
  )

Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593


## RocketClassifier

In [27]:
run_model(
  "RocketClassifier", 
  lambda: RocketClassifier(num_kernels=500),
  padding = True,
  fold = 0
  )

Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


## KNeighborsTimeSeriesClassifier

In [28]:
from sktime.alignment.dtw_python import AlignerDTW
from sktime.dists_kernels.compose_from_align import DistFromAligner

start = time.time()
aligner = AlignerDTW()
dtw_dist = DistFromAligner(aligner)
knclassifier = KNeighborsTimeSeriesClassifier(n_neighbors=3, distance = dtw_dist, n_jobs= -1)
knclassifier.fit(X_train[0], y_train[0])
y_pred_proba = knclassifier.predict_proba(X_test[0])
end = time.time()

log_result('KNeighborsTimeSeriesClassifier_AlignerDTW', knclassifier.classes_ ,y_test[0], y_pred_proba, end-start)


Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


## KNeighborsTimeSeriesClassifier with padding

In [29]:
run_model(
  "KNeighborsTimeSeriesClassifier_padded", 
  lambda: KNeighborsTimeSeriesClassifier(n_neighbors=5, distance ="dtw", n_jobs= 1, leaf_size = 2000),
  padding = True,
  fold = 0
  )

Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


## ComposableTimeSeriesForestClassifier
https://www.sktime.org/en/v0.8.1/examples/02_classification_univariate.html

In [30]:
run_model(
  "ComposableTimeSeriesForestClassifier", 
  lambda: ComposableTimeSeriesForestClassifier(
    estimator=time_series_tree,
    n_estimators=100,
    bootstrap=True,
    oob_score=True,
    random_state=1,
    n_jobs=-1,
    ),
  padding = True,
  fold = 0
  )

Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


## FCN

In [31]:
run_model(
    "FCNClassifier_mixmax", 
    lambda: FCNClassifier(n_epochs=20,batch_size=4)  ,
    padding = True,
    fold = 0
    )



Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


## CNN

In [32]:

run_model(
    "CNNClassifier_minmax", 
    lambda: CNNClassifier(n_epochs=20,batch_size=4)  ,
    padding = True,
    fold = 0
    )



Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


## FAILED MODELS
### HIVECOTEV1

In [33]:
# run_model(
#   "HIVECOTEV1", 
#   lambda: HIVECOTEV1(),
#   padding = True,
#   fold = 0
#   )

# # STOPPED PREMATURELY BECAUSE NO OUTPUT EVEN AFTER 24 hours

# Modelling Scaled

In [3]:
df_combined_subject_20s_scaled = pd.read_pickle(data_root+"df_combined_subject_20s_scaled.pkl")

In [4]:
# perform train test split according by subject
# split into 5 different folds for CV
from sklearn.model_selection import GroupKFold
X_train, X_test, y_train, y_test = [], [], [], []
gss = GroupKFold(n_splits=5)
for train, test in gss.split(df_combined_subject_20s_scaled["normalised_pulse"], df_combined_subject_20s_scaled["difficulty"], df_combined_subject_20s_scaled["subject"]):
  X_train.append(df_combined_subject_20s_scaled.loc[train,["normalised_pulse"]])
  X_test.append(df_combined_subject_20s_scaled.loc[test,["normalised_pulse"]])
  y_train.append(df_combined_subject_20s_scaled.loc[train,"difficulty"].astype("string"))
  y_test.append(df_combined_subject_20s_scaled.loc[test,"difficulty"].astype("string"))

In [36]:
scaled_model_name = [
    'RandomIntervalClassifier_scaled',
    'IndividualBOSS_scaled',
    'ContractableBOSS_scaled',
    "RandomIntervalSpectralEnsemble_scaled", 
    "SupervisedTimeSeriesForest_scaled",
    "CanonicalIntervalForest_scaled", 
    "DiverseRepresentationCanonicalIntervalForest_scaled", 
    "ShapeletTransformClassifier_scaled", 
    "RocketClassifier_scaled", 
    "KNeighborsTimeSeriesClassifier_padded_scaled", 
    "ComposableTimeSeriesForestClassifier_scaled",
    "FCNClassifier_scaled", 
    "CNNClassifier_scaled", 
    ]
scaled_model=[
    RandomIntervalClassifier(n_intervals=5, n_jobs=1, random_state = 42),
    IndividualBOSS(),
    ContractableBOSS(n_parameter_samples=10, max_ensemble_size=3),
    RandomIntervalSpectralEnsemble(n_estimators=50, random_state=42),
    SupervisedTimeSeriesForest(n_estimators=50, random_state=42),
    CanonicalIntervalForest(n_estimators=5, att_subsample_size=10, random_state=42),
    DrCIF(n_estimators=5, att_subsample_size=10, random_state=42),
    ShapeletTransformClassifier(estimator=RotationForest(n_estimators=3),n_shapelet_samples=100,max_shapelets=10,batch_size=20,),
    RocketClassifier(num_kernels=500),
    KNeighborsTimeSeriesClassifier(n_neighbors=5, distance ="dtw", n_jobs= 1, leaf_size = 2000),
    ComposableTimeSeriesForestClassifier(estimator=time_series_tree,n_estimators=100,bootstrap=True,oob_score=True,random_state=1,n_jobs=-1),
    FCNClassifier(n_epochs=20,batch_size=4),
    CNNClassifier(n_epochs=20,batch_size=4),
    ]

# Not doing the following models as based on previous run, they have lower performance anyway
## Decision Trees with mean, std, slope


In [37]:
for i in range(len(scaled_model)):
    run_model(
        scaled_model_name[i], 
        lambda: scaled_model[i],
        padding = True,
        fold = 0
        )


Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285




Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285




Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


# Modelling MinMax

In [38]:
df_combined_subject_20s_minmax = pd.read_pickle(data_root+"df_combined_subject_20s_minmax.pkl")

In [39]:
# perform train test split according by subject
# split into 5 different folds for CV
from sklearn.model_selection import GroupKFold
X_train, X_test, y_train, y_test = [], [], [], []
gss = GroupKFold(n_splits=5)
for train, test in gss.split(df_combined_subject_20s_minmax["normalised_pulse"], df_combined_subject_20s_minmax["difficulty"], df_combined_subject_20s_minmax["subject"]):
  X_train.append(df_combined_subject_20s_minmax.loc[train,["normalised_pulse"]])
  X_test.append(df_combined_subject_20s_minmax.loc[test,["normalised_pulse"]])
  y_train.append(df_combined_subject_20s_minmax.loc[train,"difficulty"].astype("string"))
  y_test.append(df_combined_subject_20s_minmax.loc[test,"difficulty"].astype("string"))

In [40]:
minmax_model_name = [
    'RandomIntervalClassifier_minmax',
    'IndividualBOSS_minmax',
    'ContractableBOSS_minmax',
    "RandomIntervalSpectralEnsemble_minmax", 
    "SupervisedTimeSeriesForest_minmax",
    "CanonicalIntervalForest_minmax", 
    "DiverseRepresentationCanonicalIntervalForest_minmax", 
    "ShapeletTransformClassifier_minmax", 
    "RocketClassifier_minmax", 
    "KNeighborsTimeSeriesClassifier_padded_minmax", 
    "ComposableTimeSeriesForestClassifier_minmax", 
    "FCNClassifier_mixmax", 
    "CNNClassifier_minmax", 
    ]
minmax_model=[
    RandomIntervalClassifier(n_intervals=5, n_jobs=1, random_state = 42),
    IndividualBOSS(),
    ContractableBOSS(n_parameter_samples=10, max_ensemble_size=3),
    RandomIntervalSpectralEnsemble(n_estimators=50, random_state=42),
    SupervisedTimeSeriesForest(n_estimators=50, random_state=42),
    CanonicalIntervalForest(n_estimators=5, att_subsample_size=10, random_state=42),
    DrCIF(n_estimators=5, att_subsample_size=10, random_state=42),
    ShapeletTransformClassifier(estimator=RotationForest(n_estimators=3),n_shapelet_samples=100,max_shapelets=10,batch_size=20,),
    RocketClassifier(num_kernels=500),
    KNeighborsTimeSeriesClassifier(n_neighbors=5, distance ="dtw", n_jobs= 1, leaf_size = 2000),
    ComposableTimeSeriesForestClassifier(estimator=time_series_tree,n_estimators=100,bootstrap=True,oob_score=True,random_state=1,n_jobs=-1),
    FCNClassifier(n_epochs=20,batch_size=4)  ,
    CNNClassifier(n_epochs=20,batch_size=4)  ,
    ]

In [43]:
for i in range(len(minmax_model)):
    run_model(
        minmax_model_name[i], 
        lambda: minmax_model[i],
        padding = True,
        fold = 0
        )


Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285




Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285




Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
1,RandomeIntervalDecisionTree,0.388889,0.592593,0.388889,2.125046
2,IndividualBOSS,0.222222,0.481481,0.222222,3.261981
3,ContractableBOSS,0.236111,0.541024,0.236111,3.096575
4,RandomIntervalSpectralEnsemble,0.277778,0.573174,0.277778,4.349865
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
6,CanonicalIntervalForest,0.347222,0.606096,0.347222,11.834219
7,DiverseRepresentationCanonicalIntervalForest,0.291667,0.596579,0.291667,18.357597
8,ShapeletTransformClassifier,0.347222,0.540509,0.347222,4.731593
9,RocketClassifier,0.416667,0.611111,0.416667,5.228285


In [44]:
pd.DataFrame(model_result).sort_values("AUC_score", ascending = False)

Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
25,ComposableTimeSeriesForestClassifier_scaled,0.458333,0.709619,0.458333,45.703371
5,SupervisedTimeSeriesForest,0.444444,0.70589,0.444444,8.069725
21,DiverseRepresentationCanonicalIntervalForest_s...,0.388889,0.669624,0.388889,18.754414
12,ComposableTimeSeriesForestClassifier,0.402778,0.669239,0.402778,52.238524
19,SupervisedTimeSeriesForest_scaled,0.416667,0.664609,0.416667,8.700789
14,CNNClassifier_minmax,0.347222,0.659851,0.347222,4.964278
27,CNNClassifier_scaled,0.430556,0.657665,0.430556,5.219523
0,RandomIntervalClassifier,0.375,0.646991,0.375,12.695246
13,FCNClassifier_mixmax,0.263889,0.645833,0.263889,14.375528
32,SupervisedTimeSeriesForest_minmax,0.347222,0.643261,0.347222,8.349329


# Optimising 

In [4]:
# perform train test split according by subject
# split into 5 different folds for CV
from sklearn.model_selection import GroupKFold

df_combined_subject_20s_scaled = pd.read_pickle(data_root+"df_combined_subject_20s_scaled.pkl")

X_train, X_test, y_train, y_test = [], [], [], []
gss = GroupKFold(n_splits=5)
for train, test in gss.split(df_combined_subject_20s_scaled["normalised_pulse"], df_combined_subject_20s_scaled["difficulty"], df_combined_subject_20s_scaled["subject"]):
  X_train.append(df_combined_subject_20s_scaled.loc[train,["normalised_pulse"]])
  X_test.append(df_combined_subject_20s_scaled.loc[test,["normalised_pulse"]])
  y_train.append(df_combined_subject_20s_scaled.loc[train,"difficulty"].astype("string"))
  y_test.append(df_combined_subject_20s_scaled.loc[test,"difficulty"].astype("string"))

In [38]:
# PREPAREING DATA BASED ON SPLIT IN RESP
# Remove 'cp030' as it is the longest and has to be in the train data
test_subj = ['cp004', 'cp005', 'cp014', 'cp015', 'cp023', 'cp024', 'cp039', 'cp042',
 'cp008', 'cp016', 'cp018', 'cp029', 'cp043', 'cp009', 'cp017', 'cp019',
 'cp025']


In [39]:
X_train[0]=df_combined_subject_20s_scaled[["normalised_pulse"]][~df_combined_subject_20s_scaled["subject"].isin(test_subj)]
X_test[0]=df_combined_subject_20s_scaled[["normalised_pulse"]][df_combined_subject_20s_scaled["subject"].isin(test_subj)]
y_train[0]=df_combined_subject_20s_scaled["difficulty"][~df_combined_subject_20s_scaled["subject"].isin(test_subj)]
y_test[0]=df_combined_subject_20s_scaled["difficulty"][df_combined_subject_20s_scaled["subject"].isin(test_subj)]

## ComposableTimeSeriesForestClassifier_scaled

In [44]:
steps = [
    ("padding",PaddingTransformer()),
    (
        "extract",
        RandomIntervalFeatureExtractor(
            n_intervals="sqrt", features=[np.mean, np.std, _slope]
        ),
    ),
    ("clf", DecisionTreeClassifier()),
]
time_series_tree = Pipeline(steps)


In [45]:
last_fitness = 0
def on_generation(ga_instance):
    print("on_generation()")
    global last_fitness
    print("Generation = {generation}".format(generation=ga_instance.generations_completed))
    print("Fitness    = {fitness}".format(fitness=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1]))
    print("Change     = {change}".format(change=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1] - last_fitness))
    last_fitness = ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1]

def on_stop(ga_instance, last_population_fitness):
    print("on_stop()")

In [46]:
def fitness_func(solution, solution_idx):

    try:
        clf = PaddingTransformer() * ComposableTimeSeriesForestClassifier(
            estimator=time_series_tree,
            n_estimators=int(solution[0]),
            bootstrap= solution[1],
            # oob_score=True,
            random_state=1,
            n_jobs=-1,
            )
    except:
        return -9999
        
    print()
    clf.fit(X_train[0], y_train[0])
    y_pred_proba = clf.predict_proba(X_test[0])

    # MAPE
    try:
        loss = log_loss(y_test[0], y_pred_proba)
    except:
        return -9999

    fitness = 1/(loss)
    print(solution[0], solution[1], loss, fitness)
    
    return fitness

In [47]:
num_generations = 20
num_parents_mating = 4

fitness_function = fitness_func

sol_per_pop = 10
num_genes = 2

init_range_low = 10
init_range_high = 1000

parent_selection_type = "sss"
keep_parents = 1 

crossover_type = "single_point"
crossover_probability=0.5

mutation_type = "random"
mutation_probability = 0.25
# mutation_percent_genes=50

# random_mutation_min_val=-0.01
# random_mutation_max_val=0.01

gene_space = [{"low":10, "high":1000,"step": 10},[0,1]]
# gene_space = [{"low":1, "high":10,"step": 1}, {"low":1, "high":10,"step": 1}]


In [48]:
ga_instance = pygad.GA(num_generations=num_generations,
                       num_parents_mating=num_parents_mating,
                       fitness_func=fitness_function,
                       sol_per_pop=sol_per_pop,
                       num_genes=num_genes,
                       init_range_low=init_range_low,
                       init_range_high=init_range_high,
                       parent_selection_type=parent_selection_type,
                       keep_parents=keep_parents,
                       crossover_type=crossover_type,
                       mutation_type=mutation_type,
                       on_generation=on_generation,
                       on_stop=on_stop,
                       #stop_criteria=["saturate_15"],
                       allow_duplicate_genes=True,
                       gene_type=int,
                       gene_space = gene_space,
                      #  parallel_processing=5
                     #   save_solutions=True,
                    #    random_mutation_min_val= random_mutation_min_val,
                    #    random_mutation_max_val= random_mutation_max_val
                       )

If you do not want to mutate any gene, please set mutation_type=None.


In [49]:
ga_instance.run()


600 0 1.4044687952443446 0.7120129712999593

500 1 1.4050524082734532 0.7117172242911658

260 0 1.4057103853955473 0.7113840876395133

530 0 1.40297875554151 0.7127691677797561

260 0 1.4057103853955473 0.7113840876395133

890 0 1.4019532772275685 0.7132905327469609

930 1 1.4050200996628823 0.7117335903165641

410 1 1.4051084195139474 0.7116888534095598

810 1 1.4035230774684881 0.7124927377779094

360 0 1.4060091674956285 0.7112329159141909

530 1 1.40297875554151 0.7127691677797561

600 1 1.4044687952443446 0.7120129712999593

600 1 1.4044687952443446 0.7120129712999593

890 1 1.4019532772275685 0.7132905327469609

60 1 1.596508955432787 0.626366671227921

600 1 1.4044687952443446 0.7120129712999593

510 0 1.4042228237365102 0.7121376914662947

710 0 1.4033773710855653 0.7125667127056939
on_generation()
Generation = 1
Fitness    = 0.7132905327469609
Change     = 0.7132905327469609

360 0 1.4060091674956285 0.7112329159141909

420 0 1.4060810230126755 0.7111965694959709

690 1 1.403

In [50]:
solution, solution_fitness, solution_idx = ga_instance.best_solution()
print("Parameters of the best solution : {solution}".format(solution=solution))
print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness))
print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx))
if ga_instance.best_solution_generation != -1:
    print("Best fitness value reached after {best_solution_generation} generations.".format(best_solution_generation=ga_instance.best_solution_generation))


490 0 1.4062808682557932 0.7110955020246401

950 1 1.4046142453941515 0.7119392411682313

740 1 1.4042465485666729 0.7121256598570308

190 1 1.4062357880828296 0.7111182978519804
Parameters of the best solution : [760   1]
Fitness value of the best solution = 0.7133252873399052
Index of the best solution : 0
Best fitness value reached after 15 generations.


### Extract results

In [53]:
#extract y_pred out and classes
start = time.time()
clf = PaddingTransformer() * ComposableTimeSeriesForestClassifier(
            estimator=time_series_tree,
            n_estimators=190, #we continue using this first instead of 40 because the result is still worse than 100. We will do more generation later 
            bootstrap= True,
            oob_score=True,
            random_state=1,
            n_jobs=-1,
            )
clf.fit(X_train[0], y_train[0])
y_pred_proba = clf.predict_proba(X_test[0])
end = time.time()

log_result("ComposableTimeSeriesForestClassifier_scaled_optim190", clf.classes_, y_test[0], y_pred_proba, end-start)

Unnamed: 0,classifier,accuracy_score,AUC_score,F1_score,runtime(s)
0,ComposableTimeSeriesForestClassifier_scaled_optim,0.341463,0.620464,0.341463,58.8395
1,ComposableTimeSeriesForestClassifier_scaled_optim,0.365854,0.618258,0.365854,217.25363
2,ComposableTimeSeriesForestClassifier_scaled_op...,0.353659,0.617093,0.353659,33.045394
3,ComposableTimeSeriesForestClassifier_scaled_op...,0.341463,0.620464,0.341463,57.639098


In [17]:
# y_pred_proba = clf.predict_proba(X_train[0])
# end = time.time()

# log_result("ComposableTimeSeriesForestClassifier_scaled_optim", clf.classes_, y_train[0], y_pred_proba, end-start)

In [55]:
df_combined_all = pd.read_csv("df_combined_all.csv")

In [74]:
df_combined_all

Unnamed: 0,subject,run,resp_000,resp_01B,resp_02B,resp_03B,resp_04B,difficulty
0,cp003,1,0.850,0.050,0.025,0.075,0.000,000
1,cp003,2,0.825,0.050,0.025,0.000,0.100,000
2,cp004,1,0.875,0.050,0.000,0.025,0.050,000
3,cp004,2,0.500,0.100,0.075,0.175,0.150,000
4,cp005,1,0.925,0.025,0.000,0.000,0.050,000
...,...,...,...,...,...,...,...,...
482,cp042,2,0.625,0.275,0.075,0.025,0.000,04B
483,cp042,3,0.000,0.075,0.300,0.550,0.075,04B
484,cp043,1,0.000,0.200,0.300,0.475,0.025,04B
485,cp043,2,0.000,0.125,0.325,0.525,0.025,04B


In [68]:
# WE CAN"T COMBINE NOW BECAUSE 1) THERE"S MISSING 000 DATA iN PPG. 2) THE GROUPKFOLD IS NOT THE SAME. THEREFORE ABLE TO DO PROPER COMPARISON

# perform prediction on all data

df_combined_all_ppg = pd.DataFrame(clf.predict_proba(df_combined_subject_20s_scaled[['normalised_pulse']]))
df_combined_all_ppg.columns = 'ppg_'+clf.classes_
df_combined_all_ppg['difficulty'] = df_combined_subject_20s_scaled['difficulty']
df_combined_all_ppg['subject'] = df_combined_subject_20s_scaled['subject']
df_combined_all_ppg['run'] = df_combined_subject_20s_scaled['run'].astype(int)

In [75]:
df_combined_all = df_combined_all.merge(df_combined_all_ppg, how='left' , on=["subject","run", "difficulty"])
df_combined_all

Unnamed: 0,subject,run,resp_000,resp_01B,resp_02B,resp_03B,resp_04B,difficulty,ppg_01B,ppg_02B,ppg_03B,ppg_04B
0,cp003,1,0.850,0.050,0.025,0.075,0.000,000,,,,
1,cp003,2,0.825,0.050,0.025,0.000,0.100,000,,,,
2,cp004,1,0.875,0.050,0.000,0.025,0.050,000,,,,
3,cp004,2,0.500,0.100,0.075,0.175,0.150,000,,,,
4,cp005,1,0.925,0.025,0.000,0.000,0.050,000,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
482,cp042,2,0.625,0.275,0.075,0.025,0.000,04B,0.289474,0.357895,0.273684,0.078947
483,cp042,3,0.000,0.075,0.300,0.550,0.075,04B,0.178947,0.036842,0.157895,0.626316
484,cp043,1,0.000,0.200,0.300,0.475,0.025,04B,0.089474,0.121053,0.073684,0.715789
485,cp043,2,0.000,0.125,0.325,0.525,0.025,04B,0.189474,0.394737,0.205263,0.210526


In [77]:
df_combined_all.to_csv("df_combined_all.csv")

## DiverseRepresentationCanonicalIntervalForest_scaled

In [51]:
def fitness_func(solution, solution_idx):

    try:
        clf = PaddingTransformer() * DrCIF(n_estimators=int(solution[0]), att_subsample_size=int(solution[1]), random_state=42)
        # clf = PaddingTransformer() * ComposableTimeSeriesForestClassifier(
        #     estimator=time_series_tree,
        #     n_estimators=int(solution[0]),
        #     bootstrap= solution[1],
        #     # oob_score=True,
        #     random_state=1,
        #     n_jobs=-1,
        #     )
    except:
        return -9999
        
    print()
    clf.fit(X_train[0], y_train[0])
    y_pred_proba = clf.predict_proba(X_test[0])

    # MAPE
    try:
        loss = log_loss(y_test[0], y_pred_proba)
    except:
        return -9999

    fitness = 1/(loss)
    print(solution[0], solution[1], loss, fitness)
    
    return fitness

In [52]:
num_generations = 10
num_parents_mating = 4

fitness_function = fitness_func

sol_per_pop = 10
num_genes = 2

init_range_low = 1
init_range_high = 10

parent_selection_type = "sss"
# keep_parents = -1 

crossover_type = "single_point"
crossover_probability=0.5

mutation_type = "random"
mutation_probability = 0.5
mutation_percent_genes=50

# random_mutation_min_val=-0.01
# random_mutation_max_val=0.01

# gene_space = [{"low":10, "high":1000,"step": 10},[0,1]]
gene_space = [{"low":1, "high":10,"step": 1}, {"low":1, "high":10,"step": 1}]


In [53]:
ga_instance = pygad.GA(num_generations=num_generations,
                       num_parents_mating=num_parents_mating,
                       fitness_func=fitness_function,
                       sol_per_pop=sol_per_pop,
                       num_genes=num_genes,
                       init_range_low=init_range_low,
                       init_range_high=init_range_high,
                       parent_selection_type=parent_selection_type,
                       keep_parents=keep_parents,
                       crossover_type=crossover_type,
                       mutation_type=mutation_type,
                       on_generation=on_generation,
                       on_stop=on_stop,
                       #stop_criteria=["saturate_15"],
                       allow_duplicate_genes=True,
                       gene_type=int,
                       gene_space = gene_space,
                       parallel_processing=5,
                     #   save_solutions=True,
                    #    random_mutation_min_val= random_mutation_min_val,
                    #    random_mutation_max_val= random_mutation_max_val
                       )

If you do not want to mutate any gene, please set mutation_type=None.


In [33]:
# num_generations = 10
# num_parents_mating = 4

# fitness_function = fitness_func

# sol_per_pop = 10
# num_genes = 2

# init_range_low = 0
# init_range_high = 0.3

# parent_selection_type = "sss"
# keep_parents = -1 

# crossover_type = "single_point"
# crossover_probability=0.5

# mutation_type = "random"
# mutation_probability = 0.5
# mutation_percent_genes=50

# # random_mutation_min_val=-0.01
# # random_mutation_max_val=0.01

# # gene_space = [{"low":10, "high":1000,"step": 10},[0,1]]
# gene_space = [{"low":1, "high":10,"step": 1}, {"low":1, "high":10,"step": 1}]


ga_instance.run()

4 3 ClassifierPipeline(classifier=DrCIF(att_subsample_size=3, n_estimators=4,
                                    random_state=42),
                   transformers=[PaddingTransformer()])
13.11911329212544 0.07622466379646524
9 4 ClassifierPipeline(classifier=DrCIF(att_subsample_size=4, n_estimators=9,
                                    random_state=42),
                   transformers=[PaddingTransformer()])
4.144856369567738 0.24126288364107748
4 8 ClassifierPipeline(classifier=DrCIF(att_subsample_size=8, n_estimators=4,
                                    random_state=42),
                   transformers=[PaddingTransformer()])
8.700300016055788 0.11493856512471648
2 2 ClassifierPipeline(classifier=DrCIF(att_subsample_size=2, n_estimators=2,
                                    random_state=42),
                   transformers=[PaddingTransformer()])
19.282969481410696 0.051859232623068095
7 9 ClassifierPipeline(classifier=DrCIF(att_subsample_size=9, n_estimators=7,
                

In [35]:
solution, solution_fitness, solution_idx = ga_instance.best_solution()
print("Parameters of the best solution : {solution}".format(solution=solution))
print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness))
print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx))
if ga_instance.best_solution_generation != -1:
    print("Best fitness value reached after {best_solution_generation} generations.".format(best_solution_generation=ga_instance.best_solution_generation))

8 2 ClassifierPipeline(classifier=DrCIF(att_subsample_size=2, n_estimators=8,
                                    random_state=42),
                   transformers=[PaddingTransformer()])
4.647412805096963 0.21517348295448785
4 1 ClassifierPipeline(classifier=DrCIF(att_subsample_size=1, n_estimators=4,
                                    random_state=42),
                   transformers=[PaddingTransformer()])
1.4738129793359844 0.6785121409709274
8 9 ClassifierPipeline(classifier=DrCIF(att_subsample_size=9, n_estimators=8,
                                    random_state=42),
                   transformers=[PaddingTransformer()])
6.40756563948302 0.15606551009607492
7 6 ClassifierPipeline(classifier=DrCIF(att_subsample_size=6, n_estimators=7,
                                    random_state=42),
                   transformers=[PaddingTransformer()])
6.065388785324807 0.16486989299342153
4 1 ClassifierPipeline(classifier=DrCIF(att_subsample_size=1, n_estimators=4,
                   

# Try looking
- filter out different subject with different resp hz then do training for them.
- do gridsearch using genetic to improve result

In [2]:
import pygad, numpy

In [3]:
function_inputs = [4,-2,3.5,5,-11,-4.7]
desired_output = 44

In [4]:
def fitness_func(solution, solution_idx):
    output = numpy.sum(solution*function_inputs)
    fitness = 1.0 / numpy.abs(output - desired_output)
    return fitness

In [5]:
fitness_function = fitness_func

num_generations = 50
num_parents_mating = 4

sol_per_pop = 8
num_genes = len(function_inputs)

init_range_low = -2
init_range_high = 5

parent_selection_type = "sss"
keep_parents = 1

crossover_type = "single_point"

mutation_type = "random"
mutation_percent_genes = 10

In [6]:
ga_instance = pygad.GA(num_generations=num_generations,
                       num_parents_mating=num_parents_mating,
                       fitness_func=fitness_function,
                       sol_per_pop=sol_per_pop,
                       num_genes=num_genes,
                       init_range_low=init_range_low,
                       init_range_high=init_range_high,
                       parent_selection_type=parent_selection_type,
                       keep_parents=keep_parents,
                       crossover_type=crossover_type,
                       mutation_type=mutation_type,
                       mutation_percent_genes=mutation_percent_genes)

If you do not want to mutate any gene, please set mutation_type=None.


In [7]:
ga_instance.run()

In [8]:
ga_instance.best_solution()[2]

0

In [9]:
solution, solution_fitness, solution_idx = ga_instance.best_solution()
print("Parameters of the best solution : {solution}".format(solution=solution))
print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness))

prediction = numpy.sum(numpy.array(function_inputs)*solution)
print("Predicted output based on the best solution : {prediction}".format(prediction=prediction))

Parameters of the best solution : [ 4.16334847  3.30225401  0.15486736  3.58753694 -0.66158332 -1.74594477]
Fitness value of the best solution = 83.58863857772198
Predicted output based on the best solution : 44.01196334833316
