In [1]:
import pandas as pd 
import numpy as np 
from sklearn.metrics import accuracy_score

In [2]:
# Helper function to get classifier name
def get_clf_name(estimator):
    """Return the name of the class that ``estimator`` is an instance of.

    Parameters
    ----------
    estimator : object
        Any object; in this notebook, a classifier instance.

    Returns
    -------
    str
        The value of ``estimator.__class__.__name__``.
    """
    estimator_cls = estimator.__class__
    return estimator_cls.__name__

def get_clf_class(estimator):
    """Return the classifier family that ``estimator`` belongs to.

    The family is read from the dotted module path of the estimator's class:
    it is the third component of that path, e.g. ``"dictionary_based"`` for a
    class defined under ``sktime.classification.dictionary_based``.

    Parameters
    ----------
    estimator : object
        The classifier instance to inspect.

    Returns
    -------
    str
        The third component of the class's module path, or the last
        component when the path has fewer than three parts.
    """
    # Inspect the argument itself; ``__class__`` is used instead of the
    # ``type`` builtin so the helper keeps working even if ``type`` has been
    # rebound in the notebook namespace.
    module_parts = estimator.__class__.__module__.split(".")
    if len(module_parts) > 2:
        return module_parts[2]
    # Short module paths (e.g. "builtins") have no third component.
    return module_parts[-1]


In [3]:
from sktime.datatypes._panel._convert import (
    from_2d_array_to_nested,
    from_nested_to_2d_array,
    is_nested_dataframe,
)

  VALID_INDEX_TYPES = (pd.Int64Index, pd.RangeIndex, pd.PeriodIndex, pd.DatetimeIndex)
  VALID_INDEX_TYPES = (pd.Int64Index, pd.RangeIndex, pd.PeriodIndex, pd.DatetimeIndex)
  VALID_MULTIINDEX_TYPES = (pd.Int64Index, pd.RangeIndex)
  VALID_INDEX_TYPES = (pd.Int64Index, pd.RangeIndex, pd.PeriodIndex, pd.DatetimeIndex)
  VALID_MULTIINDEX_TYPES = (pd.Int64Index, pd.RangeIndex)


# Get Dataset

1. List the names of all the files that will be used in the training  
2. Since the files only contain the x values, the class label is taken from the number in the file name (e.g. s1 = 1, s2 = 2)  
3. Iterate over all file_names and create a df for each file  
4. Convert the 2d_array dataframe into a nested df  
5. Add the appropriate y value for every row and append the nested df to the list  
6. Concatenate all the dataframes in the list  

In [4]:
file_names = ["s1_no_vol", "s2_no_vol"]

# One nested frame per input file, plus one class label per loaded row.
dfs = []
y = []
for file_name in file_names:
    # Only the first 100 rows of each CSV are read.
    df = pd.read_csv(f"../datasets/{file_name}.csv", nrows=100)
    nested_df = from_2d_array_to_nested(df)
    dfs.append(nested_df)
    # The label is the second character of the file name ("s1_no_vol" -> "1"),
    # repeated once for every row that was read.
    y += [file_name[1]] * len(df)

combined_df = pd.concat(dfs)
y = np.asarray(y)

In [5]:
from sklearn.model_selection import train_test_split

# A fixed seed keeps the split, and therefore the reported accuracies,
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    combined_df, y, random_state=42
)

## Define the classifiers

Define the classifiers that will be used and store them in a list

In [6]:
from sktime.classification.dictionary_based import IndividualBOSS
from sktime.classification.dictionary_based import ContractableBOSS

# Classifiers to compare; each entry is fitted and scored further down.
clfs = [
    ContractableBOSS(n_parameter_samples=25, max_ensemble_size=5),
    IndividualBOSS(),
]

In [7]:
# Family name of each classifier, in the same order as `clfs`.
types = []

for clf in clfs:
    # Pass the classifier itself (not the `type` builtin) to the helper.
    types.append(get_clf_class(clf))

## Classify the Dataset for every classifier

Classify the dataset with each classifier and save their accuracy score

In [8]:
# Accuracy score of each classifier; one entry is appended per element of `clfs`.
acc_scores = []

In [9]:
# Start from an empty list so that re-running this cell neither appends
# duplicate scores nor fails when `acc_scores` has been rebound to a tuple.
acc_scores = []
for clf in clfs:
    # Fit on the training split, then score predictions on the held-out split.
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    acc_score = accuracy_score(y_test, y_pred)
    acc_scores.append(acc_score)

## Sort According to Type


In [10]:
# Order the three parallel sequences together by classifier type.
# Sorting is keyed on the type only, so entries with equal types keep their
# original relative order; each name is rebound to a tuple afterwards.
rows_by_type = sorted(zip(types, acc_scores, clfs), key=lambda row: row[0])
types, acc_scores, clfs = zip(*rows_by_type)

## Print the results

Iterate over the classifiers and their accuracy scores and display them in a simple "table"

In [11]:
# Header row; the column widths match the rows printed below.
print(f"{'Type' : <20}{'Name' : <40}{'Accuracy' : <15}")
# Only `clfs` and `acc_scores` are iterated: the type column is derived from
# the classifier itself, and no loop variable is named `type`, so the
# builtin is not shadowed for the rest of the session.
for clf, acc_score in zip(clfs, acc_scores):
    # Accuracy as a whole-number percentage, e.g. 0.98 -> "98".
    acc_score_perc = '{:0.0f}'.format(acc_score*100)
    print(f"{get_clf_class(clf): <20}{get_clf_name(clf) : <40}{(acc_score_perc)+'%':<15}")


Type                Name                                    Accuracy       
dictionary_based    ContractableBOSS                        98%            
dictionary_based    IndividualBOSS                          90%            
