In [1]:
import argparse
import os
import pickle
import sys

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from sktime.base import load
from tabulate import tabulate

import classify
from analyze_results import modified_accuracy_score

In [2]:
# Pickled dataset passed to load_data(); the path is relative to the working directory.
data_file = '../datasets/data_directional_200-class_100-samples.pkl'
# Text file read line by line into `domains`; also a relative path.
domain_file = '../collection_scripts/top-1k-curated'

In [3]:
def load_data(path):
    '''
    Reads a pickled dataset from disk.

    path: location of a pickle file holding a mapping with 'data' and
          'labels' keys.

    Returns: (data, labels), exactly as stored under those two keys.
    '''
    # NOTE: unpickling can execute arbitrary code; only use this on trusted files.
    with open(path, 'rb') as handle:
        payload = pickle.load(handle)
    return payload['data'], payload['labels']

In [4]:
# Read the domain list: one entry per line, with surrounding whitespace trimmed.
with open(domain_file, 'r') as domain_handle:
    domains = [entry.strip() for entry in domain_handle]

In [5]:
# Load the full dataset; `data` and `labels` are the inputs to the slicing cell further down.
print('Loading data...')
data, labels = load_data(data_file)

Loading data...


In [11]:
# Load the saved inputs of the minirocket/IAT run. `pickle` is already imported in the
# first cell, so it is not re-imported here.
# NOTE: pickle.load can execute arbitrary code; only load files you produced yourself.
with open('./minirocket_iat_ridge/predictions.pkl', 'rb') as predictions_file:
    iat_stuff = pickle.load(predictions_file)
X_iat = iat_stuff['X']

In [69]:
# Add a random delay to the 'time' values of one instance of X_iat, but only for the
# rows whose 'packet_size' is greater than 100.
i = 3
# Seeded generator so the injected delays are the same on every run.
rng = np.random.default_rng(589)
delays = pd.Series(rng.uniform(0, 0.1, len(X_iat.loc[i].index)))
# Named mask instead of `filter`, which would shadow the builtin for every later cell.
large_packet_mask = X_iat.loc[i, 'packet_size'] > 100
print(delays)
# Multiplying by the boolean mask zeroes the delay of every row that is not selected.
delays *= large_packet_mask
print(delays)
# The first row is never delayed.
delays.loc[0] = 0
# NOTE: this writes back into X_iat, so re-running the cell adds the delays again.
X_iat.loc[i, 'time'] = (X_iat.loc[i, 'time'] + delays).values

0     0.001769
1     0.046898
2     0.036546
3     0.011142
4     0.004688
5     0.004671
6     0.088225
7     0.054433
8     0.087886
9     0.053110
10    0.052146
11    0.061155
12    0.015638
13    0.069086
14    0.048865
15    0.056197
16    0.005063
17    0.052070
18    0.021838
19    0.030407
20    0.000564
21    0.079756
22    0.038431
23    0.003183
24    0.044764
25    0.009144
26    0.038201
27    0.074162
28    0.073644
29    0.013863
30    0.024590
31    0.027598
dtype: float64
0     0.001769
1     0.046898
2     0.000000
3     0.000000
4     0.000000
5     0.004671
6     0.088225
7     0.000000
8     0.000000
9     0.000000
10    0.052146
11    0.061155
12    0.000000
13    0.000000
14    0.000000
15    0.056197
16    0.005063
17    0.052070
18    0.000000
19    0.030407
20    0.000000
21    0.000000
22    0.000000
23    0.000000
24    0.000000
25    0.009144
26    0.038201
27    0.000000
28    0.000000
29    0.000000
30    0.000000
31    0.000000
dtype: float64


In [44]:
import numpy as np

In [7]:
# Size of the longest sample; it becomes the first entry of the `lengths` sweep.
max_length = max(len(sample) for sample in data)
print(max_length)

374


In [8]:
# Prefix lengths to evaluate: the full sample length plus progressively shorter cut-offs.
lengths = [max_length, 200, 100, 50, 25, 10, 5]

In [9]:
def make_sample_list(data):
    '''
    Converts each mapping-style sample into a list of (key, value) pairs.

    data: iterable of samples that expose .items().

    Returns: a list with one list of (key, value) tuples per sample, in the
             iteration order of each sample's items.
    '''
    return [
        [(key, value) for key, value in sample.items()]
        for sample in data
    ]

# Truncate every sample to its leading entries.
def divide_data(samples_as_lists, labels, length):
    '''
    Keeps only the first `length` (key, value) pairs of every sample.

    samples_as_lists: one list of (key, value) pairs per sample, indexed in
                      step with labels.
    labels: one label per sample.
    length: number of leading pairs to keep from each sample.

    Returns: (new_data, new_labels), where new_data holds one dict per sample
             rebuilt from the kept pairs and new_labels holds the labels in
             the same order. Each sample yields exactly one slice; no
             overlapping windows are produced.
    '''
    positions = range(len(labels))
    new_data = [
        {entry[0]: entry[1] for entry in samples_as_lists[pos][:length]}
        for pos in positions
    ]
    new_labels = [labels[pos] for pos in positions]
    return new_data, new_labels

In [10]:
# Build one truncated copy of the dataset per entry in `lengths`, keyed by that length.
print('slicing data')
samples_as_lists = make_sample_list(data)
all_data = {}
for slice_length in lengths:
    # divide_data returns the (new_data, new_labels) pair that is stored as-is.
    all_data[slice_length] = divide_data(samples_as_lists, labels, slice_length)
print('done slicing')

slicing data
done slicing


In [11]:
methods = ['rocket', 'catch22']
# Root directory of the saved models. The default is the original author's local
# checkout; set the EECS589_MODEL_ROOT environment variable to run elsewhere.
MODEL_ROOT = os.environ.get(
    'EECS589_MODEL_ROOT',
    '/home/jay/Documents/Fall 2022/EECS 589/Project/eecs589-patel-schauer/classification',
)
# One saved-model path per method; with the default root these are the original paths.
models = {
    method: f'{MODEL_ROOT}/directional_200-class_100-samples_{method}/model'
    for method in methods
}

In [12]:
def run_classifier(data, labels):
    '''
    Scores every saved model listed in `methods` on a held-out split.

    data: list of samples (sized objects) to evaluate on.
    labels: labels aligned with data.

    Returns: dict mapping each method name to
             [max_length, accuracy, f1, modified_accuracy].
    '''
    max_length = max(map(len, data))
    # Only the test part of the split is used; the models are loaded, not trained here.
    # NOTE(review): the fixed random_state presumably reproduces the split used when
    # the models were trained - confirm against the training script.
    _, X_test, _, y_test = train_test_split(
        data, labels, random_state=589, test_size=0.25, shuffle=True
    )
    X_test = classify.make_directional_dataframe(X_test)
    y_test = pd.Series(y_test)

    output = {}
    for method in methods:
        print('Loading model')
        clf = load(models[method])
        print('Making predictions...')
        y_pred = clf.predict(X_test)
        # scikit-learn metrics take (y_true, y_pred); the same order is used for both.
        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='weighted')
        modified_accuracy = modified_accuracy_score(y_test, y_pred, domains)
        table = [[max_length, accuracy, f1, modified_accuracy]]
        print(method)
        print(tabulate(table, headers=['Length', 'Accuracy', 'F1', 'Modified Accuracy']))
        output[method] = table[0]
    return output

In [13]:
# Evaluate every method on every truncated dataset and keep one result row per length.
RESULT_HEADERS = ['Length', 'Accuracy', 'F1', 'Modified Accuracy']
output = {m: [] for m in methods}
# The loop uses its own names: rebinding the notebook-level `data`/`labels` here would
# leave them pointing at the last slice and break re-runs of the earlier cells.
for length, (sliced_data, sliced_labels) in all_data.items():
    print(f'Running classifier with length {length}')
    result = run_classifier(sliced_data, sliced_labels)
    print(f'Finished running classifier with length {length}')
    for m in methods:
        print(f'Results so far for method {m}')
        output[m].append(result[m])
        print(tabulate(output[m], headers=RESULT_HEADERS))
for m in methods:
    print(f'Final results with method {m}')
    print(tabulate(output[m], headers=RESULT_HEADERS))

Running classifier with length 374
Loading model
Making predictions...


  for _label, _series in multi_ind_dataframe.iteritems():  # noqa


rocket
  Length    Accuracy        F1    Modified Accuracy
--------  ----------  --------  -------------------
     374    0.926218  0.922765             0.960784
Loading model




Making predictions...


  for _label, _series in multi_ind_dataframe.iteritems():  # noqa


catch22
  Length    Accuracy        F1    Modified Accuracy
--------  ----------  --------  -------------------
     374    0.925814  0.924749             0.957348
Finished running classifier with length 374
Results so far for method rocket
  Length    Accuracy        F1    Modified Accuracy
--------  ----------  --------  -------------------
     374    0.926218  0.922765             0.960784
Results so far for method catch22
  Length    Accuracy        F1    Modified Accuracy
--------  ----------  --------  -------------------
     374    0.925814  0.924749             0.957348
Running classifier with length 200
Loading model
Making predictions...


  for _label, _series in multi_ind_dataframe.iteritems():  # noqa


rocket
  Length    Accuracy        F1    Modified Accuracy
--------  ----------  --------  -------------------
     200    0.913483  0.907741             0.948049
Loading model
Making predictions...


  for _label, _series in multi_ind_dataframe.iteritems():  # noqa


catch22
  Length    Accuracy        F1    Modified Accuracy
--------  ----------  --------  -------------------
     200    0.902567  0.894581             0.934101
Finished running classifier with length 200
Results so far for method rocket
  Length    Accuracy        F1    Modified Accuracy
--------  ----------  --------  -------------------
     374    0.926218  0.922765             0.960784
     200    0.913483  0.907741             0.948049
Results so far for method catch22
  Length    Accuracy        F1    Modified Accuracy
--------  ----------  --------  -------------------
     374    0.925814  0.924749             0.957348
     200    0.902567  0.894581             0.934101
Running classifier with length 100
Loading model
Making predictions...


  for _label, _series in multi_ind_dataframe.iteritems():  # noqa


rocket
  Length    Accuracy       F1    Modified Accuracy
--------  ----------  -------  -------------------
     100    0.768547  0.73545             0.803922
Loading model
Making predictions...


  for _label, _series in multi_ind_dataframe.iteritems():  # noqa


catch22
  Length    Accuracy        F1    Modified Accuracy
--------  ----------  --------  -------------------
     100    0.745704  0.713232             0.777643
Finished running classifier with length 100
Results so far for method rocket
  Length    Accuracy        F1    Modified Accuracy
--------  ----------  --------  -------------------
     374    0.926218  0.922765             0.960784
     200    0.913483  0.907741             0.948049
     100    0.768547  0.73545              0.803922
Results so far for method catch22
  Length    Accuracy        F1    Modified Accuracy
--------  ----------  --------  -------------------
     374    0.925814  0.924749             0.957348
     200    0.902567  0.894581             0.934101
     100    0.745704  0.713232             0.777643
Running classifier with length 50
Loading model
Making predictions...


  for _label, _series in multi_ind_dataframe.iteritems():  # noqa


rocket
  Length    Accuracy        F1    Modified Accuracy
--------  ----------  --------  -------------------
      50    0.556297  0.494494             0.596119
Loading model
Making predictions...


  for _label, _series in multi_ind_dataframe.iteritems():  # noqa


catch22
  Length    Accuracy        F1    Modified Accuracy
--------  ----------  --------  -------------------
      50    0.496665  0.432788             0.535072
Finished running classifier with length 50
Results so far for method rocket
  Length    Accuracy        F1    Modified Accuracy
--------  ----------  --------  -------------------
     374    0.926218  0.922765             0.960784
     200    0.913483  0.907741             0.948049
     100    0.768547  0.73545              0.803922
      50    0.556297  0.494494             0.596119
Results so far for method catch22
  Length    Accuracy        F1    Modified Accuracy
--------  ----------  --------  -------------------
     374    0.925814  0.924749             0.957348
     200    0.902567  0.894581             0.934101
     100    0.745704  0.713232             0.777643
      50    0.496665  0.432788             0.535072
Running classifier with length 25
Loading model
Making predictions...


  for _label, _series in multi_ind_dataframe.iteritems():  # noqa


rocket
  Length    Accuracy        F1    Modified Accuracy
--------  ----------  --------  -------------------
      25     0.29149  0.243864             0.300586
Loading model
Making predictions...


  for _label, _series in multi_ind_dataframe.iteritems():  # noqa


catch22
  Length    Accuracy        F1    Modified Accuracy
--------  ----------  --------  -------------------
      25    0.201132  0.130746             0.201334
Finished running classifier with length 25
Results so far for method rocket
  Length    Accuracy        F1    Modified Accuracy
--------  ----------  --------  -------------------
     374    0.926218  0.922765             0.960784
     200    0.913483  0.907741             0.948049
     100    0.768547  0.73545              0.803922
      50    0.556297  0.494494             0.596119
      25    0.29149   0.243864             0.300586
Results so far for method catch22
  Length    Accuracy        F1    Modified Accuracy
--------  ----------  --------  -------------------
     374    0.925814  0.924749             0.957348
     200    0.902567  0.894581             0.934101
     100    0.745704  0.713232             0.777643
      50    0.496665  0.432788             0.535072
      25    0.201132  0.130746             0.201334

  for _label, _series in multi_ind_dataframe.iteritems():  # noqa


rocket
  Length    Accuracy         F1    Modified Accuracy
--------  ----------  ---------  -------------------
      10   0.0432585  0.0135493            0.0432585
Loading model
Making predictions...


  for _label, _series in multi_ind_dataframe.iteritems():  # noqa


catch22
  Length    Accuracy         F1    Modified Accuracy
--------  ----------  ---------  -------------------
      10   0.0501314  0.0144207            0.0501314
Finished running classifier with length 10
Results so far for method rocket
  Length    Accuracy         F1    Modified Accuracy
--------  ----------  ---------  -------------------
     374   0.926218   0.922765             0.960784
     200   0.913483   0.907741             0.948049
     100   0.768547   0.73545              0.803922
      50   0.556297   0.494494             0.596119
      25   0.29149    0.243864             0.300586
      10   0.0432585  0.0135493            0.0432585
Results so far for method catch22
  Length    Accuracy         F1    Modified Accuracy
--------  ----------  ---------  -------------------
     374   0.925814   0.924749             0.957348
     200   0.902567   0.894581             0.934101
     100   0.745704   0.713232             0.777643
      50   0.496665   0.432788             0.535072

  for _label, _series in multi_ind_dataframe.iteritems():  # noqa


rocket
  Length    Accuracy           F1    Modified Accuracy
--------  ----------  -----------  -------------------
       5  0.00848999  0.000645849           0.00848999
Loading model
Making predictions...


  for _label, _series in multi_ind_dataframe.iteritems():  # noqa


catch22
  Length    Accuracy          F1    Modified Accuracy
--------  ----------  ----------  -------------------
       5   0.0119264  0.00137544            0.0119264
Finished running classifier with length 5
Results so far for method rocket
  Length    Accuracy           F1    Modified Accuracy
--------  ----------  -----------  -------------------
     374  0.926218    0.922765              0.960784
     200  0.913483    0.907741              0.948049
     100  0.768547    0.73545               0.803922
      50  0.556297    0.494494              0.596119
      25  0.29149     0.243864              0.300586
      10  0.0432585   0.0135493             0.0432585
       5  0.00848999  0.000645849           0.00848999
Results so far for method catch22
  Length    Accuracy          F1    Modified Accuracy
--------  ----------  ----------  -------------------
     374   0.925814   0.924749              0.957348
     200   0.902567   0.894581              0.934101
     100   0.745704   0