In [None]:
# standard library
from functools import cmp_to_key
from os import listdir
from time import perf_counter, time

# third-party
import numpy as np
import pandas as pd
from tqdm import tqdm

# plot
import matplotlib.pyplot as plt
from sklearn.manifold import MDS

# project-local
from src.OPFInterface import OPFClassifier # cpp
from src.py_opf import OptimumPathForestClassifier # py
from src.utils import error, read_df, euclidean_distance_classify, dtw_distance_classify

In [None]:
# Dataset names are the entries of the UCR archive folder. os.listdir returns
# them in arbitrary order, so sort them to keep the row order of the results
# table reproducible across machines and runs.
df_names = sorted(listdir('data/UCRArchive_2018'))

# Column store for the error table: column name -> per-dataset values.
df = {}

In [None]:
# Store the names as a plain list, like the ndarray error columns of this table:
# pd.DataFrame raises "Mixing dicts with non-Series may lead to ambiguous
# ordering" when a dict column is combined with list/ndarray columns.
df['name'] = list(df_names)

In [None]:
# 1NN-ED baseline: take the 'ED' entry of the fifth value returned by read_df
# for every dataset (no classifier is run in this cell).
df['1NN-ED'] = np.array(list(map(lambda df_name: read_df(df_name)[4]['ED'], df_names)))

In [None]:
# 1NN-DTW: classify X_test with dtw_distance_classify and record the value of
# error(predictions, y_test) for every dataset.
nn_dtw_errors = []
for df_name in tqdm(df_names):
    X, y, X_test, y_test, df_errors = read_df(df_name)
    nn_dtw_predictions = dtw_distance_classify(X, y, X_test)
    nn_dtw_errors.append(error(nn_dtw_predictions, y_test))
df['1NN-DTW'] = np.array(nn_dtw_errors)

In [None]:
# OPF-ED: fit the C++ OPFClassifier with the 'euclidean-distance' cost on
# (X, y), classify X_test and record error(preds, y_test) for every dataset.
opf_ed_errors = []
for df_name in tqdm(df_names):
    X, y, X_test, y_test, df_errors = read_df(df_name)

    opf = OPFClassifier('euclidean-distance')
    opf.fit(X, y)
    preds = opf.classify(X_test)

    opf_ed_errors.append(error(preds, y_test))
df['OPF-ED'] = np.array(opf_ed_errors)

In [None]:
# OPF-DTW: same procedure as the other error columns, but with the Python
# OptimumPathForestClassifier and the 'dtw-distance' cost.
opf_dtw_errors = []
for df_name in tqdm(df_names):
    X, y, X_test, y_test, df_errors = read_df(df_name)

    opf = OptimumPathForestClassifier('dtw-distance')
    opf.fit(X, y)
    preds = opf.classify(X_test)

    opf_dtw_errors.append(error(preds, y_test))
df['OPF-DTW'] = np.array(opf_dtw_errors)

In [None]:
# Turn the column store into a table (`df` is rebound from dict to DataFrame)
# and save it as CSV without the index column.
df = pd.DataFrame(data=df)
df.to_csv(path_or_buf='all_errors.csv', index=False)

## Add times: runtime measurements on a subset of the datasets

In [None]:
# Number of timed repetitions per dataset and classifier.
ITERATIONS = 10

# Datasets used for the runtime measurements; this rebinds `df_names`.
df_names = [
    'WordSynonyms',
    'SemgHandSubjectCh2',
    'PLAID',
    'MelbournePedestrian',
    'ChlorineConcentration',
    'ShapesAll',
]

# Fresh column store for the timing table; this rebinds `df`.
df = {}

In [None]:
# Row index -> dataset name, the same dict layout the timing columns use.
df['name'] = dict(enumerate(df_names))

In [None]:
# Runtime of the C++ OPF (OPFClassifier) with the 'euclidean-distance' cost.
# Each of the ITERATIONS repetitions times classifier construction, fit(X, y)
# and classify(X_test) for one dataset.
# perf_counter() is a high-resolution clock meant for measuring short
# durations; time() follows the system wall clock and can jump if it is
# adjusted while a measurement is running.
opf_ed_fast_runtimes = []  # [dataset row][repetition] -> seconds

for df_name in tqdm(df_names):
    X, y, X_test, y_test, df_errors = read_df(df_name)

    runtimes = []
    for it in range(ITERATIONS):
        start_time = perf_counter()

        opf = OPFClassifier('euclidean-distance')
        opf.fit(X, y)
        preds = opf.classify(X_test)
        runtime = perf_counter() - start_time

        runtimes.append(runtime)

    opf_ed_fast_runtimes.append(runtimes)

# Columns are stored as {row index: value} dicts, matching the 'name' column.
df['OPF-ED-fast-time-mean'] = {i: np.mean(r) for i, r in enumerate(opf_ed_fast_runtimes)}
df['OPF-ED-fast-time-std'] = {i: np.std(r) for i, r in enumerate(opf_ed_fast_runtimes)}
for it in range(ITERATIONS):
    df[f'OPF-ED-fast-time-it{it}'] = {i: r[it] for i, r in enumerate(opf_ed_fast_runtimes)}

In [None]:
# Runtime of the Python OPF (OptimumPathForestClassifier) with the
# 'euclidean-distance' cost. Each of the ITERATIONS repetitions times
# classifier construction, fit(X, y) and classify(X_test) for one dataset.
# perf_counter() is a high-resolution clock meant for measuring short
# durations; time() follows the system wall clock and can jump if it is
# adjusted while a measurement is running.
opf_ed_slow_runtimes = []  # [dataset row][repetition] -> seconds

for df_name in tqdm(df_names):
    X, y, X_test, y_test, df_errors = read_df(df_name)

    runtimes = []
    for it in range(ITERATIONS):
        start_time = perf_counter()

        opf = OptimumPathForestClassifier('euclidean-distance')
        opf.fit(X, y)
        preds = opf.classify(X_test)
        runtime = perf_counter() - start_time

        runtimes.append(runtime)

    opf_ed_slow_runtimes.append(runtimes)

# Columns are stored as {row index: value} dicts, matching the 'name' column.
df['OPF-ED-slow-time-mean'] = {i: np.mean(r) for i, r in enumerate(opf_ed_slow_runtimes)}
df['OPF-ED-slow-time-std'] = {i: np.std(r) for i, r in enumerate(opf_ed_slow_runtimes)}
for it in range(ITERATIONS):
    df[f'OPF-ED-slow-time-it{it}'] = {i: r[it] for i, r in enumerate(opf_ed_slow_runtimes)}

In [None]:
# Runtime of the Python OPF (OptimumPathForestClassifier) with a DTW cost.
# Each of the ITERATIONS repetitions times classifier construction, fit(X, y)
# and classify(X_test) for one dataset.
# perf_counter() is a high-resolution clock meant for measuring short
# durations; time() follows the system wall clock and can jump if it is
# adjusted while a measurement is running.
opf_dtw_slow_runtimes = []  # [dataset row][repetition] -> seconds

for df_name in tqdm(df_names):
    X, y, X_test, y_test, df_errors = read_df(df_name)

    runtimes = []
    for it in range(ITERATIONS):
        start_time = perf_counter()

        # NOTE(review): the cost name 'dtw-slow' differs from the 'dtw-distance'
        # name passed to this same class in the OPF-DTW error cell; confirm
        # that OptimumPathForestClassifier accepts it.
        opf = OptimumPathForestClassifier('dtw-slow')
        opf.fit(X, y)
        preds = opf.classify(X_test)
        runtime = perf_counter() - start_time

        runtimes.append(runtime)

    opf_dtw_slow_runtimes.append(runtimes)

# Columns are stored as {row index: value} dicts, matching the 'name' column.
df['OPF-DTW-slow-time-mean'] = {i: np.mean(r) for i, r in enumerate(opf_dtw_slow_runtimes)}
df['OPF-DTW-slow-time-std'] = {i: np.std(r) for i, r in enumerate(opf_dtw_slow_runtimes)}
for it in range(ITERATIONS):
    df[f'OPF-DTW-slow-time-it{it}'] = {i: r[it] for i, r in enumerate(opf_dtw_slow_runtimes)}

In [None]:
# Runtime of the C++ OPF (OPFClassifier) with the 'dtw-distance' cost.
# Each of the ITERATIONS repetitions times classifier construction, fit(X, y)
# and classify(X_test) for one dataset.
# perf_counter() is a high-resolution clock meant for measuring short
# durations; time() follows the system wall clock and can jump if it is
# adjusted while a measurement is running.
opf_dtw_fast_runtimes = []  # [dataset row][repetition] -> seconds

for df_name in tqdm(df_names):
    X, y, X_test, y_test, df_errors = read_df(df_name)

    runtimes = []
    # This cell also shows a progress bar for the repetitions of each dataset.
    for it in tqdm(range(ITERATIONS)):
        start_time = perf_counter()

        opf = OPFClassifier('dtw-distance')
        opf.fit(X, y)
        preds = opf.classify(X_test)
        runtime = perf_counter() - start_time

        runtimes.append(runtime)

    opf_dtw_fast_runtimes.append(runtimes)

# Columns are stored as {row index: value} dicts, matching the 'name' column.
df['OPF-DTW-fast-time-mean'] = {i: np.mean(r) for i, r in enumerate(opf_dtw_fast_runtimes)}
df['OPF-DTW-fast-time-std'] = {i: np.std(r) for i, r in enumerate(opf_dtw_fast_runtimes)}
for it in range(ITERATIONS):
    df[f'OPF-DTW-fast-time-it{it}'] = {i: r[it] for i, r in enumerate(opf_dtw_fast_runtimes)}

In [None]:
# Turn the timing columns into a table (`df` is rebound from dict to DataFrame)
# and save it as CSV without the index column.
df = pd.DataFrame(data=df)
df.to_csv(path_or_buf='less_errors_with_times.csv', index=False)