In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Toy inputs and default settings for quick experiments.
# The global NumPy RNG is seeded so the random series below are identical on
# every "Restart Kernel -> Run All"; without it, any value computed from A/B
# changes from run to run.
SEED = 0
np.random.seed(SEED)
A = np.random.normal(size = (100,))   # 100 standard-normal samples
B = np.random.uniform(size = (100,))  # 100 samples drawn uniformly from [0, 1)
# The names below mirror TAOT_dist's parameters; the visible cells pass
# literals instead, so these act only as defaults for ad-hoc calls.
maxiter = 1000
tol = 1e-1
w = 1
lbda = 2

In [3]:
def TAOT_dist(A, B, lbda, w, maxiter, tol):
    """Entropy-regularised transport cost between two equal-length 1-D series.

    The ground cost between sample ``i`` of ``A`` and sample ``j`` of ``B`` is
    ``(A[i] - B[j])**2 + w * (t[i] - t[j])**2`` where ``t`` holds the sample
    positions ``1..d`` standardised to zero mean and unit standard deviation.
    The kernel ``K = exp(-lbda * M)`` is balanced by alternating scaling
    updates of ``u`` and ``v`` towards uniform marginals ``1/d``, and the
    returned value is ``sum(u * ((K * M) @ v))``.

    Parameters
    ----------
    A, B : array-like, 1-D, same length
        The two series to compare.
    lbda : float
        Multiplier applied to the cost inside the exponential kernel.
    w : float
        Weight of the squared time-position difference in the ground cost.
    maxiter : int
        Maximum number of scaling iterations (must be >= 1).
    tol : float
        The loop stops early once the summed absolute marginal error, which is
        checked at iterations 1, 21, 41, ..., falls below this value.

    Returns
    -------
    numpy.float64
        The transport cost under the scaled kernel.

    Raises
    ------
    ValueError
        If the inputs are not non-empty 1-D series of equal length, or if
        ``maxiter`` is smaller than 1.

    Notes
    -----
    For large ``lbda`` entries of ``K`` can underflow to zero, in which case
    the scaling updates divide by zero and the result is ``inf``/``nan``.
    """
    A = np.asarray(A, dtype=float)
    B = np.asarray(B, dtype=float)
    if A.ndim != 1 or B.ndim != 1 or A.shape != B.shape or A.size == 0:
        raise ValueError("A and B must be non-empty 1-D series of equal length")
    if maxiter < 1:
        # With no iteration at all `v` would never be defined.
        raise ValueError("maxiter must be at least 1")

    d = len(A)
    # Standardised time stamps shared by both series.
    t = np.linspace(1, d, d)
    t -= np.mean(t)
    t /= np.std(t)

    # Cost matrix built by broadcasting instead of a d*d Python loop.
    M = (A[:, None] - B[None, :]) ** 2 + w * (t[:, None] - t[None, :]) ** 2

    # NOTE(review): np.median(M, axis=1) has shape (d,), so broadcasting divides
    # column j by the median of row j. Kept as-is to preserve existing results;
    # confirm whether a per-row (keepdims=True) or a global median was intended.
    M /= np.median(M, axis = 1)
    K = np.exp(-lbda * M)
    u = np.ones((d, 1)) / d

    c = 0
    while c < maxiter:
        v = 1 / (d * (K.T @ u))
        u = 1 / (d * (K @ v))
        c += 1
        # Only evaluate the (extra mat-vec) stopping criterion every 20 steps.
        if c % 20 == 1:
            criterion = np.sum(np.abs(v * (K.T @ u) - 1 / d))
            if criterion < tol:
                break

    return np.sum(u * ((K * M) @ v))

In [4]:
def reformat_data_train(name):
    """Load the TRAIN split of dataset ``name`` from the UCR archive folder.

    Reads ``UCRArchive_2018/<name>/<name>_TRAIN.tsv`` as a header-less,
    tab-separated table. Column 0 is taken as the label and every other
    column as a feature.

    Returns
    -------
    (X, Y)
        ``X`` is a NumPy array holding all columns except the first;
        ``Y`` is a pandas Series named ``'Y'`` holding the first column.
    """
    path = 'UCRArchive_2018/' + name + '/' + name + '_TRAIN.tsv'
    raw = pd.read_csv(path, header=None, sep='\t')
    labels = raw[0].rename('Y')
    features = raw.drop(columns=[0]).values
    return features, labels

In [5]:
def reformat_data_test(name):
    """Load the TEST split of dataset ``name`` from the UCR archive folder.

    Reads ``UCRArchive_2018/<name>/<name>_TEST.tsv`` as a header-less,
    tab-separated table. Column 0 is taken as the label and every other
    column as a feature.

    Returns
    -------
    (X, Y)
        ``X`` is a NumPy array holding all columns except the first;
        ``Y`` is a pandas Series named ``'Y'`` holding the first column.
    """
    path = 'UCRArchive_2018/' + name + '/' + name + '_TEST.tsv'
    raw = pd.read_csv(path, header=None, sep='\t')
    labels = raw[0].rename('Y')
    features = raw.drop(columns=[0]).values
    return features, labels

In [6]:
# Sanity check: distance of a series to itself (displayed as the cell output).
TAOT_dist(A, A, lbda=1000, w=1, maxiter=10, tol=0.00001)

4.5833769191542024e-05

In [7]:
# Dataset names; each one is used as a sub-folder / file prefix under
# UCRArchive_2018/ by the loading code.
UNIVARIATE_DATASET_NAMES_2018 = [
    'Adiac',
    'Beef',
    'CBF',
    'ChlorineConcentration',
    'CinCECGTorso',
    'Coffee',
]

In [9]:
## TAOT distance matrices
from scipy.spatial.distance import pdist, squareform

# For each dataset, stack the TRAIN and TEST samples, compute all pairwise
# TAOT distances and save the square matrix (plus a label column 'Y') to
# "<name>_distances_all_TAOT.csv".
# NOTE(review): the [3:] slice skips the first three datasets -- presumably
# because they were already processed in an earlier run; confirm before a
# full re-run.
for name in UNIVARIATE_DATASET_NAMES_2018[3:]:
    print(name)
    df_train = pd.read_csv('UCRArchive_2018/' + name + '/' + name + '_TRAIN.tsv', header = None, sep = '\t')
    df_train.index = ['train_' + str(i) for i in df_train.index]
    df_train['Y'] = df_train[0]
    df_train = df_train.drop(0, axis = 1)
    df_test = pd.read_csv('UCRArchive_2018/' + name + '/' + name + '_TEST.tsv', header = None, sep = '\t')
    df_test['Y'] = df_test[0]
    df_test = df_test.drop(0, axis = 1)
    X = pd.concat([df_train, df_test], axis = 0)
    # Condensed pairwise distances; this is the expensive step (one Python
    # call to TAOT_dist per pair of samples).
    condensed = pdist(X.drop('Y', axis = 1), lambda x, y: TAOT_dist(x, y, 100, 1, 10, 0.0001))
    y = pd.DataFrame(squareform(condensed))
    # Attach labels by POSITION. Assigning the Series `X.Y` directly would align
    # on index labels: X is indexed 'train_*' + 0..n_test-1 while `y` has a
    # 0..N-1 RangeIndex, so test labels would land on the first rows and every
    # other row would become NaN.
    y['Y'] = X['Y'].values
    y.to_csv(name+"_distances_all_TAOT.csv")

ChlorineConcentration


  v = 1/ (d * K.T @ u)
  u = 1 / (d* K @ v)
  v = 1/ (d * K.T @ u)


KeyboardInterrupt: 

In [None]:
from sklearn.metrics import accuracy_score

In [72]:
## TAOT 1-NN classifier
# For each dataset: read the distance matrix from
# "<name>_distances_TRAIN_TAOT.csv", hold out the last 20% of its rows, label
# each held-out sample with the label of its nearest neighbour among the
# remaining rows, and print the accuracy.
# NOTE(review): no visible cell writes "*_distances_TRAIN_TAOT.csv" (the
# distance-matrix cell writes "*_distances_all_TAOT.csv"); confirm the source
# of these files.
for name in UNIVARIATE_DATASET_NAMES_2018:
    print(name)
    try:
        aa = pd.read_csv(name + '_distances_TRAIN_TAOT.csv').drop('Unnamed: 0', axis = 1)
        X_train, Y_train = reformat_data_train(name)
        n_holdout = int(0.2*len(aa.index))
        if n_holdout == 0:
            # aa[:-0] would be empty and aa[-0:] would be everything.
            raise ValueError('too few samples for a 20% hold-out split')
        aa_train = aa[:-n_holdout]
        aa_test = aa[-n_holdout:]
        y_test = Y_train.iloc[aa_test.index]
        # Per column, the row label of the smallest distance among the kept
        # rows; then keep only the columns belonging to held-out samples.
        nearest = aa.iloc[aa_train.index].idxmin().iloc[aa_test.index]
        y_pred = [Y_train[i] for i in nearest]
        print(accuracy_score(y_test, y_pred))
    except Exception as err:
        # Best-effort loop: keep going, but report why a dataset was skipped
        # instead of hiding it (and let KeyboardInterrupt propagate).
        print('  skipped: ' + type(err).__name__ + ': ' + str(err))

Adiac
0.3076923076923077
Beef
0.0
CBF
0.16666666666666666
ChlorineConcentration
0.5376344086021505
CinCECGTorso
Coffee
1.0
DiatomSizeReduction
1.0
