In [None]:
import numpy as np
import pandas as pd
from scipy.spatial import distance
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
def DTW(a, b):
    """Return the dynamic time warping (DTW) distance between two sequences.

    The local cost of matching ``a[i]`` with ``b[j]`` is taken from
    ``scipy.spatial.distance.cdist`` with its default (Euclidean) metric on
    the values viewed as one-feature points. The accumulated cost follows the
    standard recurrence: each cell adds its local cost to the cheapest of the
    three neighbouring cells (up, left, diagonal).

    Parameters
    ----------
    a, b : array-like
        Numeric sequences. Inputs are converted with ``np.asarray`` and
        flattened via ``reshape(-1, 1)``, so lists and pandas Series are
        accepted in addition to NumPy arrays. The two sequences may have
        different lengths.

    Returns
    -------
    float
        Accumulated alignment cost between the full sequences.
    """
    a = np.asarray(a)
    b = np.asarray(b)
    an = a.size
    bn = b.size
    pointwise_distance = distance.cdist(a.reshape(-1, 1), b.reshape(-1, 1))

    # Plain ndarray accumulator (np.matrix is discouraged by NumPy). The extra
    # leading row/column of +inf forces every warping path to start at (0, 0).
    cumdist = np.full((an + 1, bn + 1), np.inf)
    cumdist[0, 0] = 0

    for ai in range(an):
        for bi in range(bn):
            # np.min keeps NaN propagation identical to the previous behaviour.
            minimum_cost = np.min((cumdist[ai, bi + 1],
                                   cumdist[ai + 1, bi],
                                   cumdist[ai, bi]))
            cumdist[ai + 1, bi + 1] = pointwise_distance[ai, bi] + minimum_cost

    return cumdist[an, bn]

In [None]:
# Read both parquet files and transpose them (rows and columns are swapped
# relative to the on-disk layout).
training = pd.read_parquet('./data/Class1.parquet').transpose()
evaluation = pd.read_parquet('./data/validation.parquet').transpose()

In [None]:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from scipy.spatial import distance
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, learning_curve
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.pipeline import make_pipeline, Pipeline
import matplotlib.pyplot as plt

In [None]:
def DTW(a, b):
    """Compute the dynamic time warping (DTW) distance between two sequences.

    NOTE(review): this cell redefines ``DTW`` from an earlier cell in this
    notebook; whichever definition runs last is the one used.

    Local costs come from ``scipy.spatial.distance.cdist`` (default Euclidean
    metric) applied to the values as one-feature points. Each cell of the
    accumulated-cost table adds its local cost to the minimum of its upper,
    left and diagonal neighbours.

    Parameters
    ----------
    a, b : array-like
        Numeric sequences, possibly of different lengths. They are converted
        with ``np.asarray`` and flattened via ``reshape(-1, 1)``, so lists and
        pandas Series work as well as NumPy arrays.

    Returns
    -------
    float
        Accumulated cost of aligning all of ``a`` with all of ``b``.
    """
    a = np.asarray(a)
    b = np.asarray(b)
    an = a.size
    bn = b.size
    pointwise_distance = distance.cdist(a.reshape(-1, 1), b.reshape(-1, 1))

    # ndarray instead of the discouraged np.matrix; the +inf border row and
    # column make (0, 0) the only admissible starting cell.
    cumdist = np.full((an + 1, bn + 1), np.inf)
    cumdist[0, 0] = 0

    for ai in range(an):
        for bi in range(bn):
            # np.min (not builtin min) so NaN values propagate as before.
            minimum_cost = np.min((cumdist[ai, bi + 1],
                                   cumdist[ai + 1, bi],
                                   cumdist[ai, bi]))
            cumdist[ai + 1, bi + 1] = pointwise_distance[ai, bi] + minimum_cost

    return cumdist[an, bn]


In [None]:
# Read both parquet files and transpose them (rows and columns are swapped
# relative to the on-disk layout).
training = pd.read_parquet('./data/Class1.parquet').transpose()
validation = pd.read_parquet('./data/validation.parquet').transpose()

In [None]:
# Tag every row of `training` with the constant label 1, then separate the
# target column from the feature columns.
# NOTE(review): only a single class value is assigned here — confirm that
# other classes are meant to be added before fitting a classifier.
training['label'] = 1
y = training['label']
X = training.drop(columns=['label'])

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=5,
)

In [None]:
# Single-step pipeline. The 'classifier' step is only a placeholder: each
# grid below replaces it with the estimator listed under the 'classifier' key.
pipeline = Pipeline([('classifier', RandomForestClassifier())])

# Nested hyper-parameters must be addressed through the pipeline's step name
# ('classifier__<param>'), not through the estimator's class name.
parameters = [
    {'classifier': [KNeighborsClassifier(metric=DTW)],
     'classifier__n_neighbors': np.arange(1, 10)},
    {'classifier': [RandomForestClassifier()],
     'classifier__n_estimators': [10, 100, 1000],
     'classifier__max_features': [1, 2, 3]},
]


In [None]:
# Grid restricted to the DTW-based k-NN classifier.
# The pipeline's only step is named 'classifier' and defaults to a
# RandomForestClassifier, so the grid must (1) swap that step for the k-NN
# estimator and (2) address n_neighbors as 'classifier__n_neighbors';
# a 'kneighborsclassifier__...' key does not match any step and makes
# GridSearchCV.fit raise "Invalid parameter".
parameters = {
    'classifier': [KNeighborsClassifier(metric=DTW)],
    'classifier__n_neighbors': np.arange(1, 10),
}
grid = GridSearchCV(pipeline, parameters, cv=5, verbose=1)

In [None]:
# Run the cross-validated grid search on the training split. `fit` returns
# the search object itself, so `model` and `grid` name the same object.
model = grid.fit(X_train, y_train)

In [None]:
# Mean cross-validated score of the best parameter combination found.
model.best_score_

In [None]:
# Parameter setting that achieved the best cross-validated score.
model.best_params_

In [None]:
# Show the estimator sitting in the 'classifier' step of the refitted
# best pipeline.
model.best_estimator_.named_steps['classifier']

In [None]:
# Score of the refitted best estimator on the held-out test split.
model.score(X_test, y_test)

In [None]:
# Confusion matrix of the held-out test split: true labels vs. predictions
# from the fitted search object.
test_predictions = model.predict(X_test)
confusion_matrix(y_test, test_predictions)

In [None]:
# Learning curve over 10 training-set fractions (10% .. 100%) with 5-fold CV.
# The keyword is `train_sizes` (plural); `train_size=` is not accepted by
# learning_curve and raises TypeError.
# NOTE(review): `model` is the GridSearchCV object, so every learning-curve
# fit reruns the whole grid search — consider passing model.best_estimator_
# if only the selected configuration should be evaluated.
N, train_score, val_score = learning_curve(
    model,
    X_train,
    y_train,
    train_sizes=np.linspace(0.1, 1.0, 10),
    cv=5,
)

In [None]:
# Print the absolute training-set sizes, then plot the fold-averaged train
# and validation scores against them.
print(N)
for fold_scores, curve_label in ((train_score, 'train'), (val_score, 'validation')):
    plt.plot(N, fold_scores.mean(axis=1), label=curve_label)
plt.xlabel('train_sizes')
plt.legend()