In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score

from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier

from modules.categorical_data_coding import code_data
from modules.model_runer import find_n_neighbors, test_model
from modules.classes_scatterplot import draw_classes_scatterplot

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
df = pd.read_csv('./prepared_datasets/100_Que_classification.csv')
df.head()

Unnamed: 0,f_0,f_1,f_2,f_3,f_4,f_5,f_6,f_7,f_8,f_9,...,f_4991,f_4992,f_4993,f_4994,f_4995,f_4996,f_4997,f_4998,f_4999,target
0,30.975315,30.319231,30.670246,30.899062,30.761777,30.761792,29.281708,28.701923,30.578738,31.250154,...,18.2653,18.982415,18.677331,18.616246,18.478962,18.768877,18.662092,18.952008,19.013123,0Ca/100Que
1,19.119938,19.425054,19.516669,18.601185,17.9908,18.616415,18.555431,18.173946,18.891162,18.936977,...,31.434223,32.242938,33.250054,32.883869,33.372185,33.1586,32.792315,33.295931,33.616346,0Ca/100Que
2,33.158662,32.487277,31.861692,31.785408,31.648023,32.670438,33.906354,34.577769,33.708085,32.5179,...,31.816346,32.533562,32.365677,31.450192,31.389108,31.526523,31.724838,31.602854,32.152169,0Ca/100Que
3,31.419785,31.0841,32.243715,33.342431,32.976246,31.984362,31.358777,31.663992,27.376308,25.667323,...,28.246369,26.827385,23.15,24.691115,27.666631,28.002346,24.447062,20.083077,18.984492,0Ca/100Que
4,18.633508,17.962123,18.435138,18.847154,18.770869,19.716985,20.7851,20.632515,19.579631,18.786246,...,31.069892,31.344608,31.970223,32.260138,31.695554,31.192069,31.665085,30.8717,29.132215,0Ca/100Que


In [4]:
list(df.target.unique())

['100Ca/0Que', '100Ca/10Que', '100Ca/100Que']

In [5]:
code_data(df, 'target')
df.head()

Unnamed: 0,f_0,f_1,f_2,f_3,f_4,f_5,f_6,f_7,f_8,f_9,...,f_4991,f_4992,f_4993,f_4994,f_4995,f_4996,f_4997,f_4998,f_4999,target
0,33.5563,32.9459,32.595,31.9999,32.5492,31.9846,31.6642,32.8086,32.7476,31.9236,...,19.6708,20.2048,20.098,20.0065,20.4795,21.1356,21.868,20.7236,20.1285,0
1,19.976,20.5405,20.5253,20.1438,19.9302,19.7318,18.9994,19.1367,22.2343,26.5067,...,33.3426,32.4119,32.2288,32.8544,32.7628,32.3051,31.6794,31.6489,32.2135,0
2,32.3203,32.5645,32.8086,32.9917,32.8544,32.3051,32.5797,29.3296,23.4092,20.7694,...,30.3824,30.3824,31.4811,31.9541,32.4119,32.0457,31.1912,31.4811,31.8473,0
3,31.2369,31.42,31.0386,31.359,32.5187,32.8086,32.1525,31.7252,31.7557,31.6031,...,31.8778,31.9541,32.3661,32.8239,31.9388,30.2756,31.0691,31.8473,32.0151,0
4,31.5726,31.5269,32.1219,32.1982,31.7252,31.1759,31.0843,31.2217,31.7252,31.298,...,32.2288,31.4048,31.0538,31.2827,31.1912,31.2369,31.1454,32.2288,32.2898,0


# Distance-based algorithms

In [6]:
scaler = MinMaxScaler()

In [7]:
distance_based_df = scaler.fit_transform(df.values)
distance_based_df = pd.DataFrame(distance_based_df)
distance_based_df.columns = df.columns
distance_based_df

Unnamed: 0,f_0,f_1,f_2,f_3,f_4,f_5,f_6,f_7,f_8,f_9,...,f_4991,f_4992,f_4993,f_4994,f_4995,f_4996,f_4997,f_4998,f_4999,target
0,0.599585,0.579473,0.576783,0.561910,0.579825,0.565165,0.553743,0.579085,0.580558,0.567184,...,0.283387,0.295755,0.299683,0.294754,0.315596,0.324583,0.342458,0.314813,0.305556,0.0
1,0.304786,0.315418,0.319044,0.305559,0.308322,0.302403,0.281166,0.279018,0.351350,0.449651,...,0.581357,0.563365,0.563191,0.575140,0.579098,0.563098,0.556009,0.546679,0.563790,0.0
2,0.572754,0.571355,0.581344,0.583355,0.586391,0.572038,0.573447,0.502729,0.376965,0.325166,...,0.516841,0.518873,0.546950,0.555492,0.571570,0.557559,0.545383,0.543117,0.555965,0.0
3,0.549236,0.546994,0.543547,0.548053,0.579168,0.582836,0.564253,0.555307,0.558933,0.560230,...,0.549432,0.553329,0.566174,0.574474,0.561421,0.519760,0.542725,0.550889,0.559550,0.0
4,0.556523,0.549269,0.566680,0.566198,0.562096,0.547823,0.541262,0.544256,0.558268,0.553610,...,0.557082,0.541287,0.537668,0.540840,0.545384,0.540287,0.544386,0.558986,0.565420,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3507,0.601261,0.565850,0.548452,0.569188,0.591662,0.614922,0.600396,0.580780,0.576253,0.584090,...,0.557497,0.571142,0.589459,0.581217,0.580163,0.585335,0.589636,0.570076,0.560285,1.0
3508,0.568864,0.568837,0.589572,0.587067,0.568747,0.553469,0.564664,0.573812,0.581307,0.577863,...,0.411236,0.510327,0.566320,0.573956,0.589392,0.576928,0.599666,0.587303,0.586433,1.0
3509,0.524873,0.528952,0.567155,0.574925,0.545501,0.557786,0.579506,0.576891,0.577380,0.593489,...,0.339801,0.351165,0.368507,0.361234,0.376034,0.364871,0.341013,0.346435,0.365109,1.0
3510,0.362303,0.353304,0.349231,0.344042,0.367628,0.371659,0.360195,0.379704,0.369196,0.363455,...,0.574651,0.575018,0.555181,0.576750,0.603595,0.619415,0.625700,0.591641,0.572867,1.0


In [8]:
X, y = np.array(distance_based_df.drop('target', axis=1)), np.array(df['target'])

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## k-Nearest Neighbors with Dynamic Time Warping

In [10]:
knn = KNeighborsTimeSeriesClassifier(n_neighbors=3)

In [11]:
#test_model(knn, X_train, y_train)

In [12]:
knn.fit(X_train, y_train)

In [None]:
knn_pred = knn.predict(X_test)

In [None]:
f1_score(knn_pred, y_test, average='weighted')

In [None]:
accuracy_score(knn_pred, y_test)

In [None]:
draw_classes_scatterplot(X_test, knn_pred, "Classes predicted by kNN algorithm", 
                         ["0% Que", "10% Que", "100% Que"], ["blue", "red", "green"])