In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score

from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier

from modules.categorical_data_coding import code_data
from modules.model_runer import find_n_neighbors, test_model
from modules.classes_scatterplot import draw_classes_scatterplot

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Load the prepared classification table (feature columns f_0..f_4999 plus a
# `target` label column) and preview its first rows.
df = pd.read_csv(filepath_or_buffer='./prepared_datasets/0_Que_classification.csv')
df.head(n=5)

Unnamed: 0,f_0,f_1,f_2,f_3,f_4,f_5,f_6,f_7,f_8,f_9,...,f_4991,f_4992,f_4993,f_4994,f_4995,f_4996,f_4997,f_4998,f_4999,target
0,12.635397,13.077894,13.062591,12.894789,12.650586,12.711683,12.36068,12.925277,13.398274,13.535571,...,13.231437,13.399334,13.155131,13.643429,13.521326,13.353523,13.70442,14.391117,13.735014,0Ca/0Que
1,12.651611,13.124609,13.612906,13.597603,13.2162,13.444997,13.246694,12.010691,12.575289,12.865186,...,12.774651,13.583349,13.018846,12.622043,13.00354,13.232437,12.988234,13.064531,12.866229,0Ca/0Que
2,12.942526,13.339223,12.88142,12.606817,12.957714,12.957711,12.789909,12.469406,13.049303,13.0951,...,13.462266,13.447063,12.89776,12.897757,13.569054,12.897751,12.745149,12.928246,13.035043,0Ca/0Que
3,12.91294,13.035037,12.638334,12.577231,12.592529,13.248626,13.111323,13.06552,13.660617,13.660614,...,13.37168,13.432777,13.631074,13.585371,13.035969,12.730866,13.570063,14.59236,14.104157,0Ca/0Que
4,13.310654,13.371651,13.997249,14.210946,13.798943,13.93624,13.539537,13.646334,13.463231,13.264829,...,13.494794,13.250591,12.930189,12.701286,12.518183,13.03698,13.632077,13.235374,13.143771,0Ca/0Que


In [4]:
# Distinct class labels present in the `target` column, in order of appearance.
df['target'].unique().tolist()

['0Ca/0Que', '10Ca/0Que', '100Ca/0Que']

In [5]:
# Encode the string class labels in `target` as numeric codes using the
# project helper `code_data`.
# NOTE(review): no return value is captured and the preview below already shows
# coded labels, so the helper presumably rewrites `df` in place — confirm, since
# that would make this cell non-idempotent when re-run on the same `df`.
code_data(df, 'target')
df.head()

Unnamed: 0,f_0,f_1,f_2,f_3,f_4,f_5,f_6,f_7,f_8,f_9,...,f_4991,f_4992,f_4993,f_4994,f_4995,f_4996,f_4997,f_4998,f_4999,target
0,12.635397,13.077894,13.062591,12.894789,12.650586,12.711683,12.36068,12.925277,13.398274,13.535571,...,13.231437,13.399334,13.155131,13.643429,13.521326,13.353523,13.70442,14.391117,13.735014,0
1,12.651611,13.124609,13.612906,13.597603,13.2162,13.444997,13.246694,12.010691,12.575289,12.865186,...,12.774651,13.583349,13.018846,12.622043,13.00354,13.232437,12.988234,13.064531,12.866229,0
2,12.942526,13.339223,12.88142,12.606817,12.957714,12.957711,12.789909,12.469406,13.049303,13.0951,...,13.462266,13.447063,12.89776,12.897757,13.569054,12.897751,12.745149,12.928246,13.035043,0
3,12.91294,13.035037,12.638334,12.577231,12.592529,13.248626,13.111323,13.06552,13.660617,13.660614,...,13.37168,13.432777,13.631074,13.585371,13.035969,12.730866,13.570063,14.59236,14.104157,0
4,13.310654,13.371651,13.997249,14.210946,13.798943,13.93624,13.539537,13.646334,13.463231,13.264829,...,13.494794,13.250591,12.930189,12.701286,12.518183,13.03698,13.632077,13.235374,13.143771,0


# Distance-based algorithms

In [6]:
# Column-wise min-max scaler; (0, 1) is the library's default output range,
# spelled out here for the reader.
scaler = MinMaxScaler(feature_range=(0, 1))

In [7]:
# Min-max scale every column of `df` (this includes the `target` column) and
# wrap the resulting array in a DataFrame that keeps the original column names.
distance_based_df = pd.DataFrame(
    scaler.fit_transform(df.values),
    columns=df.columns,
)
distance_based_df

Unnamed: 0,f_0,f_1,f_2,f_3,f_4,f_5,f_6,f_7,f_8,f_9,...,f_4991,f_4992,f_4993,f_4994,f_4995,f_4996,f_4997,f_4998,f_4999,target
0,0.183309,0.186687,0.187205,0.183287,0.188252,0.184973,0.174496,0.188504,0.190634,0.203206,...,0.194373,0.195839,0.190456,0.199567,0.193027,0.187649,0.203624,0.210831,0.207732,0.0
1,0.183646,0.187646,0.198571,0.197868,0.199897,0.200085,0.192764,0.169504,0.173372,0.189272,...,0.185014,0.199640,0.187640,0.178430,0.182275,0.185154,0.188761,0.183722,0.190034,0.0
2,0.189681,0.192051,0.183463,0.177313,0.194576,0.190043,0.183346,0.179033,0.183315,0.194051,...,0.199103,0.196825,0.185138,0.184136,0.194018,0.178255,0.183716,0.180937,0.193473,0.0
3,0.189067,0.185807,0.178442,0.176699,0.187057,0.196038,0.189973,0.191418,0.196137,0.205805,...,0.197247,0.196530,0.200289,0.198366,0.182949,0.174815,0.200836,0.214944,0.215251,0.0
4,0.197318,0.192716,0.206510,0.210592,0.211895,0.210208,0.198802,0.203484,0.191997,0.197579,...,0.199769,0.192767,0.185808,0.180070,0.172197,0.181125,0.202123,0.187214,0.195688,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6750,0.357675,0.445328,0.546125,0.571337,0.563608,0.507540,0.432416,0.403056,0.429970,0.516810,...,0.570885,0.581107,0.569181,0.577009,0.566692,0.547866,0.579924,0.582928,0.585178,1.0
6751,0.590519,0.551676,0.535268,0.567081,0.579805,0.589157,0.589894,0.592163,0.591131,0.589296,...,0.591382,0.602083,0.599621,0.610972,0.601722,0.590815,0.616833,0.597444,0.538103,1.0
6752,0.494779,0.542453,0.562233,0.568525,0.601341,0.594992,0.594473,0.597412,0.595149,0.615478,...,0.574986,0.536698,0.523820,0.574833,0.598096,0.575264,0.566026,0.553338,0.544493,1.0
6753,0.557632,0.561732,0.568082,0.568383,0.574813,0.582903,0.594019,0.612486,0.589886,0.587111,...,0.575472,0.564925,0.562771,0.562379,0.581795,0.582674,0.571901,0.559749,0.566114,1.0


In [8]:
# Features: all scaled columns except `target`.
X = np.array(distance_based_df.drop(columns='target'))
# Labels: the integer-coded `target` taken from the unscaled frame, so the
# class codes are not affected by the min-max scaling.
y = np.array(df['target'])

In [9]:
# Hold out 20% of the rows for evaluation (fixed seed for reproducibility).
#
# The split is performed on the raw, unscaled feature columns of `df`, and the
# min-max scaler is then fitted on the training rows only. Fitting the scaler
# on all rows before splitting lets the test rows' minima/maxima influence the
# training features (data leakage) and makes the held-out scores optimistic.
# The row partition is unchanged: same number of rows, same seed.
raw_features = df.drop(columns='target').to_numpy()
raw_train, raw_test, y_train, y_test = train_test_split(
    raw_features, y, test_size=0.2, random_state=42
)

# Scaling statistics come from the training rows alone; the test rows are only
# transformed, so their scaled values may fall slightly outside [0, 1].
train_scaler = MinMaxScaler().fit(raw_train)
X_train = train_scaler.transform(raw_train)
X_test = train_scaler.transform(raw_test)

## k-Nearest Neighbors with Dynamic Time Warping

In [10]:
# Time-series k-nearest-neighbours classifier voting over the 3 closest
# training series. The distance measure is left at the library default.
# NOTE(review): the section heading says Dynamic Time Warping — confirm that
# the installed sktime version defaults to DTW, or pass the distance explicitly.
knn = KNeighborsTimeSeriesClassifier(n_neighbors=3)

In [11]:
#test_model(knn, X_train, y_train)

In [12]:
# Fit the classifier on the training split; the fitted estimator is the cell's
# displayed value.
knn.fit(X=X_train, y=y_train)

In [None]:
# Predict class codes for the held-out rows.
knn_pred = knn.predict(X=X_test)

In [None]:
# Weighted F1 on the held-out split. scikit-learn metrics take the ground truth
# first and the predictions second; with average='weighted' the per-class
# weights are the class supports in `y_true`, so swapping the two arguments
# would weight by predicted counts and report a different number.
f1_score(y_test, knn_pred, average='weighted')

In [None]:
# Fraction of held-out rows whose predicted class equals the true class.
accuracy_score(y_true=y_test, y_pred=knn_pred)

In [None]:
# Scatter plot of the held-out samples coloured by their predicted class.
# The legend uses the dataset's own class names: the three classes differ in
# their Ca level while Que is 0 in all of them, so labelling them as
# "Que" percentages would misdescribe what is plotted.
# NOTE(review): assumes class codes 0, 1, 2 correspond to the labels in this
# order (the order in which they appear in the dataset) — confirm against
# `code_data`.
class_labels = ["0Ca/0Que", "10Ca/0Que", "100Ca/0Que"]
class_colors = ["blue", "red", "green"]
draw_classes_scatterplot(
    X_test,
    knn_pred,
    "Classes predicted by kNN algorithm",
    class_labels,
    class_colors,
)