In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import signal
from sklearn.model_selection import train_test_split,StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report,accuracy_score,ConfusionMatrixDisplay,confusion_matrix,precision_score,recall_score,roc_curve,roc_auc_score,balanced_accuracy_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
import datetime
from dataclasses import dataclass
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
from scipy.ndimage import gaussian_filter1d
from itertools import combinations as comb
from pyts.image import RecurrencePlot
import PIL
from sklearn.utils import class_weight
from sklearn.model_selection import RandomizedSearchCV,GridSearchCV,StratifiedKFold
from unidecode import unidecode

In [2]:
train_data = pd.read_csv('./dataset/dataset_1/train_motion_data.csv')
test_data = pd.read_csv('./dataset/dataset_1/test_motion_data.csv')
print(train_data.shape)
print(test_data.shape)

(3644, 8)
(3084, 8)


In [3]:
train_data.head()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,Class,Timestamp
0,0.0,0.0,0.0,0.059407,-0.174707,0.101938,NORMAL,3581629
1,-1.624864,-1.082492,-0.204183,-0.028558,0.051313,0.135536,NORMAL,3581630
2,-0.59466,-0.12241,0.220502,-0.019395,-0.029322,0.087888,NORMAL,3581630
3,0.738478,-0.228456,0.667732,0.069791,-0.029932,0.054902,NORMAL,3581631
4,0.101741,0.777568,-0.06673,0.030696,-0.003665,0.054902,NORMAL,3581631


In [4]:
train_data.describe()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,Timestamp
count,3644.0,3644.0,3644.0,3644.0,3644.0,3644.0,3644.0
mean,0.040467,-0.073418,0.008271,0.001593,-0.001273,0.007949,3582707.0
std,0.985653,0.903408,0.985061,0.066918,0.126205,0.115687,642.1479
min,-4.636523,-4.699795,-7.143998,-0.751822,-1.587028,-1.236468,3581629.0
25%,-0.550695,-0.59254,-0.558464,-0.028558,-0.053756,-0.029398,3582121.0
50%,0.003931,-0.080833,0.002262,0.001985,-0.001833,0.002978,3582702.0
75%,0.595987,0.452401,0.556157,0.031918,0.051313,0.040852,3583270.0
max,4.985548,4.245151,5.171739,0.849255,1.679879,1.1905,3583791.0


In [9]:
X_train = train_data[['AccX', 'AccY', 'AccZ', 'GyroX', 'GyroY', 'GyroZ']]
train_data.Class=train_data.Class.replace(['AGGRESSIVE', 'NORMAL', 'SLOW'],[1,2,3])
y_train = train_data[['Class']]

In [10]:
print(X_train.shape)
print(y_train.shape)

(3644, 6)
(3644, 1)


In [11]:
X_test = test_data[['AccX', 'AccY', 'AccZ', 'GyroX', 'GyroY', 'GyroZ']]
test_data.Class=test_data.Class.replace(['AGGRESSIVE', 'NORMAL', 'SLOW'],[1,2,3])
y_test = test_data[['Class']]

In [12]:
print(X_test.shape)
print(y_test.shape)

(3084, 6)
(3084, 1)


In [13]:
from sklearn.linear_model import LogisticRegression
Logit = LogisticRegression(C=1e2,
                          multi_class='ovr',
                          random_state=17,
                          max_iter=200
                          )
Logit.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


LogisticRegression(C=100.0, max_iter=200, multi_class='ovr', random_state=17)

In [14]:
y_test_pred = Logit.predict(X_test)
y_test_pred_proba = Logit.predict_proba(X_test)
print(y_test_pred[:5])
print(y_test_pred_proba[:5])

[3 1 1 1 1]
[[0.32818855 0.31137278 0.36043867]
 [0.36648993 0.30818706 0.32532301]
 [0.65057553 0.13440257 0.2150219 ]
 [0.64546947 0.13819756 0.21633297]
 [0.64537728 0.1413967  0.21322602]]


In [15]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.45201037613488976
[[ 221   53  540]
 [ 152   54  791]
 [ 118   36 1119]]
              precision    recall  f1-score   support

           1     0.4501    0.2715    0.3387       814
           2     0.3776    0.0542    0.0947       997
           3     0.4567    0.8790    0.6011      1273

    accuracy                         0.4520      3084
   macro avg     0.4282    0.4016    0.3449      3084
weighted avg     0.4294    0.4520    0.3682      3084



In [16]:
### 3. MLP Classifier

from sklearn.neural_network import MLPClassifier
clf = MLPClassifier(random_state=17, max_iter=1000).fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


In [17]:
y_test_pred = clf.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.43320363164721143
[[376 118 320]
 [265 165 567]
 [232 246 795]]
              precision    recall  f1-score   support

           1     0.4307    0.4619    0.4458       814
           2     0.3119    0.1655    0.2163       997
           3     0.4727    0.6245    0.5381      1273

    accuracy                         0.4332      3084
   macro avg     0.4051    0.4173    0.4000      3084
weighted avg     0.4096    0.4332    0.4097      3084



In [18]:
### 4. KNN

from sklearn.neighbors import KNeighborsClassifier

# 5개 근접한 이웃, 거리측정기준 : 유클리드
knn = KNeighborsClassifier(n_neighbors=5, p=2)

# Model Fitting 과정
knn.fit(X_train, y_train)

  return self._fit(X, y)


KNeighborsClassifier()

In [19]:
y_test_pred = knn.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.3719195849546044
[[364 232 218]
 [350 320 327]
 [383 427 463]]
              precision    recall  f1-score   support

           1     0.3318    0.4472    0.3810       814
           2     0.3269    0.3210    0.3239       997
           3     0.4593    0.3637    0.4060      1273

    accuracy                         0.3719      3084
   macro avg     0.3727    0.3773    0.3703      3084
weighted avg     0.3828    0.3719    0.3728      3084



In [20]:
### 5. Random Forest

from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(n_estimators=5, random_state=17)
rf_model.fit(X_train, y_train)

  


RandomForestClassifier(n_estimators=5, random_state=17)

In [21]:
y_test_pred = rf_model.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.3732166018158236
[[389 222 203]
 [386 321 290]
 [396 436 441]]
              precision    recall  f1-score   support

           1     0.3322    0.4779    0.3919       814
           2     0.3279    0.3220    0.3249       997
           3     0.4722    0.3464    0.3996      1273

    accuracy                         0.3732      3084
   macro avg     0.3774    0.3821    0.3722      3084
weighted avg     0.3886    0.3732    0.3734      3084



In [22]:
### 6. Decision Tree
from sklearn.tree import DecisionTreeClassifier
DT_MODEL= DecisionTreeClassifier(random_state=17)
DT_MODEL.fit(X_train, y_train)

DecisionTreeClassifier(random_state=17)

In [23]:
y_test_pred = DT_MODEL.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.3631647211413748
[[303 253 258]
 [301 326 370]
 [333 449 491]]
              precision    recall  f1-score   support

           1     0.3234    0.3722    0.3461       814
           2     0.3171    0.3270    0.3220       997
           3     0.4388    0.3857    0.4105      1273

    accuracy                         0.3632      3084
   macro avg     0.3598    0.3616    0.3595      3084
weighted avg     0.3690    0.3632    0.3649      3084



In [24]:
from xgboost import XGBClassifier
from xgboost import plot_importance
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pandas as pd

In [25]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_train_xgb = le.fit_transform(y_train)
le = LabelEncoder()
y_test_xgb = le.fit_transform(y_test)

  y = column_or_1d(y, warn=True)


In [26]:
xgb_class = XGBClassifier()
xgb_class.fit(X_train, y_train_xgb)

XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
              early_stopping_rounds=None, enable_categorical=False,
              eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
              importance_type=None, interaction_constraints='',
              learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
              max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
              missing=nan, monotone_constraints='()', n_estimators=100,
              n_jobs=0, num_parallel_tree=1, objective='multi:softprob',
              predictor='auto', random_state=0, reg_alpha=0, ...)

In [27]:
y_test_pred = xgb_class.predict(X_test)
print(accuracy_score(y_test_xgb, y_test_pred))
print(confusion_matrix(y_test_xgb, y_test_pred))
print(classification_report(y_test_xgb, y_test_pred, digits=4))

0.4056420233463035
[[322 218 274]
 [258 321 418]
 [221 444 608]]
              precision    recall  f1-score   support

           0     0.4020    0.3956    0.3988       814
           1     0.3266    0.3220    0.3242       997
           2     0.4677    0.4776    0.4726      1273

    accuracy                         0.4056      3084
   macro avg     0.3987    0.3984    0.3985      3084
weighted avg     0.4047    0.4056    0.4051      3084



In [28]:
### 8. LightGBM
from lightgbm import LGBMClassifier
lgbm_class = LGBMClassifier()
lgbm_class.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


LGBMClassifier()

In [29]:
y_test_pred =lgbm_class.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.41309987029831385
[[340 198 276]
 [249 303 445]
 [225 417 631]]
              precision    recall  f1-score   support

           1     0.4177    0.4177    0.4177       814
           2     0.3301    0.3039    0.3164       997
           3     0.4667    0.4957    0.4808      1273

    accuracy                         0.4131      3084
   macro avg     0.4048    0.4058    0.4050      3084
weighted avg     0.4096    0.4131    0.4110      3084



In [30]:
from catboost import CatBoostClassifier
cat_class = CatBoostClassifier()
cat_class.fit(X_train, y_train)

Learning rate set to 0.084521
0:	learn: 1.0939424	total: 143ms	remaining: 2m 22s
1:	learn: 1.0893541	total: 147ms	remaining: 1m 13s
2:	learn: 1.0848371	total: 150ms	remaining: 49.9s
3:	learn: 1.0806651	total: 158ms	remaining: 39.5s
4:	learn: 1.0776957	total: 161ms	remaining: 32.1s
5:	learn: 1.0739871	total: 165ms	remaining: 27.3s
6:	learn: 1.0715186	total: 171ms	remaining: 24.3s
7:	learn: 1.0688344	total: 177ms	remaining: 22s
8:	learn: 1.0664146	total: 185ms	remaining: 20.4s
9:	learn: 1.0641152	total: 188ms	remaining: 18.6s
10:	learn: 1.0614489	total: 191ms	remaining: 17.2s
11:	learn: 1.0590986	total: 194ms	remaining: 16s
12:	learn: 1.0572713	total: 201ms	remaining: 15.3s
13:	learn: 1.0552578	total: 207ms	remaining: 14.5s
14:	learn: 1.0529967	total: 211ms	remaining: 13.9s
15:	learn: 1.0507628	total: 217ms	remaining: 13.4s
16:	learn: 1.0476234	total: 224ms	remaining: 12.9s
17:	learn: 1.0465103	total: 230ms	remaining: 12.6s
18:	learn: 1.0447165	total: 235ms	remaining: 12.1s
19:	learn: 1.

<catboost.core.CatBoostClassifier at 0x1ef8e4b3748>

In [31]:
y_test_pred = cat_class.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.4069390402075227
[[328 208 278]
 [251 319 427]
 [255 410 608]]
              precision    recall  f1-score   support

           1     0.3933    0.4029    0.3981       814
           2     0.3404    0.3200    0.3299       997
           3     0.4631    0.4776    0.4702      1273

    accuracy                         0.4069      3084
   macro avg     0.3989    0.4002    0.3994      3084
weighted avg     0.4050    0.4069    0.4058      3084

