In [1]:
!nvidia-smi

Tue Jul  2 16:06:51 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 551.23                 Driver Version: 551.23         CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                     TCC/WDDM  | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 2080 ...  WDDM  |   00000000:01:00.0  On |                  N/A |
|  0%   42C    P8              8W /  280W |     296MiB /   8192MiB |      1%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import signal
from sklearn.model_selection import train_test_split,StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report,accuracy_score,ConfusionMatrixDisplay,confusion_matrix,precision_score,recall_score,roc_curve,roc_auc_score,balanced_accuracy_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
import datetime
from dataclasses import dataclass
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
from scipy.ndimage import gaussian_filter1d
from itertools import combinations as comb
from pyts.image import RecurrencePlot
import PIL
from sklearn.utils import class_weight
from sklearn.model_selection import RandomizedSearchCV,GridSearchCV,StratifiedKFold
from unidecode import unidecode

In [3]:
train_data = pd.read_csv('./dataset/dataset_1/train_motion_data.csv')
test_data = pd.read_csv('./dataset/dataset_1/test_motion_data.csv')
print(train_data.shape)
print(test_data.shape)

(3644, 8)
(3084, 8)


In [4]:
train_data.head()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,Class,Timestamp
0,0.0,0.0,0.0,0.059407,-0.174707,0.101938,NORMAL,3581629
1,-1.624864,-1.082492,-0.204183,-0.028558,0.051313,0.135536,NORMAL,3581630
2,-0.59466,-0.12241,0.220502,-0.019395,-0.029322,0.087888,NORMAL,3581630
3,0.738478,-0.228456,0.667732,0.069791,-0.029932,0.054902,NORMAL,3581631
4,0.101741,0.777568,-0.06673,0.030696,-0.003665,0.054902,NORMAL,3581631


In [5]:
print(train_data['Class'].value_counts())
print('-' * 30)
print(test_data['Class'].value_counts())

SLOW          1331
NORMAL        1200
AGGRESSIVE    1113
Name: Class, dtype: int64
------------------------------
SLOW          1273
NORMAL         997
AGGRESSIVE     814
Name: Class, dtype: int64


In [6]:
print(train_data['Class'].value_counts())

SLOW          1331
NORMAL        1200
AGGRESSIVE    1113
Name: Class, dtype: int64


In [7]:
train_data[train_data['Class']=='AGGRESSIVE'].head()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,Class,Timestamp
1200,4.933154,-2.94608,-0.521562,0.226784,-1.290758,-0.309174,AGGRESSIVE,3582366
1201,2.397097,2.675064,-3.006705,0.014203,0.009163,0.054902,AGGRESSIVE,3582366
1202,-0.14744,0.042525,0.127188,0.017257,-0.003054,0.07506,AGGRESSIVE,3582367
1203,0.086913,0.360096,0.520542,0.066737,-0.007941,0.166079,AGGRESSIVE,3582367
1204,-1.12227,1.058838,1.156976,0.088728,0.0,0.381714,AGGRESSIVE,3582368


In [8]:
print(train_data[train_data['Class']=='AGGRESSIVE'].shape)

(1113, 8)


In [9]:
train_data_normal = train_data[train_data['Class']=='NORMAL']
train_data_aggressive = train_data[train_data['Class']=='AGGRESSIVE']

In [10]:
print(train_data_normal.shape)
print(train_data_aggressive.shape)

(1200, 8)
(1113, 8)


In [11]:
test_data_normal = test_data[test_data['Class']=='NORMAL']
test_data_aggressive = test_data[test_data['Class']=='AGGRESSIVE']

In [12]:
print(test_data_normal.shape)
print(test_data_aggressive.shape)

(997, 8)
(814, 8)


In [13]:
train_data_normal = train_data_normal.reset_index(drop=True)
train_data_aggressive = train_data_aggressive.reset_index(drop=True)

test_data_normal = test_data_normal.reset_index(drop=True)
test_data_aggressive = test_data_aggressive.reset_index(drop=True)

In [14]:
train_data_normal.head()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,Class,Timestamp
0,0.0,0.0,0.0,0.059407,-0.174707,0.101938,NORMAL,3581629
1,-1.624864,-1.082492,-0.204183,-0.028558,0.051313,0.135536,NORMAL,3581630
2,-0.59466,-0.12241,0.220502,-0.019395,-0.029322,0.087888,NORMAL,3581630
3,0.738478,-0.228456,0.667732,0.069791,-0.029932,0.054902,NORMAL,3581631
4,0.101741,0.777568,-0.06673,0.030696,-0.003665,0.054902,NORMAL,3581631


In [15]:
import pandas as pd
from sklearn.utils import shuffle

train_data_selected = pd.concat([train_data_normal,
                                 train_data_aggressive
                                 ])
train_data_selected = shuffle(train_data_selected)

test_data_selected = pd.concat([test_data_normal,
                                test_data_aggressive
                                ])
test_data_selected = shuffle(test_data_selected)

In [16]:
train_data_selected.head()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,Class,Timestamp
594,-0.518044,-0.618509,0.604425,0.15348,-0.182038,0.2235,NORMAL,3581950
83,2.329861,-0.312948,-1.877585,0.155923,0.156992,-0.234038,AGGRESSIVE,3582411
241,-0.567347,0.622879,0.283811,0.051465,-0.11973,-0.057498,AGGRESSIVE,3582496
1125,1.073158,0.010227,-0.658719,0.000153,-0.083078,-0.146073,NORMAL,3582237
416,1.355648,1.280515,0.141971,0.092393,0.136223,-0.08743,AGGRESSIVE,3582590


In [17]:
train_data_selected = train_data_selected.reset_index(drop=True)
test_data_selected = test_data_selected.reset_index(drop=True)

In [18]:
X_train = train_data_selected[['AccX', 'AccY', 'AccZ', 'GyroX', 'GyroY', 'GyroZ']]
train_data_selected.Class=train_data_selected.Class.replace(['NORMAL', 'AGGRESSIVE'],[0, 1])
y_train = train_data_selected[['Class']]

In [19]:
X_test = test_data_selected[['AccX', 'AccY', 'AccZ', 'GyroX', 'GyroY', 'GyroZ']]
test_data_selected.Class=test_data_selected.Class.replace(['NORMAL', 'AGGRESSIVE'],[0, 1])
y_test = test_data_selected[['Class']]

In [20]:
from sklearn.linear_model import LogisticRegression
Logit = LogisticRegression(C=1e2,
                          multi_class='ovr',
                          random_state=17,
                          max_iter=200
                          )
Logit.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


LogisticRegression(C=100.0, max_iter=200, multi_class='ovr', random_state=17)

In [21]:
y_test_pred = Logit.predict(X_test)
y_test_pred_proba = Logit.predict_proba(X_test)
print(y_test_pred[:5])
print(y_test_pred_proba[:5])

[0 0 0 1 1]
[[0.52150989 0.47849011]
 [0.55270816 0.44729184]
 [0.55063339 0.44936661]
 [0.47290435 0.52709565]
 [0.42475118 0.57524882]]


In [22]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.5698509110988405
[[675 322]
 [457 357]]
              precision    recall  f1-score   support

           0     0.5963    0.6770    0.6341       997
           1     0.5258    0.4386    0.4782       814

    accuracy                         0.5699      1811
   macro avg     0.5610    0.5578    0.5562      1811
weighted avg     0.5646    0.5699    0.5640      1811



In [23]:
### 3. MLP Classifier

from sklearn.neural_network import MLPClassifier
clf = MLPClassifier(random_state=17, max_iter=1000).fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


In [24]:
y_test_pred = clf.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.5930425179458862
[[700 297]
 [440 374]]
              precision    recall  f1-score   support

           0     0.6140    0.7021    0.6551       997
           1     0.5574    0.4595    0.5037       814

    accuracy                         0.5930      1811
   macro avg     0.5857    0.5808    0.5794      1811
weighted avg     0.5886    0.5930    0.5871      1811



In [25]:
### 4. KNN

from sklearn.neighbors import KNeighborsClassifier

# 5개 근접한 이웃, 거리측정기준 : 유클리드
knn = KNeighborsClassifier(n_neighbors=5, p=2)

# Model Fitting 과정
knn.fit(X_train, y_train)

  return self._fit(X, y)


KNeighborsClassifier()

In [26]:
y_test_pred = knn.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.5593594699061292
[[608 389]
 [409 405]]
              precision    recall  f1-score   support

           0     0.5978    0.6098    0.6038       997
           1     0.5101    0.4975    0.5037       814

    accuracy                         0.5594      1811
   macro avg     0.5540    0.5537    0.5538      1811
weighted avg     0.5584    0.5594    0.5588      1811



In [27]:
### 5. Random Forest

from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(n_estimators=5, random_state=17)
rf_model.fit(X_train, y_train)

  


RandomForestClassifier(n_estimators=5, random_state=17)

In [28]:
y_test_pred = rf_model.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.5466593042517945
[[557 440]
 [381 433]]
              precision    recall  f1-score   support

           0     0.5938    0.5587    0.5757       997
           1     0.4960    0.5319    0.5133       814

    accuracy                         0.5467      1811
   macro avg     0.5449    0.5453    0.5445      1811
weighted avg     0.5498    0.5467    0.5477      1811



In [29]:
### 6. Decision Tree
from sklearn.tree import DecisionTreeClassifier
DT_MODEL= DecisionTreeClassifier(random_state=17)
DT_MODEL.fit(X_train, y_train)

DecisionTreeClassifier(random_state=17)

In [30]:
y_test_pred = DT_MODEL.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.5367200441744893
[[527 470]
 [369 445]]
              precision    recall  f1-score   support

           0     0.5882    0.5286    0.5568       997
           1     0.4863    0.5467    0.5147       814

    accuracy                         0.5367      1811
   macro avg     0.5373    0.5376    0.5358      1811
weighted avg     0.5424    0.5367    0.5379      1811



In [31]:
from xgboost import XGBClassifier
from xgboost import plot_importance
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pandas as pd

In [32]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_train_xgb = le.fit_transform(y_train)
le = LabelEncoder()
y_test_xgb = le.fit_transform(y_test)

  y = column_or_1d(y, warn=True)


In [33]:
xgb_class = XGBClassifier()
xgb_class.fit(X_train, y_train_xgb)

XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
              early_stopping_rounds=None, enable_categorical=False,
              eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
              importance_type=None, interaction_constraints='',
              learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
              max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
              missing=nan, monotone_constraints='()', n_estimators=100,
              n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, ...)

In [34]:
y_test_pred = xgb_class.predict(X_test)
print(accuracy_score(y_test_xgb, y_test_pred))
print(confusion_matrix(y_test_xgb, y_test_pred))
print(classification_report(y_test_xgb, y_test_pred, digits=4))

0.5532854776366648
[[581 416]
 [393 421]]
              precision    recall  f1-score   support

           0     0.5965    0.5827    0.5895       997
           1     0.5030    0.5172    0.5100       814

    accuracy                         0.5533      1811
   macro avg     0.5497    0.5500    0.5498      1811
weighted avg     0.5545    0.5533    0.5538      1811



In [35]:
### 8. LightGBM
from lightgbm import LGBMClassifier
lgbm_class = LGBMClassifier()
lgbm_class.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


LGBMClassifier()

In [36]:
y_test_pred =lgbm_class.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.5737161789066814
[[616 381]
 [391 423]]
              precision    recall  f1-score   support

           0     0.6117    0.6179    0.6148       997
           1     0.5261    0.5197    0.5229       814

    accuracy                         0.5737      1811
   macro avg     0.5689    0.5688    0.5688      1811
weighted avg     0.5732    0.5737    0.5735      1811



In [37]:
from catboost import CatBoostClassifier
cat_class = CatBoostClassifier()
cat_class.fit(X_train, y_train)

Learning rate set to 0.014738
0:	learn: 0.6922355	total: 101ms	remaining: 1m 40s
1:	learn: 0.6908581	total: 102ms	remaining: 51.1s
2:	learn: 0.6895854	total: 104ms	remaining: 34.5s
3:	learn: 0.6884760	total: 105ms	remaining: 26.2s
4:	learn: 0.6875204	total: 106ms	remaining: 21.2s
5:	learn: 0.6864439	total: 109ms	remaining: 18.1s
6:	learn: 0.6853633	total: 111ms	remaining: 15.8s
7:	learn: 0.6843945	total: 114ms	remaining: 14.2s
8:	learn: 0.6837158	total: 116ms	remaining: 12.7s
9:	learn: 0.6826928	total: 117ms	remaining: 11.6s
10:	learn: 0.6817564	total: 119ms	remaining: 10.7s
11:	learn: 0.6808135	total: 120ms	remaining: 9.88s
12:	learn: 0.6798098	total: 122ms	remaining: 9.22s
13:	learn: 0.6789601	total: 124ms	remaining: 8.75s
14:	learn: 0.6780708	total: 127ms	remaining: 8.33s
15:	learn: 0.6771665	total: 130ms	remaining: 8.02s
16:	learn: 0.6762908	total: 132ms	remaining: 7.65s
17:	learn: 0.6753388	total: 134ms	remaining: 7.31s
18:	learn: 0.6744077	total: 136ms	remaining: 7.01s
19:	learn:

<catboost.core.CatBoostClassifier at 0x27490e50488>

In [38]:
y_test_pred = cat_class.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.5952512424075097
[[677 320]
 [413 401]]
              precision    recall  f1-score   support

           0     0.6211    0.6790    0.6488       997
           1     0.5562    0.4926    0.5225       814

    accuracy                         0.5953      1811
   macro avg     0.5886    0.5858    0.5856      1811
weighted avg     0.5919    0.5953    0.5920      1811



In [39]:
### 10. LDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
lda_class = LinearDiscriminantAnalysis()
lda_class.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


LinearDiscriminantAnalysis()

In [40]:
y_test_pred = lda_class.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.5704030922142462
[[675 322]
 [456 358]]
              precision    recall  f1-score   support

           0     0.5968    0.6770    0.6344       997
           1     0.5265    0.4398    0.4793       814

    accuracy                         0.5704      1811
   macro avg     0.5616    0.5584    0.5568      1811
weighted avg     0.5652    0.5704    0.5647      1811



In [41]:
### 11. QDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
qda_class = QuadraticDiscriminantAnalysis()
qda_class.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


QuadraticDiscriminantAnalysis()

In [42]:
y_test_pred = qda_class.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.6029817780231916
[[755 242]
 [477 337]]
              precision    recall  f1-score   support

           0     0.6128    0.7573    0.6774       997
           1     0.5820    0.4140    0.4838       814

    accuracy                         0.6030      1811
   macro avg     0.5974    0.5856    0.5806      1811
weighted avg     0.5990    0.6030    0.5904      1811



In [43]:
X_train.to_csv('X_train.csv', index=False)
y_train.to_csv('y_train.csv', index=False)
X_test.to_csv('X_test.csv', index=False)
y_test.to_csv('y_test.csv', index=False)