In [1]:
!nvidia-smi

Tue Jul  2 14:04:49 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 551.23                 Driver Version: 551.23         CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                     TCC/WDDM  | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 2080 ...  WDDM  |   00000000:01:00.0  On |                  N/A |
|  0%   43C    P5             16W /  280W |     228MiB /   8192MiB |     27%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import signal
from sklearn.model_selection import train_test_split,StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report,accuracy_score,ConfusionMatrixDisplay,confusion_matrix,precision_score,recall_score,roc_curve,roc_auc_score,balanced_accuracy_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
import datetime
from dataclasses import dataclass
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
from scipy.ndimage import gaussian_filter1d
from itertools import combinations as comb
from pyts.image import RecurrencePlot
import PIL
from sklearn.utils import class_weight
from sklearn.model_selection import RandomizedSearchCV,GridSearchCV,StratifiedKFold
from unidecode import unidecode

In [3]:
train_data = pd.read_csv('./dataset/dataset_1/train_motion_data.csv')
test_data = pd.read_csv('./dataset/dataset_1/test_motion_data.csv')
print(train_data.shape)
print(test_data.shape)

(3644, 8)
(3084, 8)


In [4]:
train_data.head()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,Class,Timestamp
0,0.0,0.0,0.0,0.059407,-0.174707,0.101938,NORMAL,3581629
1,-1.624864,-1.082492,-0.204183,-0.028558,0.051313,0.135536,NORMAL,3581630
2,-0.59466,-0.12241,0.220502,-0.019395,-0.029322,0.087888,NORMAL,3581630
3,0.738478,-0.228456,0.667732,0.069791,-0.029932,0.054902,NORMAL,3581631
4,0.101741,0.777568,-0.06673,0.030696,-0.003665,0.054902,NORMAL,3581631


In [6]:
print(train_data['Class'].value_counts())
print('-' * 30)
print(test_data['Class'].value_counts())

SLOW          1331
NORMAL        1200
AGGRESSIVE    1113
Name: Class, dtype: int64
------------------------------
SLOW          1273
NORMAL         997
AGGRESSIVE     814
Name: Class, dtype: int64


In [8]:
# Prompt
# There is a driver behavior dataset. We will explain the domain and statistics of this dataset and dicuss the instruction that will be performed on it. Below is the information about the given dataset.
# 
# There is a drvier behavior dataset. Our goal is to predict driving behavior whether this driver is normal or aggressive. Below is the information about the dataset.
# The acceleration on X axis in meters per second squared is {}. The acceleration on Y axis in meters per second squared is {}. The acceleration on Z axis in meters per second squared is {}. 
# The rotation on X axis in degrees per second is {}. The rotation on Y axis in degrees per second is {}. The rotation on Z axis in degrees per second is {}.
# What is the driving behavior of this dataset? Is this driver normal or aggressive?

In [9]:
print(train_data['Class'].value_counts())

SLOW          1331
NORMAL        1200
AGGRESSIVE    1113
Name: Class, dtype: int64


In [11]:
train_data[train_data['Class']=='AGGRESSIVE'].head()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,Class,Timestamp
1200,4.933154,-2.94608,-0.521562,0.226784,-1.290758,-0.309174,AGGRESSIVE,3582366
1201,2.397097,2.675064,-3.006705,0.014203,0.009163,0.054902,AGGRESSIVE,3582366
1202,-0.14744,0.042525,0.127188,0.017257,-0.003054,0.07506,AGGRESSIVE,3582367
1203,0.086913,0.360096,0.520542,0.066737,-0.007941,0.166079,AGGRESSIVE,3582367
1204,-1.12227,1.058838,1.156976,0.088728,0.0,0.381714,AGGRESSIVE,3582368


In [12]:
print(train_data[train_data['Class']=='AGGRESSIVE'].shape)

(1113, 8)


In [13]:
train_data_normal = train_data[train_data['Class']=='NORMAL']
train_data_aggressive = train_data[train_data['Class']=='AGGRESSIVE'].sample(n=100)

In [14]:
print(train_data_normal.shape)
print(train_data_aggressive.shape)

(1200, 8)
(100, 8)


In [19]:
test_data_normal = test_data[test_data['Class']=='NORMAL']
test_data_aggressive = test_data[test_data['Class']=='AGGRESSIVE'].sample(n=98)

In [20]:
print(test_data_normal.shape)
print(test_data_aggressive.shape)

(997, 8)
(98, 8)


In [21]:
train_data_normal = train_data_normal.reset_index(drop=True)
train_data_aggressive = train_data_aggressive.reset_index(drop=True)

In [22]:
test_data_normal = test_data_normal.reset_index(drop=True)
test_data_aggressive = test_data_aggressive.reset_index(drop=True)

In [23]:
train_data_normal.head()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,Class,Timestamp
0,0.0,0.0,0.0,0.059407,-0.174707,0.101938,NORMAL,3581629
1,-1.624864,-1.082492,-0.204183,-0.028558,0.051313,0.135536,NORMAL,3581630
2,-0.59466,-0.12241,0.220502,-0.019395,-0.029322,0.087888,NORMAL,3581630
3,0.738478,-0.228456,0.667732,0.069791,-0.029932,0.054902,NORMAL,3581631
4,0.101741,0.777568,-0.06673,0.030696,-0.003665,0.054902,NORMAL,3581631


In [27]:
import pandas as pd
from sklearn.utils import shuffle

train_data_selected = pd.concat([train_data_normal,
                                 train_data_aggressive
                                 ])
train_data_selected = shuffle(train_data_selected)

test_data_selected = pd.concat([test_data_normal,
                                test_data_aggressive
                                ])
test_data_selected = shuffle(test_data_selected)

In [28]:
train_data_selected.head()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,Class,Timestamp
352,0.315298,0.759092,-0.57952,0.042302,-0.01405,0.030467,NORMAL,3581819
868,0.169872,2.048677,0.223125,-0.014508,-0.031765,0.013974,NORMAL,3582098
0,-0.267929,-1.062563,0.539536,-0.040164,0.029932,0.044517,AGGRESSIVE,3582545
157,-2.00992,-0.192628,2.090355,0.064904,-0.087965,0.015806,NORMAL,3581714
1153,-1.330228,-0.693968,-0.644975,0.056352,-0.125227,0.022526,NORMAL,3582252


In [29]:
train_data_selected = train_data_selected.reset_index(drop=True)
test_data_selected = test_data_selected.reset_index(drop=True)

In [47]:
X_train = train_data_selected[['AccX', 'AccY', 'AccZ', 'GyroX', 'GyroY', 'GyroZ']]
train_data_selected.Class=train_data_selected.Class.replace(['NORMAL', 'AGGRESSIVE'],[0, 1])
y_train = train_data_selected[['Class']]

In [48]:
X_test = test_data_selected[['AccX', 'AccY', 'AccZ', 'GyroX', 'GyroY', 'GyroZ']]
test_data_selected.Class=test_data_selected.Class.replace(['NORMAL', 'AGGRESSIVE'],[0, 1])
y_test = test_data_selected[['Class']]

In [49]:
from sklearn.linear_model import LogisticRegression
Logit = LogisticRegression(C=1e2,
                          multi_class='ovr',
                          random_state=17,
                          max_iter=200
                          )
Logit.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


LogisticRegression(C=100.0, max_iter=200, multi_class='ovr', random_state=17)

In [50]:
y_test_pred = Logit.predict(X_test)
y_test_pred_proba = Logit.predict_proba(X_test)
print(y_test_pred[:5])
print(y_test_pred_proba[:5])

[1 1 1 1 1]
[[0.92885991 0.07114009]
 [0.92946445 0.07053555]
 [0.92692459 0.07307541]
 [0.91890454 0.08109546]
 [0.91821205 0.08178795]]


In [51]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.9105022831050228
[[997   0]
 [ 98   0]]
              precision    recall  f1-score   support

           1     0.9105    1.0000    0.9532       997
           2     0.0000    0.0000    0.0000        98

    accuracy                         0.9105      1095
   macro avg     0.4553    0.5000    0.4766      1095
weighted avg     0.8290    0.9105    0.8678      1095



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [52]:
### 3. MLP Classifier

from sklearn.neural_network import MLPClassifier
clf = MLPClassifier(random_state=17, max_iter=1000).fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


In [53]:
y_test_pred = clf.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.9105022831050228
[[994   3]
 [ 95   3]]
              precision    recall  f1-score   support

           1     0.9128    0.9970    0.9530       997
           2     0.5000    0.0306    0.0577        98

    accuracy                         0.9105      1095
   macro avg     0.7064    0.5138    0.5054      1095
weighted avg     0.8758    0.9105    0.8729      1095



In [54]:
### 4. KNN

from sklearn.neighbors import KNeighborsClassifier

# 5개 근접한 이웃, 거리측정기준 : 유클리드
knn = KNeighborsClassifier(n_neighbors=5, p=2)

# Model Fitting 과정
knn.fit(X_train, y_train)

  return self._fit(X, y)


KNeighborsClassifier()

In [55]:
y_test_pred = knn.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.9068493150684932
[[990   7]
 [ 95   3]]
              precision    recall  f1-score   support

           1     0.9124    0.9930    0.9510       997
           2     0.3000    0.0306    0.0556        98

    accuracy                         0.9068      1095
   macro avg     0.6062    0.5118    0.5033      1095
weighted avg     0.8576    0.9068    0.8709      1095



In [56]:
### 5. Random Forest

from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(n_estimators=5, random_state=17)
rf_model.fit(X_train, y_train)

  


RandomForestClassifier(n_estimators=5, random_state=17)

In [57]:
y_test_pred = rf_model.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.9004566210045662
[[981  16]
 [ 93   5]]
              precision    recall  f1-score   support

           1     0.9134    0.9840    0.9474       997
           2     0.2381    0.0510    0.0840        98

    accuracy                         0.9005      1095
   macro avg     0.5758    0.5175    0.5157      1095
weighted avg     0.8530    0.9005    0.8701      1095



In [58]:
### 6. Decision Tree
from sklearn.tree import DecisionTreeClassifier
DT_MODEL= DecisionTreeClassifier(random_state=17)
DT_MODEL.fit(X_train, y_train)

DecisionTreeClassifier(random_state=17)

In [59]:
y_test_pred = DT_MODEL.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.8292237442922374
[[892 105]
 [ 82  16]]
              precision    recall  f1-score   support

           1     0.9158    0.8947    0.9051       997
           2     0.1322    0.1633    0.1461        98

    accuracy                         0.8292      1095
   macro avg     0.5240    0.5290    0.5256      1095
weighted avg     0.8457    0.8292    0.8372      1095



In [60]:
from xgboost import XGBClassifier
from xgboost import plot_importance
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pandas as pd

In [64]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_train_xgb = le.fit_transform(y_train)
le = LabelEncoder()
y_test_xgb = le.fit_transform(y_test)

  y = column_or_1d(y, warn=True)


In [65]:
xgb_class = XGBClassifier()
xgb_class.fit(X_train, y_train_xgb)

XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
              early_stopping_rounds=None, enable_categorical=False,
              eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
              importance_type=None, interaction_constraints='',
              learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
              max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
              missing=nan, monotone_constraints='()', n_estimators=100,
              n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, ...)

In [66]:
y_test_pred = xgb_class.predict(X_test)
print(accuracy_score(y_test_xgb, y_test_pred))
print(confusion_matrix(y_test_xgb, y_test_pred))
print(classification_report(y_test_xgb, y_test_pred, digits=4))

0.902283105022831
[[987  10]
 [ 97   1]]
              precision    recall  f1-score   support

           0     0.9105    0.9900    0.9486       997
           1     0.0909    0.0102    0.0183        98

    accuracy                         0.9023      1095
   macro avg     0.5007    0.5001    0.4835      1095
weighted avg     0.8372    0.9023    0.8653      1095



In [67]:
### 8. LightGBM
from lightgbm import LGBMClassifier
lgbm_class = LGBMClassifier()
lgbm_class.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


LGBMClassifier()

In [68]:
y_test_pred =lgbm_class.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.9077625570776255
[[991   6]
 [ 95   3]]
              precision    recall  f1-score   support

           1     0.9125    0.9940    0.9515       997
           2     0.3333    0.0306    0.0561        98

    accuracy                         0.9078      1095
   macro avg     0.6229    0.5123    0.5038      1095
weighted avg     0.8607    0.9078    0.8714      1095



In [69]:
from catboost import CatBoostClassifier
cat_class = CatBoostClassifier()
cat_class.fit(X_train, y_train)

Learning rate set to 0.011524
0:	learn: 0.6805749	total: 205ms	remaining: 3m 24s
1:	learn: 0.6680455	total: 209ms	remaining: 1m 44s
2:	learn: 0.6563963	total: 212ms	remaining: 1m 10s
3:	learn: 0.6443045	total: 220ms	remaining: 54.7s
4:	learn: 0.6332100	total: 223ms	remaining: 44.5s
5:	learn: 0.6223023	total: 229ms	remaining: 38s
6:	learn: 0.6116939	total: 234ms	remaining: 33.1s
7:	learn: 0.6010601	total: 236ms	remaining: 29.3s
8:	learn: 0.5910509	total: 239ms	remaining: 26.3s
9:	learn: 0.5806386	total: 246ms	remaining: 24.3s
10:	learn: 0.5714516	total: 248ms	remaining: 22.3s
11:	learn: 0.5623467	total: 251ms	remaining: 20.6s
12:	learn: 0.5537155	total: 257ms	remaining: 19.5s
13:	learn: 0.5453405	total: 259ms	remaining: 18.2s
14:	learn: 0.5368989	total: 260ms	remaining: 17.1s
15:	learn: 0.5289100	total: 262ms	remaining: 16.1s
16:	learn: 0.5207881	total: 264ms	remaining: 15.2s
17:	learn: 0.5133087	total: 271ms	remaining: 14.8s
18:	learn: 0.5062848	total: 274ms	remaining: 14.1s
19:	learn:

<catboost.core.CatBoostClassifier at 0x20ed0c9b288>

In [70]:
y_test_pred = cat_class.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.9095890410958904
[[995   2]
 [ 97   1]]
              precision    recall  f1-score   support

           1     0.9112    0.9980    0.9526       997
           2     0.3333    0.0102    0.0198        98

    accuracy                         0.9096      1095
   macro avg     0.6223    0.5041    0.4862      1095
weighted avg     0.8595    0.9096    0.8691      1095



In [71]:
### 10. LDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
lda_class = LinearDiscriminantAnalysis()
lda_class.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


LinearDiscriminantAnalysis()

In [72]:
y_test_pred = lda_class.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.9105022831050228
[[997   0]
 [ 98   0]]
              precision    recall  f1-score   support

           1     0.9105    1.0000    0.9532       997
           2     0.0000    0.0000    0.0000        98

    accuracy                         0.9105      1095
   macro avg     0.4553    0.5000    0.4766      1095
weighted avg     0.8290    0.9105    0.8678      1095



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [73]:
### 11. QDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
qda_class = QuadraticDiscriminantAnalysis()
qda_class.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


QuadraticDiscriminantAnalysis()

In [74]:
y_test_pred = qda_class.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.8776255707762557
[[948  49]
 [ 85  13]]
              precision    recall  f1-score   support

           1     0.9177    0.9509    0.9340       997
           2     0.2097    0.1327    0.1625        98

    accuracy                         0.8776      1095
   macro avg     0.5637    0.5418    0.5482      1095
weighted avg     0.8543    0.8776    0.8649      1095



In [75]:
X_train.head()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ
0,0.315298,0.759092,-0.57952,0.042302,-0.01405,0.030467
1,0.169872,2.048677,0.223125,-0.014508,-0.031765,0.013974
2,-0.267929,-1.062563,0.539536,-0.040164,0.029932,0.044517
3,-2.00992,-0.192628,2.090355,0.064904,-0.087965,0.015806
4,-1.330228,-0.693968,-0.644975,0.056352,-0.125227,0.022526


In [76]:
y_train.head()

Unnamed: 0,Class
0,1
1,1
2,2
3,1
4,1


In [77]:
X_test.head()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ
0,-1.536128,-1.322894,0.319673,-0.008552,-0.031536,0.316275
1,-0.122009,0.552918,0.417331,0.029932,0.019777,-0.004429
2,0.353325,0.815918,-0.066323,0.034819,0.111407,-0.065515
3,-0.0527,-0.26864,0.423433,0.004887,-0.00649,0.085979
4,0.304072,-0.61602,2.449866,-0.045204,-0.044364,0.021228


In [78]:
y_train.head()

Unnamed: 0,Class
0,1
1,1
2,2
3,1
4,1


In [79]:
X_train.to_csv('X_train.csv', index=False)
y_train.to_csv('y_train.csv', index=False)
X_test.to_csv('X_test.csv', index=False)
y_test.to_csv('y_test.csv', index=False)