In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import signal
from sklearn.model_selection import train_test_split,StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report,accuracy_score,ConfusionMatrixDisplay,confusion_matrix,precision_score,recall_score,roc_curve,roc_auc_score,balanced_accuracy_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
import datetime
from dataclasses import dataclass
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
from scipy.ndimage import gaussian_filter1d
from itertools import combinations as comb
from pyts.image import RecurrencePlot
import PIL
from sklearn.utils import class_weight
from sklearn.model_selection import RandomizedSearchCV,GridSearchCV,StratifiedKFold
from unidecode import unidecode

In [2]:
train_data = pd.read_csv('./dataset/dataset_1/train_motion_data.csv')
test_data = pd.read_csv('./dataset/dataset_1/test_motion_data.csv')
print(train_data.shape)
print(test_data.shape)

(3644, 8)
(3084, 8)


In [5]:
X_train = train_data[['AccX', 'AccY', 'AccZ', 'GyroX', 'GyroY', 'GyroZ']]
train_data.Class=train_data.Class.replace(['AGGRESSIVE', 'NORMAL', 'SLOW'],[1,2,3])
y_train = train_data[['Class']]

In [6]:
print(X_train.shape)
print(y_train.shape)

(3644, 6)
(3644, 1)


In [7]:
X_test = test_data[['AccX', 'AccY', 'AccZ', 'GyroX', 'GyroY', 'GyroZ']]
test_data.Class=test_data.Class.replace(['AGGRESSIVE', 'NORMAL', 'SLOW'],[1,2,3])
y_test = test_data[['Class']]

In [8]:
print(X_test.shape)
print(y_test.shape)

(3084, 6)
(3084, 1)


In [9]:
X_concat = pd.concat([X_train, X_test])
y_concat = pd.concat([y_train, y_test])

X_and_y_concat = pd.concat([X_concat, y_concat], axis=1)

In [10]:
print(X_and_y_concat.shape)

(6728, 7)


In [11]:
X_and_y_concat.head()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,Class
0,0.0,0.0,0.0,0.059407,-0.174707,0.101938,2
1,-1.624864,-1.082492,-0.204183,-0.028558,0.051313,0.135536,2
2,-0.59466,-0.12241,0.220502,-0.019395,-0.029322,0.087888,2
3,0.738478,-0.228456,0.667732,0.069791,-0.029932,0.054902,2
4,0.101741,0.777568,-0.06673,0.030696,-0.003665,0.054902,2


In [12]:
X_and_y_concat.tail()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,Class
3079,-0.713858,-0.652975,-0.164015,-0.147829,-1.309466,0.51725,3
3080,1.514261,0.33007,1.020714,1.321302,1.707598,-0.674548,3
3081,1.280216,-1.735172,-2.332695,0.583376,0.690507,-0.468075,3
3082,0.912313,0.583314,-0.965622,0.235794,0.512745,0.406073,3
3083,1.462172,0.190287,0.019377,-0.254731,-0.279547,0.076205,3


In [13]:
X_test.tail()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ
3079,-0.713858,-0.652975,-0.164015,-0.147829,-1.309466,0.51725
3080,1.514261,0.33007,1.020714,1.321302,1.707598,-0.674548
3081,1.280216,-1.735172,-2.332695,0.583376,0.690507,-0.468075
3082,0.912313,0.583314,-0.965622,0.235794,0.512745,0.406073
3083,1.462172,0.190287,0.019377,-0.254731,-0.279547,0.076205


In [14]:
y_test.tail()

Unnamed: 0,Class
3079,3
3080,3
3081,3
3082,3
3083,3


In [15]:
X_and_y_concat = X_and_y_concat.reset_index(drop=True)

In [16]:
X_and_y_concat.tail()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,Class
6723,-0.713858,-0.652975,-0.164015,-0.147829,-1.309466,0.51725,3
6724,1.514261,0.33007,1.020714,1.321302,1.707598,-0.674548,3
6725,1.280216,-1.735172,-2.332695,0.583376,0.690507,-0.468075,3
6726,0.912313,0.583314,-0.965622,0.235794,0.512745,0.406073,3
6727,1.462172,0.190287,0.019377,-0.254731,-0.279547,0.076205,3


In [18]:
X_data = X_and_y_concat[['AccX', 'AccY', 'AccZ', 'GyroX', 'GyroY', 'GyroZ']]
X_data.head()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ
0,0.0,0.0,0.0,0.059407,-0.174707,0.101938
1,-1.624864,-1.082492,-0.204183,-0.028558,0.051313,0.135536
2,-0.59466,-0.12241,0.220502,-0.019395,-0.029322,0.087888
3,0.738478,-0.228456,0.667732,0.069791,-0.029932,0.054902
4,0.101741,0.777568,-0.06673,0.030696,-0.003665,0.054902


In [19]:
y_data = X_and_y_concat[['Class']]
y_data.head()

Unnamed: 0,Class
0,2
1,2
2,2
3,2
4,2


In [21]:
# Train (70%)과 Temp (30%)로 먼저 분할
train_size = 0.7
temp_size = 0.3

X_train, X_temp, y_train, y_temp = train_test_split(X_data, y_data, train_size=train_size, test_size=temp_size, random_state=42)

# Temp (30%)을 Val (10%)과 Test (20%)로 분할
val_size = 1/3  # temp_df의 1/3은 전체의 10%가 됨
test_size = 2/3  # temp_df의 2/3은 전체의 20%가 됨

X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, train_size=val_size, test_size=test_size, random_state=42)

# 결과 확인
print("Train set size:", len(X_train), len(y_train))
print("Validation set size:", len(X_val), len(y_val))
print("Test set size:", len(X_test), len(y_test))

# # 데이터프레임 확인
# print(X_train.head(), y_train.head())
# print(X_val.head(), y_val.head())
# print(X_test.head(), y_test.head())

Train set size: 4709 4709
Validation set size: 673 673
Test set size: 1346 1346


In [22]:
from sklearn.linear_model import LogisticRegression
Logit = LogisticRegression(C=1e2,
                          multi_class='ovr',
                          random_state=17,
                          max_iter=200
                          )
Logit.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


LogisticRegression(C=100.0, max_iter=200, multi_class='ovr', random_state=17)

In [23]:
y_test_pred = Logit.predict(X_test)
y_test_pred_proba = Logit.predict_proba(X_test)
print(y_test_pred[:5])
print(y_test_pred_proba[:5])

[3 3 3 3 3]
[[0.30868658 0.33393874 0.35737468]
 [0.30498976 0.32292898 0.37208126]
 [0.25031418 0.34566814 0.40401768]
 [0.200294   0.35396067 0.44574533]
 [0.2404338  0.32262627 0.43693993]]


In [24]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.39301634472511143
[[ 79   4 312]
 [ 42   2 419]
 [ 38   2 448]]
              precision    recall  f1-score   support

           1     0.4969    0.2000    0.2852       395
           2     0.2500    0.0043    0.0085       463
           3     0.3800    0.9180    0.5375       488

    accuracy                         0.3930      1346
   macro avg     0.3756    0.3741    0.2771      1346
weighted avg     0.3696    0.3930    0.2815      1346



In [25]:
### 3. MLP Classifier

from sklearn.neural_network import MLPClassifier
clf = MLPClassifier(random_state=17, max_iter=1000).fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


In [26]:
y_test_pred = clf.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.4279346210995542
[[159  43 193]
 [ 79  54 330]
 [ 75  50 363]]
              precision    recall  f1-score   support

           1     0.5080    0.4025    0.4492       395
           2     0.3673    0.1166    0.1770       463
           3     0.4097    0.7439    0.5284       488

    accuracy                         0.4279      1346
   macro avg     0.4283    0.4210    0.3849      1346
weighted avg     0.4240    0.4279    0.3843      1346



In [27]:
### 4. KNN

from sklearn.neighbors import KNeighborsClassifier

# 5개 근접한 이웃, 거리측정기준 : 유클리드
knn = KNeighborsClassifier(n_neighbors=5, p=2)

# Model Fitting 과정
knn.fit(X_train, y_train)

  return self._fit(X, y)


KNeighborsClassifier()

In [28]:
y_test_pred = knn.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.3699851411589896
[[165 118 112]
 [140 160 163]
 [146 169 173]]
              precision    recall  f1-score   support

           1     0.3659    0.4177    0.3901       395
           2     0.3579    0.3456    0.3516       463
           3     0.3862    0.3545    0.3697       488

    accuracy                         0.3700      1346
   macro avg     0.3700    0.3726    0.3705      1346
weighted avg     0.3705    0.3700    0.3695      1346



In [29]:
### 5. Random Forest

from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(n_estimators=5, random_state=17)
rf_model.fit(X_train, y_train)

  


RandomForestClassifier(n_estimators=5, random_state=17)

In [30]:
y_test_pred = rf_model.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.3514115898959881
[[174 118 103]
 [159 129 175]
 [160 158 170]]
              precision    recall  f1-score   support

           1     0.3529    0.4405    0.3919       395
           2     0.3185    0.2786    0.2972       463
           3     0.3795    0.3484    0.3632       488

    accuracy                         0.3514      1346
   macro avg     0.3503    0.3558    0.3508      1346
weighted avg     0.3507    0.3514    0.3489      1346

