In [1]:
!nvidia-smi

Tue Jul  2 17:21:07 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 551.23                 Driver Version: 551.23         CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                     TCC/WDDM  | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 2080 ...  WDDM  |   00000000:01:00.0  On |                  N/A |
|  0%   41C    P8              8W /  280W |     290MiB /   8192MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import signal
from sklearn.model_selection import train_test_split,StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report,accuracy_score,ConfusionMatrixDisplay,confusion_matrix,precision_score,recall_score,roc_curve,roc_auc_score,balanced_accuracy_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
import datetime
from dataclasses import dataclass
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
from scipy.ndimage import gaussian_filter1d
from itertools import combinations as comb
from pyts.image import RecurrencePlot
import PIL
from sklearn.utils import class_weight
from sklearn.model_selection import RandomizedSearchCV,GridSearchCV,StratifiedKFold
from unidecode import unidecode

In [3]:
train_data = pd.read_csv('./dataset/dataset_1/train_motion_data.csv')
test_data = pd.read_csv('./dataset/dataset_1/test_motion_data.csv')
print(train_data.shape)
print(test_data.shape)

(3644, 8)
(3084, 8)


In [4]:
train_data.head()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,Class,Timestamp
0,0.0,0.0,0.0,0.059407,-0.174707,0.101938,NORMAL,3581629
1,-1.624864,-1.082492,-0.204183,-0.028558,0.051313,0.135536,NORMAL,3581630
2,-0.59466,-0.12241,0.220502,-0.019395,-0.029322,0.087888,NORMAL,3581630
3,0.738478,-0.228456,0.667732,0.069791,-0.029932,0.054902,NORMAL,3581631
4,0.101741,0.777568,-0.06673,0.030696,-0.003665,0.054902,NORMAL,3581631


In [5]:
print(train_data['Class'].value_counts())

SLOW          1331
NORMAL        1200
AGGRESSIVE    1113
Name: Class, dtype: int64


In [6]:
print(test_data['Class'].value_counts())

SLOW          1273
NORMAL         997
AGGRESSIVE     814
Name: Class, dtype: int64


In [15]:
print(len(train_data))

3644


In [16]:
train_data['Timestamp'] = np.arange(3644)

In [17]:
test_data.head()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,Class,Timestamp
0,0.758194,-0.217791,0.457263,0.0,0.0,0.0,1,818922
1,0.66756,-0.03861,0.231416,-0.054367,-0.007712,0.225257,1,818923
2,2.724449,-7.584121,2.390926,0.023824,0.013668,-0.038026,1,818923
3,2.33095,-7.621754,2.529024,0.05681,-0.180587,-0.052076,1,818924
4,2.847215,-6.755621,2.22464,-0.031765,-0.035201,0.035277,1,818924


In [19]:
test_data.tail()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,Class,Timestamp
3079,-0.713858,-0.652975,-0.164015,-0.147829,-1.309466,0.51725,3,820706
3080,1.514261,0.33007,1.020714,1.321302,1.707598,-0.674548,3,820707
3081,1.280216,-1.735172,-2.332695,0.583376,0.690507,-0.468075,3,820707
3082,0.912313,0.583314,-0.965622,0.235794,0.512745,0.406073,3,820708
3083,1.462172,0.190287,0.019377,-0.254731,-0.279547,0.076205,3,820709


In [18]:
print(len(test_data))

3084


In [20]:
test_data['Timestamp'] = np.arange(3084)

In [21]:
x_train = train_data[['AccX', 'AccY', 'AccZ', 'GyroX', 'GyroY', 'GyroZ', 'Timestamp']]
train_data.Class=train_data.Class.replace(['AGGRESSIVE', 'NORMAL', 'SLOW'],[1,2,3])
y_train = train_data[['Class']]

In [22]:
x_test = test_data[['AccX', 'AccY', 'AccZ', 'GyroX', 'GyroY', 'GyroZ', 'Timestamp']]
test_data.Class=test_data.Class.replace(['AGGRESSIVE', 'NORMAL', 'SLOW'],[1,2,3])
y_test = test_data[['Class']]

In [23]:
x_train.head()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,Timestamp
0,0.0,0.0,0.0,0.059407,-0.174707,0.101938,0
1,-1.624864,-1.082492,-0.204183,-0.028558,0.051313,0.135536,1
2,-0.59466,-0.12241,0.220502,-0.019395,-0.029322,0.087888,2
3,0.738478,-0.228456,0.667732,0.069791,-0.029932,0.054902,3
4,0.101741,0.777568,-0.06673,0.030696,-0.003665,0.054902,4


In [25]:
# Acceleration Magnitude 계산
x_train['AccMagnitude'] = np.sqrt(x_train['AccX']**2 + x_train['AccY']**2 + x_train['AccZ']**2)
# Rotation Magnitude 계산
x_train['GyroMagnitude'] = np.sqrt(x_train['GyroX']**2 + x_train['GyroY']**2 + x_train['GyroZ']**2)
# Jerk 계산
x_train['JerkX'] = x_train['AccX'].diff().div(x_train['Timestamp'].diff(), fill_value=0)
x_train['JerkY'] = x_train['AccY'].diff().div(x_train['Timestamp'].diff(), fill_value=0)
x_train['JerkZ'] = x_train['AccZ'].diff().div(x_train['Timestamp'].diff(), fill_value=0)
x_train['JerkMagnitude'] = np.sqrt(x_train['JerkX']**2 + x_train['JerkY']**2 + x_train['JerkZ']**2)

In [26]:
# Acceleration Magnitude 계산
x_test['AccMagnitude'] = np.sqrt(x_test['AccX']**2 + x_test['AccY']**2 + x_test['AccZ']**2)
# Rotation Magnitude 계산
x_test['GyroMagnitude'] = np.sqrt(x_test['GyroX']**2 + x_test['GyroY']**2 + x_test['GyroZ']**2)
# Jerk 계산
x_test['JerkX'] = x_test['AccX'].diff().div(x_test['Timestamp'].diff(), fill_value=0)
x_test['JerkY'] = x_test['AccY'].diff().div(x_test['Timestamp'].diff(), fill_value=0)
x_test['JerkZ'] = x_test['AccZ'].diff().div(x_test['Timestamp'].diff(), fill_value=0)
x_test['JerkMagnitude'] = np.sqrt(x_test['JerkX']**2 + x_test['JerkY']**2 + x_test['JerkZ']**2)

In [27]:
x_train.head()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,Timestamp,AccMagnitude,GyroMagnitude,JerkX,JerkY,JerkZ,JerkMagnitude
0,0.0,0.0,0.0,0.059407,-0.174707,0.101938,0,0.0,0.210816,,,,
1,-1.624864,-1.082492,-0.204183,-0.028558,0.051313,0.135536,1,1.963075,0.147711,-1.624864,-1.082492,-0.204183,1.963075
2,-0.59466,-0.12241,0.220502,-0.019395,-0.029322,0.087888,2,0.64593,0.094659,1.030204,0.960082,0.424685,1.470862
3,0.738478,-0.228456,0.667732,0.069791,-0.029932,0.054902,3,1.021474,0.093707,1.333138,-0.106046,0.44723,1.410148
4,0.101741,0.777568,-0.06673,0.030696,-0.003665,0.054902,4,0.78703,0.063007,-0.636737,1.006023,-0.734462,1.398911


In [30]:
x_train.isnull().sum()

AccX             0
AccY             0
AccZ             0
GyroX            0
GyroY            0
GyroZ            0
Timestamp        0
AccMagnitude     0
GyroMagnitude    0
JerkX            1
JerkY            1
JerkZ            1
JerkMagnitude    1
dtype: int64

In [31]:
print(x_train.shape)

(3644, 13)


In [32]:
x_train.head()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,Timestamp,AccMagnitude,GyroMagnitude,JerkX,JerkY,JerkZ,JerkMagnitude
0,0.0,0.0,0.0,0.059407,-0.174707,0.101938,0,0.0,0.210816,,,,
1,-1.624864,-1.082492,-0.204183,-0.028558,0.051313,0.135536,1,1.963075,0.147711,-1.624864,-1.082492,-0.204183,1.963075
2,-0.59466,-0.12241,0.220502,-0.019395,-0.029322,0.087888,2,0.64593,0.094659,1.030204,0.960082,0.424685,1.470862
3,0.738478,-0.228456,0.667732,0.069791,-0.029932,0.054902,3,1.021474,0.093707,1.333138,-0.106046,0.44723,1.410148
4,0.101741,0.777568,-0.06673,0.030696,-0.003665,0.054902,4,0.78703,0.063007,-0.636737,1.006023,-0.734462,1.398911


In [33]:
x_train_dropna = x_train.dropna()

In [35]:
print(x_train_dropna.shape)

(3643, 13)


In [34]:
x_train_dropna.head()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,Timestamp,AccMagnitude,GyroMagnitude,JerkX,JerkY,JerkZ,JerkMagnitude
1,-1.624864,-1.082492,-0.204183,-0.028558,0.051313,0.135536,1,1.963075,0.147711,-1.624864,-1.082492,-0.204183,1.963075
2,-0.59466,-0.12241,0.220502,-0.019395,-0.029322,0.087888,2,0.64593,0.094659,1.030204,0.960082,0.424685,1.470862
3,0.738478,-0.228456,0.667732,0.069791,-0.029932,0.054902,3,1.021474,0.093707,1.333138,-0.106046,0.44723,1.410148
4,0.101741,0.777568,-0.06673,0.030696,-0.003665,0.054902,4,0.78703,0.063007,-0.636737,1.006023,-0.734462,1.398911
5,0.15847,0.345891,0.355274,0.021533,0.115454,0.014584,5,0.520551,0.118346,0.056728,-0.431676,0.422004,0.606341


In [36]:
y_train_dropna = y_train[1:]
print(len(y_train_dropna))

3643


In [37]:
print(x_train_dropna.columns)

Index(['AccX', 'AccY', 'AccZ', 'GyroX', 'GyroY', 'GyroZ', 'Timestamp',
       'AccMagnitude', 'GyroMagnitude', 'JerkX', 'JerkY', 'JerkZ',
       'JerkMagnitude'],
      dtype='object')


In [38]:
X_train = x_train_dropna[['AccX', 'AccY', 'AccZ', 'GyroX', 'GyroY', 'GyroZ',
       'AccMagnitude', 'GyroMagnitude', 'JerkX', 'JerkY', 'JerkZ',
       'JerkMagnitude']]

In [39]:
x_test.head()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,Timestamp,AccMagnitude,GyroMagnitude,JerkX,JerkY,JerkZ,JerkMagnitude
0,0.758194,-0.217791,0.457263,0.0,0.0,0.0,0,0.911801,0.0,,,,
1,0.66756,-0.03861,0.231416,-0.054367,-0.007712,0.225257,1,0.707588,0.231853,-0.090634,0.17918,-0.225848,0.302204
2,2.724449,-7.584121,2.390926,0.023824,0.013668,-0.038026,2,8.405834,0.046908,2.056889,-7.54551,2.15951,8.113508
3,2.33095,-7.621754,2.529024,0.05681,-0.180587,-0.052076,3,8.361843,0.196344,-0.393499,-0.037633,0.138098,0.418723
4,2.847215,-6.755621,2.22464,-0.031765,-0.035201,0.035277,4,7.661206,0.059098,0.516265,0.866132,-0.304384,1.053264


In [40]:
x_test.isna().sum()

AccX             0
AccY             0
AccZ             0
GyroX            0
GyroY            0
GyroZ            0
Timestamp        0
AccMagnitude     0
GyroMagnitude    0
JerkX            1
JerkY            1
JerkZ            1
JerkMagnitude    1
dtype: int64

In [41]:
x_test_dropna = x_test.dropna()

In [42]:
y_test_dropna = y_test[1:]
print(len(y_test_dropna))

3083


In [43]:
X_test = x_test_dropna[['AccX', 'AccY', 'AccZ', 'GyroX', 'GyroY', 'GyroZ',
       'AccMagnitude', 'GyroMagnitude', 'JerkX', 'JerkY', 'JerkZ',
       'JerkMagnitude']]

In [45]:
print(X_train.shape)
print(X_test.shape)

(3643, 12)
(3083, 12)


In [47]:
rf = RandomForestClassifier()
rf.fit(X_train, y_train_dropna)

  


RandomForestClassifier()

In [48]:
# rf.score(x_test, y_test)
y_pred = rf.predict(X_test)
print(accuracy_score(y_pred, y_test_dropna))
print(classification_report(y_pred, y_test_dropna, digits=4))

0.4278300356795329
              precision    recall  f1-score   support

           1     0.4637    0.4323    0.4475       872
           2     0.2768    0.3255    0.2992       848
           3     0.5232    0.4886    0.5053      1363

    accuracy                         0.4278      3083
   macro avg     0.4212    0.4155    0.4173      3083
weighted avg     0.4386    0.4278    0.4323      3083



In [49]:
X_train = X_train
y_train = y_train_dropna
X_test = X_test
y_test = y_test_dropna

In [50]:
print(X_test.shape)
print(y_test.shape)

(3083, 12)
(3083, 1)


In [51]:
from sklearn.linear_model import LogisticRegression
Logit = LogisticRegression(C=1e2,
                          multi_class='ovr',
                          random_state=17,
                          max_iter=200
                          )
Logit.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


LogisticRegression(C=100.0, max_iter=200, multi_class='ovr', random_state=17)

In [52]:
y_test_pred = Logit.predict(X_test)
y_test_pred_proba = Logit.predict_proba(X_test)
print(y_test_pred[:5])
print(y_test_pred_proba[:5])

[3 1 1 1 1]
[[0.27857099 0.31750994 0.40391907]
 [0.92777752 0.05854834 0.01367415]
 [0.95809564 0.02493519 0.01696917]
 [0.94754099 0.03085346 0.02160555]
 [0.58460732 0.20049567 0.21489701]]


In [53]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.4641582873824197
[[376  52 385]
 [289  70 638]
 [212  76 985]]
              precision    recall  f1-score   support

           1     0.4287    0.4625    0.4450       813
           2     0.3535    0.0702    0.1172       997
           3     0.4905    0.7738    0.6004      1273

    accuracy                         0.4642      3083
   macro avg     0.4243    0.4355    0.3875      3083
weighted avg     0.4299    0.4642    0.4031      3083



In [54]:
### 3. MLP Classifier

from sklearn.neural_network import MLPClassifier
clf = MLPClassifier(random_state=17, max_iter=1000).fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


In [55]:
y_test_pred = clf.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.4459941615309763
[[387 146 280]
 [262 202 533]
 [218 269 786]]
              precision    recall  f1-score   support

           1     0.4464    0.4760    0.4607       813
           2     0.3274    0.2026    0.2503       997
           3     0.4916    0.6174    0.5474      1273

    accuracy                         0.4460      3083
   macro avg     0.4218    0.4320    0.4195      3083
weighted avg     0.4266    0.4460    0.4284      3083



In [56]:
### 4. KNN

from sklearn.neighbors import KNeighborsClassifier

# 5개 근접한 이웃, 거리측정기준 : 유클리드
knn = KNeighborsClassifier(n_neighbors=5, p=2)

# Model Fitting 과정
knn.fit(X_train, y_train)

  return self._fit(X, y)


KNeighborsClassifier()

In [57]:
y_test_pred = knn.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.3996107687317548
[[375 220 218]
 [302 369 326]
 [298 487 488]]
              precision    recall  f1-score   support

           1     0.3846    0.4613    0.4195       813
           2     0.3429    0.3701    0.3560       997
           3     0.4729    0.3833    0.4234      1273

    accuracy                         0.3996      3083
   macro avg     0.4001    0.4049    0.3996      3083
weighted avg     0.4076    0.3996    0.4006      3083



In [58]:
### 5. Random Forest

from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(n_estimators=5, random_state=17)
rf_model.fit(X_train, y_train)

  


RandomForestClassifier(n_estimators=5, random_state=17)

In [59]:
y_test_pred = rf_model.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.3807979240999027
[[418 225 170]
 [407 322 268]
 [419 420 434]]
              precision    recall  f1-score   support

           1     0.3360    0.5141    0.4064       813
           2     0.3330    0.3230    0.3279       997
           3     0.4977    0.3409    0.4047      1273

    accuracy                         0.3808      3083
   macro avg     0.3889    0.3927    0.3797      3083
weighted avg     0.4018    0.3808    0.3803      3083



In [60]:
### 6. Decision Tree
from sklearn.tree import DecisionTreeClassifier
DT_MODEL= DecisionTreeClassifier(random_state=17)
DT_MODEL.fit(X_train, y_train)

DecisionTreeClassifier(random_state=17)

In [61]:
y_test_pred = DT_MODEL.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.3626337982484593
[[318 240 255]
 [335 323 339]
 [351 445 477]]
              precision    recall  f1-score   support

           1     0.3167    0.3911    0.3500       813
           2     0.3204    0.3240    0.3222       997
           3     0.4454    0.3747    0.4070      1273

    accuracy                         0.3626      3083
   macro avg     0.3608    0.3633    0.3597      3083
weighted avg     0.3710    0.3626    0.3645      3083



In [62]:
from xgboost import XGBClassifier
from xgboost import plot_importance
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pandas as pd

In [63]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_train_xgb = le.fit_transform(y_train)
le = LabelEncoder()
y_test_xgb = le.fit_transform(y_test)

  y = column_or_1d(y, warn=True)


In [64]:
xgb_class = XGBClassifier()
xgb_class.fit(X_train, y_train_xgb)

XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
              early_stopping_rounds=None, enable_categorical=False,
              eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
              importance_type=None, interaction_constraints='',
              learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
              max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
              missing=nan, monotone_constraints='()', n_estimators=100,
              n_jobs=0, num_parallel_tree=1, objective='multi:softprob',
              predictor='auto', random_state=0, reg_alpha=0, ...)

In [65]:
y_test_pred = xgb_class.predict(X_test)
print(accuracy_score(y_test_xgb, y_test_pred))
print(confusion_matrix(y_test_xgb, y_test_pred))
print(classification_report(y_test_xgb, y_test_pred, digits=4))

0.42458644177748944
[[365 205 243]
 [274 309 414]
 [228 410 635]]
              precision    recall  f1-score   support

           0     0.4210    0.4490    0.4345       813
           1     0.3344    0.3099    0.3217       997
           2     0.4915    0.4988    0.4951      1273

    accuracy                         0.4246      3083
   macro avg     0.4156    0.4192    0.4171      3083
weighted avg     0.4221    0.4246    0.4231      3083



In [66]:
### 8. LightGBM
from lightgbm import LGBMClassifier
lgbm_class = LGBMClassifier()
lgbm_class.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


LGBMClassifier()

In [67]:
y_test_pred =lgbm_class.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.4388582549464807
[[369 189 255]
 [256 311 430]
 [211 389 673]]
              precision    recall  f1-score   support

           1     0.4414    0.4539    0.4475       813
           2     0.3498    0.3119    0.3298       997
           3     0.4956    0.5287    0.5116      1273

    accuracy                         0.4389      3083
   macro avg     0.4289    0.4315    0.4296      3083
weighted avg     0.4342    0.4389    0.4359      3083



In [74]:
# LightGBM Classifier 초기화
lgbm = LGBMClassifier()

# 하이퍼파라미터 그리드 설정
param_grid = {
    'num_leaves': [31, 50, 70],
    'learning_rate': [0.01, 0.05, 0.1],
    'n_estimators': [100, 200, 500],
    'max_depth': [5, 10, 15]
}

# GridSearchCV 설정
grid_search = GridSearchCV(estimator=lgbm, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)

# 최적의 하이퍼파라미터 찾기
grid_search.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


GridSearchCV(cv=5, estimator=LGBMClassifier(), n_jobs=-1,
             param_grid={'learning_rate': [0.01, 0.05, 0.1],
                         'max_depth': [5, 10, 15],
                         'n_estimators': [100, 200, 500],
                         'num_leaves': [31, 50, 70]},
             scoring='accuracy')

In [75]:
# 최적의 하이퍼파라미터 출력
print("최적의 하이퍼파라미터:")
print(grid_search.best_params_)

# 최적의 하이퍼파라미터로 모델 구축
best_lgbm = grid_search.best_estimator_

# 예측 수행
y_pred = best_lgbm.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

최적의 하이퍼파라미터:
{'learning_rate': 0.01, 'max_depth': 15, 'n_estimators': 200, 'num_leaves': 31}
0.41809925397340253
[[369 188 256]
 [283 292 422]
 [221 424 628]]
              precision    recall  f1-score   support

           1     0.4227    0.4539    0.4377       813
           2     0.3230    0.2929    0.3072       997
           3     0.4809    0.4933    0.4870      1273

    accuracy                         0.4181      3083
   macro avg     0.4088    0.4134    0.4106      3083
weighted avg     0.4145    0.4181    0.4159      3083



In [68]:
from catboost import CatBoostClassifier
cat_class = CatBoostClassifier()
cat_class.fit(X_train, y_train)

Learning rate set to 0.08452
0:	learn: 1.0925967	total: 197ms	remaining: 3m 17s
1:	learn: 1.0873728	total: 218ms	remaining: 1m 48s
2:	learn: 1.0812067	total: 230ms	remaining: 1m 16s
3:	learn: 1.0748185	total: 252ms	remaining: 1m 2s
4:	learn: 1.0699077	total: 265ms	remaining: 52.7s
5:	learn: 1.0650078	total: 287ms	remaining: 47.6s
6:	learn: 1.0602734	total: 296ms	remaining: 42.1s
7:	learn: 1.0567353	total: 328ms	remaining: 40.6s
8:	learn: 1.0532127	total: 341ms	remaining: 37.6s
9:	learn: 1.0496693	total: 362ms	remaining: 35.8s
10:	learn: 1.0467705	total: 376ms	remaining: 33.8s
11:	learn: 1.0439706	total: 392ms	remaining: 32.2s
12:	learn: 1.0420513	total: 415ms	remaining: 31.5s
13:	learn: 1.0389289	total: 441ms	remaining: 31s
14:	learn: 1.0366549	total: 465ms	remaining: 30.5s
15:	learn: 1.0343264	total: 485ms	remaining: 29.8s
16:	learn: 1.0326218	total: 521ms	remaining: 30.1s
17:	learn: 1.0301050	total: 546ms	remaining: 29.8s
18:	learn: 1.0281033	total: 566ms	remaining: 29.2s
19:	learn: 

<catboost.core.CatBoostClassifier at 0x21c934ca7c8>

In [69]:
y_test_pred = cat_class.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.41809925397340253
[[369 188 256]
 [283 292 422]
 [221 424 628]]
              precision    recall  f1-score   support

           1     0.4227    0.4539    0.4377       813
           2     0.3230    0.2929    0.3072       997
           3     0.4809    0.4933    0.4870      1273

    accuracy                         0.4181      3083
   macro avg     0.4088    0.4134    0.4106      3083
weighted avg     0.4145    0.4181    0.4159      3083



In [70]:
X_train.to_csv('X_train_feat.csv', index=False)
y_train.to_csv('y_train_feat.csv', index=False)
X_test.to_csv('X_test_feat.csv', index=False)
y_test.to_csv('y_test_feat.csv', index=False)

In [71]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(3643, 12)
(3643, 1)
(3083, 12)
(3083, 1)


In [72]:
print(X_train.columns)

Index(['AccX', 'AccY', 'AccZ', 'GyroX', 'GyroY', 'GyroZ', 'AccMagnitude',
       'GyroMagnitude', 'JerkX', 'JerkY', 'JerkZ', 'JerkMagnitude'],
      dtype='object')


In [78]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

# 개별 모델 초기화
xgb_clf = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)
lgbm_clf = LGBMClassifier(random_state=42)
catboost_clf = CatBoostClassifier(verbose=0, random_state=42)

# Voting Classifier 설정 (Soft Voting)
voting_clf = VotingClassifier(estimators=[
    ('xgb', xgb_clf),
    ('lgbm', lgbm_clf),
    ('catboost', catboost_clf)
], voting='soft')

# 모델 학습
voting_clf.fit(X_train, y_train)

# 예측 수행
y_test_pred = voting_clf.predict(X_test)

# 정확도 출력
accuracy = accuracy_score(y_test, y_test_pred)
print(f"Ensemble 모델의 정확도: {accuracy:.2f}")

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Ensemble 모델의 정확도: 0.44


In [79]:
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.4398313331170937
[[368 189 256]
 [256 330 411]
 [212 403 658]]
              precision    recall  f1-score   support

           1     0.4402    0.4526    0.4463       813
           2     0.3579    0.3310    0.3439       997
           3     0.4966    0.5169    0.5065      1273

    accuracy                         0.4398      3083
   macro avg     0.4316    0.4335    0.4323      3083
weighted avg     0.4369    0.4398    0.4381      3083



In [80]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

# 개별 모델 초기화
xgb_clf = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)
lgbm_clf = LGBMClassifier(random_state=42)
catboost_clf = CatBoostClassifier(verbose=0, random_state=42)

# Voting Classifier 설정 (Soft Voting)
voting_clf = VotingClassifier(estimators=[
    ('xgb', xgb_clf),
    ('lgbm', lgbm_clf),
    # ('catboost', catboost_clf)
], voting='soft')

# 모델 학습
voting_clf.fit(X_train, y_train)

# 예측 수행
y_test_pred = voting_clf.predict(X_test)

# 정확도 출력
accuracy = accuracy_score(y_test, y_test_pred)
print(f"Ensemble 모델의 정확도: {accuracy:.2f}")
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Ensemble 모델의 정확도: 0.44
0.4369120986052546
[[370 198 245]
 [270 300 427]
 [212 384 677]]
              precision    recall  f1-score   support

           1     0.4343    0.4551    0.4444       813
           2     0.3401    0.3009    0.3193       997
           3     0.5019    0.5318    0.5164      1273

    accuracy                         0.4369      3083
   macro avg     0.4254    0.4293    0.4267      3083
weighted avg     0.4317    0.4369    0.4337      3083



In [81]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

# 개별 모델 초기화
xgb_clf = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)
lgbm_clf = LGBMClassifier(random_state=42)
catboost_clf = CatBoostClassifier(verbose=0, random_state=42)

# Voting Classifier 설정 (Soft Voting)
voting_clf = VotingClassifier(estimators=[
    # ('xgb', xgb_clf),
    ('lgbm', lgbm_clf),
    ('catboost', catboost_clf)
], voting='soft')

# 모델 학습
voting_clf.fit(X_train, y_train)

# 예측 수행
y_test_pred = voting_clf.predict(X_test)

# 정확도 출력
accuracy = accuracy_score(y_test, y_test_pred)
print(f"Ensemble 모델의 정확도: {accuracy:.2f}")
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Ensemble 모델의 정확도: 0.44
0.4375608173856633
[[381 188 244]
 [269 317 411]
 [212 410 651]]
              precision    recall  f1-score   support

           1     0.4420    0.4686    0.4549       813
           2     0.3464    0.3180    0.3316       997
           3     0.4985    0.5114    0.5048      1273

    accuracy                         0.4376      3083
   macro avg     0.4290    0.4327    0.4305      3083
weighted avg     0.4344    0.4376    0.4357      3083



In [82]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

# 개별 모델 초기화
xgb_clf = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)
lgbm_clf = LGBMClassifier(random_state=42)
catboost_clf = CatBoostClassifier(verbose=0, random_state=42)

# Voting Classifier 설정 (Soft Voting)
voting_clf = VotingClassifier(estimators=[
    ('xgb', xgb_clf),
    # ('lgbm', lgbm_clf),
    ('catboost', catboost_clf)
], voting='soft')

# 모델 학습
voting_clf.fit(X_train, y_train)

# 예측 수행
y_test_pred = voting_clf.predict(X_test)

# 정확도 출력
accuracy = accuracy_score(y_test, y_test_pred)
print(f"Ensemble 모델의 정확도: {accuracy:.2f}")
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Ensemble 모델의 정확도: 0.43
0.43399286409341553
[[371 192 250]
 [265 321 411]
 [218 409 646]]
              precision    recall  f1-score   support

           1     0.4344    0.4563    0.4451       813
           2     0.3482    0.3220    0.3345       997
           3     0.4943    0.5075    0.5008      1273

    accuracy                         0.4340      3083
   macro avg     0.4256    0.4286    0.4268      3083
weighted avg     0.4312    0.4340    0.4323      3083



In [84]:
# !pip install optuna

In [85]:
import optuna
import lightgbm as lgb
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score

In [86]:
# Objective 함수 정의
def objective(trial):
    param = {
        'objective': 'multiclass',
        'metric': 'multi_logloss',
        'verbosity': -1,
        'boosting_type': 'gbdt',
        'num_class': 3,  # 클래스 개수
        'max_depth': trial.suggest_int('max_depth', 4, 10),
        'num_leaves': trial.suggest_int('num_leaves', 20, 150),
        'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 30),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-8, 1.0),
        'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-8, 1.0)
    }

    model = lgb.LGBMClassifier(**param)
    return cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy').mean()

In [87]:
# Optuna를 사용한 하이퍼파라미터 튜닝
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

# 최적의 하이퍼파라미터 출력
print("최적의 하이퍼파라미터:")
print(study.best_params)

[32m[I 2024-07-02 18:21:00,848][0m A new study created in memory with name: no-name-205f80fd-d3fb-43c0-a2b1-90052498fa6f[0m
  # This is added back by InteractiveShellApp.init_path()
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
[32m[I 2024-07-02 18:21:03,879][0m Trial 0 finished with value: 0.4276715054492833 and parameters: {'max_depth': 4, 'num_leaves': 78, 'learning_rate': 0.023702506371271897, 'n_estimators': 578, 'min_child_samples': 13, 'subsample': 0.8519496718594332, 'colsample_bytree': 0.5036149421606935, 'reg_alpha': 2.958444178327852e-05, 'reg_lambda': 2.6293517841606763e-08}. Best is trial 0 with value: 0.4276715054492833.[0m
  # This is added back by Interactiv

최적의 하이퍼파라미터:
{'max_depth': 10, 'num_leaves': 129, 'learning_rate': 0.008257995161158735, 'n_estimators': 579, 'min_child_samples': 28, 'subsample': 0.5315574854200462, 'colsample_bytree': 0.5756031835541081, 'reg_alpha': 5.076306246885321e-07, 'reg_lambda': 0.0038678877477120975}


In [88]:
# 최적의 하이퍼파라미터로 모델 구축
best_params = study.best_params
best_model = lgb.LGBMClassifier(**best_params)
best_model.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


LGBMClassifier(colsample_bytree=0.5756031835541081,
               learning_rate=0.008257995161158735, max_depth=10,
               min_child_samples=28, n_estimators=579, num_leaves=129,
               reg_alpha=5.076306246885321e-07,
               reg_lambda=0.0038678877477120975, subsample=0.5315574854200462)

In [89]:
y_test_pred = best_model.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.4459941615309763
[[371 173 269]
 [252 281 464]
 [192 358 723]]
              precision    recall  f1-score   support

           1     0.4552    0.4563    0.4558       813
           2     0.3461    0.2818    0.3107       997
           3     0.4966    0.5679    0.5299      1273

    accuracy                         0.4460      3083
   macro avg     0.4326    0.4354    0.4321      3083
weighted avg     0.4370    0.4460    0.4394      3083



In [91]:
# from optuna import Trial, visualization                       ```

# optuna.visualization.plot_param_importances(study)
# optuna.visualization.plot_optimization_history(study)

In [95]:
import joblib
joblib.dump(best_model, 'lgb_best_model.pkl')

['lgb_best_model.pkl']

In [96]:
joblib.dump(best_model, 'lgb_best_model_1.pkl')

['lgb_best_model_1.pkl']

In [97]:
best_model.booster_.save_model('lgb_best_model_2_txt.txt')

<lightgbm.basic.Booster at 0x21c966a1588>

In [98]:
X_train.head()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,AccMagnitude,GyroMagnitude,JerkX,JerkY,JerkZ,JerkMagnitude
1,-1.624864,-1.082492,-0.204183,-0.028558,0.051313,0.135536,1.963075,0.147711,-1.624864,-1.082492,-0.204183,1.963075
2,-0.59466,-0.12241,0.220502,-0.019395,-0.029322,0.087888,0.64593,0.094659,1.030204,0.960082,0.424685,1.470862
3,0.738478,-0.228456,0.667732,0.069791,-0.029932,0.054902,1.021474,0.093707,1.333138,-0.106046,0.44723,1.410148
4,0.101741,0.777568,-0.06673,0.030696,-0.003665,0.054902,0.78703,0.063007,-0.636737,1.006023,-0.734462,1.398911
5,0.15847,0.345891,0.355274,0.021533,0.115454,0.014584,0.520551,0.118346,0.056728,-0.431676,0.422004,0.606341


In [99]:
X_train.describe()

Unnamed: 0,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,AccMagnitude,GyroMagnitude,JerkX,JerkY,JerkZ,JerkMagnitude
count,3643.0,3643.0,3643.0,3643.0,3643.0,3643.0,3643.0,3643.0,3643.0,3643.0,3643.0,3643.0
mean,0.040478,-0.073438,0.008274,0.001577,-0.001225,0.007923,1.438205,0.13077,-0.00011,0.000112,-0.000116,1.789139
std,0.985789,0.903531,0.985196,0.06692,0.126189,0.115692,0.834781,0.129414,1.072292,1.069894,1.376684,0.993904
min,-4.636523,-4.699795,-7.143998,-0.751822,-1.587028,-1.236468,0.064935,0.003707,-5.706999,-4.541037,-6.669646,0.024072
25%,-0.550872,-0.5926,-0.558509,-0.028558,-0.053756,-0.029398,0.852461,0.058419,-0.665565,-0.658815,-0.786008,1.090837
50%,0.004032,-0.081265,0.002422,0.001985,-0.001833,0.002978,1.268172,0.096612,0.02221,-0.013277,0.006993,1.586349
75%,0.595994,0.452967,0.556336,0.031918,0.051313,0.040852,1.843373,0.157894,0.676944,0.659891,0.801045,2.258621
max,4.985548,4.245151,5.171739,0.849255,1.679879,1.1905,7.451246,1.957027,4.369801,6.718676,6.639951,7.146545
