In [1]:
!nvidia-smi

Tue Jul  2 22:21:32 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 551.23                 Driver Version: 551.23         CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                     TCC/WDDM  | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 2080 ...  WDDM  |   00000000:01:00.0  On |                  N/A |
|  0%   41C    P8              8W /  280W |     312MiB /   8192MiB |      1%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import signal
from sklearn.model_selection import train_test_split,StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report,accuracy_score,ConfusionMatrixDisplay,confusion_matrix,precision_score,recall_score,roc_curve,roc_auc_score,balanced_accuracy_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
import datetime
from dataclasses import dataclass
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
from scipy.ndimage import gaussian_filter1d
from itertools import combinations as comb
from pyts.image import RecurrencePlot
import PIL
from sklearn.utils import class_weight
from sklearn.model_selection import RandomizedSearchCV,GridSearchCV,StratifiedKFold
from unidecode import unidecode

In [3]:
X_train_feat = pd.read_csv('X_train_feat.csv')
y_train_feat = pd.read_csv('y_train_feat.csv')
X_test_feat = pd.read_csv('X_test_feat.csv')
y_test_feat = pd.read_csv('y_test_feat.csv')

In [4]:
print(X_train_feat.shape)
print(y_train_feat.shape)
print(X_test_feat.shape)
print(y_test_feat.shape)

(3643, 12)
(3643, 1)
(3083, 12)
(3083, 1)


In [5]:
train_sum = pd.concat([X_train_feat, y_train_feat], axis=1)
test_sum = pd.concat([X_test_feat, y_test_feat], axis=1)
print(train_sum.shape)
print(test_sum.shape)

(3643, 13)
(3083, 13)


In [7]:
print(train_sum['Class'].value_counts())
print(test_sum['Class'].value_counts())

3    1331
2    1199
1    1113
Name: Class, dtype: int64
3    1273
2     997
1     813
Name: Class, dtype: int64


In [10]:
train_agg = train_sum[train_sum['Class'] == 1]
train_norm = train_sum[train_sum['Class'] == 2]

train_agg_norm = pd.concat([train_agg, train_norm])
print(train_agg_norm.shape)

(2312, 13)


In [12]:
test_agg = test_sum[test_sum['Class'] == 1]
test_norm = test_sum[test_sum['Class'] == 2]
test_agg_norm = pd.concat([test_agg, test_norm])
print(test_agg_norm.shape)

(1810, 13)


In [13]:
print(train_agg_norm.columns)

Index(['AccX', 'AccY', 'AccZ', 'GyroX', 'GyroY', 'GyroZ', 'AccMagnitude',
       'GyroMagnitude', 'JerkX', 'JerkY', 'JerkZ', 'JerkMagnitude', 'Class'],
      dtype='object')


In [15]:
X_train_agg_norm = train_agg_norm[['AccX', 'AccY', 'AccZ', 'GyroX', 'GyroY', 'GyroZ', 'AccMagnitude', 'GyroMagnitude', 'JerkX', 'JerkY', 'JerkZ', 'JerkMagnitude']]
X_train_agg_norm.shape

(2312, 12)

In [16]:
y_train_agg_norm = train_agg_norm[['Class']]
print(y_train_agg_norm.shape)

(2312, 1)


In [17]:
X_test_agg_norm = test_agg_norm[['AccX', 'AccY', 'AccZ', 'GyroX', 'GyroY', 'GyroZ', 'AccMagnitude', 'GyroMagnitude', 'JerkX', 'JerkY', 'JerkZ', 'JerkMagnitude']]
X_test_agg_norm.shape

(1810, 12)

In [18]:
y_test_agg_norm = test_agg_norm[['Class']]
print(y_test_agg_norm.shape)

(1810, 1)


In [20]:
X_train = X_train_agg_norm
y_train = y_train_agg_norm
X_test = X_test_agg_norm
y_test = y_test_agg_norm
print(X_test.shape)
print(y_test.shape)

(1810, 12)
(1810, 1)


In [21]:
from sklearn.linear_model import LogisticRegression
Logit = LogisticRegression(C=1e2,
                          multi_class='ovr',
                          random_state=17,
                          max_iter=200
                          )
Logit.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


LogisticRegression(C=100.0, max_iter=200, multi_class='ovr', random_state=17)

In [22]:
y_test_pred = Logit.predict(X_test)
y_test_pred_proba = Logit.predict_proba(X_test)
print(y_test_pred[:5])
print(y_test_pred_proba[:5])

[2 1 1 1 1]
[[0.44981607 0.55018393]
 [0.98956277 0.01043723]
 [0.99533143 0.00466857]
 [0.99292127 0.00707873]
 [0.73266522 0.26733478]]


In [23]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.5994475138121547
[[405 408]
 [317 680]]
              precision    recall  f1-score   support

           1     0.5609    0.4982    0.5277       813
           2     0.6250    0.6820    0.6523       997

    accuracy                         0.5994      1810
   macro avg     0.5930    0.5901    0.5900      1810
weighted avg     0.5962    0.5994    0.5963      1810



In [24]:
### 3. MLP Classifier

from sklearn.neural_network import MLPClassifier
clf = MLPClassifier(random_state=17, max_iter=1000).fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


In [25]:
y_test_pred = clf.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.6022099447513812
[[460 353]
 [367 630]]
              precision    recall  f1-score   support

           1     0.5562    0.5658    0.5610       813
           2     0.6409    0.6319    0.6364       997

    accuracy                         0.6022      1810
   macro avg     0.5986    0.5989    0.5987      1810
weighted avg     0.6029    0.6022    0.6025      1810



In [26]:
### 4. KNN

from sklearn.neighbors import KNeighborsClassifier

# 5개 근접한 이웃, 거리측정기준 : 유클리드
knn = KNeighborsClassifier(n_neighbors=5, p=2)

# Model Fitting 과정
knn.fit(X_train, y_train)

  return self._fit(X, y)


KNeighborsClassifier()

In [27]:
y_test_pred = knn.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.5773480662983426
[[387 426]
 [339 658]]
              precision    recall  f1-score   support

           1     0.5331    0.4760    0.5029       813
           2     0.6070    0.6600    0.6324       997

    accuracy                         0.5773      1810
   macro avg     0.5700    0.5680    0.5677      1810
weighted avg     0.5738    0.5773    0.5742      1810



In [28]:
### 5. Random Forest

from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(n_estimators=5, random_state=17)
rf_model.fit(X_train, y_train)

  


RandomForestClassifier(n_estimators=5, random_state=17)

In [29]:
y_test_pred = rf_model.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.5701657458563536
[[440 373]
 [405 592]]
              precision    recall  f1-score   support

           1     0.5207    0.5412    0.5308       813
           2     0.6135    0.5938    0.6035       997

    accuracy                         0.5702      1810
   macro avg     0.5671    0.5675    0.5671      1810
weighted avg     0.5718    0.5702    0.5708      1810



In [30]:
### 6. Decision Tree
from sklearn.tree import DecisionTreeClassifier
DT_MODEL= DecisionTreeClassifier(random_state=17)
DT_MODEL.fit(X_train, y_train)

DecisionTreeClassifier(random_state=17)

In [31]:
y_test_pred = DT_MODEL.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.5425414364640884
[[419 394]
 [434 563]]
              precision    recall  f1-score   support

           1     0.4912    0.5154    0.5030       813
           2     0.5883    0.5647    0.5763       997

    accuracy                         0.5425      1810
   macro avg     0.5398    0.5400    0.5396      1810
weighted avg     0.5447    0.5425    0.5434      1810



In [32]:
from xgboost import XGBClassifier
from xgboost import plot_importance
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pandas as pd

In [33]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_train_xgb = le.fit_transform(y_train)
le = LabelEncoder()
y_test_xgb = le.fit_transform(y_test)

  y = column_or_1d(y, warn=True)


In [34]:
xgb_class = XGBClassifier()
xgb_class.fit(X_train, y_train_xgb)

XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
              early_stopping_rounds=None, enable_categorical=False,
              eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
              importance_type=None, interaction_constraints='',
              learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
              max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
              missing=nan, monotone_constraints='()', n_estimators=100,
              n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, ...)

In [35]:
y_test_pred = xgb_class.predict(X_test)
print(accuracy_score(y_test_xgb, y_test_pred))
print(confusion_matrix(y_test_xgb, y_test_pred))
print(classification_report(y_test_xgb, y_test_pred, digits=4))

0.5806629834254143
[[461 352]
 [407 590]]
              precision    recall  f1-score   support

           0     0.5311    0.5670    0.5485       813
           1     0.6263    0.5918    0.6086       997

    accuracy                         0.5807      1810
   macro avg     0.5787    0.5794    0.5785      1810
weighted avg     0.5836    0.5807    0.5816      1810



In [36]:
### 8. LightGBM
from lightgbm import LGBMClassifier
lgbm_class = LGBMClassifier()
lgbm_class.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


LGBMClassifier()

In [37]:
y_test_pred =lgbm_class.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.6027624309392265
[[458 355]
 [364 633]]
              precision    recall  f1-score   support

           1     0.5572    0.5633    0.5602       813
           2     0.6407    0.6349    0.6378       997

    accuracy                         0.6028      1810
   macro avg     0.5989    0.5991    0.5990      1810
weighted avg     0.6032    0.6028    0.6030      1810



In [38]:
# LightGBM Classifier 초기화
lgbm = LGBMClassifier()

# 하이퍼파라미터 그리드 설정
param_grid = {
    'num_leaves': [31, 50, 70],
    'learning_rate': [0.01, 0.05, 0.1],
    'n_estimators': [100, 200, 500],
    'max_depth': [5, 10, 15]
}

# GridSearchCV 설정
grid_search = GridSearchCV(estimator=lgbm, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)

# 최적의 하이퍼파라미터 찾기
grid_search.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


GridSearchCV(cv=5, estimator=LGBMClassifier(), n_jobs=-1,
             param_grid={'learning_rate': [0.01, 0.05, 0.1],
                         'max_depth': [5, 10, 15],
                         'n_estimators': [100, 200, 500],
                         'num_leaves': [31, 50, 70]},
             scoring='accuracy')

In [39]:
# 최적의 하이퍼파라미터 출력
print("최적의 하이퍼파라미터:")
print(grid_search.best_params_)

# 최적의 하이퍼파라미터로 모델 구축
best_lgbm = grid_search.best_estimator_

# 예측 수행
y_pred = best_lgbm.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

최적의 하이퍼파라미터:
{'learning_rate': 0.05, 'max_depth': 5, 'n_estimators': 100, 'num_leaves': 31}
0.6027624309392265
[[458 355]
 [364 633]]
              precision    recall  f1-score   support

           1     0.5572    0.5633    0.5602       813
           2     0.6407    0.6349    0.6378       997

    accuracy                         0.6028      1810
   macro avg     0.5989    0.5991    0.5990      1810
weighted avg     0.6032    0.6028    0.6030      1810



In [40]:
from catboost import CatBoostClassifier
cat_class = CatBoostClassifier()
cat_class.fit(X_train, y_train)

Learning rate set to 0.014735
0:	learn: 0.6920241	total: 136ms	remaining: 2m 16s
1:	learn: 0.6909194	total: 139ms	remaining: 1m 9s
2:	learn: 0.6894336	total: 142ms	remaining: 47.2s
3:	learn: 0.6880581	total: 148ms	remaining: 36.7s
4:	learn: 0.6870569	total: 150ms	remaining: 29.9s
5:	learn: 0.6857350	total: 192ms	remaining: 31.8s
6:	learn: 0.6844795	total: 203ms	remaining: 28.8s
7:	learn: 0.6834104	total: 213ms	remaining: 26.4s
8:	learn: 0.6821545	total: 220ms	remaining: 24.2s
9:	learn: 0.6811378	total: 229ms	remaining: 22.6s
10:	learn: 0.6803166	total: 234ms	remaining: 21.1s
11:	learn: 0.6793821	total: 238ms	remaining: 19.6s
12:	learn: 0.6784177	total: 240ms	remaining: 18.3s
13:	learn: 0.6770403	total: 243ms	remaining: 17.1s
14:	learn: 0.6761029	total: 247ms	remaining: 16.2s
15:	learn: 0.6749545	total: 252ms	remaining: 15.5s
16:	learn: 0.6737111	total: 256ms	remaining: 14.8s
17:	learn: 0.6728317	total: 259ms	remaining: 14.1s
18:	learn: 0.6716105	total: 261ms	remaining: 13.5s
19:	learn:

<catboost.core.CatBoostClassifier at 0x1d2b66cc648>

In [41]:
y_test_pred = cat_class.predict(X_test)
print(accuracy_score(y_test, y_test_pred))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred, digits=4))

0.5994475138121547
[[440 373]
 [352 645]]
              precision    recall  f1-score   support

           1     0.5556    0.5412    0.5483       813
           2     0.6336    0.6469    0.6402       997

    accuracy                         0.5994      1810
   macro avg     0.5946    0.5941    0.5942      1810
weighted avg     0.5985    0.5994    0.5989      1810



In [42]:
X_train.to_csv('X_train_feat_norm_agg.csv', index=False)
y_train.to_csv('y_train_feat_norm_agg.csv', index=False)
X_test.to_csv('X_test_feat_norm_agg.csv', index=False)
y_test.to_csv('y_test_feat_norm_agg.csv', index=False)

In [None]:
# lgbm_class

In [43]:
import joblib
joblib.dump(lgbm_class, 'lgbm_class_norm_agg.pkl')

['lgbm_class_norm_agg.pkl']