# **스마트폰 센서 데이터 기반 모션 분류**
# 단계3 : 단계별 모델링


## 0.미션

단계별로 나눠서 모델링을 수행하고자 합니다.  

* 단계1 : 정적(0), 동적(1) 행동 분류 모델 생성
* 단계2 : 세부 동작에 대한 분류모델 생성
    * 단계1 모델에서 0으로 예측 -> 정적 행동 3가지 분류 모델링
    * 단계1 모델에서 1로 예측 -> 동적 행동 3가지 분류 모델링
* 모델 통합
    * 두 단계 모델을 통합하고, 새로운 데이터에 대해서 최종 예측결과와 성능평가가 나오도록 함수로 만들기
* 성능 비교
    * 기본 모델링의 성능과 비교
    * 모든 모델링은 [다양한 알고리즘 + 성능 튜닝]을 수행해야 합니다.


## 1.환경설정

### (1) 라이브러리 불러오기

* 세부 요구사항
    - 기본적으로 필요한 라이브러리를 import 하도록 코드가 작성되어 있습니다.
    - 필요하다고 판단되는 라이브러리를 추가하세요.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Add any further libraries you consider necessary.
import joblib
# Load a previously saved object from Google Drive into `features`.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code on load -- only open files from a trusted source.
# NOTE(review): the path lives under /content/drive, but drive.mount() is only
# called in a later cell; confirm the drive is already mounted when this runs.
# NOTE(review): `features` is not referenced again in the visible part of this
# notebook.
features = joblib.load("/content/drive/MyDrive/5_mini/2023.04.12_미니프로젝트5차_3_5일차 실습자료/feature_importance.pkl")
# Uncomment to inspect the loaded object:
# features

### (2) 데이터 불러오기

* 주어진 데이터셋
    * data01_train.csv : 학습 및 검증용

 <br/>  

* 세부 요구사항
    - data01_train.csv 를 불러와 'data' 이름으로 저장합니다.
        - data에서 변수 subject는 삭제합니다.
    - data01_test.csv 를 불러와 'new_data' 이름으로 저장합니다.


In [None]:
# Mount Google Drive at /content/drive so the dataset paths used in this
# notebook resolve (this import is only available inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Read the training CSV and discard the `subject` column in one chained step.
data = pd.read_csv(
    "/content/drive/MyDrive/5_mini/2023.04.12_미니프로젝트5차_3_5일차 실습자료/data01_train.csv"
).drop(columns="subject")

## 2.데이터 전처리

* 세부 요구사항
    - Label 추가 : data 에 Activity_dynamic 를 추가합니다. Activity_dynamic은 과제1에서 is_dynamic과 동일한 값입니다.
    - x와 y1, y2로 분할하시오.
        * y1 : Activity
        * y2 : Activity_dynamic
    - train : val = 8 : 2 혹은 7 : 3
    - random_state 옵션을 사용하여 다른 모델과 비교를 위해 성능이 재현되도록 합니다.

In [None]:
# Integer code for each activity name: codes 0-2 are the static postures and
# codes 3-5 are the walking variants.
label_dict = dict(
    zip(
        [
            "STANDING",
            "SITTING",
            "LAYING",
            "WALKING",
            "WALKING_UPSTAIRS",
            "WALKING_DOWNSTAIRS",
        ],
        range(6),
    )
)

# Binary static(0)/dynamic(1) flag, derived from the same code table so the two
# encodings cannot drift apart. It must be computed while `Activity` still
# holds the original string labels.
data["Activity_dynamic"] = data["Activity"].map(
    {activity: int(code >= 3) for activity, code in label_dict.items()}
)

# Replace the string labels with their integer codes.
data["Activity"] = data["Activity"].map(label_dict)

In [None]:
# Feature matrix: every column except the two target columns.
x = data.drop(columns=["Activity", "Activity_dynamic"])
# y1 is the six-class activity code, y2 the binary static/dynamic flag.
y1, y2 = data["Activity"], data["Activity_dynamic"]

In [None]:
from sklearn.model_selection import train_test_split

# Split ONCE so both targets share exactly the same train / hold-out rows.
# Two separate calls stratified on different targets produce different
# partitions, so rows of `x1_test` (used later to score the whole pipeline)
# could also sit in `x2_train` (used to fit the stage-1 model) and inflate the
# reported score. Stratifying on y1 also preserves the y2 ratio because y2 is a
# deterministic function of y1.
(
    x1_train,
    x1_test,
    y1_train,
    y1_test,
    y2_train,
    y2_test,
) = train_test_split(x, y1, y2, test_size=0.2, random_state=42, stratify=y1)

# Keep the original variable names so the later cells run unchanged.
x2_train, x2_test = x1_train, x1_test

## **3.단계별 모델링**

![](https://github.com/DA4BAM/image/blob/main/step%20by%20step.png?raw=true)

### (1) 단계1 : 정적/동적 행동 분류 모델

* 세부 요구사항
    * 정적 행동(Laying, Sitting, Standing)과 동적 행동(Walking, Walking Upstairs, Walking Downstairs)을 구분하는 모델 생성.
    * 몇가지 모델을 만들고 가장 성능이 좋은 모델을 선정하시오.

#### 1) 알고리즘1 : 

In [None]:
from sklearn.svm import SVC
# Stage-1 candidate 1: support vector classifier with the library's default
# hyperparameters, fit on the binary static(0)/dynamic(1) target.
model_svm = SVC()
model_svm.fit(x2_train,y2_train)

In [None]:
from sklearn.metrics import f1_score,confusion_matrix,accuracy_score

# Score the stage-1 SVC on the hold-out rows: accuracy, binary F1 and the
# confusion matrix, printed one per line.
y_pred = model_svm.predict(x2_test)
print(
    accuracy_score(y2_test, y_pred),
    f1_score(y2_test, y_pred),
    confusion_matrix(y2_test, y_pred),
    sep="\n",
)

1.0
1.0
[[647   0]
 [  0 530]]


#### 2) 알고리즘2 : 

In [None]:
from xgboost import XGBClassifier
# Stage-1 candidate 2: XGBoost classifier with the library's default
# hyperparameters, fit on the same binary static/dynamic target as the SVC.
model_xgb = XGBClassifier()
model_xgb.fit(x2_train,y2_train)

In [None]:
# Score the stage-1 XGBoost model on the same hold-out rows as the SVC.
y_pred = model_xgb.predict(x2_test)
print(
    accuracy_score(y2_test, y_pred),
    f1_score(y2_test, y_pred),
    confusion_matrix(y2_test, y_pred),
    sep="\n",
)

0.9991503823279524
0.9990574929311969
[[646   1]
 [  0 530]]


### (2) 단계2-1 : 정적 동작 세부 분류

* 세부 요구사항
    * 정적 행동(Laying, Sitting, Standing)인 데이터 추출
    * Laying, Sitting, Standing 를 분류하는 모델을 생성
    * 몇가지 모델을 만들고 가장 성능이 좋은 모델을 선정하시오.

In [None]:
# Keep only the rows flagged as static (Activity_dynamic == 0) and renumber
# them from 0, then display the result.
data1 = data.loc[data["Activity_dynamic"].eq(0)].reset_index(drop=True)
data1

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",Activity,Activity_dynamic
0,0.288508,-0.009196,-0.103362,-0.988986,-0.962797,-0.967422,-0.989000,-0.962596,-0.965650,-0.929747,...,-0.816696,-0.042494,-0.044218,0.307873,0.072790,-0.601120,0.331298,0.165163,0,0
1,0.265757,-0.016576,-0.098163,-0.989551,-0.994636,-0.987435,-0.990189,-0.993870,-0.987558,-0.937337,...,-0.693515,-0.062899,0.388459,-0.765014,0.771524,0.345205,-0.769186,-0.147944,2,0
2,0.278709,-0.014511,-0.108717,-0.997720,-0.981088,-0.994008,-0.997934,-0.982187,-0.995017,-0.942584,...,-0.829311,0.000265,-0.525022,-0.891875,0.021528,-0.833564,0.202434,-0.032755,0,0
3,0.272026,-0.001329,-0.125491,-0.992068,-0.912985,-0.972451,-0.994752,-0.943141,-0.976428,-0.925446,...,-0.704995,-0.024442,0.076332,0.741277,0.729812,-0.817201,0.037746,0.136129,0,0
4,0.284338,0.021956,-0.006925,-0.980153,-0.838394,-0.782357,-0.983683,-0.816199,-0.743923,-0.914011,...,-0.400197,0.021212,-0.009465,-0.282762,0.563343,-0.782072,0.242834,-0.025285,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3229,0.257476,-0.413865,0.017374,-0.919666,0.075259,-0.630716,-0.937986,0.176796,-0.613510,-0.866296,...,-0.091605,-0.000793,0.267189,-0.186202,0.098099,0.792970,-0.034020,-0.928148,2,0
3230,0.277378,-0.013298,-0.104322,-0.996596,-0.987491,-0.973345,-0.996372,-0.987746,-0.973512,-0.942156,...,-0.956239,0.122320,0.136275,-0.708377,-0.507788,-0.818263,0.222620,0.035430,0,0
3231,0.277194,-0.012389,-0.131974,-0.994046,-0.940578,-0.917337,-0.994261,-0.932830,-0.908088,-0.936219,...,-0.690363,-0.034888,-0.261437,-0.391477,-0.877612,-0.912365,0.114009,0.080146,1,0
3232,0.267981,-0.018348,-0.107440,-0.991303,-0.989881,-0.990313,-0.992386,-0.988852,-0.991237,-0.936099,...,-0.886851,0.060173,0.228739,0.684400,-0.216665,0.620363,-0.437247,-0.571840,2,0


In [None]:
# `x` / `y` are kept so any cell that still expects the full static feature
# matrix and labels keeps working.
x = data1.drop(["Activity","Activity_dynamic"],axis=1)
y = data1["Activity"]

# Take the static train / hold-out rows from the existing 6-class split
# instead of re-splitting `data1` on its own. An independent split is not
# guaranteed to be disjoint from `x1_test`, which is what the final pipeline
# evaluation scores, so the stage-2 model could be trained on rows it is later
# evaluated on. Codes 0, 1, 2 are the static activities in `label_dict`.
x_train, y_train = (
    x1_train[y1_train.isin([0, 1, 2])],
    y1_train[y1_train.isin([0, 1, 2])],
)
x_test, y_test = (
    x1_test[y1_test.isin([0, 1, 2])],
    y1_test[y1_test.isin([0, 1, 2])],
)

In [None]:
# Stage 2-1 candidate 1: default-parameter SVC fit on the current
# x_train / y_train (as written, the static-only split from the cell above).
model_svm2 = SVC()
model_svm2.fit(x_train,y_train)

In [None]:
# Score the static-activity SVC: accuracy, macro-averaged F1 across the three
# classes, and the confusion matrix, printed one per line.
y_pred = model_svm2.predict(x_test)
print(
    accuracy_score(y_test, y_pred),
    f1_score(y_test, y_pred, average="macro"),
    confusion_matrix(y_test, y_pred),
    sep="\n",
)

0.9505409582689336
0.9495501149880097
[[205  13   0]
 [ 18 187   1]
 [  0   0 223]]


In [None]:
# Stage 2-1 candidate 2: default-parameter XGBoost classifier fit on the
# current x_train / y_train. This is the model the pipeline function `a`
# applies to rows that stage 1 predicts as static.
model_xgb2 = XGBClassifier()
model_xgb2.fit(x_train,y_train)

In [None]:
# Score the static-activity XGBoost model with the same three metrics.
y_pred = model_xgb2.predict(x_test)
print(
    accuracy_score(y_test, y_pred),
    f1_score(y_test, y_pred, average="macro"),
    confusion_matrix(y_test, y_pred),
    sep="\n",
)

0.98145285935085
0.9811051693404634
[[214   4   0]
 [  8 198   0]
 [  0   0 223]]


### (3) 단계2-2 : 동적 동작 세부 분류

* 세부 요구사항
    * 동적 행동(Walking, Walking Upstairs, Walking Downstairs)인 데이터 추출
    * Walking, Walking Upstairs, Walking Downstairs 를 분류하는 모델을 생성
    * 몇가지 모델을 만들고 가장 성능이 좋은 모델을 선정하시오.

In [None]:
# Keep only the rows flagged as dynamic (Activity_dynamic == 1) and renumber
# them from 0.
data2 = data.loc[data["Activity_dynamic"].eq(1)].reset_index(drop=True)
# Shift the activity codes 3, 4, 5 down to 0, 1, 2 for the three-class model.
data2["Activity"] = data2["Activity"].sub(3)
data2

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",Activity,Activity_dynamic
0,0.289795,-0.035536,-0.150354,-0.231727,-0.006412,-0.338117,-0.273557,0.014245,-0.347916,0.008288,...,-0.408956,-0.255125,0.612804,0.747381,-0.072944,-0.695819,0.287154,0.111388,0,1
1,0.394807,0.034098,0.091229,0.088489,-0.106636,-0.388502,-0.010469,-0.109680,-0.346372,0.584131,...,-0.563437,-0.044344,-0.845268,-0.974650,-0.887846,-0.705029,0.264952,0.137758,2,1
2,0.330708,0.007561,-0.061371,-0.215760,0.101075,0.072949,-0.269857,0.060060,0.101298,-0.019263,...,-0.887024,-0.030645,-0.852091,-0.500195,0.306091,-0.552729,0.253885,0.291256,1,1
3,0.121465,-0.031902,-0.005196,-0.152198,-0.113104,-0.239423,-0.202401,-0.164698,-0.247099,0.114668,...,-0.775779,0.445206,-0.003487,-0.940185,0.041387,-0.886603,0.173338,-0.005627,0,1
4,0.303885,0.002768,-0.038613,-0.168656,0.190336,-0.140473,-0.205134,0.101144,-0.120572,-0.000818,...,-0.329728,-0.040030,0.257252,0.076091,-0.123425,-0.752882,0.266729,0.045692,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2642,0.277709,-0.053919,-0.098746,-0.589970,-0.076626,-0.500837,-0.605474,-0.142798,-0.506696,-0.533485,...,-0.268237,-0.076922,0.706620,-0.954969,-0.324779,-0.691663,0.153974,-0.196833,0,1
2643,0.252496,-0.009773,-0.118293,-0.155168,-0.244513,-0.044524,-0.244367,-0.229509,-0.042199,0.291682,...,-0.752980,0.619246,-0.684483,0.924623,-0.353041,-0.775518,0.253218,0.013216,1,1
2644,0.264961,-0.034719,-0.088140,-0.368158,-0.074514,-0.229606,-0.394387,-0.082437,-0.243525,-0.261087,...,-0.046157,0.037517,-0.903692,0.614933,-0.610918,-0.601604,0.335996,0.156518,0,1
2645,0.191568,0.013328,-0.105174,-0.126969,-0.121729,-0.327480,-0.192523,-0.109923,-0.295286,0.078644,...,-0.879215,0.721718,0.623151,0.866858,-0.445660,-0.690278,0.303194,-0.044188,1,1


In [None]:
# `x` / `y` are kept so any cell that still expects the full dynamic feature
# matrix and labels keeps working.
x = data2.drop(["Activity","Activity_dynamic"],axis=1)
y = data2["Activity"]

# Take the dynamic train / hold-out rows from the existing 6-class split
# instead of re-splitting `data2` on its own. An independent split is not
# guaranteed to be disjoint from `x1_test`, which is what the final pipeline
# evaluation scores, so the stage-2 model could be trained on rows it is later
# evaluated on. Codes 3, 4, 5 are the dynamic activities in `label_dict`; they
# are shifted to 0, 1, 2 to match the convention used for `data2`, and the
# pipeline function adds the 3 back.
x_train, y_train = (
    x1_train[y1_train.isin([3, 4, 5])],
    y1_train[y1_train.isin([3, 4, 5])] - 3,
)
x_test, y_test = (
    x1_test[y1_test.isin([3, 4, 5])],
    y1_test[y1_test.isin([3, 4, 5])] - 3,
)

In [None]:
# Stage 2-2 candidate 1: default-parameter SVC for the three walking classes.
# fit() returns the estimator itself, so construction and training are chained.
model_svm3 = SVC().fit(x_train, y_train)

# Accuracy, macro-averaged F1 and the confusion matrix, printed one per line.
y_pred = model_svm3.predict(x_test)
print(
    accuracy_score(y_test, y_pred),
    f1_score(y_test, y_pred, average="macro"),
    confusion_matrix(y_test, y_pred),
    sep="\n",
)

1.0
1.0
[[200   0   0]
 [  0 172   0]
 [  0   0 158]]


In [None]:
# Stage 2-2 candidate 2: default-parameter XGBoost classifier fit on the
# current x_train / y_train (as written, the dynamic-only split).
model_xgb3 = XGBClassifier()
model_xgb3.fit(x_train,y_train)

In [None]:
# Score the dynamic-activity XGBoost model with the same three metrics.
y_pred = model_xgb3.predict(x_test)
print(
    accuracy_score(y_test, y_pred),
    f1_score(y_test, y_pred, average="macro"),
    confusion_matrix(y_test, y_pred),
    sep="\n",
)

0.9962264150943396
0.9961750689049919
[[199   1   0]
 [  0 171   1]
 [  0   0 158]]


In [None]:
# Display the dynamic-only frame for inspection (no computation happens here).
# NOTE(review): the saved output under this cell lists rows with
# Activity_dynamic == 0, so it appears to be stale -- re-run to refresh it.
data2

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",Activity,Activity_dynamic
0,0.288508,-0.009196,-0.103362,-0.988986,-0.962797,-0.967422,-0.989000,-0.962596,-0.965650,-0.929747,...,-0.816696,-0.042494,-0.044218,0.307873,0.072790,-0.601120,0.331298,0.165163,0,0
1,0.265757,-0.016576,-0.098163,-0.989551,-0.994636,-0.987435,-0.990189,-0.993870,-0.987558,-0.937337,...,-0.693515,-0.062899,0.388459,-0.765014,0.771524,0.345205,-0.769186,-0.147944,2,0
2,0.278709,-0.014511,-0.108717,-0.997720,-0.981088,-0.994008,-0.997934,-0.982187,-0.995017,-0.942584,...,-0.829311,0.000265,-0.525022,-0.891875,0.021528,-0.833564,0.202434,-0.032755,0,0
3,0.272026,-0.001329,-0.125491,-0.992068,-0.912985,-0.972451,-0.994752,-0.943141,-0.976428,-0.925446,...,-0.704995,-0.024442,0.076332,0.741277,0.729812,-0.817201,0.037746,0.136129,0,0
4,0.284338,0.021956,-0.006925,-0.980153,-0.838394,-0.782357,-0.983683,-0.816199,-0.743923,-0.914011,...,-0.400197,0.021212,-0.009465,-0.282762,0.563343,-0.782072,0.242834,-0.025285,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3229,0.257476,-0.413865,0.017374,-0.919666,0.075259,-0.630716,-0.937986,0.176796,-0.613510,-0.866296,...,-0.091605,-0.000793,0.267189,-0.186202,0.098099,0.792970,-0.034020,-0.928148,2,0
3230,0.277378,-0.013298,-0.104322,-0.996596,-0.987491,-0.973345,-0.996372,-0.987746,-0.973512,-0.942156,...,-0.956239,0.122320,0.136275,-0.708377,-0.507788,-0.818263,0.222620,0.035430,0,0
3231,0.277194,-0.012389,-0.131974,-0.994046,-0.940578,-0.917337,-0.994261,-0.932830,-0.908088,-0.936219,...,-0.690363,-0.034888,-0.261437,-0.391477,-0.877612,-0.912365,0.114009,0.080146,1,0
3232,0.267981,-0.018348,-0.107440,-0.991303,-0.989881,-0.990313,-0.992386,-0.988852,-0.991237,-0.936099,...,-0.886851,0.060173,0.228739,0.684400,-0.216665,0.620363,-0.437247,-0.571840,2,0


### (4) 분류 모델 합치기


* 세부 요구사항
    * 두 단계 모델을 통합하고, 새로운 데이터(test)에 대해서 최종 예측결과와 성능평가가 나오도록 함수로 만들기
    * 데이터 파이프라인 구축 : test데이터가 로딩되어 전처리 과정을 거치고, 예측 및 성능 평가 수행

![](https://github.com/DA4BAM/image/blob/main/pipeline%20function.png?raw=true)

#### 1) 함수 만들기

In [None]:
def a(data, stage1_model=None, static_model=None, dynamic_model=None):
    """Predict the six-class activity code with the two-stage pipeline.

    Stage 1 labels every row as static (0) or dynamic (1). Rows labelled
    static go to the static three-class model, whose output is already the
    final code 0-2. Rows labelled dynamic go to the dynamic three-class
    model, whose 0-2 output is shifted by 3 to give the final code 3-5.

    Args:
        data: Feature frame with the same columns the models were fit on.
            Its index is ignored; rows are renumbered from 0.
        stage1_model: Binary static/dynamic classifier. Defaults to the
            notebook-level ``model_svm``.
        static_model: Three-class model for static rows. Defaults to the
            notebook-level ``model_xgb2``.
        dynamic_model: Three-class model for dynamic rows. Defaults to the
            notebook-level ``model_svm3``.

    Returns:
        A DataFrame with one integer column ``result`` holding the final
        activity code for each input row, indexed 0..n-1 in input order.

    Raises:
        ValueError: If the stage-1 model predicts anything other than 0 or 1.
    """
    # The dynamic model is trained on codes shifted down by this amount.
    dynamic_offset = 3

    # Resolve defaults lazily so the notebook globals are only needed when the
    # caller does not pass its own models.
    stage1_model = model_svm if stage1_model is None else stage1_model
    static_model = model_xgb2 if static_model is None else static_model
    dynamic_model = model_svm3 if dynamic_model is None else dynamic_model

    data = data.reset_index(drop=True)
    result = pd.Series(0, index=data.index, dtype="int64", name="result")
    if len(data) == 0:
        # Nothing to predict; estimators reject zero-row input.
        return result.to_frame()

    stage1_pred = np.asarray(stage1_model.predict(data)).ravel()
    is_static = stage1_pred == 0
    is_dynamic = stage1_pred == 1
    if not (is_static | is_dynamic).all():
        raise ValueError(
            "stage-1 model must predict only 0 (static) or 1 (dynamic)"
        )

    # Only call a stage-2 model when it has rows to score: a batch that is
    # entirely static (or entirely dynamic) would otherwise hand the other
    # model an empty frame.
    if is_static.any():
        static_pred = np.asarray(static_model.predict(data.loc[is_static]))
        result.loc[is_static] = static_pred.ravel().astype("int64")
    if is_dynamic.any():
        dynamic_pred = np.asarray(dynamic_model.predict(data.loc[is_dynamic]))
        result.loc[is_dynamic] = (
            dynamic_pred.ravel().astype("int64") + dynamic_offset
        )

    return result.to_frame()

In [None]:
# Run the two-stage pipeline on the hold-out features and display the
# predicted activity codes.
a(x1_test)

Unnamed: 0,result
0,2.0
1,3.0
2,0.0
3,0.0
4,4.0
...,...
1172,0.0
1173,1.0
1174,5.0
1175,0.0


In [None]:
from sklearn.metrics import accuracy_score,f1_score,confusion_matrix

# Run the pipeline once and reuse its output: calling a(x1_test) separately for
# each metric repeats all three model predictions for no benefit.
final_pred = a(x1_test)["result"]
print(confusion_matrix(y1_test, final_pred))
print(accuracy_score(y1_test, final_pred))
print(f1_score(y1_test, final_pred, average="macro"))

[[214   4   0   0   0   0]
 [  8 197   0   0   1   0]
 [  0   0 223   0   0   0]
 [  0   0   0 200   0   0]
 [  0   0   0   0 171   1]
 [  0   0   0   0   1 157]]
0.9872557349192863
0.9876269124480461


In [None]:
####