### 라이브러리 및 데이터 불러오기

In [63]:
import pandas as pd
import numpy as np

# Load the labeled dataset from disk and preview its first rows.
csv_path = 'labeled.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,Gender,Age_Group,Shoulder_Height_Diff_px,Hip_Height_Diff_px,Torso_Vertical_Tilt_deg,Ear_Hip_Vertical_Tilt_deg,Shoulder_Line_Horizontal_Tilt_deg,Hip_Line_Horizontal_Tilt_deg,Exercise_Label
0,Female,Teen,1.0,3.0,-0.55,-0.74,0.34,1.74,Glute Bridge
1,Female,Teen,2.0,1.0,-0.32,0.42,-0.7,0.62,Resistance Band Shoulder Squeeze
2,Female,Teen,1.0,4.0,2.33,0.67,0.44,2.39,Glute Bridge
3,Female,Teen,1.0,1.0,3.35,1.11,-0.38,-0.52,Bird Dog
4,Female,Teen,3.0,4.0,-0.28,-1.09,1.08,-2.18,Glute Bridge


### 결측치 확인 및 처리

In [64]:
# Report how many missing values each column contains.
missing_per_column = df.isna().sum()
print(missing_per_column)

# Discard every row that has at least one missing value, then renumber
# the index from 0 so it stays contiguous after the removal.
df = df.dropna()
df = df.reset_index(drop=True)

Gender                               0
Age_Group                            0
Shoulder_Height_Diff_px              0
Hip_Height_Diff_px                   0
Torso_Vertical_Tilt_deg              0
Ear_Hip_Vertical_Tilt_deg            9
Shoulder_Line_Horizontal_Tilt_deg    0
Hip_Line_Horizontal_Tilt_deg         0
Exercise_Label                       0
dtype: int64


### 라벨 인코딩

In [65]:
from sklearn.preprocessing import LabelEncoder

# Convert gender, age group and exercise label to integer codes.
# One encoder per column is kept so each mapping can be inspected or
# inverted independently later on.
le_gender, le_age, le_label = LabelEncoder(), LabelEncoder(), LabelEncoder()

for source_col, encoded_col, encoder in (
    ('Gender', 'Gender_enc', le_gender),
    ('Age_Group', 'Age_Group_enc', le_age),
    ('Exercise_Label', 'Exercise_Label_enc', le_label),
):
    df[encoded_col] = encoder.fit_transform(df[source_col])

# Show each original column next to its encoded counterpart.
preview_cols = [
    'Gender', 'Gender_enc',
    'Age_Group', 'Age_Group_enc',
    'Exercise_Label', 'Exercise_Label_enc',
]
df[preview_cols].head()

Unnamed: 0,Gender,Gender_enc,Age_Group,Age_Group_enc,Exercise_Label,Exercise_Label_enc
0,Female,0,Teen,2,Glute Bridge,5
1,Female,0,Teen,2,Resistance Band Shoulder Squeeze,9
2,Female,0,Teen,2,Glute Bridge,5
3,Female,0,Teen,2,Bird Dog,1
4,Female,0,Teen,2,Glute Bridge,5


### 라벨 인코딩 결과

In [66]:
# Build a {class name -> integer code} dictionary for every fitted encoder.
# NOTE(review): the printed label mapping lists both 'Seated Marches' and
# 'Seated Marching' as separate classes — possibly the same exercise spelled
# two ways; confirm against the labeling source.
mapping_gender, mapping_age, mapping_label = (
    dict(zip(enc.classes_, enc.transform(enc.classes_)))
    for enc in (le_gender, le_age, le_label)
)

for mapping in (mapping_gender, mapping_age, mapping_label):
    print(mapping)

{'Female': 0, 'Male': 1}
{'Middle-aged': 0, 'Senior': 1, 'Teen': 2, 'Young Adult': 3}
{'Band Shoulder Abduction': 0, 'Bird Dog': 1, 'Clamshell': 2, 'Cross-Body Arm Stretch': 3, 'Diaphragmatic Breathing': 4, 'Glute Bridge': 5, 'Modified Plank': 6, 'Plank': 7, 'Resistance Band Arm Stretch': 8, 'Resistance Band Shoulder Squeeze': 9, 'Russian Twist': 10, 'Seated Figure-Four Stretch': 11, 'Seated Marches': 12, 'Seated Marching': 13, 'Seated Shoulder Raise': 14, 'Shoulder Press': 15, 'Side-lying Leg Lift': 16, 'Standing Hip Abduction': 17, 'Wall Angel': 18, 'Wall Push-up': 19}


### 피처/라벨 분리

In [67]:
# Numeric posture measurements taken directly from the dataset.
measurement_cols = [
    'Shoulder_Height_Diff_px',
    'Hip_Height_Diff_px',
    'Torso_Vertical_Tilt_deg',
    'Ear_Hip_Vertical_Tilt_deg',
    'Shoulder_Line_Horizontal_Tilt_deg',
    'Hip_Line_Horizontal_Tilt_deg',
]
# Integer-encoded categorical columns produced by the label-encoding step.
encoded_cols = ['Gender_enc', 'Age_Group_enc']

# The column order matters: the scaler and the model are fitted on data
# derived from X, so this exact order must be used again at inference time.
feature_cols = measurement_cols + encoded_cols

X = df[feature_cols]
y = df['Exercise_Label_enc']

### 학습/테스트 데이터 분할

In [68]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible. No `stratify` argument is passed, so class proportions are
# not forced to match between the two subsets.
split_options = {'test_size': 0.2, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Report the size of the full dataset and of each subset.
n_total, n_train, n_test = len(df), len(X_train), len(X_test)
print(f"전체 데이터 수: {n_total}")
print(f"학습 데이터 수: {n_train}")
print(f"테스트 데이터 수: {n_test}")

전체 데이터 수: 645
학습 데이터 수: 516
테스트 데이터 수: 129


### 스케일링

In [69]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, so no statistics from the test
# split influence the transformation, then apply it to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_test)
)

### RandomForest 모델 학습

In [70]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with 100 trees; the fixed seed makes training reproducible.
forest_params = {'n_estimators': 100, 'random_state': 42}
rf = RandomForestClassifier(**forest_params)

# Train on the scaled training features and their encoded exercise labels.
rf.fit(X_train_scaled, y_train)

### 모델 평가

In [71]:
from sklearn.metrics import accuracy_score, classification_report

# Predict on the held-out split and report overall accuracy.
y_pred = rf.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_pred)
print("테스트 데이터 정확도:", test_accuracy)

# Restrict the per-class report to the classes that actually occur in the
# test split, and show their original names instead of integer codes.
present_labels = np.unique(y_test)
target_names = le_label.inverse_transform(present_labels)

# zero_division=0 reports 0 for undefined metrics instead of warning.
report_text = classification_report(
    y_test,
    y_pred,
    labels=present_labels,
    target_names=target_names,
    zero_division=0,
)
print(report_text)

테스트 데이터 정확도: 0.8527131782945736
                                  precision    recall  f1-score   support

         Band Shoulder Abduction       1.00      0.90      0.95        10
                        Bird Dog       0.67      0.57      0.62         7
                       Clamshell       0.40      1.00      0.57         2
          Cross-Body Arm Stretch       1.00      0.83      0.91         6
                    Glute Bridge       0.83      0.83      0.83         6
                  Modified Plank       1.00      0.86      0.92         7
                           Plank       1.00      0.60      0.75        10
     Resistance Band Arm Stretch       0.89      1.00      0.94         8
Resistance Band Shoulder Squeeze       0.57      0.67      0.62         6
      Seated Figure-Four Stretch       0.60      0.75      0.67         4
                  Seated Marches       0.83      0.83      0.83         6
                 Seated Marching       1.00      0.83      0.91         6
     

### 모델/스케일러/라벨인코더 저장

In [72]:
import joblib

# Persist everything needed to reproduce predictions outside this notebook.
joblib.dump(rf, 'exercise_rf_model.pkl')
joblib.dump(scaler, 'exercise_scaler.pkl')

# The model is trained on 'Gender_enc' and 'Age_Group_enc', so the encoders
# that produced those columns must be saved too; without them raw
# 'Gender' / 'Age_Group' values cannot be mapped to the same integer codes
# at inference time.
joblib.dump(le_gender, 'exercise_gender_encoder.pkl')
joblib.dump(le_age, 'exercise_age_group_encoder.pkl')

# Kept as the final statement so the cell's displayed result is unchanged.
joblib.dump(le_label, 'exercise_label_encoder.pkl')

['exercise_label_encoder.pkl']