In [1]:
import pandas as pd
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MultiLabelBinarizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, precision_recall_fscore_support

In [2]:
# 1. Load the dataset from the CSV file into a DataFrame
data = pd.read_csv(filepath_or_buffer='azdigar nav sartirovka+0 delete.csv')

In [3]:
# 2. Columns to analyse: one 'w08chronic_<suffix>' column per suffix letter.
#    Note that the letter 'j' is not among the suffixes.
columns = [f'w08chronic_{suffix}' for suffix in 'abcdefghiklm']


In [4]:
# 3. Data cleaning - keep only the rows whose 'w08chronic_m' value is not 3
cleaned_data = data.loc[data['w08chronic_m'].ne(3)]

In [5]:
# 4. Define targets and features:
#    targets are the columns listed in `columns`, features are every other column
y_all = cleaned_data.loc[:, columns]
X = cleaned_data.drop(columns, axis=1)

In [6]:
# 5. Feature scaling: learn the per-column statistics, then apply them to X
# NOTE(review): the scaler is fitted on every row of X, i.e. before the
# train/test split is made further down.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [7]:
# 6. Split into training and test sets
# The split is made on the unscaled features X, and the scaler is then
# re-fitted on the training rows only. Fitting it on all rows (as X_scaled was)
# lets the test rows influence the scaling statistics, which is a form of
# train/test leakage. With the same random_state and the same number of rows,
# the row partition is the same as splitting X_scaled would give.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y_all, test_size=0.2, random_state=42
)

# `scaler` is rebound so that any later use transforms new data with
# statistics learned from the training rows alone.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [8]:
# 7. Define and train the model
# MultiOutputClassifier fits one copy of the base estimator per target column.
# random_state is fixed (matching the seed used for the train/test split) so
# that the fitted forests, and every metric derived from them, are
# reproducible when the notebook is re-run from a fresh kernel.
rf = RandomForestClassifier(random_state=42)
multi_target_rf = MultiOutputClassifier(rf, n_jobs=-1)
multi_target_rf.fit(X_train, y_train)


In [9]:
# 8. Predict the class label of every target for each test row
y_pred = multi_target_rf.predict(X=X_test)


In [11]:
# 9. Evaluate every target separately and store the metrics
results = []

# Per-target class probabilities: a list with one (n_samples, n_classes) array
# per target column. ROC AUC is a ranking metric and needs continuous scores;
# feeding it hard class labels only evaluates a single operating point.
y_proba = multi_target_rf.predict_proba(X_test)

for i, target in enumerate(columns):
    # Slice the true and predicted labels for this target once
    y_true = y_test.iloc[:, i]
    y_hat = y_pred[:, i]

    # Label-based metrics for this target
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true, y_hat, average='weighted', zero_division=0
    )
    accuracy = accuracy_score(y_true, y_hat)
    # zero_division=0 returns the same value as the default but without
    # emitting a warning for classes that are never predicted
    micro_f1 = f1_score(y_true, y_hat, average='micro', zero_division=0)
    macro_f1 = f1_score(y_true, y_hat, average='macro', zero_division=0)
    weighted_f1 = f1_score(y_true, y_hat, average='weighted', zero_division=0)

    # AUC, computed from predicted probabilities
    target_classes = multi_target_rf.estimators_[i].classes_
    target_proba = y_proba[i]
    try:
        if len(target_classes) == 2:
            # Binary target: score with the probability of the larger label,
            # which is the one roc_auc_score treats as the positive class
            auc = roc_auc_score(y_true, target_proba[:, 1])
        elif len(target_classes) > 2:
            # Multiclass target: one-vs-rest AUC, weighted by class support
            auc = roc_auc_score(
                y_true,
                target_proba,
                multi_class='ovr',
                average='weighted',
                labels=target_classes,
            )
        else:
            # Only one class was seen during training: AUC is undefined
            auc = np.nan
    except ValueError:
        # Raised e.g. when the test split contains a single class for this target
        auc = np.nan

    # Store the results
    results.append({
        'Target': target,
        'Model': 'Random Forest',
        'Precision': precision,
        'Recall': recall,
        'F1-Score': f1,
        'Accuracy': accuracy,
        'Micro F1': micro_f1,
        'Macro F1': macro_f1,
        'Weighted F1': weighted_f1,
        'AUC': auc
    })

    # Show the predictions next to the actual values
    print(f"Target: {target}")
    print("Predictions:", y_hat)
    print("Actual:", y_true.values)

Target: w08chronic_a
Predictions: [1 1 5 5 5 5 1 5 5 1 1 5 5 5 5 5 1 5 5 1 5 1 1 5 5 5 5 1 1 5 5 5 5 1 1 1 1
 5 1 5 1 1 5 1 5 5 1 5 5 1 5 1 1 1 5 1 1 5 1 1 5 1 1 1 1 5 5 5 1 1 1 1 1 5
 5 1 5 1 1 1 1 5 1 1 1 1 1 1 5 1 1 1 5 1 1 1 1 5 1 1 1 5 1 1 1 1 1 5 1 5 5
 1 1 5 5 1 5 5 5 1 1 1 5 5 5 1 1 5 5 1 5 1 5 1 1 5 1 1 5 1 1 5 5 1 1 5 1 1
 5 1 1 1 1 1 5 1 5 5 1 1 1 5 1 1 1 1 5 1 1 1 1 1 5 1 1 1 5 5 1 5 5 5 5 1 1
 5 5 5 5 1 5 1 5 1 1 1 1 5 1 1 5 1 1 5 1 1 5 1 1 5 1 1 5 5 1 1 1 1 5 5 1 1
 1 5 1 5 1 1 1 1 5 1 1 5 1 5 5 1 1 5 1 1 1 1 1 5 1 1 5 1 5 1 1 1 1 5 1 1 1
 1 5 1 5 1 1 1 5 5 1 5 5 1 1 1 5 5 1 1 5 5 5 5 5 1 5 1 1 5 1 5 5 1 5 1 1 1
 1 5 1 5 5 5 5 1 1 5 1 1 1 1 1 5 5 1 5 5 5 5 5 5 1 5 1 1 5 1 1 1 5 1 5 1 1
 1 1 5 5 5 1 1 1 1 1 1 1 1 5 5 1 1 5 1 5 1 5 5 5 1 5 1 1 1 1 1 5 5 1 1 5 1
 1 1 1 5 1 1 5 1 1 1 1 5 1 1 1 1 1 5 1 5 5 5 1 1 5 5 1 1 1 1 1 1 1 5 5 1 1
 5 5 5 5 5 1 5 5 5 5 1 5 5 1 1 5 1 1 1 5 1 5 1 5 5 1 1 1 5 1 5 1 1 5 1 1 1
 5 1 1 1 1 1 1 1 5 1 5 1 1 1 5 1 1 5 1 1 5 1 5 1 5 1 5 1 1 1 5 1 5

In [18]:
# 10. Collect the per-target metric records into a DataFrame and print it
result_table = pd.DataFrame(data=results)
print(result_table)

          Target          Model  Precision    Recall  F1-Score  Accuracy  \
0   w08chronic_a  Random Forest   0.731962  0.721854  0.720599  0.721854   
1   w08chronic_b  Random Forest   0.702556  0.768212  0.681998  0.768212   
2   w08chronic_c  Random Forest   0.859611  0.927152  0.892105  0.927152   
3   w08chronic_d  Random Forest   0.941285  0.970199  0.955523  0.970199   
4   w08chronic_e  Random Forest   0.943428  0.971302  0.957163  0.971302   
5   w08chronic_f  Random Forest   0.799307  0.894040  0.844024  0.894040   
6   w08chronic_g  Random Forest   0.922732  0.945916  0.921717  0.945916   
7   w08chronic_h  Random Forest   0.903077  0.949227  0.925577  0.949227   
8   w08chronic_i  Random Forest   0.756590  0.770419  0.743932  0.770419   
9   w08chronic_k  Random Forest   0.971508  0.985651  0.978529  0.985651   
10  w08chronic_l  Random Forest   0.973685  0.986755  0.980177  0.986755   
11  w08chronic_m  Random Forest   0.982418  0.991170  0.986775  0.991170   

    Micro F