In [14]:
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, precision_recall_fscore_support
from sklearn.preprocessing import label_binarize


In [16]:
# 1. 데이터 불러오기
data = pd.read_csv('azdigar nav sartirovka+0 delete.csv')


In [17]:
# 분석할 열들
columns = [
    'w08chronic_a', 'w08chronic_b', 'w08chronic_c',
    'w08chronic_d', 'w08chronic_e', 'w08chronic_f',
    'w08chronic_g', 'w08chronic_h', 'w08chronic_i',
    'w08chronic_k', 'w08chronic_l', 'w08chronic_m'
]

In [18]:
# 2. 데이터 클리닝 - 'w08chronic_m'에서 값이 3인 행 제거
cleaned_data = data[data['w08chronic_m'] != 3]

In [19]:
# 3. 피처 데이터 정의 (모든 chronic 컬럼 제외)
X = cleaned_data.drop(columns=columns)
y_all = cleaned_data[columns]

In [20]:
# 4. 데이터 스케일링
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [21]:
# 5. 모델 정의
models = {
    'Random Forest': RandomForestClassifier(random_state=42),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'SVM': SVC(probability=True, random_state=42),
    'MLP': MLPClassifier(max_iter=2000, random_state=42)
}

In [33]:
# 6. Harmony Search 파라미터 설정
HMS = 5  # Harmony Memory Size (조화 메모리 크기)
HMCR = 0.9  # Harmony Memory Considering Rate (기존 솔루션 사용 확률)
PAR = 0.3  # Pitch Adjusting Rate (피처 조정 확률)
max_iterations = 10  # 최대 반복 횟수

# Harmony Search 초기화
def initialize_harmony_memory():
    return [np.random.randint(2, size=X_scaled.shape[1]) for _ in range(HMS)]

# Harmony 평가 함수
def evaluate_harmony(harmony):
    selected_features = [i for i, bit in enumerate(harmony) if bit]
    if len(selected_features) == 0:
        return 0, 0  # 피처가 선택되지 않으면 0 리턴
    X_train_selected = X_train[:, selected_features]
    X_test_selected = X_test[:, selected_features]
    
    model = RandomForestClassifier(random_state=42)
    model.fit(X_train_selected, y_train)
    y_pred = model.predict(X_test_selected)

    # AUC를 위해 타겟 클래스를 이진화
    y_test_bin = label_binarize(y_test, classes=np.unique(y_train))  # 여기에 y_train의 고유 클래스 기반 이진화
    if y_test_bin.shape[1] == 1:  # 클래스가 하나일 경우
        y_test_bin = np.concatenate([1 - y_test_bin, y_test_bin], axis=1)

    f1 = f1_score(y_test, y_pred, average='macro')

    # AUC 계산 시 다중 클래스를 고려
    auc = roc_auc_score(y_test_bin, model.predict_proba(X_test_selected), multi_class="ovr", average="macro")
    return f1, auc

# 새로운 Harmony 생성 함수
def generate_new_harmony(HM):
    new_harmony = []
    for i in range(X_scaled.shape[1]):
        if random.random() < HMCR:
            new_harmony.append(HM[random.randint(0, HMS-1)][i])
        else:
            new_harmony.append(random.randint(0, 1))
        if random.random() < PAR:
            new_harmony[i] = 1 - new_harmony[i]  # 피처 선택 변경 (뮤테이션)
    return new_harmony

# Harmony Search 실행 함수
def run_harmony_search():
    harmony_memory = initialize_harmony_memory()
    best_harmony = None
    best_f1 = -1

    for iteration in range(max_iterations):
        print(f"Iteration {iteration + 1}/{max_iterations}")  # 진행 상황 출력
        new_harmony = generate_new_harmony(harmony_memory)
        new_f1, new_auc = evaluate_harmony(new_harmony)

        worst_index = np.argmin([evaluate_harmony(h)[0] for h in harmony_memory])
        if new_f1 > evaluate_harmony(harmony_memory[worst_index])[0]:
            harmony_memory[worst_index] = new_harmony

        if new_f1 > best_f1:
            best_f1 = new_f1
            best_harmony = new_harmony

    return best_harmony


In [34]:
# 7. 결과 저장을 위한 빈 데이터프레임 생성
result_table = pd.DataFrame(columns=['Target', 'Model', 'Precision', 'Recall', 'F1-Score', 'Accuracy', 'Micro F1', 'Macro F1', 'Weighted F1', 'AUC'])

In [35]:
# 8. 각 타겟 열을 사용한 학습 및 평가
for target_column in columns:
    print(f'\n[{target_column}] 열을 예측:')
    print('==================================')

    y = cleaned_data[target_column].values

    # 데이터 나누기
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

    # 9. Harmony Search를 사용한 피처 선택
    best_harmony = run_harmony_search()
    selected_features = [i for i, bit in enumerate(best_harmony) if bit]

    if len(selected_features) == 0:
        print(f'선택된 피처가 없습니다. {target_column} 열에 대한 평가를 건너뜁니다.')
        continue

    X_train_selected = X_train[:, selected_features]
    X_test_selected = X_test[:, selected_features]

    # 타겟 클래스가 두 개 이상이면 AUC 계산을 위해 타겟을 이진화 (One-vs-Rest 방식)
    y_test_bin = label_binarize(y_test, classes=np.unique(y))
    if y_test_bin.shape[1] == 1:  # 클래스가 하나인 경우
        y_test_bin = np.concatenate([1 - y_test_bin, y_test_bin], axis=1)

    # 10. 각 모델에 대해 학습 및 평가
    for model_name, model in models.items():
        print(f'\n[{model_name}] 모델 평가:')
        print('--------------------------')

        try:
            # 모델 학습
            model.fit(X_train_selected, y_train)

            # 예측
            y_pred = model.predict(X_test_selected)
            
            # 확률 예측 값이 있는 경우
            if hasattr(model, "predict_proba"):
                y_pred_proba = model.predict_proba(X_test_selected)
            else:
                y_pred_proba = np.zeros((len(y_test), len(np.unique(y_test))))  # 확률 값을 지원하지 않는 모델에 대한 처리

            # 정확도 계산
            accuracy = accuracy_score(y_test, y_pred)

            # precision, recall, f1-score 계산 (weighted average)
            precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred, average='weighted')

            # Micro, Macro, Weighted F1 계산
            micro_f1 = f1_score(y_test, y_pred, average='micro')
            macro_f1 = f1_score(y_test, y_pred, average='macro')
            weighted_f1 = f1_score(y_test, y_pred, average='weighted')

            # AUC 계산
            try:
                auc = roc_auc_score(y_test_bin, y_pred_proba, multi_class="ovr", average='weighted')
            except ValueError:
                auc = np.nan

            # 결과 추가
            new_row = pd.DataFrame({
                'Model': [model_name],
                'Target': [target_column],
                'Precision': [precision],
                'Recall': [recall],
                'F1-Score': [f1],
                'Accuracy': [accuracy],
                'Micro F1': [micro_f1],
                'Macro F1': [macro_f1],
                'Weighted F1': [weighted_f1],
                'AUC': [auc]
            })
            result_table = pd.concat([result_table, new_row], ignore_index=True)
        except Exception as e:
            print(f"{model_name} 모델에서 오류 발생: {e}")
            continue


[w08chronic_a] 열을 예측:
Iteration 1/10
Iteration 2/10
Iteration 3/10
Iteration 4/10
Iteration 5/10
Iteration 6/10
Iteration 7/10
Iteration 8/10
Iteration 9/10
Iteration 10/10

[Random Forest] 모델 평가:
--------------------------

[Decision Tree] 모델 평가:
--------------------------

[SVM] 모델 평가:
--------------------------

[MLP] 모델 평가:
--------------------------

[w08chronic_b] 열을 예측:
Iteration 1/10
Iteration 2/10
Iteration 3/10
Iteration 4/10
Iteration 5/10
Iteration 6/10
Iteration 7/10
Iteration 8/10
Iteration 9/10
Iteration 10/10

[Random Forest] 모델 평가:
--------------------------

[Decision Tree] 모델 평가:
--------------------------

[SVM] 모델 평가:
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



[MLP] 모델 평가:
--------------------------

[w08chronic_c] 열을 예측:
Iteration 1/10
Iteration 2/10
Iteration 3/10
Iteration 4/10
Iteration 5/10
Iteration 6/10
Iteration 7/10
Iteration 8/10
Iteration 9/10
Iteration 10/10

[Random Forest] 모델 평가:
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



[Decision Tree] 모델 평가:
--------------------------

[SVM] 모델 평가:
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



[MLP] 모델 평가:
--------------------------

[w08chronic_d] 열을 예측:
Iteration 1/10
Iteration 2/10
Iteration 3/10
Iteration 4/10
Iteration 5/10
Iteration 6/10
Iteration 7/10
Iteration 8/10
Iteration 9/10
Iteration 10/10

[Random Forest] 모델 평가:
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



[Decision Tree] 모델 평가:
--------------------------

[SVM] 모델 평가:
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



[MLP] 모델 평가:
--------------------------

[w08chronic_e] 열을 예측:
Iteration 1/10
Iteration 2/10
Iteration 3/10
Iteration 4/10
Iteration 5/10
Iteration 6/10
Iteration 7/10
Iteration 8/10
Iteration 9/10
Iteration 10/10

[Random Forest] 모델 평가:
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



[Decision Tree] 모델 평가:
--------------------------

[SVM] 모델 평가:
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



[MLP] 모델 평가:
--------------------------

[w08chronic_f] 열을 예측:
Iteration 1/10
Iteration 2/10
Iteration 3/10
Iteration 4/10
Iteration 5/10
Iteration 6/10
Iteration 7/10
Iteration 8/10
Iteration 9/10
Iteration 10/10

[Random Forest] 모델 평가:
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



[Decision Tree] 모델 평가:
--------------------------

[SVM] 모델 평가:
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



[MLP] 모델 평가:
--------------------------

[w08chronic_g] 열을 예측:
Iteration 1/10
Iteration 2/10
Iteration 3/10
Iteration 4/10
Iteration 5/10
Iteration 6/10
Iteration 7/10
Iteration 8/10
Iteration 9/10
Iteration 10/10

[Random Forest] 모델 평가:
--------------------------

[Decision Tree] 모델 평가:
--------------------------

[SVM] 모델 평가:
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



[MLP] 모델 평가:
--------------------------

[w08chronic_h] 열을 예측:
Iteration 1/10
Iteration 2/10
Iteration 3/10
Iteration 4/10
Iteration 5/10
Iteration 6/10
Iteration 7/10
Iteration 8/10
Iteration 9/10
Iteration 10/10

[Random Forest] 모델 평가:
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



[Decision Tree] 모델 평가:
--------------------------

[SVM] 모델 평가:
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



[MLP] 모델 평가:
--------------------------

[w08chronic_i] 열을 예측:
Iteration 1/10
Iteration 2/10
Iteration 3/10
Iteration 4/10
Iteration 5/10
Iteration 6/10
Iteration 7/10
Iteration 8/10
Iteration 9/10
Iteration 10/10

[Random Forest] 모델 평가:
--------------------------

[Decision Tree] 모델 평가:
--------------------------

[SVM] 모델 평가:
--------------------------

[MLP] 모델 평가:
--------------------------

[w08chronic_k] 열을 예측:
Iteration 1/10
Iteration 2/10
Iteration 3/10
Iteration 4/10
Iteration 5/10
Iteration 6/10
Iteration 7/10
Iteration 8/10
Iteration 9/10
Iteration 10/10

[Random Forest] 모델 평가:
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



[Decision Tree] 모델 평가:
--------------------------

[SVM] 모델 평가:
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



[MLP] 모델 평가:
--------------------------

[w08chronic_l] 열을 예측:
Iteration 1/10
Iteration 2/10
Iteration 3/10
Iteration 4/10
Iteration 5/10
Iteration 6/10
Iteration 7/10
Iteration 8/10
Iteration 9/10
Iteration 10/10

[Random Forest] 모델 평가:
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



[Decision Tree] 모델 평가:
--------------------------

[SVM] 모델 평가:
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



[MLP] 모델 평가:
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



[w08chronic_m] 열을 예측:
Iteration 1/10
Iteration 2/10
Iteration 3/10
Iteration 4/10
Iteration 5/10
Iteration 6/10
Iteration 7/10
Iteration 8/10
Iteration 9/10
Iteration 10/10

[Random Forest] 모델 평가:
--------------------------

[Decision Tree] 모델 평가:
--------------------------

[SVM] 모델 평가:
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



[MLP] 모델 평가:
--------------------------


In [36]:
# 11. 결과 테이블 출력
print("\n전체 모델 평가 결과:")
print(result_table)


전체 모델 평가 결과:
          Target          Model  Precision    Recall  F1-Score  Accuracy  \
0   w08chronic_a  Random Forest   0.758605  0.736203  0.732758  0.736203   
1   w08chronic_a  Decision Tree   0.658247  0.657837  0.657962  0.657837   
2   w08chronic_a            SVM   0.761112  0.721854  0.714419  0.721854   
3   w08chronic_a            MLP   0.676897  0.675497  0.675641  0.675497   
4   w08chronic_b  Random Forest   0.684070  0.762693  0.684064  0.762693   
5   w08chronic_b  Decision Tree   0.696383  0.695364  0.695871  0.695364   
6   w08chronic_b            SVM   0.591847  0.769316  0.669012  0.769316   
7   w08chronic_b            MLP   0.674178  0.687638  0.680454  0.687638   
8   w08chronic_c  Random Forest   0.859611  0.927152  0.892105  0.927152   
9   w08chronic_c  Decision Tree   0.862684  0.817881  0.839169  0.817881   
10  w08chronic_c            SVM   0.859611  0.927152  0.892105  0.927152   
11  w08chronic_c            MLP   0.873172  0.886313  0.879529  0.886313  