In [1]:
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, precision_recall_fscore_support
from sklearn.preprocessing import label_binarize
from multiprocessing import Pool

In [2]:
# 데이터 불러오기
data = pd.read_csv('azdigar nav sartirovka+0 delete.csv')

In [4]:
# 분석할 열들
columns = [
    'w08chronic_a', 'w08chronic_b', 'w08chronic_c',
    'w08chronic_d', 'w08chronic_e', 'w08chronic_f',
    'w08chronic_g', 'w08chronic_h', 'w08chronic_i',
    'w08chronic_k', 'w08chronic_l', 'w08chronic_m'
] 

In [5]:
# 데이터 클리닝
cleaned_data = data[data['w08chronic_m'] != 3] 
X = cleaned_data.drop(columns=columns)
y_all = cleaned_data[columns]

In [7]:
# 데이터 스케일링
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [8]:
# 모델 정의
models = {
    'Random Forest': RandomForestClassifier(random_state=42),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
}


In [10]:
# 결과를 저장할 빈 데이터프레임 생성
result_table = pd.DataFrame(columns=[
    'Target', 'Model', 'Precision', 'Recall', 'F1-Score', 
    'Accuracy', 'Micro F1', 'Macro F1', 'Weighted F1', 'AUC'
])  

def evaluate_individual(individual, model, X_train, y_train, X_test, y_test):
    selected_features = [i for i, bit in enumerate(individual) if bit]
    if len(selected_features) == 0:
        return 0  # 선택된 피처가 없을 경우 적합도 0

    X_train_selected = X_train[:, selected_features]
    X_test_selected = X_test[:, selected_features]

    model.fit(X_train_selected, y_train)
    y_pred = model.predict(X_test_selected)

    return f1_score(y_test, y_pred, average='macro')

def run_genetic_algorithm(model, X_train, y_train, X_test, y_test):
    population_size = 5  # 줄이기
    num_generations = 5   # 줄이기
    num_features = X_train.shape[1]

    population = [np.random.randint(0, 2, num_features) for _ in range(population_size)]

    for generation in range(num_generations):
        fitness_scores = [evaluate_individual(individual, model, X_train, y_train, X_test, y_test) for individual in population]

        best_index = np.argmax(fitness_scores)
        best_individual = population[best_index]

        # 다음 세대 생성 로직 추가
        # ...

    return best_individual


# 각 타겟 열에 대해 평가
for target_column in columns:
    print(f'\n[{target_column}] 열을 예측합니다:')
    print('==================================')

    # y를 1D 배열로 변환
    y = cleaned_data[target_column].values.ravel()  # 1D 배열로 변환

    # 교차 검증을 위한 Stratified K-Fold 사용
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    for model_name, model in models.items():
        print(f'\n[{model_name}] 모델 평가:')
        print('--------------------------')

        try:
            # 유전 알고리즘을 사용한 피처 선택
            best_individual = run_genetic_algorithm(model, X_scaled, y, X_scaled, y)
            selected_features = [i for i, bit in enumerate(best_individual) if bit]

            if len(selected_features) == 0:
                print(f'선택된 피처가 없습니다. {target_column} 열에 대한 {model_name} 모델 평가를 건너뜁니다.')
                continue

            # 교차 검증을 통한 평가
            macro_f1_scores = []
            auc_scores = []

            for train_index, test_index in skf.split(X_scaled, y):
                X_train, X_test = X_scaled[train_index][:, selected_features], X_scaled[test_index][:, selected_features]
                y_train, y_test = y[train_index], y[test_index]

                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)

                # AUC를 위한 확률 예측
                y_pred_proba = model.predict_proba(X_test) if hasattr(model, "predict_proba") else None

                # 메트릭 계산
                precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred, average='weighted')
                accuracy = accuracy_score(y_test, y_pred)
                micro_f1 = f1_score(y_test, y_pred, average='micro')
                macro_f1 = f1_score(y_test, y_pred, average='macro')
                weighted_f1 = f1_score(y_test, y_pred, average='weighted')

                # AUC 계산
                if y_pred_proba is not None:
                    y_test_bin = label_binarize(y_test, classes=np.unique(y))
                    auc = roc_auc_score(y_test_bin, y_pred_proba, multi_class="ovr", average='weighted')
                else:
                    auc = np.nan

                # 결과 추가
                macro_f1_scores.append(macro_f1)
                auc_scores.append(auc)

            # 평균 점수 계산
            average_macro_f1 = np.mean(macro_f1_scores)
            average_auc = np.mean(auc_scores)

            # 결과 저장
            new_row = pd.DataFrame({
                'Model': [model_name],
                'Target': [target_column],
                'Precision': [precision],
                'Recall': [recall],
                'F1-Score': [f1],
                'Accuracy': [accuracy],
                'Micro F1': [micro_f1],
                'Macro F1': [average_macro_f1],
                'Weighted F1': [weighted_f1],
                'AUC': [average_auc]
            })
            result_table = pd.concat([result_table, new_row], ignore_index=True)

        except Exception as e:
            print(f"{model_name} 모델에서 오류 발생: {e}")

# 결과 테이블 출력
print("\n전체 모델 평가 결과:")
print(result_table)



[w08chronic_a] 열을 예측합니다:

[Random Forest] 모델 평가:
--------------------------
Random Forest 모델에서 오류 발생: y should be a 1d array, got an array of shape (906, 2) instead.

[Decision Tree] 모델 평가:
--------------------------
Decision Tree 모델에서 오류 발생: y should be a 1d array, got an array of shape (906, 2) instead.

[w08chronic_b] 열을 예측합니다:

[Random Forest] 모델 평가:
--------------------------
Random Forest 모델에서 오류 발생: y should be a 1d array, got an array of shape (906, 2) instead.

[Decision Tree] 모델 평가:
--------------------------
Decision Tree 모델에서 오류 발생: y should be a 1d array, got an array of shape (906, 2) instead.

[w08chronic_c] 열을 예측합니다:

[Random Forest] 모델 평가:
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Random Forest 모델에서 오류 발생: y should be a 1d array, got an array of shape (906, 2) instead.

[Decision Tree] 모델 평가:
--------------------------
Decision Tree 모델에서 오류 발생: y should be a 1d array, got an array of shape (906, 2) instead.

[w08chronic_d] 열을 예측합니다:

[Random Forest] 모델 평가:
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Random Forest 모델에서 오류 발생: y should be a 1d array, got an array of shape (906, 2) instead.

[Decision Tree] 모델 평가:
--------------------------
Decision Tree 모델에서 오류 발생: y should be a 1d array, got an array of shape (906, 2) instead.

[w08chronic_e] 열을 예측합니다:

[Random Forest] 모델 평가:
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Random Forest 모델에서 오류 발생: y should be a 1d array, got an array of shape (906, 2) instead.

[Decision Tree] 모델 평가:
--------------------------
Decision Tree 모델에서 오류 발생: y should be a 1d array, got an array of shape (906, 2) instead.

[w08chronic_f] 열을 예측합니다:

[Random Forest] 모델 평가:
--------------------------
Random Forest 모델에서 오류 발생: y should be a 1d array, got an array of shape (906, 2) instead.

[Decision Tree] 모델 평가:
--------------------------
Decision Tree 모델에서 오류 발생: y should be a 1d array, got an array of shape (906, 2) instead.

[w08chronic_g] 열을 예측합니다:

[Random Forest] 모델 평가:
--------------------------
Random Forest 모델에서 오류 발생: y should be a 1d array, got an array of shape (906, 2) instead.

[Decision Tree] 모델 평가:
--------------------------
Decision Tree 모델에서 오류 발생: y should be a 1d array, got an array of shape (906, 2) instead.

[w08chronic_h] 열을 예측합니다:

[Random Forest] 모델 평가:
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Random Forest 모델에서 오류 발생: y should be a 1d array, got an array of shape (906, 2) instead.

[Decision Tree] 모델 평가:
--------------------------
Decision Tree 모델에서 오류 발생: y should be a 1d array, got an array of shape (906, 2) instead.

[w08chronic_i] 열을 예측합니다:

[Random Forest] 모델 평가:
--------------------------
Random Forest 모델에서 오류 발생: y should be a 1d array, got an array of shape (906, 2) instead.

[Decision Tree] 모델 평가:
--------------------------
Decision Tree 모델에서 오류 발생: y should be a 1d array, got an array of shape (906, 2) instead.

[w08chronic_k] 열을 예측합니다:

[Random Forest] 모델 평가:
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Random Forest 모델에서 오류 발생: y should be a 1d array, got an array of shape (906, 2) instead.

[Decision Tree] 모델 평가:
--------------------------
Decision Tree 모델에서 오류 발생: y should be a 1d array, got an array of shape (906, 2) instead.

[w08chronic_l] 열을 예측합니다:

[Random Forest] 모델 평가:
--------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Random Forest 모델에서 오류 발생: y should be a 1d array, got an array of shape (906, 2) instead.

[Decision Tree] 모델 평가:
--------------------------
Decision Tree 모델에서 오류 발생: y should be a 1d array, got an array of shape (906, 2) instead.

[w08chronic_m] 열을 예측합니다:

[Random Forest] 모델 평가:
--------------------------
Random Forest 모델에서 오류 발생: y should be a 1d array, got an array of shape (906, 2) instead.

[Decision Tree] 모델 평가:
--------------------------
Decision Tree 모델에서 오류 발생: y should be a 1d array, got an array of shape (906, 2) instead.

전체 모델 평가 결과:
Empty DataFrame
Columns: [Target, Model, Precision, Recall, F1-Score, Accuracy, Micro F1, Macro F1, Weighted F1, AUC]
Index: []


In [None]:
# 결과 테이블 출력
print("\n전체 모델 평가 결과:")
print(result_table)