In [70]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
# AutoGluon
from autogluon.tabular import TabularPredictor

# 1. Load the data
# Adjust the CSV path here (and the target column name used in later cells) to fit your project.
DATA_PATH = 'kidney_disease_4.csv'
data = pd.read_csv(DATA_PATH)

In [71]:
# 2. Train/test split, applied to the full DataFrame (features and target together).
# The split is stratified on the 'class' column, holds out 20% of the rows
# and uses a fixed random_state so the partition is repeatable.
split_options = {
    'test_size': 0.2,
    'stratify': data['class'],
    'random_state': 42,
}
df_train, df_test = train_test_split(data, **split_options)


In [None]:
# 3. Separate features from the target in each partition.
# The 'class' column is removed from the feature frames and kept, cast to the
# pandas 'category' dtype, as the target series (the cast is intended for AutoGluon).
X_train, y_train = df_train.drop(columns='class'), df_train['class'].astype('category')
X_test, y_test = df_test.drop(columns='class'), df_test['class'].astype('category')

# Flag marking the task as binary; no code visible in this section reads it.
binary = True

In [73]:
# 4. Evaluation helper
def evaluate_model(name, y_true, y_pred, y_proba=None, pos_label=None):
    """Print binary-classification metrics for a single model.

    Parameters
    ----------
    name : str
        Label shown in the header line of the report.
    y_true : iterable
        Ground-truth class labels.
    y_pred : iterable
        Predicted class labels.
    y_proba : array-like, optional
        Score / probability of the positive class for each sample. When given,
        ROC-AUC is printed in addition to the label-based metrics.
    pos_label : optional
        Label treated as the positive class. ``None`` falls back to ``1``,
        which is scikit-learn's own default for the binary metrics.

    Returns
    -------
    None
        All results are printed.
    """
    # With average='binary', scikit-learn requires pos_label to be one of the
    # labels actually present; passing None through would raise ValueError.
    if pos_label is None:
        pos_label = 1

    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, average='binary', pos_label=pos_label)
    rec = recall_score(y_true, y_pred, average='binary', pos_label=pos_label)
    f1 = f1_score(y_true, y_pred, average='binary', pos_label=pos_label)
    print(f'--- {name} ---')
    print(f'Accuracy : {acc:.4f}')
    print(f'Precision: {prec:.4f}')
    print(f'Recall   : {rec:.4f}')
    print(f'F1 Score : {f1:.4f}')
    if y_proba is not None:
        # roc_auc_score accepts no pos_label argument (passing one is a TypeError).
        # Encode the ground truth as 1 = positive / 0 = other so that the scores
        # in y_proba are interpreted relative to pos_label.
        y_true_binary = [int(label == pos_label) for label in y_true]
        roc = roc_auc_score(y_true_binary, y_proba)
        print(f'ROC-AUC  : {roc:.4f}')
    print()

In [74]:
# 5. Train models with AutoGluon
# Re-attach the target as a 'class' column: the predictor is told which column
# is the label by name, so features and target must live in one frame.
train_data = X_train.assign(**{'class': y_train})

# Predictor configured to optimise F1 on the 'class' label.
predictor = TabularPredictor(label='class', eval_metric='f1')
# Fit with the 'best_quality' preset under a time_limit of 300.
predictor = predictor.fit(train_data, presets='best_quality', time_limit=300)


No path specified. Models will be saved in: "AutogluonModels/ag-20250724_120756"
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.12.7
Operating System:   Darwin
Platform Machine:   arm64
Platform Version:   Darwin Kernel Version 24.5.0: Tue Apr 22 19:54:49 PDT 2025; root:xnu-11417.121.6~2/RELEASE_ARM64_T6000
CPU Count:          8
Memory Avail:       4.19 GB / 16.00 GB (26.2%)
Disk Space Avail:   774.72 GB / 926.35 GB (83.6%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit

In [75]:
# 5-1. Explicitly save the trained model
model_save_path = 'autogluon_model'
# TabularPredictor.save() takes no destination: its only parameter is the
# `silent` flag, so passing a path string merely silences logging and writes to
# predictor.path. Persist in place first, then copy the artifacts to the
# requested directory with clone(). dirs_exist_ok=True keeps the cell re-runnable.
predictor.save(silent=True)
predictor.clone(path=model_save_path, dirs_exist_ok=True)
print(f'Model explicitly saved to: {model_save_path}')

Model explicitly saved to: autogluon_model


In [76]:
# 6. positive/negative 클래스 레이블 설정
# 0=not ckd, 1=ckd
negative_label = 0
positive_label = 1
print(f'Negative class: {negative_label}, Positive class: {positive_label}')

Negative class: 0, Positive class: 1


In [None]:
# 7. Training summary: leaderboard, best model and its hyperparameters
leaderboard = predictor.leaderboard(df_test, silent=True)
print('=== Leaderboard ===', leaderboard, sep='\n')

# Name of the best model, read from the `model_best` attribute
# (chosen to avoid a deprecation warning).
best_model = predictor.model_best
print(f'Best model: {best_model}')

# Load the best model object and show its parameters.
# NOTE(review): `_trainer` is underscore-prefixed, i.e. not public API; this
# access may break between AutoGluon versions — confirm before relying on it.
best_model_obj = predictor._trainer.load_model(best_model)
print('=== Hyperparameters of Best Model ===', best_model_obj.get_params(), sep='\n')


=== Leaderboard ===
                        model  score_test  score_val eval_metric  \
0        CatBoost_r137_BAG_L1    0.983051   0.967480          f1   
1             CatBoost_BAG_L1    0.983051   0.975410          f1   
2        CatBoost_r177_BAG_L1    0.983051   0.963563          f1   
3     RandomForestEntr_BAG_L1    0.983051   0.974576          f1   
4         CatBoost_r13_BAG_L1    0.983051   0.971429          f1   
5          XGBoost_r89_BAG_L1    0.983051   0.983193          f1   
6       ExtraTreesEntr_BAG_L1    0.983051   0.987448          f1   
7       ExtraTrees_r42_BAG_L1    0.983051   0.987448          f1   
8       ExtraTreesGini_BAG_L1    0.983051   0.991597          f1   
9    RandomForest_r195_BAG_L1    0.983051   0.970954          f1   
10    RandomForestGini_BAG_L1    0.983051   0.974576          f1   
11         CatBoost_r9_BAG_L1    0.983051   0.979424          f1   
12             XGBoost_BAG_L1    0.983051   0.975207          f1   
13         XGBoost_r33_BAG_L

In [78]:
# 8. Training summary
# Score every model on the held-out frame and look up the best model's name,
# then report both.
leaderboard = predictor.leaderboard(df_test, silent=True)
best_model = predictor.model_best

print('=== Leaderboard ===')
print(leaderboard)
print(f'Best model: {best_model}')

=== Leaderboard ===
                        model  score_test  score_val eval_metric  \
0        CatBoost_r137_BAG_L1    0.983051   0.967480          f1   
1        CatBoost_r177_BAG_L1    0.983051   0.963563          f1   
2             CatBoost_BAG_L1    0.983051   0.975410          f1   
3         CatBoost_r13_BAG_L1    0.983051   0.971429          f1   
4          XGBoost_r89_BAG_L1    0.983051   0.983193          f1   
5          CatBoost_r9_BAG_L1    0.983051   0.979424          f1   
6     RandomForestGini_BAG_L1    0.983051   0.974576          f1   
7       ExtraTreesGini_BAG_L1    0.983051   0.991597          f1   
8       ExtraTrees_r42_BAG_L1    0.983051   0.987448          f1   
9     RandomForestEntr_BAG_L1    0.983051   0.974576          f1   
10      ExtraTreesEntr_BAG_L1    0.983051   0.987448          f1   
11   RandomForest_r195_BAG_L1    0.983051   0.970954          f1   
12         XGBoost_r33_BAG_L1    0.983051   0.987448          f1   
13             XGBoost_BAG_L

In [79]:
# 9. Print evaluation metrics for every trained model
# Build a labelled test frame: the test features plus the 'class' column.
test_data = X_test.assign(**{'class': y_test})

# `model_names` may be exposed either as a method or as a plain attribute;
# call it only when it is callable.
model_names = predictor.model_names
if callable(model_names):
    model_names = model_names()

print('=== Metrics for All Models ===')
for model in model_names:
    print(f"--- Metrics for Model: {model} ---")
    # Let AutoGluon's evaluate() compute the metrics for this model.
    results = predictor.evaluate(test_data, model=model, auxiliary_metrics=True)
    # Report the headline metrics.
    print(f"Accuracy : {results['accuracy']:.4f}")
    print(f"Precision: {results['precision']:.4f}")
    print(f"Recall   : {results['recall']:.4f}")
    print(f"F1 Score : {results['f1']:.4f}")
    # ROC-AUC is printed only when evaluate() returned it.
    if 'roc_auc' in results:
        print(f"ROC-AUC  : {results['roc_auc']:.4f}")


=== Metrics for All Models ===
--- Metrics for Model: KNeighborsUnif_BAG_L1 ---
Accuracy : 0.5875
Precision: 0.4615
Recall   : 0.6000
F1 Score : 0.5217
ROC-AUC  : 0.6377
--- Metrics for Model: KNeighborsDist_BAG_L1 ---
Accuracy : 0.6375
Precision: 0.5143
Recall   : 0.6000
F1 Score : 0.5538
ROC-AUC  : 0.6763
--- Metrics for Model: RandomForestGini_BAG_L1 ---
Accuracy : 0.9875
Precision: 1.0000
Recall   : 0.9667
F1 Score : 0.9831
ROC-AUC  : 1.0000
--- Metrics for Model: RandomForestEntr_BAG_L1 ---
Accuracy : 0.9875
Precision: 1.0000
Recall   : 0.9667
F1 Score : 0.9831
ROC-AUC  : 1.0000
--- Metrics for Model: CatBoost_BAG_L1 ---
Accuracy : 0.9875
Precision: 1.0000
Recall   : 0.9667
F1 Score : 0.9831
ROC-AUC  : 1.0000
--- Metrics for Model: ExtraTreesGini_BAG_L1 ---
Accuracy : 0.9875
Precision: 1.0000
Recall   : 0.9667
F1 Score : 0.9831
ROC-AUC  : 1.0000
--- Metrics for Model: ExtraTreesEntr_BAG_L1 ---
Accuracy : 0.9875
Precision: 1.0000
Recall   : 0.9667
F1 Score : 0.9831
ROC-AUC  : 1.000

In [80]:
# Check where the models were saved.
model_dir = predictor.path  # AutoGluon's default save path
print(f'Models saved in predictor.path: {model_dir}')

Models saved in predictor.path: /Users/jeong-kyu/Documents/연세대/헬스케어 부트/AutogluonModels/ag-20250724_120756
