In [1]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import tensorflow
import seaborn as sns
import pandas as pd

from tensorflow import keras

from rdkit import Chem
from rdkit import DataStructs
from rdkit.Chem import AllChem
from rdkit.Chem.Fingerprints import FingerprintMols
from rdkit.Chem import rdMolDescriptors

from sklearn import datasets, metrics
from sklearn.metrics import auc, roc_auc_score, roc_curve, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import minmax_scale
from sklearn.preprocessing import StandardScaler


from scipy import interp
from tensorflow.keras.layers import Embedding, Dense 
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint



from tensorflow.keras import backend as K 
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras import initializers

2025-06-25 13:20:36.852378: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-06-25 13:20:37.651567: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2025-06-25 13:20:37.651637: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory


In [2]:
# test_xgboost.py

import os
import torch
import joblib
import pandas as pd
from sklearn.metrics import (
    accuracy_score, roc_auc_score, average_precision_score,
    precision_score, recall_score, f1_score
)

def evaluate_model_on_test(fold, save_dir):
    """Score the saved model of one fold on that fold's held-out test split.

    Reads ``XGB_test_fold{fold}.pt`` and ``xgboost_fold{fold}.pkl`` from
    ``save_dir``, predicts on the test features and computes classification
    metrics from the hard predictions and the class-1 probabilities.

    Args:
        fold: Fold number used to build both file names.
        save_dir: Directory containing the test-data file and the model file.

    Returns:
        dict with the keys ``fold``, ``accuracy``, ``roc_auc``,
        ``avg_precision``, ``precision``, ``recall`` and ``f1``.

    Raises:
        FileNotFoundError: propagated from the loaders when either file is
            missing (``main`` in this file catches it and skips the fold).
    """
    # SECURITY NOTE: both torch.load and joblib.load unpickle their input, which
    # can execute arbitrary code. Only point save_dir at files you produced.

    # 1) Load the test data. map_location='cpu' makes tensors that were saved
    #    from a GPU session loadable on a CPU-only machine and guarantees that
    #    the .numpy() calls below operate on CPU tensors.
    test_path = os.path.join(save_dir, f'XGB_test_fold{fold}.pt')
    data = torch.load(test_path, map_location='cpu')
    # The file is indexed like a mapping holding 'features' and 'labels'
    # (presumably torch tensors, since .numpy() is called on them).
    X_test = data['features'].numpy()
    y_test = data['labels'].numpy()

    # 2) Load the trained model for this fold.
    model_path = os.path.join(save_dir, f'xgboost_fold{fold}.pkl')
    model = joblib.load(model_path)

    # 3) Hard predictions and scores. Column 1 of predict_proba is used as the
    #    positive-class score (assumes a binary classifier -- TODO confirm).
    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:, 1]

    # 4) Compute the evaluation metrics. Threshold-free metrics use the
    #    probabilities; the others use the hard predictions.
    fold_metrics = {
        'fold': fold,
        'accuracy': accuracy_score(y_test, y_pred),
        'roc_auc': roc_auc_score(y_test, y_proba),
        'avg_precision': average_precision_score(y_test, y_proba),
        'precision': precision_score(y_test, y_pred),
        'recall': recall_score(y_test, y_pred),
        'f1': f1_score(y_test, y_pred)
    }
    return fold_metrics

def main(save_dir='./processed', n_splits=20):
    """Evaluate every fold's model on its test split and summarize the results.

    For each fold ``1..n_splits`` this calls ``evaluate_model_on_test``; folds
    whose files are missing are reported and skipped. The per-fold metrics plus
    a final ``'mean'`` row are written to ``test_metrics_summary.csv`` inside
    ``save_dir``, and the mean / standard deviation of AUROC and AUPR across
    the evaluated folds are printed.

    Args:
        save_dir: Directory holding the per-fold test data and models; must
            match the path used by the training script. The summary CSV is
            written here as well.
        n_splits: Number of folds to try (fold numbers start at 1).

    Returns:
        None. Results are printed and saved to disk.
    """
    all_metrics = []
    for fold in range(1, n_splits + 1):
        print(f'Evaluating fold {fold}...')
        try:
            m = evaluate_model_on_test(fold, save_dir)
        except FileNotFoundError as e:
            # Best-effort: a missing fold is reported and skipped, not fatal.
            print(f'  파일을 찾을 수 없습니다: {e}')
            continue
        all_metrics.append(m)
        print(f' Fold {fold} metrics: {m}')

    # Without any evaluated fold there are no metric columns; the summary
    # statistics below would raise KeyError, so stop here instead.
    if not all_metrics:
        print('\nNo folds could be evaluated; nothing to summarize.')
        return

    # 5) Per-fold metrics table.
    fold_df = pd.DataFrame(all_metrics)

    # 6) Append a row holding the column-wise mean over folds.
    #    pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    mean_row = fold_df.mean(numeric_only=True).to_dict()
    mean_row['fold'] = 'mean'
    df_metrics = pd.concat(
        [fold_df, pd.DataFrame([mean_row])], ignore_index=True
    )

    # 7) Save the results as CSV.
    output_path = os.path.join(save_dir, 'test_metrics_summary.csv')
    df_metrics.to_csv(output_path, index=False)
    print(f'\nTest save: {output_path}')

    # 8) Mean and standard deviation of AUROC and AUPR across folds, computed
    #    on the per-fold table so the appended 'mean' row is not included.
    roc_mean = fold_df['roc_auc'].mean()
    roc_std = fold_df['roc_auc'].std()
    aupr_mean = fold_df['avg_precision'].mean()
    aupr_std = fold_df['avg_precision'].std()

    print(f"\n AUROC fold mean: {roc_mean:.4f}, std: {roc_std:.4f}")
    print(f" AUPR   fold mean: {aupr_mean:.4f}, std: {aupr_std:.4f}")

# Entry point: run the evaluation when this code is executed directly
# (as a script, or as a notebook cell, where __name__ is also '__main__').
if __name__ == '__main__':
    main()

Evaluating fold 1...
 Fold 1 metrics: {'fold': 1, 'accuracy': 0.4444444444444444, 'roc_auc': 0.5555555555555556, 'avg_precision': 0.5846920055253388, 'precision': 0.4, 'recall': 0.2222222222222222, 'f1': 0.2857142857142857}
Evaluating fold 2...
 Fold 2 metrics: {'fold': 2, 'accuracy': 0.6111111111111112, 'roc_auc': 0.654320987654321, 'avg_precision': 0.7636094841977195, 'precision': 0.625, 'recall': 0.5555555555555556, 'f1': 0.5882352941176471}
Evaluating fold 3...
 Fold 3 metrics: {'fold': 3, 'accuracy': 0.6111111111111112, 'roc_auc': 0.6419753086419753, 'avg_precision': 0.5767815517815518, 'precision': 0.6, 'recall': 0.6666666666666666, 'f1': 0.631578947368421}
Evaluating fold 4...
 Fold 4 metrics: {'fold': 4, 'accuracy': 0.6470588235294118, 'roc_auc': 0.7638888888888888, 'avg_precision': 0.8468253968253969, 'precision': 0.7142857142857143, 'recall': 0.5555555555555556, 'f1': 0.6250000000000001}
Evaluating fold 5...
 Fold 5 metrics: {'fold': 5, 'accuracy': 0.8235294117647058, 'roc_au