In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes # 당뇨병
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, precision_recall_curve
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import json

# Seed both NumPy's and PyTorch's global random number generators with the same value.
torch.manual_seed(42)
np.random.seed(42)

# Matplotlib defaults applied to every figure drawn after this cell.
plt.rcParams.update({
    'font.size': 12,
    'figure.figsize': (12, 6),
    'axes.grid': True,
})

In [None]:
class Config:
    """Plain container for the experiment settings used throughout the notebook.

    Every attribute is set to a fixed default in ``__init__``; callers may
    overwrite attributes on the instance after construction.
    """

    def __init__(self) -> None:
        # --- data splitting ---
        self.test_size: float = 0.2
        self.val_size: float = 0.2
        self.random_state: int = 42

        # --- network shape ---
        self.input_dim: int = 10
        self.hidden_dims: list = [64, 32, 16]
        self.dropout_rate: float = 0.3

        # --- optimisation ---
        self.batch_size: int = 32
        self.num_epochs: int = 200
        self.learning_rate: float = 1e-3
        self.weight_decay: float = 1e-4

        # --- early stopping ---
        self.patience: int = 20
        self.min_delta: float = 1e-3

        # --- learning-rate scheduler ---
        self.scheduler_step_size: int = 30
        self.scheduler_gamma: float = 0.5

# Single shared settings object used by the cells below.
config = Config()

print('Config 생성 완료!')
# Echo two of the settings so the cell output confirms what was configured.
print(
    f'Batch Size: {config.batch_size}',
    f'Learning Rate: {config.learning_rate}',
    sep='\n',
)

In [None]:
class DataPreprocessor:
    """Prepare the scikit-learn diabetes data for binary classification.

    Holds the experiment ``config`` and a ``StandardScaler`` that is fitted
    on the training split inside ``load_and_prepare_data``.
    """

    def __init__(self, config):
        self.config = config
        self.scaler = StandardScaler()

    def load_and_prepare_data(self):
        """Load the raw data, binarise the target and split it three ways.

        The regression target is turned into a 0/1 label (1 when strictly
        above the median). The data is first split into train+val and test
        using ``config.test_size``; the train+val part is then split again
        using ``config.val_size``. Both splits are stratified on the label
        and use ``config.random_state``.

        Returns:
            Tuple ``(X_train, X_val, X_test, y_train, y_val, y_test)`` where
            the feature arrays have been standardised by ``self.scaler``.
        """
        cfg = self.config
        bunch = load_diabetes()
        features = bunch.data
        target = bunch.target
        labels = (target > np.median(target)).astype(int)

        # First cut: hold the test split out of the full data set.
        X_rest, X_test, y_rest, y_test = train_test_split(
            features, labels, test_size=cfg.test_size, stratify=labels, random_state=cfg.random_state
        )

        # Second cut: divide what remains into training and validation splits.
        X_train, X_val, y_train, y_val = train_test_split(
            X_rest, y_rest, test_size=cfg.val_size,
            stratify=y_rest, random_state=cfg.random_state
        )

        # The scaler is fitted on the training split only; the validation and
        # test splits are transformed with those training statistics.
        X_train = self.scaler.fit_transform(X_train)
        X_val, X_test = (self.scaler.transform(split) for split in (X_val, X_test))

        print('데이터 준비 완료')
        print(f'Train: {X_train.shape[0]}, Val: {X_val.shape[0]}, Test: {X_test.shape[0]}')

        return X_train, X_val, X_test, y_train, y_val, y_test

    def create_dataloaders(self, X_train, X_val, X_test, y_train, y_val, y_test):
        """Wrap the three splits in ``DataLoader`` objects.

        Features and labels are converted to float tensors and batched with
        ``config.batch_size``. Only the training loader shuffles.

        Returns:
            Tuple ``(train_loader, val_loader, test_loader)``.
        """
        def build_dataset(features, labels):
            # view(-1, 1) reshapes the labels into a single column, so each
            # sample's target has shape (1,).
            return TensorDataset(
                torch.FloatTensor(features),
                torch.FloatTensor(labels).view(-1, 1)
            )

        train_set = build_dataset(X_train, y_train)
        val_set = build_dataset(X_val, y_val)
        test_set = build_dataset(X_test, y_test)

        batch = self.config.batch_size
        train_loader = DataLoader(train_set, batch_size=batch, shuffle=True)
        val_loader = DataLoader(val_set, batch_size=batch, shuffle=False)
        test_loader = DataLoader(test_set, batch_size=batch, shuffle=False)

        print(f'DataLoader 생성 완료 (Batch Size: {self.config.batch_size})')

        return train_loader, val_loader, test_loader

print('DataPreprocessor 클래스 정의 완료!')

In [None]:
# Build the preprocessor from the shared config and produce the scaled splits.
preprocessor = DataPreprocessor(config)
X_train, X_val, X_test, y_train, y_val, y_test = preprocessor.load_and_prepare_data()

# Wrap each split in a DataLoader; arguments are passed by name so the
# feature/label pairing is explicit.
train_loader, val_loader, test_loader = preprocessor.create_dataloaders(
    X_train=X_train,
    X_val=X_val,
    X_test=X_test,
    y_train=y_train,
    y_val=y_val,
    y_test=y_test,
)