In [16]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [18]:
# 1. Load the data and preview the first rows
url = "https://raw.githubusercontent.com/MyungKyuYi/AI-class/refs/heads/main/diabetes.csv"
df = pd.read_csv(url)
# One print call with a newline separator emits the status line followed by
# the text rendering of the first five rows.
print("✅ 데이터 로딩 완료", df.head(), sep="\n")

✅ 데이터 로딩 완료
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  


In [6]:
# Column names of the diabetes table: the feature columns followed by the
# 'Outcome' label column.
# NOTE(review): no visible cell reads this list — confirm it is still needed.
columns = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
    'Outcome',
]

In [8]:
# 2. Separate features (X) from labels (y); Outcome is the class label (0 or 1)
y = df['Outcome'].values  # classification labels 0 / 1
X = df.drop(columns='Outcome').values  # every remaining column is a feature

In [10]:
# 3. Train/test split and feature scaling
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
# Fit the scaler on the training rows only, then apply the same statistics
# to both splits so no test-set information reaches the scaler.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [12]:
# Convert the numpy arrays to torch tensors: float32 features, int64 labels
X_train, X_test = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)
y_train, y_test = (
    torch.tensor(labels, dtype=torch.int64) for labels in (y_train, y_test)
)

In [14]:
# 4. Dataset class definition
class DiabetesDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Args:
        X: Indexable collection of feature rows (tensors in this notebook).
        y: Indexable collection of labels, one per row of ``X``.

    Raises:
        ValueError: If ``X`` and ``y`` do not hold the same number of items.
    """

    def __init__(self, X, y):
        # Fail fast: a length mismatch would otherwise surface later as an
        # IndexError inside a DataLoader, or silently ignore trailing rows.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y

    def __len__(self):
        """Return the number of (features, label) pairs."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

In [22]:
# Wrap each split's tensors in a DiabetesDataset
train_dataset = DiabetesDataset(X=X_train, y=y_train)
test_dataset = DiabetesDataset(X=X_test, y=y_test)

In [24]:
# 5. Create the DataLoaders
batch_size = 32
# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [26]:
# 6. "Abalone-style" classification model definition (enlarged hidden layers)
class DiabetesNetAbaloneClassification(nn.Module):
    """Fully connected classifier: Linear -> ReLU blocks followed by a linear head.

    With the default arguments the network is
    ``input_dim -> 128 -> ReLU -> 64 -> ReLU -> 2``, and the layers are
    registered in that order under ``self.net``.

    Args:
        input_dim: Number of input features per sample.
        hidden_dims: Widths of the hidden layers, in order. Each one is
            followed by a ReLU. Defaults to ``(128, 64)``.
        num_classes: Size of the output layer. Defaults to ``2``.
    """

    def __init__(self, input_dim, hidden_dims=(128, 64), num_classes=2):
        super().__init__()
        layers = []
        in_features = input_dim
        for width in hidden_dims:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU())
            in_features = width
        # Output head: emits raw scores, no activation is applied here.
        layers.append(nn.Linear(in_features, num_classes))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Pass ``x`` through the layer stack and return the output scores."""
        return self.net(x)

# Seed PyTorch's global RNG before building the model so the initial weights
# (and anything else drawn from the global RNG afterwards, such as the shuffle
# order of a DataLoader created without its own generator) repeat on a fresh
# top-to-bottom run. 42 matches the random_state used for the train/test split.
torch.manual_seed(42)
model = DiabetesNetAbaloneClassification(input_dim=X_train.shape[1])

In [28]:
# 7. Loss function and optimizer
optimizer = optim.Adam(params=model.parameters(), lr=5e-4)
criterion = nn.CrossEntropyLoss()

In [30]:
# 8. Training and evaluation helpers (train_classification, evaluate_classification)
def train_classification(model, loader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``loader`` and return the mean loss.

    Args:
        model: Module to optimise; switched to training mode.
        loader: Iterable yielding ``(data, target)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, target)``.
            The per-batch value is weighted by the batch size, so it is
            expected to be a batch mean.
        optimizer: Optimizer whose ``zero_grad`` / ``step`` drive the update.
        device: Device the batches are moved to before the forward pass.

    Returns:
        float: Sum of ``loss * batch_size`` divided by the number of samples
        actually processed.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.train()
    running_loss = 0.
    samples_seen = 0
    for data, target in loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        batch_n = data.size(0)
        running_loss += loss.item() * batch_n
        samples_seen += batch_n
    # Divide by what was really iterated: len(loader.dataset) overcounts when
    # the loader drops the last batch or samples only part of the dataset.
    if samples_seen == 0:
        raise ValueError("loader yielded no samples; cannot compute an average loss")
    return running_loss / samples_seen

In [32]:
def evaluate_classification(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Args:
        model: Module to evaluate; switched to evaluation mode.
        loader: Iterable yielding ``(data, target)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, target)``.
            The per-batch value is weighted by the batch size, so it is
            expected to be a batch mean.
        device: Device the batches are moved to before the forward pass.

    Returns:
        tuple[float, float]: ``(avg_loss, accuracy)``, both averaged over the
        samples actually processed. A prediction is the argmax of the model
        output along dimension 1.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.eval()
    running_loss = 0.
    correct = 0
    samples_seen = 0
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            outputs = model(data)
            loss = criterion(outputs, target)
            batch_n = data.size(0)
            running_loss += loss.item() * batch_n
            samples_seen += batch_n
            pred = outputs.argmax(dim=1)
            correct += pred.eq(target).sum().item()
    # Divide by what was really iterated: len(loader.dataset) overcounts when
    # the loader drops the last batch or samples only part of the dataset.
    if samples_seen == 0:
        raise ValueError("loader yielded no samples; cannot compute metrics")
    avg_loss = running_loss / samples_seen
    accuracy = correct / samples_seen
    return avg_loss, accuracy

In [38]:
# 9. Run the training loop
# NOTE: `model` and `optimizer` are created in other cells and updated here, so
# running this cell again continues training from the current weights.
# The "Val" metrics printed below are computed on `test_loader`.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.to(device)
num_epochs = 50

for epoch in range(1, num_epochs+1):
    train_loss = train_classification(
        model=model,
        loader=train_loader,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
    )
    val_loss, val_acc = evaluate_classification(
        model=model,
        loader=test_loader,
        criterion=criterion,
        device=device,
    )
    print(f"Epoch {epoch:02d}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}, Val Acc = {val_acc:.4f}")

Epoch 01: Train Loss = 0.3289, Val Loss = 0.5672, Val Acc = 0.7013
Epoch 02: Train Loss = 0.3268, Val Loss = 0.5583, Val Acc = 0.7338
Epoch 03: Train Loss = 0.3250, Val Loss = 0.5681, Val Acc = 0.7403
Epoch 04: Train Loss = 0.3208, Val Loss = 0.5645, Val Acc = 0.7338
Epoch 05: Train Loss = 0.3192, Val Loss = 0.5739, Val Acc = 0.7013
Epoch 06: Train Loss = 0.3199, Val Loss = 0.5666, Val Acc = 0.7208
Epoch 07: Train Loss = 0.3177, Val Loss = 0.5743, Val Acc = 0.7273
Epoch 08: Train Loss = 0.3125, Val Loss = 0.5720, Val Acc = 0.7403
Epoch 09: Train Loss = 0.3091, Val Loss = 0.5759, Val Acc = 0.7273
Epoch 10: Train Loss = 0.3078, Val Loss = 0.5897, Val Acc = 0.7338
Epoch 11: Train Loss = 0.3061, Val Loss = 0.5770, Val Acc = 0.7403
Epoch 12: Train Loss = 0.3069, Val Loss = 0.5850, Val Acc = 0.7273
Epoch 13: Train Loss = 0.3013, Val Loss = 0.5768, Val Acc = 0.7273
Epoch 14: Train Loss = 0.2992, Val Loss = 0.5861, Val Acc = 0.7338
Epoch 15: Train Loss = 0.2962, Val Loss = 0.5932, Val Acc = 0.

In [40]:
# Final check on the test loader; the helper's return value is printed as-is.
print(
    "최종 평가:",
    evaluate_classification(
        model=model,
        loader=test_loader,
        criterion=criterion,
        device=device,
    ),
)

최종 평가: (0.6656751655913019, 0.7597402597402597)
