In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [4]:
# 1. Data loading and preprocessing
# Read the diabetes CSV directly from its GitHub URL into a DataFrame
# (this cell needs network access to run).
url = "https://raw.githubusercontent.com/MyungKyuYi/AI-class/refs/heads/main/diabetes.csv"
df = pd.read_csv(url)
print("✅ 데이터 로딩 완료")
# Show the first rows as a quick sanity check of the loaded columns.
print(df.head())

✅ 데이터 로딩 완료
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  


In [6]:
# Use Outcome as the regression target (converted to float).
# Every remaining column becomes an input feature.
X = df.drop(columns=["Outcome"]).to_numpy()
y = df["Outcome"].to_numpy(dtype=np.float32)  # regression target

In [10]:
# 2. Train/test split and scaling
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Fit the scaler on the training rows only, then apply that same transform
# to both splits so no test-set statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [12]:
# 3. Convert numpy arrays to torch tensors
# Features are stored as float32; each target vector is reshaped with
# view(-1, 1) into a single column so it lines up with the model's
# one-value-per-sample output when the loss is computed.
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test  = torch.tensor(X_test, dtype=torch.float32)
y_test  = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

In [14]:
# 4. Dataset definition (regression)
class DiabetesDatasetReg(Dataset):
    """Map-style dataset that pairs each feature row with its regression target.

    Args:
        X: Indexable collection of feature rows (e.g. a 2-D tensor).
        y: Indexable collection of targets, one per row of ``X``.

    Raises:
        ValueError: If ``X`` and ``y`` do not contain the same number of samples.
    """
    def __init__(self, X, y):
        # Fail fast on misaligned inputs: otherwise the mismatch only shows up
        # as an IndexError mid-epoch or as silently ignored feature rows.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y
    def __len__(self):
        """Return the number of samples."""
        return len(self.y)
    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

# Wrap the train and test tensors in dataset objects so DataLoader can batch them.
train_dataset_reg = DiabetesDatasetReg(X_train, y_train)
test_dataset_reg  = DiabetesDatasetReg(X_test, y_test)

In [16]:
# 5. Create the DataLoaders
# Both loaders yield mini-batches of 32 samples; only the training loader
# shuffles, while the test loader keeps the dataset order.
batch_size = 32
train_loader_reg = DataLoader(train_dataset_reg, batch_size=batch_size, shuffle=True)
test_loader_reg  = DataLoader(test_dataset_reg, batch_size=batch_size, shuffle=False)

In [18]:
# 6. Abalone 스타일 회귀 모델 정의 (은닉층 크기를 확대)
class DiabetesNetAbaloneRegression(nn.Module):
    def __init__(self, input_dim):
        super(DiabetesNetAbaloneRegression, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1)  # 회귀이므로 출력 1개
        )
    def forward(self, x):
        return self.net(x)

# Seed PyTorch's global RNG before building the model so the initial weights
# are the same on every fresh run. The data split is already pinned with
# random_state=42; without this the reported losses still vary run to run.
# An unseeded shuffling DataLoader also draws from this global RNG.
torch.manual_seed(42)
model_reg_abalone = DiabetesNetAbaloneRegression(input_dim=X_train.shape[1])

In [20]:
# 7. Loss function and optimizer
# Mean-squared-error loss, optimised with Adam at a learning rate of 5e-4.
criterion_reg = nn.MSELoss()
optimizer_reg_abalone = optim.Adam(model_reg_abalone.parameters(), lr=0.0005)

In [22]:
# 8. Training and evaluation functions (regression)
def train_regression(model, loader, criterion, optimizer, device):
    """Run one training epoch and return the mean per-sample loss.

    Args:
        model: Network to optimise; switched to training mode.
        loader: Iterable yielding ``(data, target)`` batches.
        criterion: Loss function called as ``criterion(outputs, target)``.
            Assumed to return the batch-mean loss (as ``nn.MSELoss()`` does by
            default), which is why each batch loss is re-weighted by batch size.
        optimizer: Optimizer bound to ``model``'s parameters.
        device: Device every batch is moved to before the forward pass.

    Returns:
        The loss averaged over the samples actually processed, or ``nan`` if
        the loader produced no samples.
    """
    model.train()
    running_loss = 0.0
    num_samples = 0
    for data, target in loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        n_batch = data.size(0)
        running_loss += loss.item() * n_batch
        num_samples += n_batch
    # Divide by the samples seen rather than len(loader.dataset): the two differ
    # when the loader skips samples (drop_last=True, subset samplers), and the
    # dataset length is zero for an empty dataset.
    if num_samples == 0:
        return float("nan")
    return running_loss / num_samples

In [24]:
def evaluate_regression(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without gradients and return the mean per-sample loss.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        loader: Iterable yielding ``(data, target)`` batches.
        criterion: Loss function called as ``criterion(outputs, target)``.
            Assumed to return the batch-mean loss (as ``nn.MSELoss()`` does by
            default), which is why each batch loss is re-weighted by batch size.
        device: Device every batch is moved to before the forward pass.

    Returns:
        The loss averaged over the samples actually processed, or ``nan`` if
        the loader produced no samples.
    """
    model.eval()
    running_loss = 0.0
    num_samples = 0
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            loss = criterion(model(data), target)
            n_batch = data.size(0)
            running_loss += loss.item() * n_batch
            num_samples += n_batch
    # Divide by the samples seen rather than len(loader.dataset): the two differ
    # when the loader skips samples (drop_last=True, subset samplers), and the
    # dataset length is zero for an empty dataset.
    if num_samples == 0:
        return float("nan")
    return running_loss / num_samples

In [26]:
# 9. Run the training loop
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_reg_abalone.to(device)
num_epochs = 50

for epoch in range(1, num_epochs+1):
    # One optimisation pass over the training loader.
    train_loss = train_regression(model_reg_abalone, train_loader_reg, criterion_reg, optimizer_reg_abalone, device)
    # NOTE(review): the "Val MSE" reported below is computed on test_loader_reg,
    # i.e. the held-out test split doubles as the validation set here.
    val_loss = evaluate_regression(model_reg_abalone, test_loader_reg, criterion_reg, device)
    print(f"Epoch {epoch:02d}: Train MSE = {train_loss:.4f}, Val MSE = {val_loss:.4f}")

Epoch 01: Train MSE = 0.1895, Val MSE = 0.1689
Epoch 02: Train MSE = 0.1601, Val MSE = 0.1646
Epoch 03: Train MSE = 0.1519, Val MSE = 0.1704
Epoch 04: Train MSE = 0.1490, Val MSE = 0.1764
Epoch 05: Train MSE = 0.1465, Val MSE = 0.1712
Epoch 06: Train MSE = 0.1437, Val MSE = 0.1703
Epoch 07: Train MSE = 0.1413, Val MSE = 0.1717
Epoch 08: Train MSE = 0.1394, Val MSE = 0.1753
Epoch 09: Train MSE = 0.1370, Val MSE = 0.1725
Epoch 10: Train MSE = 0.1362, Val MSE = 0.1732
Epoch 11: Train MSE = 0.1340, Val MSE = 0.1791
Epoch 12: Train MSE = 0.1339, Val MSE = 0.1773
Epoch 13: Train MSE = 0.1317, Val MSE = 0.1800
Epoch 14: Train MSE = 0.1306, Val MSE = 0.1754
Epoch 15: Train MSE = 0.1290, Val MSE = 0.1838
Epoch 16: Train MSE = 0.1289, Val MSE = 0.1884
Epoch 17: Train MSE = 0.1279, Val MSE = 0.1849
Epoch 18: Train MSE = 0.1246, Val MSE = 0.1841
Epoch 19: Train MSE = 0.1245, Val MSE = 0.1853
Epoch 20: Train MSE = 0.1226, Val MSE = 0.1825
Epoch 21: Train MSE = 0.1230, Val MSE = 0.1868
Epoch 22: Tra

In [28]:
# Report the MSE on the test loader using the model as it stands in the kernel.
print("최종 테스트 MSE:", evaluate_regression(model_reg_abalone, test_loader_reg, criterion_reg, device))

최종 테스트 MSE: 0.206086073989992
