In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [4]:
# 1. 데이터 로딩
url = "https://raw.githubusercontent.com/MyungKyuYi/AI-class/refs/heads/main/diabetes.csv"
df = pd.read_csv(url)
print("✅ 데이터 로딩 완료")
print(df.head())

✅ 데이터 로딩 완료
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  


In [6]:
# 2. 데이터 전처리 (회귀를 위해, Outcome을 연속형 값으로 사용)
columns = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 
           'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome']

In [8]:
from sklearn.preprocessing import LabelEncoder
label_encoders = {}
for column in columns:
    label_encoders[column] = LabelEncoder()
    df[column] = label_encoders[column].fit_transform(df[column])


In [18]:
# 3.특성(X)와 타겟(y) 분리
X = df.drop('Outcome', axis=1).values
y = df['Outcome'].values.astype(np.float32)  # 회귀용이므로 float으로 변환

In [20]:
# 4. 학습/테스트 셋 분할 및 스케일링 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test  = scaler.transform(X_test)

In [22]:
# numpy → torch tensor 변환
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)  # (N,1) 형태로 변환
X_test  = torch.tensor(X_test, dtype=torch.float32)
y_test  = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

In [24]:
# 5. Dataset 클래스 정의 (회귀용)
class DiabetesDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y
    def __len__(self):
        return len(self.y)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [26]:
train_dataset = DiabetesDataset(X_train, y_train)
test_dataset  = DiabetesDataset(X_test, y_test)

In [28]:
# 6. DataLoader 생성
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [32]:
# 7. 회귀용 모델 정의
class DiabetesNetReg(nn.Module):
    def __init__(self, input_dim):
        super(DiabetesNetReg, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)  # 회귀 문제이므로 출력 뉴런 1개
        )
    def forward(self, x):
        return self.net(x)

model = DiabetesNetReg(input_dim=X_train.shape[1])

In [34]:
# 8. 손실함수와 최적화기 정의
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [36]:
# 9. 학습 함수 정의
def train_reg(model, loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    for data, target in loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * data.size(0)
    epoch_loss = running_loss / len(loader.dataset)
    return epoch_loss

In [38]:
# 10. 평가 함수 정의
def evaluate_reg(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)
            running_loss += loss.item() * data.size(0)
    epoch_loss = running_loss / len(loader.dataset)
    return epoch_loss

In [40]:
# 11. 학습 루프 실행
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
num_epochs = 50
for epoch in range(1, num_epochs + 1):
    train_loss = train_reg(model, train_loader, criterion, optimizer, device)
    val_loss = evaluate_reg(model, test_loader, criterion, device)
    print(f'Epoch {epoch:02d}: Train MSE={train_loss:.4f}, Val MSE={val_loss:.4f}')

Epoch 01: Train MSE=0.3146, Val MSE=0.2121
Epoch 02: Train MSE=0.1954, Val MSE=0.1805
Epoch 03: Train MSE=0.1684, Val MSE=0.1719
Epoch 04: Train MSE=0.1590, Val MSE=0.1759
Epoch 05: Train MSE=0.1563, Val MSE=0.1733
Epoch 06: Train MSE=0.1534, Val MSE=0.1754
Epoch 07: Train MSE=0.1513, Val MSE=0.1717
Epoch 08: Train MSE=0.1488, Val MSE=0.1707
Epoch 09: Train MSE=0.1473, Val MSE=0.1707
Epoch 10: Train MSE=0.1450, Val MSE=0.1715
Epoch 11: Train MSE=0.1445, Val MSE=0.1671
Epoch 12: Train MSE=0.1427, Val MSE=0.1727
Epoch 13: Train MSE=0.1402, Val MSE=0.1746
Epoch 14: Train MSE=0.1406, Val MSE=0.1725
Epoch 15: Train MSE=0.1415, Val MSE=0.1750
Epoch 16: Train MSE=0.1371, Val MSE=0.1737
Epoch 17: Train MSE=0.1358, Val MSE=0.1762
Epoch 18: Train MSE=0.1331, Val MSE=0.1751
Epoch 19: Train MSE=0.1321, Val MSE=0.1749
Epoch 20: Train MSE=0.1307, Val MSE=0.1794
Epoch 21: Train MSE=0.1301, Val MSE=0.1738
Epoch 22: Train MSE=0.1281, Val MSE=0.1755
Epoch 23: Train MSE=0.1272, Val MSE=0.1812
Epoch 24: T

In [42]:
# 최종 평가
final_loss = evaluate_reg(model, test_loader, criterion, device)
print(f"\n최종 테스트 MSE: {final_loss:.4f}")


최종 테스트 MSE: 0.1861
