In [29]:
pip install torch torchvision torchaudio

Note: you may need to restart the kernel to use updated packages.


In [30]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [31]:
# 1. Load the data
# The diabetes CSV is read straight from GitHub into a DataFrame; the first
# rows are printed as a quick sanity check.
url = "https://raw.githubusercontent.com/MyungKyuYi/AI-class/refs/heads/main/diabetes.csv"
df = pd.read_csv(filepath_or_buffer=url)
print("✅ 데이터 로딩 완료", df.head(), sep="\n")

✅ 데이터 로딩 완료
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  


In [32]:
# Inspect the column names: eight feature columns plus the 'Outcome' label.
df.columns

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

In [33]:
# Separate the features (X) from the label (y).
# Every column except 'Outcome' is a feature; 'Outcome' is the target.
X = df.drop(columns=['Outcome']).values
y = df.loc[:, 'Outcome'].values

In [34]:
# Split into train/test sets (80:20). Stratifying on y keeps the class ratio
# the same in both splits; random_state fixes the split for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [35]:
# Feature standardization (zero mean, unit variance).
# The scaler is fitted on the training split only and then applied to the test
# split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train, X_test = scaler.fit_transform(X_train), scaler.transform(X_test)

In [36]:
# Convert the numpy arrays to torch tensors.
# Features become float32; labels become long (int64) because this is a
# classification problem and the labels are class indices.
X_train, X_test = (torch.tensor(arr, dtype=torch.float32) for arr in (X_train, X_test))
y_train, y_test = (torch.tensor(arr, dtype=torch.long) for arr in (y_train, y_test))

In [37]:
# 2. PyTorch Dataset class definition
class DiabetesDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Args:
        X: Indexable collection of feature rows (must support len() and []).
        y: Indexable collection of labels, one per row of X.

    Raises:
        ValueError: If X and y do not contain the same number of samples.
    """

    def __init__(self, X, y):
        # Fail fast on misaligned inputs; otherwise the mismatch only shows up
        # later as an IndexError mid-epoch or as silently ignored rows.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y

    def __len__(self):
        """Return the number of samples."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the (features, label) pair stored at position idx."""
        return self.X[idx], self.y[idx]

# Wrap the train and test tensors in Dataset objects for the DataLoaders.
train_dataset = DiabetesDataset(X=X_train, y=y_train)
test_dataset = DiabetesDataset(X=X_test, y=y_test)

In [38]:
# 3. Build the DataLoaders: batch size and shuffling.
# The training loader gets its own seeded generator so the shuffled batch
# order is reproducible across kernel restarts. The test loader keeps a fixed
# order.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
    generator=torch.Generator().manual_seed(42),
)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

## 모델 정의

In [40]:
# 4. Neural network model definition
class DiabetesNet(nn.Module):
    """Fully connected classifier: Linear -> ReLU blocks followed by a logit layer.

    Args:
        input_dim: Number of input features per sample.
        hidden_dims: Widths of the hidden layers, in order. The default
            (64, 32) reproduces the original fixed architecture.
        num_classes: Number of output logits. The default of 2 gives one
            logit per class for binary classification.
    """

    def __init__(self, input_dim, hidden_dims=(64, 32), num_classes=2):
        super().__init__()
        layers = []
        in_features = input_dim
        # Each hidden width adds a Linear + ReLU pair. Layers are created in
        # the same order as before, so state_dict keys (net.0, net.2, net.4)
        # stay the same for the default configuration.
        for width in hidden_dims:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU())
            in_features = width
        # Final layer outputs raw logits (no softmax).
        layers.append(nn.Linear(in_features, num_classes))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits for a batch of feature rows."""
        return self.net(x)

# Seed PyTorch's global RNG before building the model so the initial weights
# are the same on every fresh run.
torch.manual_seed(42)
model = DiabetesNet(input_dim=X_train.shape[1])

## 손실 함수 및 최적화 기법 정의

In [42]:
# 5. Loss function and optimizer (CrossEntropyLoss, Adam)
# CrossEntropyLoss takes the raw logits from the model together with integer
# class labels; Adam updates all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

## 모델 학습

In [44]:
# 6. Training and evaluation function definitions
def train(model, loader, criterion, optimizer, device):
    """Run one training epoch and return the mean per-sample loss.

    Args:
        model: Module to train; switched to training mode.
        loader: Iterable yielding (data, target) batches.
        criterion: Loss callable invoked as criterion(output, target).
        optimizer: Optimizer that updates the model's parameters.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Loss averaged over the samples actually processed this epoch, or NaN
        if the loader yielded no batches.
    """
    model.train()
    running_loss = 0.0
    seen = 0
    for data, target in loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()      # reset accumulated gradients
        output = model(data)       # forward pass
        loss = criterion(output, target)
        loss.backward()            # backward pass
        optimizer.step()           # update the weights
        batch_size = data.size(0)
        # Weight by batch size so a smaller final batch counts proportionally
        # (assumes the criterion returns the batch mean, as the default
        # reduction does).
        running_loss += loss.item() * batch_size
        seen += batch_size
    # Divide by the samples actually seen rather than len(loader.dataset): the
    # two differ when the loader uses drop_last=True or a subset sampler.
    if seen == 0:
        return float("nan")
    return running_loss / seen

In [45]:
def evaluate(model, loader, criterion, device):
    """Evaluate the model without gradient tracking.

    Args:
        model: Module to evaluate; switched to evaluation mode.
        loader: Iterable yielding (data, target) batches.
        criterion: Loss callable invoked as criterion(output, target).
        device: Device each batch is moved to before the forward pass.

    Returns:
        Tuple (mean per-sample loss, accuracy), both computed over the samples
        actually processed. Both are NaN if the loader yielded no batches.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    seen = 0
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)
            batch_size = data.size(0)
            # Weight by batch size so a smaller final batch counts
            # proportionally (assumes the criterion returns the batch mean).
            running_loss += loss.item() * batch_size
            # Prediction: index of the largest logit for each sample.
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            seen += batch_size
    # Divide by the samples actually seen rather than len(loader.dataset): the
    # two differ when the loader uses drop_last=True or a subset sampler.
    if seen == 0:
        return float("nan"), float("nan")
    return running_loss / seen, correct / seen


In [46]:
# 7. Run the training loop
# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Each epoch: one pass over the training data, then a pass over the test data
# to report its loss and accuracy.
num_epochs = 50
for epoch in range(1, num_epochs + 1):
    train_loss = train(model, train_loader, criterion, optimizer, device)
    test_loss, test_acc = evaluate(model, test_loader, criterion, device)
    print(f'Epoch {epoch:02d}: Train Loss={train_loss:.4f}, Test Loss={test_loss:.4f}, Test Acc={test_acc:.4f}')

Epoch 01: Train Loss=0.6743, Test Loss=0.6414, Test Acc=0.6753
Epoch 02: Train Loss=0.6085, Test Loss=0.5805, Test Acc=0.7078
Epoch 03: Train Loss=0.5391, Test Loss=0.5289, Test Acc=0.7208
Epoch 04: Train Loss=0.4932, Test Loss=0.5122, Test Acc=0.7403
Epoch 05: Train Loss=0.4677, Test Loss=0.4962, Test Acc=0.7208
Epoch 06: Train Loss=0.4555, Test Loss=0.4944, Test Acc=0.7208
Epoch 07: Train Loss=0.4496, Test Loss=0.4902, Test Acc=0.7208
Epoch 08: Train Loss=0.4453, Test Loss=0.4840, Test Acc=0.7143
Epoch 09: Train Loss=0.4436, Test Loss=0.4888, Test Acc=0.7273
Epoch 10: Train Loss=0.4383, Test Loss=0.4950, Test Acc=0.7208
Epoch 11: Train Loss=0.4341, Test Loss=0.4972, Test Acc=0.7273
Epoch 12: Train Loss=0.4331, Test Loss=0.4956, Test Acc=0.7143
Epoch 13: Train Loss=0.4269, Test Loss=0.4968, Test Acc=0.7208
Epoch 14: Train Loss=0.4254, Test Loss=0.5069, Test Acc=0.7208
Epoch 15: Train Loss=0.4238, Test Loss=0.5098, Test Acc=0.7143
Epoch 16: Train Loss=0.4210, Test Loss=0.5035, Test Acc

## 모델 평가

In [48]:
# Final evaluation: report loss and accuracy on the test dataset.
final_test_loss, final_test_acc = evaluate(model, test_loader, criterion, device)
print(
    f'\nFinal Test Loss: {final_test_loss:.4f}',
    f'Final Test Accuracy: {final_test_acc:.4f}',
    sep='\n',
)


Final Test Loss: 0.5498
Final Test Accuracy: 0.7468
