In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, ConfusionMatrixDisplay
import tensorflow as tf


import matplotlib.pyplot as plt
from torchsummary import summary
from sklearn.model_selection import train_test_split
import numpy as np

In [None]:
# Read the diabetes CSV directly from GitHub (requires network access)
url = "https://github.com/MyungKyuYi/AI-class/raw/refs/heads/main/diabetes.csv"
data = pd.read_csv(url)
# Bare last expression: the notebook renders the loaded frame as the cell output
data

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [None]:
# Show the column labels of the loaded frame
data.columns

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

In [None]:
# Column labels, matching the data.columns output above.
# NOTE(review): this list is not referenced by any other cell visible here.
columns = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome']

In [None]:
# Regression setup: BMI is the target, every remaining column (Outcome
# included) is used as an input feature.
y = data['BMI'].to_numpy()
X = data.drop(columns=['BMI']).to_numpy()

In [None]:
# Split BEFORE scaling: the scaler must learn its mean/std from the training
# rows only, otherwise statistics of the held-out rows leak into the features
# the model is trained on and the test score becomes optimistic.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # fit on the training split only
X_test = scaler.transform(X_test)        # reuse the training mean/std

In [None]:
# Turn the NumPy splits into float32 tensors. The targets are additionally
# reshaped into column vectors of shape (N, 1).
X_train, X_test = (torch.tensor(split, dtype=torch.float32) for split in (X_train, X_test))
y_train, y_test = (
    torch.tensor(split, dtype=torch.float32).view(-1, 1) for split in (y_train, y_test)
)

In [None]:
# Pair features with targets so each index yields one (x, y) sample.
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

# Mini-batches of 32: training rows are shuffled, test rows keep their order.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [None]:
# Sanity check: display the shapes of the four train/test tensors
X_train.shape, X_test.shape, y_train.shape, y_test.shape

(torch.Size([614, 8]),
 torch.Size([154, 8]),
 torch.Size([614, 1]),
 torch.Size([154, 1]))

In [None]:
class DiabatesDense(nn.Module):
    """Small fully connected regressor: 8 inputs -> 64 -> 32 -> 1 output.

    ReLU follows each of the two hidden layers. The final layer is purely
    linear, so the network emits one unbounded value per sample.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=8, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=32)
        # Single output unit: one regression value per sample
        self.fc3 = nn.Linear(in_features=32, out_features=1)

    def forward(self, x):
        """Run the forward pass on a tensor whose last dimension is 8.

        Returns a tensor with the same leading dimensions and a last
        dimension of 1.
        """
        hidden = torch.relu(self.fc1(x))
        hidden = torch.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Instantiate the network
model = DiabatesDense()

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# The training and evaluation loops send every batch to `device`; the model's
# parameters must live on the same device or the forward pass raises a
# device-mismatch error on CUDA machines. Move it before building the optimizer.
model = model.to(device)
criterion = nn.MSELoss()  # mean squared error (regression loss)
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
model.train()  # put the module in training mode
num_train_samples = len(train_dataloader.dataset)
for epoch in range(50):  # 50 passes over the training set
    total_loss = 0.0
    for X_batch, y_batch in train_dataloader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)  # move the batch to GPU or CPU
        optimizer.zero_grad()              # reset accumulated gradients
        output = model(X_batch)            # forward pass
        loss = criterion(output, y_batch)  # batch-mean loss
        loss.backward()                    # backpropagation
        optimizer.step()                   # update the weights
        # `loss` is a per-batch mean. Weight it by the batch size so the
        # (shorter) final batch does not count as much as a full batch and
        # the reported value is the true per-sample average for the epoch.
        total_loss += loss.item() * X_batch.size(0)
    print(f"Epoch {epoch+1}, Loss: {total_loss / num_train_samples:.4f}")

Epoch 1, Loss: 46.0607
Epoch 2, Loss: 43.6644
Epoch 3, Loss: 52.9585
Epoch 4, Loss: 43.8750
Epoch 5, Loss: 43.8761
Epoch 6, Loss: 41.7278
Epoch 7, Loss: 41.1291
Epoch 8, Loss: 40.9933
Epoch 9, Loss: 40.4647
Epoch 10, Loss: 47.1118
Epoch 11, Loss: 39.5468
Epoch 12, Loss: 40.3542
Epoch 13, Loss: 39.5324
Epoch 14, Loss: 41.4636
Epoch 15, Loss: 39.6906
Epoch 16, Loss: 38.6307
Epoch 17, Loss: 38.0503
Epoch 18, Loss: 37.9846
Epoch 19, Loss: 37.7011
Epoch 20, Loss: 39.0388
Epoch 21, Loss: 37.9851
Epoch 22, Loss: 39.4279
Epoch 23, Loss: 37.1205
Epoch 24, Loss: 36.8815
Epoch 25, Loss: 38.1398
Epoch 26, Loss: 36.5308
Epoch 27, Loss: 36.4687
Epoch 28, Loss: 36.6154
Epoch 29, Loss: 36.9420
Epoch 30, Loss: 35.8553
Epoch 31, Loss: 35.3692
Epoch 32, Loss: 35.0714
Epoch 33, Loss: 36.1692
Epoch 34, Loss: 35.1065
Epoch 35, Loss: 34.7881
Epoch 36, Loss: 36.1017
Epoch 37, Loss: 35.3691
Epoch 38, Loss: 36.0106
Epoch 39, Loss: 35.3054
Epoch 40, Loss: 36.7486
Epoch 41, Loss: 35.5679
Epoch 42, Loss: 35.1889
E

In [None]:
# Evaluation: gather test-set predictions batch by batch, then score with MSE
model.eval()  # put the module in evaluation mode
preds, actuals = [], []
with torch.no_grad():  # gradients are not needed while scoring
    for X_batch, y_batch in test_dataloader:
        batch_preds = model(X_batch.to(device))
        # Bring predictions back to the CPU; store one array per sample
        preds += list(batch_preds.cpu().numpy())
        actuals += list(y_batch.numpy())

mse = mean_squared_error(y_true=actuals, y_pred=preds)
print(f"Test MSE: {mse:.4f}")


Test MSE: 48.0490
