In [95]:
# 라이브러리 불러오기
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, ConfusionMatrixDisplay
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim
import matplotlib.pyplot as plt

In [97]:
# 데이터 불러오기
data = pd.read_csv("C:/Users/LIM/OneDrive - gachon.ac.kr/바탕 화면/study/인공지능개론/diabetes.csv")
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [99]:
# 데이터 확인
print(data.columns.tolist())  #컬럼명 확인
data.dtypes            #데이터 타입 확인

['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome']


Pregnancies                   int64
Glucose                       int64
BloodPressure                 int64
SkinThickness                 int64
Insulin                       int64
BMI                         float64
DiabetesPedigreeFunction    float64
Age                           int64
Outcome                       int64
dtype: object

In [101]:
# 결측치 제거
data.isnull().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

In [103]:
# 레이블 갯수 확인
data['Outcome'].value_counts()

Outcome
0    500
1    268
Name: count, dtype: int64

In [105]:
# 데이터 분리
X = data.drop(columns=['Outcome'])  #입력값 분리
X.head() #입력값 분리 확인

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33


In [107]:
X = data.drop('Outcome', axis=1).values  #넘파이 변환

In [109]:
y = data["Outcome"]  #출력값 분리
y.head()  #출력값 분리 확인

0    1
1    0
2    1
3    0
4    1
Name: Outcome, dtype: int64

In [111]:
y = data['Outcome'].values  #넘파이 변환

In [113]:
scaler = StandardScaler()  #정규화를 위한 객체 생성
X = scaler.fit_transform(X)  #정규화

In [115]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)  #데이터 분할

In [117]:
# TensorDataset으로 래핑
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)  # y를 2D 텐서로 변환
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)  # y를 2D 텐서로 변환

In [119]:
# 데이터셋과 데이터로더 생성
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

In [121]:
# shape 확인
X_train.shape, y_train.shape, X_test.shape, y_test.shape    

((614, 8), (614,), (154, 8), (154,))

In [123]:
# 모델 정의
class diabetesDense(nn.Module):
    def __init__(self):
        super(diabetesDense, self).__init__()
        self.fc1 = nn.Linear(8, 16)
        self.fc2 = nn.Linear(16, 32)
        self.fc3 = nn.Linear(32, 32)
        self.fc4 = nn.Linear(32, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = self.fc4(x)
        return x

# Initialize the model, loss function, and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = diabetesDense().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [125]:
# 모델 훈련 모드 설정
model.train()

diabetesDense(
  (fc1): Linear(in_features=8, out_features=16, bias=True)
  (fc2): Linear(in_features=16, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=32, bias=True)
  (fc4): Linear(in_features=32, out_features=1, bias=True)
)

In [127]:
# 학습 파라미터 초기화
epochs = 30
train_losses = []
test_accuracies = []

In [129]:
# 학습 루프
for epoch in range(epochs):
    total_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    train_losses.append(total_loss / len(train_loader))
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader):.4f}")

Epoch 1, Loss: 0.2265
Epoch 2, Loss: 0.2046
Epoch 3, Loss: 0.1879
Epoch 4, Loss: 0.1719
Epoch 5, Loss: 0.1611
Epoch 6, Loss: 0.1536
Epoch 7, Loss: 0.1510
Epoch 8, Loss: 0.1474
Epoch 9, Loss: 0.1475
Epoch 10, Loss: 0.1456
Epoch 11, Loss: 0.1470
Epoch 12, Loss: 0.1447
Epoch 13, Loss: 0.1426
Epoch 14, Loss: 0.1409
Epoch 15, Loss: 0.1373
Epoch 16, Loss: 0.1382
Epoch 17, Loss: 0.1422
Epoch 18, Loss: 0.1376
Epoch 19, Loss: 0.1367
Epoch 20, Loss: 0.1379
Epoch 21, Loss: 0.1347
Epoch 22, Loss: 0.1313
Epoch 23, Loss: 0.1309
Epoch 24, Loss: 0.1280
Epoch 25, Loss: 0.1333
Epoch 26, Loss: 0.1312
Epoch 27, Loss: 0.1249
Epoch 28, Loss: 0.1310
Epoch 29, Loss: 0.1241
Epoch 30, Loss: 0.1230


In [131]:
# 모델 평가 모드 전환
model.eval()

diabetesDense(
  (fc1): Linear(in_features=8, out_features=16, bias=True)
  (fc2): Linear(in_features=16, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=32, bias=True)
  (fc4): Linear(in_features=32, out_features=1, bias=True)
)

In [133]:
# GPU 사용 여부 확인
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)  # 모델을 GPU로 이동

diabetesDense(
  (fc1): Linear(in_features=8, out_features=16, bias=True)
  (fc2): Linear(in_features=16, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=32, bias=True)
  (fc4): Linear(in_features=32, out_features=1, bias=True)
)

In [135]:
# 예측값, 실제값 정의
preds, actuals = [], []

In [139]:
# 평가 루프
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        outputs = model(X_batch).cpu().numpy()
        preds.extend(outputs)
        actuals.extend(y_batch.numpy())

mse = mean_squared_error(actuals, preds)
r2 = r2_score(actuals, preds)

print(f"R^2 Score: {r2:.4f}")
print(f"Test MSE: {mse:.4f}")

R^2 Score: 0.1984
Test MSE: 0.1840
