## 딥러닝응용 1주차 리포트

#### 1. 당뇨병 데이터를 가지고 머신러닝 5가지 분류를 수행.
#### (SVM, LR, RF, DT, KNN)

In [97]:
# Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report

In [98]:
# Load Data
df = pd.read_csv('diabetes.csv')
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [99]:
# Create Target
x = df.drop(['Outcome'], axis=1)
y = df['Outcome']

In [100]:
# Train-Test Split
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2)

In [101]:
# SVM Train
from sklearn import svm
model = svm.SVC()
model.fit(x_train, y_train)

# SVM Test
y_pred = model.predict(x_test)
print("SVM Classification Report")
print(classification_report(y_test, y_pred))

SVM Classification Report
              precision    recall  f1-score   support

           0       0.79      0.92      0.85       102
           1       0.77      0.52      0.62        52

    accuracy                           0.79       154
   macro avg       0.78      0.72      0.74       154
weighted avg       0.78      0.79      0.77       154



In [102]:
# LR Train
from sklearn.linear_model import LogisticRegression
model = LogisticRegression(max_iter=500)
model.fit(x_train, y_train)

# LR Test
y_pred = model.predict(x_test)
print("LR Classification Report")
print(classification_report(y_test, y_pred))

LR Classification Report
              precision    recall  f1-score   support

           0       0.78      0.89      0.83       102
           1       0.71      0.52      0.60        52

    accuracy                           0.77       154
   macro avg       0.75      0.71      0.72       154
weighted avg       0.76      0.77      0.76       154



In [103]:
# RF Train
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier()
model.fit(x_train, y_train)

# RF Test
y_pred = model.predict(x_test)
print("RF Classification Report")
print(classification_report(y_test, y_pred))

RF Classification Report
              precision    recall  f1-score   support

           0       0.81      0.89      0.85       102
           1       0.73      0.58      0.65        52

    accuracy                           0.79       154
   macro avg       0.77      0.73      0.75       154
weighted avg       0.78      0.79      0.78       154



In [104]:
# DT Train
from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier()
model.fit(x_train, y_train)

# DT Test
y_pred = model.predict(x_test)
print("DT Classification Report")
print(classification_report(y_test, y_pred))

DT Classification Report
              precision    recall  f1-score   support

           0       0.77      0.74      0.75       102
           1       0.52      0.56      0.54        52

    accuracy                           0.68       154
   macro avg       0.64      0.65      0.64       154
weighted avg       0.68      0.68      0.68       154



In [105]:
# KNN Train
from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier()
model.fit(x_train, y_train)

# KNN Test
y_pred = model.predict(x_test)
print("KNN Classification Report")
print(classification_report(y_test, y_pred))

KNN Classification Report
              precision    recall  f1-score   support

           0       0.80      0.83      0.82       102
           1       0.65      0.60      0.62        52

    accuracy                           0.75       154
   macro avg       0.72      0.71      0.72       154
weighted avg       0.75      0.75      0.75       154



#### 2. 동일한 데이터로 딥러닝 분류 수행하라. (dense layer 만 사용)

In [114]:
# Import Libraries
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
import pandas as pd

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load Data
df = pd.read_csv('diabetes.csv')
x = df.drop(['Outcome'], axis=1).values
y = df['Outcome'].values

# Train-Test Split + Data Normalization
from sklearn.model_selection import train_test_split
scaler = StandardScaler()
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Convert to PyTorch Tensors
x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
x_test_tensor = torch.tensor(x_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Dataset and DataLoader
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)


In [116]:
# NeuralNetwork Class (Only Dense Layers)
class NeuralNetwork(nn.Module):
    def __init__(self, n_input, n_output, n_hidden):
        super(NeuralNetwork, self).__init__()
        self.l1 = nn.Linear(n_input, n_hidden)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(n_hidden, n_output)

    def forward(self, x):
        x = self.l1(x)
        x = self.relu(x)
        x = self.l2(x)
        return x

# Hyperparameters
n_input = x_train.shape[1]   
n_output = 2                 
n_hidden = 64                

# Functions
model = NeuralNetwork(n_input, n_output, n_hidden).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Train Function
def train(dataloader, model, loss_fn, optimizer):
    model.train() 
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        pred = model(X)
        loss = loss_fn(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

# Test Function
def test(dataloader, model, loss_fn):
    model.eval()  # Set model to evaluation mode
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():  # Disable gradient calculation
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)

            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:.6f} \n")

# Train + Test Loop
epochs = 20
for t in range(epochs):
    print(f"Epoch [{t+1}/{epochs}] ---------------------------")
    train(train_loader, model, loss_fn, optimizer)
    test(test_loader, model, loss_fn)

print("Done!")

Epoch [1/20] ---------------------------
Test Accuracy: 75.3%, Avg loss: 0.562564 

Epoch [2/20] ---------------------------
Test Accuracy: 76.6%, Avg loss: 0.525923 

Epoch [3/20] ---------------------------
Test Accuracy: 77.3%, Avg loss: 0.510422 

Epoch [4/20] ---------------------------
Test Accuracy: 77.3%, Avg loss: 0.505820 

Epoch [5/20] ---------------------------
Test Accuracy: 78.6%, Avg loss: 0.502035 

Epoch [6/20] ---------------------------
Test Accuracy: 76.6%, Avg loss: 0.507794 

Epoch [7/20] ---------------------------
Test Accuracy: 76.6%, Avg loss: 0.506794 

Epoch [8/20] ---------------------------
Test Accuracy: 76.0%, Avg loss: 0.505237 

Epoch [9/20] ---------------------------
Test Accuracy: 78.6%, Avg loss: 0.505346 

Epoch [10/20] ---------------------------
Test Accuracy: 77.3%, Avg loss: 0.509401 

Epoch [11/20] ---------------------------
Test Accuracy: 76.6%, Avg loss: 0.509235 

Epoch [12/20] ---------------------------
Test Accuracy: 77.3%, Avg loss: 

#### 3. 해당 데이터에서 Outcome을 삭제하고 BMI를 예측하는 회귀를 수행하라.

In [108]:
# Drop 'Outcome' + Target → BMI
df = pd.read_csv('diabetes.csv')
x = df.drop(['Outcome', 'BMI'], axis=1)
y = df['BMI']

In [109]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2)

In [110]:
# LR Train
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(x_train, y_train)

# LR Test
from sklearn.metrics import mean_squared_error
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

Mean Squared Error: 39.880241751199044


#### 4. 3번과 동일하지만 dense layer만 사용한 신경만으로 회귀를 수행하라.

In [111]:
# Import Libraries
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
import pandas as pd

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load Data
df = pd.read_csv('diabetes.csv')

x = df.drop(['BMI'], axis=1).values  
y = df['BMI'].values  

# Train-Test Split + Data Normalization
from sklearn.model_selection import train_test_split
scaler = StandardScaler()
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Convert to PyTorch Tensors
x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
x_test_tensor = torch.tensor(x_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)  # Make it 2D for regression
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

# Dataset and Dataloader
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [113]:
# NeuralNetwork Class (Only Dense Layer)
class NeuralNetwork(nn.Module):
    def __init__(self, n_input, n_hidden):
        super(NeuralNetwork, self).__init__()
        self.l1 = nn.Linear(n_input, n_hidden)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(n_hidden, 1)  

    def forward(self, x):
        x = self.l1(x)
        x = self.relu(x)
        x = self.l2(x)
        return x

# Hyperparameters
n_input = x_train.shape[1]   
n_hidden = 64                

# Functions
model = NeuralNetwork(n_input, n_hidden).to(device)
loss_fn = nn.MSELoss()  # Mean Squared Error for regression
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Train Function
def train(dataloader, model, loss_fn, optimizer):
    model.train() 
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        pred = model(X)
        loss = loss_fn(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


# Test Function
def test(dataloader, model, loss_fn):
    model.eval()  # Set model to evaluation mode
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss = 0

    with torch.no_grad():  
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()

    test_loss /= num_batches
    print(f"Avg loss: {test_loss:.6f} \n")

# Train + Test Loop
epochs = 200
for t in range(epochs):
    print(f"Epoch [{t+1}/{epochs}]")
    train(train_loader, model, loss_fn, optimizer)
    test(test_loader, model, loss_fn)
    print("-" * 30)

print("Done!")


Epoch [1/200]
Avg loss: 1068.450671 

------------------------------
Epoch [2/200]
Avg loss: 1040.725122 

------------------------------
Epoch [3/200]
Avg loss: 1010.751501 

------------------------------
Epoch [4/200]
Avg loss: 977.090796 

------------------------------
Epoch [5/200]
Avg loss: 938.284888 

------------------------------
Epoch [6/200]
Avg loss: 894.323669 

------------------------------
Epoch [7/200]
Avg loss: 845.365283 

------------------------------
Epoch [8/200]
Avg loss: 790.613672 

------------------------------
Epoch [9/200]
Avg loss: 732.682898 

------------------------------
Epoch [10/200]
Avg loss: 669.999011 

------------------------------
Epoch [11/200]
Avg loss: 604.911230 

------------------------------
Epoch [12/200]
Avg loss: 539.889075 

------------------------------
Epoch [13/200]
Avg loss: 475.071393 

------------------------------
Epoch [14/200]
Avg loss: 413.425745 

------------------------------
Epoch [15/200]
Avg loss: 356.209662 

--