# Logistic Regression vs Deep NN 비교 실험
> 컴퓨터소프트웨어학부 2021088304 박현준


### 1. 과제 내용
1) Logistic Regression\
2) Deep NN (3 hidden layers)

### 2. 데이터셋
[Chest_Xray_Pneumonia](https://www.kaggle.com/datasets/paultimothymooney/chest-xray-pneumonia)

In [20]:
# Import libraries
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
import torchvision
from torchvision import transforms, datasets

import json
import numpy as np
import matplotlib.pyplot as plt

In [21]:
# Local dataset root; the 'train' and 'test' image folders are read from it below.
root_dir = os.path.expanduser("C:/Users/CV/Desktop/Assignment/archive/chest_xray")

# Number of training epochs used by the training loops.
num_epochs = 10

# Each image is reduced to one grayscale channel, resized to 128x128 and
# converted to a tensor, so a flattened sample holds 128*128 values.
input_dim = 128 * 128
transform = transforms.Compose(
    [
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
    ]
)

# Train / test data: both folders go through the same preprocessing.
test_ds = datasets.ImageFolder(os.path.join(root_dir, 'test'), transform=transform)
trainval_ds = datasets.ImageFolder(os.path.join(root_dir, 'train'), transform=transform)

# 80/20 train/validation split of the 'train' folder; the generator is seeded
# with 42 so the split is drawn from a fixed seed.
train_size = int(0.8 * len(trainval_ds))
val_size = len(trainval_ds) - train_size
train_ds, val_ds = random_split(
    trainval_ds,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

# DataLoaders: only the training split is shuffled.
train_loader = DataLoader(train_ds, shuffle=True, batch_size=32)
val_loader, test_loader = (
    DataLoader(split, batch_size=32) for split in (val_ds, test_ds)
)

## Logistic Regression

In [22]:
class LogisticModel(nn.Module):
    """Binary logistic-regression classifier: one linear layer followed by a sigmoid.

    Args:
        input_dim: Number of features in each flattened input sample.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.linear = nn.Linear(input_dim, 1)

    def forward(self, x):
        """Return ``sigmoid(linear(x))``: one value in [0, 1] per input row."""
        return torch.sigmoid(self.linear(x))

    def _run_epoch(self, loader, criterion, device, optimizer=None):
        """Run one pass over ``loader``; update weights only when ``optimizer`` is given.

        Returns:
            ``(avg_loss, accuracy)`` where ``avg_loss`` is the mean of the
            per-batch loss values and ``accuracy`` is the fraction of samples
            whose thresholded output (> 0.5) equals the label.
        """
        total_loss = 0.0
        correct = 0
        total = 0

        for images, labels in loader:
            images = images.to(device)
            # Labels become a float column so they can be compared with the
            # model output, which has one value per sample.
            labels = labels.to(device).float().unsqueeze(1)

            if optimizer is not None:
                optimizer.zero_grad()

            # reshape (not view) so non-contiguous batches are flattened too;
            # for contiguous input the result is the same.
            outputs = self(images.reshape(images.size(0), -1))
            loss = criterion(outputs, labels)

            if optimizer is not None:
                loss.backward()
                optimizer.step()

            total_loss += loss.item()

            preds = (outputs > 0.5).float()
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        # Each batch contributes equally to the loss average, whatever its size.
        return total_loss / len(loader), correct / total

    def train_model(self, loader, optimizer, criterion, device):
        """Train for one epoch.

        Args:
            loader: Sized iterable of ``(images, labels)`` batches.
            optimizer: Optimizer over this model's parameters; it is zeroed
                and stepped once per batch.
            criterion: Loss called as ``criterion(outputs, labels)``.
            device: Device each batch is moved to before the forward pass.

        Returns:
            ``(avg_loss, accuracy)`` for the epoch; accuracy is measured on
            the outputs computed before each optimizer step.

        Raises:
            ZeroDivisionError: If ``loader`` yields no batches.
        """
        self.train()
        return self._run_epoch(loader, criterion, device, optimizer=optimizer)

    def evaluate_model(self, loader, criterion, device):
        """Evaluate on ``loader`` in eval mode with gradients disabled.

        Args:
            loader: Sized iterable of ``(images, labels)`` batches.
            criterion: Loss called as ``criterion(outputs, labels)``.
            device: Device each batch is moved to before the forward pass.

        Returns:
            ``(avg_loss, accuracy)`` over the loader.

        Raises:
            ZeroDivisionError: If ``loader`` yields no batches.
        """
        self.eval()
        with torch.no_grad():
            return self._run_epoch(loader, criterion, device)


In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Logistic regression trained with plain SGD on binary cross-entropy.
# NOTE(review): the captured run of this cell shows a train loss around 24 and
# a validation accuracy stuck at 0.7289, which looks like the sigmoid
# saturating - consider normalizing the inputs or lowering lr; confirm.
criterion = nn.BCELoss()
model = LogisticModel(input_dim).to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Per-epoch accuracy history plus the final test accuracy.
logistic_results = dict(model='logistic', train_acc=[], val_acc=[], test_acc=None)

for epoch in range(num_epochs):
    train_loss, train_acc = model.train_model(train_loader, optimizer, criterion, device)
    val_loss, val_acc = model.evaluate_model(val_loader, criterion, device)

    logistic_results['train_acc'] += [train_acc]
    logistic_results['val_acc'] += [val_acc]

    print(
        f"[Epoch {epoch+1}] Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | "
        f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}"
    )

# The test split is evaluated once, after the last epoch.
test_loss, test_acc = model.evaluate_model(test_loader, criterion, device)
logistic_results.update(test_acc=test_acc)
print(f"\n[Test Accuracy] {test_acc:.4f}")

# Saved in the working directory; the comparison plot reads this file back.
with open(f"logistic_results_{num_epochs}.json", "w") as f:
    json.dump(logistic_results, f, indent=4)

[Epoch 1] Train Loss: 24.2885 | Train Acc: 0.7368 | Val Loss: 26.8561 | Val Acc: 0.7289
[Epoch 2] Train Loss: 24.7257 | Train Acc: 0.7426 | Val Loss: 26.8561 | Val Acc: 0.7289


## Deep Neural Network

In [None]:
class DeepNN(nn.Module):
    """Fully connected binary classifier with three hidden layers (256, 128, 64).

    Each hidden layer is followed by ReLU; the single output unit is passed
    through a sigmoid.

    Args:
        input_dim: Number of features in each flattened input sample.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return the network output: one value in [0, 1] per input row."""
        return self.model(x)

    def _run_epoch(self, loader, criterion, device, optimizer=None):
        """Run one pass over ``loader``; update weights only when ``optimizer`` is given.

        Returns:
            ``(avg_loss, accuracy)`` where ``avg_loss`` is the mean of the
            per-batch loss values and ``accuracy`` is the fraction of samples
            whose thresholded output (> 0.5) equals the label.
        """
        total_loss = 0.0
        correct = 0
        total = 0

        for images, labels in loader:
            images = images.to(device)
            # Labels become a float column so they can be compared with the
            # model output, which has one value per sample.
            labels = labels.to(device).float().unsqueeze(1)

            if optimizer is not None:
                optimizer.zero_grad()

            # reshape (not view) so non-contiguous batches are flattened too;
            # for contiguous input the result is the same.
            outputs = self(images.reshape(images.size(0), -1))
            loss = criterion(outputs, labels)

            if optimizer is not None:
                loss.backward()
                optimizer.step()

            total_loss += loss.item()

            preds = (outputs > 0.5).float()
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        # Each batch contributes equally to the loss average, whatever its size.
        return total_loss / len(loader), correct / total

    def train_model(self, loader, optimizer, criterion, device):
        """Train for one epoch.

        Args:
            loader: Sized iterable of ``(images, labels)`` batches.
            optimizer: Optimizer over this model's parameters; it is zeroed
                and stepped once per batch.
            criterion: Loss called as ``criterion(outputs, labels)``.
            device: Device each batch is moved to before the forward pass.

        Returns:
            ``(avg_loss, accuracy)`` for the epoch; accuracy is measured on
            the outputs computed before each optimizer step.

        Raises:
            ZeroDivisionError: If ``loader`` yields no batches.
        """
        self.train()
        return self._run_epoch(loader, criterion, device, optimizer=optimizer)

    def evaluate_model(self, loader, criterion, device):
        """Evaluate on ``loader`` in eval mode with gradients disabled.

        Args:
            loader: Sized iterable of ``(images, labels)`` batches.
            criterion: Loss called as ``criterion(outputs, labels)``.
            device: Device each batch is moved to before the forward pass.

        Returns:
            ``(avg_loss, accuracy)`` over the loader.

        Raises:
            ZeroDivisionError: If ``loader`` yields no batches.
        """
        self.eval()
        with torch.no_grad():
            return self._run_epoch(loader, criterion, device)


In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Deep network trained with Adam on binary cross-entropy.
criterion = nn.BCELoss()
model = DeepNN(input_dim).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Per-epoch accuracy history plus the final test accuracy.
deep_results = dict(model='Deep', train_acc=[], val_acc=[], test_acc=None)

for epoch in range(num_epochs):
    train_loss, train_acc = model.train_model(train_loader, optimizer, criterion, device)
    val_loss, val_acc = model.evaluate_model(val_loader, criterion, device)

    deep_results['train_acc'] += [train_acc]
    deep_results['val_acc'] += [val_acc]

    print(
        f"[Epoch {epoch+1}] Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | "
        f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}"
    )

# The test split is evaluated once, after the last epoch.
test_loss, test_acc = model.evaluate_model(test_loader, criterion, device)
deep_results.update(test_acc=test_acc)
print(f"\n[Test Accuracy] {test_acc:.4f}")

# Saved in the working directory; the comparison plot reads this file back.
with open(f"deep_results_{num_epochs}.json", "w") as f:
    json.dump(deep_results, f, indent=4)

[Epoch 1] Train Loss: 0.3409 | Train Acc: 0.8531 | Val Loss: 0.1650 | Val Acc: 0.9291
[Epoch 2] Train Loss: 0.1985 | Train Acc: 0.9204 | Val Loss: 0.1610 | Val Acc: 0.9406

[Test Accuracy] 0.7532


# **Logistic Regression vs Deep NN 비교 실험 시각화**
> train/val/test accuracy comparison by bar plot

In [None]:
# Reload the results written by the two training cells. Both JSON files must
# already exist in the working directory, otherwise open() raises
# FileNotFoundError.
with open(f"logistic_results_{num_epochs}.json", "r") as f:
    logistic_results = json.load(f)
with open(f"deep_results_{num_epochs}.json", "r") as f:
    deep_results = json.load(f)

# One entry per model, logistic first; train/val use the last recorded epoch.
models = [run['model'] for run in (logistic_results, deep_results)]
train_acc = [run['train_acc'][-1] for run in (logistic_results, deep_results)]
val_acc = [run['val_acc'][-1] for run in (logistic_results, deep_results)]
test_acc = [run['test_acc'] for run in (logistic_results, deep_results)]

# Grouped bars: three bars of width 0.25 centred on each model's tick.
width = 0.25
x = np.arange(len(models))

plt.figure(figsize=(8, 6))
plt.bar(x=x - width, height=train_acc, width=width, label='Train Accuracy')
plt.bar(x=x, height=val_acc, width=width, label='Validation Accuracy')
plt.bar(x=x + width, height=test_acc, width=width, label='Test Accuracy')

plt.title("Chest X-ray Binary Classification Results")
plt.ylabel("Accuracy")
# The y-axis starts at 0.5, so bar heights are not proportional to accuracy.
plt.ylim(0.5, 1.0)
plt.xticks(x, models)
plt.legend()
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()

# Save before show() so the written file contains the rendered figure.
plt.savefig(f"classification_results_comparison_iter_{num_epochs}.png", dpi=300)
plt.show()

FileNotFoundError: [Errno 2] No such file or directory: 'logistic_results_10.json'