In [22]:
import torch
from torch import nn
from torch import optim
import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import os

# Dataset

In [23]:
base_dir = r"/Users/h383kim/pytorch/AlexNet/splitted"
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'val')
test_dir = os.path.join(base_dir, 'test')

In [24]:
img_transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor()
])

In [25]:
train_dataset = ImageFolder(root=train_dir,
                            transform=img_transform)
val_dataset = ImageFolder(root=val_dir,
                          transform=img_transform)
test_dataset = ImageFolder(root=test_dir,
                          transform=img_transform)                            

In [26]:
BATCH_SIZE = 16
train_dataloader = DataLoader(dataset=train_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              num_workers=os.cpu_count())
val_dataloader = DataLoader(dataset=val_dataset,
                            batch_size=BATCH_SIZE,
                            shuffle=True,
                            num_workers=os.cpu_count())
test_dataloader = DataLoader(dataset=test_dataset,
                             batch_size=BATCH_SIZE,
                             shuffle=False,
                             num_workers=os.cpu_count())

# Model

In [27]:
VGGNet_type = {
    "VGG11" : [64, "MaxPool", 128, "MaxPool", 256, 256, "MaxPool", 512, 512, "MaxPool", 512, 512, "MaxPool"],
    "VGG13" : [64, 64, "MaxPool", 128, 128, "MaxPool", 256, 256, "MaxPool", 512, 512, "MaxPool", 512, 512, "MaxPool"],
    "VGG16" : [64, 64, "MaxPool", 128, 128, "MaxPool", 256, 256, 256, "MaxPool", 512, 512, 512, "MaxPool", 512, 512, 512, "MaxPool"],
    "VGG19" : [64, 64, "MaxPool", 128, 128, "MaxPool", 256, 256, 256, 256, "MaxPool", 512, 512, 512, 512, "MaxPool", 512, 512, 512, 512, "MaxPool"]
}

In [28]:
class VGGNet(nn.Module):
    def __init__(self, in_channels, model="VGG19", num_classes=10):
        super().__init__()

        self.input_size=in_channels

        self.conv = self._create_conv_layers(VGGNet_type[model])

        self.FC1 = nn.Sequential(
            nn.Flatten(),
            nn.Linear(512*7*7, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
        )

        self.FC2 = nn.Sequential(
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5)
        )

        self.FC3 = nn.Sequential(
            nn.Linear(4096, num_classes)
        )
        
    def forward(self, x):
        x = self.conv(x)
        x = self.FC1(x)
        x = self.FC2(x)
        x = self.FC3(x)

        return x
    
    def _create_conv_layers(self, layers):
        conv_blocks = []
        in_channels = self.input_size
        
        for layer in layers:
            # conv layer
            if isinstance(layer, int):
                out_channels = layer
                
                conv_blocks.append(nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=(3, 3), stride=1, padding=1))
                conv_blocks.append(nn.BatchNorm2d(out_channels))
                conv_blocks.append(nn.ReLU())
                # out_channels becomes the next in_channels
                in_channels = out_channels

            # maxpooling layer
            else:
                conv_blocks.append(nn.MaxPool2d(kernel_size=(2, 2), stride=2))


        return nn.Sequential(*conv_blocks)

# Train / Evaluation

In [29]:
def train(model: torch.nn.Module,
          dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module=nn.CrossEntropyLoss()):
    # Put the model into train mode
    model.train()
    train_loss, train_acc, correct = 0, 0, 0

    for batch, (X, y) in enumerate(dataloader):
        # Send the data into cpu or gpu
        X, y = X.to(DEVICE), y.to(DEVICE)

        # Forward pass
        preds_prob = model(X)

        # Calculate the loss
        loss = loss_fn(preds_prob, y)
        train_loss += loss.item()

        # Optimizer zero_grad
        optimizer.zero_grad()

        # Backpropagtion
        loss.backward()
        # Optimizer step
        optimizer.step()

        pred = torch.argmax(preds_prob, dim=1)
        correct += pred.eq(y.view_as(pred)).sum().item()

    train_loss /= len(dataloader)
    train_acc = 100. * correct / len(dataloader.dataset)
    return train_loss, train_acc

In [30]:
def evaluate(model: torch.nn.Module,
             dataloader: torch.utils.data.DataLoader,
             optimizer: torch.optim.Optimizer,
             loss_fn: torch.nn.Module=nn.CrossEntropyLoss()):
    # Put the model into eval mode
    model.eval()
    test_loss, test_acc, correct = 0, 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(DEVICE), y.to(DEVICE)

            # Forward pass
            preds_prob = model(X)

            # Calculate the loss
            loss = loss_fn(preds_prob, y).item()
            test_loss += loss

            pred = torch.argmax(preds_prob, dim=1)
            correct += pred.eq(y.view_as(pred)).sum().item()

        test_loss /= len(dataloader)
        test_acc = 100. * correct / len(dataloader.dataset)

    return test_loss, test_acc

In [31]:
import time
import copy

def train_baseline(model: torch.nn.Module, 
                   train_dataloader: torch.utils.data.DataLoader, 
                   val_dataloader: torch.utils.data.DataLoader,
                   optimizer: torch.optim.Optimizer,
                   loss_fn: torch.nn.Module=nn.CrossEntropyLoss(),
                   num_epochs: int=30):
    best_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())
    
    for epoch in range(1, num_epochs + 1):
        start_time = time.time()
        # Train the model and print save the results
        train_loss, train_acc = train(model=model,
                                      dataloader=train_dataloader, 
                                      optimizer=optimizer,
                                      loss_fn=loss_fn)
        
        val_loss, val_acc = evaluate(model=model,
                                     dataloader=val_dataloader,
                                     optimizer=optimizer,
                                     loss_fn=loss_fn)
        
        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            
        end_time = time.time()
        time_elapsed = end_time - start_time
        print(f"------------ epoch {epoch} ------------")
        print(f"Train loss: {train_loss:.4f} | Train acc: {train_acc:.2f}%")
        print(f"Val loss: {val_loss:.4f} | Val acc: {val_acc:2f}%")
        print(f"Time taken: {time_elapsed / 60:.0f}min {time_elapsed % 60:.0f}s")
        
    model.load_state_dict(best_model_wts)
    return model  

# Train Baseline Model

In [32]:
DEVICE = "mps" if torch.backends.mps.is_available() else "cpu"
DEVICE

'mps'

In [36]:
VGG = VGGNet(in_channels=3, model="VGG11", num_classes=10).to(DEVICE)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(VGG.parameters(), lr = 0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.3)

In [37]:
base = train_baseline(model=VGG,
                      train_dataloader=train_dataloader,
                      val_dataloader=val_dataloader,
                      optimizer=optimizer,
                      loss_fn=loss_fn,
                      num_epochs=3)

------------ epoch 1 ------------
Train loss: 2.4795 | Train acc: 18.18%
Val loss: 2.2058 | Val acc: 18.706963%
Time taken: 10min 39s
------------ epoch 2 ------------
Train loss: 2.2124 | Train acc: 18.63%
Val loss: 2.2082 | Val acc: 18.592196%
Time taken: 10min 34s
------------ epoch 3 ------------
Train loss: 2.2115 | Train acc: 18.34%
Val loss: 2.2095 | Val acc: 18.439174%
Time taken: 10min 36s


In [34]:
import torchsummary
from torchsummary import summary

summary(VGG.to("cpu"), input_size=(3, 224, 224), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
       BatchNorm2d-2         [-1, 64, 224, 224]             128
              ReLU-3         [-1, 64, 224, 224]               0
         MaxPool2d-4         [-1, 64, 112, 112]               0
            Conv2d-5        [-1, 128, 112, 112]          73,856
       BatchNorm2d-6        [-1, 128, 112, 112]             256
              ReLU-7        [-1, 128, 112, 112]               0
         MaxPool2d-8          [-1, 128, 56, 56]               0
            Conv2d-9          [-1, 256, 56, 56]         295,168
      BatchNorm2d-10          [-1, 256, 56, 56]             512
             ReLU-11          [-1, 256, 56, 56]               0
           Conv2d-12          [-1, 256, 56, 56]         590,080
      BatchNorm2d-13          [-1, 256, 56, 56]             512
             ReLU-14          [-1, 256,

In [54]:
import gc
gc.collect()
torch.mps.empty_cache()