In [1]:
# Initial setup cell
!pip3 install -r ../../requirements.txt

ERROR: Could not find a version that satisfies the requirement os (from versions: none)
ERROR: No matching distribution found for os

In [2]:
# Import dependencies
import os
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data

### Data Setup
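This section builds our training and test sets. `ImageFolder` assigns each image its ground-truth label from the name of the sub-folder it is stored in, and we rely on PyTorch/torchvision utilities to load, split, and batch the data for us.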

In [3]:
# Import data setup dependencies from Torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

In [6]:
# Path to our lung_image_sets
data_dir = "../../lung_colon_image_set/lung_image_sets"

# Fixed seed so the train/test partition is the same on every run; without it
# random_split draws from the global RNG and the test set changes each time.
split_seed = 42

# Number of images per mini-batch for both loaders
batch_size = 32

# Convert images into Tensors
tensor_data = transforms.Compose([transforms.ToTensor()])

# Load the dataset using ImageFolder (one sub-folder per class)
data = ImageFolder(root=data_dir, transform=tensor_data)

# Split the dataset into train (80%) and test (remaining 20%) sets
train_size = int(0.8 * len(data))
test_size = len(data) - train_size
train, test = torch.utils.data.random_split(
    data,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(split_seed),
)

# Sanity check: every image ends up in exactly one of the two subsets
assert len(train) + len(test) == len(data)

# Create data loaders for training and testing.
# Only the training loader shuffles; the test order stays fixed.
load_train = DataLoader(train, batch_size=batch_size, shuffle=True)
load_test = DataLoader(test, batch_size=batch_size, shuffle=False)

### DGW-Net: A basic CNN image classifier

DGW-Net (abbreviated from Dino-Garcia-Wang Net) is a simple CNN architecture that follows the structure below:
- Input -> CONV (3x3, 64 filters) -> ReLU -> MaxPool -> CONV (3x3, 64 filters) -> ReLU -> MaxPool -> FC

This model architecture is based on the simple CNN we implemented for CIFAR-10 in Assignment 2. We chose 64 filters per convolutional layer because this width has proven empirically successful in models such as ResNet. The implementation follows below:

In [7]:
# Define our DGW-Net Architecture
class DGWNet(nn.Module):
    """Simple CNN: (CONV 3x3 -> ReLU -> MaxPool 2x2) x 2 -> flatten -> FC.

    With the default arguments the layers are exactly those of the original
    hard-coded network: 3-channel 768x768 inputs, 64 filters per convolution
    and a 128-wide fully connected output.

    Args:
        input_size: Spatial size of the input images. Either a single int for
            square images or a ``(height, width)`` pair.
        in_channels: Number of channels of the input images.
        num_filters: Number of filters in each of the two convolutions.
        num_features: Output width of the fully connected layer.

    Raises:
        ValueError: If a spatial dimension is smaller than 4, in which case
            the two 2x2 poolings would leave nothing for the FC layer.
    """

    def __init__(self, input_size=768, in_channels=3, num_filters=64, num_features=128):
        super().__init__()

        if isinstance(input_size, int):
            height = width = input_size
        else:
            height, width = input_size
        if height < 4 or width < 4:
            raise ValueError(
                f"input_size must be at least 4 in each dimension, got {input_size!r}"
            )

        # First convolutional block. The 3x3 convolution with padding 1 keeps
        # the spatial size and the pooling halves it.
        # Defaults: (768, 768, 3) -> (384, 384, 64)
        self.conv1 = nn.Conv2d(in_channels, num_filters, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Second convolutional block. Defaults: (384, 384, 64) -> (192, 192, 64)
        self.conv2 = nn.Conv2d(num_filters, num_filters, kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # FC layer -> num_features (128 by default). Each pooling floors the
        # size to half, so two of them give size // 4 per dimension.
        flat_features = (height // 4) * (width // 4) * num_filters
        self.fc = nn.Linear(flat_features, num_features)

    def forward(self, x):
        """Run a batch of images through both conv blocks and the FC layer.

        Args:
            x: Batch of images shaped ``(batch, in_channels, height, width)``
                whose spatial size matches ``input_size``.

        Returns:
            Tensor of shape ``(batch, num_features)``.
        """
        # First convolutional block
        x = self.pool1(F.relu(self.conv1(x)))

        # Second convolutional block
        x = self.pool2(F.relu(self.conv2(x)))

        # Flatten everything except the batch dimension
        x = x.flatten(start_dim=1)

        # Fully connected layer
        return self.fc(x)

### DGW-Net + SVM
For our first configuration, we add an SVM-style linear layer on top of DGW-Net to classify the 128 features it extracts.

In [8]:
class DGWSVM(nn.Module):
    """DGW-Net feature extractor followed by a linear "SVM" scoring layer.

    Args:
        num_classes: Number of class scores produced per image. Defaults to 3
            (presumably the number of class folders in the lung image set;
            confirm against the dataset).
    """

    def __init__(self, num_classes=3):
        super().__init__()

        # Base DGWNet feature extractor
        self.extract_features = DGWNet()

        # SVM layer. The input width is read from the extractor's final layer
        # (128 for the default DGWNet) so the two cannot drift apart.
        self.svm = nn.Linear(self.extract_features.fc.out_features, num_classes)

    def forward(self, x):
        """Return one score per class for every image in the batch ``x``."""
        # Pass the input through the base CNN
        features = self.extract_features(x)

        # SVM layer: linear map from features to class scores
        return self.svm(features)

In [None]:
# Define instance of our model.
# Use the DGW-Net + SVM classifier rather than the bare DGWNet. DGWNet ends
# in its 128-feature FC layer, so training it directly with a classification
# loss would treat those 128 features as class scores and leave the
# 3-output SVM layer defined above unused.
model = DGWSVM()

# Define hyperparameters for SGD
learning_rate = 5e-3
momentum = 0.9

In [None]:
# Optimizer: SGD with momentum over every trainable parameter of the model
optimizer = optim.SGD(
    model.parameters(),
    momentum=momentum,
    lr=learning_rate,
)

# Loss function: cross-entropy between the model outputs and the class labels.
# NOTE(review): with cross-entropy the final linear layer is trained as a
# softmax classifier. A multi-class hinge loss (e.g. nn.MultiMarginLoss) would
# be the SVM-style objective; confirm which is intended.
loss_function = nn.CrossEntropyLoss()