In [1]:
# Initial setup cell
!pip3 install -r ../../requirements.txt

[31mERROR: Could not find a version that satisfies the requirement os (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for os[0m[31m
[0m

In [2]:
# Import dependencies
import os
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data

### Data Setup
Initializes our train and test set and stores the ground truth of each image. We use Torch libraries to handle this for us.

In [3]:
# Import data setup dependencies
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.data import SubsetRandomSampler
from torchvision.datasets import ImageFolder
from sklearn.model_selection import KFold

In [9]:
# Path to our lung_image_sets
data_dir = "../../lung_colon_image_set/lung_image_sets"

# Convert images into Tensors
tensor_data = transforms.Compose([transforms.ToTensor()])

# Load the dataset using ImageFolder
data = ImageFolder(root=data_dir, transform=tensor_data)

# Split the dataset into train and test sets
train_size = int(0.8 * len(data))
test_size = len(data) - train_size
train, test = torch.utils.data.random_split(data, [train_size, test_size])

# Create data loaders for training and testing
load_train = DataLoader(train, batch_size=32, shuffle=True)
load_test = DataLoader(test, batch_size=32, shuffle=False)

### DGW-Net: A basic CNN image classifier

DGW-Net (abbreviated from Dino-Garcia-Wang Net) is a simple CNN architecture that follows the structure below:
- Input -> CONV (3x3, 64 filters) -> ReLU -> MaxPool -> CONV (3x3, 64 filters) -> ReLU -> MaxPool -> FC

This model architecture follows from our simple CNN implementation for CIFAR-10 on Assignment 2. We decided on 64 filters as empirically this number has proven to be successful in models like ResNet. The implementation follows below:

In [10]:
# Define our DGW-Net Architecture
class DGWNet(nn.Module):
    def __init__(self):
        super(DGWNet, self).__init__()
        
        # First convolutional block: 64 filters used. (768, 768, 3) -> (384, 384, 64)
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        # Second convolutional block: 64 filters used. (384, 384, 64) -> (192, 192, 64)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        # FC Layer -> 128 Features
        self.fc = nn.Linear(192 * 192 * 64, 128)
        
    def forward(self, x):
        # First convolutional block
        x = self.conv1(x)
        x = F.relu(x)
        x = self.pool1(x)
        
        # Second convolutional block
        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool2(x)
        
        # Flatten the output from the convolutional blocks
        x = x.view(x.size(0), -1)
        
        # Fully connected layer for classification
        x = self.fc(x)
        
        return x

### DGW-Net + SVM
For our first situation, we will use SVM to do classification on our extracted features.

In [11]:
class DGWSVM(nn.Module):
  def __init__(self):
      super(DGWSVM, self).__init__()
      
      # Base DGWNet feature extractor -> 128 features
      self.extract_features = DGWNet()
      
      # SVM layer
      self.svm = nn.Linear(128, 3) 
  
  def forward(self, x):
        # Pass the input through the base CNN
        x = self.extract_features(x)
        
        # SVM layer
        x = self.svm(x)
        
        return x

In [12]:
# Define instance of our model
model = DGWSVM()

# Define hyperparameters
learning_rate = 5e-3
momentum = 0.9

# Define our loss function and optimizer
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

# Check current device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [15]:
# Train our model
num_epochs = 1

# Create a KFold object with 5 splits
kfold = KFold(n_splits=5, shuffle=True, random_state=231)

# Iterate over folds in training
for fold, (train_indices, val_indices) in enumerate(kfold.split(data), 1):
    # Create data samplers for train and validation sets
    train_sampler = SubsetRandomSampler(train_indices)
    val_sampler = SubsetRandomSampler(val_indices)

    # Create data loaders for train and validation sets
    train_loader = DataLoader(data, batch_size=32, sampler=train_sampler)
    val_loader = DataLoader(data, batch_size=32, sampler=val_sampler)

    for epoch in range(num_epochs):
        running_loss = 0.0
        num_samples = len(load_train)
        sample_count = 0
        
        for images, labels in load_train:
            sample_count += 1
            print(f"Running sample {sample_count} out of {num_samples}")
            
            # Move the input data to the device (CPU or GPU)
            images = images.to(device)
            labels = labels.to(device)
            
            # Zero the parameter gradients
            optimizer.zero_grad()
            
            # Forward pass
            outputs = model(images)
            loss = loss_function(outputs, labels)
            
            # Backward pass and optimization
            loss.backward()
            optimizer.step()
            
            # Update running loss
            running_loss += loss.item()
        
        # Print the average loss for the epoch
        epoch_loss = running_loss / len(load_train)
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

Running sample 1 out of 375
Running sample 2 out of 375
Running sample 3 out of 375
Running sample 4 out of 375
Running sample 5 out of 375
Running sample 6 out of 375
Running sample 7 out of 375
Running sample 8 out of 375
Running sample 9 out of 375
Running sample 10 out of 375
Running sample 11 out of 375


KeyboardInterrupt: 