In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!cp "/content/drive/MyDrive/ColumbiaMSCS/COMS4995_DL_for_CV/Project/Data/project_data.zip" "/content/project_data.zip"

In [3]:
!mkdir -p "/content/data"

In [4]:
!unzip -q "/content/project_data.zip" -d "/content/data"

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset, random_split
from torchvision import models
from torchvision.models import googlenet, GoogLeNet_Weights
import numpy as np
import os
import shutil
import glob
import matplotlib.pyplot as plt
import random

In [6]:
# Count of each class in the data
project_data = {}

project_data['spring'] = glob.glob('/content/data/project_data/spring/spring*')
project_data['summer'] = glob.glob('/content/data/project_data/summer/summer*')
project_data['fall'] = glob.glob('/content/data/project_data/fall/fall*')
project_data['winter'] = glob.glob('/content/data/project_data/winter/winter*')

print(f"count of spring images :  {len(project_data['spring'])}")
print(f"count of summer images :  {len(project_data['summer'])}")
print(f"count of fall images :  {len(project_data['fall'])}")
print(f"count of winter images :  {len(project_data['winter'])}")

count of spring images :  6000
count of summer images :  6000
count of fall images :  6000
count of winter images :  6000


In [7]:
SOURCE = '/content/data/project_data'
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [8]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])
dataset = datasets.ImageFolder(root=SOURCE, transform=transform)

In [9]:
# Use 4,200 images of each category for traning, 900 of each category for validation, 900 images of each category for testing
fall_samples = []
spring_samples = []
summer_samples = []
winter_samples = []

# Separate dataset into four label groups
for i in range(len(dataset)):
    _, label = dataset[i]
    if label == 0:
        fall_samples.append(i)
    elif label == 1:
        spring_samples.append(i)
    elif label == 2:
        summer_samples.append(i)
    else:
        winter_samples.append(i)

# Shuffle the samples
random.shuffle(fall_samples)
random.shuffle(spring_samples)
random.shuffle(summer_samples)
random.shuffle(winter_samples)

# Split the samples
train_indices = fall_samples[:4200] + spring_samples[:4200] + summer_samples[:4200] + winter_samples[:4200]
val_indices = fall_samples[4200:5100] + spring_samples[4200:5100] + summer_samples[4200:5100] + winter_samples[4200:5100]
test_indices = fall_samples[5100:6000] + spring_samples[5100:6000] + summer_samples[5100:6000] + winter_samples[5100:6000]

# Shuffle the indices
random.shuffle(train_indices)
random.shuffle(val_indices)
random.shuffle(test_indices)

# Create subset datasets
train_dataset = Subset(dataset, train_indices)
val_dataset = Subset(dataset, val_indices)
test_dataset = Subset(dataset, test_indices)

In [10]:
# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [11]:
# Load pre-trained GoogLeNet
model_googlenet = googlenet(weights=GoogLeNet_Weights.DEFAULT)

In [12]:
# Change the final fully connected layer's output feature size to 4 so that
# it's suitable for softmax activation for classification on four seasons
model_googlenet.fc.out_features = 4

In [13]:
# Freeze all layers except the last modified layer
for name, param in model_googlenet.named_parameters():
    if "fc" not in name:
        param.requires_grad = False
    else:
        param.requires_grad = True

In [14]:
# Use GPU
model_googlenet = model_googlenet.to(DEVICE)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model_googlenet.fc.parameters(), lr=0.001, momentum=0.9)
softmax = nn.Softmax(dim=1)

In [15]:
# Define training function
def train_model(model, train_loader, val_loader, loss_fn, optimizer, epochs, threshold):
    best_val_loss = float('inf')
    degrade_times = 0
    for epoch in range(epochs):
        train_loss = 0.0
        train_corrects = 0
        train_count = 0
        model.train()
        for x_batch, y_batch in train_loader:
            x_batch, y_batch = x_batch.to(DEVICE), y_batch.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(x_batch)
            loss = loss_fn(outputs, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * len(y_batch)
            pred = softmax(outputs)
            train_corrects += (torch.argmax(pred, dim=1) == y_batch).float().sum()
            train_count += y_batch.size(0)
        train_loss = train_loss / len(train_loader.dataset)
        train_acc = train_corrects / train_count

        val_loss = 0.0
        val_corrects = 0
        val_count = 0
        model.eval()
        with torch.no_grad():
            for x_batch, y_batch in val_loader:
                x_batch, y_batch = x_batch.to(DEVICE), y_batch.to(DEVICE)
                outputs = model(x_batch)
                loss = loss_fn(outputs, y_batch)
                val_loss += loss.item() * len(y_batch)
                pred = softmax(outputs)
                val_corrects += (torch.argmax(pred, dim=1) == y_batch).float().sum()
                val_count += y_batch.size(0)
        val_loss = val_loss / len(val_loader.dataset)
        val_acc = val_corrects / val_count
        print(f'Epoch {epoch} Train Loss {train_loss:.4f} Train Accuracy {train_acc:.4f} Validation Loss {val_loss:.4f} Validation Accuracy {val_acc:.4f}')

        # Check for early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            degrade_times = 0
            # Save the model if it has the best validation loss so far
            torch.save(model.state_dict(), './best_model_googlenet.pth')
        else:
            degrade_times += 1
            # If the number of epochs where validation loss continuously increases
            # is larger than threshold, stop training the network to avoid overfitting
            if degrade_times > threshold:
                print(f'Early stopping at epoch {epoch}')
                break

In [16]:
# Define evaluation function
def evaluate_model(model, test_loader):
    correct = 0
    count = 0
    model.eval()
    with torch.no_grad():
        for x_batch, y_batch in test_loader:
            x_batch, y_batch = x_batch.to(DEVICE), y_batch.to(DEVICE)
            outputs = model(x_batch)
            pred = softmax(outputs)
            correct += (torch.argmax(pred, dim=1) == y_batch).float().sum()
            count += y_batch.size(0)
        test_acc = correct / count
    print(f'Accuracy on test set: {test_acc:.4f}')

In [17]:
# Fine-tune the network on seasons image data
train_model(model_googlenet, train_loader, val_loader, loss_fn, optimizer, epochs=10, threshold=2)

Epoch 0 Train Loss 1.4309 Train Accuracy 0.5181 Validation Loss 0.9463 Validation Accuracy 0.6403
Epoch 1 Train Loss 0.9442 Train Accuracy 0.6284 Validation Loss 0.8505 Validation Accuracy 0.6706
Epoch 2 Train Loss 0.8828 Train Accuracy 0.6510 Validation Loss 0.8076 Validation Accuracy 0.6917
Epoch 3 Train Loss 0.8430 Train Accuracy 0.6660 Validation Loss 0.7843 Validation Accuracy 0.6981
Epoch 4 Train Loss 0.8306 Train Accuracy 0.6704 Validation Loss 0.7755 Validation Accuracy 0.6997
Epoch 5 Train Loss 0.8235 Train Accuracy 0.6757 Validation Loss 0.7623 Validation Accuracy 0.7058
Epoch 6 Train Loss 0.8167 Train Accuracy 0.6756 Validation Loss 0.7536 Validation Accuracy 0.7106
Epoch 7 Train Loss 0.8035 Train Accuracy 0.6826 Validation Loss 0.7481 Validation Accuracy 0.7103
Epoch 8 Train Loss 0.8002 Train Accuracy 0.6784 Validation Loss 0.7538 Validation Accuracy 0.7086
Epoch 9 Train Loss 0.7973 Train Accuracy 0.6848 Validation Loss 0.7529 Validation Accuracy 0.7089


In [18]:
# Load the best model
model_googlenet.load_state_dict(torch.load('./best_model_googlenet.pth'))
# Evaluate on the test set
evaluate_model(model_googlenet, test_loader)

Accuracy on test set: 0.6958


In [19]:
# Unfreeze all layers
for param in model_googlenet.parameters():
    param.requires_grad = True

In [20]:
# Set the optimizer with a smaller learning rate
optimizer = optim.SGD(model_googlenet.parameters(), lr=0.0005, momentum=0.9)
# Continue fine-tuning the network
train_model(model_googlenet, train_loader, val_loader, loss_fn, optimizer, epochs=10, threshold=2)

Epoch 0 Train Loss 0.7518 Train Accuracy 0.7059 Validation Loss 0.6652 Validation Accuracy 0.7397
Epoch 1 Train Loss 0.6448 Train Accuracy 0.7493 Validation Loss 0.6402 Validation Accuracy 0.7508
Epoch 2 Train Loss 0.5785 Train Accuracy 0.7758 Validation Loss 0.6216 Validation Accuracy 0.7597
Epoch 3 Train Loss 0.5286 Train Accuracy 0.7980 Validation Loss 0.6205 Validation Accuracy 0.7586
Epoch 4 Train Loss 0.4741 Train Accuracy 0.8174 Validation Loss 0.6189 Validation Accuracy 0.7614
Epoch 5 Train Loss 0.4225 Train Accuracy 0.8410 Validation Loss 0.6297 Validation Accuracy 0.7628
Epoch 6 Train Loss 0.3822 Train Accuracy 0.8596 Validation Loss 0.6445 Validation Accuracy 0.7597
Epoch 7 Train Loss 0.3353 Train Accuracy 0.8780 Validation Loss 0.6669 Validation Accuracy 0.7575
Early stopping at epoch 7


In [21]:
# Load the best model
model_googlenet.load_state_dict(torch.load('./best_model_googlenet.pth'))
# Evaluate on the test set
evaluate_model(model_googlenet, test_loader)

Accuracy on test set: 0.7372
