In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, WeightedRandomSampler
import numpy as np
import os
import time
import copy
from imblearn.over_sampling import SMOTE
from sklearn.metrics import precision_score, recall_score, f1_score

In [2]:
# Data Preprocessing and Augmentation
# 'train' applies random augmentation; 'val' applies a fixed resize + center crop so that
# validation metrics are comparable from epoch to epoch.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),  # Random crop, rescaled to 224x224
        transforms.RandomHorizontalFlip(),  # Randomly flip images horizontally
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),  # Randomly perturb brightness, contrast, saturation, and hue
        transforms.ToTensor(),  # Convert images to PyTorch tensors
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # Normalize with mean and std of ImageNet dataset
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),  # Scale the shorter side to 256 px, keeping the aspect ratio (not 256x256)
        transforms.CenterCrop(224),  # Crop the center part to 224x224
        transforms.ToTensor(),  # Convert images to PyTorch tensors
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # Normalize with mean and std of ImageNet dataset
    ]),
}

data_dir = r'D:\GoogleStreetView\Rumble Strip.v6i.folder'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'val']}

# Only the 'val' loader is created here; the 'train' loader is added to this dict later,
# once its class-balancing sampler has been built.
# Validation is not shuffled: sample order does not affect the metrics, and a fixed order
# keeps evaluation deterministic.
dataloaders = {'val': DataLoader(image_datasets['val'], batch_size=32, shuffle=False, num_workers=4)}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
# Handling Class Imbalance with WeightedRandomSampler
# ImageFolder keeps every sample's class index in `.targets`. Reading it directly avoids
# iterating the dataset, which would decode and augment every training image just to
# recover its label.
targets = list(image_datasets['train'].targets)
class_counts = np.bincount(targets)
# Inverse-frequency weights: the rarer the class, the larger the weight.
class_weights = 1. / class_counts
# One weight per training sample, looked up by that sample's class index.
samples_weights = class_weights[np.asarray(targets, dtype=np.int64)]

# Draw len(dataset) samples per epoch, each picked with probability proportional to its weight.
sampler = WeightedRandomSampler(samples_weights, len(samples_weights))
# `sampler` and `shuffle` are mutually exclusive in DataLoader, so shuffle is left unset.
dataloaders['train'] = DataLoader(image_datasets['train'], batch_size=32, sampler=sampler, num_workers=4)

In [4]:
class FocalLoss(nn.Module):
    """Multi-class focal loss built on softmax cross-entropy.

    For every sample the loss is ``alpha * (1 - p_t) ** gamma * CE``, where ``CE`` is that
    sample's cross-entropy and ``p_t = exp(-CE)`` is the probability the model assigns to
    the true class. Samples the model already classifies confidently are down-weighted.

    Args:
        alpha: Constant factor applied to every sample's loss.
        gamma: Focusing exponent; ``gamma=0`` reduces to alpha-scaled cross-entropy.
        reduction: ``'mean'`` or ``'sum'`` to reduce over the batch; any other value
            returns the unreduced per-sample loss tensor.
    """

    def __init__(self, alpha=1, gamma=2, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        """Compute the focal loss.

        Args:
            inputs: Raw (un-normalized) class scores, one row per sample.
            targets: Class indices, one per sample.

        Returns:
            A scalar tensor for ``'mean'``/``'sum'``, otherwise one loss value per sample.
        """
        # reduction='none' is essential: the focal factor must be computed from each
        # sample's own cross-entropy. With the default mean reduction the factor would be
        # derived from a single batch-averaged scalar, which is not focal loss and makes
        # the 'sum' / 'none' reductions meaningless.
        ce_loss = nn.functional.cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-ce_loss)  # probability assigned to the true class
        focal_loss = self.alpha * (1 - pt) ** self.gamma * ce_loss

        if self.reduction == 'mean':
            return focal_loss.mean()
        if self.reduction == 'sum':
            return focal_loss.sum()
        return focal_loss

# NOTE: `criterion` is rebound to a class-weighted nn.CrossEntropyLoss in the model-setup
# cell further down, so this focal-loss instance is only used if that assignment is removed.
criterion = FocalLoss(
    alpha=2.0,
    gamma=2.0,
)

In [15]:
# Backbone: pretrained DenseNet-121 whose classifier is replaced by a fresh linear layer
# with one output per dataset class.
model_ft = models.densenet121(pretrained=True)
num_ftrs = model_ft.classifier.in_features
model_ft.classifier = nn.Linear(in_features=num_ftrs, out_features=len(class_names))
model_ft = model_ft.to(device)

# Alternative backbone (disabled):
# model_ft = models.inception_v3(pretrained=True)
# num_ftrs = model_ft.fc.in_features
# model_ft.fc = nn.Linear(num_ftrs, len(class_names))
# model_ft.AuxLogits.fc = nn.Linear(model_ft.AuxLogits.fc.in_features, len(class_names))  # Handle auxiliary classifier
# model_ft = model_ft.to(device)

# Loss, Optimizer and Learning Rate Scheduler

# Cross-entropy weighted by inverse class frequency. This assignment replaces whatever
# `criterion` was bound to by earlier cells.
criterion = nn.CrossEntropyLoss(
    weight=torch.tensor(class_weights, dtype=torch.float, device=device)
)
# All parameters are optimized (the backbone is not frozen).
optimizer_ft = optim.Adam(params=model_ft.parameters(), lr=1e-3)
# Multiply the learning rate by 0.1 every 7 scheduler steps.
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer_ft, step_size=7, gamma=0.1)

# Early Stopping
class EarlyStopping:
    """Signal that training should stop once validation loss stops improving.

    Call the instance once per epoch with the validation loss. Whenever the loss is at
    least as good as the best seen so far, the model's ``state_dict`` is written to
    ``path`` and the patience counter is reset. After ``patience`` consecutive calls
    without improvement, ``early_stop`` becomes True.

    Args:
        patience: Consecutive non-improving calls tolerated before stopping.
        verbose: If True, print a message each time a checkpoint is saved.
        path: Checkpoint file to write. ``None`` selects ``DEFAULT_PATH``.
    """

    # Raw string: the backslashes are Windows path separators, not escape sequences.
    DEFAULT_PATH = r'D:\GoogleStreetView\TrainingDense121_0605\checkpoint.pt'

    def __init__(self, patience=7, verbose=False, path=None):
        self.patience = patience
        self.verbose = verbose
        self.path = self.DEFAULT_PATH if path is None else path
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # `np.inf`, not `np.Inf`: the capitalized alias was removed in NumPy 2.0.
        self.val_loss_min = np.inf

    def __call__(self, val_loss, model):
        """Record one validation loss and checkpoint `model` if it did not get worse."""
        # Negate so that "higher score is better".
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        """Write `model.state_dict()` to `self.path` and remember `val_loss` as the minimum."""
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        # Create the *parent* directory only. Calling makedirs on the checkpoint path
        # itself would create a directory named like the file and make torch.save fail.
        checkpoint_dir = os.path.dirname(self.path)
        if checkpoint_dir:
            os.makedirs(checkpoint_dir, exist_ok=True)
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

# Module-level instance; train_model reads it as a global. Its counter and best score
# persist across calls, so re-create it before starting a fresh training run.
early_stopping = EarlyStopping(
    patience=100,
    verbose=True,
)

# Training Function
def train_model(model, criterion, optimizer, scheduler, num_epochs=100):
    """Train `model` and return it loaded with the weights that gave the best validation F1.

    Each epoch runs a 'train' phase followed by a 'val' phase and prints loss, accuracy,
    F1, precision and recall for both. Precision/recall/F1 are binary metrics that treat
    class index 1 as the positive class and class index 0 as the negative class.

    Reads the notebook globals `dataloaders`, `dataset_sizes`, `device` and
    `early_stopping`. `early_stopping` is called once per epoch with the validation loss
    and ends training when its `early_stop` flag is set.

    Args:
        model: Network to train; updated in place.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        optimizer: Optimizer over `model`'s parameters.
        scheduler: Learning-rate scheduler, stepped once after every training phase.
        num_epochs: Maximum number of epochs.

    Returns:
        `model`, with the state_dict of the epoch that had the highest validation F1
        (the initial weights if no epoch achieved F1 > 0).
    """

    def _ratio(numerator, denominator):
        # A metric with an empty denominator (e.g. no positive predictions at all) is
        # reported as 0.0 instead of producing nan or raising ZeroDivisionError.
        return numerator / denominator if denominator else 0.0

    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_f1 = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        val_loss = None
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is equivalent to eval()

            running_loss = 0.0
            running_corrects = 0
            total_tp = 0
            total_fp = 0
            total_fn = 0
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Gradients are tracked only while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # loss is a batch mean; weight it by batch size so the epoch figure is per-sample.
                running_loss += loss.item() * inputs.size(0)

                true_labels = labels.cpu()
                predicted_labels = preds.cpu()
                running_corrects += int((predicted_labels == true_labels).sum())
                # Confusion-matrix counts with class 1 as positive and class 0 as negative.
                total_tp += int(((true_labels == 1) & (predicted_labels == 1)).sum())
                total_fp += int(((true_labels == 0) & (predicted_labels == 1)).sum())
                total_fn += int(((true_labels == 1) & (predicted_labels == 0)).sum())

            if is_train:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            precision = _ratio(total_tp, total_tp + total_fp)
            recall = _ratio(total_tp, total_tp + total_fn)
            f1 = _ratio(2 * (precision * recall), precision + recall)
            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f} F1: {f1:.4f} Precision: {precision:.4f} Recall: {recall:.4f}')

            if phase == 'val':
                # Keep the validation loss explicitly instead of relying on whichever
                # phase happened to run last in the loop.
                val_loss = epoch_loss
                if f1 > best_f1:
                    best_f1 = f1
                    best_model_wts = copy.deepcopy(model.state_dict())

        early_stopping(val_loss, model)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    # Model selection is by validation F1, so report it as F1 rather than accuracy.
    print(f'Best val F1: {best_f1:.4f}')

    model.load_state_dict(best_model_wts)
    return model

# Train the Model (up to 200 epochs; early stopping may end the run sooner)
model_ft = train_model(
    model=model_ft,
    criterion=criterion,
    optimizer=optimizer_ft,
    scheduler=exp_lr_scheduler,
    num_epochs=200,
)

# Evaluate the Model
def evaluate_model(model, dataloaders, dataset_sizes):
    """Print `model`'s accuracy on the 'val' split.

    Args:
        model: Network to evaluate; switched to eval mode.
        dataloaders: Mapping that provides a batch iterable under the key 'val'.
        dataset_sizes: Mapping that provides the sample count under the key 'val'.

    Batches are moved to the notebook-global `device`. Nothing is returned; the
    accuracy is only printed.
    """
    model.eval()
    running_corrects = 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for batch_inputs, batch_labels in dataloaders['val']:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_inputs)
            predictions = torch.max(logits, 1)[1]  # index of the highest-scoring class
            running_corrects += torch.sum(predictions == batch_labels.data)

    accuracy = running_corrects.double() / dataset_sizes['val']
    print(f'Validation Accuracy: {accuracy:.4f}')

# Report validation accuracy of the weights returned by training.
evaluate_model(
    model=model_ft,
    dataloaders=dataloaders,
    dataset_sizes=dataset_sizes,
)


Epoch 0/199
----------
train Loss: 0.6031 Acc: 0.6070 F1: 0.6658 Precision: 0.5655 Recall: 0.8092
val Loss: 0.4600 Acc: 0.7472 F1: 0.7132 Precision: 0.6062 Recall: 0.8661
Validation loss decreased (inf --> 0.460005).  Saving model ...
Epoch 1/199
----------
train Loss: 0.4486 Acc: 0.7348 F1: 0.7648 Precision: 0.6868 Recall: 0.8629
val Loss: 0.4542 Acc: 0.7277 F1: 0.7032 Precision: 0.5819 Recall: 0.8884
Validation loss decreased (0.460005 --> 0.454235).  Saving model ...
Epoch 2/199
----------
train Loss: 0.4207 Acc: 0.7759 F1: 0.7953 Precision: 0.7342 Recall: 0.8675
val Loss: 0.3243 Acc: 0.8817 F1: 0.8470 Precision: 0.7984 Recall: 0.9018
Validation loss decreased (0.454235 --> 0.324309).  Saving model ...
Epoch 3/199
----------
train Loss: 0.3884 Acc: 0.7989 F1: 0.8052 Precision: 0.7504 Recall: 0.8686
val Loss: 0.3409 Acc: 0.8833 F1: 0.8349 Precision: 0.8585 Recall: 0.8125
Epoch 4/199
----------
train Loss: 0.3888 Acc: 0.7985 F1: 0.8042 Precision: 0.7513 Recall: 0.8652
val Loss: 0.3173