In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, WeightedRandomSampler
import numpy as np
import os
import time
import copy
from sklearn.metrics import precision_score, recall_score, f1_score
from skimage import exposure

# Data Preprocessing and Augmentation
# One torchvision pipeline per dataset split, keyed by the split name used for
# the ImageFolder sub-directories ('train' / 'val').
data_transforms = {
    'train': transforms.Compose([
         transforms.RandomResizedCrop(299),  # Random crop, resized to 299x299 (the input size used for Inception v3)
        transforms.RandomHorizontalFlip(),  # Randomly flip images horizontally
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),  # Randomly jitter brightness, contrast, saturation, and hue
        transforms.ToTensor(),  # Convert images to PyTorch tensors
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # Normalize with the ImageNet per-channel mean and std
    ]),
    'val': transforms.Compose([
        transforms.Resize(320),  # Resize so the shorter side is 320 px (aspect ratio kept; NOT a 320x320 square)
        transforms.CenterCrop(299),  # Deterministic 299x299 crop from the image center
        # Disabled step; AdaptiveEqualization is not defined in the visible part of this notebook.
        # AdaptiveEqualization(),
        transforms.ToTensor(),  # Convert images to PyTorch tensors
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # Same normalization as the training split
    ]),
}

In [10]:


# Root folder holding one sub-directory per split ('train', 'val').
data_dir = r'D:\GoogleStreetView\Datasetv2_split'

# One ImageFolder per split, each wired to the transform pipeline of the same name.
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    for split in ('train', 'val')
}

# Only the validation loader is created here; the training loader is attached
# to this dict later, once its weighted sampler has been built.
dataloaders = {
    'val': DataLoader(image_datasets['val'], batch_size=32, shuffle=True, num_workers=4),
}

# Number of samples per split, used to average the running loss/accuracy.
dataset_sizes = {split: len(folder) for split, folder in image_datasets.items()}
class_names = image_datasets['train'].classes

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [8]:
# Class index of every training sample. ImageFolder already stores these in
# `.targets`; iterating the dataset instead would decode and augment every
# image on disk just to throw the pixels away.
targets = image_datasets['train'].targets

# Inverse-frequency weight per class, then one weight per sample (looked up by
# that sample's class index) so that rare classes are drawn more often.
class_counts = np.bincount(targets)
class_weights = 1. / class_counts
samples_weights = class_weights[targets]

# Draw len(dataset) samples per epoch according to the per-sample weights.
sampler = WeightedRandomSampler(samples_weights, len(samples_weights))
# `sampler` and `shuffle=True` are mutually exclusive in DataLoader, so no shuffle flag here.
dataloaders['train'] = DataLoader(image_datasets['train'], batch_size=32, sampler=sampler, num_workers=4)

# Define the Model
# ImageNet-pretrained Inception v3 whose two classification heads (main `fc`
# and the auxiliary `AuxLogits.fc`) are replaced by fresh linear layers with
# one output per dataset class.
model_ft = models.inception_v3(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, len(class_names))
model_ft.AuxLogits.fc = nn.Linear(model_ft.AuxLogits.fc.in_features, len(class_names))  # Auxiliary head must emit the same number of classes
model_ft = model_ft.to(device)
# # Alternative backbone: ResNet-34 (pair it with train_model_resnet)
# model_ft = models.resnet34(pretrained=True)
# num_ftrs = model_ft.fc.in_features
# model_ft.fc = nn.Linear(num_ftrs, len(class_names))
# model_ft = model_ft.to(device)

# Optimizer and Learning Rate Scheduler
# Adam over all parameters (nothing is frozen); the learning rate is multiplied
# by 0.1 after every 7 scheduler steps.
optimizer_ft = optim.Adam(model_ft.parameters(), lr=0.001)
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

# Early Stopping and Best Model Saving Based on F1 Score
class BestModelSaver:
    """Early-stopping tracker that checkpoints the model on every non-worse F1 score.

    Call the instance once per epoch with the validation F1 score and the
    model. A score that is greater than or equal to the best seen so far saves
    the model's ``state_dict`` and resets the patience counter; a lower score
    increments the counter, and ``early_stop`` becomes True once the counter
    reaches ``patience``.

    Args:
        patience: Number of consecutive non-improving calls tolerated before
            ``early_stop`` is set.
        verbose: If True, print a message each time a checkpoint is written.
        save_dir: Directory that receives the checkpoint files. It is created
            on demand. Defaults to the directory this notebook originally
            hard-coded.
    """

    def __init__(self, patience=7, verbose=False,
                 save_dir=r'D:\GoogleStreetView\TrainingInception_0616'):
        self.patience = patience
        self.verbose = verbose
        self.save_dir = save_dir
        self.counter = 0          # consecutive calls without improvement
        self.best_score = None    # best F1 seen so far (None until the first call)
        self.early_stop = False   # set once `counter` reaches `patience`
        self.f1_score_max = 0     # F1 of the most recently saved checkpoint

    def __call__(self, f1_score, model):
        """Record this epoch's F1 score; checkpoint or count towards early stopping."""
        score = f1_score
        if self.best_score is not None and score < self.best_score:
            # Strictly worse than the best so far: consume one unit of patience.
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
            return

        # First call, a tie, or an improvement: save and start counting afresh.
        self.best_score = score
        self.save_checkpoint(f1_score, model)
        self.counter = 0

    def save_checkpoint(self, f1_score, model):
        """Write ``model.state_dict()`` to ``save_dir`` as ``best_model_<f1>.pt``."""
        if self.verbose:
            print(f'F1 Score improved ({self.f1_score_max:.4f} --> {f1_score:.4f}).  Saving model ...')
        # exist_ok avoids the check-then-create race of a separate os.path.exists test.
        os.makedirs(self.save_dir, exist_ok=True)
        torch.save(model.state_dict(), os.path.join(self.save_dir, f'best_model_{f1_score:.4f}.pt'))
        self.f1_score_max = f1_score

# Module-level early-stopping/checkpoint helper. The training functions look it
# up by this exact global name once per epoch, so it must exist before they run.
best_model_saver = BestModelSaver(patience=10, verbose=True)



# Train the Model
# Cross-entropy weighted by the inverse class frequencies computed for the sampler.
# NOTE(review): the training loader already rebalances classes through
# WeightedRandomSampler, so weighting the loss too compensates for the
# imbalance twice - confirm this is intended.
criterion = nn.CrossEntropyLoss(weight=torch.tensor(class_weights, dtype=torch.float).to(device))
# model_ft = train_model_resnet(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=100)
# NOTE(review): train_model_inception is defined in a later cell (In [11]); on a
# fresh kernel this call raises NameError unless that cell has been run first.
model_ft = train_model_inception(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=200)

# Evaluation Function with Adjustable Threshold
def predict_with_threshold(model, dataloader, threshold=0.5):
    """Predict binary labels by thresholding the softmax probability of class 1.

    The model is switched to eval mode and run without gradient tracking.
    Inputs are moved to the device the model's parameters live on, so the
    function does not depend on a notebook-level ``device`` variable being in
    sync with the model.

    Args:
        model: Classifier returning logits of shape (batch, num_classes) with
            at least two classes; column 1 is treated as the positive class.
        dataloader: Iterable yielding ``(inputs, labels)`` tensor batches.
        threshold: A sample is predicted as class 1 when its class-1
            probability is strictly greater than this value.

    Returns:
        Tuple ``(preds, labels)`` of 1-D NumPy arrays in iteration order. Both
        are empty arrays when ``dataloader`` yields no batches.
    """
    model.eval()

    try:
        model_device = next(model.parameters()).device
    except StopIteration:
        # Parameter-free model: fall back to the notebook-level device.
        model_device = device

    pred_batches = []
    label_batches = []

    with torch.no_grad():
        for inputs, labels in dataloader:
            outputs = model(inputs.to(model_device))
            probs = torch.nn.functional.softmax(outputs, dim=1)
            preds = (probs[:, 1] > threshold).int()

            pred_batches.append(preds.cpu().numpy())
            # Labels are only collected, never fed to the model, so they are
            # not shipped to the accelerator first.
            label_batches.append(labels.cpu().numpy())

    if not pred_batches:
        return np.array([]), np.array([])
    return np.concatenate(pred_batches), np.concatenate(label_batches)

# Evaluate the Model with Custom Threshold
threshold = 0.5  # Decision cut-off on the class-1 probability; tune as needed
preds, labels = predict_with_threshold(model_ft, dataloaders['val'], threshold=threshold)

# Validation metrics, evaluated in the order precision -> recall -> F1
precision, recall, f1 = (
    metric(labels, preds) for metric in (precision_score, recall_score, f1_score)
)

# Report the three metrics, one per line
print(
    f'Precision: {precision:.4f}',
    f'Recall: {recall:.4f}',
    f'F1 Score: {f1:.4f}',
    sep='\n',
)



Epoch 0/99
----------
train Loss: 0.7574 Acc: 0.5203 Precision: 0.5580 Recall: 0.3026 F1: 0.3924
val Loss: 0.6729 Acc: 0.5226 Precision: 0.6875 Recall: 0.3115 F1: 0.4288
F1 Score improved (0.0000 --> 0.4288).  Saving model ...
Epoch 1/99
----------
train Loss: 0.6552 Acc: 0.5766 Precision: 0.6355 Recall: 0.3395 F1: 0.4426
val Loss: 0.6513 Acc: 0.6980 Precision: 0.7141 Recall: 0.7919 F1: 0.7510
F1 Score improved (0.4288 --> 0.7510).  Saving model ...
Epoch 2/99
----------
train Loss: 0.6287 Acc: 0.6172 Precision: 0.6951 Recall: 0.4387 F1: 0.5379
val Loss: 1.0114 Acc: 0.5602 Precision: 0.6525 Recall: 0.5033 F1: 0.5683
Epoch 3/99
----------
train Loss: 0.5194 Acc: 0.7297 Precision: 0.8746 Recall: 0.5535 F1: 0.6780
val Loss: 0.4264 Acc: 0.7794 Precision: 0.9732 Recall: 0.6340 F1: 0.7678
F1 Score improved (0.7510 --> 0.7678).  Saving model ...
Epoch 4/99
----------
train Loss: 0.4317 Acc: 0.7863 Precision: 0.9334 Recall: 0.6233 F1: 0.7475
val Loss: 0.3728 Acc: 0.8221 Precision: 0.9774 Recal

In [11]:
# Training Function
def _run_training(model, criterion, optimizer, scheduler, num_epochs, use_aux_logits):
    """Shared train/validate loop behind the two public training entry points.

    Each epoch runs a 'train' phase followed by a 'val' phase over the
    module-level ``dataloaders``, averaging loss and accuracy with
    ``dataset_sizes`` and moving batches to the module-level ``device``. After
    every epoch the validation F1 is passed to the module-level
    ``best_model_saver``, which may request early stopping.

    Args:
        model: Network to train; modified in place.
        criterion: Loss called as ``criterion(logits, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: LR scheduler stepped once per epoch, after the train phase.
        num_epochs: Maximum number of epochs to run.
        use_aux_logits: If True, the model is expected to return
            ``(outputs, aux_outputs)`` in training mode and the loss is
            ``main + 0.4 * auxiliary``. If False, the model returns a single
            logits tensor in both phases.

    Returns:
        ``model`` with the weights of the best validation F1 epoch loaded.
    """
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_f1 = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0
            all_preds = []
            all_labels = []

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Gradients are tracked only during the training phase.
                with torch.set_grad_enabled(is_train):
                    if is_train and use_aux_logits:
                        # Training-mode forward pass yields main and auxiliary
                        # logits; the auxiliary loss is down-weighted by 0.4.
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4 * loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # loss.item() is a batch mean; scale by batch size so the
                # epoch figure is a per-sample average.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

            if is_train:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]
            precision = precision_score(all_labels, all_preds)
            recall = recall_score(all_labels, all_preds)
            f1 = f1_score(all_labels, all_preds)

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f} Precision: {precision:.4f} Recall: {recall:.4f} F1: {f1:.4f}')

            if phase == 'val' and f1 > best_f1:
                best_f1 = f1
                best_model_wts = copy.deepcopy(model.state_dict())

        # 'val' is the last phase, so `f1` here is this epoch's validation F1.
        best_model_saver(f1, model)
        if best_model_saver.early_stop:
            print("Early stopping")
            break

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val F1: {best_f1:.4f}')

    model.load_state_dict(best_model_wts)
    return model


def train_model_inception(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train a model that returns ``(outputs, aux_outputs)`` in training mode.

    The training loss is ``criterion(outputs) + 0.4 * criterion(aux_outputs)``;
    validation uses the main output only. Relies on the module-level
    ``dataloaders``, ``dataset_sizes``, ``device`` and ``best_model_saver``.

    Returns:
        ``model`` with the weights of the best validation F1 epoch loaded.
    """
    return _run_training(model, criterion, optimizer, scheduler, num_epochs, use_aux_logits=True)


def train_model_resnet(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train a model that returns a single logits tensor in both phases.

    Relies on the module-level ``dataloaders``, ``dataset_sizes``, ``device``
    and ``best_model_saver``.

    Returns:
        ``model`` with the weights of the best validation F1 epoch loaded.
    """
    return _run_training(model, criterion, optimizer, scheduler, num_epochs, use_aux_logits=False)