In [None]:
import numpy as np
import pandas as pd
import cv2, os
import matplotlib.pyplot as plt
from PIL import Image
from glob import glob
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models, datasets
import time
from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score, accuracy_score
from sklearn.metrics import roc_curve, auc

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
cuda_ok = torch.cuda.is_available()
device = 'cuda' if cuda_ok else 'cpu'
print("cuda available? " + str(cuda_ok))

In [None]:
# Define root directory from Google Drive
# This cell only works inside Google Colab: it mounts the user's Drive under
# /content/drive. force_remount=True is passed so the mount is redone when the
# cell is re-run.
from google.colab import drive
drive.mount("/content/drive", force_remount=True)
# Root folder of the dataset on Drive (note the trailing slash).
root_dir = "/content/drive/My Drive/ceilometer_dataset1.1/"

In [None]:
# Per-channel statistics used for normalisation (the standard ImageNet values
# that torchvision's pretrained models expect).
_IMAGENET_MEAN = [0.485, 0.456, 0.406]
_IMAGENET_STD = [0.229, 0.224, 0.225]

# Training pipeline: fixed-size resize, random horizontal flip as the only
# augmentation, then tensor conversion and normalisation.
_train_steps = [
    transforms.Resize((256, 256)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=_IMAGENET_MEAN, std=_IMAGENET_STD),
]
transforms_train = transforms.Compose(_train_steps)

# Evaluation pipeline: same preprocessing but no augmentation.
_test_steps = [
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_IMAGENET_MEAN, std=_IMAGENET_STD),
]
transforms_test = transforms.Compose(_test_steps)

Structure of dataset folders
```
ceilometer_dataset
    ----> train
          ---->true
          ---->false
    ----> test
          ---->true
          ---->false

70% train, 30% test (the train folder is further split 70/30 into training and validation subsets when loaded)
```

In [None]:
# remember to modify placeholders!
# Paths are built with os.path.join so they remain valid whether or not
# root_dir ends with a path separator (plain string concatenation would
# silently produce e.g. ".../datasettrain/" if the slash were dropped).
train_dir = os.path.join(root_dir, "train/")
test_dir = os.path.join(root_dir, "test/")
# Per-class folders; "true"/"false" are the two class labels on disk.
train_classa_dir = os.path.join(root_dir, "train/true/")
train_classb_dir = os.path.join(root_dir, "train/false/")
test_classa_dir = os.path.join(root_dir, "test/true/")
test_classb_dir = os.path.join(root_dir, "test/false/")

In [None]:
# Build datasets from the folder layout (one sub-folder per class).
train_datasets = datasets.ImageFolder(train_dir, transforms_train)
test_dataset = datasets.ImageFolder(test_dir, transforms_test)

# 70/30 train/validation split of the training folder.
# The validation size is derived from the training size so the two always sum
# to len(train_datasets); rounding both parts independently can be off by one,
# in which case random_split raises.
n_train = round(len(train_datasets) * 0.70)
n_val = len(train_datasets) - n_train
# A seeded generator makes the split identical on every run of the notebook.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = torch.utils.data.random_split(
    train_datasets, [n_train, n_val], generator=split_generator
)
# The validation subset must not be augmented: re-wrap the same indices around
# a copy of the training folder that uses the deterministic test transforms.
val_dataset = torch.utils.data.Subset(
    datasets.ImageFolder(train_dir, transforms_test), val_dataset.indices
)

# Training batches are shuffled and the ragged last batch is dropped.
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=12, shuffle=True, num_workers=2, drop_last=True)
# Evaluation loaders keep every sample (drop_last=False) and a fixed order so
# that metrics cover the whole split.
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=12, shuffle=False, num_workers=2, drop_last=False)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=12, shuffle=False, num_workers=2, drop_last=False)

In [None]:
# Report how many samples each split holds, then the class names that
# ImageFolder derived for the test set.
class_names = test_dataset.classes
for caption, subset in (
    ('Train dataset size:', train_dataset),
    ('Validation dataset size:', val_dataset),
    ('Test dataset size:', test_dataset),
):
    print(caption, len(subset))
print('Class names:', class_names)

In [None]:
# Sanity check of one training sample: tensor shape, label and the image itself.
# Index 100 is used when available; it is clamped so small datasets do not
# raise IndexError.
sample_idx = min(100, len(train_dataset) - 1)
im, label = train_dataset[sample_idx]
print(im.shape)
print("-------------")
print(label)
# The tensor was normalised by the training transforms, so its values fall
# outside [0, 1] and imshow would clip them. Undo the normalisation for display
# only (these constants must match the Normalize step of transforms_train).
_display_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
_display_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
_display_im = (im.cpu() * _display_std + _display_mean).clamp(0, 1)
plt.imshow(_display_im.permute(1, 2, 0))  # CHW -> HWC for matplotlib
print("Class: ", class_names[label])

In [None]:
# VGG16 with pretrained weights; its last classifier layer is swapped for a
# 2-output linear layer (binary classification).
# NOTE(review): `pretrained=True` is reported as deprecated by newer torchvision
# releases in favour of `weights=...` - confirm against the installed version.
model = models.vgg16(pretrained=True)
final_fc = model.classifier[6]
num_features = final_fc.in_features  # input width of the layer being replaced
model.classifier[6] = nn.Linear(in_features=num_features, out_features=2)
model = model.to(device)

# Loss and optimiser: cross-entropy over the two logits, SGD with momentum and
# L2 weight decay applied to every parameter of the network.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    params=model.parameters(),
    lr=1e-2,
    momentum=0.8,
    weight_decay=1e-4,
)

In [None]:
# Bare expression: the notebook shows the module's repr (layer listing) as the cell output.
model

In [None]:
class EarlyStopper:
    """Signal when the validation loss has stopped improving.

    The lowest loss seen so far is tracked. Every call whose loss exceeds that
    minimum by more than ``min_delta`` increments a counter; a new minimum
    resets it. Losses inside the tolerance band leave the counter untouched.

    Attributes:
        patience: number of "bad" calls after which stopping is requested.
        min_delta: tolerance added to the best loss before a call counts as bad.
        counter: current number of bad calls since the last new minimum.
        min_validation_loss: lowest loss observed so far (starts at +inf).
    """

    def __init__(self, patience=1, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = np.inf

    def early_stop(self, validation_loss):
        """Update the tracker with ``validation_loss``; return True to stop."""
        # New best loss: remember it and start counting from scratch.
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
            return False

        # Within the tolerance band (or not comparable): nothing changes.
        threshold = self.min_validation_loss + self.min_delta
        if not (validation_loss > threshold):
            return False

        # Clearly worse than the best loss: one more strike.
        self.counter += 1
        if self.counter >= self.patience:
            return True
        return False


In [None]:
# Per-epoch metric histories, one independent list per data split.
def _empty_history():
    """Return a fresh {"train": [], "val": [], "test": []} mapping."""
    return {split_name: [] for split_name in ("train", "val", "test")}


history_loss = _empty_history()
history_accuracy = _empty_history()
history_f1 = _empty_history()
history_precision = _empty_history()
history_recall = _empty_history()

# Best validation accuracy so far, and the test metrics measured at the epoch
# where that best validation accuracy was reached.
best_val_accuracy = 0
best_test_accuracy = best_test_f1 = best_test_precision = best_test_recall = 0
# Flag set during validation telling the test phase to record its metrics.
save_test_value = False

In [None]:
def _run_epoch(dataloader, train=False):
    """Run one full pass over ``dataloader`` and return its metrics.

    Uses the notebook-level ``model``, ``criterion``, ``optimizer`` and
    ``device``.

    Args:
        dataloader: iterable yielding ``(inputs, labels)`` batches.
        train: when True the model is put in training mode and the optimizer
            is stepped after every batch; otherwise the model is put in eval
            mode and gradients are disabled.

    Returns:
        Tuple ``(loss, accuracy, f1, precision, recall)``. Loss is the
        sample-weighted mean of the batch losses. Accuracy is a percentage and
        stays a 0-dim tensor (it is derived from ``torch.sum``), which the
        accuracy-plot cell relies on when it calls ``.cpu()``.

    Raises:
        ValueError: if the dataloader yields no batches.
    """
    model.train(train)  # train(False) is the same as eval()
    running_loss = 0.
    running_corrects = 0
    samples_seen = 0
    labels_per_batch, preds_per_batch = [], []

    with torch.set_grad_enabled(train):
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            if train:
                optimizer.zero_grad()
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            if train:
                loss.backward()
                optimizer.step()

            n_batch = inputs.size(0)
            # criterion returns the batch mean, so weight it by the batch size.
            running_loss += loss.item() * n_batch
            running_corrects += torch.sum(preds == labels)
            samples_seen += n_batch

            # Kept on the CPU for the sklearn metrics computed after the loop.
            labels_per_batch.append(labels.cpu().numpy())
            preds_per_batch.append(preds.cpu().numpy())

    if samples_seen == 0:
        raise ValueError("dataloader yielded no batches; cannot compute epoch metrics")

    # Divide by the number of samples actually processed: with drop_last=True
    # this is smaller than len(dataset) and the latter would bias the metrics.
    epoch_loss = running_loss / samples_seen
    epoch_acc = running_corrects / samples_seen * 100.

    # One concatenate over the whole list works for any number of batches
    # (including a single one).
    epoch_labels = np.concatenate(labels_per_batch, axis=None)
    epoch_preds = np.concatenate(preds_per_batch, axis=None)

    return (
        epoch_loss,
        epoch_acc,
        f1_score(epoch_labels, epoch_preds),
        precision_score(epoch_labels, epoch_preds),
        recall_score(epoch_labels, epoch_preds),
    )


def _log_epoch(split, tag, epoch, metrics):
    """Append one epoch's metrics to the history dicts and print a summary line.

    Args:
        split: history key, one of "train", "val" or "test".
        tag: label shown in the printed line ("Train", "Val" or "Test").
        epoch: zero-based epoch index.
        metrics: tuple as returned by ``_run_epoch``.
    """
    loss, acc, f1, precision, recall = metrics
    history_loss[split].append(loss)
    history_accuracy[split].append(acc)
    history_f1[split].append(f1)
    history_precision[split].append(precision)
    history_recall[split].append(recall)
    print('[{} #{}] Loss: {:.4f} Acc: {:.4f}% Time: {:.4f}s'.format(tag, epoch, loss, acc, time.time() - start_time))


num_epochs = 60   # upper bound; early stopping may end training sooner
early_stopper = EarlyStopper(patience=3, min_delta=0.05)
start_time = time.time()  # reference point for the elapsed time in the logs
for epoch in range(num_epochs):
    print("Epoch {} running".format(epoch))

    # Training phase
    _log_epoch("train", "Train", epoch, _run_epoch(train_dataloader, train=True))

    # Validation phase
    val_metrics = _run_epoch(val_dataloader, train=False)
    _log_epoch("val", "Val", epoch, val_metrics)
    val_loss, val_acc = val_metrics[0], val_metrics[1]

    # Remember whether this epoch set a new best validation accuracy; if so the
    # test metrics of this same epoch are recorded below.
    save_test_value = bool(val_acc > best_val_accuracy)
    if save_test_value:
        best_val_accuracy = val_acc

    # Decide on early stopping now, but only leave the loop after the test
    # phase so the test history has one entry per epoch and best_test_* is
    # updated for this epoch too.
    stop_training = early_stopper.early_stop(val_loss)

    # Testing phase
    test_metrics = _run_epoch(test_dataloader, train=False)
    _log_epoch("test", "Test", epoch, test_metrics)
    if save_test_value:
        _, best_test_accuracy, best_test_f1, best_test_precision, best_test_recall = test_metrics

    if stop_training:
        break

In [None]:
nb_classes = 2

# confusion_matrix[t, p] counts test samples of true class t predicted as p.
confusion_matrix = torch.zeros(nb_classes, nb_classes)
# Put the model in eval mode explicitly instead of relying on the state the
# previous cell happened to leave it in (VGG's classifier contains dropout).
model.eval()
with torch.no_grad():
    for inputs, classes in test_dataloader:
        inputs = inputs.to(device)
        classes = classes.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        # Encode each (true, predicted) pair as a single index and count all
        # pairs of the batch at once instead of looping element by element.
        pair_index = (classes.view(-1).long() * nb_classes + preds.view(-1).long()).cpu()
        pair_counts = torch.bincount(pair_index, minlength=nb_classes * nb_classes)
        confusion_matrix += pair_counts.view(nb_classes, nb_classes)

print(confusion_matrix)

In [None]:
# Loss per epoch, one curve for each data split.
plt.title("Loss")
for phase in ("train", "val", "test"):
    curve = history_loss[phase]
    plt.plot(curve, label=phase)
plt.legend()
plt.show()

In [None]:
# The accuracy entries are torch tensors; move each one to host memory (in
# place, keeping the same list objects) before plotting.
for split in ("train", "val", "test"):
    history_accuracy[split][:] = [acc.cpu() for acc in history_accuracy[split]]

# Accuracy per epoch, one curve for each data split.
plt.title("Accuracy")
for split in ("train", "val", "test"):
    curve = history_accuracy[split]
    plt.plot(curve, label=split)
plt.legend()
plt.show()

In [None]:
# F1 score per epoch, one curve for each data split.
plt.title("F1 Score")
for phase in ("train", "val", "test"):
    curve = history_f1[phase]
    plt.plot(curve, label=phase)
plt.legend()
plt.show()

In [None]:
# Precision per epoch, one curve for each data split.
plt.title("Precision")
for phase in ("train", "val", "test"):
    curve = history_precision[phase]
    plt.plot(curve, label=phase)
plt.legend()
plt.show()

In [None]:
# Recall per epoch, one curve for each data split.
plt.title("Recall")
for phase in ("train", "val", "test"):
    curve = history_recall[phase]
    plt.plot(curve, label=phase)
plt.legend()
plt.show()

In [None]:
# Test metrics recorded at the epoch with the best validation accuracy,
# printed in the order: accuracy, F1, precision, recall.
for best_metric in (best_test_accuracy, best_test_f1, best_test_precision, best_test_recall):
    print(best_metric)

In [None]:
# Persist the model's parameters (state_dict only) to a file whose path is
# relative to the current working directory.
save_path = 'vgg16_SGD_7.pth'
trained_state = model.state_dict()
torch.save(trained_state, save_path)