In [None]:
# Mount Google Drive inside the Colab runtime. The dataset and checkpoint
# paths used later in this notebook all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# !pip install -U "ray[default]"
# !pip install -U tensorboardx

In [None]:
import os
import glob
import numpy as np
import pandas as pd
from functools import partial

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader,random_split

import torchvision
from torchvision import transforms
from torchvision.io import read_image

from filelock import FileLock

from ray import tune
from ray.air import session
from ray.air.checkpoint import Checkpoint
from ray.tune.schedulers import ASHAScheduler

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Notebook-wide configuration.
# NOTE(review): DATA_FOLDER and LEARNING_RATE are not referenced by any of the
# visible cells (the learning rate comes from the Ray Tune search space) --
# confirm whether they are still needed.
DATA_FOLDER = "/content/drive/MyDrive/Thesis/Experiments/VC-PRG-IMG/"

MODEL_NAME = "CNN"  # sub-folder name used by train() for its checkpoint file
BATCH_SIZE = 32  # batch size used by test_best_model() for the test DataLoader
NUM_EPOCHS = 50  # epochs per trial in train(); also the ASHA scheduler's max_t
LEARNING_RATE = 1e-4

In [None]:
class CNNNetwork(nn.Module):
    """Four-stage convolutional classifier that outputs raw class logits.

    Each stage is Conv2d(kernel 3, stride 1, padding 2) -> ReLU ->
    MaxPool2d(2) -> BatchNorm2d. The flattened features go through dropout
    and a single linear layer.

    The linear layer expects 28800 input features, i.e. 128 channels on a
    15x15 map, which is what a 3x224x224 input produces after the four stages.

    Args:
        num_classes: Number of output classes (defaults to 15).
    """

    def __init__(self, num_classes=15):
        super().__init__()

        # Attribute names and layer order inside each Sequential are kept
        # stable so previously saved state_dicts still load.
        self.conv1 = self._conv_block(3, 32)
        self.conv2 = self._conv_block(32, 128)
        self.conv3 = self._conv_block(128, 128)
        self.conv4 = self._conv_block(128, 128)

        self.flatten = nn.Flatten()

        self.linear = nn.Linear(in_features=28800, out_features=num_classes)

        self.dropout = nn.Dropout(0.5)

        # Only used by predict_proba(); forward() deliberately skips it.
        self.softmax = nn.Softmax(dim=1)

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Build one Conv -> ReLU -> MaxPool -> BatchNorm stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=3, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.BatchNorm2d(out_channels),
        )

    def forward(self, input_data):
        """Return unnormalised logits of shape (batch, num_classes).

        Softmax is intentionally NOT applied here: nn.CrossEntropyLoss applies
        log-softmax internally, so feeding it probabilities would apply
        softmax twice and flatten the gradients. The argmax of the logits is
        the same as the argmax of the probabilities, so accuracy computations
        based on torch.max are unaffected.
        """
        x = self.conv1(input_data)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.flatten(x)
        x = self.dropout(x)
        return self.linear(x)

    def predict_proba(self, input_data):
        """Return class probabilities (softmax over the logits of forward())."""
        return self.softmax(self.forward(input_data))

In [None]:
class VehicleDataset(Dataset):
    """Dataset that loads one image per sample from a list of file paths.

    Args:
        X: Indexable collection of image file paths.
        y: Indexable collection of labels, aligned with ``X``.
        transform: Optional callable applied to each loaded image.
    """

    def __init__(self, X, y, transform=None):
        self.X, self.y, self.transform = X, y, transform

    def __len__(self):
        """Number of samples, taken from the path collection."""
        return len(self.X)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``."""
        # Keep only the first three entries along the leading axis
        # (presumably drops an alpha channel from RGBA files -- confirm).
        pixels = read_image(self.X[idx])[:3, :, :]
        if self.transform:
            pixels = self.transform(pixels)
        return pixels, self.y[idx]

def get_label(filename):
    """Extract the integer class label encoded at the end of a file name.

    The label is whatever follows the last ``-`` in the base name once every
    ``.png`` occurrence has been removed, e.g. ``"car-003-7.png"`` -> ``7``.
    """
    stem = os.path.basename(filename).replace(".png", "")
    _, _, tail = stem.rpartition("-")
    return int(tail)

In [None]:
def load_data(train_data_dir="./data", test_data_dir="./data"):
    """Build the train and test datasets from two folders of PNG files.

    Every ``*.png`` directly inside each folder becomes one sample (paths in
    sorted order); its label is parsed from the file name by ``get_label``.
    Both datasets share the same preprocessing: resize to 224x224, conversion
    to a tensor and per-channel normalisation.

    Returns:
        Tuple ``(train_dataset, test_dataset)`` of ``VehicleDataset`` objects.
    """
    preprocess = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    lock_file = os.path.expanduser("~/data.lock")
    built = []
    # Directory listing and dataset construction happen while holding the lock.
    with FileLock(lock_file):
        for folder in (train_data_dir, test_data_dir):
            paths = sorted(glob.glob(os.path.join(folder, "*.png")))
            labels = list(map(get_label, paths))
            built.append(VehicleDataset(paths, labels, transform=preprocess))

    train_dataset, test_dataset = built
    return train_dataset, test_dataset

In [None]:
def train(config):
  """Ray Tune trainable: fit a CNNNetwork and report validation metrics.

  Runs NUM_EPOCHS epochs. After every epoch the model and optimizer state are
  saved to ``checkpoint.pt`` and the mean validation loss and accuracy are
  reported through ``session.report``. If the session provides a checkpoint,
  model and optimizer state are restored from it before training starts.

  Args:
    config: Hyperparameter dict from Ray Tune. Reads ``config["lr"]``,
      ``config["wd"]`` (Adam weight decay) and ``config["batch_size"]``.

  Raises:
    ValueError: If the training folder has too few images to hold out a
      non-empty 10% validation split.
  """
  device = "cuda" if torch.cuda.is_available() else "cpu"

  model = CNNNetwork()
  model.to(device)

  loss_fn = nn.CrossEntropyLoss()
  optimizer = optim.Adam(model.parameters(), lr=config["lr"], weight_decay=config["wd"])

  loaded_checkpoint = session.get_checkpoint()
  if loaded_checkpoint:
    with loaded_checkpoint.as_directory() as loaded_checkpoint_dir:
      # map_location lets a checkpoint written on a GPU worker be restored on
      # a CPU-only worker (and vice versa).
      model_state, optimizer_state = torch.load(
          os.path.join(loaded_checkpoint_dir, "checkpoint.pt"),
          map_location=device,
      )
      model.load_state_dict(model_state)
      optimizer.load_state_dict(optimizer_state)

  train_data_dir = os.path.abspath("/content/drive/MyDrive/Thesis/Experiments//VC-PRG-1_5/")
  test_data_dir = os.path.abspath("/content/drive/MyDrive/Thesis/Experiments/VC-PRG-6/")
  train_dataset, _ = load_data(train_data_dir, test_data_dir)

  # Hold out 10% of the training images for validation.
  test_abs = int(len(train_dataset) * 0.1)
  if test_abs == 0:
    raise ValueError(
        f"Found {len(train_dataset)} training images in {train_data_dir}; "
        "at least 10 are needed to create a validation split."
    )
  train_subset, val_subset = random_split(train_dataset, [len(train_dataset) - test_abs, test_abs])

  batch_size = int(config["batch_size"])
  train_dataloader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
  # Validation metrics do not depend on sample order, so no shuffling needed.
  val_dataloader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)

  # NOTE(review): every trial writes to this same Drive folder, so trials
  # running concurrently could overwrite each other's checkpoint.pt before it
  # is reported -- consider a per-trial directory.
  path = f"/content/drive/MyDrive/Thesis/Experiments/{MODEL_NAME}"
  os.makedirs(path, exist_ok=True)
  checkpoint_file = os.path.join(path, "checkpoint.pt")

  for epoch in range(NUM_EPOCHS):

    # Training: enable Dropout and use batch statistics in BatchNorm.
    model.train()
    train_running_loss = 0.0

    for X_train, y_train_trues in train_dataloader:
      X_train, y_train_trues = X_train.to(device), y_train_trues.to(device)

      # Zero the gradients parameter
      optimizer.zero_grad()

      # Forward
      y_train_preds = model(X_train)
      train_loss = loss_fn(y_train_preds, y_train_trues)
      # Backward
      train_loss.backward()
      # Optimize
      optimizer.step()

      train_running_loss += train_loss.item()

    print(f"Epoch [{epoch+1}/{NUM_EPOCHS}]\t|\tTrain Loss: {train_running_loss/len(train_dataloader):.5f}\t|")

    # Validation: eval() disables Dropout and makes BatchNorm use its running
    # statistics, so the metrics are deterministic and validation batches do
    # not leak into the normalisation statistics.
    model.eval()
    val_running_loss = 0.0
    val_steps = 0
    total = 0
    correct = 0

    with torch.no_grad():
      for X_val, y_val_trues in val_dataloader:
        X_val, y_val_trues = X_val.to(device), y_val_trues.to(device)

        y_val_preds = model(X_val)
        _, predicted = torch.max(y_val_preds, 1)
        total += y_val_trues.size(0)
        correct += (predicted == y_val_trues).sum().item()

        val_loss = loss_fn(y_val_preds, y_val_trues)
        val_running_loss += val_loss.item()
        val_steps += 1

    torch.save((model.state_dict(), optimizer.state_dict()), checkpoint_file)
    checkpoint = Checkpoint.from_directory(path)

    session.report({"loss": (val_running_loss / val_steps), "accuracy": correct/total}, checkpoint=checkpoint)

  print("Finished Traiing")

In [None]:
def test_best_model(best_result):
  """Evaluate the best trial's checkpoint on the held-out test folder.

  Restores the model weights from ``checkpoint.pt`` inside the checkpoint of
  ``best_result``, runs the model over the test dataset and prints the
  accuracy.

  Args:
    best_result: Ray Tune result object whose ``checkpoint`` holds the
      ``checkpoint.pt`` file written by ``train``.

  Raises:
    ValueError: If the test folder contains no images.
  """
  device = "cuda" if torch.cuda.is_available() else "cpu"

  best_trained_model = CNNNetwork()
  best_trained_model.to(device)

  checkpoint_path = os.path.join(best_result.checkpoint.to_directory(), "checkpoint.pt")

  # map_location allows evaluating a GPU-trained checkpoint on a CPU runtime.
  # The optimizer state stored alongside the weights is not needed here.
  model_state, _ = torch.load(checkpoint_path, map_location=device)
  best_trained_model.load_state_dict(model_state)
  # Inference mode: disable Dropout and use BatchNorm running statistics.
  # Without this, test accuracy is noisy and depends on batch composition.
  best_trained_model.eval()

  train_data_dir = os.path.abspath("/content/drive/MyDrive/Thesis/Experiments//VC-PRG-1_5/")
  test_data_dir = os.path.abspath("/content/drive/MyDrive/Thesis/Experiments/VC-PRG-6/")
  _, test_dataset = load_data(train_data_dir, test_data_dir)
  if len(test_dataset) == 0:
    raise ValueError(f"No test images found in {test_data_dir}")
  test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

  correct = 0
  total = 0
  with torch.no_grad():
    for X_test, y_test_trues in test_dataloader:
      X_test, y_test_trues = X_test.to(device), y_test_trues.to(device)
      y_test_preds = best_trained_model(X_test)
      _, predicted = torch.max(y_test_preds, 1)
      total += y_test_trues.size(0)
      correct += (predicted == y_test_trues).sum().item()
  print("Best trial test set accuracy: {}".format(correct / total))

In [None]:
def main(num_samples=10, gpus_per_trial=1):
  """Run the Ray Tune hyperparameter search and evaluate the best trial.

  Args:
    num_samples: Forwarded to ``tune.TuneConfig(num_samples=...)``.
    gpus_per_trial: GPUs requested per trial (each trial also requests 2 CPUs).
  """
  # Search space: log-uniform learning rate, grid over batch size and
  # weight decay.
  search_space = {
    "lr": tune.loguniform(1e-5, 1e-3),
    "batch_size": tune.grid_search([32, 64, 128]),
    "wd": tune.grid_search([0, 0.01, 0.05, 0.025]),
  }

  asha = ASHAScheduler(max_t=NUM_EPOCHS, grace_period=1, reduction_factor=2)

  trial_resources = {"cpu":2, "gpu": gpus_per_trial}
  trainable = tune.with_resources(tune.with_parameters(train), resources=trial_resources)

  # Trials are compared on the reported validation loss (lower is better).
  search_settings = tune.TuneConfig(
    metric="loss",
    mode="min",
    scheduler=asha,
    num_samples=num_samples,
  )

  result_grid = tune.Tuner(
    trainable,
    tune_config=search_settings,
    param_space=search_space,
  ).fit()

  best_result = result_grid.get_best_result("loss", "min")

  summary_lines = (
    ("Best trial config: {}", best_result.config),
    ("Best trial final validation loss: {}", best_result.metrics["loss"]),
    ("Best trial final validation accuracy: {}", best_result.metrics["accuracy"]),
  )
  for template, value in summary_lines:
    print(template.format(value))

  test_best_model(best_result)

In [None]:
# Launch the hyperparameter search; num_samples is forwarded to
# tune.TuneConfig and every trial requests one GPU.
main(num_samples=2, gpus_per_trial=1)