In [None]:
# Mount Google Drive into the Colab runtime; DATA_FOLDER below points inside
# this mount, so this cell has to run before any data is read.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# !pip install -U "ray[default]"

In [None]:
import os
import glob
import numpy as np
import pandas as pd
from functools import partial

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader,random_split

import torchvision
from torchvision import transforms
from torchvision.io import read_image
from torchvision.models import resnet50, ResNet50_Weights

from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Folder holding the PNG images; each file name ends in "-<label>.png".
DATA_FOLDER = "/content/drive/MyDrive/Thesis/Experiments/VC-PRG-IMG/"

# NOTE(review): MODEL_NAME is not referenced by the visible cells — confirm it is still needed.
MODEL_NAME = "resnet50"
# Batch size used by test_accuracy(); tuning trials take theirs from the search space.
BATCH_SIZE = 32
# Epochs per trial; also used as the ASHA scheduler's max_t.
NUM_EPOCHS = 50
# NOTE(review): LEARNING_RATE is not referenced by the visible cells (trials use config["lr"]).
LEARNING_RATE = 1e-4

In [None]:
class VehicleDataset(Dataset):
    """Map-style dataset that lazily reads images from a sequence of file paths.

    Args:
        X: Indexable collection of image file paths.
        y: Indexable collection of labels, aligned with ``X``.
        transform: Optional callable applied to each loaded image tensor.
    """

    def __init__(self, X, y, transform=None):
        self.X, self.y, self.transform = X, y, transform

    def __len__(self):
        """Return the number of samples (one per path in ``X``)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return it as an ``(image, label)`` pair."""
        # Keep only the first three channels so an alpha channel, if any, is dropped.
        pixels = read_image(self.X[idx])[:3, :, :]
        if self.transform:
            pixels = self.transform(pixels)
        return pixels, self.y[idx]

In [None]:
def get_label(filename):
    """Return the integer class label encoded at the end of an image file name.

    The label is whatever follows the last ``-`` in the base name once every
    ``.png`` occurrence has been removed, e.g. ``"car-003-7.png"`` -> ``7``.

    Raises:
        ValueError: If that trailing piece is not a valid integer.
    """
    stem = os.path.basename(filename).replace(".png", "")
    _, _, suffix = stem.rpartition("-")
    return int(suffix)

# Index every PNG in the data folder and pair it with the label parsed from its name.
files = sorted(glob.glob(os.path.join(DATA_FOLDER, "*.png")))
labels = list(map(get_label, files))
df = pd.DataFrame({"filename": files, "label": labels})

# Hold out 10% of all samples as the test set. Each piece gets a fresh 0..n-1
# index because VehicleDataset looks samples up by position.
temp_X, test_X, temp_y, test_y = (
    piece.reset_index(drop=True)
    for piece in train_test_split(df["filename"], df["label"], test_size=0.1, random_state=42)
)

# Split the remaining 90% again: 10% of it becomes validation, the rest training.
train_X, val_X, train_y, val_y = (
    piece.reset_index(drop=True)
    for piece in train_test_split(temp_X, temp_y, test_size=0.1, random_state=42)
)

In [None]:
# Per-channel mean / std used to normalise inputs for the ImageNet-pretrained backbone.
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

# Preprocessing shared by the train, validation and test datasets:
# tensor -> PIL image -> 224x224 resize -> float tensor -> normalisation.
data_transforms = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(_norm_mean, _norm_std),
    ]
)

In [None]:
def train(config, checkpoint_dir=None, data_dir=None):
  """Fine-tune an ImageNet-pretrained ResNet-50 for one Ray Tune trial.

  The final fully connected layer is replaced with a 13-way classifier. After
  every epoch the model and optimizer state are checkpointed and the
  validation loss and accuracy are reported to Ray Tune.

  Args:
    config: Hyperparameters for this trial. Reads ``config["lr"]``,
      ``config["wd"]`` (weight decay) and ``config["batch_size"]``.
    checkpoint_dir: Optional directory containing a ``"checkpoint"`` file
      with a ``(model_state, optimizer_state)`` tuple to resume from.
    data_dir: Accepted for interface compatibility; not used, the datasets
      are built from the module-level train/validation splits.
  """
  device = "cuda" if torch.cuda.is_available() else "cpu"

  model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
  model.fc = nn.Linear(model.fc.in_features, 13)
  model.to(device)

  loss_fn = nn.CrossEntropyLoss()
  optimizer = optim.Adam(model.parameters(), lr=config["lr"], weight_decay=config["wd"])

  # Resume only when a checkpoint file really exists: a directory that was
  # passed in but never written to must not abort the trial before it starts.
  if checkpoint_dir:
    resume_path = os.path.join(checkpoint_dir, "checkpoint")
    if os.path.isfile(resume_path):
      # map_location lets a checkpoint saved on GPU be restored on a CPU-only worker.
      model_state, optimizer_state = torch.load(resume_path, map_location=device)
      model.load_state_dict(model_state)
      optimizer.load_state_dict(optimizer_state)

  train_dataset = VehicleDataset(train_X, train_y, transform=data_transforms)
  val_dataset = VehicleDataset(val_X, val_y, transform=data_transforms)

  train_dataloader = DataLoader(train_dataset, batch_size=config["batch_size"], shuffle=True)
  # Validation order does not affect the metrics, so no shuffling is needed.
  val_dataloader = DataLoader(val_dataset, batch_size=config["batch_size"], shuffle=False)

  for epoch in range(NUM_EPOCHS):

    # Training
    # Switch BatchNorm (and any dropout) back to training behaviour each epoch,
    # because the validation phase below puts the model in eval mode.
    model.train()
    train_running_loss = 0.0

    for X_train, y_train_trues in train_dataloader:
      X_train, y_train_trues = X_train.to(device), y_train_trues.to(device)

      optimizer.zero_grad()

      y_train_preds = model(X_train)
      train_loss = loss_fn(y_train_preds, y_train_trues)
      train_loss.backward()
      optimizer.step()

      train_running_loss += train_loss.item()

    print(f"Epoch [{epoch+1}/{NUM_EPOCHS}]\t|\tTrain Loss: {train_running_loss/len(train_dataloader):.5f}\t|")

    # Validation
    # eval() makes BatchNorm use its running statistics instead of the batch
    # statistics and stops it from updating them on validation data.
    model.eval()
    val_running_loss = 0.0
    val_steps = 0
    total = 0
    correct = 0

    with torch.no_grad():
      for X_val, y_val_trues in val_dataloader:
        X_val, y_val_trues = X_val.to(device), y_val_trues.to(device)

        y_val_preds = model(X_val)
        predicted = y_val_preds.argmax(dim=1)
        total += y_val_trues.size(0)
        correct += (predicted == y_val_trues).sum().item()

        val_running_loss += loss_fn(y_val_preds, y_val_trues).item()
        val_steps += 1

    # A separate name keeps the ``checkpoint_dir`` argument from being shadowed.
    with tune.checkpoint_dir(epoch) as epoch_checkpoint_dir:
      path = os.path.join(epoch_checkpoint_dir, "checkpoint")
      torch.save((model.state_dict(), optimizer.state_dict()), path)

    tune.report(loss=(val_running_loss / val_steps), accuracy=correct / total)
  print("Finished Traiing")

In [None]:
def test_accuracy(model, device="cpu"):
  """Return the fraction of test-set samples that ``model`` classifies correctly.

  The model is moved to ``device`` (in place) and evaluated in eval mode; its
  previous train/eval mode is restored before returning.

  Args:
    model: Classifier whose forward pass returns one row of class scores per sample.
    device: Device on which to run the evaluation, e.g. ``"cpu"`` or ``"cuda"``.

  Returns:
    ``correct / total`` over the test split, as a float.
  """
  test_dataset = VehicleDataset(test_X, test_y, transform=data_transforms)
  test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

  # The batches are sent to ``device`` below, so the weights must live there too;
  # otherwise the forward pass fails with a device-mismatch error.
  model.to(device)

  # Evaluate with BatchNorm using its running statistics, then put the model
  # back into whatever mode the caller had it in.
  was_training = model.training
  model.eval()

  correct = 0
  total = 0
  try:
    with torch.no_grad():
      for X_test, y_test_trues in test_dataloader:
        X_test, y_test_trues = X_test.to(device), y_test_trues.to(device)

        predicted = model(X_test).argmax(dim=1)
        total += y_test_trues.size(0)
        correct += (predicted == y_test_trues).sum().item()
  finally:
    model.train(was_training)
  return correct / total

In [None]:
data_dir = os.path.abspath("./data")
# Kept for compatibility with any cell that reads it. It is deliberately NOT
# passed to train(): no checkpoint exists there, and train() treats a given
# checkpoint_dir as "resume from this checkpoint".
checkpoint_dir = os.path.abspath("./checkpoint")

# Search space sampled independently for each trial.
config = {
    "lr": tune.loguniform(1e-5, 1e-3),
    "batch_size": tune.choice([32, 64, 128]),
    "wd": tune.choice([0, 0.01, 0.05, 0.025]),
}

# ASHA stops poorly performing trials early, judged on the reported validation loss.
scheduler = ASHAScheduler(
    metric="loss",
    mode="min",
    max_t=NUM_EPOCHS,
    grace_period=1,
    reduction_factor=2
)

reporter = CLIReporter(
    metric_columns=["loss", "accuracy", "training_iteration"]
)

# Ray only exposes a GPU to trials that request one; without this every trial
# would see no CUDA device and silently train on the CPU.
gpus_per_trial = 1 if torch.cuda.is_available() else 0

result = tune.run(
    partial(train, data_dir=data_dir),
    resources_per_trial={"cpu": 1, "gpu": gpus_per_trial},
    config=config,
    num_samples=10,
    scheduler=scheduler,
    progress_reporter=reporter
)

In [None]:
# Select the trial whose last reported validation loss is the lowest.
best_trial = result.get_best_trial(metric="loss", mode="min", scope="last")
print(f"Best trial config: {best_trial.config}")
print(f"Best trial final validation loss: {best_trial.last_result['loss']}")
print(f"Best trial final validation accuracy: {best_trial.last_result['accuracy']}")

In [None]:
# Evaluate on whichever device is actually available instead of assuming CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Rebuild the same architecture that train() optimises (ResNet-50 with a 13-way
# head). Pretrained weights are not needed because the checkpoint supplies
# every parameter.
best_trained_model = resnet50(weights=None)
best_trained_model.fc = nn.Linear(best_trained_model.fc.in_features, 13)

# NOTE(review): `checkpoint.value` is the legacy Ray Tune accessor — confirm it
# matches the installed Ray version.
best_checkpoint_dir = best_trial.checkpoint.value
model_state, optimizer_state = torch.load(
    os.path.join(best_checkpoint_dir, "checkpoint"), map_location=device
)
best_trained_model.load_state_dict(model_state)

# Move the weights to the evaluation device and switch off training-time
# behaviour (BatchNorm batch statistics) before measuring accuracy.
best_trained_model.to(device)
best_trained_model.eval()

test_acc = test_accuracy(best_trained_model, device)
print("Best trial test set accuracy: {}".format(test_acc))