In [1]:
import os
import glob
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from torchvision.io import ImageReadMode, read_image
from torchvision import transforms
from torchvision.models import resnet18

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [47]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [3]:
# Directory that is searched for "*.png" images (relative to the notebook).
DATA_FOLDER = "../dataset/VC-PRG-IMG/"

# Tag embedded in the saved checkpoint and figure file names.
MODEL_NAME = "ViT"
# Number of samples per batch for both the train and test DataLoaders.
BATCH_SIZE = 1
# Number of full passes over the training set.
NUM_EPOCHS = 10
# Step size handed to the Adam optimizer; also embedded in output file names.
LEARNING_RATE = 1e-4

In [32]:
class VehicleDataset(Dataset):
    """Map-style dataset that lazily decodes vehicle images from disk.

    Args:
        X: Sequence of image file paths (list, array or pandas Series).
        y: Sequence of labels aligned element-wise with ``X``.
        transform: Optional callable applied to the decoded image tensor.

    Raises:
        ValueError: If ``X`` and ``y`` do not have the same length.
    """

    def __init__(self, X, y, transform=None):
        # Materialise both inputs as plain lists so that __getitem__ always
        # indexes by POSITION. Indexing a pandas Series with an integer does a
        # label lookup, which fails (or returns the wrong row) whenever the
        # Series index is not exactly 0..n-1, e.g. right after a shuffled split.
        self.X = list(X)
        self.y = list(y)
        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must have the same length, got {len(self.X)} and {len(self.y)}"
            )
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``."""
        # Decode straight to 3-channel RGB: alpha is dropped and grayscale /
        # palette images are expanded, so every sample matches the 3-channel
        # normalisation applied downstream. Plain channel slicing only handled
        # the RGBA case.
        image = read_image(self.X[idx], mode=ImageReadMode.RGB)
        if self.transform is not None:
            image = self.transform(image)
        return image, self.y[idx]

In [8]:
def get_label(filename):
    """Extract the integer class label encoded at the end of a file name.

    The label is the text after the last ``-`` in the file's base name, with
    the extension removed (``.../car-0001-7.png`` -> ``7``).

    Args:
        filename: Path to an image file.

    Returns:
        The label as an ``int``.

    Raises:
        ValueError: If the trailing token is not an integer.
    """
    # splitext strips only the real extension (whatever its case), unlike a
    # substring replace of ".png" which is case-sensitive and would also eat
    # any ".png" occurring in the middle of the name.
    stem, _ = os.path.splitext(os.path.basename(filename))
    token = stem.rsplit("-", 1)[-1]
    try:
        return int(token)
    except ValueError as exc:
        raise ValueError(
            f"Cannot parse an integer label from file name {filename!r}"
        ) from exc

In [12]:
# Collect every PNG in the data folder; sorting makes the file order (and thus
# the seeded train/test split) reproducible across runs and machines.
image_pattern = os.path.join(DATA_FOLDER, "*.png")
files = sorted(glob.glob(image_pattern))
# Fail here, with a clear message, instead of building an empty frame that
# only blows up later inside the split with an unrelated error.
if not files:
    raise FileNotFoundError(
        f"No PNG images found matching {image_pattern!r}; check DATA_FOLDER."
    )
labels = [get_label(path) for path in files]
df = pd.DataFrame({"filename": files, "label": labels})

In [21]:
# Hold out 10% of the samples for testing (fixed seed for reproducibility) and
# renumber every piece from 0 so each one can be indexed by position.
X_train, X_test, y_train, y_test = (
    part.reset_index(drop=True)
    for part in train_test_split(
        df['filename'], df['label'], test_size=0.1, random_state=42
    )
)

In [33]:
# Target spatial size and per-channel normalisation statistics (these are the
# widely used ImageNet mean / std values).
IMAGE_SIZE = (224, 224)
CHANNEL_MEAN = [0.485, 0.456, 0.406]
CHANNEL_STD = [0.229, 0.224, 0.225]

# uint8 tensor -> PIL image -> resize -> float tensor in [0, 1] -> normalise.
_preprocessing_steps = [
    transforms.ToPILImage(),
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(CHANNEL_MEAN, CHANNEL_STD),
]
data_transforms = transforms.Compose(_preprocessing_steps)

In [43]:
# Both splits share the same preprocessing pipeline.
train_dataset = VehicleDataset(X_train, y_train, transform=data_transforms)
test_dataset = VehicleDataset(X_test, y_test, transform=data_transforms)

# Training batches are reshuffled every epoch.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# The test loader must NOT shuffle: predictions are collected in iteration
# order and later compared against y_test in its original order, so shuffling
# here would misalign predictions and ground truth and corrupt the confusion
# matrix and classification report.
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [48]:
# Fetch the pretrained DeiT-base (patch 16, 224x224 input) via torch.hub.
model = torch.hub.load('facebookresearch/deit:main', 'deit_base_patch16_224', pretrained=True)

# Replace the pretrained classification head with a small MLP that maps the
# backbone features to 13 output logits.
n_inputs = model.head.in_features
_head_layers = [
    nn.Linear(n_inputs, 512),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(512, 13),
]
model.head = nn.Sequential(*_head_layers)

# Move all parameters to the selected device.
model = model.to(device)

In [None]:
# Multi-class objective operating on raw logits and integer class targets.
loss_fn = nn.CrossEntropyLoss()
# Adam updates every parameter of the model (backbone and new head alike).
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

def multi_acc(y_pred, y_true):
    """Return the batch accuracy as a percentage rounded to a whole number.

    Args:
        y_pred: Tensor of per-class scores with classes along dim 1.
        y_true: Tensor of integer class ids, one per row of ``y_pred``.

    Returns:
        A 0-dim float tensor holding the rounded accuracy in percent.
    """
    log_probs = torch.log_softmax(y_pred, dim=1)
    predicted = log_probs.argmax(dim=1)
    hits = (predicted == y_true).float()
    fraction_correct = hits.sum() / len(hits)
    return torch.round(fraction_correct * 100)

In [None]:
# Per-epoch history of the mean batch loss and mean batch accuracy (percent).
train_losses = []
train_accs = []

for epoch in range(NUM_EPOCHS):
    train_running_loss = 0.0
    train_running_acc = 0.0

    # Train
    model.train()
    # The loop variables are deliberately not called X_train / y_train: reusing
    # those names would overwrite the global path/label Series with tensors and
    # break any later re-run of the dataset-construction cell.
    for images, targets in train_dataloader:
        images, targets = images.to(device), targets.to(device)

        # Clear gradients left over from the previous step before the forward pass.
        optimizer.zero_grad()

        logits = model(images)

        train_loss = loss_fn(logits, targets)
        train_acc = multi_acc(logits, targets)

        train_loss.backward()
        optimizer.step()

        train_running_loss += train_loss.item()
        train_running_acc += train_acc.item()

    # Average over the number of batches in the epoch.
    epoch_loss = train_running_loss / len(train_dataloader)
    epoch_acc = train_running_acc / len(train_dataloader)
    train_losses.append(epoch_loss)
    train_accs.append(epoch_acc)

    print(f"Epoch [{epoch+1}/{NUM_EPOCHS}]\t|\tTrain Loss: {epoch_loss:.5f}\t|\tTrain Acc: {epoch_acc:.3f}\t|")


# Save the model
torch.save(model.state_dict(), f"vcd_{MODEL_NAME}_{LEARNING_RATE}lr_{NUM_EPOCHS}epoch_model.pt")

In [None]:
# Side-by-side panels: training accuracy on the left, training loss on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))

for panel, history, metric in ((ax1, train_accs, 'Accuracy'), (ax2, train_losses, 'Loss')):
    panel.plot(history, '-o', label='Train')
    panel.set_title(metric)
    panel.set_xlabel('Epoch')
    panel.set_ylabel(metric)
    panel.legend()

# Persist the figure next to the checkpoint, using the same naming scheme.
plt.savefig(f"vcd_{MODEL_NAME}_{LEARNING_RATE}lr_{NUM_EPOCHS}epoch_model.png")

In [None]:
# Predicted class id for every test sample, in test_dataloader iteration order.
# Comparing this list against y_test is only valid when the loader does not
# shuffle.
y_test_pred_list = []
model.eval()
with torch.no_grad():
    # Loop variables avoid the names X_test / y_test so the global path/label
    # Series used by the evaluation cells are not overwritten with tensors.
    for images, _ in test_dataloader:
        logits = model(images.to(device))
        # argmax over the class dimension yields one id per sample; extending
        # with .tolist() gives a flat list of ints for ANY batch size,
        # including a final batch holding a single sample (where a
        # squeeze-then-flatten approach would hit a bare int and fail).
        y_test_pred_list.extend(logits.argmax(dim=1).cpu().tolist())

In [None]:
# Rows are true classes, columns are predicted classes. Passing the sorted
# label set explicitly lets the frame carry the real class ids on both axes
# instead of positional 0..k-1 indices.
class_ids = sorted(set(y_test) | set(y_test_pred_list))
confusion_matrix_df = pd.DataFrame(
    confusion_matrix(y_test, y_test_pred_list, labels=class_ids),
    index=class_ids,
    columns=class_ids,
)
# fmt="d" prints integer counts; the default format switches to scientific
# notation once a cell reaches three digits.
heatmap_ax = sns.heatmap(confusion_matrix_df, annot=True, fmt="d")
heatmap_ax.set_xlabel("Predicted label")
heatmap_ax.set_ylabel("True label");

In [None]:
# Text table of per-class metrics; predictions must be in the same order as y_test.
report = classification_report(y_test, y_test_pred_list)
print(report)