In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, WeightedRandomSampler

import torchaudio
from torchaudio.transforms import Resample, MelSpectrogram
import torchvision
import torchvision.transforms as T
from torchvision.transforms import Resize
from torchvision.models import resnet34

import h5py
import numpy as np

import os
import glob

import matplotlib.pyplot as plt
%matplotlib  inline

In [None]:
# Training hyperparameters used by the cells below.
# Mini-batch size for both the train and the test DataLoader.
BATCH_SIZE = 10
# Number of passes over the training DataLoader.
NUM_EPOCHS = 50
# Learning rate handed to the Adam optimizer.
LEARNING_RATE = 1e-4

In [None]:
# Use the GPU when CUDA is usable; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# Start from a pretrained ResNet-34 and replace its final fully connected
# layer with a fresh linear head that has 15 outputs.
model = resnet34(pretrained=True)
# Width of the feature vector that feeds the original classifier head.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(in_features=num_ftrs, out_features=15)

In [None]:
def load_h5_split(path):
    """Read one preprocessed split from an HDF5 file.

    Parameters
    ----------
    path : str
        Path to an HDF5 file holding a ``features`` and a ``vehicle_counts``
        dataset.

    Returns
    -------
    tuple of numpy.ndarray
        ``(features, counts)``: the features as float64 and the vehicle
        counts cast to ``int``.
    """
    # The context manager closes the file even when a dataset is missing or
    # a read fails, which the manual open()/close() pair did not guarantee.
    with h5py.File(path, 'r') as hf:
        features = np.array(hf['features'], dtype=np.float64)
        counts = np.array(hf['vehicle_counts']).astype('int')
    return features, counts


train_dataset = "../preprocess/datasets/train_dataset.h5"
x_train, y_train = load_h5_split(train_dataset)

test_dataset = "../preprocess/datasets/test_dataset.h5"
x_test, y_test = load_h5_split(test_dataset)

print(f"X train: {x_train.shape}\tY train: {y_train.shape}")
print(f"X test: {x_test.shape}\tY test: {y_test.shape}")

In [None]:
TRAIN_AUDIO_FOLDER = "../VC-PRG-1_5/"

# One annotation .txt file per clip; sorted so the order is deterministic.
train_audio_files = sorted(glob.glob(TRAIN_AUDIO_FOLDER + "*.txt"))

# Per-file class label: the number of lines in the annotation file, or 0 when
# the file carries the "-1" marker.
labels = []
for annotation_path in train_audio_files:
    with open(annotation_path, 'r') as f:
        sentence = f.readlines()
    # readlines() yields strings (with trailing newlines), so the marker has
    # to be compared as stripped text; the integer test `-1 in sentence`
    # could never match. Presumably "-1" marks a clip without any vehicle --
    # confirm against the dataset's annotation format.
    if any(line.strip() == "-1" for line in sentence):
        labels.append(0)
    else:
        labels.append(len(sentence))

# Inverse-frequency weight per class, so rare counts are drawn more often.
labels_unique, counts = np.unique(labels, return_counts=True)
class_weights = [sum(counts) / c for c in counts]

# `class_weights` is ordered like `labels_unique`, not by label value, so look
# the weight up by label. Indexing with `label - 1` picks the wrong weight
# (or raises IndexError) as soon as label 0 occurs or a count is missing.
weight_by_label = dict(zip(labels_unique.tolist(), class_weights))
example_weights = [weight_by_label[e] for e in labels]

# NOTE(review): the sampler draws indices in range(len(labels)); this assumes
# the annotation files line up one-to-one, in this order, with the rows of the
# training dataset it is used with -- confirm.
sampler = WeightedRandomSampler(example_weights, len(labels))

In [None]:
# Train data.
# Build the tensors with explicit dtypes: float32 features for the network and
# int64 class indices for the loss. Going through `torch.Tensor(y)` first would
# round-trip the integer labels through float32 before the int64 cast.
tensor_x_train = torch.tensor(x_train, dtype=torch.float32)
tensor_y_train = torch.tensor(y_train, dtype=torch.int64)
train_dataset = TensorDataset(tensor_x_train, tensor_y_train)
# The weighted sampler decides the draw order, so `shuffle` is not passed
# (DataLoader treats `sampler` and `shuffle` as mutually exclusive).
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler)

# Test data.
tensor_x_test = torch.tensor(x_test, dtype=torch.float32)
tensor_y_test = torch.tensor(y_test, dtype=torch.int64)
test_dataset = TensorDataset(tensor_x_test, tensor_y_test)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [None]:
model = model.to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Built once: the transform holds no per-batch state, so there is no need to
# construct it again for every mini-batch.
resize = Resize((224, 224), interpolation=T.InterpolationMode.BILINEAR)

# Per-epoch history consumed by the plotting cell.
train_loss = list()
train_acc = list()

for epoch in range(NUM_EPOCHS):
    # Make sure dropout / batch-norm layers are in training mode even when
    # this cell is re-run after the evaluation cell called model.eval().
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    # `targets` (not `labels`) so the loop does not overwrite the notebook-level
    # `labels` list that the sampler cell built.
    for i, (features, targets) in enumerate(train_dataloader):
        features, targets = features.to(device), targets.to(device)
        features = resize(features)
        # Insert a channel axis and copy it three times so the single-channel
        # input matches the 3-channel input ResNet expects.
        # Assumes each batch arrives as (batch, height, width) -- TODO confirm.
        features = features.unsqueeze(1).repeat(1, 3, 1, 1)

        optimizer.zero_grad()

        predictions = model(features)
        # CrossEntropyLoss takes int64 class indices directly; a one-hot
        # float target gives the same loss but needs an extra conversion.
        loss = loss_fn(predictions, targets)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        total += targets.size(0)
        correct += (torch.argmax(predictions, 1) == targets).sum().item()
        if i % 10 == 0:
            # Mean loss over the batches seen so far in this epoch.
            # `running_loss` is never reset inside the epoch, so dividing it
            # by a constant 10 printed an ever-growing number.
            print(f"Epoch [{epoch + 1} / {NUM_EPOCHS}] loss: {running_loss / (i + 1):.4f}")

    train_loss.append(running_loss / len(train_dataloader))
    train_acc.append(correct / total * 100.0)

# NOTE(review): the file name says "resnet18" although the model built earlier
# in this notebook is a resnet34. The name is kept unchanged so anything that
# already loads this checkpoint keeps working.
torch.save(model.state_dict(), "vcd_resnet18_model.pth")
print("Finished Training")

In [None]:
# Side-by-side training curves: accuracy on the left, loss on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))

# The setters must be *called*. Assigning to them (`ax.set_title = '...'`)
# only overwrites the bound method with a string and draws nothing.
ax1.plot(train_acc, '-o')
ax1.set_title('Train Accuracy')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Accuracy')

ax2.plot(train_loss, '-o')
ax2.set_title('Train Loss')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Loss')
plt.show()

In [None]:
# Measure top-1 accuracy on the test loader.
model.eval()

# Same preprocessing as in training: resize to 224x224, then replicate the
# single channel three times.
to_input_size = Resize((224, 224), interpolation=T.InterpolationMode.BILINEAR)

correct = 0
total = 0

# Gradients are not needed for inference.
with torch.no_grad():
    for features, labels in test_dataloader:
        features = features.to(device)
        labels = labels.to(device)
        features = to_input_size(features).unsqueeze(1).repeat(1, 3, 1, 1)

        # Index of the highest logit is the predicted class.
        predictions = torch.max(model(features), 1).indices

        total += labels.size(0)
        correct += (predictions == labels).sum().item()
print(f'Accuracy: {100 * correct // total} %')