# Environment

In [None]:
import os
import io
import requests
import zipfile
import torch

In [None]:
# clone the git repository and set it as the working directory
! git clone https://github.com/martin-marek/parking-space-occupancy
os.chdir('parking-space-occupancy')

In [None]:
# download the dataset (skipped when 'dataset/data' already exists)
if not os.path.exists('dataset/data'):
    r = requests.get("https://pub-e8bbdcbe8f6243b2a9933704a9b1d8bc.r2.dev/parking%2Frois_gopro.zip")
    # fail loudly on an HTTP error instead of passing an error page to ZipFile
    r.raise_for_status()
    # the context manager closes the archive once extraction has finished
    with zipfile.ZipFile(io.BytesIO(r.content)) as z:
        z.extractall('dataset/data')

In [None]:
# pick the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# import dataset, models, and training utils.
from dataset import acpds
from utils.engine import train_model
from models.rcnn import RCNN
from models.faster_rcnn_fpn import FasterRCNN_FPN

In [None]:
# build the three datasets from the extracted data directory
data_root = 'dataset/data'
train_ds, valid_ds, test_ds = acpds.create_datasets(data_root)

In [None]:
# instantiate an R-CNN model and train it; `out_dir` is handed to train_model
# as the output directory argument
out_dir = 'out_dir'
model = RCNN()
train_model(
    model,
    train_ds,
    valid_ds,
    test_ds,
    out_dir,
    device,
    verbose=True,
)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 175MB/s]


epoch   1 -- train acc: 0.8628 -- valid acc.: 0.9301 -- 77 sec
epoch   2 -- train acc: 0.9341 -- valid acc.: 0.9470 -- 57 sec
epoch   3 -- train acc: 0.9422 -- valid acc.: 0.9165 -- 57 sec
epoch   4 -- train acc: 0.9487 -- valid acc.: 0.9575 -- 57 sec
epoch   5 -- train acc: 0.9533 -- valid acc.: 0.9695 -- 57 sec
epoch   6 -- train acc: 0.9686 -- valid acc.: 0.9559 -- 57 sec
epoch   7 -- train acc: 0.9596 -- valid acc.: 0.9653 -- 59 sec
epoch   8 -- train acc: 0.9611 -- valid acc.: 0.9690 -- 58 sec
epoch   9 -- train acc: 0.9746 -- valid acc.: 0.9685 -- 57 sec
epoch  10 -- train acc: 0.9621 -- valid acc.: 0.9601 -- 57 sec
epoch  11 -- train acc: 0.9638 -- valid acc.: 0.9716 -- 57 sec
epoch  12 -- train acc: 0.9781 -- valid acc.: 0.9664 -- 57 sec
epoch  13 -- train acc: 0.9749 -- valid acc.: 0.9748 -- 57 sec
epoch  14 -- train acc: 0.9788 -- valid acc.: 0.9790 -- 57 sec
epoch  15 -- train acc: 0.9786 -- valid acc.: 0.9800 -- 57 sec
epoch  16 -- train acc: 0.9753 -- valid acc.: 0.9653 --

In [None]:
import os
import io
import json
import requests
import zipfile
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from collections import defaultdict
from collections import namedtuple
from glob import glob

In [None]:
# download the training logs (skipped when the logs directory already exists)
logs_dir = 'training_output'
if not os.path.exists(logs_dir):
    r = requests.get("https://pub-e8bbdcbe8f6243b2a9933704a9b1d8bc.r2.dev/parking%2Fpaper_training_output.zip")
    # fail loudly on an HTTP error instead of passing an error page to ZipFile
    r.raise_for_status()
    # the context manager closes the archive once extraction has finished
    with zipfile.ZipFile(io.BytesIO(r.content)) as z:
        z.extractall(logs_dir)

In [None]:
# plot the validation-accuracy curve of the first training run of one model
model_name = 'FasterRCNN_FPN_800_square'
train_log = pd.read_csv(f'{logs_dir}/{model_name}_0/train_log.csv')
va = train_log.valid_accuracy.tolist()

fig, ax = plt.subplots()
# the rest of this notebook treats valid_accuracy as a fraction (it is
# multiplied by 100 before being reported as a percentage), so scale it here
# to agree with the '%' unit in the axis label
ax.plot([100 * acc for acc in va], label=model_name)
ax.set_xlabel('Epoch')
ax.set_ylabel('Validation acc. (%)')
# the label passed to plot() is only rendered once a legend is drawn
ax.legend()
plt.show()

In [None]:
# create dicts with model validation and test accuracies
# (model name -> list with one entry per training run)
va_dict = defaultdict(list)
ta_dict = defaultdict(list)

# iterate through model directories
for model_dir in sorted(glob(f'{logs_dir}/*')):

    # get model id based on model directory; basename() copes with whichever
    # path separator glob returns on this platform (splitting on '/' does not)
    model_id = os.path.basename(model_dir)

    # split model_id into model_name and training_iter
    model_name, _ = model_id.rsplit('_', 1)

    # read validation accuracy from training logs
    train_log = pd.read_csv(f'{model_dir}/train_log.csv')
    va = train_log.valid_accuracy.tolist()

    # append logs if they're the first logs of the given model
    # or if they're of the same length as the previous logs
    # (avoid storing logs of a model that hasn't finished training yet)
    if len(va_dict[model_name]) == 0 or len(va_dict[model_name][0]) == len(va):
        # read test accuracy from test logs
        with open(f'{model_dir}/test_logs.json') as f:
            ta = json.load(f)['accuracy']

        va_dict[model_name].append(va)
        ta_dict[model_name].append(ta)

# compute accuracy mean and SE for each model
Logs = namedtuple('Logs', ['va_mean', 'va_se', 'ta_mean', 'ta_se'])
logs = {}
for k, v in va_dict.items():
    # print number of training iters for each model
    print(f'{k}: {len(v)}')

    # calculate the mean and standard error of valid. accuracy,
    # per epoch (axis 0 runs over the training runs)
    va = np.array(v)
    # optional smoothing of each run before averaging (needs `ma` to be defined):
    # va = np.array([ma(x, 10) for x in va])
    va_mean = np.mean(va, 0)
    # np.std defaults to ddof=0, i.e. the population standard deviation
    va_se = np.std(va, 0) / np.sqrt(va.shape[0])

    # calculate the mean and standard error of test accuracy
    ta = np.array(ta_dict[k])
    ta_mean = np.mean(ta)
    ta_se = np.std(ta) / np.sqrt(len(ta))

    # save validation and test logs
    logs[k] = Logs(va_mean, va_se, ta_mean, ta_se)

In [None]:
def ma(x, w=10):
    """Moving average of `x` over a sliding window of `w` samples.

    Only fully overlapping windows are kept ('valid' mode), so for
    len(x) >= w the result has len(x) - w + 1 entries.
    """
    window = np.ones(w)
    window_sums = np.convolve(x, window, mode='valid')
    return window_sums / w

In [None]:
# mean validation accuracy per epoch for every model, with a +/- 1 SE band
fig, ax = plt.subplots(figsize=[12, 8])
for k, v in logs.items():
    epochs = np.arange(len(v.va_mean))
    # draw on `ax` explicitly rather than through pyplot's implicit current axes
    ax.plot(epochs, v.va_mean, label=k, linewidth=2)
    ax.fill_between(epochs, v.va_mean - v.va_se, v.va_mean + v.va_se, alpha=0.5)
# label the axes so the figure can be read on its own
ax.set_xlabel('Epoch')
ax.set_ylabel('Validation accuracy')
ax.legend()
# zoom in on the accuracy range of interest
ax.set_ylim([0.925, 0.99])
plt.show()

In [None]:
# create a dataframe with model accuracies (cells are LaTeX-formatted strings)
# raw strings keep the LaTeX backslashes literal; '\%' and '\p' in a normal
# string are invalid escape sequences and trigger a warning on recent Pythons
columns = ['Architecture', 'Pooling', 'Resolution', r'Valid. accuracy [\%]', r'Test accuracy [\%]']
rows = []
for k, v in logs.items():
    # keys look like '<architecture>_<resolution>_<pooling>'
    model_name, res, pooling = k.rsplit('_', 2)
    model_name = {'RCNN': 'R-CNN', 'FasterRCNN_FPN': 'Faster R-CNN FPN'}[model_name]
    pooling = {'qdrl': 'quadrilateral', 'square': 'square'}[pooling]
    # validation accuracy after the last epoch, as "mean +/- SE" in percent
    va_str = rf'{100*v.va_mean[-1]:.2f} $\pm$ {100*v.va_se[-1]:.2f}'
    # test accuracy, as "mean +/- SE" in percent
    ta_str = rf'{100*v.ta_mean:.2f} $\pm$ {100*v.ta_se:.2f}'
    rows.append([model_name, pooling, res, va_str, ta_str])

# build the frame once instead of enlarging it one row at a time
df = pd.DataFrame(rows, columns=columns)

In [None]:
# cast the resolution to int so it sorts numerically, then order the rows by
# architecture (ascending), pooling and resolution (both descending)
sort_keys = ['Architecture', 'Pooling', 'Resolution']
df = (
    df
    .astype({'Resolution': int})
    .sort_values(sort_keys, ascending=[True, False, False])
)
df