In [0]:
#@title Training parameters
# Name of the attribute looked up on `basic_models` to build the network; it is also
# used for the checkpoint file name (./models/<model_name>.pth) and the summary row label.
model_name = "JNN19" #@param {type:"string"}

# Colab setup

In [2]:
# Mount Google Drive into the Colab filesystem (prompts for authorization).
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


## Environment setup

In [3]:
import os
# Fail fast, with instructions, when the shared rUNSWift folder is not present on the mounted Drive.
if not os.path.exists('/content/drive/My Drive/rUNSWift'):
  raise FileNotFoundError('Please add https://drive.google.com/drive/folders/1OU4Bg0qz_DnF7DxjOFTlFc9r5hHGUrFD?usp=sharing to "My Drive"')
# Work from the training directory so the relative paths used later (../data, ./models, summary.csv) resolve.
os.chdir('/content/drive/My Drive/rUNSWift/Vision/DNN 2020/marvin training model')

# thop is needed by the later `from thop import profile`.
# NOTE(review): the install is unpinned while the sed patch below edits specific lines of thop's source;
# consider pinning the version the patch was written against.
!pip install thop
# Patch the installed thop/profile.py in place: turn `#  print` into `print` and strip `.item()` from
# lines containing `print(prefix`.
# NOTE(review): the path hard-codes python3.6; on a runtime with a different Python version sed will not
# find the file and the patch is silently skipped (a failing `!` command does not raise) - confirm.
!sed -i 's/#  print/print/;/print(prefix/s/.item()//g;' /usr/local/lib/python3.6/dist-packages/thop/profile.py

Collecting thop
  Downloading https://files.pythonhosted.org/packages/6c/8b/22ce44e1c71558161a8bd54471123cc796589c7ebbfc15a7e8932e522f83/thop-0.0.31.post2005241907-py3-none-any.whl
Installing collected packages: thop
Successfully installed thop-0.0.31.post2005241907


# Training model
___
The neural network outputs an image with the same size as the input image. Each pixel in the output is either a 0 or a 1 where a 1 indicates that the pixel belongs to a robot.  The bounding boxes are then drawn in postprocessing.

We tested multiple models with different architectures (skip connections, depthwise-separable convolutions, inverted residual bottlenecks, etc.) but many of them could not be converted to `tiny-dnn` because of the limited types of layers available in `tiny-dnn`. Also, when we tested models on the robot they proved to be far too slow, so we had to cut down the number of layers and adjust the network parameters to improve inference time.

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchsummary import summary
from thop import profile

import basic_models
from dataloaderV2 import RobotAEDataset
from utils import *

In [5]:
# Pick the compute device in order of preference: TPU (through torch_xla), then CUDA, then CPU.
# The TPU branch is taken when the COLAB_TPU_ADDR environment variable is set.
if 'COLAB_TPU_ADDR' in os.environ:
  VERSION = "1.5" # from https://colab.research.google.com/github/pytorch/xla/blob/master/contrib/colab/getting-started.ipynb#scrollTo=sPJVqAKyml5W&line=1&uniqifier=1
  # Download and run the pytorch/xla environment setup script; `$VERSION` is expanded by IPython
  # from the Python variable above.
  # NOTE(review): the script is fetched from the `master` branch, so its contents are not pinned.
  !curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
  !python pytorch-xla-env-setup.py --version $VERSION
  # torch_xla is imported only after the setup script above has run.
  import torch_xla
  import torch_xla.core.xla_model as xm
  device = xm.xla_device()
elif torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")
# Bare expression so the cell displays the selected device.
device

device(type='cpu')

In [6]:
# Build the validation and training datasets from the already-labelled data on disk.
# Both use the same downsampling factor and target device.
val_path, train_path = '../data/validation', '../data/training'
dataset_options = dict(downsample_factor=0.25, device=device)

val_dataset = RobotAEDataset(val_path, **dataset_options)
print('val_dataset   loaded')

train_dataset = RobotAEDataset(train_path, **dataset_options)
print('train_dataset loaded')

val_dataset   loaded
train_dataset loaded


In [0]:
# Wrap the datasets in batch loaders; only the training loader shuffles its samples.
loader_options = {'batch_size': 16, 'num_workers': 0}
trainloader = torch.utils.data.DataLoader(train_dataset, shuffle=True, **loader_options)
valloader = torch.utils.data.DataLoader(val_dataset, shuffle=False, **loader_options)

In [0]:
# Loss function
# pos_weight is a hyperparameter: > 1 gives more weight to false negatives,
# < 1 gives more weight to false positives.
# Use the torch.tensor factory rather than the legacy torch.Tensor constructor: the legacy
# constructor rejects a non-CPU `device`, so the original line failed on a CUDA runtime.
# The weight has to live on the same device as the model outputs because the criterion
# itself is never moved with `.to(device)`.
pos_weight = torch.tensor([1.0], device=device)
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

In [106]:
# Re-import basic_models so edits to the module are picked up, then build a fresh,
# untrained instance of the architecture selected by `model_name`.
from importlib import reload  # Py3 only; unneeded in py2.
basic_models = reload(basic_models)
model_cls = getattr(basic_models, model_name)
model = model_cls()

# Profile FLOPs / parameter count on a single dummy input. This runs before the model is
# moved to `device`, so both the model and the dummy input are still on the CPU here.
sample_shape = (1, 240, 320)
flops, params = count_params(model, torch.randn(1, *sample_shape))

# Move the network to the training device and print the per-layer summary.
model = model.to(device)
summary(model, sample_shape)

# Checkpoint location for this architecture.
model_path = f'./models/{model_name}.pth'

# Adam over all model parameters.
optimizer = optim.Adam(model.parameters(), lr=1e-3)

				 Sequential (1843200.0, 80.0)
			 conv_bn_relu (1843200.0, 80.0)
				 Sequential (7488000.0, 3088.0)
			 conv_bn_relu (7488000.0, 3088.0)
				 Sequential (7488000.0, 3088.0)
			 conv_bn_relu (7488000.0, 3088.0)
				 Sequential (5616000.0, 4632.0)
			 conv_bn_relu (5616000.0, 4632.0)
				 Sequential (8380800.0, 6936.0)
			 conv_bn_relu (8380800.0, 6936.0)
				 Sequential (2793600.0, 9248.0)
			 conv_bn_relu (2793600.0, 9248.0)
				 Sequential (3715200.0, 12320.0)
			 conv_bn_relu (3715200.0, 12320.0)
				 Sequential (3715200.0, 12320.0)
			 conv_bn_relu (3715200.0, 12320.0)
		 Sequential (42000300.0, 52097.0)
	 JNN19 (42000300.0, 52097.0)
params:	52,097
flops:	42.000M
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 8, 120, 160]              80
       BatchNorm2d-2          [-1, 8, 120, 160]               0
              ReLU-3          [-1, 8, 120, 160]               0

In [107]:
# Train for a fixed number of epochs, checkpointing the weights whenever the
# validation loss improves on the best value seen so far.
num_epochs = 200
best_loss = 9999999999

for epoch in range(num_epochs):
    train_loss = train(model, device, trainloader, criterion, optimizer, epoch)
    val_loss, _ = evaluate(model, device, valloader, criterion)

    # Written as `not (a < b)` so a NaN loss is treated exactly like "no improvement".
    if not (val_loss < best_loss):
        continue

    best_loss = val_loss
    torch.save(model.state_dict(), model_path)

Epoch 1
Training loss: 0.00846
Validation loss: 0.01048
Average inference time: 4.777 ms
Epoch 2
Training loss: 0.00313
Validation loss: 0.01141
Average inference time: 4.775 ms
Epoch 3
Training loss: 0.00218
Validation loss: 0.00334
Average inference time: 4.596 ms
Epoch 4
Training loss: 0.00174
Validation loss: 0.00749
Average inference time: 5.020 ms
Epoch 5
Training loss: 0.00165
Validation loss: 0.00542
Average inference time: 4.843 ms
Epoch 6
Training loss: 0.00148
Validation loss: 0.00224
Average inference time: 4.913 ms
Epoch 7
Training loss: 0.00113
Validation loss: 0.00296
Average inference time: 4.870 ms
Epoch 8
Training loss: 0.00103
Validation loss: 0.00270
Average inference time: 4.575 ms
Epoch 9
Training loss: 0.00090
Validation loss: 0.00163
Average inference time: 4.808 ms
Epoch 10
Training loss: 0.00089
Validation loss: 0.00394
Average inference time: 4.602 ms
Epoch 11
Training loss: 0.00076
Validation loss: 0.00152
Average inference time: 4.842 ms
Epoch 12
Training l

In [108]:
# Load the best checkpoint written by the training loop and re-evaluate it.
# map_location='cpu' deserializes the tensors onto the CPU regardless of the device they
# were saved from, so a checkpoint produced in a GPU session can still be loaded in a
# CPU-only session; load_state_dict then copies the values into the model's existing
# parameters on whatever device the model lives on.
best_state_dict = torch.load(model_path, map_location='cpu')
model.load_state_dict(best_state_dict)
avg_loss, avg_time = evaluate(model, device, valloader, criterion)
# Bare expression so the cell displays (validation loss, inference time).
avg_loss, avg_time

Validation loss: 0.00091
Average inference time: 4.916 ms


(0.0009105204666654268, 4.91641362508138)

In [109]:
# Append this run to summary.csv so we keep a history of results across models.
# Each execution of this cell adds one row.
import pandas as pd
import datetime

try:
  df = pd.read_csv('summary.csv', index_col=0)
except FileNotFoundError:
  # First run: start an empty history with the expected columns.
  df = pd.DataFrame(columns=['model name', 'date', 'num flops', 'num params', 'best val train loss', 'best val loss', 'colab inference time (ms)'])

# NOTE(review): 'best val train loss' is declared as a column but never filled in here,
# so it is left as NaN for every row this cell writes.
row = {
    'model name': model_name,
    'date': datetime.datetime.now(),
    'num flops': flops,
    'num params': params,
    'best val loss': best_loss,
    'colab inference time (ms)': avg_time,
}

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat with a
# one-row frame is the supported equivalent (columns missing from `row` become NaN).
df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
df.to_csv('summary.csv')
df

Unnamed: 0,model name,date,num flops,num params,best val train loss,best val loss,colab inference time (ms),v6 inference time (s),+gcc 7.5,+vectorization3,+64-bit+vectorization4,+fewer horizontal adds,+3x3 asm,+unroll asm
0,NeuralNetwork,2020-05-30 16:50:11.531938,38054700.0,37985.0,,0.001182,3.596121,0.098185,0.096944,0.085482,0.079342,0.052859,0.045973,0.039756
1,JNN1,2020-05-31 07:42:00.238484,37363500.0,38009.0,,0.001079,3.095989,0.093413,,0.082305,0.076387,0.051117,0.043682,0.037389
2,JNN2,2020-05-31 09:07:49.131277,38285100.0,38105.0,,0.000785,3.765872,0.086521,,0.076644,0.072058,0.051099,0.044441,0.038354
3,JNN3,2020-05-31 12:00:21.437022,29875500.0,36369.0,,0.000821,3.411449,0.074717,0.074788,0.066302,0.060352,0.039751,0.035248,0.029669
4,JNN4,2020-05-31 13:04:46.042455,30451500.0,38369.0,,0.000896,2.985795,0.079523,,0.072098,0.064883,0.047197,0.041215,0.037312
5,JNN5,2020-05-31 14:14:21.150786,34137900.0,39905.0,,0.000775,3.941987,0.086943,0.088031,0.080159,0.064988,0.042801,0.037959,0.03476
6,JNN6,2020-05-31 15:22:09.758481,27629100.0,37209.0,,0.000865,3.536892,0.071068,,0.064473,0.054799,0.037234,0.032465,0.028794
7,JNN7,2020-05-31 17:20:45.904784,22329900.0,29617.0,,0.000884,3.496485,0.055485,,0.051564,0.04241,0.028704,0.025932,0.022551
8,JNN8,2020-05-31 18:23:37.926662,19565100.0,27985.0,,0.001021,2.392858,0.050424,,0.045674,0.042075,0.031352,0.028161,0.024581
9,JNN9,2020-06-01 03:55:24.764050,30797100.0,36465.0,,0.000838,3.541705,0.076478,,0.069125,0.064433,0.04465,0.039478,0.034086


In [110]:
# Show the model's predictions on the validation set, thresholded at 0.5.
# NOTE(review): save_dst=None presumably means nothing is written to disk - confirm
# against display_results in utils.
display_results(
    model,
    device,
    valloader,
    thresh=0.5,
    save_dst=None,
)

Output hidden; open in https://colab.research.google.com to view.