In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm.auto import tqdm

import torch
import torch.nn as nn
from torch.utils.data import ConcatDataset
import torchvision
from torchvision import transforms

from dataloading.nvidia import NvidiaCropWide, Normalize, NvidiaDataset
from network import PilotNet
from trainer import Trainer

import wandb

%load_ext autoreload
%autoreload 2

## Datasets

In [2]:
# Base path that every recording name below is joined onto.
# NOTE(review): this is an absolute, machine-specific location.
root_path = Path("/home/romet/data/datasets/ut/nvidia-data")

# Recording names that make up the training set, kept in chronological order.
train_recordings = (
    "2021-05-20-12-36-10_e2e_sulaoja_20_30",
    "2021-05-20-12-43-17_e2e_sulaoja_20_30",
    "2021-05-20-12-51-29_e2e_sulaoja_20_30",
    "2021-05-20-13-44-06_e2e_sulaoja_10_10",
    "2021-05-20-13-51-21_e2e_sulaoja_10_10",
    "2021-05-20-13-59-00_e2e_sulaoja_10_10",
    "2021-05-28-15-07-56_e2e_sulaoja_20_30",
    "2021-05-28-15-17-19_e2e_sulaoja_20_30",
    "2021-06-07-14-06-31_e2e_rec_ss6",
    "2021-06-07-14-09-18_e2e_rec_ss6",
    "2021-06-07-14-36-16_e2e_rec_ss6",
    "2021-06-09-13-14-51_e2e_rec_ss2",
    "2021-06-09-13-55-03_e2e_rec_ss2_backwards",
    "2021-06-09-14-58-11_e2e_rec_ss3",
    "2021-06-09-15-42-05_e2e_rec_ss3_backwards",
    "2021-06-09-16-24-59_e2e_rec_ss13",
    "2021-06-09-16-50-22_e2e_rec_ss13_backwards",
    "2021-06-10-12-59-59_e2e_ss4",
    "2021-06-10-13-19-22_e2e_ss4_backwards",
    "2021-06-10-13-51-34_e2e_ss12",
    "2021-06-10-14-02-24_e2e_ss12_backwards",
    "2021-06-10-14-44-24_e2e_ss3_backwards",
    "2021-06-10-15-03-16_e2e_ss3_backwards",
    "2021-06-14-11-08-19_e2e_rec_ss14",
    "2021-06-14-11-43-48_e2e_rec_ss14_backwards",
)
train_paths = [root_path / recording for recording in train_recordings]

# Transform pipeline applied to every sample: crop first, then normalise.
# `tr` is reused by the validation cell below.
crop_then_normalize = [NvidiaCropWide(), Normalize()]
tr = transforms.Compose(crop_then_normalize)

# Only the wide front camera is used for training in this run.
trainsetFront = NvidiaDataset(train_paths, camera="front_wide", transform=tr)

# Disabled side-camera variant, kept for reference. It would add the left and right
# cameras of two recordings (each with its own steering correction) and concatenate
# them with the front-camera dataset.
# train_paths_sidecameras = [root_path / "2021-05-28-15-07-56_e2e_sulaoja_20_30",
#                            root_path / "2021-05-28-15-17-19_e2e_sulaoja_20_30"]
# tr_left = transforms.Compose([NvidiaCropWide(), Normalize()])
# trainsetLeft = NvidiaDataset(train_paths_sidecameras, transform=tr_left, camera="left", steering_correction=-0.167)
# tr_right = transforms.Compose([NvidiaCropWide(), Normalize()])
# trainsetRight = NvidiaDataset(train_paths_sidecameras, transform=tr_right, camera="right", steering_correction=0.004)
# trainset = ConcatDataset([trainsetFront, trainsetLeft, trainsetRight])

# Shuffled training batches of 64 samples, loaded by 6 persistent worker processes.
trainloader = torch.utils.data.DataLoader(
    trainsetFront,
    batch_size=64,
    shuffle=True,
    num_workers=6,
    pin_memory=True,
    persistent_workers=True,
)

In [3]:
# Recording names reserved for validation; joined onto the same root as the training set.
valid_recordings = (
    "2021-05-28-15-19-48_e2e_sulaoja_20_30",
    "2021-06-07-14-20-07_e2e_rec_ss6",
    "2021-06-14-11-22-05_e2e_rec_ss14",
)
valid_paths = [root_path / recording for recording in valid_recordings]

# Same camera and transform pipeline as the training dataset.
validset = NvidiaDataset(valid_paths, camera="front_wide", transform=tr)

# Validation batches are served in a fixed order (no shuffling) by 3 persistent workers.
validloader = torch.utils.data.DataLoader(
    validset,
    batch_size=64,
    shuffle=False,
    num_workers=3,
    pin_memory=True,
    persistent_workers=True,
)

## Train model

In [4]:
# Hyper-parameters for this run.
n_epochs = 100
learning_rate = 1e-3
weight_decay = 0.01

# Path prefix under which later cells save and load this run's model files.
model_name = "models/1-pilotnet-base/wide-v2"

# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Model and L1 (mean absolute error) loss, both placed on the selected device.
model = PilotNet().to(device)
criterion = nn.L1Loss().to(device)

# AdamW with every hyper-parameter spelled out explicitly.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=learning_rate,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=weight_decay,
    amsgrad=False,
)

trainer = Trainer(model_name, wandb_logging=True)
# Kept as the last expression so the value returned by train() is shown as the cell output.
trainer.train(model, trainloader, validloader, optimizer, criterion, n_epochs)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mrometaidla[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.10.32 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


  0%|          | 0/7937 [00:00<?, ?it/s]

  0%|          | 0/973 [00:00<?, ?it/s]

Saving best model.


  0%|          | 0/7937 [00:00<?, ?it/s]

  0%|          | 0/973 [00:00<?, ?it/s]

Saving best model.


  0%|          | 0/7937 [00:00<?, ?it/s]

  0%|          | 0/973 [00:00<?, ?it/s]

  0%|          | 0/7937 [00:00<?, ?it/s]

  0%|          | 0/973 [00:00<?, ?it/s]

  0%|          | 0/7937 [00:00<?, ?it/s]

  0%|          | 0/973 [00:00<?, ?it/s]

Saving best model.


  0%|          | 0/7937 [00:00<?, ?it/s]

  0%|          | 0/973 [00:00<?, ?it/s]

  0%|          | 0/7937 [00:00<?, ?it/s]

  0%|          | 0/973 [00:00<?, ?it/s]

  0%|          | 0/7937 [00:00<?, ?it/s]

  0%|          | 0/973 [00:00<?, ?it/s]

Saving best model.


  0%|          | 0/7937 [00:00<?, ?it/s]

[34m[1mwandb[0m: Network error resolved after 0:02:52.822523, resuming normal operation.
[34m[1mwandb[0m: 500 encountered ({"error":"Error 1040: Too many connections"}), retrying request
[34m[1mwandb[0m: 500 encountered ({"error":"Error 1040: Too many connections"}), retrying request
[34m[1mwandb[0m: Network error resolved after 0:00:35.698622, resuming normal operation.
[34m[1mwandb[0m: 500 encountered ({"error":"Error 1040: Too many connections"}), retrying request
[34m[1mwandb[0m: Network error resolved after 0:01:14.730210, resuming normal operation.


  0%|          | 0/973 [00:00<?, ?it/s]

  0%|          | 0/7937 [00:00<?, ?it/s]

  0%|          | 0/973 [00:00<?, ?it/s]

  0%|          | 0/7937 [00:00<?, ?it/s]

  0%|          | 0/973 [00:00<?, ?it/s]

  0%|          | 0/7937 [00:00<?, ?it/s]

  0%|          | 0/973 [00:00<?, ?it/s]

  0%|          | 0/7937 [00:00<?, ?it/s]

  0%|          | 0/973 [00:00<?, ?it/s]

  0%|          | 0/7937 [00:00<?, ?it/s]

  0%|          | 0/973 [00:00<?, ?it/s]

  0%|          | 0/7937 [00:00<?, ?it/s]

  0%|          | 0/973 [00:00<?, ?it/s]

  0%|          | 0/7937 [00:00<?, ?it/s]

  0%|          | 0/973 [00:00<?, ?it/s]

  0%|          | 0/7937 [00:00<?, ?it/s]

  0%|          | 0/973 [00:00<?, ?it/s]

  0%|          | 0/7937 [00:00<?, ?it/s]

  0%|          | 0/973 [00:00<?, ?it/s]

Early stopping, on epoch: 18.


0.1429302476907392

## Save models

In [5]:
# Save the weights the model holds once training has finished, next to this run's other files.
last_weights_path = f"{model_name}/last.pt"
torch.save(model.state_dict(), last_weights_path)
# Uploading the saved files to wandb is currently disabled:
# wandb.save(f"{model_name}/last.pt")
# wandb.save(f"{model_name}/best.pt")

In [6]:
# model_name = f"models/1-pilotnet-base/1cam-batch-v1"
# trainer = Trainer(model_name, wandb_logging=False)

In [7]:
# Export the best saved model to ONNX, using one validation batch as the example input.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Fetch a single batch with the built-in next(): Python 3 iterators are only guaranteed
# to implement __next__, and calling `.next()` on a DataLoader iterator raises
# AttributeError on PyTorch versions that no longer provide that alias.
data = next(iter(validloader))
inputs = data['image'].to(device)

# NOTE(review): assumes trainer.load_model returns the model on the same device as
# `inputs` - confirm against the Trainer implementation.
best_model = trainer.load_model(f"{model_name}/best.pt")
best_model.eval()  # switch to evaluation mode before exporting

ONNX_FILE_PATH = f"{model_name}/best.onnx"
# NOTE(review): the example input is a full validation batch, so the exported graph
# presumably has that batch size baked in unless dynamic axes are configured - confirm.
torch.onnx.export(best_model, inputs, ONNX_FILE_PATH)
#wandb.save(f"{model_name}/best.onnx")

In [8]:
import onnx

# Run the ONNX checker on the exported file; the checker is handed the path, not a loaded model.
exported_onnx_path = f"{model_name}/best.onnx"
onnx.checker.check_model(exported_onnx_path)