# Getting Started:
## A simple driving model training and evaluation pipeline using the Drive360 dataset and PyTorch.

## Loading data from Drive360 dataset.

The **dataset.py** file contains the 3 classes necessary for creating a Drive360Loader. Using the **config.json** file to specify the location of the csv and data directory, we can generate phase (train, validation, test) specific data loaders that can output samples from each set. Adjust the **dataset.py** to your preferred training framework.

In [1]:
import json
from dataset import Drive360Loader

# Load the config.json file that specifies data
# location parameters and other hyperparameters
# required. The context manager closes the file
# handle as soon as the JSON has been parsed.
with open('./config.json', encoding='utf-8') as config_file:
    config = json.load(config_file)

# Create one data loader per phase: train, validation and test.
train_loader = Drive360Loader(config, 'train')
validation_loader = Drive360Loader(config, 'validation')
test_loader = Drive360Loader(config, 'test')

# Print the data (keys) available for use. See full
# description of each data type in the documents.
print('Loaded train loader with the following data available as a dict.')
print(train_loader.drive360.dataframe.keys())


Phase: train # of data: 76100
Phase: validation # of data: 5007
Phase: test # of data: 13618
Loaded train loader with the following data available as a dict.
Index(['cameraRight', 'cameraFront', 'cameraRear', 'cameraLeft', 'canSteering',
       'canSpeed', 'chapter'],
      dtype='object')


## Training a basic driving model

Create your driving model. This is specific to your learning framework. 

Below we give a very basic dummy model that uses the front facing camera and a resnet34 + LSTM architecture to predict canSteering and canSpeed. 

In [2]:
from torchvision import models
import torch.nn as nn
import torch

class SomeDrivingModel(nn.Module):
    """Basic dummy driving model: ResNet34 + LSTM -> steering and speed.

    Each front-camera frame is encoded by a pretrained ResNet34 (all of
    its children except the last one, i.e. without the classification
    layer) followed by a 128-unit linear layer. The per-frame encodings
    are fed through a 3-layer LSTM, and the LSTM output of the last
    processed frame is concatenated with the encoding of the frame stored
    under key ``0``. Two small regression heads turn that vector into the
    ``canSteering`` and ``canSpeed`` predictions.
    """

    def __init__(self):
        super().__init__()
        # Width of the vector handed to the regression heads; grows as
        # each sub-module below contributes its output.
        final_concat_size = 0

        # Main CNN
        cnn = models.resnet34(pretrained=True)
        self.features = nn.Sequential(*list(cnn.children())[:-1])
        self.intermediate = nn.Sequential(nn.Linear(
                          cnn.fc.in_features, 128),
                          nn.ReLU())
        final_concat_size += 128

        # Main LSTM
        # batch_first=False: the LSTM expects (sequence, batch, feature).
        self.lstm = nn.LSTM(input_size=128,
                            hidden_size=64,
                            num_layers=3,
                            batch_first=False)
        final_concat_size += 64

        # Angle Regressor
        self.control_angle = nn.Sequential(
            nn.Linear(final_concat_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )
        # Speed Regressor
        self.control_speed = nn.Sequential(
            nn.Linear(final_concat_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, data):
        """Predict steering and speed from a sequence of front-camera frames.

        Args:
            data: dict whose ``'cameraFront'`` entry maps a sequence key to
                an image batch. NOTE(review): presumably each batch is
                (batch, 3, H, W) and key ``0`` is the current frame --
                confirm against dataset.py.

        Returns:
            dict with ``'canSteering'`` and ``'canSpeed'`` tensors. Only
            the trailing size-1 dimension of each regression head is
            removed, so the batch dimension is kept even for a batch of
            one.
        """
        module_outputs = []
        lstm_i = []
        # Loop through temporal sequence of
        # front facing camera images and pass
        # through the cnn.
        for k, v in data['cameraFront'].items():
            x = self.features(v)
            x = x.view(x.size(0), -1)
            x = self.intermediate(x)
            lstm_i.append(x)
            # feed the current front facing camera
            # output directly into the
            # regression networks.
            if k == 0:
                module_outputs.append(x)

        # Feed temporal outputs of CNN into LSTM
        # (stacking along dim 0 gives the sequence-first layout).
        i_lstm, _ = self.lstm(torch.stack(lstm_i))
        module_outputs.append(i_lstm[-1])

        # Concatenate current image CNN output
        # and LSTM output.
        x_cat = torch.cat(module_outputs, dim=-1)

        # Feed concatenated outputs into the
        # regression networks. squeeze(-1) drops only the head's
        # size-1 output dimension; a bare torch.squeeze would also
        # drop the batch dimension when the batch size is 1.
        prediction = {'canSteering': self.control_angle(x_cat).squeeze(-1),
                      'canSpeed': self.control_speed(x_cat).squeeze(-1)}
        return prediction

# Create your own driving model, this is a very basic one.
# It is moved to the GPU only when the config asks for CUDA.
model = SomeDrivingModel()
if config['cuda']['use']:
    model = model.cuda()
# Bare expression: the notebook displays the module structure.
model


SomeDrivingModel(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

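A basic training procedure that iterates over the train_loader, feeds each batch into our dummy model and subsequently calculates the loss. After every epoch the model is scored on the validation set and a checkpoint is saved. Note that the loop below runs for the full `num_epochs`; interrupt it manually (e.g. after a few batches) if you just want a quick smoke test.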

In [None]:
import torch.optim as optim
import torch

# Training hyperparameters.
num_epochs = 40
log_every = 100  # report the running training loss every `log_every` batches

# Loss, optimiser and learning-rate schedule.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
# The scheduler is stepped with the validation loss once per epoch.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    patience=2,
    verbose=True,
)

# Put the model into training mode before the first epoch.
model.train()




def validate(model, criterion, data_loader):
    """Return the mean per-batch loss of ``model`` over ``data_loader``.

    The model is put into evaluation mode and gradient tracking is
    disabled while the loader is consumed; the mode the model was in on
    entry is restored before returning.

    Args:
        model: module that is called as ``model(data)``.
        criterion: loss function forwarded to ``compute_loss``.
        data_loader: iterable yielding ``(data, target)`` pairs.

    Returns:
        The summed batch losses divided by the number of batches drawn
        from ``data_loader``, or NaN when the loader yields no batches.
    """
    was_training = model.training
    model.eval()
    val_running_loss = 0.0
    num_batches = 0
    try:
        with torch.no_grad():
            for data, target in data_loader:
                data, target = sent_to_device(data, target, config)
                pred = model(data)
                loss = compute_loss(pred, target, criterion)
                val_running_loss += loss.item()
                num_batches += 1
    finally:
        # Hand the model back in the mode the caller left it in.
        model.train(was_training)

    # Average over the batches of the loader that was actually passed
    # in, not over a module-level loader.
    if num_batches == 0:
        return float('nan')
    return val_running_loss / num_batches


def compute_loss(prediction, target, criterion):
    """Combine the speed and steering losses into one value.

    ``criterion`` is applied to the ``'canSpeed'`` and ``'canSteering'``
    entries of ``prediction`` and ``target``; the steering term counts
    twice as much as the speed term.
    """
    speed_term = criterion(prediction['canSpeed'], target['canSpeed'])
    steering_term = criterion(prediction['canSteering'], target['canSteering'])
    return speed_term + 2 * steering_term


def sent_to_device(data, target, config):
    """Move a batch to the GPU when ``config['cuda']['use']`` is truthy.

    Args:
        data: dict whose values are either objects with a ``.cuda()``
            method or dicts of such objects (one nesting level).
        target: dict whose values have a ``.cuda()`` method.
        config: configuration dict; only ``config['cuda']['use']`` is read.

    Returns:
        ``(data, target)``: the inputs themselves when CUDA is disabled,
        otherwise new dicts holding the results of the ``.cuda()`` calls.
    """
    if not config['cuda']['use']:
        return data, target

    gpu_data = {}
    for camera, frames in data.items():
        if isinstance(frames, dict):
            # Temporal sequence: move every frame individually.
            gpu_data[camera] = {step: tensor.cuda()
                                for step, tensor in frames.items()}
        else:
            gpu_data[camera] = frames.cuda()

    gpu_target = {}
    for name, labels in target.items():
        gpu_target[name] = labels.cuda()

    return gpu_data, gpu_target


for epoch in range(num_epochs):

    # Training
    # Re-assert training mode every epoch so the loop does not depend
    # on whatever mode validation or evaluation code left behind.
    model.train()
    train_running_loss = 0.0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = sent_to_device(data, target, config)
        optimizer.zero_grad()
        pred = model(data)
        loss = compute_loss(pred, target, criterion)
        loss.backward()
        optimizer.step()

        train_running_loss += loss.item()
        # Log on a 1-based batch count so that every reported value is
        # the mean of exactly `log_every` batch losses (a 0-based check
        # would fold 101 batches into the first report).
        batches_seen = batch_idx + 1
        if batches_seen % log_every == 0:
            train_loss = train_running_loss / log_every
            print('[epoch: %d, batch:  %5d] training loss: %.5f' % (epoch, batches_seen, train_loss))
            train_running_loss = 0.0

    # Drop the references to the last training batch before validating.
    # Rebinding (rather than `del`) also works when the loader yielded
    # no batches at all.
    data = target = None

    # Validation
    val_loss = validate(model, criterion, validation_loader)
    scheduler.step(val_loss)
    print('[epoch: %d] validation loss: %.5f' % (epoch, val_loss))

    # Save model (one checkpoint per epoch)
    torch.save(model.state_dict(), f"./model-{epoch}.torch")
            

[epoch: 0, batch:    100] training loss: 1.05533
[epoch: 0, batch:    200] training loss: 0.65524
[epoch: 0, batch:    300] training loss: 0.67674


## Save trained model

In [6]:
from datetime import datetime

# Tag the checkpoint with the current local time
# (day-month-year-hour:minute:second) so that saves made at
# different times end up in different files.
label = f"{datetime.now():%d-%m-%Y-%H:%M:%S}"
torch.save(model.state_dict(), f"./model-{label}.torch")

## Local evaluation of the model.

In [None]:
import numpy as np

# Score the model on the validation set with the same metric as the
# training loss: speed MSE + 2 * steering MSE, averaged over batches.
model.eval()
running_mse = 0.0
num_batches = 0
with torch.no_grad():
    for batch_idx, (data, target) in enumerate(validation_loader):
        # Move the batch to the GPU when CUDA is enabled in the config
        # (the inputs are returned unchanged otherwise).
        data, target = sent_to_device(data, target, config)
        prediction = model(data)
        # Computed with tensor operations so it also works for tensors
        # that live on the GPU.
        speed_mse = ((prediction['canSpeed'] - target['canSpeed']) ** 2).mean()
        steering_mse = ((prediction['canSteering'] - target['canSteering']) ** 2).mean()
        mse = speed_mse + 2 * steering_mse
        running_mse += mse.item()
        num_batches += 1
        if batch_idx % 100 == 0:
            print(batch_idx)

if num_batches:
    print('validation score (speed MSE + 2 * steering MSE): %.5f' % (running_mse / num_batches))


## Creating a submission file.

In [5]:
# Target normalisation settings from the config. `target_mean` and
# `target_std` are looked up per target name ('canSteering',
# 'canSpeed') when predictions are mapped back to the original scale;
# `normalize_targets` switches that mapping on or off.
target_std = config['target']['std']
target_mean = config['target']['mean']
normalize_targets = config['target']['normalize']

def add_results(results, output):
    """Append one batch of predictions to ``results`` in place.

    Args:
        results: dict with list values under ``'canSteering'`` and
            ``'canSpeed'``; the lists are extended.
        output: dict with tensors under ``'canSteering'`` and
            ``'canSpeed'``, as returned by the model.

    When the module-level ``normalize_targets`` flag is set, the
    predictions are mapped back to the original scale with
    ``target_std`` and ``target_mean`` before being stored.
    """
    batch_values = {}
    for name in ('canSteering', 'canSpeed'):
        values = np.squeeze(output[name].detach().cpu().numpy())
        if normalize_targets:
            values = (values * target_std[name]) + target_mean[name]
        # A batch of one squeezes down to a 0-d array (or a NumPy scalar
        # after the arithmetic above), neither of which can be iterated;
        # atleast_1d turns both back into a length-1 array.
        batch_values[name] = np.atleast_1d(values)

    # Extend only after both targets were converted successfully so the
    # two result lists cannot get out of step.
    results['canSteering'].extend(batch_values['canSteering'])
    results['canSpeed'].extend(batch_values['canSpeed'])


We use pandas to create a submission file, which is simply a 2-column csv with a canSteering and a canSpeed prediction for each row in **drive360_test.csv**: a total of 305437 rows/predictions, not including the header. See the **sample_submission.csv** file for an example.

IMPORTANT: for the test phase, indices will start 10s (100 samples) into each chapter. This is to allow challenge participants to experiment with different temporal settings of data input. If challenge participants use a temporal length greater than 10s for each training sample, they must write a custom function here. Please check out the **dataset.py** file for additional explanation.

In [6]:
import pandas as pd

# Timestamp for the file name. It must not contain '/': a slash is a
# path separator, so the csv would be written into directories that
# do not exist. Same format as the label used when saving the model.
label = datetime.now().strftime("%d-%m-%Y-%H:%M:%S")
file = f'./submission-{label}.csv'
results = {'canSteering': [],
           'canSpeed': []}

# Make sure the model is in evaluation mode even if this cell is run
# without the local evaluation cell.
model.eval()
with torch.no_grad():
    for batch_idx, (data, target) in enumerate(test_loader):
        # Move the inputs to the GPU when CUDA is enabled. The targets
        # are not needed for prediction, so an empty dict is passed
        # in their place.
        data, _ = sent_to_device(data, {}, config)
        prediction = model(data)
        add_results(results, prediction)

# One row per prediction, columns canSteering and canSpeed.
df = pd.DataFrame.from_dict(results)
df.to_csv(file, index=False)