In [1]:
#imports
import torch
from torch import nn
import pandas as pd
import numpy as np
from torch.optim.lr_scheduler import ExponentialLR
from efficientnet_pytorch import EfficientNet
from tqdm import tqdm
from torch.utils.data import DataLoader
from IPython.utils import io
import warnings
import statistics

In [2]:
#read in optical flow rgb images file
images = pd.read_pickle('../data/images.pkl')
images = images.reset_index()
#read labels
labels = None
with open("../speedchallenge/data/train.txt") as f:
    #we drop the first label cause optical flow is two images
    labels = (list(map(float, f.read().splitlines()))[1:])
labels = np.array(labels)

In [3]:
#model class
class Model(nn.Module):
    
    def __init__(self, rnn_features, rnn_hidden, batch_size, seq_length):
        
        super(Model, self).__init__()
        #initialize parameters
        self.rnn_features = rnn_features
        self.rnn_hidden = rnn_hidden
        self.batch_size = batch_size
        self.seq_length = seq_length
        #representation layer
        self.conv1 = nn.Conv2d(1280, 2048, 5, stride=2)
        self.conv2 = nn.Conv2d(2048, 4096, 5, stride=2)
        #temporal layer
        self.rnn = nn.RNN(rnn_features, rnn_hidden, 1, batch_first=True, bidirectional=False)   
        #decision layer
        self.fc1 = nn.Linear(rnn_hidden, 2**9)
        self.fc2 = nn.Linear(2**9, 2**7)
        self.fc3 = nn.Linear(2**7, 1)
    
    def forward(self, x):
        
        #unravel windows for cnn layer
        x = x.reshape(self.batch_size*self.seq_length, x.shape[2], x.shape[3], x.shape[4])
        #apply convolutions
        x = self.conv1(x)
        x = self.conv2(x)
        #recreate windows with flattened input features for rnn layer
        x = x.reshape(self.batch_size, self.seq_length, -1)
        #initialize hidden layer (why the fuck would anyone do this)
        hidden = self.init_hidden()
        #apply reccurent layer
        out, hidden = self.rnn(x, hidden)
        #only take last hidden state
        #https://i.stack.imgur.com/SjnTl.png
        out = out[:,self.seq_length-1::self.seq_length-1].reshape(-1, rnn_hidden)
        #apply decision layers
        out = self.fc1(out)
        out = self.fc2(out)
        out = self.fc3(out)
        return out, hidden
    
    def init_hidden(self):
        
        #initialize hidden rnn layer with zeros
        hidden = torch.zeros(1, self.batch_size, self.rnn_hidden).cuda()
        return hidden

In [4]:
%%capture
#hyper paramters
batch_size = 16
seq_length = 5
rnn_features = 4096*2
n_epochs = 100
n_train_examples = 200
n_test_examples = 20
rnn_hidden = 2**10
lr=1e-2

#initialize model
model = Model(rnn_features=rnn_features, rnn_hidden=rnn_hidden, batch_size=batch_size, seq_length=seq_length)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
scheduler = ExponentialLR(optimizer, gamma=0.99)
criterion = nn.MSELoss()

#use gpu if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

#create prediction model
model.to(device)

#download feature extractor
cnn = EfficientNet.from_pretrained('efficientnet-b0')
cnn.to(device)

In [5]:
#function to create data loader
def get_loader(n, batch_size):
    #sample n images above sequence length index
    sample_images = images[seq_length:].sample(n).index
    #create windows using efficientbet-b0
    sequences, targets = [], []
    for sample in tqdm(sample_images):
        window = images[sample-seq_length+1:sample+1].Image
        temp = []
        for of in window:
            im = torch.tensor(of.reshape(1,3,480,640).numpy()).float().cuda()
            features = cnn.extract_features(im).detach().cpu().numpy()
            features = features.reshape(features.shape[1:])
            temp.append(features)
        label = labels[sample-seq_length+1:sample+1]
        sequences.append(np.array(temp))
        targets.append(label)
    return sequences, targets

In [6]:
#data loader classes
class MyTrainDataloader(torch.utils.data.Dataset):
    def __init__(self):
        self.images = torch.Tensor(x_train)

    def __len__(self):
        return len(x_train)

    def __getitem__(self, idx):
        return self.images[idx, :, :], y_train[idx]
    
class MyTestDataLoader(torch.utils.data.Dataset):
    def __init__(self):
        self.images = torch.Tensor(x_test)

    def __len__(self):
        return len(x_test)

    def __getitem__(self, idx):
        return self.images[idx, :, :], y_test[idx]

In [7]:
#create train and test data loaders
x_train, y_train = get_loader(n_train_examples, batch_size)
x_test, y_test = get_loader(n_test_examples, batch_size)

train_data = MyTrainDataloader()
train_loader = torch.utils.data.DataLoader(train_data,
                                           shuffle=True,
                                           num_workers=2,
                                           batch_size=batch_size,
                                           drop_last=True)

test_data = MyTestDataLoader()
test_loader = torch.utils.data.DataLoader(test_data,
                                           shuffle=True,
                                           num_workers=2,
                                           batch_size=batch_size,
                                           drop_last=True)

100%|██████████| 200/200 [00:36<00:00,  5.47it/s]
100%|██████████| 20/20 [00:04<00:00,  4.94it/s]


In [8]:
%%capture
#for switching model between train and eval without cell output
def train():
    with io.capture_output() as captured:
        model.train()

def test():
    with io.capture_output() as captured:
        model.eval()

In [9]:
warnings.filterwarnings("ignore")

#track loss values
batch_train_loss = []
train_loss = []
val_loss = []

#begin training
for epoch in range(1, n_epochs + 1):
    
    #train
    train()
    train_loss = []
    for batch in train_loader:
        x, y = batch[0], batch[1]
        optimizer.zero_grad()
        output, hidden = model(torch.tensor(x).cuda())
        loss = criterion(output.flatten(), torch.tensor(y)[:,-1].flatten().float().cuda())
        loss.backward()
        optimizer.step()
        train_loss.append(loss.item())
        batch_train_loss.append(loss.item())
    scheduler.step()
    
    #validate
    if epoch%1 == 0:
        test()
        val_loss = []
        for batch in test_loader:
            x, y = batch[0], batch[1]
            output, hidden = model(torch.tensor(x).cuda())
            loss = criterion(output.flatten(), torch.tensor(y)[:,-1].flatten().float().cuda())
            val_loss.append(loss.item())
        
        #print progress
        print('Epoch: {}/{}.............'.format(epoch, n_epochs))
        print("Train MSE: {:.4f}".format(statistics.mean(train_loss)))
        print("Val MSE: {:.4f}".format(statistics.mean(val_loss)), "\n")
        
        #track loss
        train_loss.append(statistics.mean(train_loss))
        val_loss.append(statistics.mean(val_loss))

Epoch: 1/100.............
Train MSE: 2459.2386
Val MSE: 762.3265 

Epoch: 2/100.............
Train MSE: 636.4022
Val MSE: 280.0166 

Epoch: 3/100.............
Train MSE: 201.1439
Val MSE: 175.3254 

Epoch: 4/100.............
Train MSE: 102.3543
Val MSE: 140.3883 

Epoch: 5/100.............
Train MSE: 87.5857
Val MSE: 165.5199 

Epoch: 6/100.............
Train MSE: 84.9453
Val MSE: 133.5028 



KeyboardInterrupt: 

In [94]:
x = images.iloc[0].Image.reshape(1,3,480,640).float()
conv1 = nn.Conv2d(3,16,11,stride=7)
x = conv1(x)
conv2 = nn.Conv2d(16,32,7,stride=5)
x = conv2(x)

In [95]:
x.flatten().shape

torch.Size([7072])