In [1]:
import cv2
import torch
import pandas as pd
import numpy as np
import time
from sklearn.utils import shuffle
import torch.nn as nn
import torch.nn.functional as F
torch.set_default_tensor_type('torch.FloatTensor')
data_path = "./data/"
train_file = "{}train/train.mp4".format(data_path)
test_file = "{}test/test.mp4".format(data_path)
labels = [float(speed) for speed in open("{}train/train.txt".format(data_path))]
len(labels)

20400

In [2]:
def ChangeBrightness(image):
    bright_factor = 0.2 + np.random.uniform()
    
    hsv_image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
    # perform brightness augmentation only on the second channel
    hsv_image[:,:,2] = hsv_image[:,:,2] * bright_factor
    
    # change back to RGB
    image_rgb = cv2.cvtColor(hsv_image, cv2.COLOR_HSV2RGB)
    return image_rgb

In [3]:
# creates a bunch of frames out of a video
def CreateFrames(fname, outdir):
    cap = cv2.VideoCapture(fname)
    success, img = cap.read()
    currentFrame = 0
    while success:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # crop image to remove dash and make 256 x 256
        img = img[114:370, 192:-192]
        cv2.imwrite("%s%d.jpg" % (outdir, currentFrame), img)
        success, img = cap.read()
        currentFrame += 1
    cap.release()
    
#CreateFrames(train_file, "data/train/")
#CreateFrames(test_file, "data/test/")

In [None]:
# playing around with images
# image shape (140, 400, 3)
#img = cv2.imread("./data/train/1223.jpg")
#cv2.imshow('img', img)
#cv2.waitKey(0)
#cv2.destroyAllWindows()
#img.shape

In [4]:
def CreateTrainData(fnames, speeds):
    d = {"image_path":fnames, "speed":speeds}
    df = pd.DataFrame(data=d)
    return df
    
train_fnames = ["%strain/%d.jpg" % (data_path, i) for i in range(len(labels))]
train_df = CreateTrainData(train_fnames, labels)
#train_df

In [5]:
def batch_shuffle(df):
    train_data = pd.DataFrame()
    valid_data = pd.DataFrame()
    for i in range(len(df) - 1):
        idx1 = np.random.randint(len(df) - 1)
        idx2 = idx1 + 1
        
        row1 = df.iloc[[idx1]].reset_index()
        row2 = df.iloc[[idx2]].reset_index()
        
        randInt = np.random.randint(9)
        if randInt < 2:
            valid_frames = [valid_data, row1, row2]
            valid_data = pd.concat(valid_frames, axis = 0, join = 'outer', ignore_index=False)
        else:
            train_frames = [train_data, row1, row2]
            train_data = pd.concat(train_frames, axis = 0, join = 'outer', ignore_index=False)
    return train_data, valid_data
train_data, val_data = batch_shuffle(train_df)

In [6]:
def GetOpticalFlow(img1, img2):
    gray1, gray2 = cv2.cvtColor(img1, cv2.COLOR_RGB2GRAY), cv2.cvtColor(img2, cv2.COLOR_RGB2GRAY)
    hsv = np.zeros_like(img1)
    hsv[...,1] = 255
    flow = cv2.calcOpticalFlowFarneback(gray1, gray2, None, 0.5, 3, 15, 3, 5, 1.2, 0)
    mag, ang = cv2.cartToPolar(flow[...,0], flow[...,1])
    hsv[...,0] = ang*180/np.pi/2
    hsv[...,2] = cv2.normalize(mag,None,0,255,cv2.NORM_MINMAX)
    bgr = cv2.cvtColor(hsv,cv2.COLOR_HSV2BGR)
    return bgr

In [7]:
def preprocess_image(image, train=False):
    image = cv2.resize(image, (220, 66), interpolation = cv2.INTER_AREA)
    if train:
        image = ChangeBrightness(image)
    return image


In [35]:
# assumes training data is shuffled
def generate_data(data, batch_size = 32, train=True):
    image_batch = np.zeros((batch_size, 3, 66, 220))
    label_batch = np.zeros((batch_size))
    data_len = len(data.index)
    cur = 0
    while cur * batch_size < data_len:
        start_idx = cur * batch_size
        
        # validation data isn't shuffled and we can iterate frame by frame
        step = 2 if train else 1
        for i, j in enumerate(range(start_idx, min(start_idx + batch_size, data_len-2), step)):
            row1 = data.iloc[j]
            row2 = data.iloc[j + 1]
            
            # get flow data
            img1 = preprocess_image(cv2.imread(row1['image_path']), train)
            img2 = preprocess_image(cv2.imread(row2['image_path']), train)
            flow = GetOpticalFlow(img1, img2)
            f = flow
            flow = np.transpose(flow, (2, 0, 1))
            
            # add gaussian noise
            speed = np.mean([row1['speed'], row2['speed']])
            
            # add noise when the car is moving
            if train and not speed:
                speed += np.random.normal()
            
            image_batch[i] = flow
            label_batch[i] = speed
            
        cur += 1
        if train: image_batch, label_batch = shuffle(image_batch, label_batch)
        x, y = torch.from_numpy(image_batch).type(torch.cuda.FloatTensor), torch.from_numpy(label_batch).type(torch.cuda.FloatTensor)
        yield x, f, y
        

In [21]:
class Flatten(nn.Module):
    def forward(self, x):
        return x.view(x.size(0), -1)

class FlowModel(nn.Module):
    
    def __init__(self):
        super(FlowModel, self).__init__()
        
        self.model = nn.Sequential(
        nn.Conv2d(3, 24, 5, 2),
        nn.ELU(),
        nn.Conv2d(24, 36, 5, 2),
        nn.ELU(),
        nn.Conv2d(36, 48, 5, 2),
        nn.ELU(),
        nn.Dropout(0.5),
        nn.Conv2d(48, 64, 3),
        nn.ELU(),
        nn.Conv2d(64, 64, 3),
        nn.ELU(),
        Flatten(),
        nn.Linear(1280, 100),
        nn.ELU(),
        nn.Linear(100, 50),
        nn.ELU(),
        nn.Linear(50, 10),
        nn.ELU(),
        nn.Linear(10, 1))
        
        # initialize weights
        self.model.apply(self.init_weights)
    
    def init_weights(self, m):
        if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight)
            
    def forward(self, x):
        # normalize values
        x = x / 127.5 - 1
        x = self.model(x)
        return x

In [25]:
def train_epoch(model, dl, opt):
    criterion = nn.MSELoss()
    total_loss = 0
    n_batches = 0
    for batch_img, _, batch_label in dl:
        batch_img.cuda()
        model.zero_grad()
        batch_preds = model(batch_img)
        loss = criterion(batch_preds.squeeze(), batch_label)
        loss.backward()
        opt.step()
        total_loss += loss.item()
        n_batches += 1
        
    return total_loss / n_batches

In [27]:
def calculate_validation_loss(model, val_dl):
    n_batches = 0
    total_loss = 0
    criterion = nn.MSELoss()
    for batch_img, _, batch_label in val_dl:
        batch_img.cuda()
        batch_preds = model(batch_img)
        loss = criterion(batch_preds.squeeze(), batch_label)
        total_loss += loss.item()
        n_batches += 1
    return total_loss / n_batches()

In [None]:
# find fastest batch size
#for i in range(10):
#    t0 = time.time()
#    bs = 2**i
#    dl = generate_training_data(train_data, batch_size=bs)
#    train_epoch(model, dl, opt)
#    t1 = time.time()
#    diff = t1 - t0
#    print('Batch size: {}, training time: {}'.format(bs, diff))

In [32]:
bs = 128
dev = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = FlowModel().to(dev)

In [33]:
for i in range(100):
    dl = generate_data(train_data, batch_size=bs)
    val_dl = generate_data(val_data, batch_size=bs, train=False)
    opt = torch.optim.Adam(model.parameters())
    model.train()
    torch.set_grad_enabled(True)
    train_loss = train_epoch(model, dl, opt) / bs
    
    # evaluation mode
    model.eval()
    torch.set_grad_enabled(False)
    val_loss = calculate_validation_loss(model, val_dl) / bs
    print('train_loss: {}, val_loss: {}'.format(train_loss, val_loss))

train_loss: 0.07201549905021826, val_loss: 0.221366369475921
train_loss: 0.0640717149381676, val_loss: 0.4499005331761307
train_loss: 0.060312867630273104, val_loss: 0.3239288433558411
train_loss: 0.052340716769498204, val_loss: 0.059685288928449154
train_loss: 0.04713526691850876, val_loss: 0.9183496079511113
train_loss: 0.04865508956173735, val_loss: 0.18647991079423162
train_loss: 0.043256443053964644, val_loss: 0.1256821695715189
train_loss: 0.041401834843019326, val_loss: 0.198995320747296
train_loss: 0.04087931484795145, val_loss: 0.06625465096698867
train_loss: 0.037060855549850294, val_loss: 0.03351790871885088
train_loss: 0.03580085797265412, val_loss: 0.2833390376634068
train_loss: 0.03627995131236891, val_loss: 0.03222635419418415
train_loss: 0.03410981422770889, val_loss: 0.020948538789525628
train_loss: 89.97017744512507, val_loss: 0.03353218770482474
train_loss: 0.034400050062686205, val_loss: 0.02469432669588261
train_loss: 8.088705794480179, val_loss: 0.4047457844846778

In [34]:
# save model
model_path = '{}flow_model'.format(data_path)
torch.save(model.state_dict(), model_path)
model = FlowModel()
model.load_state_dict(torch.load(model_path))
model.eval()

FlowModel(
  (model): Sequential(
    (0): Conv2d(3, 24, kernel_size=(5, 5), stride=(2, 2))
    (1): ELU(alpha=1.0)
    (2): Conv2d(24, 36, kernel_size=(5, 5), stride=(2, 2))
    (3): ELU(alpha=1.0)
    (4): Conv2d(36, 48, kernel_size=(5, 5), stride=(2, 2))
    (5): ELU(alpha=1.0)
    (6): Dropout(p=0.5)
    (7): Conv2d(48, 64, kernel_size=(3, 3), stride=(1, 1))
    (8): ELU(alpha=1.0)
    (9): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
    (10): ELU(alpha=1.0)
    (11): Flatten()
    (12): Linear(in_features=1280, out_features=100, bias=True)
    (13): ELU(alpha=1.0)
    (14): Linear(in_features=100, out_features=50, bias=True)
    (15): ELU(alpha=1.0)
    (16): Linear(in_features=50, out_features=10, bias=True)
    (17): ELU(alpha=1.0)
    (18): Linear(in_features=10, out_features=1, bias=True)
  )
)