In [1]:
import sys, skvideo.io, json, base64
import numpy as np
from PIL import Image
from io import BytesIO, StringIO

In [2]:
from fastai.conv_learner import *
from fastai.dataset import *
from fastai.models.resnet import vgg_resnet50

import json
torch.backends.cudnn.benchmark=True

## Models

In [3]:
class SaveFeatures():
    features=None
    def __init__(self, m): self.hook = m.register_forward_hook(self.hook_fn)
    def hook_fn(self, module, input, output): self.features = output
    def remove(self): self.hook.remove()

In [4]:
class UnetBlock(nn.Module):
    def __init__(self, up_in, x_in, n_out):
        super().__init__()
        up_out = x_out = n_out//2
        self.x_conv  = nn.Conv2d(x_in,  x_out,  1)
        self.tr_conv = nn.ConvTranspose2d(up_in, up_out, 2, stride=2)
        self.bn = nn.BatchNorm2d(n_out)
        
    def forward(self, up_p, x_p):
        up_p = self.tr_conv(up_p)
        x_p = self.x_conv(x_p)
        cat_p = torch.cat([up_p,x_p], dim=1)
        return self.bn(F.relu(cat_p))

In [5]:
class Unet34(nn.Module):
    def __init__(self, rn):
        super().__init__()
        self.rn = rn
        self.sfs = [SaveFeatures(rn[i]) for i in [2,4,5,6]]
        self.up1 = UnetBlock(512,256,256)
        self.up2 = UnetBlock(256,128,256)
        self.up3 = UnetBlock(256,64,256)
        self.up4 = UnetBlock(256,64,256)
        self.up5 = UnetBlock(256,3,16)
        self.up6 = nn.ConvTranspose2d(16, 3, 1)
        
    def forward(self,x):
        inp = x
        x = F.relu(self.rn(x))
        x = self.up1(x, self.sfs[3].features)
        x = self.up2(x, self.sfs[2].features)
        x = self.up3(x, self.sfs[1].features)
        x = self.up4(x, self.sfs[0].features)
        x = self.up5(x, inp)
        x = self.up6(x)
        return x
    
    def close(self):
        for sf in self.sfs: sf.remove()

In [6]:
class UnetModel():
    def __init__(self,model,name='unet'):
        self.model,self.name = model,name

    def get_layer_groups(self, precompute):
        lgs = list(split_by_idxs(children(self.model.rn), [lr_cut]))
        return lgs + [children(self.model)[1:]]

In [7]:
def get_base():
    layers = cut_model(f(True), cut)
    return nn.Sequential(*layers)

In [8]:
f = resnet34
cut,lr_cut = model_meta[f]

In [9]:

m_base = get_base()
m = to_gpu(Unet34(m_base))
#     models = UnetModel(m)
#     learn = ConvLearner(md, models)
#     learn.opt_fn=optim.Adam
#     learn.crit=nn.CrossEntropyLoss()
#     learn.metrics=[new_acc, dice]
#     return learn

In [10]:
PATH = Path('../data/Train')

In [11]:
# learn.load('1024urn')
load_model(m, str(PATH/'models/1024urn.h5'))

In [12]:
# file = sys.argv[-1]
file = 'test_video.mp4'

if file == 'demo.py':
  print ("Error loading video")
  quit

# Define encoder function
def encode(array):
	pil_img = Image.fromarray(array)
	buff = BytesIO()
	pil_img.save(buff, format="PNG")
	return base64.b64encode(buff.getvalue()).decode("utf-8")

video = skvideo.io.vread(file)

In [13]:

class Normalize():
    """ Normalizes an image to zero mean and unit standard deviation, given the mean m and std s of the original image """
    def __init__(self, m, s, tfm_y=TfmType.NO):
        self.m=np.array(m, dtype=np.float32)
        self.s=np.array(s, dtype=np.float32)
        self.tfm_y=tfm_y

    def __call__(self, x, y=None):
        x = (x-self.m)/self.s
        if self.tfm_y==TfmType.PIXEL and y is not None: y = (y-self.m)/self.s
        return x,y

In [14]:
def normalize(x):
    m,s = imagenet_stats
    x = (x-m)/s
    return x

In [36]:
def preprocess(video):
    f1_norm = normalize(video)
    f1_roll = np.rollaxis(f1_norm, 3, 1)
    f1_pad = np.pad(f1_roll, [(0,0),(0,0),(0,8),(0,0)], mode='constant')
    return f1_pad

In [37]:
f1 = preprocess(video)

In [19]:
xv = VV(torch.from_numpy(f1_pad[:8]).contiguous().float())

In [38]:
# m

In [39]:
# model_summary(m, [3,608,800])

In [50]:
a = np.zeros([3,300,400])

In [53]:
np.vstack([a,a]).shape

(6, 300, 400)

In [64]:
list(range(0,f1.shape[0],8))

[0, 8, 16, 24]

In [68]:
results = []
for i in range(0,f1.shape[0],8):
    xv = VV(torch.from_numpy(f1_pad[i:i+8]).contiguous().float())
    preds = m(xv)
    mx,idx = torch.max(preds, 1)
    idx_slice = idx[:,:-8,:]
    results.append(idx_slice)

In [69]:
r_stacked = torch.cat(results,0)

In [70]:
r_stacked.shape

torch.Size([31, 600, 800])

In [79]:
r_np = r_stacked.data.cpu().numpy()

In [82]:
answer_key = {}

# Frame numbering starts at 1
frame_idx = 1
for frame in r_np:
    # Look for red cars :)
	binary_car_result = (frame==1).astype('uint8')
    
    # Look for road :)
	binary_road_result = binary_car_result = (frame==2).astype('uint8')

	answer_key[frame_idx] = [encode(binary_car_result), encode(binary_road_result)]
    
    # Increment frame
	frame_idx+=1

# Print output in proper json format
tester_data = json.dumps(answer_key)
with open('tester_data', 'w') as f:
    f.write(tester_data)
print(json.dumps(answer_key))

{"1": ["iVBORw0KGgoAAAANSUhEUgAAAyAAAAJYCAAAAAC/Hd2sAAARdUlEQVR4nO3d15LjKBQAUOia//9l5sFREgq2FQjnVO1ur9ttIcTlEmQ7BAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACA78WrC9CtGEK6ugysEiAXielV9QIFttFhFebv6gL0KgqFKrhMBYnBcKs0Mshlxn2TvqpEAuQy41whd5RIgFxmkjGkkAIJkLPdwyDGSUBIIQX6d3UBehLTLT6kinoIkNPEW4Q8w0PCqIEAOVgMId3/CffgSDJIPVyrg81UcLr/Mj1uOXnlFpmlJCbpl3jER3hO1V9zdnvsJXExjpWt37T0yyCHlEQGOZT+p3au4KGm1buWPYbP4moyyJG+7n70W6VwJQ41t1m+pdplkRIIkCP9Eh9BiJRAgBxpWLsp//AaYXIlc5APfb9L8WV86MMupfY/FT/p0TMfzPBFjcsh15FBPjXTWjO3r2eDQY9UF9frQzGEtDWJPCs3TR/6lCxyDXfzrotbhkdvv0ohxJBywyuqI4PMGuSJmHL5IPMX8ZZf3p6/Q/pYPiwHEiBZ8fYmjref38011cc7BueeLEDqI0AW/Vw9e6WPyctxDgEyZ+cGvVNFi5CTCZCMvSplMIk54DU5ngAZ27FGds8fo1fleALkza6VcUT6mL4yBxMgd3tXxKAVH/riHEmAhEMq