In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os.path
import scipy
import argparse
import math
import cv2
import sys
import time
import re
from lxml import etree
import caffe
from scipy.optimize import fmin_bfgs, minimize
from tqdm import tqdm

%matplotlib inline

In [2]:
# xml
def getImage(imageInput, input_shape=(1, 3, 360, 480)):
    """Read an image from disk and prepare it as a network input array.

    Args:
        imageInput: Path of the image file, handed to ``cv2.imread``.
        input_shape: Indexable of four entries; only index 2 (target height)
            and index 3 (target width) are read here.

    Returns:
        A tuple ``(input_image, frameRaw, rawSize)`` where ``input_image`` is
        the resized frame with its last axis moved to the front and wrapped
        in a leading axis of length 1, ``frameRaw`` is the array returned by
        ``cv2.imread`` and ``rawSize`` is ``frameRaw.shape``.

    Raises:
        IOError: If ``cv2.imread`` could not load ``imageInput``.
    """
    frameRaw = cv2.imread(imageInput)
    if frameRaw is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # fail here with the path instead of an AttributeError further down.
        raise IOError("cv2.imread could not read image: %r" % (imageInput,))
    rawSize = frameRaw.shape
    # cv2.resize expects the target size as (width, height).
    frame = cv2.resize(frameRaw, (input_shape[3], input_shape[2]))
    input_image = frame.transpose((2, 0, 1))
    input_image = np.asarray([input_image])
    return input_image, frameRaw, rawSize
    
    
def parseChildren(dataSub):
    """Turn one matrix node of the info XML into a 2-D float64 array.

    The node's first child holds the first dimension, its second child the
    second dimension, and its last child the whitespace-separated values.

    Args:
        dataSub: XML element (iterable over its children, each with ``.text``).

    Returns:
        ``numpy.ndarray`` of dtype float64 with shape ``(dim_x, dim_y)``.

    Raises:
        ValueError: If a dimension or value is not numeric, or the number of
            values does not match ``dim_x * dim_y``.
    """
    # list(element) replaces the deprecated Element.getchildren().
    children = list(dataSub)
    dim_x = int(children[0].text)
    dim_y = int(children[1].text)
    # str.split() with no argument splits on any whitespace run and drops
    # empty tokens, and yields a real list on both Python 2 and Python 3
    # (np.array(filter(...)) does not on Python 3).
    values = [float(token) for token in children[-1].text.split()]
    return np.array(values, dtype=np.float64).reshape([dim_x, dim_y])

def parseInfo(xml_INFO):
    """Load the matrices stored in an info XML file.

    Args:
        xml_INFO: Path (or file object) accepted by ``etree.parse``.

    Returns:
        Dict holding, for every recognised top-level tag that is present,
        the array produced by ``parseChildren``:
        ``'R'`` <- ``rotation_matrix``, ``'T'`` <- ``translation_vector``,
        ``'K'`` <- ``camera_matrix``, ``'D'`` <- ``distortion_coefficients``.
        Tags that do not occur in the file leave their key absent.
    """
    tag_to_key = {
        "rotation_matrix": 'R',
        "translation_vector": 'T',
        "camera_matrix": 'K',
        "distortion_coefficients": 'D',
    }

    root = etree.parse(xml_INFO).getroot()
    M_RTK = {}
    for child in root:
        key = tag_to_key.get(child.tag)
        if key is not None:
            M_RTK[key] = parseChildren(child)
    return M_RTK


# CAFFE
def loadNet(model, weights):
    """Build a Caffe network in TEST phase.

    Args:
        model: Network definition passed as first argument to ``caffe.Net``.
        weights: Weights file passed as second argument to ``caffe.Net``.

    Returns:
        The constructed ``caffe.Net`` instance.
    """
    return caffe.Net(model, weights, caffe.TEST)

def roadLaneFromSegNet(imageInput, net):
    """Run the segmentation net on one image and return a class-3 mask.

    Args:
        imageInput: Path of the image file.
        net: Network exposing the blobs ``'data'`` (input) and ``'argmax'``
            (per-pixel class index).

    Returns:
        A tuple ``(out, raw_image)``: ``out`` is a uint8 image at the raw
        image's resolution that is 255 where the net predicted class 3 and 0
        elsewhere (values in between can appear at edges because of the
        interpolating resize); ``raw_image`` is the image as read from disk.
        NOTE(review): class 3 is presumably the lane-marking class of this
        model - confirm against the model's label list.
    """
    caffe.set_mode_gpu()
    input_shape = net.blobs['data'].data.shape

    # Resize to the shape this network actually declares instead of relying
    # on getImage's built-in default.
    input_image, raw_image, rawSize = getImage(imageInput, input_shape)
    net.forward_all(data=input_image)

    # Only one image was fed, so take the first item of the output blob and
    # drop its singleton axes to get the 2-D class map.
    segmentation_ind = np.squeeze(net.blobs['argmax'].data[0]).astype(np.uint8)
    lane_mask = ((segmentation_ind == 3) * 255).astype(np.uint8)

    # cv2.resize expects (width, height); rawSize is (height, width, ...).
    out = cv2.resize(lane_mask, (rawSize[1], rawSize[0]))
    return out, raw_image


def extractBinarizedImg(imgout, rawimg):
    """Black out every pixel of ``rawimg`` that the mask does not select.

    Args:
        imgout: 2-D array; pixels with a value strictly greater than 100 are
            kept.
        rawimg: 3-D image array whose last axis holds the channels. It is
            not modified.

    Returns:
        A new array with the dtype of ``rawimg``: ``rawimg`` with its channel
        axis reversed, multiplied by the 0/1 mask.
    """
    keep = imgout > 100
    flipped = rawimg[:, :, ::-1]
    # Multiplying by a same-dtype 0/1 mask keeps the image dtype and builds a
    # fresh array, so the caller's image is left untouched (the previous
    # per-channel assignment wrote through the reversed view). This also
    # avoids np.int, which no longer exists in current NumPy.
    return flipped * keep[:, :, np.newaxis].astype(flipped.dtype)


def func(p, img, refPos):
    """Sample two shifted parabolas at the integer rows 0..199.

    For a horizontal anchor ``t`` the curve is
    ``y = a*(x - t)**2 + b*(x - t) + refPos[1]``; it is solved for ``x`` at
    every row ``y``. Two anchors are used: ``refPos[0] - d - c`` and
    ``refPos[0] - d + c``.

    Args:
        p: Sequence ``(a, b, c, d)``.
        img: Unused; kept so the signature matches ``loss``.
        refPos: Indexable whose entry 0 is the reference x and entry 1 the
            reference y.

    Returns:
        List ``[x11, x12, x21, x22, y]``. ``y`` holds the rows for which the
        quadratic has real roots; ``x11``/``x12`` are the ``+``/``-`` roots
        for the first anchor and ``x21``/``x22`` those for the second, all
        truncated to int.
    """
    a, b, c, d = p
    rows = np.arange(200)

    centre = refPos[0] - d
    anchor_lo = centre - c
    anchor_hi = centre + c

    # Linear coefficients of the two quadratics in x.
    lin_lo = b - 2*a*anchor_lo
    lin_hi = b - 2*a*anchor_hi
    const_lo = a*anchor_lo**2.0 - b*anchor_lo + refPos[1] - rows

    # The discriminant simplifies to b**2 - 4*a*(refPos[1] - y), which does
    # not depend on the anchor, so one discriminant serves both curves.
    disc = lin_lo**2 - 4*a*const_lo
    has_roots = disc >= 0
    rows = rows[has_roots]
    root = np.sqrt(disc[has_roots])
    denom = 2*a

    x11 = ((-lin_lo + root) / denom).astype(int)
    x12 = ((-lin_lo - root) / denom).astype(int)
    x21 = ((-lin_hi + root) / denom).astype(int)
    x22 = ((-lin_hi - root) / denom).astype(int)

    return [x11, x12, x21, x22, rows]


def loss(p, img, refPos):
    """Count how many sampled curve points land on non-zero pixels of ``img``.

    Despite the name this is a score: a larger value means more curve points
    coincide with mask pixels, and ``getBestParams`` keeps the largest one.

    Args:
        p: Parameters ``(a, b, c, d)`` forwarded to ``func``.
        img: 2-D array indexed as ``img[row, column]``.
        refPos: Reference position forwarded to ``func``.

    Returns:
        Number of curve points with ``0 < x < 100`` lying on a pixel ``> 0``,
        plus 20 times the number of those hits whose row is greater than 170.
        Returns 0 when no curve point falls inside the column range.
    """
    x11, x12, x21, x22, y = func(p, img, refPos)

    # All four root branches share the same rows; stack them into flat
    # x / y vectors (duplicated points are counted once per branch).
    xs = np.concatenate([x11, x12, x21, x22])
    ys = np.concatenate([y, y, y, y])

    in_columns = (xs > 0) & (xs < 100)
    xs = xs[in_columns]
    ys = ys[in_columns]
    if xs.size == 0:
        # Nothing to look up; previously this case raised an IndexError.
        return 0

    hits = img[ys, xs] > 0
    near_hits = hits[ys > 170]
    return np.sum(hits) + 20 * np.sum(near_hits)


def getBestParams(img, params, refPos):
    """Exhaustively search a 4-D parameter grid for the highest ``loss``.

    Args:
        img: Image forwarded to ``loss`` / ``func``.
        params: Sequence of four iterables ``(la, lb, lc, ld)`` holding the
            candidate values of a, b, c and d.
        refPos: Reference position forwarded to ``loss`` / ``func``.

    Returns:
        A tuple ``(coords, l_best)``: ``coords`` is ``func`` evaluated at the
        winning parameters and ``l_best`` is a float64 array
        ``[A, B, C, D, Loss]``. When several candidates share the top score
        the first one in grid order wins.

    Raises:
        ValueError: If the grid is empty.
    """
    la, lb, lc, ld = params
    candidates = [[aa, bb, cc, dd]
                  for aa in la
                  for bb in lb
                  for cc in lc
                  for dd in ld]
    if len(candidates) == 0:
        raise ValueError("getBestParams: empty parameter grid")

    scores = np.array([loss(pp, img, refPos) for pp in candidates])
    best = int(np.argmax(scores))

    # Build the result explicitly as [A, B, C, D, Loss]. Taking the first
    # row of a DataFrame built from a dict made the layout depend on how
    # pandas orders dict keys, which differs between versions and could put
    # Loss in front of the parameters.
    best_params = np.asarray(candidates[best], dtype=np.float64)
    l_best = np.append(best_params, float(scores[best]))
    return func(best_params, img, refPos), l_best


def PrevFilter(d, l_errors):
    """Blend a new value 50/50 with the most recent entry of ``l_errors``.

    Args:
        d: New value.
        l_errors: Sequence of earlier values; only its last entry is used.

    Returns:
        ``d`` unchanged when ``l_errors`` is empty, otherwise half of ``d``
        plus the mean of half the last entry of ``l_errors``.
    """
    if len(l_errors) > 0:
        previous_half = np.array(l_errors[-1]*0.5)
        return d*0.5 + previous_half.mean()

    return d


def generateAllFrame(samp, xml_INFO, net, BEV_coord, initParams, predLinFit=None):
    """Estimate one smoothed offset value per frame of a sample.

    For every image of the sample the segmentation mask is warped into a
    100x200 top-down view, the two-curve model of ``func`` is fitted by grid
    search, and the fitted ``d`` parameter is smoothed with ``PrevFilter``.

    Reads the module-level names ``image_dir`` (input root) and ``out_dir``
    (output root); both must be defined before this function is called.

    Args:
        samp: Sample name; images are read from ``image_dir/samp``.
        xml_INFO: Path of the info XML parsed by ``parseInfo``; it must
            provide the R, T, K and D matrices.
        net: Network handed to ``roadLaneFromSegNet``.
        BEV_coord: Dict with ``'coord_3d'`` (four 3-D points that are mapped
            onto the corners of the top-down view) and ``'coord_600cm'``
            (one 3-D point used as reference position).
        initParams: Four arrays of candidate values (a, b, c, d) searched on
            the first frame. The spacing of the first two arrays also sets
            the search radius around the previous result on later frames.
        predLinFit: Optional ``(slope, intercept)`` applied to every value.

    Returns:
        List with one value per frame: the smoothed ``d`` values, or
        ``value*predLinFit[0] + predLinFit[1]`` when ``predLinFit`` is given.
    """
    l_image = sorted(os.listdir("%s/%s" % (image_dir, samp)))

    # The per-sample output directory is created even though nothing is
    # written into it here; the directory listing of out_dir is used
    # elsewhere in the notebook to enumerate samples.
    sampOutDir = "%s/%s" % (out_dir, samp)
    if not os.path.isdir(sampOutDir):
        # makedirs also creates out_dir itself when it does not exist yet.
        os.makedirs(sampOutDir)

    M_RTK = parseInfo(xml_INFO)
    R, T, K, D = M_RTK['R'], M_RTK['T'], M_RTK['K'], M_RTK['D']

    src, _ = cv2.projectPoints(BEV_coord['coord_3d'], R, T, K, D)
    # Order the four projected points by image x so that they pair up with
    # the corner order used in dst below.
    src = src[src[:, 0, :].argsort(axis=0)[:, 0], 0, :]
    src_6m = cv2.projectPoints(BEV_coord['coord_600cm'], R, T, K, D)[0][0][0]

    dst = np.array([[[10, 200], [10, 0], [90, 0], [90, 200]]]).astype(np.float32)
    M = cv2.getPerspectiveTransform(src, dst)

    # Map the reference point into the top-down view (homogeneous coords).
    mm = np.dot(M, np.array(list(src_6m) + [1.]))
    refPos = (mm / mm[2])[0:2]

    l_errors = []
    seedParams = None

    for imageName in tqdm(l_image):
        inName = "%s/%s/%s" % (image_dir, samp, imageName)
        caffeOutImg, _ = roadLaneFromSegNet(inName, net)

        # M maps image -> top-down view, which is what warpPerspective
        # expects by default. The earlier call passed cv2.WARP_INVERSE_MAP
        # as the 4th positional argument, which is the `dst` slot of the
        # Python binding and not `flags`, so the flag never took effect;
        # it is dropped rather than "fixed" to keep the warp direction.
        binarizedImg_t = cv2.warpPerspective(caffeOutImg, M, (100, 200),
                                             flags=cv2.INTER_LINEAR)

        if seedParams is None:
            paramSearch = initParams
        else:
            # Later frames: search a 3x3x3x3 neighbourhood of the previous
            # frame's best parameters.
            a_radius = initParams[0][1] - initParams[0][0]
            b_radius = initParams[1][1] - initParams[1][0]
            lla = np.linspace(seedParams[0] - a_radius, seedParams[0] + a_radius, 3)
            llb = np.linspace(seedParams[1] - b_radius, seedParams[1] + b_radius, 3)
            llc = np.arange(seedParams[2] - 2, seedParams[2] + 3, 2)
            lld = np.arange(seedParams[3] - 1, seedParams[3] + 2)
            paramSearch = [lla, llb, llc, lld]

        _, params = getBestParams(binarizedImg_t, paramSearch, refPos)
        seedParams = params
        l_errors.append(PrevFilter(params[3], l_errors))

    if predLinFit is None:
        return l_errors

    return [e*predLinFit[0] + predLinFit[1] for e in l_errors]
    



In [3]:
# ---- Paths ----------------------------------------------------------------
image_dir = "/hdfs/huboqiang/dataset/renwu/TSD-LKSM/"   # root holding one folder of frames per sample
out_dir = "./LKSM2"                                      # root for per-sample output folders
model = "./Example_Models/segnet_model_driving_webdemo.prototxt"
weights = "./Example_Models/segnet_weights_driving_webdemo.caffemodel"

# ---- Ground rectangle projected into the image -----------------------------
# Same unit as the "coord_600cm" reference point below (600 -> centimetres).
width = 300       # half-width of the rectangle
x_start = 200     # near edge
x_end = 6000      # far edge

# Four ground-plane corners (z = 0): near-left, near-right, far-left, far-right.
coord_3d = np.array(
    [
        [x_start, -width, 0],
        [x_start,  width, 0],
        [x_end,   -width, 0],
        [x_end,    width, 0],
    ],
    dtype=np.float32,
)
# Single reference point on the centre line.
coord_6m = np.array([[600., 0., 0.]])

BEV_coords = dict(coord_3d=coord_3d, coord_600cm=coord_6m)

# ---- Grid searched on the first frame: candidates for (a, b, c, d) ----------
# NOTE(review): the second half of the b grid ends at +1.5, so 1.5 occurs
# twice; -1.5 may have been intended. Left as is because predLinFit below was
# fitted with this grid.
initParams = [
    np.linspace(0.001, 0.02, 10),
    np.concatenate([np.linspace(1.5, 50, 10), np.linspace(-50, 1.5, 10)]),
    np.arange(23, 28, 2),
    np.arange(-15, 15, 2),
]

# Slope / intercept copied from the np.polyfit result printed further down.
predLinFit = np.array([6.66317499, 1.35541496])

# Predict

In [4]:
# Sample to run the prediction on; the matching info XML path is derived
# from the sample name so the two cannot drift apart.
samp = "TSD-LKSM-00159"
xml_INFO = "./xml/TSD-LKSM-Info/%s-Info.xml" % samp

In [5]:
net = loadNet(model, weights)

l_err = generateAllFrame(samp, xml_INFO=xml_INFO, net=net, BEV_coord=BEV_coords, initParams=initParams, predLinFit=predLinFit)

# Write one <FrameNNNNN> element per frame. The tag is numbered with the
# frame index; the previous version formatted the constant 1, so every
# element was named Frame00001.
# NOTE(review): numbering starts at 0 here - confirm against the ground-truth
# XML whether frames are expected to start at 00000 or 00001.
with open("./%s.xml" % (samp), 'w') as f_out:
    f_out.write("<opencv_storage>\n")
    for i, e in enumerate(l_err):
        f_out.write("  <Frame%05d>%e</Frame%05d>\n" % (i, e, i))

    f_out.write("</opencv_storage>")

100%|██████████| 50/50 [00:10<00:00,  4.57it/s]


# Fitting predLinFit (linear map from the raw estimate to the ground truth)

In [7]:
# Every visible entry of out_dir is treated as a sample name (same listing
# that `ls` would give: sorted, hidden entries skipped).
l_samp = sorted(sampName for sampName in os.listdir(out_dir) if not sampName.startswith('.'))

from functools import partial

# NOTE(review): xml_INFO still points at the info file of the single sample
# selected above, so every sample is processed with that one file. If the
# samples have different info files, build the path from each sample name.
runSample = partial(generateAllFrame, xml_INFO=xml_INFO, net=net, BEV_coord=BEV_coords, initParams=initParams)

# Built as a real list so the results can be traversed more than once
# (a bare map() is a one-shot iterator on Python 3).
ll_err = [runSample(sampName) for sampName in l_samp]


100%|██████████| 50/50 [00:11<00:00,  4.40it/s]
100%|██████████| 50/50 [00:11<00:00,  4.37it/s]
100%|██████████| 50/50 [00:11<00:00,  4.31it/s]
100%|██████████| 50/50 [00:10<00:00,  4.57it/s]
100%|██████████| 50/50 [00:11<00:00,  4.38it/s]
100%|██████████| 50/50 [00:11<00:00,  4.19it/s]
100%|██████████| 50/50 [00:11<00:00,  4.31it/s]
100%|██████████| 50/50 [00:11<00:00,  4.45it/s]
100%|██████████| 50/50 [00:11<00:00,  4.32it/s]
100%|██████████| 50/50 [00:11<00:00,  4.32it/s]


In [12]:
def getGT(samp,xml_GT_dir="/hdfs/huboqiang/kaggle/selfDriving/suzhou/SegNet-Tutorial/xml/TSD-LKSM-GT/"):
    """Read the ground-truth values of one sample.

    Args:
        samp: Sample name; the file ``<xml_GT_dir>/<samp>-GT.xml`` is parsed.
        xml_GT_dir: Directory holding the ground-truth XML files.

    Returns:
        List with the text of every child of the XML root converted to
        float, in document order.
    """
    gtPath = '%s/%s-GT.xml' % (xml_GT_dir, samp)
    root = etree.parse(gtPath).getroot()
    return [float(node.text) for node in root]
    

In [13]:
# Flatten the per-sample lists into one list of estimates and one list of
# ground-truth values. Plain comprehensions avoid the builtin reduce (gone on
# Python 3) and the repeated list copying of reduce(lambda x, y: x + y, ...).
l_d = [frameVal for sampVals in ll_err for frameVal in sampVals]
l_GT = [gtVal for sampName in l_samp for gtVal in getGT(sampName)]

# Both lists are paired element-wise by the fit that follows.
assert len(l_d) == len(l_GT), "estimates and ground truth differ in length"

In [15]:
# Least-squares straight line l_GT ~ f_fit[0]*l_d + f_fit[1].
f_fit = np.polyfit(l_d, l_GT, deg=1)
f_fit

array([ 6.66317499,  1.35541496])

In [None]:
# Fitted prediction against ground truth, with a +/-20 band around y = x.
fig, ax = plt.subplots()
ax.plot(np.array(l_d)*f_fit[0]+f_fit[1], l_GT, '.')

diag = np.linspace(-40, 120, 10)
for offset in (20, -20):
    ax.plot(diag, diag + offset, 'g')

ax.set_xlabel("prediction after linear fit")
ax.set_ylabel("ground truth")
ax.set_title("Linear fit vs. ground truth (green: y = x +/- 20)");