In [1]:
%pylab

import numpy as np
import cv2, os, glob, opengm, time
from PIL import Image
from skimage import measure
# from sklearn import mixture
from sklearn import ensemble
import sklearn.cluster as skc
from scipy.optimize import minimize
from scipy import cluster
import scipy.interpolate as spint
from scipy.ndimage.morphology import distance_transform_edt
from pylsd import lsd
import chumpy as ch

DICT_SEQUENCE_NAME = 'semantic_sequence_name'
DICT_BBOXES = 'bboxes'
DICT_FOOTPRINTS = 'footprints' ## same as bboxes but it indicates the footprint of the sprite on the ground plane
DICT_BBOX_ROTATIONS = 'bbox_rotations'
DICT_BBOX_CENTERS = 'bbox_centers'
DICT_FRAMES_LOCATIONS = 'frame_locs'
DICT_MASK_LOCATION = 'frame_masks_location'
DICT_ICON_TOP_LEFT = "icon_top_left"
DICT_ICON_FRAME_KEY = "icon_frame_key"
DICT_ICON_SIZE = "icon_size"
DICT_REPRESENTATIVE_COLOR = 'representative_color'
DICT_FRAME_SEMANTICS = "semantics_per_frame"
DICT_NUM_SEMANTICS = "number_of_semantic_classes"
DICT_PATCHES_LOCATION = "sequence_preloaded_patches_location"
DICT_TRANSITION_COSTS_LOCATION = "sequence_precomputed_transition_costs_location"
# DICT_FRAME_COMPATIBILITY_LABELS = 'compatibiliy_labels_per_frame'
DICT_LABELLED_FRAMES = 'labelled_frames' ## includes the frames labelled for the semantic labels (the first [DICT_FRAME_SEMANTICS].shape[1])
DICT_NUM_EXTRA_FRAMES = 'num_extra_frames' ## same len as DICT_LABELLED_FRAMES
DICT_CONFLICTING_SEQUENCES = 'conflicting_sequences'
DICT_DISTANCE_MATRIX_LOCATION = 'sequence_precomputed_distance_matrix_location' ## for label propagation
DICT_SEQUENCE_LOCATION = "sequence_location"

DICT_FILMED_DATASET_BASE_LOC = 'filmed_dataset_base_location'

DICT_FILMED_OBJECT_NAME = 'filmed_object_name'
DICT_TRAJECTORY_POINTS = 'trajectory_points'
DICT_NEEDS_UNDISTORT = 'do_undistort_trajectory_points'
DICT_OBJECT_BILLBOARD_ORIENTATION = 'object_color_billboard_orientation_angle'
DICT_OBJECT_BILLBOARD_SCALE = 'object_color_bilboard_scale'
DICT_TRACK_LOCATION='track_points_location'

DICT_FILMED_SCENE_BASE_LOC = 'filmed_scene_base_location'
DICT_CAMERA_EXTRINSICS = 'camera_extrinsics'
DICT_CAMERA_INTRINSICS = 'camera_intrinsics'
DICT_DISTORTION_PARAMETER = 'distortion_parameter'
DICT_DISTORTION_RATIO = 'distortion_ratio'
DICT_DOWNSAMPLED_FRAMES_RATE = 'downsampled_frames_rate'
DICT_COMMENTS = "comments_and_info"
DICT_GROUND_MESH_POINTS = 'camera_ground_plane_mesh_points'
DICT_GROUND_MESH_SEGS_EXTRUDE = 'ground_plane_mesh_segments_to_extrude'
DICT_OBJECT_LENGTH = 'object_bounding_volume_length'
DICT_OBJECT_WIDTH = 'object_bounding_volume_width'
DICT_OBJECT_HEIGHT = 'object_bounding_volume_height'

Using matplotlib backend: Qt5Agg
Populating the interactive namespace from numpy and matplotlib


In [2]:
def quaternionTo4x4Rotation(quaternion, inverted=False):
    x, y, z, w = quaternion
    ## quaternion rotation
    M = np.array([[1.0 - 2.0*(y**2) - 2.0*(z**2), 2*x*y + 2*w*z, 2*x*z - 2*w*y, 0.0],
                  [2*x*y - 2*w*z, 1.0 - 2.0*(x**2) - 2.0*(z**2), 2*y*z + 2*w*x, 0.0],
                  [2*x*z + 2*w*y, 2*y*z - 2*w*x, 1.0 - 2.0*(x**2) - 2.0*(y**2), 0.0],
                  [0.0, 0.0, 0.0, 1.0]])
    ## invert it
    if inverted :
        M[:-1, :-1] = M[:-1, :-1].T
        
    return M

def angleAxisToQuaternion(angle, axis) :
    return np.array([axis[0]*np.sin(angle/2.0), axis[1]*np.sin(angle/2.0), axis[2]*np.sin(angle/2.0), np.cos(angle/2.0)])

def calipersOMBB(points) :
    """ computes minimum area oriented bounding box given a set of points using the calipers algorithm """
    
    convexHull = cv2.convexHull(points.astype(np.float32))[:, 0, :]
#     figure(); scatter(points[:, 0], points[:, 1]); plot(convexHull[np.mod(arange(len(convexHull)+1), len(convexHull)), 0], convexHull[np.mod(arange(len(convexHull)+1), len(convexHull)), 1])
#     scatter(convexHull[0, 0],convexHull[0, 1], color="red")
#     xlim([-1, 1])
#     ylim([-6.5, -4.5])
    
    minArea = 10000000000.0
    ombb = np.zeros([4, 2])
    for i in np.arange(len(convexHull))[0:] :
        j = np.mod(i+1, len(convexHull))
        
        segmentDir = convexHull[j, :] - convexHull[i, :]
        segmentDir /= np.linalg.norm(segmentDir)
        
        crossProduct = np.cross(np.array([1.0, 0.0, 0.0]), np.concatenate([segmentDir, [0]]))
        dotProduct = np.dot(segmentDir, np.array([1.0, 0.0]))
        #I know the last component of both vectors is 0 so the rotation matrix will be all zeros there
        T = quaternionTo4x4Rotation(angleAxisToQuaternion(np.arccos(dotProduct), crossProduct/np.linalg.norm(crossProduct)))[:-2, :-2]
        
        transformedPoints = np.dot(T, points.T-convexHull[i, :][:, np.newaxis]).T + convexHull[i, :]
        
        [x0, y0], [x1, y1] = np.min(transformedPoints, axis=0), np.max(transformedPoints, axis=0)
        
        transformedBBox = np.array([[x0, y0], [x0, y1], [x1, y1], [x1, y0]])
        bbox = np.dot(np.linalg.inv(T), transformedBBox.T-convexHull[i, :][:, np.newaxis]).T + convexHull[i, :]
        
        bboxArea = (x1-x0)*(y1-y0)
        if bboxArea < minArea :
            minArea = np.copy(bboxArea)
            ombb = np.copy(bbox)
            
#         transformedHull = np.dot(T, convexHull.T-convexHull[i, :][:, np.newaxis]).T + convexHull[i, :]
#         scatter(transformedPoints[:, 0], transformedPoints[:, 1], color="cyan")
#         plot(transformedHull[np.mod(arange(len(convexHull)+1), len(convexHull)), 0], transformedHull[np.mod(arange(len(convexHull)+1), len(convexHull)), 1], color="magenta")
#         scatter(transformedHull[np.mod(arange(len(convexHull)+1), len(convexHull)), 0], transformedHull[np.mod(arange(len(convexHull)+1), len(convexHull)), 1], color="magenta")
#         plot(transformedBBox[np.mod(arange(len(transformedBBox)+1), len(transformedBBox)), 0], transformedBBox[np.mod(arange(len(transformedBBox)+1), len(transformedBBox)), 1], color="magenta")
#         scatter(convexHull[i, 0], convexHull[i, 1], color="green"); plot([convexHull[i, 0], convexHull[i, 0]+segmentDir[0]], [convexHull[i, 1], convexHull[i, 1]+segmentDir[1]], color="green")
#         plot([convexHull[i, 0], convexHull[i, 0]+1], [convexHull[i, 1], convexHull[i, 1]], color="green")
#         plot(bbox[np.mod(arange(len(bbox)+1), len(bbox)), 0], bbox[np.mod(arange(len(bbox)+1), len(bbox)), 1], color="red")
#     plot(ombb[np.mod(arange(len(ombb)+1), len(ombb)), 0], ombb[np.mod(arange(len(ombb)+1), len(ombb)), 1], color="green")
    return ombb
    
def line2lineIntersection(line1, line2) :
    """x1, y1, x2, y2 = line1
       x3, y3, x4, y4 = line2"""
    
    x1, y1, x2, y2 = line1
    x3, y3, x4, y4 = line2
    denominator = (x1-x2)*(y3-y4)-(y1-y2)*(x3-x4)
    if denominator != 0 :
        Px = ((x1*y2-y1*x2)*(x3-x4)-(x1-x2)*(x3*y4-y3*x4))/denominator
        Py = ((x1*y2-y1*x2)*(y3-y4)-(y1-y2)*(x3*y4-y3*x4))/denominator
        return np.array([Px, Py])
    else :
        return None

def isABetweenBandC(a, b, c):
    distAB = np.linalg.norm(a-b)
    distAC = np.linalg.norm(a-c)
    distBC = np.linalg.norm(b-c)
    return np.abs(distAB+distAC-distBC) < 1e-10

In [3]:
def getDistortionCoeffFromParamAndRatio(distortionParameter, distortionRatio) :
    return np.array([distortionParameter, distortionParameter*distortionRatio, 0.0, 0.0, 0.0])
    
def undistortImage(distortionParameter, distortionRatio, image, cameraIntrinsics, doUncrop=True, interpolation=cv2.INTER_LANCZOS4, doReturnMaps=True, borderMode=cv2.BORDER_CONSTANT, borderValue=0) :
    distortionCoeff = getDistortionCoeffFromParamAndRatio(distortionParameter, distortionRatio)
    
    frameSize = np.array([image.shape[1], image.shape[0]])

    ## undistort image
    if doUncrop :
        ## here I use opencv to figure out the best new camera matrix that includes all possible pixels
        newIntrinsics = cv2.getOptimalNewCameraMatrix(cameraIntrinsics, distortionCoeff, tuple(frameSize), 1)[0]
        ## the above tends to change the camera center in different way and giving x and y focals different values
        ## so I scale the center to match the old intrinsics and the corresponding focals which should bring them to be the same
        newIntrinsics[0, [0, 2]] *= cameraIntrinsics[0, 2]/newIntrinsics[0, 2]
        newIntrinsics[1, [1, 2]] *= cameraIntrinsics[1, 2]/newIntrinsics[1, 2]
        ## the above, changes the focal length to see the full scene, but I want to keep focal length and have a bigger image instead, so I change the intrinsics to get the original focal length but bigger image
        scale = np.average([cameraIntrinsics[0, 0]/newIntrinsics[0, 0], cameraIntrinsics[1, 1]/newIntrinsics[1, 1]])
        newFrameSize = np.ceil(np.copy(frameSize)*scale).astype(int)
        newIntrinsics[0, 0] = cameraIntrinsics[0, 0]
        newIntrinsics[1, 1] = cameraIntrinsics[1, 1]
        ## I want the camera center to be a full number and the new frame size to be divisible by two
        newIntrinsics[:-1, -1] = np.ceil(newFrameSize/2.0)
        newFrameSize = np.array(newIntrinsics[:-1, -1]*2, dtype=int)
    else :
        newIntrinsics = np.copy(cameraIntrinsics)
        newFrameSize = np.copy(frameSize)
    
    map1, map2 = cv2.initUndistortRectifyMap(cameraIntrinsics, distortionCoeff, None, newIntrinsics, tuple(newFrameSize), cv2.CV_32FC1)
    undistortedImage = cv2.remap(image, map1, map2, interpolation, borderMode=borderMode, borderValue=borderValue)
    if doReturnMaps :
        return undistortedImage, newIntrinsics, distortionCoeff, map1, map2
    else :
        return undistortedImage, newIntrinsics, distortionCoeff

In [4]:
def readDatasetData(filmedDatasetLoc, filmedSceneToLoadIdx=0, filmedObjectToLoadIdx=0) :
    """Returns a FilmedDataset's relevant data, its baseLocs and its requested FilmedScene data and FilmedObject data."""
    
    filmedDatasetData = np.load(filmedDatasetLoc).item()
    dataLoc = filmedDatasetData[DICT_FILMED_DATASET_BASE_LOC]+os.sep
    
    filmedScenesLocs = np.sort(glob.glob(dataLoc+"filmed_scene-*.npy"))
    if filmedSceneToLoadIdx >= 0 and filmedSceneToLoadIdx < len(filmedScenesLocs) :
        print("LOADING SCENE:", filmedScenesLocs[filmedSceneToLoadIdx])
        filmedSceneData = np.load(filmedScenesLocs[filmedSceneToLoadIdx]).item()
    else :
        raise Exception("Requested FilmedScene does not exist")
        
    filmedObjectsLocs = np.sort(glob.glob(dataLoc+"filmed_object-*.npy"))
    if filmedObjectToLoadIdx >= 0 and filmedObjectToLoadIdx < len(filmedObjectsLocs) :
        print("LOADING OBJECT:", filmedObjectsLocs[filmedObjectToLoadIdx])
        filmedObjectData = np.load(filmedObjectsLocs[filmedObjectToLoadIdx]).item()
    else :
        raise Exception("Requested FilmedObject does not exist")
    
    return filmedDatasetData, filmedSceneData, filmedObjectData, dataLoc

In [5]:
def readMOTTracks(csvFilename, detThresh=0.1) :
    objects = {}
    perFrameObjects = {}
    if "IOU-tracker" in csvFilename.split(os.sep)[-1] :
        ## fields order: ['frame', 'id', 'x', 'y', 'w', 'h', 'score', 'wx', 'wy', 'wz']
        with open(csvFilename) as f :
            for line in f.readlines() :
                vals = np.array(line.split(","), dtype=float)
                vals[0] -= 1 ## starting idx with 0 
                if int(vals[0]) not in perFrameObjects.keys() :
                    perFrameObjects[int(vals[0])] = {}
                if int(vals[1]) not in objects.keys() :
                    objects[int(vals[1])] = {}
                    
                objects[int(vals[1])][int(vals[0])] = vals[2:6]
                perFrameObjects[int(vals[0])][int(vals[1])] = vals[2:6]
    elif "det.txt" == csvFilename.split(os.sep)[-1] :
        ## fields order: ['frame', 'id', 'x', 'y', 'w', 'h', 'score', 'wx', 'wy', 'wz']
        with open(csvFilename) as f :
            for line in f.readlines() :
                vals = np.array(line.split(","), dtype=float)
                if vals[6] >= detThresh : ## threshold the score using detThresh
                    vals[0] -= 1 ## starting idx with 0
                    if int(vals[0]) not in perFrameObjects.keys() :
                        perFrameObjects[int(vals[0])] = {}
                    objectId = 1
                    if len(perFrameObjects[int(vals[0])].keys()) > 0 :
                        objectId = int(np.max(perFrameObjects[int(vals[0])].keys())+1)
                    if objectId not in objects.keys() :
                        objects[objectId] = {}

                    objects[objectId][int(vals[0])] = vals[2:6]
                    perFrameObjects[int(vals[0])][objectId] = vals[2:6]
    else :
        raise Exception("Unknown tracker for csv file \"{0}\"".format(csvFilename))
    
    return objects, perFrameObjects

In [6]:
def cameraToWorldGroundPlane(cameraIntrinsics, cameraExtrinsics, camPoints) :
    """assumes camPoints has shape Nx2
       returns Nx3 worldPoints on 3D world ground plane"""
    if camPoints.shape[1] != 2 :
        raise Exception("Need 2D points")
    
    inverseT = np.linalg.inv(np.dot(cameraIntrinsics, cameraExtrinsics[0:3, [0, 1, 3]]))
    worldPoints = np.dot(inverseT, np.concatenate([camPoints, np.ones([len(camPoints), 1], np.float32)], axis=1).T)
    worldPoints /= worldPoints[-1, :]
    worldPoints[-1, :] = 0
    return worldPoints.T.astype(np.float32)
    
def worldToCameraSpace(cameraIntrinsics, cameraExtrinsics, worldPoints) :
    """assumes worldPoints has shape Nx3
       returns Nx2 camPoints in image space"""
    if worldPoints.shape[1] != 3 :
        raise Exception("Need 3D points")
    T = np.dot(cameraIntrinsics, cameraExtrinsics[0:3, :])
    camPoints = np.dot(T, np.concatenate([worldPoints, np.ones([len(worldPoints), 1], np.float32)], axis=1).T)
    camPoints /= camPoints[-1, :]
    return camPoints.T.astype(np.float32)[:, :-1]

def projectPointsOnLineSegment(pointsToProject, endPoint1, endPoint2) :
    return (np.dot(np.reshape(endPoint2-endPoint1, [1, 2]), (pointsToProject-endPoint1).T)/(np.linalg.norm(endPoint2-endPoint1)**2)).T*(endPoint2-endPoint1)[np.newaxis, :]+endPoint1

def cameraExtrinsicsFromRotAngles(alphaRot, betaRot, gammaRot) :
    sinAlpha = np.sin(alphaRot); cosAlpha = np.cos(alphaRot) ## rotation about x axis
    sinBeta = np.sin(betaRot); cosBeta = np.cos(betaRot) ## rotation about y axis
    sinGamma = np.sin(gammaRot); cosGamma = np.cos(gammaRot) ## rotation about z axis

    rotationMat = np.array([[cosBeta*cosGamma                             , -cosBeta*sinGamma                            , sinBeta          , 0],
                            [cosAlpha*sinGamma + sinAlpha*sinBeta*cosGamma, cosAlpha*cosGamma - sinAlpha*sinBeta*sinGamma, -sinAlpha*cosBeta, 0],
                            [sinAlpha*sinGamma - cosAlpha*sinBeta*cosGamma, sinAlpha*cosGamma + cosAlpha*sinBeta*sinGamma, cosAlpha*cosBeta , 0],
                            [0                                            , 0                                            , 0                , 1]], dtype=float)
    extrinsics = np.dot(np.array([[-1, 0, 0, 0],
                                  [0, 1, 0, 0],
                                  [0, 0, 1, -1]], dtype=float), rotationMat)
    return extrinsics

def getSortedTrajectoryPointsFromBBoxes(trackedObject, imageShape, minClosenessToEdge=None) :
    sortedFrameIdxs = np.sort(trackedObject.keys())
    bboxes = np.array([trackedObject[frameIdx] for frameIdx in sortedFrameIdxs])
    bottomMiddlePoints = bboxes[:, :2] + bboxes[:, 2:]*np.array([[0.5, 1.0]])
    bboxes[:, 2:] = bboxes[:, :2] + bboxes[:, 2:] ## turn bboxes in (top_left bottom_left) instead of (top_left size)
    if minClosenessToEdge is not None :
        ## find which bboxes are too close to the border and remove them from the considered points
        isFrameValid = np.all(np.abs(bboxes - np.array([[0.0, 0.0, imageShape[1], imageShape[0]]])) >= minClosenessToEdge, axis=1)
        ## only keep bboxes and bottom middle points for the valid frames and use validFrameIdxs which frames are subsequent so that I can correctly index bboxes
        sortedFrameIdxs = sortedFrameIdxs[isFrameValid]
        bboxes = bboxes[isFrameValid, :]
        bottomMiddlePoints = bottomMiddlePoints[isFrameValid, :]
    return bboxes, bottomMiddlePoints, sortedFrameIdxs

# cameraBBoxes, cameraTrajectoryPoints, usedFrameIdxs = getSortedTrajectoryPointsFromBBoxes(trackedObjects[objectId], bgImage.shape[0:2], minClosenessToEdge)

In [7]:
# ## thresh detections based on a certain thresh
# detThresh = 0.1
# with open("/home/ilisescu/PhD/data/havana/1-3-5_det.txt") as inputDets :
#     with open("/home/ilisescu/PhD/data/havana/1-3-5_det-{0}.txt".format(detThresh), "a") as outputDets :
#         for line in inputDets.readlines() :
#             vals = np.array(line.split(","), dtype=float)
#             if vals[6] > detThresh :
#                 outputDets.write(",".join(np.concatenate([vals[0:2].astype(int).astype(np.string_),
#                                                           vals[2:7].astype(np.string_),
#                                                           vals[7:].astype(int).astype(np.string_)])) + "\n")

In [8]:
# # trackedObjects, perFrameTrackedObjects = readMOTTracks("/home/ilisescu/segtracking/data/PETS09-S2L2/IOU-tracker-result.txt")
# # frameLocs = np.sort(glob.glob("/home/ilisescu/segtracking/data/PETS09-S2L2/img1/*.jpg"))
# # # trackedObjects, perFrameTrackedObjects = readMOTTracks("/home/ilisescu/PhD/data/havana_short/det.txt")
# # trackedObjects, perFrameTrackedObjects = readMOTTracks("/home/ilisescu/PhD/data/havana_short/IOU-tracker-result-0.1.txt")
# # frameLocs = np.sort(glob.glob("/home/ilisescu/PhD/data/havana_short/frame-*.png"))
# trackedObjects, perFrameTrackedObjects = readMOTTracks("/home/ilisescu/PhD/data/havana/1-3-5_IOU-tracker-result-0.1.txt")
# frameLocs = np.sort(glob.glob("/home/ilisescu/PhD/data/havana/frame-*.png"))


# colors = cm.gist_ncar(np.mod(np.sort(trackedObjects.keys()), 40)/39.0)

# # figure(); imshow(Image.open(frameLocs[0])); gca().set_autoscale_on(False)
# # for objectId in np.sort(perFrameTrackedObjects[0].keys()) :
# #     bbox = np.array([[perFrameTrackedObjects[0][objectId][0], perFrameTrackedObjects[0][objectId][1]],
# #                      [perFrameTrackedObjects[0][objectId][0]+perFrameTrackedObjects[0][objectId][2], perFrameTrackedObjects[0][objectId][1]],
# #                      [perFrameTrackedObjects[0][objectId][0]+perFrameTrackedObjects[0][objectId][2], perFrameTrackedObjects[0][objectId][1]+perFrameTrackedObjects[0][objectId][3]],
# #                      [perFrameTrackedObjects[0][objectId][0], perFrameTrackedObjects[0][objectId][1]+perFrameTrackedObjects[0][objectId][3]],
# #                      [perFrameTrackedObjects[0][objectId][0], perFrameTrackedObjects[0][objectId][1]]])
# #     plot(bbox[:, 0], bbox[:, 1], linewidth=2, color=tuple(colors[objectId-1, :]))
# #     gca().text(perFrameTrackedObjects[0][objectId][0]+perFrameTrackedObjects[0][objectId][2]*0.02,
# #                perFrameTrackedObjects[0][objectId][1]+perFrameTrackedObjects[0][objectId][3]*0.96, np.string_(objectId), color=tuple(colors[objectId-1, :]), fontweight="bold")
# #     print colors[objectId-1, :], objectId

# fig = mpl.pylab.figure("animation", frameon=False)
# fig.set_size_inches(8,5)
# gca().set_axis_off()
# img = None
# objectToShow = 279
# for i in xrange(len(frameLocs)):
#     cla()
# #     if img is None:
#     gca().imshow(np.array(Image.open(frameLocs[i])), aspect='auto'); gca().set_autoscale_on(False)
# #     else:
# #         img.set_data(np.array(Image.open(frameLocs[i])))
        
#     for objectId in np.sort(perFrameTrackedObjects[i].keys()) :
#         bbox = np.array([[perFrameTrackedObjects[i][objectId][0], perFrameTrackedObjects[i][objectId][1]],
#                          [perFrameTrackedObjects[i][objectId][0]+perFrameTrackedObjects[i][objectId][2], perFrameTrackedObjects[i][objectId][1]],
#                          [perFrameTrackedObjects[i][objectId][0]+perFrameTrackedObjects[i][objectId][2], perFrameTrackedObjects[i][objectId][1]+perFrameTrackedObjects[i][objectId][3]],
#                          [perFrameTrackedObjects[i][objectId][0], perFrameTrackedObjects[i][objectId][1]+perFrameTrackedObjects[i][objectId][3]],
#                          [perFrameTrackedObjects[i][objectId][0], perFrameTrackedObjects[i][objectId][1]]])
#         plot(bbox[:, 0], bbox[:, 1], linewidth=2, color=tuple(colors[objectId-1, :]))
#         gca().text(perFrameTrackedObjects[i][objectId][0]+perFrameTrackedObjects[i][objectId][2]*0.02,
#                    perFrameTrackedObjects[i][objectId][1]+perFrameTrackedObjects[i][objectId][3]*0.96, np.string_(objectId), color=tuple(colors[objectId-1, :]), fontweight="bold")
        
#     mpl.pylab.pause(0.1)
#     mpl.pylab.draw()
# # #     fig.savefig("/home/ilisescu/segtracking/data/PETS09-S2L2/IOU-tracker-result/{0:06}.png".format(i+1))
# #     fig.savefig("/home/ilisescu/PhD/data/havana_short/IOU-tracker-result-0.1/frame-{0:05}.png".format(i+1))
#     fig.savefig("/home/ilisescu/PhD/data/havana/1-3-5_IOU-tracker-result-0.1/frame-{0:05}.png".format(i+1))

In [9]:
filmedDatasetLoc = "/home/ilisescu/PhD/data/havana_short/filmed_dataset-havana_short.npy"
filmedDatasetData, filmedSceneData, filmedObjectData, dataLoc = readDatasetData(filmedDatasetLoc)

## used to downsample images if I want to work at a lower res
resizeMultiplier = 1.0
bgImage = cv2.resize(np.array(Image.open(dataLoc+"median.png")), (0, 0), fx=resizeMultiplier, fy=resizeMultiplier, interpolation=cv2.INTER_AREA)[:, :, 0:3]
# figure(); imshow(bgImage)

## get camera extrinsics and intrinsics (after undistortion)
cameraExtrinsics = filmedSceneData[DICT_CAMERA_EXTRINSICS]
undistortedBgImage, cameraIntrinsics, distortionCoeff = undistortImage(filmedSceneData[DICT_DISTORTION_PARAMETER], filmedSceneData[DICT_DISTORTION_RATIO], bgImage,
                                                                       filmedSceneData[DICT_CAMERA_INTRINSICS], doReturnMaps=False)
figure(); imshow(undistortedBgImage)

('LOADING SCENE:', u'/home/ilisescu/PhD/data/havana_short/filmed_scene-havana_short.npy')
('LOADING OBJECT:', u'/home/ilisescu/PhD/data/havana_short/filmed_object-green_car1.npy')


<matplotlib.image.AxesImage at 0x7f3e28049d10>

In [10]:
trackedObjects, perFrameTrackedObjects = readMOTTracks("/home/ilisescu/PhD/data/havana/1-3-5_IOU-tracker-result-0.1.txt")
perObjectTrackLengths = np.array([[key, len(trackedObjects[key].keys())] for key in trackedObjects.keys()])
perObjectTrackLengths = perObjectTrackLengths[np.argsort(perObjectTrackLengths[:, 1])[::-1], :]
minTrackLength = 20
minAvgPixelDisplacement = 15
minFrameToFramePixelDisplacement = np.linalg.norm(undistortedBgImage.shape[0:2])*0.05 # the object must have moved between frame i and j by at least this value
minClosenessToEdge = 10
doKeepIfValidTrackShort = True
smoothScaleChangeKernelSize = 5
## (0, 1) object location at time t, (2, 3) object location at time t+1, (4) scale change between t and t+1 (I do area_{t+1}/area_{t}, i.e. if (4) > 1, object got bigger and smaller otherwise)
# trackPointsAndScaleChanges = np.empty([0, 5], dtype=float)
tracksBottomMiddlePoints = np.empty([0, 4], dtype=float)
undistortedTrackPointsAndScaleChanges = np.empty([0, 5], dtype=float)
## (0) objectId, (1) start index in undistortedTrackPointsAndScaleChanges, (2) end index +1 in undistortedTrackPointsAndScaleChanges (indices valid for tracksBottomMiddlePoints as well)
mapObjectIdToTrackPointsAndScaleChanges = np.empty([0, 3], dtype=int)
for (objectId, trackLength) in perObjectTrackLengths :
    print objectId, trackLength, "({0}, {1})".format(np.min(trackedObjects[objectId].keys()), np.max(trackedObjects[objectId].keys())), 
    if trackLength >= minTrackLength :
#         bboxes = np.array([trackedObjects[objectId][frameIdx] for frameIdx in np.sort(trackedObjects[objectId].keys())])
#         bottomMiddlePoints = bboxes[:, :2] + bboxes[:, 2:]*np.array([[0.5, 1.0]])
#         bboxes[:, 2:] = bboxes[:, :2] + bboxes[:, 2:] ## turn bboxes in (top_left bottom_left) instead of (top_left size)        
#         ## find which bboxes are too close to the border and remove them from the considered points
#         isFrameValid = np.all(np.abs(bboxes - np.array([[0.0, 0.0, bgImage.shape[1], bgImage.shape[0]]])) >= minClosenessToEdge, axis=1)
#         ## only keep bboxes and bottom middle points for the valid frames and use validFrameIdxs which frames are subsequent so that I can correctly index bboxes
#         validFrameIdxs = np.sort(trackedObjects[objectId].keys())[isFrameValid]
#         bboxes = bboxes[isFrameValid, :]
#         bottomMiddlePoints = bottomMiddlePoints[isFrameValid, :]
        
        bboxes, bottomMiddlePoints, validFrameIdxs = getSortedTrajectoryPointsFromBBoxes(trackedObjects[objectId], bgImage.shape[0:2], minClosenessToEdge)
        
        ## not sure I should throw the remaining of the track away if it's too short
        if (doKeepIfValidTrackShort or len(bboxes) >= minTrackLength) and len(bboxes) > 0 :
        
            ## undistort the bboxes and their bottom middle points
            undistortedBBoxes = cv2.undistortPoints(bboxes.reshape([1, len(bboxes)*2, 2]),
                                                    filmedSceneData[DICT_CAMERA_INTRINSICS], distortionCoeff, P=cameraIntrinsics)[0, :, :].reshape([len(bboxes), 4])
            ## get the undistortedBottomMiddlePoints not from undistorting bottomMiddlePoints but from the undistortedBBoxes directly as the undistortion messes up the BBoxes in a non-linear way
            ## so while I think they still encompass the object as well as possible, the bottom middle points are a bit different
            undistortedBottomMiddlePoints = undistortedBBoxes[:, :2] + (undistortedBBoxes[:, 2:]-undistortedBBoxes[:, :2])*np.array([[0.5, 1.0]])
            
            displacements = np.sqrt(np.sum((undistortedBottomMiddlePoints[:, :, np.newaxis]-undistortedBottomMiddlePoints.T[np.newaxis, :, :])**2, axis=1))
            
            if False :
                ## some idx wanking magic which should make sure that even if there are jumps in the timeline I only compare subsequent frames in time
                framePairsIdxs = np.vstack([np.argwhere(validFrameIdxs[1:]-validFrameIdxs[:-1] == 1).flatten(), np.argwhere(validFrameIdxs[1:]-validFrameIdxs[:-1] == 1).flatten()+1]).T
                ## sanity check
                if np.any(np.diff(np.hstack([validFrameIdxs[framePairsIdxs[:, 0], np.newaxis], validFrameIdxs[framePairsIdxs[:, 1], np.newaxis]]), axis=1) != 1) :
                    raise Exception("Something wrong with the subsequent frames indices")
            elif False:
                ## this takes all frame combinations with i < j where there is a displacement bigger than minFrameToFramePixelDisplacement
                ## NOTE: this could have shittons of pairs so a lot of points so be careful with memory
                framePairsIdxs = np.argwhere(np.triu(displacements, k=1) > minFrameToFramePixelDisplacement)
            else :
                ## this only takes the combination of frame i, j with i < j and min(j-i) where there is a displacement bigger than minFrameToFramePixelDisplacement
                framePairsIdxs = np.hstack([np.arange(len(displacements))[:, np.newaxis], np.sum(np.array(~(np.triu(displacements, k=1) > minFrameToFramePixelDisplacement), dtype=int), axis=1)[:, np.newaxis]])
                framePairsIdxs = framePairsIdxs[framePairsIdxs[:, 1]<len(displacements), :]
                
            ## compute mean only between frame i, j with i < j
            if np.mean(displacements[np.triu(np.ones_like(displacements), k=1).astype(bool)]) >= minAvgPixelDisplacement :
                if len(framePairsIdxs) > 0 :
                    print "Mean displacement = {0}; track length = {1}; num datapoints = {2}".format(np.mean(displacements[np.triu(np.ones_like(displacements), k=1).astype(bool)]), 
                                                                                                     len(undistortedBBoxes), len(framePairsIdxs))
                    bboxAreas = np.prod(undistortedBBoxes[:, 2:] - undistortedBBoxes[:, :2], axis=1)
                    areaScaleChange = bboxAreas[framePairsIdxs[:, 1]]/bboxAreas[framePairsIdxs[:, 0]]
#                     areaScaleChange = np.convolve(np.repeat(areaScaleChange, np.concatenate([[(smoothScaleChangeKernelSize-1)/2 + 1],
#                                                                                              np.ones(len(areaScaleChange)-2, dtype=int),
#                                                                                              [(smoothScaleChangeKernelSize-1)/2 + 1]])),
#                                                   np.ones(smoothScaleChangeKernelSize)/float(smoothScaleChangeKernelSize), mode='valid')
                    ## keep track of the bottom middle points before undistortion just for funs
                    tracksBottomMiddlePoints = np.vstack([tracksBottomMiddlePoints, np.hstack([bottomMiddlePoints[framePairsIdxs[:, 0]], bottomMiddlePoints[framePairsIdxs[:, 1]]])])
                    undistortedTrackPointsAndScaleChanges = np.vstack([undistortedTrackPointsAndScaleChanges,
                                                                       np.hstack([undistortedBottomMiddlePoints[framePairsIdxs[:, 0]], undistortedBottomMiddlePoints[framePairsIdxs[:, 1]],
                                                                                  areaScaleChange[:, np.newaxis]])])
    
                    currentMap = np.array([[objectId, 0, len(undistortedTrackPointsAndScaleChanges)]])
                    if len(mapObjectIdToTrackPointsAndScaleChanges) > 0 :
                        currentMap[0, 1] = mapObjectIdToTrackPointsAndScaleChanges[-1, 2]
                    mapObjectIdToTrackPointsAndScaleChanges = np.vstack([mapObjectIdToTrackPointsAndScaleChanges, currentMap])
                else :
                    print "############## NO FRAMES WITH MIN FRAME-TO-FRAME DISPLACEMENT"
            else :
                print "############## NOT MOVING! MEAN DISPLACEMENT =", np.mean(displacements[np.triu(np.ones_like(displacements), k=1).astype(bool)])
        else :
            print "############## TRACK TOO SHORT (LEN = {0}) AFTER REMOVING INVALID FRAMES!!".format(len(undistortedBBoxes))
    else :
        print "############## TRACK TOO SHORT"
        
print "NUMBER POINTS:", len(undistortedTrackPointsAndScaleChanges)

# figure(); imshow(bgImage)
# for indices in mapObjectIdToTrackPointsAndScaleChanges :
#     plot(tracksBottomMiddlePoints[indices[1]:indices[2], 0], tracksBottomMiddlePoints[indices[1]:indices[2], 1])
# figure(); imshow(undistortedBgImage)
# for indices in mapObjectIdToTrackPointsAndScaleChanges :
#     plot(undistortedTrackPointsAndScaleChanges[indices[1]:indices[2], 0], undistortedTrackPointsAndScaleChanges[indices[1]:indices[2], 1])

301 1516 (1904, 3419) ############## NOT MOVING! MEAN DISPLACEMENT = 5.37982521287
339 1138 (3422, 4559) ############## NOT MOVING! MEAN DISPLACEMENT = 6.85340120481
239 935 (1090, 2024) Mean displacement = 306.195808855; track length = 225; num datapoints = 204
154 739 (251, 989) Mean displacement = 54.4399618656; track length = 739; num datapoints = 661
252 709 (1681, 2389) Mean displacement = 157.198490504; track length = 680; num datapoints = 662
377 706 (4694, 5399) ############## NOT MOVING! MEAN DISPLACEMENT = 6.33294671309
238 609 (1400, 2008) Mean displacement = 79.655622373; track length = 609; num datapoints = 135
253 490 (1949, 2438) Mean displacement = 291.74826363; track length = 468; num datapoints = 452
372 479 (4806, 5284) Mean displacement = 179.702523783; track length = 479; num datapoints = 454
232 479 (1445, 1923) Mean displacement = 116.256638927; track length = 466; num datapoints = 110
168 442 (760, 1201) ############## NOT MOVING! MEAN DISPLACEMENT = 10.0646836

170 64 (1170, 1233) ############## NOT MOVING! MEAN DISPLACEMENT = 1.89734460497
108 61 (453, 513) ############## NOT MOVING! MEAN DISPLACEMENT = 3.1846233806
273 61 (2816, 2876) Mean displacement = 90.4036063644; track length = 34; num datapoints = 19
128 60 (618, 677) ############## NOT MOVING! MEAN DISPLACEMENT = 8.73979266432
246 59 (2103, 2161) Mean displacement = 97.0387655494; track length = 55; num datapoints = 41
183 58 (1416, 1473) ############## NOT MOVING! MEAN DISPLACEMENT = 3.95965482151
213 57 (1703, 1759) Mean displacement = 152.629385033; track length = 57; num datapoints = 45
41 55 (213, 267) ############## NOT MOVING! MEAN DISPLACEMENT = 5.14809384415
39 55 (204, 258) Mean displacement = 64.4732436519; track length = 55; num datapoints = 25
176 54 (1302, 1355) ############## NOT MOVING! MEAN DISPLACEMENT = 4.02622175122
141 53 (810, 862) ############## NO FRAMES WITH MIN FRAME-TO-FRAME DISPLACEMENT
151 53 (918, 970) ############## NO FRAMES WITH MIN FRAME-TO-FRAME DI

In [11]:
objectIdToTrack = 179 #310 #293 #331
cameraBBoxes, cameraTrajectoryPoints, usedFrameIdxs = getSortedTrajectoryPointsFromBBoxes(trackedObjects[objectIdToTrack], bgImage.shape[0:2], minClosenessToEdge)
cameraUndistortedBBoxes = cv2.undistortPoints(cameraBBoxes.reshape([1, len(cameraBBoxes)*2, 2]),
                                              filmedSceneData[DICT_CAMERA_INTRINSICS], distortionCoeff, P=cameraIntrinsics)[0, :, :].reshape([len(cameraBBoxes), 4])
cameraUndistortedTrajectoryPoints = cameraUndistortedBBoxes[:, :2] + (cameraUndistortedBBoxes[:, 2:]-cameraUndistortedBBoxes[:, :2])*np.array([[0.5, 1.0]])
# figure(); imshow(bgImage); plot(cameraTrajectoryPoints[:, 0], cameraTrajectoryPoints[:, 1])
# figure(); imshow(undistortedBgImage); plot(cameraUndistortedTrajectoryPoints[:, 0], cameraUndistortedTrajectoryPoints[:, 1])

worldUndistortedTrajectoryPoints = cameraToWorldGroundPlane(cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], cameraUndistortedTrajectoryPoints)
figure(); plot(worldUndistortedTrajectoryPoints[:, 0], worldUndistortedTrajectoryPoints[:, 1])

[<matplotlib.lines.Line2D at 0x7f3df8192190>]

In [12]:
kernelSize = 55
smoothXs = np.convolve(np.repeat(worldUndistortedTrajectoryPoints[:, 0], np.concatenate([[(kernelSize-1)/2 + 1], np.ones(len(worldUndistortedTrajectoryPoints)-2, dtype=int),
                                                                                         [(kernelSize-1)/2 + 1]])), np.ones(kernelSize)/float(kernelSize), mode='valid')
smoothYs = np.convolve(np.repeat(worldUndistortedTrajectoryPoints[:, 1], np.concatenate([[(kernelSize-1)/2 + 1], np.ones(len(worldUndistortedTrajectoryPoints)-2, dtype=int),
                                                                                         [(kernelSize-1)/2 + 1]])), np.ones(kernelSize)/float(kernelSize), mode='valid')
plot(smoothXs, smoothYs)

[<matplotlib.lines.Line2D at 0x7f3df81ae650>]

In [13]:
smoothedWorldUndistortedTrajectoryPoints = np.hstack([smoothXs[:, np.newaxis], smoothYs[:, np.newaxis], np.zeros([len(smoothXs), 1])])
## get directions from 1 point to the other (I feel like there should be a way to get a tangent direction to trajectory curve where the trajectory point projects onto it but it 
## probably won't matter as this is only used as an initialization and will get changed during the optimization)
smoothedWorldDirections = smoothedWorldUndistortedTrajectoryPoints[1:, :]-smoothedWorldUndistortedTrajectoryPoints[:-1, :]
smoothedWorldDirections = np.vstack([smoothedWorldDirections, smoothedWorldDirections[-1, :][np.newaxis, :]])
## normalize only directions where the norm is not 0
norms = np.linalg.norm(smoothedWorldDirections, axis=1)
if np.any(norms == 0.0) :
    ## if the norm is 0, copy the closest direction where the norm is not 0 --> not sure how to do dis but if the trajectory is smoothed out it should basically never happen
    raise Exception("I guess I have to figure this out now")
smoothedWorldDirections = smoothedWorldDirections/norms[:, np.newaxis]
## angles from np.array([1.0, 0.0]) --> see https://math.stackexchange.com/a/879474
smootherWorldOrientationAngles = np.arctan2(-smoothedWorldDirections[:, 1], smoothedWorldDirections[:, 0])

In [14]:
# interpFunc, u = spint.splprep(x=[smoothXs, smoothYs], u=usedFrameIdxs, k=4)
# smoothedWorldUndistortedTrajectoryPoints = np.array(spint.splev(usedFrameIdxs, interpFunc)).T
# plot(smoothedWorldUndistortedTrajectoryPoints[:, 0], smoothedWorldUndistortedTrajectoryPoints[:, 1])

In [14]:
blurredGrayBGImage = cv2.GaussianBlur(cv2.cvtColor(bgImage, cv2.COLOR_RGB2GRAY), (11, 11), 10.0)

allObjectFrames = np.zeros([bgImage.shape[0], bgImage.shape[1], bgImage.shape[2], len(usedFrameIdxs)], np.uint8)
allSegmentedObjectFrames = np.zeros([bgImage.shape[0], bgImage.shape[1], len(usedFrameIdxs)], np.uint8)
allObjectFramesFGBGTransitionCost = np.zeros([bgImage.shape[0], bgImage.shape[1], len(usedFrameIdxs)])
allObjectFramesUndistortedFGBGTransitionCost = np.zeros([undistortedBgImage.shape[0], undistortedBgImage.shape[1], len(usedFrameIdxs)])
for idx, frameKey in enumerate(usedFrameIdxs) :
    allObjectFrames[:, :, :, idx] = np.array(Image.open("/home/ilisescu/PhD/data/havana/frame-{0:05}.png".format(frameKey+1)))
    
    if False :
        raise Exception("This doesn't work yet as allObjectFramesFGBGTransitionCost gets overwritten outside of the if statement")
        ## use bg cut to segment the image and compute cost based on distance to the edges between fg and bg
        allSegmentedObjectFrames[:, :, idx] = backgroundCut3D(bgImage, allObjectFrames[:, :, :, idx][:, :, :, np.newaxis])[:, :, 0]*255
        ## close holes
        segmentedFrameImg = cv2.morphologyEx(segmentedFrameImg.astype(float), cv2.MORPH_CLOSE, np.ones((5,5),np.uint8), iterations=2).astype(np.uint8)
        
        allObjectFramesFGBGTransitionCost[:, :, idx] = distance_transform_edt(cv2.Canny(segmentedFrameImg, 100,200)==0)
#         figure(); imshow(frameImg)
#         figure(); imshow(segmentedFrameImg)
    elif False :
        ## basing photoconsistency on gradient images meaning it's mostly sensitive to relative intensity changes
        ## this one was picking on color changes in shadows so the border between fg and bg was stretched out
        blurredGrayImage = cv2.GaussianBlur(cv2.cvtColor(allObjectFrames[:, :, :, idx], cv2.COLOR_RGB2GRAY), (11, 11), 10.0)

        xGradDiff = np.abs(np.abs(cv2.Sobel(blurredGrayImage, cv2.CV_64F, 1, 0)) - np.abs(cv2.Sobel(blurredGrayBGImage, cv2.CV_64F, 1, 0)))
        yGradDiff = np.abs(np.abs(cv2.Sobel(blurredGrayImage, cv2.CV_64F, 0, 1)) - np.abs(cv2.Sobel(blurredGrayBGImage, cv2.CV_64F, 0, 1)))

        photoconsistency = cv2.GaussianBlur(np.clip(xGradDiff+yGradDiff, 0, 5), (31, 31), 10.0)
    elif False :
        ## comparing patches (because of the blur) and I don't care about low frequency details (i.e. comparing the high frequency textures of patches because I'm removing the low frequency blurred shit)
        ## this wasn't picking up on the relatively small change where soft shadows are
        bob = bgImage - cv2.GaussianBlur(bgImage, (11, 11), 10.0)
        sue = allObjectFrames[:, :, :, idx] - cv2.GaussianBlur(allObjectFrames[:, :, :, idx], (11, 11), 10.0)

        photoconsistency = cv2.GaussianBlur(cv2.cvtColor(np.clip(np.abs(bob-sue), 0, 11), cv2.COLOR_RGB2GRAY).astype(float), (31, 31), 10.0)
    else :
        ## same as above but now I also care about the colors (because I'm removing the low frequency intensity details, whereas above I was considering channels separately so I was removing the colors of the patterns)
        ## looks like the best one for now
        bob = bgImage - cv2.cvtColor(cv2.GaussianBlur(cv2.cvtColor(bgImage, cv2.COLOR_RGB2GRAY), (11, 11), 10.0), cv2.COLOR_GRAY2RGB)
        sue = allObjectFrames[:, :, :, idx] - cv2.cvtColor(cv2.GaussianBlur(cv2.cvtColor(allObjectFrames[:, :, :, idx], cv2.COLOR_RGB2GRAY), (11, 11), 10.0), cv2.COLOR_GRAY2RGB)

        photoconsistency = cv2.GaussianBlur(cv2.cvtColor(np.clip(np.abs(bob-sue), 0, 11), cv2.COLOR_RGB2GRAY).astype(float), (31, 31), 10.0)

    ## base the cost on the gradients of the photoconsistency
    allObjectFramesFGBGTransitionCost[:, :, idx] = np.exp(-(np.abs(cv2.Sobel(photoconsistency, cv2.CV_64F, 1, 0)) + np.abs(cv2.Sobel(photoconsistency, cv2.CV_64F, 0, 1))))
    allObjectFramesUndistortedFGBGTransitionCost[:, :, idx], _, _ = undistortImage(filmedSceneData[DICT_DISTORTION_PARAMETER], filmedSceneData[DICT_DISTORTION_RATIO],
                                                                                   allObjectFramesFGBGTransitionCost[:, :, idx], filmedSceneData[DICT_CAMERA_INTRINSICS], doReturnMaps=False,
                                                                                   borderValue=np.max(allObjectFramesFGBGTransitionCost[:, :, idx]))
    

## making 1 pixel edge around the cost frames to max cost so that when I clamp out of bounds pixels they take maxcost
maxFGBGTransitionCost = np.max(allObjectFramesUndistortedFGBGTransitionCost)
allObjectFramesUndistortedFGBGTransitionCost[0, :, :] = maxFGBGTransitionCost
allObjectFramesUndistortedFGBGTransitionCost[-1, :, :] = maxFGBGTransitionCost
allObjectFramesUndistortedFGBGTransitionCost[:, 0, :] = maxFGBGTransitionCost
allObjectFramesUndistortedFGBGTransitionCost[:, -1, :] = maxFGBGTransitionCost

In [16]:
figure("animation")
img = None
for i in xrange(allObjectFramesUndistortedFGBGTransitionCost.shape[-1]):
    if img is None:
        img = mpl.pylab.imshow(allObjectFramesUndistortedFGBGTransitionCost[:, :, i])
    else:
        img.set_data(allObjectFramesUndistortedFGBGTransitionCost[:, :, i])
    mpl.pylab.pause(0.1)
    mpl.pylab.draw()

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/ilisescu/anaconda2/lib/python2.7/site-packages/IPython/core/ultratb.py", line 1132, in get_records
    return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)
  File "/home/ilisescu/anaconda2/lib/python2.7/site-packages/IPython/core/ultratb.py", line 313, in wrapped
    return f(*args, **kwargs)
  File "/home/ilisescu/anaconda2/lib/python2.7/site-packages/IPython/core/ultratb.py", line 358, in _fixed_getinnerframes
    records = fix_frame_records_filenames(inspect.getinnerframes(etb, context))
  File "/home/ilisescu/anaconda2/lib/python2.7/inspect.py", line 1048, in getinnerframes
    framelist.append((tb.tb_frame,) + getframeinfo(tb, context))
  File "/home/ilisescu/anaconda2/lib/python2.7/inspect.py", line 1008, in getframeinfo
    filename = getsourcefile(frame) or getfile(frame)
  File "/home/ilisescu/anaconda2/lib/python2.7/inspect.py", line 453, in getsourcefile
    if hasattr(getmodule(object, filename), '__loader

IndexError: string index out of range

In [59]:
def placeAndProjectAllBoxVolumes(params, paramsScale, cameraIntrinsics, cameraExtrinsics) :
    footprintWidth, footprintLength, volumeHeight = np.abs(params[0:3]/paramsScale[0:3])
    posAndRots = np.reshape(params[3:]/paramsScale[3:], [(len(params)-3)/3, 3])
    
    defaultBoxVolumePoints = np.array([[-0.5, -0.5, 0.0, 1.0],
                                       [-0.5,  0.5, 0.0, 1.0],
                                       [ 0.5,  0.5, 0.0, 1.0],
                                       [ 0.5, -0.5, 0.0, 1.0],
                                       [-0.5, -0.5, 1.0, 1.0],
                                       [-0.5,  0.5, 1.0, 1.0],
                                       [ 0.5,  0.5, 1.0, 1.0],
                                       [ 0.5, -0.5, 1.0, 1.0],
                                       [ 0.0,  0.0, 0.0, 1.0],
                                       [ 0.5,  0.0, 0.0, 1.0]])*np.concatenate([[footprintLength, footprintWidth, volumeHeight], [1]])
    transform = np.vstack([[np.vstack([np.cos(posAndRots[:, 2]) , np.sin(posAndRots[:, 2]), np.zeros(len(posAndRots)), posAndRots[:, 0]])],
                           [np.vstack([-np.sin(posAndRots[:, 2]), np.cos(posAndRots[:, 2]), np.zeros(len(posAndRots)), posAndRots[:, 1]])],
                           np.array([[[0.0]                      , [0.0]                    , [1.0], [0.0]]]).repeat(len(posAndRots), axis=-1),
                           np.array([[[0.0]                      , [0.0]                    , [0.0], [1.0]]]).repeat(len(posAndRots), axis=-1)])
    worldBoxVolumePoints = np.dot(defaultBoxVolumePoints, transform)
    T = cameraIntrinsics.dot(cameraExtrinsics[0:3, :])
    cameraBoxVolumePoints = np.rollaxis(np.dot(T, worldBoxVolumePoints), 1)
    cameraBoxVolumePoints = cameraBoxVolumePoints[:, :-1, :]/cameraBoxVolumePoints[:, -1:, :]
    
    return worldBoxVolumePoints, cameraBoxVolumePoints

def placeAndProjectAllBoxVolumesRelatives(params, paramsScale, relativePositions, relativeRots, cameraIntrinsics, cameraExtrinsics, startRot) :
    """this places all box volumes given params = (footprintWidth, footprintLength, volumeHeight, globalDx, globalDy, globalRot, stretchX, stretchY)"""
    globalDx, globalDy, globalRot, stretchX, stretchY = params[3:]/paramsScale[3:]
    footprintWidth, footprintLength, volumeHeight = np.abs(params[0:3]/paramsScale[0:3])
    
    deltaStartRot = globalRot-startRot/paramsScale[5]
    globalRelativeRot = globalRot+relativeRots
    
    defaultBoxVolumePoints = np.array([[-0.5, -0.5, 0.0, 1.0],
                                       [-0.5,  0.5, 0.0, 1.0],
                                       [ 0.5,  0.5, 0.0, 1.0],
                                       [ 0.5, -0.5, 0.0, 1.0],
                                       [-0.5, -0.5, 1.0, 1.0],
                                       [-0.5,  0.5, 1.0, 1.0],
                                       [ 0.5,  0.5, 1.0, 1.0],
                                       [ 0.5, -0.5, 1.0, 1.0],
                                       [ 0.0,  0.0, 0.0, 1.0],
                                       [ 0.5,  0.0, 0.0, 1.0]])*np.concatenate([[footprintLength, footprintWidth, volumeHeight], [1]])
    tx = globalDx+(np.cos(deltaStartRot)*relativePositions[:, 0]+np.sin(deltaStartRot)*relativePositions[:, 1])*stretchX
    ty = globalDy+(np.cos(deltaStartRot)*relativePositions[:, 1]-np.sin(deltaStartRot)*relativePositions[:, 0])*stretchY
    transform = np.vstack([[np.vstack([np.cos(globalRelativeRot) , np.sin(globalRelativeRot), np.zeros(len(globalRelativeRot)), tx])],
                           [np.vstack([-np.sin(globalRelativeRot), np.cos(globalRelativeRot), np.zeros(len(globalRelativeRot)), ty])],
                           np.array([[[0.0]                      , [0.0]                    , [1.0], [0.0]]]).repeat(len(globalRelativeRot), axis=-1),
                           np.array([[[0.0]                      , [0.0]                    , [0.0], [1.0]]]).repeat(len(globalRelativeRot), axis=-1)])
    worldBoxVolumePoints = np.dot(defaultBoxVolumePoints, transform)
    T = cameraIntrinsics.dot(cameraExtrinsics[0:3, :])
    cameraBoxVolumePoints = np.rollaxis(np.dot(T, worldBoxVolumePoints), 1)
    cameraBoxVolumePoints = cameraBoxVolumePoints[:, :-1, :]/cameraBoxVolumePoints[:, -1:, :]
    
    return worldBoxVolumePoints, cameraBoxVolumePoints
    
def computeAllProjectedBoxVolumeAABBtoBBoxCost(cameraBoxVolumePoints, cameraTrackedBBoxes, doVisualize=False) :
    
    aabbs = np.hstack([np.min(cameraBoxVolumePoints, axis=0).T, np.max(cameraBoxVolumePoints, axis=0).T])
    projectedBoxVolumeAABBtoBBoxCost = np.sqrt((aabbs[:, 0]-cameraTrackedBBoxes[:, 0])**2+(aabbs[:, 1]-cameraTrackedBBoxes[:, 1])**2)
    projectedBoxVolumeAABBtoBBoxCost += np.sqrt((aabbs[:, 2]-cameraTrackedBBoxes[:, 2])**2+(aabbs[:, 1]-cameraTrackedBBoxes[:, 1])**2)
    projectedBoxVolumeAABBtoBBoxCost += np.sqrt((aabbs[:, 2]-cameraTrackedBBoxes[:, 2])**2+(aabbs[:, 3]-cameraTrackedBBoxes[:, 3])**2)
    projectedBoxVolumeAABBtoBBoxCost += np.sqrt((aabbs[:, 0]-cameraTrackedBBoxes[:, 0])**2+(aabbs[:, 3]-cameraTrackedBBoxes[:, 3])**2)
    
    if doVisualize :
        figure(); imshow(undistortedBgImage)
        for idx in np.arange(0, cameraBoxVolumePoints.shape[-1], 28) :
            edgesIdxs = np.array([[0, 1], [1, 2], [2, 3], [3, 0], [4, 5], [5, 6], [6, 7], [7, 4], [0, 4], [1, 5], [2, 6], [3, 7]])
            for edgeIdxs in edgesIdxs :
                plot(cameraBoxVolumePoints[edgeIdxs, 0, idx], cameraBoxVolumePoints[edgeIdxs, 1, idx], color=cm.Set1(np.mod(idx/20, 9)/8.0))
            plot([cameraBoxVolumePoints[-2, 0, idx], cameraBoxVolumePoints[-1, 0, idx]], [cameraBoxVolumePoints[-2, 1, idx], cameraBoxVolumePoints[-1, 1, idx]], color=cm.Set1(np.mod(idx/20, 9)/8.0))
            plot(cameraTrackedBBoxes[idx, [0, 2, 2, 0, 0]], cameraTrackedBBoxes[idx, [1, 1, 3, 3, 1]], color=cm.Set1(np.mod(idx/20, 9)/8.0))
#             print projectedBoxVolumeAABBtoBBoxCost[idx]
        xlim([0, undistortedBgImage.shape[1]]); ylim([undistortedBgImage.shape[0], 0])
    
    return np.sum(np.log(1+projectedBoxVolumeAABBtoBBoxCost))
    
def computeAllFGBGTransitionCost(cameraBoxVolumePoints, worldBoxVolumePoints, perFrameFGBGTransitionCosts, worldCameraLoc, doVisualize=False, numSamplesPerEdge=10) :
#     inPlaneNeighboursIdxs = np.array([[1, 3], [2, 0], [3, 1], [0, 2], [5, 7], [6, 4], [7, 5], [4, 6]])
    inPlaneNeighboursIdxs = np.array([[3, 0, 1], [0, 1, 2], [1, 2, 3], [2, 3, 0], [7, 4, 5], [4, 5, 6], [5, 6, 7], [6, 7, 4]])
    outOfPlaneNeighboursIdxs = np.array([4, 5, 6, 7, 0, 1, 2, 3])
    
    worldBoxVolumeCornersToCameraLocDists = np.linalg.norm(worldBoxVolumePoints[0:8, 0:3, :]-worldCameraLoc[np.newaxis, :, np.newaxis], axis=1)
    ## these are the indices of the corners that project inside the convex hull
    worldFurthestFromCameraBoxVolumeCornerIdx = np.argmax(worldBoxVolumeCornersToCameraLocDists, axis=0)
    worldClosestToCameraBoxVolumeCornerIdx = np.argmin(worldBoxVolumeCornersToCameraLocDists, axis=0)
    ## the opposite corners that are part of the convex hull are the out-of-plane neighbours of the corners that project inside the convex hull
    ## the convex hull is then just the concatenated in-plane neighbours are stored in inPlaneNeighboursIdxs
    convexHullCornersIdxs = np.hstack([inPlaneNeighboursIdxs[outOfPlaneNeighboursIdxs[worldFurthestFromCameraBoxVolumeCornerIdx], :],
                                       inPlaneNeighboursIdxs[outOfPlaneNeighboursIdxs[worldClosestToCameraBoxVolumeCornerIdx], :]])
#     print convexHullCornersIdxs.shape, cameraBoxVolumePoints.shape
#     print cameraBoxVolumePoints[convexHullCornersIdxs, :, :].shape
    cameraAllFramesHullCorners = np.rollaxis(np.array([cameraBoxVolumePoints[idxs, :, idx] for idx, idxs in enumerate(convexHullCornersIdxs)]).T, 1)
#     print cameraAllFramesHullCorners.shape
    
    ## here I need to first find the convex hull of all the frames
    ## check out peter's idea about finding the top most and bottom most points and then connecting them somehow in order to get convex hull instead of using opencv's implementation
    ## although the stuff below only adds 1 ms to the whole thing so meh
    height, width, numFrames = perFrameFGBGTransitionCosts.shape
    
    ## then I can sample the edges, find bilinear weights and pixel indices to closest pixels, find which ones are outside and set their cost to max fg bg cost
    cameraAllFramesHullEdgesDirs = cameraAllFramesHullCorners[[1, 2, 3, 4, 5, 0], :, :] - cameraAllFramesHullCorners[[0, 1, 2, 3, 4, 5], :, :]
#     print cameraAllFramesHullEdgesDirs.shape
    cameraAllFramesHullSamples = (np.rollaxis(cameraAllFramesHullCorners, 2).repeat(numSamplesPerEdge, axis=1) + np.rollaxis(cameraAllFramesHullEdgesDirs, 2).repeat(numSamplesPerEdge, axis=1)
                                  * np.arange(0.0, 1.0, 1.0/numSamplesPerEdge)[np.newaxis, :].repeat(6, axis=0).reshape([1, numSamplesPerEdge*6, 1])).reshape([numFrames*numSamplesPerEdge*6, 2])
    ## then index all the pixel indices from the correct frame index, multiply values by bilinear weights and sum the shit out of everything
    pixelIdxsCeiling = np.ceil(cameraAllFramesHullSamples).astype(int)
    pixelIdxsFloor = np.floor(cameraAllFramesHullSamples).astype(int)
    ## I take the diff between the ceiled coords of the samples and the sample coords
    bilinearWeights = pixelIdxsCeiling - cameraAllFramesHullSamples
    perSampleFrameIdx = np.arange(numFrames).repeat(numSamplesPerEdge*6)
    
    topLeftPixelXs = np.clip(pixelIdxsFloor[:, 0], 0, width-1)
    topLeftPixelYs = np.clip(pixelIdxsFloor[:, 1], 0, height-1)
    topRightPixelXs = np.clip(pixelIdxsCeiling[:, 0], 0, width-1)
    topRightPixelYs = np.clip(pixelIdxsFloor[:, 1], 0, height-1)
    bottomRightPixelXs = np.clip(pixelIdxsCeiling[:, 0], 0, width-1)
    bottomRightPixelYs = np.clip(pixelIdxsCeiling[:, 1], 0, height-1)
    bottomLeftPixelXs = np.clip(pixelIdxsFloor[:, 0], 0, width-1)
    bottomLeftPixelYs = np.clip(pixelIdxsCeiling[:, 1], 0, height-1)
#     print cameraAllFramesHullSamples.shape, pixelIdxsFloor.shape, pixelIdxsCeiling.shape, bilinearWeights.shape
        
    if doVisualize :
#         figure(); imshow(undistortedBgImage)
        costImg = np.zeros(perFrameFGBGTransitionCosts.shape[0:2])
        for idx in np.arange(0, numFrames, 56) :
            topLeftRow, topLeftCol = np.floor(np.min(cameraAllFramesHullCorners[:, :, idx], axis=0)).astype(int)[::-1]-10
            bottomRightRow, bottomRightCol = np.ceil(np.max(cameraAllFramesHullCorners[:, :, idx], axis=0)).astype(int)[::-1]+10
            topLeftRow = np.clip(topLeftRow, 0, height-1)
            topLeftCol = np.clip(topLeftCol, 0, width-1)
            bottomRightRow = np.clip(bottomRightRow, 0, height-1)
            bottomRightCol = np.clip(bottomRightCol, 0, width-1)
            
            costImg[topLeftRow:bottomRightRow+1, topLeftCol:bottomRightCol+1] = perFrameFGBGTransitionCosts[topLeftRow:bottomRightRow+1, topLeftCol:bottomRightCol+1, idx]
        figure(); imshow(costImg)
        for idx in np.arange(0, numFrames, 56) :
            plot(cameraAllFramesHullCorners[[0, 1, 2, 3, 4, 5, 0], 0, idx], cameraAllFramesHullCorners[[0, 1, 2, 3, 4, 5, 0], 1, idx], color=cm.Set1(np.mod(idx/20, 9)/8.0))
            scatter(cameraAllFramesHullSamples[idx*numSamplesPerEdge*6:(idx+1)*numSamplesPerEdge*6, 0], cameraAllFramesHullSamples[idx*numSamplesPerEdge*6:(idx+1)*numSamplesPerEdge*6, 1], marker='x', color='red')
            scatter(topLeftPixelXs[idx*numSamplesPerEdge*6:(idx+1)*numSamplesPerEdge*6],
                    topLeftPixelYs[idx*numSamplesPerEdge*6:(idx+1)*numSamplesPerEdge*6], color="magenta")
            scatter(topRightPixelXs[idx*numSamplesPerEdge*6:(idx+1)*numSamplesPerEdge*6],
                    topRightPixelYs[idx*numSamplesPerEdge*6:(idx+1)*numSamplesPerEdge*6], color="cyan")
            scatter(bottomRightPixelXs[idx*numSamplesPerEdge*6:(idx+1)*numSamplesPerEdge*6],
                    bottomRightPixelYs[idx*numSamplesPerEdge*6:(idx+1)*numSamplesPerEdge*6], color="yellow")
            scatter(bottomLeftPixelXs[idx*numSamplesPerEdge*6:(idx+1)*numSamplesPerEdge*6],
                    bottomLeftPixelYs[idx*numSamplesPerEdge*6:(idx+1)*numSamplesPerEdge*6], color="green")
        xlim([0, costImg.shape[1]]); ylim([costImg.shape[0], 0])
    
    ## then I use the horizontal diff to weigh the bottom pixels against each other
    bottomPixelsFGBGTransitionCost = (bilinearWeights[:, 0]*perFrameFGBGTransitionCosts[bottomLeftPixelYs, bottomLeftPixelXs, perSampleFrameIdx]
                                      + (1.0-bilinearWeights[:, 0])*perFrameFGBGTransitionCosts[bottomRightPixelYs, bottomRightPixelXs, perSampleFrameIdx])
    ## then the same horizontal diff to weigh the top pixels against each other
    topPixelsFGBGTransitionCost = (bilinearWeights[:, 0]*perFrameFGBGTransitionCosts[topLeftPixelYs, topLeftPixelXs, perSampleFrameIdx]
                                   + (1.0-bilinearWeights[:, 0])*perFrameFGBGTransitionCosts[topRightPixelYs, topRightPixelXs, perSampleFrameIdx])
    ## finally I use the vertical diff to weigh the above interpolated results
    allPixelsFGBGTransitionCost = bilinearWeights[:, 1]*bottomPixelsFGBGTransitionCost + (1.0-bilinearWeights[:, 1])*topPixelsFGBGTransitionCost
    ## taking average of cost at samples on the same edge
    return np.sum(np.log(1+np.mean(allPixelsFGBGTransitionCost.reshape([numFrames, 6, numSamplesPerEdge]), axis=2)))

def computeTotalCost(volumeSize, worldBoxVolumePoints, cameraBoxVolumePoints, worldCameraLoc, cameraTrackedBBoxes, perFrameFGBGTransitionCosts,
                     areaCostMultiplier, shapePriorMultiplier, doVisualize=False) :
    
    fgBGTransitionCost = computeAllFGBGTransitionCost(cameraBoxVolumePoints, worldBoxVolumePoints, perFrameFGBGTransitionCosts, worldCameraLoc, doVisualize)
    
    projectedBoxVolumeAABBtoBBoxCost = computeAllProjectedBoxVolumeAABBtoBBoxCost(cameraBoxVolumePoints, cameraTrackedBBoxes, doVisualize)
    
    footprintWidth, footprintLength, volumeHeight = volumeSize
    ## the width and the height should be close to a certain ratio of the length
    ## manually set to best selling car in the world, the Ford Fiesta, but could be learned from a database
    widthRatioMean, widthRatioStd = 0.43, 1.0
    heightRatioMean, heightRatioStd = 0.36, 1.0
    shapePrior = np.sum(np.log(1+np.array([((footprintWidth/footprintLength - widthRatioMean)**2)/(2.0*(widthRatioStd**2)),
                                           ((volumeHeight/footprintLength - heightRatioMean)**2)/(2.0*(heightRatioStd**2))])))
    
    return fgBGTransitionCost + areaCostMultiplier*projectedBoxVolumeAABBtoBBoxCost + shapePriorMultiplier*shapePrior

def fitVolumeToTrack(params, paramsScale, cameraIntrinsics, cameraExtrinsics, worldCameraLoc, cameraTrackedBBoxes, perFrameFGBGTransitionCosts,
                     areaCostMultiplier=1000000.0, shapePriorMultiplier=100000.0, doVisualize=False) :
    
    worldBoxVolumePoints, cameraBoxVolumePoints = placeAndProjectAllBoxVolumes(params, paramsScale, cameraIntrinsics, cameraExtrinsics)
    
    totalCost = computeTotalCost(np.abs(params[0:3]/paramsScale[0:3]), worldBoxVolumePoints, cameraBoxVolumePoints, worldCameraLoc, cameraTrackedBBoxes, perFrameFGBGTransitionCosts,
                                 areaCostMultiplier, shapePriorMultiplier, doVisualize)
#     print params, totalCost, "matrix"
    return totalCost

def fitVolumeToTrackRelativesWithStretchRotFullTraj(params, paramsScale, relativePositions, relativeRots, cameraIntrinsics, cameraExtrinsics, worldCameraLoc, cameraTrackedBBoxes, startRot,
                                                    perFrameFGBGTransitionCosts, areaCostMultiplier=1000000.0, shapePriorMultiplier=100000.0, doVisualize=False) :
    
    worldBoxVolumePoints, cameraBoxVolumePoints = placeAndProjectAllBoxVolumesRelatives(params, paramsScale, relativePositions, relativeRots, cameraIntrinsics, cameraExtrinsics, startRot)
    
    totalCost = computeTotalCost(np.abs(params[0:3]/paramsScale[0:3]), worldBoxVolumePoints, cameraBoxVolumePoints, worldCameraLoc, cameraTrackedBBoxes, perFrameFGBGTransitionCosts,
                                 areaCostMultiplier, shapePriorMultiplier, doVisualize)
#     print params, totalCost, "matrix"
    return totalCost

# worldCameraLoc = np.linalg.inv(cameraExtrinsics)[:-1, -1]

# # optimizerMethod = 'BFGS'
# optimizerMethod = 'Nelder-Mead'
# if optimizerMethod == 'BFGS' :
#     paramDimensionalityScale = np.array([10.0, 10.0, 10.0, 1.0, 1.0, 100.0, 1.0, 1.0])
# else :
#     paramDimensionalityScale = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
# initParamsToEstimate = np.concatenate([np.ones(3), worldUndistortedTrajectoryPoints[0, 0:2], [smootherWorldOrientationAngles[0]], np.ones(2)])*paramDimensionalityScale

# if False :
#     try :
#         initParamsToEstimate = np.copy(optResult.x)
#     except Exception as e:
#         print e
# dimensionsToEstimate = 8
# startRotation = initParamsToEstimate[5]
# print initParamsToEstimate

# startTime = time.time()
# print fitVolumeToTrackRelativesWithStretchRotFullTraj(initParamsToEstimate*np.array([1, 1, 1, 1, 1, 1, 1, 1]), paramDimensionalityScale, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
#                                                       smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], worldCameraLoc,
#                                                       cameraUndistortedBBoxes, startRotation, allObjectFramesUndistortedFGBGTransitionCost, True)
# print "TIME:", time.time()-startTime

# # initParamsToEstimate = np.concatenate([np.ones(3), np.hstack([worldUndistortedTrajectoryPoints[:, 0:2], smootherWorldOrientationAngles[:, np.newaxis]]).flatten()])
# # paramDimensionalityScale = np.ones(len(initParamsToEstimate))
# # startTime = time.time()
# # print fitVolumeToTrack(initParamsToEstimate, paramDimensionalityScale, cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], worldCameraLoc,
# #                        cameraUndistortedBBoxes, allObjectFramesUndistortedFGBGTransitionCost, False)
# # print "TIME:", time.time()-startTime

In [348]:
######################## OPTIMIZES ALL PARAMS AT ONCE ########################
# worldCameraLoc = np.linalg.inv(cameraExtrinsics)[:-1, -1]

# initParamsToEstimate = np.concatenate([np.ones(3), np.hstack([worldUndistortedTrajectoryPoints[:, 0:2], smootherWorldOrientationAngles[:, np.newaxis]]).flatten()])
# # optimizerMethod = 'BFGS'
# optimizerMethod = 'Nelder-Mead'
# if optimizerMethod == 'BFGS' :
#     paramDimensionalityScale = np.concatenate([np.ones(3)*10.0, np.ones(len(initParamsToEstimate)-3)])
# else :
#     paramDimensionalityScale = np.ones(len(initParamsToEstimate))

# startTime = time.time()
# optResult = minimize(fitVolumeToTrack, initParamsToEstimate, method='Nelder-Mead',#optimizerMethod,
#                      args=(paramDimensionalityScale, cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], worldCameraLoc,
#                            cameraUndistortedBBoxes, allObjectFramesUndistortedFGBGTransitionCost), options={'maxiter': 5000})#, 'eps':0.1})
# # print optResult.x
# print ["Optimization Failed!", "Optimization Successful!"][int(optResult.success)], " ---  MESSAGE: \"{0}\" --- cost: {1} --- It took {2} secs".format(optResult.message, optResult.fun, time.time()-startTime)

# # print fitVolumeToTrack(optResult.x, paramDimensionalityScale, cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], worldCameraLoc,
# #                        cameraUndistortedBBoxes, allObjectFramesUndistortedFGBGTransitionCost, True)

Optimization Failed!  ---  MESSAGE: "Maximum number of iterations has been exceeded." --- cost: 13651324.4791 --- It took 40.9929919243 secs


In [238]:
######################## MOVED DIS SHIT INTO THE FIT AND REFINE METHOD ########################
# worldCameraLoc = np.linalg.inv(cameraExtrinsics)[:-1, -1]

# # optimizerMethod = 'BFGS'
# optimizerMethod = 'Nelder-Mead'
# if optimizerMethod == 'BFGS' :
#     paramDimensionalityScale = np.array([10.0, 10.0, 10.0, 1.0, 1.0, 100.0, 1.0, 1.0])
# else :
#     paramDimensionalityScale = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
# initParamsToEstimate = np.concatenate([np.ones(3), worldUndistortedTrajectoryPoints[0, 0:2], [smootherWorldOrientationAngles[0]], np.ones(2)])*paramDimensionalityScale

# if False :
#     try :
#         initParamsToEstimate = np.copy(optResult.x)
#     except Exception as e:
#         print e
# dimensionsToEstimate = 8
# startRotation = initParamsToEstimate[5]
# print initParamsToEstimate
# startTime = time.time()
# optResult = minimize(fitVolumeToTrackRelativesWithStretchRotFullTraj, initParamsToEstimate, method='Nelder-Mead',#optimizerMethod,
#                      args=(paramDimensionalityScale, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
#                                            smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], worldCameraLoc,
#                                            cameraUndistortedBBoxes, startRotation, allObjectFramesUndistortedFGBGTransitionCost), options={'maxiter': 3000})#, 'eps':0.1})
# print optResult.x
# print ["Optimization Failed!", "Optimization Successful!"][int(optResult.success)], " ---  MESSAGE: \"{0}\" --- cost: {1} --- It took {2} secs".format(optResult.message, optResult.fun, time.time()-startTime)

# print fitVolumeToTrackRelativesWithStretchRotFullTraj(optResult.x, paramDimensionalityScale, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
#                                                       smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], worldCameraLoc,
#                                                       cameraUndistortedBBoxes, startRotation, allObjectFramesUndistortedFGBGTransitionCost, True)

[  1.           1.           1.         -22.58143616 -27.20477486
   0.18026529   1.           1.        ]
[  1.07617315   4.79634726   0.52104097 -24.94780249 -26.13499737
   0.17621985   1.04619108   1.07717758]
Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 886155771.712 --- It took 13.9998128414 secs
9.62081466417
14.7790967476
4.30763173447
11.9263876236
21.5364281507
11.1980505511
12.0517778977
25.0575159824
26.7185097219
38.7544103644
42.2298187448
33.8275565852
37.3712878601
107.182100416
89.3292615438
886155771.712


In [151]:
# print fitVolumeToTrackRelativesWithStretchRotFullTraj(optResult.x, paramDimensionalityScale, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
#                                                       smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], worldCameraLoc,
#                                                       cameraUndistortedBBoxes, startRotation, allObjectFramesUndistortedFGBGTransitionCost, True)

36.9485927201
47.4285017949
40.5597631723
40.4623199721
45.3007123104
41.2606835651
40.4642258468
28.525912672
10.8083657604
30.7318924261
18.8995298906
18.6363580439
15.3339240432
71.3097846537
65.2765033332
10550459.8219


In [103]:
"/home/ilisescu/PhD/data/havana/1-3-5_IOU-tracker-volumes_{0}/".format(objectIdToTrack)

'/home/ilisescu/PhD/data/havana/1-3-5_IOU-tracker-volumes_293/'

In [104]:
######################## VISUALIZATION STUFF ########################
_, allObjectFramesBoxVolumePoints = placeAndProjectAllBoxVolumesRelatives(optResult.x, paramDimensionalityScale, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
                                                                          smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], startRotation)

if not os.path.isdir("/home/ilisescu/PhD/data/havana/1-3-5_IOU-tracker-volumes_{0}/".format(objectIdToTrack)) :
    os.mkdir("/home/ilisescu/PhD/data/havana/1-3-5_IOU-tracker-volumes_{0}/".format(objectIdToTrack))

fig = mpl.pylab.figure("animation", frameon=False)
fig.set_size_inches(17,12)
gca().set_axis_off()
img = None
for idx in np.arange(allObjectFrames.shape[-1]):
    cla()
    undistortedCurrentFrame, _, _ = undistortImage(filmedSceneData[DICT_DISTORTION_PARAMETER], filmedSceneData[DICT_DISTORTION_RATIO], allObjectFrames[:, :, :, idx],
                                                   filmedSceneData[DICT_CAMERA_INTRINSICS], doReturnMaps=False)
    gca().imshow(undistortedCurrentFrame)
    
    edgesIdxs = np.array([[0, 1], [1, 2], [2, 3], [3, 0], [4, 5], [5, 6], [6, 7], [7, 4], [0, 4], [1, 5], [2, 6], [3, 7]])
    for edgeIdxs in edgesIdxs :
        gca().plot(allObjectFramesBoxVolumePoints[edgeIdxs, 0, idx], allObjectFramesBoxVolumePoints[edgeIdxs, 1, idx], color='red')
    gca().plot([allObjectFramesBoxVolumePoints[-2, 0, idx], allObjectFramesBoxVolumePoints[-1, 0, idx]],
               [allObjectFramesBoxVolumePoints[-2, 1, idx], allObjectFramesBoxVolumePoints[-1, 1, idx]], color='red')
#     if img is None:
#         img = mpl.pylab.imshow(undistortedCurrentFrame)
#     else:
#         img.set_data(undistortedCurrentFrame)
    mpl.pylab.pause(0.1)
    mpl.pylab.draw()
    fig.savefig("/home/ilisescu/PhD/data/havana/1-3-5_IOU-tracker-volumes_{1}/frame-{0:05}.png".format(idx+1, objectIdToTrack), bbox_inches='tight')



In [44]:
def placeRefinedBoxVolume(params, paramsScale, worldFrameBoxVolumePoints, cameraIntrinsics, cameraExtrinsics, doVisualize=False) :
    """the params are (dx, dy, rot)"""
    dx, dy, rot = params/paramsScale
    ## move the object to the origin so I can rotate it around it's middle point (which is the second to last point in worldFrameBoxVolumePoints)
    transform = np.array([[1.0, 0.0, 0.0, -worldFrameBoxVolumePoints[-2, 0]],
                          [0.0, 1.0, 0.0, -worldFrameBoxVolumePoints[-2, 1]],
                          [0.0, 0.0, 1.0, 0.0],
                          [0.0, 0.0, 0.0, 1.0]])
    ## now rotate and place back to the original location plus whatever displacement (dx, dy)
    transform = np.dot(np.array([[np.cos(rot) , np.sin(rot), 0.0, dx + worldFrameBoxVolumePoints[-2, 0]],
                                 [-np.sin(rot), np.cos(rot), 0.0, dy + worldFrameBoxVolumePoints[-2, 1]],
                                 [0.0         , 0.0        , 1.0, 0.0],
                                 [0.0         , 0.0        , 0.0, 1.0]]), transform)
    worldFrameBoxVolumePointsTransformed = np.dot(transform, worldFrameBoxVolumePoints.T)
    worldFrameBoxVolumePointsTransformed = worldFrameBoxVolumePointsTransformed/worldFrameBoxVolumePointsTransformed[-1, :]
    
    
    T = cameraIntrinsics.dot(cameraExtrinsics[0:3, :])
    cameraFrameBoxVolumePointsTransformed = np.dot(T, worldFrameBoxVolumePointsTransformed)
    cameraFrameBoxVolumePointsTransformed = cameraFrameBoxVolumePointsTransformed[:-1, :]/cameraFrameBoxVolumePointsTransformed[-1:, :]
    
    if doVisualize :
        figure(); imshow(undistortedBgImage)
        edgesIdxs = np.array([[0, 1], [1, 2], [2, 3], [3, 0], [4, 5], [5, 6], [6, 7], [7, 4], [0, 4], [1, 5], [2, 6], [3, 7]])
        for edgeIdxs in edgesIdxs :
            plot(cameraFrameBoxVolumePointsTransformed[0, edgeIdxs], cameraFrameBoxVolumePointsTransformed[1, edgeIdxs], color='red')
        plot([cameraFrameBoxVolumePointsTransformed[0, -2], cameraFrameBoxVolumePointsTransformed[0, -1]],
             [cameraFrameBoxVolumePointsTransformed[1, -2], cameraFrameBoxVolumePointsTransformed[1, -1]], color='red')
        
    
    return worldFrameBoxVolumePointsTransformed.T, cameraFrameBoxVolumePointsTransformed.T
    
# def refineInFrameBoxVolume(params, paramsScale, worldFrameBoxVolumePoints, worldCameraLoc, cameraIntrinsics, cameraExtrinsics, cameraTrackedBBox, frameFGBGTransitionCost,
#                            worldPreviousFramesBoxVolumePoints, startLocation, areaCostMultiplier=1.0, shapePriorMultiplier=0.0, doVisualize=False) :
#     """the params are (dx, dy, rot)"""
    
#     worldFrameBoxVolumePointsTransformed, cameraFrameBoxVolumePointsTransformed = placeRefinedBoxVolume(params, paramsScale, worldFrameBoxVolumePoints, cameraIntrinsics, cameraExtrinsics, doVisualize)
                       
# #     ## I don't actually care about the shape because I'm not going to modify it so I just use bogus volumeSize and set the shapePriorMultiplier to 0
#     totalCost = computeTotalCost(np.ones(3), worldFrameBoxVolumePointsTransformed[:, :, np.newaxis], cameraFrameBoxVolumePointsTransformed[:, :, np.newaxis],
#                                  worldCameraLoc, cameraTrackedBBox[np.newaxis, :], frameFGBGTransitionCost[:, :, np.newaxis], areaCostMultiplier, shapePriorMultiplier, doVisualize)
    
# #     fgBGTransitionCost = computeAllFGBGTransitionCost(cameraFrameBoxVolumePointsTransformed[:, :, np.newaxis], worldFrameBoxVolumePointsTransformed[:, :, np.newaxis],
# #                                                       frameFGBGTransitionCost[:, :, np.newaxis], worldCameraLoc, doVisualize)
    
# #     totalCost = 1.0*fgBGTransitionCost
#     if worldPreviousFramesBoxVolumePoints.shape[-1] > 0 :
#         previousFrameLookingDir = worldPreviousFramesBoxVolumePoints[-1, 0:2, 0]-worldPreviousFramesBoxVolumePoints[-2, 0:2, 0]
#         previousFrameLookingDir /= np.linalg.norm(previousFrameLookingDir)
        
#         ## should move in the direction of the previous frame's looking dir
#         movingDir = worldFrameBoxVolumePointsTransformed[-2, 0:2] - worldPreviousFramesBoxVolumePoints[-2, 0:2, 0]
#         movingDirAngularDistance = 0.0
#         if np.linalg.norm(movingDir) > 0.0 :
#             movingDir = movingDir/np.linalg.norm(movingDir)

#             movingDirAngularDistance = np.abs(np.arctan2(movingDir[1], movingDir[0]) - np.arctan2(previousFrameLookingDir[1], previousFrameLookingDir[0]))
# #             print movingDirAngularDistance, params
# #             totalCost += 1.0*movingDirAngularDistance
# #             figure(); plot([0, previousFrameLookingDir[0]], [0, previousFrameLookingDir[1]]); plot([0, movingDir[0]], [0, movingDir[1]])
# #             scatter([cameraFrameBoxVolumePointsTransformed[-2, 0]], [cameraFrameBoxVolumePointsTransformed[-2, 1]], color='red')
# #             T = cameraIntrinsics.dot(cameraExtrinsics[0:3, :])
# #             cameraFrameBoxVolumePointsTransformed = np.dot(T, worldPreviousFramesBoxVolumePoints[:, :, 0].T)
# #             cameraFrameBoxVolumePointsTransformed = cameraFrameBoxVolumePointsTransformed[:-1, :]/cameraFrameBoxVolumePointsTransformed[-1:, :]
# #             scatter([cameraFrameBoxVolumePointsTransformed[0, -2]], [cameraFrameBoxVolumePointsTransformed[1, -2]], color='white')
        
#         ## should look in the direction of the previous frame's looking dir
#         lookingDir = worldFrameBoxVolumePointsTransformed[-1, 0:2]-worldFrameBoxVolumePointsTransformed[-2, 0:2]
#         lookingDirAngularDistance = 0.0
#         if np.linalg.norm(lookingDir) > 0.0 :
#             lookingDir = lookingDir/np.linalg.norm(lookingDir)

#             lookingDirAngularDistance = np.abs(np.arctan2(lookingDir[1], lookingDir[0]) - np.arctan2(previousFrameLookingDir[1], previousFrameLookingDir[0]))
#     #             print lookingDirAngularDistance, params
    
# #         figure(); plot([0, previousFrameLookingDir[0]], [0, previousFrameLookingDir[1]], color='red'); plot([0, movingDir[0]], [0, movingDir[1]], color='green'); plot([0, lookingDir[0]], [0, lookingDir[1]], color='blue')
# #         print lookingDirAngularDistance, movingDirAngularDistance
# #         totalCost += 10000.0*(lookingDirAngularDistance+movingDirAngularDistance)
#         totalCost += 100.0*lookingDirAngularDistance
    
#         travelDistance = np.linalg.norm(worldFrameBoxVolumePointsTransformed[-2, 0:2]-worldPreviousFramesBoxVolumePoints[-2, 0:2, 0])
#         currentPosition = worldFrameBoxVolumePointsTransformed[-2, 0:2]
#         desiredPosition = worldPreviousFramesBoxVolumePoints[-2, 0:2, 0] + previousFrameLookingDir*travelDistance
#         totalCost += 10.0*np.linalg.norm(currentPosition-desiredPosition)
# #         print travelDistance, currentPosition, desiredPosition
    
#     return totalCost
# refineInFrameBoxVolume(np.zeros(3), np.ones(3), worldAllObjectFramesBoxVolumePoints[:, :, 0], worldCameraLoc, cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS],
#                        cameraUndistortedBBoxes[0, :], allObjectFramesUndistortedFGBGTransitionCost[:, :, 0], np.empty([10, 4, 0]), False)
# 1.5976193118348496

def refineInFrameBoxVolume(params, paramsScale, worldFrameBoxVolumePoints, worldCameraLoc, cameraIntrinsics, cameraExtrinsics, cameraTrackedBBox, frameFGBGTransitionCost,
                           worldPredictedMovingDirBasedOnNeighbours, worldPredictedLocationBasedOnNeighbours, areaCostMultiplier=10000.0, shapePriorMultiplier=0.0, doVisualize=False) :
    """the params are (dx, dy, rot)"""
    
    worldFrameBoxVolumePointsTransformed, cameraFrameBoxVolumePointsTransformed = placeRefinedBoxVolume(params, paramsScale, worldFrameBoxVolumePoints, cameraIntrinsics, cameraExtrinsics, doVisualize)
                       
    ## I don't actually care about the shape because I'm not going to modify it so I just use bogus volumeSize and set the shapePriorMultiplier to 0
    fgBGTransitionCost = computeTotalCost(np.ones(3), worldFrameBoxVolumePointsTransformed[:, :, np.newaxis], cameraFrameBoxVolumePointsTransformed[:, :, np.newaxis],
                                          worldCameraLoc, cameraTrackedBBox[np.newaxis, :], frameFGBGTransitionCost[:, :, np.newaxis], areaCostMultiplier, shapePriorMultiplier, doVisualize)
    
#     fgBGTransitionCost = computeAllFGBGTransitionCost(cameraFrameBoxVolumePointsTransformed[:, :, np.newaxis], worldFrameBoxVolumePointsTransformed[:, :, np.newaxis],
#                                                       frameFGBGTransitionCost[:, :, np.newaxis], worldCameraLoc, doVisualize)
    

    ## should move in the direction predicted by the neighbouring frames
    ## the moving direction is not really the direction in which the car is moving but rather the direction in which it's looking
    movingDir = worldFrameBoxVolumePointsTransformed[-1, 0:2]-worldFrameBoxVolumePointsTransformed[-2, 0:2]
    movingDirAngularDistance = 0.0
    if np.linalg.norm(movingDir) > 0.0 :
        movingDir = movingDir/np.linalg.norm(movingDir)
        movingDirAngularDistance = np.abs(np.arctan2(movingDir[1], movingDir[0]) - np.arctan2(worldPredictedMovingDirBasedOnNeighbours[1], worldPredictedMovingDirBasedOnNeighbours[0]))
    
    distanceToPredictedLocation = 0.0
    if worldPredictedLocationBasedOnNeighbours is not None :
        distanceToPredictedLocation = np.linalg.norm(worldFrameBoxVolumePointsTransformed[-2, 0:2]-worldPredictedLocationBasedOnNeighbours)

#     print fgBGTransitionCost, movingDirAngularDistance, distanceToPredictedLocation, worldFrameBoxVolumePointsTransformed[-2, 0:2], worldPredictedLocationBasedOnNeighbours
#     figure(); plot([0, movingDir[0]], [0, movingDir[1]]); plot([0, worldPredictedMovingDirBasedOnNeighbours[0]], [0, worldPredictedMovingDirBasedOnNeighbours[1]])
    totalCost = 1.0*fgBGTransitionCost + 100000.0*(movingDirAngularDistance + distanceToPredictedLocation)
    return totalCost
    

In [244]:
######################## MOVED DIS SHIT INTO THE FIT AND REFINE METHOD ########################
# worldAllObjectFramesBoxVolumePoints, _ = placeAndProjectAllBoxVolumesRelatives(optResult.x, paramDimensionalityScale, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
#                                                                                smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, 
#                                                                                filmedSceneData[DICT_CAMERA_EXTRINSICS], startRotation)

# worldRefinedAllObjectFramesBoxVolumePoints = np.zeros_like(worldAllObjectFramesBoxVolumePoints)
# refinementParamDimensionalityScale = np.array([1.0, 1.0, 1.0])
# numNeighbouringFrames = 11
# for idx in np.arange(worldRefinedAllObjectFramesBoxVolumePoints.shape[-1])[0:] :
#     clippedIdx = np.clip(idx - (numNeighbouringFrames-1)/2, 0, worldRefinedAllObjectFramesBoxVolumePoints.shape[-1] - numNeighbouringFrames)
#     neighbouringFramesIdxs = np.arange(numNeighbouringFrames) + clippedIdx
#     currentFrameNeighbourIdx = int(np.argwhere(neighbouringFramesIdxs == idx)) ## where in neighbouringFramesIdxs, the cuurrent frame is
    
#     ## define importance weighing for data coming from neihgbouring frames
#     neighbouringFramesWeights = np.ones(numNeighbouringFrames)
#     neighbouringFramesWeights[:currentFrameNeighbourIdx] *= 2 ## value previous frames in time more because they should be smooth already
#     ## also value frames closer in time to current frame, more than further away ones (the max is set at the frame immediately before the current one in order to make the closest smoothed out frame more important)
# #     temporalDistSmoothKernel = exp(-(np.arange(-currentFrameNeighbourIdx+1, numNeighbouringFrames-currentFrameNeighbourIdx+1)**2)/(numNeighbouringFrames*5.0))
# #     temporalDistSmoothKernel /= np.sum(temporalDistSmoothKernel)
# #     neighbouringFramesWeights *= temporalDistSmoothKernel
#     neighbouringFramesWeights /= np.sum(neighbouringFramesWeights)
# #     print idx, neighbouringFramesIdxs, neighbouringFramesWeights
    
#     ## get all points belonging to the box volume of neighbouring frames
#     worldNeighbouringFramesBoxVolumePoints = np.concatenate([worldRefinedAllObjectFramesBoxVolumePoints[:, :, neighbouringFramesIdxs[:currentFrameNeighbourIdx]],
#                                                              worldAllObjectFramesBoxVolumePoints[:, :, neighbouringFramesIdxs[currentFrameNeighbourIdx:]]], axis=-1)
#     ## find moving direction of the object at each neighbouring frame
#     worldNeighbouringFramesOnGroundPlaneMovingDirs = (worldNeighbouringFramesBoxVolumePoints[-1, 0:2, :]-worldNeighbouringFramesBoxVolumePoints[-2, 0:2, :]).T
#     validMovingDirs = np.linalg.norm(worldNeighbouringFramesOnGroundPlaneMovingDirs, axis=1) != 0.0
#     worldNeighbouringFramesOnGroundPlaneMovingDirs[validMovingDirs, :] /= np.linalg.norm(worldNeighbouringFramesOnGroundPlaneMovingDirs, axis=1)[validMovingDirs, np.newaxis]
    
#     ## find average moving dir over all neighbouring frames
#     worldNeighbouringFramesOnGroundPlaneAverageMovingDir = np.sum(worldNeighbouringFramesOnGroundPlaneMovingDirs[validMovingDirs, :]*neighbouringFramesWeights[validMovingDirs, np.newaxis], axis=0)
#     worldNeighbouringFramesOnGroundPlaneAverageMovingDir /= np.linalg.norm(worldNeighbouringFramesOnGroundPlaneAverageMovingDir)
    
#     ## find average travel distance between neighbouring frames
#     worldNeighbouringFramesAverageOnGroundPlaneTravelDistance = np.linalg.norm(worldNeighbouringFramesBoxVolumePoints[-2, 0:2, 1:].T - worldNeighbouringFramesBoxVolumePoints[-2, 0:2, :-1].T, axis=1)
#     worldNeighbouringFramesAverageOnGroundPlaneTravelDistance = np.sum(worldNeighbouringFramesAverageOnGroundPlaneTravelDistance*neighbouringFramesWeights[1:])
# #     worldNeighbouringFramesAverageOnGroundPlaneTravelDistance = np.median(worldNeighbouringFramesAverageOnGroundPlaneTravelDistance)
    
#     ## find predicted location based on average moving dir, average travel distance and location of the object at the previous frame
#     worldPredictedLocationBasedOnNeighbours = None
#     if idx > 0 :
#         worldPredictedLocationBasedOnNeighbours = (worldRefinedAllObjectFramesBoxVolumePoints[-2, 0:2, idx-1]
#                                                    + worldNeighbouringFramesOnGroundPlaneAverageMovingDir*worldNeighbouringFramesAverageOnGroundPlaneTravelDistance)
#     startTime = time.time()
#     ## this was using the old refining method that is commented out above
# #     refinementResult = minimize(refineInFrameBoxVolume, np.zeros(3), method='Nelder-Mead',#optimizerMethod,
# #                                 args=(refinementParamDimensionalityScale, worldAllObjectFramesBoxVolumePoints[:, :, idx], worldCameraLoc, cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS],
# #                                       cameraUndistortedBBoxes[idx, :], allObjectFramesUndistortedFGBGTransitionCost[:, :, idx],
# #                                       worldRefinedAllObjectFramesBoxVolumePoints[:, :, idx-1:idx], worldAllObjectFramesBoxVolumePoints[-2, :, idx]), options={'maxiter': 5000})#, 'eps':0.1})
#     refinementResult = minimize(refineInFrameBoxVolume, np.zeros(3), method='Nelder-Mead',#optimizerMethod,
#                                 args=(refinementParamDimensionalityScale, worldAllObjectFramesBoxVolumePoints[:, :, idx], worldCameraLoc, cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS],
#                                       cameraUndistortedBBoxes[idx, :], allObjectFramesUndistortedFGBGTransitionCost[:, :, idx],
#                                       worldNeighbouringFramesOnGroundPlaneAverageMovingDir, worldPredictedLocationBasedOnNeighbours), options={'maxiter': 5000})#, 'eps':0.1})
#     print refinementResult.x
#     print idx, ["Optimization Failed!", "Optimization Successful!"][int(refinementResult.success)],
#     print " ---  MESSAGE: \"{0}\" --- cost: {1} --- It took {2} secs".format(refinementResult.message, refinementResult.fun, time.time()-startTime)
#     worldRefinedBoxVolumePoints, _ = placeRefinedBoxVolume(refinementResult.x, refinementParamDimensionalityScale, worldAllObjectFramesBoxVolumePoints[:, :, idx],
#                                                            cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], False)
#     worldRefinedAllObjectFramesBoxVolumePoints[:, :, idx] = worldRefinedBoxVolumePoints

[-0.20967835  0.03238546 -0.00356996]
0 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 31368.1637122 --- It took 0.103442192078 secs
[-0.21866817  0.03441229 -0.00261207]
1 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 31984.7718197 --- It took 0.0916230678558 secs
[-0.22690763  0.03674649 -0.00085655]
2 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 31540.2660306 --- It took 0.0991868972778 secs
[-0.24213412  0.03992349 -0.00359042]
3 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 30582.5234252 --- It took 0.110769987106 secs
[-0.23984895  0.1266046  -0.00356297]
4 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 30268.6118965 --- It took 0.17315196991 secs
[-0.25188981  0.12830944 -0.00700578]
5 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully."

[-0.24376138  0.11253333  0.00403494]
47 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 36569.2864718 --- It took 0.131428003311 secs
[-0.17702725  0.10594318  0.00154975]
48 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 42921.4555952 --- It took 0.138370037079 secs
[-0.17464686  0.1078704   0.00160174]
49 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 36507.047047 --- It took 0.156019926071 secs
[-0.17611187  0.11024994  0.00117732]
50 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 38248.7551109 --- It took 0.14258813858 secs
[-0.17238479  0.11176428  0.00076073]
51 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 38307.8424472 --- It took 0.0801420211792 secs
[-0.17722174  0.11458769  0.00126069]
52 Optimization Successful!  ---  MESSAGE: "Optimization terminated successful

[-0.32283177  0.20302717  0.01088111]
94 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 38808.3767914 --- It took 0.118955850601 secs
[-0.33342552  0.20230586  0.01099565]
95 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 38218.9540154 --- It took 0.0983169078827 secs
[-0.3455922   0.20139823  0.01011896]
96 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 37827.1426913 --- It took 0.109693050385 secs
[-0.35771556  0.20040753  0.00973142]
97 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 37698.5384903 --- It took 0.0934679508209 secs
[-0.35580783  0.19879053  0.00926064]
98 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 38236.0068135 --- It took 0.086699962616 secs
[-0.35187724  0.19695967  0.00919923]
99 Optimization Successful!  ---  MESSAGE: "Optimization terminated success

[-0.23189347  0.1997177   0.00860429]
142 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 34579.8496551 --- It took 0.127867937088 secs
[-0.24239551  0.18971193  0.00912607]
143 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 39306.3641858 --- It took 0.151580095291 secs
[-0.22041013  0.19085803  0.00919553]
144 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 34634.4196504 --- It took 0.0948300361633 secs
[-0.29957575  0.20861772  0.00916404]
145 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 33542.9192411 --- It took 0.0887551307678 secs
[ 0.04562292 -0.02445189  0.003173  ]
146 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 49851.9064683 --- It took 0.277390956879 secs
[ 0.10133758 -0.07675062 -0.03247331]
147 Optimization Successful!  ---  MESSAGE: "Optimization terminated s

[-0.27206637 -0.18259638  0.00584721]
188 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 57726.5193673 --- It took 0.137255191803 secs
[-0.26719648 -0.1858727   0.0062574 ]
189 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 46864.3745527 --- It took 0.116487979889 secs
[-0.25677971 -0.18790762  0.00594155]
190 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 45464.8871009 --- It took 0.0750789642334 secs
[-0.24311896 -0.18962193  0.00562642]
191 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 44702.1906265 --- It took 0.0827989578247 secs
[-0.22369849 -0.19750042  0.0053247 ]
192 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 43432.2472368 --- It took 0.077064037323 secs
[-0.22623518 -0.2047347   0.00505967]
193 Optimization Successful!  ---  MESSAGE: "Optimization terminated s

[-0.3024935  -0.48084549 -0.00532757]
235 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 47308.5974034 --- It took 0.13719701767 secs
[-0.29832331 -0.47507386 -0.00541412]
236 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 49932.5719506 --- It took 0.103250026703 secs
[-0.29208809 -0.46862915 -0.00556259]
237 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 49513.6601174 --- It took 0.108045101166 secs
[-0.29018036 -0.45802523 -0.00559887]
238 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 46201.4838132 --- It took 0.116345882416 secs
[-0.27931114 -0.4391169  -0.00560529]
239 Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 47669.7163839 --- It took 0.13905787468 secs
[-0.28809897 -0.42682924 -0.0054842 ]
240 Optimization Successful!  ---  MESSAGE: "Optimization terminated succe

In [60]:
def fitAndRefineBoxVolumeToTrackedBBox(initWorldAllFramesTrajectoryPoints, initWorldAllFramesOrientationAngles, allFramesFGBGTransitionCost, cameraAllFramesTrackedBBoxes, 
                                       cameraIntrinsics, cameraExtrinsics, areaCostMultiplier=10000.0, shapePriorMultiplier=10000000.0, optimizerMethod='Nelder-Mead', maxItersNum=2) :
    worldCameraLoc = np.linalg.inv(cameraExtrinsics)[:-1, -1]
    
    worldAllFramesTrajectoryPoints = np.copy(initWorldAllFramesTrajectoryPoints)
    worldAllFramesOrientationAngles = np.copy(initWorldAllFramesOrientationAngles)
    
    if optimizerMethod == 'BFGS' :
        globalParamsDimensionalityScale = np.array([10.0, 10.0, 10.0, 1.0, 1.0, 100.0, 1.0, 1.0])
        refinementParamsDimensionalityScale = np.array([1.0, 1.0, 1.0])
    else :
        globalParamsDimensionalityScale = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
        refinementParamsDimensionalityScale = np.array([1.0, 1.0, 1.0])
    
    for iterIdx in np.arange(maxItersNum+1) :
        ########################## FIND THE BEST GLOBAL TRANSFORMATION OF THE TRAJECTORY DEFINED BY worldAllFramesTrajectoryPoints AND worldAllFramesOrientationAngles ##########################
        initGlobalParamsToEstimate = np.concatenate([np.ones(3), worldAllFramesTrajectoryPoints[0, 0:2], [worldAllFramesOrientationAngles[0]], np.ones(2)])*globalParamsDimensionalityScale
        startRotation = initGlobalParamsToEstimate[5]
        print initGlobalParamsToEstimate
        
        print "STARTING ITER {0} --- params: {1} --- COST: {2}".format(iterIdx, initGlobalParamsToEstimate,
                                                                       fitVolumeToTrackRelativesWithStretchRotFullTraj(initGlobalParamsToEstimate, globalParamsDimensionalityScale,
                                                                                                                       worldAllFramesTrajectoryPoints[:, 0:2]-worldAllFramesTrajectoryPoints[0, 0:2],
                                                                                                                       worldAllFramesOrientationAngles-worldAllFramesOrientationAngles[0], cameraIntrinsics,
                                                                                                                       cameraExtrinsics, worldCameraLoc, cameraAllFramesTrackedBBoxes, startRotation,
                                                                                                                       allFramesFGBGTransitionCost, areaCostMultiplier, shapePriorMultiplier, True))
        
        startTime = time.time()
        globalOptResult = minimize(fitVolumeToTrackRelativesWithStretchRotFullTraj, initGlobalParamsToEstimate, method='Nelder-Mead',#optimizerMethod,
                             args=(globalParamsDimensionalityScale, worldAllFramesTrajectoryPoints[:, 0:2]-worldAllFramesTrajectoryPoints[0, 0:2],
                                   worldAllFramesOrientationAngles-worldAllFramesOrientationAngles[0], cameraIntrinsics, cameraExtrinsics, worldCameraLoc,
                                   cameraAllFramesTrackedBBoxes, startRotation, allFramesFGBGTransitionCost, areaCostMultiplier, shapePriorMultiplier), options={'maxiter': 5000})#, 'eps':0.1})
        print globalOptResult.x
        print "GLOBAL", ["Optimization Failed!", "Optimization Successful!"][int(globalOptResult.success)],
        print " ---  MESSAGE: \"{0}\" --- cost: {1} --- It took {2} secs".format(globalOptResult.message, globalOptResult.fun, time.time()-startTime)

        print "NEW GLOBAL params: {0} --- COST: {1}".format(globalOptResult.x, 
                                                            fitVolumeToTrackRelativesWithStretchRotFullTraj(globalOptResult.x, globalParamsDimensionalityScale,
                                                                                                            worldAllFramesTrajectoryPoints[:, 0:2]-worldAllFramesTrajectoryPoints[0, 0:2],
                                                                                                            worldAllFramesOrientationAngles-worldAllFramesOrientationAngles[0], cameraIntrinsics, cameraExtrinsics,
                                                                                                            worldCameraLoc, cameraAllFramesTrackedBBoxes, startRotation, allFramesFGBGTransitionCost,
                                                                                                            areaCostMultiplier, shapePriorMultiplier, True))
        
        ## the points belonging to the tracked box volume over time after global optimization
        worldAllFramesBoxVolumePoints, _ = placeAndProjectAllBoxVolumesRelatives(globalOptResult.x, globalParamsDimensionalityScale, worldAllFramesTrajectoryPoints[:, 0:2]-worldAllFramesTrajectoryPoints[0, 0:2],
                                                                                 worldAllFramesOrientationAngles-worldAllFramesOrientationAngles[0], cameraIntrinsics, cameraExtrinsics, startRotation)
        ## break here if I've already iterated through the local refinement as many times as defined by maxItersNum
        ## and return the points after a final global fitting
        if iterIdx == maxItersNum :
            return worldAllFramesBoxVolumePoints
        
        ########################## FIND THE BEST PER FRAME LOCAL TRANSFORMATION OF THE BOX VOLUME DEFINED USING THE GLOBALLY TRANSFORMED TRAJECTORY FOUND ABOVE ##########################
        initRefinementParamsToEstimate = np.zeros(3)

        worldRefinedAllFramesBoxVolumePoints = np.zeros_like(worldAllFramesBoxVolumePoints)        
        numNeighbouringFrames = 11
        for idx in np.arange(worldRefinedAllFramesBoxVolumePoints.shape[-1])[0:] :
            clippedIdx = np.clip(idx - (numNeighbouringFrames-1)/2, 0, worldRefinedAllFramesBoxVolumePoints.shape[-1] - numNeighbouringFrames)
            neighbouringFramesIdxs = np.arange(numNeighbouringFrames) + clippedIdx
            currentFrameNeighbourIdx = int(np.argwhere(neighbouringFramesIdxs == idx)) ## where in neighbouringFramesIdxs, the cuurrent frame is

            ## define importance weighing for data coming from neihgbouring frames
            neighbouringFramesWeights = np.ones(numNeighbouringFrames)
            neighbouringFramesWeights[:currentFrameNeighbourIdx] *= 2 ## value previous frames in time more because they should be smooth already
            ## also value frames closer in time to current frame, more than further away ones (the max is set at the frame immediately before the current one in order to make the closest smoothed out frame more important)
#             temporalDistSmoothKernel = exp(-(np.arange(-currentFrameNeighbourIdx+1, numNeighbouringFrames-currentFrameNeighbourIdx+1)**2)/(numNeighbouringFrames*5.0))
#             temporalDistSmoothKernel /= np.sum(temporalDistSmoothKernel)
#             neighbouringFramesWeights *= temporalDistSmoothKernel
            neighbouringFramesWeights /= np.sum(neighbouringFramesWeights)

            ## get all points belonging to the box volume of neighbouring frames
            worldNeighbouringFramesBoxVolumePoints = np.concatenate([worldRefinedAllFramesBoxVolumePoints[:, :, neighbouringFramesIdxs[:currentFrameNeighbourIdx]],
                                                                     worldAllFramesBoxVolumePoints[:, :, neighbouringFramesIdxs[currentFrameNeighbourIdx:]]], axis=-1)
            ## find moving direction of the object at each neighbouring frame
            worldNeighbouringFramesOnGroundPlaneMovingDirs = (worldNeighbouringFramesBoxVolumePoints[-1, 0:2, :]-worldNeighbouringFramesBoxVolumePoints[-2, 0:2, :]).T
            validMovingDirs = np.linalg.norm(worldNeighbouringFramesOnGroundPlaneMovingDirs, axis=1) != 0.0
            worldNeighbouringFramesOnGroundPlaneMovingDirs[validMovingDirs, :] /= np.linalg.norm(worldNeighbouringFramesOnGroundPlaneMovingDirs, axis=1)[validMovingDirs, np.newaxis]

            ## find average moving dir over all neighbouring frames
            worldNeighbouringFramesOnGroundPlaneAverageMovingDir = np.sum(worldNeighbouringFramesOnGroundPlaneMovingDirs[validMovingDirs, :]*neighbouringFramesWeights[validMovingDirs, np.newaxis], axis=0)
            worldNeighbouringFramesOnGroundPlaneAverageMovingDir /= np.linalg.norm(worldNeighbouringFramesOnGroundPlaneAverageMovingDir)

            ## find average travel distance between neighbouring frames
            worldNeighbouringFramesAverageOnGroundPlaneTravelDistance = np.linalg.norm(worldNeighbouringFramesBoxVolumePoints[-2, 0:2, 1:].T - worldNeighbouringFramesBoxVolumePoints[-2, 0:2, :-1].T, axis=1)
            worldNeighbouringFramesAverageOnGroundPlaneTravelDistance = np.sum(worldNeighbouringFramesAverageOnGroundPlaneTravelDistance*neighbouringFramesWeights[1:])
        #     worldNeighbouringFramesAverageOnGroundPlaneTravelDistance = np.median(worldNeighbouringFramesAverageOnGroundPlaneTravelDistance)

            ## find predicted location based on average moving dir, average travel distance and location of the object at the previous frame
            worldPredictedLocationBasedOnNeighbours = None
            if idx > 0 :
                worldPredictedLocationBasedOnNeighbours = (worldRefinedAllFramesBoxVolumePoints[-2, 0:2, idx-1]
                                                           + worldNeighbouringFramesOnGroundPlaneAverageMovingDir*worldNeighbouringFramesAverageOnGroundPlaneTravelDistance)
                
            startTime = time.time()
            refinementOptResult = minimize(refineInFrameBoxVolume, initRefinementParamsToEstimate, method=optimizerMethod,
                                           args=(refinementParamsDimensionalityScale, worldAllFramesBoxVolumePoints[:, :, idx], worldCameraLoc, cameraIntrinsics, cameraExtrinsics,
                                                 cameraAllFramesTrackedBBoxes[idx, :], allFramesFGBGTransitionCost[:, :, idx], worldNeighbouringFramesOnGroundPlaneAverageMovingDir,
                                                 worldPredictedLocationBasedOnNeighbours), options={'maxiter': 5000})#, 'eps':0.1})
            print refinementOptResult.x
            print idx, "LOCAL", ["Optimization Failed!", "Optimization Successful!"][int(refinementOptResult.success)],
            print " ---  MESSAGE: \"{0}\" --- cost: {1} --- It took {2} secs".format(refinementOptResult.message, refinementOptResult.fun, time.time()-startTime)
            worldRefinedBoxVolumePoints, _ = placeRefinedBoxVolume(refinementOptResult.x, refinementParamsDimensionalityScale, worldAllFramesBoxVolumePoints[:, :, idx],
                                                                   cameraIntrinsics, cameraExtrinsics, False)
            worldRefinedAllFramesBoxVolumePoints[:, :, idx] = worldRefinedBoxVolumePoints
        
        
        ########################## RECOMPUTE worldAllFramesTrajectoryPoints AND worldAllFramesOrientationAngles FROM REFINED RESULT SO THAT I CAN ITERATE ##########################
        figure(); plot(worldAllFramesBoxVolumePoints[-2, 0, :], worldAllFramesBoxVolumePoints[-2, 1, :], color="red") ## trajectory points before refinement
        plot(worldRefinedAllFramesBoxVolumePoints[-2, 0, :], worldRefinedAllFramesBoxVolumePoints[-2, 1, :], color="green") ## trajectory points after refinement
        
        ## the new trajectory points for all frames are the center points of the refined box volumes
        worldAllFramesTrajectoryPoints = worldRefinedAllFramesBoxVolumePoints[-2, 0:3, :].T

        kernelSize = 55
        smoothTrajectoryXs = np.convolve(np.repeat(worldAllFramesTrajectoryPoints[:, 0], np.concatenate([[(kernelSize-1)/2 + 1], np.ones(len(worldAllFramesTrajectoryPoints)-2, dtype=int),
                                                                                                         [(kernelSize-1)/2 + 1]])), np.ones(kernelSize)/float(kernelSize), mode='valid')
        smoothTrajectoryYs = np.convolve(np.repeat(worldAllFramesTrajectoryPoints[:, 1], np.concatenate([[(kernelSize-1)/2 + 1], np.ones(len(worldAllFramesTrajectoryPoints)-2, dtype=int),
                                                                                                         [(kernelSize-1)/2 + 1]])), np.ones(kernelSize)/float(kernelSize), mode='valid')
        plot(smoothTrajectoryXs, smoothTrajectoryYs, color="orange") ## heavily smoothed trajectory points used to compute smooth moving directions

        smoothedWorldAllFramesTrajectoryPoints = np.hstack([smoothTrajectoryXs[:, np.newaxis], smoothTrajectoryYs[:, np.newaxis], np.zeros([len(smoothTrajectoryXs), 1])])
        ## get directions from 1 point to the other (I feel like there should be a way to get a tangent direction to trajectory curve where the trajectory point projects onto it but it 
        ## probably won't matter as this is only used as an initialization and will get changed during the optimization)
        smoothedWorldAllFramesMovingDirs = smoothedWorldAllFramesTrajectoryPoints[1:, :]-smoothedWorldAllFramesTrajectoryPoints[:-1, :]
        ## using the moving dir at the second to last frame as the moving dir for the last frame as well
        smoothedWorldAllFramesMovingDirs = np.vstack([smoothedWorldAllFramesMovingDirs, smoothedWorldAllFramesMovingDirs[-1, :][np.newaxis, :]])
        ## normalize only directions where the norm is not 0
        norms = np.linalg.norm(smoothedWorldAllFramesMovingDirs, axis=1)
        if np.any(norms == 0.0) :
            ## if the norm is 0, copy the closest direction where the norm is not 0 --> not sure how to do dis but if the trajectory is smoothed out it should basically never happen
            raise Exception("I guess I have to figure this out now")
        smoothedWorldAllFramesMovingDirs = smoothedWorldAllFramesMovingDirs/norms[:, np.newaxis]
        
        ## now compute the angles that I use to start the global transformation optimization
        ## angles from np.array([1.0, 0.0]) --> see https://math.stackexchange.com/a/879474
        worldAllFramesOrientationAngles = np.arctan2(-smoothedWorldAllFramesMovingDirs[:, 1], smoothedWorldAllFramesMovingDirs[:, 0])

worldFinalAllFramesBoxVolumePoints = fitAndRefineBoxVolumeToTrackedBBox(worldUndistortedTrajectoryPoints, smootherWorldOrientationAngles, allObjectFramesUndistortedFGBGTransitionCost, cameraUndistortedBBoxes,
                                                                        cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], optimizerMethod='Nelder-Mead') 

[  1.           1.           1.         -22.58143616 -27.20477486
   0.18026529   1.           1.        ]
STARTING ITER 0 --- params: [  1.           1.           1.         -22.58143616 -27.20477486
   0.18026529   1.           1.        ] --- COST: 17795415.3073
[ -0.38525071   4.83756209   1.56950166 -23.74828434 -26.80783176
   0.16799039   1.01795163   0.99114963]
GLOBAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 10550459.8219 --- It took 8.82033395767 secs
NEW GLOBAL params: [ -0.38525071   4.83756209   1.56950166 -23.74828434 -26.80783176
   0.16799039   1.01795163   0.99114963] --- COST: 10550459.8219
[ 0.06025671  0.29990251 -0.00113456]
0 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 33362.9139686 --- It took 0.139958143234 secs
[ 0.08055081  0.30618398 -0.00196462]
1 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 33124.755657 --- 

[-0.17169951  0.30951312 -0.00974932]
42 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 36598.7197631 --- It took 0.0763351917267 secs
[-0.03163822  0.29116822 -0.00472692]
43 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 44466.9826457 --- It took 0.0678391456604 secs
[-0.12154482  0.29804221 -0.00441404]
44 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 36870.7962062 --- It took 0.0853309631348 secs
[-0.10710551  0.26955873 -0.00596573]
45 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 37085.6088754 --- It took 0.204522848129 secs
[-0.00534406  0.25698218 -0.00755223]
46 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 37201.7815449 --- It took 0.0895681381226 secs
[-0.0575387   0.25439237 -0.00622828]
47 LOCAL Optimization Successful!  ---  MES

[-0.13323085  0.30483456 -0.00170572]
88 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 39155.9416626 --- It took 0.147500038147 secs
[-0.09203118  0.29284091  0.01021574]
89 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 39614.2471427 --- It took 0.0850958824158 secs
[-0.01400198  0.2973918   0.01034317]
90 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 39885.9312416 --- It took 0.0887730121613 secs
[-0.04481214  0.29849696  0.01228479]
91 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 39517.2952812 --- It took 0.103202104568 secs
[-0.35232396  0.42344454  0.00499733]
92 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 37769.9701563 --- It took 0.192427873611 secs
[-0.24231355  0.42999328  0.00628047]
93 LOCAL Optimization Successful!  ---  MESSA

[-0.20001293  0.36054223  0.01602486]
135 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 38390.1770656 --- It took 0.0950989723206 secs
[-0.43315442  0.38858085  0.01716356]
136 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 37633.8297989 --- It took 0.0841178894043 secs
[-0.41274265  0.3950787   0.02374649]
137 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 37357.6681595 --- It took 0.109133958817 secs
[-0.32804124  0.39254881  0.00756111]
138 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 37754.2355803 --- It took 0.112854957581 secs
[-0.40394928  0.38745839  0.00153717]
139 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 38351.454271 --- It took 0.0826408863068 secs
[-0.29333788  0.3841404  -0.00248215]
140 LOCAL Optimization Successful!  --- 

[ 0.29605884 -0.03391952  0.02003239]
180 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 34795.3772293 --- It took 0.164716959 secs
[ 0.25117684  0.01275812  0.01225429]
181 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 35085.9335344 --- It took 0.0873370170593 secs
[ 0.30373066 -0.03259012  0.01294483]
182 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 33617.1240533 --- It took 0.0953850746155 secs
[ 0.16773535 -0.07089489  0.00052575]
183 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 36445.1534783 --- It took 0.141603946686 secs
[ 0.09326051 -0.09306438  0.00069488]
184 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 35038.1164359 --- It took 0.14746594429 secs
[ 0.13942296 -0.10598969  0.00073639]
185 LOCAL Optimization Successful!  ---  MES

[ 0.04828636 -0.03902709  0.00767312]
225 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 30102.2985115 --- It took 0.0707910060883 secs
[ 0.1339858  -0.10936204  0.00720093]
226 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 35231.1798466 --- It took 0.116760015488 secs
[ 0.15870726 -0.10727802 -0.01424407]
227 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 35148.9207591 --- It took 0.19627904892 secs
[-0.0205645  -0.0869253   0.00997056]
228 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 28171.6828439 --- It took 0.0654790401459 secs
[ 0.01486314 -0.04967196 -0.00227068]
229 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 19052.3157096 --- It took 0.0507910251617 secs
[ 0.04569076 -0.03165755 -0.02708857]
230 LOCAL Optimization Successful!  --- 

[ 0.23572745  0.76106424 -0.02856368]
270 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 48903.9311837 --- It took 0.173144817352 secs
[ 0.2958716   0.72272624  0.00741763]
271 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 48432.2165647 --- It took 0.104640960693 secs
[ 0.5757778   0.83972104  0.05170104]
272 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 51066.4012666 --- It took 0.120270013809 secs
[ 0.65400786  1.10339037  0.04403635]
273 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 50664.955381 --- It took 0.152911186218 secs
[ 0.87729171  0.63054328  0.04090431]
274 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 52136.9335351 --- It took 0.127328872681 secs
[ 1.06143571  0.58019646 -0.00575661]
275 LOCAL Optimization Successful!  ---  ME

[ 0.00280938  0.01626812 -0.00845095]
30 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 19483.2515042 --- It took 0.0681970119476 secs
[-0.01110157  0.02350969 -0.00613869]
31 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 14143.4728349 --- It took 0.0953240394592 secs
[-0.03920507  0.03348828 -0.00649768]
32 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 21551.0444066 --- It took 0.0808529853821 secs
[-0.06284118  0.04141787 -0.00499206]
33 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 19986.5438105 --- It took 0.0826869010925 secs
[ 0.0192614  -0.00290882  0.00833644]
34 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 18729.5817227 --- It took 0.0760369300842 secs
[ 0.03207376 -0.02877954 -0.00068622]
35 LOCAL Optimization Successful!  ---  ME

[-0.27816074  0.19119996  0.00433006]
77 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 31882.5020206 --- It took 0.120542049408 secs
[-0.27557144  0.19493493  0.00447202]
78 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 32479.1631805 --- It took 0.0922210216522 secs
[-0.28805609  0.20027215  0.00468985]
79 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 33205.3946605 --- It took 0.0984189510345 secs
[-0.29213556  0.20464415  0.00530443]
80 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 33128.2483976 --- It took 0.0906178951263 secs
[-0.28883479  0.20815602  0.00552988]
81 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 33134.222478 --- It took 0.0788221359253 secs
[-0.29584467  0.21268942  0.00571166]
82 LOCAL Optimization Successful!  ---  MESS

[-0.27607631  0.01990974  0.0095424 ]
122 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 31678.2716657 --- It took 0.0867509841919 secs
[-0.27093511  0.01749813  0.01012269]
123 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 30666.9750692 --- It took 0.0843381881714 secs
[-0.26684652  0.01321264  0.01081452]
124 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 33040.1228102 --- It took 0.138642072678 secs
[-0.26902585  0.00938955  0.01329629]
125 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 32287.8981364 --- It took 0.119973182678 secs
[-0.27303958 -0.01299512  0.00898828]
126 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 34755.9109574 --- It took 0.0777959823608 secs
[-0.28129145 -0.019876    0.01150846]
127 LOCAL Optimization Successful!  ---

[-0.21641881  0.22456596  0.01174957]
167 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 36391.4924701 --- It took 0.122791051865 secs
[-0.19744837  0.24309153  0.01116876]
168 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 36580.0645544 --- It took 0.112684011459 secs
[-0.20566401  0.23143515  0.01084605]
169 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 36090.7988638 --- It took 0.092738866806 secs
[-0.20107071  0.22201807  0.01056088]
170 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 37270.9930442 --- It took 0.0814578533173 secs
[-0.18794672  0.21496471  0.01066009]
171 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 41116.6692531 --- It took 0.0672540664673 secs
[-0.1654573   0.21776223  0.01083455]
172 LOCAL Optimization Successful!  --- 

[-0.27687427 -0.34456195 -0.00148077]
213 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 44082.0636881 --- It took 0.0831110477448 secs
[-0.25223955 -0.33285669 -0.00147329]
214 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 43639.7205242 --- It took 0.0853440761566 secs
[-0.21212507 -0.31689898  0.00130235]
215 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 41198.1017903 --- It took 0.0870158672333 secs
[-0.16668387 -0.30065254  0.00109724]
216 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 40572.0987433 --- It took 0.0932669639587 secs
[-0.17463312 -0.29251026  0.00116562]
217 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 41082.3464464 --- It took 0.0869510173798 secs
[-0.13854615 -0.28006624  0.00118338]
218 LOCAL Optimization Successful!  -

[ 0.40440533 -0.49734475 -0.0031352 ]
259 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 47362.6830927 --- It took 0.112955093384 secs
[ 0.38089776 -0.50077391 -0.00287339]
260 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 48045.9769876 --- It took 0.104692935944 secs
[ 0.36390477 -0.50326706 -0.00269693]
261 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 47243.8342142 --- It took 0.188512086868 secs
[ 0.34069539 -0.50715946 -0.00258045]
262 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 49689.9233009 --- It took 0.12765288353 secs
[ 0.31159944 -0.51226906 -0.00231774]
263 LOCAL Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 50439.1819748 --- It took 0.135756969452 secs
[ 0.29493786 -0.5146163  -0.00207422]
264 LOCAL Optimization Successful!  ---  ME

In [55]:
tmpT = cameraIntrinsics.dot(filmedSceneData[DICT_CAMERA_EXTRINSICS][0:3, :])
tmpCameraPoints = np.rollaxis(np.dot(tmpT, worldFinalAllFramesBoxVolumePoints[:, :, :]), 1)
tmpCameraPoints = tmpCameraPoints[:, :-1, :]/tmpCameraPoints[:, -1:, :]
figure(); imshow(undistortedBgImage)
for idx in np.arange(0, worldFinalAllFramesBoxVolumePoints.shape[-1], 1) :
    edgesIdxs = np.array([[0, 1], [1, 2], [2, 3], [3, 0], [4, 5], [5, 6], [6, 7], [7, 4], [0, 4], [1, 5], [2, 6], [3, 7]])
    for edgeIdxs in edgesIdxs :
        plot(tmpCameraPoints[edgeIdxs, 0, idx], tmpCameraPoints[edgeIdxs, 1, idx], color='red')
    plot([tmpCameraPoints[-2, 0, idx], tmpCameraPoints[-1, 0, idx]],
         [tmpCameraPoints[-2, 1, idx], tmpCameraPoints[-1, 1, idx]], color='red')
print tmpCameraPoints.shape

(10, 2, 283)


In [110]:
######################## VISUALIZATION STUFF ########################
_, allObjectFramesBoxVolumePoints = placeAndProjectAllBoxVolumesRelatives(optResult.x, paramDimensionalityScale, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
                                                                          smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], startRotation)

if not os.path.isdir("/home/ilisescu/PhD/data/havana/1-3-5_IOU-tracker-refined-volumes_{0}/".format(objectIdToTrack)) :
    os.mkdir("/home/ilisescu/PhD/data/havana/1-3-5_IOU-tracker-refined-volumes_{0}/".format(objectIdToTrack))

fig = mpl.pylab.figure("animation", frameon=False)
fig.set_size_inches(17,12)
gca().set_axis_off()
img = None
for idx in np.arange(allObjectFrames.shape[-1]):
    cla()
    undistortedCurrentFrame, _, _ = undistortImage(filmedSceneData[DICT_DISTORTION_PARAMETER], filmedSceneData[DICT_DISTORTION_RATIO], allObjectFrames[:, :, :, idx],
                                                   filmedSceneData[DICT_CAMERA_INTRINSICS], doReturnMaps=False)
    gca().imshow(undistortedCurrentFrame)
    
    edgesIdxs = np.array([[0, 1], [1, 2], [2, 3], [3, 0], [4, 5], [5, 6], [6, 7], [7, 4], [0, 4], [1, 5], [2, 6], [3, 7]])
    for edgeIdxs in edgesIdxs :
        gca().plot(allObjectFramesBoxVolumePoints[edgeIdxs, 0, idx], allObjectFramesBoxVolumePoints[edgeIdxs, 1, idx], color='red')
    gca().plot([allObjectFramesBoxVolumePoints[-2, 0, idx], allObjectFramesBoxVolumePoints[-1, 0, idx]],
               [allObjectFramesBoxVolumePoints[-2, 1, idx], allObjectFramesBoxVolumePoints[-1, 1, idx]], color='red')
#     if img is None:
#         img = mpl.pylab.imshow(undistortedCurrentFrame)
#     else:
#         img.set_data(undistortedCurrentFrame)
    mpl.pylab.pause(0.1)
    mpl.pylab.draw()
    fig.savefig("/home/ilisescu/PhD/data/havana/1-3-5_IOU-tracker-refined-volumes_{1}/frame-{0:05}.png".format(idx+1, objectIdToTrack), bbox_inches='tight')



In [None]:
#################################### OLD WAY OF COMPUTING THE CONVEX HULL CORNERS ####################################
#     cameraAllFramesHullCorners = np.zeros([6, 2, numFrames])
#     for idx in np.arange(numFrames) :
#         hull = cv2.convexHull(cameraBoxVolumePoints[0:8, :, idx].astype(np.float32))[:, 0, :]
        
#         if hull.shape[0] > 6 :
#             ## I don't think this can ever happen but if it does I'll have to figure something out
#             figure(); imshow(undistortedBgImage)
#             edgesIdxs = np.array([[0, 1], [1, 2], [2, 3], [3, 0], [4, 5], [5, 6], [6, 7], [7, 4], [0, 4], [1, 5], [2, 6], [3, 7]])
#             for edgeIdxs in edgesIdxs :
#                 plot(cameraBoxVolumePoints[edgeIdxs, 0, idx], cameraBoxVolumePoints[edgeIdxs, 1, idx], color='red')
#             scatter(hull[:, 0], hull[:, 1])
            
#             raise Exception("Hull of box volume has more than 6 points {0}".format(hull.shape))
#         elif hull.shape[0] < 6 :
#             ## if there's less than 6 points, interpolate as many points as needed between the last and second to last just so that I always have 6 and the rest of the code works
#             cameraAllFramesHullCorners[:len(hull)-1, :, idx] = hull[:-1, :]
#             cameraAllFramesHullCorners[-1, :, idx] = hull[-1, :]
#             ratios = (np.arange(1.0, len(cameraAllFramesHullCorners)-len(hull)+1)/(len(cameraAllFramesHullCorners)-len(hull)+1))[:, np.newaxis]
#             cameraAllFramesHullCorners[len(hull)-1:-1, :, idx] = hull[-2, :]+ratios*(hull[-1, :]-hull[-2, :])[np.newaxis, :]
# #             figure(); imshow(undistortedBgImage)
# #             edgesIdxs = np.array([[0, 1], [1, 2], [2, 3], [3, 0], [4, 5], [5, 6], [6, 7], [7, 4], [0, 4], [1, 5], [2, 6], [3, 7]])
# #             for edgeIdxs in edgesIdxs :
# #                 plot(cameraBoxVolumePoints[edgeIdxs, 0, idx], cameraBoxVolumePoints[edgeIdxs, 1, idx], color='red')
# #             plot(hull[[0, 1, 2, 3, 4, 0], 0], hull[[0, 1, 2, 3, 4, 0], 1])
# #             scatter(cameraAllFramesHullCorners[:, 0, idx], cameraAllFramesHullCorners[:, 1, idx])
            
# #             raise Exception("poop")
#         else :
#             cameraAllFramesHullCorners[:, :, idx] = hull
#     print cameraAllFramesHullCorners.shape

In [452]:
#################################### SLOWER VERSION OF AREA COST AS IT LOOPS THROUGH FRAMES ####################################
def getProjectedBoxVolumeAABBtoBBoxCost(params, paramsScale, relativePos, relativeRot, cameraIntrinsics, cameraExtrinsics, cameraBBox, startRot, frameIdx=0, doVisualize=False) :
    footprintWidth, footprintLength, volumeHeight, globalDx, globalDy, globalRot, stretchX, stretchY = params/paramsScale
    
    deltaStartRot = globalRot-startRot/paramsScale[5]
    globalRelativeRot = globalRot+relativeRot
    
    defaultBoxVolumePoints = np.array([[-0.5, -0.5, 0.0, 1.0],
                                       [-0.5,  0.5, 0.0, 1.0],
                                       [ 0.5,  0.5, 0.0, 1.0],
                                       [ 0.5, -0.5, 0.0, 1.0],
                                       [-0.5, -0.5, 1.0, 1.0],
                                       [-0.5,  0.5, 1.0, 1.0],
                                       [ 0.5,  0.5, 1.0, 1.0],
                                       [ 0.5, -0.5, 1.0, 1.0],
                                       [ 0.0,  0.0, 0.0, 1.0],
                                       [ 0.5,  0.0, 0.0, 1.0]])*np.concatenate([[footprintLength, footprintWidth, volumeHeight], [1]])
    
    transform = np.array([[np.cos(globalRelativeRot), np.sin(globalRelativeRot), 0.0, globalDx+(np.cos(deltaStartRot)*relativePos[0]+np.sin(deltaStartRot)*relativePos[1])*stretchX],
                          [-np.sin(globalRelativeRot), np.cos(globalRelativeRot), 0.0, globalDy+(np.cos(deltaStartRot)*relativePos[1]-np.sin(deltaStartRot)*relativePos[0])*stretchY],
                          [0.0                       , 0.0                      , 1.0, 0.0],
                          [0.0                       , 0.0                      , 0.0, 1.0]])
    worldBoxVolumePoints = np.dot(transform, defaultBoxVolumePoints.T)[0:3, :].T
    cameraBoxVolumePoints = worldToCameraSpace(cameraIntrinsics, cameraExtrinsics, worldBoxVolumePoints)
    cameraBoxVolumeCorners = np.array(cameraBoxVolumePoints[0:8, :])
        
    cameraBoxVolumeCornersAABB = np.vstack([[np.min(cameraBoxVolumeCorners, axis=0)], [[np.max(cameraBoxVolumeCorners[:, 0]), np.min(cameraBoxVolumeCorners[:, 1])]],
                                            [np.max(cameraBoxVolumeCorners, axis=0)], [[np.min(cameraBoxVolumeCorners[:, 0]), np.max(cameraBoxVolumeCorners[:, 1])]]])
    if doVisualize :
        edgesIdxs = np.array([[0, 1], [1, 2], [2, 3], [3, 0], [4, 5], [5, 6], [6, 7], [7, 4], [0, 4], [1, 5], [2, 6], [3, 7]])
        for edgeIdxs in edgesIdxs :
            plot(cameraBoxVolumeCorners[edgeIdxs, 0], cameraBoxVolumeCorners[edgeIdxs, 1], color=cm.Set1(np.mod(frameIdx, 9)/8.0))
            
        plot([cameraBoxVolumePoints[-2, 0], cameraBoxVolumePoints[-1, 0]], [cameraBoxVolumePoints[-2, 1], cameraBoxVolumePoints[-1, 1]], color=cm.Set1(np.mod(frameIdx, 9)/8.0))
        print np.sum(np.linalg.norm(cameraBoxVolumeCornersAABB - np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]]), axis=1))
        
    return np.log(1+np.sum(np.linalg.norm(cameraBoxVolumeCornersAABB - np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]]), axis=1)))

def computeAllProjectedBoxVolumeAABBtoBBoxCost(params, paramsScale, relativePositions, relativeRots, cameraIntrinsics, cameraExtrinsics, cameraTrackedBBoxes, startRot, doVisualize=False) :
    if doVisualize:
        figure(); imshow(undistortedBgImage)
        
    totalProjectedBoxVolumeAABBtoBBoxCost = 0.0
    for idx, (relativePos, relativeRot, cameraBBox) in enumerate(zip(relativePositions, relativeRots, cameraTrackedBBoxes)) :
        if not doVisualize or np.mod(idx, 20) == 0 :
            totalProjectedBoxVolumeAABBtoBBoxCost += getProjectedBoxVolumeAABBtoBBoxCost(params, paramsScale, relativePos, relativeRot, cameraIntrinsics, cameraExtrinsics, cameraBBox, startRot, idx/20, doVisualize)
            
    return totalProjectedBoxVolumeAABBtoBBoxCost
    
optimizerMethod = 'Nelder-Mead'
if optimizerMethod == 'BFGS' :
    paramDimensionalityScale = np.array([10.0, 10.0, 10.0, 1.0, 1.0, 100.0, 1.0, 1.0])
else :
    paramDimensionalityScale = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
initParamsToEstimate = np.concatenate([np.ones(3), worldUndistortedTrajectoryPoints[0, 0:2], [smootherWorldOrientationAngles[0]], np.ones(2)])*paramDimensionalityScale
dimensionsToEstimate = 8
startRotation = initParamsToEstimate[5]
computeAllProjectedBoxVolumeAABBtoBBoxCost(initParamsToEstimate, paramDimensionalityScale, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
                                           smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS],
                                           cameraUndistortedBBoxes, startRotation)

1504.1843438990807

In [None]:
######################## VISUALIZATION STUFF ########################
def getFootprintsFromEstimatedParams(estimatedParams, paramDimensionalityScale, relativePositions, relativeRots, initialRot) :

    footprintWidth, footprintLength = estimatedParams[0:2]
    rot = estimatedParams[4] - initialRot/paramDimensionalityScale[4]
    rotatedRelativePositions = np.dot(np.array([[np.cos(rot) , np.sin(rot), 0.0, 0.0],
                                                [-np.sin(rot), np.cos(rot), 0.0, 0.0],
                                                [0.0         , 0.0        , 1.0, 0.0],
                                                [0.0         , 0.0        , 0.0, 1.0]]), np.hstack([relativePositions, np.repeat(np.array([[0.0, 1.0]]), len(relativePositions), axis=0)]).T)
    rotatedRelativePositions = rotatedRelativePositions[:-1, :]/rotatedRelativePositions[-1, :]

    perFramePosAndRot = np.hstack([np.dot(rotatedRelativePositions[0:2, :].T, np.array([[estimatedParams[5], 0.0], [0.0, estimatedParams[6]]]))+estimatedParams[2:4], relativeRots[:, np.newaxis]+estimatedParams[4]])
    return footprintWidth, footprintLength, perFramePosAndRot

footprintWidth, footprintLength, perFramePosAndRot = getFootprintsFromEstimatedParams(np.concatenate([optResult.x, initParamsToEstimate[dimensionsToEstimate:7]]), paramDimensionalityScale,
                                                                                      worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
                                                                                      smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], initParamsToEstimate[4])

fig = mpl.pylab.figure("animation", frameon=False)
fig.set_size_inches(17,12)
gca().set_axis_off()
img = None
for i in np.arange(allObjectFrames.shape[-1]):
    cla()
    undistortedCurrentFrame, _, _ = undistortImage(filmedSceneData[DICT_DISTORTION_PARAMETER], filmedSceneData[DICT_DISTORTION_RATIO], allObjectFrames[:, :, :, i],
                                                   filmedSceneData[DICT_CAMERA_INTRINSICS], doReturnMaps=False)
    gca().imshow(undistortedCurrentFrame)
    
    dx, dy, rot = perFramePosAndRot[i, :]
    worldFootprintPoints = placeFootprint(footprintWidth, footprintLength, dx, dy, rot)
    cameraFootprintPoints = worldToCameraSpace(cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], worldFootprintPoints)
    gca().plot(cameraFootprintPoints[[0, 1, 2, 3, 0], 0], cameraFootprintPoints[[0, 1, 2, 3, 0], 1])
    gca().plot([cameraFootprintPoints[4, 0], cameraFootprintPoints[5, 0]], [cameraFootprintPoints[4, 1], cameraFootprintPoints[5, 1]])
#     if img is None:
#         img = mpl.pylab.imshow(undistortedCurrentFrame)
#     else:
#         img.set_data(undistortedCurrentFrame)
    mpl.pylab.pause(0.1)
    mpl.pylab.draw()
    fig.savefig("/home/ilisescu/PhD/data/havana/1-3-5_IOU-tracker-footprints_{1}/frame-{0:05}.png".format(i+1, objectIdToTrack), bbox_inches='tight')

In [None]:
"/home/ilisescu/PhD/data/havana/1-3-5_IOU-tracker-footprints_{1}/frame-{0:05}.png".format(i+1, objectIdToTrack)

## Automatic camera calibration stuff

In [None]:
undistortFillMask, _, distortionCoeff, map1, map2 = undistortImage(filmedSceneData[DICT_DISTORTION_PARAMETER], filmedSceneData[DICT_DISTORTION_RATIO], np.ones_like(bgImage[:, :, 0]),
                                                                   filmedSceneData[DICT_CAMERA_INTRINSICS], doReturnMaps=True)
undistortFillMask = cv2.erode(undistortFillMask, np.ones([5, 5])) ## eroding because I want to get rid of edges where there is high gradients because of the undistortion filling
edges = cv2.Canny(undistortedBgImage,100,200)
edges[undistortFillMask < 1] = 0
figure(); imshow(edges)

minLineLength = 100
maxLineGap = 10
lines = cv2.HoughLinesP(edges,1,np.pi/180,100,minLineLength,maxLineGap)
linesImg = np.copy(undistortedBgImage)
for x1,y1,x2,y2 in lines[0]:
    cv2.line(linesImg,(x1,y1),(x2,y2),(0,255,0),2)
figure(); imshow(linesImg)

#########################

lines = lsd.lsd(cv2.cvtColor(undistortedBgImage, cv2.COLOR_RGB2GRAY))
## if both line endpoints have at least 1 coordinate projected back into the distorted image space outside of the image bounds then they should be discarded
endPoint1Coords = np.hstack([map1[lines[:, 1].astype(int), lines[:, 0].astype(int), np.newaxis], map2[lines[:, 1].astype(int), lines[:, 0].astype(int), np.newaxis]])
endPoint2Coords = np.hstack([map1[lines[:, 3].astype(int), lines[:, 2].astype(int), np.newaxis], map2[lines[:, 3].astype(int), lines[:, 2].astype(int), np.newaxis]])
## points that have at least one coordinate below 2 or above width (or height) - 2
outOfBoundsEndPoint1 = np.any([np.any(endPoint1Coords < 2.0, axis=1)[:, np.newaxis], np.any(endPoint1Coords > np.array(bgImage.shape[0:2][::-1])[np.newaxis, :]-2.0, axis=1)[:, np.newaxis]], axis=0)
outOfBoundsEndPoint2 = np.any([np.any(endPoint2Coords < 2.0, axis=1)[:, np.newaxis], np.any(endPoint2Coords > np.array(bgImage.shape[0:2][::-1])[np.newaxis, :]-2.0, axis=1)[:, np.newaxis]], axis=0)
outOfBoundsSegments = np.all([outOfBoundsEndPoint1, outOfBoundsEndPoint2], axis=0).flatten()
lines = lines[~outOfBoundsSegments, :]
linesImg = np.copy(undistortedBgImage)
for i in xrange(lines.shape[0]):
    pt1 = (int(lines[i, 0]), int(lines[i, 1]))
    pt2 = (int(lines[i, 2]), int(lines[i, 3]))
    width = lines[i, 4]
    cv2.line(linesImg, pt1, pt2, (0, 0, 255), int(np.ceil(width / 2)))
figure(); imshow(linesImg)

#########################

print cameraIntrinsics
### method from https://annals-csis.org/proceedings/2012/pliks/110.pdf
o_i = np.array(undistortedBgImage.shape[0:2][::-1])/2.0
# v1 = np.array([19.8403591507208, 227.308004627469])
# v2 = np.array([1344.71552789342, 241.607733871685])
# v3 = np.array([867.041076605350, 2365.66347200499])
# v1 = np.array([-88.5473780083063, 189.264517571305])
# v2 = np.array([1276.67256515769, 195.613017891702])
# v3 = np.array([869.226873471016, 2360.25504279812])
v1 = np.array([-38.2370844022271, 213.107516830185])
v2 = np.array([1558.91196185510, 184.120214331716])
v3 = np.array([868.834979573965, 2356.97194934093])
f = np.sqrt(np.abs(np.dot(o_i-v1, v2-o_i)))
print o_i, f ## principal point and focal length
A = np.array([[v1[0], v2[0], v3[0]],
              [v1[1], v2[1], v3[1]],
              [v1[0]**2, v2[0]**2, v3[0]**2],
              [v1[1]**2, v2[1]**2, v3[1]**2],
              [v1[0]*v1[1], v2[0]*v2[1], v3[0]*v3[1]]])
b = np.array([o_i[0], o_i[1], f**2 + o_i[0]**2, f**2 + o_i[1]**2, o_i[0]*o_i[1]])
lambdas = np.sqrt(np.linalg.lstsq(A, b)[0])
print lambdas

#########################

v_i = np.dot(v2-v1, o_i-v1)/(np.linalg.norm(v2-v1)**2)*(v2-v1)+v1 # projection of o_i onto vanishing line defined by v1 and v2
f = np.sqrt(np.linalg.norm(v1 - v_i)*np.linalg.norm(v_i - v2)-np.linalg.norm(o_i - v_i)**2) ## focal length if I only had 2 vanishing points as shown in https://annals-csis.org/proceedings/2012/pliks/110.pdf
print f

xPrime_c = np.array([v1[0], v1[1], f])/np.linalg.norm(np.array([v1[0], v1[1], f]))
yPrime_c = np.array([v2[0], v2[1], f])/np.linalg.norm(np.array([v2[0], v2[1], f]))
zPrime_c = np.cross(xPrime_c, yPrime_c)
R = np.array([[v1[0]/np.sqrt(v1[0]**2 + v1[1]**2 + f), v2[0]/np.sqrt(v2[0]**2 + v2[1]**2 + f), zPrime_c[0]],
              [v1[1]/np.sqrt(v1[0]**2 + v1[1]**2 + f), v2[1]/np.sqrt(v2[0]**2 + v2[1]**2 + f), zPrime_c[1]],
              [f/np.sqrt(v1[0]**2 + v1[1]**2 + f), f/np.sqrt(v2[0]**2 + v2[1]**2 + f), zPrime_c[2]]])
print R
print np.dot(R, R.T)
print np.linalg.norm(R[:, -1])
print np.linalg.norm(xPrime_c)

#########################

figure(); imshow(undistortedBgImage)
scatter(o_i[0], o_i[1]); plot(np.vstack([v1, v2, v3])[[0, 1, 2, 0], 0], np.vstack([v1, v2, v3])[[0, 1, 2, 0], 1])

#########################

xPrime_c = np.array([v1[0], v1[1], f])/np.linalg.norm(v1-o_i)
xPrime_c = xPrime_c/np.linalg.norm(xPrime_c)
yPrime_c = np.array([v2[0], v2[1], f])/np.linalg.norm(v2-o_i)
yPrime_c = yPrime_c/np.linalg.norm(yPrime_c)
print np.linalg.norm(np.cross(xPrime_c, yPrime_c))

#########################

def minimizeChangeDiff(angles, cameraIntrinsics, trackPointsAndScaleChanges, VP1, VP2, doReturnData=False) :
    alphaRot, betaRot, gammaRot = angles[0], angles[1], 0.0#-np.pi/5
    if len(angles) == 3 :
        gammaRot = angles[2]
    currentCameraExtrinsics = cameraExtrinsicsFromRotAngles(alphaRot, betaRot, gammaRot)
    worldCameraLoc = np.linalg.inv(np.vstack([currentCameraExtrinsics, np.array([[0.0, 0.0, 0.0, 1.0]])]))[:-1, -1]
    
    worldTrackPoints = cameraToWorldGroundPlane(cameraIntrinsics, currentCameraExtrinsics, trackPointsAndScaleChanges[:, :-1].reshape([len(trackPointsAndScaleChanges)*2, 2]))
    worldTrackPointsToCameraDists = np.linalg.norm(worldTrackPoints-worldCameraLoc, axis=1).reshape([len(worldTrackPoints)/2, 2])
    distsChange = worldTrackPointsToCameraDists[:, 0]/worldTrackPointsToCameraDists[:, 1] ## reverse of scale computation because distance is inversely proportional to size
    
#     figure(); imshow(undistortedBgImage); plot([VP1[0], VP2[0]], [VP1[1], VP2[1]], color='blue')
    unitSquare = worldToCameraSpace(cameraIntrinsics, currentCameraExtrinsics, np.array([[0, 0, 0], [0.1, 0, 0], [0, 0.1, 0], [0.1, 0.1, 0]], dtype=float))
    unitSquareVP1 = line2lineIntersection(np.concatenate([unitSquare[0, :], unitSquare[1, :]]), np.concatenate([unitSquare[2, :], unitSquare[3, :]]))
    unitSquareVP2 = line2lineIntersection(np.concatenate([unitSquare[0, :], unitSquare[2, :]]), np.concatenate([unitSquare[1, :], unitSquare[3, :]]))
    if unitSquareVP1 is not None and unitSquareVP2 is not None :
#         scatter(unitSquare[:, 0], unitSquare[:, 1]); plot([unitSquareVP1[0], unitSquareVP2[0]], [unitSquareVP1[1], unitSquareVP2[1]], color='yellow')

        projectedVP1 = projectPointsOnLineSegment(unitSquareVP1[np.newaxis, :], VP1, VP2).flatten()
        # move VPs so that VP1 coincides with its projected counterpart
        adjustedVP1 = np.copy(projectedVP1) 
        adjustedVP2 = unitSquareVP2+projectedVP1-unitSquareVP1
#         plot([adjustedVP1[0], adjustedVP2[0]], [adjustedVP1[1], adjustedVP2[1]], color='red')
#         scatter(projectedVP1[0], projectedVP1[1], marker='x')

        angularDistance = np.abs(np.cross((adjustedVP2-adjustedVP1)/np.linalg.norm(adjustedVP2-adjustedVP1), (VP2-VP1)/np.linalg.norm(VP2-VP1)))
        spatialDistance = np.linalg.norm(projectedVP1-unitSquareVP1)

        betaParam = 0.995
        horizonDistance = angularDistance*betaParam + spatialDistance*(1.0-betaParam)
    else :
        horizonDistance = 10.0
    
    if doReturnData :
        return distsChange
    else :
        alphaParam = 0.0*0.03
    # #     keepPoints = np.random.randint(0, 2, len(distsChange))
    # #     return np.sqrt(np.sum((distsChange[keepPoints==1]-trackPointsAndScaleChanges[keepPoints==1, 4])**2))
        return np.sqrt(np.sum((distsChange-trackPointsAndScaleChanges[:, 4])**2))*alphaParam + horizonDistance*(1.0-alphaParam)
    #     return float(len(distsChange))-len(np.argwhere(np.all([distsChange > 1.0, trackPointsAndScaleChanges[:, 4] > 1.0], axis=0)))

# print minimizeChangeDiff(np.zeros(3), cameraIntrinsics, undistortedTrackPointsAndScaleChanges, v1, v2)
# print minimizeChangeDiff(np.array([-np.pi*0.37, -np.pi*0.01, -np.pi/5]), cameraIntrinsics, undistortedTrackPointsAndScaleChanges, v1, v2)

In [None]:
DO_ESTIMATE_3D = True
minCost = 10000000.0
for i in np.arange(5000) :
    if DO_ESTIMATE_3D :
        anglesInit = (np.random.rand(3)-0.5)*np.pi*2
    else :
        anglesInit = (np.random.rand(2)-0.5)*np.pi*2
#     print anglesInit
    optResult = minimize(minimizeChangeDiff, anglesInit, method='BFGS', args=(cameraIntrinsics, undistortedTrackPointsAndScaleChanges, v1, v2))
    if optResult.fun < minCost :
        minCost = optResult.fun
        angles = optResult.x
        print i, ["Optimization Failed!", "Optimization Successful!"][int(optResult.success)], " ---  MESSAGE: \"{0}\" --- cost: {1}".format(optResult.message, optResult.fun)
        print angles, anglesInit

In [None]:
alphaRot, betaRot, gammaRot = angles[0], angles[1], 0.0#-np.pi/5
if DO_ESTIMATE_3D :
    gammaRot = angles[2] #-np.pi*1.15 #
# alphaRot, betaRot, gammaRot = -np.pi*0.37, -np.pi*0.01, -np.pi/5
print (alphaRot, betaRot, gammaRot)
# betaRot = betaRot+np.pi

tmp = cameraExtrinsicsFromRotAngles(alphaRot, betaRot, gammaRot)
tmp = np.dot(tmp, np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]))
axesDirsCameraSpace = worldToCameraSpace(cameraIntrinsics, tmp, np.array([[0, 0, 0], [0.1, 0, 0], [0, 0.1, 0], [0, 0, 0.1]], dtype=float))
worldGridPoints = np.array(np.meshgrid(np.arange(-1.0, 1.1, 0.1), np.arange(-1.0, 1.1, 0.1))).T.reshape([len(np.arange(-1., 1.1, 0.1))**2, 2])
worldGridPoints = np.hstack([worldGridPoints, np.zeros([len(worldGridPoints), 1])])
cameraGridPoints = worldToCameraSpace(cameraIntrinsics, tmp, worldGridPoints)
tmpWorldCameraLoc = np.linalg.inv(np.vstack([tmp, np.array([[0.0, 0.0, 0.0, 1.0]])]))[:-1, -1]
tmpCameraDists = np.linalg.norm(worldGridPoints-tmpWorldCameraLoc, axis=1)
# print cm.jet((tmpCameraDists-np.min(tmpCameraDists))/np.max(tmpCameraDists-np.min(tmpCameraDists)), alpha=1)

figure(); imshow(undistortedBgImage)
scatter(cameraGridPoints[:, 0], cameraGridPoints[:, 1], c=cm.jet((tmpCameraDists-np.min(tmpCameraDists))/np.max(tmpCameraDists-np.min(tmpCameraDists)), alpha=1)) # x dir
plot(axesDirsCameraSpace[[0, 1], 0], axesDirsCameraSpace[[0, 1], 1], color='red') # x dir
plot(axesDirsCameraSpace[[0, 2], 0], axesDirsCameraSpace[[0, 2], 1], color='green') # y dir
plot(axesDirsCameraSpace[[0, 3], 0], axesDirsCameraSpace[[0, 3], 1], color='blue') # z dir
xlim([0, undistortedBgImage.shape[1]]); ylim([undistortedBgImage.shape[0], 0])

unitSquare = worldToCameraSpace(cameraIntrinsics, tmp, np.array([[0, 0, 0], [0.1, 0, 0], [0, 0.1, 0], [0.1, 0.1, 0]], dtype=float))
unitSquareVP1 = line2lineIntersection(np.concatenate([unitSquare[0, :], unitSquare[1, :]]), np.concatenate([unitSquare[2, :], unitSquare[3, :]]))
unitSquareVP2 = line2lineIntersection(np.concatenate([unitSquare[0, :], unitSquare[2, :]]), np.concatenate([unitSquare[1, :], unitSquare[3, :]]))
plot([unitSquareVP1[0], unitSquareVP2[0]], [unitSquareVP1[1], unitSquareVP2[1]], color='yellow')
plot([v1[0], v2[0]], [v1[1], v2[1]], color='blue')

# worldTrackPoints = cameraToWorldGroundPlane(cameraIntrinsics, tmp,
#                                             undistortedTrackPointsAndScaleChanges[:, :-1].reshape([len(undistortedTrackPointsAndScaleChanges)*2, 2])).reshape([len(undistortedTrackPointsAndScaleChanges), 6])
# figure()
# for indices in mapObjectIdToTrackPointsAndScaleChanges :
#     plot(worldTrackPoints[indices[1]:indices[2], 0], worldTrackPoints[indices[1]:indices[2], 1])
# xlim([-4.0, 4.0]); ylim([-4.0, 4.0])
# figure(); imshow(undistortedBgImage)
# for indices in mapObjectIdToTrackPointsAndScaleChanges :
#     plot(undistortedTrackPointsAndScaleChanges[indices[1]:indices[2], 0], undistortedTrackPointsAndScaleChanges[indices[1]:indices[2], 1])

## FIRST EXPERIMENTS USING PHOTOCONSISTENCY AND AREA COSTS THAT THE STUFF ABOVE IS BASED ON, JUST WAAAAAY FASTER

In [724]:
def clip(subjectPolygon, clipPolygon):
    """Sutherland-Hodgman clipping from https://rosettacode.org/wiki/Sutherland-Hodgman_polygon_clipping#Python"""
    def inside(p):
        return(cp2[0]-cp1[0])*(p[1]-cp1[1]) > (cp2[1]-cp1[1])*(p[0]-cp1[0])
 
    def computeIntersection():
        dc = [ cp1[0] - cp2[0], cp1[1] - cp2[1] ]
        dp = [ s[0] - e[0], s[1] - e[1] ]
        n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
        n2 = s[0] * e[1] - s[1] * e[0] 
        n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0])
        return [(n1*dp[0] - n2*dc[0]) * n3, (n1*dp[1] - n2*dc[1]) * n3]
 
    outputList = subjectPolygon
    cp1 = clipPolygon[-1]
    
    for clipVertex in clipPolygon:
        cp2 = clipVertex
        inputList = outputList
        outputList = []
        if len(inputList) == 0 :
            return None
        s = inputList[-1]

        for subjectVertex in inputList:
            e = subjectVertex
            if inside(e):
                if not inside(s):
                    outputList.append(computeIntersection())
                outputList.append(e)
            elif inside(s):
                outputList.append(computeIntersection())
            s = e
        cp1 = cp2
    return np.array(outputList)

def polyArea(p):
    """From here https://stackoverflow.com/a/451482"""
    return 0.5 * abs(sum([x0*y1 - x1*y0 for ((x0, y0), (x1, y1)) in zip(p, p[np.mod(np.arange(1, len(p)+1), len(p)), :])]))


figure(); imshow(undistortedBgImage); plot(cameraUndistortedBBoxes[10, [0, 2, 2, 0, 0]], cameraUndistortedBBoxes[10, [1, 1, 3, 3, 1]])
bob = np.array([[1392.1, 591.3], [1506.71, 603.1], [1486.92, 638], [1379.4, 638.9]]) - np.array([[10, 30]])
plot(bob[[0, 1, 2, 3, 0], 0], bob[[0, 1, 2, 3, 0], 1])
clippedBob = clip([tuple(point) for point in bob], [tuple(cameraUndistortedBBoxes[10, [0, 1]]), tuple(cameraUndistortedBBoxes[10, [2, 1]]),
                                                    tuple(cameraUndistortedBBoxes[10, [2, 3]]), tuple(cameraUndistortedBBoxes[10, [0, 3]])])

insideAreaPercentage = 0.0
if clippedBob is not None :
    plot(clippedBob[:, 0], clippedBob[:, 1])
    insideAreaPercentage = polyArea(clippedBob)/polyArea(bob)
print insideAreaPercentage

0.0


In [110]:
def placeFootprint(width, length, dx, dy, rot) :
    ## the convention is that the footprint is aligned with the x and y axis such that the long side (which is footprintLength long) is aligned with the x axis
    ## the first 4 points are the corners while the remaining two are center point and forward point respectively
    defaultFootprintPoints = np.array([[-length/2.0, -width/2.0, 0.0],
                                       [-length/2.0, width/2.0 , 0.0],
                                       [length/2.0 , width/2.0 , 0.0],
                                       [length/2.0 , -width/2.0, 0.0],
                                       [0.0        , 0.0       , 0.0],
                                       [length/2.0 , 0.0       , 0.0]])
    ## here I flip the signs of the sin wrt what's shown here https://en.wikipedia.org/wiki/Rotation_matrix
    ## for reasons I don't really understand
    T = np.array([[np.cos(rot) , np.sin(rot) , 0.0, dx ],
                  [-np.sin(rot), np.cos(rot) , 0.0, dy ],
                  [0.0         , 0.0         , 1.0, 0.0],
                  [0.0         , 0.0         , 0.0, 1.0]])
    
    worldFootprintPoints = np.dot(T, np.vstack([defaultFootprintPoints.T, np.ones([1, len(defaultFootprintPoints)])]))
    worldFootprintPoints = worldFootprintPoints[:-1, :]/worldFootprintPoints[-1, :]
    return worldFootprintPoints.T

def clipFootprintWithCameraBBox(footprintWidth, footprintLength, posAndRot, cameraIntrinsics, cameraExtrinsics, cameraBBox, footprintIdx=0, doVisualize=False) :
    """places a footprint in the 3D world, projects it into image space and compares is to the given cameraBBox in camera space"""
    
    dx, dy, rot = posAndRot
    worldFootprintPoints = placeFootprint(footprintWidth, footprintLength, dx, dy, rot)
    
    cameraFootprintPoints = worldToCameraSpace(cameraIntrinsics, cameraExtrinsics, worldFootprintPoints)
    cameraFootprintCorners = cameraFootprintPoints[0:4, :]
    cameraFootprintCenterPoint = cameraFootprintPoints[4, :]
    cameraFootprintForwardPoint = cameraFootprintPoints[5, :]
    cameraClippedFootprintCorners = clip([tuple(point) for point in cameraFootprintCorners], [tuple(cameraBBox[[0, 1]]), tuple(cameraBBox[[2, 1]]),
                                                                                              tuple(cameraBBox[[2, 3]]), tuple(cameraBBox[[0, 3]])])
    ## check if line from each of the footprint corners to the camera (projected onto the ground plane for simplicity) intersects any of the edges of the footprint that point doesn't belong to
    ## which tells me which edges I can see directly and which are occluded by the object
    edgesIdxs = np.array([[0, 1], [1, 2], [2, 3], [3, 0]])
    cameraLocOnGroundPlane = np.linalg.inv(cameraExtrinsics)[0:2, -1]
    invisibleCornersIdx = []
    for pointIdx, point in enumerate(worldFootprintPoints[0:4, 0:2]) :
        for edgeIdxs in edgesIdxs :
            if pointIdx not in edgeIdxs :
                intersectionPoint = line2lineIntersection(np.concatenate([point, cameraLocOnGroundPlane]),
                                                          np.concatenate([worldFootprintPoints[edgeIdxs[0], 0:2], worldFootprintPoints[edgeIdxs[1], 0:2]]))
                ## intersection point must be lie on the segment it intersects and the distance from the intersection point to the camera must be smaller than the distance from 
                ## the footprint corner (named point here) to the camera (meaning the intersection point and therefore the segment is in front of the footprint corner which is therefore occluded)
                if (isABetweenBandC(intersectionPoint, worldFootprintPoints[edgeIdxs[0], 0:2], worldFootprintPoints[edgeIdxs[1], 0:2])
                    and np.linalg.norm(point-cameraLocOnGroundPlane) > np.linalg.norm(intersectionPoint-cameraLocOnGroundPlane)) :
                    
                    invisibleCornersIdx.append(pointIdx)
                    break
    
    if doVisualize :
        for edgeIdxs in edgesIdxs :
            if edgeIdxs[0] not in invisibleCornersIdx and edgeIdxs[1] not in invisibleCornersIdx :
                plot(cameraFootprintCorners[edgeIdxs, 0], cameraFootprintCorners[edgeIdxs, 1], color=cm.Set1(np.mod(footprintIdx, 9)/8.0))
            else :
                plot(cameraFootprintCorners[edgeIdxs, 0], cameraFootprintCorners[edgeIdxs, 1], '--', color=cm.Set1(np.mod(footprintIdx, 9)/8.0), linewidth=0.75)
        plot([cameraFootprintForwardPoint[0], cameraFootprintCenterPoint[0]], [cameraFootprintForwardPoint[1], cameraFootprintCenterPoint[1]], color=cm.Set1(np.mod(footprintIdx, 9)/8.0), linewidth=1.5)
        plot(cameraBBox[[0, 2, 2, 0, 0]], cameraBBox[[1, 1, 3, 3, 1]], color=cm.Set1(np.mod(footprintIdx, 9)/8.0), linewidth=0.75)
        if False and cameraClippedFootprintCorners is not None and cameraClippedFootprintCorners.shape[0] > 0 :
            plot(cameraClippedFootprintCorners[np.mod(np.arange(len(cameraClippedFootprintCorners)+1), len(cameraClippedFootprintCorners)), 0],
                 cameraClippedFootprintCorners[np.mod(np.arange(len(cameraClippedFootprintCorners)+1), len(cameraClippedFootprintCorners)), 1], color=cm.Set1(np.mod(footprintIdx, 9)/8.0))
    
    return cameraFootprintCorners, invisibleCornersIdx, cameraClippedFootprintCorners
    
def computeTotalCost(footprintWidth, footprintLength, perFramePosAndRot, cameraIntrinsics, cameraExtrinsics, cameraTrackedBBoxes, perFrameFGBGTransitionCosts, doVisualize=False) :
    
    if doVisualize :
        figure(); imshow(undistortedBgImage)
        frameIdxs = np.arange(0, len(perFramePosAndRot), 20)
    else :
        frameIdxs = np.arange(len(perFramePosAndRot))
        
    maxFGBGCost = np.max(perFrameFGBGTransitionCosts)
    totalInsideAreaCost = 0.0 ## how much area of camera space footprint is inside bbox
    totalBBoxToFootprintAreaCost = 0.0 ## how big are of camera space footprint is wrt bbox area
    totalFGBGTransitionCost = 0.0
    totalAreaBloatCost = 0.0
#     allWorldFootprintCornersAABB = np.array([100000.0, 100000.0, -100000.0, -100000.0])
    for idx, (posAndRot, cameraBBox) in enumerate(zip(perFramePosAndRot[frameIdxs, :], cameraTrackedBBoxes[frameIdxs, :])) :
        if False and doVisualize :
            print frameIdxs[idx]
#             tmpImage, _, _ = undistortImage(filmedSceneData[DICT_DISTORTION_PARAMETER], filmedSceneData[DICT_DISTORTION_RATIO],
#                                             allObjectFrames[:, :, :, frameIdxs[idx]], filmedSceneData[DICT_CAMERA_INTRINSICS], doReturnMaps=False)
#             figure(); imshow(tmpImage)
            figure(); imshow(perFrameFGBGTransitionCosts[:, :, frameIdxs[idx]])
            
#         dx, dy, rot = posAndRot
#         worldFootprintPoints = placeFootprint(footprintWidth, footprintLength, dx, dy, rot)
#         allWorldFootprintCornersAABB[0:2] = np.min(np.vstack([allWorldFootprintCornersAABB[0:2][np.newaxis, :], worldFootprintPoints[:, 0:2]]), axis=0)
#         allWorldFootprintCornersAABB[2:4] = np.max(np.vstack([allWorldFootprintCornersAABB[2:4][np.newaxis, :], worldFootprintPoints[:, 0:2]]), axis=0)
        cameraFootprintCorners, invisibleCornersIdx, cameraClippedFootprintCorners = clipFootprintWithCameraBBox(footprintWidth, footprintLength, posAndRot, cameraIntrinsics,
                                                                                                                 cameraExtrinsics, cameraBBox, idx, doVisualize)
        edgesIdxs = np.array([[0, 1], [1, 2], [2, 3], [3, 0]])
        ## this make sure that I only the edges that don't contain an invisible corner are used in the computation
        visibleEdgesIdxs = edgesIdxs[~np.any(np.any(edgesIdxs[np.newaxis, :, :] == np.reshape(invisibleCornersIdx, [len(invisibleCornersIdx), 1, 1]), axis=0), axis=1), :]
        for edgeIdxs in visibleEdgesIdxs :
            testLine = np.concatenate([cameraFootprintCorners[edgeIdxs[0], :], cameraFootprintCorners[edgeIdxs[1], :]])
            rasterImage = np.zeros(perFrameFGBGTransitionCosts.shape[0:2], np.uint8)
            cv2.line(rasterImage, tuple(np.round(testLine[0:2]).astype(int)), tuple(np.round(testLine[2:]).astype(int)), 1, thickness=2)
            
            ## clip the line to image borders
            isInside, point1, point2 = cv2.clipLine(tuple(np.concatenate([[0, 0], perFrameFGBGTransitionCosts.shape[:2][::-1]])), tuple(testLine[0:2]), tuple(testLine[2:4]))
            ## if the testLine is outside the image borders then no ratio of the fgBGTransitionCost will come from perFrameFGBGTransitionCosts
            costWeight = 0.0
            ## if the testLine is inside the image borders then costWeight of the fgBGTransitionCost comes from perFrameFGBGTransitionCosts while 1-costWeight is multiplied by max cost
            ## the more of the testLine is inside the closer is costWeight to 1.0
            if isInside :
                costWeight = np.linalg.norm(np.array(point1)-np.array(point2))/np.linalg.norm(testLine[:2]-testLine[2:])
            
            linePixelsIdxs = np.argwhere(rasterImage.astype(bool))
            projectedPixelCenters = projectPointsOnLineSegment(linePixelsIdxs[:, ::-1].astype(float), testLine[0:2], testLine[2:])
            perPixelWeights = np.linalg.norm(projectedPixelCenters-linePixelsIdxs[:, ::-1].astype(float), axis=1)
            perPixelWeights = perPixelWeights/np.sum(perPixelWeights)
            
            fgBGTransitionCost = np.sum(perFrameFGBGTransitionCosts[linePixelsIdxs[:, 0], linePixelsIdxs[:, 1], frameIdxs[idx]]*perPixelWeights)
            totalFGBGTransitionCost = totalFGBGTransitionCost + fgBGTransitionCost*costWeight + (1.0-costWeight)*maxFGBGCost
            if False and doVisualize :
                print invisibleCornersIdx, edgeIdxs, testLine, fgBGTransitionCost
                plot([testLine[0], testLine[2]], [testLine[1], testLine[3]], color='yellow')
                scatter(linePixelsIdxs[:, ::-1].astype(float)[:, 0], linePixelsIdxs[:, ::-1].astype(float)[:, 1], color=cm.jet(perPixelWeights/np.max(perPixelWeights)))
                scatter(projectedPixelCenters[:, 0], projectedPixelCenters[:, 1], color=cm.jet(perPixelWeights/np.max(perPixelWeights)))
            
        if cameraClippedFootprintCorners is not None and cameraClippedFootprintCorners.shape[0] > 0 :
            insideAreaCost = np.abs(1.0 - polyArea(cameraClippedFootprintCorners)/polyArea(cameraFootprintCorners))
#             bboxToFootprintAreaCost = np.abs(1.0 - polyArea(cameraClippedFootprintCorners)/polyArea(np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]])))
        else :
            insideAreaCost = 1.0
#             bboxToFootprintAreaCost = 1.0
#         bboxToFootprintAreaCost = np.abs(1.0 - polyArea(cameraFootprintCorners)/polyArea(np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]])))
        cameraFootprintCornersAABB = np.vstack([[np.min(cameraFootprintCorners, axis=0)], [[np.max(cameraFootprintCorners[:, 0]), np.min(cameraFootprintCorners[:, 1])]],
                                                [np.max(cameraFootprintCorners, axis=0)], [[np.min(cameraFootprintCorners[:, 0]), np.max(cameraFootprintCorners[:, 1])]]])
    
#         cameraFootprintCornersClippedAABB = clip([tuple(point) for point in cameraFootprintCornersAABB], [tuple(cameraBBox[[0, 1]]), tuple(cameraBBox[[2, 1]]),
#                                                                                                           tuple(cameraBBox[[2, 3]]), tuple(cameraBBox[[0, 3]])])
#         if cameraFootprintCornersClippedAABB is not None and cameraFootprintCornersClippedAABB.shape[0] > 0 :
#             bboxToFootprintAreaCost = np.abs(1.0 - polyArea(cameraFootprintCornersClippedAABB)/polyArea(np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]])))
#         else :
#             bboxToFootprintAreaCost = 1.0
# #         bboxToFootprintAreaCost = np.abs(1.0 - polyArea(cameraFootprintCornersAABB)/polyArea(np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]])))

        bboxToFootprintAreaCost = np.sum(np.linalg.norm(cameraFootprintCornersAABB - np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]]), axis=1))
#         print "asdojahsdf", bboxToFootprintAreaCost, np.linalg.norm(cameraFootprintCornersAABB - np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]]), axis=1)
#         plot([cameraFootprintCornersAABB[0, 0], np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]])[0, 0]],
#              [cameraFootprintCornersAABB[0, 1], np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]])[0, 1]])
#         plot([cameraFootprintCornersAABB[1, 0], np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]])[1, 0]],
#              [cameraFootprintCornersAABB[1, 1], np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]])[1, 1]])
#         plot([cameraFootprintCornersAABB[2, 0], np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]])[2, 0]],
#              [cameraFootprintCornersAABB[2, 1], np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]])[2, 1]])
#         plot([cameraFootprintCornersAABB[3, 0], np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]])[3, 0]],
#              [cameraFootprintCornersAABB[3, 1], np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]])[3, 1]])
        
        ## this is similar to the cost defined for 3D in Multi-view Stereo via Volumetric Graph-cuts [Vogiatzis]
        totalAreaBloatCost += np.abs(1.0-polyArea(cameraFootprintCorners)/float(np.prod(undistortedBgImage.shape[0:2])))
        totalInsideAreaCost += insideAreaCost
        totalBBoxToFootprintAreaCost += np.log(1+bboxToFootprintAreaCost) ## added cauchy loss
        if doVisualize :
            print totalAreaBloatCost, totalInsideAreaCost, totalBBoxToFootprintAreaCost

    geometricCostParam = 0.1 #0.85
    geometricCost = totalInsideAreaCost*geometricCostParam+(1.0-geometricCostParam)*totalBBoxToFootprintAreaCost
    geometricCost = totalAreaBloatCost
#     geometricCost = 1.0-footprintWidth*footprintLength/np.prod(allWorldFootprintCornersAABB[2:4]-allWorldFootprintCornersAABB[0:2])
    geometricCost = np.abs(1.0-footprintWidth*footprintLength/ACTIVE_AABB_AREA)
    alphaParam = 0.5
    if doVisualize :
        print geometricCost
# #     print (1.0-alphaParam)*geometricCost, totalFGBGTransitionCost*alphaParam+(1.0-alphaParam)*geometricCost, totalAreaBloatCost
# #     return totalFGBGTransitionCost*alphaParam+(1.0-alphaParam)*geometricCost
#     return totalFGBGTransitionCost + 100000.0*geometricCost
#     return geometricCost
    return totalFGBGTransitionCost+10.0*totalBBoxToFootprintAreaCost

def placeBoxVolumeAndProject(footprintWidth, footprintLength, volumeHeight, posAndRot, cameraIntrinsics, cameraExtrinsics, cameraBBox, footprintIdx=0, doVisualize=False) :
    """places a footprint in the 3D world, projects the box volume it and volumeHeight defines into image space and finds which edges correspond to its convex hull in camera space"""
    
    dx, dy, rot = posAndRot
    worldFootprintPoints = placeFootprint(footprintWidth, footprintLength, dx, dy, rot)
    worldBoxVolumePoints = np.vstack([worldFootprintPoints[0:4, :], worldFootprintPoints[0:4, :]+np.array([[0.0, 0.0, volumeHeight]]), worldFootprintPoints[4:]])
    
    cameraBoxVolumePoints = worldToCameraSpace(cameraIntrinsics, cameraExtrinsics, worldBoxVolumePoints)
    cameraBoxVolumeCorners = cameraBoxVolumePoints[0:8, :]
    cameraFootprintCenterPoint = cameraBoxVolumePoints[8, :]
    cameraFootprintForwardPoint = cameraBoxVolumePoints[9, :]
    
    cameraConvexHullPoints = cv2.convexHull(cameraBoxVolumeCorners)[:, 0, :]
    cornersInConvexHullIdx = []
    ## find which corners of the box volume should be ignored in the cost computation because they are not part of the convex hull
    for point in cameraConvexHullPoints :
        distsToCorners = np.linalg.norm(point[np.newaxis, :] - cameraBoxVolumeCorners, axis=1)
        if not np.any(distsToCorners == 0.0) :
            raise Exception("This corner is not in the convex hull!") ## should never happen because convexHull uses existing points
        cornersInConvexHullIdx.append(int(np.argwhere(distsToCorners == 0.0)))
            
    cornersToIgnoreIdx = [idx for idx in np.arange(8) if idx not in cornersInConvexHullIdx]
    
    if doVisualize :
        edgesIdxs = np.array([[0, 1], [1, 2], [2, 3], [3, 0], [4, 5], [5, 6], [6, 7], [7, 4], [0, 4], [1, 5], [2, 6], [3, 7]])
        for edgeIdxs in edgesIdxs :
            if edgeIdxs[0] not in cornersToIgnoreIdx and edgeIdxs[1] not in cornersToIgnoreIdx :
                plot(cameraBoxVolumeCorners[edgeIdxs, 0], cameraBoxVolumeCorners[edgeIdxs, 1], color=cm.Set1(np.mod(footprintIdx, 9)/8.0))
            else :
                plot(cameraBoxVolumeCorners[edgeIdxs, 0], cameraBoxVolumeCorners[edgeIdxs, 1], '--', color=cm.Set1(np.mod(footprintIdx, 9)/8.0), linewidth=0.75)
        plot([cameraFootprintForwardPoint[0], cameraFootprintCenterPoint[0]], [cameraFootprintForwardPoint[1], cameraFootprintCenterPoint[1]], color=cm.Set1(np.mod(footprintIdx, 9)/8.0), linewidth=1.5)
        plot(cameraBBox[[0, 2, 2, 0, 0]], cameraBBox[[1, 1, 3, 3, 1]], color=cm.Set1(np.mod(footprintIdx, 9)/8.0), linewidth=0.75)
        
    return cameraBoxVolumeCorners, cornersToIgnoreIdx

def computeBoxVolumeTotalCost(footprintWidth, footprintLength, volumeHeight, perFramePosAndRot, cameraIntrinsics, cameraExtrinsics, cameraTrackedBBoxes, perFrameFGBGTransitionCosts, doVisualize=False) :
    
    if doVisualize :
        figure(); imshow(undistortedBgImage)
        frameIdxs = np.arange(0, len(perFramePosAndRot), 20)
    else :
        frameIdxs = np.arange(len(perFramePosAndRot))
        
    maxFGBGCost = np.max(perFrameFGBGTransitionCosts)
    totalProjectedBoxVolumeAABBtoBBoxCost = 0.0 ## how big area of camera space footprint is wrt bbox area
    totalFGBGTransitionCost = 0.0
    for idx, (posAndRot, cameraBBox) in enumerate(zip(perFramePosAndRot[frameIdxs, :], cameraTrackedBBoxes[frameIdxs, :])) :
        if False and doVisualize :
            print frameIdxs[idx]
#             tmpImage, _, _ = undistortImage(filmedSceneData[DICT_DISTORTION_PARAMETER], filmedSceneData[DICT_DISTORTION_RATIO],
#                                             allObjectFrames[:, :, :, frameIdxs[idx]], filmedSceneData[DICT_CAMERA_INTRINSICS], doReturnMaps=False)
#             figure(); imshow(tmpImage)
            figure(); imshow(perFrameFGBGTransitionCosts[:, :, frameIdxs[idx]])
    
        cameraBoxVolumeCorners, cornersToIgnoreIdx = placeBoxVolumeAndProject(footprintWidth, footprintLength, volumeHeight, posAndRot, cameraIntrinsics, cameraExtrinsics, cameraBBox, idx, doVisualize)
        
#         edgesIdxs = np.array([[0, 1], [1, 2], [2, 3], [3, 0], [4, 5], [5, 6], [6, 7], [7, 4], [0, 4], [1, 5], [2, 6], [3, 7]])
#         ## this make sure that only the edges that don't contain an invisible corner are used in the computation
#         edgesToComputeCostOnIdxs = edgesIdxs[~np.any(np.any(edgesIdxs[np.newaxis, :, :] == np.reshape(cornersToIgnoreIdx, [len(cornersToIgnoreIdx), 1, 1]), axis=0), axis=1), :]
#         for edgeIdxs in edgesToComputeCostOnIdxs :
#             testLine = np.concatenate([cameraBoxVolumeCorners[edgeIdxs[0], :], cameraBoxVolumeCorners[edgeIdxs[1], :]])
#             rasterImage = np.zeros(perFrameFGBGTransitionCosts.shape[0:2], np.uint8)
#             cv2.line(rasterImage, tuple(np.round(testLine[0:2]).astype(int)), tuple(np.round(testLine[2:]).astype(int)), 1, thickness=2)
            
#             ## clip the line to image borders
#             isInside, point1, point2 = cv2.clipLine(tuple(np.concatenate([[0, 0], perFrameFGBGTransitionCosts.shape[:2][::-1]])), tuple(testLine[0:2]), tuple(testLine[2:4]))
#             ## if the testLine is outside the image borders then no ratio of the fgBGTransitionCost will come from perFrameFGBGTransitionCosts
#             costWeight = 0.0
#             ## if the testLine is inside the image borders then costWeight of the fgBGTransitionCost comes from perFrameFGBGTransitionCosts while 1-costWeight is multiplied by max cost
#             ## the more of the testLine is inside the closer is costWeight to 1.0
#             if isInside :
#                 costWeight = np.linalg.norm(np.array(point1)-np.array(point2))/np.linalg.norm(testLine[:2]-testLine[2:])
            
#             linePixelsIdxs = np.argwhere(rasterImage.astype(bool))
#             projectedPixelCenters = projectPointsOnLineSegment(linePixelsIdxs[:, ::-1].astype(float), testLine[0:2], testLine[2:])
#             perPixelWeights = np.linalg.norm(projectedPixelCenters-linePixelsIdxs[:, ::-1].astype(float), axis=1)
#             perPixelWeights = perPixelWeights/np.sum(perPixelWeights)
            
#             fgBGTransitionCost = np.sum(perFrameFGBGTransitionCosts[linePixelsIdxs[:, 0], linePixelsIdxs[:, 1], frameIdxs[idx]]*perPixelWeights)
#             totalFGBGTransitionCost = totalFGBGTransitionCost + fgBGTransitionCost*costWeight + (1.0-costWeight)*maxFGBGCost
#             if False and doVisualize :
#                 print cornersToIgnoreIdx, edgeIdxs, testLine, fgBGTransitionCost
#                 plot([testLine[0], testLine[2]], [testLine[1], testLine[3]], color='yellow')
#                 scatter(linePixelsIdxs[:, ::-1].astype(float)[:, 0], linePixelsIdxs[:, ::-1].astype(float)[:, 1], color=cm.jet(perPixelWeights/np.max(perPixelWeights)))
#                 scatter(projectedPixelCenters[:, 0], projectedPixelCenters[:, 1], color=cm.jet(perPixelWeights/np.max(perPixelWeights)))
        
        cameraBoxVolumeCornersAABB = np.vstack([[np.min(cameraBoxVolumeCorners, axis=0)], [[np.max(cameraBoxVolumeCorners[:, 0]), np.min(cameraBoxVolumeCorners[:, 1])]],
                                                [np.max(cameraBoxVolumeCorners, axis=0)], [[np.min(cameraBoxVolumeCorners[:, 0]), np.max(cameraBoxVolumeCorners[:, 1])]]])

        projectedBoxVolumeAABBtoBBoxCost = np.sum(np.linalg.norm(cameraBoxVolumeCornersAABB - np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]]), axis=1))
        if doVisualize :
#             plot(cameraBoxVolumeCornersAABB[[0, 1, 2, 3, 0], 0], cameraBoxVolumeCornersAABB[[0, 1, 2, 3, 0], 1])
#             plot([cameraBoxVolumeCornersAABB[0, 0], np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]])[0, 0]],
#                  [cameraBoxVolumeCornersAABB[0, 1], np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]])[0, 1]])
#             plot([cameraBoxVolumeCornersAABB[1, 0], np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]])[1, 0]],
#                  [cameraBoxVolumeCornersAABB[1, 1], np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]])[1, 1]])
#             plot([cameraBoxVolumeCornersAABB[2, 0], np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]])[2, 0]],
#                  [cameraBoxVolumeCornersAABB[2, 1], np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]])[2, 1]])
#             plot([cameraBoxVolumeCornersAABB[3, 0], np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]])[3, 0]],
#                  [cameraBoxVolumeCornersAABB[3, 1], np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]])[3, 1]])
            
            print np.linalg.norm(cameraBoxVolumeCornersAABB - np.array([cameraBBox[[0, 1]], cameraBBox[[2, 1]], cameraBBox[[2, 3]], cameraBBox[[0, 3]]]), axis=1), projectedBoxVolumeAABBtoBBoxCost,
            print np.log(1+projectedBoxVolumeAABBtoBBoxCost), totalFGBGTransitionCost
            
        totalProjectedBoxVolumeAABBtoBBoxCost += np.log(1+projectedBoxVolumeAABBtoBBoxCost) ## added cauchy loss
    print totalFGBGTransitionCost, totalProjectedBoxVolumeAABBtoBBoxCost
    return 0.0*totalFGBGTransitionCost+1.0*totalProjectedBoxVolumeAABBtoBBoxCost

In [64]:
tmpPoops = cameraToWorldGroundPlane(cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], undistortedTrackPointsAndScaleChanges[:, 0:2])[:, 0:2]
allWorldFootprintCornersAABB = np.concatenate([np.min(tmpPoops, axis=0), np.max(tmpPoops, axis=0)])
ACTIVE_AABB_AREA = np.prod(allWorldFootprintCornersAABB[2:4]-allWorldFootprintCornersAABB[0:2]); print ACTIVE_AABB_AREA
# tmpPoop = worldToCameraSpace(cameraIntrinsics, cameraExtrinsics, np.hstack([np.array([allWorldFootprintCornersAABB[[0, 1]],
#                                                                                       allWorldFootprintCornersAABB[[2, 1]],
#                                                                                       allWorldFootprintCornersAABB[[2, 3]],
#                                                                                       allWorldFootprintCornersAABB[[0, 3]]]), np.zeros([4, 1])]))
# plot(tmpPoop[[0, 1, 2, 3, 0], 0], tmpPoop[[0, 1, 2, 3, 0], 1])
# print tmpPoop

3308.11


In [415]:
optimizerMethod = 'Nelder-Mead'
if optimizerMethod == 'BFGS' :
    paramDimensionalityScale = np.array([10.0, 10.0, 10.0, 1.0, 1.0, 100.0, 1.0, 1.0])
else :
    paramDimensionalityScale = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
initParamsToEstimate = np.concatenate([np.ones(3), worldUndistortedTrajectoryPoints[0, 0:2], [smootherWorldOrientationAngles[0]], np.ones(2)])*paramDimensionalityScale
dimensionsToEstimate = 8
initialRotation = initParamsToEstimate[5]

fitVolumeToTrackRelativesWithStretchRotFullTraj(initParamsToEstimate, paramDimensionalityScale, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
                                                smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS],
                                                cameraUndistortedBBoxes, initialRotation, allObjectFramesUndistortedFGBGTransitionCost, True, initParamsToEstimate[dimensionsToEstimate:])

[ 70.04790784  59.98683869  44.46997428  57.32248415] 231.82720496 5.45029656882 0.0
[ 56.21082116  48.53507348  36.81258944  46.46686775] 188.025351821 5.24188114268 0.0
[ 50.39363588  45.0846029   34.32009911  41.04590505] 170.844242941 5.14658850225 0.0
[ 45.02528061  40.51210153  32.20313903  37.72383447] 155.464355631 5.05282822449 0.0
[ 40.51956845  36.95051285  30.66804673  34.88587276] 143.024000786 4.96997995781 0.0
[ 41.15724951  38.83760324  30.6348089   33.52687396] 144.156535606 4.97781271602 0.0
[ 43.75282602  42.36277671  31.55673967  33.3995921 ] 151.071934493 5.02435366214 0.0
[ 44.59798143  43.63901986  33.82301988  35.05157011] 157.111591273 5.06330105761 0.0
[ 44.00005152  43.11023409  31.37459256  32.58645898] 151.071337152 5.02434973412 0.0
[ 39.03928045  38.32799271  29.73615733  30.64750309] 137.750933576 4.93268048108 0.0
[ 42.69027875  42.33985494  32.00784758  32.46996906] 149.507950332 5.01401590892 0.0
[ 45.42823254  45.99032343  35.04337822  34.30237338] 1

76.379221711394834

In [568]:
# def fitVolumeToTrackRelativesWithStretchRotFullTraj(paramsToEstimate, paramDimensionalityScale, relativePositions, relativeRots, cameraIntrinsics,
#                                                     cameraExtrinsics, cameraTrackedBBoxes, initialRot, perFrameFGBGTransitionCosts, doVisualize=False, hackedParams=None) :
#     """paramsToEstimate are (footprintWidth, footprintLength, volumeHeight, globalDx, globaDy, globalRot, stretchX, stretchY)"""
#     if hackedParams is not None :
#         paramsToEstimate = np.concatenate([paramsToEstimate, hackedParams])
#     paramsToEstimate /= paramDimensionalityScale
    
#     footprintWidth, footprintLength, volumeHeight = paramsToEstimate[0:3]
#     rot = paramsToEstimate[5] - initialRot/paramDimensionalityScale[5]
#     rotatedRelativePositions = np.dot(np.array([[np.cos(rot) , np.sin(rot), 0.0, 0.0],
#                                                 [-np.sin(rot), np.cos(rot), 0.0, 0.0],
#                                                 [0.0         , 0.0        , 1.0, 0.0],
#                                                 [0.0         , 0.0        , 0.0, 1.0]]), np.hstack([relativePositions, np.repeat(np.array([[0.0, 1.0]]), len(relativePositions), axis=0)]).T)
#     rotatedRelativePositions = rotatedRelativePositions[:-1, :]/rotatedRelativePositions[-1, :]
    
#     perFramePosAndRot = np.hstack([np.dot(rotatedRelativePositions[0:2, :].T, np.array([[paramsToEstimate[6], 0.0], [0.0, paramsToEstimate[7]]]))+paramsToEstimate[3:5], relativeRots[:, np.newaxis]+paramsToEstimate[5]])
    
#     cost = computeBoxVolumeTotalCost(footprintWidth, footprintLength, volumeHeight, perFramePosAndRot, cameraIntrinsics, cameraExtrinsics, cameraTrackedBBoxes, perFrameFGBGTransitionCosts, doVisualize)
#     if hackedParams is not None and len(hackedParams) > 0 :
#         print paramsToEstimate[:-len(hackedParams)],
#     else :
#         print paramsToEstimate,
#     print cost, "vol"
#     return cost


# optimizerMethod = 'BFGS'
optimizerMethod = 'Nelder-Mead'
if optimizerMethod == 'BFGS' :
    paramDimensionalityScale = np.array([10.0, 10.0, 10.0, 1.0, 1.0, 100.0, 1.0, 1.0])
else :
    paramDimensionalityScale = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
initParamsToEstimate = np.concatenate([np.ones(3), worldUndistortedTrajectoryPoints[0, 0:2], [smootherWorldOrientationAngles[0]], np.ones(2)])*paramDimensionalityScale

if False :
    try :
        initParamsToEstimate = np.copy(optResult.x)
    except Exception as e:
        print e
dimensionsToEstimate = 8
initialRotation = initParamsToEstimate[5]

startTime = time.time()
## this about 23 secs for 50 iters and only area cost
# optResult = minimize(fitVolumeToTrackRelativesWithStretchRotFullTraj, initParamsToEstimate[0:dimensionsToEstimate], method=optimizerMethod,
#                      args=(paramDimensionalityScale, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
#                            smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], cameraUndistortedBBoxes,
#                            initialRotation, allObjectFramesUndistortedFGBGTransitionCost, False, initParamsToEstimate[dimensionsToEstimate:]), options={'maxiter': 50, 'eps':0.1})

## this about 2.23 secs for 50 iters
# optResult = minimize(computeAllProjectedBoxVolumeAABBtoBBoxCost, initParamsToEstimate, method=optimizerMethod,
#                      args=(paramDimensionalityScale, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
#                                            smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS],
#                                            cameraUndistortedBBoxes, initialRotation), options={'maxiter': 50, 'eps':0.1})
## this about 0.06 secs for 50 iters
optResult = minimize(fitVolumeToTrackRelativesWithStretchRotFullTraj, initParamsToEstimate, method=optimizerMethod,
                     args=(paramDimensionalityScale, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
                                           smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS],
                                           cameraUndistortedBBoxes, initialRotation, allObjectFramesUndistortedFGBGTransitionCost), options={'maxiter': 1000, 'eps':0.1})
# print optResult.x
print ["Optimization Failed!", "Optimization Successful!"][int(optResult.success)], " ---  MESSAGE: \"{0}\" --- cost: {1} --- It took {2} secs".format(optResult.message, optResult.fun, time.time()-startTime)

# fitVolumeToTrackRelativesWithStretchRotFullTraj(initParamsToEstimate[0:dimensionsToEstimate, paramDimensionalityScale, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
#                                                    smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS],
#                                                    cameraUndistortedBBoxes, initialRotation, allObjectFramesUndistortedFGBGTransitionCost, True, initParamsToEstimate[dimensionsToEstimate:])



Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 959.30395339 --- It took 11.3815250397 secs


In [570]:
# bob = np.copy(initParamsToEstimate)
bob = np.copy(optResult.x)
# bob = np.array([1.36535064, 1.68247883, 0.7418005, -12.36272974, -18.98843994, 2.95323432, 1.08794953, 1.03739611])
fitVolumeToTrackRelativesWithStretchRotFullTraj(bob, paramDimensionalityScale, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
                                                smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS],
                                                cameraUndistortedBBoxes, initialRotation, allObjectFramesUndistortedFGBGTransitionCost, True)

32.863392307
35.5098565172
34.6864956048
28.6786547259
35.2963537014
15.1577829056
8.1266191972
9.87555471533
20.2001046682
23.465962464
26.6295299626
33.511989545
25.7341283987
84.3669269639
91.3591259771


959.30395338958033

In [110]:
# optimizerMethod = 'BFGS'
optimizerMethod = 'Nelder-Mead'
if optimizerMethod == 'BFGS' :
    paramDimensionalityScale = np.array([10.0, 10.0, 10.0, 1.0, 1.0, 100.0, 1.0, 1.0])
else :
    paramDimensionalityScale = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
initParamsToEstimate = np.concatenate([np.ones(3), worldUndistortedTrajectoryPoints[0, 0:2], [smootherWorldOrientationAngles[0]], np.ones(2)])*paramDimensionalityScale
# try :
#     initParamsToEstimate = np.copy(optResult.x)
# except Exception as e:
#     print e
dimensionsToEstimate = 8
figure(); imshow(undistortedBgImage)
fitVolumeToTrackRelativesWithStretchRotFullTraj(initParamsToEstimate[0:dimensionsToEstimate], paramDimensionalityScale, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
                                                   smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS],
                                                   cameraUndistortedBBoxes, initParamsToEstimate[5], allObjectFramesUndistortedFGBGTransitionCost, True, initParamsToEstimate[dimensionsToEstimate:])

README [0, 6] [0 6]
[ 32.96192474  32.13801942  31.84390985  32.67523198  32.5019105
  31.66603852  31.36750385  32.21112367]
[ 31.36750385  31.66603852  31.84390985  32.13801942  32.21112367
  32.5019105   32.67523198  32.96192474]
[6 5 2 1 7 4 3 0]
[[ 1513.74572754   654.11303711]
 [ 1520.98571777   671.25701904]
 [ 1483.02001953   661.86773682]
 [ 1477.13415527   645.3314209 ]
 [ 1525.40185547   630.26074219]
 [ 1533.18359375   646.90557861]
 [ 1494.27954102   637.79614258]
 [ 1487.91711426   621.75177002]]
[ 70.04790784  59.98683869  44.46997428  57.32248415] 231.82720496 5.45029656882 2.87672358692
README [0, 6] [0 6]
[ 31.91740049  31.07721177  30.8658349   31.71162448  31.44210803
  30.58886717  30.37409204  31.23320077]
[ 30.37409204  30.58886717  30.8658349   31.07721177  31.23320077
  31.44210803  31.71162448  31.91740049]
[6 5 2 1 7 4 3 0]
[[ 1389.4831543    626.85528564]
 [ 1393.21362305   642.36315918]
 [ 1359.63464355   634.39544678]
 [ 1357.00415039   619.3828125 ]
 [ 13

[ 55.68103992  59.507817    42.56909703  37.03141834] 194.78937228 5.27703945018 43.5112262905
[  1.           1.           1.          27.85343933 -13.28369617
   3.09503034   1.           1.        ] 807.303443404 vol


807.30344340447289

In [59]:
tmpLine = np.array([1800, 569, 1900, 369])
figure(); imshow(undistortedBgImage); plot([tmpLine[0], tmpLine[2]], [tmpLine[1], tmpLine[3]])
isInside, point1, point2 = cv2.clipLine(tuple(np.concatenate([[0, 0], undistortedBgImage.shape[:2][::-1]])), tuple(tmpLine[0:2]), tuple(tmpLine[2:4]))
plot([point1[0], point2[0]], [point1[1], point2[1]])

[<matplotlib.lines.Line2D at 0x7fa483443d90>]

In [393]:
def fitFootprintToTrack(paramsToEstimate, cameraIntrinsics, cameraExtrinsics, cameraTrackedBBoxes, doVisualize=False) :
    """first 2 numbers in paramsToEstimate are the width and length of the footprint while the rest are 3 numbers per tracked frame (dx, dy, rot) so len(paramsToEstimate)-2 must be divisible by 3"""
    footprintWidth, footprintLength = paramsToEstimate[0:2]
    perFramePosAndRot = paramsToEstimate[2:].reshape([(len(paramsToEstimate)-2)/3, 3])
    
    return computeTotalCost(footprintWidth, footprintLength, perFramePosAndRot, cameraIntrinsics, cameraExtrinsics, cameraTrackedBBoxes, doVisualize)
        
initParamsToEstimate = np.concatenate([np.ones(2), np.hstack([worldUndistortedTrajectoryPoints[:, 0:2], smootherWorldOrientationAngles[:, np.newaxis]]).flatten()])
fitFootprintToTrack(initParamsToEstimate, cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], cameraUndistortedBBoxes, True)

optResult = minimize(fitFootprintToTrack, initParamsToEstimate, method='Nelder-Mead', args=(cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], cameraUndistortedBBoxes), options={'maxiter': 200})
# print optResult.x
print ["Optimization Failed!", "Optimization Successful!"][int(optResult.success)], " ---  MESSAGE: \"{0}\" --- cost: {1}".format(optResult.message, optResult.fun)
fitFootprintToTrack(optResult.x, cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], cameraUndistortedBBoxes, True)

Optimization Failed!  ---  MESSAGE: "Maximum number of iterations has been exceeded." --- cost: 169.316913781


8.6320556279817389

In [396]:
def fitFootprintPositionsToTrack(paramsToEstimate, footprintSize, footprintRots, cameraIntrinsics, cameraExtrinsics, cameraTrackedBBoxes, doVisualize=False) :
    """paramsToEstimate are the (dx dy) positions of the footprints"""
    footprintWidth, footprintLength = footprintSize
    perFramePosAndRot = np.hstack([paramsToEstimate.reshape([len(paramsToEstimate)/2, 2]), footprintRots[:, np.newaxis]])
    
    return computeTotalCost(footprintWidth, footprintLength, perFramePosAndRot, cameraIntrinsics, cameraExtrinsics, cameraTrackedBBoxes, doVisualize)
        
initParamsToEstimate = np.copy(worldUndistortedTrajectoryPoints[:, 0:2]).flatten()
fitFootprintPositionsToTrack(initParamsToEstimate, np.ones(2), smootherWorldOrientationAngles, cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], cameraUndistortedBBoxes, True)

optResult = minimize(fitFootprintPositionsToTrack, initParamsToEstimate, method='Nelder-Mead', args=(np.ones(2), smootherWorldOrientationAngles, cameraIntrinsics,
                                                                                              filmedSceneData[DICT_CAMERA_EXTRINSICS], cameraUndistortedBBoxes), options={'maxiter': 1000})
# print optResult.x
print ["Optimization Failed!", "Optimization Successful!"][int(optResult.success)], " ---  MESSAGE: \"{0}\" --- cost: {1}".format(optResult.message, optResult.fun)
fitFootprintPositionsToTrack(optResult.x, np.ones(2), smootherWorldOrientationAngles, cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], cameraUndistortedBBoxes, True)

Optimization Failed!  ---  MESSAGE: "Maximum number of iterations has been exceeded." --- cost: 168.484129357


8.5846788450943041

In [323]:
def fitFootprintToTrackRelatives(paramsToEstimate, relativePositions, relativeRots, cameraIntrinsics, cameraExtrinsics, cameraTrackedBBoxes, doVisualize=False) :
    """paramsToEstimate are the (footprintWidth, footprintLength, globalDx, globaDy, globalRot) positions of the footprints"""
    footprintWidth, footprintLength = paramsToEstimate[0:2]
    perFramePosAndRot = np.hstack([relativePositions+paramsToEstimate[2:4], relativeRots[:, np.newaxis]+paramsToEstimate[4]])
    
    return computeTotalCost(footprintWidth, footprintLength, perFramePosAndRot, cameraIntrinsics, cameraExtrinsics, cameraTrackedBBoxes, doVisualize)
        
initParamsToEstimate = np.concatenate([np.ones(2), worldUndistortedTrajectoryPoints[0, 0:2], [smootherWorldOrientationAngles[0]]])
fitFootprintToTrackRelatives(initParamsToEstimate, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
                             smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], cameraUndistortedBBoxes, True)
print fitFootprintToTrackRelatives(initParamsToEstimate, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
                                   smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], cameraUndistortedBBoxes)

optResult = minimize(fitFootprintToTrackRelatives, initParamsToEstimate, method='BFGS', args=(worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
                                                                                              smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics,
                                                                                              filmedSceneData[DICT_CAMERA_EXTRINSICS], cameraUndistortedBBoxes), options={'maxiter': 100})
# print optResult.x
print ["Optimization Failed!", "Optimization Successful!"][int(optResult.success)], " ---  MESSAGE: \"{0}\" --- cost: {1}".format(optResult.message, optResult.fun)
fitFootprintToTrackRelatives(optResult.x, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
                             smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], cameraUndistortedBBoxes, True)

212.473661167
Optimization Failed!  ---  MESSAGE: "Desired error not necessarily achieved due to precision loss." --- cost: 156.583218402


7.9524183114578797

In [358]:
def fitFootprintToTrackRelativesWithStretch(paramsToEstimate, relativePositions, relativeRots, cameraIntrinsics, cameraExtrinsics, cameraTrackedBBoxes, doVisualize=False) :
    """paramsToEstimate are the (footprintWidth, footprintLength, globalDx, globaDy, globalRot, stretchX, stretchY) positions of the footprints"""
    footprintWidth, footprintLength = paramsToEstimate[0:2]
    perFramePosAndRot = np.hstack([np.dot(relativePositions, np.array([[paramsToEstimate[5], 0.0], [0.0, paramsToEstimate[6]]]))+paramsToEstimate[2:4], relativeRots[:, np.newaxis]+paramsToEstimate[4]])
    
    return computeTotalCost(footprintWidth, footprintLength, perFramePosAndRot, cameraIntrinsics, cameraExtrinsics, cameraTrackedBBoxes, doVisualize)
        
initParamsToEstimate = np.concatenate([np.ones(2), worldUndistortedTrajectoryPoints[0, 0:2], [smootherWorldOrientationAngles[0]], np.ones(2)])
fitFootprintToTrackRelativesWithStretch(initParamsToEstimate, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
                                        smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], cameraUndistortedBBoxes, True)
# print fitFootprintToTrackRelativesWithStretch(initParamsToEstimate, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
#                                               smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], cameraUndistortedBBoxes)

# optResult = minimize(fitFootprintToTrackRelativesWithStretch, initParamsToEstimate, method='Nelder-Mead', args=(worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
#                                                                                               smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics,
#                                                                                               filmedSceneData[DICT_CAMERA_EXTRINSICS], cameraUndistortedBBoxes), options={'maxiter': 500})
# # print optResult.x
# print ["Optimization Failed!", "Optimization Successful!"][int(optResult.success)], " ---  MESSAGE: \"{0}\" --- cost: {1}".format(optResult.message, optResult.fun)
# fitFootprintToTrackRelativesWithStretch(optResult.x, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
#                                         smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], cameraUndistortedBBoxes, True)

12.669189950840183

In [352]:
def fitFootprintToTrackRelativesWithStretchMinimalRotChange(paramsToEstimate, relativePositions, relativeRots, cameraIntrinsics, cameraExtrinsics, cameraTrackedBBoxes, initialRot, doVisualize=False) :
    """paramsToEstimate are the (footprintWidth, footprintLength, globalDx, globaDy, globalRot, stretchX, stretchY) positions of the footprints"""
    footprintWidth, footprintLength = paramsToEstimate[0:2]
    perFramePosAndRot = np.hstack([np.dot(relativePositions, np.array([[paramsToEstimate[5], 0.0], [0.0, paramsToEstimate[6]]]))+paramsToEstimate[2:4], relativeRots[:, np.newaxis]+paramsToEstimate[4]])
    
    return computeTotalCost(footprintWidth, footprintLength, perFramePosAndRot, cameraIntrinsics, cameraExtrinsics, cameraTrackedBBoxes, doVisualize)+np.sqrt((paramsToEstimate[4]-initialRot)**2)*len(relativePositions)*0.3
        
initParamsToEstimate = np.concatenate([np.ones(2), worldUndistortedTrajectoryPoints[0, 0:2], [smootherWorldOrientationAngles[0]], np.ones(2)])
fitFootprintToTrackRelativesWithStretchMinimalRotChange(initParamsToEstimate, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
                                                        smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS],
                                                        cameraUndistortedBBoxes, initParamsToEstimate[4], True)
print fitFootprintToTrackRelativesWithStretchMinimalRotChange(initParamsToEstimate, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
                                                              smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS],
                                                              cameraUndistortedBBoxes, initParamsToEstimate[4])

optResult = minimize(fitFootprintToTrackRelativesWithStretchMinimalRotChange, initParamsToEstimate, method='Nelder-Mead',
                     args=(worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
                           smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics,
                           filmedSceneData[DICT_CAMERA_EXTRINSICS], cameraUndistortedBBoxes, initParamsToEstimate[4]), options={'maxiter': 500})
# print optResult.x
print ["Optimization Failed!", "Optimization Successful!"][int(optResult.success)], " ---  MESSAGE: \"{0}\" --- cost: {1}".format(optResult.message, optResult.fun)
fitFootprintToTrackRelativesWithStretchMinimalRotChange(optResult.x, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
                                                        smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS],
                                                        cameraUndistortedBBoxes, initParamsToEstimate[4], True)

249.109309352
Optimization Successful!  ---  MESSAGE: "Optimization terminated successfully." --- cost: 215.114889294


10.944247087592164

In [68]:
def fitFootprintToTrackRelativesWithStretchRotFullTraj(paramsToEstimate, paramDimensionalityScale, relativePositions, relativeRots, cameraIntrinsics, cameraExtrinsics, cameraTrackedBBoxes,
                                                       initialRot, perFrameFGBGTransitionCosts, doVisualize=False, hackedParams=None) :
    """paramsToEstimate are the (footprintWidth, footprintLength, globalDx, globaDy, globalRot, stretchX, stretchY) positions of the footprints"""
    if hackedParams is not None :
        paramsToEstimate = np.concatenate([paramsToEstimate, hackedParams])
    paramsToEstimate /= paramDimensionalityScale
    
    footprintWidth, footprintLength = paramsToEstimate[0:2]
    rot = paramsToEstimate[4] - initialRot/paramDimensionalityScale[4]
    rotatedRelativePositions = np.dot(np.array([[np.cos(rot) , np.sin(rot), 0.0, 0.0],
                                                [-np.sin(rot), np.cos(rot), 0.0, 0.0],
                                                [0.0         , 0.0        , 1.0, 0.0],
                                                [0.0         , 0.0        , 0.0, 1.0]]), np.hstack([relativePositions, np.repeat(np.array([[0.0, 1.0]]), len(relativePositions), axis=0)]).T)
    rotatedRelativePositions = rotatedRelativePositions[:-1, :]/rotatedRelativePositions[-1, :]
    
    perFramePosAndRot = np.hstack([np.dot(rotatedRelativePositions[0:2, :].T, np.array([[paramsToEstimate[5], 0.0], [0.0, paramsToEstimate[6]]]))+paramsToEstimate[2:4], relativeRots[:, np.newaxis]+paramsToEstimate[4]])
    
    cost = computeTotalCost(footprintWidth, footprintLength, perFramePosAndRot, cameraIntrinsics, cameraExtrinsics, cameraTrackedBBoxes, perFrameFGBGTransitionCosts, doVisualize)
    if hackedParams is not None and len(hackedParams) > 0 :
        print paramsToEstimate[:-len(hackedParams)],
    else :
        print paramsToEstimate,
    print cost
    return cost


# optimizerMethod = 'BFGS'
optimizerMethod = 'Nelder-Mead'
if optimizerMethod == 'BFGS' :
    paramDimensionalityScale = np.array([10.0, 10.0, 1.0, 1.0, 100.0, 1.0, 1.0])
else :
    paramDimensionalityScale = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
initParamsToEstimate = np.concatenate([np.ones(2), worldUndistortedTrajectoryPoints[0, 0:2], [smootherWorldOrientationAngles[0]], np.ones(2)])*paramDimensionalityScale
dimensionsToEstimate = 7
# fitFootprintToTrackRelativesWithStretchRotFullTraj(initParamsToEstimate, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
#                                                         smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS],
#                                                         cameraUndistortedBBoxes, initParamsToEstimate[4], allObjectFramesUndistortedFGBGTransitionCost, True)
# print fitFootprintToTrackRelativesWithStretchRotFullTraj(initParamsToEstimate, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
#                                                               smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS],
#                                                               cameraUndistortedBBoxes, initParamsToEstimate[4], allObjectFramesUndistortedFGBGTransitionCost)

startTime = time.time()
optResult = minimize(fitFootprintToTrackRelativesWithStretchRotFullTraj, initParamsToEstimate[0:dimensionsToEstimate], method=optimizerMethod,
                     args=(paramDimensionalityScale, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
                           smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], cameraUndistortedBBoxes,
                           initParamsToEstimate[4], allObjectFramesUndistortedFGBGTransitionCost, False, initParamsToEstimate[dimensionsToEstimate:7]), options={'maxiter': 300, 'eps':0.1})
# print optResult.x
print ["Optimization Failed!", "Optimization Successful!"][int(optResult.success)], " ---  MESSAGE: \"{0}\" --- cost: {1} --- It took {2} secs".format(optResult.message, optResult.fun, time.time()-startTime)
# fitFootprintToTrackRelativesWithStretchRotFullTraj(optResult.x, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
#                                                         smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS],
#                                                         cameraUndistortedBBoxes, initParamsToEstimate[4], allObjectFramesUndistortedFGBGTransitionCost, True)



[  1.           1.          30.38116074  -7.30104923   3.09662438   1.           1.        ] 7966.09860339
[  1.05         1.          30.38116074  -7.30104923   3.09662438   1.           1.        ] 7958.05698768
[  1.           1.05        30.38116074  -7.30104923   3.09662438   1.           1.        ] 7951.59112188
[  1.           1.          31.90021877  -7.30104923   3.09662438   1.           1.        ] 8246.60117316
[  1.           1.          30.38116074  -7.66610169   3.09662438   1.           1.        ] 8019.28955192
[  1.           1.          30.38116074  -7.30104923   3.2514556    1.           1.        ] 8321.98375305
[  1.           1.          30.38116074  -7.30104923   3.09662438   1.05
   1.        ] 7989.41677374
[  1.           1.          30.38116074  -7.30104923   3.09662438   1.
   1.05      ] 7965.9060013
[  1.01428571   1.01428571  30.81517732  -7.40534994   2.94179316
   1.01428571   1.01428571] 8865.78409623
[  1.00357143   1.00357143  30.48966488  -7.32712

[  1.25811532   1.44912078  29.98179746  -6.06755438   3.16711679
   1.04427856   0.3856119 ] 7658.58083135
[  1.24735576   1.51550413  29.55845826  -6.03927194   3.17558696
   1.03367261   0.35256411] 7648.33916455
[  1.26620361   1.56484338  30.46334203  -5.55751625   3.14834911
   1.05457237   0.3033871 ] 7622.87816888
[  1.3253905    1.70905547  30.92446284  -5.13466441   3.13715757
   1.06612371   0.14306712] 7569.23601265
[  1.33632696   1.61470197  30.28370267  -5.67990538   3.15663061
   1.03691102   0.21377384] 7577.57465838
[  1.38374514   1.73967006  29.87568823  -5.42836527   3.21427066
   1.04224805   0.03446591] 7527.20663117
[  1.50506447   1.95999958  29.62791331  -5.07013226   3.26542872
   1.04077136  -0.27000264] 7640.50826054
[  1.41474781   1.76180425  30.40896454  -5.22457781   3.19316869
   1.05717305  -0.06741306] 7537.97434225
[  1.4433607    1.83458627  29.89650768  -5.19481238   3.1860738
   1.05012194  -0.11478826] 7573.62018841
[  1.44101666   1.90361767  3

[  2.13338948   3.55978141  30.58776253  -1.8873272    3.13157773
   1.02613108  -1.61970651] 7031.71829469
[  1.88731538   3.03074714  30.51722544  -3.09478139   3.14507851
   1.02905041  -1.09885673] 6907.32065606
[  1.8967477    3.1979475   29.76105044  -2.84360081   3.13409455
   1.00439316  -1.08620347] 6920.73713059
[  1.94654953   3.27178621  29.63287112  -2.77296346   3.15267521
   1.001343    -1.18486315] 6872.29857257
[  2.08729853   3.51089441  29.77858472  -1.96120704   3.14115464
   1.01341969  -1.51223921] 6815.00512541
[  2.21331837   3.78141995  29.40181475  -1.24511663   3.1399711
   1.00924486  -1.78295744] 6840.32103615
[  1.82499494   2.90964485  30.03029733  -2.94067761   3.15586245
   1.02454228  -0.95151122] 6964.90291377
[  2.0277099    3.39569781  30.13424548  -2.47015566   3.13763283
   1.01458279  -1.38331059] 6870.05493507
[  1.91509044   3.13119932  29.60731737  -3.50234659   3.13998995
   1.00279067  -1.07806562] 6866.77819077
[  2.03377488   3.26357231  3

[  2.26557542   3.39086677  30.02851392  -3.49259038   3.0889921
   1.03851141  -1.73414854] 6669.76196073
[  2.33227825   3.37649263  30.40585472  -3.66658154   3.07984756
   1.05296464  -1.84836285] 6681.86594845
[  2.28579921   3.31748606  30.02400094  -4.08728363   3.0776932
   1.03976743  -1.71325938] 6648.09278459
[  2.31494735   3.30761621  30.28702728  -4.13719067   3.0765005
   1.04896246  -1.77435788] 6633.36206359
[  2.35460421   3.28308926  30.41354767  -4.48751847   3.06907143
   1.05451955  -1.82091854] 6618.28976987
[  2.22468159   3.261107    29.91735705  -4.14169761   3.08493268
   1.03429039  -1.59939036] 6661.7458643
[  2.30394465   3.40134292  30.27990248  -3.71477493   3.0888133
   1.04274528  -1.7932517 ] 6617.45376001
[  2.33255756   3.48112487  30.41219376  -3.52005546   3.0942403
   1.04299853  -1.86875025] 6607.50471797
[  2.30422581   3.27804319  30.37633128  -4.33424558   3.07928001
   1.04775468  -1.73981538] 6610.28025685
[  2.3243836    3.28823614  30.341

[  3.00739799   4.01463307  30.14371528  -6.43276153   3.05823114
   1.04102523  -2.89583774] 6196.95260688
[  3.14401934   4.11171261  30.32662136  -6.40298216   3.05358208
   1.05405098  -3.15015575] 6200.27222733
[  3.20586192   4.14329496  29.98338385  -7.25492464   3.047717
   1.04230635  -3.1907415 ] 6247.70960953
[  2.97462521   3.94564531  30.32668528  -6.48470695   3.05589066
   1.04538602  -2.83255823] 6187.48076615
[  3.13731731   4.11313976  30.29298907  -7.22943715   3.05027379
   1.04108256  -3.08864819] 6182.1681968
[  3.22198225   4.2107812   30.35204095  -7.69173349   3.04740011
   1.03805569  -3.22342137] 6236.15062471
[  2.97448358   4.03090196  30.2992723   -6.25549129   3.06335851
   1.03920504  -2.85175006] 6194.00102952
[  3.12543101   4.17147772  30.20047869  -6.70413255   3.05981671
   1.03861876  -3.10013212] 6176.27970633
[  3.19824717   4.29864523  30.15272886  -6.70924024   3.06397628
   1.03496852  -3.23866422] 6224.33683674
[  2.95589943   3.99398353  30.

[  3.44768839   4.43666473  30.67482857  -6.60595648   3.06137706
   1.06642447  -3.72320859] 6090.05176027
[  3.44398918   4.42053331  30.67237907  -6.58926119   3.05918439
   1.06778516  -3.71482254] 6091.72761858
[  3.46960303   4.44858903  30.67912334  -6.49964441   3.06182622
   1.06971105  -3.76790638] 6093.70262063
[  3.43299948   4.40325132  30.66438075  -6.71982351   3.05877135
   1.06613473  -3.68676052] 6090.02985463
[  3.52805983   4.51452629  30.67815329  -6.73961913   3.06013877
   1.06813595  -3.86292358] 6093.14505933
[  3.41554741   4.383791    30.66552313  -6.62759976   3.05950355
   1.06685845  -3.65959623] 6090.07013577
[  3.37028219   4.33320864  30.61495863  -6.68093857   3.05876917
   1.06403364  -3.57038984] 6093.0510412
[  3.47439065   4.45149328  30.68728969  -6.65338325   3.05999537
   1.06829685  -3.76840344] 6089.82260297
[  3.44407261   4.41244254  30.65823978  -6.75419611   3.0602206
   1.06611261  -3.7052841 ] 6093.66829244
[  3.44401004   4.41851062  30

[  3.45277276   4.44365785  30.65095006  -6.64001074   3.06028386
   1.06563553  -3.72872366] 6088.18115666
[  3.45493742   4.44541111  30.65383852  -6.63933494   3.06030928
   1.06581945  -3.73289027] 6088.08794212
[  3.45461851   4.44449932  30.65112779  -6.64445789   3.06025722
   1.0657556   -3.73188249] 6088.09945896
[  3.45293607   4.44238475  30.65199404  -6.63849301   3.06029251
   1.06582036  -3.7290495 ] 6088.09305412
[  3.45293778   4.44387724  30.65173883  -6.64100642   3.06032249
   1.06563512  -3.72906042] 6088.22640325
[  3.45357526   4.44312077  30.65174741  -6.64087346   3.06022583
   1.06580732  -3.73011619] 6088.22986695
[  3.45309715   4.44368812  30.65174098  -6.64097318   3.06029832
   1.06567817  -3.72932437] 6088.05278089
[  3.45383296   4.4433488   30.65276216  -6.64214412   3.06026858
   1.0658191   -3.73062458] 6088.03983596
[  3.45436306   4.44319428  30.65366821  -6.64321081   3.06026095
   1.06591089  -3.73157504] 6088.1193063
[  3.45427668   4.44359689  3

In [72]:
def getFootprintsFromEstimatedParams(estimatedParams, paramDimensionalityScale, relativePositions, relativeRots, initialRot) :

    footprintWidth, footprintLength = estimatedParams[0:2]
    rot = estimatedParams[4] - initialRot/paramDimensionalityScale[4]
    rotatedRelativePositions = np.dot(np.array([[np.cos(rot) , np.sin(rot), 0.0, 0.0],
                                                [-np.sin(rot), np.cos(rot), 0.0, 0.0],
                                                [0.0         , 0.0        , 1.0, 0.0],
                                                [0.0         , 0.0        , 0.0, 1.0]]), np.hstack([relativePositions, np.repeat(np.array([[0.0, 1.0]]), len(relativePositions), axis=0)]).T)
    rotatedRelativePositions = rotatedRelativePositions[:-1, :]/rotatedRelativePositions[-1, :]

    perFramePosAndRot = np.hstack([np.dot(rotatedRelativePositions[0:2, :].T, np.array([[estimatedParams[5], 0.0], [0.0, estimatedParams[6]]]))+estimatedParams[2:4], relativeRots[:, np.newaxis]+estimatedParams[4]])
    return footprintWidth, footprintLength, perFramePosAndRot

footprintWidth, footprintLength, perFramePosAndRot = getFootprintsFromEstimatedParams(np.concatenate([optResult.x, initParamsToEstimate[dimensionsToEstimate:7]]), paramDimensionalityScale,
                                                                                      worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
                                                                                      smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], initParamsToEstimate[4])

fig = mpl.pylab.figure("animation", frameon=False)
fig.set_size_inches(17,12)
gca().set_axis_off()
img = None
for i in np.arange(allObjectFrames.shape[-1]):
    cla()
    undistortedCurrentFrame, _, _ = undistortImage(filmedSceneData[DICT_DISTORTION_PARAMETER], filmedSceneData[DICT_DISTORTION_RATIO], allObjectFrames[:, :, :, i],
                                                   filmedSceneData[DICT_CAMERA_INTRINSICS], doReturnMaps=False)
    gca().imshow(undistortedCurrentFrame)
    
    dx, dy, rot = perFramePosAndRot[i, :]
    worldFootprintPoints = placeFootprint(footprintWidth, footprintLength, dx, dy, rot)
    cameraFootprintPoints = worldToCameraSpace(cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS], worldFootprintPoints)
    gca().plot(cameraFootprintPoints[[0, 1, 2, 3, 0], 0], cameraFootprintPoints[[0, 1, 2, 3, 0], 1])
    gca().plot([cameraFootprintPoints[4, 0], cameraFootprintPoints[5, 0]], [cameraFootprintPoints[4, 1], cameraFootprintPoints[5, 1]])
#     if img is None:
#         img = mpl.pylab.imshow(undistortedCurrentFrame)
#     else:
#         img.set_data(undistortedCurrentFrame)
    mpl.pylab.pause(0.1)
    mpl.pylab.draw()
    fig.savefig("/home/ilisescu/PhD/data/havana/1-3-5_IOU-tracker-footprints_{1}/frame-{0:05}.png".format(i+1, objectIdToTrack), bbox_inches='tight')

In [70]:
"/home/ilisescu/PhD/data/havana/1-3-5_IOU-tracker-footprints_{1}/frame-{0:05}.png".format(i+1, objectIdToTrack)

'/home/ilisescu/PhD/data/havana/1-3-5_IOU-tracker-footprints_331/frame-00154.png'

In [None]:
from scipy.optimize import show_options
show_options('minimize', 'bfgs')

In [114]:
# print initParamsToEstimate
# print optResult.x
# fitFootprintToTrackRelativesWithStretchRotFullTraj(optResult.x, paramDimensionalityScale, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
#                                                    smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS],
#                                                    cameraUndistortedBBoxes, initParamsToEstimate[4], allObjectFramesUndistortedFGBGTransitionCost, True, initParamsToEstimate[dimensionsToEstimate:7])


# # # # paramsToVisualize = np.array([178.44042794, 178.4041048, 217.79230376, -84.44020742, 18.36637741])*paramDimensionalityScale[:dimensionsToEstimate]
# # # paramsToVisualize = np.array([98.6711556, 98.08860351, 2.03356001, 41.79815793, 3.29251172])*paramDimensionalityScale[:dimensionsToEstimate]
# # paramsToVisualize = np.array([57.68376572,  57.34567977,  12.86879027,  18.683233,     3.21063928])*paramDimensionalityScale[:dimensionsToEstimate]# optimizerMethod = 'BFGS'
# paramsToVisualize = np.concatenate([np.ones(2), worldUndistortedTrajectoryPoints[0, 0:2], [smootherWorldOrientationAngles[0]]])*paramDimensionalityScale[:dimensionsToEstimate]
if optimizerMethod == 'BFGS' :
    paramDimensionalityScale = np.array([10.0, 10.0, 1.0, 1.0, 100.0, 1.0, 1.0])
else :
    paramDimensionalityScale = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
initParamsToEstimate = np.concatenate([np.ones(2), worldUndistortedTrajectoryPoints[0, 0:2], [smootherWorldOrientationAngles[0]], np.ones(2)])*paramDimensionalityScale
dimensionsToEstimate = 7
fitFootprintToTrackRelativesWithStretchRotFullTraj(initParamsToEstimate, paramDimensionalityScale, worldUndistortedTrajectoryPoints[:, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2],
                                                   smootherWorldOrientationAngles-smootherWorldOrientationAngles[0], cameraIntrinsics, filmedSceneData[DICT_CAMERA_EXTRINSICS],
                                                   cameraUndistortedBBoxes, initParamsToEstimate[4], allObjectFramesUndistortedFGBGTransitionCost, True, initParamsToEstimate[dimensionsToEstimate:7])

0.999668539729 0.513641860971 5.6580752345
1.9993888781 1.02662218599 11.1258518027
2.99914847554 1.53893188266 16.4986932645
3.99893949481 2.05097987535 21.7738225846
4.99873926263 2.562955616 26.9651070581
5.99853008141 3.07495476168 32.1647799922
6.99829396199 3.58726386137 37.4076406161
7.99803458604 4.09985274065 42.6771923175
8.99776881271 4.61273551494 47.9236227607
9.99750750479 5.12510725777 53.092618132
10.9972245442 5.63781340437 58.3303404886
11.9969075555 6.15098311414 63.6310205485
12.9965094605 6.66521887306 69.0315470644
13.9960240077 7.18042226999 74.2652002912
14.9952853577 7.6979041887 79.7942778048
0.999697712489
[  1.           1.          27.85343933 -13.28369617   3.09503034   1.           1.        ] 797.942778048


797.94277804761339

In [290]:
## RENDER "DEPTH" MAP FOR THE GROUND PLANE
pixelLocs = array(np.meshgrid(np.arange(undistortedBgImage.shape[0]), np.arange(undistortedBgImage.shape[1]))).reshape([2, np.prod(undistortedBgImage.shape[0:2])]).T
worldPixelLocs = cameraToWorldGroundPlane(cameraIntrinsics, tmp, pixelLocs[:, ::-1])
pixelsCameraDists = np.linalg.norm(worldPixelLocs-tmpWorldCameraLoc, axis=1)
print np.min(pixelsCameraDists)
depthImage = np.zeros(undistortedBgImage.shape[0:2])
depthImage[pixelLocs[:, 0], pixelLocs[:, 1]] = pixelsCameraDists
horizonLineY = np.argwhere(depthImage == np.max(depthImage)).flatten()[0]
maxDepthAllowed = 3
depthImage[:horizonLineY, :] = maxDepthAllowed
depthImage = np.clip(depthImage, 0, maxDepthAllowed)
figure(); imshow(depthImage, cmap=cm.gray)

0.467810168404


<matplotlib.image.AxesImage at 0x7fcf10b59fd0>

In [864]:
tmpDistsChange = minimizeChangeDiff(angles, cameraIntrinsics, undistortedTrackPointsAndScaleChanges, True)
print tmpDistsChange.shape, undistortedTrackPointsAndScaleChanges.shape
cols = cm.jet((tmpDistsChange-np.min(tmpDistsChange))/np.max(tmpDistsChange-np.min(tmpDistsChange)), alpha=1)
figure(); imshow(undistortedBgImage)
for pointIdx, points in enumerate(undistortedTrackPointsAndScaleChanges) :
    plot([points[0], points[2]], [points[1], points[3]], color=cols[pointIdx, :])
    scatter(points[0], points[1], color=cols[pointIdx, :])
    
cols = cm.jet((undistortedTrackPointsAndScaleChanges[:, -1]-np.min(tmpDistsChange))/np.max(tmpDistsChange-np.min(tmpDistsChange)), alpha=1)
figure(); imshow(undistortedBgImage)
for pointIdx, points in enumerate(undistortedTrackPointsAndScaleChanges) :
    plot([points[0], points[2]], [points[1], points[3]], color=cols[pointIdx, :])
    scatter(points[0], points[1], color=cols[pointIdx, :])

(8868,) (8868, 5)


KeyboardInterrupt: 

## Stuff I tried to get optimization based on Area cost only to work with chumpy

In [21]:
def chWorldToCameraSpace(cameraIntrinsics, cameraExtrinsics, worldPoints) :
    """assumes worldPoints has shape Nx3
       returns Nx2 camPoints in image space"""
    T = cameraIntrinsics.dot(cameraExtrinsics[0:3, :])
    camPoints = T.dot(ch.concatenate([worldPoints, ch.ones([len(worldPoints), 1])], axis=1).T)
    camPoints /= camPoints[-1, :]
    return camPoints.T[:, :-1]

def chPlaceFootprint(width, length, dx, dy, rot) :
    ## the convention is that the footprint is aligned with the x and y axis such that the long side (which is footprintLength long) is aligned with the x axis
    ## the first 4 points are the corners while the remaining two are center point and forward point respectively
    defaultFootprintPoints = ch.array([[-0.5, -0.5, 0.0, 1.0],
                                       [-0.5,  0.5, 0.0, 1.0],
                                       [ 0.5,  0.5, 0.0, 1.0],
                                       [ 0.5, -0.5, 0.0, 1.0],
                                       [ 0.0,  0.0, 0.0, 1.0],
                                       [ 0.5,  0.0, 0.0, 1.0]])*ch.concatenate([length, width, [0, 1]])
    ## here I flip the signs of the sin wrt what's shown here https://en.wikipedia.org/wiki/Rotation_matrix
    ## for reasons I don't really understand
    T = ch.vstack([ch.concatenate([ ch.cos(rot), ch.sin(rot), 0.0, dx ]),
                   ch.concatenate([-ch.sin(rot), ch.cos(rot), 0.0, dy ]),
                   ch.array([[              0.0,         0.0, 1.0, 0.0],
                             [              0.0,         0.0, 0.0, 1.0]])])

    worldFootprintPoints = T.dot(defaultFootprintPoints.T)
    worldFootprintPoints = worldFootprintPoints[:-1, :]/worldFootprintPoints[-1, :]
    return worldFootprintPoints.T
    
def chGetCameraConvexHullCorners(cameraPoints) :
    npCameraPoints = np.array(cameraPoints)
    cameraConvexHullPoints = cv2.convexHull(npCameraPoints)[:, 0, :]
    cornersInConvexHullIdx = []
    ## find which corners of the box volume should be ignored in the cost computation because they are not part of the convex hull
    for point in cameraConvexHullPoints :
        distsToCorners = np.linalg.norm(point[np.newaxis, :] - npCameraPoints, axis=1)
        if not np.any(distsToCorners == 0.0) :
            raise Exception("This corner is not in the convex hull!") ## should never happen because convexHull uses existing points
        cornersInConvexHullIdx.append(int(np.argwhere(distsToCorners == 0.0)))
            
    cornersInConvexHullIdx = ch.array(cornersInConvexHullIdx)
    
    return cameraPoints[cornersInConvexHullIdx, :]

def chPlaceBoxVolume(footprintWidth, footprintLength, volumeHeight, pos, rot, cameraIntrinsics, cameraExtrinsics, footprintIdx=0) :
    """places a footprint in the 3D world and projects the box volume it and volumeHeight defines into image space"""
    
    worldFootprintPoints = chPlaceFootprint(footprintWidth, footprintLength, pos[0], pos[1], rot)
    worldBoxVolumePoints = ch.vstack([worldFootprintPoints[0:4, :], worldFootprintPoints[0:4, :]+ch.concatenate([ch.zeros(2), volumeHeight]), worldFootprintPoints[4:]])
    
    cameraBoxVolumePoints = chWorldToCameraSpace(cameraIntrinsics, cameraExtrinsics, worldBoxVolumePoints)
#     cameraBoxVolumeCorners = np.array(cameraBoxVolumePoints[0:8, :])
#     cameraFootprintCenterPoint = np.array(cameraBoxVolumePoints[8, :])
#     cameraFootprintForwardPoint = np.array(cameraBoxVolumePoints[9, :])

#     edgesIdxs = np.array([[0, 1], [1, 2], [2, 3], [3, 0], [4, 5], [5, 6], [6, 7], [7, 4], [0, 4], [1, 5], [2, 6], [3, 7]])
#     for edgeIdxs in edgesIdxs :
#         plot(cameraBoxVolumeCorners[edgeIdxs, 0], cameraBoxVolumeCorners[edgeIdxs, 1], color=cm.Set1(np.mod(footprintIdx, 9)/8.0))
#     plot([cameraFootprintForwardPoint[0], cameraFootprintCenterPoint[0]], [cameraFootprintForwardPoint[1], cameraFootprintCenterPoint[1]], color=cm.Set1(np.mod(footprintIdx, 9)/8.0), linewidth=1.5)
#     plot(cameraBBox[[0, 2, 2, 0, 0]], cameraBBox[[1, 1, 3, 3, 1]], color=cm.Set1(np.mod(footprintIdx, 9)/8.0), linewidth=0.75)
    
    return worldBoxVolumePoints[0:8, :], cameraBoxVolumePoints[0:8, :]

def chComputeBoxVolumeTotalCost(footprintWidth, footprintLength, volumeHeight, perFramePos, perFrameRot, cameraIntrinsics, cameraExtrinsics, cameraTrackedBBoxes) :
    
    totalProjectedBoxVolumeAABBtoBBoxCost = 0.0
    for pos, rot, cameraBBox in zip(perFramePos, perFrameRot, cameraTrackedBBoxes) :
        worldBoxVolumeCorners, cameraBoxVolumeCorners = chPlaceBoxVolume(footprintWidth, footprintLength, volumeHeight, pos, rot, cameraIntrinsics, cameraExtrinsics)
        aabb = ch.concatenate([ch.min(cameraBoxVolumeCorners, axis=0), ch.max(cameraBoxVolumeCorners, axis=0)])
        projectedBoxVolumeAABBtoBBoxCost = ch.sqrt((aabb[0]-cameraBBox[0])**2+(aabb[1]-cameraBBox[1])**2)
        projectedBoxVolumeAABBtoBBoxCost += ch.sqrt((aabb[2]-cameraBBox[2])**2+(aabb[1]-cameraBBox[1])**2)
        projectedBoxVolumeAABBtoBBoxCost += ch.sqrt((aabb[2]-cameraBBox[2])**2+(aabb[3]-cameraBBox[3])**2)
        projectedBoxVolumeAABBtoBBoxCost += ch.sqrt((aabb[0]-cameraBBox[0])**2+(aabb[3]-cameraBBox[3])**2)

        totalProjectedBoxVolumeAABBtoBBoxCost += ch.log(1+projectedBoxVolumeAABBtoBBoxCost) ## added cauchy loss
#         print projectedBoxVolumeAABBtoBBoxCost, ch.log(1+projectedBoxVolumeAABBtoBBoxCost)
        
    return totalProjectedBoxVolumeAABBtoBBoxCost

In [315]:
def chComputeAllProjectedBoxVolumeAABBtoBBoxCost(params, paramsScale, relativePositions, relativeRots, cameraIntrinsics, cameraExtrinsics, cameraTrackedBBoxes, startRot) :
    footprintWidth, footprintLength, volumeHeight, globalDx, globalDy, globalRot, stretchX, stretchY = params/paramsScale
    
    deltaStartRot = globalRot-startRot/paramsScale[5]
    globalRelativeRot = globalRot+relativeRots
    
    defaultBoxVolumePoints = ch.array([[-0.5, -0.5, 0.0, 1.0],
                                       [-0.5,  0.5, 0.0, 1.0],
                                       [ 0.5,  0.5, 0.0, 1.0],
                                       [ 0.5, -0.5, 0.0, 1.0],
                                       [-0.5, -0.5, 1.0, 1.0],
                                       [-0.5,  0.5, 1.0, 1.0],
                                       [ 0.5,  0.5, 1.0, 1.0],
                                       [ 0.5, -0.5, 1.0, 1.0],
                                       [ 0.0,  0.0, 0.0, 1.0],
                                       [ 0.5,  0.0, 0.0, 1.0]])*ch.concatenate([footprintLength, footprintWidth, volumeHeight, 1])
    
    
    tx = globalDx+(ch.cos(deltaStartRot)*relativePositions[:, 0]+ch.sin(deltaStartRot)*relativePositions[:, 1])*stretchX
    ty = globalDy+(ch.cos(deltaStartRot)*relativePositions[:, 1]-ch.sin(deltaStartRot)*relativePositions[:, 0])*stretchY
    transform = ch.vstack([ch.reshape(ch.vstack([ch.cos(globalRelativeRot) , ch.sin(globalRelativeRot), ch.zeros(len(globalRelativeRot)), tx]), [1, 4, len(globalRelativeRot)]),
                           ch.reshape(ch.vstack([-ch.sin(globalRelativeRot), ch.cos(globalRelativeRot), ch.zeros(len(globalRelativeRot)), ty]), [1, 4, len(globalRelativeRot)]),
                           ch.repeat(ch.array([[[0.0]                      , [0.0]                    , [1.0], [0.0]]]), len(globalRelativeRot), axis=-1),
                           ch.repeat(ch.array([[[0.0]                      , [0.0]                    , [0.0], [1.0]]]), len(globalRelativeRot), axis=-1)])
    worldBoxVolumePoints = defaultBoxVolumePoints.dot(transform)
    T = cameraIntrinsics.dot(cameraExtrinsics[0:3, :])
    cameraBoxVolumePoints = ch.rollaxis(T.dot(worldBoxVolumePoints), 1).T
    print cameraBoxVolumePoints.shape, cameraBoxVolumePoints.dr_wrt(params)
#     cameraBoxVolumePoints = cameraBoxVolumePoints[:, :-1, :]/cameraBoxVolumePoints[:, -1:, :]
    
#     aabbs = ch.hstack([ch.min(cameraBoxVolumePoints, axis=2).T, ch.max(cameraBoxVolumePoints, axis=2).T])
#     projectedBoxVolumeAABBtoBBoxCost = ch.sqrt((aabbs[:, 0]-cameraTrackedBBoxes[:, 0])**2+(aabbs[:, 1]-cameraTrackedBBoxes[:, 1])**2)
#     projectedBoxVolumeAABBtoBBoxCost += ch.sqrt((aabbs[:, 2]-cameraTrackedBBoxes[:, 2])**2+(aabbs[:, 1]-cameraTrackedBBoxes[:, 1])**2)
#     projectedBoxVolumeAABBtoBBoxCost += ch.sqrt((aabbs[:, 2]-cameraTrackedBBoxes[:, 2])**2+(aabbs[:, 3]-cameraTrackedBBoxes[:, 3])**2)
#     projectedBoxVolumeAABBtoBBoxCost += ch.sqrt((aabbs[:, 0]-cameraTrackedBBoxes[:, 0])**2+(aabbs[:, 3]-cameraTrackedBBoxes[:, 3])**2)
    
#     return ch.sum(ch.log(1+projectedBoxVolumeAABBtoBBoxCost))

chCameraIntrinsics = ch.array(cameraIntrinsics)
chCameraExtrinsics = ch.array(filmedSceneData[DICT_CAMERA_EXTRINSICS])
chParams = ch.array(initParamsToEstimate)
# chFootprintWidth, chFootprintLength, chVolumeHeight, chGlobalDx = ch.array(initParamsToEstimate[0]), ch.array(initParamsToEstimate[1]), ch.array(initParamsToEstimate[2]), ch.array(initParamsToEstimate[3])
# chGlobalDy, chGlobalRot, chStretchX, chStretchY = ch.array(initParamsToEstimate[4]), ch.array(initParamsToEstimate[5]), ch.array(initParamsToEstimate[6]), ch.array(initParamsToEstimate[7])
chParamsScale = ch.array(paramDimensionalityScale)
framesToUse = np.arange(0, len(worldUndistortedTrajectoryPoints), 1)
chRelativePositions = ch.array(worldUndistortedTrajectoryPoints[framesToUse, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2])
chRelativeRots = ch.array(smootherWorldOrientationAngles[framesToUse]-smootherWorldOrientationAngles[0])
chCameraUndistortedBBoxes = ch.array(cameraUndistortedBBoxes[framesToUse, :])
chInitialRotation = ch.array(initialRotation)

# sys.setrecursionlimit(9999)
# chMinimizeFunction = chComputeAllProjectedBoxVolumeAABBtoBBoxCost(ch.concatenate([chFootprintWidth, chFootprintLength, chVolumeHeight, chGlobalDx, chGlobalDy, chGlobalRot, chStretchX, chStretchY]),
#                                                                   chParamsScale, chRelativePositions, chRelativeRots, chCameraIntrinsics, chCameraExtrinsics, chCameraUndistortedBBoxes, chInitialRotation)
chMinimizeFunction = chComputeAllProjectedBoxVolumeAABBtoBBoxCost(chParams, chParamsScale, chRelativePositions, chRelativeRots, chCameraIntrinsics, chCameraExtrinsics, chCameraUndistortedBBoxes, chInitialRotation)

 (295, 3, 10)Traceback (most recent call last):
  File "/home/ilisescu/anaconda2/lib/python2.7/site-packages/chumpy/ch.py", line 638, in _superdot
    return lhs.dot(rhs)
  File "/home/ilisescu/anaconda2/lib/python2.7/site-packages/scipy/sparse/base.py", line 302, in dot
    return self * other
  File "/home/ilisescu/anaconda2/lib/python2.7/site-packages/scipy/sparse/base.py", line 368, in __mul__
    raise ValueError('dimension mismatch')
ValueError: dimension mismatch


ValueError: dimension mismatch

In [22]:
chCameraIntrinsics = ch.array(cameraIntrinsics)
chCameraExtrinsics = ch.array(filmedSceneData[DICT_CAMERA_EXTRINSICS])
chParams = ch.array(initParamsToEstimate+np.random.rand(8)*0.0001)
# chFootprintWidth, chFootprintLength, chVolumeHeight, chGlobalDx = ch.array(initParamsToEstimate[0]), ch.array(initParamsToEstimate[1]), ch.array(initParamsToEstimate[2]), ch.array(initParamsToEstimate[3])
# chGlobalDy, chGlobalRot, chStretchX, chStretchY = ch.array(initParamsToEstimate[4]), ch.array(initParamsToEstimate[5]), ch.array(initParamsToEstimate[6]), ch.array(initParamsToEstimate[7])
chParamsScale = ch.array(paramDimensionalityScale)
framesToUse = np.arange(0, len(worldUndistortedTrajectoryPoints), 1)
chRelativePositions = ch.array(worldUndistortedTrajectoryPoints[framesToUse, 0:2]-worldUndistortedTrajectoryPoints[0, 0:2])
chRelativeRots = ch.array(smootherWorldOrientationAngles[framesToUse]-smootherWorldOrientationAngles[0])
chCameraUndistortedBBoxes = ch.array(cameraUndistortedBBoxes[framesToUse, :])
chInitialRotation = ch.array(initialRotation)

def chFitVolumeToTrackRelativesWithStretchRotFullTraj(params, paramsScale, relativePositions, relativeRots, cameraIntrinsics, cameraExtrinsics, cameraTrackedBBoxes, initialRot) :
    """params are (footprintWidth, footprintLength, volumeHeight, globalDx, globalDy, globalRot, stretchX, stretchY)"""
    params /= paramsScale
    
    footprintWidth, footprintLength, volumeHeight = params[0:3]
    rot = params[5] - initialRot/paramsScale[5]
    rotatedRelativePositions = ch.array([[ch.cos(rot) , ch.sin(rot), 0.0, 0.0],
                                         [-ch.sin(rot), ch.cos(rot), 0.0, 0.0],
                                         [0.0         , 0.0        , 1.0, 0.0],
                                         [0.0         , 0.0        , 0.0, 1.0]]).dot(ch.hstack([relativePositions, ch.repeat(ch.array([[0.0, 1.0]]), len(relativePositions), axis=0)]).T)
    rotatedRelativePositions = rotatedRelativePositions[:-1, :]/rotatedRelativePositions[-1, :]
    
    perFramePos = rotatedRelativePositions[0:2, :].T.dot(ch.eye(2)*ch.concatenate([params[6], params[7]]))+params[3:5]
    perFrameRot = relativeRots+params[5]
    
    cost = chComputeBoxVolumeTotalCost(footprintWidth, footprintLength, volumeHeight, perFramePos, perFrameRot, cameraIntrinsics, cameraExtrinsics, cameraTrackedBBoxes)
    print params, cost, "ch"
    return cost
sys.setrecursionlimit(9999)
# chMinimizeFunction = chFitVolumeToTrackRelativesWithStretchRotFullTraj(ch.concatenate([chFootprintWidth, chFootprintLength, chVolumeHeight, chGlobalDx, chGlobalDy, chGlobalRot, chStretchX, chStretchY]),
#                                                                        chParamsScale, chRelativePositions, chRelativeRots, chCameraIntrinsics, chCameraExtrinsics, chCameraUndistortedBBoxes, chInitialRotation)
chMinimizeFunction = chFitVolumeToTrackRelativesWithStretchRotFullTraj(chParams, chParamsScale, chRelativePositions, chRelativeRots, chCameraIntrinsics, chCameraExtrinsics, chCameraUndistortedBBoxes, chInitialRotation)

NameError: name 'initParamsToEstimate' is not defined

In [405]:
# sys.setrecursionlimit(9999)
np.array([chFootprintWidth, chFootprintLength, chVolumeHeight, chGlobalDx, chGlobalDy, chGlobalRot, chStretchX, chStretchY]).flatten()
print np.array(chParams).flatten()

[  1.43958901   1.61274087   1.32846944  27.83606858 -13.16678149
   2.92947161   1.0376362    0.96044603]


In [401]:
startTime = time.time()
# ch.minimize(chMinimizeFunction, x0=[chFootprintWidth, chFootprintLength, chVolumeHeight, chGlobalDx, chGlobalDy, chGlobalRot, chStretchX, chStretchY], method='dogleg', options={'maxiter': 50})
ch.minimize(chMinimizeFunction, x0=[chParams], method='dogleg', options={'maxiter': 50})
print "DONE in", time.time()-startTime

2.26e+06
2.18e+06
2.17e+06
2.16e+06
2.15e+06
2.14e+06
2.13e+06
2.12e+06
2.11e+06
2.10e+06
2.10e+06
2.09e+06
2.08e+06
2.07e+06
2.06e+06
2.05e+06
2.05e+06
2.04e+06
2.03e+06
2.02e+06
2.02e+06
2.00e+06
2.00e+06
1.99e+06
1.99e+06
1.98e+06
1.97e+06
1.96e+06
1.95e+06
1.95e+06
1.95e+06
1.93e+06
1.90e+06
1.90e+06
1.89e+06
1.88e+06
1.86e+06
1.85e+06
1.84e+06
1.82e+06
1.82e+06
1.81e+06
1.81e+06
1.80e+06
1.80e+06
1.79e+06
1.79e+06
1.78e+06
1.78e+06
1.78e+06
1.77e+06


DONE in 410.115784883


### EXPERIMENTS USING SEGMENTED BLOBS AND SHIT

In [92]:
## ideally, I should combine the segmentation with the clustering
## and the clustering should be extendable to use 1 or more of any sources of information (e.g. pixel connected components blobs, object detectors)
## moreover, it should enforce a bunch of assumptions, e.g. give priority to objects closer to the camera and to the fact that objects tend to separate but not merge and 
## consider that pixel area changes based on distance to the camera and much more drastically when objects get occluded (so there should be some smoothing on occupied pixel area)

frameLocs = np.sort(glob.glob(dataLoc+os.sep+"frame-*.png"))

numNeighbouringFrames = 2

# HOW DO I DO DIS SHITE


In [7]:
######## HACK: just loading a subset of the images out of the corpus of images because I cba to deal with all of them
## load images
allFrames = np.zeros([bgImage.shape[0], bgImage.shape[1], bgImage.shape[2], 70], dtype=np.uint8)
for idx, frameLoc in enumerate(frameLocs[200:270]) :
    allFrames[:, :, :, idx] = np.array(Image.open(frameLoc), dtype=np.uint8)[:, :, :bgImage.shape[2]]

## segment them
# allFramesSegmentations = np.zeros([allFrames.shape[0], allFrames.shape[1], allFrames.shape[3]], dtype=np.uint8)
# for idx in np.arange(numNeighbouringFrames/2, allFrames.shape[3]-numNeighbouringFrames/2) :
#     segmentation = backgroundCut3D(bgImage, allFrames[:, :, :, idx-numNeighbouringFrames/2:idx+numNeighbouringFrames/2+1])
    
#     ## HACK: this only makes sense if numNeighbouringFrames == 2
#     if idx == 1 :
#         allFramesSegmentations[:, :, idx-1:idx+1] = segmentation[:, :, :-1]
#     elif idx == allFrames.shape[3]-2 :
#         allFramesSegmentations[:, :, idx:idx+2] = segmentation[:, :, 1:]
#     else :
#         allFramesSegmentations[:, :, idx] = segmentation[:, :, 1]
        
#     sys.stdout.write("Segmentation progress: %d%%   \r" % (float(idx+1)/(allFrames.shape[3]-numNeighbouringFrames/2)*100))
#     sys.stdout.flush() 
        
# ## close small holes and remove small clusters
# for idx in np.arange(allFramesSegmentations.shape[-1]) :
#     allFramesSegmentations[:, :, idx] = cv2.morphologyEx(allFramesSegmentations[:, :, idx].astype(float), cv2.MORPH_CLOSE, np.ones((5,5),np.uint8), iterations=2).astype(np.uint8)

# np.save("well_grounded_segmentation_back.npy", allFramesSegmentations)
allFramesSegmentations = np.load("well_grounded_segmentation_back.npy")

In [8]:
############# REMOVING SMALL BLOBS #############
blobMinArea= 35
for i in np.arange(allFramesSegmentations.shape[-1]) :
    labelling = measure.label(allFramesSegmentations[:, :, i])
    for region in measure.regionprops(labelling) :
        if len(np.argwhere(labelling == region["label"])) < blobMinArea :
            allFramesSegmentations[labelling == region["label"], i] = 0

### SECOND APPROACH WHERE I LEARN A 1-VS-ALL MODEL PER BLOB AND USE THAT TO FIGURE OUT WHICH MODELS LEARN ABOUT 1 CLUSTER AND WHICH LEARN ABOUT MORE THAN 1

In [583]:
def getGridPairIndices(width, height) :
## deal with pixels that have East and South neighbours i.e. all of them apart from last column and last row
    pairIdxs = np.zeros(((width*height-(width+height-1))*2, 2), dtype=int)
## each column contains idxs [0, h-2]
    idxs = np.arange(0, height-1, dtype=int).reshape((height-1, 1)).repeat(width-1, axis=-1)
## each column contains idxs [0, h-2]+h*i where i is the column index 
## (i.e. now I have indices of all nodes in the grid apart from last col and row)
    idxs += (np.arange(0, width-1)*height).reshape((1, width-1)).repeat(height-1, axis=0)
    # figure(); imshow(idxs)
## now flatten idxs and repeat once so that I have the idx for each node that has E and S neighbours twice
    idxs = np.ndarray.flatten(idxs.T).repeat(2)
## idxs for each "left" node (that is connected to the edge) are the ones just computed
    pairIdxs[:, 0] = idxs
## idxs for each "right" node are to the E and S so need to sum "left" idx to height and to 1
# print np.ndarray.flatten(np.array([[patchSize[0]], [1]]).repeat(np.prod(patchSize)-(np.sum(patchSize)-1), axis=-1).T)
    pairIdxs[:, 1] = idxs + np.ndarray.flatten(np.array([[height], [1]]).repeat(width*height-(width+height-1), axis=-1).T)
    
## deal with pixels that have only East neighbours
## get "left" nodes
    leftNodes = np.arange(height-1, width*height-1, height)
## now connect "left" nodes to the nodes to their East (i.e. sum to height) and add them to the list of pair indices
    pairIdxs = np.concatenate((pairIdxs, np.array([leftNodes, leftNodes+height]).T), axis=0)
    
## deal with pixels that have only South neighbours
## get "top" nodes
    topNodes = np.arange(width*height-height, width*height-1)
## now connect "to" nodes to the nodes to their South (i.e. sum to 1) and add them to the list of pair indices
    pairIdxs = np.concatenate((pairIdxs, np.array([topNodes, topNodes+1]).T), axis=0)
    
    return pairIdxs

def backgroundCut3D(bgImage, images, k1=30.0/255.0, k2=60.0/255.0, K=5.0/255.0, sigmaZ=10.0/255.0, doVisualize=False) :
    """ Given a stack of temporally sequential images and a static background bgImage, it computes temporally consistent fg/bg segmentation
    
    based on BGcut [Sun et al. ECCV2006] with modifications seen in Video Synposis [Pritch et al. PAMI2008]"""
    
    if np.all(bgImage.shape != images.shape[:-1]) :
        raise Exception("The two specified patches have different shape so graph cannot be built")
    
    height, width, channels, numImages = images.shape
    maxCost = 10000000.0
    
    bgPixels = bgImage.reshape([height*width, channels], order='F')/255.0
    
    s = time.time()
    ## build graph
    numLabels = 2
    gm = opengm.gm(np.ones(height*width*numImages,dtype=opengm.label_type)*numLabels)
    
    for i in np.arange(numImages) :
        imagePixels1 = images[:, :, :, i].reshape([height*width, channels], order='F')/255.0


        ############################### COMPUTE UNARIES ###############################
        unaries = np.zeros((height*width,numLabels))

        dr = np.sqrt(np.sum((imagePixels1-bgPixels)**2.0, axis=-1))

        unaries[dr<=k1, 1] = (k1-dr)[dr<=k1]
        unaries[dr>k2, 0] = maxCost
        unaries[np.all(np.array([dr>k1, k2>dr]), axis=0), 0] = (dr-k1)[np.all(np.array([dr>k1, k2>dr]), axis=0)]

        # add functions
        fids = gm.addFunctions(unaries)
        # add first order factors
        gm.addFactors(fids, np.arange(i*height*width, (i+1)*height*width, 1))


        ############################### COMPUTE PAIRWISE ###############################
        for j in np.arange(2) :
            if j == 0 or (i > 0 and j ==1) :
                pairIndices = getGridPairIndices(width, height)

                imagePixels2 = imagePixels1
                if i > 0 and j == 1 :
                    ## in this case compute pairwise between temporally neighbouring pixels in current image and previous one
                    pairIndices = np.concatenate([[np.arange(width*height)], [np.arange(width*height)]]).T
                    imagePixels2 = images[:, :, :, i-1].reshape([height*width, channels], order='F')/255.0

                pairwise = np.zeros(len(pairIndices))

                zrs = np.max([np.sqrt(np.sum((imagePixels2[pairIndices[:, 0], :]-bgPixels[pairIndices[:, 0], :])**2.0, axis=-1)),
                              np.sqrt(np.sum((imagePixels1[pairIndices[:, 1], :]-bgPixels[pairIndices[:, 1], :])**2.0, axis=-1))], axis=0)

                imPixelsDiff = np.sqrt(np.sum((imagePixels2[pairIndices[:, 0], :]-imagePixels1[pairIndices[:, 1], :])**2.0, axis=-1))
                bgPixelsDiff = np.sqrt(np.sum((bgPixels[pairIndices[:, 0], :]-bgPixels[pairIndices[:, 1], :])**2.0, axis=-1))
                drs = imPixelsDiff/(1+((bgPixelsDiff/K)**2.0)*np.exp(-(zrs**2)/sigmaZ))
                beta = 2.0/np.mean(imPixelsDiff)
                pairwise = np.exp(-beta*drs)

                ## visualize
                if doVisualize :
                    contrastMap = np.zeros(len(bgPixels))
                    for i in np.arange((width-1)*(height-1)*2) :
                        contrastMap[pairIndices[i, 0]] += drs[i]
                    figure(); imshow(np.reshape(np.sqrt(np.copy(contrastMap)), [height, width], 'F'))

                # add functions
                fids = gm.addFunctions(np.array([[0.0, 1.0],[1.0, 0.0]]).reshape((1, 2, 2)).repeat(len(pairwise), axis=0)*
                                       pairwise.reshape((len(pairwise), 1, 1)).repeat(2, axis=1).repeat(2, axis=2))

                if j == 0 :
                    ## in this case compute pairwise between neighbouring pixels in the current image
                    # add second order factors
                    gm.addFactors(fids, pairIndices+(i*height*width))
                elif i > 0 and j == 1 :
                    ## in this case compute pairwise between temporally neighbouring pixels in current image and previous one
                    pairIndices[:, 0] += ((i-1)*height*width)
                    pairIndices[:, 1] += (i*height*width)
                    gm.addFactors(fids, pairIndices)
    
    if doVisualize :
        print(gm)
    
    
    graphCut = opengm.inference.GraphCut(gm=gm)
    graphCut.infer()    
    labels = np.array(graphCut.arg(), dtype=int)
    reshapedLabels = np.reshape(np.copy(labels), [height, width, numImages], 'F')
    
    return reshapedLabels

def multiObjectModelScore(clusterSimilarities, a=2.0, b=2.5) :
    ## this function should return a high value if there is only 1 high value in clusterSimilarities or if they are all small
    ## i.e the higher the diff between highest value and the rest or the smaller the highest value, the higher the returned value
    maxClusterIdx = np.argmax(clusterSimilarities)
    maxClusterVal = clusterSimilarities[maxClusterIdx]
    idxs = np.array([[maxClusterIdx, i] for i in np.arange(len(clusterSimilarities)) if i != maxClusterIdx])
    diffs = clusterSimilarities[idxs[:, 0]]-clusterSimilarities[idxs[:, 1]]
    
    ############### THIS VISUALIZES THE FUNCTION I'M RETURNING FOR A BUNCH OF DIFFERENT VALUES OF maxClusterVal AND diffs ###############
#     gridSize = 100
#     xs, ys = np.arange(gridSize)/float(gridSize), np.arange(gridSize)/float(gridSize)
#     xs, ys = np.meshgrid(xs, ys)
#     xs = xs.flatten() # values of diffs
#     ys = ys.flatten() # values of maxClusterVale
#     result = (((ys*2.0-1.0)*(xs*3.0-2.0)+2.0)/2.0)**a
#     figure(); imshow(np.exp(-result.reshape([gridSize, gridSize])/b)); xlim([0, gridSize]); ylim([0, gridSize])
    
    return np.exp(-np.mean((((maxClusterVal*2.0-1.0)*(diffs*(1+b)-b)+b)/2.0)**a))
    
# print multiObjectModelScore(np.array([0.69460287, 0.00132064, 0.41139595])), multiObjectModelScore(np.array([0.75207736, 0.18222587, 0.03754402])), multiObjectModelScore(np.array([0.09748928, 0.6943132, 0.85043667])), 
# print multiObjectModelScore(np.array([0.89113196, 0.9677084])), multiObjectModelScore(np.array([0.9697229, 0.0430777])), multiObjectModelScore(np.array([0.0993084, 0.0223453]))

In [4]:
def functionToMinimize(p, sameFrameClustersIdxs, differentFrameClustersIdxs, differentFrameClustersSimilarities, differentFrameClustersSimilarityWeights, alpha=0.5, targetDistance=1.0) :
    """The function is minimized when points p associated to clusters found in the same frames (indexed by sameFrameClustersIdxs) or in different frames (indexed by differentFrameClustersIdxs)
            but dissimilar (as stored in differentFrameClustersSimilarities) are far away and points associated to similar clusters in different frames that are close by in time
            (as stored in differentFrameClustersSimilarityWeights) are placed close by."""
    
    currentPoints = np.reshape(p, [len(p)/2, 2])
    
    ## this is the cost of placing clusters found in the same frame close by
    ## the distance between the points p corresponding to clusters found in the same frame should be as far from each other as the targetDistance
    if len(sameFrameClustersIdxs) > 0 :
#         sameFrameCost = np.sum((np.sum((currentPoints[sameFrameClustersIdxs[:, 0], :] - currentPoints[sameFrameClustersIdxs[:, 1], :])**2, axis=1) - targetDistance**2)**2)/len(sameFrameClustersIdxs)
        ## this should have the same effec as above but nicer to look at
        sameFrameCost = np.sum(np.abs(squaredTargetDistance-squaredDistances))/len(sameFrameClustersIdxs)
    else :
        sameFrameCost = 0.0
    
    ## the clusters in different frames should be close by if they are similar and far away if they are dissimilar
    ## I assume differentFrameClustersSimilarities is in [0, 1]
    if len(differentFrameClustersIdxs) > 0 :
        squaredDistances = np.sum((currentPoints[differentFrameClustersIdxs[:, 0], :] - currentPoints[differentFrameClustersIdxs[:, 1], :])**2, axis=1)
        squaredTargetDistance = targetDistance**2
        if False :
            ## forcing similar clsuters to stick together and dissimilar ones to maximise distance
            functionVals = squaredDistances**(differentFrameClustersSimilarities*2.0-1.0)
        elif False :
            ## forcing similar clsuters to stick together and dissimilar ones to be as far away as targetDistance
            functionVals = (squaredDistances - (1.0-differentFrameClustersSimilarities)*squaredTargetDistance)**2
        elif False :
            ## my unnecessarily complicated, quadratic based cost function
            a = (1.0-differentFrameClustersSimilarities)**5#((1.0-possibleSimilarities)*2.0-1.0)*10.0 ## sets the steepness
            b = -squaredTargetDistance*2.0*a ## rearranged x=-b/(2a) where x is the target distance because that's where I want the minimum of the parabola to be
            c = -a*squaredTargetDistance**2 - b*squaredTargetDistance

            functionVals = a*squaredDistances**2 + b*squaredDistances + c
            functionVals += squaredDistances*differentFrameClustersSimilarities/2.0*squaredTargetDistance
            functionVals *= 10.0
#             print functionVals[417], differentFrameClustersSimilarities[417], differentFrameClustersSimilarityWeights[417], functionVals[417]*differentFrameClustersSimilarityWeights[417]

        else :
            ## peter's more manageable approach
            functionVals = differentFrameClustersSimilarities*squaredDistances + (1.0-differentFrameClustersSimilarities)*np.abs(squaredTargetDistance-squaredDistances)
            
        differentFrameCost = np.sum(functionVals*differentFrameClustersSimilarityWeights)/float(len(differentFrameClustersSimilarities))
    else :
        differentFrameCost = 0.0
#     print sameFrameCost, differentFrameCost
    return alpha*sameFrameCost + (1.0-alpha)*differentFrameCost

# functionToMinimize(clusterPoints.flatten(), sameFrameClustersIdxs, differentFrameClustersIdxs, differentFrameClustersSimilarities, differentFrameClustersSimilarityWeights, 0.25, targetDistance)
########################## CAN USE THIS TO VISUALIZE THE FUNCTION TO MINIMIZE ##########################
# targetDistance = 1.0
# possibleDistances = np.arange(targetDistance*200.0 + 1.0)/100.0
# possibleSimilarities = np.arange(101.0)/100.0
# figSize = np.array([len(possibleDistances), len(possibleSimilarities)])
# possibleDistances, possibleSimilarities = np.array(np.meshgrid(possibleDistances, possibleSimilarities)).reshape([2, np.prod(figSize)])

# ## my quadratic based cost function
# if False :
#     a = (1.0-possibleSimilarities)**5#((1.0-possibleSimilarities)*2.0-1.0)*10.0 ## sets the steepness
#     b = -targetDistance*2.0*a ## rearranged x=-b/(2a) where x is the target distance because that's where I want the minimum of the parabola to be
#     c = -a*targetDistance**2 - b*targetDistance


#     functionVals = a*possibleDistances**2 + b*possibleDistances +c ## function to minimize
#     # # figure(); imshow(functionVals.reshape(figSize[::-1])); xlim([0, figSize[0]]); ylim([0, figSize[1]])
#     functionVals += possibleDistances*possibleSimilarities/2.0*targetDistance
# else :
#     ## peter's annoyingly simple alternative
#     functionVals = possibleSimilarities*possibleDistances + (1.0-possibleSimilarities)*np.abs(targetDistance-possibleDistances)
# figure(); imshow(functionVals.reshape(figSize[::-1])*10); xlim([0, figSize[0]]); ylim([0, figSize[1]])

In [453]:
def splitMergedCluster(pixelFGColors, pixelBGColors, pixelIdxs, pixelObjectPriors, predictedObjectLocationPriors, k1=30.0/255.0, k2=60.0/255.0, K=5.0/255.0, sigmaZ=10.0/255.0, doVisualize=False) :
    """ Assigns an object label to pixels indexed by pixelIdxs."""
    
    if np.all(pixelFGColors.shape != pixelBGColors.shape) :
        raise Exception("The two specified patches have different shape so graph cannot be built")
        
    if pixelObjectPriors.shape[-1] == 0 :
        raise Exception("Object prior with shape[-1] == 0 specified.")
        
    maxCost = 10000000.0
    
    s = time.time()
    ## build graph
    numLabels = pixelObjectPriors.shape[-1] + 1
    gm = opengm.gm(np.ones(pixelFGColors.shape[0], dtype=opengm.label_type)*numLabels)


    ############################### COMPUTE UNARIES ###############################
#     unaries = np.hstack([np.ones([len(pixelObjectPriors), 1])/float(numLabels), pixelObjectPriors])
    unaries = np.hstack([1.0-np.sum(pixelObjectPriors, axis=-1)[:, np.newaxis], pixelObjectPriors])
    unaries = unaries/np.sum(unaries, axis=-1)[:, np.newaxis]    
    unaries = 0.3*np.exp(-unaries)+0.7*np.exp(-np.hstack([np.zeros([len(predictedObjectLocationPriors), 1]), predictedObjectLocationPriors]))

    # add functions
    fids = gm.addFunctions(unaries)
    # add first order factors
    gm.addFactors(fids, np.arange(len(pixelIdxs)))

    ############################### COMPUTE PAIRWISE ###############################
    ## find indices of pixels with a distance of 1 by making a distance matrix and then I can use argwhere to find indices.
    pairIndices = np.argwhere((np.sum((pixelIdxs[:, :, np.newaxis]-pixelIdxs[:, :, np.newaxis].T)**2, axis=1) == 1).astype(int) * np.triu(np.ones(len(pixelIdxs)), k=1))
    
    pairwise = np.zeros(len(pairIndices))
    
    zrs = np.max([np.sqrt(np.sum((pixelFGColors[pairIndices[:, 0], :]/255.0-pixelFGColors[pairIndices[:, 0], :]/255.0)**2.0, axis=-1)),
                  np.sqrt(np.sum((pixelBGColors[pairIndices[:, 1], :]/255.0-pixelBGColors[pairIndices[:, 1], :]/255.0)**2.0, axis=-1))], axis=0)
    
    imPixelsDiff = np.sqrt(np.sum((pixelFGColors[pairIndices[:, 0], :]/255.0-pixelFGColors[pairIndices[:, 1], :]/255.0)**2.0, axis=-1))
    bgPixelsDiff = np.sqrt(np.sum((pixelBGColors[pairIndices[:, 0], :]/255.0-pixelBGColors[pairIndices[:, 1], :]/255.0)**2.0, axis=-1))
    drs = imPixelsDiff/(1+((bgPixelsDiff/K)**2.0)*np.exp(-(zrs**2)/sigmaZ))
    beta = 2.0/np.mean(imPixelsDiff)
    pairwise = 0.7*np.exp(-beta*drs) + 0.3*np.ones_like(drs)
    
    ## visualize
    if False and doVisualize :
        contrastMap = np.zeros(len(pixelIdxs))
        for i in np.arange(len(pairIndices)) :
            contrastMap[pairIndices[i, 0]] += pairwise[i]
        tmp = -np.ones(bgImage.shape[0:2])
        tmp[pixelIdxs[:, 0], pixelIdxs[:, 1]] = contrastMap
        figure(); imshow(tmp, interpolation='nearest')
    
    # add functions
    fids = gm.addFunctions((1.0-np.eye(numLabels)).reshape((1, numLabels, numLabels)).repeat(len(pairwise), axis=0)*
                           pairwise.reshape((len(pairwise), 1, 1)).repeat(numLabels, axis=1).repeat(numLabels, axis=2))
    # add second order factors
    gm.addFactors(fids, pairIndices)
    
    solver = opengm.inference.TrwsExternal(gm=gm)
    solver.infer()    
    labels = np.array(solver.arg(), dtype=int)
    
    if doVisualize :
        print(gm)
        print "Labelling cost:", gm.evaluate(labels)
    
    return labels#, unaries, (1.0-np.eye(numLabels)).reshape((1, numLabels, numLabels)).repeat(len(pairwise), axis=0)*pairwise.reshape((len(pairwise), 1, 1)).repeat(numLabels, axis=1).repeat(numLabels, axis=2)

In [9]:
## find connected pixel clusters in 3D
allLabeledSegmentations3D = measure.label(allFramesSegmentations)
## find connected pixel clusters in each frame individually
allLabeledSegmentations = np.zeros_like(allLabeledSegmentations3D)
for idx in np.arange(allLabeledSegmentations3D.shape[-1]) :
    allLabeledSegmentations[:, :, idx] = measure.label(allFramesSegmentations[:, :, idx])
    
## find indices of all clusters found in 3D
clustersFound3DIds = np.arange(1, np.max(allLabeledSegmentations3D)+1)
## find which frames each cluster ID is present in
clustersFound3DFrameRanges = np.array([np.any(allLabeledSegmentations3D.reshape([bgImage.shape[0]*bgImage.shape[1], allLabeledSegmentations3D.shape[-1]]) == clusterIdx, axis=0) for clusterIdx in clustersFound3DIds])
## based on the above result save the ranges only
clustersFound3DFrameRanges = np.array([[np.min(np.argwhere(clusterIdxFramePresence)), np.max(np.argwhere(clusterIdxFramePresence))] for clusterIdxFramePresence in clustersFound3DFrameRanges])

In [10]:
figure(); imshow(allLabeledSegmentations3D[:, :, 0])
print clustersFound3DIds

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48]


In [466]:
def getPixelFeats(image, idxs, doNormalize=True, doUseCoords=True) :
    ## colors
    feats = image[idxs[:, 0], idxs[:, 1], :]
    if doNormalize :
        feats = feats/255.0
    ## pixel coordinates
    if doUseCoords :
        if doNormalize :
            feats = np.hstack([feats, idxs/np.array(image.shape[0:2], dtype=float).reshape([1, 2])])
        else :
            feats = np.hstack([feats, idxs])
    return feats

doPerformTests = True
## if True, the pixel features used to learn the color model, are normalized to a [0, 1] interval
normalizeFeats = False
## if True, the pixel coordinates are used as features for the color model along with the RGB values
useCoordsAsFeats = False
## if True, when matching single object blobs, they are first clustered using SpectralClustering based on an affinity matrix computed from perModelObjectSimilarity
## the clustering is used to initalize the points p, randomly, around cluster centers placed equidistantly along a circle
doUseClusteredInitialization = False


## this will contain the segmentations with unique cluster ids per moving object assigned to each FG pixel
## merged clusters of more objects from allLabeledSegmentations3D should be split according to objects and pixels belonging to the same object over time should have the same consistent cluster ID
allLabeledSegmentedObjects = np.zeros_like(allLabeledSegmentations)
allObjectsColorModels = {}
## take each cluster from allLabeledSegmentations3D and find how many clusters it splits into over time
for current3DClusterId, frameRange in zip(clustersFound3DIds, clustersFound3DFrameRanges) :
    if current3DClusterId == 41 : ## HACK: only consider this cluster cos I wanna test my code
        print "Starting CLUSTER ID {0}...".format(current3DClusterId); clusterStartTime = time.time()
        ## here I will keep track of which clusters need splitting because they contain multiple objects
        perFrameMergedClusters = {}
        print "Fitting Color Models to clustered pixels..."; startTime = time.time()
        colorModels = {}
        perFrameClusterIds = {}
        for frameIdx in np.arange(frameRange[0], frameRange[1]+1) :
            ## find pixels in current image that belong to the current cluster found in 3D
            pixelIdxs = np.array(np.argwhere(allLabeledSegmentations3D[:, :, frameIdx] == current3DClusterId))
            ## find cluster ids (found using current frame only) assigned to pixels from currently considered cluster (found doing the 3D clustering)
            pixelClustersInFrameIds = list(set(allLabeledSegmentations[pixelIdxs[:, 0], pixelIdxs[:, 1], frameIdx]))
            print "Clusters in frame", frameIdx, pixelClustersInFrameIds,
            
            ## find frames where the pixels belonging to the same 3D cluster are separated and train some sort of color model to distinguish between them
            if len(pixelClustersInFrameIds) > 1 :
                print " --- Trained Model accuracies: [",
                for pixelClustersInFrameId in pixelClustersInFrameIds :
                    ## find indices of pixels assigned to current cluster id
                    currentClusterPixelInFrameIdxs = np.array(np.argwhere(allLabeledSegmentations[:, :, frameIdx] == pixelClustersInFrameId))
                    ## find indices of pixels assigned to all the other cluster ids
                    otherClustersInFrameIds = [clusterId for clusterId in pixelClustersInFrameIds if clusterId != pixelClustersInFrameId]
                    otherClustersPixelInFrameIdxs = np.array(np.argwhere(np.any(allLabeledSegmentations[:, :, frameIdx][:, :, np.newaxis]
                                                                                == np.array(otherClustersInFrameIds).reshape([1, 1, len(otherClustersInFrameIds)]), axis=-1)))
                    ## setup pixel features and target indices for learning the color model
                    pixelFeats = getPixelFeats(allFrames[:, :, :, frameIdx], np.vstack([otherClustersPixelInFrameIdxs, currentClusterPixelInFrameIdxs]), doNormalize=normalizeFeats, doUseCoords=useCoordsAsFeats)
#                     print "README BIATCH", frameIdx, np.vstack([otherClustersPixelInFrameIdxs, currentClusterPixelInFrameIdxs]), pixelFeats
                    
                    ## pixels belonging to clusters other than pixelClustersInFrameId will be classified as 0 and the pixels belonging to cluster pixelClustersInFrameId will be classified as 1
                    pixelTargetClusters = np.repeat([0.0, 1.0], [len(otherClustersPixelInFrameIdxs), len(currentClusterPixelInFrameIdxs)])

                    ## fit color model
                    colorModels[(frameIdx, pixelClustersInFrameId)] = ensemble.ExtraTreesClassifier()
                    colorModels[(frameIdx, pixelClustersInFrameId)].fit(X=pixelFeats, y=pixelTargetClusters)
                    
                    ## the amount of pixels in current cluster and the remaining other clusters should be the same as in the 3D cluster at the same frame
                    if doPerformTests and len(np.argwhere(allLabeledSegmentations3D[:, :, frameIdx] == current3DClusterId)) != len(otherClustersPixelInFrameIdxs)+len(currentClusterPixelInFrameIdxs) :
                        raise Exception("Shape mismatch between pixels used for training and all pixels belonging to merged 3D cluster")
                        
                    print "[{0}] =".format(pixelClustersInFrameId), np.sum((colorModels[(frameIdx, pixelClustersInFrameId)].predict(X=pixelFeats) == pixelTargetClusters).astype(int))/float(len(pixelTargetClusters)),
#                     print colorModels[(frameIdx, pixelClustersInFrameId)].predict(X=pixelFeats), pixelTargetClusters
                print " ]"
                perFrameClusterIds[frameIdx] = pixelClustersInFrameIds
            else :
                print
                perFrameMergedClusters[frameIdx] = pixelClustersInFrameIds
        print "...DONE ({0} secs)\n".format(time.time()-startTime)
        
        ## doing a sanity check
        if len(perFrameClusterIds.keys()) + len(perFrameMergedClusters.keys()) != frameRange[1]-frameRange[0]+1 :
            raise Exception("Something went wrong in checking the cluster IDs per frame")
            
        ## if the current 3D cluster never splits in time then just add it as a single object ot allLabeledSegmentedObjects
        if len(perFrameClusterIds.keys()) == 0 :
            print "CLUSTER ID {0} never splits. Adding as single object...".format(current3DClusterId); startTime = time.time()
            newUniqueClusterId = np.max(allLabeledSegmentedObjects) + 1
            for frameIdx in np.arange(frameRange[0], frameRange[1]+1) :
                allLabeledSegmentedObjects[allLabeledSegmentations3D[:, :, frameIdx] == current3DClusterId, frameIdx] = newUniqueClusterId
            print "...DONE ({0} secs)\n".format(time.time()-startTime)
            
            print "...DONE CLUSTER ID {0} ({1} secs)\n".format(current3DClusterId, time.time()-clusterStartTime)
            continue
        
        ## find which color models are modelling the same object
        print "Finding Clustered Objects Similarity using Color Models..."; startTime = time.time()
        perModelObjectSimilarity = {}
        if len(perFrameClusterIds.keys()) > 0 :
            for idx, frameIdx in enumerate(np.sort(perFrameClusterIds.keys())[:-1]) :
                ## find pixels belonging to the clusters found in frameIdx
                pixelInFrameIdxs = np.empty([0, 2], dtype=int)
                pixelInFrameCumulativeNums = np.empty([0], dtype=int)
                for clusterId in perFrameClusterIds[frameIdx] :
                    pixelInFrameIdxs = np.vstack([pixelInFrameIdxs, np.array(np.argwhere(allLabeledSegmentations[:, :, frameIdx] == clusterId))])
                    pixelInFrameCumulativeNums = np.concatenate([pixelInFrameCumulativeNums, [len(pixelInFrameIdxs)]])

                for otherFrameIdx in np.sort(perFrameClusterIds.keys())[idx+1:] :
                    ## find pixels belonging to the clusters found in otherFrameIdx
                    pixelInOtherFrameIdxs = np.empty([0, 2], dtype=int)
                    pixelInOtherFrameCumulativeNums = np.empty([0], dtype=int)
                    for clusterId in perFrameClusterIds[otherFrameIdx] :
                        pixelInOtherFrameIdxs = np.vstack([pixelInOtherFrameIdxs, np.array(np.argwhere(allLabeledSegmentations[:, :, otherFrameIdx] == clusterId))])
                        pixelInOtherFrameCumulativeNums = np.concatenate([pixelInOtherFrameCumulativeNums, [len(pixelInOtherFrameIdxs)]])


                    ## compare clusters predicted by colorModel[otherFrameIdx] in frameIdx with actual clusters in frameIdx and viceversa
                    perModelObjectSimilarity[(frameIdx, otherFrameIdx)] = np.zeros([2, len(perFrameClusterIds[frameIdx]), len(perFrameClusterIds[otherFrameIdx])])

                    ## this makes sure that I process every pair of cluster models from frameIdx and otherFrameIdx:
                    ## the first column is used to index the first dimension of the perModelObjectSimilarity[(frameIdx, otherFrameIdx)] which has shape
                    ## [2, len(perFrameClusterIds[frameIdx]), len(perFrameClusterIds[otherFrameIdx])] as in my first attempt at doing dis shite and implicitly which of frameIdx and 
                    ## otherFrameIdx I'm considering (if it's 0, I'm classifying pixels in frameIdx using models learned in otherFrameIdx and viceversa if it's 1);
                    ## the second column tells me the index of the cluster in the current frame I'm considering, which can be used to index either the second axis (if first column == 0) or the third axis
                    ## (if first column == 1); the third axis tells me the index of the frame which is the first number in the tuple used to index colorModels; the fourth axis tells me the cluster ID the model
                    ## is taught to classify and is the second number in the tuple used to index colorModels

                    ## NOTE: the numbers on first axis == 0 refer to classification scores of pixels in frameIdx using models trained on blobs found in otherFrameIdx
                    processingIndices = np.vstack([[np.repeat([0, 1], [len(perFrameClusterIds[otherFrameIdx]), len(perFrameClusterIds[frameIdx])])],
                                                   [np.concatenate([np.arange(len(perFrameClusterIds[otherFrameIdx])), np.arange(len(perFrameClusterIds[frameIdx]))])],
                                                   [np.repeat([otherFrameIdx, frameIdx], [len(perFrameClusterIds[otherFrameIdx]), len(perFrameClusterIds[frameIdx])])],
                                                   [np.concatenate([perFrameClusterIds[otherFrameIdx], perFrameClusterIds[frameIdx]])]]).T

                    for (firstAxisIdx, otherAxisIdx, colorModelTupleIdxFirst, colorModelTupleIdxSecond) in processingIndices :
                        if firstAxisIdx == 0 :
                            processingPixelIdxs = pixelInFrameIdxs
                            processingFrameIdx = frameIdx
                            processingPixelCumNums = pixelInFrameCumulativeNums
                        elif firstAxisIdx == 1 :
                            processingPixelIdxs = pixelInOtherFrameIdxs
                            processingFrameIdx = otherFrameIdx
                            processingPixelCumNums = pixelInOtherFrameCumulativeNums
                        else :
                            raise Exception("This should never happen! Panic if it does... (nah don't...)")

                        ## predict whether each pixel in processingFrameIdx belongs to the cluster colorModelTupleIdxSecond from frame colorModelTupleIdxFirst
                        frameClusterPredictions = colorModels[(colorModelTupleIdxFirst, colorModelTupleIdxSecond)].predict(getPixelFeats(allFrames[:, :, :, processingFrameIdx], processingPixelIdxs,
                                                                                                                                         doNormalize=normalizeFeats, doUseCoords=useCoordsAsFeats))

                        ## find which percentage of pixels in each cluster in processingFrameIdx are assigned to the cluster that colorModels[(colorModelTupleIdxFirst, colorModelTupleIdxSecond)] has learned to detect
                        percentages = np.array([len(np.argwhere(frameClusterPredictions[startIdx:endIdx] == 1.0))/float(endIdx-startIdx)
                                                for startIdx, endIdx in zip(np.concatenate([[0], processingPixelCumNums[:-1]]), processingPixelCumNums)])
                        
                        if firstAxisIdx == 0 :
                            perModelObjectSimilarity[(frameIdx, otherFrameIdx)][firstAxisIdx, :, otherAxisIdx] = percentages
                        elif firstAxisIdx == 1 :
                            perModelObjectSimilarity[(frameIdx, otherFrameIdx)][firstAxisIdx, otherAxisIdx, :] = percentages
                        else :
                            raise Exception("This should never happen! Again, panic if it does... (or don't...)")

                    print "Analysing frames", (frameIdx, otherFrameIdx)
        print "...DONE ({0} secs)\n".format(time.time()-startTime)
        
        ## going through perModelObjectSimilarity and finding color models that are good at classifying separate cluster blobs in other frames (i.e. meaning they have been
        ## trained on a blob containing 2 or more separate objects)
        print "Finding Multi-cluster Color Models..."; startTime = time.time()
        ## find the multiObjectScore for each learned model in each frame apart from the one it was learned in
        perModelMultiObjectScores = {}
        for framePair in perModelObjectSimilarity.keys() :    
            ## I'm using the same processing indices I used to populate perModelObjectSimilarity, to index it to compute perModelMultiObjectScores
            processingIndices = np.vstack([[np.repeat([0, 1], [len(perFrameClusterIds[framePair[1]]), len(perFrameClusterIds[framePair[0]])])],
                                           [np.concatenate([np.arange(len(perFrameClusterIds[framePair[1]])), np.arange(len(perFrameClusterIds[framePair[0]]))])],
                                           [np.repeat(framePair[::-1], [len(perFrameClusterIds[framePair[1]]), len(perFrameClusterIds[framePair[0]])])],
                                           [np.concatenate([perFrameClusterIds[framePair[1]], perFrameClusterIds[framePair[0]]])]]).T
            ## go through all the clusters defined in this framePair and compute the multiObjectModelScore for each cluster wrt clusters in the other frame
            for (firstAxisIdx, otherAxisIdx, colorModelTupleIdxFirst, colorModelTupleIdxSecond) in processingIndices :
                if (colorModelTupleIdxFirst, colorModelTupleIdxSecond) not in perModelMultiObjectScores :
                    perModelMultiObjectScores[(colorModelTupleIdxFirst, colorModelTupleIdxSecond)] = {}

                ## compute score based on computed similarities
                if firstAxisIdx == 0 :
                    score = multiObjectModelScore(perModelObjectSimilarity[framePair][firstAxisIdx, :, otherAxisIdx])
                elif firstAxisIdx == 1 :
                    score = multiObjectModelScore(perModelObjectSimilarity[framePair][firstAxisIdx, otherAxisIdx, :])
                else :
                    raise Exception("This should never happen! More panicking? :)")

                ## save the multiObjectModelScore of the model learned on cluster colorModelTupleIdxSecond in frame colorModelTupleIdxFirst and keep track of which
                ## frame the multiObjectModelScore refers to (if int(firstAxisIdx == 1) is False (i.e. I'm looking at the object similarity of a cluster in framePair[1] to clusters in framepair[0]), 
                ## I will index framePair[0] and if it's True it will index framePair[1])
                perModelMultiObjectScores[(colorModelTupleIdxFirst, colorModelTupleIdxSecond)][framePair[int(firstAxisIdx == 1)]] = score

        ## go through all the frames and all their clusters and find which clusters are multiple objects based on the perModelMultiObjectScores
        ## not clear how yet but see below how I'm doing that for the time being
        ## here I will keep track of which clusters contain 1 single object according to the perModelMultiObjectScores and the thresholding below (they need to be matched together later on)
        perFrameSingleObjectClusters = {}
        for frameIdx in perFrameClusterIds.keys() :
            for clusterId in perFrameClusterIds[frameIdx] :
                if (frameIdx, clusterId) in perModelMultiObjectScores :
                    allScores = np.array([perModelMultiObjectScores[(frameIdx, clusterId)][idx] for idx in np.sort(perModelMultiObjectScores[(frameIdx, clusterId)].keys())])

                    ## not sure how to define multi-objectness but for now I'll threshold the median of allScores (maybe the threshold could be learned from a bunch of examples...)
                    multiObjectnessThreshold = 0.5
#                     allSingleObjects[allLabeledSegmentations[:, :, frameIdx] == clusterId, frameIdx] = int(np.median(allScores) >= multiObjectnessThreshold)+1
#                     print np.median(allScores), (frameIdx, clusterId)

                    ## the above don't work so instead I'll check if more than 5% (another param here but I can justify it by saying that just a small percentage of "wrong" frames is enough to say it's a multi object
                    ## and the only reason I'm not saying 1 is enough is because of problem with the color models which are not exactly infallible) of the frame are above the multiObjectnessThreshold
#                     allSingleObjects[allLabeledSegmentations[:, :, frameIdx] == clusterId, frameIdx] = 2-int(len(np.argwhere(allScores >= multiObjectnessThreshold))/float(len(allScores)) >= 0.05)
#                     print len(np.argwhere(allScores >= multiObjectnessThreshold))/float(len(allScores)), (frameIdx, clusterId)

                    if len(np.argwhere(allScores >= multiObjectnessThreshold))/float(len(allScores)) >= 0.05 :
                        if frameIdx not in perFrameMergedClusters.keys() :
                            perFrameMergedClusters[frameIdx] = []
                        perFrameMergedClusters[frameIdx].append(clusterId)
                    else :
                        if frameIdx not in perFrameSingleObjectClusters.keys() :
                            perFrameSingleObjectClusters[frameIdx] = []
                        perFrameSingleObjectClusters[frameIdx].append(clusterId)
                        
                else :
                    raise Exception("(frameIdx, clusterId) combination has not been computed in perModelMultiObjectScores")
        print "...DONE ({0} secs)\n".format(time.time()-startTime)
        
        ## similarity between two clusters in different frames (i, j) is in perModelObjectSimilarity and it's an average along axis=0 multiplied to a variable that decays with the difference in time between i and j
        ## if I consider each cluster a different point in a certain space (1D?) then the points corresponding to similar clusters according to the measure above, should be close by
        ## if I also compute this closeness while ensuring that clusters in the same frame are far apart, then it should be relatively easy to figure out how many spikes (and therefore objects) there are in the
        ## sequence of frames and which of the original cluster IDs belong to which of these unique ones
        print "Matching Objects using Similarity..."; startTime = time.time()
        ## count number of clusters in each of the frames where there are single object clusters perFrameSingleObjectClusters
        numClustersPerFrame = [len(perFrameSingleObjectClusters[frameIdx]) for frameIdx in np.sort(perFrameSingleObjectClusters.keys())]
        numClustersAll = np.sum(numClustersPerFrame)
        targetDistance = 1.0

        ## find the indices (used to index p) of clusters found in the same frame
        startIdx = 0
        sameFrameClustersIdxs = np.empty([0, 2], dtype=int)
        for numClusters in numClustersPerFrame :
            sameFrameClustersIdxs = np.vstack([sameFrameClustersIdxs, np.argwhere(np.triu(np.ones(numClusters), k=1))+startIdx])
            startIdx += numClusters

        ## singleObjectClusterIds tells me the cluster id for each single object cluster in perFrameSingleObjectClusters while singleObjectClusterIdxs tells me their index in perFrameClusterIds
        ## which I need to correctly index perModelObjectSimilaritylater on
        singleObjectClusterIds, singleObjectClusterIdxs =  np.array([[clusterId, idx] for frameIdx in np.sort(perFrameSingleObjectClusters.keys())
                                                                     for idx, clusterId in enumerate(perFrameClusterIds[frameIdx]) if clusterId in perFrameSingleObjectClusters[frameIdx]]).T
        # print singleObjectClusterIds, singleObjectClusterIdxs

        ## find the indices (used to index p) of clusters found in different frames and their similiarity measure from perModelObjectSimilarity
        differentFrameClustersIdxs = np.empty([0, 2], dtype=int)
        differentFrameClustersSimilarities = np.empty([0], dtype=float) ## similarities of clusters in the same order I put them in differentFrameClustersIdxs
        differentFrameClustersSimilarityWeights = np.empty([0], dtype=float) ## how important the cluster similarity is in the optimization which varies depending on how far away the frames are
        ## this tells me the range of indices in clusterPoints (or randomInitP) where the ordered points (the same way as in perModelObjectSimilarity) associated to a frame's clusters are
        frameClustersIdxRanges = {np.sort(perFrameSingleObjectClusters.keys())[i]:idxRange for i, idxRange in enumerate(np.hstack([np.concatenate([[0], np.cumsum(numClustersPerFrame)[:-1]])[:, np.newaxis],
                                                                                                                                   np.cumsum(numClustersPerFrame)[:, np.newaxis]]))}
        # print frameClustersIdxRanges
        ## compute the similarities and weights needed by the optimization
        for idx, frameIdx in enumerate(np.sort(perFrameSingleObjectClusters.keys())) :
            for otherFrameIdx in np.sort(perFrameSingleObjectClusters.keys())[idx+1:] :
        #         print (frameIdx, otherFrameIdx)
                frameIdxRange, otherFrameIdxRange = frameClustersIdxRanges[frameIdx], frameClustersIdxRanges[otherFrameIdx]

                framePairClustersIdxs = np.array(np.meshgrid(np.arange(frameIdxRange[0], frameIdxRange[1]),
                                                             np.arange(otherFrameIdxRange[0], otherFrameIdxRange[1]))).reshape([2, np.diff(frameIdxRange)[0]*np.diff(otherFrameIdxRange)[0]]).T
                differentFrameClustersIdxs = np.vstack([differentFrameClustersIdxs, framePairClustersIdxs])
                ## here I take the object similarity average between the A-to-B model and B-to-A model for the clusters indexed by framePairClustersIdxs
                ## unlike with my first attempt, I am now using indices directly framePairClustersIdxs to index singleObjectClusterIdxs which contains the correct indices used to index perModelObjectSimilarity
                ## this means that I don't need to flatten anything as I am using the reshaped indices from meshgrid to get the correct values from perModelObjectSimilarity
                ## the matrix that gets concatenated to differentFrameClustersSimilarities is of shape [2, len(framePairClustersIdxs)]
                differentFrameClustersSimilarities = np.concatenate([differentFrameClustersSimilarities,
                                                                     np.sum(perModelObjectSimilarity[(frameIdx, otherFrameIdx)][:, singleObjectClusterIdxs[framePairClustersIdxs][:, 0],
                                                                                                                                singleObjectClusterIdxs[framePairClustersIdxs][:, 1]], axis=0)/2.0])
                ## I'm also computing the importance weights which are inversely proportional to the distance between frames 
                ## I'm using 2^(-delta+1) where delta is the frame difference so that when two frames are consecutive the similarity matters the most
                ## this check is to avoid float overflow and stuff, and if the frame difference is bigger than 15 the object has probably moved a lot and therefore changed appearance quite a lot so meh
                if otherFrameIdx-frameIdx < 100 :
                    weight = 1.05**(frameIdx-otherFrameIdx+1) ## made this way less restrictive as it was fucking with convergence
                else :
                    weight = 0.0
                differentFrameClustersSimilarityWeights = np.concatenate([differentFrameClustersSimilarityWeights,
                                                                          np.ones(len(differentFrameClustersSimilarities)-len(differentFrameClustersSimilarityWeights))*weight])


        ## making the differentFrameClustersSimilarities a bit more "steep" using the logistic function and mapping from [0.5, 1.0] back to [0.0, 1.0]
        differentFrameClustersSimilarities = 2.0/(1.0+np.exp(-5.0*differentFrameClustersSimilarities))-1.0

        if doUseClusteredInitialization :
            ## compute affinity matrix between each cluster pair
            affinityMatrix = np.zeros([numClustersAll, numClustersAll])
            ## affinityMatrix will have 0 values for cluster pairs found in the same frames as differentFrameClustersIdxs only index the ones in different frames
            for clusterPair, clusterSimilarity in zip(differentFrameClustersIdxs, differentFrameClustersSimilarities) :
                affinityMatrix[clusterPair[0], clusterPair[1]] = affinityMatrix[clusterPair[1], clusterPair[0]] = clusterSimilarity
            # print skc.DBSCAN(metric="precomputed").fit_predict(np.exp(-affinityMatrix/0.5))
            ## try a bunch of different k values for clustering and use the result to initialize the p values on a circle around the equidistant cluster centers
            maxK = int(ceil(len(singleObjectClusterIds)/2.0))
        else :
            maxK = 1

        minCost = 1e20
        for numClusters in np.arange(1, maxK+1) :
            if doUseClusteredInitialization :
                ## find clusters using SpectralClustering
                initClustersIds = skc.SpectralClustering(n_clusters=numClusters, affinity="precomputed").fit_predict(affinityMatrix)
                ## if any of the clusters has less than 2 frame clusters assigned to them, just stop because they can only get smaller and I don't want smaller clusters than 2 as their std is 0.0 and I kinda want
                ## less clusters but with more frame clusters assigned to them
                if np.any(np.array([np.sum((initClustersIds == i).astype(int)) for i in np.arange(numClusters)]) <= 2) :
                    break

                ## compute standard deviation of similarity between points assigned to the same cluster
                intraClusterStd = []
                for clusterId in np.arange(numClusters) :
                    clusterIdxsInNewCluster = np.argwhere(initClustersIds == clusterId).flatten()
                    perClusterSimilarityIdxs = np.array(np.meshgrid(clusterIdxsInNewCluster, clusterIdxsInNewCluster))[:, np.triu(np.ones(len(clusterIdxsInNewCluster)), k=1).astype(np.bool)].T
                    intraClusterStd.append(np.std(affinityMatrix[perClusterSimilarityIdxs[:, 0], perClusterSimilarityIdxs[:, 1]]))

                ## NOT SURE I NEED TO USE THIS TO COMPUTE THE SCORE: compute the median of the similarity scores between frame clusters associated to different clusters
            #     interClusterSimilarityMedian = []
            #     for clusterPair in np.argwhere(np.triu(np.ones(numClusters), k=1)) :
            #         cluster1IdxsInNewCluster = np.argwhere(initClustersIds == clusterPair[0]).flatten()
            #         cluster2IdxsInNewCluster = np.argwhere(initClustersIds == clusterPair[1]).flatten()
            #         clusterPairSimilarityIdxs = np.array(np.meshgrid(cluster1IdxsInNewCluster, cluster2IdxsInNewCluster)).reshape([2, len(cluster1IdxsInNewCluster)*len(cluster2IdxsInNewCluster)]).T

            #         interClusterSimilarityMedian.append(np.median(tmp[clusterPairSimilarityIdxs[:, 0], clusterPairSimilarityIdxs[:, 1]]))

                score = np.sum(intraClusterStd)

                ## init values of p to be around cluster centers (1 for each cluster found by SpectralClustering) placed equidistantly on a circle
                initClusterCenters = np.arange(0, 2.0*np.pi, 2.0*np.pi/numClusters)[:, np.newaxis]
                initClusterCenters = np.hstack([np.cos(initClusterCenters)*targetDistance/2.0, np.sin(initClusterCenters)*targetDistance/2.0])
                randomInitP = (np.random.rand(numClustersAll, 2)-0.5)*targetDistance/5.0+initClusterCenters[initClustersIds, :]
            else :
                ## randomly initialized values for points p
                randomInitP = np.random.rand(numClustersAll, 2)


            ## now optimize the cost function that should ensure that similar clusters are associated to points p that are close to each other and dissimilar clusters are associated to points p that are far apart
            # functionToMinimize(randomInitP, perModelObjectSimilarity, np.sort(perFrameClusterIds.keys()), numClustersPerFrame, sameFrameClustersIdxs)
            optResult = minimize(functionToMinimize, randomInitP.flatten(), method='BFGS',
                                 args=(sameFrameClustersIdxs, differentFrameClustersIdxs, differentFrameClustersSimilarities, differentFrameClustersSimilarityWeights, 0.25, targetDistance))
            print ["Optimization Failed!", "Optimization Successful!"][int(optResult.success)], " ---  MESSAGE: \"{0}\" --- cost: {1}".format(optResult.message, optResult.fun)

            if optResult.fun < minCost :
                minCost = optResult.fun
                clusterPoints = optResult.x.reshape(randomInitP.shape)
                newClustersIds = cluster.hierarchy.fclusterdata(clusterPoints, targetDistance/2.0, criterion="distance") ## this assigns cluster indices starting with 1
        print "Found {0} Objects from Merged Cluster".format(np.max(newClustersIds))
        
        print "...DONE ({0} secs)\n".format(time.time()-startTime)
        
        ## copying split clusters from the original clustered segmentation but setting newly created unique IDs which will also be used to split the merged cluster
        print "Processing Newly Found Objects..."; startTime = time.time()
        ## these 3 arrays have the same shape: I can find the mapping between old and new cluster IDs and which frame indices the old cluster IDs were defined in 
        newUniqueClustersIds = np.max(allLabeledSegmentedObjects)+newClustersIds
        ## these are the ids associated to the clusters by the blob detection algorithm when analysing 1 frame at a time
        oldClustersIds = np.concatenate([perFrameSingleObjectClusters[key] for key in np.sort(perFrameSingleObjectClusters.keys())]).astype(newUniqueClustersIds.dtype)
        ## these are the indices of the frames where the IDs in oldClustersIds are defined
        frameIdxsToClusterIds = np.sort(perFrameSingleObjectClusters.keys()).repeat(numClustersPerFrame)
        for frameIdx, oldClusterId, newClusterId in zip(frameIdxsToClusterIds, oldClustersIds, newUniqueClustersIds) :
            allLabeledSegmentedObjects[allLabeledSegmentations[:, :, frameIdx] == oldClusterId, frameIdx] = newClusterId
#             print frameIdx, oldClusterId, newClusterId

        ## train new color models for each new cluster id and store their average pixel location for later keeping track of speed and such
        perSingleObjectClusterCenter = {}
        for newClusterId in list(set(newUniqueClustersIds)) :
            perSingleObjectClusterCenter[newClusterId] = {}
            clusterPositiveExamples = np.empty([0, 3], dtype=allFrames.dtype)
            clusterNegativeExamples = np.empty([0, 3], dtype=allFrames.dtype)
            clusterPositiveExamplesIdxs = np.empty([0, 3], dtype=int)
            clusterNegativeExamplesIdxs = np.empty([0, 3], dtype=int)
            for frameIdx in np.arange(allLabeledSegmentedObjects.shape[-1]) :
                if newClusterId in allLabeledSegmentedObjects[:, :, frameIdx] :
                    idxs = np.argwhere(np.all(np.concatenate([allLabeledSegmentations3D[:, :, frameIdx][:, :, np.newaxis] == current3DClusterId,
                                                              allLabeledSegmentedObjects[:, :, frameIdx][:, :, np.newaxis] == newClusterId], axis=-1), axis=-1))
                    clusterPositiveExamplesIdxs = np.concatenate([clusterPositiveExamplesIdxs, np.hstack([idxs, np.ones([len(idxs), 1], dtype=int)*frameIdx])])
                    clusterPositiveExamples = np.concatenate([clusterPositiveExamples,
                                                              getPixelFeats(allFrames[:, :, :, frameIdx], idxs, doNormalize=normalizeFeats, doUseCoords=useCoordsAsFeats)])
                    perSingleObjectClusterCenter[newClusterId][frameIdx] = np.mean(idxs, axis=0)
#                     perSingleObjectClusterCenter[newClusterId][frameIdx] = (np.max(idxs, axis=0)-np.min(idxs, axis=0))/2.0 + np.min(idxs, axis=0)
#                     ombb = calipersOMBB(idxs)
#                     perSingleObjectClusterCenter[newClusterId][frameIdx] = (ombb[0, :]-ombb[2, :])/2.0+ombb[2, :]
                    
                    idxs = np.argwhere(np.all(np.concatenate([allLabeledSegmentations3D[:, :, frameIdx][:, :, np.newaxis] == current3DClusterId,
                                                              allLabeledSegmentedObjects[:, :, frameIdx][:, :, np.newaxis] != newClusterId], axis=-1), axis=-1))
                    clusterNegativeExamplesIdxs = np.concatenate([clusterNegativeExamplesIdxs, np.hstack([idxs, np.ones([len(idxs), 1], dtype=int)*frameIdx])])
                    clusterNegativeExamples = np.concatenate([clusterNegativeExamples,
                                                              getPixelFeats(allFrames[:, :, :, frameIdx], idxs, doNormalize=normalizeFeats, doUseCoords=useCoordsAsFeats)])

            ## fit color model
            allObjectsColorModels[newClusterId] = ensemble.ExtraTreesClassifier(n_estimators=100)
            allObjectsColorModels[newClusterId].fit(X=np.vstack([clusterNegativeExamples, clusterPositiveExamples]),
                                                    y=np.repeat([0.0, 1.0], [len(clusterNegativeExamples), len(clusterPositiveExamples)]))
            
            print "New Cluster ID {0}, Model Accuracy".format(newClusterId), 
            print np.sum((allObjectsColorModels[newClusterId].predict(X=np.vstack([clusterNegativeExamples, clusterPositiveExamples]))
                          == np.repeat([0.0, 1.0],[len(clusterNegativeExamples), len(clusterPositiveExamples)])).astype(int))/float(len(clusterNegativeExamples) + len(clusterPositiveExamples))
        
        print "...DONE ({0} secs)\n".format(time.time()-startTime)
        
        ## going through frames where the current 3D cluster is merged and splitting it using graphcuts and shit
        print "Splitting Merged Cluster..."; startTime = time.time()
        
        print "...DONE ({0} secs)\n".format(time.time()-startTime)
        
        print "...DONE CLUSTER ID {0} ({1} secs)\n".format(current3DClusterId, time.time()-clusterStartTime)

Starting CLUSTER ID 41...
Fitting Color Models to clustered pixels...
Clusters in frame 0 [32, 29]  --- Trained Model accuracies: [ [32] = 0.940009794319 [29] = 0.940009794319  ]
Clusters in frame 1 [32, 29]  --- Trained Model accuracies: [ [32] = 0.941909748192 [29] = 0.941909748192  ]
Clusters in frame 2 [28, 31]  --- Trained Model accuracies: [ [28] = 0.933892451899 [31] = 0.933892451899  ]
Clusters in frame 3 [32, 28]  --- Trained Model accuracies: [ [32] = 0.935491723466 [28] = 0.935491723466  ]
Clusters in frame 4 [27, 31]  --- Trained Model accuracies: [ [27] = 0.944297082228 [31] = 0.944297082228  ]
Clusters in frame 5 [34, 29]  --- Trained Model accuracies: [ [34] = 0.940978886756 [29] = 0.940978886756  ]
Clusters in frame 6 [27, 30]  --- Trained Model accuracies: [ [27] = 0.94857421399 [30] = 0.94857421399  ]
Clusters in frame 7 [26, 30]  --- Trained Model accuracies: [ [26] = 0.950411223996 [30] = 0.950411223996  ]
Clusters in frame 8 [27, 31]  --- Trained Model accuracies: 

In [467]:
for frameIdx in perFrameMergedClusters.keys()[0:] :
    print frameIdx, perFrameMergedClusters[frameIdx]
    mergedClusterPixelIdxs = np.empty([0 , 2], dtype=int)
    for mergedClusterId in perFrameMergedClusters[frameIdx] :
        mergedClusterPixelIdxs = np.vstack([mergedClusterPixelIdxs, np.argwhere(allLabeledSegmentations[:, :, frameIdx] == mergedClusterId)])
    
    if len(perFrameMergedClusters[frameIdx]) > 0 :
        singleObjectClusterNewIds = np.sort(list(set(newUniqueClustersIds)))
        mergedClusterPixelSingleObjectModelScores = np.empty([len(mergedClusterPixelIdxs), 0])
        mergedClusterPixelSingleObjectPredictedMovedClusterScores = np.empty([len(mergedClusterPixelIdxs), 0])
        singleObjectClusterNewIdsToFindInFrame = []
        for idx, newClusterId in enumerate(singleObjectClusterNewIds) :
            if newClusterId not in allLabeledSegmentedObjects[:, :, frameIdx] :
                mergedClusterPixelSingleObjectModelScores = np.hstack([mergedClusterPixelSingleObjectModelScores,
                                                                       allObjectsColorModels[newClusterId].predict(getPixelFeats(allFrames[:, :, :, frameIdx], mergedClusterPixelIdxs,
                                                                                                                                 doNormalize=normalizeFeats, doUseCoords=useCoordsAsFeats))[:, np.newaxis]])
                if frameIdx in perSingleObjectClusterCenter[newClusterId].keys() :
                    raise Exception("Trying to segment an object out of a merged cluster when I already have it in perSingleObjectClusterCenter[newClusterId], which means I have its position at the current frameIdx")
                ## find an object's projected cluster center position by fiting a spline to the center positions in the frames I already have the object in
                existingClusterCentersFrameIdxs = np.sort(perSingleObjectClusterCenter[newClusterId].keys())
                existingClusterCenters = np.array([perSingleObjectClusterCenter[newClusterId][key] for key in existingClusterCentersFrameIdxs])
                interpFunc, u = spint.splprep(x=[existingClusterCenters[:, 0], existingClusterCenters[:, 1]], u=existingClusterCentersFrameIdxs, k=1)
                currentProjectedClusterCenter = np.array(spint.splev(frameIdx, interpFunc)).T
                ## find closest frame to frameIdx where I have the current newClusterId
                closestFrameIdx = existingClusterCentersFrameIdxs[np.argmin(np.abs(existingClusterCentersFrameIdxs-frameIdx))]
                ## get the frame from allLabeledSegmentedObjects and set to 1 only the pixels belonging to the current newClusterId and move it according to currentProjectedClusterCenter
                deltaMove = currentProjectedClusterCenter-perSingleObjectClusterCenter[newClusterId][closestFrameIdx]
                currentPredictedMovedClusterImg = cv2.warpAffine((allLabeledSegmentedObjects[:, :, closestFrameIdx] == newClusterId).astype(float),
                                                                 M=np.array([[1.0, 0.0, deltaMove[1]], [0.0, 1.0, deltaMove[0]]]), dsize=tuple(allLabeledSegmentedObjects.shape[0:2][::-1]))
                ## blur the moved cluster by an ammount proportional to the difference between the current frameIdx and the one where I took the cluster from (closestFrameIdx)
                frameIdxDiff = np.abs(closestFrameIdx-frameIdx)
                currentPredictedMovedClusterImg = cv2.GaussianBlur(currentPredictedMovedClusterImg, (1-np.mod(frameIdxDiff, 2) + frameIdxDiff, 1-np.mod(frameIdxDiff, 2) + frameIdxDiff), 0)
                mergedClusterPixelSingleObjectPredictedMovedClusterScores = np.hstack([mergedClusterPixelSingleObjectPredictedMovedClusterScores,
                                                                                       currentPredictedMovedClusterImg[mergedClusterPixelIdxs[:, 0], mergedClusterPixelIdxs[:, 1], np.newaxis]])

                singleObjectClusterNewIdsToFindInFrame.append(newClusterId)

        ## normalize so that for pixels where the color model of multiple objects "sees" the object they were trained to recognize I have an "undecided situation"
        nonZeroProbs = np.sum(mergedClusterPixelSingleObjectModelScores, axis=-1) != 0.0
        mergedClusterPixelSingleObjectModelScores[nonZeroProbs] = mergedClusterPixelSingleObjectModelScores[nonZeroProbs]/np.sum(mergedClusterPixelSingleObjectModelScores, axis=-1)[nonZeroProbs, np.newaxis]

        splitClusterLabels = splitMergedCluster(allFrames[mergedClusterPixelIdxs[:, 0], mergedClusterPixelIdxs[:, 1], :, frameIdx],
                                                bgImage[mergedClusterPixelIdxs[:, 0], mergedClusterPixelIdxs[:, 1], :], mergedClusterPixelIdxs,
                                                mergedClusterPixelSingleObjectModelScores, mergedClusterPixelSingleObjectPredictedMovedClusterScores)

        allLabeledSegmentedObjects[mergedClusterPixelIdxs[:, 0], mergedClusterPixelIdxs[:, 1], frameIdx] = np.array([-1 if label == 0 else singleObjectClusterNewIdsToFindInFrame[label-1] for label in splitClusterLabels])
        ### NOTE: HERE I SHOULD ADD THE NEW CLUSTER CENTER FOR EACH FOUND OBJECT

0 [29]
1 [29]
2 [28]
3 [28]
4 [27]
5 [29]
6 [27]
7 [26]
8 [27]
9 [27]
10 [26]
11 [28]
12 [27]
13 [29]
14 [29]
15 [29]
16 [30]
17 [31]
18 [31]
19 [29]
20 [28]
21 [28]
22 [28]
23 [31]
24 [29]
25 [32]
26 [31]
27 [31]
28 [31]
29 [30]
30 [32]
31 [32]
32 [32]
33 [32]
34 [32]
35 [32]
36 [33]
37 [31]
38 [31]
39 [30]
40 [31]
41 [30]
42 [31]
43 [30]
44 [34]
45 [34]
46 [31]
47 [32]
48 [32]
49 [34]
50 [32]
51 [32]
52 [32]
53 [30]
54 [31]
55 [31]
56 [31]
57 [31]
58 [31]
59 [32]
60 [32]
61 [32]
62 [32]
63 [36]
64 [33]
65 [33]
66 [33]
67 [35]
68 [32]
69 [32]


In [None]:
for idx in np.arange(allLabeledSegmentedObjects.shape[-1]) :
    img = cm.jet((allLabeledSegmentedObjects[:, :, idx]+1.0)/3.0, bytes=True)
    img[allLabeledSegmentedObjects[:, :, idx] == 0, -1] = 0
    Image.fromarray(img).save(dataLoc + os.sep + "objects_frame-{0:05d}.png".format(idx+201))

### MY ATTEMPT USING COLOR MODELS THAT DISTINGUISH BETWEEN BLOBS OF THE SAME FRAME

In [618]:
###################### VISUALIZE THE perModelMultiObjectScores FOR GIVEN FRAME AND CLUSTER ID'S ######################
tmp = (0, 32)
figure(); scatter(np.sort(perModelMultiObjectScores[tmp].keys()), np.array([perModelMultiObjectScores[tmp][idx] for idx in np.sort(perModelMultiObjectScores[tmp].keys())]))
print np.median(np.array([perModelMultiObjectScores[tmp][idx] for idx in np.sort(perModelMultiObjectScores[tmp].keys())]))
tmp = (0, 29)
scatter(np.sort(perModelMultiObjectScores[tmp].keys()), np.array([perModelMultiObjectScores[tmp][idx] for idx in np.sort(perModelMultiObjectScores[tmp].keys())]), c="red")
print np.median(np.array([perModelMultiObjectScores[tmp][idx] for idx in np.sort(perModelMultiObjectScores[tmp].keys())]))

0.232271695561
0.422205795117


In [714]:
## this will contain the segmentations with unique cluster ids per moving object assigned to each FG pixel
## merged clusters of more objects from allLabeledSegmentations3D should be split according to objects and pixels belonging to the same object over time should have the same consistent cluster ID
allLabeledSegmentedObjects = np.zeros_like(allLabeledSegmentations)
## take each cluster from allLabeledSegmentations3D and find how many clusters it splits into over time
for current3DClusterId, frameRange in zip(clustersFound3DIds, clustersFound3DFrameRanges) :
    if current3DClusterId == 103 : ## HACK: only consider this cluster cos I wanna test my code
        print "Starting CLUSTER ID {0}...".format(current3DClusterId); clusterStartTime = time.time()
        print "Fitting Color Models to clustered pixels..."; startTime = time.time()
        colorModels = {}
        perFrameClusterIds = {}
        for frameIdx in np.arange(frameRange[0], frameRange[1]+1) :
            ## find pixels in current image that belong to the current cluster found in 3D
            pixelIdxs = np.array(np.argwhere(allLabeledSegmentations3D[:, :, frameIdx] == current3DClusterId))
            ## find cluster ids (found using current frame only) assigned to pixels from currently considered cluster (found doing the 3D clustering)
            pixelClustersInFrameIds = list(set(allLabeledSegmentations[pixelIdxs[:, 0], pixelIdxs[:, 1], frameIdx]))
            print "Clusters in frame", frameIdx, pixelClustersInFrameIds
            
            ## find frames where the pixels belonging to the same 3D cluster are separated and train some sort of color model to distinguish between them
            if len(pixelClustersInFrameIds) > 1 :
                pixelColors = np.empty([0, 3], dtype=float)
                pixelTargetClusters = np.empty([0], dtype=float)
                for targetClusterIdx, pixelClustersInFrameId in enumerate(pixelClustersInFrameIds) :
                    ## find indices of pixels assigned to current cluster id
                    pixelInFrameIdxs = np.array(np.argwhere(allLabeledSegmentations[:, :, frameIdx] == pixelClustersInFrameId))
                    ## fit a mixture of gaussians model to the pixel colors
    #                 colorModels.append(mixture.GaussianMixture(n_components=4))
    #                 colorModels[-1].fit(allFrames[pixelInFrameIdxs[:, 0], pixelInFrameIdxs[:, 1], :, frameIdx])
                    pixelColors = np.vstack([pixelColors, allFrames[pixelInFrameIdxs[:, 0], pixelInFrameIdxs[:, 1], :, frameIdx]])
                    pixelTargetClusters = np.concatenate([pixelTargetClusters, np.ones([len(pixelInFrameIdxs)], dtype=float)*targetClusterIdx])

                colorModels[frameIdx] = ensemble.ExtraTreesClassifier()
                colorModels[frameIdx].fit(X=pixelColors, y=pixelTargetClusters)
                perFrameClusterIds[frameIdx] = pixelClustersInFrameIds
        print "...DONE ({0} secs)\n".format(time.time()-startTime)
        
        ## find which color models are modelling the same object
        print "Finding Clustered Objects Similarity using Color Models..."; startTime = time.time()
        perModelObjectSimilarity = {}
        if len(colorModels.keys()) > 0 :
            for modelIdx, frameIdx in enumerate(np.sort(colorModels.keys())[:-1]) :
                ## find pixels belonging to the clusters found in frameIdx
                pixelInFrameIdxs = np.empty([0, 2], dtype=int)
                pixelInFrameCumulativeNums = np.empty([0], dtype=int)
                for clusterId in perFrameClusterIds[frameIdx] :
                    pixelInFrameIdxs = np.vstack([pixelInFrameIdxs, np.array(np.argwhere(allLabeledSegmentations[:, :, frameIdx] == clusterId))])
                    pixelInFrameCumulativeNums = np.concatenate([pixelInFrameCumulativeNums, [len(pixelInFrameIdxs)]])
                    
                for otherFrameIdx in np.sort(colorModels.keys())[modelIdx+1:] :
                    ## find pixels belonging to the clusters found in otherFrameIdx
                    pixelInOtherFrameIdxs = np.empty([0, 2], dtype=int)
                    pixelInOtherFrameCumulativeNums = np.empty([0], dtype=int)
                    for clusterId in perFrameClusterIds[otherFrameIdx] :
                        pixelInOtherFrameIdxs = np.vstack([pixelInOtherFrameIdxs, np.array(np.argwhere(allLabeledSegmentations[:, :, otherFrameIdx] == clusterId))])
                        pixelInOtherFrameCumulativeNums = np.concatenate([pixelInOtherFrameCumulativeNums, [len(pixelInOtherFrameIdxs)]])
                        
                    ## predict clusters in otherFrameIdx using model learned in frameIdx
                    otherFrameClusterPredictions = colorModels[frameIdx].predict(allFrames[pixelInOtherFrameIdxs[:, 0], pixelInOtherFrameIdxs[:, 1], :, otherFrameIdx])
                    ## predict clusters in frameIdx using model learned in otherFrameIdx
                    frameClusterPredictions = colorModels[otherFrameIdx].predict(allFrames[pixelInFrameIdxs[:, 0], pixelInFrameIdxs[:, 1], :, frameIdx])
#                     print otherFrameClusterPredictions.shape, pixelInOtherFrameIdxs.shape, frameClusterPredictions.shape, pixelInFrameIdxs.shape
                    
                    ## compare clusters predicted by colorModel[otherFrameIdx] in frameIdx with actual clusters in frameIdx and viceversa
                    perModelObjectSimilarity[(frameIdx, otherFrameIdx)] = np.zeros([2, len(perFrameClusterIds[frameIdx]), len(perFrameClusterIds[otherFrameIdx])])
                    
                    ## find which percentage of pixels in each cluster in frameIdx are assigned to each cluster learned about in otherFrameIdx
                    startIdx = 0
                    for clusterIdx in np.arange(len(perFrameClusterIds[frameIdx])) :
                        perModelObjectSimilarity[(frameIdx, otherFrameIdx)][0, clusterIdx, :] = np.array([(len(np.argwhere(frameClusterPredictions[startIdx:pixelInFrameCumulativeNums[clusterIdx]] == i))/
                                                                                                           float(pixelInFrameCumulativeNums[clusterIdx]-startIdx))
                                                                                                          for i in np.arange(len(perFrameClusterIds[otherFrameIdx]))])
#                         print startIdx, pixelInFrameCumulativeNums[clusterIdx], pixelInFrameCumulativeNums[clusterIdx]-startIdx
                        startIdx = pixelInFrameCumulativeNums[clusterIdx]
                        
                    ## find which percentage of pixels in each cluster in otherFrameIdx are assigned to each cluster learned about in frameIdx
                    startIdx = 0
                    for clusterIdx in np.arange(len(perFrameClusterIds[otherFrameIdx])) :
                        perModelObjectSimilarity[(frameIdx, otherFrameIdx)][1, :, clusterIdx] = np.array([(len(np.argwhere(otherFrameClusterPredictions[startIdx:pixelInOtherFrameCumulativeNums[clusterIdx]] == i))/
                                                                                                           float(pixelInOtherFrameCumulativeNums[clusterIdx]-startIdx))
                                                                                                          for i in np.arange(len(perFrameClusterIds[frameIdx]))])
#                         print startIdx, pixelInOtherFrameCumulativeNums[clusterIdx], pixelInOtherFrameCumulativeNums[clusterIdx]-startIdx
                        startIdx = pixelInOtherFrameCumulativeNums[clusterIdx]
                        
                    print "Analysing frames", (frameIdx, otherFrameIdx)
                
        print "...DONE ({0} secs)\n".format(time.time()-startTime)
        
        ## similarity between two clusters in different frames (i, j) is in perModelObjectSimilarity and it's an average along axis=0 multiplied to a variable that decays with the difference in time between i and j
        ## if I consider each cluster a different point in a certain space (1D?) then the points corresponding to similar clusters according to the measure above, should be close by
        ## if I also compute this closeness while ensuring that clusters in the same frame are far apart, then it should be relatively easy to figure out how many spikes (and therefore objects) there are in the
        ## sequence of frames and which of the original cluster IDs belong to which of these unique ones
        print "Matching Objects using Similarity..."; startTime = time.time()
        ## count number of clusters in each of the frames where there are more than 1 clusters (saved in perFrameClusterIds)
        numClustersPerFrame = [len(perFrameClusterIds[key]) for key in np.sort(perFrameClusterIds.keys())]
        numClustersAll = np.sum(numClustersPerFrame)
        ## randomly initialized values for points p
        randomInitP = np.random.rand(numClustersAll, 2)
        targetDistance = 1.0

        ## find the indices (used to index p) of clusters found in the same frame
        startIdx = 0
        sameFrameClustersIdxs = np.empty([0, 2], dtype=int)
        for numClusters in numClustersPerFrame :
            sameFrameClustersIdxs = np.vstack([sameFrameClustersIdxs, np.argwhere(np.triu(np.ones(numClusters), k=1))+startIdx])
            startIdx += numClusters

        ## find the indices (used to index p) of clusters found in different frames and their similiarity measure from perModelObjectSimilarity
        differentFrameClustersIdxs = np.empty([0, 2], dtype=int)
        differentFrameClustersSimilarities = np.empty([0], dtype=float) ## similarities of clusters in the same order I put them in differentFrameClustersIdxs
        differentFrameClustersSimilarityWeights = np.empty([0], dtype=float) ## how important the cluster similarity is in the optimization which varies depending on how far away the frames are
        ## this tells me the range of indices in clusterPoints (or randomInitP) where the ordered points (the same way as in perModelObjectSimilarity) associated to a frame's clusters are
        frameClustersIdxRanges = {np.sort(perFrameClusterIds.keys())[i]:idxRange for i, idxRange in enumerate(np.hstack([np.concatenate([[0], np.cumsum(numClustersPerFrame)[:-1]])[:, np.newaxis],
                                                                                                                         np.cumsum(numClustersPerFrame)[:, np.newaxis]]))}
        for framePair in perModelObjectSimilarity.keys() :
            frameIdxRange, otherFrameIdxRange = frameClustersIdxRanges[framePair[0]], frameClustersIdxRanges[framePair[1]]
            differentFrameClustersIdxs = np.vstack([differentFrameClustersIdxs,
                                                    np.array(np.meshgrid(np.arange(frameIdxRange[0], frameIdxRange[1]),
                                                                         np.arange(otherFrameIdxRange[0], otherFrameIdxRange[1]))).reshape([2, np.diff(frameIdxRange)[0]*np.diff(otherFrameIdxRange)[0]]).T])
            ## here I take the object similarity average between the A-to-B model and B-to-A model and reshape it so that it's in the same order as the indexing that comes from meshgrid
            ## which takes indices of all rows with the first column, the the indices of all rows with the second column and so on, and this is why I need to flatten the costs using 
            ## Fortran order which flattens the 2D matrix by concatenating each column
            differentFrameClustersSimilarities = np.concatenate([differentFrameClustersSimilarities, np.sum(perModelObjectSimilarity[framePair], axis=0).flatten(order='F')/2.0])
            ## I'm also computing the importance weights which are inversely proportional to the distance between frames 
            ## I'm using 2^(-delta+1) where delta is the frame difference so that when two frames are consecutive the similarity matters the most
            ## this check is to avoid float overflow and stuff, and if the frame difference is bigger than 15 the object has probably moved a lot and therefore changed appearance quite a lot so meh
            if framePair[1]-framePair[0] < 15 :
                weight = 2**(framePair[0]-framePair[1]+1)
            else :
                weight = 0.0
            differentFrameClustersSimilarityWeights = np.concatenate([differentFrameClustersSimilarityWeights,
                                                                      np.ones(len(differentFrameClustersSimilarities)-len(differentFrameClustersSimilarityWeights))*weight])

        # functionToMinimize(randomInitP, perModelObjectSimilarity, np.sort(perFrameClusterIds.keys()), numClustersPerFrame, sameFrameClustersIdxs)
        optResult = minimize(functionToMinimize, randomInitP.flatten(), method='BFGS',
                             args=(sameFrameClustersIdxs, differentFrameClustersIdxs, differentFrameClustersSimilarities, differentFrameClustersSimilarityWeights, 0.25, targetDistance))
        clusterPoints = optResult.x.reshape(randomInitP.shape)
        print ["Optimization Failed!", "Optimization Successful!"][int(optResult.success)], " ---  MESSAGE: \"{0}\")".format(optResult.message)

        newClustersIds = cluster.hierarchy.fclusterdata(clusterPoints, targetDistance/2.0, criterion="distance") ## this assigns cluster indices starting with 1
        print "Found {0} Objects from Merged Cluster".format(np.max(newClustersIds))
        print "...DONE ({0} secs)\n".format(time.time()-startTime)
        
        ## copying split clusters from the original clustered segmentation but setting newly created unique IDs which will also be used to split the merged cluster
        print "Setting up Newly Found Objects..."; startTime = time.time()
        ## these 3 arrays have the same shape: I can find the mapping between old and new cluster IDs and which frame indices the old cluster IDs were defined in 
        newUniqueClustersIds = np.max(allLabeledSegmentedObjects)+newClustersIds
        ## these are the ids associated to the clusters by the blob detection algorithm when analysing 1 frame at a time
        oldClustersIds = np.concatenate([perFrameClusterIds[key] for key in np.sort(perFrameClusterIds.keys())]).astype(newUniqueClustersIds.dtype)
        ## these are the indices of the frames where the IDs in oldClustersIds are defined
        frameIdxsToClusterIds = np.sort(perFrameClusterIds.keys()).repeat(numClustersPerFrame)
        
        for frameIdx, oldClusterId, newClusterId in zip(frameIdxsToClusterIds, oldClustersIds, newUniqueClustersIds) :
            allLabeledSegmentedObjects[allLabeledSegmentations[:, :, frameIdx] == oldClusterId, frameIdx] = newClusterId
#             print frameIdx, oldClusterId, newClusterId
        print "...DONE ({0} secs)\n".format(time.time()-startTime)
        
        ## going through frames where the current 3D cluster is merged and splitting it using graphcuts and shit
        print "Splitting Merged Cluster..."; startTime = time.time()
        
        print "...DONE ({0} secs)\n".format(time.time()-startTime)
        
        print "...DONE CLUSTER ID {0} ({1} secs)\n".format(current3DClusterId, time.time()-clusterStartTime)

In [701]:
## annotated scatter plots
def makeAnnotatedScatterPlot(labels, points, lims) :
    figure(); scatter(points[:, 0], points[:, 1])
    for label, (x, y) in zip(labels, points) :
        annotate(label, xy=(x, y))
#     xlim([-lims, lims]); ylim([-lims, lims])
    
clusterLabels = np.array(["({0}, {1})".format(i, j) for i in np.sort(perFrameClusterIds.keys()) for j in perFrameClusterIds[i] ])
makeAnnotatedScatterPlot(clusterLabels, randomInitP, targetDistance*1.5)
makeAnnotatedScatterPlot(clusterLabels, clusterPoints, targetDistance*1.5)

In [469]:
figure("animation")
img = None
for i in xrange(allLabeledSegmentedObjects.shape[-1]):
    if img is None:
        img = mpl.pylab.imshow(allLabeledSegmentedObjects[:, :, i])
    else:
        img.set_data(allLabeledSegmentedObjects[:, :, i])
    mpl.pylab.pause(0.1)
    mpl.pylab.draw()