# 3D Reconstruction

In [1]:
# Stem of the clip to process; every input/output path in this notebook is derived from it.
video_name = "video5"
# Source video read by the tracking and reconstruction cells.
input_file_name = f"video/{video_name}.mp4"

## Kalman Tracking

In [17]:
from kalman_tracking_deepbox.sort import Sort
import json
import cv2
import numpy as np

# Paths derived from the clip selected in the first cell.
input_file_name = f"video/{video_name}.mp4"
output_file_name = f"output/{video_name}_tracked.mp4"
output_state_file_name = f"output/{video_name}_states.json"

# Colour stored for a track whose box has no pixels inside the frame yet.
FALLBACK_TRACK_COLOR = [128, 128, 128]


def _to_corner_boxes(frame_detections):
    """Convert one frame of detections into an (N, 5) array for the tracker.

    Each input row is read as [centre x, centre y, width, height, fifth value]
    (the fifth value is passed through unchanged; presumably the detector
    confidence -- confirm against the YOLO export). The output rows are
    [x1, y1, x2, y2, fifth value]. An empty frame yields shape (0, 5) so the
    tracker always receives the same number of columns.
    """
    boxes = [
        [d[0] - d[2] / 2, d[1] - d[3] / 2, d[0] + d[2] / 2, d[1] + d[3] / 2, d[4]]
        for d in frame_detections
    ]
    return np.array(boxes, dtype=float).reshape(-1, 5)


def _sample_track_color(image, box):
    """Return an [R, G, B] colour taken from the mean colour inside ``box``.

    The box corners are clamped to the image so negative coordinates cannot
    wrap around when slicing. Returns None when the clamped box is empty.
    """
    frame_h, frame_w = image.shape[:2]
    x1, x2 = (min(max(int(v), 0), frame_w) for v in (box[0], box[2]))
    y1, y2 = (min(max(int(v), 0), frame_h) for v in (box[1], box[3]))
    crop = image[y1:y2, x1:x2]
    if crop.size == 0:
        return None

    bgr_color = np.average(np.average(crop, axis=0), axis=0)
    # Force a fixed saturation so the per-track colours are easy to tell apart.
    hsv_color = cv2.cvtColor(np.array([[bgr_color]]).astype(np.uint8), cv2.COLOR_BGR2HSV)
    hsv_color[:, :, 1] = 200
    bgr_color = cv2.cvtColor(hsv_color, cv2.COLOR_HSV2BGR)[0, 0]
    return [int(bgr_color[2]), int(bgr_color[1]), int(bgr_color[0])]


sort = Sort(max_age=30, iou_threshold=0.1, zc=750)

cap = cv2.VideoCapture(input_file_name)
with open(f'output/{video_name}_yolov9.json') as f:
    detections = json.load(f)['final_frames_detections']

fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
# Keep the exact (possibly fractional) frame rate: truncating 29.97 to 29 makes
# the written video drift against the audio track that is muxed in afterwards.
fps = cap.get(cv2.CAP_PROP_FPS)
width, height = (
    int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
    int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
)
out = cv2.VideoWriter()
out.open(output_file_name, fourcc, fps, (width, height), True)

proc_frames = 0
state_rotations = []   # one list of per-vehicle records for every processed frame
color_mapping = {}     # track id -> [R, G, B], assigned the first time a track is seen
try:
    while proc_frames < len(detections):
        ret, frame = cap.read()
        if not ret:
            break

        im = frame

        # tracking (an empty frame still advances the tracker)
        Zs, Xs = sort.update(_to_corner_boxes(detections[proc_frames]))
        tracks = list(zip(Zs, Xs))

        # First pass: build the state records and sample colours while the
        # frame is still free of overlays drawn for other tracks.
        temp = []
        for (Z, X) in tracks:
            vehicle_id = int(Z[-1])
            track_color = color_mapping.get(vehicle_id)
            if track_color is None:
                track_color = _sample_track_color(im, Z)
                if track_color is None:
                    # Not cached, so a later frame can still assign a real colour.
                    track_color = list(FALLBACK_TRACK_COLOR)
                else:
                    color_mapping[vehicle_id] = track_color

            # Record layout: tracker state X, then box corners, track id, colour.
            # Plain floats keep the record JSON-serialisable for any array dtype.
            temp.append(
                X.tolist()
                + [float(Z[0]), float(Z[1]), float(Z[2]), float(Z[3]), vehicle_id, track_color]
            )

        # Second pass: overlay green boxes and a text label on the frame.
        for (Z, X) in tracks:
            cv2.rectangle(im, (int(Z[0]), int(Z[1])), (int(Z[2]), int(Z[3])), (0, 255, 0), 3)
            cv2.putText(im, f'{int(Z[6])} ({int(X[0])},0,{int(X[2])},{int(X[3])})', (int((Z[0] + Z[2])//2), int((Z[1] + Z[3])//2)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
        state_rotations.append(temp)

        # write the frame
        out.write(im)

        proc_frames += 1
except Exception as error:
    # Best effort: report the failure and still save the frames processed so far.
    import traceback
    print(traceback.format_exc())
finally:
    # Release resources
    out.release()
    cap.release()

with open(output_state_file_name, 'w', encoding='utf-8') as f:
    json.dump({ "states": state_rotations }, f, ensure_ascii=False, indent=4)

In [18]:
%%capture
!ffmpeg -i output/video5_tracked.mp4 -i output/video5_audio.mp3 -c:v libx264 -c:a copy -map 0:v:0 -map 1:a:0 output/video5_tracked_audio.mp4 -y

## GPLVM Training

In [2]:
import numpy as np
from mygplvm.reconstruction import Reconstruction

# Shape categories whose pre-computed SDF volumes (named as in the file paths) are loaded.
sdf_names = ['Jeep', 'Pickup', 'Sedan', 'Suv']
voxel_resolution = 64
dct_resolution = 25

# One array per category, loaded in the order of sdf_names.
sdfs = [
    np.load(f'./mygplvm/objs3/{name}_SDF_{voxel_resolution}x{voxel_resolution}x{voxel_resolution}.npy')
    for name in sdf_names
]

rec = Reconstruction(voxel_resolution=voxel_resolution, dct_resolution=dct_resolution)
# Kept as the last expression so the fit result is displayed as the cell output.
rec.fit_from_sdf(sdfs=sdfs)

         Current function value: 74057.302578
         Iterations: 3
         Function evaluations: 684
         Gradient evaluations: 56
time: 0.13432860374450684
alpha 1.1739144324584667
beta 0.6012194508534535
gamma 0.9999424872417098


  res = _minimize_cg(f, x0, args, fprime, callback=callback, **opts)


array([[-28.79344749,  15.88861275],
       [ 40.50342178,   7.74950457],
       [ -7.06363434, -13.97268919],
       [ -4.64632087,  -9.66539711]])

## GPLVM Reconstruction

In [6]:
from mygplvm.reconstruction3 import Reconstruction3
from road_segmentation.road_segmentation import RoadSegmentation

# Road segmenter: used further down to predict a road mask for each frame and
# to turn that mask into a mesh.
road_segmentation = RoadSegmentation()
# Shape predictor built from the GPLVM held by `rec` and the same voxel/DCT
# resolutions; in this cell it is only referenced by the commented-out
# per-vehicle reconstruction.
rec3 = Reconstruction3(rec.gplvm, voxel_resolution, dct_resolution)

import pyrender
import matplotlib.pyplot as plt
from math import cos, sin, pi, atan
import json
import cv2
import trimesh
import numpy as np

# Input handed to rec.reconstruct_from_x for the stand-in vehicle shape
# (presumably GPLVM latent coordinates -- confirm against the training output).
generic_latent = [ -4.6463161 ,  -9.66539711]
generic_mesh = rec.reconstruct_from_x(generic_latent)
# The ego vehicle has the same shape in every frame, so it is built once here
# rather than once per rendered frame.
ego_mesh = rec.reconstruct_from_x(generic_latent)

output_file_name = f'output/{video_name}_reconstructed.mp4'

# Size of the rendered output; shared by the video writer, renderer and camera.
width, height = (
    int(1280),
    int(720)
)

# None renders every tracked frame; set a small integer for a quick preview.
max_frames = None

# Scale applied to tracked positions when placing meshes in the scene.
state_multiplier = 25
road_scale = state_multiplier*1.5

# Poses that do not change between frames.
ego_pose = np.array([
    [1, 0, 0, 0],
    [0, 1, 0, 20],
    [0, 0, 1, 0],
    [0, 0, 0, 1]
])
road_pose = np.array([
    [road_scale, 0, 0, 0],
    [0, road_scale, 0, 0],
    [0, 0, road_scale, 0],
    [0, 0, 0, 1]
])
camera_pitch = -pi/6
camera_pose = np.array([
    [1, 0, 0, 0],
    [0, cos(camera_pitch), -sin(camera_pitch), 150],
    [0, sin(camera_pitch), cos(camera_pitch), 150],
    [0, 0, 0, 1]
])
light_pose = np.array([
    [1, 0, 0, 0],
    [0, 1, 0, 1000],
    [0, 0, 1, -25],
    [0, 0, 0, 1]
])


def _bearing(x, z):
    """Return atan(x / z), extended to z == 0 so a zero depth cannot abort the render."""
    if z == 0:
        if x == 0:
            return 0.0
        return pi/2 if x > 0 else -pi/2
    return atan(x / z)


def _vehicle_pose(x, z, yaw_d):
    """Build the 4x4 scene pose of a tracked vehicle.

    x, z  -- first and third entries of the vehicle's state record
    yaw_d -- yaw in radians (converted from the fourth state entry by the caller)
    """
    bearing = _bearing(x, z)
    yaw = yaw_d + bearing if x < 0 else yaw_d - bearing
    return np.array([
        [cos(yaw), 0, sin(yaw), x * state_multiplier],
        [0, 1, 0, 20],
        [-sin(yaw), 0, cos(yaw), -z * state_multiplier * 0.75],
        [0, 0, 0, 1]
    ])


with open(f'output/{video_name}_states.json') as f:
    state_rotations = json.load(f)['states']

fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
cap = cv2.VideoCapture(input_file_name)
# One frame is rendered per source frame, so follow the source frame rate to
# stay in sync with the audio muxed in afterwards; 30 is the fallback when the
# container does not report one.
fps = cap.get(cv2.CAP_PROP_FPS) or 30
out = cv2.VideoWriter()
out.open(output_file_name, fourcc, fps, (width, height), True)

# plane_dimension = [70, 0.01, 2000]
# planes = [trimesh.creation.box(extents=plane_dimension) for i in range(11)]
# for i, plane in enumerate(planes):
#     abs_offset = abs(i - len(planes)//2)
#     plane.visual.face_colors = [75 + 15*abs_offset, 75 + 15*abs_offset, 75 + 15*abs_offset, 255]

r = None
try:
    # Created inside the try so a renderer failure still releases cap and out.
    r = pyrender.OffscreenRenderer(width, height)

    frame_limit = len(state_rotations)
    if max_frames is not None:
        frame_limit = min(frame_limit, max_frames)

    proc_frames = 0
    while proc_frames < frame_limit:
        ret, frame = cap.read()
        if not ret:
            break

        state = state_rotations[proc_frames]
        scene = pyrender.Scene(ambient_light=[0.8, 0.8, 0.8], bg_color=[50, 50, 50])

        # my vehicle
        ego_mesh.visual.vertex_colors = [255, 0, 0, 255]
        car_pymesh = pyrender.Mesh.from_trimesh(ego_mesh, smooth=False)
        scene.add(car_pymesh, pose=ego_pose)

        # shape reconstruction
        for vehicle in state:
            yaw_d = np.deg2rad(vehicle[3])
            # Per-vehicle shape prediction is currently disabled; every vehicle
            # is drawn with the generic mesh instead.
            # xy = np.asarray(vehicle[11:15], dtype=int)
            # if xy[1] >= xy[3] or xy[0] >= xy[2]: continue
            # mc_mesh = rec3.predict(frame[xy[1]:xy[3], xy[0]:xy[2]], yaw_d)
            # if mc_mesh is None: continue
            # mc_mesh.visual.vertex_colors = vehicle[16] + [255]
            # mesh_pyrender = pyrender.Mesh.from_trimesh(mc_mesh, smooth=False)
            generic_mesh.visual.vertex_colors = [150, 150, 150, 255]
            mesh_pyrender = pyrender.Mesh.from_trimesh(generic_mesh, smooth=False)
            scene.add(mesh_pyrender, pose=_vehicle_pose(vehicle[0], vehicle[2], yaw_d))

        # road reconstruction
        road_mask = road_segmentation.predict(frame)
        road_tmesh = road_segmentation.reconstruct(road_mask)
        road_tmesh.visual.vertex_colors = [240, 240, 240, 255]
        road_pymesh = pyrender.Mesh.from_trimesh(road_tmesh, smooth=False)
        scene.add(road_pymesh, pose=road_pose)

        # camera and lighting
        pl = pyrender.PointLight(color=[1.0, 1.0, 1.0], intensity=1000000)
        pc = pyrender.PerspectiveCamera(yfov=np.pi / 3.0, aspectRatio=width/height)
        scene.add(pl, pose=light_pose)
        scene.add(pc, pose=camera_pose)

        # # add planes
        # for i, plane in enumerate(planes):
        #     offset = i - len(planes)//2
        #     plane_pyrender = pyrender.Mesh.from_trimesh(plane, smooth=False)
        #     mesh_pose = np.array([
        #         [1, 0, 0, offset * plane_dimension[0]],
        #         [0, 1, 0, 0],
        #         [0, 0, 1, -plane_dimension[2]/3],
        #         [0, 0, 0, 1]
        #     ])
        #     scene.add(plane_pyrender, pose=mesh_pose)

        color, _ = r.render(scene)
        # plt.figure(figsize=(8,8)), plt.imshow(color)
        # The renderer returns RGB while OpenCV writers expect BGR; without the
        # swap the red ego vehicle is saved as blue.
        color_bgr = cv2.cvtColor(color, cv2.COLOR_RGB2BGR)
        out.write(color_bgr)
        if proc_frames == 0:
            # Still image of the first rendered frame.
            cv2.imwrite('yolov9.png', color_bgr)

        proc_frames += 1

except Exception as error:
    # Best effort: report the failure and keep whatever was rendered so far.
    import traceback
    print(traceback.format_exc())
finally:
    # Release resource
    if r is not None:
        r.delete()
    cap.release()
    out.release()

In [4]:
%%capture
!ffmpeg -i output/video5_reconstructed.mp4 -i output/video5_audio.mp3 -c:v libx264 -c:a copy -map 0:v:0 -map 1:a:0 output/video5_reconstructed_audio.mp4 -y