# 3D Reconstruction

In [1]:
video_name = 'video3'
input_file_name = f'video/{video_name}.mp4'

## Drawing YOLO Bounding Box

In [2]:
import cv2
import json

yolov9_video_name = f'output/{video_name}_yolov9.mp4'

# Draw bounding box to video
cap = cv2.VideoCapture(input_file_name)
with open(f'output/{video_name}_yolov9.json') as f:
    detections = json.load(f)['final_frames_detections']

fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
fps = int(cap.get(cv2.CAP_PROP_FPS))
width, height = (
            int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
)
out = cv2.VideoWriter()
out.open(yolov9_video_name, fourcc, fps, (width, height), True)

proc_frames = 0
try:
    while proc_frames < len(detections):
        ret, frame = cap.read()
        if not ret:
            break

        im = frame
        # Loop through list (if empty this will be skipped) and overlay green bboxes
        for d in detections[proc_frames]:
            cv2.rectangle(im, (int(d[0] - d[2]/2), int(d[1] - d[3]/2)), (int(d[0] + d[2]/2), int(d[1] + d[3]/2)), (0, 255, 0), 3)
            cv2.putText(im, f'{d[5]:.2f}', (int(d[0]), int(d[1])), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
        
        # write the frame
        out.write(im)

        proc_frames += 1
except Exception as error:
    # Release resources
    print(error)
    cap.release()
    out.release()

out.release()
cap.release()

In [3]:
%%capture
!ffmpeg -i video/video0.mp4 -f mp3 -ab 192000 -vn output/video0_audio.mp3 -y
!ffmpeg -i output/video0_yolov9.mp4 -i output/video0_audio.mp3 -c:v libx264 -c:a copy -map 0:v:0 -map 1:a:0 output/video0_yolov9_audio.mp4 -y

## Kalman Tracking

In [3]:
from kalman_tracking_yolov9.sort import Sort
import json
import cv2
import numpy as np

input_file_name = f"video/{video_name}.mp4"
output_file_name = f"output/{video_name}_tracked.mp4"
output_state_file_name = f"output/{video_name}_states.json"

sort = Sort(max_age=5, iou_threshold=0.3, zc=750)

cap = cv2.VideoCapture(input_file_name)
with open(f'output/{video_name}_yolov9.json') as f:
    detections = json.load(f)['final_frames_detections']

fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
fps = int(cap.get(cv2.CAP_PROP_FPS))
width, height = (
            int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
)
out = cv2.VideoWriter()
out.open(output_file_name, fourcc, fps, (width, height), True)

proc_frames = 0
state_rotations = []
color_mapping = {}
try:
    while proc_frames < len(detections):
        ret, frame = cap.read()
        if not ret:
            break

        im = frame
        # Loop through list (if empty this will be skipped) and overlay green bboxes
        d = detections[proc_frames]
        d_converted = []
        for x in d:
            d_converted.append([x[0]-x[2]/2, x[1]-x[3]/2, x[0]+x[2]/2, x[1]+x[3]/2])
        d_converted = np.array(d_converted)

        # tracking
        if len(d_converted) > 0:
            xs, xrs = sort.update(d_converted)
        else:
            xs, xrs = sort.update()

        temp = []
        for (x, xr) in zip(xs, xrs):
            vehicle_id = int(x[-1])
            if vehicle_id not in color_mapping:
                bounding_rect = im[int(x[1]):int(x[3]), int(x[0]):int(x[2])]
                try:
                    bgr_color = np.average(np.average(bounding_rect, axis=0), axis=0)
                except Exception as error:
                    continue

                hsv_color = cv2.cvtColor(np.array([[bgr_color]]).astype(np.uint8), cv2.COLOR_BGR2HSV)
                # if proc_frames == 0:
                #     print(hsv_color)
                hsv_color[:, :, 1] = 200
                # hsv_color[:, :, 1] = np.array(min(hsv_color[:, :, 1] * 1.5, 255)).astype(np.uint8)
                # hsv_color[:, :, 2] *= np.array(min(hsv_color[:, :, 2] * 1, 255)).astype(np.uint8)
                bgr_color = cv2.cvtColor(hsv_color, cv2.COLOR_HSV2BGR)[0, 0]

                rgb_color = [int(bgr_color[2]), int(bgr_color[1]), int(bgr_color[0])]
                color_mapping[vehicle_id] = rgb_color

            temp.append(xr.tolist() + [x[0], x[1], x[2], x[3], int(x[-1]), color_mapping[vehicle_id]])
            cv2.rectangle(im, (int(x[0]), int(x[1])), (int(x[2]), int(x[3])), (0, 255, 0), 3)
            cv2.putText(im, f'{int(x[4])} ({int(xr[0])},{int(xr[1])},{int(xr[2])})', (int((x[0] + x[2])//2), int((x[1] + x[3])//2)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
        state_rotations.append(temp)

        # write the frame
        out.write(im)

        proc_frames += 1
except Exception as error:
    # Release resources
    cap.release()
    out.release()
    print(error.with_traceback())

out.release()
cap.release()

with open(output_state_file_name, 'w', encoding='utf-8') as f:
    json.dump({ "states": state_rotations }, f, ensure_ascii=False, indent=4)

  w = np.sqrt(x[3] * x[4])


In [5]:
%%capture
!ffmpeg -i output/video0_tracked.mp4 -i output/video0_audio.mp3 -c:v libx264 -c:a copy -map 0:v:0 -map 1:a:0 output/video0_tracked_audio.mp4 -y

## Unicorn Reconstruction

In [4]:
import pyrender
import matplotlib.pyplot as plt
from math import cos, sin, pi, atan
import json
import cv2
import trimesh
from unicorn.unicorn import Unicorn
from unicorn.src.utils.mesh import save_mesh_as_obj, normalize
import os

print(1)
with open(f'output/{video_name}_states.json') as f:
    states = json.load(f)['states']
cap = cv2.VideoCapture(input_file_name)

print(2)
unicorn = Unicorn("/mnt/d/Bryan/TA/12 Deep Learning Reconstruction/unicorn-weights/car_p3d.pkl")
vehicle_id_mesh = {}
vehicle_id_largest_area = {}
state_rotations = []
try:
    print(3)
    for i, state in enumerate(states):
        print(i)
        ret, frame = cap.read()
        if not ret:
            break
        
        state_rotation = []
        for vehicle in state:
            vehicle_id = vehicle[-2]
            area = abs(vehicle[9] - vehicle[11]) * abs(vehicle[10] - vehicle[12])
            
            cv_image = frame[int(vehicle[10]):int(vehicle[12]), int(vehicle[9]):int(vehicle[11])]
            if cv_image.shape[0] == 0 or cv_image.shape[1] == 0: continue
            
            mesh, RT, bkg = unicorn.predict_cv_image(cv_image)
            state_rotation.append(vehicle + [RT[0][0].tolist(), RT[1][0].tolist()])

            if vehicle_id not in vehicle_id_mesh or vehicle_id_largest_area[vehicle_id] < area:
                mcenter = normalize(mesh)
                obj_name = f'output/meshes/{video_name}_{vehicle_id}.obj'
                save_mesh_as_obj(mcenter, obj_name)
                mc_mesh = trimesh.load(obj_name)
                vehicle_id_mesh[vehicle_id] = mc_mesh
                vehicle_id_largest_area[vehicle_id] = area
        state_rotations.append(state_rotation)
except Exception:
    import traceback
    print(traceback.format_exc())
finally:
    cap.release()
print('final')
with open(f'output/{video_name}_state_rotations.json', 'w', encoding='utf-8') as f:
    json.dump({ "state_rotations": state_rotations }, f, ensure_ascii=False, indent=4)

1
2


  self._edges_packed = torch.stack([u // V, u % V], dim=1)


[94m[2024-06-06 02:02:33] progressive field activated powers=[][0m
[94m[2024-06-06 02:02:33] progressive giraffe gen active powers=[2][0m
[94m[2024-06-06 02:02:33] init_rotations: azim=[0.0, 60.0, 120.0, 180.0, 240.0, 300.0], elev=[10.0, 10.0, 10.0, 10.0, 10.0, 10.0], roll=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0][0m
[94m[2024-06-06 02:02:33] progressive giraffe gen active powers=[4][0m
[93m[2024-06-06 02:02:35] WARN load_state_dict: ['T_init', 'T_cam', 'R_cam'] not found[0m
[94m[2024-06-06 02:02:35] progressive field activated powers=[2, 8, 32][0m
[94m[2024-06-06 02:02:35] progressive giraffe gen active powers=[2, 8, 32, 128][0m
[94m[2024-06-06 02:02:35] progressive giraffe gen active powers=[4, 16, 64, 128][0m
3
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93

In [2]:
import pyrender
import matplotlib.pyplot as plt
from math import cos, sin, pi, atan
import json
import cv2
import trimesh
import os
import numpy as np

output_file_name = f'output/{video_name}_reconstructed.mp4'

fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
fps = int(30)
width, height = (
    int(1280),
    int(720)
)
out = cv2.VideoWriter()
out.open(output_file_name, fourcc, fps, (width, height), True)

with open(f'output/{video_name}_state_rotations.json') as f:
    state_rotations = json.load(f)['state_rotations']

plane_dimension = [70, 0.01, 2000]
planes = [trimesh.creation.box(extents=plane_dimension) for i in range(11)]
for i, plane in enumerate(planes):
    abs_offset = abs(i - len(planes)//2)
    plane.visual.face_colors = [75 + 15*abs_offset, 75 + 15*abs_offset, 75 + 15*abs_offset, 255]

vehicle_id_mesh = {}
r = pyrender.OffscreenRenderer(1280, 720)
try:
    for i, state in enumerate(state_rotations):
        scene = pyrender.Scene(ambient_light=[0.6, 0.6, 0.6], bg_color=[1.0, 1.0, 1.0])
        state_multiplier = 0.3
        for j, vehicle in enumerate(state):
            vehicle_id = vehicle[-4]
            if vehicle_id in vehicle_id_mesh:
                mc_mesh = vehicle_id_mesh[vehicle_id]
            else:
                obj_name = f'output/meshes/{video_name}_{vehicle_id}.obj'
                mc_mesh = trimesh.load(obj_name)
                vehicle_id_mesh[vehicle_id] = mc_mesh

            # mc_mesh = rec.reconstruct_from_x([ -4.6463161 ,  -9.66539997])
            # for facet in mc_mesh.facets:
            #     mc_mesh.visual.face_colors[facet] = vehicle[10] + [255]
            mc_mesh.visual.vertex_colors = vehicle[-3] + [255]

            mesh_pyrender = pyrender.Mesh.from_trimesh(mc_mesh, smooth=False)
            if abs(vehicle[0]) > 15:
                sudut = pi / 2
            else:
                sudut = 0
            try:
                sudut += atan(vehicle[5]/vehicle[7])
            except:
                pass
            # mesh_pose = np.array([
            #     [cos(sudut), 0, sin(sudut), vehicle[0] * state_multiplier],
            #     [0, 1, 0, 8],
            #     [-sin(sudut), 0, cos(sudut), -vehicle[2] * state_multiplier],
            #     [0, 0, 0, 1]
            # ])
            # scene.add(mesh_pyrender, pose=mesh_pose)
            R = vehicle[-2]
            m_rot = np.matrix([
                [R[0][0], 0, R[0][2], 0],
                [0, 1, 0, 0],
                [R[2][0], 0, R[2][2], 0],
                [0, 0, 0, 1]
            ])
            m_loc = np.matrix([
                [1, 0, 0, vehicle[0] * state_multiplier],
                [0, 1, 0, 0.1],
                [0, 0, 1, -vehicle[2] * state_multiplier],
                [0, 0, 0, 1]
            ])
            mesh_pose = np.array(m_loc * m_rot)
            scene.add(mesh_pyrender, pose=mesh_pose)
            # scene.add(mesh_pyrender, pose=np.eye(4))

        camera_pose = np.array([
            [1, 0, 0, 0],
            [0, 1, 0, 0.5],
            [0, 0, 1, 0],
            [0, 0, 0, 1]
        ])
        light_pose = np.array([
            [1, 0, 0, 62],
            [0, 1, 0, 62],
            [0, 0, 1, 62],
            [0, 0, 0, 1]
        ])
        pl = pyrender.PointLight(color=[1.0, 1.0, 1.0], intensity=50000)
        pc = pyrender.PerspectiveCamera(yfov=np.pi / 3.0, aspectRatio=1280/720)
        scene.add(pl, pose=light_pose)
        scene.add(pc, pose=camera_pose)

        # add planes
        for i, plane in enumerate(planes):
            offset = i - len(planes)//2
            plane_pyrender = pyrender.Mesh.from_trimesh(plane, smooth=False)
            mesh_pose = np.array([
                [1, 0, 0, offset * plane_dimension[0]],
                [0, 1, 0, 0],
                [0, 0, 1, -plane_dimension[2]/3],
                [0, 0, 0, 1]
            ])
            scene.add(plane_pyrender, pose=mesh_pose)

        color, _ = r.render(scene)
        # plt.figure(figsize=(8,8)), plt.imshow(color)
        out.write(color)
except Exception:
    # Release resources
    import traceback
    print(traceback.format_exc())
finally:
    out.release()
    
r.delete()

In [3]:
%%capture
!ffmpeg -i output/video3_reconstructed.mp4 -i output/video3_audio.mp3 -c:v libx264 -c:a copy -map 0:v:0 -map 1:a:0 output/video3_reconstructed_audio.mp4 -y

## GPLVM Training

In [6]:
import numpy as np
from mygplvm.reconstruction import Reconstruction

sdfs = []
sdf_names = ['Jeep', 'Pickup', 'Sedan', 'Suv']
voxel_resolution = 64
dct_resolution = 25
for name in sdf_names:
    sdfs.append(np.load(f'./mygplvm/objs3/{name}_SDF_{voxel_resolution}x{voxel_resolution}x{voxel_resolution}.npy'))

rec = Reconstruction(voxel_resolution=voxel_resolution, dct_resolution=dct_resolution)
rec.fit_from_sdf(sdfs=sdfs)

         Current function value: 74057.302578
         Iterations: 3
         Function evaluations: 732
         Gradient evaluations: 60
time: 0.15903091430664062
alpha 1.173914432451989
beta 0.6012194508483061
gamma 0.9999424876154688


  res = _minimize_cg(f, x0, args, fprime, callback=callback, **opts)


array([[-28.79345703,  15.8886137 ],
       [ 40.50338745,   7.74951172],
       [ -7.06363434, -13.97269778],
       [ -4.64632135,  -9.66540378]])

## GPLVM Reconstruction

In [6]:
import pyrender
import matplotlib.pyplot as plt
from math import cos, sin, pi, atan
import json
import cv2
import trimesh

output_file_name = f'output/{video_name}_reconstructed.mp4'

fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
fps = int(30)
width, height = (
    int(1280),
    int(720)
)
out = cv2.VideoWriter()
out.open(output_file_name, fourcc, fps, (width, height), True)

with open(f'output/{video_name}_states.json') as f:
    state_rotations = json.load(f)['states']

plane_dimension = [70, 0.01, 2000]
planes = [trimesh.creation.box(extents=plane_dimension) for i in range(11)]
for i, plane in enumerate(planes):
    abs_offset = abs(i - len(planes)//2)
    plane.visual.face_colors = [75 + 15*abs_offset, 75 + 15*abs_offset, 75 + 15*abs_offset, 255]

try:
    for i, state in enumerate(state_rotations):
        scene = pyrender.Scene(ambient_light=[0.6, 0.6, 0.6], bg_color=[1.0, 1.0, 1.0])
        state_multiplier = 25
        for vehicle in state:
            mc_mesh = rec.reconstruct_from_x([ -4.6463161 ,  -9.66539997])
            # for facet in mc_mesh.facets:
            #     mc_mesh.visual.face_colors[facet] = vehicle[10] + [255]
            mc_mesh.visual.vertex_colors = vehicle[10] + [255]

            mesh_pyrender = pyrender.Mesh.from_trimesh(mc_mesh, smooth=False)
            if abs(vehicle[0]) > 15:
                sudut = pi / 2
            else:
                sudut = 0
            try:
                sudut += atan(vehicle[5]/vehicle[7])
            except:
                pass
            mesh_pose = np.array([
                [cos(sudut), 0, sin(sudut), vehicle[0] * state_multiplier],
                [0, 1, 0, 8],
                [-sin(sudut), 0, cos(sudut), -vehicle[2] * state_multiplier],
                [0, 0, 0, 1]
            ])
            scene.add(mesh_pyrender, pose=mesh_pose)

        camera_pose = np.array([
            [1, 0, 0, 0],
            [0, 1, 0, 15],
            [0, 0, 1, -80],
            [0, 0, 0, 1]
        ])
        light_pose = np.array([
            [1, 0, 0, 62],
            [0, 1, 0, 62],
            [0, 0, 1, 62],
            [0, 0, 0, 1]
        ])
        pl = pyrender.PointLight(color=[1.0, 1.0, 1.0], intensity=50000)
        pc = pyrender.PerspectiveCamera(yfov=np.pi / 3.0, aspectRatio=1280/720)
        scene.add(pl, pose=light_pose)
        scene.add(pc, pose=camera_pose)

        # add planes
        for i, plane in enumerate(planes):
            offset = i - len(planes)//2
            plane_pyrender = pyrender.Mesh.from_trimesh(plane, smooth=False)
            mesh_pose = np.array([
                [1, 0, 0, offset * plane_dimension[0]],
                [0, 1, 0, 0],
                [0, 0, 1, -plane_dimension[2]/3],
                [0, 0, 0, 1]
            ])
            scene.add(plane_pyrender, pose=mesh_pose)

        r = pyrender.OffscreenRenderer(1280, 720)
        color, _ = r.render(scene)
        # plt.figure(figsize=(8,8)), plt.imshow(color)
        out.write(color)
except Exception as error:
    # Release resources
    out.release()
    print(error.with_traceback())

out.release()

TypeError: BaseException.with_traceback() takes exactly one argument (0 given)

In [None]:
%%capture
!ffmpeg -i output/video0_reconstructed.mp4 -i output/video0_audio.mp3 -c:v libx264 -c:a copy -map 0:v:0 -map 1:a:0 output/video0_reconstructed_audio.mp4 -y