This repository has been archived by the owner on Oct 31, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 24
/
pose_optimization.py
326 lines (264 loc) · 12.7 KB
/
pose_optimization.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
#!/usr/bin/env python3
# Copyright 2004-present Facebook. All Rights Reserved.
from cv2 import CV_32FC3, CV_8UC1
import os
from typing import List
import shutil
from lib_python import (
DepthVideo,
DepthVideoImporter,
DepthVideoPoseOptimizer,
DepthVideoProcessor,
DepthXformType,
FlowConstraintsCollection,
FlowConstraintsParams,
IntrinsicsOptimization,
SmoothLossType,
SpatialXformType,
StaticLossType,
ValueXformType,
XformType,
)
from utils.helpers import Nestedspace
def convert_opt_params(src: Nestedspace) -> DepthVideoPoseOptimizer.Params:
    """Build a `DepthVideoPoseOptimizer.Params` from Python-side settings.

    Each snake_case field read from `src` is assigned to its camelCase
    counterpart on a newly constructed `Params` object. The string-valued
    options `static_loss_type`, `smooth_loss_type` and `intr_opt` are
    translated to members of `StaticLossType`, `SmoothLossType` and
    `IntrinsicsOptimization` respectively.

    Args:
        src: Namespace carrying the optimizer settings.

    Returns:
        The populated `DepthVideoPoseOptimizer.Params` instance.

    Raises:
        RuntimeError: If one of the three string-valued options does not
            match any of the recognized names.
    """

    def pick(value, candidates, error_message):
        # Return the member paired with the first label equal to `value`;
        # an unrecognized value is reported with the caller's message.
        for label, member in candidates:
            if value == label:
                return member
        raise RuntimeError(error_message)

    dst = DepthVideoPoseOptimizer.Params()

    # Solver controls.
    dst.maxIterations = src.max_iterations
    dst.numThreads = src.num_threads
    dst.numSteps = src.num_steps
    dst.robustness = src.robustness

    # Static loss.
    dst.staticLossType = pick(
        src.static_loss_type,
        (
            ("Euclidean", StaticLossType.Euclidean),
            ("ReproDisparity", StaticLossType.ReproDisparity),
            ("ReproDepthRatio", StaticLossType.ReproDepthRatio),
            ("ReproLogDepth", StaticLossType.ReproLogDepth),
        ),
        "Invalid static loss type specified.",
    )
    dst.staticSpatialWeight = src.static_spatial_weight
    dst.staticDepthWeight = src.static_depth_weight

    # Smoothness loss.
    dst.smoothLossType = pick(
        src.smooth_loss_type,
        (
            ("EuclideanLaplacian", SmoothLossType.EuclideanLaplacian),
            ("ReproDisparityLaplacian", SmoothLossType.ReproDisparityLaplacian),
            (
                "ReproDepthRatioConsistency",
                SmoothLossType.ReproDepthRatioConsistency,
            ),
            (
                "ReproLogDepthConsistency",
                SmoothLossType.ReproLogDepthConsistency,
            ),
        ),
        "Invalid smooth loss type specified.",
    )
    dst.smoothStaticWeight = src.smooth_static_weight
    dst.smoothDynamicWeight = src.smooth_dynamic_weight

    # Regularization terms.
    dst.positionReg = src.position_regularization
    dst.scaleReg = src.scale_regularization
    dst.scaleRegGridSize = src.scale_regularization_grid_size
    dst.depthDeformRegInitial = src.deformation_regularization_initial
    dst.depthDeformRegFinal = src.deformation_regularization_final
    dst.adaptiveDeformationCost = src.adaptive_deformation_cost
    dst.spatialDeformReg = src.spatial_deformation_regularization
    dst.graduateDepthDeformReg = src.graduate_deformation_regularization
    dst.focalReg = src.focal_regularization

    # Coarse-to-fine and deferred spatial optimization schedule.
    dst.coarseToFine = src.coarse_to_fine
    dst.ctfLong = src.ctf_long
    dst.ctfShort = src.ctf_short
    dst.deferredSpatialOpt = src.deferred_spatial_opt
    dst.dsoLong = src.dso_long
    dst.dsoShort = src.dso_short
    dst.focalLong = src.focal_long

    # Intrinsics optimization mode.
    dst.intrOpt = pick(
        src.intr_opt,
        (
            ("Fixed", IntrinsicsOptimization.Fixed),
            ("Shared", IntrinsicsOptimization.Shared),
            ("PerFrame", IntrinsicsOptimization.PerFrame),
        ),
        "Invalid intrinsics optimization mode specified.",
    )

    # Flags that freeze individual groups of variables.
    dst.fixPoses = src.fix_poses
    dst.fixDepthXforms = src.fix_depth_transforms
    dst.fixSpatialXforms = src.fix_spatial_transforms

    return dst
class PoseOptimizer:
    """Runs pose / depth-transform optimization on a `DepthVideo`.

    The instance owns the depth video imported from `base_dir`, the flow
    constraints built for it, and the converted optimizer parameters.
    Operations that work on "the last depth stream" use the stream with
    index `numDepthStreams() - 1` at the time of the call.
    """

    def __init__(
        self,
        base_dir: str,
        model_type: str,
        frames: List[int],
        opt_params: Nestedspace,
    ):
        """Import the video, register its streams and set up flow constraints.

        Args:
            base_dir: Root directory of the depth video on disk.
            model_type: Suffix of the estimated depth stream, which is
                created with both name and directory `depth_{model_type}`.
            frames: Frame indices that the later processing steps are
                restricted to.
            opt_params: Optimizer settings; converted via
                `convert_opt_params`. The fields `use_global_scale`,
                `dynamic_constraints` and, for "Ransac",
                `epipolar_dist_thresh` are also read here.
        """
        self.base_dir = base_dir
        self.frames = frames

        def colmap_available():
            # COLMAP data is only used when both directories are present.
            return os.path.exists(
                os.path.join(base_dir, "colmap_dense")
            ) and os.path.exists(os.path.join(base_dir, "depth_colmap_dense"))

        # Initialize depth video with initial depth stream (unoptimized depth
        # estimate).
        video = DepthVideo()
        self.depth_video = video
        discover_streams = False
        DepthVideoImporter.importVideo(video, base_dir, discover_streams)

        video.createColorStream("full", "color_full", ".png", CV_32FC3)
        video.createColorStream("down", "color_down", ".raw", CV_32FC3)
        if os.path.isdir(os.path.join(base_dir, "dynamic_mask")):
            video.createColorStream(
                "dynamic_mask", "dynamic_mask", ".png", CV_8UC1
            )

        # If a ground truth depth stream exists, we add it first, because
        # we'll always be optimizing the last stream below.
        gt_dir = f"{base_dir}/depth_gt"
        if os.path.exists(gt_dir):
            print("Importing ground truth depth...")
            video.createDepthStream("depth_gt", "depth_gt", [-1, -1])

            poses_file = f"{gt_dir}/poses.txt"
            if os.path.exists(poses_file):
                print("Importing ground truth poses...")
                # The ground truth stream was just appended, so it is last.
                gt_depth_stream = video.numDepthStreams() - 1
                DepthVideoImporter.importPoses(
                    video, poses_file, gt_depth_stream
                )

        # Import COLMAP poses and depth stream if they exist.
        if colmap_available():
            DepthVideoImporter.importColmapDepth(video)
            video.createDepthStream(
                "colmap_dense", "depth_colmap_dense_imported", [-1, -1]
            )
            colmap_file = os.path.join(base_dir, "colmap_dense/metadata.npz")
            colmap_stream = video.depthStreamIndex("colmap_dense")
            DepthVideoImporter.importColmapRecon(
                video, colmap_file, colmap_stream, False
            )

        # Add the estimated depth stream.
        depth_tag = f"depth_{model_type}"
        video.createDepthStream(depth_tag, depth_tag, [-1, -1])

        # If using COLMAP, copy its poses to the estimated depth stream.
        if colmap_available():
            self.copy_poses(
                video.depthStreamIndex("colmap_dense"),
                video.depthStreamIndex(depth_tag),
            )

        video.printInfo()
        video.save()

        self.opt_params = convert_opt_params(opt_params)
        self.use_global_scale = opt_params.use_global_scale

        # Initialize flow constraints.
        flow_constraints_params = FlowConstraintsParams()
        # Remove out of bounds frames by explicitly setting clip=True.
        flow_constraints_params.frameRange.resolve(video.numFrames(), True)
        self.flow_constraints = FlowConstraintsCollection(
            video, flow_constraints_params
        )

        dynamic_mode = opt_params.dynamic_constraints
        if dynamic_mode == "Mask":
            min_dynamic_distance = 8
            self.flow_constraints.setStaticFlagFromDynamicMask(
                min_dynamic_distance
            )
        elif dynamic_mode == "Ransac":
            self.flow_constraints.setStaticFlagFromRansac(
                opt_params.epipolar_dist_thresh
            )

        self.flow_constraints.save()

    def optimize_poses(self):
        """Optimize poses and depth transforms of the last depth stream.

        Transforms are reset and depth is normalized before the optimizer
        runs. When `use_global_scale` is set, a second pass repeats that
        sequence with poses fixed, a single step and coarse-to-fine
        disabled. The depth video is saved at the end.
        """
        frames_string = ",".join(map(str, self.frames))

        # The underlying depth maps have been changed by fine-tuning, so we
        # need to clear our caches here.
        self.depth_video.clearDepthCaches()

        processor = DepthVideoProcessor(self.depth_video)
        params = DepthVideoProcessor.Params()

        # We're always optimizing the last depth stream.
        params.depthStream = self.depth_video.numDepthStreams() - 1
        params.frameRange.fromString(frames_string)
        params.poseOptimizer = self.opt_params
        params.poseOptimizer.frameRange.fromString(frames_string)

        # First pass: optimize poses and depth transforms jointly, starting
        # from freshly reset transforms.
        self._reset_xforms_and_optimize(processor, params)

        if self.use_global_scale:
            # Second pass: keep the estimated poses fixed and re-run the
            # optimizer once on freshly reset transforms.
            params.poseOptimizer.fixPoses = True
            params.poseOptimizer.numSteps = 1
            params.poseOptimizer.coarseToFine = False
            self._reset_xforms_and_optimize(processor, params)

        self.depth_video.save()

    def _reset_xforms_and_optimize(self, processor, params):
        """Reset both transform kinds, normalize depth, then run the optimizer.

        Args:
            processor: `DepthVideoProcessor` bound to this depth video.
            params: `DepthVideoProcessor.Params` to run with; its `op` and
                transform descriptors are overwritten here.
        """
        # Reset depth transforms to a global scale transform.
        params.op = DepthVideoProcessor.Op.ResetDepthXforms
        params.depthXformDesc.type = XformType.Depth
        params.depthXformDesc.depthType = DepthXformType.Global
        params.depthXformDesc.valueXform = ValueXformType.Scale
        processor.process(params)

        # Reset spatial transforms to identity.
        params.op = DepthVideoProcessor.Op.ResetSpatialXforms
        params.spatialXformDesc.type = XformType.Spatial
        params.spatialXformDesc.spatialType = SpatialXformType.Identity
        params.spatialXformDesc.valueXform = ValueXformType.Scale
        processor.process(params)

        processor.normalizeDepth(params, self.flow_constraints)
        processor.optimizePoses(params, self.flow_constraints)

    def copy_poses(self, src_ds_id, dst_ds_id):
        """Copy transforms and camera parameters between two depth streams.

        The destination stream's transforms are first reset to the source
        stream's descriptors; then, for every frame of the video, the depth
        transform, spatial transform, extrinsics and intrinsics are copied.

        Args:
            src_ds_id: Index of the depth stream to copy from.
            dst_ds_id: Index of the depth stream to copy to.
        """
        print(f"Copying poses for depth stream {src_ds_id} -> {dst_ds_id}...")

        source = self.depth_video.depthStream(src_ds_id)
        target = self.depth_video.depthStream(dst_ds_id)

        target.resetDepthXforms(source.depthXformDesc())
        target.resetSpatialXforms(source.spatialXformDesc())

        for frame_index in range(self.depth_video.numFrames()):
            source_frame = source.frame(frame_index)
            target_frame = target.frame(frame_index)

            # Update depth and spatial transformation.
            target_frame.depthXform().copyFrom(source_frame.depthXform())
            target_frame.spatialXform().copyFrom(source_frame.spatialXform())

            # Update intrinsics and extrinsics.
            target_frame.extrinsics = source_frame.extrinsics
            target_frame.intrinsics = source_frame.intrinsics

    def duplicate_last_depth_stream(self, name, dir):
        """Make a copy of the last depth stream.

        A new depth stream with the same width and height is created, the
        poses are copied over, and the raw depth files of the frames in
        `self.frames` are copied on disk. The depth video is saved at the
        end.

        Args:
            name: Name of the new depth stream.
            dir: Directory for the new stream; it is registered relative to
                `self.base_dir`.
        """
        dst_ds_id = self.depth_video.numDepthStreams()
        src_ds_id = dst_ds_id - 1
        print(f"Copying depth stream {src_ds_id} -> {dst_ds_id}...")

        src_ds = self.depth_video.depthStream(src_ds_id)
        size = [src_ds.width(), src_ds.height()]

        rel_dir = os.path.relpath(dir, self.base_dir)
        self.depth_video.createDepthStream(name, rel_dir, size)
        print(f"Created depth stream '{name}' (dir '{rel_dir}').")

        # Copy the initialized poses to the other depth stream.
        self.copy_poses(src_ds_id, dst_ds_id)

        dst_ds = self.depth_video.depthStream(dst_ds_id)

        # Copy the actual depth maps.
        src_depth_dir = os.path.join(src_ds.path(), "depth")
        dst_depth_dir = os.path.join(dst_ds.path(), "depth")
        os.makedirs(dst_depth_dir, exist_ok=True)
        for frame in self.frames:
            file_name = f"frame_{frame:06d}.raw"
            shutil.copyfile(
                f"{src_depth_dir}/{file_name}",
                f"{dst_depth_dir}/{file_name}",
            )

        self.depth_video.save()

    def filter_depth(self, radius):
        """Create a flow-guided-filtered copy of the last depth stream.

        A new stream named `<source name>_filtered` is created, the source
        stream is copied into it for the frames in `self.frames`, and the
        `FlowGuidedFilter` operation is run on the copy. The new stream's
        depth and the depth video are saved at the end.

        Args:
            radius: Value assigned to the processor's `frameRadius` for the
                filtering step.
        """
        dst_ds_id = self.depth_video.numDepthStreams()
        src_ds_id = dst_ds_id - 1
        print(f"Filtering depth stream {src_ds_id} -> {dst_ds_id}...")

        src_ds = self.depth_video.depthStream(src_ds_id)
        size = [src_ds.width(), src_ds.height()]

        name = src_ds.name() + "_filtered"
        stream_dir = f"{src_ds.path()}/{name}"
        rel_dir = os.path.relpath(stream_dir, self.base_dir)
        self.depth_video.createDepthStream(name, rel_dir, size)
        print(f"Created depth stream '{name}' (dir '{rel_dir}').")

        processor = DepthVideoProcessor(self.depth_video)
        params = DepthVideoProcessor.Params()
        params.frameRange.fromString(",".join(map(str, self.frames)))

        print("Copying stream data...")
        params.op = DepthVideoProcessor.Op.Copy
        params.sourceDepthStream = src_ds_id
        params.depthStream = dst_ds_id
        processor.process(params)

        print("Filtering...")
        params.op = DepthVideoProcessor.Op.FlowGuidedFilter
        params.frameRadius = radius
        processor.process(params)

        print("Saving...")
        self.depth_video.saveDepth(dst_ds_id)
        self.depth_video.save()