-
Notifications
You must be signed in to change notification settings - Fork 11
/
midas_op.py
160 lines (139 loc) · 5.46 KB
/
midas_op.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
"""
# MiDaS
MiDaS models for computing relative depth from a single image.
> MiDaS computes relative inverse depth from a single image. The repository provides multiple models that cover different use cases, ranging from a small, high-speed model to a very large model that provides the highest accuracy. The models have been trained on 10 distinct datasets using multi-objective optimization to ensure high quality on a wide range of inputs.
### Installation:
To install midas offline:
```bash
cd $DORA_DEP_HOME/dependencies/
git clone git@github.com:isl-org/MiDaS.git
cd MiDaS/weights
# If you don't want to add manual download, the program will also automatically download the model file
wget https://github.com/isl-org/MiDaS/releases/download/v2_1/midas_v21_small_256.pt
cp midas_v21_small_256.pt $HOME/.cache/torch/hub/checkpoints/
```
## Inputs
- image: HEIGHT x WIDTH x BGRA array (4 channels; the alpha channel is dropped before inference).
## Outputs
- depth_frame: HEIGHT x WIDTH x BGRA array visualizing the relative depth (inferno colormap).
## Example output
![Imgur](https://i.imgur.com/UrF9iPN.png)
Add the following dataflow configuration
```yaml
- id: midas_op
operator:
outputs:
- depth_frame
inputs:
image: webcam/image
python: ../../operators/midas_op.py
env:
PYTORCH_DEVICE: "cuda"
MIDAS_PATH: $DORA_DEP_HOME/dependencies/MiDaS/
MIDAS_WEIGHT_PATH: $DORA_DEP_HOME/dependencies/MiDaS/weights/midas_v21_small_256.pt
MODEL_TYPE: "MiDaS_small"
MODEL_NAME: "MiDaS_small"
```
> - model_type = "DPT_Large" # MiDaS v3 - Large (highest accuracy, slowest inference speed)
> - model_type = "DPT_Hybrid" # MiDaS v3 - Hybrid (medium accuracy, medium inference speed)
> - model_type = "MiDaS_small" # MiDaS v2.1 - Small (lowest accuracy, highest inference speed)
"""
import os
from typing import Callable
import cv2
import numpy as np
import torch
from dora import DoraStatus
IMAGE_WIDTH = 1920
IMAGE_HEIGHT = 1080
DEVICE = os.environ.get("PYTORCH_DEVICE") or "cpu"
MIDAS_PATH = os.environ.get("MIDAS_PATH")
MIDAS_WEIGHT_PATH = os.environ.get("MIDAS_WEIGHT_PATH")
MODEL_TYPE = os.environ.get("MODEL_TYPE")
MODEL_NAME = os.environ.get("MODEL_NAME")
# example source: https://pytorch.org/hub/intelisl_midas_v2/
class Operator:
def __init__(self):
if MIDAS_PATH is None:
# With internet
self.model = torch.hub.load(
"intel-isl/MiDaS",
MODEL_TYPE,
)
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
else:
# Without internet
self.model = torch.hub.load(
repo_or_dir=MIDAS_PATH,
model=MODEL_NAME,
weights=MIDAS_WEIGHT_PATH,
source="local",
)
midas_transforms = torch.hub.load(
repo_or_dir=MIDAS_PATH, model="transforms", source="local"
)
if MODEL_TYPE == "DPT_Large" or MODEL_TYPE == "DPT_Hybrid":
self.transform = midas_transforms.dpt_transform
else:
self.transform = midas_transforms.small_transform
self.model.to(torch.device(DEVICE))
self.model.eval()
def on_event(
self,
dora_event: dict,
send_output: Callable[[str, bytes], None],
) -> DoraStatus:
if dora_event["type"] == "INPUT":
return self.on_input(dora_event, send_output)
return DoraStatus.CONTINUE
def on_input(
self,
dora_input: dict,
send_output: Callable[[str, bytes], None],
) -> DoraStatus:
"""Handle image
Args:
dora_input["id"] (str): Id of the input declared in the yaml configuration
dora_input["data"] (bytes): Bytes message of the input
send_output (Callable[[str, bytes]]): Function enabling sending output back to dora.
"""
if dora_input["id"] == "image":
# Convert bytes to numpy array
frame = np.frombuffer(
dora_input["data"],
np.uint8,
).reshape((IMAGE_HEIGHT, IMAGE_WIDTH, 4))
with torch.no_grad():
image = frame[:, :, :3]
img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
input_batch = self.transform(img).to(DEVICE)
prediction = self.model(input_batch)
prediction = torch.nn.functional.interpolate(
prediction.unsqueeze(1),
size=img.shape[:2],
mode="bicubic",
align_corners=False,
).squeeze()
depth_output = prediction.cpu().numpy()
depth_min = depth_output.min()
depth_max = depth_output.max()
normalized_depth = (
255 * (depth_output - depth_min) / (depth_max - depth_min)
)
normalized_depth *= 3
depth_frame = (
np.repeat(np.expand_dims(normalized_depth, 2), 3, axis=2) / 3
)
depth_frame = cv2.applyColorMap(
np.uint8(depth_frame), cv2.COLORMAP_INFERNO
)
height, width = depth_frame.shape[:2]
depth_frame_4 = np.dstack(
[depth_frame, np.ones((height, width), dtype="uint8") * 255]
)
send_output(
"depth_frame",
depth_frame_4.tobytes(),
dora_input["metadata"],
)
return DoraStatus.CONTINUE