Merge pull request #191 from isl-org/MiDaS_v3_1

MiDaS v3.1
isl-org · Dec 24, 2022 · eaa249f · eaa249f
2 parents b845b78 + 77edbb9
commit eaa249f
Show file tree

Hide file tree

Showing 21 changed files with 1,885 additions and 712 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -22,7 +22,7 @@ COPY ./midas ./midas
 COPY ./*.py ./
 
 # download model weights so the docker image can be used offline
-RUN cd weights && {curl -OL https://github.com/isl-org/MiDaS/releases/download/v3/dpt_hybrid-midas-501f0c75.pt; cd -; }
+RUN cd weights && { curl -OL https://github.com/isl-org/MiDaS/releases/download/v3/dpt_hybrid_384.pt; cd -; }
 RUN python3 run.py --model_type dpt_hybrid; exit 0
 
 # entrypoint (dont forget to mount input and output directories)

diff --git a/README.md b/README.md
diff --git a/environment.yaml b/environment.yaml
@@ -0,0 +1,16 @@
+name: midas-py310
+channels:
+  - pytorch
+  - defaults
+dependencies:
+  - nvidia::cudatoolkit=11.7
+  - python=3.10.8
+  - pytorch::pytorch=1.13.0
+  - torchvision=0.14.0
+  - pip=22.3.1
+  - numpy=1.23.4
+  - pip:
+    - opencv-python==4.6.0.66
+    - imutils==0.5.4
+    - timm==0.6.12
+    - einops==0.6.0
diff --git a/figures/Comparison.png b/figures/Comparison.png
diff --git a/figures/Improvement_vs_FPS.png b/figures/Improvement_vs_FPS.png
diff --git a/hubconf.py b/hubconf.py
@@ -6,6 +6,144 @@
 from midas.midas_net import MidasNet
 from midas.midas_net_custom import MidasNet_small
 
+def DPT_BEit_L_512(pretrained=True, **kwargs):
+    """ # This docstring shows up in hub.help()
+    Build the MiDaS DPT_BEit_L_512 network for monocular depth estimation.
+    pretrained (bool): if True, fetch the released checkpoint and load it into the model
+    **kwargs: accepted but not used by this entry point
+    Returns the DPTDepthModel instance.
+    """
+
+    net = DPTDepthModel(path=None, backbone="beitl16_512", non_negative=True)
+    if not pretrained:
+        return net
+
+    # Tensors are mapped onto the CPU while the checkpoint is loaded.
+    weights_url = "https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_beit_large_512.pt"
+    weights = torch.hub.load_state_dict_from_url(
+        weights_url,
+        map_location=torch.device('cpu'),
+        progress=True,
+        check_hash=True,
+    )
+    net.load_state_dict(weights)
+    return net
+
+def DPT_BEit_L_384(pretrained=True, **kwargs):
+    """ # This docstring shows up in hub.help()
+    Build the MiDaS DPT_BEit_L_384 network for monocular depth estimation.
+    pretrained (bool): if True, fetch the released checkpoint and load it into the model
+    **kwargs: accepted but not used by this entry point
+    Returns the DPTDepthModel instance.
+    """
+
+    net = DPTDepthModel(path=None, backbone="beitl16_384", non_negative=True)
+    if not pretrained:
+        return net
+
+    # Tensors are mapped onto the CPU while the checkpoint is loaded.
+    weights_url = "https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_beit_large_384.pt"
+    weights = torch.hub.load_state_dict_from_url(
+        weights_url,
+        map_location=torch.device('cpu'),
+        progress=True,
+        check_hash=True,
+    )
+    net.load_state_dict(weights)
+    return net
+
+def DPT_SwinV2_L_384(pretrained=True, **kwargs):
+    """ # This docstring shows up in hub.help()
+    Build the MiDaS DPT_SwinV2_L_384 network for monocular depth estimation.
+    pretrained (bool): if True, fetch the released checkpoint and load it into the model
+    **kwargs: accepted but not used by this entry point
+    Returns the DPTDepthModel instance.
+    """
+
+    net = DPTDepthModel(path=None, backbone="swin2l24_384", non_negative=True)
+    if not pretrained:
+        return net
+
+    # Tensors are mapped onto the CPU while the checkpoint is loaded.
+    weights_url = "https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_swin2_large_384.pt"
+    weights = torch.hub.load_state_dict_from_url(
+        weights_url,
+        map_location=torch.device('cpu'),
+        progress=True,
+        check_hash=True,
+    )
+    net.load_state_dict(weights)
+    return net
+
+def DPT_SwinV2_T_256(pretrained=True, **kwargs):
+    """ # This docstring shows up in hub.help()
+    Build the MiDaS DPT_SwinV2_T_256 network for monocular depth estimation.
+    pretrained (bool): if True, fetch the released checkpoint and load it into the model
+    **kwargs: accepted but not used by this entry point
+    Returns the DPTDepthModel instance.
+    """
+
+    net = DPTDepthModel(path=None, backbone="swin2t16_256", non_negative=True)
+    if not pretrained:
+        return net
+
+    # Tensors are mapped onto the CPU while the checkpoint is loaded.
+    weights_url = "https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_swin2_tiny_256.pt"
+    weights = torch.hub.load_state_dict_from_url(
+        weights_url,
+        map_location=torch.device('cpu'),
+        progress=True,
+        check_hash=True,
+    )
+    net.load_state_dict(weights)
+    return net
+
+def DPT_Next_ViT_L_384(pretrained=True, **kwargs):
+    """ # This docstring shows up in hub.help()
+    Build the MiDaS DPT_Next_ViT_L_384 network for monocular depth estimation.
+    pretrained (bool): if True, fetch the released checkpoint and load it into the model
+    **kwargs: accepted but not used by this entry point
+    Returns the DPTDepthModel instance.
+    """
+
+    net = DPTDepthModel(path=None, backbone="next_vit_large_6m", non_negative=True)
+    if not pretrained:
+        return net
+
+    # Tensors are mapped onto the CPU while the checkpoint is loaded.
+    weights_url = "https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_next_vit_large_384.pt"
+    weights = torch.hub.load_state_dict_from_url(
+        weights_url,
+        map_location=torch.device('cpu'),
+        progress=True,
+        check_hash=True,
+    )
+    net.load_state_dict(weights)
+    return net
+
+def DPT_LeViT_224(pretrained=True, **kwargs):
+    """ # This docstring shows up in hub.help()
+    Build the MiDaS DPT_LeViT_224 network for monocular depth estimation.
+    pretrained (bool): if True, fetch the released checkpoint and load it into the model
+    **kwargs: accepted but not used by this entry point
+    Returns the DPTDepthModel instance.
+    """
+
+    net = DPTDepthModel(path=None, backbone="levit_384", non_negative=True)
+    if not pretrained:
+        return net
+
+    # Tensors are mapped onto the CPU while the checkpoint is loaded.
+    weights_url = "https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_levit_224.pt"
+    weights = torch.hub.load_state_dict_from_url(
+        weights_url,
+        map_location=torch.device('cpu'),
+        progress=True,
+        check_hash=True,
+    )
+    net.load_state_dict(weights)
+    return net
+
 def DPT_Large(pretrained=True, **kwargs):
     """ # This docstring shows up in hub.help()
     MiDaS DPT-Large model for monocular depth estimation
@@ -20,7 +158,7 @@ def DPT_Large(pretrained=True, **kwargs):
 
     if pretrained:
         checkpoint = (
-            "https://github.com/isl-org/MiDaS/releases/download/v3/dpt_large-midas-2f21e586.pt"
+            "https://github.com/isl-org/MiDaS/releases/download/v3/dpt_large_384.pt"
         )
         state_dict = torch.hub.load_state_dict_from_url(
             checkpoint, map_location=torch.device('cpu'), progress=True, check_hash=True
@@ -43,7 +181,7 @@ def DPT_Hybrid(pretrained=True, **kwargs):
 
     if pretrained:
         checkpoint = (
-            "https://github.com/isl-org/MiDaS/releases/download/v3/dpt_hybrid-midas-501f0c75.pt"
+            "https://github.com/isl-org/MiDaS/releases/download/v3/dpt_hybrid_384.pt"
         )
         state_dict = torch.hub.load_state_dict_from_url(
             checkpoint, map_location=torch.device('cpu'), progress=True, check_hash=True
@@ -62,7 +200,7 @@ def MiDaS(pretrained=True, **kwargs):
 
     if pretrained:
         checkpoint = (
-            "https://github.com/isl-org/MiDaS/releases/download/v2_1/model-f6b98070.pt"
+            "https://github.com/isl-org/MiDaS/releases/download/v2_1/midas_v21_384.pt"
         )
         state_dict = torch.hub.load_state_dict_from_url(
             checkpoint, map_location=torch.device('cpu'), progress=True, check_hash=True
@@ -73,15 +211,15 @@ def MiDaS(pretrained=True, **kwargs):
 
 def MiDaS_small(pretrained=True, **kwargs):
     """ # This docstring shows up in hub.help()
-    MiDaS small model for monocular depth estimation on resource-constrained devices
+    MiDaS v2.1 small model for monocular depth estimation on resource-constrained devices
     pretrained (bool): load pretrained weights into model
     """
 
     model = MidasNet_small(None, features=64, backbone="efficientnet_lite3", exportable=True, non_negative=True, blocks={'expand': True})
 
     if pretrained:
         checkpoint = (
-            "https://github.com/isl-org/MiDaS/releases/download/v2_1/model-small-70d6b9c8.pt"
+            "https://github.com/isl-org/MiDaS/releases/download/v2_1/midas_v21_small_256.pt"
         )
         state_dict = torch.hub.load_state_dict_from_url(
             checkpoint, map_location=torch.device('cpu'), progress=True, check_hash=True

diff --git a/install_next_vit.sh b/install_next_vit.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+git clone https://github.com/bytedance/Next-ViT.git externals/Next_ViT