In [1]:
# bare %cd with no argument: the recorded output shows it landing in the home directory
%cd
# enter the unimatch directory; later cells use paths relative to it
# (main_flow.py, pretrained/, demo/, output/)
%cd unimatch

/home/changjin
/home/changjin/unimatch


In [2]:
# check env
import torch

# report the installed PyTorch build
print('PyTorch version: %s' % torch.__version__)

# report which device is available: the CUDA GPU name when there is one, CPU otherwise
if torch.cuda.is_available():
  print('Running on GPU %s' % torch.cuda.get_device_name())
else:
  print('Running on CPU')

PyTorch version: 2.3.0+cu118
Running on GPU NVIDIA GeForce GTX 1080


# Download Model

In [3]:
# we assume the downloaded model weights are located under the pretrained directory.
# -p keeps this cell safe to re-run: mkdir does not fail when the directory already exists
!mkdir -p pretrained

In [4]:
# we provide a large number of model weights with different speed-accuracy trade-off trained on different datasets for downloading:
# https://github.com/autonomousvision/unimatch/blob/master/MODEL_ZOO.md
# download example weights for flow, stereo and depth
# -nc (no-clobber) skips a file that is already present in pretrained/, so re-running
# this cell neither downloads the weights again nor leaves numbered duplicates (*.pth.1).
# NOTE: an interrupted, partial file is also skipped; delete it by hand to retry.
!wget -nc -P pretrained https://s3.eu-central-1.amazonaws.com/avg-projects/unimatch/pretrained/gmflow-scale2-regrefine6-mixdata-train320x576-4e7b215d.pth
!wget -nc -P pretrained https://s3.eu-central-1.amazonaws.com/avg-projects/unimatch/pretrained/gmflow-scale2-regrefine6-kitti15-25b554d7.pth
!wget -nc -P pretrained https://s3.eu-central-1.amazonaws.com/avg-projects/unimatch/pretrained/gmstereo-scale2-regrefine3-resumeflowthings-middleburyfthighres-a82bec03.pth
!wget -nc -P pretrained https://s3.eu-central-1.amazonaws.com/avg-projects/unimatch/pretrained/gmdepth-scale1-regrefine1-resumeflowthings-scannet-90325722.pth

--2024-06-18 14:05:03--  https://s3.eu-central-1.amazonaws.com/avg-projects/unimatch/pretrained/gmflow-scale2-regrefine6-mixdata-train320x576-4e7b215d.pth
Resolving s3.eu-central-1.amazonaws.com (s3.eu-central-1.amazonaws.com)... 52.219.169.181, 52.219.169.157, 3.5.135.17, ...
Connecting to s3.eu-central-1.amazonaws.com (s3.eu-central-1.amazonaws.com)|52.219.169.181|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 29500177 (28M) [binary/octet-stream]
Saving to: ‘pretrained/gmflow-scale2-regrefine6-mixdata-train320x576-4e7b215d.pth’


2024-06-18 14:05:25 (1.35 MB/s) - ‘pretrained/gmflow-scale2-regrefine6-mixdata-train320x576-4e7b215d.pth’ saved [29500177/29500177]

--2024-06-18 14:05:26--  https://s3.eu-central-1.amazonaws.com/avg-projects/unimatch/pretrained/gmflow-scale2-regrefine6-kitti15-25b554d7.pth
Resolving s3.eu-central-1.amazonaws.com (s3.eu-central-1.amazonaws.com)... 3.5.134.197, 52.219.170.157, 3.5.136.242, ...
Connecting to s3.eu-central-1.amazonaws

In [5]:
# check downloading: list pretrained/ to confirm that the four weight files
# requested by the download cell are present
%ls pretrained

gmdepth-scale1-regrefine1-resumeflowthings-scannet-90325722.pth
gmflow-scale2-regrefine6-kitti15-25b554d7.pth
gmflow-scale2-regrefine6-mixdata-train320x576-4e7b215d.pth
gmstereo-scale2-regrefine3-resumeflowthings-middleburyfthighres-a82bec03.pth


# Demo

In [None]:
# visualization utils
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Image, display, HTML
from base64 import b64encode

# default figure size as (width, height) in inches for every plot in this notebook;
# wide and short because the cells below show several images joined side by side
plt.rcParams["figure.figsize"] = (30, 5)

def vis_img(img):
  """Display an image with matplotlib, with the axes hidden.

  Args:
    img: image data, passed unchanged to `plt.imshow`.
  """
  plt.imshow(img)
  plt.axis("off")  # hide axis lines, ticks and labels so only the image is shown
  plt.show()

def vis_video(video_path):
  """Build an inline HTML5 video player for an mp4 file.

  The whole file is read into memory and base64-encoded into a `data:` URL,
  which is used as the `src` of a `<video>` element.

  Args:
    video_path: path of the mp4 file to embed.

  Returns:
    The `HTML` display object wrapping the `<video>` markup; it renders when
    it is the last expression of a cell.

  Raises:
    OSError: if the file cannot be opened or read.
  """
  # use a context manager so the file handle is closed as soon as the bytes are read
  with open(video_path, 'rb') as video_file:
    mp4 = video_file.read()
  data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
  return HTML("""
  <video width=600 controls>
        <source src="%s" type="video/mp4">
  </video>
  """ % data_url)

## Optical Flow

In [None]:
# inference on image dir
# script from: https://github.com/autonomousvision/unimatch/blob/master/scripts/gmflow_demo.sh
# runs main_flow.py with demo/flow-davis as --inference_dir and the mixdata checkpoint
# from pretrained/ as --resume; the visualization cell that follows reads
# 00000_flow.png from the --output_path directory given here
!python main_flow.py \
--inference_dir demo/flow-davis \
--resume pretrained/gmflow-scale2-regrefine6-mixdata-train320x576-4e7b215d.pth \
--output_path output/gmflow-scale2-regrefine6-davis \
--padding_factor 32 \
--upsample_factor 4 \
--num_scales 2 \
--attn_splits_list 2 8 \
--corr_radius_list -1 4 \
--prop_radius_list -1 1 \
--reg_refine \
--num_reg_refine 6

In [None]:
# show the two input frames and the predicted flow image side by side
img0_path = 'demo/flow-davis/00000.jpg'
img1_path = 'demo/flow-davis/00001.jpg'
flow_path = 'output/gmflow-scale2-regrefine6-davis/00000_flow.png'

img0 = plt.imread(img0_path)
img1 = plt.imread(img1_path)
flow = plt.imread(flow_path)

# the jpg frames are divided by 255 before being joined with the png flow image
# (presumably because imread yields 0-255 integers for jpg and 0-1 floats for png; confirm)
flow_panels = [img0 / 255., img1 / 255., flow]
vis_img(np.concatenate(flow_panels, axis=1))

In [None]:
# inference on video
# script from: https://github.com/autonomousvision/unimatch/blob/master/scripts/gmflow_demo.sh
# runs main_flow.py on the video demo/WAM_V.mp4 with the kitti15 checkpoint from pretrained/;
# the cell that follows plays WAM_V_flow_img.mp4 from the --output_path directory given here
# NOTE(review): no visible cell in this notebook creates or downloads demo/WAM_V.mp4;
# it presumably has to be placed there by hand before running — confirm
# NOTE(review): this cell invokes python3 and writes under demo/output/, while the other
# inference cells invoke python and write under output/
!python3 main_flow.py \
--inference_video "demo/WAM_V.mp4" \
--resume pretrained/gmflow-scale2-regrefine6-kitti15-25b554d7.pth \
--output_path "demo/output/WAM_V" \
--padding_factor 32 \
--upsample_factor 4 \
--num_scales 2 \
--attn_splits_list 2 8 \
--corr_radius_list -1 4 \
--prop_radius_list -1 1 \
--reg_refine \
--num_reg_refine 6 \
--save_video \
--concat_flow_img

In [None]:
# visualize video: embed the mp4 found under the video inference cell's --output_path.
# vis_video(...) must stay the last expression of the cell so its return value is rendered
video_path = "demo/output/WAM_V/WAM_V_flow_img.mp4"
vis_video(video_path)

## Stereo Matching

In [None]:
# inference on a stereo pair
# script from: https://github.com/autonomousvision/unimatch/blob/master/scripts/gmstereo_demo.sh
# runs main_stereo.py with demo/stereo-middlebury as --inference_dir and the middlebury
# checkpoint from pretrained/ as --resume; the visualization cell that follows reads
# im0_disp.png from the --output_path directory given here
!python main_stereo.py \
--inference_dir demo/stereo-middlebury \
--inference_size 1024 1536 \
--output_path output/gmstereo-scale2-regrefine3-middlebury \
--resume pretrained/gmstereo-scale2-regrefine3-resumeflowthings-middleburyfthighres-a82bec03.pth \
--padding_factor 32 \
--upsample_factor 4 \
--num_scales 2 \
--attn_type self_swin2d_cross_swin1d \
--attn_splits_list 2 8 \
--corr_radius_list -1 4 \
--prop_radius_list -1 1 \
--reg_refine \
--num_reg_refine 3

In [None]:
# show the stereo pair and the predicted disparity image side by side
img0_path = 'demo/stereo-middlebury/im0.png'
img1_path = 'demo/stereo-middlebury/im1.png'
disp_path = 'output/gmstereo-scale2-regrefine3-middlebury/im0_disp.png'

img0 = plt.imread(img0_path)
img1 = plt.imread(img1_path)
disp = plt.imread(disp_path)

# all three are png files, so they are joined along the width without rescaling
stereo_panels = [img0, img1, disp]
vis_img(np.concatenate(stereo_panels, axis=1))

## Depth Estimation

In [None]:
# inference on posed images
# script from: https://github.com/autonomousvision/unimatch/blob/master/scripts/gmdepth_demo.sh
# runs main_depth.py with demo/depth-scannet as --inference_dir and the scannet checkpoint
# from pretrained/ as --resume; the visualization cell that follows reads 0048.png
# from the --output_path directory given here
!python main_depth.py \
--inference_dir demo/depth-scannet \
--output_path output/gmdepth-scale1-regrefine1-scannet \
--resume pretrained/gmdepth-scale1-regrefine1-resumeflowthings-scannet-90325722.pth \
--reg_refine \
--num_reg_refine 1


In [None]:
# show two of the input color frames and the predicted depth image side by side
img0_path = 'demo/depth-scannet/color/0048.png'
img1_path = 'demo/depth-scannet/color/0054.png'
depth_path = 'output/gmdepth-scale1-regrefine1-scannet/0048.png'

img0 = plt.imread(img0_path)
img1 = plt.imread(img1_path)
depth = plt.imread(depth_path)

# all three are png files, so they are joined along the width without rescaling
depth_panels = [img0, img1, depth]
vis_img(np.concatenate(depth_panels, axis=1))