In [2]:
# check env
import torch

# Report the installed PyTorch build.
print('PyTorch version: %s' % torch.__version__)

# Report which device is visible to PyTorch: the CUDA device name when CUDA
# is available, otherwise a CPU notice.
if torch.cuda.is_available():
  print('Running on GPU %s' % torch.cuda.get_device_name())
else:
  print('Running on CPU')

PyTorch version: 2.3.0+cu118
Running on GPU NVIDIA GeForce GTX 1080


In [3]:
# visualization utils
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Image, display, HTML
from base64 import b64encode

plt.rcParams["figure.figsize"] = (30, 5)

def vis_img(img):
  """Display `img` with matplotlib, with the axes switched off.

  Args:
    img: image data accepted by `plt.imshow`.
  """
  # imshow returns the image artist; hide the axes it was drawn into.
  shown = plt.imshow(img)
  shown.axes.axis("off")
  plt.show()

def vis_video(video_path):
  """Build an inline HTML5 player for the MP4 file at `video_path`.

  The file is read completely and embedded in the markup as a base64
  `data:` URL, so the returned object carries the whole video payload.

  Args:
    video_path: path to the video file to embed.

  Returns:
    An `IPython.display.HTML` object wrapping a 600px-wide `<video>` tag.

  Raises:
    OSError: if the file cannot be opened or read.
  """
  # Use a context manager so the file handle is closed as soon as the bytes
  # are read, rather than whenever the garbage collector gets to it.
  with open(video_path, 'rb') as video_file:
    mp4 = video_file.read()
  data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
  return HTML("""
  <video width=600 controls>
        <source src="%s" type="video/mp4">
  </video>
  """ % data_url)

## Optical Flow Image

In [None]:
# inference on image dir
# script from: https://github.com/autonomousvision/unimatch/blob/master/scripts/gmflow_demo.sh
# Runs main_flow.py over the frames in demo/flow-davis with the checkpoint given
# to --resume; the visualization cell below reads its result from the
# --output_path directory.
# NOTE(review): the script, checkpoint and data paths are all relative, so this
# presumably must be run with the unimatch repository root as the working
# directory — confirm before running.
!python main_flow.py \
--inference_dir demo/flow-davis \
--resume pretrained/gmflow-scale2-regrefine6-mixdata-train320x576-4e7b215d.pth \
--output_path output/gmflow-scale2-regrefine6-davis \
--padding_factor 32 \
--upsample_factor 4 \
--num_scales 2 \
--attn_splits_list 2 8 \
--corr_radius_list -1 4 \
--prop_radius_list -1 1 \
--reg_refine \
--num_reg_refine 6

In [None]:
# Show the two input frames and the predicted flow image side by side.
img0_path = 'demo/flow-davis/00000.jpg'
img1_path = 'demo/flow-davis/00001.jpg'
flow_path = 'output/gmflow-scale2-regrefine6-davis/00000_flow.png'

img0 = plt.imread(img0_path)
img1 = plt.imread(img1_path)
flow = plt.imread(flow_path)

# The two frames are divided by 255 while the flow image is used as loaded —
# presumably because the JPEGs load as 0-255 integers and the PNG as floats in
# [0, 1]; confirm against the matplotlib imread documentation.
panels = [img0 / 255., img1 / 255., flow]
vis_img(np.concatenate(panels, axis=1))

## Optical Flow Video

In [14]:
# List the current working directory (exploratory check of which notebooks and
# demo data are present).
%ls

DETR_panoptic.ipynb  SETR_panoptic.ipynb  [0m[01;34mdemo[0m/
GMFlow.ipynb         UniMatch_demo.ipynb


In [18]:
# Move one directory up and list it.
# NOTE(review): a relative %cd is stateful — every re-run climbs one more
# level — and this cell's execution count (18) is higher than the following
# cell's (16), so the saved outputs were not produced in top-to-bottom order.
%cd ..
%ls

/home/arg
[0m[01;34mmmsegmentation[0m/  [01;34msensing[0m/  [01;34munimatch[0m/


In [16]:
# Move one directory up and list it.
# NOTE(review): duplicates the previous cell; with a relative %cd the resulting
# directory depends on how many times and in what order these cells were run.
%cd ..
%ls

/home/arg
[0m[01;34mmmsegmentation[0m/  [01;34msensing[0m/  [01;34munimatch[0m/


In [20]:
# Change into the notebooks/ directory and list it.
# NOTE(review): the path is relative, so this only works when the current
# directory is the parent of notebooks/ (the saved output shows
# /home/arg/sensing/notebooks) — it depends on state left by earlier %cd cells.
%cd notebooks/
%ls

/home/arg/sensing/notebooks
DETR_panoptic.ipynb  SETR_panoptic.ipynb  [0m[01;34mdemo[0m/
GMFlow.ipynb         UniMatch_demo.ipynb


In [21]:
import os
import subprocess

# directory containing the videos
video_dir = "demo/"

# path to the main_flow.py script
main_flow_script = "/home/arg/unimatch/main_flow.py"

# All .mp4 files in the directory, sorted so the processing order is
# reproducible (os.listdir returns entries in arbitrary order).
video_files = sorted(f for f in os.listdir(video_dir) if f.endswith('.mp4'))

for video_file in video_files:
    # full path to the video file
    video_path = os.path.join(video_dir, video_file)

    # per-video output directory, named after the video without its extension
    output_path = os.path.join("demo/output", os.path.splitext(video_file)[0])

    # Inference command as an argument list: no shell is involved, so file
    # names containing spaces, quotes or other shell metacharacters are passed
    # through unchanged.
    # NOTE(review): --resume is a relative path, resolved against the
    # notebook's working directory rather than the script's — confirm the
    # checkpoint exists there.
    # NOTE(review): a previous run of this cell printed main_flow.py's usage
    # text, which presumably means one of these flags was rejected — check
    # them against `python3 main_flow.py -h`.
    command = [
        "python3", main_flow_script,
        "--inference_video", video_path,
        "--resume", "pretrained/gmflow-scale2-regrefine6-kitti15-25b554d7.pth",
        "--output_path", output_path,
        "--padding_factor", "32",
        "--upsample_factor", "4",
        "--num_scales", "2",
        "--attn_splits_list", "2", "8",
        "--corr_radius_list", "-1", "4",
        "--prop_radius_list", "-1", "1",
        "--reg_refine",
        "--num_reg_refine", "6",
        "--save_img",
        "--save_video",
    ]

    # Run the command and report a failure instead of silently discarding the
    # exit status; keep going so one bad video does not block the rest.
    result = subprocess.run(command)
    if result.returncode != 0:
        print(f"main_flow.py failed for {video_path} (exit code {result.returncode})")

usage: main_flow.py [-h] [--checkpoint_dir CHECKPOINT_DIR] [--stage STAGE]
                    [--val_dataset VAL_DATASET [VAL_DATASET ...]]
                    [--max_flow MAX_FLOW]
                    [--image_size IMAGE_SIZE [IMAGE_SIZE ...]]
                    [--padding_factor PADDING_FACTOR] [--eval]
                    [--save_eval_to_file] [--evaluate_matched_unmatched]
                    [--val_things_clean_only] [--with_speed_metric] [--lr LR]
                    [--batch_size BATCH_SIZE] [--num_workers NUM_WORKERS]
                    [--weight_decay WEIGHT_DECAY] [--grad_clip GRAD_CLIP]
                    [--num_steps NUM_STEPS] [--seed SEED]
                    [--summary_freq SUMMARY_FREQ] [--val_freq VAL_FREQ]
                    [--save_ckpt_freq SAVE_CKPT_FREQ]
                    [--save_latest_ckpt_freq SAVE_LATEST_CKPT_FREQ]
                    [--resume RESUME] [--strict_resume]
                    [--no_resume_optimizer] [--task {flow,stereo,depth}]
           

## Stereo Matching

In [None]:
# inference on a stereo pair
# script from: https://github.com/autonomousvision/unimatch/blob/master/scripts/gmstereo_demo.sh
# Runs main_stereo.py on demo/stereo-middlebury with the checkpoint given to
# --resume; the visualization cell below reads its result from the
# --output_path directory.
# NOTE(review): all paths are relative, so this presumably must be run with the
# unimatch repository root as the working directory — confirm before running.
!python main_stereo.py \
--inference_dir demo/stereo-middlebury \
--inference_size 1024 1536 \
--output_path output/gmstereo-scale2-regrefine3-middlebury \
--resume pretrained/gmstereo-scale2-regrefine3-resumeflowthings-middleburyfthighres-a82bec03.pth \
--padding_factor 32 \
--upsample_factor 4 \
--num_scales 2 \
--attn_type self_swin2d_cross_swin1d \
--attn_splits_list 2 8 \
--corr_radius_list -1 4 \
--prop_radius_list -1 1 \
--reg_refine \
--num_reg_refine 3

In [None]:
# Show the stereo pair and the predicted disparity image side by side.
img0_path = 'demo/stereo-middlebury/im0.png'
img1_path = 'demo/stereo-middlebury/im1.png'
disp_path = 'output/gmstereo-scale2-regrefine3-middlebury/im0_disp.png'

# Load the three images in order: left view, right view, disparity.
img0, img1, disp = (
    plt.imread(path) for path in (img0_path, img1_path, disp_path)
)

# Tile the images horizontally (axis=1) into a single strip.
panels = [img0, img1, disp]
vis_img(np.concatenate(panels, axis=1))

## Depth Estimation

In [None]:
# inference on posed images
# script from: https://github.com/autonomousvision/unimatch/blob/master/scripts/gmdepth_demo.sh
# Runs main_depth.py on demo/depth-scannet with the checkpoint given to
# --resume; the visualization cell below reads its result from the
# --output_path directory.
# NOTE(review): all paths are relative, so this presumably must be run with the
# unimatch repository root as the working directory — confirm before running.
!python main_depth.py \
--inference_dir demo/depth-scannet \
--output_path output/gmdepth-scale1-regrefine1-scannet \
--resume pretrained/gmdepth-scale1-regrefine1-resumeflowthings-scannet-90325722.pth \
--reg_refine \
--num_reg_refine 1


In [None]:
# Show the two posed input images and the predicted depth image side by side.
img0_path = 'demo/depth-scannet/color/0048.png'
img1_path = 'demo/depth-scannet/color/0054.png'
depth_path = 'output/gmdepth-scale1-regrefine1-scannet/0048.png'

img0 = plt.imread(img0_path)
img1 = plt.imread(img1_path)
depth = plt.imread(depth_path)

# Tile the images horizontally (axis=1) into a single strip.
panels = [img0, img1, depth]
vis_img(np.concatenate(panels, axis=1))