**Name**: Angelique Mangubat

**Affiliation**: Atlantic Technological University

**Assignment 02**: Multiple View Geometry Computer Vision and Modelling Simulation and Test

In [1]:
pip install timm   #install this at the first try

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting timm
  Downloading timm-0.6.13-py3-none-any.whl (549 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m549.1/549.1 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub
  Downloading huggingface_hub-0.13.4-py3-none-any.whl (200 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.1/200.1 kB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: huggingface-hub, timm
Successfully installed huggingface-hub-0.13.4 timm-0.6.13


In [2]:
#Download dependencies
import cv2
from google.colab.patches import cv2_imshow #for google colab use
import torch
import matplotlib.pyplot as plt
import timm

In [3]:
#Download MiDas Model
midas = torch.hub.load('intel-isl/MiDas', 'MiDaS_small')  #(lowest accuracy, highest inference speed)
#midas = torch.hub.load('intel-isl/MiDas', 'DPT_Large')  #(highest accuracy, slowest inference speed)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
midas.to(device)
midas.eval()

Downloading: "https://github.com/intel-isl/MiDas/zipball/master" to /root/.cache/torch/hub/master.zip


Loading weights:  None


Downloading: "https://github.com/rwightman/gen-efficientnet-pytorch/zipball/master" to /root/.cache/torch/hub/master.zip
Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_lite3-b733e338.pth" to /root/.cache/torch/hub/checkpoints/tf_efficientnet_lite3-b733e338.pth
Downloading: "https://github.com/isl-org/MiDaS/releases/download/v2_1/midas_v21_small_256.pt" to /root/.cache/torch/hub/checkpoints/midas_v21_small_256.pt
100%|██████████| 81.8M/81.8M [00:00<00:00, 137MB/s]


MidasNet_small(
  (pretrained): Module(
    (layer1): Sequential(
      (0): Conv2dSameExport(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
      (1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
      (3): Sequential(
        (0): DepthwiseSeparableConv(
          (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (bn1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
          (act1): ReLU6(inplace=True)
          (se): Identity()
          (conv_pw): Conv2d(32, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn2): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
          (act2): Identity()
        )
      )
      (4): Sequential(
        (0): InvertedResidual(
          (conv_pw): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(144,

In [4]:
#Input transformation pipeline
transforms = torch.hub.load('intel-isl/MiDas', 'transforms') 
transform = transforms.small_transform  #grab all the transform from small model type
#transform = transforms.dpt_transform   #grab all the transform from large model type

Using cache found in /root/.cache/torch/hub/intel-isl_MiDas_master


In [5]:
#Map google drive and the input and output files
from google.colab import drive
drive.mount('/content/drive')
VIDEO_STREAM = "/content/drive/My Drive/App/MVGCV/input/movie1.mp4"
OUTPUTFILE_PATH = "/content/drive/My Drive/App/MVGCV/depth/output/test1_output.csv"
OUTPUT_T1_PATH = "/content/drive/My Drive/App/MVGCV/depth/test1/"
OUTPUT_T2_PATH = "/content/drive/My Drive/App/MVGCV/depth/test2/"
OUTPUT_T3_PATH = "/content/drive/My Drive/App/MVGCV/depth/test3/"

Mounted at /content/drive


In [6]:
#create a function PROCESS that returns the process image for every frame
def process(image):
  #transform input for midas
  img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  imgbatch = transform(img).to('cpu')

	#make a prediction
  with torch.no_grad():
    prediction = midas(imgbatch)
    prediction = torch.nn.functional.interpolate(
			  prediction.unsqueeze(1),
			  size = img.shape[:2],
			  mode = 'bicubic',
			  align_corners = False
		  ).squeeze()
  
    output = prediction.cpu().numpy()
    #print(prediction)     #prints tensor values
    #print(output)
    from numpy import savetxt
    savetxt(OUTPUTFILE_PATH, output, delimiter=',') #save raw prediction values to external file
  
    return output

In [None]:
#Open the Video and access every frame, create a new image from the predicted values of MIDAS model
import os
cap = cv2.VideoCapture(VIDEO_STREAM)
time_skips = float(500) #skip every 0.5 seconds.

count = 0
while(cap.isOpened()):    #is boolean checks if frame is still available
    ret, frame = cap.read()
    frame = process(frame)
    #cv2.imshow('frame',frame)    #does not work on google colab, so I used a patch and cv2_imshow
    #cv2_imshow(frame)
    cv2.imwrite(OUTPUT_T1_PATH+"frame%d.jpg"% count, frame)
    cap.set(cv2.CAP_PROP_POS_MSEC, (count*time_skips))
    count += 1   # move the time
    
    if cv2.waitKey(1) & 0xFF == ord('q'):     #quit if q is pressed
        break
        
cap.release()
cv2.destroyAllWindows()

#plt.show()

**References:**

Video on how to create a depth estimation algorithm:

https://www.youtube.com/watch?v=c_WbKfyt8pY

Article on how to use opencv for videos:

https://learnopencv.com/read-write-and-display-a-video-using-opencv-cpp-python/

Article for video processing in google colab:

https://www.geeksforgeeks.org/how-to-use-google-colaboratory-for-video-processing/


How to get MIDAS models:
https://colab.research.google.com/github/pytorch/pytorch.github.io/blob/master/assets/hub/intelisl_midas_v2.ipynb#scrollTo=pleasant-reducing