In [1]:
!pip install timm

Collecting timm
  Downloading timm-0.6.13-py3-none-any.whl (549 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m549.1/549.1 kB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: timm
Successfully installed timm-0.6.13
[0m

In [2]:
import cv2
import torch
import matplotlib.pyplot as plt
import tqdm 
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import os
import numpy as np
import pandas as pd
import pickle
from pathlib import PureWindowsPath, PurePosixPath

In [3]:
model_type = "DPT_Large"     # MiDaS v3 - Large     (highest accuracy, slowest inference speed)
#model_type = "DPT_Hybrid"   # MiDaS v3 - Hybrid    (medium accuracy, medium inference speed)
#model_type = "MiDaS_small"  # MiDaS v2.1 - Small   (lowest accuracy, highest inference speed)

midas = torch.hub.load("intel-isl/MiDaS", model_type)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
midas.to(device)
midas.eval()

Downloading: "https://github.com/intel-isl/MiDaS/archive/master.zip" to /root/.cache/torch/hub/master.zip
Downloading: "https://github.com/isl-org/MiDaS/releases/download/v3/dpt_large_384.pt" to /root/.cache/torch/hub/checkpoints/dpt_large_384.pt


  0%|          | 0.00/1.28G [00:00<?, ?B/s]

DPTDepthModel(
  (pretrained): Module(
    (model): VisionTransformer(
      (patch_embed): PatchEmbed(
        (proj): Conv2d(3, 1024, kernel_size=(16, 16), stride=(16, 16))
        (norm): Identity()
      )
      (pos_drop): Dropout(p=0.0, inplace=False)
      (norm_pre): Identity()
      (blocks): Sequential(
        (0): Block(
          (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (attn): Attention(
            (qkv): Linear(in_features=1024, out_features=3072, bias=True)
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Linear(in_features=1024, out_features=1024, bias=True)
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=1024, out_features=4096, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0

In [4]:
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")

if model_type == "DPT_Large" or model_type == "DPT_Hybrid":
    transform = midas_transforms.dpt_transform
else:
    transform = midas_transforms.small_transform

Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master


In [5]:
'''
input_path = '/kaggle/input/goorealdataset/finalrealdatasetImgsV3/finalrealdatasetImgsV3'
output_path = '/kaggle/working/goo-real-depth'
obj_test = pd.read_pickle('/kaggle/input/goorealdataset/valrealhumansNew.pickle', compression='infer')
df_rg_test = pd.DataFrame.from_records(obj_test)
print(len(df_rg_test))
df = df_rg_test
'''

input_path = '/kaggle/input/goorealdataset/finalrealdatasetImgsV3Sparsed/finalrealdatasetImgsV3Sparsed'
output_path = '/kaggle/working/goo-real-sparse-depth'
obj_test = pd.read_pickle('/kaggle/input/goorealdataset/testrealhumansSparsedNew.pickle', compression='infer')
df_rg_test = pd.DataFrame.from_records(obj_test)
print(len(df_rg_test))
df = df_rg_test


'''
input_path = '/kaggle/input/goosynthtestdataset/goo-synth-test-images/images'
output_path = '/kaggle/working/goo-synth-test-depth'
obj_test = pd.read_pickle('/kaggle/input/goosynthtestdataset/goosynth_test_v2_no_segm.pkl', compression='infer')
df_rg_test = pd.DataFrame.from_records(obj_test)
print(len(df_rg_test))
df = df_rg_test
'''


1162


"\ninput_path = '/kaggle/input/goosynthtestdataset/goo-synth-test-images/images'\noutput_path = '/kaggle/working/goo-synth-test-depth'\nobj_test = pd.read_pickle('/kaggle/input/goosynthtestdataset/goosynth_test_v2_no_segm.pkl', compression='infer')\ndf_rg_test = pd.DataFrame.from_records(obj_test)\nprint(len(df_rg_test))\ndf = df_rg_test\n"

In [6]:
df_rg_test.head()

Unnamed: 0,filename,width,height,ann,gaze_item,gazeIdx,gaze_cx,gaze_cy,hx,hy,seg,cam:,occluded,cam,partnercam
0,8\cam1\cam00001_img00524.jpg,1920,1080,"{'bboxes': [[139.0, 60.0, 176.0, 122.0], [172....",17,41,382,329,199,156,"[[379.0, 320.0], [379.0, 321.0], [380.0, 319.0...",1,False,1,karenlazo\cam0\cam00000_img00524.jpg
1,15\cam0\cam00000_img00674.jpg,1920,1080,"{'bboxes': [[40.0, 149.0, 99.0, 185.0], [42.0,...",24,25,332,182,273,128,"[[326.0, 183.0], [326.0, 184.0], [326.0, 185.0...",0,False,0,ronjobintan\cam1\cam00001_img00673.jpg
2,5\cam0\cam00000_img00602.jpg,1920,1080,"{'bboxes': [[44.0, 162.0, 69.0, 202.0], [118.0...",7,30,454,226,416,239,"[[446.0, 238.0], [446.0, 239.0], [446.0, 240.0...",0,False,0,jhaezminnegayo\cam1\cam00001_img00601.jpg
3,11\cam0\cam00000_img00648.jpg,1920,1080,"{'bboxes': [[109.0, 112.0, 140.0, 185.0], [171...",23,39,329,99,263,193,"[[324.0, 76.0], [324.0, 77.0], [324.0, 78.0], ...",0,False,0,paulayap\cam1\cam00001_img00647.jpg
4,8\cam1\cam00001_img00534.jpg,1920,1080,"{'bboxes': [[139.0, 60.0, 176.0, 122.0], [172....",17,41,382,329,198,156,"[[379.0, 320.0], [379.0, 321.0], [380.0, 319.0...",1,False,1,karenlazo\cam0\cam00000_img00534.jpg


In [7]:
def write_depth(path, depth, grayscale = False, bits=1):
    """Write depth map to png file.
    Args:
        path (str): filepath without extension
        depth (array): depth
        grayscale (bool): use a grayscale colormap?
    """
    if not grayscale:
        bits = 1

    if not np.isfinite(depth).all():
        depth=np.nan_to_num(depth, nan=0.0, posinf=0.0, neginf=0.0)
        print("WARNING: Non-finite depth values present")

    depth_min = depth.min()
    depth_max = depth.max()

    max_val = (2**(8*bits))-1

    if depth_max - depth_min > np.finfo("float").eps:
        out = max_val * (depth - depth_min) / (depth_max - depth_min)
    else:
        out = np.zeros(depth.shape, dtype=depth.dtype)

    if not grayscale:
        out = cv2.applyColorMap(np.uint8(out), cv2.COLORMAP_INFERNO)

    if bits == 1:
        cv2.imwrite(path + ".png", out.astype("uint8"))
    elif bits == 2:
        cv2.imwrite(path + ".png", out.astype("uint16"))

    return

In [8]:
#!rm -r /kaggle/working/
# 932

In [9]:
## for goo real

N = len(df)
op_shape = (480,640)
#op_shape = (224,224)
for ix in tqdm.notebook.tqdm(range(N)):
    # Fetch dataframe row
    row = df.iloc[ix]
    
    # Path magic
    path = PureWindowsPath(row['filename']).as_posix()
    
    # input
    img = Image.open(os.path.join(input_path, path))
    img = np.array(img.convert('RGB'))
    input_batch = transform(img).to(device)
    with torch.no_grad():
        prediction = midas(input_batch)
        
        prediction = torch.nn.functional.interpolate(
            prediction.unsqueeze(1),
            size=op_shape,
            mode="bicubic",
            align_corners=False,
        ).squeeze()

    output = prediction.cpu().numpy()
    
    subfolder_path_str = os.path.splitext(path)[0].split("/")
    key_filename = subfolder_path_str[0]+'/'+subfolder_path_str[1]
    # create output sub-folder
    subfolder_path = os.path.join(output_path, key_filename)
    os.makedirs(subfolder_path, exist_ok=True)
    # output
    filename = os.path.join(
            subfolder_path, 
            os.path.splitext(os.path.basename(path))[0]
        )
    write_depth(filename,output,grayscale=True)
print(output.shape)


  0%|          | 0/1162 [00:00<?, ?it/s]

(480, 640)


In [10]:
"""
## for goo synth
N = len(df)
op_shape = (224,224)
for ix in tqdm.notebook.tqdm(range(N)):
    # Fetch dataframe row
    row = df.iloc[ix]
    
    # Path magic
    path = row['filename']
    
    # input
    img = Image.open(os.path.join(input_path, path))
    img = np.array(img.convert('RGB'))
    input_batch = transform(img).to(device)
    with torch.no_grad():
        prediction = midas(input_batch)
        
        prediction = torch.nn.functional.interpolate(
            prediction.unsqueeze(1),
            size=op_shape,
            mode="bicubic",
            align_corners=False,
        ).squeeze()

    output = prediction.cpu().numpy()
    
    filename = os.path.splitext(path)[0]
    write_depth(filename,output,grayscale=True)
print(output.shape)
"""

'\n## for goo synth\nN = len(df)\nop_shape = (224,224)\nfor ix in tqdm.notebook.tqdm(range(N)):\n    # Fetch dataframe row\n    row = df.iloc[ix]\n    \n    # Path magic\n    path = row[\'filename\']\n    \n    # input\n    img = Image.open(os.path.join(input_path, path))\n    img = np.array(img.convert(\'RGB\'))\n    input_batch = transform(img).to(device)\n    with torch.no_grad():\n        prediction = midas(input_batch)\n        \n        prediction = torch.nn.functional.interpolate(\n            prediction.unsqueeze(1),\n            size=op_shape,\n            mode="bicubic",\n            align_corners=False,\n        ).squeeze()\n\n    output = prediction.cpu().numpy()\n    \n    filename = os.path.splitext(path)[0]\n    write_depth(filename,output,grayscale=True)\nprint(output.shape)\n'

In [None]:
!zip -r goo-real-sparse-test-depth.zip /kaggle/working/


In [None]:
from IPython.display import FileLink
FileLink(r'goo-real-sparse-test-depth.zip')