In [1]:
import cv2
import os
import numpy as np
from random import shuffle
import glob

In [24]:
import torch
import torch.nn as nn
# Scratch check of torch.min along a dimension: stack two 1x5 rows into a
# 2x5 tensor, then reduce over dim 0 to get the column-wise minimum and the
# row index each minimum came from.
a = torch.tensor([[1, 1, 3, 3, 5]])
b = torch.tensor([[2, 2, 2, 2, 2]])
c = torch.cat((a, b), dim=0)
m = c.min(dim=0)
m

torch.return_types.min(
values=tensor([1, 1, 2, 2, 2]),
indices=tensor([0, 0, 1, 1, 1]))

In [41]:
class SSIM(nn.Module):
    """Structural-dissimilarity layer for a pair of image batches.

    Calling the module on ``x`` and ``y`` returns ``(1 - SSIM(x, y)) / 2``
    clamped to ``[0, 1]``, where every local statistic is taken over a 3x3
    window.  Both inputs are reflection-padded by one pixel before pooling,
    so the output keeps the spatial size of the inputs.
    """

    def __init__(self):
        super().__init__()

        # One 3x3, stride-1 averaging window per local statistic.
        self.mu_x_pool = nn.AvgPool2d(3, 1)
        self.mu_y_pool = nn.AvgPool2d(3, 1)
        self.sig_x_pool = nn.AvgPool2d(3, 1)
        self.sig_y_pool = nn.AvgPool2d(3, 1)
        self.sig_xy_pool = nn.AvgPool2d(3, 1)

        # A one-pixel border compensates for the 3x3 window shrinking the map.
        self.refl = nn.ReflectionPad2d(1)

        # Small constants that keep the SSIM ratio finite on flat regions.
        self.C1 = 0.01 ** 2
        self.C2 = 0.03 ** 2

    def forward(self, x, y):
        """Return the clamped ``(1 - SSIM) / 2`` map for ``x`` and ``y``."""
        padded_x, padded_y = self.refl(x), self.refl(y)

        # Local means.
        mean_x = self.mu_x_pool(padded_x)
        mean_y = self.mu_y_pool(padded_y)

        # Local (co)variances, computed as E[ab] - E[a] * E[b].
        var_x = self.sig_x_pool(padded_x ** 2) - mean_x ** 2
        var_y = self.sig_y_pool(padded_y ** 2) - mean_y ** 2
        cov_xy = self.sig_xy_pool(padded_x * padded_y) - mean_x * mean_y

        numerator = (2 * mean_x * mean_y + self.C1) * (2 * cov_xy + self.C2)
        denominator = (mean_x ** 2 + mean_y ** 2 + self.C1) * (var_x + var_y + self.C2)

        # Map similarity in [-1, 1] onto a dissimilarity in [0, 1].
        return torch.clamp((1 - numerator / denominator) / 2, 0, 1)

def compute_reprojection_loss(pred, target):
    """Photometric error map between predicted and target image batches.

    Two terms are each averaged over dim 1 (kept as a singleton dimension)
    and blended as ``0.85 * SSIM term + 0.15 * L1 term``.  The shape of the
    L1 term and the shape of the blended result are printed along the way.
    """
    # Absolute-difference term, reduced over dim 1.
    l1_term = (target - pred).abs().mean(dim=1, keepdim=True)
    print(l1_term.shape)

    # Structural-dissimilarity term from a freshly built SSIM layer.
    ssim_layer = SSIM()
    ssim_term = ssim_layer(pred, target).mean(dim=1, keepdim=True)

    blended = 0.85 * ssim_term + 0.15 * l1_term
    print(blended.shape)
    return blended

In [37]:
# Uniform-random stand-ins for a batch of 8 images at 256x832:
# `pred` has size 3 in dim 1, `target` has size 1 there.
pred = torch.rand((8, 3, 256, 832))
target = torch.rand((8, 1, 256, 832))

In [46]:
# Pass both tensors uncropped.  `pred` and `target` must agree in height and
# width: slicing only `pred` down to 256 // 3 = 85 rows leaves it against the
# 256 rows of `target`, and `target - pred` inside the loss cannot broadcast.
# Only dim 1 (3 vs. 1) is allowed to differ, since size 1 broadcasts.
result = compute_reprojection_loss(pred, target)

torch.Size([8, 1, 256, 832])
torch.Size([8, 1, 256, 832])


# Frame2video 

In [None]:
# Choose which drive sequence the video is built from [TODO]
file_name = '2011_10_03/2011_10_03_drive_0047_sync'
# Other available sequences (uncomment exactly one to switch):
# file_name = '2011_09_30/2011_09_30_drive_0016_sync'
# file_name = '2011_09_29/2011_09_29_drive_0026_sync'
# file_name = '2011_09_28/2011_09_28_drive_0037_sync'
# file_name = '2011_09_26/2011_09_26_drive_0036_sync'
# file_name = '2011_09_26/2011_09_26_drive_0023_sync'
# file_name = '2011_09_26/2011_09_26_drive_0020_sync'
# file_name = '2011_09_26/2011_09_26_drive_0013_sync'
# file_name = '2011_09_26/2011_09_26_drive_0002_sync'

# Choose the folder the video is written to [TODO]
video_output_folder = os.path.join(
    os.path.expanduser("~"), "depth", "monodepth2", "video_result"
)

# Collect the paths of all image files under the sequence's image_02 folder,
# already sorted so that frames are processed in filename order.
kitti_depth_folder = ''.join(
    ['/work/garin0115/datasets/kitti_data/', file_name, '/image_02']
)
filenames = sorted(glob.glob(kitti_depth_folder + '/*/*.jpg'))

print("Total images: {}".format(len(filenames)))

In [None]:
# Encode every frame listed in `filenames` into a single XVID-compressed AVI.
FRAME_SIZE = (1238, 374)  # (width, height) shared by the writer and the resize
FPS = 15.0

# An empty listing almost certainly means a wrong dataset path; stop here
# instead of producing an empty video and reporting success.
if not filenames:
    raise FileNotFoundError('No input frames found; check the dataset path.')

# The writer does not create missing directories, so make sure the target exists.
os.makedirs(video_output_folder, exist_ok=True)
video_path = os.path.join(video_output_folder,
                          'raw_{}.avi'.format(file_name.split('/')[-1]))

fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter(video_path, fourcc, FPS, FRAME_SIZE)

try:
    # A writer that failed to open silently drops every frame written to it.
    if not out.isOpened():
        raise IOError('Could not open video writer for {}'.format(video_path))

    for dataIter, data_path in enumerate(filenames):
        # Single carriage-return line so the index and the path stay visible together.
        print('\r processing {}: {}'.format(dataIter, data_path), end='')

        # cv2.imread returns None (no exception) for missing or unreadable files.
        image = cv2.imread(data_path)
        if image is None:
            raise IOError('Could not read image: {}'.format(data_path))

        # Every frame must match the size the writer was opened with.
        image = cv2.resize(image, FRAME_SIZE)

        # Write video frame
        out.write(image)
finally:
    # Always finalize the container, even when a frame fails midway.
    out.release()
print('\nVideo Record Successed!')