# 計算深度

## 方法
在 FlowFormer 之後銜接一個 NN（神經網路），訓練該 NN 由左右影像之間的光流（flow）回歸出深度。

## import flowformer元件

In [1]:
# Import the FlowFormer components and the rest of the notebook's dependencies.
# NOTE(review): ast.List / ast.Tuple are AST node classes, not typing
# generics, and neither name is used in the visible cells — this looks like an
# accidental auto-import (typing.List / typing.Tuple were probably intended).
from ast import List, Tuple
import sys

# NOTE(review): `root` is not used in the visible cells — probably another
# accidental auto-import; confirm before removing.
from sympy import root
# Put the local 'core' directory on the module search path.
sys.path.append('core')

from PIL import Image
from glob import glob
import argparse
import os
import time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch import optim
import matplotlib.pyplot as plt
from configs.submission import get_cfg
from core.utils.misc import process_cfg
import core.datasets
from core.utils import flow_viz
from core.utils import frame_utils
import cv2
import math
import os.path as osp
from pathlib import Path
from tqdm import tqdm

from core.FlowFormer import build_flowformer

from core.utils.utils import InputPadder, forward_interpolate
import itertools

# Project-local helper module; the cells below call its prepare_image() and
# compute_flow() functions.
import flow_compute
import torch.utils.data as data

# NOTE(review): not referenced in the visible cells; presumably the
# (height, width) used when training FlowFormer — TODO confirm.
TRAIN_SIZE = [432, 960]
# Use the DFKai-SB font for sans-serif text, presumably so that CJK labels
# render in matplotlib figures.
plt.rcParams['font.sans-serif'] = ['DFKai-SB']
# Draw minus signs with the ASCII hyphen instead of the Unicode minus glyph,
# which some CJK fonts lack.
plt.rcParams['axes.unicode_minus'] = False

building  model...


## 載入訓練資料

In [5]:
# Read a depth map
def depth_read(filename) -> torch.Tensor:
    """Load a 16-bit depth PNG and return it as a float64 tensor.

    Raw pixel values are divided by 256 to obtain the depth value, and pixels
    whose raw value is 0 are marked invalid with -1. The original comment
    points to the dataset's readme.txt for the encoding details (presumably
    the KITTI depth devkit, where the result is in metres — TODO confirm).

    Args:
        filename: Path of the PNG file (anything ``PIL.Image.open`` accepts).

    Returns:
        A ``torch.float64`` tensor with one depth value per pixel; -1 where
        the PNG held 0.

    Raises:
        AssertionError: If no pixel exceeds 255, i.e. the file looks like an
            8-bit image rather than a 16-bit depth map.
    """
    # Close the file handle deterministically instead of waiting for GC.
    with Image.open(filename) as img:
        depth_png = np.array(img, dtype=int)

    # Make sure we have a proper 16-bit depth map here, not an 8-bit one.
    # Raised explicitly (same exception type as the former `assert`) so the
    # check is not stripped when Python runs with -O.
    if np.max(depth_png) <= 255:
        raise AssertionError(
            f"{filename} does not look like a 16-bit depth map")

    depth = depth_png.astype(float) / 256.0
    depth[depth_png == 0] = -1.0
    return torch.from_numpy(depth)


# Smoke test: load one ground-truth depth map and print it.
# NOTE(review): np.printoptions only changes how NumPy arrays are printed;
# `depth` is a torch.Tensor, and the output recorded for this cell is still
# abbreviated with "...". Confirm whether torch.set_printoptions or printing
# depth.numpy() was intended.
depth = depth_read(r"E:\datasets\KITTI_Depth Prediction\data_depth_annotated\train\2011_09_26_drive_0001_sync\proj_depth\groundtruth\image_02\0000000005.png")
with np.printoptions(threshold=np.inf):
    print(depth)

tensor([[-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        ...,
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.]], dtype=torch.float64)


In [14]:
# Define the KITTI depth dataset
import torch.utils.data as data
from torch.utils.data import RandomSampler, DataLoader, Subset


class KITTI_Depth_Dataset(data.Dataset):
    """Stereo image pairs, ground-truth depth and pre-computed flow for KITTI.

    Expected directory layout (one sub-directory per drive):

    * ``<depth_path>/<type>/<drive>/proj_depth/groundtruth/image_0{2,3}/*.png``
    * ``<KITTI_path>/<date>/<drive>/image_0{2,3}/data/*.png`` where ``date``
      is the first 10 characters of the drive name
    * ``<flow_path>/<drive>/*.pt``

    Every file list is sorted by name and samples are matched by position,
    so this assumes the file names sort in frame order — TODO confirm for the
    pre-computed flow files.

    Args:
        KITTI_path: Root directory of the raw KITTI recordings.
        depth_path: Root directory of the annotated depth maps.
        flow_path: Root directory of the pre-computed flow tensors.
        type: Name of the split sub-directory under ``depth_path``
            (e.g. "train" or "val").
    """

    def __init__(self, KITTI_path=r'E:\datasets\KITTI', depth_path=r'E:\datasets\KITTI_Depth Prediction\data_depth_annotated', flow_path=r'E:\ck\master\ff_depth\flow_pre_compute', type="train"):
        self.depth_02_paths = []
        # NOTE: collected for completeness; __getitem__ only reads image_02 depth.
        self.depth_03_paths = []
        self.image_02_paths = []
        self.image_03_paths = []
        self.flows = []

        kitti_root = Path(KITTI_path)
        split_root = Path(depth_path) / type
        flow_root = Path(flow_path)

        # Sorted so that the index -> sample mapping is deterministic.
        for drive_dir in sorted(split_root.iterdir()):
            # Ground-truth depth maps for both cameras.
            gt_root = drive_dir / "proj_depth" / "groundtruth"
            depth_02s = sorted((gt_root / "image_02").glob("*.png"))
            depth_03s = sorted((gt_root / "image_03").glob("*.png"))

            # The drive name starts with its recording date (first 10 chars).
            drive = drive_dir.name
            date = drive[:10]

            # RGB frames; the first and last five frames are dropped,
            # presumably because the annotated depth set omits them —
            # TODO confirm.
            raw_root = kitti_root / date / drive
            rgb_02s = sorted((raw_root / "image_02" / "data").glob("*.png"))[5:-5]
            rgb_03s = sorted((raw_root / "image_03" / "data").glob("*.png"))[5:-5]

            # Pre-computed flow. Materialised and sorted: Path.glob returns a
            # generator, which has no len() and no guaranteed order.
            flows = sorted((flow_root / drive).glob("*.pt"))

            # Skip the drive unless every modality has the same frame count.
            counts = [len(rgb_02s), len(rgb_03s), len(depth_02s),
                      len(depth_03s), len(flows)]
            if len(set(counts)) != 1:
                print(
                    f"data at {drive} is not the same! {counts[0]} vs {counts[1]} vs {counts[2]} vs {counts[3]} vs {counts[4]}")
                continue

            # Register the drive's samples.
            self.depth_02_paths.extend(str(p) for p in depth_02s)
            self.depth_03_paths.extend(str(p) for p in depth_03s)
            self.image_02_paths.extend(str(p) for p in rgb_02s)
            self.image_03_paths.extend(str(p) for p in rgb_03s)
            self.flows.extend(flows)

        print(f"Add {len(self.depth_02_paths)} depth_02 path")
        print(f"Add {len(self.depth_03_paths)} depth_03 path")
        print(f"Add {len(self.image_02_paths)} image_02 path")
        print(f"Add {len(self.image_03_paths)} image_03 path")
        print(f"Add {len(self.flows)} flow path")
        self.n_samples = len(self.depth_02_paths)

    def _load_sample(self, index):
        """Load the (image1, image2, depth, flow) tuple stored at ``index``."""
        image1, image2 = flow_compute.prepare_image(
            self.image_02_paths[index], self.image_03_paths[index], keep_size=True)
        depth = depth_read(self.depth_02_paths[index])
        # NOTE: torch.load unpickles the file; only point flow_path at files
        # you produced yourself.
        flow = torch.load(self.flows[index])
        return image1, image2, depth, flow

    def __getitem__(self, index):
        """Return one sample, or four parallel lists of samples for a slice.

        Args:
            index: An integer-like index (anything list indexing accepts) or
                a ``slice``.

        Returns:
            For an integer: ``(image1, image2, depth, flow)`` where the images
            come from ``flow_compute.prepare_image``, the depth from
            ``depth_read`` and the flow from ``torch.load``.
            For a slice: ``(images_02, images_03, depths, flows)``, four lists
            holding the corresponding items of every selected sample.

        Raises:
            TypeError: If ``index`` is neither integer-like nor a slice.
            IndexError: If an integer index is out of range.
        """
        if isinstance(index, slice):
            ans_image_02s = []
            ans_image_03s = []
            ans_image_depths = []
            ans_image_flows = []
            for i in range(*index.indices(len(self))):
                image1, image2, depth, flow = self._load_sample(i)
                ans_image_02s.append(image1)
                ans_image_03s.append(image2)
                ans_image_depths.append(depth)
                ans_image_flows.append(flow)
            return ans_image_02s, ans_image_03s, ans_image_depths, ans_image_flows

        # List indexing accepts any integer-like object and raises TypeError
        # for everything else, instead of silently returning None.
        return self._load_sample(index)

    def __len__(self):
        """Return the number of samples registered by ``__init__``."""
        return self.n_samples

# Build the training and validation datasets; each constructor scans the
# dataset directories and prints how many paths it registered.
my_kitti = KITTI_Depth_Dataset(type="train")
my_kitti_val = KITTI_Depth_Dataset(type="val")

KeyboardInterrupt: 

## 測試計算光流(legacy)

In [6]:
# Legacy cell: load the first sample, prepare the image pair and compute its
# optical flow on the fly.
# NOTE(review): this cell unpacks three values and treats them as file paths,
# which matches the recorded output. The current
# KITTI_Depth_Dataset.__getitem__ returns four items
# (image1, image2, depth, flow), so this cell needs updating before it can be
# re-run.
path1, path2, depth_path = my_kitti[0]
image1, image2 = flow_compute.prepare_image(path1, path2, keep_size=True)
depth = depth_read(depth_path)
print(f"image1: {image1.shape}, {path1}")
print(f"image2: {image2.shape}, {path2}")
print(f"depth: {depth.shape}, {depth_path}")
# Inference only: no gradients are needed for the flow computation.
with torch.no_grad():
    flow = flow_compute.compute_flow(image1, image2)
print(f"flow: {flow.shape}")

preparing image...
E:\datasets\KITTI\2011_09_26\2011_09_26_drive_0001_sync\image_02\data\0000000005.png, E:\datasets\KITTI\2011_09_26\2011_09_26_drive_0001_sync\image_03\data\0000000005.png
image1: torch.Size([3, 375, 1242]), E:\datasets\KITTI\2011_09_26\2011_09_26_drive_0001_sync\image_02\data\0000000005.png
image2: torch.Size([3, 375, 1242]), E:\datasets\KITTI\2011_09_26\2011_09_26_drive_0001_sync\image_03\data\0000000005.png
depth: torch.Size([375, 1242]), E:\datasets\KITTI_Depth Prediction\data_depth_annotated\train\2011_09_26_drive_0001_sync\proj_depth\groundtruth\image_02\0000000005.png
computing flow...


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


flow: (375, 1242, 2)


## 建立NN網路

參考:  
[1]: https://ithelp.ithome.com.tw/m/articles/10289699 "iT 邦幫忙 PyTorch Feedforward Neural Network"  
[2]: https://github.com/aladdinpersson/Machine-Learning-Collection/blob/master/ML/Pytorch/Basics/pytorch_simple_fullynet.py#L68 "ML/Pytorch/Basics/pytorch_simple_fullynet.py"  


In [None]:
class NN(nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of features in each (flattened) input sample.
        num_classes: Number of output values produced per sample.
        hidden_size: Width of the hidden layer. Defaults to 50, the value
            that used to be hard-coded.
    """

    def __init__(self, input_size, num_classes, hidden_size=50):
        super().__init__()
        # input_size -> hidden_size -> num_classes
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Map ``x`` of shape (..., input_size) to shape (..., num_classes)."""
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)


# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Hyperparameters
# NOTE(review): 784 inputs / 10 outputs are the sizes quoted in the NN class's
# original tutorial comment; they do not obviously correspond to the KITTI
# samples loaded above (whose recorded image shape is 3 x 375 x 1242) —
# TODO confirm the intended sizes.
input_size = 784
num_classes = 10
learning_rate = 0.001
batch_size = 64
num_epochs = 1

# Load Data
# Wrap the training and validation datasets in shuffling mini-batch loaders.
# NOTE(review): the validation loader is shuffled too — confirm that is wanted.
train_loader = DataLoader(dataset=my_kitti,
                          batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=my_kitti_val,
                         batch_size=batch_size, shuffle=True)

In [None]:
# Initialize network
# Build the model and move its parameters to the selected device.
model = NN(input_size=input_size, num_classes=num_classes).to(device)

# Loss and optimizer
# NOTE(review): CrossEntropyLoss is a classification criterion, whereas the
# notebook's stated goal is to regress depth from flow — confirm the intended
# loss.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

## 訓練NN網路

In [None]:
# Train Network
# NOTE(review): KITTI_Depth_Dataset.__getitem__ returns four items per sample
# (image1, image2, depth, flow), so the two-name unpacking below presumably
# still needs to be adapted to the dataset — TODO confirm which items are the
# input and the target.
for epoch in range(num_epochs):
    # The batch is bound to `inputs` rather than `data`: at notebook scope a
    # loop variable named `data` would overwrite the `torch.utils.data as data`
    # module alias imported earlier.
    for batch_idx, (inputs, targets) in enumerate(tqdm(train_loader)):
        # Move the batch to the GPU if one is in use
        inputs = inputs.to(device=device)
        targets = targets.to(device=device)

        # Flatten every sample to a 1-D feature vector for the linear layers
        inputs = inputs.reshape(inputs.shape[0], -1)

        # Forward
        scores = model(inputs)
        loss = criterion(scores, targets)

        # Backward: clear stale gradients before computing new ones
        optimizer.zero_grad()
        loss.backward()

        # Gradient descent or adam step
        optimizer.step()

## 儲存NN網路

## 載入NN網路