In [1]:
import numpy as np
import os
import pickle
import gc
import re
import cv2
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf

from PIL import Image
from mpl_toolkits.axes_grid1 import make_axes_locatable
from IPython.display import clear_output

from tensorflow.keras import layers, Model
from tensorflow.keras.utils import Sequence
from tensorflow.keras.layers import Layer, Conv2D, Conv3D, Conv3DTranspose, BatchNormalization, ReLU
from tensorflow.python.keras.layers.convolutional import Conv3DTranspose
from tensorflow.python.ops.init_ops_v2 import he_normal
from keras import regularizers
from keras.callbacks import EarlyStopping, ModelCheckpoint

# from tensorflow.keras import mixed_precision
# Report which TensorFlow build this notebook is running against.
print("TensorFlow version: ", tf.__version__)

# Keep the TensorFlow logger quiet: only messages at ERROR level are emitted.
tf.get_logger().setLevel('ERROR')

# Turn on memory growth for every GPU that TensorFlow can see, so memory is
# allocated on demand. With no visible GPU the loop body simply never runs.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

TensorFlow version:  2.10.0


In [2]:
def _scale_img(img):
    """Convert [0-255] to [-1.0~1.0]"""
    return (np.array(img, dtype=np.float32) / 255.0)*2.0-1.0

def _clamp_disp(disp, min_disp, max_disp):
    """Clip max disparity, ortherwise it'll be hard for network to learn really big disparity/close object"""
    return np.clip(disp, min_disp, max_disp)

def StereoDataloader(img_left, img_right, disp_left, img_h, img_w, df_h, df_w, batch_num, ComPerBatch, data_ord, max_disp):
    """Load one batch of cropped stereo pairs together with their disparity maps.

    Args:
        img_left, img_right: indexable collections of image files readable by
            ``Image.open`` (left / right view).
        disp_left: indexable collection of comma-separated text files readable
            by ``np.loadtxt`` holding the left-view disparity.
        img_h, img_w: height and width of the crop taken from every sample.
        df_h, df_w: slack between the stored size and the crop size. A random
            crop offset is drawn along an axis only when its slack exceeds 32,
            otherwise the crop starts at 0 on that axis.
        batch_num: zero-based index of the batch to load.
        ComPerBatch: number of samples per batch.
        data_ord: maps a running sample position to an index into the file lists.
        max_disp: upper clamp applied to the disparity values (lower clamp is 0).

    Returns:
        ``(images, disparities)`` as float32 arrays. Each image entry is the
        scaled left and right crops concatenated along axis 2; each disparity
        entry is the clamped, cropped disparity map.
    """
    # One offset pair is drawn per call and shared by all samples of the batch.
    # The row offset is drawn before the column offset.
    top = np.random.randint(0, df_h) if df_h > 32 else 0
    left_edge = np.random.randint(0, df_w) if df_w > 32 else 0
    rows = slice(top, top + img_h)
    cols = slice(left_edge, left_edge + img_w)

    first = batch_num * ComPerBatch
    stereo_pairs = []
    disparities = []
    for position in range(first, first + ComPerBatch):
        sample = data_ord[position]

        left_px = np.array(Image.open(img_left[sample]))[rows, cols, :]
        right_px = np.array(Image.open(img_right[sample]))[rows, cols, :]
        stereo_pairs.append(np.concatenate((_scale_img(left_px), _scale_img(right_px)), axis=2))

        disp_map = np.loadtxt(disp_left[sample], delimiter=",", dtype=np.float32)[rows, cols]
        disparities.append(_clamp_disp(disp_map, 0, max_disp))

    return np.array(stereo_pairs, dtype=np.float32), np.array(disparities, dtype=np.float32)

In [3]:
class _ConvBR_2D(Layer):
    """
    2-D convolution followed by batch normalization and a ReLU activation.

    Args:
        n_feature: number of output channels of the convolution.
        kernel_size: convolution kernel size.
        strides: convolution strides, (1, 1) by default.
    """
    def __init__(self, n_feature, kernel_size, strides=(1, 1)):
        super().__init__()
        # "same" padding plus L2 (0.001) weight decay on the kernel; no kernel
        # initializer is passed, so the layer's default one applies.
        self.conv = Conv2D(
            filters=n_feature,
            kernel_size=kernel_size,
            strides=strides,
            padding="same",
            kernel_regularizer=tf.keras.regularizers.l2(0.001),
        )
        self.bn = BatchNormalization()

    @tf.function
    def call(self, x, is_training):
        # is_training selects the batch-norm mode (batch vs. moving statistics).
        normalized = self.bn(self.conv(x), training=is_training)
        return tf.nn.relu(normalized)


class _ConvBR_3D(Layer):
    """
    3-D convolution followed by batch normalization and a ReLU activation.

    Args:
        n_feature: number of output channels of the convolution.
        kernel_size: convolution kernel size.
        strides: convolution strides, (1, 1, 1) by default.
    """
    def __init__(self, n_feature, kernel_size, strides=(1, 1, 1)):
        super().__init__()
        # "same" padding plus L2 (0.001) weight decay on the kernel; no kernel
        # initializer is passed, so the layer's default one applies.
        self.conv = Conv3D(
            filters=n_feature,
            kernel_size=kernel_size,
            strides=strides,
            padding="same",
            kernel_regularizer=tf.keras.regularizers.l2(0.001),
        )
        self.bn = BatchNormalization()

    @tf.function
    def call(self, x, is_training):
        # is_training selects the batch-norm mode (batch vs. moving statistics).
        normalized = self.bn(self.conv(x), training=is_training)
        return tf.nn.relu(normalized)


class _DeconvBR_3D(Layer):
    """
    Transposed 3-D convolution followed by batch normalization and a ReLU activation.

    Args:
        n_feature: number of output channels of the transposed convolution.
        kernel_size: convolution kernel size.
        strides: strides of the transposed convolution, (2, 2, 2) by default.
    """
    def __init__(self, n_feature, kernel_size, strides=(2, 2, 2)):
        super().__init__()
        # "same" padding plus L2 (0.001) weight decay on the kernel; no kernel
        # initializer is passed, so the layer's default one applies.
        self.conv = Conv3DTranspose(
            filters=n_feature,
            kernel_size=kernel_size,
            strides=strides,
            padding="same",
            kernel_regularizer=tf.keras.regularizers.l2(0.001),
        )
        self.bn = BatchNormalization()

    @tf.function
    def call(self, x, is_training):
        # is_training selects the batch-norm mode (batch vs. moving statistics).
        normalized = self.bn(self.conv(x), training=is_training)
        return tf.nn.relu(normalized)


class _GCNetUnary(Layer):
    """
    Unary part (Section 3.1) of GCNet paper

    A stride-2 5x5 Conv-BN-ReLU stem, seven residual blocks of two 5x5
    Conv-BN-ReLU layers each, and a final plain 3x3 convolution that has no
    batch normalization or activation.
    """
    def __init__(self, feature_n):
        super().__init__()
        self.conv1 = _ConvBR_2D(feature_n, 5, strides=(2, 2))

        # Each residual block is an (a, b) pair; the pairs are created in
        # a0, b0, a1, b1, ... order and then split into the two tracked lists.
        residual_pairs = [
            (_ConvBR_2D(feature_n, 5), _ConvBR_2D(feature_n, 5))
            for _ in range(7)
        ]
        self.conv_a = [first for first, _ in residual_pairs]
        self.conv_b = [second for _, second in residual_pairs]

        self.conv_final = Conv2D(feature_n, 3, padding="same")

    @tf.function
    def call(self, x, is_training):
        features = self.conv1(x, is_training)

        for block_a, block_b in zip(self.conv_a, self.conv_b):
            # Residual connection around the two convolutions of the block.
            shortcut = features
            features = block_b(block_a(features, is_training), is_training)
            features = features + shortcut

        return self.conv_final(features)

In [4]:
class SoftArgMin(Layer):
    """Softmax-weighted sum of disparity indices along axis 1.

    The softmax is applied to the input as given (no sign flip), so the
    result is the expected index under ``softmax(x)``.
    """
    def __init__(self, n_disp):
        super().__init__()
        # Index values 0 .. n_disp-1 as float32, shaped [1, n_disp, 1, 1] so
        # that they broadcast against a [N, D, H, W] input.
        self.disp_indices = tf.reshape(
            tf.range(n_disp, dtype=tf.float32), [1, n_disp, 1, 1]
        )

    def call(self, x):
        # x: [N, D, H, W]; normalise over the disparity axis.
        weights = tf.nn.softmax(x, axis=1)
        # Weighted sum over D collapses [N, D, H, W] to [N, H, W].
        return tf.math.reduce_sum(weights * self.disp_indices, axis=1)

In [5]:
class _GCNetCostVolume(Layer):
    """
    Cost Volume part (Section 3.2) of GCNet paper

    For every shift d in [0, n_disp // 2) the right feature map is moved d
    columns to the right (zero-filled on the left) and concatenated with the
    left feature map along the channel axis. The per-shift results are
    stacked along a new axis 1.
    """

    def __init__(self, n_disp):
        super().__init__()
        assert n_disp % 2 == 0
        self.n_disp = n_disp

    @tf.function
    def call(self, left, right):   # NOTE(review): original author flagged the volume construction for re-checking
        # [N, H, W, C] -> [N, 1, H, W, C]
        left = tf.expand_dims(left, axis=1)
        right = tf.expand_dims(right, axis=1)

        W = right.shape[3]
        n_shifts = self.n_disp // 2
        assert n_shifts < W  # Disparity must be lower than W

        shifted_pairs = []
        for shift in range(n_shifts):
            # Drop the last `shift` columns, then zero-pad on the left to restore the width.
            moved_right = self._pad_left(right[:, :, :, : W - shift, :], shift)
            shifted_pairs.append(tf.concat([left, moved_right], axis=4))

        # [N, n_disp // 2, H, W, 2C]
        return tf.concat(shifted_pairs, axis=1)

    def _pad_left(self, x, left_val):
        # Zero-pad only the leading side of axis 3 (width) of a 5-D tensor.
        paddings = [[0, 0], [0, 0], [0, 0], [left_val, 0], [0, 0]]
        return tf.pad(x, paddings)
    

class _GCNetRegularization(Layer):
    """
    Regularization part (Section 3.3) of GCNet Paper

    A 3-D encoder-decoder over the cost volume: four stride-2 stages going
    down, four stride-2 transposed convolutions going up with additive skip
    connections, and a final transposed convolution that produces a single
    channel, which is squeezed away.
    """

    def __init__(self, feature_n):
        super().__init__()
        wide = feature_n * 2
        widest = feature_n * 4
        halve = (2, 2, 2)

        # Full-resolution branch.
        self.conv1 = _ConvBR_3D(feature_n, 3)
        self.conv2 = _ConvBR_3D(feature_n, 3)

        # First stride-2 stage.
        self.conv3 = _ConvBR_3D(wide, 3, strides=halve)
        self.conv4 = _ConvBR_3D(wide, 3)
        self.conv5 = _ConvBR_3D(wide, 3)

        # Second stride-2 stage.
        self.conv6 = _ConvBR_3D(wide, 3, strides=halve)
        self.conv7 = _ConvBR_3D(wide, 3)
        self.conv8 = _ConvBR_3D(wide, 3)

        # Third stride-2 stage.
        self.conv9 = _ConvBR_3D(wide, 3, strides=halve)
        self.conv10 = _ConvBR_3D(wide, 3)
        self.conv11 = _ConvBR_3D(wide, 3)

        # Fourth stride-2 stage (bottleneck).
        self.conv12 = _ConvBR_3D(widest, 3, strides=halve)
        self.conv13 = _ConvBR_3D(widest, 3)
        self.conv14 = _ConvBR_3D(widest, 3)

        # Decoder: each transposed convolution undoes one stride-2 stage.
        self.deconv1 = _DeconvBR_3D(wide, 3, strides=halve)
        self.deconv2 = _DeconvBR_3D(wide, 3, strides=halve)
        self.deconv3 = _DeconvBR_3D(wide, 3, strides=halve)
        self.deconv4 = _DeconvBR_3D(feature_n, 3, strides=halve)
        # Plain transposed convolution (no BN / ReLU) down to one channel.
        self.deconv_final = Conv3DTranspose(1, 3, strides=halve, padding="same")

    @tf.function
    def call(self, cost_volume, is_training):
        # Encoder. Every stride-2 layer feeds both the next stride-2 layer and
        # a two-layer branch whose output is kept as a skip connection.
        skip_full = self.conv2(self.conv1(cost_volume, is_training), is_training)

        stage1 = self.conv3(cost_volume, is_training)
        skip1 = self.conv5(self.conv4(stage1, is_training), is_training)

        stage2 = self.conv6(stage1, is_training)
        skip2 = self.conv8(self.conv7(stage2, is_training), is_training)

        stage3 = self.conv9(stage2, is_training)
        skip3 = self.conv11(self.conv10(stage3, is_training), is_training)

        stage4 = self.conv12(stage3, is_training)
        bottleneck = self.conv14(self.conv13(stage4, is_training), is_training)

        # Decoder. Upsample and add the skip tensor of the matching stage.
        up = self.deconv1(bottleneck, is_training) + skip3
        up = self.deconv2(up, is_training) + skip2
        up = self.deconv3(up, is_training) + skip1
        up = self.deconv4(up, is_training) + skip_full

        single_channel = self.deconv_final(up)
        # [N, D, H, W, 1] -> [N, D, H, W]
        return tf.squeeze(single_channel, axis=-1)
       

# Full GC-Net model; the final disparity is read out with SoftArgMin.
class GCNet_basic(Model):
    """
    End-to-End Learning of Geometry and Context for Deep Stereo Regression
    https://arxiv.org/abs/1703.04309

    Pipeline: shared unary feature extractor on both views -> cost volume ->
    3-D regularization -> SoftArgMin readout.
    """

    def __init__(self, max_disp, feature_n):
        super().__init__()
        self.unary_block = _GCNetUnary(feature_n)
        self.cost_volume_block = _GCNetCostVolume(max_disp)
        self.regularization_block = _GCNetRegularization(feature_n)
        self.soft_argmin = SoftArgMin(max_disp)

    @tf.function
    def call(self, data, training=False):
        # The input carries both views stacked on axis 3; split it into two
        # equal halves (left first, right second).
        left_view, right_view = tf.split(data, num_or_size_splits=2, axis=3)

        # The same unary block (shared weights) processes both views.
        left_features = self.unary_block(left_view, training)
        right_features = self.unary_block(right_view, training)

        volume = self.cost_volume_block(left_features, right_features)
        regularized = self.regularization_block(volume, training)
        return self.soft_argmin(regularized)

In [6]:
#### Loading dataset path
# The three pickles are indexed per sample by the data loader (left image,
# right image, left disparity). Presumably each holds a list of file paths;
# verify against the script that wrote them.
# NOTE(security): pickle.load can execute arbitrary code, so only load
# pickle files that come from a trusted source.
# `with` guarantees the handles are closed even if unpickling raises.
with open('gh_imgl.pkl', 'rb') as finl, open('gh_imgr.pkl', 'rb') as finr:
    img_left = pickle.load(finl)
    img_right = pickle.load(finr)

with open('gh_disp.pkl', 'rb') as finl:
    disp_left = pickle.load(finl)

#training/test data set
tot_num1 = len(img_left)//2 # Half number, training data
tot_num2 = len(img_left) # total number, training+test data
file_max = tot_num2 # if training, file_max = tot_num1. Elif testing performance,file_max = tot_num2. 

# data size
max_w = 984      # width of the stored data
max_h = 560      # height of the stored data
max_disp = 160   # upper clamp for disparity values; also sizes the network

img_w = 960      # width of the crop fed to the network
img_h = 544      # height of the crop fed to the network
df_w = max_w - img_w   # horizontal slack left over by the crop
df_h = max_h - img_h   # vertical slack left over by the crop

feature_n = 32 # Default Feature Number of GC-Net
ComPerBatch = 1 #24 #256 # component number per batch
# Number of batches. Derived from file_max (not tot_num2) so that it always
# matches the number of samples actually selected; the two only differ when
# file_max is switched to tot_num1 for training.
batch_size = np.int32(file_max/ComPerBatch) # number of batch
totpx_batch = img_w*img_h*ComPerBatch  # pixels per batch, used to normalise pixel-count ratios

# if you want to save data, save_data = 1,
#  else save_data = 0
save_data = 1

# Image display function
def disp_img(img, title):
    """Show ``img`` inline with the 'jet' colormap, a colorbar and ``title``.

    The figure is closed and garbage is collected afterwards so that calling
    this repeatedly does not accumulate open figures.
    """
    plt.figure(figsize=(5, 3))
    ax = plt.gca()
    ax.set_title(title)
    ax.axis('off')
    im = ax.imshow(img, cmap='jet')
    plt.colorbar(im, shrink=0.5)
    plt.tight_layout()
    plt.show()

    plt.close()
    gc.collect()

# Fixed colour scale shared by every saved disparity image.
vmin = 40   # lower end of the colour scale
vmax = 80   # upper end of the colour scale
vgap = 10   # spacing between colorbar ticks

# Image save function
def _save_img(img, title, dest, file_name, data_ind):
    """Render ``img`` with the fixed 'jet' colour scale and save it as a PNG.

    The file is written to ``<dest>/<file_name>_<data_ind, 5 digits>.png``.
    Note that the font settings below are applied to matplotlib's global
    rcParams on every call.
    """
    # plt.switch_backend('Agg')
    plt.rc('font', size=30)        # default font size
    plt.rcParams["font.family"] = "Times New Roman"

    plt.figure(figsize=(10, 6))
    ax = plt.gca()
    ax.set_title(title)
    ax.axis('off')
    im = ax.imshow(img, cmap='jet', vmin=vmin, vmax=vmax)

    # Dedicated colorbar axes to the right of the image (5% of its width).
    cax = make_axes_locatable(ax).append_axes("right", size="5%", pad=0.05)
    cbar = plt.colorbar(im, cax=cax)
    # Ticks every vgap, starting at vmin and stopping before vmax.
    cbar.set_ticks(np.arange(vmin, vmax, vgap))
    plt.tight_layout()
    # plt.show()
    out_path = '%s/%s_%05d.png' %(dest,file_name,data_ind)
    plt.savefig(out_path)

    plt.close()
    gc.collect()

In [7]:
# IUHM performance #############################################################################
# Restore the trained GC-Net weights, run the model over every batch and,
# when save_data == 1, append per-batch error statistics to a CSV file and
# save ground-truth / predicted disparity images.
max_epochs1 = 52
chkdir='saved_model/GCNet_iter%d/' %(max_epochs1)

net = GCNet_basic(max_disp, feature_n)
net.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001), loss=tf.keras.losses.MeanAbsoluteError(), run_eagerly=True)
net.load_weights(chkdir)

mix_set = np.arange(file_max) #if training : mix_set = np.random.permutation(file_max)

dest_dir='GCNet_iter%d' %(max_epochs1)

# exist_ok avoids the separate "does it exist" check.
os.makedirs(dest_dir, exist_ok=True)

# Start a fresh CSV with the header line; `with` closes the handle even if
# the write fails.
file_name = '%s/test.csv' %(dest_dir)
with open(file_name, 'w') as f:
    f.write('Data No./Number as Disparity Difference, GCNet_GH2470_epc%d loss, ratio of px<=1, <=2, <=3\n' %(max_epochs1))

# Fail early with a clear message: any other value would leave `result`
# undefined inside the loop and surface as a NameError at `del result`.
if save_data not in (0, 1):
    raise ValueError('save_data must be 0 or 1, got %r' % (save_data,))

for batch_n in range(batch_size):
    DB_imgs,DB_disp = StereoDataloader(
        img_left = img_left,
        img_right = img_right,
        disp_left = disp_left,                
        img_h = img_h,
        img_w = img_w,
        df_h = df_h,
        df_w = df_w,
        batch_num = batch_n, # order of batch
        ComPerBatch = ComPerBatch, # component number of a batch
        data_ord = mix_set,
        max_disp = max_disp
    )

    # Model
    if save_data == 0:
        # Prediction only, with Keras' own progress output.
        print('Batch %d/%d' %(batch_n+1,batch_size))
        result = net.predict(DB_imgs, batch_size=1, verbose=1)
        result = _clamp_disp(result,0, max_disp)
    elif save_data == 1:
        # Single-line progress counter, rewritten in place via '\r'.
        print('\rBatch %d/%d' %(batch_n+1,batch_size), end='')
        result = _clamp_disp(net.predict(DB_imgs, batch_size=1, verbose=0),0, max_disp)

        # Absolute per-pixel disparity error; computed once and reused for
        # the mean error and the threshold ratios.
        disp_diff1 = np.abs(DB_disp-result)
        loss1 = np.mean(disp_diff1)

        file_name = '%s/test.csv' %(dest_dir)
        with open(file_name, 'a') as f:
            f.write('%d, ' %(batch_n+1))

            # Model disparity error and px area ratio (<= 1px,2px,3px)
            f.write('%.5f, ' %loss1)
            f.write('%.5f, ' %(np.sum(disp_diff1<=1)/totpx_batch))
            f.write('%.5f, ' %(np.sum(disp_diff1<=2)/totpx_batch))
            f.write('%.5f \n'%(np.sum(disp_diff1<=3)/totpx_batch))

        #### Disparity from the ground-truth data
        title = 'Ground Truth_Data#%d' %(batch_n+1)
        file_name = 'True_Disp'
        _save_img(DB_disp[0,:,:], title, dest_dir, file_name, batch_n+1)

        #### Disparity calculated by Stereo Camera Data with Model
        title = 'GCNet_Data#%d' %(batch_n+1)
        file_name = 'Stereo_Disp'
        _save_img(result[0,:,:], title, dest_dir, file_name, batch_n+1)

        # Drop per-batch temporaries before the next iteration.
        del file_name
        del loss1
        del disp_diff1
        gc.collect()

    del DB_imgs
    del DB_disp
    del result
    gc.collect()
    
del net

tf.keras.backend.clear_session()
gc.collect()

Batch 2470/2470

9632