### Crop, resize and split CityScapes data folders


1. Remove the car logo and bonnet from the images and the corner distortions from the gt disparity maps.
2. Split each image into two halves, each of size 944x708 (aspect ratio 4:3).
3. Resize each half to 640x480.

In [None]:
from PIL import Image
import os
import numpy as np

# Crop every PNG below root_dir to a 1888x708 window, cut that window into a
# left and a right 944x708 half, shrink each half to 640x480 and store both in
# splits_dir as "<name>_l.png" / "<name>_r.png".
conv_pics_count = 0
root_dir = os.path.join(r"D:\Downloads\DATASETS\CITYSCAPES\cityscape_weather\original\disparity")
splits_dir = os.path.join(r"D:\Downloads\DATASETS\CITYSCAPES\cityscape_weather\original\out\disparity")

# Gather the PNGs up front so the progress total matches what the loop really
# processes (os.listdir would only count the top-level entries of root_dir).
png_paths = []
for path, subdirs, files in os.walk(root_dir):
    for filename in files:
        if os.path.splitext(filename)[-1].lower() == '.png':
            png_paths.append(os.path.join(path, filename))

number_of_files = len(png_paths)
print(f"Found {number_of_files} files.")

# Image.save does not create missing folders.
os.makedirs(splits_dir, exist_ok=True)

# Crop window that removes the car logo and the corner distortions in the gt
# disparity maps (applied to a 2048x1024 frame).
initial_crop = (1888,708)
left_margin = 112
top_margin = 78

width, height = initial_crop
width_cutoff = width // 2

# Highest 10% step already announced; integer arithmetic avoids the float
# rounding that made "percent % 10 == 0" skip most milestones.
last_reported_decile = 0

for f in png_paths:
    stem = os.path.splitext(os.path.basename(f))[0]

    # The context manager releases the file handle even when the ratio check raises.
    with Image.open(f) as img:
        o_width, o_height = img.size

        if o_width != 2 * o_height:
            raise Exception(f"Following image is of incompatible ratio. Please use images with aspect ratio of 2. \n{f}")

        # Normalise to the 2048x1024 layout the margins below are defined for.
        # NOTE(review): resize() uses Pillow's default resampling filter; for
        # disparity maps nearest-neighbour may be preferable - confirm.
        frame = img if img.size == (2048,1024) else img.resize((2048,1024))

        # 1. split into 2 halves, each of size 944x708 (aspect ratio 4/3)
        s1 = frame.crop((left_margin, top_margin, width_cutoff + left_margin, height + top_margin))
        s2 = frame.crop((width_cutoff + left_margin, top_margin, width + left_margin, height + top_margin))

        # 2. resize each half to 640x480
        s1 = s1.resize((640,480))
        s2 = s2.resize((640,480))

        # Save each half
        s1.save(os.path.join(splits_dir, f"{stem}_l.png"), format="png")
        s2.save(os.path.join(splits_dir, f"{stem}_r.png"), format="png")

    conv_pics_count += 1
    decile = conv_pics_count * 10 // number_of_files
    if decile > last_reported_decile:
        last_reported_decile = decile
        print(f"Completed : {decile * 10}%")

print(f"Done. {conv_pics_count} files splitted.")



### Generate gt images with same name as input


In [None]:
import os
import shutil

gt_dir = os.path.join(r'D:\Downloads\Resized_datasets\1888x768\640x480\clear')
input_dir = os.path.join(r'D:\Downloads\Resized_datasets\1888x768\640x480\input')
new_gt_dir = os.path.join(r'D:\Downloads\Resized_datasets\1888x768\640x480\gt')


def _png_names(folder):
    """Return the bare names of all ``.png`` files found anywhere below *folder*.

    The extension test is case-insensitive. Only file names are returned, not
    the sub-folder they were found in.
    """
    names = []
    for _root, _dirs, entries in os.walk(folder):
        names.extend(
            entry for entry in entries
            if os.path.splitext(entry)[-1].lower() == '.png'
        )
    return names


# Clear (ground-truth) images.
gt_files_arr = _png_names(gt_dir)
print(f"{len(gt_files_arr)} clear gt images found.")

# All-weather input images.
input_files_arr = _png_names(input_dir)
print(f"{len(input_files_arr)} allweather input images found.")

def getImgId(filename):
    """Derive the ``(fileId, fileType)`` matching key of an image file name.

    The name is split on ``'_'``:

    * three or more parts: ``fileId`` is the first three parts re-joined
      with ``'_'``;
    * fewer than three parts: the first part is split on ``'-'`` and
      ``fileId`` is its first two pieces joined with ``'_'``.

    ``fileType`` is the last ``'_'``-separated part. Both values are
    lower-cased.

    Raises:
        IndexError: if the short form contains no ``'-'`` in its first part.
    """
    parts = filename.split('_')

    if len(parts) >= 3:
        id_fields = (parts[0], parts[1], parts[2])
    else:
        dash_parts = parts[0].split('-')
        # Explicit indexing keeps the IndexError for names without a '-'.
        id_fields = (dash_parts[0], dash_parts[1])

    image_id = "_".join(id_fields).lower()
    image_type = str(parts[-1]).lower()
    return image_id, image_type

# For every input image, copy the first clear image with the same
# (fileId, fileType) key into new_gt_dir under the input image's lower-cased name.

# shutil.copy needs an existing destination folder.
os.makedirs(new_gt_dir, exist_ok=True)

# Index the clear images once instead of re-parsing every name for every
# input image. setdefault keeps the first file per key, which matches the
# first-match-wins behaviour of a linear scan over gt_files_arr.
gt_by_key = {}
for cl in gt_files_arr:
    try:
        gt_by_key.setdefault(getImgId(cl), cl)
    except IndexError:
        print(cl, " : ", "unparsable gt file name, skipped")

found_imgs = []
total_inputs = len(input_files_arr)
# Highest 10% step already announced (integer maths; a float "% 10 == 0"
# test either repeats itself or never fires).
last_reported_decile = 0

for processed, input_img in enumerate(input_files_arr, start=1):
    try:
        cl = gt_by_key.get(getImgId(input_img))
    except IndexError:
        print(input_img, " : ", "unparsable input file name, skipped")
        cl = None

    if cl is not None:
        shutil.copy(os.path.join(gt_dir, cl), os.path.join(new_gt_dir, input_img.lower()))
        found_imgs.append(os.path.join(gt_dir, cl))

    decile = processed * 10 // total_inputs
    if decile > last_reported_decile:
        last_reported_decile = decile
        print(f"{decile * 10}% processed.")


print(f"Done. Found {len(found_imgs)} images.")

### Generate side-by-side images as in Pix2Pix


https://github.com/phillipi/pix2pix#generating-pairs

Create folder /path/to/data with subfolders A and B. A and B should each have their own subfolders train, val, test, etc. In /path/to/data/A/train, put training images in style A. In /path/to/data/B/train, put the corresponding images in style B. Repeat same for other data splits (val, test, etc).

Corresponding images in a pair {A,B} must be the same size and have the same filename, e.g., /path/to/data/A/train/1.jpg is considered to correspond to /path/to/data/B/train/1.jpg.

In [None]:
# Fetch the pix2pix repository, which contains scripts/combine_A_and_B.py.
!git clone https://github.com/phillipi/pix2pix.git

# A denotes the gt image.
# B denotes the input image.
# Make sure all images are of the same size.
# NOTE(review): the script path below is relative - presumably this has to be
# run from inside the cloned pix2pix folder; confirm.

!python scripts/combine_A_and_B.py --fold_A /path/to/data/A --fold_B /path/to/data/B --fold_AB /path/to/data

### Generate txt with input-gt image pair names

In [None]:
import os
from pathlib import Path

# Write one "<input path>,<gt path>" line per input image to txt_file_path.
# An input "<id>_leftImg8bit..." is paired with the gt file "<id>_disparity...".
txt_file_path = 'clear_train.txt'
gt_dir = r'disparity'
input_dir = r'train/clear'

# Every file below gt_dir, as a forward-slash path.
gtArray = []

for path, subdirs, files in os.walk(gt_dir):
    for filename in files:
        gtArray.append(Path(os.path.join(path, filename)).as_posix())

print(f'Found {len(gtArray)} gt images.')

# Frame id -> gt path. The id is taken from the file name (last path
# component), so gt files inside sub-folders are matched as well. setdefault
# keeps the first path per id, like a first-match linear scan would.
gt_by_frame = {}
for gt in gtArray:
    gt_by_frame.setdefault(gt.split('/')[-1].split('_disparity')[0], gt)


with open(txt_file_path, "w") as a:
    for path, subdirs, files in os.walk(input_dir):
        for filename in files:
            f = os.path.join(path, filename)

            gtFileName = gt_by_frame.get(filename.split('_leftImg8bit')[0])
            if gtFileName is None:
                # A single formatted string; passing several arguments to
                # Exception would render the message as a tuple.
                raise Exception(f'Unable to find GT for {f}')

            a.write(f"{Path(f).as_posix()},{gtFileName}\n")

print("Done")

### Split dataset into train/test/val

In [None]:
import splitfolders

def main():
    """Split the hard-coded dataset folder with ``splitfolders.ratio``.

    The call uses seed 42, the ratio tuple ``(.875, .1, .025)`` and no prefix
    grouping, writing the result to the hard-coded output folder.
    NOTE(review): the three ratio values are presumably the train/val/test
    shares - confirm against the splitfolders documentation.
    """
    source_root = r"D:\Downloads\DATASETS\CITYSCAPES\cityscape_weather\original\out\2. clear --- depth\in"
    target_root = r"D:\Downloads\DATASETS\CITYSCAPES\cityscape_weather\original\out\2. clear --- depth\out"

    split_options = {
        "output": target_root,
        "seed": 42,
        "ratio": (.875, .1, .025),
        "group_prefix": None,
    }
    splitfolders.ratio(source_root, **split_options)


if __name__ == "__main__":
    main()

### Validate (size,type,path) images in folder and cross-check with txt

In [None]:
import random
from pathlib import Path
from PIL import Image
import os

# Check that every image named in the first column of the listing exists below
# root_dir and has exactly the expected size.
fileName = 'all_weather_test.txt'
root_dir = r'D:\Downloads\Resized_datasets\640x480\CITYSCAPE_768x768\data'
size = (640,480)

# Read the listing and release the file handle right away.
with open(os.path.join(root_dir,fileName)) as listing:
    lines = listing.read().splitlines()
lineCount = len(lines)
iterator = 0
# Highest 10% step already announced (integer maths instead of float modulo).
last_reported_decile = 0

for line in lines:

    filePath = line.split(',')[0]
    fullPath = os.path.join(root_dir,filePath)

    iterator+=1

    if not Path(fullPath).is_file():
        raise Exception(f"{fullPath} does not exist")

    # Only the header is needed for the size; close the file immediately.
    with Image.open(fullPath) as im:
        actual_size = im.size

    # Both dimensions must match; the size is wrong as soon as either differs.
    if actual_size != size:
        raise Exception(f"{fullPath} is wrong size")

    decile = iterator * 10 // lineCount
    if decile > last_reported_decile:
        last_reported_decile = decile
        print(str(decile * 10)+'% . Checked ' +str(iterator)+ ' files.')

print("All file checked! Checked " +str(iterator)+ " files.")


### Validate (size,type) all images in folder

In [None]:
import random
from pathlib import Path
from PIL import Image
import os

# Check that every file below root_dir is a PNG of exactly the expected size.
root_dir = r'D:\Downloads\DATASETS\CITYSCAPES\cityscape_weather\original\input'
size = (2048,1024)
iterator = 0

# Collect the files first so the progress total covers sub-folders too
# (os.listdir would only count the top-level entries).
all_files = []
for path, subdirs, files in os.walk(root_dir):
    for filename in files:
        all_files.append(os.path.join(path, filename))

number_of_files = len(all_files)
print(f"Found {number_of_files} files.")

# Highest 10% step already announced (integer maths instead of float modulo).
last_reported_decile = 0

for file_path in all_files:
    ext = os.path.splitext(os.path.basename(file_path))[-1].lower()
    if ext != '.png':
        raise Exception(f"{file_path} is not an image")

    iterator += 1

    # Only the header is needed for the size; close the file immediately.
    with Image.open(file_path) as im:
        actual_size = im.size

    if actual_size != size:
        raise Exception(f"{file_path} is wrong size")

    decile = iterator * 10 // number_of_files
    if decile > last_reported_decile:
        last_reported_decile = decile
        print(str(decile * 10)+'% . Checked ' +str(iterator)+ ' files.')

print("All file checked! Checked " +str(iterator)+ " files.")
            

### Validate (size,type,path) and move input and gt files into folders as in txt

In [None]:
import random
from pathlib import Path
from PIL import Image
import shutil
import os


# Listing to process, the folder its paths are relative to, the folder the
# validated files are moved into, and the expected image size.
fileName = 'all_weather_train.txt'
root_dir = r'D:\Downloads\DATASETS\CITYSCAPES\cityscape_weather\original\out\3. weather --- depth'
new_dir = r'D:\Downloads\DATASETS\CITYSCAPES\cityscape_weather\original\out\3. weather --- depth\trainB'
size = (640,480)

# Read the listing and release the file handle right away.
with open(os.path.join(root_dir,fileName)) as listing:
    lines = listing.read().splitlines()
lineCount = len(lines)

def validateAndMove(filePath):
    """Validate one listed image and move it into ``new_dir``.

    Args:
        filePath: path of the image relative to ``root_dir``, using ``'/'``
            separators (only the part after the last ``'/'`` is kept as the
            destination file name).

    Raises:
        Exception: if the file does not exist or its size differs from ``size``.

    Increments the module-level ``iterator`` on every call, including calls
    that end up raising.
    """
    global iterator
    iterator+=1

    srcPath = os.path.join(root_dir,filePath)

    if not Path(srcPath).is_file():
        raise Exception(f"{srcPath} does not exist")

    # Read the size and close the file before renaming; the handle is also
    # released when the size check below raises.
    with Image.open(srcPath) as im:
        actual_size = im.size

    # Both dimensions must match; the size is wrong as soon as either differs.
    if actual_size != size:
        raise Exception(f"{srcPath} is wrong size")

    # os.rename does not create a missing destination folder.
    os.makedirs(new_dir, exist_ok=True)
    os.rename(srcPath, os.path.join(new_dir, filePath.split('/')[-1]))

# Validate and move the file named in the first column of every listing line.
iterator = 0
# Highest 10% step already announced (integer maths; the float
# "percent % 10 == 0" test misses most milestones through rounding).
last_reported_decile = 0

for line in lines:

    filePaths = line.split(',')

    # Only the first column is moved; the second column is left untouched.
    validateAndMove(filePaths[0])

    decile = iterator * 10 // lineCount
    if decile > last_reported_decile:
        last_reported_decile = decile
        print(str(decile * 10)+'% . Checked ' +str(iterator)+ ' files.')

print("All file checked! Checked " +str(iterator)+ " files.")



### Write all dataset filenames to txt

In [None]:
import os
from pathlib import Path

# Write the forward-slash path of every file below data_dir to txt_file_path,
# one path per line.
txt_file_path = "rain.txt"
data_dir = r'./rain'

with open(txt_file_path, "w") as listing:
    for folder, _dirs, names in os.walk(data_dir):
        listing.writelines(
            f"{Path(os.path.join(folder, name)).as_posix()}\n" for name in names
        )

### Randomly move a number of images

In [None]:
import shutil
import glob
import random

import os

# Pick 300 distinct PNGs from foggy/ at random. random.sample raises
# ValueError when fewer than 300 files are available.
to_be_moved = random.sample(glob.glob("foggy/*.png"), 300)

# shutil.move only moves *into* the destination when it is an existing
# directory; otherwise it renames the source to that path. Make sure the
# folder exists so no image ends up renamed to "val".
os.makedirs("val", exist_ok=True)

for f in to_be_moved:
    shutil.move(f, "val/")

### KITTI depth map inpainting using Python

In [None]:
# Original Matlab code https://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html
#
#
# Python port of depth filling code from NYU toolbox
# Speed needs to be improved
#
# Uses 'pypardiso' solver 
#
import scipy
import skimage
import skimage.color
import numpy as np
from pypardiso import spsolve
from PIL import Image

#
# fill_depth_colorization.m
# Preprocesses the kinect depth image using a gray scale version of the
# RGB image as a weighting for the smoothing. This code is a slight
# adaptation of Anat Levin's colorization code:
#
# See: www.cs.huji.ac.il/~yweiss/Colorization/
#
# Args:
#  imgRgb - HxWx3 matrix, the rgb image for the current frame. This must
#      be between 0 and 1.
#  imgDepth - HxW matrix, the depth image for the current frame in
#       absolute (meters) space.
#  alpha - a penalty value between 0 and 1 for the current depth values.

def fill_depth_colorization(imgRgb=None, imgDepthInput=None, alpha=1):
	"""Fill the zero-valued pixels of a depth map, guided by the gray-scale image.

	A sparse linear system is assembled in which every pixel is tied to the
	pixels of its 3x3 neighbourhood with weights derived from gray-level
	similarity, and every pixel with a known depth is additionally anchored
	to that depth with weight ``alpha``. The system is solved with
	``pypardiso.spsolve``.

	Args:
		imgRgb: colour image handed to ``skimage.color.rgb2gray``
			(HxWx3 according to the header comment above this function).
		imgDepthInput: 2-D depth array; entries equal to 0 are treated as
			missing. Both defaults are ``None``, but the function needs
			real arrays for both images.
		alpha: weight applied to the known-depth constraint.

	Returns:
		An HxW array that holds the original values wherever
		``imgDepthInput`` is non-zero and the solved values elsewhere.

	The system is built with a pure-Python loop over every pixel, so run
	time grows with the image size.
	"""
	# Pixels whose depth is exactly 0 are the ones to be filled.
	imgIsNoise = imgDepthInput == 0
	# Work on depth scaled by its maximum; the scale is re-applied after solving.
	# NOTE(review): an all-zero depth map makes maxImgAbsDepth 0, so the
	# division below divides by zero.
	maxImgAbsDepth = np.max(imgDepthInput)
	imgDepth = imgDepthInput / maxImgAbsDepth
	imgDepth[imgDepth > 1] = 1
	(H, W) = imgDepth.shape
	numPix = H * W
	# indsM[i, j] == j * H + i, i.e. the column-major linear index of pixel (i, j).
	indsM = np.arange(numPix).reshape((W, H)).transpose()
	# 1 where the input depth is non-zero, 0 where it is missing.
	knownValMask = (imgIsNoise == False).astype(int)
	grayImg = skimage.color.rgb2gray(imgRgb)
	winRad = 1  # neighbourhood radius, giving a 3x3 window
	len_ = 0  # number of sparse entries written so far
	absImgNdx = 0  # linear index of the current pixel (column-major order)
	len_window = (2 * winRad + 1) ** 2
	len_zeros = numPix * len_window

	# Triplet buffers sized for a full window per pixel, initialised to -1;
	# they are trimmed to the len_ entries actually written after the loop.
	cols = np.zeros(len_zeros) - 1
	rows = np.zeros(len_zeros) - 1
	vals = np.zeros(len_zeros) - 1
	gvals = np.zeros(len_window) - 1

	# Columns outer, rows inner, so absImgNdx advances in the same
	# column-major order that indsM encodes.
	for j in range(W):
		for i in range(H):
			nWin = 0
			# Visit the neighbours inside the window, clipped at the image border.
			for ii in range(max(0, i - winRad), min(i + winRad + 1, H)):
				for jj in range(max(0, j - winRad), min(j + winRad + 1, W)):
					if ii == i and jj == j:
						continue

					rows[len_] = absImgNdx
					cols[len_] = indsM[ii, jj]
					gvals[nWin] = grayImg[ii, jj]

					len_ = len_ + 1
					nWin = nWin + 1

			# Append the centre pixel so the variance covers the whole window.
			curVal = grayImg[i, j]
			gvals[nWin] = curVal
			c_var = np.mean((gvals[:nWin + 1] - np.mean(gvals[:nWin+ 1])) ** 2)

			# csig: 0.6 x the window variance, floored by -mgv / log(0.01)
			# (mgv = smallest squared gray difference to a neighbour) and by 2e-06.
			csig = c_var * 0.6
			mgv = np.min((gvals[:nWin] - curVal) ** 2)
			if csig < -mgv / np.log(0.01):
				csig = -mgv / np.log(0.01)

			if csig < 2e-06:
				csig = 2e-06

			# Gaussian similarity weights, normalised to sum to 1 and stored
			# negated as the off-diagonal entries of this pixel's row.
			gvals[:nWin] = np.exp(-(gvals[:nWin] - curVal) ** 2 / csig)
			gvals[:nWin] = gvals[:nWin] / sum(gvals[:nWin])
			vals[len_ - nWin:len_] = -gvals[:nWin]

			# Now the self-reference (along the diagonal).
			rows[len_] = absImgNdx
			cols[len_] = absImgNdx
			vals[len_] = 1  # sum(gvals(1:nWin))

			len_ = len_ + 1
			absImgNdx = absImgNdx + 1

	# Drop the unused tail of the triplet buffers and build the smoothness matrix.
	vals = vals[:len_]
	cols = cols[:len_]
	rows = rows[:len_]
	A = scipy.sparse.csr_matrix((vals, (rows, cols)), (numPix, numPix))

	# Diagonal data term: alpha for known pixels, 0 for missing ones
	# (the mask is flattened in column-major order to match the pixel indexing).
	rows = np.arange(0, numPix)
	cols = np.arange(0, numPix)
	vals = (knownValMask * alpha).transpose().reshape(numPix)
	G = scipy.sparse.csr_matrix((vals, (rows, cols)), (numPix, numPix))

	A = A + G
	# Right-hand side: the data weights times the normalised depth, column-major.
	b = np.multiply(vals.reshape(numPix), imgDepth.flatten('F'))

	#print ('Solving system..')

	new_vals = spsolve(A, b)
	# Undo the column-major flattening.
	new_vals = np.reshape(new_vals, (H, W), 'F')

	#print ('Done.')

	# Back to the original depth scale.
	denoisedDepthImg = new_vals * maxImgAbsDepth

	output = denoisedDepthImg.reshape((H, W)).astype('float32')

	# Keep the solved value only where depth was missing; known pixels get
	# their original input value back (the input is 0 at the missing pixels).
	output = np.multiply(output, (1-knownValMask)) + imgDepthInput

	return output

In [None]:
import cv2
import os
import matplotlib.pyplot as plt

# Run fill_depth_colorization on every image listed in train.txt and write the
# filled depth maps below dstpath, mirroring the listed relative paths.
fig = plt.figure()

train_filename = "train.txt"
root_dir = r"C:\Users\avishka\Downloads\Depth processing" # Source Folder
dstpath = r"C:\Users\avishka\Downloads\Depth processing\output" # Destination Folder

train_list = os.path.join(root_dir ,train_filename)

# One relative image path per line; blank lines are ignored.
with open(train_list) as f:
    input_names = [line.strip() for line in f if line.strip()]


i = 0
failed = []

for img_path in input_names:
    i+=1

    # The clear image and the disparity map live in sibling folders of "input".
    clear_path = img_path.replace('input','gt_clear')
    depth_path = img_path.replace('input','gt_disparity')

    clear_img = cv2.imread(os.path.join(root_dir,clear_path))
    depth_img = cv2.imread(os.path.join(root_dir,depth_path))

    if clear_img is None or depth_img is None:
        # cv2.imread signals an unreadable or missing file by returning None.
        print("Read Failed - ",str(img_path))
        failed.append(img_path)
    else:
        clear_img = cv2.cvtColor(clear_img, cv2.COLOR_BGR2RGB)
        depth_img = cv2.cvtColor(depth_img, cv2.COLOR_BGR2GRAY)

        processed_img = fill_depth_colorization(imgRgb=clear_img, imgDepthInput=depth_img)

        dstPath = os.path.join(dstpath,img_path)
        # cv2.imwrite does not create missing folders.
        os.makedirs(os.path.dirname(dstPath), exist_ok=True)

        # cv2.imwrite reports most failures through its boolean return value
        # rather than by raising, so check both.
        try:
            saved = cv2.imwrite(dstPath,processed_img)
        except cv2.error:
            saved = False

        if not saved:
            print("Save Failed - ",str(img_path))
            failed.append(img_path)

    if i%100 == 0:
        print("completed {} images.".format(i))

print("All Complete.{} images processed.".format(i))
print("{} images failed to process.".format(len(failed)))
print(failed)