## Includes

In [None]:
# mass includes
import os
import cv2
import pickle
import numpy as np
import pyexiv2 as exiv2
import multiprocessing as par
from tqdm.notebook import tqdm

## Initialization

In [None]:
# target length, in pixels, of the shorter image side after resizing
min_px = 640

# dataset root; the 'zeroed' and 'expert' sub-folders are read from here
data_root = '/home/lab/Documents/ssd/PlieCNR/FiveK'

In [None]:
# find all samples: TIFF files in the 'zeroed' folder, in sorted order.
# Match on the file extension rather than a substring so that names which
# merely contain '.tif' (e.g. 'x.tif.bak') are not picked up.
file_list = sorted(
    name for name in os.listdir(os.path.join(data_root, 'zeroed'))
    if name.endswith(('.tif', '.tiff'))
)

# path to saves; exist_ok=True avoids the check-then-create race and makes
# re-running the cell harmless when the folder already exists
save_path = os.path.join(data_root, '../paired')
os.makedirs(save_path, exist_ok=True)

## Convert paired dataset

In [None]:
# generate new sample
def genSample(file):
    """Convert one raw/retouched image pair into a resized pickle file.

    Reads ``file`` from both the 'zeroed' and 'expert' sub-folders of
    ``data_root``, converts each image from OpenCV's BGR channel order to
    RGB, rescales both by the same factor so that the shorter side of the
    'zeroed' image becomes ``min_px`` pixels, and pickles the pair into
    ``save_path`` as ``{'zeroed': ..., 'expert': ...}``.

    Args:
        file: Image file name (not a full path) present in both sub-folders.

    Returns:
        None. The result is written to ``<save_path>/<stem>.pkl``.

    Raises:
        IOError: If either image cannot be read by OpenCV.
    """
    images = {}
    for subset in ('zeroed', 'expert'):
        img_path = os.path.join(data_root, subset, file)
        # IMREAD_UNCHANGED (== -1) keeps the stored bit depth (16-bit input)
        img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
        if img is None:
            # imread signals a missing/undecodable file by returning None;
            # fail here with the path instead of an opaque cvtColor error
            raise IOError('cannot read image: {}'.format(img_path))
        images[subset] = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # compute proper scale from the raw image; the same factor is applied
    # to the retouched image
    hei, wid = images['zeroed'].shape[:2]
    scale = float(min_px) / min(hei, wid)

    # resize (INTER_AREA, as the images are being scaled by area resampling)
    save_data = {
        subset: cv2.resize(img,
                           None,
                           fx=scale,
                           fy=scale,
                           interpolation=cv2.INTER_AREA)
        for subset, img in images.items()
    }

    # save to file; swap only the real extension so names that contain
    # '.tif' elsewhere (or end in '.tiff') still map to '<stem>.pkl'
    stem = os.path.splitext(file)[0]
    with open(os.path.join(save_path, stem + '.pkl'), 'wb') as pkl:
        pickle.dump(save_data, pkl, protocol=pickle.HIGHEST_PROTOCOL)

## Parallel processing

In [None]:
# parallel processing
# Use half of the CPUs reported by cpu_count(), but never fewer than one
# worker: the halved count is 0 on a single-CPU machine and Pool rejects 0.
cores = max(1, par.cpu_count() // 2)

# The context manager shuts the worker processes down even when a task
# raises; by the time the loop ends every result has been consumed.
with par.Pool(processes=cores) as pool:
    # genSample returns nothing; iterate only to drive the progress bar
    for _ in tqdm(pool.imap(genSample, file_list),
                  desc='progress',
                  total=len(file_list)):
        pass