# Dataset Synthesis

## Includes

In [None]:
# mass includes
import os, sys
import pickle
import math
import numpy as np
import rawpy as rp
import pyexiv2 as exiv2
import matplotlib.pyplot as plt
import multiprocessing as par
from skimage.transform.pyramids import pyramid_reduce
from tqdm.notebook import tqdm

# make every sub-folder below the parent directory importable,
# leaving out any folder whose path contains 'evals'
paths = []
for folder, _subdirs, _files in os.walk('..'):
    if 'evals' in folder:
        continue
    paths.append(folder)
sys.path.extend(paths)

from ipynb.fs.full.config import Config
from ipynb.fs.full.util import unifyBayerPtn
from ipynb.fs.full.ISPColor import *

## Initialization

In [None]:
# directory that is scanned for the input .dng/.DNG files; the output folder
# is derived from it as well ('../unpaired' relative to this path)
data_root = '/home/lab/Documents/ssd/PlieCNR/HDR+'
# target length (in pixels) used to derive the down-scaling factor in
# genSample: scale = ceil(min(height / 2, width / 2)) / min_px
min_px = 640  # minimal output size

In [None]:
# find all samples
# match on the file extension itself: a plain substring test would also pick
# up files that merely contain '.dng' in their name (e.g. 'x.dng.xmp'), which
# cannot be decoded as raw images later on
file_list = sorted(
    file for file in os.listdir(data_root) if file.endswith(('.dng', '.DNG')))

# path to saves
save_path = os.path.join(data_root, '../unpaired')
# exist_ok avoids the check-then-create race of exists() + makedirs()
os.makedirs(save_path, exist_ok=True)

## Generate unpaired dataset

In [None]:
# generate new sample
def genSample(file):
    """Convert one raw DNG file into a downsized RGB image and pickle it.

    The raw mosaic is black-level corrected, normalized to [0, 1], passed
    through ``unifyBayerPtn``, split into its four 2x2 sub-planes, reduced
    with ``pyramid_reduce``, colour-converted with ``cam2sRGB`` and stored
    as float32 under the key ``'lin_img'``.

    Args:
        file: name of the raw file inside ``data_root`` (not a full path).

    Returns:
        None. The result is written to ``save_path`` as ``<stem>.pkl``,
        where ``<stem>`` is ``file`` without its extension.
    """
    # load a new file
    file_path = os.path.join(data_root, file)
    with rp.imread(file_path) as raw_obj:
        # copied because the array is used after the raw object is closed
        raw_bayer = raw_obj.raw_image_visible.copy()
        raw_mask = raw_obj.raw_colors_visible
        blk_level = raw_obj.black_level_per_channel
        sat_level = raw_obj.white_level
        raw_type = raw_obj.raw_pattern

    # normalize to 0-1: subtract the per-channel black level (the mask holds
    # the colour index of every pixel), then divide by the usable range
    raw_bayer = raw_bayer.astype(np.float64)
    for chn in range(4):
        raw_bayer[raw_mask == chn] -= blk_level[chn]
    raw_bayer = raw_bayer / (sat_level - max(blk_level))
    raw_bayer = np.clip(raw_bayer, 0.0, 1.0)

    # Bayer pattern unification
    # presumably yields an RGGB layout, since the planes below are taken as
    # R, G1, G2, B in that order -- verify against util.unifyBayerPtn
    raw_bayer = unifyBayerPtn(raw_bayer, raw_type)

    # compute proper scale from the shorter side of the half-resolution planes
    # NOTE(review): pyramid_reduce is expected to reject downscale <= 1, so an
    # input whose half-resolution short side is not larger than min_px would
    # raise here -- confirm whether such inputs can occur
    hei, wid = raw_bayer.shape
    scale = math.ceil(min(hei / 2, wid / 2)) / float(min_px)

    # downsize and convert to rgb image
    r_chn = pyramid_reduce(raw_bayer[0::2, 0::2], downscale=scale)
    g1_chn = pyramid_reduce(raw_bayer[0::2, 1::2], downscale=scale)
    g2_chn = pyramid_reduce(raw_bayer[1::2, 0::2], downscale=scale)
    b_chn = pyramid_reduce(raw_bayer[1::2, 1::2], downscale=scale)
    # the two green planes are averaged into a single green channel
    lin_img = np.stack([r_chn, (g1_chn + g2_chn) / 2, b_chn], axis=-1)
    lin_img = np.clip(lin_img, 0.0, 1.0)

    # compute camera to xyz color space
    metadata = extMetadata(file_path)
    wp_xyz, interp_w = cam2xyzWP(metadata)
    cam2xyz = cam2xyzD50(metadata, wp_xyz, interp_w)

    # convert to sRGB
    lin_img = cam2sRGB(lin_img, cam2xyz)

    # save to file
    # derive the output name from the extension-less stem: a literal
    # replace('.dng', '.pkl') leaves upper-case '.DNG' inputs unrenamed
    stem, _ = os.path.splitext(file)
    save_data = {'lin_img': lin_img.astype(np.float32)}
    with open(os.path.join(save_path, stem + '.pkl'), 'wb') as pkl:
        pickle.dump(save_data, pkl, protocol=pickle.HIGHEST_PROTOCOL)

## Parallel processing

In [None]:
# parallel processing
# use half of the reported CPUs, but never fewer than one worker:
# Pool(processes=0) raises ValueError on a single-core machine
cores = max(1, par.cpu_count() // 2)
# the context manager shuts the workers down even if a sample fails;
# imap is fully consumed inside the block, so no pending work is lost
with par.Pool(processes=cores) as pool:
    for _ in tqdm(pool.imap(genSample, file_list),
                  desc='progress',
                  total=len(file_list)):
        pass

# get probability
# collect the 'Exif.Image.BaselineExposure' tag of every sample as a float,
# in the same order as file_list
exp_list = []
for dng_name in file_list:
    # read metadata of the current file
    dng_md = exiv2.ImageMetadata(os.path.join(data_root, dng_name))
    dng_md.read()

    # extract the tag value and store it
    exp_list.append(float(dng_md['Exif.Image.BaselineExposure'].value))

%matplotlib inline
# 100-bin histogram of the collected exposure values
n, bins, patches = plt.hist(exp_list, bins=100)
plt.show()
# turn the per-bin counts into fractions of the total
n = n / n.sum()

# Look up the bin of every exposure value with the same convention the
# histogram uses: bin i covers [bins[i], bins[i + 1]) and the last bin also
# includes its right edge. A strict 'exp > edge' scan never matches the
# smallest value (it equals bins[0]), leaving its probability undefined or
# stale from the previous file.
bin_idx = np.searchsorted(bins, exp_list, side='right') - 1
bin_idx = np.clip(bin_idx, 0, len(n) - 1)

# one line per sample: '<name before first dot> <exposure> <bin probability>'
with open(os.path.join(save_path, 'prob.txt'), 'w') as txt:
    for exp, file, idx in zip(exp_list, file_list, bin_idx):
        txt.write('%s %f %f \n' % (file.split('.')[0], exp, n[idx]))