# Dataset Synthesis

## Includes

In [None]:
# mass includes
import os, sys, warnings
import pickle
import math
import numpy as np
import rawpy as rp
import scipy.stats as stat
from random import choices
from skimage.transform.pyramids import pyramid_reduce
from numpy.random import uniform, normal, poisson
from scipy.stats import tukeylambda
from tqdm.notebook import tqdm

# add paths for all sub-folders
# (walks the parent directory; every folder whose path does not contain
# 'evals' is appended to the module search path -- presumably so that the
# project notebooks imported right after this can be found)
paths = [
    folder for folder in (entry[0] for entry in os.walk('..'))
    if 'evals' not in folder
]
for item in paths:
    sys.path.append(item)

from ipynb.fs.full.network import *
from ipynb.fs.full.config import *
from ipynb.fs.full.util import unifyBayerPtn, cam2sRGB, downsize
from ipynb.fs.full.ISPColor import extMetadata, cam2xyzWP, cam2xyzD50

## Initialization

In [None]:
# folder that is scanned for the source .dng/.DNG raw files; the generated
# dataset is written next to it under <data_path>/../denoise/<cam_model>/
data_path = '/home/lab/Documents/SSD/PlieCNR/RAISE'
# root of the per-camera data; the noise model is read from
# <model_path>/<cam_model>/noiseModel/model_stats.pkl
model_path = '/home/lab/Documents/SSD/PlieCNR/cameras'
# camera identifier used in both the noise-model path and the output path
cam_model = 'S20'
# (low, high) bounds of the uniformly drawn factor that the normalized raw
# values are divided by in genSample
exp_range = (1, 16)  # exposure range
# normalized values below this level are divided by the exposure factor;
# values at or above it are raised to a power instead
highlight = 0.95  # preserve highlight
num_train = 8000  # number of training samples
num_val = 80  # number of validation samples
# genSample derives its downscale factor as
# ceil(shorter half-resolution side) / min_px
min_px = 320  # minimal output size

In [None]:
# for debugging only
# IPython magic: switch off automatic entry into the pdb debugger
%pdb off
# suppress every warning for the rest of the session
warnings.filterwarnings('ignore')

# choose CPU because of parallel computing
# NOTE(review): `t` is not imported explicitly in this notebook; presumably it
# is torch, re-exported by one of the star imports above -- confirm
device = t.device('cpu')

# find all samples
# Match on the real file extension (case-insensitively) instead of on a
# substring: names such as 'x.dng.xmp' or 'x.dng.tmp' are no longer mistaken
# for raw images, and mixed-case extensions such as '.Dng' are still found.
# The list is sorted so that the sample order does not depend on the
# directory listing order.
file_list = sorted(
    file for file in os.listdir(data_path)
    if os.path.splitext(file)[1].lower() == '.dng'
)
print('Found %d samples.' % len(file_list))

# define model
# NOTE(review): Config and Enhancer are not imported by name; presumably they
# come from the star imports of the project notebooks above -- confirm
# NOTE(review): `opt` is not referenced again in the visible cells; presumably
# it is read as a global by the imported project code -- confirm
opt = Config('unpaired')
net_E = Enhancer()
# NOTE(review): presumably restores saved weights from '../saves' -- confirm
net_E.load('../saves')
# the network is only used for inference in genSample: move it to the chosen
# device, switch to eval mode and disable gradients on every parameter
net_E.to(device).eval()
for param in net_E.parameters():
    param.requires_grad = False

## Generate noisy dataset

In [None]:
# generate a list of random source files and noise parameters
def genParams(num_samples):
    """Draw the per-sample parameters that ``genSample`` consumes.

    Reads the module-level ``file_list`` and ``noise_model``.

    Args:
        num_samples: number of parameter tuples to draw.

    Returns:
        A one-shot ``zip`` iterator of ``(index, file, k, sig_r, sig_tl)``
        tuples. ``file`` is drawn with replacement from ``file_list``; ``k``
        is uniform between ``noise_model['k'][0]`` and
        ``noise_model['k'][1]``; ``sig_r`` and ``sig_tl`` are linear in ``k``
        (slope and offset from ``noise_model``) plus zero-mean Gaussian
        jitter. ``genSample`` passes ``k``, ``sig_r`` and ``sig_tl`` through
        ``np.exp``, i.e. it treats them as log-domain values.
    """
    # source image of every sample, drawn with replacement
    picked_files = choices(file_list, k=num_samples)

    k_lo, k_hi = noise_model['k'][0], noise_model['k'][1]
    k_values = uniform(low=k_lo, high=k_hi, size=num_samples)

    def linear_in_k(key):
        # slope * k + offset, perturbed by zero-mean Gaussian jitter
        slope = noise_model[key][0]
        offset = noise_model[key][1]
        spread = noise_model[key][2]
        jitter = normal(loc=0.0, scale=spread, size=num_samples)
        return slope * k_values + offset + jitter

    # row noise is drawn before read noise to keep the random draw order
    sig_r_values = linear_in_k('sig_r')
    sig_tl_values = linear_in_k('sig_tl')

    return zip(range(num_samples), picked_files, k_values, sig_r_values,
               sig_tl_values)


# synthesize one clean/noisy sample from a raw file
def genSample(params):
    """Create one dataset sample from a parameter tuple of ``genParams``.

    Reads the module-level settings ``data_path``, ``exp_range``,
    ``highlight``, ``min_px``, ``noise_model``, ``device`` and ``net_E``.

    Args:
        params: indexable ``(index, file, log_k, log_sig_r, log_sig_tl)``.
            The last three entries are passed through ``np.exp`` before use.

    Returns:
        ``(save_file, save_dict)``: the file name ``'sam_<index>.pkl'`` and a
        dict of float32 arrays with the keys ``'noisy_raw'``, ``'clean_raw'``,
        ``'variance'``, ``'ilm_coes'`` and ``'cam2xyz'``.
    """
    index = params[0]
    file = params[1]
    log_k, log_sig_r, log_sig_tl = params[2], params[3], params[4]

    # linear-domain noise parameters
    gain = np.exp(log_k)
    sig_r = np.exp(log_sig_r)
    sig_tl = np.exp(log_sig_tl)

    # read the raw mosaic together with the metadata needed to normalize it
    file_path = os.path.join(data_path, file)
    with rp.imread(file_path) as raw_obj:
        raw_bayer = raw_obj.raw_image_visible.copy()
        raw_mask = raw_obj.raw_colors_visible
        blk_level = raw_obj.black_level_per_channel
        sat_level = raw_obj.white_level
        raw_type = raw_obj.raw_pattern

    # subtract the per-channel black level, then normalize to 0-1
    raw_bayer = raw_bayer.astype(np.float64)
    for chn in range(4):
        raw_bayer[raw_mask == chn] -= blk_level[chn]
    raw_bayer = np.clip(raw_bayer / (sat_level - max(blk_level)), 0.0, 1.0)

    # random exposure shift with highlight preservation: values below
    # `highlight` are divided by the exposure, the rest follow a power curve
    # that meets the linear branch at `highlight` and maps 1.0 to 1.0
    exposure = uniform(exp_range[0], exp_range[1])
    exponent = math.log(highlight / exposure, highlight)
    darkened = raw_bayer / exposure
    compressed = raw_bayer**exponent
    raw_bayer = np.where(raw_bayer < highlight, darkened, compressed)

    # Bayer pattern unification
    raw_bayer = unifyBayerPtn(raw_bayer, raw_type)

    # downscale factor: shorter half-resolution side relative to min_px
    hei, wid = raw_bayer.shape
    scale = math.ceil(min(hei, wid) / 2) / float(min_px)

    # downsize each of the four mosaic phases and pack them as channels
    # (assumes unifyBayerPtn yields an R, G1, G2, B layout -- TODO confirm)
    planes = [
        pyramid_reduce(raw_bayer[row::2, col::2], downscale=scale)
        for row, col in ((0, 0), (0, 1), (1, 0), (1, 1))
    ]
    clean_raw = np.clip(np.stack(planes, axis=-1), 0.0, 1.0)

    # shot noise: Poisson counts at signal / gain, scaled back by the gain
    noisy_raw = gain * poisson(lam=clean_raw / gain)

    # row noise: one draw per row for each of the two mosaic row phases;
    # channels 0/1 come from even rows and share the first draw,
    # channels 2/3 come from odd rows and share the second draw
    row_noise = normal(scale=sig_r, size=(clean_raw.shape[0], 1, 2))
    noisy_raw += np.repeat(row_noise, 2, axis=-1)

    # read noise drawn from a Tukey lambda distribution
    read_noise = tukeylambda.rvs(noise_model['lam_tl'],
                                 scale=sig_tl,
                                 size=clean_raw.shape)
    noisy_raw = noisy_raw + read_noise

    # quantization noise, uniform within +/- half a quantization step
    hf_step = noise_model['hf_step']
    qtz_noise = uniform(low=-hf_step, high=hf_step, size=clean_raw.shape)
    noisy_raw = noisy_raw + qtz_noise

    # clip to 0-1
    noisy_raw = np.clip(noisy_raw, 0.0, 1.0)

    # per-pixel noise variance: gain * signal for the shot noise (with the
    # clipped noisy value standing in for the signal), plus the row and read
    # variances and hf_step**2 / 3 for the uniform quantization noise
    noise_map = gain * noisy_raw + sig_r**2 + sig_tl**2 + hf_step**2 / 3

    # compute camera to xyz color space matrix
    metadata = extMetadata(file_path)
    wp_xyz, interp_w = cam2xyzWP(metadata)
    cam2xyz = cam2xyzD50(metadata, wp_xyz, interp_w)

    # turn the clean raw and the color matrix into batched tensors
    raw_t = t.tensor(clean_raw.astype(np.float32)).to(device)
    raw_t = raw_t.permute(2, 0, 1).unsqueeze(0)
    cam2xyz_t = t.tensor(cam2xyz.astype(np.float32)).to(device).unsqueeze(0)

    # render to sRGB and build the downsized copy the network also takes
    srgb_img_t = cam2sRGB(raw_t, cam2xyz_t)
    down_img_t = downsize(srgb_img_t)

    # inference: keep the first network output, moved back to
    # channel-last numpy layout
    with t.no_grad():
        ilm_coes_t, _ = net_E(down_img_t, srgb_img_t)
        ilm_coes = ilm_coes_t.squeeze().permute(1, 2, 0).cpu().numpy()

    # collect everything as float32 for saving
    outputs = (
        ('noisy_raw', noisy_raw),
        ('clean_raw', clean_raw),
        ('variance', noise_map),
        ('ilm_coes', ilm_coes),
        ('cam2xyz', cam2xyz),
    )
    save_dict = {key: arr.astype(np.float32) for key, arr in outputs}

    return 'sam_%05d.pkl' % index, save_dict

In [None]:
# load noise model
# NOTE: pickle.load can execute arbitrary code stored in the file; only point
# model_path at noise-model files that you created or otherwise trust.
# (No `global` statement is needed: this cell already runs at module level,
# so the assignment creates the global that genParams/genSample read.)
with open(os.path.join(model_path, cam_model, 'noiseModel', 'model_stats.pkl'),
          'rb') as pkl:
    noise_model = pickle.load(pkl)

# generate the training set, then the validation set
for split, num_samples in (('train', num_train), ('val', num_val)):
    save_path = os.path.join(data_path, '../denoise', cam_model, split)
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(save_path, exist_ok=True)

    # one pickle file per generated sample, named by genSample
    params = genParams(num_samples)
    for sample in tqdm(params, desc=split, total=num_samples):
        save_file, save_dict = genSample(sample)
        with open(os.path.join(save_path, save_file), 'wb') as pkl:
            pickle.dump(save_dict, pkl, protocol=pickle.HIGHEST_PROTOCOL)