# Dataset Statistical Analysis

## Includes

In [None]:
# imports used throughout the notebook
import os
import pickle
import pyexiv2 as exiv2
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from torch.utils import data

## Initialization

In [None]:
# configuration
# data_root: directory whose entries are listed to find the RAW files
# file_ext: file-name extension used to pick the RAW files inside data_root
data_root = '/home/lab/Documents/ssd//DJI'  # dataset path
file_ext = '.DNG'  # extension of RAW file

## Linear fit

In [None]:
# MLE algorithm
def linearFit(sample_list):
    """Fit the line ``y = slope * x + const`` to 2-D samples by least squares.

    Args:
        sample_list: Array-like with one sample per row; column 0 holds the
            x values and column 1 the y values. Nested lists/tuples are
            accepted as well as NumPy arrays.

    Returns:
        Tuple ``(slope, const, std)`` where ``std`` is the residual standard
        deviation computed with ``n - 2`` degrees of freedom.

    Note:
        No input validation is performed: with fewer than three samples
        ``std`` is not finite, and when every x is equal all three results
        are NaN (NumPy emits a RuntimeWarning instead of raising).
    """
    # coerce so that plain Python sequences support the column slicing below
    samples = np.asarray(sample_list)
    data_x = samples[:, 0]
    data_y = samples[:, 1]

    # intermediate variables: centred sums of squares and cross products
    x_mean = np.mean(data_x)
    y_mean = np.mean(data_y)
    lxx = np.sum((data_x - x_mean)**2)
    lyy = np.sum((data_y - y_mean)**2)
    lxy = np.sum((data_x - x_mean) * (data_y - y_mean))

    # MLE
    slope = lxy / lxx
    const = y_mean - slope * x_mean

    # Rounding can push the residual sum of squares slightly below zero for
    # (near-)perfect fits, which would turn the square root into NaN.
    residual = np.maximum(lyy - slope * lxy, 0)
    std = np.sqrt(residual / (len(data_x) - 2))

    return slope, const, std

## Statistical analysis

In [None]:
# get file list
# Match on the file-name suffix: a substring test would also select names
# that merely contain the extension (e.g. "x.DNG.xmp").
file_list = sorted(
    file for file in os.listdir(data_root) if file.endswith(file_ext))

# per-file samples, one 1-D float32 array appended per RAW file
wb_list = []
noise_list = []
for file in tqdm(file_list, desc='progress', total=len(file_list)):
    # load a new sample
    img_md = exiv2.ImageMetadata(os.path.join(data_root, file))
    img_md.read()

    # extract metadata
    # keep components 0 and 2 of AsShotNeutral (plotted later as w_r, w_b)
    cam_wb = img_md['Exif.Image.AsShotNeutral'].value
    wb_list.append(np.array([cam_wb[0], cam_wb[2]], dtype=np.float32))
    # NoiseProfile is parsed from its whitespace-separated raw text; the
    # first two entries are plotted later as lambda_shot and lambda_read
    cam_noise = img_md['Exif.Image.NoiseProfile'].raw_value.split()
    noise_list.append(np.array(cam_noise, dtype=np.float32))

# move both sample sets into the log domain, then fit one line to each
wb_list, noise_list = (np.log(np.asarray(samples))
                       for samples in (wb_list, noise_list))
wb_s, wb_c, wb_std = linearFit(wb_list)
noise_s, noise_c, noise_std = linearFit(noise_list)

# report the fit parameters together with the range of the first column
_reports = (
    ("stat info for wb {'slope': %f, 'const': %f, 'std': %f, 'min': %f, 'max': %f}",
     (wb_s, wb_c, wb_std), wb_list[:, 0]),
    ("stat info for noise {'slope': %f, 'const': %f, 'std': %f, 'min': %f, 'max': %f}",
     (noise_s, noise_c, noise_std), noise_list[:, 0]),
)
for _template, _fit, _column in _reports:
    print(_template % (*_fit, _column.min(), _column.max()))

# plot results
# The axis labels are raw strings so that the LaTeX backslashes ("\log",
# "\lambda") are not parsed as (invalid) Python escape sequences. Labels and
# layout are set on the explicit axes/figure objects rather than through
# pyplot's implicit "current figure" state.

# white-balance samples: column 0 on x, column 1 on y
fig1 = plt.figure()
ax1 = fig1.add_subplot(2, 1, 1)
ax1.plot(wb_list[:, 0], wb_list[:, 1], 'bo', markersize=3)
ax1.set_xlabel(r'$\log(w_{r})$', fontsize=12)
ax1.set_ylabel(r'$\log(w_{b})$', fontsize=12)
fig1.tight_layout()

# noise-profile samples: column 0 on x, column 1 on y
fig2 = plt.figure()
ax2 = fig2.add_subplot(2, 1, 2)
ax2.plot(noise_list[:, 0], noise_list[:, 1], 'bo', markersize=3)
ax2.set_xlabel(r'$\log(\lambda_{shot})$', fontsize=12)
ax2.set_ylabel(r'$\log(\lambda_{read})$', fontsize=12)
fig2.tight_layout()

# save to figure if needed
fig1.savefig('stat1.png', bbox_inches='tight')
fig2.savefig('stat2.png', bbox_inches='tight')