# Evaluating Inference

Imports.

In [1]:
import numpy as np
from h5py import File
from tqdm import tnrange, tqdm_notebook
import pickle

  from ._conv import register_converters as _register_converters


Global Variables.

In [2]:
# Where the inference results are read from and where the evaluation pickle is written.
path = '/data/nir_data/2017_05_22/WithSpectralCorrection/results/training01_tiedStructured/inference.hdf5'
output_path = '/data/nir_data/2017_05_22/WithSpectralCorrection/results/training01_tiedStructured/Evaluation.pickle'

# batchsize: number of samples read from the HDF5 datasets per slice.
# hist_bins: number of points handed to np.linspace when building histogram edges.
batchsize, hist_bins = 256, 512

# Band counts of the input/output volumes and of the latent volume; they size
# the statistic buffers (one entry per band plus one for the whole volume).
num_bands_io, num_bands_latent = 25, 3

Loading data.

In [3]:
# Open the inference results read-only. The handle is deliberately left open:
# the three names below are h5py datasets that the following cells slice batch
# by batch, so the file must stay open for as long as they are used.
f = File(path, 'r')
input_data = f['input_data']
latent_data = f['latent_data']
output_data = f['output_data']

# Number of batches needed to cover every sample (ceiling division).
# `n // batchsize + 1` would add one extra, empty batch whenever the sample
# count is an exact multiple of `batchsize`, and np.min/np.max raise on an
# empty slice.
iterations = (input_data.shape[0] + batchsize - 1) // batchsize

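First pass: determine the overall minimum and maximum of the input, latent and output data, and of the error (output − input).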

In [4]:
# Running min/max of each data set, widened batch by batch in the first pass.
# They start at +inf / -inf so that the first batch always replaces them,
# whatever the magnitude of the data (a fixed sentinel such as +/-100000 would
# silently give a wrong range for data lying entirely beyond it).
# Each name gets its own dict because the dicts are updated in place.
input_data_range = {'min': float('inf'), 'max': float('-inf')}
latent_data_range = {'min': float('inf'), 'max': float('-inf')}
output_data_range = {'min': float('inf'), 'max': float('-inf')}
error_data_range = {'min': float('inf'), 'max': float('-inf')}

In [5]:
# Walk through every data set in batches and widen its running min/max in place.
datasets_with_ranges = [
    (input_data, input_data_range),
    (latent_data, latent_data_range),
    (output_data, output_data_range),
]

for dataset, bounds in tqdm_notebook(datasets_with_ranges):
    for batch_no in tnrange(iterations, leave=False):
        window = slice(batch_no * batchsize, (batch_no + 1) * batchsize)
        batch = dataset[window]
        bounds['min'] = min(np.min(batch), bounds['min'])
        bounds['max'] = max(np.max(batch), bounds['max'])




In [6]:
# Same batched scan for the error, computed as output minus input.
for batch_no in tnrange(iterations):
    window = slice(batch_no * batchsize, (batch_no + 1) * batchsize)
    difference = output_data[window] - input_data[window]
    error_data_range['min'] = min(np.min(difference), error_data_range['min'])
    error_data_range['max'] = max(np.max(difference), error_data_range['max'])




In [7]:
# Show the ranges found by the first pass, one dict per line.
for found_range in (input_data_range, latent_data_range, output_data_range, error_data_range):
    print(found_range)

{'min': -1.0757142305374146, 'max': 6.287299633026123}
{'min': -0.02007482759654522, 'max': 0.023293958976864815}
{'min': -0.09361177682876587, 'max': 4.754312485456467}
{'min': -5.243341952562332, 'max': 2.5272861421108246}


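Second pass: per-sample means, standard deviations and accumulated histograms. First, set up the histogram edges and the result buffers: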

In [8]:
# Combined value range of input and output; it is used for the histogram
# edges of both, so the two histograms share the same bins.
io_ranges = (input_data_range, output_data_range)
io_min = min(r['min'] for r in io_ranges)
io_max = max(r['max'] for r in io_ranges)

In [9]:
# One array of `hist_bins` evenly spaced edge positions per data set
# (i.e. hist_bins - 1 bins). Input and output get separate but equal arrays.
(input_data_hist_edges,
 output_data_hist_edges,
 latent_data_hist_edges,
 error_data_hist_edges) = (
    np.linspace(low, high, hist_bins)
    for low, high in (
        (io_min, io_max),
        (io_min, io_max),
        (latent_data_range['min'], latent_data_range['max']),
        (error_data_range['min'], error_data_range['max']),
    )
)

In [10]:
# Entries per statistic: one per band plus a final one for the whole volume.
io_rows = 1 + num_bands_io
latent_rows = 1 + num_bands_latent
num_samples = input_data.shape[0]

# Histogram counters, summed over all samples: (entries, number of bins).
input_data_hist_buffer = np.zeros((io_rows, len(input_data_hist_edges) - 1), dtype=int)
output_data_hist_buffer = np.zeros((io_rows, len(output_data_hist_edges) - 1), dtype=int)
latent_data_hist_buffer = np.zeros((latent_rows, len(latent_data_hist_edges) - 1), dtype=int)
error_data_hist_buffer = np.zeros((io_rows, len(error_data_hist_edges) - 1), dtype=int)

# Per-sample means: (number of samples, entries).
input_data_mean_buffer = np.zeros((num_samples, io_rows), dtype=float)
output_data_mean_buffer = np.zeros((num_samples, io_rows), dtype=float)
latent_data_mean_buffer = np.zeros((num_samples, latent_rows), dtype=float)
error_data_mean_buffer = np.zeros((num_samples, io_rows), dtype=float)

# Per-sample standard deviations: (number of samples, entries).
input_data_sigma_buffer = np.zeros((num_samples, io_rows), dtype=float)
output_data_sigma_buffer = np.zeros((num_samples, io_rows), dtype=float)
latent_data_sigma_buffer = np.zeros((num_samples, latent_rows), dtype=float)
error_data_sigma_buffer = np.zeros((num_samples, io_rows), dtype=float)

Helper function that computes the statistics of a single sample and stores them in the buffers above.

In [11]:
def compute_properties(data_in, data_out, data_latent, idx):
    """Accumulate the statistics of one sample into the module-level buffers.

    For each of the four volumes (input, output, latent, error) the mean and
    standard deviation are written to row ``idx`` of the matching
    ``*_mean_buffer`` / ``*_sigma_buffer``, and the histogram counts are added
    to the matching ``*_hist_buffer``. Entry ``i`` belongs to band ``i``; the
    last entry (``-1``) belongs to the volume as a whole.

    Parameters
    ----------
    data_in, data_out, data_latent : numpy.ndarray
        Input, output and latent volume of a single sample. Bands are the
        slices along the last axis (the volume is transposed before it is
        iterated).
    idx : int
        Row of the per-sample mean/sigma buffers to fill.

    Returns
    -------
    None
        All results are stored in the module-level buffers.
    """
    def local_properties(data, edges):
        # np.histogram with an explicit edge array ignores values that lie
        # outside [edges[0], edges[-1]].
        r_mean = np.mean(data)
        r_sigma = np.std(data)
        r_hist, _ = np.histogram(data, bins=edges)

        return r_mean, r_sigma, r_hist

    def volume_properties(volume, edges, out_mean, out_sigma, out_hist):
        # whole volume -> last entry
        out_mean[-1], out_sigma[-1], tmp_hist = local_properties(volume, edges)
        out_hist[-1] += tmp_hist

        # individual bands -> entries 0 .. n-1
        for i, band in enumerate(np.array(volume.T, order='C')):
            out_mean[i], out_sigma[i], tmp_hist = local_properties(band, edges)
            out_hist[i] += tmp_hist

    # Error as output minus input: this is the sign convention of the first
    # pass that produced error_data_range and therefore error_data_hist_edges.
    # With the opposite sign the values would be binned against a mirrored
    # range and everything outside it would be dropped from the histogram.
    data_error = data_out - data_in

    for data, edges, mean, sigma, hist in [
        (data_in,     input_data_hist_edges,  input_data_mean_buffer[idx],  input_data_sigma_buffer[idx],  input_data_hist_buffer),
        (data_out,    output_data_hist_edges, output_data_mean_buffer[idx], output_data_sigma_buffer[idx], output_data_hist_buffer),
        (data_latent, latent_data_hist_edges, latent_data_mean_buffer[idx], latent_data_sigma_buffer[idx], latent_data_hist_buffer),
        (data_error,  error_data_hist_edges,  error_data_mean_buffer[idx],  error_data_sigma_buffer[idx],  error_data_hist_buffer),
    ]:
        volume_properties(data, edges, mean, sigma, hist)

In [12]:
# Running index of the current sample over all batches; it selects the row of
# the per-sample mean/sigma buffers that compute_properties fills.
global_idx = 0

for batch_no in tnrange(iterations):
    # load one batch of each data set
    window = slice(batch_no * batchsize, (batch_no + 1) * batchsize)
    batch_input = input_data[window]
    batch_latent = latent_data[window]
    batch_output = output_data[window]

    # process the batch sample by sample
    for j in tnrange(batch_input.shape[0], leave=False):
        compute_properties(batch_input[j], batch_output[j], batch_latent[j], global_idx)
        global_idx += 1

Save result.

In [13]:
# Collect everything that is written to disk, group by group.
result = {}

# histogram edges
result.update({
    'input_hist_edges': input_data_hist_edges,
    'output_hist_edges': output_data_hist_edges,
    'latent_hist_edges': latent_data_hist_edges,
    'error_hist_edges': error_data_hist_edges,
})

# histograms (accumulated over all samples)
result.update({
    'input_hist': input_data_hist_buffer,
    'output_hist': output_data_hist_buffer,
    'latent_hist': latent_data_hist_buffer,
    'error_hist': error_data_hist_buffer,
})

# per-sample means
result.update({
    'input_mean': input_data_mean_buffer,
    'output_mean': output_data_mean_buffer,
    'latent_mean': latent_data_mean_buffer,
    'error_mean': error_data_mean_buffer,
})

# per-sample standard deviations
result.update({
    'input_sigma': input_data_sigma_buffer,
    'output_sigma': output_data_sigma_buffer,
    'latent_sigma': latent_data_sigma_buffer,
    'error_sigma': error_data_sigma_buffer,
})

# min/max found in the first pass
result.update({
    'input_range': input_data_range,
    'latent_range': latent_data_range,
    'output_range': output_data_range,
    'error_range': error_data_range,
})

In [14]:
# Write the collected statistics to disk using the highest pickle protocol
# this Python version offers.
with open(output_path, 'wb') as pickle_file:
    pickle.dump(result, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)