# Scenario A - Noise Level Variation (multiple runs for init mode)

In this scenario the noise level of a generated dataset is varied in three steps (low/medium/high),
while all other dataset parameters are kept constant.

The model used in the inference of the parameters is formulated as follows: 

\begin{equation}
\large y = f(x) = \sum\limits_{m=1}^M \big[A_m \cdot e^{-\frac{(x-\mu_m)^2}{2\cdot\sigma_m^2}}\big] + \epsilon
\end{equation}


This notebook performs a series of inference runs for a single sampler init mode. It does not store the traces or plots; only the summary statistics are saved.

In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pymc3 as pm
import arviz as az

#az.style.use('arviz-darkgrid')

# Report the PyMC3 version in use so the stored notebook output records it.
print('Running on PyMC3 v{}'.format(pm.__version__))

## Import local modules

In [None]:
import datetime
import os
import sys
sys.path.append('../../modules')
import datagen as dg
import models as mdl
import results as res
import figures as fig
import settings as cnf

## Local configuration

In [None]:
# Output location for the results and images produced by this notebook.
out_path      = './output_mruns_lognormal_adapt'
file_basename = out_path + '/scenario_noise'

# Create the output directory. exist_ok=True replaces the former
# "check, then create" sequence: it is free of the race between the check
# and the creation, and re-running the cell is a no-op.
os.makedirs(out_path, exist_ok=True)

# Run configuration; handed to the settings module and read by the cells below.
conf = {
    # scenario name
    'scenario': 'noise variation',

    # initialization method for sampler
    # ('jitter+adapt_diag'/'advi+adapt_diag'/'adapt_diag')
    'init_mode': 'adapt_diag',

    # probabilistic model (priors)
    'prior_model': 'lognormal',

    # provide peak positions to the model as testvalues ('yes'/'no')
    'peak_info': 'yes',

    # absolute peak shift (e.g. 2%(4), 5%(10) or 10%(20) of X-min.)
    'peak_shift': 0.0,

    # dataset directory
    'dataset_dir': './input_datasets',

    # number of runs over the dataset
    'nruns': 1,

    # number of cores to run
    'ncores': 2,

    # number of samples per chain
    'nsamples': 2000,
}

In [None]:
# Echo the configuration dictionary as the cell output for a quick visual check.
conf

## Save configuration

In [None]:
# Hand the configuration to the local settings module for saving.
# NOTE(review): file name/format are decided inside cnf.save (not visible here);
# presumably it writes into out_path — confirm in modules/settings.
cnf.save(out_path, conf)

# Generate data and plot

In [None]:
# wavelengths (x-values): every second integer from 200 up to (excluding) 400
xval = list(range(200, 400, 2))

# placeholders; both are replaced by the loaded data further down
ldata, lpeaks = [], []

# how many spectra exist for each noise level
nsets = 10

# noise levels of the datasets, described as 1%, 2% and 5% of the minimal
# signal amplitude
# NOTE(review): that description matches these values only if the minimal
# amplitude is 5 — confirm against the dataset generator
noise_levels = [0.05, 0.10, 0.25]

# overall dataset count: one group of nsets spectra per noise level
tsets = len(noise_levels) * nsets

# read the pre-generated datasets (and their peak lists) from disk
ldata, lpeaks = dg.data_load(tsets, conf['dataset_dir'])

# apply the configured peak shift to the peak lists
lpeaks = dg.add_peakshift(lpeaks, conf['peak_shift'])

In [None]:
# plot datasets
#fig.plot_datasets(ldata, lpeaks, dims=(int(tsets/2),2), figure_size=(12,int(tsets*(1.8))), 
#                                                    savefig='yes', fname=file_basename)

# Initialize models and run inference

In [None]:
# x-values as a float32 numpy array
x_val = np.array(xval, dtype=np.float32)

# y-values of every dataset, selected with the column labels of the first
# dataset (all datasets are addressed with the same columns)
cols = ldata[0].columns
y_val = [frame[cols].values for frame in ldata]

In [None]:
# Build one model per dataset and sample from it; the whole pass over the
# datasets is repeated conf['nruns'] times. models[k] and traces[k] belong
# together (run-major, dataset-minor order).
models, traces = [], []

for run_idx in range(conf['nruns']):
    print("running loop {0}/{1} over datasets".format(run_idx+1,conf['nruns']))
    for set_idx in range(len(ldata)):
        # keyword arguments common to both model variants
        model_args = dict(xvalues=x_val, observations=y_val[set_idx], npeaks=3,
                          pmodel=conf['prior_model'])

        if conf['peak_info'] == 'yes':
            # pass the (sorted) known peak positions on to the model
            # NOTE(review): assumes lpeaks entries are numpy arrays, so
            # flatten() returns a copy and the in-place sort leaves lpeaks intact
            plist = lpeaks[set_idx].flatten()
            plist.sort()
            model_args['mu_peaks'] = plist

        model_g = mdl.model_gauss(**model_args)
        models.append(model_g)

        with model_g:
            print("({0}:{1}) running inference on dataset #{2}/{3}".format(run_idx+1,conf['nruns'],set_idx+1,len(ldata)))
            trace_g = pm.sample(conf['nsamples'], init=conf['init_mode'], cores=conf['ncores'])
            traces.append(trace_g)

# Model visualization

In [None]:
# Show the graph of the first model as the cell output
# (every model is built by the same mdl.model_gauss call, so the first one is used as the example).
pm.model_to_graphviz(models[0])

In [None]:
# save model figure as image
# The graph of the first model is rendered to a PNG whose name is derived from
# file_basename + '_model'; the trailing semicolon suppresses the cell output.
img = pm.model_to_graphviz(models[0])
img.render(filename=file_basename + '_model', format='png');

# Collect results and save

In [None]:
# posterior predictive samples (500 per trace), each trace paired with the
# model it was sampled from
ppc = [
    pm.sample_posterior_predictive(trace, samples=500, model=model)
    for trace, model in zip(traces, models)
]

In [None]:
# model variables to include in the summary
varnames = ['amp', 'mu', 'sigma', 'epsilon']
nruns = conf['nruns']

# Bookkeeping lists in run-major, dataset-minor order (the order in which the
# inference loop appended the traces):
#   ly_val - observed y-values, the full dataset list repeated once per run
#   lnoise - noise level per dataset (each level nsets times), repeated per run
#   lruns  - 1-based run number as a string, tsets entries per run
ly_val = list(y_val) * nruns
lnoise = [level for level in noise_levels for _ in range(nsets)] * nruns
lruns = ['{0}'.format(run_no) for run_no in range(1, nruns + 1) for _ in range(tsets)]

# gather the summary statistics into a dataframe and show it as cell output
df = res.get_results_summary(varnames, traces, ppc, ly_val, epsilon_real=lnoise, runlist=lruns)
df

In [None]:
# save results to .csv
# Written to '<file_basename>.csv' inside the output directory; index=False
# leaves the dataframe's row index out of the file.
df.to_csv(file_basename + '.csv', index=False)

In [None]:
# Finish the run via the local settings module.
# NOTE(review): what cnf.close records (e.g. an end marker/timestamp in out_path)
# is not visible here — confirm in modules/settings.
cnf.close(out_path)