In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
import sys
from scipy.special import erfcinv as erfcinv
import tqdm as tqdm
import time
from models import gauss_func,gp_3D_solution,calculate_cost,gp_1D_solution,calculate_dcost,gaus_pdf
import matplotlib.pyplot as plt
from plotting import mole_fraction_plot
from read_settings import read_settings

In [None]:
# Load the grid, source, atmosphere and observation settings from the site file.
grid, source, atm, obs = read_settings('../site_settings.yml')

# Short aliases for the three grid axes used throughout the notebook.
x, y, z = (grid[axis_key] for axis_key in ('advect_axis', 'crosswind_axis', 'vertical_axis'))

# Inverse Problem: Estimate the Parameters from Data
## Perfect Model Experiment
### Generate some noisy observations

In [None]:
# Synthetic observations: a transect along the whole crosswind axis, placed at a
# single index of the advection axis and at z = 0.
OBS_X_INDEX = 44  # index into `x` at which the transect is located
# Dedicated, seeded generator so a fresh "Restart & Run All" reproduces the same noise.
rng_obs = np.random.default_rng(0)

obs_x = x[OBS_X_INDEX]*np.ones(y.shape[0])
obs_y = y[:]
obs_z = np.zeros(y.shape[0])
observation_loc = {'x':obs_x,'y':obs_y,'z':obs_z}

# Noise-free mole fraction from the model run with the true settings, plus background.
obs_mole_fraction = gp_1D_solution(loc=observation_loc,atm=atm,source=source)+atm['background']
# Gaussian noise whose amplitude is `is_noise` percent of the background.
observation_noise = obs['is_noise']/100.*atm['background']*rng_obs.standard_normal(obs_mole_fraction.shape)
noisy_obs_mole_fraction = obs_mole_fraction + observation_noise
# Enhancement above background: this is the quantity stored as the observation.
noisy_obs_mole_fraction_enh = noisy_obs_mole_fraction - atm['background']

# Observation record = noisy enhancement together with the x/y/z sampling locations.
observation = {'mole_fraction':noisy_obs_mole_fraction_enh, **observation_loc}

### Define the prior parameters

In [None]:
# Start the prior from copies, so the settings used to generate the
# observations (`atm`, `source`) are not modified.
prior_atm = atm.copy()
prior_source = source.copy()
# Prior guess for the emission rate: half of the value in `source`.
prior_source['emis_rate'] = source['emis_rate'] * 0.5

# NOTE: this override is ACTIVE. It replaces whatever wind speed `atm` carries
# with 3 in the prior atmosphere, which introduces model error whenever the two
# differ. Comment the line out to keep the prior atmosphere identical to `atm`.
prior_atm['advection_wind_speed'] = 3

### Key Assumptions

In [None]:
# Assumed prior uncertainty on the emission rate: half of the prior emission rate.
prior_source['emis_rate_uncert'] = prior_source['emis_rate'] * 0.5
# Assumed observation uncertainty: `is_noise` percent of the background, i.e. the
# same amplitude that scaled the noise added to the synthetic observations.
observation['uncert'] = (obs['is_noise'] / 100.) * atm['background']

### Examine the Cost Function

In [None]:
# Sample emission rates around the prior and evaluate the cost function and its
# derivative at each sample, to visualise the shape of the problem.
n_samples = 1000
# Dedicated, seeded generator so the ensemble (and the figure) is reproducible.
rng_ensemble = np.random.default_rng(1)
emis_rate_ensemble = prior_source['emis_rate'] + prior_source['emis_rate_uncert']*rng_ensemble.standard_normal(n_samples)
current_source=prior_source.copy()
current_atm=prior_atm.copy()
cost = np.zeros(n_samples)
dcost = np.zeros(n_samples)
loc = {'x':obs_x,'y':obs_y,'z':obs_z}  # not used in this cell; kept in case other cells rely on it

for i in range(n_samples):
    # Evaluate the model at the i-th sampled emission rate.
    current_source['emis_rate'] = emis_rate_ensemble[i]
    emission = {'current':emis_rate_ensemble[i],'prior':prior_source['emis_rate'],'prior_uncert':prior_source['emis_rate_uncert']}
    cost[i] = calculate_cost(observation=observation,emission=emission,source=current_source,atm=current_atm)
    dcost[i] = calculate_dcost(observation=observation,emission=emission,source=current_source,atm=current_atm)

# Both curves are rescaled so they fit on one axis. The derivative is signed
# (it is used as a descent direction elsewhere in the notebook), so it is
# normalised by its largest magnitude: dividing by dcost.max() would flip or
# inflate the curve whenever that maximum is negative or close to zero.
plt.scatter(emis_rate_ensemble,cost/cost.max(),s=1)
plt.scatter(emis_rate_ensemble,dcost/np.abs(dcost).max(),s=1)
plt.xlabel('Emission Rate')
plt.legend(['Cost','Jacobian']);

### Gradient Descent Optimizer

In [None]:
# Fixed-step gradient descent on the emission rate, starting from the prior.
max_iterations = 1000
step_size = 0.01  # descent step length applied to the cost derivative
rel_tol = 1e-2    # stop once |dcost| falls to this fraction of its initial value

current_source = prior_source.copy()
current_atm = prior_atm.copy()
emission = {'current':prior_source['emis_rate'],'prior':prior_source['emis_rate'],'prior_uncert':prior_source['emis_rate_uncert']}

# History arrays; slot 0 holds the values at the prior emission rate.
cost = np.zeros(max_iterations)
dcost = np.zeros(max_iterations)
emis = np.zeros(max_iterations)
cost[0] = calculate_cost(observation=observation,emission=emission,source=current_source,atm=current_atm)
dcost[0] = calculate_dcost(observation=observation,emission=emission,source=current_source,atm=current_atm)
emis[0]=prior_source['emis_rate']

# `step` is the next history slot to fill (named so it does not shadow the
# builtin `iter`). `and` short-circuits, so the gradient test is only evaluated
# while a free slot remains.
step = 1
while step < max_iterations and np.abs(dcost[step-1]/dcost[0]) > rel_tol:
    emis[step] = emis[step-1]-step_size*dcost[step-1]
    emission = {'current':emis[step],'prior':prior_source['emis_rate'],'prior_uncert':prior_source['emis_rate_uncert']}
    current_source['emis_rate']=emis[step]
    cost[step] = calculate_cost(observation=observation,emission=emission,source=current_source,atm=current_atm)
    dcost[step] = calculate_dcost(observation=observation,emission=emission,source=current_source,atm=current_atm)
    step+=1
n_iter = step-1  # number of descent updates actually performed

# Posterior state: the last emission rate reached and the mole fraction it produces.
posterior={'emission':emission['current']}
posterior_source = current_source.copy()
posterior_source['emis_rate']=emission['current']
posterior['mole_fraction']=gp_1D_solution(loc=observation_loc,atm=current_atm,source=posterior_source)

# mole_fraction/emission is used as the sensitivity of the modelled mole fraction
# to the emission rate (i.e. the model is taken to be linear in emission).
# Inverting the summed precisions gives the posterior VARIANCE; the square root
# is stored so that posterior['uncert'] is a standard deviation, like
# observation['uncert'] and prior_source['emis_rate_uncert'].
posterior_variance = ((posterior['mole_fraction']**2/posterior['emission']**2).sum()/observation['uncert']**2 + 1./prior_source['emis_rate_uncert']**2)**(-1)
posterior['uncert']=np.sqrt(posterior_variance)

# Slots 0..n_iter have been evaluated, so the slice must include index n_iter.
plt.plot(cost[:n_iter+1])
plt.title('Cost Function vs. Iterations');
plt.xlabel('Iteration Number');
plt.ylabel('Cost');

### Prior and Posterior Emission Rate 

In [None]:
# Emission-rate axis on which both densities are evaluated.
emis_plot_grid = np.linspace(0, 2000, 10001)

# Gaussian densities centred on the prior and on the posterior emission rate.
prior_pdf = gaus_pdf(
    emis_plot_grid,
    mu=prior_source['emis_rate'],
    sig=prior_source['emis_rate_uncert'],
)
post_pdf = gaus_pdf(
    emis_plot_grid,
    mu=posterior['emission'],
    sig=posterior['uncert'],
)

# Each curve is divided by its own maximum so the two are comparable in height.
plt.plot(emis_plot_grid, prior_pdf / np.max(prior_pdf))
plt.plot(emis_plot_grid, post_pdf / np.max(post_pdf))
# Emission rate from `source` (the value used to generate the observations), drawn at half height.
plt.plot(source['emis_rate'], 0.5, '*')
plt.legend(['Prior Distribution', 'Posterior Distribution', 'Truth'])

### Prior and Posterior Residuals

In [None]:
# Model-minus-observation residuals before and after the inversion.
# observation['mole_fraction'] has the background removed, and the model output
# is compared to it directly, as in the posterior residual.
prior_mole_fraction = gp_1D_solution(loc=observation_loc,atm=prior_atm,source=prior_source)
residual_bins = np.linspace(-1,1,21)  # shared bins so the two histograms are directly comparable

# Prior residual: prior model vs. the observations (not vs. the posterior model).
g1 = plt.hist(prior_mole_fraction-observation['mole_fraction'],bins=residual_bins);
g2 = plt.hist(posterior['mole_fraction']-observation['mole_fraction'],bins=residual_bins,alpha=0.4);
# Element 2 of each plt.hist result is the drawn bar container, used here as the legend handle.
plt.legend([g1[2],g2[2]],['Prior','Posterior'])
plt.xlabel('Model Data Mismatch (ppm)');