# Monte Carlo approach to APF calculations

To get an estimate of the uncertainty on Area-Peak Factor values derived from WD scans, we need to take into account uncertainty in the background fit.

In [None]:
# IPython magics: load the autoreload extension and reload all modules before
# each cell runs, so edits to imported modules are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pickle
import pandas as pd
import glob
from pathlib import Path
from scipy import ndimage
import matplotlib.pyplot as plt

import sys
sys.path.insert(1, "..")
from src.wdscan import select_roi, plot_bg_fit
from lmfit.models import LinearModel, ConstantModel, LorentzianModel, GaussianModel

In [None]:
def get_noise_via_running_mean(data, window_size=21, plot=False):
    """Estimate the noise in a scan as the residual about a running mean.

    The ``cps_per_nA`` column is smoothed with a uniform (boxcar) window of
    ``window_size`` points, padding the ends with the nearest value, and the
    smoothed curve is subtracted from the data.

    Parameters
    ----------
    data : pandas.DataFrame
        Scan with columns ``L`` and ``cps_per_nA``.
    window_size : int, optional
        Number of points in the running-mean window.
    plot : bool, optional
        If True, plot the data, the running mean and the residuals.

    Returns
    -------
    residuals : pandas.Series
        ``data.cps_per_nA`` minus the running mean (keeps the index of ``data``).
    y_running_mean : numpy.ndarray
        The running mean, one value per row of ``data``.
    """

    x = data.L
    y = data.cps_per_nA

    weights = np.ones(window_size) / window_size

    # ndimage.convolve returns an array with the dtype of its input, so an
    # integer-valued column would yield a truncated mean. Smooth a float copy.
    y_running_mean = ndimage.convolve(np.asarray(y, dtype=float), weights, mode='nearest')

    residuals = y - y_running_mean

    if plot:

        plt.plot(x, y, '.k', label='data')
        plt.plot(x, y_running_mean, '-r', label='running mean')
        plt.plot(x, residuals, '-b', label='residuals')
        plt.legend()
        plt.show()

    return residuals, y_running_mean

def simulate_spectra_montecarlo(data, window_size=15, number_of_mc_simulations=20, plot=False,
                                random_state=None):

    """Generate synthetic spectra by resampling the noise about a running mean.

    The residuals between the data and its running mean are resampled with
    replacement and recombined with the running mean, giving synthetic
    datasets with as much noise as the original. Resampling is done separately
    in three zones so that noise is not exchanged between them: the points
    below the peak window, the peak window itself (``L`` strictly within 3 of
    the ``L`` at maximum ``cps_per_nA``), and the points above it.

    Parameters
    ----------
    data : pandas.DataFrame
        Scan with columns ``L`` and ``cps_per_nA``. Rows are addressed by
        position, so any index is accepted.
    window_size : int, optional
        Running-mean window passed to ``get_noise_via_running_mean``.
    number_of_mc_simulations : int, optional
        Number of synthetic datasets (columns) to generate.
    plot : bool, optional
        If True, plot the data, the running mean and the synthetic datasets.
    random_state : None, int or numpy.random.Generator, optional
        Seed or generator passed to ``numpy.random.default_rng``. ``None``
        (default) gives a different result on every call.

    Returns
    -------
    pandas.DataFrame
        One column per simulation (labelled 0..n-1), indexed by the ``L``
        values of ``data`` (index name ``'L'``). Rows that belong to no zone
        (only possible if ``L`` is not monotonic) are NaN.
    """

    # Generate the new synthetic datasets

    residuals, running_mean = get_noise_via_running_mean(data, window_size)

    # Plain arrays + positional indices: independent of the labels in data.index
    noise = np.asarray(residuals, dtype=float)
    smooth = np.asarray(running_mean, dtype=float)
    n_points = len(data)
    positions = np.arange(n_points)

    # Select zone around the peak as separate to the other zones
    pk_max_L = data.L.iloc[data.cps_per_nA.argmax()]

    pk_zone_bool = ((data.L > pk_max_L-3) & (data.L < pk_max_L+3)).to_numpy()
    pk_zone = positions[pk_zone_bool]
    lwr_zone = positions[:pk_zone[0]]
    upr_zone = positions[pk_zone[-1]+1:]
    zone_list = [lwr_zone, pk_zone, upr_zone]

    rng = np.random.default_rng(random_state)

    residuals_array = np.full((n_points, number_of_mc_simulations), np.nan)
    cps_per_nA_array = np.full((n_points, number_of_mc_simulations), np.nan)

    for i in range(number_of_mc_simulations):
        for zone in zone_list:
            if zone.size == 0:
                # Peak window touches the edge of the scan: nothing to resample
                continue
            residuals_array[zone, i] = rng.choice(noise[zone], size=zone.size, replace=True)
            # Sign convention kept from the original implementation: the
            # resampled residuals are subtracted from the running mean.
            cps_per_nA_array[zone, i] = smooth[zone] - residuals_array[zone, i]

    if plot:

        plt.plot(data.L, data.cps_per_nA, '.k', label='data')
        plt.plot(data.L, running_mean, '-r', label='running mean')
        plt.plot(data.L, cps_per_nA_array, '-y', label='_nolegend', zorder=0)
        plt.title('yellow = synthetic datasets')
        plt.legend()
        plt.show()

    synthetic_data = pd.DataFrame(cps_per_nA_array)
    synthetic_data.index = data.L.values
    synthetic_data.index.name = 'L'

    return synthetic_data

# Import data

In [None]:
# Load the cleaned peak-shape scans into nested dicts:
# lab -> crystal -> sample name (the CSV file stem) -> DataFrame.
anu_data = {}

for xtl in ['LDE1', 'LDE1H']:
    files = glob.glob(f'../data/interim/peak_shapes/cleaned_data_ANU/{xtl}/*.csv')
    anu_data[xtl] = {Path(f).stem: pd.read_csv(f, index_col=0) for f in files}

sta_data = {}

for xtl in ['LDE1', 'LDE1L']:
    files = glob.glob(f'../data/interim/peak_shapes/cleaned_data_StA/{xtl}/*.csv')
    sta_data[xtl] = {Path(f).stem: pd.read_csv(f, index_col=0) for f in files}

data = {'ANU': anu_data, 'StA': sta_data}

# with open('data.pickle', 'wb') as f:
#     pickle.dump(data, f)

# Get noise via running mean and create synthetic datasets

First do this with just 10 MC simulations and plot the figures to check that it works as expected.

In [None]:
# Trial run: 10 simulations per scan, plotted so the synthetic data can be inspected.
synthetic_data = {'ANU': {}, 'StA': {}}

for lab in ['ANU', 'StA']:

    for xtl in data[lab]:
        synthetic_data[lab][xtl] = {}

        for sample, scan in data[lab][xtl].items():
            print(sample)

            # Buddingtonite and GlassA870 get a narrower smoothing window
            # (4 points at StA, 9 at ANU); every other sample uses 21.
            if sample not in ['Buddingtonite', 'GlassA870']:
                window_size = 21
            elif lab == 'StA':
                window_size = 4
            else:
                window_size = 9

            synthetic_data[lab][xtl][sample] = simulate_spectra_montecarlo(
                scan,
                window_size=window_size,
                number_of_mc_simulations=10,
                plot=True)

Now do it again with more MC simulations (e.g. 50 — set via `NUM_MC_SIMS`, which is currently 10 in the cell below) and save the data.

In [None]:
NUM_MC_SIMS = 10

# Production run: NUM_MC_SIMS simulations per scan, no plots, result pickled.
synthetic_data = {'ANU': {}, 'StA': {}}

for lab in ['ANU', 'StA']:

    for xtl in data[lab]:
        synthetic_data[lab][xtl] = {}

        for sample, scan in data[lab][xtl].items():

            # Buddingtonite and GlassA870 get a narrower smoothing window
            # (4 points at StA, 9 at ANU); every other sample uses 21.
            if sample not in ['Buddingtonite', 'GlassA870']:
                window_size = 21
            elif lab == 'StA':
                window_size = 4
            else:
                window_size = 9

            synthetic_data[lab][xtl][sample] = simulate_spectra_montecarlo(
                scan,
                window_size=window_size,
                number_of_mc_simulations=NUM_MC_SIMS,
                plot=False)

with open('../data/interim/peak_shapes/synthetic_data.pickle', 'wb') as f:
    pickle.dump(synthetic_data, f)
            
# To save the data in a structure similar to that of the "cleaned" data, uncomment the below:

#             output_folder = Path(f'./synthetic_data_{lab}/{xtl}/')
#             if not output_folder.exists():
#                 output_folder.mkdir(parents=True, exist_ok=True)
                
#             synthetic_data[lab][xtl][sample].to_csv(output_folder / Path(f'{sample}.csv'))

# Set up for background fitting

In [None]:
# Background-fitting regions of interest, keyed lab -> crystal -> sample.
# Each entry is an array of [lower, upper] windows in L; an empty array means
# no windows are defined for that sample.
rois = {
    'StA': {
        'LDE1': {
            'AlN': np.array([[125, 130], [164, 165]]),
            'BN': np.array([[129, 133], [164, 165]]),
            'GaN': np.array([[133, 136], [162, 165]]),
            'Si3N4': np.array([[134, 136], [162, 165]]),
            'GlassA870': np.array([[136, 142], [157, 160]]),
            'Buddingtonite': np.array([]),
        },
        'LDE1L': {
            'AlN': np.array([[130, 135], [162, 165]]),
            'BN': np.array([[128, 130], [164, 165]]),
            'GaN': np.array([[132, 136], [160, 165]]),
            'Si3N4': np.array([[133, 135], [162, 165]]),
            'GlassA870': np.array([[134, 140], [155, 158]]),
            'Buddingtonite': np.array([]),
        },
    },
    'ANU': {
        'LDE1': {
            'AlN': np.array([[125, 130], [162, 165]]),
            'BN': np.array([[129, 133], [162, 165]]),
            'GaN': np.array([[125, 133], [160, 165]]),
            'Si3N4': np.array([[128, 132], [162, 165]]),
            'GlassA870': np.array([[125, 127], [132, 135], [157, 165]]),
            'Buddingtonite': np.array([[125, 127], [132, 135], [157, 165]]),
        },
        'LDE1H': {
            'AlN': np.array([[125, 130], [162, 165]]),
            'BN': np.array([[129, 133], [162, 165]]),
            'GaN': np.array([[130, 135], [160, 165]]),
            'Si3N4': np.array([[128, 132], [162, 165]]),
            'GlassA870': np.array([[125, 126], [132, 135], [157, 165]]),
            'Buddingtonite': np.array([[125, 127], [132, 135], [157, 165]]),
        },
    },
}


In [None]:
# Overlay the ANU GaN scans from both crystals; the vertical lines mark the
# hand-picked peak positions whose difference is printed below.
lde1_peak_L = 146.2
lde1h_peak_L = 145.9

fig, ax = plt.subplots()
for xtl_label in ('LDE1', 'LDE1H'):
    data['ANU'][xtl_label]['GaN'].plot(x='L', y='cps_per_nA', ax=ax, label=xtl_label)
ax.set_xlim(143, 150)
ax.axvline(lde1_peak_L, color='tab:blue')
ax.axvline(lde1h_peak_L, color='tab:orange')
ax.set_xticks(np.arange(143, 151))
ax.set_title('GaN at ANU on different crystals')
ax.set_ylabel('cps/nA')
plt.show()

anu_peak_pos_difference = lde1_peak_L - lde1h_peak_L
print(round(anu_peak_pos_difference, 1))

In [None]:
# Overlay the StA GaN scans from both crystals; the vertical lines mark the
# hand-picked peak positions whose difference is printed below.
lde1_peak_L = 148.5
lde1l_peak_L = 146.6

fig, ax = plt.subplots()
for xtl_label in ('LDE1', 'LDE1L'):
    data['StA'][xtl_label]['GaN'].plot(x='L', y='cps_per_nA', ax=ax, label=xtl_label)
ax.set_xlim(143, 150)
ax.axvline(lde1_peak_L, color='tab:blue')
ax.axvline(lde1l_peak_L, color='tab:orange')
ax.set_xticks(np.arange(143, 151))
ax.set_title('GaN at StA on different crystals')
ax.set_ylabel('cps/nA')
plt.show()

sta_peak_pos_difference = lde1_peak_L - lde1l_peak_L
print(round(sta_peak_pos_difference, 1))

# Fit backgrounds to the original data

In [None]:
def select_roi_index(df, roi):

    """Return the rows of ``df`` whose index lies strictly inside any window of ``roi``.

    ``roi`` is an iterable of ``[lower, upper]`` pairs. Rows are returned
    window by window, in the order the windows are given, so a row that falls
    inside two overlapping windows appears twice.
    """

    selected_labels = []

    for bounds in roi:
        inside = (df.index > bounds[0]) & (df.index < bounds[1])
        selected_labels.extend(df.index[inside].to_list())

    return df.loc[selected_labels, :]

def fit_bg(data, bg_type='linear'):

    """Fit a background model to the ``L`` / ``cps_per_nA`` columns of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Points to fit, with columns ``L`` (x) and ``cps_per_nA`` (y).
    bg_type : str, optional
        Either:
            - 'linear' (default): an lmfit ``LinearModel``
            - 'lorentzian_plus_c': ``LorentzianModel() + ConstantModel()``,
              with amplitude >= 0, center <= 130 and c <= min(y)

    Returns
    -------
    The lmfit fit result returned by ``Model.fit``.

    Raises
    ------
    ValueError
        If ``bg_type`` is not one of the two supported names.
    """

    # Validate first: previously an unknown bg_type fell through both branches
    # and failed later with an UnboundLocalError on `mod`.
    if bg_type not in ('linear', 'lorentzian_plus_c'):
        raise ValueError(
            f"Unknown bg_type {bg_type!r}; expected 'linear' or 'lorentzian_plus_c'")

    x = data.L
    y = data.cps_per_nA

    if bg_type == 'linear':

        mod = LinearModel()
        pars = mod.make_params(slope=0, intercept=-1)

    else:

        mod = LorentzianModel() + ConstantModel()
        pars = mod.make_params(amplitude=max(y)*10, sigma=10, center=120, c=0)

        pars['amplitude'].set(min=0)
        pars['center'].set(max=130)
        pars['c'].set(max=min(y))

    bg_fit_result = mod.fit(y, pars, x=x)

    return bg_fit_result


def plot_spectrum_and_roi(df, roi, sample=None, baseline=None):

    """Plot a spectrum and shade the regions of interest (roi) used for the background fit.

    ``df`` needs columns ``L`` and ``cps_per_nA``. If ``baseline`` is given it
    is drawn against ``df['L']``. The y-axis is scaled to the data inside the
    ROI windows, and ``sample`` is used as the title.
    """

    fig, ax = plt.subplots(figsize=(8, 2))

    ax.plot(df['L'], df['cps_per_nA'], lw=1, color='k', label='data')

    if baseline is not None:
        ax.plot(df['L'], baseline, lw=1, color='b', label='baseline')

    for window in roi:
        ax.axvspan(window[0], window[1], alpha=0.1, color='red', linewidth=0)

    # y-limits come from the ROI points only, with margins that are fractions
    # of the ROI maximum.
    roi_counts = select_roi(df, roi)['cps_per_nA']
    lowest = roi_counts.min()
    highest = roi_counts.max()

    ax.set_ylim(lowest - highest*0.05, highest + highest*0.3)
    ax.set_title(sample)
    plt.tight_layout()


def fit_baseline_and_plot(df, roi, name=None, bg_type='linear'):

    """Fit the background inside ``roi``, plot it, and store the result in ``df``.

    Adds two columns to ``df`` in place: ``baseline`` (the fitted background
    evaluated at every ``L``) and ``cps_per_nA_corrected`` (data minus
    baseline). Returns None.
    """

    roi_points = select_roi(df, roi)
    bg_fit_result = fit_bg(roi_points, bg_type=bg_type)

    # Evaluate over the full scan, not just the ROI points used for the fit
    baseline = bg_fit_result.eval(x=df['L'].values)

    plot_spectrum_and_roi(df, roi, sample=name, baseline=baseline)

    df['baseline'] = baseline
    df['cps_per_nA_corrected'] = df['cps_per_nA'].values - baseline


def fit_mc_bg(full_synthetic_data, roi, bg_type='linear', randomise_roi=False, seed=12345):

    """Fit a background to every synthetic dataset and subtract it.

    Parameters
    ----------
    full_synthetic_data : pandas.DataFrame
        One column per Monte Carlo simulation, indexed by L. A column
        labelled ``'L'`` is skipped.
    roi : array-like of [lower, upper] pairs
        Windows (in L) whose points are used for the fit.
    bg_type : str, optional
        Either:
            - 'linear' (default)
            - 'lorentzian_plus_c'
    randomise_roi : bool, optional
        If True, every ROI bound is shifted by a random amount drawn from
        [-1, 0). The shifts are drawn once per call, so all simulations in
        the call share the same shifted ROI.
    seed : int or None, optional
        Seed for the ROI shifts. The default (12345) reproduces the
        previously hard-coded behaviour, i.e. identical shifts on every call.

    Returns
    -------
    baseline, corrected_data : pandas.DataFrame
        Fitted background and background-subtracted data, same shape and
        index as ``full_synthetic_data``, columns labelled 0..n-1.

    Raises
    ------
    ValueError
        If ``bg_type`` is unknown or the ROI selects no data points.
    """

    # Validate first: an unknown bg_type used to surface as an
    # UnboundLocalError on `mod` inside the fitting loop.
    if bg_type not in ('linear', 'lorentzian_plus_c'):
        raise ValueError(
            f"Unknown bg_type {bg_type!r}; expected 'linear' or 'lorentzian_plus_c'")

    if randomise_roi:

        # Accept lists as well as arrays, and any number of columns
        roi = np.asarray(roi, dtype=float)
        rng = np.random.default_rng(seed)
        roi_adjustments = rng.random(size=roi.shape) - 1

        roi = roi + roi_adjustments

    synthetic_data = select_roi_index(full_synthetic_data, roi)

    if synthetic_data.empty:
        raise ValueError('The ROI does not select any data points to fit')

    x = synthetic_data.index
    n_simulations = len(synthetic_data.columns)

    if bg_type == 'linear':
        mod = LinearModel()
    else:
        mod = LorentzianModel() + ConstantModel()

    baseline_array = np.zeros(shape=full_synthetic_data.values.shape)
    corrected_array = np.zeros(shape=full_synthetic_data.values.shape)

    for i, col in enumerate(synthetic_data.columns):

        if i % 10 == 0:
            print(f'Fitting montecarlo simulation {i+1} of {n_simulations}')

        if col == 'L':
            continue

        y = synthetic_data.loc[:, col].values

        if bg_type == 'linear':

            pars = mod.make_params(slope=0, intercept=-1)

        else:

            pars = mod.make_params(amplitude=max(y)*10, sigma=10, center=120, c=0)

            pars['amplitude'].set(min=0)
            pars['center'].set(max=130)
            pars['c'].set(max=min(y))

        bg_fit_result = mod.fit(y, pars, x=x)

        # Evaluate over the whole scan, not only the ROI points used in the fit
        baseline_array[:, i] = bg_fit_result.eval(x=full_synthetic_data.index)
        corrected_array[:, i] = full_synthetic_data.loc[:, col].values - baseline_array[:, i]

    baseline = pd.DataFrame(baseline_array, index=full_synthetic_data.index)
    corrected_data = pd.DataFrame(corrected_array, index=full_synthetic_data.index)

    return baseline, corrected_data
    

def plot_mc_bg_fits(df, synthetic_data, baseline, roi, sample=None):

    """Plot the measured spectrum with the Monte Carlo background fits and the ROI windows.

    ``df`` needs columns ``L`` and ``cps_per_nA``. ``baseline`` is a DataFrame
    with one fitted background per column and an index named ``L``.
    ``synthetic_data`` is accepted but not used here. The y-axis is scaled to
    the data inside the ROI windows, and ``sample`` is used as the title.
    """

    fig, ax = plt.subplots(figsize=(15, 4))

    ax.plot(df['L'], df['cps_per_nA'], lw=1, color='k')

    baseline.reset_index().plot(x='L', lw=1, color='b', ax=ax, legend=False)

    for window in roi:
        ax.axvspan(window[0], window[1], alpha=0.1, color='red', linewidth=0)

    # y-limits come from the ROI points only, with margins that are fractions
    # of the ROI maximum.
    roi_counts = select_roi(df, roi)['cps_per_nA']
    lowest = roi_counts.min()
    highest = roi_counts.max()

    ax.set_ylim(lowest - highest*0.05, highest + highest*0.3)
    ax.set_ylabel('cps/nA', fontsize=14)
    ax.set_xlabel('L (mm)', fontsize=14)
    ax.set_title(sample)
    plt.tight_layout()
    
    
def fit_multiple_gaussians(df_full, pk_params, 
                            y_column='counts_corrected', roi=[[135,160]],
                            samplename=None, plot=True, plot_only_roi=True):
    
    """Fit a sum of Gaussians to the peak of <samplename>.

    Parameters
    ----------
    df_full : pandas.DataFrame
        Spectrum with an ``L`` column and the column named by ``y_column``.
    pk_params : list of dict
        One dict per Gaussian, with keys ``prefix``, ``amplitude``,
        ``center`` and ``sigma`` (initial values). Optional keys
        ``center_min``, ``center_max``, ``sigma_min`` and ``sigma_max`` set
        bounds; any bound not given falls back to 140 / 155 for the centre
        and 0 / 10 for sigma. Amplitudes are constrained to be >= 0.
    y_column : str, optional
        Column of ``df_full`` to fit.
    roi : list of [lower, upper] pairs, optional
        Windows (in L) used for the fit. The default list is only read,
        never modified.
    samplename : str, optional
        Figure title.
    plot : bool, optional
        If True, plot the fit within the ROI and the components over the
        whole spectrum.
    plot_only_roi : bool, optional
        If True, restrict the x-range of the second panel to the ROI extent.

    Returns
    -------
    The lmfit fit result returned by ``Model.fit``.

    Raises
    ------
    ValueError
        If ``pk_params`` is empty.
    """

    if not pk_params:
        raise ValueError('pk_params must contain at least one peak definition')

    df = select_roi(df_full, roi)

    x = df.L
    y = df[y_column]

    # Build every component from its own prefix. The first peak used to be
    # hard-coded to 'a_', which broke the bounds lookup below whenever
    # pk_params[0]['prefix'] was anything else.
    mod = None
    pars = None

    for peak in pk_params:
        prefix = peak['prefix']
        component = GaussianModel(prefix=prefix)
        component_pars = component.make_params()

        for key in ('amplitude', 'center', 'sigma'):
            component_pars[prefix + key].set(value=peak[key])

        if mod is None:
            mod = component
            pars = component_pars
        else:
            mod = mod + component
            pars.update(component_pars)

    for peak in pk_params:
        prefix = peak['prefix']

        pars[prefix + 'amplitude'].set(min=0)

        # Each bound is looked up on its own, so giving only one of a
        # min/max pair no longer discards it in favour of both defaults.
        pars[prefix + 'center'].set(min=peak.get('center_min', 140),
                                    max=peak.get('center_max', 155))
        pars[prefix + 'sigma'].set(min=peak.get('sigma_min', 0),
                                   max=peak.get('sigma_max', 10))

    out = mod.fit(y, x=x, params=pars)

    if plot:

        fig, ax = plt.subplots(1,2,figsize=(10,5))

        out.plot_fit(show_init=True, ax=ax[0])
        ax[0].set_title('')

        fitted_components = out.eval_components(x=df_full.L)
        fitted_curve = out.eval(x=df_full.L)
        ax[1].plot(df_full.L, df_full[y_column], '-', color='lightgrey')
        ax[1].plot(df_full.L, fitted_curve, label='fit')

        for p in fitted_components.keys():
            ax[1].plot(df_full.L, fitted_components[p], label=p)

        for r in roi:
            ax[1].axvspan(r[0], r[1], alpha=0.05, color='red', linewidth=0)

        if plot_only_roi:
            ax[1].set_xlim(np.array(roi).min(), np.array(roi).max())

        plt.suptitle(samplename)
        plt.legend()
        plt.show()

    return out


def fit_multiple_gaussians_mc(synthetic_data_corrected, pk_params, 
                            roi=[[135,160]],
                            samplename=None, plot=True):
    
    """Fit a sum of Gaussians to every background-corrected synthetic dataset.

    Parameters
    ----------
    synthetic_data_corrected : pandas.DataFrame
        One column per Monte Carlo simulation, with an index named ``L``.
    pk_params : list of dict
        Peak definitions, passed unchanged to ``fit_multiple_gaussians``.
    roi : list of [lower, upper] pairs, optional
        Windows (in L) used for the fit. The default list is only read.
    samplename : str, optional
        Figure title.
    plot : bool, optional
        If True, plot all synthetic datasets, fits and fitted components.

    Returns
    -------
    list
        One lmfit fit result per simulation, in column order.
    """

    synthetic_data = synthetic_data_corrected.reset_index()
    sim_columns = list(synthetic_data_corrected.columns)
    # Count the simulation columns only; the reset-index frame has an extra
    # 'L' column, which made the progress message over-count by one.
    n_simulations = len(sim_columns)

    fit_result = []
    fit = np.zeros(shape=synthetic_data_corrected.shape)

    # Iterate over the actual column labels rather than assuming 0..n-1
    for i, col in enumerate(sim_columns):
        if i % 10 == 0:
            print(f'Fitting montecarlo simulation {i+1} of {n_simulations}')

        df = synthetic_data.loc[:, ['L', col]]
        out = fit_multiple_gaussians(df, pk_params, y_column=col, roi=roi,
                                     samplename=samplename, plot=False)

        fit_result.append(out)
        fit[:, i] = out.eval(x=synthetic_data_corrected.index.values)

    fit = pd.DataFrame(fit, index=synthetic_data_corrected.index.values)
    fit.index.name = 'L'

    if plot:
        fig, ax = plt.subplots(1,1,figsize=(8,4))

        synthetic_data.plot(x='L', lw=0.5, color='lightgrey', ax=ax, legend=False)
        fit.reset_index().plot(x='L', lw=1, color='blue', ax=ax, legend=False)

        clrs = ['tab:green', 'tab:orange', 'tab:purple', 'tab:cyan']
        for model in fit_result:
            fitted_components = model.eval_components(x=synthetic_data.L)

            for j, p in enumerate(fitted_components.keys()):
                # Cycle the colours so more than four components cannot
                # raise an IndexError.
                ax.plot(synthetic_data.L, fitted_components[p],
                        color=clrs[j % len(clrs)], lw=0.5)

        for r in roi:
            ax.axvspan(r[0], r[1], alpha=0.05, color='red', linewidth=0)

        plt.title(samplename)
        plt.show()

    return fit_result
        

In [None]:
labs = ['StA', 'ANU']

# baselines = {'StA': {'LDE1': {}, 'LDE1L': {}}
#             ,'ANU': {'LDE1': {}, 'LDE1H': {}}}

# corrected_data = {'StA': {'LDE1': {}, 'LDE1L': {}}
#                  ,'ANU': {'LDE1': {}, 'LDE1H': {}}}

# Fit and plot a baseline for every loaded scan; the result is stored as new
# columns on each DataFrame by fit_baseline_and_plot.
for lab in labs:

    for xtl in data[lab]:

        for s in data[lab][xtl]:

            roi = rois[lab][xtl][s]
            print(f'\n--- {lab} {xtl} {s} ---')

            # Buddingtonite and GlassA870 use the Lorentzian-plus-constant
            # background; every other sample uses a linear one.
            bg_type = 'lorentzian_plus_c' if s in ['Buddingtonite', 'GlassA870'] else 'linear'

            fit_baseline_and_plot(data[lab][xtl][s], roi, name=None, bg_type=bg_type)


In [None]:
labs = ['StA', 'ANU']
nitrides = ['GaN', 'AlN', 'Si3N4', 'BN']

# Result containers, keyed lab -> crystal -> sample
baselines = {
    'StA': {'LDE1': {}, 'LDE1L': {}},
    'ANU': {'LDE1': {}, 'LDE1H': {}},
}

corrected_data = {
    'StA': {'LDE1': {}, 'LDE1L': {}},
    'ANU': {'LDE1': {}, 'LDE1H': {}},
}

# Linear background fit (with ROI randomisation) for the synthetic nitride data
for lab in labs:

    for xtl in data[lab]:

        for s in nitrides:

            roi = rois[lab][xtl][s]
            print(f'\n--- {lab} {xtl} {s} ---')

            mc_baselines, mc_corrected = fit_mc_bg(
                synthetic_data[lab][xtl][s], roi,
                bg_type='linear', randomise_roi=True)

            baselines[lab][xtl][s] = mc_baselines
            corrected_data[lab][xtl][s] = mc_corrected

            plot_mc_bg_fits(data[lab][xtl][s], synthetic_data[lab][xtl][s],
                            mc_baselines, roi,
                            sample=f'{lab}_{xtl}_{s}')


In [None]:
labs = ['StA', 'ANU']
samples = ['GlassA870']

# Lorentzian-plus-constant background fit (with ROI randomisation) for the
# synthetic GlassA870 data at both labs.
for lab in labs:

    for xtl in data[lab]:

        for s in samples:

            roi = rois[lab][xtl][s]
            print(f'\n--- {lab} {xtl} {s} ---')

            mc_baselines, mc_corrected = fit_mc_bg(
                synthetic_data[lab][xtl][s], roi,
                bg_type='lorentzian_plus_c', randomise_roi=True)

            baselines[lab][xtl][s] = mc_baselines
            corrected_data[lab][xtl][s] = mc_corrected

            plot_mc_bg_fits(data[lab][xtl][s], synthetic_data[lab][xtl][s],
                            mc_baselines, roi,
                            sample=f'{lab}_{xtl}_{s}')


In [None]:
labs = ['ANU']
samples = ['Buddingtonite']

# Lorentzian-plus-constant background fit (with ROI randomisation) for the
# synthetic Buddingtonite data; only ANU is processed here.
for lab in labs:

    for xtl in data[lab]:

        for s in samples:

            roi = rois[lab][xtl][s]

            print(f'\n--- {lab} {xtl} {s} ---')
            mc_baselines, mc_corrected = fit_mc_bg(
                synthetic_data[lab][xtl][s], roi,
                bg_type='lorentzian_plus_c', randomise_roi=True)

            baselines[lab][xtl][s] = mc_baselines
            corrected_data[lab][xtl][s] = mc_corrected

            plot_mc_bg_fits(data[lab][xtl][s], synthetic_data[lab][xtl][s],
                            mc_baselines, roi,
                            sample=f'{lab}_{xtl}_{s}')



# Fit peaks to the background-corrected data

In [None]:
# with open('data_w_baselines.pickle', 'rb') as pickle_file:
#     data = pickle.load(pickle_file)

# with open('synthetic_data.pickle', 'rb') as pickle_file:
#     synthetic_data = pickle.load(pickle_file)
    
# with open('baseline_data.pickle', 'rb') as pickle_file:
#     baseline_data = pickle.load(pickle_file)
    
# with open('corrected_data.pickle', 'rb') as pickle_file:
#     corrected_data = pickle.load(pickle_file)

# Fitting the background-subtracted data

In [None]:
# Containers for the peak-fit results, keyed lab -> crystal -> sample:
# fit_dict holds fits to the measured data, mc_fit_dict the Monte Carlo fits.
fit_dict = {
    lab_name: {xtl_name: {} for xtl_name in xtl_names}
    for lab_name, xtl_names in (('StA', ('LDE1', 'LDE1L')), ('ANU', ('LDE1', 'LDE1H')))
}

mc_fit_dict = {
    lab_name: {xtl_name: {} for xtl_name in xtl_names}
    for lab_name, xtl_names in (('StA', ('LDE1', 'LDE1L')), ('ANU', ('LDE1', 'LDE1H')))
}

## BN

### Fit to actual data:

In [None]:
s = 'BN'

# Same ROI and three-Gaussian starting guess for every lab / crystal combination
for lab, crystals in [('StA', ['LDE1', 'LDE1L']), ('ANU', ['LDE1', 'LDE1H'])]:

    for xtl in crystals:

        roi = [[135, 160]]
        df = select_roi(data[lab][xtl][s], roi)
        ymax = df.cps_per_nA_corrected.max()

        pk_params = [
            {'prefix': 'a_', 'amplitude': ymax*3, 'center': 149, 'sigma': 2},
            {'prefix': 'b_', 'amplitude': ymax, 'center': 144, 'sigma': 2},
            {'prefix': 'c_', 'amplitude': ymax, 'center': 150, 'sigma': 2},
        ]

        fit_dict[lab][xtl][s] = fit_multiple_gaussians(
            data[lab][xtl][s], pk_params,
            y_column='cps_per_nA_corrected',
            roi=roi,
            samplename=f'{lab}_{xtl}_{s}')


### Monte Carlo fit:

In [None]:
# Set the sample explicitly: this cell previously relied on `s` still holding
# 'BN' from an earlier cell, so it silently fitted whichever sample had been
# processed last if the cells were run in a different order.
s = 'BN'

# Same ROI and three-Gaussian starting guess for every lab / crystal combination
for lab, crystals in [('StA', ['LDE1', 'LDE1L']), ('ANU', ['LDE1', 'LDE1H'])]:

    for xtl in crystals:

        roi = [[135, 160]]
        df = select_roi(data[lab][xtl][s], roi)
        ymax = df.cps_per_nA_corrected.max()

        pk_params = [
            {'prefix': 'a_', 'amplitude': ymax*3, 'center': 149, 'sigma': 2},
            {'prefix': 'b_', 'amplitude': ymax, 'center': 144, 'sigma': 2},
            {'prefix': 'c_', 'amplitude': ymax, 'center': 150, 'sigma': 2},
        ]

        mc_fit_dict[lab][xtl][s] = fit_multiple_gaussians_mc(
            corrected_data[lab][xtl][s], pk_params,
            roi=roi, samplename='_'.join([lab, xtl, s]))
    
 # Play a sound when the cell finishes running
# Audio(filename=sound_file, autoplay=True)

## GaN

### Fit to actual data

In [None]:
lab = 'StA'
s = 'GaN'

# Named peak_rois (not rois) so the background-fitting ROI dictionary `rois`,
# which the baseline cells index as rois[lab][xtl][sample], is not overwritten.
peak_rois = {'LDE1': [[135,152.5], [155.2,155.8], [159,165]],
             'LDE1L': [[135,150.5], [153.2,153.8], [157,165]]}


for xtl in ['LDE1', 'LDE1L']:

    roi = peak_rois[xtl]
    df = select_roi(data[lab][xtl][s], roi)
    ymax = df.cps_per_nA_corrected.max()

    pk_params = [
               {'prefix': 'a_',  'amplitude': ymax*3, 'center': 149, 'sigma': 2},
               {'prefix': 'b_', 'amplitude': ymax, 'center': 146, 'sigma': 2},
        ]

    fit_dict[lab][xtl][s] = fit_multiple_gaussians(data[lab][xtl][s], pk_params,
                                                        y_column='cps_per_nA_corrected',
                                                        roi=roi,
                                                        samplename='_'.join([lab, xtl, s]))

lab = 'ANU'
peak_rois = {'LDE1': [[135,150],[152.5,153],[157,165]]
             ,'LDE1H': [[135,150],[152.5,153],[157,165]]}

for xtl in ['LDE1', 'LDE1H']:

    roi = peak_rois[xtl]
    df = select_roi(data[lab][xtl][s], roi)
    ymax = df.cps_per_nA_corrected.max()

    pk_params = [
               {'prefix': 'a_',  'amplitude': ymax*3, 'center': 147, 'sigma': 2},
               {'prefix': 'b_', 'amplitude': ymax, 'center': 144, 'sigma': 2},
            ]

    fit_dict[lab][xtl][s] = fit_multiple_gaussians(data[lab][xtl][s], pk_params,
                                                        y_column='cps_per_nA_corrected',
                                                        roi=roi,
                                                        samplename='_'.join([lab, xtl, s]))


In [None]:
lab = 'StA'
s = 'GaN'

# Named peak_rois (not rois) so the background-fitting ROI dictionary `rois`,
# which the baseline cells index as rois[lab][xtl][sample], is not overwritten.
peak_rois = {'LDE1': [[135,152.5], [155.2,155.8], [159,165]],
             'LDE1L': [[135,150.5], [153.2,153.8], [157,165]]}

for xtl in ['LDE1', 'LDE1L']:

    roi = peak_rois[xtl]
    df = select_roi(data[lab][xtl][s], roi)
    ymax = df.cps_per_nA_corrected.max()

    # NOTE(review): the fit to the measured StA GaN data starts from centres
    # 149 / 146, whereas this Monte Carlo fit starts from 147 / 144 — confirm
    # which pair is intended.
    pk_params = [
               {'prefix': 'a_',  'amplitude': ymax*3, 'center': 147, 'sigma': 2},
               {'prefix': 'b_', 'amplitude': ymax, 'center': 144, 'sigma': 2},
            ]

    mc_fit_dict[lab][xtl][s] = fit_multiple_gaussians_mc(corrected_data[lab][xtl][s], pk_params,
                                                              roi=roi, samplename='_'.join([lab, xtl, s]))

In [None]:
s = 'GaN'
lab = 'ANU'

# Named peak_rois (not rois) so the background-fitting ROI dictionary `rois`,
# which the baseline cells index as rois[lab][xtl][sample], is not overwritten.
peak_rois = {'LDE1': [[135,150],[152.5,153],[157,165]]
             ,'LDE1H': [[135,150],[152.5,153],[157,165]]}

for xtl in ['LDE1', 'LDE1H']:

    roi = peak_rois[xtl]
    df = select_roi(data[lab][xtl][s], roi)
    ymax = df.cps_per_nA_corrected.max()

    pk_params = [
               {'prefix': 'a_',  'amplitude': ymax*3, 'center': 147, 'sigma': 2},
               {'prefix': 'b_', 'amplitude': ymax, 'center': 144, 'sigma': 2},
            ]

    mc_fit_dict[lab][xtl][s] = fit_multiple_gaussians_mc(corrected_data[lab][xtl][s], pk_params,
                                                              roi=roi, samplename='_'.join([lab, xtl, s]))
# Play a sound when the cell finishes running
# Audio(filename=sound_file, autoplay=True)

## AlN

In [None]:
lab = 'StA'
s = 'AlN'

# Named peak_rois (not rois) so the background-fitting ROI dictionary `rois`,
# which the baseline cells index as rois[lab][xtl][sample], is not overwritten.
peak_rois = {'LDE1': [[130, 165]],
             'LDE1L': [[130, 165]]}

for xtl in ['LDE1', 'LDE1L']:

    roi = peak_rois[xtl]
    df = select_roi(data[lab][xtl][s], roi)
    ymax = df.cps_per_nA_corrected.max()

    pk_params = [
               {'prefix': 'a_',  'amplitude': ymax*3, 'center': 149, 'sigma': 2},
               {'prefix': 'b_', 'amplitude': ymax, 'center': 148, 'sigma': 2},
               {'prefix': 'c_', 'amplitude': ymax, 'center': 150, 'sigma': 2},
        ]

    fit_dict[lab][xtl][s] = fit_multiple_gaussians(data[lab][xtl][s], pk_params,
                                                    y_column='cps_per_nA_corrected',
                                                    roi=roi,
                                                    samplename='_'.join([lab, xtl, s]),
                                                    plot=True, plot_only_roi=False)

lab = 'ANU'
peak_rois = {'LDE1': [[130, 165]],
             'LDE1H': [[130, 165]]}

for xtl in ['LDE1', 'LDE1H']:

    roi = peak_rois[xtl]
    df = select_roi(data[lab][xtl][s], roi)
    ymax = df.cps_per_nA_corrected.max()

    pk_params = [
               {'prefix': 'a_',  'amplitude': ymax*3, 'center': 147, 'sigma': 2},
               {'prefix': 'b_', 'amplitude': ymax, 'center': 144, 'sigma': 2},
               {'prefix': 'c_', 'amplitude': ymax, 'center': 150, 'sigma': 2},
            ]

    fit_dict[lab][xtl][s] = fit_multiple_gaussians(data[lab][xtl][s], pk_params,
                                                    y_column='cps_per_nA_corrected',
                                                    roi=roi,
                                                    samplename='_'.join([lab, xtl, s]),
                                                    plot=True, plot_only_roi=False)


In [None]:
lab = 'StA'
s = 'AlN'

# Named peak_rois (not rois) so the background-fitting ROI dictionary `rois`,
# which the baseline cells index as rois[lab][xtl][sample], is not overwritten.
# NOTE(review): the fit to the measured StA AlN data uses the window
# [130, 165] and starts peak b_ at 148; here the window is [135, 165] and
# b_ starts at 144 — confirm whether the two should match.
peak_rois = {'LDE1': [[135, 165]],
             'LDE1L': [[135, 165]]}

for xtl in ['LDE1', 'LDE1L']:

    roi = peak_rois[xtl]
    df = select_roi(data[lab][xtl][s], roi)
    ymax = df.cps_per_nA_corrected.max()

    pk_params = [
               {'prefix': 'a_',  'amplitude': ymax*3, 'center': 149, 'sigma': 2},
               {'prefix': 'b_', 'amplitude': ymax, 'center': 144, 'sigma': 2},
               {'prefix': 'c_', 'amplitude': ymax, 'center': 150, 'sigma': 2},
            ]

    mc_fit_dict[lab][xtl][s] = fit_multiple_gaussians_mc(corrected_data[lab][xtl][s], pk_params,
                                                              roi=roi, samplename='_'.join([lab, xtl, s]))

In [None]:
lab = 'ANU'
s = 'AlN'

# Named peak_rois (not rois) so the background-fitting ROI dictionary `rois`,
# which the baseline cells index as rois[lab][xtl][sample], is not overwritten.
# NOTE(review): the fit to the measured ANU AlN data uses the window
# [130, 165] and starts peak b_ at 144; here the window is [135, 165] and
# b_ starts at 148 — confirm whether the two should match.
peak_rois = {'LDE1': [[135, 165]],
             'LDE1H': [[135, 165]]}

for xtl in ['LDE1', 'LDE1H']:

    roi = peak_rois[xtl]
    df = select_roi(data[lab][xtl][s], roi)
    ymax = df.cps_per_nA_corrected.max()

    pk_params = [
               {'prefix': 'a_',  'amplitude': ymax*3, 'center': 147, 'sigma': 2},
               {'prefix': 'b_', 'amplitude': ymax, 'center': 148, 'sigma': 2},
               {'prefix': 'c_', 'amplitude': ymax, 'center': 150, 'sigma': 2},
            ]

    mc_fit_dict[lab][xtl][s] = fit_multiple_gaussians_mc(corrected_data[lab][xtl][s], pk_params,
                                                              roi=roi, samplename='_'.join([lab, xtl, s]))

## Si3N4

In [None]:
lab = 'StA'
s = 'Si3N4'

# Named peak_rois (not rois) so the background-fitting ROI dictionary `rois`,
# which the baseline cells index as rois[lab][xtl][sample], is not overwritten.
peak_rois = {'LDE1': [[135, 165]],
             'LDE1L': [[135, 165]]}

for xtl in ['LDE1', 'LDE1L']:

    roi = peak_rois[xtl]
    df = select_roi(data[lab][xtl][s], roi)
    ymax = df.cps_per_nA_corrected.max()

    pk_params = [
               {'prefix': 'a_',  'amplitude': ymax*3, 'center': 149, 'sigma': 2},
               {'prefix': 'b_', 'amplitude': ymax, 'center': 148, 'sigma': 2},
        ]

    fit_dict[lab][xtl][s] = fit_multiple_gaussians(data[lab][xtl][s], pk_params,
                                                    y_column='cps_per_nA_corrected',
                                                    roi=roi,
                                                    samplename='_'.join([lab, xtl, s]),
                                                    plot=True, plot_only_roi=False)

lab = 'ANU'
peak_rois = {'LDE1': [[130, 165]],
             'LDE1H': [[130, 165]]}

for xtl in ['LDE1', 'LDE1H']:

    roi = peak_rois[xtl]
    df = select_roi(data[lab][xtl][s], roi)
    ymax = df.cps_per_nA_corrected.max()

    pk_params = [
               {'prefix': 'a_',  'amplitude': ymax*3, 'center': 147, 'sigma': 2},
               {'prefix': 'b_', 'amplitude': ymax, 'center': 144, 'sigma': 2},
            ]

    fit_dict[lab][xtl][s] = fit_multiple_gaussians(data[lab][xtl][s], pk_params,
                                                    y_column='cps_per_nA_corrected',
                                                    roi=roi,
                                                    samplename='_'.join([lab, xtl, s]),
                                                    plot=True, plot_only_roi=False)


In [None]:
# Run fit_multiple_gaussians_mc on the StA Si3N4 scans, once per analysing crystal.
lab, s = 'StA', 'Si3N4'

rois = {
    'LDE1': [[135, 165]],
    'LDE1L': [[135, 165]],
}

for xtl, roi in rois.items():

    # Maximum corrected intensity inside the ROI; it scales the amplitude guesses below.
    df = select_roi(data[lab][xtl][s], roi)
    ymax = df.cps_per_nA_corrected.max()

    # Starting values for the two components (prefixes a_, b_).
    pk_params = [
        dict(prefix='a_', amplitude=ymax*3, center=149, sigma=2),
        dict(prefix='b_', amplitude=ymax, center=148, sigma=2),
    ]

    mc_fit_dict[lab][xtl][s] = fit_multiple_gaussians_mc(
        corrected_data[lab][xtl][s],
        pk_params,
        roi=roi,
        samplename='_'.join([lab, xtl, s]),
    )

In [None]:
# Run fit_multiple_gaussians_mc on the ANU Si3N4 scans, once per analysing crystal.
lab = 'ANU'
s = 'Si3N4'

rois = {'LDE1': [[130, 165]],
        'LDE1H': [[130, 165]]}

for xtl in ['LDE1', 'LDE1H']:

    roi = rois[xtl]
    # Maximum corrected intensity inside the ROI; it scales the amplitude guesses below.
    df = select_roi(data[lab][xtl][s], roi)
    ymax = df.cps_per_nA_corrected.max()

    # The MC fits must start from the same initial guesses as the fit to the
    # measured ANU Si3N4 data, otherwise the MC spread describes a different
    # fitting procedure. The data fit starts b_ at 144 (this cell previously
    # used 148).
    pk_params = [
        {'prefix': 'a_', 'amplitude': ymax*3, 'center': 147, 'sigma': 2},
        {'prefix': 'b_', 'amplitude': ymax, 'center': 144, 'sigma': 2},
    ]

    mc_fit_dict[lab][xtl][s] = fit_multiple_gaussians_mc(corrected_data[lab][xtl][s], pk_params,
                                                         roi=roi, samplename='_'.join([lab, xtl, s]))

# GlassA870

In [None]:
# Fit the measured GlassA870 scans: first the StA crystals, then the ANU crystals.
s = 'GlassA870'

# --- StA ---
lab = 'StA'
rois = {
    'LDE1': [[135, 160]],
    'LDE1L': [[135, 160]],
}

for xtl, roi in rois.items():

    # Maximum corrected intensity inside the ROI; it scales the amplitude guesses below.
    df = select_roi(data[lab][xtl][s], roi)
    ymax = df.cps_per_nA_corrected.max()

    # Starting values for three components; c_ additionally carries
    # bounds on its sigma and center.
    pk_params = [
        dict(prefix='a_', amplitude=ymax*3, center=147, sigma=2),
        dict(prefix='b_', amplitude=ymax, center=144, sigma=2),
        dict(prefix='c_', amplitude=ymax, center=154, sigma=2,
             sigma_min=0, sigma_max=2.4,
             center_min=151, center_max=156),
    ]

    fit_dict[lab][xtl][s] = fit_multiple_gaussians(
        data[lab][xtl][s],
        pk_params,
        y_column='cps_per_nA_corrected',
        roi=roi,
        samplename='_'.join([lab, xtl, s]),
        plot=True,
        plot_only_roi=False,
    )

# --- ANU ---
lab = 'ANU'
rois = {
    'LDE1': [[135, 160]],
    'LDE1H': [[135, 160]],
}

for xtl, roi in rois.items():

    df = select_roi(data[lab][xtl][s], roi)
    ymax = df.cps_per_nA_corrected.max()

    # Starting values for three components; a_ has bounds on sigma,
    # c_ has bounds on both sigma and center.
    pk_params = [
        dict(prefix='a_', amplitude=ymax*3, center=145, sigma=3,
             sigma_min=0, sigma_max=3),
        dict(prefix='b_', amplitude=ymax*2, center=144.9, sigma=2),
        dict(prefix='c_', amplitude=ymax, center=151, sigma=2.3,
             sigma_min=1, sigma_max=2.4,
             center_min=150, center_max=160),
    ]

    fit_dict[lab][xtl][s] = fit_multiple_gaussians(
        data[lab][xtl][s],
        pk_params,
        y_column='cps_per_nA_corrected',
        roi=roi,
        samplename='_'.join([lab, xtl, s]),
        plot=True,
        plot_only_roi=False,
    )


In [None]:
# Run fit_multiple_gaussians_mc on the StA GlassA870 scans, once per analysing crystal.
lab = 'StA'
s = 'GlassA870'

# Use the same ROI as the fit to the measured StA GlassA870 data ([135, 160]),
# so the MC spread describes that same fit. This cell previously used
# [135, 165], unlike every other MC cell, which reuses its data-fit ROI.
rois = {'LDE1': [[135, 160]],
        'LDE1L': [[135, 160]]}

for xtl in ['LDE1', 'LDE1L']:

    roi = rois[xtl]
    # Maximum corrected intensity inside the ROI; it scales the amplitude guesses below.
    df = select_roi(data[lab][xtl][s], roi)
    ymax = df.cps_per_nA_corrected.max()

    # Starting values for three components; c_ additionally carries
    # bounds on its sigma and center.
    pk_params = [
        {'prefix': 'a_', 'amplitude': ymax*3, 'center': 147, 'sigma': 2},
        {'prefix': 'b_', 'amplitude': ymax, 'center': 144, 'sigma': 2},
        {'prefix': 'c_', 'amplitude': ymax, 'center': 154, 'sigma': 2,
         'sigma_min': 0, 'sigma_max': 2.4,
         'center_min': 151, 'center_max': 156},
    ]

    mc_fit_dict[lab][xtl][s] = fit_multiple_gaussians_mc(corrected_data[lab][xtl][s], pk_params,
                                                         roi=roi, samplename='_'.join([lab, xtl, s]))

In [None]:
# Run fit_multiple_gaussians_mc on the ANU GlassA870 scans, once per analysing crystal.
lab, s = 'ANU', 'GlassA870'

rois = {
    'LDE1': [[135, 160]],
    'LDE1H': [[135, 160]],
}

for xtl, roi in rois.items():

    # Maximum corrected intensity inside the ROI; it scales the amplitude guesses below.
    df = select_roi(data[lab][xtl][s], roi)
    ymax = df.cps_per_nA_corrected.max()

    # Starting values for three components; a_ has bounds on sigma,
    # c_ has bounds on both sigma and center.
    pk_params = [
        dict(prefix='a_', amplitude=ymax*3, center=145, sigma=3,
             sigma_min=0, sigma_max=3),
        dict(prefix='b_', amplitude=ymax*2, center=144.9, sigma=2),
        dict(prefix='c_', amplitude=ymax, center=151, sigma=2.3,
             sigma_min=1, sigma_max=2.4,
             center_min=150, center_max=160),
    ]

    mc_fit_dict[lab][xtl][s] = fit_multiple_gaussians_mc(
        corrected_data[lab][xtl][s],
        pk_params,
        roi=roi,
        samplename='_'.join([lab, xtl, s]),
    )

# Buddingtonite

In [None]:
# Fit the measured Buddingtonite scans from the ANU lab, once per analysing crystal.
s, lab = 'Buddingtonite', 'ANU'

rois = {
    'LDE1': [[132, 160]],
    'LDE1H': [[132, 160]],
}

for xtl, roi in rois.items():

    # Maximum corrected intensity inside the ROI; it scales the amplitude guesses below.
    df = select_roi(data[lab][xtl][s], roi)
    ymax = df.cps_per_nA_corrected.max()

    # Starting values for the three components (prefixes a_, b_, c_).
    pk_params = [
        dict(prefix='a_', amplitude=ymax*3, center=147, sigma=2),
        dict(prefix='b_', amplitude=ymax, center=144, sigma=2),
        dict(prefix='c_', amplitude=ymax, center=150, sigma=2),
    ]

    fit_dict[lab][xtl][s] = fit_multiple_gaussians(
        data[lab][xtl][s],
        pk_params,
        y_column='cps_per_nA_corrected',
        roi=roi,
        samplename='_'.join([lab, xtl, s]),
        plot=True,
        plot_only_roi=False,
    )


In [None]:
# Run fit_multiple_gaussians_mc on the ANU Buddingtonite scans, once per analysing crystal.
lab, s = 'ANU', 'Buddingtonite'

rois = {
    'LDE1': [[132, 160]],
    'LDE1H': [[132, 160]],
}

for xtl, roi in rois.items():

    # Maximum corrected intensity inside the ROI; it scales the amplitude guesses below.
    df = select_roi(data[lab][xtl][s], roi)
    ymax = df.cps_per_nA_corrected.max()

    # Starting values for the three components (prefixes a_, b_, c_).
    pk_params = [
        dict(prefix='a_', amplitude=ymax*3, center=147, sigma=2),
        dict(prefix='b_', amplitude=ymax, center=144, sigma=2),
        dict(prefix='c_', amplitude=ymax, center=150, sigma=2),
    ]

    mc_fit_dict[lab][xtl][s] = fit_multiple_gaussians_mc(
        corrected_data[lab][xtl][s],
        pk_params,
        roi=roi,
        samplename='_'.join([lab, xtl, s]),
    )

# Calculate height/area ratios

For the fits to the actual data:

In [None]:
# Area estimate #1 for one example fit: add up every '*amplitude' parameter.
lab, xtl, s = 'StA', 'LDE1L', 'GaN'

fit_params = fit_dict[lab][xtl][s].params
amp_keys = list(filter(lambda k: 'amplitude' in k, fit_params.keys()))
amp_vals = [fit_params[k].value for k in amp_keys]
sum_amplitude = sum(amp_vals)

print(sum_amplitude)

In [None]:
# Area estimate #2 for the same example fit: integrate the evaluated fit
# numerically over the measured L positions (lab, xtl, s come from the cell above).
fitted_curve = fit_dict[lab][xtl][s].eval(x=data[lab][xtl][s].L)

# np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid; use the
# new name when it exists and fall back to the old one on older NumPy.
_trapezoid = getattr(np, 'trapezoid', None) or np.trapz
trapz_area = _trapezoid(fitted_curve, x=data[lab][xtl][s].L)
print(trapz_area)

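Yes! The area under the fit is the same whether it is computed as the sum of the component amplitudes or by trapezoidal integration of the fitted curve (in lmfit's peak models the `amplitude` parameter is the integrated area of the peak). So I will use the sum of amplitudes!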

In [None]:
def get_height_area_ratio(fit_result_object, xvals, prefix_component_to_exclude=None):

    """
    Return the ratio of fitted peak height to fitted peak area.

    The area is the sum of all parameters whose name contains 'amplitude';
    the height is the maximum of the full fitted curve evaluated on xvals
    (the excluded component, if any, is still part of that curve).

    fit_result_object = an lmfit.fit_result object
    xvals = an array over which to evaluate the fit result, in order to find peak height
    prefix_component_to_exclude = string giving prefix of the component to exclude from the
                                area calculation (e.g. to exclude a shoulder). Should be
                                something like 'c_'. Prefixes of any length are supported.
                                None (or an empty string) excludes nothing.
    """

    params = fit_result_object.params

    def _is_excluded(param_name):
        # Match on the whole prefix rather than on the first two characters,
        # so prefixes such as 'pk1_' are handled as well as 'c_'.
        return bool(prefix_component_to_exclude) and param_name.startswith(prefix_component_to_exclude)

    area_fit = sum(
        params[name].value
        for name in params.keys()
        if 'amplitude' in name and not _is_excluded(name)
    )

    fitted_curve = fit_result_object.eval(x=xvals)
    height_fit = fitted_curve.max()

    return height_fit / area_fit

In [None]:
def get_height_area_ratio_mc_avg(fit_result_object_list, xvals, prefix_component_to_exclude=None):

    """
    Mean and standard deviation of the height/area ratio over a set of fits.

    fit_result_object_list = list of lmfit.fit_result objects; one ratio is computed per entry
    xvals = array on which each fit is evaluated to find its peak height
    prefix_component_to_exclude = prefix (e.g. 'c_') of a component to leave out of the
                                area sum, passed straight to get_height_area_ratio

    Returns (mean, stdev) of the ratios, using numpy's default .std().
    """

    ratios = []

    for model_number, mc_fit in enumerate(fit_result_object_list):
        print('        Getting height/area for mc model #{}'.format(model_number))
        ratios.append(get_height_area_ratio(mc_fit, xvals, prefix_component_to_exclude))

    ratios = np.asarray(ratios)

    return ratios.mean(), ratios.std()

In [None]:
def get_peak_pos(fit_result_object, xvals, prefix_component_to_exclude=None):

    """
    Return the x value at which the evaluated fit reaches its maximum.

    fit_result_object = an lmfit.fit_result object
    xvals = values over which to evaluate the fit result, in order to find the peak
            position. May be a numpy array, a list or a pandas Series.
    prefix_component_to_exclude = string giving prefix of the component to exclude
                                (e.g. to exclude a shoulder). Should be
                                something like 'c_'.
                                **** NOT IMPLEMENTED **** (accepted but ignored)
    """

    fitted_curve = fit_result_object.eval(x=xvals)
    peak_pos_idx = np.asarray(fitted_curve).argmax()

    # argmax is a position, so index positionally: on a pandas Series with a
    # non-default index, xvals[peak_pos_idx] would be a label lookup instead.
    peak_pos = np.asarray(xvals)[peak_pos_idx]

    return peak_pos

In [None]:
def get_peak_pos_mc_avg(fit_result_object_list, xvals, prefix_component_to_exclude=None):

    """
    Mean and standard deviation of the peak position over a set of fits.

    fit_result_object_list = list of lmfit.fit_result objects; one position is found per entry
    xvals = array on which each fit is evaluated to locate its maximum
    prefix_component_to_exclude = prefix (e.g. 'c_') of a component to exclude; it is
                                forwarded to get_peak_pos.
                                *** NOT IMPLEMENTED ***

    Returns (mean, stdev) of the positions, using numpy's default .std().
    """

    positions = []

    for model_number, mc_fit in enumerate(fit_result_object_list):
        print('        Getting peak pos for mc model #{}'.format(model_number))
        positions.append(get_peak_pos(mc_fit, xvals, prefix_component_to_exclude))

    positions = np.asarray(positions)

    return positions.mean(), positions.std()

In [None]:
# One placeholder per lab and analysing crystal; each None is later
# replaced by that crystal's height/area summary table.
ht_area_dict = {
    'StA': dict.fromkeys(('LDE1', 'LDE1L')),
    'ANU': dict.fromkeys(('LDE1', 'LDE1H')),
}

In [None]:
# Grid (120 up to, but not including, 180 in steps of 0.01) on which the
# fits are evaluated when locating peak height and position.
xvals = np.arange(start=120, stop=180, step=0.01)
len(xvals)

In [None]:
# Build one height/area summary table per lab and analysing crystal.
for lab in ('StA', 'ANU'):

    for xtl in ht_area_dict[lab]:

        # Column-wise accumulators, one entry per fitted sample.
        samples, ht_area_ratios = [], []
        ht_area_ratios_mc_mean, ht_area_ratios_mc_stdev = [], []

        for s in fit_dict[lab][xtl]:

            print(s)
            samples.append(s)

            # Ratio from the fit to the measured data.
            print('    Getting height/area ratios from data')
            ht_area = get_height_area_ratio(
                fit_dict[lab][xtl][s], xvals, prefix_component_to_exclude=None,
            )

            # Mean and spread of the ratio across the MC fits for this sample.
            print('    Getting height/area ratios from mc simulations')
            ht_area_mean, ht_area_stdev = get_height_area_ratio_mc_avg(
                mc_fit_dict[lab][xtl][s], xvals, prefix_component_to_exclude=None,
            )

            ht_area_ratios.append(ht_area)
            ht_area_ratios_mc_mean.append(ht_area_mean)
            ht_area_ratios_mc_stdev.append(ht_area_stdev)

        print('Creating final tables')
        ht_area_dict[lab][xtl] = pd.DataFrame({
            'Sample': samples,
            'ht_area_ratio': ht_area_ratios,
            'ht_area_ratio_mc_mean': ht_area_ratios_mc_mean,
            'ht_area_ratio_mc_stdev': ht_area_ratios_mc_stdev,
        })


In [None]:
# One placeholder per lab and analysing crystal, filled with a table below.
peak_pos_dict = {
    'StA': dict.fromkeys(('LDE1', 'LDE1L')),
    'ANU': dict.fromkeys(('LDE1', 'LDE1H')),
}

for lab in ('StA', 'ANU'):

    for xtl in peak_pos_dict[lab]:

        # Column-wise accumulators, one entry per fitted sample.
        samples = []
        peak_pos, peak_pos_mc_mean, peak_pos_mc_stdev = [], [], []

        for s in fit_dict[lab][xtl]:

            print(s)

            samples.append(s)

            # Peak position from the fit to the measured data.
            print('    Getting peak pos')
            pos = get_peak_pos(fit_dict[lab][xtl][s], xvals)

            # Mean and spread of the position across the MC fits for this sample.
            print('    Getting peak pos from mc simulations')
            pos_mean, pos_stdev = get_peak_pos_mc_avg(
                mc_fit_dict[lab][xtl][s], xvals, prefix_component_to_exclude=None,
            )

            peak_pos.append(pos)
            peak_pos_mc_mean.append(pos_mean)
            peak_pos_mc_stdev.append(pos_stdev)

        print('Creating final tables')
        peak_pos_dict[lab][xtl] = pd.DataFrame({
            'Sample': samples,
            'peak_pos': peak_pos,
            'peak_pos_mc_mean': peak_pos_mc_mean,
            'peak_pos_mc_stdev': peak_pos_mc_stdev,
        })


## Save the height/area results

In [None]:
# Write one CSV of height/area results per lab and analysing crystal.
ht_area_out_dir = Path('../data/processed/peak_shapes/ht_area_tables')
# to_csv does not create missing directories, so make sure the target exists.
ht_area_out_dir.mkdir(parents=True, exist_ok=True)

for lab in ['StA', 'ANU']:
    for xtl in ht_area_dict[lab].keys():
        ht_area_dict[lab][xtl].to_csv(ht_area_out_dir / f'{lab}_{xtl}.csv')

## Save the peak position results

In [None]:
# Write one CSV of peak-position results per lab and analysing crystal.
peak_pos_out_dir = Path('../data/processed/peak_shapes/peak_pos_tables')
# to_csv does not create missing directories, so make sure the target exists.
peak_pos_out_dir.mkdir(parents=True, exist_ok=True)

for lab in ['StA', 'ANU']:
    for xtl in peak_pos_dict[lab].keys():
        peak_pos_dict[lab][xtl].to_csv(peak_pos_out_dir / f'{lab}_{xtl}.csv')

## Save the fitted curves

In [None]:
# For every fitted sample, evaluate the fit at the measured L positions, store
# it in a new 'fit' column of the data table, and write that table to CSV.
for lab in ['StA', 'ANU']:
    for xtl in fit_dict[lab].keys():

        fits_out_dir = Path(f'../data/processed/peak_shapes/data_with_fits_{lab}/{xtl}')
        # to_csv does not create missing directories, so make sure the target exists.
        fits_out_dir.mkdir(parents=True, exist_ok=True)

        for s in fit_dict[lab][xtl].keys():
            fitted_curve = fit_dict[lab][xtl][s].eval(x=data[lab][xtl][s]['L'])
            data[lab][xtl][s]['fit'] = fitted_curve
            data[lab][xtl][s].to_csv(fits_out_dir / f'{s}.csv')