# Compare peak shapes and background scans at different positions


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import glob
from pathlib import Path
from lmfit.models import LinearModel, LorentzianModel, ConstantModel
import sys
sys.path.insert(1, '..')
from src import readfiles, wdscan

class Scan:
    """Container holding one scan's ``data`` and ``metadata`` objects."""

    def __init__(self, data, metadata):
        # Both objects are stored exactly as passed in (no copy is made).
        self.data, self.metadata = data, metadata

def plot_spectrum_and_roi(df, roi, sample=None, baseline=None):
    """Plot a spectrum and shade the 'regions of interest' (roi) used for the background fit.

    Draws ``cps_per_nA`` against ``L`` on a new 15 x 4 figure, overlays
    ``baseline`` against the same ``L`` values when one is given, and shades
    every ``(r[0], r[1])`` interval in ``roi``.  The y-limits are computed
    only from the rows that ``select_roi`` returns, and ``sample`` is used as
    the title.
    """
    fig, ax = plt.subplots(figsize=(15, 4))

    ax.plot(df['L'], df['cps_per_nA'], lw=1, color='k', label='data')
    if baseline is not None:
        ax.plot(df['L'], baseline, lw=1, color='b', label='baseline')

    for region in roi:
        ax.axvspan(region[0], region[1], alpha=0.1, color='red', linewidth=0)

    # Scale the y-axis to the ROI rows only, so rows outside the ROIs do not
    # set the limits.
    roi_counts = select_roi(df, roi)['cps_per_nA']
    lowest = roi_counts.min()
    highest = roi_counts.max()
    ax.set_ylim(lowest - highest*0.05, highest + highest*0.3)

    ax.set_title(sample)
    fig.tight_layout()
    
def select_roi(df, roi):
    """Return the rows of ``df`` whose ``L`` value lies inside any region of interest.

    Parameters:
        df:  DataFrame with an ``L`` column.
        roi: iterable of regions; each region ``r`` selects the rows with
             ``r[0] < L < r[1]`` (both bounds exclusive).

    Returns:
        A new DataFrame with the selected rows in their original order.  Each
        row appears at most once, even when regions overlap or when the index
        of ``df`` contains repeated labels.  An empty ``roi`` gives an empty
        DataFrame.
    """
    # A boolean mask (rather than a list of index labels) avoids duplicating
    # rows for overlapping regions and does not depend on a unique index.
    mask = np.zeros(len(df), dtype=bool)
    for r in roi:
        mask |= ((df['L'] > r[0]) & (df['L'] < r[1])).to_numpy()

    return df.loc[mask, :]


def fit_baseline_and_plot(df, roi, name=None, bg_type='linear'):
    """Fit a baseline to the roi rows, plot it, and store the results in ``df``.

    The background model is fitted (via ``fit_bg``) only to the rows selected
    by ``select_roi`` and then evaluated at every ``L`` in ``df``.  ``df`` is
    modified in place: it gains a ``baseline`` column and a
    ``cps_per_nA_corrected`` column (``cps_per_nA`` minus the baseline).
    Nothing is returned.
    """
    roi_rows = select_roi(df, roi)
    fit = fit_bg(roi_rows, bg_type=bg_type)

    # Evaluate the fitted background over the whole scan, not just the ROIs.
    fitted_bg = fit.eval(x=df['L'].values)
    residual = df['cps_per_nA'].values - fitted_bg

    plot_spectrum_and_roi(df, roi, sample=name, baseline=fitted_bg)

    df['baseline'] = fitted_bg
    df['cps_per_nA_corrected'] = residual
    
def fit_bg(data, bg_type='linear'):
    """Fit a background model to ``data.cps_per_nA`` as a function of ``data.L``.

    bg_type can be either:
        - 'linear' (default): an lmfit ``LinearModel``
        - 'lorentzian_plus_c': a ``LorentzianModel`` plus a ``ConstantModel``

    Returns the fit result object produced by the model's ``fit`` method.

    Raises ValueError if ``bg_type`` is not one of the two supported names
    (previously this surfaced as an UnboundLocalError on ``mod``).
    """
    supported = ('linear', 'lorentzian_plus_c')
    if bg_type not in supported:
        raise ValueError(
            f"Unknown bg_type {bg_type!r}; expected one of {supported}")

    x = data.L
    y = data.cps_per_nA

    if bg_type == 'linear':

        mod = LinearModel()
        pars = mod.make_params(slope=0, intercept=-1)

    else:  # 'lorentzian_plus_c'

        mod = LorentzianModel() + ConstantModel()
        pars = mod.make_params(amplitude=max(y)*10, sigma=10, center=120, c=0)

        # Constrain the fit: non-negative amplitude, centre no higher than
        # 130, and a constant offset no larger than the smallest data value.
        # NOTE(review): 120/130 are hard-coded in units of L; presumably
        # chosen for these particular scans -- confirm before reusing.
        pars['amplitude'].set(min=0)
        pars['center'].set(max=130)
        pars['c'].set(max=min(y))

    bg_fit_result = mod.fit(y, pars, x=x)

    return bg_fit_result

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import glob
from pathlib import Path
from lmfit.models import LinearModel, LorentzianModel, ConstantModel
import sys
sys.path.insert(1, '..')
from src import readfiles, wdscan

class Scan:
    """Container holding one scan's ``data`` and ``metadata`` objects."""

    def __init__(self, data, metadata):
        # Both objects are stored exactly as passed in (no copy is made).
        self.data, self.metadata = data, metadata

def plot_spectrum_and_roi(df, roi, sample=None, baseline=None):
    """Plot a spectrum and shade the 'regions of interest' (roi) used for the background fit.

    Draws ``cps_per_nA`` against ``L`` on a new 15 x 4 figure, overlays
    ``baseline`` against the same ``L`` values when one is given, and shades
    every ``(r[0], r[1])`` interval in ``roi``.  The y-limits are computed
    only from the rows that ``select_roi`` returns, and ``sample`` is used as
    the title.
    """
    fig, ax = plt.subplots(figsize=(15, 4))

    ax.plot(df['L'], df['cps_per_nA'], lw=1, color='k', label='data')
    if baseline is not None:
        ax.plot(df['L'], baseline, lw=1, color='b', label='baseline')

    for region in roi:
        ax.axvspan(region[0], region[1], alpha=0.1, color='red', linewidth=0)

    # Scale the y-axis to the ROI rows only, so rows outside the ROIs do not
    # set the limits.
    roi_counts = select_roi(df, roi)['cps_per_nA']
    lowest = roi_counts.min()
    highest = roi_counts.max()
    ax.set_ylim(lowest - highest*0.05, highest + highest*0.3)

    ax.set_title(sample)
    fig.tight_layout()
    
def select_roi(df, roi):
    """Return the rows of ``df`` whose ``L`` value lies inside any region of interest.

    Parameters:
        df:  DataFrame with an ``L`` column.
        roi: iterable of regions; each region ``r`` selects the rows with
             ``r[0] < L < r[1]`` (both bounds exclusive).

    Returns:
        A new DataFrame with the selected rows in their original order.  Each
        row appears at most once, even when regions overlap or when the index
        of ``df`` contains repeated labels.  An empty ``roi`` gives an empty
        DataFrame.
    """
    # A boolean mask (rather than a list of index labels) avoids duplicating
    # rows for overlapping regions and does not depend on a unique index.
    mask = np.zeros(len(df), dtype=bool)
    for r in roi:
        mask |= ((df['L'] > r[0]) & (df['L'] < r[1])).to_numpy()

    return df.loc[mask, :]


def fit_baseline_and_plot(df, roi, name=None, bg_type='linear'):
    """Fit a baseline to the roi rows, plot it, and store the results in ``df``.

    The background model is fitted (via ``fit_bg``) only to the rows selected
    by ``select_roi`` and then evaluated at every ``L`` in ``df``.  ``df`` is
    modified in place: it gains a ``baseline`` column and a
    ``cps_per_nA_corrected`` column (``cps_per_nA`` minus the baseline).
    Nothing is returned.
    """
    roi_rows = select_roi(df, roi)
    fit = fit_bg(roi_rows, bg_type=bg_type)

    # Evaluate the fitted background over the whole scan, not just the ROIs.
    fitted_bg = fit.eval(x=df['L'].values)
    residual = df['cps_per_nA'].values - fitted_bg

    plot_spectrum_and_roi(df, roi, sample=name, baseline=fitted_bg)

    df['baseline'] = fitted_bg
    df['cps_per_nA_corrected'] = residual
    
def fit_bg(data, bg_type='linear'):
    """Fit a background model to ``data.cps_per_nA`` as a function of ``data.L``.

    bg_type can be either:
        - 'linear' (default): an lmfit ``LinearModel``
        - 'lorentzian_plus_c': a ``LorentzianModel`` plus a ``ConstantModel``

    Returns the fit result object produced by the model's ``fit`` method.

    Raises ValueError if ``bg_type`` is not one of the two supported names
    (previously this surfaced as an UnboundLocalError on ``mod``).
    """
    supported = ('linear', 'lorentzian_plus_c')
    if bg_type not in supported:
        raise ValueError(
            f"Unknown bg_type {bg_type!r}; expected one of {supported}")

    x = data.L
    y = data.cps_per_nA

    if bg_type == 'linear':

        mod = LinearModel()
        pars = mod.make_params(slope=0, intercept=-1)

    else:  # 'lorentzian_plus_c'

        mod = LorentzianModel() + ConstantModel()
        pars = mod.make_params(amplitude=max(y)*10, sigma=10, center=120, c=0)

        # Constrain the fit: non-negative amplitude, centre no higher than
        # 130, and a constant offset no larger than the smallest data value.
        # NOTE(review): 120/130 are hard-coded in units of L; presumably
        # chosen for these particular scans -- confirm before reusing.
        pars['amplitude'].set(min=0)
        pars['center'].set(max=130)
        pars['c'].set(max=min(y))

    bg_fit_result = mod.fit(y, pars, x=x)

    return bg_fit_result

In [None]:
import pandas as pd
from matplotlib import pyplot as plt
import matplotlib.transforms as mtransforms
import numpy as np
import glob
import sys
sys.path.insert(1, '..')
from src import readfiles, helper_funs
from pathlib import Path

# **Warning:**
# Files named 001 and 002 do not always correspond to the same crystal, so the
# crystal has to be read from each scan's metadata rather than from the file name.
# First collect every scan into one flat list (data and metadata in lockstep).

allfolders = sorted(glob.glob('../data/raw/wavelength_scans/*/*'))

data_list = []
metadata_list = []

for folder in allfolders:
    # Each folder is read twice: once for the 001 files, once for the 002 files.
    for stem in ('data001', 'data002'):
        _, data, metadata = readfiles.import_jeol_wdscans(
            folder, f'{stem}_mm.csv', f'{stem}.cnd',
            comment_line_num=80,
            return_metadata=True)
        data_list.append(data)
        metadata_list.append(metadata)

# Then split the data based on crystal. Scans from any other crystal are skipped.

scans = {'LDE1L': {}, 'LDE1': {}}

for data, m in zip(data_list, metadata_list):
    if m.crystal in scans:
        # The key is the fifth component of the metadata's folder path.
        scans[m.crystal][m.folder.parts[4]] = Scan(data, m)
    

In [None]:
# Concatenate all metadata entries column-wise for inspection.
pd.concat(metadata_list, axis=1)

In [None]:
# Names of the scans stored for the LDE1L crystal.
list(scans['LDE1L'])

In [None]:
# Names of the scans stored for the LDE1 crystal.
list(scans['LDE1'])

In [None]:
# Inspect the metadata of one example scan.
scans['LDE1L']['si3n4_long_100nA'].metadata

In [None]:
# NOTE(review): this cell repeats the metadata lookup of the previous cell.
scans['LDE1L']['si3n4_long_100nA'].metadata

In [None]:
# Inspect the data of the same example scan.
scans['LDE1L']['si3n4_long_100nA'].data

In [None]:
xtl = 'LDE1L'

# Overlay every repeat of the short BN scans (4 at 100 nA, 7 at 50 nA) on one
# axis, labelling each line with the current recorded in its metadata.
fig, ax = plt.subplots()
for group, n_repeats in (('bn_short_100nA', 4), ('bn_short_50nA', 7)):
    for repeat in range(1, n_repeats + 1):
        scan = scans[xtl][f'{group}_{repeat}']
        scan.data.plot(x='L', y='cps', ax=ax, label=scan.metadata.current_nA)

Average the data collected under the same conditions

In [None]:
def average_scan(scans, group_name, xtl, n_repeats):
    """Average the repeated scans ``{group_name}_1`` ... ``{group_name}_{n_repeats}``.

    Parameters:
        scans:      nested mapping, ``scans[xtl][name]`` -> Scan.
        group_name: common prefix of the repeat names (without ``_<number>``).
        xtl:        key of the crystal to read from ``scans``.
        n_repeats:  number of repeats to average; repeats are numbered from 1.

    Returns:
        A Scan whose ``data`` has the columns ``L`` (taken from the first
        repeat), ``cps_per_nA`` (row-wise mean over the repeats) and
        ``cps_per_nA_stdev`` (row-wise standard deviation, pandas default),
        and whose ``metadata`` is the row-wise mean of the repeats' metadata
        after dropping the ``folder``, ``comment`` and ``crystal`` entries,
        plus an ``n`` entry equal to ``n_repeats``.

    Raises:
        ValueError: if ``n_repeats`` is smaller than 1.
    """
    if n_repeats < 1:
        raise ValueError(f"n_repeats must be at least 1, got {n_repeats}")

    # Look the repeats up once rather than rebuilding the list per statistic.
    repeats = [scans[xtl][f'{group_name}_{i+1}'] for i in range(n_repeats)]

    # One column per repeat; mean and standard deviation share this table.
    counts = pd.concat([rep.data['cps_per_nA'] for rep in repeats], axis=1)

    df = pd.DataFrame({
        'L': repeats[0].data.L,
        'cps_per_nA': counts.mean(axis=1),
        'cps_per_nA_stdev': counts.std(axis=1),
    })

    avg_metadata = (
        pd.concat([rep.metadata for rep in repeats], axis=1)
        .drop(index=['folder', 'comment', 'crystal'])
        .mean(axis=1)
    )
    avg_metadata['n'] = n_repeats

    return Scan(data=df, metadata=avg_metadata)

In [None]:
avg_scans = {'LDE1': {}, 'LDE1L': {}}

# (scan group, number of repeats to average). None marks a scan that was
# collected once: it is used as-is and only tagged with n = 1.
scan_groups = [
    ('bn_short_100nA', 4),
    ('bn_short_50nA', 7),
    ('bn_long_100nA', None),
    ('gan_short_100nA', 3),
    ('gan_short_50nA', 3),
    ('gan_long_100nA', None),
]

for xtl in ['LDE1', 'LDE1L']:
    for group, n_repeats in scan_groups:
        if n_repeats is None:
            # Same Scan object as in `scans`, so its metadata gains 'n' too.
            single = scans[xtl][group]
            avg_scans[xtl][group] = single
            single.metadata['n'] = 1
        else:
            avg_scans[xtl][group] = average_scan(scans, group, xtl, n_repeats)


In [None]:
plt.rcParams['font.family'] = 'arial'

fig, ax = plt.subplots(2, 1, figsize=(8*0.39,14*0.39), sharex=True)

clrs = ['lightgrey', 'tab:orange', 'tab:blue']

# One panel per material; within a panel the three acquisition conditions
# use the same colours in the same order.
panels = [
    (ax[0], 'BN', ['bn_long_100nA', 'bn_short_100nA', 'bn_short_50nA']),
    (ax[1], 'GaN', ['gan_long_100nA', 'gan_short_100nA', 'gan_short_50nA']),
]

for panel, title, names in panels:
    for colour, s in zip(clrs, names):
        scan = avg_scans['LDE1L'][s]
        # Legend entry: "<n> x <minutes> min" and the current taken from the
        # trailing "<value>nA" part of the scan name.
        legend_text = '{:.0f} x {:.0f} min\n{} nA'.format(
            scan.metadata.n, scan.metadata.total_time_mins,
            s.split('_')[-1].replace('nA', ''))
        scan.data.plot(x='L', y='cps_per_nA', ax=panel, lw=1, color=colour,
                       label=legend_text)
    panel.set_title(title)

alphalabel = 'AB'
# Shift the panel letters 5 pt right and 5 pt down; identical for both panels.
trans = mtransforms.ScaledTranslation(5/72, -5/72, fig.dpi_scale_trans)
for letter, a in zip(alphalabel, ax):
    a.set_xlabel('L (mm)', fontsize = 12)
    a.set_ylabel('cps/nA', fontsize = 12)
    a.set_xticks(np.arange(130, 170, 10))
    a.text(0.93, 0.99, letter, transform=a.transAxes + trans,
            fontsize=12, va='top', ha='right', fontfamily='Arial',
            bbox=dict(facecolor='w', edgecolor='grey', pad=3.0))
    a.legend(ncol=1, loc='upper left', bbox_to_anchor=(0,1), fontsize=9, frameon=False)

fig.tight_layout()

fig.savefig('../figures/nitride_peaks_at_different_conditions.pdf')

## Figure showing that the hyalophane background is independent of beam current

In [None]:
# The two hyalophane scans on the LDE1L crystal (60 nA and 150 nA entries).
lde1l_scans = scans["LDE1L"]
hyal_60 = lde1l_scans["hyalophane_60nA"]
hyal_150 = lde1l_scans["hyalophane_150nA"]

In [None]:
# Fit the background of the 150 nA scan using only the two L windows
# 120-135 and 155-180, then plot the fit.
bg_regions = [[120,135], [155,180]]
trimmed_data = wdscan.trim_data_from_regions(hyal_150.data, fit_regions=bg_regions)
out = wdscan.fit_bg(trimmed_data, cps_per_nA=True)
wdscan.plot_bg_fit(
    hyal_150.data,
    trimmed_data=trimmed_data,
    out=out,
    comment="Hyalophane",
    save_to=None,
    cps_per_nA=True,
    print_parameters=False,
)

In [None]:
fig, ax = plt.subplot_mosaic(
    '''
    A
    B
    C
    ''',
    sharex=True,
    figsize=(8*0.39, 16*0.39)
     )

# Panels A (raw cps) and B (cps per nA) compare the two beam currents.
for scan, label, colour in ((hyal_60, '60 nA', "tab:blue"),
                            (hyal_150, '150 nA', "tab:orange")):
    scan.data.plot(x='L', y='cps', ax=ax["A"], label=label, lw=0.5, color=colour)
    scan.data.plot(x='L', y='cps_per_nA', ax=ax["B"], label=label, lw=0.5, color=colour)

# Panel C: the 150 nA scan together with the background fit stored in `out`.
hyal_150.data.plot(x='L', y='cps_per_nA', ax=ax["C"], label='150 nA', lw=0.5, color="tab:orange")
fitted = out.eval(x=hyal_150.data.L)
ax["C"].plot(hyal_150.data.L, fitted, '-', label="Fit", color='tab:purple', linewidth=1.5)
# Shaded windows; keep these in sync with the regions used to produce `out`.
ax["C"].axvspan(120, 135, facecolor="grey", alpha=0.15, edgecolor=None)
ax["C"].axvspan(155, 180, facecolor="grey", alpha=0.15, edgecolor=None)

ax["C"].annotate(
    text=r"N K$\alpha$",
    xy=(146.6, 0.8),
    xytext=(146.6, 2),
    ha="center",
    arrowprops={"arrowstyle": "->"}
)

# Panel A shows raw counts, so it must not get the 'cps/nA' label that the
# other two panels use (the old loop overwrote it).
y_labels = {"A": 'cps', "B": 'cps/nA', "C": 'cps/nA'}

# Shift the panel letters 5 pt right and 5 pt down; identical for all panels.
trans = mtransforms.ScaledTranslation(5/72, -5/72, fig.dpi_scale_trans)
for k, v in ax.items():
    v.set_xlabel('L (mm)', fontsize = 12)
    v.set_ylabel(y_labels[k], fontsize = 12)
    v.set_xticks(np.arange(120, 190, 10))
    v.text(0.93, 0.99, k, transform=v.transAxes + trans,
            fontsize=12, va='top', ha='right', fontfamily='Arial',
            bbox=dict(facecolor='w', edgecolor='grey', pad=3.0))
    v.legend(framealpha=0, loc='upper left', bbox_to_anchor=(0.1, 0.98), borderaxespad=0)

fig.tight_layout()
fig.savefig('../figures/hyalophane_bg_at_different_currents.png')

In [None]:
# Display the background fit result computed by wdscan.fit_bg above.
out