# D2872 - Nitrogen quantitative analyses

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Set up to find custom python package
# The two sys.path.insert calls below put the current directory and its parent
# near the front of the module search path so that `from src import ...` in
# the following cell can be resolved.
# NOTE(review): presumably `src` lives in the parent (repository root)
# directory and the notebook is run from a sub-folder — confirm.
# NOTE(review): `os` is not used in the cells visible here, and numpy is
# imported again in the next cell.
import os
import sys
import numpy as np
sys.path.insert(1, ".")
# Inserted second at the same position, so ".." ends up ahead of "."
sys.path.insert(1, "..")

In [3]:
from src import readfiles, wdscan, correct_quant, calczaf, helper_funs
import pickle
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt

# Quantitative analysis

In [4]:
# Dataset configuration for this notebook.
# Every path is written relative to the notebook's working directory and
# shares the same '../data/...' root.
samples = ['D2872']  # List of samples in this dataset

# List of folders corresponding to the samples
sample_folders = [
    Path('../data/raw/basaltic_glasses_StA/raw_quant_by_sample/D2872/'),
]

category = 'basaltic glasses'  # Category of this dataset (e.g. "glasses")

# Path to wd scan fit parameters
wd_scan = Path('../data/interim/basaltic_glasses/fits/key_params_D2872.txt')

# Standards database file. Previously written as 'data/_dictionaries/...',
# which does not match the '../data/...' root used by every other path in
# this notebook (including the later read of this same standards.csv).
std_dbase_info_file = Path('../data/_dictionaries/standards.csv')

In [5]:
# Build the table of datasets for the configured samples / folders.
# The displayed result has one row per dataset comment found.
datalist = readfiles.find_files_and_folders(
    samples,
    sample_folders,
    wd_scan=wd_scan,
    # apf_file may be set to None instead of a path
    apf_file=Path('../data/_dictionaries/apf_values.csv'),
)

datalist

Comments found: ['D2872_100nA_30um_1', 'D2872_100nA_30um_2', 'D2872_100nA_30um_3', 'D2872_50nA_20um_1', 'D2872_50nA_20um_146.6_180s_1', 'D2872_50nA_20um_146.6_180s_2', 'D2872_50nA_20um_146.6_180s_3', 'D2872_50nA_20um_146.6_180s_4', 'D2872_50nA_20um_146.6_180s_5', 'D2872_50nA_20um_2', 'D2872_50nA_20um_3', 'D2872_50nA_20um_4', 'D2872_50nA_20um_5', 'D2872_50nA_20um_6']


Unnamed: 0,folder,comment,sample,paramfile,apf,apf_sd
0,..\data\raw\basaltic_glasses_StA\raw_quant_by_...,D2872_100nA_30um_1,D2872,..\data\interim\basaltic_glasses\fits\key_para...,0.819,0.013
1,..\data\raw\basaltic_glasses_StA\raw_quant_by_...,D2872_100nA_30um_2,D2872,..\data\interim\basaltic_glasses\fits\key_para...,0.819,0.013
2,..\data\raw\basaltic_glasses_StA\raw_quant_by_...,D2872_100nA_30um_3,D2872,..\data\interim\basaltic_glasses\fits\key_para...,0.819,0.013
3,..\data\raw\basaltic_glasses_StA\raw_quant_by_...,D2872_50nA_20um_1,D2872,..\data\interim\basaltic_glasses\fits\key_para...,0.819,0.013
4,..\data\raw\basaltic_glasses_StA\raw_quant_by_...,D2872_50nA_20um_146.6_180s_1,D2872,..\data\interim\basaltic_glasses\fits\key_para...,0.819,0.013
5,..\data\raw\basaltic_glasses_StA\raw_quant_by_...,D2872_50nA_20um_146.6_180s_2,D2872,..\data\interim\basaltic_glasses\fits\key_para...,0.819,0.013
6,..\data\raw\basaltic_glasses_StA\raw_quant_by_...,D2872_50nA_20um_146.6_180s_3,D2872,..\data\interim\basaltic_glasses\fits\key_para...,0.819,0.013
7,..\data\raw\basaltic_glasses_StA\raw_quant_by_...,D2872_50nA_20um_146.6_180s_4,D2872,..\data\interim\basaltic_glasses\fits\key_para...,0.819,0.013
8,..\data\raw\basaltic_glasses_StA\raw_quant_by_...,D2872_50nA_20um_146.6_180s_5,D2872,..\data\interim\basaltic_glasses\fits\key_para...,0.819,0.013
9,..\data\raw\basaltic_glasses_StA\raw_quant_by_...,D2872_50nA_20um_2,D2872,..\data\interim\basaltic_glasses\fits\key_para...,0.819,0.013


In [6]:
# Read every dataset listed in `datalist` into a correct_quant.Spot object.
# `myspot` ends up holding one Spot per row of `datalist`, in row order.
myspot = []
n_rows = len(datalist.folder)

for row_idx in range(n_rows):
    # Work on a copy of the row so the reader cannot modify `datalist`
    row = datalist.loc[row_idx, :].copy()
    peak, bg, standard, info = readfiles.read_and_organise_data(
        row, bgi=False, save=False)

    spot = correct_quant.Spot()
    myspot.append(spot)

    spot.add_data(info, bg, peak, standard)
    spot.add_wd_scan_params_from_file(wd_scan)
    print('Read dataset:', row_idx + 1, 'of', len(datalist), ':',
          spot.info.comment)
    spot.comprehensify_data()


 loading path: ..\data\interim\basaltic_glasses\fits\key_params_D2872.txt
{'sigma': 0.0432318540326726, 'center': 107.90302352779099}
Read dataset: 1 of 14 : D2872_100nA_30um_1

 loading path: ..\data\interim\basaltic_glasses\fits\key_params_D2872.txt
{'sigma': 0.0432318540326726, 'center': 107.90302352779099}
Read dataset: 2 of 14 : D2872_100nA_30um_2

 loading path: ..\data\interim\basaltic_glasses\fits\key_params_D2872.txt
{'sigma': 0.0432318540326726, 'center': 107.90302352779099}
Read dataset: 3 of 14 : D2872_100nA_30um_3

 loading path: ..\data\interim\basaltic_glasses\fits\key_params_D2872.txt
{'sigma': 0.0432318540326726, 'center': 107.90302352779099}
Read dataset: 4 of 14 : D2872_50nA_20um_1

 loading path: ..\data\interim\basaltic_glasses\fits\key_params_D2872.txt
{'sigma': 0.0432318540326726, 'center': 107.90302352779099}
Read dataset: 5 of 14 : D2872_50nA_20um_146.6_180s_1

 loading path: ..\data\interim\basaltic_glasses\fits\key_params_D2872.txt
{'sigma': 0.04323185403267

In [7]:
# Run the corrections on every loaded spot, using 100 Monte Carlo
# simulations per dataset; `path_out` is the output location passed to
# process_datasets. The call's return value is left as the cell's displayed
# output.
bg_correction_dir = Path(
    "../data/processed/basaltic_glasses/background_corrections/D2872")

correct_quant.process_datasets(
    myspot, datalist, num_mc_sims=100, path_out=bg_correction_dir)


Processing dataset: 1 of 14 : D2872_100nA_30um_1
Correcting background
Saved bg correction figure for 2021-12-22_D2872_100nA_30um_1
Resample cps to check stdev method
Montecarlo background correction
monte-carlo loop 0 of 100
monte-carlo loop 50 of 100
Saved montecarlo bg correction figure for 2021-12-22_D2872_100nA_30um_1
correct height/area ratio
original kraw: 0.00 ± 100.00%
corrected kraw: 1.16 ± 6.17%

Processing dataset: 2 of 14 : D2872_100nA_30um_2
Correcting background
Saved bg correction figure for 2021-12-22_D2872_100nA_30um_2
Resample cps to check stdev method
Montecarlo background correction
monte-carlo loop 0 of 100
monte-carlo loop 50 of 100
Saved montecarlo bg correction figure for 2021-12-22_D2872_100nA_30um_2
correct height/area ratio
original kraw: 0.00 ± 100.00%
corrected kraw: 1.23 ± 4.48%

Processing dataset: 3 of 14 : D2872_100nA_30um_3
Correcting background
Saved bg correction figure for 2021-12-22_D2872_100nA_30um_3
Resample cps to check stdev method
Montecarlo

[<src.correct_quant.Spot at 0x2cf7a138b80>,
 <src.correct_quant.Spot at 0x2cf1af949a0>,
 <src.correct_quant.Spot at 0x2cf1afedd00>,
 <src.correct_quant.Spot at 0x2cf1aff7e80>,
 <src.correct_quant.Spot at 0x2cf1aff5e80>,
 <src.correct_quant.Spot at 0x2cf1b0097f0>,
 <src.correct_quant.Spot at 0x2cf1b014d90>,
 <src.correct_quant.Spot at 0x2cf1b01aee0>,
 <src.correct_quant.Spot at 0x2cf1b028fa0>,
 <src.correct_quant.Spot at 0x2cf1b0329a0>,
 <src.correct_quant.Spot at 0x2cf1b03b190>,
 <src.correct_quant.Spot at 0x2cf1b046b80>,
 <src.correct_quant.Spot at 0x2cf1b04c5b0>,
 <src.correct_quant.Spot at 0x2cf1b052ee0>]

Write calczaf file

In [8]:
# Configuration for writing the CALCZAF input files.
samples = ['D2872']
category = 'basaltic glasses'
subfolder = Path('../data/processed/basaltic_glasses/calczaf_files/D2872/')

# Detection-limit input files go in a 'detlim' folder inside `subfolder`.
write_detection_limit_calczaf_files = True
detlim_subfolder = subfolder / Path('detlim')

# note: in the subfolder there must be a file specifying valence.
# this can be copied from the _dictionaries folder.
valence_dict = readfiles.read_valence_file(subfolder, pattern='valence*')

# standards.csv has no header row: the first column holds the standard name
# (used as the index) and the remaining column holds its database number.
# `read_csv(..., squeeze=True)` was deprecated in pandas 1.4 and removed in
# pandas 2.0, so the single remaining column is squeezed to a Series
# explicitly before converting to a {name: number} dictionary.
standard_database_dict = (
    pd.read_csv(
        '../data/_dictionaries/standards.csv',
        index_col=0,
        header=None)
    .squeeze("columns")
    .to_dict()
)

standard_database_dict

{'Sanidine': 3000,
 'Diopside': 3001,
 '08_Pollucite': 3002,
 'Albite': 3003,
 'GaN': 608,
 '22_Mo': 542,
 '09_Vanadium_Wire_Alfa': 523,
 'BN': 604}

In [9]:
# Map each sample name to its list of Spot objects
sampledata = {}
sampledata["D2872"] = myspot

Get the major element analyses

In [13]:
# Load the major-element summary table. The file has two header rows, so the
# columns form a two-level index whose first level is selected by sample
# name below.
majors_csv = "../data/processed/basaltic_glasses/basaltic_glasses_majors_summary.csv"
majors_summary = pd.read_csv(majors_csv, index_col=0, header=[0, 1])
majors_summary["D2872"]

Unnamed: 0,wt% mean,stdev
Si,22.842,0.053
Al,8.135,0.032
Cl,4.394,0.049
P,0.082,0.006
Fe,0.135,0.017
Mn,0.183,0.005
Cr,0.024,0.006
K,0.172,0.005
Ca,10.463,0.052
Ru,0.0,0.0


In [14]:
# Keep the mean wt% of every element except the "O" and "Total" rows
d2872_majors = majors_summary["D2872"]
rows_to_keep = d2872_majors.index[~d2872_majors.index.isin(["O", "Total"])]
majors_relevant = d2872_majors.loc[rows_to_keep, "wt% mean"]
majors_relevant

Si    22.842
Al     8.135
Cl     4.394
P      0.082
Fe     0.135
Mn     0.183
Cr     0.024
K      0.172
Ca    10.463
Ru     0.000
Na     1.279
Mg     5.303
Name: wt% mean, dtype: float64

In [15]:
# Write one CALCZAF input file per sample and per processing variant, plus
# (optionally) a matching detection-limit input file for the variants that
# use background-corrected data.

# For multiple different methods of processing the data, add a description
run_descriptor = ['_1_base', '_2_bg', '_3_bg_apf']
# Leave as a list of an empty string if not using: e.g. run_descriptor = ['']

# Here we pass in these arguments as a dictionary - this is useful in order
# to reuse the arguments for the detection limit function. But you can
# alternatively pass in each argument just by defining it in the function
# as normal (see glasses example).
# The dictionaries do not depend on the sample, so they are built once here
# rather than inside the sample loop.
args = {
    'elementByDifference': 'h',  # string element symbol
    'elementByStoichToStoichOxygen': None,  # string element symbol
    'stoichOxygenRatio': 0,

    # An element can be defined stoichiometrically relative to another
    # element (e.g. for hyalophane, H relative to N). Not used here.
    'elementByStoichToOtherElement': None,
    'OtherElement': None,
    'stoichElementRatio': None,

    'correct_bg': False,
    'correct_apf': False,

    # Elements to omit from matrix correction
    # (e.g. if analysed but not actually present in sample)
    'remove_elements': None,

    'definedElements': majors_relevant.index,  # element symbols to add
    'definedElementWts': majors_relevant.values,  # known element wt% to add
}

# Variants of args with different correction settings
args2 = {**args, 'correct_bg': True, 'correct_apf': False}
args3 = {**args, 'correct_bg': True, 'correct_apf': True}

# args_list[j] is used together with run_descriptor[j]
args_list = [args, args2, args3]

# Make sure the detection-limit folder exists before files are opened in it.
# (`subfolder` itself must already exist: the valence file is read from it.)
if write_detection_limit_calczaf_files:
    detlim_subfolder.mkdir(parents=True, exist_ok=True)

for sample in samples:

    for j, descriptor in enumerate(run_descriptor):
        run_args = args_list[j]

        print("******************************************************")
        print(run_args["correct_bg"], run_args["correct_apf"])
        print("******************************************************")

        calczaf_path_out = subfolder / '{}{}.dat'.format(sample, descriptor)
        calczaf_path_out.write_text('')  # Erase contents of file

        # Detection-limit spots are only written for bg-corrected runs.
        write_detlim = False
        if write_detection_limit_calczaf_files:

            detlim_path_out = detlim_subfolder / '{}{}_detlim.dat'.format(
                                            sample, descriptor)
            detlim_path_out.write_text('')  # Erase contents of file

            write_detlim = bool(run_args['correct_bg'])
            if not write_detlim:
                # Reported once per run (not once per spot)
                print('\n\nWarning: Not writing detection limit file. '
                      'Calculating detection limit does not make sense'
                      ' except on background-corrected data. Raw data files'
                      ' contain an estimate of detection limit without bg'
                      ' correction.\n')

        for spot in sampledata[sample]:

            # **run_args unpacks the dictionary defined earlier so that all
            # those arguments are passed into the function without the need
            # to type them all out.
            calczaf.write_calczaf_input(
                spot, calczaf_path_out, valence_dict, standard_database_dict,
                accV=10, calcMode=2, taAngle=40, Oxide_or_Element=1,
                **run_args)

            if write_detlim:
                detlim_spot = correct_quant.create_detection_limit_spot(spot)

                calczaf.write_calczaf_input(
                    detlim_spot, detlim_path_out, valence_dict,
                    standard_database_dict,
                    accV=10, calcMode=2, taAngle=40, Oxide_or_Element=1,
                    **run_args)
                    
    

******************************************************
False False
******************************************************
Wrote CALCZAF file for D2872_100nA_30um_1 to ..\data\processed\basaltic_glasses\calczaf_files\D2872\D2872_1_base.dat



Wrote CALCZAF file for D2872_100nA_30um_2 to ..\data\processed\basaltic_glasses\calczaf_files\D2872\D2872_1_base.dat



Wrote CALCZAF file for D2872_100nA_30um_3 to ..\data\processed\basaltic_glasses\calczaf_files\D2872\D2872_1_base.dat



Wrote CALCZAF file for D2872_50nA_20um_1 to ..\data\processed\basaltic_glasses\calczaf_files\D2872\D2872_1_base.dat



Wrote CALCZAF file for D2872_50nA_20um_146.6_180s_1 to ..\data\processed\basaltic_glasses\calczaf_files\D2872\D2872_1_base.dat



Wrote CALCZAF file for D2872_50nA_20um_146.6_180s_2 to ..\data\processed\basaltic_glasses\calczaf_files\D2872\D2872_1_base.dat



Wrote CALCZAF file for D2872_50nA_20um_146.6_180s_3 to ..\data\processed\basaltic_glasses\calczaf_files\D2872\D2872_1_base.dat



Wrote CAL

## Process calczaf outputs

In [16]:
# Read back the CALCZAF results for the main runs and for the
# detection-limit runs, using the first valence file (in sorted order)
# found in the CALCZAF folder.
folderpath = Path('../data/processed/basaltic_glasses/calczaf_files/D2872/')
helper_funs.check_calczaf_folder_exists(folderpath)

valence_candidates = sorted(folderpath.glob('valence*'))
valence_file = valence_candidates[0]

results = calczaf.process_calczaf_outputs(folderpath, valence_file)

# For detection limits
detlim_folderpath = folderpath / 'detlim/'
results_detlim = calczaf.process_calczaf_outputs(
    detlim_folderpath, valence_file, detlim=True)

[                     0           1           2           3           4           5           6           7           8           9           10          11          12          13
wt% element                                                                                                                                                                        
Si            22.841700   22.841700   22.841700   22.841700   22.841700   22.841700   22.841700   22.841700   22.841700   22.841700   22.841700   22.841700   22.841700   22.841700
Al             8.134580    8.134580    8.134580    8.134580    8.134580    8.134580    8.134580    8.134580    8.134580    8.134580    8.134580    8.134580    8.134580    8.134580
Ca            10.463100   10.463100   10.463100   10.463100   10.463100   10.463100   10.463100   10.463100   10.463100   10.463100   10.463100   10.463100   10.463100   10.463100
Mg             5.303100    5.303100    5.303100    5.303100    5.303100    5.303100    5.303100    

In [17]:
# Pair each dataset comment with the N row of the bg + apf corrected
# results, then average N by analytical method.
n_row_bg_apf = results["wtdata"]["D2872_3_bg_apf"].loc["N", :]
N_by_method = pd.DataFrame(
    {"comment": datalist["comment"], "N wt": n_row_bg_apf})

# The method label is the comment with its trailing "_<number>" removed
method_labels = N_by_method["comment"].str.replace(
    r'_[0-9]+$', '', regex=True)
N_by_method["method"] = method_labels

N_by_method.groupby("method")["N wt"].mean()

method
D2872_100nA_30um              0.241133
D2872_50nA_20um               0.193541
D2872_50nA_20um_146.6_180s    0.197456
Name: N wt, dtype: float64

In [18]:
# Show the per-dataset table rounded to two decimal places
N_by_method.round(decimals=2)

Unnamed: 0,comment,N wt,method
0,D2872_100nA_30um_1,0.24,D2872_100nA_30um
1,D2872_100nA_30um_2,0.26,D2872_100nA_30um
2,D2872_100nA_30um_3,0.22,D2872_100nA_30um
3,D2872_50nA_20um_1,0.16,D2872_50nA_20um
4,D2872_50nA_20um_146.6_180s_1,0.23,D2872_50nA_20um_146.6_180s
5,D2872_50nA_20um_146.6_180s_2,0.19,D2872_50nA_20um_146.6_180s
6,D2872_50nA_20um_146.6_180s_3,0.17,D2872_50nA_20um_146.6_180s
7,D2872_50nA_20um_146.6_180s_4,0.21,D2872_50nA_20um_146.6_180s
8,D2872_50nA_20um_146.6_180s_5,0.19,D2872_50nA_20um_146.6_180s
9,D2872_50nA_20um_2,0.2,D2872_50nA_20um


There's a bit of variability in the N content by method, but it's hard to tell whether it is truly related to the method or just reflects variability in the sample. For example, the spots done at 100 nA and 30 microns seem to show higher average nitrogen, but I only have three spots for these.

I don't think the difference is clear enough to worry about, so I'll just report the average of all analyses.

In [19]:
# For each processing run, pull the N average and stdev out of the wt% table
# and relabel "stdev" to make clear it comes from the repeated measurements.
# NOTE: a further "typical stdev on individual measurement" entry
# (average * relative stdev % taken from a `typical_kratios` table) was
# sketched here but is disabled; `typical_kratios` is not defined in the
# cells shown.
n_summary = {
    run_name: wt_table.loc["N", ["average", "stdev"]].rename(
        {"stdev": "stdev (multiple measurements)"})
    for run_name, wt_table in results["wtdata"].items()
}

pd.concat(n_summary, axis=1).transpose()

Unnamed: 0,average,stdev (multiple measurements)
D2872_1_base,0.0,0.0
D2872_2_bg,0.168011,0.024491
D2872_3_bg_apf,0.205138,0.029903


In [21]:
# Collect the average and stdev columns of every run side by side
# (one column group per run), rounded to three decimal places.
avg_and_stdev_by_run = {}
for run_name, wt_table in results["wtdata"].items():
    avg_and_stdev_by_run[run_name] = wt_table[["average", "stdev"]]

wt_pct_summary_table = pd.concat(avg_and_stdev_by_run, axis=1).round(3)

# wt_pct_summary_table.to_csv("../data/processed/hyalophane_StA/wt_pct_summary_GaNcalib.csv")

wt_pct_summary_table


Unnamed: 0_level_0,D2872_1_base,D2872_1_base,D2872_2_bg,D2872_2_bg,D2872_3_bg_apf,D2872_3_bg_apf
Unnamed: 0_level_1,average,stdev,average,stdev,average,stdev
wt% element,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Si,22.842,0.0,22.842,0.0,22.842,0.0
Al,8.135,0.0,8.135,0.0,8.135,0.0
Ca,10.463,0.0,10.463,0.0,10.463,0.0
Mg,5.303,0.0,5.303,0.0,5.303,0.0
Fe,0.135,0.0,0.135,0.0,0.135,0.0
Mn,0.183,0.0,0.183,0.0,0.183,0.0
K,0.172,0.0,0.172,0.0,0.172,0.0
Na,1.279,0.0,1.279,0.0,1.279,0.0
H,0.601,0.0,0.582,0.003,0.578,0.003
N,0.0,0.0,0.168,0.024,0.205,0.03


These values are calculated with hydrogen determined by difference.