In [None]:
# Reload edited modules automatically, so changes to the project code are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Buddingtonite - ANU

This notebook contains the analysis of buddingtonite data measured on the JEOL probe at ANU.

## Fit WD scans

In [None]:
# Set up to find custom python package
import os
import sys
import numpy as np
# Add the parent directory to the module search path so that the project's
# `src` package (imported in the next cell) can be found.
sys.path.insert(1, "..")

In [None]:
from src import readfiles, wdscan, correct_quant, calczaf, helper_funs
import pickle
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from nb_helper_funs import compile_n_summary

In [None]:
# ===================== USER INPUT =====================
# Folder that holds the raw WD scan files
scan_path = Path("../data/raw/buddingtonite_ANU/raw_wd_scans")

# Name of the sample
sample = "buddingtonite"

# Optional peak position markers to add to the plot:
#   False             -> no markers
#   [145.839]         -> one marker
#   [145.84, 145.73]  -> two markers
pk_pos_markers = [145.839]
# ======================================================

In [None]:
# Read in the data
# (the reader returns two objects: the scan comments and the scan data)
comments, data = readfiles.import_jeol_wdscans(scan_path)

# Plot the data without fitting
# NOTE(review): save_to=None presumably means the figure is shown but not
# written to disk - confirm against wdscan.plot_wdscan.
wdscan.plot_wdscan(comments, data, save_to=None)

In [None]:
# ---- Background fit and plot ----
# Parts of the spectrum used in the fit, one [start, end] pair per region
bg_roi = [
    [120, 125],
    [130, 138],
    [158, 180],
]
# Folder that receives the fit parameter file
fit_params_dir = Path("../data/interim/buddingtonite_ANU/fits/")

# Restrict the data to the chosen regions and fit the background to them
trimmed_data = wdscan.trim_data_from_regions(data, bg_roi)
out = wdscan.fit_bg(trimmed_data)

# Plot the fit together with the data, then write out the fit parameters
wdscan.plot_bg_fit(data, trimmed_data, out, sample, pk_pos_markers, save_to=None)
par_dict = wdscan.write_fit_params(out, sample, save_to=fit_params_dir)

# Correct quantitative analyses

In [None]:
# Dataset description ---------------------------------------------------------
# Samples in this dataset
samples = ['buddingtonite']
# Folders corresponding to the samples (same order as `samples`)
sample_folders = [
    Path('../data/raw/buddingtonite_ANU/raw_quant/'),
]
# Category of this dataset (e.g. "glasses")
category = 'buddingtonite'

# Supporting files ------------------------------------------------------------
# WD scan fit parameters
wd_scan = Path('../data/interim/buddingtonite_ANU/fits/key_params_buddingtonite.txt')
# Standards information file
std_dbase_info_file = Path('../data/_dictionaries/standards.csv')

In [None]:
# APF values file (None can be passed as apf_file instead)
apf_values_file = Path('../data/_dictionaries/apf_values.csv')

# Locate the files and folders belonging to each sample
datalist = readfiles.find_files_and_folders(
    samples,
    sample_folders,
    apf_file=apf_values_file,
    wd_scan=wd_scan,
)

datalist

In [None]:
# Build one Spot object per row of datalist
n_datasets = len(datalist.folder)
myspot = [None] * n_datasets

for i in range(n_datasets):
    # Work on a copy of the row so that datalist itself is left untouched
    dataset_row = datalist.loc[i, :].copy()
    peak, bg, standard, info = readfiles.read_and_organise_data(
        dataset_row, bgi=False, save=False)

    spot = correct_quant.Spot()
    myspot[i] = spot
    spot.add_data(info, bg, peak, standard)
    spot.add_wd_scan_params_from_file(wd_scan)
    print('Read dataset:', i + 1, 'of', len(datalist), ':',
          spot.info.comment)
    spot.comprehensify_data()

In [None]:
# Output folder for the background corrections
bg_corrections_dir = Path("../data/processed/buddingtonite_ANU/background_corrections/")

correct_quant.process_datasets(
    myspot,
    datalist,
    num_mc_sims=100,
    path_out=bg_corrections_dir,
)

In [None]:
# Excel workbook that receives the summary tables
kraw_summary_file = "../data/processed/buddingtonite_ANU/kraw_summaries.xlsx"

summary_tables = correct_quant.write_summary_excel_tables(myspot, kraw_summary_file)

In [None]:
# Save the list of spot objects to a pickle file; it is reloaded from there
# when the calczaf files are written.
with open('../data/interim/buddingtonite_ANU/buddingtonite.pickle', 'wb') as handle:
    pickle.dump(myspot, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Collect the info record of every spot into one table and save it as a CSV
# file (written to the current working directory).
info_spots = [spot.info for spot in myspot]
info = pd.DataFrame(info_spots)
info.to_csv(f'spots_info_{category}.csv')

print('-----Finished-----')

# Write calczaf file

In [None]:
# Settings for writing the calczaf input files ---------------------------
# Pickle file holding the spot objects saved in the previous section
saved_pickle_file = "../data/interim/buddingtonite_ANU/buddingtonite.pickle"
samples = ['buddingtonite']
category = 'buddingtonite'
# Folder that receives the calczaf input files
subfolder = Path('../data/processed/buddingtonite_ANU/calczaf_files/')

# Whether to also write calczaf files for the detection limits, and where
write_detection_limit_calczaf_files = True
detlim_subfolder = Path('../data/processed/buddingtonite_ANU/calczaf_files/detlim/')

# note: in the subfolder there must be a file specifying valence.
# this can be copied from the _dictionaries folder.
valence_dict = readfiles.read_valence_file(subfolder, pattern='valence*')

# Read the standards file (no header row) as a dictionary keyed by its first
# column. The `squeeze=True` keyword of read_csv was deprecated in pandas 1.4
# and removed in pandas 2.0; calling .squeeze("columns") on the result gives
# the same Series on both old and new pandas versions.
standard_database_dict = (
    pd.read_csv(
        '../data/_dictionaries/standards.csv',
        index_col=0,
        header=None)
    .squeeze("columns")
    .to_dict()
)

standard_database_dict

In [None]:
# Reload the spot objects from the pickle file --------------------------------
# NOTE: unpickling can execute arbitrary code - only load pickle files that
# were written by this notebook.
with open(saved_pickle_file, 'rb') as handle:
    myspot = pickle.load(handle)

print('Loaded data: ', [spot.info.comment for spot in myspot])

# Group the spots by sample: {sample name: [spots recorded for that sample]}
sampledata = {
    sample_name: [spot for spot in myspot if sample_name == spot.info['sample']]
    for sample_name in samples
}

In [None]:
# For multiple different methods of processing the data, add a description
run_descriptor = ['_1_base', '_2_bg', '_3_bg_apf']
# Leave as a list of an empty string if not using: e.g. run_descriptor = ['']

# Settings that are identical for every call to calczaf.write_calczaf_input
common_settings = dict(accV=10, calcMode=2, taAngle=40, Oxide_or_Element=1)

for sample_name in samples:

    # Here we pass in these arguments as a dictionary - this is useful in order
    # to reuse the arguments for the detection limit function. But you can
    # alternatively pass in each argument just by defining it in the function
    # as normal (see glasses example).
    args = {
        'elementByDifference': None,  # string element symbol
        'elementByStoichToStoichOxygen': None,  # string element symbol
        'stoichOxygenRatio': 0,
        # for buddingtonite there is H
        # that can be defined stoichiometrically relative to N:
        'elementByStoichToOtherElement': 'h',
        'OtherElement': 'n',
        'stoichElementRatio': 4,

        'correct_bg': False,
        'correct_apf': False,

        # Elements to omit from matrix correction
        # (e.g. if analysed but not actually present in sample)
        'remove_elements': ['Rb', 'Mo', 'Ca', 'Mg'],

        'definedElements': None,  # list of element symbols to add
        'definedElementWts': None,  # list of known element wt% to add
    }

    # Copies of args with different correction flags; args_list must hold one
    # entry per entry of run_descriptor, in the same order.
    args2 = {**args, 'correct_bg': True, 'correct_apf': False}
    args3 = {**args2, 'correct_bg': True, 'correct_apf': True}
    args_list = [args, args2, args3]

    for j, descriptor in enumerate(run_descriptor):
        run_args = args_list[j]

        print("******************************************************")
        print(run_args["correct_bg"], run_args["correct_apf"])
        print("******************************************************")

        calczaf_path_out = subfolder / '{}{}.dat'.format(sample_name, descriptor)
        open(calczaf_path_out, 'w').close()  # Erase contents of file

        if write_detection_limit_calczaf_files:
            detlim_path_out = detlim_subfolder / '{}{}_detlim.dat'.format(
                sample_name, descriptor)
            open(detlim_path_out, 'w').close()  # Erase contents of file

        # Detection limit entries are only written for background-corrected
        # runs. This is the same for every spot of a run, so it is decided
        # (and warned about) once per run rather than once per spot.
        write_detlim = (write_detection_limit_calczaf_files
                        and run_args['correct_bg'])
        if write_detection_limit_calczaf_files and not write_detlim:
            print('\n\nWarning: Not writing detection limit file. '
                  'Calculating detection limit does not make sense'
                  ' except on background-corrected data. Raw data files'
                  ' contain an estimate of detection limit without bg'
                  ' correction.\n')

        for spot in sampledata[sample_name]:

            # **run_args unpacks the dictionary defined above so that all of
            # its entries are passed in as keyword arguments.
            calczaf.write_calczaf_input(
                spot, calczaf_path_out, valence_dict, standard_database_dict,
                **common_settings, **run_args)

            if write_detlim:
                detlim_spot = correct_quant.create_detection_limit_spot(spot)

                calczaf.write_calczaf_input(
                    detlim_spot, detlim_path_out, valence_dict,
                    standard_database_dict,
                    **common_settings, **run_args)
                    
    

# Manual step - run each `.dat` input file written above (including those in `detlim/`) through CalcZAF before continuing

# Process calczaf outputs

In [None]:
from src import calczaf, helper_funs
from pathlib import Path

# Folder holding the calczaf files for this dataset
folderpath = Path('../data/processed/buddingtonite_ANU/calczaf_files/')

helper_funs.check_calczaf_folder_exists(folderpath)

# Use the first file (in sorted order) whose name starts with 'valence'.
# Fail with a clear message when there is none, instead of the bare
# IndexError that indexing an empty list would give.
valence_files = sorted(folderpath.glob('valence*'))
if not valence_files:
    raise FileNotFoundError(
        "No file matching 'valence*' found in {}".format(folderpath))
valence_file = valence_files[0]

results = calczaf.process_calczaf_outputs(folderpath, valence_file)

# For detection limits
results_detlim = calczaf.process_calczaf_outputs(folderpath / 'detlim/', valence_file, detlim=True)

In [None]:
# Inspect the `montecarlo` attribute of the first spot
myspot[0].montecarlo

In [None]:
# Mean count rates over all spots, taken from row label 8 of each spot's tables
# NOTE(review): row 8 is presumably the nitrogen line - confirm.
row_label = 8
{
    "original_raw_cps": np.mean(
        [spot.peak.loc[row_label, "raw_cps"] for spot in myspot]),
    "linear_bg_net_cps": np.mean(
        [spot.peak.loc[row_label, "net_cps"] for spot in myspot]),
    "curved_bg_net_cps": np.mean(
        [spot.corrected.loc[row_label, "net_cps"] for spot in myspot]),
}

In [None]:
# Mean raw k-ratios (percent) over all spots, taken from row label 8 of each
# spot's tables
# NOTE(review): row 8 is presumably the nitrogen line - confirm.
row_label = 8
{
    "linear_bg_kraw": np.mean(
        [spot.peak.loc[row_label, "kraw_pcnt"] for spot in myspot]),
    "curved_bg_kraw": np.mean(
        [spot.corrected.loc[row_label, "kraw_pcnt"] for spot in myspot]),
    "curved_bg_apf_kraw": np.mean(
        [spot.montecarlo.loc[row_label, "kraw_apf_pcnt"] for spot in myspot]),
}

In [None]:
# Take the "average" and "stdev" columns of every wt% results table and place
# them side by side (one column group per results key), rounded to 2 decimals.
wt_tables = {}
for run_name, wt_table in results["wtdata"].items():
    wt_tables[run_name] = wt_table[["average", "stdev"]]

wt_pct_summary_table = pd.concat(wt_tables, axis=1).round(2)

wt_pct_summary_table.to_csv("../data/processed/buddingtonite_ANU/wt_pct_summary.csv")

wt_pct_summary_table


In [None]:
# Average and standard deviation (rounded to 2 decimals) for the
# "buddingtonite_3_bg_apf" results table only
results["wtdata"]["buddingtonite_3_bg_apf"][["average", "stdev"]].round(2)

# Calculate expected N wt% based on stoichiometry

In [None]:
import periodictable

In [None]:
# Formula used for the expected composition: NH4AlSi3O8 plus half an H2O
budd = periodictable.formula("NH4AlSi3O8") + 0.5*periodictable.formula("H2O")
budd

In [None]:
# Table of atom counts and molar masses, one row per element of the formula.
# Each molar mass is looked up from the element object itself, so it always
# belongs to its own row regardless of the order in which the formula lists
# its elements (a hard-coded list of masses would silently mismatch if that
# order ever differed).
molar_masses = {element: element.mass for element in budd.atoms}

df = pd.DataFrame([budd.atoms, molar_masses]).T
df.columns = ["n_mols", "molar_mass"]

# Mass contributed by each element, and its share of the total in percent
df["wt"] = df["n_mols"] * df["molar_mass"]
df["wt%"] = (df["wt"] / df["wt"].sum()) * 100

df

Get the standard deviation of the individual measurements

In [None]:
# Inspect the `peak` table of the first buddingtonite spot
sampledata["buddingtonite"][0].peak

In [None]:
# Suffixes of the processing runs; these match the run descriptors used when
# writing the calczaf files, without the leading underscore.
suffix_list = ["1_base", "2_bg", "3_bg_apf"]

# Compile the summary from the calczaf results, the detection limit results
# and the spot data; the helper returns a summary table and a details object.
summary, details = compile_n_summary(
    suffix_list, results, results_detlim, sampledata, datalist, summary_tables, samples
)

summary


In [None]:
# Save the summary table as a CSV file
summary.to_csv("../data/processed/buddingtonite_ANU/nitrogen_summary.csv")