# Synthetic basaltic glasses

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# Set up to find custom python package
import os
import sys
import numpy as np
sys.path.insert(1, ".")
sys.path.insert(1, "..")

In [None]:
from src import readfiles, wdscan, correct_quant, calczaf, helper_funs
import pickle
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt

In [None]:
# For reference: how to rename folders

# from src.readfiles import rename_folders_as_comments

# for f in os.listdir("../data/raw/basaltic_glasses_StA/raw_quant_20211222/original/"):
#     print(f)
                    
#     rename_folders_as_comments(
#         Path(f"../data/raw/basaltic_glasses_StA/raw_quant_20211222/original/{f}"), 
#         "../data/raw/basaltic_glasses_StA/raw_quant_20211222/renamed")

In [None]:
import pyrolite.geochem

# Major/trace elements other than N

In [None]:
def get_major_elements_from_txt(path, n_rows=5):
    """Read major-element results and detection limits from an exported text file.

    The file is parsed as comma-separated text whose second line holds the
    column names (``header=1``); only the first ``n_rows`` data rows are kept.

    Parameters
    ----------
    path : str or path-like
        Location of the exported file.
    n_rows : int or None, optional
        Number of leading data rows to keep (default 5, the value that was
        previously hard-coded). ``None`` keeps every row.

    Returns
    -------
    majors : pandas.DataFrame
        The columns whose name contains ``"(Mass%)"``, with that marker
        removed from the column names.
    detlims : pandas.DataFrame
        The columns whose name contains ``"(D.L.)"``, names unchanged.
    """
    majors_raw = pd.read_csv(
        path,
        header=1,
        index_col=False
        )[:n_rows]

    mass_pct_cols = [col for col in majors_raw.columns if "(Mass%)" in col]
    detlim_cols = [col for col in majors_raw.columns if "(D.L.)" in col]

    # Drop the "(Mass%)" marker so columns are named by oxide/element only
    majors = majors_raw.loc[:, mass_pct_cols].rename(
        columns={col: col.replace("(Mass%)", "") for col in mass_pct_cols}
    )
    detlims = majors_raw[detlim_cols]
    return majors, detlims

# Samples for which a combined "<sample>_all.txt" export is available
majors, detlims = {}, {}
for sample_name in ("Edi06", "Edi09", "D2872"):
    all_txt_path = f"../data/raw/basaltic_glasses_StA/majors_20211110/{sample_name.lower()}_all.txt"
    majors[sample_name], detlims[sample_name] = get_major_elements_from_txt(all_txt_path)

# For D2893 there is no "_all.txt" file, so the oxide table and the detection
# limits are read from two separate files instead.
d2893_oxides = pd.read_csv(
    "../data/raw/basaltic_glasses_StA/majors_20211110/D2893_2_oxide.csv",
    header=1,
    index_col=False
)[:4]

# Column names in this file carry surrounding whitespace; strip it
majors["D2893"] = d2893_oxides.rename(columns=str.strip)

detlims["D2893"] = pd.read_csv(
    "../data/raw/basaltic_glasses_StA/majors_20211110/D2893_2_detlim.txt",
    header=1,
    index_col=False
)[:4]

In [None]:
# Inspect the column names of the D2872 table (read from its "_all.txt" export)
majors["D2872"].columns

In [None]:
# Inspect the column names of the D2893 table (read from the separate oxide CSV)
majors["D2893"].columns

In [None]:
# Columns taken from each majors table, and the element symbols they are
# converted to (same order).
oxide_columns = ['SiO2', 'Al2O3', 'Cl', 'P2O5', 'Fe2O3', 'MnO', 'Cr2O3', 'K2O', 'CaO', 'Ru2O3', 'Na2O', 'MgO']
element_columns = ['Si', 'Al', 'Cl', 'P', 'Fe', 'Mn', 'Cr', 'K', 'Ca', 'Ru', 'Na', 'Mg']

majors_element = {}

for sample_name, oxide_table in majors.items():

    # Convert via the `pyrochem` DataFrame accessor (available once
    # pyrolite.geochem has been imported); missing values become 0.
    element_table = (
        oxide_table[oxide_columns]
        .pyrochem.convert_chemistry(to=list(element_columns))
        .fillna(0)
    )

    # Oxygen by difference: reported total minus the sum of the converted elements
    element_table["O"] = oxide_table["Total"] - element_table.sum(axis=1)

    # Total recomputed over all element columns, now including O
    element_table["Total"] = element_table.sum(axis=1)

    majors_element[sample_name] = element_table


In [None]:
# Display the per-sample element tables (dict: sample name -> DataFrame)
majors_element

In [None]:
# Per-sample mean and standard deviation of every column of the element tables
majors_summary = {}

for sample_name, element_table in majors_element.items():
    per_element = element_table.T  # rows: elements, columns: individual analyses
    sample_summary = pd.concat(
        [per_element.mean(axis=1), per_element.std(axis=1)],
        axis=1,
    )
    sample_summary.columns = ["wt% mean", "stdev"]
    majors_summary[sample_name] = sample_summary

# Side-by-side table; the dict keys (sample names) become the outer column level
majors_summary_combined = pd.concat(majors_summary, axis=1)

In [None]:
# Save the combined summary (rounded to 3 decimals) and display the same rounded table
majors_summary_combined.round(3).to_csv("../data/processed/basaltic_glasses/basaltic_glasses_majors_summary.csv")
majors_summary_combined.round(3)

In [None]:
# Display the "(D.L.)" columns read for Edi09
detlims["Edi09"]

# Nitrogen analyses

### WD scan - visualise & fit

All samples (D2872, D2893, Edi09, Edi06) - scans from all dates

In [None]:
samplenames = ["D2872", "D2893", "Edi09", "Edi06"]

# Both dictionaries map sample name -> list with one entry per scan that was found
metadata_list = {}
data_list = {}

for s in samplenames:
    print(f"---------------- {s} ----------------")

    # One folder per scan date for this sample
    folderpath_list = [
        f"../data/raw/basaltic_glasses_StA/wd_scans_20211008/{s}",
        f"../data/raw/basaltic_glasses_StA/wd_scans_20211125/{s}",
        f"../data/raw/basaltic_glasses_StA/wd_scans_20211222/{s}"
    ]

    sample_metadata, sample_data = [], []
    metadata_list[s] = sample_metadata
    data_list[s] = sample_data

    for folder in folderpath_list:
        # Up to two scans are looked for in each folder; missing ones are reported and skipped
        for scan_id in ("data001", "data002"):
            try:
                comments, data, metadata = readfiles.import_jeol_wdscans(
                    subfolder=folder,
                    scan_filename=f'{scan_id}_mm.csv',
                    cnd_filename=f'{scan_id}.cnd',
                    comment_line_num=80,
                    crystal_line_name="$XM_WDS_CRYSTAL_NAME%0",
                    sep=',',
                    return_metadata=True
                )
            except FileNotFoundError:
                print(f"No file found for {scan_id}")
            else:
                sample_metadata.append(metadata)
                sample_data.append(data)


In [None]:
# Scans grouped by analysing crystal: sample name -> {"metadata": [...], "data": [...]}
lde1l_scans = {}
lde1_scans = {}

for s in samplenames:
    print(f"--------------------{s}-------------------")
    lde1l_scans[s] = {"metadata": [], "data": []}
    lde1_scans[s] = {"metadata": [], "data": []}

    # Sort every imported scan into the group for its crystal;
    # scans recorded on any other crystal are left out of both groups.
    for metadata, data in zip(metadata_list[s], data_list[s]):
        if metadata.crystal == "LDE1L":
            group = lde1l_scans[s]
        elif metadata.crystal == "LDE1":
            group = lde1_scans[s]
        else:
            continue
        group["metadata"].append(metadata)
        group["data"].append(data)

    # One panel per LDE1L scan plus a final panel with all scans overlain.
    # The layout stays at 4 panels for up to 3 scans (as before) and grows
    # beyond that, so a 4th scan can no longer land on the overlay panel
    # and a 5th can no longer raise an IndexError.
    n_scans = len(lde1l_scans[s]["data"])
    n_panels = max(4, n_scans + 1)
    fig, ax = plt.subplots(ncols=n_panels, figsize=(2.5 * n_panels, 3))
    overlay_ax = ax[-1]

    for i, data in enumerate(lde1l_scans[s]["data"]):
        ax[i].plot(data.L, data.cps_per_nA, ".k", markersize=1)
        ax[i].set_title(s)

        overlay_ax.plot(data.L, data.cps_per_nA, ".", markersize=1, label=i)
        overlay_ax.set_title("All scans overlain")

    # Metadata of the LDE1L scans, one column per scan
    display(pd.DataFrame(lde1l_scans[s]["metadata"]).T)
    plt.show()

For each of these samples we only need to fit the middle scan, which is the highest-resolution one.
The exception is Edi06, for which there is only a single scan.

In [None]:
# Fit and plot with the fits ------------
# Choose parts of the spectrum to use in the fit
# (each pair is one [start, end] window handed to wdscan.trim_data_from_regions;
# presumably in the same units as data.L - TODO confirm)
bg_roi = [[120,138], [155, 180]]
sample = "D2872"

# Load scan data001 for this sample from the wd_scans_20211125 folder
comments, data, metadata = readfiles.import_jeol_wdscans(
    subfolder="../data/raw/basaltic_glasses_StA/wd_scans_20211125/D2872",
    scan_filename='data001_mm.csv',
    cnd_filename='data001.cnd',
    comment_line_num=80,
    crystal_line_name="$XM_WDS_CRYSTAL_NAME%0",
    sep=',',
    return_metadata=True
)
print(metadata.crystal)
# Restrict the scan to the background windows, then fit the background on that subset
trimmed_data = wdscan.trim_data_from_regions(data, bg_roi)
out = wdscan.fit_bg(trimmed_data)
# 146.6 lies between the two background windows - presumably the peak position
# to mark on the plot (TODO confirm against wdscan.plot_bg_fit)
wdscan.plot_bg_fit(data, trimmed_data, out, sample, [146.6], save_to=Path("../data/interim/basaltic_glasses/fits"))
# NOTE(review): the quantitative-analysis section reads key_params_D2872.txt from
# this folder - presumably the file written here; confirm the naming in write_fit_params
par_dict = wdscan.write_fit_params(out, sample, save_to=Path("../data/interim/basaltic_glasses/fits/"))

## D2893

In [None]:
# Fit and plot with the fits ------------
# Choose parts of the spectrum to use in the fit
# (each pair is one [start, end] window handed to wdscan.trim_data_from_regions;
# presumably in the same units as data.L - TODO confirm)
bg_roi = [[120,140], [170, 180]]
sample = "D2893"

# Load scan data001 for this sample from the wd_scans_20211125 folder
comments, data, metadata = readfiles.import_jeol_wdscans(
    subfolder="../data/raw/basaltic_glasses_StA/wd_scans_20211125/D2893",
    scan_filename='data001_mm.csv',
    cnd_filename='data001.cnd',
    comment_line_num=80,
    crystal_line_name="$XM_WDS_CRYSTAL_NAME%0",
    sep=',',
    return_metadata=True
)

print(metadata.crystal)
# Restrict the scan to the background windows, then fit the background on that subset
trimmed_data = wdscan.trim_data_from_regions(data, bg_roi)
out = wdscan.fit_bg(trimmed_data)
# 146.6 lies between the two background windows - presumably the peak position
# to mark on the plot (TODO confirm against wdscan.plot_bg_fit)
wdscan.plot_bg_fit(data, trimmed_data, out, sample, [146.6], save_to=Path("../data/interim/basaltic_glasses/fits"))
# Write the fit parameters for this sample into the fits folder
par_dict = wdscan.write_fit_params(out, sample, save_to=Path("../data/interim/basaltic_glasses/fits/"))

## Edi09

In [None]:
# Fit and plot with the fits ------------
# Choose parts of the spectrum to use in the fit
# (each pair is one [start, end] window handed to wdscan.trim_data_from_regions;
# presumably in the same units as data.L - TODO confirm)
bg_roi = [[120,140], [170, 180]]
sample = "Edi09"

# Load scan data001 for this sample from the wd_scans_20211125 folder
comments, data, metadata = readfiles.import_jeol_wdscans(
    subfolder="../data/raw/basaltic_glasses_StA/wd_scans_20211125/Edi09",
    scan_filename='data001_mm.csv',
    cnd_filename='data001.cnd',
    comment_line_num=80,
    crystal_line_name="$XM_WDS_CRYSTAL_NAME%0",
    sep=',',
    return_metadata=True
)

print(metadata.crystal)
# Restrict the scan to the background windows, then fit the background on that subset
trimmed_data = wdscan.trim_data_from_regions(data, bg_roi)
out = wdscan.fit_bg(trimmed_data)
# 146.6 lies between the two background windows - presumably the peak position
# to mark on the plot (TODO confirm against wdscan.plot_bg_fit)
wdscan.plot_bg_fit(data, trimmed_data, out, sample, [146.6], save_to=Path("../data/interim/basaltic_glasses/fits"))
# Write the fit parameters for this sample into the fits folder
par_dict = wdscan.write_fit_params(out, sample, save_to=Path("../data/interim/basaltic_glasses/fits/"))

## Edi06

In [None]:
# Fit and plot with the fits ------------
# Choose parts of the spectrum to use in the fit
# (each pair is one [start, end] window handed to wdscan.trim_data_from_regions;
# presumably in the same units as data.L - TODO confirm)
bg_roi = [[120,140], [170, 180]]
sample = "Edi06"

# Load scan data001 for this sample; unlike the other samples this one is
# taken from the wd_scans_20211222 folder
comments, data, metadata = readfiles.import_jeol_wdscans(
    subfolder="../data/raw/basaltic_glasses_StA/wd_scans_20211222/Edi06",
    scan_filename='data001_mm.csv',
    cnd_filename='data001.cnd',
    comment_line_num=80,
    crystal_line_name="$XM_WDS_CRYSTAL_NAME%0",
    sep=',',
    return_metadata=True
)

print(metadata.crystal)
# Restrict the scan to the background windows, then fit the background on that subset
trimmed_data = wdscan.trim_data_from_regions(data, bg_roi)
out = wdscan.fit_bg(trimmed_data)
# 146.6 lies between the two background windows - presumably the peak position
# to mark on the plot (TODO confirm against wdscan.plot_bg_fit)
wdscan.plot_bg_fit(data, trimmed_data, out, sample, [146.6], save_to=Path("../data/interim/basaltic_glasses/fits"))
# Write the fit parameters for this sample into the fits folder
par_dict = wdscan.write_fit_params(out, sample, save_to=Path("../data/interim/basaltic_glasses/fits/"))

# Quantitative analysis

In [None]:
samples = ['D2872'] # List of samples in this dataset
sample_folders = [Path('../data/raw/basaltic_glasses_StA/raw_quant_by_sample/D2872/')]
# List of folders corresponding to the samples
category = 'basaltic glasses' # Category of this dataset (e.g. "glasses")
wd_scan = Path('../data/interim/basaltic_glasses/fits/key_params_D2872.txt') # Path to wd scan fit parameters
# Standards database. Every other path in this notebook is relative to the
# notebook's own folder (i.e. starts with "../"); this one was missing the
# prefix and so pointed at a different location from the standards.csv used
# when writing the calczaf files.
std_dbase_info_file = Path('../data/_dictionaries/standards.csv')

In [None]:
# Build the table of datasets to process for the configured samples/folders.
# The APF values file is optional (None may be passed instead, see inline note);
# the WD scan fit-parameter file is passed along as well.
datalist = readfiles.find_files_and_folders(
                samples, sample_folders,
                # apf_file = None,
                apf_file=Path('../data/_dictionaries/apf_values.csv'), #<- Can put None in here
                wd_scan=wd_scan
                )

datalist

In [None]:
# Build one correct_quant.Spot per row of `datalist`
n_datasets = len(datalist.folder)
myspot = []

for i in range(n_datasets):
    # Read the raw data belonging to row i of the dataset table
    peak, bg, standard, info = readfiles.read_and_organise_data(
        datalist.loc[i, :].copy(),
        bgi=False,
        save=False,
    )

    spot = correct_quant.Spot()
    spot.add_data(info, bg, peak, standard)
    spot.add_wd_scan_params_from_file(wd_scan)
    print('Read dataset:', i + 1, 'of', len(datalist), ':',
          spot.info.comment)
    spot.comprehensify_data()

    myspot.append(spot)

In [None]:
# Process all spots with correct_quant.process_datasets and send the output to
# the D2872 background_corrections folder.
# num_mc_sims=100 is presumably the number of Monte Carlo simulations - TODO confirm
correct_quant.process_datasets(
    myspot, 
    datalist, 
    num_mc_sims=100, 
    path_out=Path("../data/processed/basaltic_glasses/background_corrections/D2872")
    )

Write calczaf file

In [None]:
samples = ['D2872']
category = 'basaltic glasses'
subfolder = Path('../data/processed/basaltic_glasses/calczaf_files/D2872/')

write_detection_limit_calczaf_files = True
detlim_subfolder = subfolder / Path('detlim')

# note: in the subfolder there must be a file specifying valence.
# this can be copied from the _dictionaries folder.
valence_dict = readfiles.read_valence_file(subfolder, pattern='valence*')

# Two-column lookup table read as {first column: second column}.
# The `squeeze=True` keyword of read_csv was removed in pandas 2.0, so the
# single remaining data column is squeezed to a Series after reading instead.
standard_database_dict = (
    pd.read_csv(
        '../data/_dictionaries/standards.csv',
        index_col=0,
        header=None,
    )
    .squeeze("columns")
    .to_dict()
)

standard_database_dict

In [None]:
# Make a dictionary
sampledata = {"D2872": myspot}

In [None]:
# Or use actual analyses from StA
majors_relevant = majors_summary["D2872"].loc[
    majors_summary["D2872"].index[~majors_summary["D2872"].index.isin(["O", "Total"])],
    "wt% mean",
]
majors_relevant

In [None]:
# For multiple different methods of processing the data, add a description
run_descriptor = ['_1_base', '_2_bg', '_3_bg_apf']  
# Leave as a list of an empty string if not using: e.g. run_descriptor = ['']

for i in range(len(samples)):

    # Here we pass in these arguments as a dictionary - this is useful in order
    # to reuse the arguments for the detection limit function. But you can
    # alternatively pass in each argument just by defining it in the function
    # as normal (see glasses example).

    args = {
              'elementByDifference' : 'h' # string element symbol
            , 'elementByStoichToStoichOxygen' : None # string element symbol
            , 'stoichOxygenRatio' : 0
            # for hyalophane there is H
            # that can be defined stoichiometrically relative to N:
            , 'elementByStoichToOtherElement' : None
            , 'OtherElement' : None
            , 'stoichElementRatio' : None

            , 'correct_bg' : False
            , 'correct_apf' : False

            # Elements to omit from matrix correction
            # (e.g. if analysed but not actually present in sample)
            , 'remove_elements' : None

            , 'definedElements' : majors_relevant.index # list of element symbols to add
            , 'definedElementWts' : majors_relevant.values # list of known element wt% to add
            }
    
    # Make copies of args with different values
    args2 = args.copy()
    args2["correct_bg"] = True
    args2["correct_apf"] = False

    args3 = args2.copy()
    args3["correct_bg"] = True
    args3["correct_apf"] = True

    args_list = [args, args2, args3]

    for j in range(len(run_descriptor)):
        print("******************************************************")
        print(args_list[j]["correct_bg"], args_list[j]["correct_apf"])
        print("******************************************************")

        calczaf_path_out = subfolder / '{}{}.dat'.format(
                                            samples[i], run_descriptor[j])
        open(calczaf_path_out, 'w').close()  # Erase contents of file

        if write_detection_limit_calczaf_files:
            
            detlim_path_out = detlim_subfolder / '{}{}_detlim.dat'.format(
                                            samples[i], run_descriptor[j])
            open(detlim_path_out, 'w').close()  # Erase contents of file

        for spot in sampledata[samples[i]]:

            calczaf.write_calczaf_input(
                spot, calczaf_path_out, valence_dict, standard_database_dict,
                accV=10, calcMode=2, taAngle=40, Oxide_or_Element=1,
                **args_list[j]) # <- **args unpacks the args dictionary defined earlier
                # so that all those arguments are passed into the function
                # without the need to type them all out.

            if write_detection_limit_calczaf_files:
                if args_list[j]['correct_bg']:

                    detlim_spot = correct_quant.create_detection_limit_spot(spot)

                    calczaf.write_calczaf_input(
                        detlim_spot, detlim_path_out, valence_dict, 
                        standard_database_dict,
                        accV=10, calcMode=2, taAngle=40, Oxide_or_Element=1,
                        **args_list[j])
                    
                else:
                    print('\n\nWarning: Not writing detection limit file.' 
                            'Calculating detection limit does not make sense'
                            ' except on background-corrected data. Raw data files' 
                            ' contain an estimate of detection limit without bg'
                            ' correction.\n')
                    
    

In [None]:
folderpath = Path('../data/processed/basaltic_glasses/calczaf_files/D2872/')

helper_funs.check_calczaf_folder_exists(folderpath)
# First file (in sorted order) whose name starts with "valence";
# indexing with [0] raises IndexError if the folder contains no such file
valence_file = sorted(folderpath.glob('valence*'))[0]

results = calczaf.process_calczaf_outputs(folderpath, valence_file)

# For detection limits

results_detlim = calczaf.process_calczaf_outputs(folderpath / 'detlim/', valence_file, detlim=True)

In [None]:
# Pair each dataset's comment with the "N" row of the wt% table from the
# background + APF corrected run.
# NOTE(review): pd.DataFrame aligns the two Series on their index labels -
# confirm that datalist's index matches the column labels of that wt% table.
N_by_method = pd.DataFrame(
    {"comment": datalist["comment"],
     "N wt": results["wtdata"]["D2872_3_bg_apf"].loc["N", :]
     }
     )

# Strip a trailing "_<digits>" from the comment so that repeat analyses share one label
N_by_method["method"] = N_by_method["comment"].str.replace(r'_[0-9]+$', '', regex=True)
# Mean N wt value per method label
N_by_method.groupby("method")["N wt"].mean()

Okay, that's interesting. So for sure, the 100 nA, 30 micron method gave us more nitrogen than the other methods.
Does this indicate that nitrogen was lost in all the other methods? Quite probably.

Shame I only got three analyses of these but that's okay. 

Or is it for sure? Maybe it's just variability in the sample?

After, some other spots 

In [None]:
# Show the per-analysis table rounded to 2 decimals
N_by_method.round(2)

In [None]:
# Average and standard deviation of the "N" row for every processed run,
# with the stdev entry relabelled to say what it measures.
n_summary = {
    nm: wtdata.loc["N", ["average", "stdev"]].rename(
        {"stdev": "stdev (multiple measurements)"}
    )
    for nm, wtdata in results["wtdata"].items()
}
# Not used at present (typical_kratios is not defined in this notebook):
#     n_summary[nm]["typical stdev on individual measurement"] = (
#         n_summary[nm]["average"] *
#         typical_kratios
#         .loc[nm, "Stdev % (relative)"]/100
#     )

# One row per run
pd.concat(n_summary, axis=1).transpose()

In [None]:
# Average and stdev columns of every run's wt% table, side by side
# (run name becomes the outer column level), rounded to 2 decimals
wt_pct_summary_table = pd.concat(
    {k: v[["average", "stdev"]] for k, v in results["wtdata"].items()},
    axis=1
    ).round(2)

# Save next to the other basaltic-glass outputs. The previous target,
# "../data/processed/hyalophane_StA/wt_pct_summary_GaNcalib.csv", belongs to a
# different dataset and could be clobbered by running this notebook.
wt_pct_summary_table.to_csv("../data/processed/basaltic_glasses/D2872_wt_pct_summary.csv")

wt_pct_summary_table
