In [1]:
import CoolProp.CoolProp as cp
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import re
import seaborn
import pickle
import contextlib
from tqdm import tqdm  # Import tqdm for progress bar


import DiadFit as pf
pf.__version__

'1.0.5'

In [2]:
# Setting plotting parameters (applied in one call; each key is set exactly once)
plt.rcParams.update({
    # Fonts and math text
    "font.family": 'Times New Roman',
    "font.size": 12,
    "mathtext.default": "regular",
    "mathtext.fontset": "dejavusans",
    # Line widths of patches and axes frames
    'patch.linewidth': 1,
    'axes.linewidth': 1,
    # Tick direction
    "xtick.direction": "out",
    "ytick.direction": "out",
    # Tick lengths (major, then minor)
    "xtick.major.size": 6,
    "ytick.major.size": 6,
    "xtick.minor.size": 3,
    "ytick.minor.size": 3,
    # Size of the numbers on the tick marks
    "ytick.labelsize": 12,
    "xtick.labelsize": 12,
    # Overall title and axes labels
    "axes.titlesize": 14,
    "axes.labelsize": 14,
    # Font handling in exported PDF / SVG files
    "pdf.fonttype": 42,
    'svg.fonttype': 'none',
    "legend.fancybox": False,
})

In [3]:
# Platform path separator. Paths below are built with os.path.join; `slash` is
# still defined because other cells may reference it.
slash = os.sep

# All output folders are created relative to the current working directory.
MasterFolder = os.getcwd()

#Folder to save figures
figpath = os.path.join(MasterFolder, 'Figs')
# exist_ok=True makes the cell safe to re-run without a separate existence check.
os.makedirs(figpath, exist_ok=True)

#Folder to save full datasets
compilation_folder = os.path.join(MasterFolder, 'Compiled_data')
os.makedirs(compilation_folder, exist_ok=True)

In [4]:
# Olivine analyses (sheet 'concatenated_olivines')
crystals = pd.read_excel(
    'KAM_EDS_12062024.xlsx',
    sheet_name='concatenated_olivines',
)

# Helper sheet listing the crystal repeats; it is the left side of the merge below
crystal_reps = pd.read_excel(
    'Helper_file_120624.xlsx',
    sheet_name='Crystals_EDS_reps',
)

# Discard every column whose header starts with 'Unnamed'
crystals = crystals.loc[:, ~crystals.columns.str.contains('^Unnamed')]

# Left merge on 'Name_windex': every row of the helper sheet is kept
crystals_wnames = crystal_reps.merge(crystals, on=['Name_windex'], how='left')
crystals_wnames


Unnamed: 0,Index,Sample Name_x,Name_windex,Type,Name_Full_xtal,Sample,Crystal,Sample_crystal,Sample Name_y,Predict_Mineral,...,#_ions__O,#_ions__Mg,#_ions__Si,#_ions__Ca,#_ions__Cr,#_ions__Mn,#_ions__Fe,#_ions__Ni,Fo,K2O
0,0,KA69_c400_a1,KA69_c400_a1_0,Olivine,KA69_c400_a1,KA69,c400,KA69_c400,KA69_c400_a1,Olivine,...,,,,,,,,,,0
1,1,KA69_c400_a2,KA69_c400_a2_1,Olivine,KA69_c400_a2,KA69,c400,KA69_c400,KA69_c400_a2,Olivine,...,,,,,,,,,,0
2,2,KA69_c400_a3,KA69_c400_a3_2,Olivine,KA69_c400_a3,KA69,c400,KA69_c400,KA69_c400_a3,Olivine,...,,,,,,,,,,0
3,3,KA69_c401_a1,KA69_c401_a1_3,Olivine,KA69_c401_a1,KA69,c401,KA69_c401,KA69_c401_a1,Olivine,...,,,,,,,,,,0
4,4,KA69_c402_a1,KA69_c402_a1_4,Olivine,KA69_c402_a1,KA69,c402,KA69_c402,KA69_c402_a1,Olivine,...,,,,,,,,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
681,681,187-1-111-reg3,187-1-111-reg3_681,Olivine,187-1-111-reg3,KA71,c111,KA71_c111,187-1-111-reg3,Olivine,...,,,,,,,,,0.874456,0
682,682,187-1-111-reg4,187-1-111-reg4_682,Olivine,187-1-111-reg4,KA71,c111,KA71_c111,187-1-111-reg4,Olivine,...,,,,,,,,,0.880375,0
683,683,187-1-111-reg5,187-1-111-reg5_683,Olivine,187-1-111-reg5,KA71,c111,KA71_c111,187-1-111-reg5,Olivine,...,,,,,,,,,0.876892,0
684,684,187-1-111-reg6,187-1-111-reg6_684,Olivine,187-1-111-reg6,KA71,c111,KA71_c111,187-1-111-reg6,Olivine,...,,,,,,,,,0.881516,0


In [5]:
## Filter the crystals (to ignore EDS spots when WDS is available)

# When a spot has both a plain row ("<name>") and a WDS row ("<name>_WDS"),
# only the WDS row is kept so that it alone feeds the averages.

# Every sample name, collected once so the membership test in filter_rows is a
# set lookup rather than a list rebuilt and scanned for every row.
sample_names = set(crystals_wnames['Sample Name_x'])

# NOTE: str.rstrip('_WDS') strips any trailing run of the characters '_', 'W',
# 'D' and 'S' (e.g. 'KA69_S' -> 'KA69'); it does not remove a suffix. The
# suffix is therefore removed with an end-anchored regex instead.
base_names = set(crystals_wnames['Sample Name_x'].str.replace(r'_WDS$', '', regex=True))


def filter_rows(row):
    """Decide whether the row with sample name ``row`` is kept.

    Parameters
    ----------
    row : str
        A value of the 'Sample Name_x' column.

    Returns
    -------
    bool
        True for names ending in '_WDS', and for names that have no
        '<name>_WDS' counterpart in ``crystals_wnames['Sample Name_x']``.
        False for names that do have such a counterpart.
    """
    # A missing (non-string) name cannot have a '_WDS' counterpart; keep the row.
    if not isinstance(row, str):
        return True
    if row.endswith('_WDS'):
        return True
    return f"{row}_WDS" not in sample_names


crystals_filtered = crystals_wnames[crystals_wnames['Sample Name_x'].apply(filter_rows)]

crystals_filtered

Unnamed: 0,Index,Sample Name_x,Name_windex,Type,Name_Full_xtal,Sample,Crystal,Sample_crystal,Sample Name_y,Predict_Mineral,...,#_ions__O,#_ions__Mg,#_ions__Si,#_ions__Ca,#_ions__Cr,#_ions__Mn,#_ions__Fe,#_ions__Ni,Fo,K2O
0,0,KA69_c400_a1,KA69_c400_a1_0,Olivine,KA69_c400_a1,KA69,c400,KA69_c400,KA69_c400_a1,Olivine,...,,,,,,,,,,0
1,1,KA69_c400_a2,KA69_c400_a2_1,Olivine,KA69_c400_a2,KA69,c400,KA69_c400,KA69_c400_a2,Olivine,...,,,,,,,,,,0
2,2,KA69_c400_a3,KA69_c400_a3_2,Olivine,KA69_c400_a3,KA69,c400,KA69_c400,KA69_c400_a3,Olivine,...,,,,,,,,,,0
3,3,KA69_c401_a1,KA69_c401_a1_3,Olivine,KA69_c401_a1,KA69,c401,KA69_c401,KA69_c401_a1,Olivine,...,,,,,,,,,,0
4,4,KA69_c402_a1,KA69_c402_a1_4,Olivine,KA69_c402_a1,KA69,c402,KA69_c402,KA69_c402_a1,Olivine,...,,,,,,,,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
681,681,187-1-111-reg3,187-1-111-reg3_681,Olivine,187-1-111-reg3,KA71,c111,KA71_c111,187-1-111-reg3,Olivine,...,,,,,,,,,0.874456,0
682,682,187-1-111-reg4,187-1-111-reg4_682,Olivine,187-1-111-reg4,KA71,c111,KA71_c111,187-1-111-reg4,Olivine,...,,,,,,,,,0.880375,0
683,683,187-1-111-reg5,187-1-111-reg5_683,Olivine,187-1-111-reg5,KA71,c111,KA71_c111,187-1-111-reg5,Olivine,...,,,,,,,,,0.876892,0
684,684,187-1-111-reg6,187-1-111-reg6_684,Olivine,187-1-111-reg6,KA71,c111,KA71_c111,187-1-111-reg6,Olivine,...,,,,,,,,,0.881516,0


In [6]:
# Columns carried forward, in output order: identifiers, oxide values, then the
# 'Oxide % Sigma_' column of each oxide (the selection has no sigma column for K2O).
_id_cols = ['Sample Name_x', 'Name_Full_xtal', 'Sample', 'Sample_crystal', 'Predict_Mineral']
_oxide_cols = ['Al2O3', 'CaO', 'Cr2O3', 'FeOt', 'K2O', 'MgO', 'MnO', 'Na2O', 'NiO', 'SiO2', 'TiO2']
_sigma_cols = [f'Oxide % Sigma_{oxide}' for oxide in _oxide_cols if oxide != 'K2O']

# .copy() so that later column assignments act on an independent frame
crystals = crystals_filtered[_id_cols + _oxide_cols + _sigma_cols].copy()

def assign_inclusion_type(row):
    """Label a row 'MI' or 'FI' from its 'Sample_crystal' value.

    Returns 'MI' when 'Sample_crystal' is present and its string form contains
    the substring "c2"; returns 'FI' otherwise (including when it is missing).
    """
    crystal_id = row['Sample_crystal']
    has_c2_tag = (not pd.isna(crystal_id)) and ("c2" in str(crystal_id))
    return 'MI' if has_c2_tag else 'FI'

# Label every row, then place the labels as the fifth column (position 4)
inclusion_labels = crystals.apply(assign_inclusion_type, axis=1)
crystals.insert(4, 'Inclusion type', inclusion_labels)
crystals

Unnamed: 0,Sample Name_x,Name_Full_xtal,Sample,Sample_crystal,Inclusion type,Predict_Mineral,Al2O3,CaO,Cr2O3,FeOt,...,Oxide % Sigma_Al2O3,Oxide % Sigma_CaO,Oxide % Sigma_Cr2O3,Oxide % Sigma_FeOt,Oxide % Sigma_MgO,Oxide % Sigma_MnO,Oxide % Sigma_Na2O,Oxide % Sigma_NiO,Oxide % Sigma_SiO2,Oxide % Sigma_TiO2
0,KA69_c400_a1,KA69_c400_a1,KA69,KA69_c400,FI,Olivine,,0.3600,0.03,14.9300,...,,0.0100,0.01,0.0400,0.0600,0.0200,,0.0200,0.0600,
1,KA69_c400_a2,KA69_c400_a2,KA69,KA69_c400,FI,Olivine,,0.3400,0.04,15.0700,...,,0.0100,0.01,0.0400,0.0600,0.0200,,0.0200,0.0600,
2,KA69_c400_a3,KA69_c400_a3,KA69,KA69_c400,FI,Olivine,,0.3600,0.03,15.3700,...,,0.0100,0.01,0.0400,0.0600,0.0200,,0.0200,0.0600,
3,KA69_c401_a1,KA69_c401_a1,KA69,KA69_c401,FI,Olivine,,0.3600,0.02,15.5400,...,,0.0100,0.01,0.0400,0.0600,0.0200,,0.0200,0.0600,
4,KA69_c402_a1,KA69_c402_a1,KA69,KA69_c402,FI,Olivine,,0.3600,0.00,14.9600,...,,0.0100,0.01,0.0400,0.0600,0.0200,,0.0200,0.0600,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
681,187-1-111-reg3,187-1-111-reg3,KA71,KA71_c111,FI,Olivine,,0.3486,,11.8965,...,,0.0244,,0.0873,0.1235,0.0345,,0.0102,0.1357,
682,187-1-111-reg4,187-1-111-reg4,KA71,KA71_c111,FI,Olivine,,0.2936,,11.5140,...,,0.0247,,0.0883,0.1274,0.0354,,0.0104,0.1403,
683,187-1-111-reg5,187-1-111-reg5,KA71,KA71_c111,FI,Olivine,,0.2787,,11.7026,...,,0.0243,,0.0870,0.1238,0.0346,,0.0103,0.1362,
684,187-1-111-reg6,187-1-111-reg6,KA71,KA71_c111,FI,Olivine,,0.3290,,11.2923,...,,0.0244,,0.0858,0.1239,0.0343,,0.0105,0.1361,


In [7]:
def mean_excluding_nan_zero(series):
    """Return the mean of ``series`` over the entries that are neither NaN nor 0.

    If no entry qualifies, the result is NaN.
    """
    usable = series.notna() & series.ne(0)
    return series[usable].mean()

# One output row per 'Name_Full_xtal': repeated rows sharing a name are collapsed
grouped = crystals.groupby('Name_Full_xtal')
numeric_cols_ol = crystals.select_dtypes(include='number')
non_numeric_cols = crystals.select_dtypes(exclude='number')

# Numeric columns: mean over the non-NaN, non-zero entries of each group
numeric_crystalspot_averaged_mean = grouped[numeric_cols_ol.columns].agg(mean_excluding_nan_zero)

# Per-group standard deviation; computed here but not appended to the output below
numeric_crystalspot_averaged_std = grouped[numeric_cols_ol.columns].std()

# Non-numeric columns: first value of each group
non_numeric_crystalspot_averaged = grouped[non_numeric_cols.columns].first()

# Recombine, restore the column order of `crystals`, and drop the group-key index
crystalspot_averaged = (
    pd.concat(
        [numeric_crystalspot_averaged_mean, non_numeric_crystalspot_averaged],
        axis=1,
    )
    .reindex(columns=crystals.columns)
    .reset_index(drop=True)
)

# Flag names that were built from more than one input row
row_counts = grouped.size().reset_index(name='row_count')
row_counts['spot averaged?'] = (row_counts['row_count'] > 1).map({True: 'Yes', False: 'No'})
crystalspot_averaged = crystalspot_averaged.merge(
    row_counts[['Name_Full_xtal', 'spot averaged?']],
    on='Name_Full_xtal',
    how='outer',
)
crystalspot_averaged['spot averaged?'] = crystalspot_averaged['spot averaged?'].fillna('No')

# Export: clipboard (without the index) and an Excel file in the compilation folder
crystalspot_averaged.to_clipboard(excel=True, index=False)
crystalspot_averaged.to_excel(compilation_folder+'/'+"KAM_crystals_averaged_spots.xlsx")
crystalspot_averaged


Unnamed: 0,Sample Name_x,Name_Full_xtal,Sample,Sample_crystal,Inclusion type,Predict_Mineral,Al2O3,CaO,Cr2O3,FeOt,...,Oxide % Sigma_CaO,Oxide % Sigma_Cr2O3,Oxide % Sigma_FeOt,Oxide % Sigma_MgO,Oxide % Sigma_MnO,Oxide % Sigma_Na2O,Oxide % Sigma_NiO,Oxide % Sigma_SiO2,Oxide % Sigma_TiO2,spot averaged?
0,187-1-101-reg1,187-1-101-reg1,KA71,KA71_c101,FI,Olivine,,0.3078,,12.8572,...,0.0229,,0.0847,0.1161,0.0327,,0.0432,0.1276,,No
1,187-1-101-reg2,187-1-101-reg2,KA71,KA71_c101,FI,Olivine,,0.3442,,12.7108,...,0.0244,,0.0899,0.1239,0.0349,,0.0096,0.1364,,No
2,187-1-101-reg3,187-1-101-reg3,KA71,KA71_c101,FI,Olivine,,0.3298,,12.5571,...,0.0243,,0.0896,0.1231,0.0346,,0.0098,0.1354,,No
3,187-1-102-A,187-1-102-A,KA71,KA71_c102,FI,Olivine,,0.3671,,13.7361,...,0.0249,,0.0930,0.1248,0.0351,,0.0095,0.1373,,No
4,187-1-102-B,187-1-102-B,KA71,KA71_c102,FI,Olivine,,0.3870,,13.5197,...,0.0247,,0.0930,0.1252,0.0352,,0.0094,0.1377,,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
507,KA79_c503_a2,KA79_c503_a2,KA79,KA79_c503,FI,Olivine,,0.3061,0.0727,14.0268,...,0.0125,0.0172,0.0498,0.0665,0.0184,,0.0242,0.0731,,No
508,KA79_c503_a3,KA79_c503_a3,KA79,KA79_c503,FI,Olivine,,0.3061,,13.0582,...,0.0126,,0.0483,0.0666,0.0182,,0.0243,0.0731,,No
509,KA79_c504_a1,KA79_c504_a1,KA79,KA79_c504,FI,Olivine,,0.2474,0.0854,11.1310,...,0.0124,0.0172,0.0453,0.0667,0.0180,,0.0243,0.0733,,No
510,KA79_c505_a1,KA79_c505_a1,KA79,KA79_c505,FI,Olivine,,0.2502,0.1141,10.9196,...,0.0123,0.0174,0.0450,0.0668,0.0179,,0.0242,0.0735,,No


## Now average crystal composition per crystal

In [51]:
# NOTE(review): this repeats the identical definition from the spot-averaging
# cell; once that cell has run, this re-definition is redundant.
def mean_excluding_nan_zero(series):
    """Mean of ``series`` after discarding NaN entries and entries equal to 0."""
    discard = series.isna() | (series == 0)
    return series.loc[~discard].mean()

# Group by 'Sample_crystal': one output row per crystal
grouped = crystalspot_averaged.groupby('Sample_crystal')
numeric_cols_ol = crystalspot_averaged.select_dtypes(include='number')
non_numeric_cols = crystalspot_averaged.select_dtypes(exclude='number')

# Numeric columns: mean over non-NaN, non-zero entries; spread as standard deviation
numeric_df_averaged_mean = grouped[numeric_cols_ol.columns].agg(mean_excluding_nan_zero)
numeric_df_averaged_std = grouped[numeric_cols_ol.columns].std()

# Non-numeric columns: first value of each group
non_numeric_df_averaged = grouped[non_numeric_cols.columns].first()

df_averaged = pd.concat([numeric_df_averaged_mean, non_numeric_df_averaged], axis=1)
df_averaged = df_averaged.reindex(columns=crystalspot_averaged.columns)

# Append the per-crystal standard deviations as '<column>_crystal_std'
std_columns = numeric_df_averaged_std.add_suffix('_crystal_std')
df_averaged = pd.concat([df_averaged, std_columns], axis=1)
df_averaged = df_averaged.reset_index(drop=True)

# Flag crystals that were built from more than one row
row_counts = grouped.size().reset_index(name='row_count')
row_counts['crystal averaged?'] = 'No'
row_counts.loc[row_counts['row_count'] > 1, 'crystal averaged?'] = 'Yes'
df_averaged = pd.merge(df_averaged, row_counts[['Sample_crystal', 'crystal averaged?']], on='Sample_crystal', how='outer')
df_averaged['crystal averaged?'] = df_averaged['crystal averaged?'].fillna('No')

# Per-spot identifiers no longer apply at crystal level; also drop all-NaN columns
df_averaged = df_averaged.drop(['Sample Name_x', 'Name_Full_xtal'], axis=1)
df_averaged = df_averaged.dropna(axis=1, how='all')

# Oxide columns are selected by NAME: the numeric columns that are not
# 'Oxide % Sigma_' columns and that survived the all-NaN drop above. Positional
# slices would silently pick the wrong columns whenever the set of dropped
# all-NaN columns changes.
sigma_prefix = 'Oxide % Sigma_'
oxide_cols = [
    col for col in numeric_cols_ol.columns
    if not col.startswith(sigma_prefix) and col in df_averaged.columns
]

# Analytical total = row-wise sum of the oxide columns, placed as second column
df_averaged.insert(1, 'Total (wt%)', df_averaged[oxide_cols].sum(axis=1))

# Uncertainty per oxide: the crystal standard deviation where the crystal was
# averaged, otherwise the 'Oxide % Sigma_' value. A std or sigma column that is
# absent (e.g. dropped as all-NaN) contributes NaN instead of raising KeyError.
was_averaged = df_averaged['crystal averaged?'] != 'No'
nan_column = pd.Series(float('nan'), index=df_averaged.index)
for col in oxide_cols:
    crystal_std = df_averaged.get(f'{col}_crystal_std', nan_column)
    spot_sigma = df_averaged.get(f'{sigma_prefix}{col}', nan_column)
    df_averaged[f'{col}_wt%uncertainty ({col}_crystal_std when it was averaged by crystal, {col}_Oxide sigma% when it was a single spot)'] = crystal_std.where(was_averaged, spot_sigma)

# Export: clipboard (without the index) and an Excel file in the compilation folder
df_averaged.to_clipboard(excel=True, index=False)
df_averaged.to_excel(os.path.join(compilation_folder, "KAM_crystals_averagedbycrystal.xlsx"))

df_averaged


Unnamed: 0,Sample,Total (wt%),Sample_crystal,Inclusion type,Predict_Mineral,Al2O3,CaO,Cr2O3,FeOt,MgO,...,"Al2O3_wt%uncertainty (Al2O3_crystal_std when it was averaged by crystal, Al2O3_Oxide sigma% when it was a single spot)","CaO_wt%uncertainty (CaO_crystal_std when it was averaged by crystal, CaO_Oxide sigma% when it was a single spot)","Cr2O3_wt%uncertainty (Cr2O3_crystal_std when it was averaged by crystal, Cr2O3_Oxide sigma% when it was a single spot)","FeOt_wt%uncertainty (FeOt_crystal_std when it was averaged by crystal, FeOt_Oxide sigma% when it was a single spot)","MgO_wt%uncertainty (MgO_crystal_std when it was averaged by crystal, MgO_Oxide sigma% when it was a single spot)","MnO_wt%uncertainty (MnO_crystal_std when it was averaged by crystal, MnO_Oxide sigma% when it was a single spot)","Na2O_wt%uncertainty (Na2O_crystal_std when it was averaged by crystal, Na2O_Oxide sigma% when it was a single spot)","NiO_wt%uncertainty (NiO_crystal_std when it was averaged by crystal, NiO_Oxide sigma% when it was a single spot)","SiO2_wt%uncertainty (SiO2_crystal_std when it was averaged by crystal, SiO2_Oxide sigma% when it was a single spot)","TiO2_wt%uncertainty (TiO2_crystal_std when it was averaged by crystal, TiO2_Oxide sigma% when it was a single spot)"
0,AMG98_48g,99.003633,AMG98_48g_c353,FI,Olivine,,0.232417,0.065400,11.914367,46.367283,...,,0.000731,0.011597,0.016452,0.034295,0.001838,,0.002074,0.078159,
1,AMG98_48g,98.549233,AMG98_48g_c355,FI,Olivine,,0.238300,0.070367,12.556967,45.622167,...,,0.010533,0.014700,0.040867,0.056600,0.015633,,0.016833,0.062367,
2,AMG,100.140100,AMG_XEN4,FI,Olivine,,0.227267,0.058867,11.893233,47.052100,...,,0.010533,0.014700,0.040200,0.057400,0.015633,,0.016833,0.063200,
3,KA611,50.344650,KA611_c101,FI,Olivine,,0.192525,0.023625,6.852700,22.955875,...,,0.007884,0.009369,0.090863,1.234220,0.008450,,0.005869,1.024315,
4,KA611,99.004200,KA611_c102,FI,Olivine,,0.450100,0.044900,13.202300,45.295100,...,,0.011200,0.014600,0.041900,0.056900,0.015700,,0.020600,0.062600,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
258,KA79,100.366600,KA79_c502,FI,Olivine,,0.255000,0.113500,10.915200,48.030500,...,,0.012300,0.017200,0.045000,0.066800,0.018100,,0.024300,0.073500,
259,KA79,100.442167,KA79_c503,FI,Olivine,,0.305933,0.072700,13.349533,46.087300,...,,0.000289,,0.588434,0.371135,0.018032,,0.014068,0.029251,
260,KA79,100.110800,KA79_c504,FI,Olivine,,0.247400,0.085400,11.131000,47.738100,...,,0.012400,0.017200,0.045300,0.066700,0.018000,,0.024300,0.073300,
261,KA79,100.421700,KA79_c505,FI,Olivine,,0.250200,0.114100,10.919600,48.074100,...,,0.012300,0.017400,0.045000,0.066800,0.017900,,0.024200,0.073500,
