### Process AgeCalc ###
This notebook takes U-Pb data tables output from AgeCalcML and saves the files used in `figs.ipynb`, `supp.ipynb`, and `output_to_isoplotr.ipynb`. All cells must be run before using those notebooks.

In [None]:
# Imports
import os
import pandas as pd
import numpy as np

from geoscripts.dz import dz

In [None]:
# Locate the AgeCalc U-Pb output folders (relative paths, one per filtering mode)
directory_no_filter, directory_rel_age_filter = (
    os.path.normpath(os.path.join('..', 'upb_data_agecalc', subdir))
    for subdir in ('no_filter', 'rel_age_filter')
)

In [None]:
# Make a class for AgeCalc DZ data
class AgeCalcDZ:
    """Plain container for the pieces read from one AgeCalc data table.

    Attributes:
        name: Sample identifier.
        accepted: Table of accepted analyses.
        rejected: Table of rejected analyses.
        standards: Table of standard analyses.
        syst_238: Systematic error value stored for the 206Pb/238U system.
        syst_207: Systematic error value stored for the 206Pb/207Pb system.
    """

    def __init__(self, name, accepted, rejected, standards, syst_238, syst_207):
        # Values are stored exactly as given; no copying or validation.
        self.name = name
        self.accepted, self.rejected, self.standards = accepted, rejected, standards
        self.syst_238, self.syst_207 = syst_238, syst_207

In [None]:
# Define function to process directory

def _read_table_block(path, usecols):
    """Read one 20-column table from 'Sheet1' of an AgeCalc data table workbook.

    Args:
        path: Path to the Excel workbook.
        usecols: Excel-style column range (e.g. ``'A:T'``) holding the table.

    Returns:
        DataFrame of the rows after the first 25, with columns labelled 0-19.
    """
    block = pd.read_excel(path, skiprows=25, usecols=usecols, header=None,
                          sheet_name='Sheet1')
    block.columns = np.arange(20)
    return block


def proc_directory(directory):
    """Convert every AgeCalc ``*_DataTable.xls*`` workbook under a directory to CSV.

    For each workbook found while walking ``directory``, the accepted analyses
    (minus those whose name contains 'SL') are written to
    ``proc_<dirname>/<sample>.csv`` and the two systematic error values read
    from the workbook are collected into ``<dirname>_syst.csv``. Both outputs
    are created relative to the current working directory.

    Args:
        directory: Path to the folder to walk. A trailing path separator is
            tolerated.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    print('Source Directory: ',directory)

    # normpath strips a trailing separator, which would otherwise make
    # basename() return '' and produce outputs named 'proc_' / '_syst.csv'.
    dir_label = os.path.basename(os.path.normpath(directory))
    output_dir = 'proc_' + dir_label

    os.makedirs(output_dir, exist_ok=True)
    print('Output Directory: ',output_dir)

    new_columns = [
        'Analysis', 'U', '206Pb/204Pb', 'U/Th', '206Pb/207Pb', '206Pb/207Pb_err',
        '207Pb/235U', '207Pb/235U_err', '206Pb/238U', '206Pb/238U_err', 'error_corr',
        '206Pb/238U_age', '206Pb/238U_ageerr', '207Pb/235U_age', '207Pb/235U_ageerr',
        '206Pb/207Pb_age', '206Pb/207Pb_ageerr', 'Best age', 'Best age_err', 'Conc',
    ]

    syst_errors = pd.DataFrame([], columns=['Syst_238', 'Syst_207'])

    for root, dirs, files in os.walk(directory):
        # Echo the sub-directory names when a level holds several of them
        if len(dirs) > 1:
            print(dirs)

        for file in files:
            # Only AgeCalc data tables; skip anything with 'lock' in its name
            if '_DataTable.xls' not in file or 'lock' in file:
                continue

            path = os.path.join(root, file)

            # Accepted, rejected and standard analyses sit side by side on Sheet1
            df = _read_table_block(path, 'A:T')
            reject = _read_table_block(path, 'W:AP')
            stand = _read_table_block(path, 'AS:BL')

            # Systematic errors: two cells of column B starting at the 10th row.
            # No sheet_name is given, so pandas reads the first sheet.
            syst = pd.read_excel(path, skiprows=9, nrows=2, header=None,
                                 usecols='B')
            syst_238 = syst.loc[0, 1]
            syst_207 = syst.loc[1, 1]

            # Remove empty rows
            df = df.dropna(how='all')
            reject = reject.dropna(how='all')

            # Move SLs to standards. Rows without an analysis name are left out
            # of both tables, as before. astype(str) keeps the .str accessor
            # valid even when the column is empty or not string-typed.
            analysis_ids = df.iloc[:, 0]
            has_id = analysis_ids.notna()
            is_sl = analysis_ids.astype(str).str.contains('SL')
            stand = pd.concat([stand, df[has_id & is_sl]])
            # Copy so that relabelling/dropping below never acts on a slice view
            df = df[has_id & ~is_sl].copy()

            # Remove rejected standards (names containing 'SL', 'F' or 'R').
            # The mask is built from a string copy of the names so the table
            # itself is not rewritten.
            reject_ids = reject.iloc[:, 0].astype(str)
            reject = reject[~reject_ids.str.contains('SL|F|R')]

            name = file.partition('_')[0]

            sample = AgeCalcDZ(name=name, accepted=df, rejected=reject,
                               standards=stand, syst_238=syst_238, syst_207=syst_207)
            print(sample.name, 'n=',len(sample.accepted))

            output = sample.accepted
            output.columns = new_columns

            # Deal with AB0926 Spot, which AgeCalcML will not reject even though it is unusable.
            if name == 'AB0926':
                is_spot44 = output['Analysis'].str.contains('spot 44', na=False)
                output.drop(output[is_spot44].index, inplace=True)

            output.to_csv(os.path.join(output_dir, sample.name + '.csv'))

            syst_errors.loc[name, 'Syst_238'] = syst_238
            syst_errors.loc[name, 'Syst_207'] = syst_207

    syst_errors.to_csv(dir_label + '_syst.csv')
        

In [None]:
# Process the unfiltered AgeCalc output (this is the dataset processed in IsoplotR).
# Writes per-sample CSVs to `proc_no_filter/` and systematic errors to `no_filter_syst.csv`.
proc_directory(directory_no_filter)


In [None]:
# Process the relative age filter output.
# Writes per-sample CSVs to `proc_rel_age_filter/` and systematic errors to `rel_age_filter_syst.csv`.
proc_directory(directory_rel_age_filter)

In [None]:
# Output relative age filter data to DZ objects for further processing
csv_dir = 'proc_rel_age_filter'

# Only the per-sample CSVs are wanted: os.listdir also returns any stray
# entries (hidden files, checkpoint folders) that would break read_csv or the
# sample-name extraction. Sorting makes the processing order reproducible.
files = sorted(f for f in os.listdir(csv_dir) if f.lower().endswith('.csv'))

# Lookup table with 'Sample Number' and 'Reported Age' columns
map_ages = pd.read_csv('../map_ages.csv')

# Plot color assigned to each reported age code
color_dict = {'J1':'midnightblue','J2':'blue','J3':'skyblue','K1':'darkgreen',
              'K2':'chartreuse'}

# Systematic errors written by proc_directory, indexed by sample name
syst_errors = pd.read_csv('rel_age_filter_syst.csv',index_col=0)

for file in files:
    print(file)
    filepath = os.path.join(csv_dir,file)
    data = pd.read_csv(filepath,index_col=0)

    # Sample name is the file name without its extension
    smp = dz.DZSample(name=os.path.splitext(file)[0],agedata=data,source='Vasey23')

    map_index = map_ages[map_ages['Sample Number']==smp.name].index
    if map_index.empty:
        # Fail with the sample name instead of an opaque IndexError
        raise KeyError(f"Sample '{smp.name}' has no entry in ../map_ages.csv")

    # First matching row wins if the sample is listed more than once
    smp.reported_age = map_ages.loc[map_index,'Reported Age'].values[0]
    smp.color = color_dict[smp.reported_age]
    smp.syst_238 = syst_errors.loc[smp.name,'Syst_238']
    smp.syst_207 = syst_errors.loc[smp.name,'Syst_207']

    smp.age_235 = smp.agedata['207Pb/235U_age']
    smp.age_238 = smp.agedata['206Pb/238U_age']

    smp.calc_bestage('206Pb/238U_age','206Pb/207Pb_age',err_238='206Pb/238U_ageerr',err_207='206Pb/207Pb_ageerr',use_err=True,err_lev='2sig',
                     filter_disc=False)

    smp.calc_mda(systematic=True,filter235238=True,cutoffs235238=(80,110))
    smp.calc_ysg(systematic=True,filter235238=True,cutoffs235238=(80,110))

    smp.save(path='dz_rel_age_filter/')