#### Code that fits sample MM20-EC-109 to a variety of apparent Pb loss functions
##### Accompaniment to "Modeling apparent Pb loss in zircon U-Pb geochronology", submitted to Geochronology
By: Glenn R. Sharman, Department of Geosciences, University of Arkansas

In [None]:
import convFuncs as convFunc

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
from scipy.stats import kstest
from astropy.stats import kuiper
from scipy.signal import convolve
from scipy.optimize import minimize

import pathlib

import xlsxwriter

import detritalpy.detritalFuncs as dFunc

from KDEpy import FFTKDE

from importlib import reload

import matplotlib
%matplotlib inline
%config InlineBackend.figure_format = 'retina' # For improving matplotlib figure resolution
matplotlib.rcParams['pdf.fonttype'] = 42 # For allowing preservation of fonts upon importing into Adobe Illustrator
matplotlib.rcParams['ps.fonttype'] = 42

In [None]:
# Excel workbook(s) to read
dataToLoad = ['MM20-EC-109.xlsx']

# Read the 'ZrUPb' sheet; loadDataExcel hands back four objects
# (presumably pandas DataFrames, judging by the names -- confirm in detritalpy)
(main_df,
 main_byid_df,
 samples_df,
 analyses_df) = dFunc.loadDataExcel(dataToLoad, dataSheet='ZrUPb')

In [None]:
# Sample to model, and its dates / uncertainties (requested as 1-sigma)
sampleList = ['MM20-EC-109']
ages, errors, numGrains, labels = dFunc.sampleToData(
    sampleList, main_byid_df, sigma='1sigma'
)

label = 'MM20-EC-109'

n_x = 20001  # Number of x-axis values

# Reference age from the CA-ID-TIMS weighted mean
age = 144.496257  # Ma
age_2s_uncert = 0.07  # Myr

# Plotting options
xlim = (110, 170)  # X-axis limits, Ma
xlim_Pb_loss = (-20, 1)  # X-axis limits, %
plot_ref_age = True

# Age axis (Ma): n_x evenly spaced values from zero to twice the reference age
xage_1 = 0
xage_2 = 2 * age
xage = np.linspace(xage_1, xage_2, num=n_x)

# Pb loss axis (%): n_x evenly spaced values from -100 to +100
# Note, it is not possible for a U-Pb date to be < -100% from its true age,
# as this would result in a negative age
x1 = -100
x2 = 100
x = np.linspace(x1, x2, num=n_x)

In [None]:
# Build a Gaussian kernel density estimate of the dates on the age axis
bw = 1  # Kernel bandwidth, in Ma

kde_estimator = FFTKDE(bw=bw, kernel='gaussian')
kde_estimator.fit(ages[0])
KDE = kde_estimator.evaluate(xage)

# Rescale so the evaluated values sum to one
KDE = KDE / KDE.sum()

In [None]:
# Histogram of the dates on the left y-axis
hist_ax = plt.gca()
hist_ax.hist(ages[0], bins=20, color='gray')
hist_ax.set_ylim(0)

# KDE on a second y-axis that shares the same x-axis
kde_ax = hist_ax.twinx()
kde_ax.plot(xage, KDE, color='black')
kde_ax.set_ylim(0)
kde_ax.set_xlim(*xlim)

In [None]:
# Drop old analyses: keep only rows whose 'BestAge' is below 158
# (presumably Ma, like the other ages in this notebook -- confirm)
is_young_enough = analyses_df['BestAge'] < 158
analyses_df = analyses_df.loc[is_young_enough]

# Rebuild the per-sample table from the filtered analyses and re-extract the dates
main_byid_df = dFunc.loadData(samples_df, analyses_df)
ages, errors, numGrains, labels = dFunc.sampleToData(
    sampleList, main_byid_df, sigma='1sigma'
)

In [None]:
# Re-create the KDE from the current contents of ages[0]
bw = 1  # Kernel bandwidth, in Ma

kde_estimator = FFTKDE(bw=bw, kernel='gaussian')
kde_estimator.fit(ages[0])
KDE = kde_estimator.evaluate(xage)
KDE = KDE / KDE.sum()  # Rescale so the evaluated values sum to one

# Histogram of the dates on the left y-axis
hist_ax = plt.gca()
hist_ax.hist(ages[0], bins=20, color='gray')
hist_ax.set_ylim(0)

# KDE on a second y-axis that shares the same x-axis
kde_ax = hist_ax.twinx()
kde_ax.plot(xage, KDE, color='black')
kde_ax.set_ylim(0)
kde_ax.set_xlim(*xlim)

In [None]:
# Fit the dates in ages[0] to each selected apparent-Pb-loss distribution g(t),
# convolved with a normal distribution f(t) whose mean is constrained to the
# reference age +/- its 2-sigma uncertainty. For every model the best-fit
# parameters, the misfit, and KS / Kuiper statistics are written to an Excel
# workbook, and (optionally) one summary figure per model is saved as PDF.
reload(convFunc)  # Re-import convFuncs so edits to that module take effect

dist_types = ['none','constant','isolated','uniform','gamma','expon','rayleigh','weibull','pareto','halfnorm','lognorm'] # Select which form(s) of Pb loss you want to model

omega = 1 # Guess, Myr

method = 'ss' # 'ss' is sum of squared residuals between ECDF and modeled CDF

plot_fig = True

# Output directory named after the sample; created recursively, and no
# exception is raised if it already exists
out_dir = pathlib.Path(str(label))
out_dir.mkdir(parents=True, exist_ok=True)

file_name = str(out_dir / ('model_results_' + str(label) + '.xlsx'))

# Offset (%) of the youngest date relative to the reference age.
# Not used below; kept in case other cells rely on it.
max_offset = (age-np.min(ages[0]))/age*-100

# Bounds shared by every model: f(t) age within the reference 2-sigma window,
# f(t) standard deviation (omega) non-negative
age_bounds = (age-age_2s_uncert, age+age_2s_uncert)
omega_bounds = (0, None)

# Per-model optimizer settings:
#   dist_type -> (initial guess for g(t) params, bounds for g(t) params, use Powell?)
# Models flagged True are minimized with method='Powell' and tol=1e-20; the
# others use scipy's default method for a bounded problem.
fit_settings = {
    'none':     ([0],         [(0, 0)],               True),   # shift in %, pinned to zero
    'constant': ([-2.0],      [(None, None)],         True),   # shift in %
    'isolated': ([-3.0, 0.5], [(-100, 0), (0, 1)],    True),   # shift in %, proportion of grains with shift (0-1)
    'expon':    ([1.0],       [(0, None)],            False),  # scale
    'rayleigh': ([0.5],       [(0, None)],            False),  # scale
    'halfnorm': ([1.0],       [(0, None)],            False),  # scale
    'lognorm':  ([1.0, 1.0],  [(0, None), (0, None)], True),   # scale, shape
    'weibull':  ([0.5, 0.5],  [(0, None), (0, None)], False),  # scale, shape
    'gamma':    ([0.5, 0.5],  [(0, None), (0, None)], False),  # scale, shape
    'uniform':  ([1.0, 1.0],  [(0, None), (0, None)], True),   # originally noted as "scale, shape" -- TODO confirm against convFuncs
    'pareto':   ([1.0],       [(0, None)],            False),  # shape
}

# Fail early on a misspelled or unsupported model name, before any output
# file is created and before any fit is run
unsupported = [d for d in dist_types if d not in fit_settings]
if unsupported:
    raise ValueError('Unsupported dist_type(s): {}'.format(unsupported))

result_headers = ['ss', 'KS Dmax (f*g)', 'KS p-value (f*g)', 'Kuiper Vmax (f*g)',
                  'Kuiper p-value (f*g)', 'f(t) age', 'f(t) 1 s.d.',
                  'g(t) params[0]', 'g(t) params[1]', 'g(t) params[2]']

# The context manager closes (and thereby writes) the workbook even if a fit
# raises part-way through, so results already recorded are not lost
with xlsxwriter.Workbook(file_name) as workbook:

    bold_format = workbook.add_format({'bold' : True})

    # Record model parameters
    params_sheet = workbook.add_worksheet('Model_parameters')
    params_sheet.write(0, 0, 'Sample', bold_format)
    params_sheet.write(0, 1, sampleList[0])
    params_sheet.write(1, 0, 'N (non-CA)', bold_format)
    params_sheet.write(1, 1, numGrains[0])
    params_sheet.write(2, 0, 'Misfit function', bold_format)
    params_sheet.write(2, 1, method)

    # Results table: one row per model, column 0 holds the model name
    results_sheet = workbook.add_worksheet('Model_results')
    for col, header in enumerate(result_headers, start=1):
        results_sheet.write(0, col, header, bold_format)

    for row, dist_type in enumerate(dist_types, start=1):
        print('Starting ',dist_type)

        init_g, bounds_g, use_powell = fit_settings[dist_type]
        params_0 = [age, omega] + init_g # Age (Ma), omega (Myr), then g(t) parameters
        bounds = [age_bounds, omega_bounds] + bounds_g

        minimize_kwargs = {'bounds' : bounds}
        if use_powell:
            minimize_kwargs.update(tol=1e-20, method='Powell',
                                   options={'maxiter' : 1e6, 'disp' : False})

        result = minimize(convFunc.misfit_ECDF, params_0,
                          args=(dist_type, x, xage, ages[0], method),
                          **minimize_kwargs)
        if not result.success:
            print('---Warning: optimizer did not report success: ', result.message)

        # Rebuild the best-fit model: g(t) on the % axis, f(t) on the Ma axis
        # (rescaled to sum to one), and their convolution
        Pb_loss_pct_pdf = convFunc.Pb_loss_fun(params=result.x[2:], dist_type=dist_type, x=x)

        rv_norm_Ma = norm(loc=result.x[0], scale=result.x[1])
        norm_Ma_pdf = rv_norm_Ma.pdf(xage)
        norm_Ma_pdf = norm_Ma_pdf/np.sum(norm_Ma_pdf)

        conv_Ma_pdf = convolve(Pb_loss_pct_pdf, norm_Ma_pdf, mode='same')

        # Goodness of fit of the dates against the modeled CDF
        model_cdf = convFunc.cdf_fun(xage, conv_Ma_pdf)
        ks_results = kstest(rvs=ages[0], cdf=model_cdf)
        kuiper_results = kuiper(data=ages[0], cdf=model_cdf)

        results_sheet.write(row, 0, dist_type)
        results_sheet.write(row, 1, result.fun)
        results_sheet.write(row, 2, ks_results[0])
        results_sheet.write(row, 3, ks_results[1])
        results_sheet.write(row, 4, kuiper_results[0])
        results_sheet.write(row, 5, kuiper_results[1])
        # Best-fit parameters start at column 6: f(t) age, f(t) 1 s.d., then g(t) params
        for i, value in enumerate(result.x):
            results_sheet.write(row, 6+i, value)

        print('---ss: ', np.round(result.fun,6))
        print('---Age: ', np.round(result.x[0],2))
        print('---1 s.d.: ', np.round(result.x[1],2))
        for i, value in enumerate(result.x[2:]):
            print('---g(t) params[{}]'.format(i),np.round(value,2))

        if plot_fig:
            fig = convFunc.plot_Pb_loss_model_approach_1(
                params_norm=result.x[0:2], params_Pb_loss=result.x[2:],
                fit=result.fun, dates_input=ages[0], errors_1s_input=errors[0],
                xage=xage, x=x, xlim=xlim, xlim_Pb_loss=xlim_Pb_loss, dist_type=dist_type,
                plot_ref_age=plot_ref_age, ref_age=age, ref_age_2s_uncert=age_2s_uncert)
            fig.savefig(str(out_dir / ('fig_' + str(dist_type) + '.pdf')))