In [48]:
import pandas as pd
import numpy as np
from scipy import constants
from lmfit import minimize, Parameters, report_fit
import matplotlib.pyplot as plt
%matplotlib inline  

In [49]:
# table handed to the fitting functions as df; schlfld_vals reads its columns
# NewID, UsedTempK, OTVlogged, B0, E, El, Eh, Tl and Th
GRDF = pd.read_csv("../Results/sorted_data.csv")
# Boltzmann constant in eV/K, divides the E, El and Eh parameters in the model
k = constants.value('Boltzmann constant in eV/K')
# Euler's number, used as the base of the exponential terms in the model
e = np.exp(1)

# the fitting functions draw randomised starting values with np.random.uniform
np.random.seed(111)  # set random seed for repeatability

In [160]:
################################################################################
# get_TSS()
################################################################################

def get_TSS(data):
    """returns the total sum of squares of data about its overall mean

    keyword arguments:
        data -- numeric values (array, Series or plain list), converted with
                np.asarray before use

    The squared deviations are added along the first axis, so one dimensional
    input gives a scalar."""

    values     = np.asarray(data)
    deviations = values - np.mean(values)

    # np.sum is vectorised, the builtin sum walks the array one element at a
    # time; axis = 0 keeps the result shape the builtin produced
    return np.sum(deviations**2, axis = 0)

################################################################################
# schlfld_vals()
################################################################################

def schlfld_vals(id, df):
    """returns in a dictionary x, y and starting values for fitting schoolfield models

    keyword arguments:
        id -- value of the NewID column selecting one curve
        df -- dataframe with columns NewID, UsedTempK, OTVlogged, B0, E, El,
              Eh, Tl and Th

    returns a dictionary with keys
        NewID        -- the id that was passed in
        xVals, yVals -- arrays of UsedTempK and OTVlogged for every row of the curve
        B0, Eh, Tl, Th -- value from the first row of the curve
        E, El          -- absolute value from the first row of the curve

    raises IndexError when no row of df has NewID == id"""

    rows = df[df.NewID == id]  # select the curve once instead of once per column

    if rows.empty:
        # same exception type the `.iloc[0]` lookup raised, with a usable message
        raise IndexError("no rows with NewID == {!r} in dataframe".format(id))

    def first(column):
        """value of column in the first row of the selected curve"""
        return rows[column].iloc[0]

    vals = {'NewID' : id,
            'xVals' : np.asarray(rows['UsedTempK']),
            'yVals' : np.asarray(rows['OTVlogged']),
            'B0'    : first('B0'),
            'E'     : abs(first('E')),
            'El'    : abs(first('El')),
            'Eh'    : first('Eh'),   # taken as stored, no abs() applied
            'Tl'    : first('Tl'),
            'Th'    : first('Th')}

    return vals

################################################################################
# full_schlfld_residuals()
################################################################################

def full_schlfld_residuals(params, x, data):
    """returns residuals of data and model with given parameters for full_schlfld_model

    keyword arguments:
        params -- mapping whose entries 'B0', 'E', 'El', 'Eh', 'Tl' and 'Th'
                  expose a .value attribute (e.g. lmfit Parameters)
        x      -- temperatures the model is evaluated at
        data   -- observed values on the log scale, same length as x

    returns model - data, where

        model = log( B0*exp((-E/k)*(1/x - 1/283.15))
                     / (1 + exp((El/k)*(1/Tl - 1/x)) + exp((Eh/k)*(1/Th - 1/x))) )

    The model is evaluated directly in log space. Forming the exponentials
    first overflows to inf (or underflows to 0) for large E, El or Eh and the
    quotient then turns into nan or -inf although the logged value is finite."""

    B0 = params['B0'].value
    E  = params['E'].value
    El = params['El'].value
    Eh = params['Eh'].value
    Tl = params['Tl'].value
    Th = params['Th'].value

    T_REF = 283.15  # temperature at which the E term of the numerator equals one

    inv_x = 1.0/np.asarray(x, dtype = float)

    # log of the numerator B0*exp(...)
    log_numer = np.log(B0) - (E/k)*(inv_x - 1.0/T_REF)

    # exponents of the two terms added to 1 in the denominator
    low_term  = (El/k)*(1.0/Tl - inv_x)
    high_term = (Eh/k)*(1.0/Th - inv_x)

    # log(1 + exp(low_term) + exp(high_term)) without forming the exponentials
    log_denom = np.logaddexp(0.0, np.logaddexp(low_term, high_term))

    model = log_numer - log_denom

    return model - data

################################################################################
# full_schlfld_nl_residuals()
################################################################################

def full_schlfld_nl_residuals(params, x, data):
    """returns non logged residuals of data and model with given parameters for full_schlfld_model

    keyword arguments:
        params -- mapping whose entries 'B0', 'E', 'El', 'Eh', 'Tl' and 'Th'
                  expose a .value attribute (e.g. lmfit Parameters)
        x      -- temperatures the model is evaluated at
        data   -- observed values on the log scale, same length as x

    returns exp(model) - exp(data), where model is the logged model

        model = log( B0*exp((-E/k)*(1/x - 1/283.15))
                     / (1 + exp((El/k)*(1/Tl - 1/x)) + exp((Eh/k)*(1/Th - 1/x))) )

    The logged model is built in log space and exponentiated once. Forming
    numerator and denominator separately overflows for large E, El or Eh and
    gives nan where the quotient itself is finite."""

    B0 = params['B0'].value
    E  = params['E'].value
    El = params['El'].value
    Eh = params['Eh'].value
    Tl = params['Tl'].value
    Th = params['Th'].value

    T_REF = 283.15  # temperature at which the E term of the numerator equals one

    inv_x = 1.0/np.asarray(x, dtype = float)

    # log of the numerator B0*exp(...)
    log_numer = np.log(B0) - (E/k)*(inv_x - 1.0/T_REF)

    # exponents of the two terms added to 1 in the denominator
    low_term  = (El/k)*(1.0/Tl - inv_x)
    high_term = (Eh/k)*(1.0/Th - inv_x)

    # log(1 + exp(low_term) + exp(high_term)) without forming the exponentials
    log_denom = np.logaddexp(0.0, np.logaddexp(low_term, high_term))

    model = log_numer - log_denom

    return np.exp(model) - np.exp(data)

In [161]:
################################################################################
# full_schlfld_model()
################################################################################

def full_schlfld_model(id, df, tries = 10, method = 1):
    """performs non linear least square model fitting for given ids TPC on full_schlfld_model

    The logged residuals (full_schlfld_residuals) are minimised with lmfit.
    The first try starts from the starting values stored in df. Every later
    try draws the starts of B0, E, El and Eh uniformly between 0 and twice the
    stored value; Tl and Th always start from their stored values.

    keyword arguments:
        id     -- specific curve id
        df     -- dataframe containing TPC data and starting values for all ids
        tries  -- maximum number of tries (the first uses the stored starting
                  values, the rest randomised ones)
        method -- 1 - stops trying once model converges
                  2 - continue trying to improve fit up to tries, keeping the
                      try with the lowest aic

    returns a one row dataframe with the parameter estimates, chisqr / aic /
    bic as reported by lmfit, RSS / TSS / Rsqrd on the logged scale and
    nlRSS / nlTSS / nlRsqrd / nlaic on the non logged scale. If no try
    converges the row holds the starting values and NaN for every statistic.

    raises ValueError if method is neither 1 nor 2"""

    if method not in (1, 2):
        # any other value never reached a break and looped forever
        raise ValueError("method must be 1 or 2, got {!r}".format(method))

    vals = schlfld_vals(id, df)  # get starting values and data from values function

    xVals    = vals["xVals"]     # temperatures
    yVals    = vals["yVals"]     # corresponding (logged) trait values
    n_obs    = len(xVals)
    n_params = 6                 # B0, E, El, Eh, Tl, Th

    # res will be output - starting values and NaN statistics until a try converges
    res = {'NewID'   : [vals["NewID"]],
           'B0'      : [vals["B0"]],
           'E'       : [vals["E"]],
           'El'      : [vals["El"]],
           'Eh'      : [vals["Eh"]],
           'Tl'      : [vals["Tl"]],
           'Th'      : [vals["Th"]],
           'chisqr'  : [np.nan],
           'RSS'     : [np.nan],
           'TSS'     : [np.nan],
           'Rsqrd'   : [np.nan],
           'nlRSS'   : [np.nan],
           'nlTSS'   : [np.nan],
           'nlRsqrd' : [np.nan],
           'nlaic'   : [np.nan],
           'aic'     : [np.nan],
           'bic'     : [np.nan]}

    best_aic = None  # lowest aic of the converged tries so far

    for trycount in range(1, tries + 1):

        try:
            params = Parameters()

            # first try uses the stored starting values, following tries
            # randomise them; the draw order B0, E, El, Eh is fixed so that
            # seeded runs stay reproducible
            for name in ('B0', 'E', 'El', 'Eh'):
                start = vals[name]
                if trycount > 1:
                    start = np.random.uniform(0, start*2)
                params.add(name, value = start, min = 0)

            # Tl and Th are never randomised
            for name in ('Tl', 'Th'):
                params.add(name, value = vals[name], min = 250, max = 400)

            # try minimize function to minimize residuals
            out = minimize(full_schlfld_residuals, params, args = (xVals, yVals))

            # goodness of fit on the logged scale
            RSS      = np.sum(full_schlfld_residuals(out.params, xVals, yVals)**2)
            TSS      = get_TSS(yVals)
            Rsquared = 1 - (RSS/TSS)

            # goodness of fit on the non logged scale
            nl_RSS   = np.sum(full_schlfld_nl_residuals(out.params, xVals, yVals)**2)
            nl_TSS   = get_TSS(np.exp(yVals))
            nl_Rsqrd = 1 - (nl_RSS/nl_TSS)
            nl_aic   = n_obs*np.log(nl_RSS/n_obs) + 2*n_params

            # keep this try if it is the first to converge or lowers the aic
            # (the comparison only matters for method == 2)
            if best_aic is None or out.aic < best_aic:
                best_aic = out.aic
                res = {'NewID'   : [id],
                       'B0'      : [out.params["B0"].value],
                       'E'       : [out.params["E"].value],
                       'El'      : [out.params["El"].value],
                       'Eh'      : [out.params["Eh"].value],
                       'Tl'      : [out.params["Tl"].value],
                       'Th'      : [out.params["Th"].value],
                       'chisqr'  : [out.chisqr],
                       'RSS'     : [RSS],
                       'TSS'     : [TSS],
                       'Rsqrd'   : [Rsquared],
                       'nlRSS'   : [nl_RSS],
                       'nlTSS'   : [nl_TSS],
                       'nlRsqrd' : [nl_Rsqrd],
                       'nlaic'   : [nl_aic],
                       'aic'     : [out.aic],
                       'bic'     : [out.bic]}

        # if it didn't converge go to next try
        except ValueError:
            continue

        if method == 1:  # method 1 stops at the first try that converged
            break

    # convert res to dataframe and output
    return pd.DataFrame(res)

In [117]:
################################################################################
# full_schlfld_model1()
################################################################################

def full_schlfld_model1(id, df, tries = 10, method = 1):
    """performs non linear least square model fitting for given ids TPC on the non logged scale

    Same procedure as full_schlfld_model, except that lmfit minimises the
    non logged residuals (full_schlfld_nl_residuals). chisqr, aic and bic in
    the output are the values lmfit reports for that objective.
    The first try starts from the starting values stored in df. Every later
    try draws the starts of B0, E, El and Eh uniformly between 0 and twice the
    stored value; Tl and Th always start from their stored values.

    keyword arguments:
        id     -- specific curve id
        df     -- dataframe containing TPC data and starting values for all ids
        tries  -- maximum number of tries (the first uses the stored starting
                  values, the rest randomised ones)
        method -- 1 - stops trying once model converges
                  2 - continue trying to improve fit up to tries, keeping the
                      try with the lowest aic

    returns a one row dataframe with the parameter estimates, chisqr / aic /
    bic as reported by lmfit, RSS / TSS / Rsqrd on the logged scale and
    nlRSS / nlTSS / nlRsqrd / nlaic on the non logged scale. If no try
    converges the row holds the starting values and NaN for every statistic.

    raises ValueError if method is neither 1 nor 2"""

    if method not in (1, 2):
        # any other value never reached a break and looped forever
        raise ValueError("method must be 1 or 2, got {!r}".format(method))

    vals = schlfld_vals(id, df)  # get starting values and data from values function

    xVals    = vals["xVals"]     # temperatures
    yVals    = vals["yVals"]     # corresponding (logged) trait values
    n_obs    = len(xVals)
    n_params = 6                 # B0, E, El, Eh, Tl, Th

    # res will be output - starting values and NaN statistics until a try converges
    res = {'NewID'   : [vals["NewID"]],
           'B0'      : [vals["B0"]],
           'E'       : [vals["E"]],
           'El'      : [vals["El"]],
           'Eh'      : [vals["Eh"]],
           'Tl'      : [vals["Tl"]],
           'Th'      : [vals["Th"]],
           'chisqr'  : [np.nan],
           'RSS'     : [np.nan],
           'TSS'     : [np.nan],
           'Rsqrd'   : [np.nan],
           'nlRSS'   : [np.nan],
           'nlTSS'   : [np.nan],
           'nlRsqrd' : [np.nan],
           'nlaic'   : [np.nan],
           'aic'     : [np.nan],
           'bic'     : [np.nan]}

    best_aic = None  # lowest aic of the converged tries so far

    for trycount in range(1, tries + 1):

        try:
            params = Parameters()

            # first try uses the stored starting values, following tries
            # randomise them; the draw order B0, E, El, Eh is fixed so that
            # seeded runs stay reproducible
            for name in ('B0', 'E', 'El', 'Eh'):
                start = vals[name]
                if trycount > 1:
                    start = np.random.uniform(0, start*2)
                params.add(name, value = start, min = 0)

            # Tl and Th are never randomised
            for name in ('Tl', 'Th'):
                params.add(name, value = vals[name], min = 250, max = 400)

            # try minimize function to minimize the non logged residuals
            out = minimize(full_schlfld_nl_residuals, params, args = (xVals, yVals))

            # goodness of fit on the logged scale
            RSS      = np.sum(full_schlfld_residuals(out.params, xVals, yVals)**2)
            TSS      = get_TSS(yVals)
            Rsquared = 1 - (RSS/TSS)

            # goodness of fit on the non logged scale
            nl_RSS   = np.sum(full_schlfld_nl_residuals(out.params, xVals, yVals)**2)
            nl_TSS   = get_TSS(np.exp(yVals))
            nl_Rsqrd = 1 - (nl_RSS/nl_TSS)
            nl_aic   = n_obs*np.log(nl_RSS/n_obs) + 2*n_params

            # keep this try if it is the first to converge or lowers the aic
            # (the comparison only matters for method == 2)
            if best_aic is None or out.aic < best_aic:
                best_aic = out.aic
                res = {'NewID'   : [id],
                       'B0'      : [out.params["B0"].value],
                       'E'       : [out.params["E"].value],
                       'El'      : [out.params["El"].value],
                       'Eh'      : [out.params["Eh"].value],
                       'Tl'      : [out.params["Tl"].value],
                       'Th'      : [out.params["Th"].value],
                       'chisqr'  : [out.chisqr],
                       'RSS'     : [RSS],
                       'TSS'     : [TSS],
                       'Rsqrd'   : [Rsquared],
                       'nlRSS'   : [nl_RSS],
                       'nlTSS'   : [nl_TSS],
                       'nlRsqrd' : [nl_Rsqrd],
                       'nlaic'   : [nl_aic],
                       'aic'     : [out.aic],
                       'bic'     : [out.bic]}

        # if it didn't converge go to next try
        except ValueError:
            continue

        if method == 1:  # method 1 stops at the first try that converged
            break

    # convert res to dataframe and output
    return pd.DataFrame(res)

In [178]:
id = 4  # curve that is fitted and evaluated in this cell

# fit the curve, then fetch its temperatures and trait values with one lookup
res = full_schlfld_model(id, GRDF)
xVals, yVals = map(schlfld_vals(id, GRDF).get, ("xVals", "yVals"))

# parameter estimates from the first row of the result table
B0, E, Eh, El, Th, Tl = (res[name][0] for name in ('B0', 'E', 'Eh', 'El', 'Th', 'Tl'))

# logged model prediction on 100 evenly spaced temperatures spanning the data
mdlx  = np.linspace(min(xVals), max(xVals), num = 100)
mdly  = np.log(
    (B0*e**((-E/k)*((1/mdlx)-(1/283.15))))
    / (1 + (e**((El/k)*((1/Tl)-(1/mdlx)))) + (e**((Eh/k)*((1/Th)-(1/mdlx))))))

In [179]:
# display the one row result table returned by full_schlfld_model for the chosen id
res

Unnamed: 0,B0,E,Eh,El,NewID,RSS,Rsqrd,TSS,Th,Tl,aic,bic,chisqr,nlRSS,nlRsqrd,nlTSS,nlaic
0,0.002123,44.413674,45.408571,43.047752,4,0.264387,0.984426,16.975802,284.204228,283.600471,-29.010617,-26.623245,0.264387,0.002806,0.924881,0.037353,-79.01324
