In [387]:
# imports
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy import constants
from scipy import stats
from scipy.optimize import leastsq

In [388]:
# physical / mathematical constants shared by the cells below
k = constants.physical_constants['Boltzmann constant in eV/K'][0]  # Boltzmann constant, eV/K
e = np.exp(1.0)  # Euler's number

In [389]:
# Load the raw data, reading only the ID, trait and temperature columns
GRDF = pd.read_csv(
    "../Data/GrowthRespPhotoData_new.csv",
    usecols=[
        "FinalID",
        "StandardisedTraitName",
        "StandardisedTraitDef",
        "StandardisedTraitValue",
        "StandardisedTraitUnit",
        "AmbientTemp",
        "AmbientTempUnit",
        "ConTemp",
        "ConTempUnit",
        "ResTemp",
        "ResTempUnit",
    ],
    low_memory=False,
)

In [390]:
# NewID: integer category code of each row's FinalID
GRDF["NewID"] = GRDF["FinalID"].astype("category").cat.codes

In [391]:
# keep strictly positive trait values only (zeros, negatives and NAs fail the test)
GRDF = GRDF[GRDF["StandardisedTraitValue"] > 0]

In [392]:
# return relevant temp
def f(group):
    """Pick one temperature source for a whole group and expose it as UsedTemp.

    The source is chosen once per group, in this order of preference:

    * ``ConTemp``     - when no ConTemp value in the group is missing;
    * ``ResTemp``     - when some ConTemp is missing but no ResTemp is;
    * ``AmbientTemp`` - otherwise.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of one group; must contain the ConTemp, ResTemp and AmbientTemp
        columns together with their matching ``...Unit`` columns.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same rows plus three columns: ``UsedTemp``
        (values of the chosen source), ``UsedTempUnit`` (its unit column)
        and ``UsedTempType`` (the name of the chosen source).
    """
    if group.ConTemp.isnull().any():
        if group.ResTemp.isnull().any():
            source = "AmbientTemp"
        else:
            source = "ResTemp"
    else:
        source = "ConTemp"

    # Build a new frame rather than writing into `group`: groupby.apply does
    # not support functions that mutate the object they are handed.
    return group.assign(UsedTemp=group[source],
                        UsedTempUnit=group[source + "Unit"],
                        UsedTempType=source)

# Run f on every NewID group and collect the per-group results back into GRDF.
GRDF = GRDF.groupby("NewID").apply(f)

In [393]:
# convert to kelvin by adding the 0 degC offset (273.15)
# NOTE(review): assumes UsedTemp is in degrees Celsius; UsedTempUnit is not checked here - confirm
GRDF["UsedTempK"] = GRDF["UsedTemp"] + constants.zero_Celsius

In [394]:
# order rows by group ID, then by temperature within each group
GRDF = GRDF.sort_values(by=["NewID", "UsedTempK"])

In [395]:
# removes groups where all temp values are the same
def f(group):
    """Tell groupby.filter whether a group spans more than one temperature.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of one group; must contain a ``UsedTemp`` column.

    Returns
    -------
    bool
        True (keep the group) when ``UsedTemp`` holds at least two distinct
        non-missing values, False (drop it) otherwise.
    """
    # nunique() ignores NaN, so a missing temperature is not mistaken for a
    # second distinct temperature (unique() would count it as one).
    return bool(group.UsedTemp.nunique() > 1)
    
# Keep only the NewID groups for which f returns True.
GRDF = GRDF.groupby("NewID").filter(f)

In [396]:
# drops groups whose trait values are all identical
def f(group):
    """Return False when StandardisedTraitValue has exactly one distinct value.

    Used as a groupby.filter predicate: True keeps the group, False drops it.
    """
    distinct_values = group.StandardisedTraitValue.unique()
    return len(distinct_values) != 1
    
# Keep only the NewID groups for which f returns True.
GRDF = GRDF.groupby("NewID").filter(f)

In [397]:
# remove first point if it's much higher (change to difference between points?)
def f(group):
    """Drop a group's first row when its trait value exceeds 3x the second's.

    The group's index is reset first, so the returned frame carries the old
    index as a column and is numbered from 0 (from 1 when the first row was
    dropped). A group with a single row is returned whole.
    """
    flat = group.reset_index()
    trait = flat.StandardisedTraitValue
    try:
        first_is_spike = trait[0] > 3 * trait[1]
    except KeyError:
        # no second row to compare against
        return flat
    return flat[1:] if first_is_spike else flat
    
# Run f on every NewID group and collect the per-group results back into GRDF.
GRDF = GRDF.groupby("NewID").apply(f)

In [398]:
# keep just the columns used from here on
GRDF = GRDF.loc[:, ["FinalID",
                    "StandardisedTraitName",
                    "StandardisedTraitDef",
                    "StandardisedTraitValue",
                    "StandardisedTraitUnit",
                    "UsedTemp",
                    "UsedTempType",
                    "UsedTempK"]]

In [399]:
# Keep only groups with more than five rows (groups of five or fewer are dropped).
# NOTE(review): a group of exactly five rows is removed too - confirm the intended minimum.
GRDF = GRDF.groupby("NewID").filter(lambda x: len(x) > 5)

In [400]:
# natural log of the trait value
GRDF["STVlogged"] = np.log(GRDF["StandardisedTraitValue"])

In [401]:
# 1/kT, with T in kelvin and k the Boltzmann constant in eV/K
GRDF["adjTemp"] = 1 / (k * GRDF["UsedTempK"])

In [402]:
# move index level 0 (expected to be NewID) out of the index into a column
GRDF = GRDF.reset_index(level=0)

In [403]:
# function to get starting values
def f(group):
    """Derive starting parameter values for one group from linear fits.

    STVlogged is regressed on adjTemp. The rows are split at the position of
    the largest STVlogged value: the rows before it give the first fit, the
    rows from it onwards give the second. A single fit over all rows is used
    for both when the maximum sits at position 0, 1 or the last row, or when
    either side of the split holds only one distinct adjTemp value. It is
    also the fallback for a side whose regression raises ValueError.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of one group with ``adjTemp``, ``STVlogged`` and ``UsedTemp``
        columns. Rows are used in the order given.

    Returns
    -------
    pandas.DataFrame
        One row (index 0) with columns E, El, Eh, Eint, Ehint, B0, Th, Tl.
    """
    flat = group.reset_index()
    x = flat.adjTemp
    y = flat.STVlogged
    xVals = flat.UsedTemp

    split = np.argmax(y)  # position of the largest logged trait value

    peak_at_edge = split + 1 == len(y) or split == 0 or split == 1
    use_single_fit = (peak_at_edge
                      or x[:split].nunique() == 1
                      or x[split:].nunique() == 1)

    if use_single_fit:
        lm1 = lm2 = stats.linregress(x, y)
    else:
        try:
            lm1 = stats.linregress(x[:split], y[:split])
        except ValueError:
            lm1 = stats.linregress(x, y)
        try:
            lm2 = stats.linregress(x[split:], y[split:])
        except ValueError:
            lm2 = stats.linregress(x, y)

    slope1, intercept1 = lm1[0], lm1[1]
    slope2, intercept2 = lm2[0], lm2[1]

    # NOTE(review): E and Eh are the raw regression slopes (no sign change is
    # applied here), and Th/Tl are taken from UsedTemp rather than UsedTempK -
    # confirm that is what the downstream fit expects.
    vals = {"E"     : slope1,
            "El"    : slope1/2,
            "Eh"    : slope2,
            "Eint"  : intercept1,
            "Ehint" : intercept2,
            # first fit line evaluated at x = 1/(k*283.15)
            "B0"    : slope1*(1/(k*283.15)) + intercept1,
            "Th"    : xVals[split],
            "Tl"    : min(xVals)}

    return pd.DataFrame(vals, index = [0])

# Build one row of starting values per NewID group by running f on each group.
strt_vals = GRDF.groupby("NewID").apply(f)

In [404]:
# move index level 0 of the starting values (expected to be NewID) into a column
strt_vals = strt_vals.reset_index(level=0)

In [405]:
# attach each group's starting values to its rows, matching on NewID
GRDF = GRDF.merge(strt_vals, on="NewID")

In [411]:
# write the merged data to disk without the row index
GRDF.to_csv(path_or_buf="../Results/sorted_data.csv", index=False)