# Oak parameter data from TRY

In [1]:
import pandas as pd
import os
import myfuncs
from datetime import datetime
pd.set_option('display.max_rows', 400)
pd.set_option('display.min_rows', 400)

#define constants
m2_per_cm2 = 1e-4
g_biomass_per_g_C = 2
m2_per_mm2 = 1e-6
mg_per_g = 1e3
g_per_kg = 1000
mm2_per_cm2 = 100
g_per_mg = 1e-3
write_csv = True

In [2]:
path_to_trait_obs = "/home/adam/cloud/gdrive/postdoc/parameters/param_data/trait_observations"
write_to_csv = False

In [3]:
try_data = os.path.join(path_to_trait_obs,"conifer/TRY/conifer_traits_TRY.txt")

### Functions

In [4]:
def convert_to_fates_units(trait_name,input_units,input_value):

    
    if ("SLA" in trait_name) | ("Specific leaf area" in trait_name) and input_units == "mm2 mg-1":  
        output_value = input_value * m2_per_mm2 * mg_per_g * g_biomass_per_g_C
        return output_value

    elif "Leaf nitrogen" in trait_name and input_units == "mg/g":
        output_value = input_value * g_per_mg * g_biomass_per_g_C
        return output_value

    elif "Stem specific density" in trait_name and input_units == "g/cm3":
        output_value = input_value
        return output_value
    
    # Anaker conversions
    elif trait_name == "log(SLA cm2 g-1)":
        output_value = np.exp(input_value) * m2_per_cm2 * g_biomass_per_g_C
        return output_value
        
    elif trait_name == 'log(wood density g ml-1)':
        output_value = np.exp(input_value)
        return output_value
        
    elif trait_name == 'log(N %)':
        output_value = np.exp(input_value) * 1e-2 * g_biomass_per_g_C
        return output_value
    
    #Jepson leaf N
    elif trait_name == "Leaf Nmass":
    
        # muliplying by 1e-2 is to reverse the "per cent"  
        output_value = input_value * 1e-2 * g_biomass_per_g_C
        return output_value
    
    elif "Wood density" in trait_name:
        output_value = input_value
        return output_value
    
    elif trait_name == "Leaf area to sapwood area" and input_units == "mm2 mm-2":
        output_value = input_value * 1e-4
        return output_value
    
    elif "Vcmax" in trait_name and input_units == "micro mol m-2 s-1":
        output_value = input_value
        return output_value
    
    elif "hydraulic vulnerability" in trait_name and input_units == "Mpa":
        output_value = input_value
        return output_value
    
    elif "Leaf lifespan" in trait_name and input_units == "month":
        output_value = input_value / 12
        return output_value
    
    else:
        #print("No unit conversion known for:", trait_name)
        return None
    
def convert_to_fates_param_name(input_trait_name):
    if ("SLA" in input_trait_name) | ('Specific leaf area' in input_trait_name):
        return "fates_leaf_slatop"
    
    elif ("wood density" in input_trait_name) |  ("Wood density" in input_trait_name):
        return "fates_wood_density"
    
    elif ("N %" in input_trait_name) | ("Leaf Nmass" in input_trait_name):
        return "fates_stoich_nitr"
    
    elif "Leaf nitrogen (N) content per leaf dry mass" in input_trait_name:
        return "fates_stoich_nitr"
    
    elif "Leaf area to sapwood area" in input_trait_name:
        return "fates_allom_la_per_sa_int"
    
    elif "Vcmax" in input_trait_name:
        return "fates_leaf_vcmax25top"
    
    elif "hydraulic vulnerability" in input_trait_name:
        return "fates_nonhydro_smpsc"
    
    elif "Leaf lifespan" in input_trait_name:
        return "fates_turnover_leaf"
    
    
    else:
        return None
    
def get_sources(df,variable):
    return df.loc[df.fates_name == variable].source.unique()

def get_range_data(df,subset_name):

    summary_stats = df.groupby('fates_name')['fates_value'].agg(
        min_value='min',
        max_value='max',
        mean='mean',
        median='median',
        sample_size='size'
    )

    summary_stats['subset_name'] = subset_name
    
    return summary_stats

### Load, clean, and view oak trait data

In [5]:
# Load and join QUCH and QUKE data
try_df_raw = pd.read_csv(try_data, sep="\t", encoding='latin1')[['ObservationID','SpeciesName','AccSpeciesName','TraitID','TraitName','OrigValueStr','OrigUnitStr','StdValue','UnitName','ErrorRisk']]

try_df_raw = try_df_raw[['AccSpeciesName','TraitName','OrigValueStr','OrigUnitStr','StdValue','UnitName']]

print("Available trait observations")
print(try_df_raw.TraitName.value_counts())
#print("\n")
#print(try_df_raw.info())

Available trait observations
TraitName
Plant height vegetative                                                                                    433
Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): undefined if petiole is in- or excluded    259
Xylem hydraulic vulnerability, xylem cavitation vulnerability, embolism vulnerability, (P20, P50, P80)     116
Xylem hydraulic vulnerability curve (P20, P50, P80)                                                         98
Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): petiole excluded                            90
Stem specific density (SSD, stem dry mass per stem fresh volume) or wood density                            74
Bark thickness                                                                                              68
Leaf nitrogen (N) content per leaf area                                                                     51
Leaf carbon (C) content per leaf area                                    

In [6]:
try_df_raw = pd.DataFrame.drop_duplicates(try_df_raw)

### Filter to relevant traits

In [7]:
# Filter to get traits of interest
fates_relevant_traits = ['Stem specific density (SSD, stem dry mass per stem fresh volume) or wood density',
                         'Leaf nitrogen (N) content per leaf dry mass',
                         #'Bark thickness',
                         'Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): undefined if petiole is in- or excluded',
                         'Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): petiole excluded',
                         'Leaf lifespan (longevity)',
                         'Photosynthesis carboxylation capacity (Vcmax) per leaf area (Farquhar model)',
                         'Xylem hydraulic vulnerability, xylem cavitation vulnerability, embolism vulnerability, (P20, P50, P80)']

try_df = try_df_raw[try_df_raw.TraitName.isin(fates_relevant_traits)]
try_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 518 entries, 9 to 24154
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   AccSpeciesName  518 non-null    object 
 1   TraitName       518 non-null    object 
 2   OrigValueStr    518 non-null    object 
 3   OrigUnitStr     512 non-null    object 
 4   StdValue        494 non-null    float64
 5   UnitName        494 non-null    object 
dtypes: float64(1), object(5)
memory usage: 28.3+ KB


In [8]:
try_df.loc[try_df['TraitName'].str.contains('Leaf life')]

Unnamed: 0,AccSpeciesName,TraitName,OrigValueStr,OrigUnitStr,StdValue,UnitName
24125,Abies concolor,Leaf lifespan (longevity),7.7,year,92.4,month
24135,Pinus ponderosa,Leaf lifespan (longevity),3.0,year,36.0,month


### Apply unit conversions and fates parameter names

In [9]:
# Unit convertion to fates units
fates_values = try_df.apply(lambda x: convert_to_fates_units(x['TraitName'], x['UnitName'], x["StdValue"]), axis=1)
try_df['fates_value'] = fates_values

# Convert variable names to fates names
fates_names = try_df.apply(lambda x: convert_to_fates_param_name(x["TraitName"]),axis = 1)
try_df["fates_name"] = fates_names


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  try_df['fates_value'] = fates_values
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  try_df["fates_name"] = fates_names


In [10]:
try_df

Unnamed: 0,AccSpeciesName,TraitName,OrigValueStr,OrigUnitStr,StdValue,UnitName,fates_value,fates_name
9,Pinus ponderosa,Leaf area per leaf dry mass (specific leaf are...,165.4,cm2/g,16.540000,mm2 mg-1,0.033080,fates_leaf_slatop
15,Pinus ponderosa,Leaf area per leaf dry mass (specific leaf are...,96.6,cm2/g,9.660000,mm2 mg-1,0.019320,fates_leaf_slatop
26,Pinus ponderosa,Leaf area per leaf dry mass (specific leaf are...,16.5,mm2/mg,16.500000,mm2 mg-1,0.033000,fates_leaf_slatop
36,Abies concolor,"Stem specific density (SSD, stem dry mass per ...",0.4,g/cm^3,0.400000,g/cm3,0.400000,fates_wood_density
38,Calocedrus decurrens,"Stem specific density (SSD, stem dry mass per ...",0.37,g/cm^3,0.370000,g/cm3,0.370000,fates_wood_density
40,Pinus ponderosa,"Stem specific density (SSD, stem dry mass per ...",0.42,g/cm^3,0.420000,g/cm3,0.420000,fates_wood_density
141,Pinus ponderosa,"Stem specific density (SSD, stem dry mass per ...",350,kg/m3,0.350000,g/cm3,0.350000,fates_wood_density
174,Pinus ponderosa,Leaf area per leaf dry mass (specific leaf are...,275,g/m2,3.636364,mm2 mg-1,0.007273,fates_leaf_slatop
179,Pinus ponderosa,Leaf area per leaf dry mass (specific leaf are...,333.333333333333,g/m2,3.000000,mm2 mg-1,0.006000,fates_leaf_slatop
184,Pinus ponderosa,Leaf area per leaf dry mass (specific leaf are...,294.117647058824,g/m2,3.400000,mm2 mg-1,0.006800,fates_leaf_slatop


In [11]:
ABCO_df = try_df.loc[try_df.AccSpeciesName == "Abies concolor"]
CADE_df = try_df.loc[try_df.AccSpeciesName == "Calocedrus decurrens"]
PIPO_df = try_df.loc[try_df.AccSpeciesName == "Pinus ponderosa"]

param_range_conifer_df = pd.concat([get_range_data(ABCO_df,"ABCO"),get_range_data(CADE_df,"CADE"),get_range_data(PIPO_df,"PIPO")]).reset_index()
param_range_conifer_df['sources'] = "try"

### See percentiles of key parameters for PIPO

This is to make sure that the ranges we're testing based on Buotte et al., 2021 also line up well with the TRY data

In [12]:
param_vars = ['fates_leaf_slatop', 'fates_leaf_vcmax25top', 'fates_nonhydro_smpsc']

for p in param_vars:
    print(p)
    print(PIPO_df.loc[PIPO_df['fates_name'].str.contains(p)].describe())

fates_leaf_slatop
         StdValue  fates_value
count  247.000000   247.000000
mean     4.621289     0.009243
std      2.305617     0.004611
min      0.940000     0.001880
25%      3.135819     0.006272
50%      3.863076     0.007726
75%      5.881351     0.011763
max     16.540000     0.033080
fates_leaf_vcmax25top
        StdValue  fates_value
count   2.000000     2.000000
mean   62.496000    62.496000
std    19.482206    19.482206
min    48.720000    48.720000
25%    55.608000    55.608000
50%    62.496000    62.496000
75%    69.384000    69.384000
max    76.272000    76.272000
fates_nonhydro_smpsc
        StdValue  fates_value
count  61.000000    61.000000
mean   -2.325816    -2.325816
std     1.380365     1.380365
min    -5.448360    -5.448360
25%    -3.361057    -3.361057
50%    -2.100000    -2.100000
75%    -1.260000    -1.260000
max     0.670000     0.670000


### See percentiles of key parameters for CADE

This is to make sure that the ranges we're testing based on Buotte et al., 2021 also line up well with the TRY data

In [13]:
for p in param_vars:
    print(p)
    print(CADE_df.loc[CADE_df['fates_name'].str.contains(p)].describe())

fates_leaf_slatop
       StdValue  fates_value
count       0.0          0.0
mean        NaN          NaN
std         NaN          NaN
min         NaN          NaN
25%         NaN          NaN
50%         NaN          NaN
75%         NaN          NaN
max         NaN          NaN
fates_leaf_vcmax25top
       StdValue  fates_value
count       0.0          0.0
mean        NaN          NaN
std         NaN          NaN
min         NaN          NaN
25%         NaN          NaN
50%         NaN          NaN
75%         NaN          NaN
max         NaN          NaN
fates_nonhydro_smpsc
        StdValue  fates_value
count   4.000000     4.000000
mean   -7.712140    -7.712140
std     2.251164     2.251164
min   -10.435596   -10.435596
25%    -8.417937    -8.417937
50%    -7.745384    -7.745384
75%    -7.039587    -7.039587
max    -4.922196    -4.922196


### See percentiles of key parameters for ABCO

This is to make sure that the ranges we're testing based on Buotte et al., 2021 also line up well with the TRY data

In [14]:
for p in param_vars:
    print(p)
    print(ABCO_df.loc[ABCO_df['fates_name'].str.contains(p)].describe())

fates_leaf_slatop
        StdValue  fates_value
count  98.000000    98.000000
mean    5.136648     0.010273
std     1.768476     0.003537
min     2.206200     0.004412
25%     3.535400     0.007071
50%     4.760100     0.009520
75%     6.878307     0.013757
max     8.411215     0.016822
fates_leaf_vcmax25top
       StdValue  fates_value
count       0.0          0.0
mean        NaN          NaN
std         NaN          NaN
min         NaN          NaN
25%         NaN          NaN
50%         NaN          NaN
75%         NaN          NaN
max         NaN          NaN
fates_nonhydro_smpsc
        StdValue  fates_value
count  11.000000    11.000000
mean   -4.739429    -4.739429
std     1.564701     1.564701
min    -8.400000    -8.400000
25%    -5.224000    -5.224000
50%    -4.510000    -4.510000
75%    -3.571150    -3.571150
max    -3.249000    -3.249000


In [15]:
# Write CSV
if write_to_csv == True:

    now = datetime.now()
    dt_string = now.strftime("%m-%d-%Y-%H-%M-%S")
    file_path = "conifer_param_ranges_data_derived_" + dt_string + ".csv"
    param_range_conifer_df.to_csv(file_path, index=False)