# Oak parameter data from TRY

In [1]:
import pandas as pd
import os
#import myfuncs
from datetime import datetime
pd.set_option('display.max_rows', 400)
pd.set_option('display.min_rows', 400)
import numpy as np

#define constants
m2_per_cm2 = 1e-4
g_biomass_per_g_C = 2
m2_per_mm2 = 1e-6
mg_per_g = 1e3
g_per_kg = 1000
mm2_per_cm2 = 100
g_per_mg = 1e-3
write_csv = True

In [2]:
path_to_trait_obs = "/home/adamhb/gdrive/postdoc/parameters/param_data/trait_observations"
write_QUKE_data_avail = False
write_to_csv = False

In [3]:
QUCH_try_data = os.path.join(path_to_trait_obs,"QUCH/TRY/TRY_obs_QUCH.txt")
QUKE_try_data = os.path.join(path_to_trait_obs,"QUKE/TRY/QUKE_TRYdata.txt")
CA_oak_try_data = os.path.join(path_to_trait_obs,"CA_oaks/TRY/CA_oak_try_data.txt")
evergreen_oak_data = os.path.join(path_to_trait_obs,'CA_oaks/TRY/oak_vcmax.txt')

### Functions

In [4]:
def convert_to_fates_units(trait_name,input_units,input_value):

    
    if ("SLA" in trait_name) | ("Specific leaf area" in trait_name) and input_units == "mm2 mg-1":  
        output_value = input_value * m2_per_mm2 * mg_per_g * g_biomass_per_g_C
        return output_value

    elif "Leaf nitrogen" in trait_name and input_units == "mg/g":
        output_value = input_value * g_per_mg * g_biomass_per_g_C
        return output_value

    elif "Stem specific density" in trait_name and input_units == "g/cm3":
        output_value = input_value
        return output_value
    
    # Anaker conversions
    elif trait_name == "log(SLA cm2 g-1)":
        output_value = np.exp(input_value) * m2_per_cm2 * g_biomass_per_g_C
        return output_value
        
    elif trait_name == 'log(wood density g ml-1)':
        output_value = np.exp(input_value)
        return output_value
        
    elif trait_name == 'log(N %)':
        output_value = np.exp(input_value) * 1e-2 * g_biomass_per_g_C
        return output_value
    
    #Jepson leaf N
    elif trait_name == "Leaf Nmass":
    
        # muliplying by 1e-2 is to reverse the "per cent"  
        output_value = input_value * 1e-2 * g_biomass_per_g_C
        return output_value
    
    elif "Wood density" in trait_name:
        output_value = input_value
        return output_value
    
    elif trait_name == "Leaf area to sapwood area" and input_units == "mm2 mm-2":
        output_value = input_value * 1e-4
        return output_value
    
    elif "Vcmax" in trait_name and input_units == "micro mol m-2 s-1":
        output_value = input_value
        return output_value
    
    else:
        #print("No unit conversion known for:", trait_name)
        return None
    
def convert_to_fates_param_name(input_trait_name):
    if ("SLA" in input_trait_name) | ('Specific leaf area' in input_trait_name):
        return "fates_leaf_slatop"
    
    elif ("wood density" in input_trait_name) |  ("Wood density" in input_trait_name):
        return "fates_wood_density"
    
    elif ("N %" in input_trait_name) | ("Leaf Nmass" in input_trait_name):
        return "fates_stoich_nitr"
    
    elif "Leaf nitrogen (N) content per leaf dry mass" in input_trait_name:
        return "fates_stoich_nitr"
    
    elif "Leaf area to sapwood area" in input_trait_name:
        return "fates_allom_la_per_sa_int"
    
    elif "Vcmax" in input_trait_name:
        return "fates_leaf_vcmax25top"
    
    else:
        return None
    
def get_sources(df,variable):
    return df.loc[df.fates_name == variable].source.unique()

def get_range_data(df,subset_name):

    summary_stats = df.groupby('fates_name')['fates_value'].agg(
        min_value='min',
        max_value='max',
        mean='mean',
        median='median',
        sample_size='size'
    )

    summary_stats['subset_name'] = subset_name
    
    return summary_stats

### View TRY data availability for black oak

Note: to view the data availability for QUKE look at the csv exported in this code block

In [5]:
#clean data
QUKE_data_avail = os.path.join(path_to_trait_obs,"QUKE/TRY/QUKE_TRY_obs_availability.txt")
QUKE_df = pd.read_csv(QUKE_data_avail, sep="\t", encoding='latin1')[["Trait","TraitID",".Quercus kelloggii."]]
QUKE_df = QUKE_df.rename(columns={"Trait": "trait", "TraitID": "trait_id", ".Quercus kelloggii.": "n"})
QUKE_df = QUKE_df.sort_values(["n"],ascending=False)

if write_QUKE_data_avail == True:
    QUKE_df.to_csv(os.path.join(path_to_trait_obs,"QUKE/TRY/QUKE_TRY_obs_availability_clean.csv"))

    #get trait IDs available for QUKE
    print(list(QUKE_df.trait_id))

    print(QUKE_df)

In [6]:
QUKE_df

Unnamed: 0,trait,trait_id,n
46,Plant growth form,42,79
63,Seed dry mass,26,19
14,Leaf area per leaf dry mass (specific leaf are...,3117,17
50,Plant height vegetative,3106,16
31,Leaf phenology type,37,16
28,Leaf nitrogen (N) content per leaf dry mass,14,15
51,Plant human usage types,604,13
27,Leaf nitrogen (N) content per leaf area,50,13
61,Plant woodiness,38,11
33,Leaf phosphorus (P) content per leaf dry mass,15,11


### Load, clean, and view oak trait data

In [26]:
# Load and join QUCH and QUKE data
try_df_raw_QUCH = pd.read_csv(QUCH_try_data, sep="\t", encoding='latin1')[['ObservationID','SpeciesName','AccSpeciesName','TraitID','TraitName','OrigValueStr','OrigUnitStr','StdValue','UnitName','ErrorRisk']]
try_df_raw_QUKE = pd.read_csv(QUKE_try_data, sep="\t", encoding='latin1')[['ObservationID','SpeciesName','AccSpeciesName','TraitID','TraitName','OrigValueStr','OrigUnitStr','StdValue','UnitName','ErrorRisk']]
try_df_raw_CA_oak = pd.read_csv(CA_oak_try_data, sep="\t", encoding='latin1')[['ObservationID','SpeciesName','AccSpeciesName','TraitID','TraitName','OrigValueStr','OrigUnitStr','StdValue','UnitName','ErrorRisk']]
try_df_evergreen_oak = pd.read_csv(evergreen_oak_data, sep="\t", encoding='latin1')[['ObservationID','SpeciesName','AccSpeciesName','TraitID','TraitName','OrigValueStr','OrigUnitStr','StdValue','UnitName','ErrorRisk']]

try_df_raw = pd.concat([try_df_raw_QUCH,try_df_raw_QUKE,try_df_raw_CA_oak,try_df_evergreen_oak])

try_df_raw = try_df_raw[['AccSpeciesName','TraitName','OrigValueStr','OrigUnitStr','StdValue','UnitName']]

#print("Available trait observations")
#print(try_df_raw.TraitName.value_counts())
#print("\n")
#print(try_df_raw.info())

In [27]:
try_df_raw['TraitName'].value_counts()

Leaf nitrogen (N) content per leaf dry mass                                                                980
Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): undefined if petiole is in- or excluded    972
Leaf nitrogen (N) content per leaf area                                                                    963
Leaf carbon (C) content per leaf area                                                                      944
Photosynthesis carboxylation capacity (Vcmax) per leaf area (Farquhar model)                               512
Photosynthesis carboxylation capacity (Vcmax) per leaf dry mass (Farquhar model)                           510
Xylem hydraulic vulnerability curve (P20, P50, P80)                                                        162
Seed dry mass                                                                                              138
Leaf phenology type                                                                                        102
P

In [28]:
try_df_evergreen_oak.UnitName.unique()
print(try_df_evergreen_oak.TraitName.value_counts())

Photosynthesis carboxylation capacity (Vcmax) per leaf area (Farquhar model)        62
Photosynthesis carboxylation capacity (Vcmax) per leaf dry mass (Farquhar model)    60
Name: TraitName, dtype: int64


In [10]:
try_df_evergreen_oak['SpeciesName'].value_counts()

Quercus ilex     601
Quercus suber     46
Name: SpeciesName, dtype: int64

In [31]:
# umol_m_s = try_df_evergreen_oak['UnitName'] == 'micro mol m-2 s-1'
# vcmax_df = try_df_evergreen_oak.loc[umol_m_s]
# print(vcmax_df['SpeciesName'].value_counts())
# print('mean',vcmax_df['StdValue'].values.mean())
# print('std',vcmax_df['StdValue'].values.std())

umol_m_s = try_df_raw['UnitName'] == 'micro mol m-2 s-1'
vcmax_df = try_df_raw.loc[umol_m_s]
print(vcmax_df['AccSpeciesName'].value_counts())
print('mean',vcmax_df['StdValue'].values.mean())
print('std',vcmax_df['StdValue'].values.std())

Quercus douglasii    450
Quercus ilex          58
Quercus suber          4
Name: AccSpeciesName, dtype: int64
mean 62.21169567029505
std 28.009810051021464


In [36]:
vcmax_df.groupby('AccSpeciesName')['StdValue'].quantile([0.25,0.75]).unstack()

Unnamed: 0_level_0,0.25,0.75
AccSpeciesName,Unnamed: 1_level_1,Unnamed: 2_level_1
Quercus douglasii,45.051587,82.981634
Quercus ilex,29.892991,44.753682
Quercus suber,36.2775,56.3275


In [38]:
vcmax_df.groupby('AccSpeciesName').mean()['StdValue']

AccSpeciesName
Quercus douglasii    65.339839
Quercus ilex         38.945876
Quercus suber        47.650000
Name: StdValue, dtype: float64

In [13]:
try_df_raw = pd.DataFrame.drop_duplicates(try_df_raw)

### Filter to relevant traits

In [14]:
# Filter to get traits of interest
fates_relevant_traits = ['Stem specific density (SSD, stem dry mass per stem fresh volume) or wood density',
                         'Leaf nitrogen (N) content per leaf dry mass',
                         'Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): undefined if petiole is in- or excluded',
                         'Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): petiole excluded',
                         'Photosynthesis carboxylation capacity (Vcmax) per leaf area (Farquhar model)']

try_df = try_df_raw[try_df_raw.TraitName.isin(fates_relevant_traits)]
try_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 368 entries, 16 to 53806
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   AccSpeciesName  368 non-null    object 
 1   TraitName       368 non-null    object 
 2   OrigValueStr    368 non-null    object 
 3   OrigUnitStr     368 non-null    object 
 4   StdValue        368 non-null    float64
 5   UnitName        368 non-null    object 
dtypes: float64(1), object(5)
memory usage: 20.1+ KB


In [15]:
try_df

Unnamed: 0,AccSpeciesName,TraitName,OrigValueStr,OrigUnitStr,StdValue,UnitName
16,Quercus chrysolepis,"Stem specific density (SSD, stem dry mass per ...",0.7,g/cm^3,0.7,g/cm3
25,Quercus chrysolepis,Leaf nitrogen (N) content per leaf dry mass,1.04,%,10.4,mg/g
71,Quercus chrysolepis,"Stem specific density (SSD, stem dry mass per ...",0.7,g/cm3,0.7,g/cm3
84,Quercus chrysolepis,Leaf area per leaf dry mass (specific leaf are...,155.861197443779,g m-2,6.415965,mm2 mg-1
15,Quercus kelloggii,Leaf area per leaf dry mass (specific leaf are...,117.73,g/m2,8.494012,mm2 mg-1
17,Quercus kelloggii,Leaf nitrogen (N) content per leaf dry mass,1.5765,%,15.765,mg/g
18,Quercus kelloggii,Leaf area per leaf dry mass (specific leaf are...,84.9401172173617,cm2/g,8.494012,mm2 mg-1
44,Quercus kelloggii,Leaf area per leaf dry mass (specific leaf are...,111.89,cm2/g,11.189,mm2 mg-1
46,Quercus kelloggii,"Stem specific density (SSD, stem dry mass per ...",0.72,g / cm3,0.72,g/cm3
47,Quercus kelloggii,Leaf nitrogen (N) content per leaf dry mass,1.82,%,18.2,mg/g


In [16]:
#try_df.loc[try_df.TraitName == "Photosynthesis carboxylation capacity (Vcmax) per leaf area (Farquhar model)"]

### Apply unit conversions and fates parameter names

In [17]:
# Unit convertion to fates units
fates_values = try_df.apply(lambda x: convert_to_fates_units(x['TraitName'], x['UnitName'], x["StdValue"]), axis=1)
try_df['fates_value'] = fates_values

# Convert variable names to fates names
fates_names = try_df.apply(lambda x: convert_to_fates_param_name(x["TraitName"]),axis = 1)
try_df["fates_name"] = fates_names


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  try_df['fates_value'] = fates_values
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  try_df["fates_name"] = fates_names


### Get percentiles for oak vcmax

In [18]:
def calculate_percentiles(data, percentiles):
    """
    Calculate the specified percentiles of a dataset.

    :param data: list or ndarray, the dataset
    :param percentiles: list of floats, the percentiles to compute, e.g., [25, 50, 75]
    :return: dictionary of percentiles and their corresponding values
    """
    results = {}
    for p in percentiles:
        value = np.percentile(data, p)
        results[p] = value
    return results

In [19]:
try_df.loc[try_df["fates_name"] == "fates_leaf_vcmax25top"].describe()

Unnamed: 0,StdValue,fates_value
count,56.0,56.0
mean,65.111867,65.111867
std,28.26991,28.26991
min,13.630628,13.630628
25%,45.051047,45.051047
50%,64.20777,64.20777
75%,83.319989,83.319989
max,127.296133,127.296133


#### Low-end percentiles

In [20]:
calculate_percentiles(try_df.loc[try_df["fates_name"] == "fates_leaf_vcmax25top"]["fates_value"],[1,2,3,5,10])

{1: 14.906282625171244,
 2: 16.36395350384018,
 3: 18.64069777496117,
 5: 24.680039042699,
 10: 31.475380780524404}

### Get low-end percentiles for oak slatop

In [21]:
calculate_percentiles(try_df.loc[try_df["fates_name"] == "fates_leaf_slatop"]["fates_value"],[1,2,3,5,10])

{1: 0.008158152,
 2: 0.00991634399996065,
 3: 0.01021009999950812,
 5: 0.010993779999999998,
 10: 0.01179225806452}

### Organize and write csv

In [22]:
QUKE_df = try_df.loc[try_df.AccSpeciesName == "Quercus kelloggii"]
QUCH_df = try_df.loc[try_df.AccSpeciesName == "Quercus chrysolepis"]
QUDO_df = try_df.loc[try_df.AccSpeciesName == "Quercus douglasii"]

param_range_oak_df = pd.concat([get_range_data(QUKE_df,"QUKE"),get_range_data(QUCH_df,"QUCH"),get_range_data(QUDO_df,"QUDO")]).reset_index()
param_range_oak_df['sources'] = "try"
param_range_oak_df['pft'] = "oak"

In [40]:
print(0.0123 * 1.5)
print(0.0123 * 0.8)

0.01845
0.009840000000000002


In [23]:
param_range_oak_df

Unnamed: 0,fates_name,min_value,max_value,mean,median,sample_size,subset_name,sources,pft
0,fates_leaf_slatop,0.01698,0.022378,0.018568,0.016988,8,QUKE,try,oak
1,fates_stoich_nitr,0.0306,0.039,0.034383,0.033965,4,QUKE,try,oak
2,fates_wood_density,0.51,0.72,0.58,0.51,3,QUKE,try,oak
3,fates_leaf_slatop,0.012832,0.012832,0.012832,0.012832,1,QUCH,try,oak
4,fates_stoich_nitr,0.0208,0.0208,0.0208,0.0208,1,QUCH,try,oak
5,fates_wood_density,0.63,0.7,0.665833,0.666667,4,QUCH,try,oak
6,fates_leaf_slatop,0.010956,0.029333,0.015171,0.014378,126,QUDO,try,oak
7,fates_leaf_vcmax25top,13.630628,127.296133,65.111867,64.20777,56,QUDO,try,oak
8,fates_stoich_nitr,0.029777,0.049693,0.038271,0.036928,130,QUDO,try,oak
9,fates_wood_density,0.75,0.75,0.75,0.75,1,QUDO,try,oak


In [24]:
# Write CSV
if write_to_csv == True:

    now = datetime.now()
    dt_string = now.strftime("%m-%d-%Y-%H-%M-%S")
    file_path = "oak_param_ranges_data_derived_" + dt_string + ".csv"
    param_range_oak_df.to_csv(file_path, index=False)