### To do

* include oak trait data
* update the format of the output to match the shrub one (once I update the shrub one)

In [1]:
import pandas as pd
import os
import myfuncs
# Let pandas show up to 400 rows before truncating a frame, and keep 400 rows
# visible when it does truncate.
for _display_option in ('display.max_rows', 'display.min_rows'):
    pd.set_option(_display_option, 400)

# Unit-conversion factors, named <target unit>_per_<source unit>.
# -- area
m2_per_cm2 = 1e-4
m2_per_mm2 = 1e-6
mm2_per_cm2 = 100
# -- mass
mg_per_g = 1e3
g_per_mg = 1e-3
g_per_kg = 1000
# -- grams of biomass per gram of carbon
g_biomass_per_g_C = 2

# Set to False to skip writing the processed table to disk.
write_csv = True

In [2]:
# Root directory holding the per-species trait-observation files.
# Defaults to the original location; set the FATES_TRAIT_OBS_DIR environment
# variable to run the notebook on a machine with a different directory layout.
path_to_trait_obs = os.environ.get(
    "FATES_TRAIT_OBS_DIR",
    "/home/adam/cloud/gdrive/postdoc/parameters/param_data/trait_observations",
)

In [3]:
# Full paths to the raw TRY exports for QUCH and QUKE, both located under
# path_to_trait_obs.
QUCH_try_data, QUKE_try_data = (
    os.path.join(path_to_trait_obs, relative_path)
    for relative_path in ("QUCH/TRY/TRY_obs_QUCH.txt", "QUKE/TRY/QUKE_TRYdata.txt")
)

### View TRY data availability for black oak

Note: to view the full data availability for QUKE, open the CSV exported by the code cell below.

In [4]:
# Read the TRY availability table for QUKE, keep the trait name, trait ID and
# observation-count columns under short names, and rank traits by count.
QUKE_data_avail = os.path.join(path_to_trait_obs, "QUKE/TRY/QUKE_TRY_obs_availability.txt")
availability_columns = {"Trait": "trait", "TraitID": "trait_id", ".Quercus kelloggii.": "n"}
QUKE_df = (
    pd.read_csv(QUKE_data_avail, sep="\t", encoding="latin1")[list(availability_columns)]
    .rename(columns=availability_columns)
    .sort_values("n", ascending=False)
)

# Export the ranked table so the complete list can be inspected outside the notebook.
QUKE_clean_csv = os.path.join(path_to_trait_obs, "QUKE/TRY/QUKE_TRY_obs_availability_clean.csv")
QUKE_df.to_csv(QUKE_clean_csv)

# Preview the most frequently observed traits
# (list(QUKE_df.trait_id) gives every available trait ID).
print(QUKE_df.head())

                                                trait  trait_id   n
46                                  Plant growth form        42  79
63                                      Seed dry mass        26  19
14  Leaf area per leaf dry mass (specific leaf are...      3117  17
50                            Plant height vegetative      3106  16
31                                Leaf phenology type        37  16


### Load and clean oak trait data

In [5]:
# Columns pulled from each raw TRY export (both files are read the same way).
try_columns = ['ObservationID', 'SpeciesName', 'AccSpeciesName', 'TraitID', 'TraitName',
               'OrigValueStr', 'OrigUnitStr', 'StdValue', 'UnitName', 'ErrorRisk']
try_df_raw_QUCH = pd.read_csv(QUCH_try_data, sep="\t", encoding="latin1")[try_columns]
try_df_raw_QUKE = pd.read_csv(QUKE_try_data, sep="\t", encoding="latin1")[try_columns]

# TRY trait names to keep for the FATES parameter work.
fates_relevant_traits = [
    'Stem specific density (SSD, stem dry mass per stem fresh volume) or wood density',
    'Leaf nitrogen (N) content per leaf dry mass',
    'Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): undefined if petiole is in- or excluded',
]

# Stack both species, keep only the relevant traits, index by ObservationID and
# reduce to the columns used downstream.
try_df_both_species = pd.concat([try_df_raw_QUCH, try_df_raw_QUKE])
is_fates_trait = try_df_both_species.TraitName.isin(fates_relevant_traits)
try_df_raw = try_df_both_species[is_fates_trait].set_index("ObservationID")
try_df_raw = try_df_raw[['AccSpeciesName', 'TraitName', 'OrigValueStr', 'OrigUnitStr', 'StdValue', 'UnitName']]

### Look at TRY data

In [6]:
# Count the observations available per trait, then summarise the frame's columns.
print("Available trait observations")
print(try_df_raw.TraitName.value_counts())
print("\n")
# DataFrame.info() writes its summary to stdout itself and returns None, so it is
# called directly; wrapping it in print() appended a stray "None" to the output.
try_df_raw.info()

Available trait observations
TraitName
Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): undefined if petiole is in- or excluded    8
Leaf nitrogen (N) content per leaf dry mass                                                                7
Stem specific density (SSD, stem dry mass per stem fresh volume) or wood density                           5
Name: count, dtype: int64


<class 'pandas.core.frame.DataFrame'>
Index: 20 entries, 749286 to 2908799
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   AccSpeciesName  20 non-null     object 
 1   TraitName       20 non-null     object 
 2   OrigValueStr    20 non-null     object 
 3   OrigUnitStr     20 non-null     object 
 4   StdValue        20 non-null     float64
 5   UnitName        20 non-null     object 
dtypes: float64(1), object(5)
memory usage: 1.1+ KB
None


### Apply unit conversions

In [7]:
def _row_to_fates_units(obs):
    """Pass one observation's trait name, unit name and standard value to myfuncs.convert_to_fates_units."""
    return myfuncs.convert_to_fates_units(obs['TraitName'], obs['UnitName'], obs["StdValue"])


# Convert every observation row and store the result in a new 'fates_value' column.
fates_values = try_df_raw.apply(_row_to_fates_units, axis=1)
try_df_raw['fates_value'] = fates_values
# try_df is a second name for the same frame (not a copy).
try_df = try_df_raw
try_df

Unnamed: 0_level_0,AccSpeciesName,TraitName,OrigValueStr,OrigUnitStr,StdValue,UnitName,fates_value
ObservationID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
749286,Quercus chrysolepis,"Stem specific density (SSD, stem dry mass per ...",0.7,g/cm^3,0.7,g/cm3,0.7
774372,Quercus chrysolepis,Leaf nitrogen (N) content per leaf dry mass,1.04,%,10.4,mg/g,0.0208
2915297,Quercus chrysolepis,"Stem specific density (SSD, stem dry mass per ...",0.7,g/cm3,0.7,g/cm3,0.7
6363858,Quercus chrysolepis,Leaf area per leaf dry mass (specific leaf are...,155.861197443779,g m-2,6.415965,mm2 mg-1,0.012832
1290314,Quercus kelloggii,Leaf area per leaf dry mass (specific leaf are...,117.73,g/m2,8.494012,mm2 mg-1,0.016988
1290314,Quercus kelloggii,Leaf nitrogen (N) content per leaf dry mass,1.5765,%,15.765,mg/g,0.03153
1290314,Quercus kelloggii,Leaf area per leaf dry mass (specific leaf are...,84.9401172173617,cm2/g,8.494012,mm2 mg-1,0.016988
218210,Quercus kelloggii,"Stem specific density (SSD, stem dry mass per ...",0.72,g / cm3,0.72,g/cm3,0.72
218210,Quercus kelloggii,Leaf nitrogen (N) content per leaf dry mass,1.82,%,18.2,mg/g,0.0364
749333,Quercus kelloggii,"Stem specific density (SSD, stem dry mass per ...",0.51,g/cm^3,0.51,g/cm3,0.51


### Look at TRY units

In [None]:
# List every (trait, unit) combination that actually occurs in the data.
# The two columns are de-duplicated together: zipping TraitName.unique() with
# UnitName.unique() pairs entries by position only, which mislabels or drops
# pairs when a trait is reported in several units or traits share a unit.
trait_units = try_df_raw[["TraitName", "UnitName"]].drop_duplicates()
for t, u in trait_units.itertuples(index=False, name=None):
    print(t,":",u)

### Convert obs to FATES units

In [None]:
# Reshape the Quercus chrysolepis (QUCH) observations to one row per ObservationID
# and one column per trait, then convert the TRY standard values to FATES units.
# TODO: repeat for QUKE, starting from
#   try_df_QUKE = try_df_raw.loc[try_df_raw['AccSpeciesName'] == "Quercus kelloggii"]
try_df_QUCH = try_df_raw.loc[try_df_raw['AccSpeciesName'] == "Quercus chrysolepis"]

# Short column names are assigned by trait name, not by position, so a trait cannot
# be mislabelled if the pivoted column order changes; reindex guarantees all three
# columns exist (NaN-filled) even when a trait has no observations.
trait_short_names = {
    'Leaf area per leaf dry mass (specific leaf area, SLA or 1/LMA): undefined if petiole is in- or excluded': 'SLA',
    'Leaf nitrogen (N) content per leaf dry mass': 'LeafN',
    'Stem specific density (SSD, stem dry mass per stem fresh volume) or wood density': 'wsg',
}
# NOTE: DataFrame.pivot raises ValueError if an ObservationID carries the same
# trait more than once.
try_df_raw_wide_QUCH = (
    try_df_QUCH
    .pivot(columns='TraitName', values='StdValue')
    .rename(columns=trait_short_names)
    .reindex(columns=['SLA', 'LeafN', 'wsg'])
    .rename_axis(columns=None)
)

# Convert to FATES units. The source columns come from the QUCH frame itself; the
# previous code read them from `try_df_raw_wide`, which is never defined in this
# notebook and raised NameError on a fresh kernel.
try_df_raw_wide_QUCH = try_df_raw_wide_QUCH.assign(
    fates_leaf_slatop=lambda wide: wide['SLA'] * m2_per_mm2 * mg_per_g * g_biomass_per_g_C,
    fates_wood_density=lambda wide: wide['wsg'],
    fates_stoich_nitr=lambda wide: wide['LeafN'] * g_per_mg * g_biomass_per_g_C,
)
try_df_raw_wide_QUCH

### Clean and export for use in deciding on parameter ranges for FATES

In [None]:
# Build the long-format table of FATES-unit values for QUCH and optionally write it to CSV.
# The wide QUCH frame is read under its actual name (`try_df_raw_wide` is never
# defined in this notebook) and the result gets a new name, so re-running this cell
# does not fail on columns that were already dropped.
fates_wide_QUCH = try_df_raw_wide_QUCH.reset_index().drop(["SLA","LeafN","wsg"], axis=1)
final_df = pd.melt(fates_wide_QUCH, id_vars=["ObservationID"], var_name="var", value_name="value")

# try_df_raw only carries AccSpeciesName (SpeciesName is dropped when the data are
# loaded), so that column is used and exposed under the SpeciesName header.
# De-duplicating gives one row per ObservationID/species pair; without it every
# extra trait row for an ObservationID would multiply the merged rows.
obsID_species = (
    try_df_raw[["AccSpeciesName"]]
    .rename(columns={"AccSpeciesName": "SpeciesName"})
    .reset_index()
    .drop_duplicates()
)

final_df = final_df.dropna(axis=0)
# validate= makes the merge fail loudly if an ObservationID maps to several species.
final_df = pd.merge(final_df, obsID_species, on="ObservationID", validate="many_to_one")
final_df = final_df.sort_values(by=["var","SpeciesName","value"])

if write_csv:
    # Anchored to path_to_trait_obs like the other reads and writes in this notebook,
    # so the output location does not depend on the kernel's working directory.
    final_df.to_csv(
        os.path.join(path_to_trait_obs, "QUCH/TRY/QUCH_processed_param_data.csv"),
        index=False,
    )

In [None]:
# Display the long-format table (the same frame that is written to CSV when write_csv is True).
final_df