In [1]:
# Core
import re, os, sys, string, random, functools
import pandas as pd
from pathlib import Path
import numpy as np
# Stats
import scipy.stats as stats
# Reduce warnings
import warnings
warnings.filterwarnings("ignore") # usually not a good idea!

## Data wrangling
### Desert Botanic Garden
These data were collected by Karolina and I in the spring of 2019. I already compiled the data somewhere else, so it can be easily loaded.

In [2]:
nonCAMcolor = "xkcd:light eggplant"
intCAMcolor = "xkcd:apricot"
pCAMcolor = "xkcd:lichen"
cmap = {"C3":nonCAMcolor, "C4":nonCAMcolor,"C3-C4":nonCAMcolor, "CAM":pCAMcolor, "C3-C4+CAM":intCAMcolor, "C4+CAM":intCAMcolor, "C3+CAM":intCAMcolor}

In [3]:
dbg = pd.read_csv("../Data/DBG-Measurements.csv")
dbg = dbg.rename({"mesophyll_cell_area_um2":"MAmean (um^2)", 
            "mesophyll_cell_area_um2_SE":"MAse (um^2)",
            "leaf_thickness_um":"LTmean (um)",
            "leaf_thickness_um_SE": "LTse (um)",
            "IAS":"IASmean (%)",
            "IAS_SE":"IASse (%)",}, axis=1)
dbg["Reference"] = "This publication"
dbg["color"] = [cmap[p] for p in dbg["Pathway"]]
dbg["Source"] = "DBG"
dbg.head()

Unnamed: 0,MajorLineage,Genus,Species,Taxon,Tissue,Pathway,MAmean (um^2),MAse (um^2),LTmean (um),LTse (um),IASmean (%),IASse (%),Reference,color,Source
0,Agavoideae,Agave,americana,Agave americana,Leaf,CAM,1339.683455,20.391867,4887.368333,51.895283,0.047124,0.013377,This publication,xkcd:lichen,DBG
1,Agavoideae,Agave,americana,Agave americana,Leaf,CAM,2212.446822,36.936013,3204.702,17.616073,0.061181,0.024675,This publication,xkcd:lichen,DBG
2,Agavoideae,Agave,americana,Agave americana,Leaf,CAM,3090.210853,53.191428,4402.634667,114.466009,0.180749,0.050523,This publication,xkcd:lichen,DBG
3,Agavoideae,Agave,attenuata,Agave attenuata,Leaf,CAM,3013.052933,175.556512,2935.331667,57.566621,,,This publication,xkcd:lichen,DBG
4,Agavoideae,Agave,bovicornuta,Agave bovicornuta,Leaf,CAM,3223.139679,84.612126,2729.777,82.985558,0.039694,0.009803,This publication,xkcd:lichen,DBG


In [4]:
dbg[["Taxon","Pathway"]].drop_duplicates()["Pathway"].value_counts()

CAM       60
C3        12
C3+CAM     7
Name: Pathway, dtype: int64

### Other Edwards' lab data
These sections were prepared by Lily and Elissa in 2017. They are mostly _Parakeelya_ (Australian _Calandrinia_), but there are other Portulacineae too.

In [5]:
elDir = Path("../Data/Edwards-Lab-Unpublished/")
elFiles = list(elDir.rglob("*.csv"))

In [6]:
genus = []
species = []
iasMean = []
iasSE = []
thicknessMean = []
thicknessSE = []
mAreaMean = []
mAreaSE = []

for p in elFiles:
    g,s,_ =  p.stem.split("-")
    genus.append(g)
    species.append(s)
    
    df = pd.read_csv(p)
    df.dropna(how="all")
    
    iasVals = []
    for c in df.columns:
        if (c.lower().startswith("ias")) and (len(df[c].dropna() > 0)):
            series = df[c].dropna()
            ias = series[1:].sum()/series[0]
            iasVals.append(ias)
            del ias
            
        elif c.lower().startswith("thickness"):
            if len(df[c].dropna()) > 0:
                ts  = []
                for t in df[c].dropna():
                    try:
                        ts.append(float(t))
                    except:
                        continue
                thicknessMean.append(np.mean(ts))
                thicknessSE.append(stats.sem(ts))
            else:
                thicknessMean.append(np.nan)
                thicknessSE.append(np.nan)
            
        elif "um^" in c.lower():
            if len(df[c].dropna()) > 0:
                mAreaMean.append(df[c].dropna().mean())
                mAreaSE.append(stats.sem(df[c].dropna()))
            else:
                mAreaMean.append(np.nan)
                mAreaSE.append(np.nan)
                
    if len(iasVals) > 0:
        iasMean.append(np.mean(iasVals))
        iasSE.append(stats.sem(iasVals))
    else:
        iasMean.append(np.nan)
        iasSE.append(np.nan)
    del iasVals
    
elDF = pd.DataFrame()
elDF["Genus"] = genus
elDF["Species"] = species
elDF["Taxon"] = elDF["Genus"] + " " + elDF["Species"]
elDF["IASmean (%)"] = iasMean
elDF["IASse (%)"] = iasSE
elDF["LTmean (um)"] = thicknessMean
elDF["LTse (um)"] = thicknessSE
elDF["MAmean (um^2)"] = mAreaMean
elDF["MAse (um^2)"] = mAreaSE
elDF["MajorLineage"] = "Montiaceae"
elDF.loc[elDF["Genus"]=="Anacampseros", "MajorLineage"] = "Anacampserotaceae"
elDF.loc[elDF["Genus"]=="Alluaudia", "MajorLineage"] = "Didiereaceae"
elDF.loc[elDF["Genus"]=="Anredera", "MajorLineage"] = "Basellaceae"
elDF["Pathway"] = "C3+CAM"
elDF.loc[elDF["Genus"]=="Alluaudia", "Pathway"] = "CAM"
elDF.loc[elDF["Species"].isin(["tumida","granulifera","arenicola"]), "Pathway"] = "C3"
elDF["Tissue"] = "leaf"
elDF["Reference"] = "This publication"
elDF["color"] = [cmap[p] for p in elDF["Pathway"]]
elDF["Source"] = "Edwards Lab unpublished"
elDF

Unnamed: 0,Genus,Species,Taxon,IASmean (%),IASse (%),LTmean (um),LTse (um),MAmean (um^2),MAse (um^2),MajorLineage,Pathway,Tissue,Reference,color,Source
0,Anacampseros,lanceolata,Anacampseros lanceolata,0.11541,0.029697,2471.649,3.131274,5386.6529,533.753571,Anacampserotaceae,C3+CAM,leaf,This publication,xkcd:apricot,Edwards Lab unpublished
1,Calandrinia,schistorhiza,Calandrinia schistorhiza,0.386109,0.032014,2201.309667,116.701903,5845.797059,539.551182,Montiaceae,C3+CAM,leaf,This publication,xkcd:apricot,Edwards Lab unpublished
2,Calandrinia,flava,Calandrinia flava,0.254209,0.037023,2121.204667,110.127142,15824.9651,1569.959482,Montiaceae,C3+CAM,leaf,This publication,xkcd:apricot,Edwards Lab unpublished
3,Alluaudia,procera,Alluaudia procera,0.037015,0.013922,650.387333,19.058869,2687.575767,199.017468,Didiereaceae,CAM,leaf,This publication,xkcd:lichen,Edwards Lab unpublished
4,Anacampseros,rufescens,Anacampseros rufescens,0.062892,0.001657,,,5465.068933,667.258109,Anacampserotaceae,C3+CAM,leaf,This publication,xkcd:apricot,Edwards Lab unpublished
5,Calandrinia,linliflora,Calandrinia linliflora,0.126511,0.011174,748.17,52.404524,4140.029333,265.364358,Montiaceae,C3+CAM,leaf,This publication,xkcd:apricot,Edwards Lab unpublished
6,Calandrinia,kalanniensis,Calandrinia kalanniensis,0.158303,0.060582,1420.4555,12.745901,5820.995133,582.214644,Montiaceae,C3+CAM,leaf,This publication,xkcd:apricot,Edwards Lab unpublished
7,Calandrinia,spergularina,Calandrinia spergularina,0.15183,0.045947,1628.278,37.424043,10034.584,545.033741,Montiaceae,C3+CAM,leaf,This publication,xkcd:apricot,Edwards Lab unpublished
8,Calandrinia,quadrivalvis,Calandrinia quadrivalvis,0.296044,0.014849,2125.910333,96.829656,6468.855526,520.2713,Montiaceae,C3+CAM,leaf,This publication,xkcd:apricot,Edwards Lab unpublished
9,Calandrinia,brevipedata,Calandrinia brevipedata,,,1327.9725,23.652313,7948.5074,488.05614,Montiaceae,C3+CAM,leaf,This publication,xkcd:apricot,Edwards Lab unpublished


In [7]:
elDF[["Taxon","Pathway"]].drop_duplicates()["Pathway"].value_counts()

C3+CAM    13
CAM        1
C3         1
Name: Pathway, dtype: int64

### _Portulaca_ images
These images were shared by Dr. Elena Voznesenskaya and were originally published in multiple works with Dr. Gilberto Ocampo on _Portulaca_ leaf ultrastructure (Ocampo _et al._, 2013; Vozneenskaya _et al._, 2010, 2017)

In [8]:
portDir = Path("../Data/Portulaca-Ocampo-2013/")
portFiles = list(portDir.rglob("*.csv"))

In [9]:
genus = []
species = []
iasMean = []
iasSE = []
thicknessMean = []
thicknessSE = []
mAreaMean = []
mAreaSE = []

for p in portFiles:
    g,s =  p.stem.split("-")[:2]
    genus.append(g)
    species.append(s)
    
    df = pd.read_csv(p)
    df.dropna(how="all")
    
    iasVals = []
    for c in df.columns:
        if (c.lower().startswith("ias")) and (len(df[c].dropna() > 0)):
            series = df[c].dropna()
            ias = series[1:].sum()/series[0]
            iasVals.append(ias)
            del ias
            
        elif c.lower().startswith("thickness"):
            if len(df[c].dropna()) > 0:
                ts  = []
                for t in df[c].dropna():
                    try:
                        ts.append(float(t))
                    except:
                        continue
                thicknessMean.append(np.mean(ts))
                thicknessSE.append(stats.sem(ts))
            else:
                thicknessMean.append(np.nan)
                thicknessSE.append(np.nan)
            
        elif "um^" in c.lower():
            if len(df[c].dropna()) > 0:
                mAreaMean.append(df[c].dropna().mean())
                mAreaSE.append(stats.sem(df[c].dropna()))
            else:
                mAreaMean.append(np.nan)
                mAreaSE.append(np.nan)
                
    if len(iasVals) > 0:
        iasMean.append(np.mean(iasVals))
        iasSE.append(stats.sem(iasVals))
    else:
        iasMean.append(np.nan)
        iasSE.append(np.nan)
    del iasVals
    
portDF = pd.DataFrame()
portDF["Genus"] = genus
portDF["Species"] = species
portDF["Taxon"] = portDF["Genus"] + " " + portDF["Species"]
portDF["IASmean (%)"] = iasMean
portDF["IASse (%)"] = iasSE
portDF["LTmean (um)"] = thicknessMean
portDF["LTse (um)"] = thicknessSE
portDF["MAmean (um^2)"] = mAreaMean
portDF["MAse (um^2)"] = mAreaSE
portDF["MajorLineage"] = "Portulacaceae"
portDF["Pathway"] = "C4+CAM"
portDF.loc[portDF["Species"].isin(["cryptopetala","hirsutissima", "mucronata"]), "Pathway"] = "C3-C4+CAM"
portDF["Tissue"] = "leaf"
portDF["Reference"] = "Ocampo et al., 2013; Voznesenskaya et al., 2010, 2017"
portDF["color"] = [cmap[p] for p in portDF["Pathway"]]
portDF["Source"] = "Voznesenskaya"
portDF

Unnamed: 0,Genus,Species,Taxon,IASmean (%),IASse (%),LTmean (um),LTse (um),MAmean (um^2),MAse (um^2),MajorLineage,Pathway,Tissue,Reference,color,Source
0,Portulaca,biloba,Portulaca biloba,0.228076,0.059172,614.595667,18.489551,1066.389267,97.628145,Portulacaceae,C4+CAM,leaf,"Ocampo et al., 2013; Voznesenskaya et al., 201...",xkcd:apricot,Voznesenskaya
1,Portulaca,suffrutescens,Portulaca suffrutescens,0.093388,0.001908,624.067333,5.31315,614.22415,42.833571,Portulacaceae,C4+CAM,leaf,"Ocampo et al., 2013; Voznesenskaya et al., 201...",xkcd:apricot,Voznesenskaya
2,Portulaca,giliesii,Portulaca giliesii,0.1703,0.056221,958.208,8.941161,2331.649,232.395388,Portulacaceae,C4+CAM,leaf,"Ocampo et al., 2013; Voznesenskaya et al., 201...",xkcd:apricot,Voznesenskaya
3,Portulaca,halimoides,Portulaca halimoides,0.061723,0.033373,714.675333,19.040694,2319.56,118.023122,Portulacaceae,C4+CAM,leaf,"Ocampo et al., 2013; Voznesenskaya et al., 201...",xkcd:apricot,Voznesenskaya
4,Portulaca,elatior,Portulaca elatior,,,553.904167,26.920341,895.186187,71.9718,Portulacaceae,C4+CAM,leaf,"Ocampo et al., 2013; Voznesenskaya et al., 201...",xkcd:apricot,Voznesenskaya
5,Portulaca,hirsutissima,Portulaca hirsutissima,0.228079,0.041216,1612.173333,15.320234,3615.7466,394.487793,Portulacaceae,C3-C4+CAM,leaf,"Ocampo et al., 2013; Voznesenskaya et al., 201...",xkcd:apricot,Voznesenskaya
6,Portulaca,bicolor,Portulaca bicolor,,,3480.241,30.212094,559.431154,39.757293,Portulacaceae,C4+CAM,leaf,"Ocampo et al., 2013; Voznesenskaya et al., 201...",xkcd:apricot,Voznesenskaya
7,Portulaca,mucronata,Portulaca mucronata,0.110991,0.002283,646.551667,12.893469,5698.374867,606.384711,Portulacaceae,C3-C4+CAM,leaf,"Ocampo et al., 2013; Voznesenskaya et al., 201...",xkcd:apricot,Voznesenskaya
8,Portulaca,pilosa,Portulaca pilosa,0.056052,0.004676,2019.628333,29.777352,1914.744867,174.199251,Portulacaceae,C4+CAM,leaf,"Ocampo et al., 2013; Voznesenskaya et al., 201...",xkcd:apricot,Voznesenskaya
9,Portulaca,cryptopetala,Portulaca cryptopetala,0.12293,0.038382,1334.973333,9.248129,2054.263933,225.274919,Portulacaceae,C3-C4+CAM,leaf,"Ocampo et al., 2013; Voznesenskaya et al., 201...",xkcd:apricot,Voznesenskaya


In [10]:
dbg[["Taxon","Pathway"]].merge(elDF[["Taxon","Pathway"]], on=["Taxon","Pathway"], how="outer").merge(portDF[["Taxon","Pathway"]],on=["Taxon","Pathway"],how="outer").drop_duplicates()['Pathway'].value_counts()

CAM          61
C3+CAM       20
C3           13
C4+CAM       11
C3-C4+CAM     3
Name: Pathway, dtype: int64

### Orgburn and Edwards (2012)

In [11]:
mogs = pd.read_csv("../Data/Ogburn/leaf_data2.tsv", sep="\t", usecols=["taxon", "palisade_cellsize", "leaf_thickness","ias_by_area"])
mogs = mogs.groupby("taxon").mean().reset_index()
mogs["MAmean (um^2)"] = mogs["palisade_cellsize"]*1000*1000
mogs["LTmean (um)"] = mogs["leaf_thickness"]*1000
mogs = mogs.rename({"ias_by_area":"IASmean (%)", "taxon":"Taxon"},axis=1)
mogs = mogs.drop(["palisade_cellsize", "leaf_thickness"],axis=1)
mogs["Taxon"] = [t.replace("_"," ") for t in mogs["Taxon"]]
mogs["Genus"] = [t.split()[0] for t in mogs["Taxon"]]
mogs["Species"] = [t.split()[1] for t in mogs["Taxon"]]
mogs["MajorLineage"] = ""
mogs["Reference"] = "Ogburn and Edwards (2012); Ogburn and Edwards (2013)"
mogs["Pathway"] = ""

mogs.sort_values(by=["Taxon"])

Unnamed: 0,Taxon,IASmean (%),MAmean (um^2),LTmean (um),Genus,Species,MajorLineage,Reference,Pathway
0,Adenogramma glomerata,0.237898,600.0,263.9692,Adenogramma,glomerata,,Ogburn and Edwards (2012); Ogburn and Edwards ...,
1,Alluaudia procera,0.030253,2915.285714,575.8462,Alluaudia,procera,,Ogburn and Edwards (2012); Ogburn and Edwards ...,
2,Anacampseros australiana,,4629.5,850.275,Anacampseros,australiana,,Ogburn and Edwards (2012); Ogburn and Edwards ...,
3,Anacampseros lanceolata,0.114036,6660.307692,2070.560143,Anacampseros,lanceolata,,Ogburn and Edwards (2012); Ogburn and Edwards ...,
4,Anacampseros rufescens,,,1867.400333,Anacampseros,rufescens,,Ogburn and Edwards (2012); Ogburn and Edwards ...,
5,Anacampseros subnuda,,,3525.701,Anacampseros,subnuda,,Ogburn and Edwards (2012); Ogburn and Edwards ...,
6,Anredera baselloides,0.142392,6811.0,585.683333,Anredera,baselloides,,Ogburn and Edwards (2012); Ogburn and Edwards ...,
7,Calandrinia axilliflora,,5876.2,728.927,Calandrinia,axilliflora,,Ogburn and Edwards (2012); Ogburn and Edwards ...,
8,Calandrinia colchaguensis,,2873.111111,927.2704,Calandrinia,colchaguensis,,Ogburn and Edwards (2012); Ogburn and Edwards ...,
9,Calyptridium umbellatum,,4222.428571,1137.7634,Calyptridium,umbellatum,,Ogburn and Edwards (2012); Ogburn and Edwards ...,


I noticed that the IAS was suspiciously low for _Cistanthe salsoloides_ (0.0000293), so I remeasured it and got 0.13, a more reasonable value. I remeasured a few other taxa that had values that seemed a litte off (like the large MA of Montia linearis), but these all matched Matt's measurements. Still, I'll use my measurements for consistency below.

In [12]:
mogs.loc[mogs["Taxon"].isin(["Cistanthe salsoloides","Hypertelis salsoloides"]), "IASmean (%)"] = np.NaN
mogs.loc[mogs["Taxon"].isin(["Montia linearis","Hypertelis salsoloides", "Pereskia guamacho"]), "MAmean (um^2)"] = np.NaN
mogs = mogs[~mogs["Taxon"].isin(["Claytonia megarhiza"])]

I'll fill in missing measurements from the above. Not all traits were originally measured for all species.

In [13]:
mogsDir = Path("../Data/Ogburn/Remeasured/")
mogsFiles = [f for f in list(mogsDir.rglob("*.csv"))]

In [14]:
genus = []
species = []
iasMean = []
iasSE = []
thicknessMean = []
thicknessSE = []
mAreaMean = []
mAreaSE = []

for p in mogsFiles:
    g,s =  re.split("[0-9_-]",p.stem)[:2]
    genus.append(g)
    species.append(s)
    
    df = pd.read_csv(p)
    df.dropna(how="all")
    
    iasVals = []
    for c in df.columns:
        if (c.lower().startswith("ias")) and (len(df[c].dropna() > 0)):
            series = df[c].dropna()
            ias = series[1:].sum()/series[0]
            iasVals.append(ias)
            del ias
            
        elif c.lower().startswith("thickness"):
            if len(df[c].dropna()) > 0:
                ts  = []
                for t in df[c].dropna():
                    try:
                        ts.append(float(t))
                    except:
                        continue
                thicknessMean.append(np.mean(ts))
                thicknessSE.append(stats.sem(ts))
            else:
                thicknessMean.append(np.nan)
                thicknessSE.append(np.nan)
            
        elif "um^" in c.lower():
            if len(df[c].dropna()) > 0:
                mAreaMean.append(df[c].dropna().mean())
                mAreaSE.append(stats.sem(df[c].dropna()))
            else:
                mAreaMean.append(np.nan)
                mAreaSE.append(np.nan)
                
    if len(iasVals) > 0:
        iasMean.append(np.mean(iasVals))
        iasSE.append(stats.sem(iasVals))
    else:
        iasMean.append(np.nan)
        iasSE.append(np.nan)
    del iasVals
    
miscOgs = pd.DataFrame()
miscOgs["Genus"] = genus
miscOgs["Species"] = species
miscOgs["Taxon"] = miscOgs["Genus"] + " " + miscOgs["Species"]
miscOgs["IASmean (%)"] = iasMean
miscOgs["IASse (%)"] = iasSE
miscOgs["LTmean (um)"] = thicknessMean
miscOgs["LTse (um)"] = thicknessSE
miscOgs["MAmean (um^2)"] = mAreaMean
miscOgs["MAse (um^2)"] = mAreaSE
miscOgs["Reference"] = "Ogburns and Edwards (2012); Ogburns and Edwards (2013)"
miscOgs = miscOgs.merge(mogs[["Taxon","MajorLineage"]], on="Taxon", how="left")
miscOgs.sort_values("Taxon")

Unnamed: 0,Genus,Species,Taxon,IASmean (%),IASse (%),LTmean (um),LTse (um),MAmean (um^2),MAse (um^2),Reference,MajorLineage
35,Adenogramma,teretifolia,Adenogramma teretifolia,0.226999,0.059778,121.342,2.023269,313.217133,17.372898,Ogburns and Edwards (2012); Ogburns and Edward...,
5,Anacampseros,rufescens,Anacampseros rufescens,0.103406,0.031097,,,7170.181933,1008.100584,Ogburns and Edwards (2012); Ogburns and Edward...,
23,Anacampseros,subnuda,Anacampseros subnuda,0.120289,0.021124,,,4392.562067,563.396753,Ogburns and Edwards (2012); Ogburns and Edward...,
32,Calandrinia,axilliflora,Calandrinia axilliflora,0.261763,0.04278,,,,,Ogburns and Edwards (2012); Ogburns and Edward...,
18,Calandrinia,colchaguensis,Calandrinia colchaguensis,0.156518,0.047394,,,,,Ogburns and Edwards (2012); Ogburns and Edward...,
22,Calyptridium,umbellatum,Calyptridium umbellatum,0.259478,0.019914,,,,,Ogburns and Edwards (2012); Ogburns and Edward...,
16,Ceraria,fruticulosa,Ceraria fruticulosa,0.149828,0.056282,,,,,Ogburns and Edwards (2012); Ogburns and Edward...,
8,Cistanthe,picta,Cistanthe picta,0.089785,0.032958,,,,,Ogburns and Edwards (2012); Ogburns and Edward...,
36,Cistanthe,salsoloides,Cistanthe salsoloides,0.13041,0.012014,,,,,Ogburns and Edwards (2012); Ogburns and Edward...,
4,Cistanthe,tweedyi,Cistanthe tweedyi,0.179227,0.042938,,,,,Ogburns and Edwards (2012); Ogburns and Edward...,


In [15]:
mogs = mogs.append(miscOgs)

In [16]:
mogs.loc[mogs["Genus"].isin(["Adenogramma","Hypertelis","Mollugo","Pharnaceum"]), "MajorLineage"] = "Molluginaceae"
mogs.loc[mogs["Genus"].isin(["Alluaudia","Portulacaria","Ceraria"]), "MajorLineage"] = "Didiereaceae"
mogs.loc[mogs["Genus"].isin(["Anacampseros","Grahamia","Talinopsis"]), "MajorLineage"] = "Anacampserotaceae"
mogs.loc[mogs["Genus"].isin(["Anredera","Basella"]), "MajorLineage"] = "Basellaceae"
mogs.loc[mogs["Genus"].isin(["Calandrinia","Calyptridium","Cistanthe","Claytonia","Lewisia", "Montia","Montiopsis","Phemeranthus"]), "MajorLineage"] = "Montiaceae"
mogs.loc[mogs["Genus"].isin(["Halophytum"]), "MajorLineage"] = "Halophytaceae"
mogs.loc[mogs["Genus"].isin(["Limeum"]), "MajorLineage"] = "Limeaceae"
mogs.loc[mogs["Genus"].isin(["Mirabilis"]), "MajorLineage"] = "Nyctaginaceae"
mogs.loc[mogs["Genus"].isin(["Pereskia","Quiabentia"]), "MajorLineage"] = "Cactaceae"
mogs.loc[mogs["Genus"].isin(["Portulaca"]), "MajorLineage"] = "Portulacaceae"
mogs.loc[mogs["Genus"].isin(["Talinum"]), "MajorLineage"] = "Talinaceae"

mogs.loc[mogs["MajorLineage"].isin(["Anacampserotaceae", "Basellaceae", "Talinaceae", ]), "Pathway"] = "C3+CAM"
mogs.loc[mogs["Taxon"].isin(["Anacampseros subnuda",]), "Pathway"] = "CAM"
mogs.loc[mogs["Genus"].isin(["Pereskia","Portulacaria","Cistanthe","Ceraria","Halophytum","Calyptridium","Calandrinia","Phemeranthus","Quiabentia", "Lewisia",]), "Pathway"] = "C3+CAM"
mogs.loc[mogs["Genus"].isin(["Alluaudia",]), "Pathway"] = "CAM"
mogs.loc[mogs["Genus"].isin(["Montia","Limeum","Claytonia","Mirabilis", "Montiopsis"]), "Pathway"] = "C3"
mogs.loc[mogs["Taxon"].isin(["Calandrinia tumida"]), "Pathway"] = "C3"
mogs.loc[mogs["Genus"].isin(["Portulaca",]), "Pathway"] = "C4+CAM"
mogs.loc[mogs["MajorLineage"].isin(["Molluginaceae"]), "Pathway"] = "C3"
mogs.loc[mogs["Taxon"].isin(["Mollugo verticillata", "Mollugo cerviana"]), "Pathway"] = "C3-C4"
mogs["Tissue"] = "leaf"
mogs["Source"] = "Ogburn"

In [17]:
mogs.groupby("Taxon").mean().reset_index().merge(mogs[["Taxon","Pathway"]],on="Taxon")["Pathway"].value_counts()

C3+CAM    46
C3        26
C3-C4      4
CAM        3
C4+CAM     3
Name: Pathway, dtype: int64

### TRY database
These data were accessed via the TRY database of plant traits

In [18]:
tryLDMC = pd.read_csv("../Data/TRY-Database/LDMC.csv")
tryLDMC = tryLDMC.rename({"LDMC_gg-1": "LDMCmean (g/g)", "LDMC_gg-1_SE":"LDMCse (g/g)"}, axis=1)
tryLT = pd.read_csv("../Data/TRY-Database/Leaf-Thickness.csv")
tryLT = tryLT.rename({"leaf_thickness_um":"LTmean (um)","leaf_thickness_um_SE":"LTse (um)"}, axis=1)
tryDF = tryLDMC.merge(tryLT, how="outer")
tryMA = pd.read_csv("../Data/TRY-Database/Mesophyll-Cell-Area.csv")
tryMA = tryMA.rename({"mesophyll_cell_area_um2":"MAmean (um^2)","mesophyll_cell_area_um2_SE":"MAse (um^2)"}, axis=1)
tryDF = tryDF.merge(tryMA, how="outer")
tryDSLA = pd.read_csv("../Data/TRY-Database/SLA-Dry-Mass.csv")
tryDSLA = tryDSLA.rename({"SLAd_mm2mg-1":"dSLAmean (mm^2/mg)","SLAd_mm2mg-1_SE":"dSLAse (mm^2/mg)"}, axis=1)
tryDF = tryDF.merge(tryDSLA, how="outer")
tryFSLA = pd.read_csv("../Data/TRY-Database/SLA-Fresh-Mass.csv")
tryFSLA = tryFSLA.rename({"SLAf_mm2mg-1":"fSLAmean (mm^2/mg)","SLAf_mm2mg-1_SE":"fSLAse (mm^2/mg)"}, axis=1)
tryDF = tryDF.merge(tryFSLA, how="outer")
tryDF = tryDF.rename({"Family":"MajorLineage"}, axis=1)

In [19]:
tryIAS = pd.read_csv("../Data/TRY-Database/IAS.csv")
tryIAS = tryIAS.rename({"IASmean (%)": "IAS (%)"}, axis=1)
tryIAS["IAS (%)"] = tryIAS["IAS (%)"]/100
tryIAS.head()

Unnamed: 0,MajorLineage,Taxon,IAS (%),Reference
0,Solanaceae,Solanum arcanum,0.367,"Muir CD, RP Hangarter, LC Moyle & PA Davis (20..."
1,Solanaceae,Solanum arcanum,0.411,"Muir CD, RP Hangarter, LC Moyle & PA Davis (20..."
2,Solanaceae,Solanum arcanum,0.186,"Muir CD, RP Hangarter, LC Moyle & PA Davis (20..."
3,Solanaceae,Solanum arcanum,0.301,"Muir CD, RP Hangarter, LC Moyle & PA Davis (20..."
4,Solanaceae,Solanum chilense,0.33,"Muir CD, RP Hangarter, LC Moyle & PA Davis (20..."


In [20]:
IASmean = []
IASse = []
taxon = []
for t in tryIAS.Taxon.unique():
    taxon.append(t)
    IASmean.append(tryIAS[tryIAS["Taxon"]==t]["IAS (%)"].mean())
    IASse.append(stats.sem(tryIAS[tryIAS["Taxon"]==t]["IAS (%)"]))
tryIASmeans = pd.DataFrame()
tryIASmeans["Taxon"] = taxon
tryIASmeans["IASmean (%)"] = IASmean
tryIASmeans["IASse (%)"] = IASse
tryIASmeans["Genus"] = [t[0] for t in tryIASmeans["Taxon"].str.split()]
tryIASmeans["Species"] = [t[1] for t in tryIASmeans["Taxon"].str.split()]
tryIASmeans["Pathway"] = "C3"
tryIASmeans["Tissue"] = "leaf"
tryIASmeans["Reference"] = tryIAS["Reference"][0]
tryIASmeans["MajorLineage"] = "Solanaceae"
tryIASmeans.head()

Unnamed: 0,Taxon,IASmean (%),IASse (%),Genus,Species,Pathway,Tissue,Reference,MajorLineage
0,Solanum arcanum,0.31625,0.048948,Solanum,arcanum,C3,leaf,"Muir CD, RP Hangarter, LC Moyle & PA Davis (20...",Solanaceae
1,Solanum chilense,0.295571,0.01712,Solanum,chilense,C3,leaf,"Muir CD, RP Hangarter, LC Moyle & PA Davis (20...",Solanaceae
2,Solanum galapagense,0.302,0.025811,Solanum,galapagense,C3,leaf,"Muir CD, RP Hangarter, LC Moyle & PA Davis (20...",Solanaceae
3,Solanum habrochaites,0.1968,0.029562,Solanum,habrochaites,C3,leaf,"Muir CD, RP Hangarter, LC Moyle & PA Davis (20...",Solanaceae
4,Solanum huaylasense,0.2986,0.019038,Solanum,huaylasense,C3,leaf,"Muir CD, RP Hangarter, LC Moyle & PA Davis (20...",Solanaceae


In [21]:
tryDF = tryDF.merge(tryIASmeans, how="outer")
tryDF["color"] = [cmap[p] for p in tryDF["Pathway"]]
tryDF["Tissue"] = "leaf"
tryDF["Source"] = "TRY"
tryDF.head()

Unnamed: 0,Taxon,LDMCmean (g/g),LDMCse (g/g),MajorLineage,Genus,Pathway,Reference,Species,LTmean (um),LTse (um),...,MAse (um^2),dSLAmean (mm^2/mg),dSLAse (mm^2/mg),fSLAmean (mm^2/mg),fSLAse (mm^2/mg),IASmean (%),IASse (%),Tissue,color,Source
0,Abelia biflora,0.263048,2.484984e-12,Caprifoliaceae,Abelia,C3,"Wang, Han; Harrison, Sandy P; Prentice, Iain C...",biflora,,,...,,20.279533,,,,,,leaf,xkcd:light eggplant,TRY
1,Abies alba,0.330752,0.02804777,Pinaceae,Abies,C3,"Reich, P. B., J. Oleksyn, and I. J. Wright. 20...",alba,569.716429,16.024899,...,,6.020967,1.694338,,,,,leaf,xkcd:light eggplant,TRY
2,Abies concolor,0.452686,0.01044235,Pinaceae,Abies,C3,"Laughlin, D.C., P.Z. Fulé, D.W. Huffman, J. Cr...",concolor,596.5,21.512236,...,,7.046874,0.107726,,,,,leaf,xkcd:light eggplant,TRY
3,Abies lasiocarpa,0.508283,0.006413033,Pinaceae,Abies,C3,"Cornwell, W. K., J. H. C. Cornelissen, K. Amat...",lasiocarpa,758.75,15.860722,...,,20.194931,1.393947,,,,,leaf,xkcd:light eggplant,TRY
4,Acacia aneura,0.30078,0.02692856,Fabaceae,Acacia,C3,"Cornwell WK, Wright I, Turner J, Maire V, Barb...",aneura,495.833333,,...,,2.598784,0.280178,2.974073,0.126443,,,leaf,xkcd:light eggplant,TRY


In [22]:
tryDF[["Taxon","Pathway"]].drop_duplicates()["Pathway"].value_counts()

C3        4624
C4         217
CAM         19
C3+CAM       8
C4+CAM       1
Name: Pathway, dtype: int64

### BROT 2
BROT is a database of Mediterranean plant traits (Tavsanoglu _et al._, 2018).

In [23]:
brot = pd.read_csv("../Data/BROT2_dat.csv")
brot = brot.drop(["ID", "TaxonID", "DataType", "Method", "Region", "Lat", "Long", "Alt", "Comments", "Accuracy"], axis=1)
brot["Genus"] = [t[0] for t in brot["Taxon"].str.split()]
brot["Species"] = [t[1] for t in brot["Taxon"].str.split()]
brot = brot[brot["Trait"].isin(["SLA", "LDMC", "d13C"])]
brot = brot.rename({"SourceID":"Reference"},axis=1)
brot

Unnamed: 0,Taxon,Trait,Data,Units,Reference,Genus,Species
17,Sambucus nigra,SLA,20.9,mm2/mg,Mediavilla2003,Sambucus,nigra
69,Viburnum tinus,SLA,13.7,mm2/mg,Navas2010,Viburnum,tinus
123,Mesembryanthemum nodiflorum,SLA,4.775,mm2/mg,Cartagena2017,Mesembryanthemum,nodiflorum
144,Amaranthus blitoides,SLA,20.05,mm2/mg,Bochet2015,Amaranthus,blitoides
145,Amaranthus blitoides,LDMC,212.19,mg/g,Bochet2015,Amaranthus,blitoides
...,...,...,...,...,...,...,...
25701,Vitis vinifera,SLA,15.78,mm2/mg,Villar2001,Vitis,vinifera
25702,Vitis vinifera,SLA,32.57,mm2/mg,Riva2016u,Vitis,vinifera
25704,Vitis vinifera,LDMC,174,mg/g,Hodgson2017,Vitis,vinifera
25735,Fagonia cretica,SLA,4.007,mm2/mg,Crevillent2017,Fagonia,cretica


The next data set is a list of genera and families that I've been updating from World Flora Online. Plants Of the World Online (POWO), from Kew, is not a better resource, but this worked easily enough without downloading their massive raw data file.

In [24]:
wfo = pd.read_csv("../Data/wfoGenusFamilyMap.csv")
wfo.head()

Unnamed: 0,child,parent
0,Honkenya,Caryophyllaceae
1,Minuartiella,Caryophyllaceae
2,Saxofridericia,Rapateaceae
3,Bactria,Polygonaceae
4,Petroana,Caryophyllaceae


In [25]:
brot = brot.merge(wfo, left_on="Genus", right_on="child", how="left")
brot = brot.drop(["child"], axis=1)
brot = brot.rename({"parent":"MajorLineage"}, axis=1)
brot.head()

Unnamed: 0,Taxon,Trait,Data,Units,Reference,Genus,Species,MajorLineage
0,Sambucus nigra,SLA,20.9,mm2/mg,Mediavilla2003,Sambucus,nigra,Viburnaceae
1,Viburnum tinus,SLA,13.7,mm2/mg,Navas2010,Viburnum,tinus,Adoxaceae
2,Mesembryanthemum nodiflorum,SLA,4.775,mm2/mg,Cartagena2017,Mesembryanthemum,nodiflorum,Aizoaceae
3,Amaranthus blitoides,SLA,20.05,mm2/mg,Bochet2015,Amaranthus,blitoides,Amaranthaceae
4,Amaranthus blitoides,LDMC,212.19,mg/g,Bochet2015,Amaranthus,blitoides,Amaranthaceae


In [26]:
brotSLA = brot[brot["Trait"]=="SLA"][["MajorLineage", "Genus", "Species", "Taxon", "Data", "Reference"]]
brotSLA = brotSLA.rename({"Data":"dSLAmean (mm^2/mg)"}, axis=1)
brotSLA["Pathway"] = "C3"  
# A few manual changes to pathways
c3Taxa = ["Salsola genistoides", "Senecio gallicus", "Senecio vulgaris", "Euphorbia exigua","Euphorbia falcata","Euphorbia helioscopia",
          "Euphorbia helioscopia", "Euphorbia isatidifolia", "Euphorbia minuta","Euphorbia segetalis","Euphorbia serrata","Euphorbia terracina",
          "Euphorbia terracina", "Euphorbia terracina", ]
c4Taxa = ["Amaranthus blitoides","Atriplex semibaccata", "Salsola kali", "Salsola oppositifolia", ]
c3Genera = ["Geranium","Marrubium",]
c3CAMGenera = ["Sedum",]
c4Genera = ["Cynodon", "Dichanthium","Hyparrhenia","Saccharum"]
brotSLA.loc[brotSLA["Taxon"]=="Mesembryanthemum nodiflorum", "Pathway"] = "C3+CAM"
brotSLA.loc[brotSLA["Taxon"].isin(c3Taxa), "Pathway"] = "C3"
brotSLA.loc[brotSLA["Taxon"].isin(c4Taxa), "Pathway"] = "C4"
brotSLA.loc[brotSLA["Genus"].isin(c3Genera), "Pathway"] = "C3"
brotSLA.loc[brotSLA["Genus"].isin(c3CAMGenera), "Pathway"] = "C3+CAM"
brotSLA.loc[brotSLA["Genus"].isin(c4Genera), "Pathway"] = "C4"
brotSLA["Source"] = "BROT"
brotSLA.head()

Unnamed: 0,MajorLineage,Genus,Species,Taxon,dSLAmean (mm^2/mg),Reference,Pathway,Source
0,Viburnaceae,Sambucus,nigra,Sambucus nigra,20.9,Mediavilla2003,C3,BROT
1,Adoxaceae,Viburnum,tinus,Viburnum tinus,13.7,Navas2010,C3,BROT
2,Aizoaceae,Mesembryanthemum,nodiflorum,Mesembryanthemum nodiflorum,4.775,Cartagena2017,C3+CAM,BROT
3,Amaranthaceae,Amaranthus,blitoides,Amaranthus blitoides,20.05,Bochet2015,C4,BROT
5,Amaranthaceae,Atriplex,halimus,Atriplex halimus,9.605,NavarroCano2017b,C3,BROT


In [27]:
brotLDMC = brot[brot["Trait"]=="LDMC"][["MajorLineage", "Genus", "Species", "Taxon", "Data", "Reference"]]
brotLDMC = brotLDMC.rename({"Data":"LDMCmean (g/g)"}, axis=1)
brotLDMC["LDMCmean (g/g)"] = brotLDMC["LDMCmean (g/g)"].astype(float)/1000
brotLDMC["Pathway"] = "C3"
brotLDMC.loc[brotLDMC["Taxon"]=="Mesembryanthemum nodiflorum", "Pathway"] = "C3+CAM"
brotLDMC.loc[brotLDMC["Taxon"].isin(c3Taxa), "Pathway"] = "C3"
brotLDMC.loc[brotLDMC["Taxon"].isin(c4Taxa), "Pathway"] = "C4"
brotLDMC.loc[brotLDMC["Genus"].isin(c3Genera), "Pathway"] = "C3"
brotLDMC.loc[brotLDMC["Genus"].isin(c3CAMGenera), "Pathway"] = "C3+CAM"
brotLDMC.loc[brotLDMC["Genus"].isin(c4Genera), "Pathway"] = "C4"
brotLDMC["Source"] = "BROT"
brotLDMC.head()

Unnamed: 0,MajorLineage,Genus,Species,Taxon,LDMCmean (g/g),Reference,Pathway,Source
4,Amaranthaceae,Amaranthus,blitoides,Amaranthus blitoides,0.21219,Bochet2015,C4,BROT
8,Amaranthaceae,Chenopodium,album,Chenopodium album,0.21034,Bochet2015,C3,BROT
10,Amaranthaceae,Polycnemum,majus,Polycnemum majus,0.11375,Bochet2015,C3,BROT
15,Amaryllidaceae,Allium,ampeloprasum,Allium ampeloprasum,0.14931,Bochet2015,C3,BROT
33,Anacardiaceae,Pistacia,lentiscus,Pistacia lentiscus,0.4464,Paula2006b,C3,BROT


In [28]:
brotSLA[["Taxon","Pathway"]].merge(brotLDMC[["Taxon","Pathway"]], on=["Taxon", "Pathway"], how="outer").drop_duplicates()["Pathway"].value_counts()

C3        617
C4          8
C3+CAM      2
Name: Pathway, dtype: int64

### Heyduk _et al._ (2016) Agavoideae

In [29]:
heyduk2016 = pd.read_csv("../Data/Heyduk_etal_2016_anatomy.csv")
heyduk2016["Genus"] = heyduk2016["Genus"].replace("Mafreda", "Manfreda")
heyduk2016["Taxon"] = heyduk2016["Genus"] + " " + heyduk2016["Species"]

genera = []
species = []
pathways = []
ltMeans = []
ltSEs = []
maMeans = []
maSEs = []
iasMeans = []
iasSEs = []
taxa = []

for t in heyduk2016["Taxon"].unique():
    genera.append(t.split()[0])
    species.append(t.split()[1])
    pathways.append(heyduk2016[heyduk2016["Taxon"]==t].Pathway.iloc[0])
    ltMeans.append(heyduk2016[heyduk2016["Taxon"]==t]["LT (um)"].mean())
    ltSEs.append(stats.sem(heyduk2016[heyduk2016["Taxon"]==t]["LT (um)"]))
    maMeans.append(heyduk2016[heyduk2016["Taxon"]==t]["MA (um^2)"].mean())
    maSEs.append(stats.sem(heyduk2016[heyduk2016["Taxon"]==t]["LT (um)"]))
    iasMeans.append(heyduk2016[heyduk2016["Taxon"]==t]["IAS (%)"].mean()/100)
    iasSEs.append(stats.sem(heyduk2016[heyduk2016["Taxon"]==t]["IAS (%)"])/100)
    taxa.append(t)
    
summaryHeyduk2016 = pd.DataFrame(dtype=object)
summaryHeyduk2016["Genus"] = genera
summaryHeyduk2016["Species"] = species
summaryHeyduk2016["Taxon"] = taxa
summaryHeyduk2016["LTmean (um)"] = ltMeans
summaryHeyduk2016["LTse (um)"] = ltSEs
summaryHeyduk2016["MAmean (um^2)"] = maMeans
summaryHeyduk2016["MAse (um^2)"] = maSEs
summaryHeyduk2016["IASmean (%)"] = iasMeans
summaryHeyduk2016["IASse (%)"] = iasSEs
summaryHeyduk2016["Pathway"] = pathways
summaryHeyduk2016["MajorLineage"] = "Agavoideae"
summaryHeyduk2016["Tissue"] = "leaf"
summaryHeyduk2016["Reference"] = "Heyduk et al. (2016)"
summaryHeyduk2016["color"] = [cmap[p] for p in summaryHeyduk2016["Pathway"]]
summaryHeyduk2016["Source"] = "Heyduk et al 2016"
summaryHeyduk2016.head()

Unnamed: 0,Genus,Species,Taxon,LTmean (um),LTse (um),MAmean (um^2),MAse (um^2),IASmean (%),IASse (%),Pathway,MajorLineage,Tissue,Reference,color,Source
0,Agave,palmeri,Agave palmeri,1223.101333,,2835.182233,,0.119167,0.032159,CAM,Agavoideae,leaf,Heyduk et al. (2016),xkcd:lichen,Heyduk et al 2016
1,Agave,schotti,Agave schotti,1315.4255,,2365.219367,,0.075342,0.011812,CAM,Agavoideae,leaf,Heyduk et al. (2016),xkcd:lichen,Heyduk et al 2016
2,Beschorneria,yuccoides,Beschorneria yuccoides,793.456733,,836.192233,,0.120494,0.021646,C3+CAM,Agavoideae,leaf,Heyduk et al. (2016),xkcd:apricot,Heyduk et al 2016
3,Chlorophytum,rhizopendulum,Chlorophytum rhizopendulum,231.4948,15.756147,302.49384,15.756147,0.334074,0.015374,C3,Agavoideae,leaf,Heyduk et al. (2016),xkcd:light eggplant,Heyduk et al 2016
4,Hesperaloe,funifera,Hesperaloe funifera,1282.351222,141.518836,846.1929,141.518836,0.042575,0.002325,CAM,Agavoideae,leaf,Heyduk et al. (2016),xkcd:lichen,Heyduk et al 2016


In [30]:
heyduk2016[["Taxon","Pathway"]].drop_duplicates()["Pathway"].value_counts()

CAM       8
C3        3
C3+CAM    2
Name: Pathway, dtype: int64

### Silvera _et al._ 2005
This table has additional information on isotopes and titrtable acidty, which need to be dropped, and I've updated the taxonomy in the `Taxon` column, but left the original taxonomy in the `Genus` and `Species` columns. There are also a few species where I could not confidently assign pathways because they did not have titratable acidity data and therefore could have been C3+CAM (many C3+CAM orchids have quite negative isotope values)

In [31]:
silvera2005 = pd.read_csv("../Data/Silvera-etal-2005.csv")
silvera2005 = silvera2005[["MajorLineage", "Taxon", "LTmean (um)", "LDMCmean (g/g)", "dSLAmean (cm^2/g)", "Pathway", "Reference", "Tissue"]]
silvera2005["dSLAmean (mm^2/mg)"] = silvera2005["dSLAmean (cm^2/g)"]/10
silvera2005 = silvera2005.drop(["dSLAmean (cm^2/g)"], axis=1)
silvera2005["Genus"] = [s.split()[0] for s in silvera2005["Taxon"]]
silvera2005["Species"] = [" ".join(s.split()[1:]) for s in silvera2005["Taxon"]]
silvera2005 = silvera2005.iloc[silvera2005["Pathway"].dropna().index].reset_index(drop=True)
silvera2005["color"] = [cmap[p] for p in silvera2005["Pathway"]]
silvera2005["Source"] = "Silvera et al 2005"
silvera2005.head()

Unnamed: 0,MajorLineage,Taxon,LTmean (um),LDMCmean (g/g),Pathway,Reference,Tissue,dSLAmean (mm^2/mg),Genus,Species,color,Source
0,Orchidaceae,Acianthera johnsonii,1230.0,0.119,C3,Silvera et al. (2005),leaf,8.0,Acianthera,johnsonii,xkcd:light eggplant,Silvera et al 2005
1,Orchidaceae,Acineta sp.,550.0,0.141,C3,Silvera et al. (2005),leaf,19.7,Acineta,sp.,xkcd:light eggplant,Silvera et al 2005
2,Orchidaceae,Anathallis barbulata,630.0,0.172,C3+CAM,Silvera et al. (2005),leaf,7.9,Anathallis,barbulata,xkcd:apricot,Silvera et al 2005
3,Orchidaceae,Arundina graminifolia,320.0,0.286,C3,Silvera et al. (2005),leaf,11.9,Arundina,graminifolia,xkcd:light eggplant,Silvera et al 2005
4,Orchidaceae,Aspasia epidendroides,570.0,0.189,C3+CAM,Silvera et al. (2005),leaf,14.7,Aspasia,epidendroides,xkcd:apricot,Silvera et al 2005


In [32]:
silvera2005[["Taxon","Pathway"]].drop_duplicates()["Pathway"].value_counts()

C3        101
C3+CAM     53
CAM        42
Name: Pathway, dtype: int64

### Males (2018)
This data set collected by Males (2018) estimated chlorenchyma diameter and thickness, which I'll use to calculate cell area, assuming photosynthetic mesophyll cells are roughly circles. Unfortunately, IAS is given for most species as an absolute area so we cannot use it.

In [33]:
males2018 = pd.read_csv("../Data/Males-2018.csv")
males2018 = males2018.iloc[males2018["Pathway"].dropna().index].reset_index(drop=True)
males2018["MAmean (um^2)"] = np.pi * ((males2018["ChlorenchymaDiameter (um)"]/2)**2)
males2018 = males2018.drop(["HydrenchymaVerticalThickness (um)", "ChlorenchymaDiameter (um)", "ChlorenchymaVertThickness (um)", "IASmean (um^2)", "ChlorenchymaHydrenchymaRatio", "Subfamily", "Genus", "Species"], axis=1)
males2018["Genus"] = [s.split()[0] for s in males2018["Taxon"]]
males2018["Species"] = [" ".join(s.split()[1:]) for s in males2018["Taxon"]]
males2018["Tissue"] = "leaf"
males2018["color"] = [cmap[p] for p in males2018["Pathway"]]
males2018["IASmean (%)"] = males2018["IASmean (%)"]*0.01
males2018["Source"] = "Males 2018"
males2018.head()

Unnamed: 0,Pathway,Taxon,Reference,MajorLineage,LTmean (um),IASmean (%),MAmean (um^2),Genus,Species,Tissue,color,Source
0,CAM,Acanthostachys strobilacea,Males (2018),Bromeliaceae,3079.47,,650.084191,Acanthostachys,strobilacea,leaf,xkcd:lichen,Males 2018
1,CAM,Aechmea alba,Males (2018),Bromeliaceae,812.65,,945.145087,Aechmea,alba,leaf,xkcd:lichen,Males 2018
2,CAM,Aechmea bromeliifolia,Males (2018),Bromeliaceae,2370.44,,1801.273067,Aechmea,bromeliifolia,leaf,xkcd:lichen,Males 2018
3,CAM,Aechmea bromeliifolia,Males (2018),Bromeliaceae,1437.42,,1979.234788,Aechmea,bromeliifolia,leaf,xkcd:lichen,Males 2018
4,CAM,Aechmea capixabae,Males (2018),Bromeliaceae,867.96,,1121.614629,Aechmea,capixabae,leaf,xkcd:lichen,Males 2018


In [34]:
males2018[["Taxon","Pathway"]].drop_duplicates()["Pathway"].value_counts()

CAM       84
C3        67
C3+CAM     8
Name: Pathway, dtype: int64

### Earnshaw _et al._ 1987
Again, we're going to drop a fair number of taxa because we canot be certain of C3 vs. C3+CAM

In [35]:
earnshaw1987 = pd.read_csv("../Data/Earnshaw_etal_1987.csv", usecols=["MajorLineage", "LT (um)", "Tissue", "Reference", "Taxon", "Pathway"])
earnshaw1987 = earnshaw1987.groupby(["MajorLineage", "Tissue", "Reference", "Taxon", "Pathway"]).mean().reset_index()
earnshaw1987["Genus"] = [s.split()[0] for s in earnshaw1987["Taxon"]]
earnshaw1987["Species"] = [" ".join(s.split()[1:]) for s in earnshaw1987["Taxon"]]
earnshaw1987 = earnshaw1987.rename({"LT (um)":"LTmean (um)"},axis=1)
earnshaw1987["Source"] = "Earnshaw et al 1987"
earnshaw1987.head()

Unnamed: 0,MajorLineage,Tissue,Reference,Taxon,Pathway,LTmean (um),Genus,Species,Source
0,Apocynaceae,leaf,Earnshaw et al. (1987),Dischidia imbricata,CAM,1800.0,Dischidia,imbricata,Earnshaw et al 1987
1,Iridaceae,leaf,Earnshaw et al. (1987),Libertia pulchella,C3,200.0,Libertia,pulchella,Earnshaw et al 1987
2,Orchidaceae,leaf,Earnshaw et al. (1987),Agrostophyllum majus,C3,280.0,Agrostophyllum,majus,Earnshaw et al 1987
3,Orchidaceae,leaf,Earnshaw et al. (1987),Agrostophyllum sp.,C3,250.0,Agrostophyllum,sp.,Earnshaw et al 1987
4,Orchidaceae,leaf,Earnshaw et al. (1987),Bryobium eriaeoides,C3+CAM,465.0,Bryobium,eriaeoides,Earnshaw et al 1987


In [36]:
earnshaw1987[["Taxon","Pathway"]].drop_duplicates()["Pathway"].value_counts()

C3        24
C3+CAM    19
CAM       15
Name: Pathway, dtype: int64

## Nelson _et al._ (2008)

In [37]:
nelson = pd.read_csv("../Data/Nelson2008.csv")
nelson["Genus"] = [s.split()[0] for s in nelson["Taxon"]]
nelson["Species"] = [" ".join(s.split()[1:]) for s in nelson["Taxon"]]
nelson = nelson.merge(wfo, left_on="Genus", right_on="child")
nelson = nelson[['parent','Taxon', 'Genus','Species','LTmean (um)','MAmean (um^2)', 'IASmean (%)', 'Pathway']].rename({'parent':"MajorLineage"},axis=1)
nelson["Tissue"] = "leaf"
nelson["Reference"] = "Nelson et al. (2008)"
nelson["IASmean (%)"] = nelson["IASmean (%)"]/100
nelson["Source"] = "Nelson et al 2008"
nelson.head()

Unnamed: 0,MajorLineage,Taxon,Genus,Species,LTmean (um),MAmean (um^2),IASmean (%),Pathway,Tissue,Reference,Source
0,Asparagaceae,Agave americana,Agave,americana,1210,2130,0.086,CAM,leaf,Nelson et al. (2008),Nelson et al 2008
1,Bromeliaceae,Ananas comosus,Ananas,comosus,480,830,0.069,CAM,leaf,Nelson et al. (2008),Nelson et al 2008
2,Asphodelaceae,Aloe pillansii,Aloe,pillansii,2490,3330,0.128,CAM,leaf,Nelson et al. (2008),Nelson et al 2008
3,Crassulaceae,Crassula argentea,Crassula,argentea,2220,3270,0.146,CAM,leaf,Nelson et al. (2008),Nelson et al 2008
4,Commelinaceae,Callisia fragrans,Callisia,fragrans,370,610,0.33,C3+CAM,leaf,Nelson et al. (2008),Nelson et al 2008


In [38]:
nelson[["Taxon","Pathway"]].drop_duplicates()["Pathway"].value_counts()

CAM       15
C3+CAM     4
C3         4
C4         4
Name: Pathway, dtype: int64

## Lujan _et al._ (2022)

In [39]:
lujan = pd.read_csv("../Data/Clusia_leaf_anatomy_summary.csv")
lujan = lujan.rename({"Taxa included in RAD analysis":"TipLabel"},axis=1)

species = []
for t in lujan["TipLabel"]:
    split = t.split("_")
    if split[1] == "sp":
        print(t)
        species.append(" ".join(split[1:]))
    else:
        species.append(split[1])
        
lujan["Species"] = species
lujan["Taxon"] = lujan["Genus"] + " " + lujan["Species"]
lujan = lujan.merge(lujan[["Taxon", "Leaf_thickness", "PM_cell_size"]].groupby("Taxon").agg("mean").reset_index().rename({"Leaf_thickness":"LTmean (um)", "PM_cell_size":"MAmean (um^2)"}, axis=1), on="Taxon")
lujan = lujan[["MajorLineage","Genus","Species","Taxon","LTmean (um)", "MAmean (um^2)", "Pathway"]]
lujan["Reference"] = "Lujan et al. (2022)"
lujan["Tissue"] = "leaf"
lujan = lujan.drop_duplicates().reset_index(drop=True)
lujan["Source"] = "Lujan et al 2022"
lujan

C_sp_484
C_sp_645
Chrysochlamys_sp_591c
Dystovomita_sp_586c


Unnamed: 0,MajorLineage,Genus,Species,Taxon,LTmean (um),MAmean (um^2),Pathway,Reference,Tissue,Source
0,Clusiaceae,Clusia,amazonica,Clusia amazonica,339.5,657.3,C3+CAM,Lujan et al. (2022),leaf,Lujan et al 2022
1,Clusiaceae,Clusia,araracuare,Clusia araracuare,726.8,1278.2,C3+CAM,Lujan et al. (2022),leaf,Lujan et al 2022
2,Clusiaceae,Clusia,brachycarpa,Clusia brachycarpa,352.7,275.6,C3+CAM,Lujan et al. (2022),leaf,Lujan et al 2022
3,Clusiaceae,Clusia,coclensis,Clusia coclensis,615.6,1418.3,C3+CAM,Lujan et al. (2022),leaf,Lujan et al 2022
4,Clusiaceae,Clusia,columnaris,Clusia columnaris,624.4,2520.0,C3+CAM,Lujan et al. (2022),leaf,Lujan et al 2022
...,...,...,...,...,...,...,...,...,...,...
60,Clusiaceae,Dystovomita,sp 586c,Dystovomita sp 586c,505.0,1247.5,C3,Lujan et al. (2022),leaf,Lujan et al 2022
61,Clusiaceae,Tomovita,caputmonsia,Tomovita caputmonsia,427.5,544.3,C3,Lujan et al. (2022),leaf,Lujan et al 2022
62,Clusiaceae,Tomovita,lanceolata,Tomovita lanceolata,318.8,796.2,C3,Lujan et al. (2022),leaf,Lujan et al 2022
63,Clusiaceae,Tomovita,panamaea,Tomovita panamaea,312.1,373.0,C3,Lujan et al. (2022),leaf,Lujan et al 2022


In [40]:
lujan[["Taxon","Pathway"]].drop_duplicates()["Pathway"].value_counts()

C3+CAM    55
C3         9
CAM        1
Name: Pathway, dtype: int64

### Combining dataframes

In [41]:
morph = pd.concat([dbg,elDF,portDF,tryDF, brotSLA, brotLDMC, summaryHeyduk2016, silvera2005, males2018, earnshaw1987, nelson, mogs,lujan])
meanCols = ['IASmean (%)', 'LDMCmean (g/g)', 'LTmean (um)', 'MAmean (um^2)', 'dSLAmean (mm^2/mg)', 'fSLAmean (mm^2/mg)',]
for mc in meanCols:
    morph[mc] = morph[mc].astype(float)
morph["color"] = [cmap[p] for p in morph["Pathway"]]
morph

Unnamed: 0,MajorLineage,Genus,Species,Taxon,Tissue,Pathway,MAmean (um^2),MAse (um^2),LTmean (um),LTse (um),...,IASse (%),Reference,color,Source,LDMCmean (g/g),LDMCse (g/g),dSLAmean (mm^2/mg),dSLAse (mm^2/mg),fSLAmean (mm^2/mg),fSLAse (mm^2/mg)
0,Agavoideae,Agave,americana,Agave americana,Leaf,CAM,1339.683455,20.391867,4887.368333,51.895283,...,0.013377,This publication,xkcd:lichen,DBG,,,,,,
1,Agavoideae,Agave,americana,Agave americana,Leaf,CAM,2212.446822,36.936013,3204.702000,17.616073,...,0.024675,This publication,xkcd:lichen,DBG,,,,,,
2,Agavoideae,Agave,americana,Agave americana,Leaf,CAM,3090.210853,53.191428,4402.634667,114.466009,...,0.050523,This publication,xkcd:lichen,DBG,,,,,,
3,Agavoideae,Agave,attenuata,Agave attenuata,Leaf,CAM,3013.052933,175.556512,2935.331667,57.566621,...,,This publication,xkcd:lichen,DBG,,,,,,
4,Agavoideae,Agave,bovicornuta,Agave bovicornuta,Leaf,CAM,3223.139679,84.612126,2729.777000,82.985558,...,0.009803,This publication,xkcd:lichen,DBG,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60,Clusiaceae,Dystovomita,sp 586c,Dystovomita sp 586c,leaf,C3,1247.500000,,505.000000,,...,,Lujan et al. (2022),xkcd:light eggplant,Lujan et al 2022,,,,,,
61,Clusiaceae,Tomovita,caputmonsia,Tomovita caputmonsia,leaf,C3,544.300000,,427.500000,,...,,Lujan et al. (2022),xkcd:light eggplant,Lujan et al 2022,,,,,,
62,Clusiaceae,Tomovita,lanceolata,Tomovita lanceolata,leaf,C3,796.200000,,318.800000,,...,,Lujan et al. (2022),xkcd:light eggplant,Lujan et al 2022,,,,,,
63,Clusiaceae,Tomovita,panamaea,Tomovita panamaea,leaf,C3,373.000000,,312.100000,,...,,Lujan et al. (2022),xkcd:light eggplant,Lujan et al 2022,,,,,,


I've done my best to catch typos, spelling errors, and fill in missing information. An final version of the dataset is available in this repo (`All-morphological-data.csv`), but I wanted to show how I treated the raw data.

In [42]:
morph.loc[morph["Genus"]=="Aphananthe", "MajorLineage"] = "Cannabaceae"
morph.loc[morph["Taxon"]=="Argyrodendron peralatum", "Taxon"] = "Heritiera peralata"
morph.loc[morph["Taxon"]=="Heritiera peralata", "MajorLineage"] = "Malvaceae"
morph.loc[morph["Taxon"]=="Blechnum penna-marina", "MajorLineage"] = "Aspleniaceae"
morph.loc[morph["Taxon"]=="Clarisia racemosa", "MajorLineage"] = "Moraceae"
morph.loc[morph["Taxon"]=="Coronopus didymus", "MajorLineage"] = "Brassicaceae"
morph.loc[morph["Taxon"]=="Coronopus didymus", "Taxon"] = "Lepidium didymum"
morph.loc[morph["Taxon"]=="Dactylus glomerata", "MajorLineage"] = "Poaceae"
morph.loc[morph["Taxon"]=="Dactylus glomerata", "Taxon"] = "Dactylis glomerata"
morph.loc[morph["Taxon"]=="Evodia fargesii", "MajorLineage"] = "Rutaceae"
morph.loc[morph["Taxon"]=="Evodia fargesii", "Taxon"] = "Tetradium glabrifolium"
morph.loc[morph["Taxon"]=="Evodia lepta", "MajorLineage"] = "Rutaceae"
morph.loc[morph["Taxon"]=="Evodia lepta", "Taxon"] = "Melicope pteleifolia"
morph.loc[morph["Taxon"]=="Glaux maritima", "MajorLineage"] = "Primulaceae"
morph.loc[morph["Taxon"]=="Glaux maritima", "Taxon"] = "Lysimachia maritima"
morph.loc[morph["Taxon"]=="Lloydia serotina", "MajorLineage"] = "Liliaceae"
morph.loc[morph["Taxon"]=="Lloydia serotina", "Taxon"] = "Gagea serotina"
morph.loc[morph["Taxon"]=="Manglietia insignis", "MajorLineage"] = "Magnoliaceae"
morph.loc[morph["Taxon"]=="Manglietia insignis", "Taxon"] = "Magnolia insignis"
morph.loc[morph["Taxon"]=="Michelia champaca", "MajorLineage"] = "Magnoliaceae"
morph.loc[morph["Taxon"]=="Michelia champaca", "Taxon"] = "Magnolia champaca"
morph.loc[morph["Taxon"]=="Pernettya alpina", "MajorLineage"] = "Ericaceae"
morph.loc[morph["Taxon"]=="Pernettya alpina", "Taxon"] = "Gaultheria nubicola"
morph.loc[morph["Taxon"]=="Rapanea melanophloeos", "MajorLineage"] = "Primulaceae"
morph.loc[morph["Taxon"]=="Rapanea melanophloeos", "Taxon"] = "Myrsine melanophloeos"
morph.loc[morph["Taxon"]=="Regelia ciliata", "MajorLineage"] = "Myrtaceae"
morph.loc[morph["Taxon"]=="Regelia ciliata", "Taxon"] = "Melaleuca crossota"
morph.loc[morph["Taxon"]=="Agave americana", "MajorLineage"] = "Agavoideae"

morph.loc[morph["Taxon"]=="Sedum album", "Pathway"] = "C3+CAM"
morph.loc[morph["Taxon"]=="Tectaria dracontifolia", "MajorLineage"] = "Polypodiaceae"
morph.loc[morph["Taxon"]=="Tectaria dracontifolia", "Taxon"] = "Draconopteris draconoptera"
morph.loc[morph["Taxon"]=="Yucca brevifolia", "Pathway"] = "C3"
morph.loc[morph["Taxon"]=="Mollugo verticillata", "Pathway"] = "C3-C4"
morph.loc[morph["Genus"].isin(["Pereskia", "Ceraria", "Portulacaria", "Pereskiopsis", "Quiabentia", "Umbilicus", "Sedum", "Lewisia", "Codonanthe"]), "Pathway"] = "C3+CAM"
morph.loc[morph["Taxon"].isin(["Crassula helmsii", "Claytonia perfoliata"]), "Pathway"] = "C3+CAM"
morph.loc[morph["Taxon"]=="Atriplex halimus", "Pathway"] = "C4"
morph.loc[morph["Genus"]=="Lygeum", "Pathway"] = "C3"
morph.loc[morph["Genus"]=="Viburnum", "MajorLineage"] = "Adoxaceae"
morph.loc[morph["Genus"]=="Alluaudia", "MajorLineage"] = "Didiereaceae"
morph.loc[morph["Genus"].isin(["Alluaudia","Vanilla"]), "Pathway"] = "CAM"

morph.loc[morph["Taxon"]=="Opuntia salmiana", "Taxon"] = "Salmonopuntia salmiana"
morph.loc[morph["Taxon"]=="Salmonopuntia salmiana", "Species"] = "salmiana"
morph.loc[morph["Taxon"]=="Salmonopuntia salmiana", "Genus"] = "Salmonopuntia"
morph.loc[morph["Taxon"]=="Ariocarpus reustus", "Taxon"] = "Ariocarpus retusus"
morph.loc[morph["Taxon"]=="Ariocarpus retusus", "Species"] = "retusus"
morph.loc[morph["Taxon"]=="Ariocarpus retusus", "Genus"] = "Ariocarpus"
morph.loc[morph["Taxon"]=="Lophocereus gatseii", "Taxon"] = "Lophocereus gatesii"
morph.loc[morph["Taxon"]=="Lophocereus gatesii", "Species"] = "gatesii"
morph.loc[morph["Taxon"]=="Lophocereus gatesii", "Genus"] = "Lophocereus"
morph["Genus"] = [t.split()[0] for t in morph["Taxon"]]
morph["Species"] = [t.split()[1] for t in morph["Taxon"]]
morph["Tissue"] = morph["Tissue"].fillna("leaf")

In [43]:
nwCalandrinia = ["acaulis","affinis","alba","bracteosa","breweri","caespitosa","carolinii","ciliata","colchaguensis","compacta","conferta","corymbosa","depressa","filifolia","fuegiana",
                 "galapagosa","graminifolia","heterophylla","involucrata","lancifolia","leucopogon","litoralis","minutissima","monandra","nana","nitida","pauciflora","pilosiuscula",
                 "poeppigiana","polyclados","ranunculina","sanguinea","skottsbergii","solisi","spicigera","taltalensis","villaroelii",]
fixCalandrinia = []
for i,row in morph.iterrows():
    if (row["Genus"]=="Calandrinia") and (row["Species"] not in nwCalandrinia):
        fixCalandrinia.append(row["Taxon"].replace("Calandrinia", "Parakeelya"))
    else:
        fixCalandrinia.append(row["Taxon"])
        
morph["Taxon"] = fixCalandrinia
morph["Genus"] = [t.split()[0] for t in morph["Taxon"]]
morph["Species"] = [t.split()[1] for t in morph["Taxon"]]

Remove gymnosperms

In [44]:
morph = morph[~morph["MajorLineage"].isin(["Pinaceae","Taxaceae","Zamiaceae", "Gnetaceae","Cupressaceae",
                                                 "Araucariaceae", "Podocarpaceae", "Sciadopityaceae"])] # sorry gymnos
morph[["Taxon","Pathway"]].drop_duplicates()["Pathway"].value_counts()

C3           5098
CAM           228
C4            223
C3+CAM        199
C4+CAM         11
C3-C4+CAM       3
C3-C4           2
Name: Pathway, dtype: int64

### Reducing multiple accessions

In [45]:
for s in sorted(morph["Source"].unique()):
    print(s)
    print(morph[morph["Source"]==s][["Taxon","Pathway"]].drop_duplicates()["Pathway"].value_counts())

BROT
C3        602
C4          9
C3+CAM      2
Name: Pathway, dtype: int64
DBG
CAM       60
C3        12
C3+CAM     7
Name: Pathway, dtype: int64
Earnshaw et al 1987
C3        24
C3+CAM    19
CAM       15
Name: Pathway, dtype: int64
Edwards Lab unpublished
C3+CAM    13
CAM        1
C3         1
Name: Pathway, dtype: int64
Heyduk et al 2016
CAM       7
C3        4
C3+CAM    2
Name: Pathway, dtype: int64
Lujan et al 2022
C3+CAM    55
C3         9
CAM        1
Name: Pathway, dtype: int64
Males 2018
CAM       84
C3        67
C3+CAM     8
Name: Pathway, dtype: int64
Nelson et al 2008
CAM       14
C3+CAM     5
C3         4
C4         4
Name: Pathway, dtype: int64
Ogburn
C3+CAM    27
C3        17
CAM        2
C3-C4      2
C4+CAM     2
Name: Pathway, dtype: int64
Silvera et al 2005
C3        101
C3+CAM     52
CAM        43
Name: Pathway, dtype: int64
TRY
C3        4501
C4         216
C3+CAM      20
CAM          8
C4+CAM       1
C3-C4        1
Name: Pathway, dtype: int64
Voznesenskaya
C4+CAM   

In [46]:
morph["Tissue"] = morph["Tissue"].str.lower()
morph = morph.drop(morph[(morph["Genus"].isin(["Austrocylindropuntia", "Cylindropuntia"])) & (morph["Tissue"]=="leaf")].index)
morph = morph.drop(morph[(morph["Genus"].isin(["Quiabentia"])) & (morph["Tissue"]=="stem")].index)
reduced = morph.groupby(["MajorLineage", "Taxon", "Tissue", "Pathway"]).mean().reset_index()
reduced = reduced.drop(["MAse (um^2)", "LTse (um)", "IASse (%)"], axis=1)
reduced = reduced[~reduced["MajorLineage"].isin(["Pinaceae","Taxaceae","Zamiaceae", "Gnetaceae","Cupressaceae",
                                                 "Araucariaceae", "Podocarpaceae", "Sciadopityaceae"])] # sorry gymnos
refs = []
camPheno = []
camPhenoCat = []
for i,row in reduced.iterrows():
    refs.append("; ".join(morph[morph["Taxon"]==row["Taxon"]]["Reference"].unique()))
    
    p=row["Pathway"]
    try:
        if p=="CAM":
            camPheno.append("pCAM")
            camPhenoCat.append(2)
        elif "+CAM" in p:
            camPheno.append("mCAM")
            camPhenoCat.append(1)
        else:
            camPheno.append("non-CAM")
            camPhenoCat.append(0)
    except:
        print(p, row["Taxon"])
reduced["CAMpheno"] = camPheno
reduced["CAMphenoCat"] = camPhenoCat    
reduced["Reference"] = refs
reduced["Genus"] = [s.split()[0] for s in reduced["Taxon"]]
reduced["Species"] = [" ".join(s.split()[1:]) for s in reduced["Taxon"]]
reduced["color"] = [cmap[p] for p in reduced["Pathway"]]
reduced = reduced.reset_index(drop=True)
reduced

Unnamed: 0,MajorLineage,Taxon,Tissue,Pathway,MAmean (um^2),LTmean (um),IASmean (%),LDMCmean (g/g),LDMCse (g/g),dSLAmean (mm^2/mg),dSLAse (mm^2/mg),fSLAmean (mm^2/mg),fSLAse (mm^2/mg),CAMpheno,CAMphenoCat,Reference,Genus,Species,color
0,Acanthaceae,Acanthopsis disperma,leaf,C3,,,,,,5.219207,,,,non-CAM,0,"Cornwell WK, Wright I, Turner J, Maire V, Barb...",Acanthopsis,disperma,xkcd:light eggplant
1,Acanthaceae,Acanthus ilicifolius,leaf,C3,,,,,,2.109827,,,,non-CAM,0,eHALOPH - Halophytes Database (version 3.11) T...,Acanthus,ilicifolius,xkcd:light eggplant
2,Acanthaceae,Avicennia alba,leaf,C3,,370.00000,,,,6.570000,0.000000e+00,,,non-CAM,0,eHALOPH - Halophytes Database (version 3.11) T...,Avicennia,alba,xkcd:light eggplant
3,Acanthaceae,Avicennia germinans,leaf,C3,,,,,,8.580402,3.153483e+00,,,non-CAM,0,eHALOPH - Halophytes Database (version 3.11) T...,Avicennia,germinans,xkcd:light eggplant
4,Acanthaceae,Avicennia lanata,leaf,C3,,,,,,4.820000,4.905126e-15,,,non-CAM,0,eHALOPH - Halophytes Database (version 3.11) T...,Avicennia,lanata,xkcd:light eggplant
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5740,Zygophyllaceae,Tribulus terrestris,leaf,C4,,217.69875,,0.279749,0.016782,17.793025,1.745367e+00,,,non-CAM,0,"Cornwell WK, Wright I, Turner J, Maire V, Barb...",Tribulus,terrestris,xkcd:light eggplant
5741,Zygophyllaceae,Zygophyllum fabago,leaf,C3,,,,0.162882,0.010645,6.789692,5.289712e-01,,,non-CAM,0,"Iversen CM, McCormack ML, Powell AS, Blackwood...",Zygophyllum,fabago,xkcd:light eggplant
5742,Zygophyllaceae,Zygophyllum prismatocarpum,leaf,C3,,,,,,3.326751,5.794986e-01,,,non-CAM,0,"Cornwell WK, Wright I, Turner J, Maire V, Barb...",Zygophyllum,prismatocarpum,xkcd:light eggplant
5743,Zygophyllaceae,Zygophyllum xanthoxylon,leaf,C3,,,,0.140000,,,,,,non-CAM,0,"Wang, Han; Harrison, Sandy P; Prentice, Iain C...",Zygophyllum,xanthoxylon,xkcd:light eggplant


In [47]:
reduced[["Taxon","Pathway"]].drop_duplicates()["Pathway"].value_counts()

C3           5091
C4            223
CAM           222
C3+CAM        193
C4+CAM         11
C3-C4+CAM       3
C3-C4           2
Name: Pathway, dtype: int64

In [48]:
reduced[["Taxon","CAMpheno"]].drop_duplicates()["CAMpheno"].value_counts()

non-CAM    5316
pCAM        222
mCAM        207
Name: CAMpheno, dtype: int64

This reduced data set will be the basis for downstream analyses.

In [49]:
# reduced.to_csv("../Data/Merged/All-morphological-data.csv", index=False)
# reduced = pd.read_csv("../Data/Merged/All-morphological-data.csv")

In [50]:
reduced[["Taxon","CAMpheno"]].drop_duplicates()["CAMpheno"].value_counts()

non-CAM    5316
pCAM        222
mCAM        207
Name: CAMpheno, dtype: int64

### Data for phylogenetic analysis
Now I'll take our final time calibrated tree and pull anatomical data for the species it contains for phylogenetic analysis in `R`.

In [51]:
import toytree as tt
import toyplot as tp

In [52]:
tree = tt.tree("../Results/PhyloWeld.timetree.nwk")

In [53]:
tipLabs = []
genera = []
species = []
accNos = []
datasets = []
mls = []
for t in tree.get_tip_labels():
    tipLabs.append(t)
    split = t.split("_")
    g = split[0]
    if split[1] in ["sp", "cf"]:
        s = " ".join(split[1:-1])        
    elif split[1].isnumeric():
        s = ""
    elif "grandiflora_or_procumbens" in t:
        s = "pilosiuscula"
    else:
        s = split[1]
    if split[-1].isnumeric():
        an = split[-1]
        ds = "Edwards"
    else:
        an = None
        ds = "Wang"
    
    genera.append(g)
    species.append(s)
    accNos.append(an)
    datasets.append(ds)
    mls.append(wfo[wfo["child"]==g]["parent"].values[0])

treeDF = pd.DataFrame()
treeDF["tipLabel"] = tipLabs
treeDF["MajorLineage"] = mls
treeDF["Genus"] = genera
treeDF["Species"] = species
treeDF["accNo"] = accNos
treeDF["Dataset"] = datasets
treeDF

Unnamed: 0,tipLabel,MajorLineage,Genus,Species,accNo,Dataset
0,Calandrinia_creethae_39,Montiaceae,Calandrinia,creethae,39,Edwards
1,Calandrinia_creethae_161,Montiaceae,Calandrinia,creethae,161,Edwards
2,Calandrinia_stagnensis_101,Montiaceae,Calandrinia,stagnensis,101,Edwards
3,Calandrinia_mirabilis_85,Montiaceae,Calandrinia,mirabilis,85,Edwards
4,Calandrinia_quadrivalvis_26,Montiaceae,Calandrinia,quadrivalvis,26,Edwards
...,...,...,...,...,...,...
248,Mollugo_pentaphylla,Molluginaceae,Mollugo,pentaphylla,,Wang
249,Stegnosperma_halimifolium,Stegnospermataceae,Stegnosperma,halimifolium,,Wang
250,Limeum_aethiopicum,Limeaceae,Limeum,aethiopicum,,Wang
251,Beta_vulgaris_vulgaris_cds,Amaranthaceae,Beta,vulgaris,,Wang


I've got to do a little work to bring the taxonomy and spelling to match the anatomical data. Note that, for ease, I've left some old taxonomies in place (e.g., _Ceraria_ instead of _Portulacaria_) because they are consistent between tree and data frame. I'm also going to add a boolean `Drop` column that will be used later in `R` to reduce the tree to one accession per species.

In [54]:
treeDF.loc[(treeDF["Genus"]=="Salmiopuntia") & (treeDF["Species"]=="salmiana"), "Genus"] = "Salmonopuntia"
treeDF.loc[treeDF["Species"]=="gatesii", "Genus"] = "Lophocereus"
treeDF.loc[(treeDF["Genus"]=="Talinella"), "Species"] = "pachypodum"
treeDF.loc[(treeDF["Genus"]=="Talinella"), "Genus"] = "Talinum"
treeDF.loc[(treeDF["Genus"]=="Montia") & (treeDF["Species"]=="parviflora"), "Species"] = "parvifolia"
treeDF.loc[(treeDF["Genus"]=="Suessenguthiella"), "Species"] = "scleranthoides"
treeDF.loc[(treeDF["Genus"]=="Mollugo") & (treeDF["Species"]=="cerviana"), "Genus"] = "Paramollugo"
treeDF.loc[(treeDF["Genus"]=="Glinus"), "Species"] = "lotoides"
treeDF.loc[(treeDF["Genus"]=="Leuenbergeria"), "Genus"] = "Pereskia"
treeDF.loc[(treeDF["Genus"]=="Calandrinia") & (treeDF["Species"]=="eremea"), "Species"] = "eremaea"
treeDF.loc[treeDF["Species"]=="colchagensis", "Species"] = "colchaguensis"

fixCalandrinia = []
for i,row in treeDF.iterrows():
    if (row["Genus"]=="Calandrinia") and (row["Species"] not in nwCalandrinia):
        fixCalandrinia.append("Parakeelya")
    else:
        fixCalandrinia.append(row["Genus"])

treeDF["Genus"] = fixCalandrinia
treeDF["Taxon"] = treeDF["Genus"] + " " + treeDF["Species"]
treeDF["Taxon"] = treeDF["Taxon"].str.strip()
treeDF["Drop"] = treeDF.duplicated(subset=["Taxon"])
toDrop = ["Alluaudia_procera_53", "Alluaudia_dumosa_65", "Portulaca_sp_221"]
treeDF.loc[treeDF["tipLabel"].isin(toDrop), "Drop"] = True
treeDF.loc[treeDF["Species"] == "sp", "Drop"] = True
treeDF

Unnamed: 0,tipLabel,MajorLineage,Genus,Species,accNo,Dataset,Taxon,Drop
0,Calandrinia_creethae_39,Montiaceae,Parakeelya,creethae,39,Edwards,Parakeelya creethae,False
1,Calandrinia_creethae_161,Montiaceae,Parakeelya,creethae,161,Edwards,Parakeelya creethae,True
2,Calandrinia_stagnensis_101,Montiaceae,Parakeelya,stagnensis,101,Edwards,Parakeelya stagnensis,False
3,Calandrinia_mirabilis_85,Montiaceae,Parakeelya,mirabilis,85,Edwards,Parakeelya mirabilis,False
4,Calandrinia_quadrivalvis_26,Montiaceae,Parakeelya,quadrivalvis,26,Edwards,Parakeelya quadrivalvis,False
...,...,...,...,...,...,...,...,...
248,Mollugo_pentaphylla,Molluginaceae,Mollugo,pentaphylla,,Wang,Mollugo pentaphylla,False
249,Stegnosperma_halimifolium,Stegnospermataceae,Stegnosperma,halimifolium,,Wang,Stegnosperma halimifolium,False
250,Limeum_aethiopicum,Limeaceae,Limeum,aethiopicum,,Wang,Limeum aethiopicum,False
251,Beta_vulgaris_vulgaris_cds,Amaranthaceae,Beta,vulgaris,,Wang,Beta vulgaris,False


In [55]:
prunedTree = tree.drop_tips(names=list(treeDF[treeDF["Drop"]==True]["tipLabel"].values))

### Data imputation
I'm now going to create the final anatomical data set for phylogenetic analysis using a little imputation for genera that we have in our tree that do not had _species specific data already_. For example, the taxon _Copiapoa desertorum_ is in our tree, but we only have anatomical data from _Copiapoa rupestris_, so I will use data from _Copiapoa ruprestris_ to represent _Copiapoa_ in our tree. I will not impute data for any genera that already have species specific data; the tree will simply be pruned in `R` to remove those species without data.

In [56]:
dataCols = ["MAmean (um^2)", "LTmean (um)","IASmean (%)"]

In [57]:
noImpute = treeDF[treeDF["Drop"]==False].merge(reduced, on=["MajorLineage","Taxon","Genus","Species"], how="left")
noImpute = noImpute[["tipLabel","MajorLineage","Genus","Species","Taxon","Pathway","MAmean (um^2)", "LTmean (um)","IASmean (%)"]]
noImpute.loc[noImpute["MajorLineage"].isin(["Cactaceae", "Didiereaceae"]), "Pathway"] = "CAM"
noImpute.loc[noImpute["MajorLineage"].isin(["Anacampserotaceae","Basellaceae","Montiaceae","Talinaceae"]), "Pathway"] = "C3+CAM"
noImpute.loc[noImpute["Genus"].isin(["Pereskia","Delosperma", "Pereskiopsis", "Quiabentia", "Portulacaria", "Ceraria", "Didierea", ]), "Pathway"] = "C3+CAM"
noImpute.loc[noImpute["Genus"].isin(["Portulaca","Sesuvium"]), "Pathway"] = "C4+CAM"
noImpute.loc[noImpute["Genus"].isin(["Montia","Claytonia","Montiopsis"]), "Pathway"] = "C3"
noImpute.loc[noImpute["Genus"].isin(["Amaranthus",]), "Pathway"] = "C4"
noImpute.loc[noImpute["MajorLineage"].isin(["Molluginaceae","Phytolaccaceae","Limeaceae","Stegnospermataceae","Nyctaginaceae"]), "Pathway"] = "C3"
noImpute.loc[noImpute["Taxon"].isin(["Parakeelya tumida", "Parakeelya granulifera", "Parakeelya arenicola"]), "Pathway"] = "C3"

camPheno = []
camPhenoCat = []
for i,row in noImpute.iterrows():
    p=row["Pathway"]
    try:
        if p=="CAM":
            camPheno.append("pCAM")
            camPhenoCat.append(2)
        elif "+CAM" in p:
            camPheno.append("mCAM")
            camPhenoCat.append(1)
        else:
            camPheno.append("non-CAM")
            camPhenoCat.append(0)
    except:
        print(p, row["Taxon"])
noImpute["CAMpheno"] = camPheno
noImpute["CAMphenoCat"] = camPhenoCat
noImpute.head()

Unnamed: 0,tipLabel,MajorLineage,Genus,Species,Taxon,Pathway,MAmean (um^2),LTmean (um),IASmean (%),CAMpheno,CAMphenoCat
0,Calandrinia_creethae_39,Montiaceae,Parakeelya,creethae,Parakeelya creethae,C3+CAM,,,,mCAM,1
1,Calandrinia_stagnensis_101,Montiaceae,Parakeelya,stagnensis,Parakeelya stagnensis,C3+CAM,,,,mCAM,1
2,Calandrinia_mirabilis_85,Montiaceae,Parakeelya,mirabilis,Parakeelya mirabilis,C3+CAM,,,,mCAM,1
3,Calandrinia_quadrivalvis_26,Montiaceae,Parakeelya,quadrivalvis,Parakeelya quadrivalvis,C3+CAM,6468.855526,2125.910333,0.296044,mCAM,1
4,Calandrinia_pleiopetala_74,Montiaceae,Parakeelya,pleiopetala,Parakeelya pleiopetala,C3+CAM,6427.563,,0.076247,mCAM,1


In [58]:
noImpute.describe()

Unnamed: 0,MAmean (um^2),LTmean (um),IASmean (%),CAMphenoCat
count,59.0,40.0,49.0,206.0
mean,4802.620515,1131.81908,0.152262,1.024272
std,3371.798393,710.147939,0.083744,0.596243
min,523.1704,141.333667,0.033634,0.0
25%,1943.398072,621.329692,0.091856,1.0
50%,4222.428571,863.647,0.128284,1.0
75%,6401.6027,1466.332063,0.207606,1.0
max,15824.9651,3736.362667,0.386109,2.0


In [59]:
generaWdata = sorted(noImpute.dropna(how="all", subset=dataCols)["Genus"].unique())

In [60]:
sorted(generaWdata)

['Alluaudia',
 'Anacampseros',
 'Anredera',
 'Ariocarpus',
 'Astrophytum',
 'Calandrinia',
 'Calyptridium',
 'Ceraria',
 'Claytonia',
 'Echinopsis',
 'Ferocactus',
 'Grahamia',
 'Grusonia',
 'Gymnocalycium',
 'Halophytum',
 'Lewisia',
 'Lophocereus',
 'Mollugo',
 'Montia',
 'Montiopsis',
 'Opuntia',
 'Parakeelya',
 'Peniocereus',
 'Pereskia',
 'Pereskiopsis',
 'Phemeranthus',
 'Portulaca',
 'Portulacaria',
 'Pterocactus',
 'Salmonopuntia',
 'Stetsonia',
 'Tacinga',
 'Talinopsis',
 'Talinum',
 'Tephrocactus',
 'Tunilla']

In [61]:
newData = pd.concat([dbg, elDF, portDF, mogs])
impute = noImpute.copy()
generaWdata = sorted(noImpute.dropna(how="all", subset=dataCols)["Genus"].unique())
for i,row in impute.iterrows():
    common = newData[(newData["Genus"]==row["Genus"]) & (newData["Pathway"]==row["Pathway"])]
    for m in dataCols:
        if (row["MajorLineage"] == "Cactaceae") and (row["Genus"] != "Pereskia") and (m == "LTmean (um)"):
            continue
        elif row["Genus"] in generaWdata:
            continue
        elif (np.isnan(row[m])):
            if len(common[~common[m].isna()])>3:
                mean = common.describe()[m]["mean"]
                std = common.describe()[m]["std"]/np.sqrt(len(common[~common[m].isna()]))
                newVal = np.random.normal(mean,std)
                impute.loc[impute["Taxon"]==row["Taxon"], m] = newVal
                print(row["Taxon"], m)
            elif len(common[~common[m].isna()])>0:
                newVal = common.describe()[m]["mean"]
                impute.loc[impute["Taxon"]==row["Taxon"], m] = newVal
                print(row["Taxon"], m)
            generaWdata.append(row["Genus"])

Cistanthe grandiflora MAmean (um^2)
Stenocereus yunckeri MAmean (um^2)
Echinocereus pectinatus MAmean (um^2)
Copiapoa desertorum MAmean (um^2)
Quiabentia zehntneri MAmean (um^2)
Didierea madagascariensis MAmean (um^2)
Hypertelis walteri MAmean (um^2)
Pharnaceum exiguum MAmean (um^2)
Limeum aethiopicum MAmean (um^2)


So we only imputed mesophyll cell size for 9 species.

In [62]:
impute.describe()

Unnamed: 0,MAmean (um^2),LTmean (um),IASmean (%),CAMphenoCat
count,68.0,40.0,49.0,206.0
mean,4635.681723,1131.81908,0.152262,1.024272
std,3303.491354,710.147939,0.083744,0.596243
min,523.1704,141.333667,0.033634,0.0
25%,1753.363994,621.329692,0.091856,1.0
50%,4276.038157,863.647,0.128284,1.0
75%,6379.184117,1466.332063,0.207606,1.0
max,15824.9651,3736.362667,0.386109,2.0


In [63]:
morphoTips = sorted(impute.dropna(subset=["MAmean (um^2)","LTmean (um)", "IASmean (%)"],how="all")["tipLabel"])

In [64]:
impute = impute.rename({"MAmean (um^2)":"MA","LTmean (um)":"LT","IASmean (%)":"IAS"},axis=1)

In [65]:
impute

Unnamed: 0,tipLabel,MajorLineage,Genus,Species,Taxon,Pathway,MA,LT,IAS,CAMpheno,CAMphenoCat
0,Calandrinia_creethae_39,Montiaceae,Parakeelya,creethae,Parakeelya creethae,C3+CAM,,,,mCAM,1
1,Calandrinia_stagnensis_101,Montiaceae,Parakeelya,stagnensis,Parakeelya stagnensis,C3+CAM,,,,mCAM,1
2,Calandrinia_mirabilis_85,Montiaceae,Parakeelya,mirabilis,Parakeelya mirabilis,C3+CAM,,,,mCAM,1
3,Calandrinia_quadrivalvis_26,Montiaceae,Parakeelya,quadrivalvis,Parakeelya quadrivalvis,C3+CAM,6468.855526,2125.910333,0.296044,mCAM,1
4,Calandrinia_pleiopetala_74,Montiaceae,Parakeelya,pleiopetala,Parakeelya pleiopetala,C3+CAM,6427.563000,,0.076247,mCAM,1
...,...,...,...,...,...,...,...,...,...,...,...
201,Mollugo_pentaphylla,Molluginaceae,Mollugo,pentaphylla,Mollugo pentaphylla,C3,,,,non-CAM,0
202,Stegnosperma_halimifolium,Stegnospermataceae,Stegnosperma,halimifolium,Stegnosperma halimifolium,C3,,,,non-CAM,0
203,Limeum_aethiopicum,Limeaceae,Limeum,aethiopicum,Limeum aethiopicum,C3,552.450600,,,non-CAM,0
204,Beta_vulgaris_vulgaris_cds,Amaranthaceae,Beta,vulgaris,Beta vulgaris,C3,,,,non-CAM,0


In [66]:
# impute.to_csv("../Data/Portullugo-Anatomy-Data.2023-08-09.csv", index=False)

## References
- Ocampo, G. et al. Evolution of leaf anatomy and photosynthetic pathways in Portulacaceae. Am. J. Bot. 100, 2388–2402 (2013).
- Tavsanoglu Ç. & Pausas J.G. 2018. A functional trait database for Mediterranean Basin plants. Scientific Data 5: 180135.
- Voznesenskaya, E. V., Koteyeva, N. K., Edwards, G. E. & Ocampo, G. Revealing diversity in structural and biochemical forms of C4 photosynthesis and a C3-C4 intermediate in genus Portulaca L. (Portulacaceae). J. Exp. Bot. 61, 3647–3662 (2010).
- Voznesenskaya, E. V., Koteyeva, N. K., Edwards, G. E. & Ocampo, G. Unique photosynthetic phenotypes in Portulaca (Portulacaceae): C3-C4 intermediates and NAD-ME C4 species with Pilosoid-type Kranz anatomy. J. Exp. Bot. 68, 225–239 (2017).