In [1]:
import pandas as pd
import numpy as np
import re

In [2]:
# Load the SEINet results table and discard the two bookkeeping columns
# that are not used anywhere later in the notebook.
df = pd.read_csv('seinet_results.csv', index_col=0)
df = df.drop(columns=['Unnamed: 0', 'seinet_status'])
print(df.shape)
print(df.columns)

(1057, 5)
Index(['Genus', 'Species', 'FNA', 'VPAP', 'SW Field Guide'], dtype='object')


In [3]:
# Full binomial ("Genus species"); used below as the join key against the
# other plant lists and for per-species overrides.
df['Species name'] = df.Genus + ' ' + df.Species

In [4]:
# Master plant list; merged onto df in the next cell by its 'Species' column.
plant_list = pd.read_csv('master_plant_list.csv', index_col=0)

In [5]:
# Left-join the master list onto the SEINet rows by full binomial. Both
# frames carry a 'Species' column, so pandas suffixes them: the master-list
# copy (Species_y) is discarded and the epithet column gets its name back.
df = (
    df.merge(plant_list, left_on='Species name', right_on='Species', how='left')
      .drop(columns='Species_y')
      .rename(columns={'Species_x': 'Species'})
)
df.head()

Unnamed: 0,Genus,Species,FNA,VPAP,SW Field Guide,Species name,Family
0,Carlowrightia,arizonica,,,"Wiggins 1964, Daniel 1984, Kearney and Peebles...",Carlowrightia arizonica,ACANTHACEAE
1,Justicia,californica,,,"Benson and Darrow 1981, Hickman 1993, Powell 1...",Justicia californica,ACANTHACEAE
2,Mesembryanthemum,nodiflorum,"Nancy J. Vivrette, John E. Bleck & Wayne R. Fe...",,FNA 2004 Duration : Annual Nativity : Non-Nati...,Mesembryanthemum nodiflorum,AIZOACEAE
3,Trianthema,portulacastrum,Wayne R. Ferren Jr. in Flora of North America ...,,"Kearney and Peebles 1969, FNA 2004, Correll an...",Trianthema portulacastrum,AIZOACEAE
4,Amaranthus,crassipes,Sergei L. Mosyakin & Kenneth R. Robertson in F...,,,Amaranthus crassipes,AMARANTHACEAE


# Lifecycle duration

In [6]:
def extract_life_duration(text):
    """
    Classify a free-text plant description by life duration.

    The text is lower-cased and searched for the whole words "annual",
    "biennial" and "perennial" (so plurals such as "annuals" do not count).
    Biennials are folded into the neighbouring classes rather than reported
    on their own.

    Parameters
    ----------
    text : str or None
        Descriptive text; anything that is not a non-empty string
        (None, NaN, "") yields 'unknown'.

    Returns
    -------
    str
        'unknown'                    - no keyword found / no usable text
        'annual'                     - annual, biennial, or annual + biennial
        'perennial'                  - perennial, or biennial + perennial
        'annual/perennial'           - both annual and perennial mentioned
        'annual-biennial-perennial'  - all three keywords mentioned
    """
    if not (text and isinstance(text, str)):
        return "unknown"

    lowered = text.lower()

    # Whole-word matches only, to avoid false hits inside longer words.
    hits = frozenset(
        word
        for word in ("annual", "biennial", "perennial")
        if re.search(rf"\b{word}\b", lowered)
    )

    # How each keyword combination collapses to a reported label.
    collapsed = {
        frozenset(): "unknown",
        frozenset({"annual"}): "annual",
        frozenset({"biennial"}): "annual",
        frozenset({"perennial"}): "perennial",
        frozenset({"annual", "biennial"}): "annual",
        frozenset({"annual", "perennial"}): "annual/perennial",
        frozenset({"biennial", "perennial"}): "perennial",
    }
    outcome = collapsed.get(hits)
    if outcome is not None:
        return outcome

    # Only the three-keyword case reaches this point.
    return "-".join(sorted(hits))

In [7]:
# One duration label per text source, parsed independently.
for new_col, source_col in {
    'duration_FNA': "FNA",
    'duration_VPAP': "VPAP",
    'duration_SWFG': "SW Field Guide",
}.items():
    df[new_col] = df[source_col].apply(extract_life_duration)

In [8]:
def consensus_life_duration(row):
    """
    Merge the three per-source duration labels of one row into a single label.

    'unknown' votes are ignored. If the remaining sources agree, that label
    is returned; if they disagree, the distinct labels are sorted and joined
    with '/' so the conflict stays visible (e.g. 'annual/perennial').
    Returns 'unknown' when no source has an opinion.
    """
    informative = {
        row[column]
        for column in ("duration_FNA", "duration_VPAP", "duration_SWFG")
        if row[column] != "unknown"
    }

    if not informative:
        return "unknown"

    # A single distinct label joins to itself; several keep the ambiguity explicit.
    return "/".join(sorted(informative))

In [9]:
# Row-wise consensus across the three per-source duration columns.
df["duration_consensus"] = df.apply(consensus_life_duration, axis=1)

In [10]:
# Collapse the compound labels produced by the consensus step into a single
# duration. Each rule is an exact match on the whole label.
compound_label_rules = (
    ("annual/annual-biennial-perennial", "annual"),
    ("annual-biennial-perennial/perennial", "perennial"),
    ("annual-biennial-perennial", "annual"),
    ("annual/perennial/perennial", "perennial"),
    ("annual/annual/perennial", "annual"),
)
for compound_label, resolved_label in compound_label_rules:
    df.loc[df["duration_consensus"] == compound_label, "duration_consensus"] = resolved_label

# Species-specific override; wins regardless of what the text parsing produced.
is_baileya = (df['Genus'] == 'Baileya') & (df['Species'] == 'multiradiata')
df.loc[is_baileya, 'duration_consensus'] = "perennial"

In [11]:
# Hand-picked durations for individual species (genus, epithet, duration).
manual_durations = (
    ('Atriplex', 'elegans', "annual"),
    ('Ambrosia', 'confertiflora', "perennial"),
    ('Sonchus', 'oleraceus', "annual"),
    ('Xanthisma', 'spinulosum', "perennial"),
    ('Cryptantha', 'holoptera', "perennial"),
    ('Cuscuta', 'umbellata', "annual"),
    ('Chamaesyce', 'pediculifera', "perennial"),
    ('Ditaxis', 'neomexicana', "perennial"),
)
for genus_name, epithet, duration in manual_durations:
    df.loc[(df['Genus'] == genus_name) & (df['Species'] == epithet), 'duration_consensus'] = duration

In [12]:
# Hand-assigned durations for the rows whose consensus is 'annual/perennial'.
# The next cell pairs these positionally (zip) with those rows in DataFrame
# order, so any change to upstream row order or filtering misaligns the labels.
# An 'unknown' entry resets the species to 'unknown' so later passes pick it up.
lifespan = ['unknown', 'unknown', 'unknown', 'unknown', 'unknown', 
            'annual', 'perennial', 'unknown', 'annual', 'perennial', 'annual', 'unknown', 'unknown', 'perennial', 'unknown', 
            'unknown', 'unknown', 'annual', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'perennial', 
            'unknown', 'unknown', 'unknown', 'unknown', 'annual', 'unknown', 'annual', 'annual', 'unknown', 'unknown', 
            'unknown', 'annual', 'annual', 'unknown', 'unknown', 'unknown', 'unknown', 'perennial', 'unknown', 
            'unknown', 'unknown', 'unknown', 'annual', 'annual', 'unknown']

In [13]:
# Snapshot the (Genus, Species) pairs still labelled 'annual/perennial', then
# write the hand-assigned label for each pair; zip stops at the shorter input.
pending_pairs = df.loc[df["duration_consensus"] == 'annual/perennial', ['Genus', 'Species']].values
for (genus_name, epithet), label in zip(pending_pairs, lifespan):
    df.loc[(df['Genus'] == genus_name) & (df['Species'] == epithet), 'duration_consensus'] = label

In [14]:
# Hand-assigned durations for rows whose consensus is 'unknown' while the
# SW Field Guide text parsed as 'annual/perennial' (see the selection in the
# next cell). Matched positionally, so the list depends on DataFrame row order.
lifespan2 = ['perennial', 'annual', 'perennial', 'annual', 'perennial', 'perennial', 'perennial', 'perennial', 'perennial', 'annual', 
             'perennial', 'perennial', 'annual', 'perennial', 'annual', 'perennial', 'annual', 'perennial', 'perennial', 'annual', 
             'perennial', 'annual', 'perennial', 'annual', 'perennial', 'annual', 'perennial', 'annual', 'perennial', 'perennial']

In [15]:
# Rows with no consensus whose SW Field Guide text mentioned both annual and
# perennial: apply the hand-assigned label, pairing by position.
needs_review = (df.duration_consensus == 'unknown') & (df.duration_SWFG == 'annual/perennial')
pending_pairs = df.loc[needs_review, ['Genus', 'Species']].values
for (genus_name, epithet), label in zip(pending_pairs, lifespan2):
    df.loc[(df['Genus'] == genus_name) & (df['Species'] == epithet), 'duration_consensus'] = label

In [16]:
# First batch of hand-assigned durations for rows still 'unknown'. Matched
# positionally against the 'unknown' rows in DataFrame order (next cell);
# entries left as 'unknown' remain unresolved for the following batches.
lifespan3 = ['perennial', 'unknown',  'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 
             'perennial', 'perennial', 'unknown', 'unknown', 'unknown', 'perennial', 'annual', 'perennial', 'unknown', 'unknown', 
             'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 
             'perennial', 'perennial', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 
             'perennial', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 
             'unknown', 'unknown', 'unknown', 'unknown', 'perennial',  'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 
             'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 
             'unknown', 'annual', 'unknown', 'annual', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 
             'perennial', 'unknown', 'perennial', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 
             'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown']

In [17]:
# Walk the rows still lacking a duration, in DataFrame order, and apply the
# matching entry of lifespan3; zip stops at the shorter input.
pending_pairs = df.loc[df.duration_consensus == 'unknown', ['Genus', 'Species']].values
for (genus_name, epithet), label in zip(pending_pairs, lifespan3):
    df.loc[(df['Genus'] == genus_name) & (df['Species'] == epithet), 'duration_consensus'] = label

In [18]:
# Second batch of hand-assigned durations for the rows that are still
# 'unknown' after the previous pass. Matched positionally in DataFrame order;
# NOTE(review): only valid if the preceding cells ran exactly once, in order.
lifespan4 = ['unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'annual', 'unknown', 'unknown', 
             'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 
             'perennial', 'perennial', 'perennial', 'perennial', 'unknown', 'unknown', 'unknown', 'perennial', 'perennial', 'annual', 
             'unknown', 'unknown', 'unknown', 'unknown', 'perennial', 'perennial', 'perennial', 'perennial', 'perennial', 'perennial', 
             'unknown', 'annual', 'unknown', 'unknown', 'unknown', 'perennial', 'perennial', 'perennial', 'perennial', 'perennial', 
             'perennial', 'annual', 'unknown', 'annual', 'annual', 'annual', 'unknown', 'annual', 'perennial', 'perennial', 
             'unknown', 'unknown', 'unknown', 'annual', 'annual', 'annual', 'annual', 'perennial', 'unknown', 'unknown', 
             'unknown', 'unknown', 'perennial', 'perennial', 'perennial', 'perennial', 'perennial', 'perennial', 'perennial', 'annual', 
             'unknown', 'perennial']

In [19]:
# Second pass over the rows still lacking a duration: apply lifespan4 by
# position; zip stops at the shorter input.
pending_pairs = df.loc[df.duration_consensus == 'unknown', ['Genus', 'Species']].values
for (genus_name, epithet), label in zip(pending_pairs, lifespan4):
    df.loc[(df['Genus'] == genus_name) & (df['Species'] == epithet), 'duration_consensus'] = label

In [20]:
# Third batch of hand-assigned durations for the rows that are still
# 'unknown'. Matched positionally in DataFrame order; rows left 'unknown'
# after this pass are dropped by the filter that follows.
lifespan5 = ['perennial', 'annual', 'perennial', 'annual', 'annual', 'annual', 'perennial', 'annual', 'perennial', 'annual', 
             'unknown', 'perennial', 'annual', 'annual', 'annual', 'annual', 'perennial', 'annual', 'annual', 'perennial', 
             'perennial', 'annual', 'unknown', 'annual', 'annual', 'annual', 'perennial', 'annual', 'perennial', 'annual', 
             'perennial', 'perennial', 'perennial', 'annual', 'unknown', 'perennial', 'perennial', 'perennial', 'annual', 'annual']

In [21]:
# Third pass over the rows still lacking a duration: apply lifespan5 by
# position; zip stops at the shorter input.
pending_pairs = df.loc[df.duration_consensus == 'unknown', ['Genus', 'Species']].values
for (genus_name, epithet), label in zip(pending_pairs, lifespan5):
    df.loc[(df['Genus'] == genus_name) & (df['Species'] == epithet), 'duration_consensus'] = label

In [22]:
# Species whose duration is still unresolved are removed from the dataset;
# the index is rebuilt so it is contiguous again.
df = df[df.duration_consensus != 'unknown'].reset_index(drop=True)

In [23]:
# Sanity check: only 'annual' and 'perennial' should remain at this point.
df.duration_consensus.value_counts()

duration_consensus
perennial    579
annual       475
Name: count, dtype: int64

In [24]:
# The per-source duration columns were only needed to build the consensus.
df = df.drop(columns=['duration_FNA', 'duration_VPAP', 'duration_SWFG'])

# Native

In [25]:
# Words/phrases that mark a species as not native. Matched against
# lower-cased text, both as the value of an explicit "Nativity :" field and
# as whole-word phrases anywhere in the description.
NON_NATIVE_TERMS = [
    "non-native",
    "introduced",
    "invasive",
    "exotic",
    "naturalized", 
    "non native", 
    "not native"
]

def parse_nativity(sw_text):
    """
    Classify a description as native / non-native.

    Lookup order:
      1. An explicit "Nativity : <value>" field. The value 'native' gives
         'native'; a value listed in NON_NATIVE_TERMS gives 'non-native'.
      2. Otherwise, any NON_NATIVE_TERMS phrase appearing as a whole word
         anywhere in the text gives 'non-native'.

    'native' is therefore only ever returned from the explicit field.

    Parameters
    ----------
    sw_text : str or None
        Description text; missing values (None / NaN) yield 'unknown'.

    Returns
    -------
    str
        'native', 'non-native', or 'unknown'
    """
    if pd.isna(sw_text):
        return "unknown"

    lowered = sw_text.lower()

    # Step 1: structured field, e.g. "Nativity : Non-Native".
    field = re.search(r"nativity\s*:\s*([a-z\-]+)", lowered)
    declared = field.group(1) if field else None

    if declared == "native":
        return "native"
    if declared in NON_NATIVE_TERMS:
        return "non-native"

    # Step 2: free-text indicators anywhere in the description.
    if any(re.search(rf"\b{term}\b", lowered) for term in NON_NATIVE_TERMS):
        return "non-native"

    return "unknown"

In [26]:
# Text columns that are scanned for nativity information.
NATIVITY_COLUMNS = ["FNA", "VPAP", "SW Field Guide"]

def nativity_consensus(row):
    """
    Combine the nativity verdicts of all text sources for one row.

    Any source saying 'non-native' wins; otherwise any source saying
    'native' wins; otherwise the result is 'unknown'.
    """
    verdicts = {parse_nativity(row[col]) for col in NATIVITY_COLUMNS}

    # Checked in priority order: non-native outranks native.
    for outcome in ("non-native", "native"):
        if outcome in verdicts:
            return outcome

    return "unknown"

In [27]:
# Row-wise nativity verdict drawn from all columns in NATIVITY_COLUMNS.
df["nativity_sw"] = df.apply(nativity_consensus, axis=1)

In [28]:
# Family-level default: cacti with no parsed nativity are treated as native.
df.loc[(df.Family == 'CACTACEAE') & (df.nativity_sw == 'unknown'), 'nativity_sw'] = 'native'

In [29]:
# Remove Festuca octoflora from the dataset.
# NOTE(review): the reason for excluding it is not recorded in the notebook.
df.drop(df[(df.Genus == 'Festuca') & (df.Species == 'octoflora')].index, inplace = True)

In [30]:
# Family-level default: Asparagaceae with no parsed nativity are treated as native.
df.loc[(df.Family == 'ASPARAGACEAE') & (df.nativity_sw == 'unknown'), 'nativity_sw'] = 'native'

In [31]:
# Family-level default: Pteridaceae with no parsed nativity are treated as native.
df.loc[(df.Family == 'PTERIDACEAE') & (df.nativity_sw == 'unknown'), 'nativity_sw'] = 'native'

In [32]:
# Remove every row whose epithet is 'wootoni'. The match is on the epithet
# alone, so it applies to any genus.
# NOTE(review): the reason for excluding it is not recorded in the notebook.
df.drop(df[df.Species == 'wootoni'].index, inplace=True)

In [33]:
# Remove Cheilanthes yavapensis from the dataset.
# NOTE(review): the reason for excluding it is not recorded in the notebook.
df.drop(df[df['Species name'] == 'Cheilanthes yavapensis'].index, inplace = True)

In [34]:
# Hand-assigned nativity for individual species, keyed by full binomial.
manual_nativity = {
    'Hordeum arizonicum': 'native',
    'Hordeum murinum': 'non-native',
    'Rhus kearneyi': 'native',
    'Diaperia verna': 'native',
    'Isocoma coronopifolia': 'native',
    'Stephanomeria schottii': 'native',
    'Cryptantha ganderi': 'native',
}
for binomial, status in manual_nativity.items():
    df.loc[df['Species name'] == binomial, 'nativity_sw'] = status

In [35]:
# First batch of hand-assigned nativity values for rows still 'unknown'.
# Paired positionally (zip, next cell) with those rows in DataFrame order, so
# the list is only valid for the exact row order produced by the cells above.
nativity = ['unknown', 'unknown', 'native', 'unknown', 'native', 'native', 'unknown', 'unknown', 'unknown', 'unknown', 
            'unknown', 'native', 'native', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'native', 'native', 
            'native', 'native', 'unknown', 'unknown', 'unknown', 'native', 'native', 'native', 'native', 'native', 
            'native', 'unknown', 'unknown', 'native', 'non-native', 'unknown', 'unknown', 'native', 'unknown', 'native', 
            'native', 'unknown', 'unknown', 'unknown', 'unknown', 'native', 'unknown', 'unknown', 'native', 'unknown', 
            'native', 'native', 'unknown', 'unknown', 'unknown', 'unknown', 'native', 'native', 'unknown', 'unknown', 
            'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'native', 'native', 'unknown']

In [36]:
# Apply the first batch of hand-assigned nativity values to the rows that are
# still 'unknown', pairing by position; zip stops at the shorter input.
pending_pairs = df.loc[df.nativity_sw == 'unknown', ['Genus', 'Species']].values
for (genus_name, epithet), status in zip(pending_pairs, nativity):
    df.loc[(df['Genus'] == genus_name) & (df['Species'] == epithet), 'nativity_sw'] = status

In [37]:
# Second batch of hand-assigned nativity values for the rows that are still
# 'unknown' after the previous pass. Paired positionally in DataFrame order.
nativity2 = ['unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'native', 'native', 'native', 'unknown', 'unknown', 
             'unknown', 'native', 'native', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'native', 'native', 
             'native', 'unknown', 'unknown', 'unknown', 'native', 'unknown', 'unknown', 'unknown', 'unknown', 'native', 
             'unknown', 'non-native', 'unknown', 'native', 'native', 'unknown', 'unknown', 'unknown', 'native', 'unknown', 
             'unknown']

In [38]:
# Second pass: apply nativity2 to the rows that are still 'unknown', pairing
# by position; zip stops at the shorter input.
pending_pairs = df.loc[df.nativity_sw == 'unknown', ['Genus', 'Species']].values
for (genus_name, epithet), status in zip(pending_pairs, nativity2):
    df.loc[(df['Genus'] == genus_name) & (df['Species'] == epithet), 'nativity_sw'] = status

In [39]:
# Everything still unresolved is assumed to be native.
# NOTE(review): this is a blanket default, not a per-species determination.
df.loc[df.nativity_sw == 'unknown', 'nativity_sw'] = 'native'

In [40]:
# Manual correction applied after the blanket 'native' default.
df.loc[df['Species name'] == 'Ricinus communis', 'nativity_sw'] = 'non-native'

In [41]:
# Manual correction applied after the blanket 'native' default.
df.loc[df['Species name'] == 'Urochloa reptans', 'nativity_sw'] = 'non-native'

In [42]:
# Manual correction applied after the blanket 'native' default.
df.loc[df['Species name'] == 'Melia azedarach', 'nativity_sw'] = 'non-native'

In [43]:
# Sanity check: no 'unknown' values should remain.
df["nativity_sw"].value_counts()

nativity_sw
native        887
non-native    164
Name: count, dtype: int64

# Aquatic

In [44]:
# National wetland plant list; membership in this list is what drives the
# 'aquatic' flag computed below.
nwl_df = pd.read_csv(r'plant_lists/national_wetland_plant_list.csv', index_col = 0)
nwl_df.shape

(2966, 6)

In [45]:
# Peek at a few random rows. No random_state is passed, so the rows shown
# differ from run to run.
nwl_df.sample(5)

Unnamed: 0,Scientific Name,Common Name,Duration,Habit,Sun,Water
2798,Utricularia radiata,"Little Floating Bladderwort, Floating Bladderwort",Annual,Herb,,Wet
2476,Sibbaldia procumbens,Creeping Sibbaldia,Perennial,Subshrub,,
716,Crataegus spathulata,"Littlehip Hawthorn, Pasture Hawthorn, Smallfru...",Perennial,"Shrub, Tree",,
2762,Trillium recurvatum,"Prairie Trillium, Prairie Wake-robin, Bloody B...",Perennial,Herb,,
1471,Lloydia serotina,"Common Alplily, Alplily",Perennial,Herb,,


In [46]:
# Column dtypes and non-null counts of the wetland list.
nwl_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2966 entries, 0 to 2965
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Scientific Name  2966 non-null   object
 1   Common Name      2966 non-null   object
 2   Duration         2965 non-null   object
 3   Habit            2965 non-null   object
 4   Sun              1701 non-null   object
 5   Water            1415 non-null   object
dtypes: object(6)
memory usage: 162.2+ KB


In [47]:
# Left-join against the wetland list by binomial: 'Scientific Name' ends up
# non-null only for species that appear in that list. 'Habit' is carried along
# but dropped again in the next cell without being used.
# NOTE(review): duplicate 'Scientific Name' entries in nwl_df would duplicate rows here.
df = df.merge(nwl_df[['Scientific Name', 'Habit']], left_on = 'Species name', right_on = 'Scientific Name', how = 'left')

In [48]:
# 1 when the species matched an entry of the wetland list, 0 otherwise; the
# merge helper columns are no longer needed afterwards.
df['aquatic'] = (~df["Scientific Name"].isna()).astype(int)
df = df.drop(columns=['Scientific Name', 'Habit'])

In [49]:
# Manual corrections to the wetland-list flag, applied in this order:
# (column to match, value to match, aquatic flag).
aquatic_overrides = (
    ('Genus', 'Populus', 1),
    ('Genus', 'Prosopis', 0),
    ('Genus', 'Dichelostemma', 0),
    ('Genus', 'Lycium', 0),
    ('Genus', 'Agave', 0),
    ('Genus', 'Tamarix', 1),
    ('Species name', 'Celtis reticulata', 1),
    ('Family', 'CYPERACEAE', 1),
    ('Genus', 'Najas', 1),
    ('Genus', 'Nasturtium', 1),
)
for match_column, match_value, flag in aquatic_overrides:
    df.loc[df[match_column] == match_value, 'aquatic'] = flag

In [50]:
# Distribution of the aquatic flag after the manual overrides.
df.aquatic.value_counts()

aquatic
0    923
1    128
Name: count, dtype: int64

In [51]:
# Columns available before the lifeform step.
df.columns

Index(['Genus', 'Species', 'FNA', 'VPAP', 'SW Field Guide', 'Species name',
       'Family', 'duration_consensus', 'nativity_sw', 'aquatic'],
      dtype='object')

# Lifeform

In [52]:
# Vocabulary of lifeform labels used in this section.
# NOTE(review): this set is not referenced by any other cell visible in the notebook.
LIFEFORMS = {"grass", "herb", "subshrub", "shrub", "tree", "vine", "cactus", "succulent", "bryophyte", "pterophyte"}

In [53]:
# Families whose lifeform is decided by family membership alone. Keys are in
# capitalized form ("Poaceae"), which is the form normalize_family() produces
# before the lookup in lifeform_from_family().
FAMILY_LIFEFORMS = {
    # Grasses (sedge and rush families are labelled 'grass' as well)
    "Poaceae": "grass",
    "Cyperaceae": "grass",
    "Juncaceae": "grass",

    # Succulents
    "Crassulaceae": "succulent",
    "Agavaceae": "succulent",

    # Cactus
    "Cactaceae": "cactus",

    # Bryophytes
    "Bryaceae": "bryophyte",
    "Sphagnaceae": "bryophyte",

    # Ferns
    "Pteridaceae": "pterophyte",
    "Dryopteridaceae": "pterophyte",
    "Polypodiaceae": "pterophyte"
}

In [54]:
def normalize_family(family):
    """
    Reduce a raw family value to a bare, capitalized family name.

    Surrounding whitespace is stripped, everything from the first space,
    '(' or ';' onwards is discarded (annotations), and the remainder is
    capitalized (first letter upper-case, rest lower-case).
    Missing values (None / NaN) return None.
    """
    if pd.isna(family):
        return None

    # Keep only the part before the first annotation delimiter.
    bare_name, *_ = re.split(r"[ (;]", family.strip())
    return bare_name.capitalize()

In [55]:
def lifeform_from_family(family):
    """Return the lifeform fixed by the plant's family, or None if the family has no fixed lifeform."""
    return FAMILY_LIFEFORMS.get(normalize_family(family))

In [56]:
# Substrings that signal each lifeform in lower-cased description text.
# Matching is plain substring containment (see extract_lifeforms_from_text),
# so the leading space only anchors the start of a word: " herb" also hits
# "herbaceous", " grass" also hits "grassland", and the plural / " woody herb"
# entries are already covered by their shorter forms.
LIFEFORM_KEYWORDS = {
    "tree": [" tree", " trees"],
    "shrub": [" shrub", " shrubs"],
    "subshrub": [" subshrub"],
    "herb": [" herb", " herbs", " forb", " forbs", " woody herb"],
    "grass": [" grass", " grasses"],
    "vine": [" vine", " climbing", " twining", " liana"],
    "cactus": [" cactus", " cacti"],
    "succulent": [" succulent", " fleshy"],
    "bryophyte": [" moss", " liverwort"],
    "pterophyte": [" fern", " ferns", " frond"]
}

In [57]:
def extract_lifeforms_from_text(text):
    """
    Collect every lifeform whose keywords occur in the text.

    The text is lower-cased and each LIFEFORM_KEYWORDS entry is tested by
    substring containment. Returns a set of lifeform labels, empty when the
    text is missing or nothing matches.
    """
    if pd.isna(text):
        return set()

    lowered = text.lower()
    return {
        label
        for label, needles in LIFEFORM_KEYWORDS.items()
        if any(needle in lowered for needle in needles)
    }

In [58]:
def resolve_lifeform_conflicts(lifeforms):
    """
    Pick a single lifeform from a collection of candidate labels.

    Resolution order:
      1. Pairwise rules, first match wins: tree+shrub -> tree,
         cactus+succulent -> cactus, herb+"woody herb" -> herb,
         vine+shrub -> vine.
      2. A lone candidate is returned as-is.
      3. Otherwise the first candidate found in the fallback ranking.

    Returns None when there are no candidates, or when several candidates
    remain and none of them appears in the fallback ranking.
    """
    candidates = set(lifeforms)

    # (labels that must all be present, label that wins)
    pair_rules = (
        ({"tree", "shrub"}, "tree"),
        ({"cactus", "succulent"}, "cactus"),
        ({"herb", "woody herb"}, "herb"),
        ({"vine", "shrub"}, "vine"),
    )
    for required, winner in pair_rules:
        if required <= candidates:
            return winner

    if len(candidates) == 1:
        (only,) = candidates
        return only

    # Fallback priority; yields None for an empty or unrecognised candidate set.
    ranking = (
        "tree", "shrub", "subshrub", "vine",
        "grass", "herb", "succulent",
        "cactus", "pterophyte", "bryophyte",
    )
    return next((label for label in ranking if label in candidates), None)

In [59]:
def lifeform_from_text_sources(row):
    """
    Infer one lifeform for a row from its FNA, VPAP and SW Field Guide text.

    The three fields are converted with str() and joined with spaces into a
    single text, scanned for lifeform keywords, and the candidates are reduced
    to one label (or None) by resolve_lifeform_conflicts.
    """
    pieces = [str(row.get(source, "")) for source in ("FNA", "VPAP", "SW Field Guide")]
    candidates = extract_lifeforms_from_text(" ".join(pieces))
    return resolve_lifeform_conflicts(candidates)

In [60]:
def determine_lifeform(row):
    """
    Decide the lifeform of one row.

    A family with a fixed lifeform is final; the description text is only
    consulted when the family does not settle it.
    """
    by_family = lifeform_from_family(row["Family"])
    return by_family if by_family is not None else lifeform_from_text_sources(row)

In [61]:
# Automatic first pass; the cells below correct it by hand.
df["lifeform"] = df.apply(determine_lifeform, axis=1)

In [62]:
# Brassicaceae labelled 'grass' by the text pass are relabelled 'herb'
# (presumably keyword false positives such as "grassland" - TODO confirm).
df.loc[(df.lifeform == 'grass') & (df.Family == 'BRASSICACEAE'), 'lifeform'] = 'herb'

In [63]:
# Hand-assigned lifeforms for the Brassicaceae currently labelled 'shrub',
# paired by position with those rows in DataFrame order.
brassica = ['herb', 'herb', 'subshrub', 'herb', 'herb', 'herb']
brassica_shrubs = df.loc[(df.Family == 'BRASSICACEAE') & (df.lifeform == 'shrub'), 'Species name']
for binomial, corrected in zip(brassica_shrubs, brassica):
    df.loc[df['Species name'] == binomial, 'lifeform'] = corrected

In [64]:
# Rule-based corrections of the automatic lifeform labels. Statement order is
# kept as written because later rules act on the results of earlier ones.

# Arabis perennans is removed from the dataset (reason not recorded here).
df.drop(df[df['Species name'] == 'Arabis perennans'].index, inplace = True)
# Brassicaceae labelled vine or succulent are relabelled herb.
df.loc[(df.Family == 'BRASSICACEAE') & (df.lifeform == 'vine'), 'lifeform'] = 'herb'
df.loc[(df.Family == 'BRASSICACEAE') & (df.lifeform == 'succulent'), 'lifeform'] = 'herb'
df.loc[df['Species name'] == 'Cannabis sativa', 'lifeform'] = 'herb'
# Annual 'grass' outside the grass-like families becomes herb. The ASTERACEAE
# line is already covered by the broader rule that follows it.
df.loc[(df.duration_consensus == 'annual') & (df.lifeform == 'grass') & (df.Family == 'ASTERACEAE'), 'lifeform'] = 'herb'
df.loc[(df.duration_consensus == 'annual') & (df.lifeform == 'grass') & (~df.Family.isin(['POACEAE', 'CYPERACEAE', 'JUNCACEAE'])), 'lifeform'] = 'herb'
# An annual labelled tree or shrub is relabelled herb.
df.loc[(df.duration_consensus == 'annual') & (df.lifeform == 'tree'), 'lifeform'] = 'herb'
df.loc[(df.duration_consensus == 'annual') & (df.lifeform == 'shrub'), 'lifeform'] = 'herb'

In [65]:
# Hand-assigned lifeforms for annuals currently labelled 'subshrub', paired
# by position with those rows in DataFrame order.
ann_subshrub = ['herb', 'herb', 'herb', 'herb', 'subshrub', 'herb', 'subshrub', 'herb', 'herb', 'subshrub', 'subshrub', 'herb']
annual_subshrubs = df.loc[(df.duration_consensus == 'annual') & (df.lifeform == 'subshrub'), 'Species name']
for binomial, corrected in zip(annual_subshrubs, ann_subshrub):
    df.loc[df['Species name'] == binomial, 'lifeform'] = corrected

In [66]:
# Ipomoea hederacea is removed from the dataset (reason not recorded here).
df.drop(df[df['Species name'] == 'Ipomoea hederacea'].index, inplace = True)
# Taxon-level lifeform overrides.
df.loc[df.Genus == 'Agave', 'lifeform'] = 'succulent'
df.loc[df.Family == 'SELAGINELLACEAE', 'lifeform'] = 'pterophyte'
df.loc[df.Genus == 'Yucca', 'lifeform'] = 'succulent'
# Any remaining 'grass' outside the three grass-like families becomes herb
# (this time regardless of duration).
df.loc[(df.lifeform == 'grass') & (~df.Family.isin(['POACEAE', 'CYPERACEAE', 'JUNCACEAE'])), 'lifeform'] = 'herb'

In [67]:
# Hand-assigned lifeforms for every row currently labelled 'shrub', paired by
# position (next cell) with shrub_species, i.e. in DataFrame row order.
shrubs = ['subshrub', 'shrub',  'shrub', 'shrub', 'shrub', 'shrub', 'subshrub', 'subshrub', 'subshrub', 'subshrub', 
          'subshrub', 'subshrub', 'subshrub', 'subshrub', 'subshrub', 'shrub', 'shrub', 'subshrub', 'subshrub', 'subshrub', 
          'subshrub', 'subshrub', 'shrub', 'subshrub', 'subshrub', 'subshrub', 'shrub', 'subshrub', 'subshrub', 'subshrub', 
          'subshrub', 'subshrub', 'subshrub', 'subshrub', 'shrub', 'shrub', 'shrub', 'shrub', 'herb', 'shrub', 
          'shrub', 'subshrub', 'subshrub', 'subshrub', 'shrub', 'subshrub', 'shrub', 'shrub', 'subshrub', 'subshrub', 
          'subshrub', 'subshrub', 'subshrub', 'subshrub', 'herb', 'subshrub', 'subshrub', 'subshrub', 'shrub', 'shrub', 
          'subshrub', 'subshrub', 'shrub', 'shrub', 'shrub', 'shrub', 'subshrub', 'shrub', 'shrub', 'shrub', 
          'shrub', 'shrub', 'shrub', 'shrub', 'shrub', 'subshrub', 'herb', 'shrub', 'subshrub', 'subshrub', 
          'subshrub', 'subshrub', 'subshrub', 'shrub', 'subshrub', 'shrub', 'shrub', 'shrub', 'shrub', 'subshrub', 
          'subshrub', 'subshrub', 'herb', 'subshrub', 'subshrub', 'herb', 'shrub', 'shrub', 'shrub', 'shrub', 
          'shrub', 'herb', 'shrub', 'subshrub', 'subshrub', 'subshrub', 'subshrub', 'shrub', 'herb', 'shrub', 
          'subshrub', 'subshrub', 'herb', 'succulent', 'subshrub', 'subshrub', 'subshrub', 'herb', 'shrub', 'shrub', 
          'subshrub', 'shrub', 'subshrub', 'shrub', 'shrub', 'subshrub', 'subshrub', 'shrub', 'herb', 'shrub']
# Snapshot of the species labelled 'shrub' BEFORE relabeling; a later cell
# uses it to skip species already reviewed here when it revisits subshrubs.
shrub_species = df[df.lifeform == 'shrub']['Species name']

In [68]:
# Write the reviewed lifeform for each former 'shrub' species, pairing the
# snapshot of names with the hand-assigned labels by position.
for binomial, corrected in zip(shrub_species, shrubs):
    df.loc[df['Species name'] == binomial, 'lifeform'] = corrected

In [69]:
# Hand-assigned lifeforms for rows labelled 'subshrub' that were not part of
# the shrub review (see the selection in the next cell). Paired by position
# in DataFrame row order.
subshrubs = ['subshrub', 'subshrub', 'subshrub', 'subshrub', 'subshrub', 'subshrub', 'subshrub', 'subshrub', 'subshrub', 'subshrub',
             'subshrub', 'subshrub', 'subshrub', 'herb', 'subshrub', 'herb', 'herb', 'subshrub', 'subshrub', 'subshrub', 
             'subshrub', 'herb', 'herb', 'herb', 'subshrub', 'subshrub', 'herb', 'herb', 'herb', 'subshrub', 
             'subshrub', 'shrub', 'herb', 'subshrub', 'subshrub', 'subshrub', 'herb', 'subshrub', 'herb', 'herb', 
             'herb', 'herb', 'subshrub', 'herb', 'herb', 'herb', 'subshrub', 'herb', 'herb', 'herb', 
             'herb', 'herb', 'herb', 'herb', 'herb', 'herb', 'herb', 'subshrub', 'herb','herb', 
             'herb', 'herb', 'herb', 'herb', 'herb', 'herb', 'herb', 'subshrub', 'herb', 'subshrub', 
             'subshrub']

In [70]:
# Subshrubs that did not come out of the shrub review get their own
# hand-assigned label, paired by position in DataFrame order.
unreviewed_subshrub = (~df['Species name'].isin(shrub_species)) & (df.lifeform == 'subshrub')
for binomial, corrected in zip(df.loc[unreviewed_subshrub, 'Species name'], subshrubs):
    df.loc[df['Species name'] == binomial, 'lifeform'] = corrected

In [71]:
# Late nativity correction (note: this edits nativity_sw, not lifeform).
df.loc[df['Species name'] == 'Cylindropuntia spinosior', 'nativity_sw'] = 'native'

In [72]:
# Genus-level override: all Nolina are labelled succulent.
df.loc[df.Genus == 'Nolina', 'lifeform'] = 'succulent'

In [73]:
# Remove Acacia greggii from the dataset.
# NOTE(review): the reason for excluding it is not recorded in the notebook.
df.drop(df[df['Species name'] == 'Acacia greggii'].index, inplace = True)

In [74]:
# Rows were dropped above; rebuild a contiguous 0..n-1 index.
df = df.reset_index(drop=True)

In [75]:
# Lifeform distribution after the manual corrections.
df['lifeform'].value_counts()

lifeform
herb          504
grass         159
subshrub       98
tree           69
shrub          53
vine           48
cactus         37
pterophyte     23
succulent      13
Name: count, dtype: int64

In [77]:
trees = ['shrub', 'shrub', 'tree', 'tree', 'tree', 'tree', 'tree', 'tree', 'tree', 'tree', 
         'tree', 'tree', 'tree', 'shrub', 'tree', 'shrub', 'shrub', 'parasite', 'tree', 'shrub', 
         'shrub', 'shrub', 'shrub', 'shrub', 

In [78]:
# Print every row labelled 'tree' with its position and SW Field Guide text,
# for manual review of the label.
tree_records = df.loc[df.lifeform == 'tree', ['Species name', 'SW Field Guide']].values
for position, record in enumerate(tree_records):
    print(position, record)

0 ['Rhus kearneyi' nan]
1 ['Peucephyllum schottii'
 "FNA 2006, Keil 2014 (Jepson Online), Kearny and Peebles 1979 Common Name : Schott's pygmycedar Duration : Perennial Nativity : Native Lifeform : Tree General : Much-branched shrubs or small trees, mostly 1-3 m tall, with a rounded shape; stems densely leafy above, naked and whitish-barked below. Leaves : Alternate and sessile; blades narrowly linear, 1-2 cm long by 1-2 mm wide, glabrous, gland-dotted, and resin-varnished; margins generally entire, though rarely some leaves have 1-2 lateral lobes. Flowers : Flower heads discoid, solitary, subtended by leafy bracts; involucre (ring of bracts wrapped around flower head) turbinate to campanulate, 6-12 mm diam, the bracts (phyllaries) 9-18 in 1-2 series, linear to lanceolate with acuminate and gland-dotted tips, the outer phyllaries  grading into the subtending leaves; florets 12-21, all discs, the corollas 7-8 mm high, creamy yellow, sometimes with  purplish tips. Fruits : Achenes 3-4 mm

# Elevation

In [None]:
# Lifeform breakdown of the rows that are not aquatic, not annual and not non-native.
df[(df.aquatic != 1) & (df.duration_consensus != 'annual') & (df.nativity_sw != 'non-native')].lifeform.value_counts()