In [1]:
import pandas as pd
import numpy as np
import re

In [2]:
# Load seinet_results.csv with its first column as the index, then drop the
# two columns that are not used below. Chaining avoids mutating in place.
df = (
    pd.read_csv('seinet_results.csv', index_col=0)
    .drop(columns=['Unnamed: 0', 'seinet_status'])
)
# Show the frame's dimensions and the remaining column names.
print(df.shape, df.columns, sep="\n")

(1057, 5)
Index(['Genus', 'Species', 'FNA', 'VPAP', 'SW Field Guide'], dtype='object')


# Lifecycle duration

In [3]:
def extract_life_duration(text):
    """
    Classify the life duration mentioned in a free-text plant description.

    The lower-cased text is searched for the whole words "annual",
    "biennial" and "perennial"; the set of words found is then collapsed
    to a single label.

    Parameters
    ----------
    text : str or None
        Description to scan. Any falsy or non-string value (None, NaN,
        empty string) is treated as missing.

    Returns
    -------
    str
        'unknown'                   - missing input or no keyword found
        'annual'                    - annual, biennial, or annual + biennial
        'perennial'                 - perennial, or biennial + perennial
        'annual/perennial'          - annual + perennial
        'annual-biennial-perennial' - all three keywords present

    Notes
    -----
    Each keyword is matched with a word boundary on both sides, so plural
    forms such as "annuals" are not recognised.
    """
    if not (text and isinstance(text, str)):
        return "unknown"

    lowered = text.lower()

    # Whole-word patterns so that a keyword embedded in a longer word is ignored.
    keyword_patterns = (
        ("annual", r"\bannual\b"),
        ("biennial", r"\bbiennial\b"),
        ("perennial", r"\bperennial\b"),
    )
    hits = frozenset(
        keyword for keyword, regex in keyword_patterns if re.search(regex, lowered)
    )

    if not hits:
        return "unknown"

    # Biennial is folded into whichever other class it appears with, and
    # into "annual" when it appears alone.
    collapsed = {
        frozenset({"annual"}): "annual",
        frozenset({"biennial"}): "annual",
        frozenset({"perennial"}): "perennial",
        frozenset({"annual", "biennial"}): "annual",
        frozenset({"annual", "perennial"}): "annual/perennial",
        frozenset({"biennial", "perennial"}): "perennial",
    }
    if hits in collapsed:
        return collapsed[hits]

    # Only reached when all three keywords occur in the same text.
    return "-".join(sorted(hits))

In [4]:
# Run the keyword extractor over each source's description column and store
# the label in a matching duration_* column.
for source_column, duration_column in (
    ("FNA", 'duration_FNA'),
    ("VPAP", 'duration_VPAP'),
    ("SW Field Guide", 'duration_SWFG'),
):
    df[duration_column] = df[source_column].apply(extract_life_duration)

In [5]:
def consensus_life_duration(row):
    """
    Merge the three per-source duration labels of one row into one label.

    Parameters
    ----------
    row : mapping-like
        Must provide the keys 'duration_FNA', 'duration_VPAP' and
        'duration_SWFG'.

    Returns
    -------
    str
        'unknown' when every source is 'unknown'; the shared label when the
        remaining sources agree; otherwise the distinct labels sorted and
        joined with '/'. Labels are joined verbatim, so a compound input
        such as 'annual/perennial' can yield repeated tokens
        (e.g. 'annual/perennial/perennial').
    """
    source_keys = ("duration_FNA", "duration_VPAP", "duration_SWFG")
    labels = [row[key] for key in source_keys]

    # Sources without information do not get a vote.
    distinct = sorted({label for label in labels if label != "unknown"})

    if not distinct:
        return "unknown"

    # A single distinct label joins to itself, which covers the
    # "all sources agree" case; disagreements stay explicit.
    return "/".join(distinct)

In [6]:
# Combine the per-source labels row by row. Only the duration_* columns are
# handed over because consensus_life_duration reads nothing else from a row.
duration_columns = ["duration_FNA", "duration_VPAP", "duration_SWFG"]
df["duration_consensus"] = df[duration_columns].apply(consensus_life_duration, axis=1)

In [7]:
# Collapse the compound labels emitted by the consensus step to one class.
compound_label_fixes = {
    "annual/annual-biennial-perennial": "annual",
    "annual-biennial-perennial/perennial": "perennial",
    "annual-biennial-perennial": "annual",
    "annual/perennial/perennial": "perennial",
    "annual/annual/perennial": "annual",
}
for compound_label, resolved_label in compound_label_fixes.items():
    df.loc[df["duration_consensus"] == compound_label, "duration_consensus"] = resolved_label

# Species-level override; it is unconditional, so it wins over any label
# assigned by the mapping above.
is_baileya_multiradiata = (df['Genus'] == 'Baileya') & (df['Species'] == 'multiradiata')
df.loc[is_baileya_multiradiata, 'duration_consensus'] = "perennial"

In [8]:
# Manual per-species overrides as (genus, species, duration) triples; each one
# replaces whatever consensus label the species currently carries.
species_overrides = [
    ('Atriplex', 'elegans', "annual"),
    ('Ambrosia', 'confertiflora', "perennial"),
    ('Sonchus', 'oleraceus', "annual"),
    ('Xanthisma', 'spinulosum', "perennial"),
    ('Cryptantha', 'holoptera', "perennial"),
    ('Cuscuta', 'umbellata', "annual"),
    ('Chamaesyce', 'pediculifera', "perennial"),
    ('Ditaxis', 'neomexicana', "perennial"),
]
for genus, species, duration in species_overrides:
    is_target = (df['Genus'] == genus) & (df['Species'] == species)
    df.loc[is_target, 'duration_consensus'] = duration

In [9]:
# Hand-assigned durations (50 entries). The list carries no species keys: it is
# consumed positionally by a zip over the rows whose duration_consensus is
# 'annual/perennial', so the order must match the row order of that selection.
# NOTE(review): presumably 'unknown' marks species that could not be resolved
# by hand - confirm.
lifespan = ['unknown', 'unknown', 'unknown', 'unknown', 'unknown', 
            'annual', 'perennial', 'unknown', 'annual', 'perennial', 'annual', 'unknown', 'unknown', 'perennial', 'unknown', 
            'unknown', 'unknown', 'annual', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'perennial', 
            'unknown', 'unknown', 'unknown', 'unknown', 'annual', 'unknown', 'annual', 'annual', 'unknown', 'unknown', 
            'unknown', 'annual', 'annual', 'unknown', 'unknown', 'unknown', 'unknown', 'perennial', 'unknown', 
            'unknown', 'unknown', 'unknown', 'annual', 'annual', 'unknown']

In [10]:
# Snapshot the (Genus, Species) pairs that are still 'annual/perennial' and
# overwrite each with the hand-assigned value at the same position in
# `lifespan`.
# NOTE(review): zip stops at the shorter input, so a length mismatch between
# the selection and `lifespan` goes unnoticed - confirm the two line up.
ambiguous_pairs = df.loc[df["duration_consensus"] == 'annual/perennial', ['Genus', 'Species']].values
for (genus, species), manual_duration in zip(ambiguous_pairs, lifespan):
    is_target = (df['Genus'] == genus) & (df['Species'] == species)
    df.loc[is_target, 'duration_consensus'] = manual_duration

In [11]:
# Second batch of hand-assigned durations (30 entries), again matched by
# position: it is zipped against the rows whose consensus is 'unknown' while
# duration_SWFG is 'annual/perennial', in the order that selection returns them.
lifespan2 = ['perennial', 'annual', 'perennial', 'annual', 'perennial', 'perennial', 'perennial', 'perennial', 'perennial', 'annual', 
             'perennial', 'perennial', 'annual', 'perennial', 'annual', 'perennial', 'annual', 'perennial', 'perennial', 'annual', 
             'perennial', 'annual', 'perennial', 'annual', 'perennial', 'annual', 'perennial', 'annual', 'perennial', 'perennial']

In [12]:
# Rows with an 'unknown' consensus whose SW Field Guide label is
# 'annual/perennial' get the hand-assigned value at the same position in
# `lifespan2`.
# NOTE(review): zip truncates silently if the selection and `lifespan2`
# differ in length - confirm the two line up.
needs_manual_label = (df.duration_consensus == 'unknown') & (df.duration_SWFG == 'annual/perennial')
unresolved_pairs = df.loc[needs_manual_label, ['Genus', 'Species']].values
for (genus, species), manual_duration in zip(unresolved_pairs, lifespan2):
    is_target = (df['Genus'] == genus) & (df['Species'] == species)
    df.loc[is_target, 'duration_consensus'] = manual_duration

In [13]:
# Tally how many rows ended up in each consensus class after the manual fixes.
df["duration_consensus"].value_counts()

duration_consensus
perennial    523
annual       438
unknown       96
Name: count, dtype: int64

In [14]:
# Print the VPAP description of every row that is still 'unknown', with a
# blank line after each, for manual inspection.
unresolved_vpap = df.loc[df.duration_consensus == 'unknown', 'VPAP']
for description in unresolved_vpap.values:
    print(description, end="\n\n")

CANOTIA 3(2) PLANT : Densely branched shrubs or small trees, to 3.5 m tall; old bark dark, scaly; branches tan to gray, densely puberulent to glabrate later. LEAVES : simple, entire to serrulate, broadly elliptic, 2-6 cm long, 1.5-3.7 cm wide, leathery, olive-green, with glandular hairs beneath, glabrous to sparsely hirtellous above; tip rounded to acuminate; base rounded to cordate; petioles 3-10 mm long. INFLORESCENCE : dense panicles, 2-4.5 cm long, puberulent; bracts lanceolate to ovate, glandularpuberulent, to 3 mm long. FLOWERS : to 5 mm long; sepals pinkish, puberulent; petals cream, glabrous. FRUIT : lenticular-orbicular, irregular in outline, 10 mm long, 8 mm wide, mottled brown and yellow, glandular-pubescent, viscid. NOTES : 3 subsp.; sw AZ, Baja CA. REFERENCES : John L. Anderson, 2006, Vascular Plants of Arizona: Anacardiaceae. CANOTIA 3 (2): 13-22.

nan

nan

nan

nan

nan

nan

nan

nan

nan

JANAS 29(1) Plant : Barrel cactus with clustering stems, to 50 cm tall. STEM bas

# Elevation

# Aquatic

# Lifeform

# Native