In [1]:
import pandas as pd
import numpy as np
import re

In [2]:
df = pd.read_csv('seinet_results.csv', index_col=0)
df.drop(columns = ['Unnamed: 0', 'seinet_status'], inplace = True)
print(df.shape)
print(df.columns)

(1057, 5)
Index(['Genus', 'Species', 'FNA', 'VPAP', 'SW Field Guide'], dtype='object')


# Lifecycle duration

In [3]:
def extract_life_duration(text):
    """
    Extract plant life duration from descriptive text.

    Parameters
    ----------
    text : str or None

    Returns
    -------
    str
        One of:
        'annual', 'biennial', 'perennial',
        'annual/biennial', 'annual/perennial',
        'biennial/perennial', 'unknown'
    """
    if not text or not isinstance(text, str):
        return "unknown"

    t = text.lower()

    found = set()

    # strict word boundaries to avoid false matches
    patterns = {
        "annual": r"\bannual\b",
        "biennial": r"\bbiennial\b",
        "perennial": r"\bperennial\b"
    }

    for label, pattern in patterns.items():
        if re.search(pattern, t):
            found.add(label)

    if not found:
        return "unknown"

    # normalize combinations
    if found == {"annual"}:
        return "annual"
    if found == {"biennial"}:
        return "biennial"
    if found == {"perennial"}:
        return "perennial"
    if found == {"annual", "biennial"}:
        return "annual/biennial"
    if found == {"annual", "perennial"}:
        return "annual/perennial"
    if found == {"biennial", "perennial"}:
        return "biennial/perennial"

    # rare but possible
    return "/".join(sorted(found))

In [4]:
df['duration_FNA'] = df["FNA"].apply(extract_life_duration)
df['duration_VPAP'] = df["VPAP"].apply(extract_life_duration)
df['duration_SWFG'] = df["SW Field Guide"].apply(extract_life_duration)

In [5]:
def consensus_life_duration(row):
    values = [
        row["duration_FNA"],
        row["duration_VPAP"],
        row["duration_SWFG"]
    ]
    values = [v for v in values if v != "unknown"]

    if not values:
        return "unknown"

    # if all agree
    if len(set(values)) == 1:
        return values[0]

    # otherwise keep ambiguity explicit
    return "/".join(sorted(set(values)))

In [6]:
df["duration_consensus"] = df.apply(consensus_life_duration, axis=1)
df["duration_consensus"].value_counts()

duration_consensus
perennial                              468
annual                                 368
unknown                                 92
annual/perennial                        58
annual/annual/perennial                 18
annual/biennial                         15
annual/perennial/perennial              13
annual/biennial/perennial                7
biennial/perennial                       7
annual/biennial/perennial/perennial      4
annual/annual/biennial                   4
biennial                                 1
annual/annual/biennial/perennial         1
biennial/perennial/perennial             1
Name: count, dtype: int64

# Elevation

# Aquatic

# Lifeform

In [14]:
df[df.Genus == 'Carnegiea'].FNA.values

array(['Arthur C. Gibson in Flora of North America (vol. 4) Stems 25+ cm diam., widest where proximal branches arise; pith 10+ cm diam. Flowers usually ter-minal, 6.5-8.5 cm diam.; scales on flower tubes broadly triangular to rounded, green with red apices; ovary with locule to 25 mm; filaments white, short; anthers tan. Seeds: testa thin. 2 n = 22. Flowering early May-late Jun. Sonoran desert scrub; 180-1400 m; Ariz., Calif.; Mexico (Sonora). In its habitat Carnegiea gigantea , the saguaro (also spelled sahuaro), is the most conspicuous and most-studied of all North American cacti. It is the tallest cactus of the flora---indeed, these are the tallest trees of the deserts in the United States.'],
      dtype=object)

In [15]:
df[df.Genus == 'Carnegiea'].VPAP.values

array(['JANAS 29(1) Plant : Massive columnar trees to 15+ m tall. STEM simple or the trunk bearing 1-6(-20+) upright-curving branches usually 2-2.5 m above ground, commonly in subwhorls, sometimes rebranched, to 75+ cm in diameter (widest at lowest branches), green, glabrous but woolly at apices; ribs 12-24, 3-4 cm high, continuous, increasing in number from the narrow base. AREOLES mostly circular, 6-8 mm in diameter, spaced about 2.5 cm apart on rib to nearly contiguous at stem apices, bearing short tan to gray wool Leaves : LEAVES of long shoots minute or obsolete; SPINES yellow to reddish brown, aging gray to gray-black, terete to angular, mostly bulbous-based, divergent; central-most spines stout, 3-5(-10) per areole, mostly l-3 cm long, but the basal one longest, 3.5-5(-8) cm long; peripheral spines finer, 12-15(-19) per areole, 1-2 cm long Flowers : nocturnal but remaining open into morning, solitary in areoles, arranged in masses usually on south sides and below apices of the b

In [18]:
df[df.Genus == 'Parkinsonia']['SW Field Guide'].values

array(['Wiggins 1964, Benson and Darrow 1981, Kearney and Peebles 1969 Common Name : blue paloverde Duration : Perennial Nativity : Native Lifeform : Tree General : Large shrubs to small trees reaching 7-10 m tall with a well-developed trunk.  Small straight spines borne singly at nodes. Bark of twigs and branches bluish green, while older trunks are often gray. Leaves : Leaves are pinnate with single pair of pinnae, with 2-4 pairs of obovate leaflets 4-8 mm long, darkening when dried. Flowers : Found in terminal racemes, 22-28 mm wide, calyx green to yellow-green, lobes reflexed; Petals bright yellow, banner with small orange-red spots basally. Fruits : Straw colored oblong pods 4-10 cm long moderately flattened, mostly indehiscent, seeds 1-6. Ecology : Generally found along washes, plains, and canyons, sometimes on slopes from sea level to 4,000 ft (1219 m); flowers March-April. Distribution : s and w AZ, se CA, Sonora and Baja Calif., MEX. Notes : Leafy branches not or not strongly 