In [None]:
# pip install --upgrade pydantic-ai openai

# Wagner Database AI Extraction

#### Works best with Python 3.10.x–3.11.x (3.10 < version < 3.12); developed with 3.10.13

In [5]:
from enum import Enum
from typing import Optional, List
import os
from openai import OpenAI
import json
import base64
import pandas as pd
import numpy as np
import sys
sys.path.append('/Users/williamharrigan/Desktop/UH/Year_3/semester_2/wagner')
import creds

## pydantic
from pydantic import BaseModel, Field
from openai.lib._pydantic import to_strict_json_schema
from pydantic_ai import Agent
from openai.types.chat.chat_completion_content_part_param import (
    ChatCompletionContentPartTextParam,
    ChatCompletionContentPartImageParam
)

from openai.types.chat.chat_completion_content_part_image_param import (
    ImageURL
)

# Directory the sample page image is read from (relative path, trailing slash included)
photo_dir = "../training_set/"

In [6]:
# Major plant groups.
# NOTE(review): no field in the visible code is typed with this enum --
# HawaiianPlant.description is declared as Optional[str]; confirm whether it should use it.
class Description(str, Enum):
    DICOTS = "Dicots"
    MONOCOTS = "Monocots"
    CONIFERS = "Conifers"
    FERNS = "Ferns and fern allies"

# Allowed values for HawaiianPlant.stem_hair_type.
# Member list is the same as LeafHairType.
class StemHairType(str, Enum):
    DENDRITIC = "DENDRITIC"
    GLABROUS = "GLABROUS"
    HIRSUTE = "HIRSUTE"
    HISPID = "HISPID"
    LEPIDOTE = "LEPIDOTE"
    PILOSE = "PILOSE"
    PUBERULENT = "PUBERULENT"
    STRIGOSE = "STRIGOSE"
    STELLATE = "STELLATE"
    TOMENTOSE = "TOMENTOSE"
    VILLOUS = "VILLOUS"
    GLAUCOUS = "GLAUCOUS"
    
# Allowed values for HawaiianPlant.fruit_type.
class FruitType(str, Enum):
    ACHENE = "ACHENE"
    AGGREGATE = "AGGREGATE"
    ARTICLE = "ARTICLE"
    BERRY = "BERRY"
    CAPSULE = "CAPSULE"
    CARYOPSIS = "CARYOPSIS"
    DRUPE = "DRUPE"
    FOLLICLE = "FOLLICLE"
    LEGUME = "LEGUME"
    MERICARP = "MERICARP"
    MULTIPLE = "MULTIPLE"
    NUT = "NUT"
    PEPO = "PEPO"
    POME = "POME"
    SCHIZOCARP = "SCHIZOCARP"
    SILICLE = "SILICLE"
    SILIQUE = "SILIQUE"
    SYCONIUM = "SYCONIUM"
    
# Allowed leaf-shape values; used by HawaiianPlant.leaf_shape_type,
# leaflet_leaf_shape and juvenile_leaf_shape_type.
# Same member names as LeafShape; only the AWL_SHAPED value differs
# ("AWL_SHAPED" here, "AWL-SHAPED" there).
class LeafShapeType(str, Enum):
    ACEROSE = "ACEROSE"
    AWL_SHAPED = "AWL_SHAPED"
    GLADIATE = "GLADIATE"
    HASTATE = "HASTATE"
    CORDATE = "CORDATE"
    DELTOID = "DELTOID"
    LANCEOLATE = "LANCEOLATE"
    LINEAR = "LINEAR"
    ELLIPTIC = "ELLIPTIC"
    ENSIFORM = "ENSIFORM"
    LYRATE = "LYRATE"
    OBCORDATE = "OBCORDATE"
    FALCATE = "FALCATE"
    FLABELLATE = "FLABELLATE"
    OBDELTOID = "OBDELTOID"
    OBELLIPTIC = "OBELLIPTIC"
    OBLANCEOLATE = "OBLANCEOLATE"
    OBLONG = "OBLONG"
    PERFOLIATE = "PERFOLIATE"
    QUADRATE = "QUADRATE"
    OBOVATE = "OBOVATE"
    ORBICULAR = "ORBICULAR"
    RENIFORM = "RENIFORM"
    RHOMBIC = "RHOMBIC"
    OVAL = "OVAL"
    OVATE = "OVATE"
    ROTUND = "ROTUND"
    SAGITTATE = "SAGITTATE"
    PANDURATE = "PANDURATE"
    PELTATE = "PELTATE"
    SPATULATE = "SPATULATE"
    SUBULATE = "SUBULATE"

# Two-value stem hair classification.
# Only referenced by the commented-out HawaiianPlant.stem_hairs field in the visible code.
class StemHairs(str, Enum):
    DENDRITIC = "DENDRITIC"
    GLABROUS = "GLABROUS"

# Allowed values for HawaiianPlant.phyllotaxy_type (arrangement of leaves around the stem).
class PhyllotaxyType(str, Enum):
    ALTERNATE = "ALTERNATE"
    OPPOSITE = "OPPOSITE"
    WHORLED = "WHORLED"
    DECUSSATE = "DECUSSATE"
    DISTICHOUS = "DISTICHOUS"
    EQUITANT = "EQUITANT"
    TERNATE = "TERNATE"
    CAULINE = "CAULINE"

# Allowed values for HawaiianPlant.imm_leaf_type (immature leaf type).
# These five members are also the first five members of LeafType.
class ImmLeafType(str, Enum):
    SIMPLE = "SIMPLE"
    BIPINNATE = "BIPINNATE"
    PALMATE = "PALMATE"
    PINNATE = "PINNATE"
    TRIFOLIATE = "TRIFOLIATE"

# Allowed values for HawaiianPlant.inflorescence_type.
# NOTE(review): "VERTISCILLATE" may be a misspelling of "VERTICILLATE" -- confirm
# against the target database vocabulary before changing, since the value is emitted as data.
class InflorescenceType(str, Enum):
    CATKIN = "CATKIN"
    CYME = "CYME"
    HEAD = "HEAD"
    PANICLE = "PANICLE"
    RACEME = "RACEME"
    SPATHE_SPADIX = "SPATHE_SPADIX"
    THYRSE = "THYRSE"
    UMBEL = "UMBEL"
    VERTISCILLATE = "VERTISCILLATE"
    SOLITARY = "SOLITARY"
    SPIKE = "SPIKE"
    LANCEOLATE = "LANCEOLATE"
    GLOBOSE = "GLOBOSE"
    INVOLUCRE = "INVOLUCRE"
    CORYMBOSE = "CORYMBOSE"
    STROBILOID = "STROBILOID"

# Allowed leaf-hair values; used by HawaiianPlant.leaf_hair_upper,
# leaf_hair_lower and juvenile_leaf_hair.
# Member list is the same as StemHairType.
class LeafHairType(str, Enum):
    DENDRITIC = "DENDRITIC"
    GLABROUS = "GLABROUS"
    HIRSUTE = "HIRSUTE"
    HISPID = "HISPID"
    LEPIDOTE = "LEPIDOTE"
    PILOSE = "PILOSE"
    PUBERULENT = "PUBERULENT"
    STRIGOSE = "STRIGOSE"
    STELLATE = "STELLATE"
    TOMENTOSE = "TOMENTOSE"
    VILLOUS = "VILLOUS"
    GLAUCOUS = "GLAUCOUS"

# Allowed values for HawaiianPlant.leaf_type and juvenile_leaf_type.
# Several values contain a space (e.g. "SIMPLE ENTIRE") while the member name uses an underscore.
class LeafType(str, Enum):
    SIMPLE = "SIMPLE"
    BIPINNATE = "BIPINNATE"
    PALMATE = "PALMATE"
    PINNATE = "PINNATE"
    TRIFOLIATE = "TRIFOLIATE"
    SIMPLE_ENTIRE = "SIMPLE ENTIRE"
    PALMATELY_LOBED = "PALMATELY LOBED"
    PINNATELY_LOBED = "PINNATELY LOBED"
    COMPOUND = "COMPOUND"
    PALMATELY_COMPOUND = "PALMATELY COMPOUND"
    OBOVATE = "OBOVATE"
    LINEAR = "LINEAR"
    SAGITTATE = "SAGITTATE"
    OVATE = "OVATE"
    LANCEOLATE = "LANCEOLATE"
    RENIFORM = "RENIFORM"

# Allowed values for HawaiianPlant.breeding_type.
# The POLYGAMO_* values use a hyphen where the member name uses an underscore.
class BreedingType(str, Enum):
    MONOECIOUS = "MONOECIOUS"  # Male and female flowers on the same plant
    ANDROMONOECIOUS = "ANDROMONOECIOUS"
    CHASMOGAMOUS = "CHASMOGAMOUS"
    DIOECIOUS = "DIOECIOUS"  # Separate male and female plants
    GYNODIOECIOUS = "GYNODIOECIOUS"
    POLYGAMO_MONOECIOUS = "POLYGAMO-MONOECIOUS"
    POLYGAMOUS = "POLYGAMOUS"
    POLYGAMO_DIOECIOUS = "POLYGAMO-DIOECIOUS"
    GYNOMONOECIOUS = "GYNOMONOECIOUS"
    STERILE = "STERILE"

# Allowed values for HawaiianPlant.female_inflorescence_type; despite the name it is
# also the type of HawaiianPlant.male_inflorescence_type.
# NOTE(review): "VERTISCILLASTER" may be a misspelling of "VERTICILLASTER" -- confirm
# against the target database vocabulary before changing, since the value is emitted as data.
class FemaleInflorescenceType(str, Enum):
    AXILLARY = "AXILLARY"
    ANTHELATE = "ANTHELATE"
    CATKIN = "CATKIN"
    CLUSTERS = "CLUSTERS"
    CORYMB = "CORYMB"
    CYME = "CYME"
    DICHASIUM = "DICHASIUM"
    ELONGATE = "ELONGATE"
    FASCICLE = "FASCICLE"
    GLOMERATE = "GLOMERATE"
    HEAD = "HEAD"
    LAX = "LAX"
    PANICLE = "PANICLE"
    PEDUNCULATE = "PEDUNCULATE"
    RACEME = "RACEME"
    SOLITARY = "SOLITARY"
    SCORPIOID = "SCORPIOID"
    SPATHE_SPADIX = "SPATHE_SPADIX"
    SPICIFORM = "SPICIFORM"
    SPIKE = "SPIKE"
    STELLATE = "STELLATE"
    THYRSE = "THYRSE"
    TERMINAL = "TERMINAL"
    UMBEL = "UMBEL"
    VERTISCILLASTER = "VERTISCILLASTER"
    VERTICIL = "VERTICIL"

# Allowed values for HawaiianPlant.life_form_type (growth habit or life form).
class LifeFormType(str, Enum):
    ANNUAL_HERB = "ANNUAL_HERB"
    PERENNIAL_HERB = "PERENNIAL_HERB"
    EPIPHYTE = "EPIPHYTE"
    VINE = "VINE"
    SHRUB = "SHRUB"
    TREE = "TREE"

# Allowed values for HawaiianPlant.corolla_type.
# NOTE(review): "LOBBED" and "FASICLE" look like misspellings of "LOBED" and "FASCICLE"
# (FemaleInflorescenceType spells the latter "FASCICLE") -- confirm against the target
# database vocabulary before changing, since the values are emitted as data.
class CorollaType(str, Enum):
    ADNATE = "ADNATE"
    BILABIATE = "BILABIATE"
    CAMPANULATE = "CAMPANULATE"
    CORYMBOSE = "CORYMBOSE"
    CONVOLUTE = "CONVOLUTE"
    CORONA = "CORONA"
    CUNEATE = "CUNEATE"
    CYLINDRICAL = "CYLINDRICAL"
    DISK = "DISK"
    DELTATE = "DELTATE"
    ELLIPTIC = "ELLIPTIC"
    FUNNELFORM = "FUNNELFORM"
    FLABELLATE = "FLABELLATE"
    FILIFORM = "FILIFORM"
    HOOD = "HOOD"
    IRREGULAR = "IRREGULAR"
    KEEL = "KEEL"
    LABELLUM = "LABELLUM"
    LANCEOLATE = "LANCEOLATE"
    LINEAR = "LINEAR"
    LIPPED = "LIPPED"
    LOBBED = "LOBBED"
    OVATE = "OVATE"
    OBLONG = "OBLONG"
    OBCORDATE = "OBCORDATE"
    OBOVATE = "OBOVATE"
    OBLANCEOLATE = "OBLANCEOLATE"
    ORBICULAR = "ORBICULAR"
    PALATE = "PALATE"
    PSEUDORACEMES = "PSEUDORACEMES"
    ROTATE = "ROTATE"
    RAY = "RAY"
    REFLEXED = "REFLEXED"
    RHOMBIC = "RHOMBIC"
    RENIFORM = "RENIFORM"
    SALVERFORM = "SALVERFORM"
    SUBORBICULAR = "SUBORBICULAR"
    SUBRHOMBIC = "SUBRHOMBIC"
    SPUR = "SPUR"
    SPATULATE = "SPATULATE"
    SPICATE = "SPICATE"
    SUBROTATE = "SUBROTATE"
    STANDARD = "STANDARD"
    TUBULAR = "TUBULAR"
    TRIANGULAR = "TRIANGULAR"
    URCEOLATE = "URCEOLATE"
    UNILABIATE = "UNILABIATE"
    VALVATE = "VALVATE"
    VERTICIL = "VERTICIL"
    ZYGOMORPHIC = "ZYGOMORPHIC"
    CUP = "CUP"
    UNGUICULATE = "UNGUICULATE"
    CLAW = "CLAW"
    FASICLE = "FASICLE"
    STELLATE = "STELLATE"
    SUBPANICULATE = "SUBPANICULATE"
    PENTAGONAL = "PENTAGONAL"

# Allowed values for HawaiianPlant.origin.
# "POLYNESIAN INTRODUCTION" contains a space where the member name uses an underscore.
class OriginType(str, Enum):
    NATURALIZED = "NATURALIZED"
    INDIGENOUS = "INDIGENOUS"
    ENDEMIC = "ENDEMIC"
    POLYNESIAN_INTRODUCTION = "POLYNESIAN INTRODUCTION"

# Leaf-shape vocabulary with the same member names as LeafShapeType; only the
# AWL_SHAPED value differs ("AWL-SHAPED" here, "AWL_SHAPED" there).
# NOTE(review): HawaiianPlant.leaf_shape_type ends up typed with LeafShapeType, not
# this enum -- confirm whether LeafShape is still needed.
class LeafShape(str, Enum):
    ACEROSE = "ACEROSE"
    AWL_SHAPED = "AWL-SHAPED"
    GLADIATE = "GLADIATE"
    HASTATE = "HASTATE"
    CORDATE = "CORDATE"
    DELTOID = "DELTOID"
    LANCEOLATE = "LANCEOLATE"
    LINEAR = "LINEAR"
    ELLIPTIC = "ELLIPTIC"
    ENSIFORM = "ENSIFORM"
    LYRATE = "LYRATE"
    OBCORDATE = "OBCORDATE"
    FALCATE = "FALCATE"
    FLABELLATE = "FLABELLATE"
    OBDELTOID = "OBDELTOID"
    OBELLIPTIC = "OBELLIPTIC"
    OBLANCEOLATE = "OBLANCEOLATE"
    OBLONG = "OBLONG"
    PERFOLIATE = "PERFOLIATE"
    QUADRATE = "QUADRATE"
    OBOVATE = "OBOVATE"
    ORBICULAR = "ORBICULAR"
    RENIFORM = "RENIFORM"
    RHOMBIC = "RHOMBIC"
    OVAL = "OVAL"
    OVATE = "OVATE"
    ROTUND = "ROTUND"
    SAGITTATE = "SAGITTATE"
    PANDURATE = "PANDURATE"
    PELTATE = "PELTATE"
    SPATULATE = "SPATULATE"
    SUBULATE = "SUBULATE"

# Allowed entries of HawaiianPlant.locations (islands where the plant is found),
# plus the catch-all "ALL ISLANDS".
class Location(str, Enum):
    HAWAII = "HAWAII"
    MAUI = "MAUI"
    KAHOOLAWE = "KAHOOLAWE"
    MOLOKAI = "MOLOKAI"
    LANAI = "LANAI"
    OAHU = "OAHU"
    KAUAI = "KAUAI"
    NIIHAU = "NIIHAU"
    ALL_ISLANDS = "ALL ISLANDS"

# YES/NO answer for HawaiianPlant.leaf_teeth (encoded as strings, not bool).
class LeafTeeth(str, Enum):
    YES = "YES"
    NO = "NO"

# YES/NO answer for HawaiianPlant.deciduous (encoded as strings, not bool).
class Deciduous(str, Enum):
    YES = "YES"
    NO = "NO"

# Allowed values for HawaiianPlant.federal_status (federal conservation status).
class FederalStatusType(str, Enum):
    SPECIES_OF_CONCERN = "SPECIES_OF_CONCERN"
    ENDANGERED = "ENDANGERED"
    THREATENED = "THREATENED"
    WITHDRAWN = "WITHDRAWN" 
    
# Allowed values for HawaiianPlant.status (general status).
# NATURALIZED and ENDEMIC also appear in OriginType.
class StatusType(str, Enum):
    NATURALIZED = "NATURALIZED"
    ENDEMIC = "ENDEMIC"
    RARE = "RARE"
    SECURE = "SECURE"
    VULNERABLE = "VULNERABLE" 


# Numeric range reported for one measured trait. All four bounds are optional and
# default to None; the unit is not stored here -- it is stated in the description
# of each HawaiianPlant field that uses this model.
class Measurements(BaseModel):
    # Bounds named min / max
    min: Optional[float] = Field(default=None)
    max: Optional[float] = Field(default=None)

    # Bounds named extreme_min / extreme_max
    extreme_min: Optional[float] = Field(default=None)
    extreme_max: Optional[float] = Field(default=None)

# Schema of one extracted plant record; passed as `result_type` to the extraction
# agents, so the field names, types and `description` strings below are what the
# models are asked to fill in.
#
# Only family, genus, species and infraspecific_epithet are required; every other
# field defaults to None. Measurement fields use the Measurements model and state
# their unit in the description.
#
# Documentation is kept in `#` comments rather than a class docstring so the
# generated JSON schema stays exactly as the field declarations define it.
class HawaiianPlant(BaseModel):
    # Basic Information
    family: str = Field(..., description="Plant family name (should only be 1 object)")
    genus: str = Field(..., description="Plant genus name (should only be 1 object)")
    species: str = Field(..., description="Plant species name (should only be 1 object)")
    subspecies: Optional[str] = Field(None, description="Subspecies or epithet names")
    common_name: Optional[str] = Field(None, description="Common name of the plant")
    hawaiian_names: Optional[List[str]] = Field(None, description="List of Hawaiian names")
    # NOTE(review): required field -- a record with no infraspecific rank still has to
    # supply a value here; confirm whether this should be Optional like `subspecies`.
    infraspecific_epithet: str = Field(..., description="The third word in the scientific name of an infraspecific taxon, following the name of the species. This applies only to formal names of plants and fungi, and not to the formal names of bacteria or animals. In the name Cannabis sativa subsp. indica, the word indica is the infraspecific epithet.")
    deciduous: Optional[Deciduous] = Field(None, description="Does the plant occur in a deciduous environment? Do not record unless explicitly stated.")
    
    # References
    wagner_book_number: Optional[str] = Field(None, description="Wagner book reference number")
    page_number: Optional[str] = Field(None, description="Page number found on the bottom or top of page in bold lettering")
    # NOTE(review): free-text field; the Description enum lists the same four groups
    # (with different casing) but is not used here -- confirm which is intended.
    description: Optional[str] = Field(None, description="Take knowledge from outside the passage to infer whether the plant is DICOTS, MONOCOTS, CONIFERS or FERNS")
    
    # Basic Characteristics
    life_form_type: Optional[LifeFormType] = Field(None, description="Growth habit or life form")
    stem_height: Optional[Measurements] = Field(None, description="Stem or general plant height measurements in meters")
    stem_hair_type: Optional[StemHairType] = Field(None, description="Type of hair on stem")
    phyllotaxy_type: Optional[PhyllotaxyType] = Field(None, description="The arrangement of leaves around the stem.")
    imm_leaf_type: Optional[ImmLeafType] = Field(None, description="Immature leaf type.")
    
    # Further Stem Characteristics
    # stem_hairs: Optional[StemHairs] = Field(None, description="Identify whether the stem hairs are dendritic or glabrous.")
    
    # Leaf Characteristics
    leaf_type: Optional[LeafType] = Field(None, description="Simple or compound leaf type")
    # `leaf_shape_type` used to be declared twice (first with LeafShape, then with
    # LeafShapeType); the second declaration silently replaced the first. Only the
    # effective declaration is kept, at the original position so field order is unchanged.
    leaf_shape_type: Optional[LeafShapeType] = Field(None, description="Shape of leaves")
    leaf_margin_type: Optional[str] = Field(None, description="Type of leaf margin")
    leaf_width_dimensions: Optional[Measurements] = Field(None, description="Leaf width dimensions in centimeters")
    leaf_length_dimensions: Optional[Measurements] = Field(None, description="Leaf length dimensions in centimeters")
    leaf_hairs: Optional[str] = Field(None, description="Short description of leaf hairs. (example: densely soft, silky, appressed villous)")
    leaf_hair_upper: Optional[LeafHairType] = Field(None, description="Type of upper hair on leaves")
    leaf_hair_lower: Optional[LeafHairType] = Field(None, description="Type of lower hair on leaves")
    petiole_length: Optional[Measurements] = Field(None, description="Petiole length in centimeters")
    leaflet_leaf_shape: Optional[LeafShapeType] = Field(None, description="Shape of leaflet leaves")
    leaf_teeth: Optional[LeafTeeth] = Field(None, description="Do the leaves have teeth?")
    
    # Juvenile characteristics
    juvenile_leaf_type: Optional[LeafType] = Field(None, description="Simple or compound juvenile leaf type")
    juvenile_leaf_shape_type: Optional[LeafShapeType] = Field(None, description="Shape of juvenile leaves")
    juvenile_leaf_margin_type: Optional[str] = Field(None, description="Type of leaf margin on juvenile plants")
    juvenile_leaf_dimensions: Optional[Measurements] = Field(None, description="Leaf dimensions of juvenile plant in centimeters")
    juvenile_leaf_hair: Optional[LeafHairType] = Field(None, description="Type of hair on juvenile leaves")
    juvenile_petiole_length: Optional[Measurements] = Field(None, description="Juvenile petiole length in centimeters")
    juvenile_leaf_width_dimensions: Optional[Measurements] = Field(None, description="Juvenile Leaf width dimensions in centimeters")
    juvenile_leaf_length_dimensions: Optional[Measurements] = Field(None, description="Juvenile Leaf length dimensions in centimeters")
    
    # Flower Characteristics
    # breeding_type: Optional[str] = Field(None, description="Plant breeding system")
    flower_width: Optional[Measurements] = Field(None, description="Flower width in centimeters")
    peduncle_dimensions: Optional[Measurements] = Field(None, description="Peduncle dimensions in millimeters")
    flower_length: Optional[Measurements] = Field(None, description="Flower length in centimeters")
    rachis_length: Optional[Measurements] = Field(None, description="Rachis length in millimeters")
    rachis_diameter: Optional[Measurements] = Field(None, description="Rachis diameter in millimeters")
    head_length: Optional[Measurements] = Field(None, description="The measured length of the capitulum (flower head) in millimeters.")
    head_diameter: Optional[Measurements] = Field(None, description="The measured diameters of the capitulum (flower head) in millimeters.")
    bur_length: Optional[Measurements] = Field(None, description="The measured length of the bur in millimeters.")
    tepal_length: Optional[Measurements] = Field(None, description="The measured length of the tepal in millimeters.")
    staminate_tepal_length: Optional[Measurements] = Field(None, description="The measured length of the staminate tepal in millimeters.")
    pistillate_tepal_length: Optional[Measurements] = Field(None, description="The measured length of the pistillate tepal in millimeters.")
    ray_length: Optional[Measurements] = Field(None, description="The measured length of the ray in millimeters.")
    ray_width: Optional[Measurements] = Field(None, description="The measured width of the ray in millimeters.")
    florets_length: Optional[Measurements] = Field(None, description="The measured length of the florets in millimeters.")
    corolla_width: Optional[Measurements] = Field(None, description="Corolla width dimensions in millimeters")
    corolla_length: Optional[Measurements] = Field(None, description="Corolla length dimensions in millimeters")
    
    # Inflorescence features
    inflorescence_type: Optional[InflorescenceType] = Field(None, description="In a flowering plant, a cluster of flowers on a branch or a system of branches")
    staminate_inflorescence_length: Optional[Measurements] = Field(None, description="The measured length in millimeters of the male (pollen-producing) flower cluster. This specifically refers to catkins or other inflorescences containing only staminate (male) flowers.")
    # Unit added to match the staminate counterpart; Measurements carries no unit of its own.
    pistillate_inflorescence_length: Optional[Measurements] = Field(None, description="The measured length in millimeters of the female (seed-producing) flower cluster. This specifically refers to inflorescences containing only pistillate (female) flowers.")
    inflorescence_flower_length: Optional[Measurements] = Field(None, description="The length of an inflorescence flower in millimeters.")
    female_inflorescence_type: Optional[FemaleInflorescenceType] = Field(None, description="Type of female inflorescence")
    # Shares the FemaleInflorescenceType vocabulary; no separate male enum is defined here.
    male_inflorescence_type: Optional[FemaleInflorescenceType] = Field(None, description="Type of male inflorescence")

    # Floral Parts
    calyx_dimensions: Optional[Measurements] = Field(None, description="Calyx dimensions in millimeters")
    corolla_type: Optional[CorollaType] = Field(None, description="Type of corolla")
    corolla_color: Optional[str] = Field(None, description="Color of corolla")
    ray_color: Optional[str] = Field(None, description="Color of ray")
    floret_color: Optional[str] = Field(None, description="Color of florets")
    
    # Fruit and Seed Characteristics
    fruit_type: Optional[FruitType] = Field(None, description="Type of fruit")
    fruit_length: Optional[Measurements] = Field(None, description="Fruit length in millimeters")
    fruit_width: Optional[Measurements] = Field(None, description="Fruit width in millimeters")
    fruit_diameter: Optional[Measurements] = Field(None, description="Fruit diameter in millimeters")
    # NOTE(review): untyped dict, unlike the other ranges which use Measurements --
    # confirm the expected keys with downstream consumers.
    seeds_per_fruit: Optional[dict] = Field(None, description="Range of seeds per fruit")
    seed_dimensions: Optional[Measurements] = Field(None, description="Seed dimensions in millimeters")
    
    # Involucre Measurements
    involucre_length: Optional[Measurements] = Field(None, description="Involucre length in millimeters")
    involucre_width: Optional[Measurements] = Field(None, description="Involucre width in millimeters")
    # staminate_involucre_length: Optional[Measurements] = Field(None, description="Staminate involucre length in millimeters")
    # pistillate_involucre_length: Optional[Measurements] = Field(None, description="Pistillate involucre length in millimeters")

    # Bract Measurements
    bract_length: Optional[Measurements] = Field(None, description="Bract length in millimeters")
    bract_width: Optional[Measurements] = Field(None, description="Bract width in millimeters")
    bract_lower_length: Optional[Measurements] = Field(None, description="Lower bract length in millimeters")
    outer_bract_length: Optional[Measurements] = Field(None, description="Outer bract length in millimeters")
    # staminate_bract_length: Optional[Measurements] = Field(None, description="Staminate bract length in millimeters")
    # pistillate_bract_length: Optional[Measurements] = Field(None, description="Pistillate bract length in millimeters")

    # Bracteole Measurements
    bracteoles_length: Optional[Measurements] = Field(None, description="Bracteoles length in millimeters")
    bracteole_width: Optional[Measurements] = Field(None, description="Bracteole width in millimeters")

    # Pedicel Measurements
    pedicel_length: Optional[Measurements] = Field(None, description="Pedicel length in millimeters")
    pedicel_width: Optional[Measurements] = Field(None, description="Pedicel width in millimeters")
    # staminate_pedicel_length: Optional[Measurements] = Field(None, description="Staminate pedicel length in millimeters")
    # pistillate_pedicel_length: Optional[Measurements] = Field(None, description="Pistillate pedicel length in millimeters")

    # Hypanthium Measurements
    hypanthium_length: Optional[Measurements] = Field(None, description="Hypanthium length in millimeters")
    hypanthium_width: Optional[Measurements] = Field(None, description="Hypanthium width in millimeters")

    # Peduncle Measurements
    peduncle_length: Optional[Measurements] = Field(None, description="Peduncle length in millimeters")
    peduncle_width: Optional[Measurements] = Field(None, description="Peduncle width in millimeters")

    # Calyx Measurements
    calyx_length: Optional[Measurements] = Field(None, description="Calyx length in millimeters")
    calyx_width: Optional[Measurements] = Field(None, description="Calyx width in millimeters")
    calyx_teeth_length: Optional[Measurements] = Field(None, description="Calyx teeth length in millimeters")
    calyx_teeth_width: Optional[Measurements] = Field(None, description="Calyx teeth width in millimeters")
    calyx_length_lobes: Optional[Measurements] = Field(None, description="Calyx lobe length in millimeters")
    calyx_width_lobes: Optional[Measurements] = Field(None, description="Calyx lobe width in millimeters")
    # upper_calyx_length: Optional[Measurements] = Field(None, description="Upper calyx length in millimeters")
    # lower_calyx_length: Optional[Measurements] = Field(None, description="Lower calyx length in millimeters")

    # Inner and Outer Calyx Lobes
    # calyx_lobes_length_inner: Optional[Measurements] = Field(None, description="Inner calyx lobe length in millimeters")
    # calyx_lobes_width_inner: Optional[Measurements] = Field(None, description="Inner calyx lobe width in millimeters")
    # calyx_lobes_length_outer: Optional[Measurements] = Field(None, description="Outer calyx lobe length in millimeters")
    # calyx_lobes_width_outer: Optional[Measurements] = Field(None, description="Outer calyx lobe width in millimeters")

    # # Calyx Tube Measurements
    # calyx_tube_length: Optional[Measurements] = Field(None, description="Calyx tube length in millimeters")
    # calyx_tube_width: Optional[Measurements] = Field(None, description="Calyx tube width in millimeters")

    # Male Calyx Measurements
    # male_calyx_length_lobes: Optional[Measurements] = Field(None, description="Male calyx lobe length in millimeters")
    # male_calyx_width_lobes: Optional[Measurements] = Field(None, description="Male calyx lobe width in millimeters")
    # male_calyx_lobes_length_inner: Optional[Measurements] = Field(None, description="Inner male calyx lobe length in millimeters")
    # male_calyx_lobes_length_outer: Optional[Measurements] = Field(None, description="Outer male calyx lobe length in millimeters")
    # male_calyx_lobes_width_inner: Optional[Measurements] = Field(None, description="Inner male calyx lobe width in millimeters")
    # male_calyx_lobes_width_outer: Optional[Measurements] = Field(None, description="Outer male calyx lobe width in millimeters")

    # # Female Calyx Measurements
    # female_calyx_length_lobes: Optional[Measurements] = Field(None, description="Female calyx lobe length in millimeters")
    # female_calyx_width_lobes: Optional[Measurements] = Field(None, description="Female calyx lobe width in millimeters")
    # female_calyx_lobes_length_inner: Optional[Measurements] = Field(None, description="Inner female calyx lobe length in millimeters")
    # female_calyx_lobes_length_outer: Optional[Measurements] = Field(None, description="Outer female calyx lobe length in millimeters")
    # female_calyx_lobes_width_inner: Optional[Measurements] = Field(None, description="Inner female calyx lobe width in millimeters")
    # female_calyx_lobes_width_outer: Optional[Measurements] = Field(None, description="Outer female calyx lobe width in millimeters")
    # female_calyx_length: Optional[Measurements] = Field(None, description="Female calyx length in millimeters")

    # Genetic Information
    ploidy: Optional[str] = Field(None, description="Ploidy level expressed as a function of n (e.g., 1n, 2n or 3n, etc..)")
    chromosome_number: Optional[int] = Field(None, description="The integer Number of chromosomes")
    average_chromosome_number: Optional[float] = Field(None, description="Average chromosome number")
    breeding_type: Optional[BreedingType] = Field(None, description="Type of breeding that the plant uses.")
    
    # Distribution and Origin
    locations: Optional[List[Location]] = Field(None, description="Islands where the plant is found")
    origin: Optional[OriginType] = Field(None, description="Origin type of the plant")
    federal_status: Optional[FederalStatusType] = Field(None, description="Federal conservation status")
    status: Optional[StatusType] = Field(None, description="General status")

# Unit abbreviations ("mm", "cm", "m").
# Not referenced by any field or cell in the visible code; HawaiianPlant states
# units in its field descriptions instead.
class MeasurementUnits(str, Enum):
    MILLIMETERS = "mm"
    CENTIMETERS = "cm"
    METERS = "m"

In [7]:
## Read the sample page image and base64-encode it so it can be embedded in the
## image payloads sent to the extraction agents (chatbots).
## os.path.join avoids the doubled separator that "{photo_dir}/..." produced when
## photo_dir already ends with a slash.
image_path = os.path.join(photo_dir, "Acanthaceae_Dicliptera_chinensis.jpeg")
with open(image_path, "rb") as image_file:
    base64_image = base64.b64encode(image_file.read()).decode("utf-8")

## System prompt shared by the schema-extraction agents below (passed as
## `system_prompt=` to the Groq/DeepSeek, GPT-4o, Claude and o3-mini agents).
## It sets the overall framing for every interaction with those agents.
system_prompt  = """
You are an expert plant taxonomist. Please analyze this image and return the details according to the schema provided.
You are exhaustive; you include ALL the details mentioned. Do not make any assumptions about the data and do not try to
interpret what is not obvious from the text. When extracting information, ensure that you return a structured response in JSON format matching the `HawaiianPlant` schema. """

## Per-request user message used by the image-based extraction cells (GPT-4o and
## Claude); can be changed per interaction as needed
user_prompt= "Transcribe the plant information you see in this image"


## GPT-4o Text Transcription (Text Extraction Only)

In [22]:
## Setting extraction agent to be gpt-4o
## System prompt is being set to specifically transcribe information
openai_img_to_text_transcription_agent = Agent(
    model="openai:gpt-4o",
    result_type=str,
    system_prompt = "You are a vision model capable of accurately performing OCR on an image",
)

## Data to be extracted from input image
image_urls = [
    f"data:image/png;base64,{base64_image}",
]

## Setting chat parameters: low detail (for efficiency) from input image
image_params = [
    ChatCompletionContentPartImageParam(
        type='image_url', 
        image_url=ImageURL(url=url, detail='low')
    ) for url in image_urls
]

## Setting chat prompt to 'user_prompt'
msg_open_ai = [
            ChatCompletionContentPartTextParam(text="Convert this to text. Don't miss any text.", type='text'),
            *image_params
]

## Running data extraction
r = await openai_img_to_text_transcription_agent.run(msg_open_ai)

## Output to 'text'
text = r.data.split("---")[1]
print(text)



171

1. Dicliptera chinensis (L.) Juss. 
[Justice chinensis L.]

nat] 

Sprawling or decumbent perennial herbs; stems 2-7 dm long. Leaves green, lower surface slightly paler, ovate, 2.5-13.5 cm long, sparsely stpiodistris, especially on the veins, scilyidsless prominent on upper sur- faces: white raised streaks the size of a needle; petioles 1-3.5 cm long. Flowers in axillary cymes, each of unequal size, 2 greener ovate bracts of unequal size, the larger one ca. 12-14 mm long, the smaller one ca. 8-9 mm long, all bracts short-vil-

lous especially along the margins, the veins inconspicuous, pedicles only 1-5 mm long; calyx lobes of unequal size. 5-7 mm long, those opp. to sep. in the throat with puber 5 mm, 5-13 mm long. Corolla rous, two-lipped, both lips short-villous. Seeds 4, discoid. Native to tropicale worldwide: in Hawai'i: nat- uralized mainly in secondary successional areas in lowland O'ahu and Hawai'i, but more widespread. Pope (1929) states that this plant was recently int

## DeepSeek Model Extraction (R1 distill llama 70B) from text to Hawaiian Plant Schema

In [21]:
## Setting deepseek LLM as extraction agent
## Data to be extracted in HawaiianPlant schema
groq_extraction_agent = Agent(
    model="groq:deepseek-r1-distill-llama-70b",
    retries=3,
    result_type=HawaiianPlant,
    system_prompt = system_prompt,
    # Had to raise temperature to 0.5
    model_settings = {'temperature': 0.5}
)

## Setting variables and extracting data
r = await groq_extraction_agent.run(text)
deepseek_llama = r.data


## GPT-4o Data Extraction to Hawaiian Plant Schema

In [23]:
## Setting extraction agent to be gpt-4o
## Extracted data will be formatted according to HawaiianPlant schema
openai_extraction_agent = Agent(
    model="openai:gpt-4o",
    result_type=HawaiianPlant,
    system_prompt = system_prompt,
)

## Data to be extracted from input image
image_urls = [
    f"data:image/png;base64,{base64_image}",
]

## Setting chat parameters: low detail (for efficiency) from input image
image_params = [
    ChatCompletionContentPartImageParam(
        type='image_url', 
        image_url=ImageURL(url=url, detail='low')
    ) for url in image_urls
]

## Setting chat prompt to 'user_prompt'
msg_open_ai = [
            ChatCompletionContentPartTextParam(text=user_prompt, type='text'),
            *image_params
]

## Running data extraction
r = await openai_extraction_agent.run(msg_open_ai)

## Setting results to 'gpt_4o_output'
gpt_4o_output = r.data

## Anthropic Model Data Extraction

In [24]:
## Setting extraction agent to be Claude Sonnet model
## Extracted data will be formatted according to HawaiianPlant schema
sonnet_extraction_agent = Agent(
    model="anthropic:claude-3-5-sonnet-latest",
    result_type=HawaiianPlant,
    system_prompt = system_prompt,
    model_settings = {'temperature': 0.2}

)

## Setting context
msg_claude = [
    ChatCompletionContentPartTextParam(text=user_prompt, type='text'),
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": "image/jpeg",
                        "data": f"{base64_image}",
                    }
                },
    
]

## Extracting data from image
r = await sonnet_extraction_agent.run(msg_claude)
sonnet_output = r.data

## GPT o3-Mini Model Data Extraction

In [25]:
## Setting system prompt
gptmini_extraction_agent = Agent(
    model="openai:o3-mini",
    result_type=HawaiianPlant,
    system_prompt = system_prompt,
)

## Extracting output from extracted text
r = await gptmini_extraction_agent.run(text)

## Setting output
gpt_mini_output = r.data

## Comparing Model Outputs

In [36]:
### IGNORE THE FOLLOWING CODE, IT IS A WORK SPACE

In [28]:
## Verdict returned by the validation agent when a manually extracted value is
## compared with an automatically extracted one: a boolean plus its justification.
## Both fields are required.
class AreAnntoationsEqual(BaseModel):
    are_equal: bool = Field(
        description="Are the two annotations equal",
    )
    justificaiton: str = Field(
        description="Justification of the propose value for are_equal",
    )

## Validation agent: answers with an AreAnntoationsEqual verdict for one
## manual-vs-automatic value pair
validation_agent = Agent(
    result_type=AreAnntoationsEqual,
    model="groq:llama-3.3-70b-specdec",
    system_prompt="""You are an expert taxonomist. You are comparing the outcome of a manually extracted result versus an automatically extracted result. You need to compare the automatic results and determine whether the result is synonymous or equal the manual one; taking into consideration
    linguisitc and formatting nuances. Your answer is whether the two results are similar True/False and a justificaiton for your answer""",
)

## Names of the notebook variables holding each model's output;
## the comparison loop looks them up by name
models = [
    "gpt_4o_output",
    "gpt_mini_output",
    "sonnet_output",
    "deepseek_llama",
]


In [29]:
## Property (column) names declared in the HawaiianPlant JSON schema
plant_schema = HawaiianPlant.model_json_schema()
props = plant_schema['properties'].keys()
props

dict_keys(['family', 'genus', 'species', 'subspecies', 'common_name', 'hawaiian_names', 'infraspecific_epithet', 'deciduous', 'wagner_book_number', 'page_number', 'description', 'life_form_type', 'stem_height', 'stem_hair_type', 'phyllotaxy_type', 'imm_leaf_type', 'leaf_type', 'leaf_shape_type', 'leaf_margin_type', 'leaf_width_dimensions', 'leaf_length_dimensions', 'leaf_hairs', 'leaf_hair_upper', 'leaf_hair_lower', 'petiole_length', 'leaflet_leaf_shape', 'leaf_teeth', 'juvenile_leaf_type', 'juvenile_leaf_shape_type', 'juvenile_leaf_margin_type', 'juvenile_leaf_dimensions', 'juvenile_leaf_hair', 'juvenile_petiole_length', 'juvenile_leaf_width_dimensions', 'juvenile_leaf_length_dimensions', 'flower_width', 'peduncle_dimensions', 'flower_length', 'rachis_length', 'rachis_diameter', 'head_length', 'head_diameter', 'bur_length', 'tepal_length', 'staminate_tepal_length', 'pistillate_tepal_length', 'ray_length', 'ray_width', 'florets_length', 'corolla_width', 'corolla_length', 'inflorescence

In [30]:
## Manually annotated reference records, used as ground truth when checking model output.
## Holds two datasheet rows (Bidens pilosa and Bidens molokaiensis). The keys look like
## the original spreadsheet column headers and differ from the HawaiianPlant property
## names ("Family" vs "family"), so values are picked out by hand before comparison.
## np.nan presumably marks a column with no recorded value for that record — confirm.
manual_results = {
    "datasheet": [
        # Record 0: Bidens pilosa
        {
            "Family": "Asteraceae",
            "Genus": "Bidens",
            "Species": "pilosa",
            "Common_Name": "spanish needle",
            "Hawaiian_name_1": "Ki",
            "Hawaiian_name_2": "Ki nehe",
            "Hawaiian_name_3": "Ki pipili",
            "Hawaiian_name_4": "Nehe",
            "Wagner_Book_#": 279,
            "Pg_#": "pg 267-271,279-281",
            "Description": "Dicots",
            "Life_Form_Type": "AH",
            "Stem_Height_(m)_min": 0.3,
            "Stem_Height_(m)_max": 1.8,
            "Phyllotaxy_Type": "O",
            "Leaf_Type": "Compound",
            "Leaflet_leaf_type": np.nan,
            "Leaf_Margin_Type": np.nan,
            "Leaflets_Shape_Type": np.nan,
            "Leaf_Length_(cm)_min": 2.5,
            "Leaf_Length_(cm)_max": 13.5,
            "Leaflet_Length_(cm)_min": np.nan,
            "Leaflet_Length_(cm)_max": np.nan,
            "Leaflet_Width_(cm)_min": np.nan,
            "Leaflet_Width_(cm)_max": np.nan,
            "Leaf_Hair_Type": "G",
            "Breeding_Type": "M",
            "Infloresence_Type": "Cy",
            "Head Length (mm) Min": 8,
            "Head Length (mm) Max": 10,
            "Ray Length (mm) min": 2,
            "Ray Length (mm) max": 8,
            "Ray Width (mm) Min": np.nan,
            "Ray Width (mm) Max": np.nan,
            "Ray Color": "Yellow or White",
            "Bract_Length_(mm)_Min": 2.5,
            "Bract_Length_(mm)_Max": 5,
            "Peduncle_Length_Min_(mm)": 10,
            "Peduncle_Length_Max_(mm)": 90,
            "Pappus Length (mm) Min": 1,
            "Pappus Length (mm) Max": 2,
            "Corolla_Type": "T",
            "Corolla_color": "yellow",
            "Fruit_Type": "A",
            "Fruit_length_(mm)_min": 8,
            "Fruit_length_(mm)_max": 16,
            "Fruit_width_(mm)_ min": np.nan,
            "Fruit_width_(mm)_max": np.nan,
            "Chromosome_#": "24, 36, 46, 48, 72, ca. 76",
            "Average_Chromosome_#": 50.33,
            "Origin_t1": "PC",
            "Island_H": 1.0,
            "Island_MA": 1.0,
            "Island_KAH": 1.0,
            "Island_MO": 1,
            "Island_L": 1.0,
            "Island_O": 1,
            "Island_KAU": 1.0,
            "Island_NI": 1.0,
            "Island_A": 1.0,
            "FedStatus_t1 (do at end) ": "NS"
        },
        # Record 1: Bidens molokaiensis
        {
            "Family": "Asteraceae",
            "Genus": "Bidens",
            "Species": "molokaiensis",
            "Common_Name": "ko`oko`olau",
            "Hawaiian_name_1": "ko`oko`olau",
            "Hawaiian_name_2": np.nan,
            "Hawaiian_name_3": np.nan,
            "Hawaiian_name_4": np.nan,
            "Wagner_Book_#": 279,
            "Pg_#": "pg 267-271,279",
            "Description": "Dicots",
            "Life_Form_Type": "PH",
            "Stem_Height_(m)_min": 0.1,
            "Stem_Height_(m)_max": 0.3,
            "Phyllotaxy_Type": "O",
            # NOTE(review): the value below carries a trailing space ("S or C ") — confirm whether intended
            "Leaf_Type": "S or C ",
            "Leaflet_leaf_type": "S",
            "Leaf_Margin_Type": "T",
            "Leaflets_Shape_Type": "C or OVA",
            "Leaf_Length_(cm)_min": 3.0,
            "Leaf_Length_(cm)_max": 7.0,
            "Leaflet_Length_(cm)_min": 1.5,
            "Leaflet_Length_(cm)_max": 4.0,
            "Leaflet_Width_(cm)_min": 1.0,
            "Leaflet_Width_(cm)_max": 4.5,
            "Leaf_Hair_Type": "G",
            "Breeding_Type": "M",
            "Infloresence_Type": "Cy",
            "Head Length (mm) Min": 30,
            "Head Length (mm) Max": 55,
            "Ray Length (mm) min": 17,
            "Ray Length (mm) max": 25,
            "Ray Width (mm) Min": 9.0,
            "Ray Width (mm) Max": 11.0,
            "Ray Color": "Yellow or White",
            "Bract_Length_(mm)_Min": 3.0,
            "Bract_Length_(mm)_Max": 6,
            "Peduncle_Length_Min_(mm)": 50,
            "Peduncle_Length_Max_(mm)": 180,
            # NOTE(review): "Abset" is presumably a typo for "Absent"; it is also a string in an
            # otherwise numeric column — confirm against the source datasheet
            "Pappus Length (mm) Min": "Abset",
            "Pappus Length (mm) Max": 1,
            "Corolla_Type": "T",
            "Corolla_color": "yellow",
            "Fruit_Type": "A",
            "Fruit_length_(mm)_min": 6,
            "Fruit_length_(mm)_max": 12,
            "Fruit_width_(mm)_ min": 1.0,
            "Fruit_width_(mm)_max": 1.5,
            "Chromosome_#": 72,
            "Average_Chromosome_#": 72.0,
            "Origin_t1": np.nan,
            "Island_H": np.nan,
            "Island_MA": np.nan,
            "Island_KAH": np.nan,
            "Island_MO": 1,
            "Island_L": np.nan,
            "Island_O": 1,
            "Island_KAU": np.nan,
            "Island_NI": np.nan,
            "Island_A": np.nan,
            "FedStatus_t1 (do at end) ": "SOC"
        }
    ]
}

# TODO: convert to DataFrame — no conversion is performed in this cell yet



In [31]:
## Ground-truth values for the first manually annotated record, keyed by the
## HawaiianPlant property names they are compared against.
## The misspelled name `groun_truth` is kept because the comparison loop reads it.
first_manual_record = manual_results['datasheet'][0]
groun_truth = {
    "family": first_manual_record['Family'],
    "genus": first_manual_record['Genus'],
    # This used to be the literal string "Species", so every model's species was
    # compared against the column header instead of the recorded value.
    "species": first_manual_record['Species'],
}


In [35]:
## Inspect the species recorded for the first manual record
manual_results['datasheet'][0]['Species']

'pilosa'

In [32]:
for prop in list(props)[0:3]:
    print("prop")
    for model in models:
        user_prompt = f""" manually annotated {prop}: {groun_truth[prop]}
        automatically annotated {prop}:  {getattr(eval(model),  prop)}
        """
        print("\n\n"+user_prompt)
        r = await validation_agent.run(user_prompt)
        print(r.data)

        print(f"{prop}:{getattr(eval(model),  prop)}:{r.data.are_equal}", end="\t")
    print("\n")  

prop


 manually annotated family: Asteraceae
        automatically annotated family:  Asteraceae
        
are_equal=True justificaiton="The manually annotated family 'Asteraceae' and the automatically annotated family 'Asteraceae' are identical, with no linguistic or formatting nuances that would suggest otherwise."
family:Asteraceae:True	

 manually annotated family: Asteraceae
        automatically annotated family:  Acanthaceae
        
are_equal=False justificaiton='The manually annotated family is Asteraceae, while the automatically annotated family is Acanthaceae. These are two distinct and different plant families, with Asteraceae being the family of sunflowers and daisies, and Acanthaceae being the family of acanthus plants. Therefore, the two results are not equal.'
family:Acanthaceae:False	

 manually annotated family: Asteraceae
        automatically annotated family:  Acanthaceae
        
are_equal=False justificaiton='The manually annotated family, Asteraceae, and the aut