In [2]:
import pandas as pd
import numpy as np
from apni import parse_apni_name_list
import re

In [3]:
# Load the APNI names export, then the AusTraits 3.0.2 taxa table.
# low_memory=False has pandas parse each file whole rather than in chunks,
# which avoids mixed-dtype inference within a column.
apni = pd.read_csv(
    filepath_or_buffer=r"./data/APNI-names-2025-05-06-3043.csv",
    low_memory=False,
)
austraits = pd.read_csv(
    filepath_or_buffer=r"./data/austraits-3.0.2/taxa.csv",
    low_memory=False,
)

In [4]:
# List every column of the raw APNI export; the tidying cell that follows picks
# "family", "genericName" and "specificEpithet" from this set.
apni.columns

Index(['scientificNameID', 'nameType', 'scientificName', 'scientificNameHTML',
       'canonicalName', 'canonicalNameHTML', 'nameElement',
       'nomenclaturalStatus', 'scientificNameAuthorship', 'autonym', 'hybrid',
       'cultivar', 'formula', 'scientific', 'nomInval', 'nomIlleg',
       'namePublishedIn', 'namePublishedInID', 'namePublishedInYear',
       'nameInstanceType', 'nameAccordingToID', 'nameAccordingTo',
       'originalNameUsage', 'originalNameUsageID', 'originalNameUsageYear',
       'typeCitation', 'kingdom', 'family', 'genericName', 'specificEpithet',
       'infraspecificEpithet', 'cultivarEpithet', 'taxonRank',
       'taxonRankSortOrder', 'taxonRankAbbreviation', 'firstHybridParentName',
       'firstHybridParentNameID', 'secondHybridParentName',
       'secondHybridParentNameID', 'created', 'modified', 'nomenclaturalCode',
       'datasetName', 'taxonomicStatus', 'statusAccordingTo', 'license',
       'ccAttributionIRI'],
      dtype='object')

In [5]:
# Reduce APNI to family / genus / specific epithet. Rows missing any of the
# three values are dropped and the index is renumbered from 0.
# NOTE(review): duplicate (family, genus, epithet) rows are retained.
apni_tidied = (
    apni[["family", "genericName", "specificEpithet"]]
    .dropna()
    .reset_index(drop=True)
)

In [6]:
# Display the tidied APNI frame (pandas shows only the head and tail rows).
apni_tidied

Unnamed: 0,family,genericName,specificEpithet
0,Acanthaceae,Acanthus,ebracteatus
1,Acanthaceae,Acanthus,ebracteatus
2,Acanthaceae,Acanthus,ebracteatus
3,Acanthaceae,Acanthus,ilicifolius
4,Acanthaceae,Acanthus,ilicifolius
...,...,...,...
113678,Cyperaceae,Mariscus,laevigatus
113679,Poaceae,Oplismenus,undulatifolius
113680,Poaceae,Pennisetum,spicatum
113681,Poaceae,Hierochloe,borealis


In [7]:
# Show only the tidied APNI rows whose genus is exactly "Heliotropium".
apni_tidied.query(
    expr="genericName=='Heliotropium'",
)

Unnamed: 0,family,genericName,specificEpithet
14906,Boraginaceae,Heliotropium,aenigmatum
14907,Boraginaceae,Heliotropium,aequoreum
14908,Boraginaceae,Heliotropium,albrechtii
14909,Boraginaceae,Heliotropium,alcyonium
14910,Boraginaceae,Heliotropium,ammophilum
...,...,...,...
15138,Boraginaceae,Heliotropium,vagum
15139,Boraginaceae,Heliotropium,ventricosum
15140,Boraginaceae,Heliotropium,vestitum
15141,Boraginaceae,Heliotropium,viator


In [24]:
# Reduce the AusTraits taxa table to name, family, genus and distribution.
# Rows missing any of those four values are dropped (this removes taxa with no
# recorded distribution) and the index is renumbered from 0.
austraits_tidied = (
    austraits[["taxon_name", "family", "genus", "taxonDistribution"]]
    .dropna()
    .reset_index(drop=True)
)

In [26]:
# Display the tidied AusTraits frame (pandas shows only the head and tail rows).
austraits_tidied

Unnamed: 0,taxon_name,family,genus,taxonDistribution
0,Abelmoschus,Malvaceae,Abelmoschus,"WA, ChI, NT, Qld (native and naturalised), NSW..."
1,Abelmoschus ficulneus,Malvaceae,Abelmoschus,"WA, NT, Qld"
2,Abelmoschus manihot,Malvaceae,Abelmoschus,"ChI, NT, Qld (naturalised), NSW (doubtfully na..."
3,Abelmoschus moschatus,Malvaceae,Abelmoschus,"WA, NT, Qld, NSW (naturalised)"
4,Abildgaardia ovata,Cyperaceae,Abildgaardia,"NT, Qld, NSW"
...,...,...,...,...
25823,Zoysia macrantha subsp. walshii,Poaceae,Zoysia,"SA, Vic, Tas"
25824,Zoysia matrella,Poaceae,Zoysia,"CoI, ChI (naturalised), Qld, NSW (naturalised)"
25825,Zuloagaea bulbosa,Poaceae,Zuloagaea,NSW (naturalised)
25826,Zygochloa paradoxa,Poaceae,Zygochloa,"NT, SA, Qld, NSW"


In [34]:
# Print every AusTraits taxon whose name contains "Heliotropium" together with
# its distribution. Row and column display limits are lifted only inside this
# block, so the listing is not truncated and the global options stay untouched.
with pd.option_context("display.max_rows", None, "display.max_columns", None):
    print(
        austraits_tidied.loc[
            lambda frame: frame["taxon_name"].str.contains("Heliotropium"),
            ["taxon_name", "taxonDistribution"],
        ]
    )

                                           taxon_name  \
13126                                    Heliotropium   
13127                         Heliotropium aenigmatum   
13128                          Heliotropium aequoreum   
13129                         Heliotropium albrechtii   
13130                          Heliotropium alcyonium   
13131                         Heliotropium ammophilum   
13132                        Heliotropium amnis-edith   
13133                       Heliotropium amplexicaule   
13134                            Heliotropium apertum   
13135                         Heliotropium arenitense   
13136                           Heliotropium argyreum   
13137                         Heliotropium asperrimum   
13138                             Heliotropium ballii   
13139                         Heliotropium brachygyne   
13140                        Heliotropium brachythrix   
13141                         Heliotropium bracteatum   
13142                        He

In [36]:
# Heliotropium taxa whose distribution string mentions NSW.
# Both conditions are substring tests, combined element-wise with `&`.
austraits_tidied.loc[
    lambda frame: (
        frame["taxon_name"].str.contains("Heliotropium")
        & frame["taxonDistribution"].str.contains("NSW")
    ),
    ["taxon_name", "taxonDistribution"],
]

Unnamed: 0,taxon_name,taxonDistribution
13126,Heliotropium,"WA (native and naturalised), CoI, ChI, AR, NT ..."
13133,Heliotropium amplexicaule,"SA (naturalised), Qld (naturalised), NSW (natu..."
13137,Heliotropium asperrimum,"WA, NT, SA, NSW, Vic"
13139,Heliotropium brachygyne,"Qld, NSW"
13152,Heliotropium curassavicum,"WA, NT, SA, Qld, NSW, Vic"
13159,Heliotropium europaeum,"Qld, WA, SA, NSW, ACT (naturalised), Vic"
13180,Heliotropium moorei,"WA, NT, SA, Qld, NSW"
13206,Heliotropium supinum,"WA (naturalised), NT (naturalised), SA (natura..."


In [2]:
# Build the list of APNI names from the saved HTML page using the project
# helper `parse_apni_name_list` (imported from the local `apni` module).
# NOTE(review): a later cell boolean-indexes this object and gets a NumPy string
# array back, so the helper presumably returns an ndarray of name strings —
# confirm against its definition.
apni_species_list = parse_apni_name_list(r"./Australian Plant Name Index (APNI).html")

In [3]:
# Case-insensitive pattern for a name beginning "Hordeum " followed by any run
# of word characters, dots, spaces and parentheses (epithets, ranks, authors).
# NOTE(review): the character class excludes '-', '&' and apostrophes, so a
# match stops at the first such character.
HORDEUM_SPP = re.compile(
    r"Hordeum [\w\. \(\)]*",
    re.IGNORECASE,
)

# All Hordeum names in APNI: for every entry that HORDEUM_SPP matches at its
# start, keep the matched text.
# Each entry is matched exactly once (previously the regex ran twice per entry:
# once to filter and once more to extract), and the loop variable has a real
# name instead of `_`, which conventionally marks an unused value.
[
    found.group()
    for found in map(HORDEUM_SPP.match, apni_species_list)
    if found  # `match` returns None for entries that do not start with the pattern
]

['Hordeum distichon L.',
 'Hordeum geniculatum All.',
 'Hordeum glaucum Steud.',
 'Hordeum hystrix Roth',
 'Hordeum L.',
 'Hordeum leporinum Link',
 'Hordeum marinum Huds.',
 'Hordeum maritimum Stokes',
 'Hordeum murinum L.',
 'Hordeum nodosum L.',
 'Hordeum secalinum Schreb.',
 'Hordeum vulgare L.',
 'Hordeum vulgare subsp. distichon (L.) Korn.',
 'Hordeum vulgare var. distichon (L.) Hook.f.',
 'Hordeum vulgare var. hexastichum (L.) Asch.']

In [4]:
# Select the same kind of entries with a boolean mask: a pandas substring test
# marks every name containing "Hordeum", and the mask then indexes the array.
apni_species_list[
    pd.Series(apni_species_list).str.contains("Hordeum").to_numpy()
]

array(['Hordeum distichon L.', 'Hordeum geniculatum All.',
       'Hordeum glaucum Steud.', 'Hordeum hystrix Roth', 'Hordeum L.',
       'Hordeum leporinum Link', 'Hordeum marinum Huds.',
       'Hordeum maritimum Stokes', 'Hordeum murinum L.',
       'Hordeum nodosum L.', 'Hordeum secalinum Schreb.',
       'Hordeum vulgare L.',
       'Hordeum vulgare subsp. distichon (L.) Korn.',
       'Hordeum vulgare var. distichon (L.) Hook.f.',
       'Hordeum vulgare var. hexastichum (L.) Asch.'], dtype='<U97')

In [8]:
# Display the raw AusTraits taxa table, including the rows with missing
# family/genus/distribution that the tidied frame drops.
# NOTE(review): the execution counts around this cell are non-sequential, so
# check that `austraits` is loaded before this point on a Restart & Run All.
austraits

Unnamed: 0,taxon_name,source,acceptedNameUsageID,scientificNameAuthorship,taxonRank,taxonomicStatus,family,taxonDistribution,ccAttributionIRI,genus
0,?Eucalyptus sp. Truslove (Brooker 7499),,,,,unknown,,,,
1,[Corymbia x candida subsp. lautifolia],,,,,unknown,,,,
2,[Corymbia x nowraensis],,,,,unknown,,,,
3,[Corymbia x paractia],,,,,unknown,,,,
4,[Corymbia x semiclara],,,,,unknown,,,,
...,...,...,...,...,...,...,...,...,...,...
28635,Zuloagaea bulbosa,APC,https://id.biodiversity.org.au/node/apni/2892759,(Kunth) Bess,Species,accepted,Poaceae,NSW (naturalised),https://id.biodiversity.org.au/tree/51354547/5...,Zuloagaea
28636,Zygochloa paradoxa,APC,https://id.biodiversity.org.au/node/apni/2910190,(R.Br.) S.T.Blake,Species,accepted,Poaceae,"NT, SA, Qld, NSW",https://id.biodiversity.org.au/tree/51354547/5...,Zygochloa
28637,Zygophyllaceae sp.,,,,,unknown,,,,
28638,Zygophyllum sp.,,,,,genus_known,Zygophyllaceae,,,Zygophyllum
