# Paul’s List, Extended
## Plant Species List Preparation, Native & Exotic

In [36]:
import pandas as pd

# Load the raw species export.
df = pd.read_csv("species.csv")

# Preferred left-to-right column sequence for the prepared list.
new_order = [
    "scientific_name", "vernacular_name", "family_name", "taxon_rank",
    "threatened_species_status", "genus", "subgenus", "exotic",
]

# Keep only the preferred columns that the file actually contains, in that
# order; any column not named in new_order is dropped.
present_cols = [name for name in new_order if name in df.columns]
df = df.loc[:, present_cols]

# Append an empty 'native' column as the last column.
df = df.assign(native="")

# Write the reordered table to a new CSV, without the row index.
df.to_csv("new_species.csv", index=False)

print("The new file is saved as：new_species.csv")

The new file is saved as：new_species.csv


## Use the KS file to classify species as native (including planted native) or exotic; all others are labelled "Genus Only"

In [37]:
# Reload the prepared species list and the KS reference workbook.
species = pd.read_csv("new_species.csv")
ks = pd.read_excel("KS edited Flora_All Species20250624-Weed VS Native.xlsx")

# Standardise column names (strip surrounding whitespace).
species.columns = species.columns.str.strip()
ks.columns = ks.columns.str.strip()

# Keep only the KS columns needed for classification, one row per species name
# (the first occurrence wins when a name is repeated).
# Rows without a species name are removed first: pandas matches null join keys
# to each other, so a blank KS name would otherwise lend its status to every
# species row whose scientific_name is also missing.
ks_subset = (
    ks[['species_name', 'Local Status', 'planted_native']]
    .dropna(subset=['species_name'])
    .drop_duplicates(subset=['species_name'], keep='first')
)

# Left join on the species table: every species row is kept, and the KS
# columns are NaN where no KS name matches.
merged = pd.merge(
    species,
    ks_subset,
    left_on='scientific_name',
    right_on='species_name',
    how='left'
)

# Fill 'native' col
def classify(row):
    """Return "Native", "Exotic" or "" for one merged species row.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys 'planted_native' and 'Local Status'.

    Returns
    -------
    str
        "Native" when 'planted_native' is "planted native" (planted natives
        count as native) or 'Local Status' is "native"; "Exotic" when
        'Local Status' is "exotic"; otherwise "".

    Comparisons ignore case and surrounding whitespace. Non-text cells
    (e.g. numbers read from a spreadsheet) are compared via their string
    form instead of raising AttributeError on ``.strip()``.
    """
    def _norm(value):
        # Missing cell -> None; anything else -> trimmed, lower-case text.
        return None if pd.isna(value) else str(value).strip().lower()

    # 'Planted Native' takes precedence over whatever 'Local Status' says.
    if _norm(row['planted_native']) == "planted native":
        return "Native"

    status = _norm(row['Local Status'])
    if status == "native":
        return "Native"
    if status == "exotic":
        return "Exotic"
    return ""
    
# Classify each row and store the label in the 'native' column.
native_labels = merged.apply(classify, axis="columns")
merged['native'] = native_labels

# Created 'category' col
def label_category(row):
    """Return the export category for one row based on its 'native' value.

    "Native" and "Exotic" pass through unchanged; any other value
    (including an empty string or a missing value) becomes "Genus Only".
    """
    status = row['native']
    return status if status in ("Native", "Exotic") else "Genus Only"

# Derive the export category for each row and store it in a 'category' column.
category_labels = merged.apply(label_category, axis="columns")
merged['category'] = category_labels

### Relabel species flagged as genus-only in the P18 Presence sheet as 'Genus Only'

In [38]:
# Read the "Presence" sheet of the P18 workbook.
mqu_file = r"USyd-P18-Presence_FOA_and_PlantNET-Genus.xlsx"
genus_df = pd.read_excel(mqu_file, sheet_name="Presence")

# Standardise column names: trimmed and lower-case.
genus_df.columns = genus_df.columns.str.strip().str.lower()

# Rows whose "native?" value is "genus" (case-insensitive).
is_genus = genus_df["native?"].str.lower().eq("genus")
genus_only = genus_df.loc[is_genus]

# Unique, non-missing species names from the normalized_name column.
genus_names = genus_only["normalized_name"].dropna().unique()

# Override the category of every species in the main table whose
# scientific_name appears in that list.
in_genus_list = merged["scientific_name"].isin(genus_names)
merged.loc[in_genus_list, "category"] = "Genus Only"

# NOTE(review): the number printed is the count of unique names taken from the
# Presence sheet, not the number of rows relabelled in `merged`.
print(f"Added {len(genus_names)} genus-only records from Presence file (using normalized_name).")

Added 100 genus-only records from Presence file (using normalized_name).


In [39]:
# Delete redundant columns: the KS lookup columns and the descriptive species
# columns that are not needed in the categorised output.
# errors="ignore" skips columns that are absent. The descriptive columns are
# only kept upstream when they exist in species.csv, so requiring all of them
# here would raise KeyError; it also makes this cell safe to re-run.
redundant_cols = [
    'species_name', 'Local Status', 'planted_native',
    'vernacular_name', 'family_name', 'taxon_rank',
    'threatened_species_status', 'genus', 'subgenus',
]
merged = merged.drop(columns=redundant_cols, errors="ignore")

# Save the categorised table without the row index.
merged.to_csv("species_with_category.csv", index=False)

print("The new file is saved as：species_with_category.csv")

The new file is saved as：species_with_category.csv


## Delete unnecessary cols for clarity

In [40]:
# Reload the categorised table and remove the classification columns
# (columns that are not present are skipped).
species = pd.read_csv("species_with_category.csv").drop(
    columns=["exotic", "native", "category"], errors="ignore"
)

## Create 3 separate lists

In [41]:
# Read the categorised table written earlier.
classified = pd.read_csv("species_with_category.csv")

# Build the exported frame from `classified` itself instead of masking a frame
# left over from another cell: the mask and the data are then guaranteed to be
# row-aligned, and the cell gives the same result no matter which earlier
# cells were (re-)run. The classification columns are left out of the sheets.
listing = classified.drop(columns=["exotic", "native", "category"], errors="ignore")

# Group by category
native_df = listing[classified["category"] == "Native"]
exotic_df = listing[classified["category"] == "Exotic"]
genus_df = listing[classified["category"] == "Genus Only"]

output_file = "prepared_Paul's_List.xlsx"

# One sheet per category, written without the row index.
sheets = {"Native": native_df, "Exotic": exotic_df, "Genus Only": genus_df}
with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name, index=False)

print(f"Exported to {output_file} with 3 sheets: Native, Exotic, Genus Only.")

Exported to prepared_Paul's_List.xlsx with 3 sheets: Native, Exotic, Genus Only.
