# Abundance vs Frequency Scatter Plot

This notebook generates and exports a scatter plot of abundance vs frequency for all species in a category at a location.

| Region on Plot | Interpretation |
| --- | --- |
| Bottom-left | Rare species — seen infrequently and in low numbers. Could be elusive, migratory, or genuinely uncommon |
| Top-right | Common species — seen often and in large numbers. Likely widespread and/or gregarious |
| Bottom-right (high frequency, low abundance) | Species often seen but in small groups or solo (e.g. a bird that’s always alone but spotted often) |
| Top-left (low frequency, high abundance) | Species seen rarely, but in big flocks/groups when they do appear (e.g. irruptive species or migratory flocks) |

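To use it, set the location, category and export format in the first code cell below, then run the notebook. The path to the database is read from the `NATURE_RECORDER_DB` environment variable, which must be set before the notebook is run.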

In [None]:
# Location to report on; substituted verbatim for the $LOCATION placeholder in the SQL query
location = ""

# Category to report on; substituted verbatim for the $CATEGORY placeholder in the SQL query
category = ""

# Export format for the scatter plot (compared case-insensitively):
# PNG     - export as PNG image
# PDF     - export as PDF file
# <blank> - do not export (any other unrecognised value also skips the export)
export_format = "PNG"

In [None]:
from pathlib import Path
import sqlparse

# Read the query file, collapsing newlines to spaces so the query is a single line of text.
# The encoding is given explicitly so the result does not depend on the platform's default
# locale encoding (assumes the query file is UTF-8 encoded)
query_file_path = Path("sql") / "abundance_frequency.sql"
query = query_file_path.read_text(encoding="utf-8").replace("\n", " ")

# Replace the location and category placeholders
# NOTE(review): the values are substituted verbatim, so a name containing a quote character
# may break the query - confirm how the placeholders are quoted in the SQL file
query = query.replace("$LOCATION", location) \
             .replace("$CATEGORY", category)

# Show a pretty-printed form of the query
print(sqlparse.format(query, reindent=True, keyword_case='upper'))

In [None]:
import pandas as pd
import sqlite3
import os

# Connect to the database, execute the query and read the results into a dataframe.
# The connection is closed as soon as the data has been read, even if the query fails,
# so it is not left open for the lifetime of the notebook kernel
database_path = os.environ["NATURE_RECORDER_DB"]
connection = sqlite3.connect(database_path)
try:
    df = pd.read_sql_query(query, connection, parse_dates=["Date"])
finally:
    connection.close()

# Check there is some data; stop here with a clear message rather than
# producing an empty export and an empty chart
if df.empty:
    message = f"No data found for category '{category}' at location '{location}'"
    raise ValueError(message)

In [None]:
import pandas as pd
import re

# Aggregate per species: abundance is the total of the "Count" column and
# frequency is the number of rows returned for that species
scatter_plot = df.groupby("Species").agg(
    Abundance=pd.NamedAgg(column="Count", aggfunc="sum"),
    Frequency=pd.NamedAgg(column="Species", aggfunc="count"),
).reset_index()

# Make sure the folder that holds exported reports exists
export_folder_path = Path("exported")
export_folder_path.mkdir(parents=True, exist_ok=True)

# Build the export file name from the category and location: anything other than
# ASCII letters, digits and spaces is dropped, then spaces become hyphens
clean_location, clean_category = (
    re.sub("[^0-9a-zA-Z ]+", "", text).replace(" ", "-")
    for text in (location, category)
)
export_file_name = f"{clean_category}-{clean_location}-Abundance-Frequency"

# Write the aggregated data to an Excel workbook
export_file_path = export_folder_path / f"{export_file_name}.xlsx"
scatter_plot.to_excel(export_file_path.absolute(), sheet_name="Abundance vs Frequency", index=False)

# Show the whole table, without truncating rows or columns
with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.precision', 3):
    display(scatter_plot)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Draw one point per species, positioned by sighting frequency (x) and total abundance (y)
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=scatter_plot, x='Frequency', y='Abundance', hue='Species', s=100, ax=ax)

ax.set_title(f'Abundance vs Frequency for {category}')
ax.set_xlabel('Frequency (Number of Sightings)')
ax.set_ylabel('Abundance (Total Individuals)')
ax.grid(True)

# Place the legend beneath the axes, horizontally centred and laid out in three columns
ax.legend(
    title='Species',
    bbox_to_anchor=(0.5, -0.25),
    loc='upper center',
    borderaxespad=0,
    ncol=3
)

# Export the chart if PNG or PDF was requested; the comparison ignores case
requested_format = export_format.casefold()
if requested_format == "png":
    export_file_path = export_folder_path / f"{export_file_name}.png"
    fig.savefig(export_file_path.absolute(), format="png", dpi=300, bbox_inches="tight")
elif requested_format == "pdf":
    export_file_path = export_folder_path / f"{export_file_name}.pdf"
    fig.savefig(export_file_path.absolute(), format="pdf", bbox_inches="tight")

# Show the plot
plt.show()