# Abundance vs Frequency Scatter Plot

This notebook generates and exports a scatter plot of abundance (total individuals counted) vs frequency (number of sightings) for all species in a category at a location.

| Region on Plot | Interpretation |
| --- | --- |
| Bottom-left | Rare species — seen infrequently and in low numbers. Could be elusive, migratory, or genuinely uncommon |
| Top-right | Common species — seen often and in large numbers. Likely widespread and/or gregarious |
| Bottom-right (high frequency, low abundance) | Species often seen but in small groups or solo (e.g. a bird that’s always alone but spotted often) |
| Top-left (low frequency, high abundance) | Species seen rarely, but in big flocks/groups when they do appear (e.g. irruptive species or migratory flocks) |


In [13]:
%run ../pathutils.ipynb
%run ../definitions.ipynb
%run ../database.ipynb
%run ../export.ipynb
%run pathutils.ipynb

In [14]:
import seaborn as sns
import matplotlib.pyplot as plt

def export_scatter_plot(export_folder_path, data, category):
    """
    Draw an abundance vs frequency scatter plot, one point per species, and export it.

    Args:
        export_folder_path: Destination folder, passed straight through to export_chart.
        data: Table with 'Species', 'Frequency' and 'Abundance' columns, as consumed
            by seaborn (the notebook passes the aggregated pandas DataFrame).
        category: Category name; used only in the chart title.

    Any exception raised while plotting or exporting propagates to the caller, but
    the figure is always closed first so failed exports do not accumulate open figures.
    """
    # Create a new figure; it becomes the current pyplot figure, which is what
    # seaborn draws on when no explicit axes are supplied
    fig = plt.figure(figsize=(10, 6))

    try:
        # One point per row, coloured by species; seaborn returns the Axes it drew on
        ax = sns.scatterplot(data=data, x='Frequency', y='Abundance', hue='Species', s=100)

        ax.set_title(f'Abundance vs Frequency for {category}')
        ax.set_xlabel('Frequency (Number of Sightings)')
        ax.set_ylabel('Abundance (Total Individuals)')
        ax.grid(True)

        # Move legend below the plot, centered below the plot and with multiple columns
        ax.legend(
            title='Species',
            bbox_to_anchor=(0.5, -0.25),
            loc='upper center',
            borderaxespad=0,
            ncol=3
        )

        # Export the chart (export_chart is loaded from export.ipynb; presumably it
        # saves the current figure - confirm against that notebook)
        export_chart(export_folder_path, "abundance", "png")
    finally:
        # Close the plot even if plotting or exporting failed
        plt.close(fig)

In [15]:
# Load the definitions and keep only those requesting the "abundance" report
# (the report name is compared case-insensitively)
definitions = load_definitions("wildlife")
abundance_definitions = [definition for definition in definitions if definition["Report"].casefold() == "abundance"]

# Produce one report per definition. The query, export folder and file names depend
# only on the definition's location and category, so each report is generated once
# rather than being regenerated and overwritten for every year in a From/To range.
for definition in abundance_definitions:
    # Construct the query
    query = construct_query("wildlife", "abundance_vs_frequency_scatter.sql", {
        "LOCATION": definition["Location"],
        "CATEGORY": definition["Category"]
    })

    try:
        # Run the query to load the data - this raises a ValueError if there's no data
        df = query_data("wildlife", query)
    except ValueError:
        # Only the query is guarded, so a ValueError raised later by the aggregation
        # or the exports is not misreported as missing data
        print(f"WARNING: No data found for location = {definition['Location']}, category = {definition['Category']}")
        continue

    # Calculate abundance (sum of the Count column) and frequency (number of rows)
    # for each species
    scatter_plot = (
        df
        .groupby("Species")
        .agg(
            Abundance=("Count", "sum"),
            Frequency=("Species", "count")
        )
        .reset_index()
    )

    # Export the data to Excel
    export_folder_path = get_abundance_frequency_scatter_folder_path(definition["Country"], definition["Location"], definition["Category"])
    export_to_spreadsheet(export_folder_path, "abundance.xlsx", { "Abundance vs Frequency": scatter_plot })

    # Export the chart
    export_scatter_plot(export_folder_path, scatter_plot, definition["Category"])