# Category Composition Pie Chart

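This notebook generates a pie chart showing the species composition of a category at a location. The underlying species counts are always exported to an Excel workbook and the chart can optionally be exported as a PNG or PDF file. To use it, set the location, category and required export format in the first code cell below, and make sure the `NATURE_RECORDER_DB` environment variable holds the path to the database, before running the notebook.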

In [None]:
# Location to report on. Substituted for the $LOCATION placeholder in the
# query and also used (with punctuation stripped) in the export file names
location = ""

# Category to report on. Substituted for the $CATEGORY placeholder in the
# query and also used (with punctuation stripped) in the export file names
category = ""

# Export format for the pie chart (matched case-insensitively):
# PNG     - export as PNG image
# PDF     - export as PDF file
# <blank> - do not export
export_format = "PNG"

In [None]:
from pathlib import Path
import sqlparse

# Read the query file and collapse it onto a single line. The encoding is given
# explicitly so the result does not depend on the platform's default encoding
query_file_path = Path("sql") / "category_composition.sql"
query = query_file_path.absolute().read_text(encoding="utf-8").replace("\n", " ")

# Double any single quotes in the values so that a name containing an
# apostrophe (e.g. "King's Wood") cannot terminate a SQL string literal early.
# NOTE(review): assumes $LOCATION and $CATEGORY sit inside single-quoted string
# literals in the query file - confirm against sql/category_composition.sql
sql_location = location.replace("'", "''")
sql_category = category.replace("'", "''")

# Replace the location and category placeholders
query = query.replace("$LOCATION", sql_location) \
             .replace("$CATEGORY", sql_category)

# Show a pretty-printed form of the query
print(sqlparse.format(query, reindent=True, keyword_case='upper'))

In [None]:
import pandas as pd
import sqlite3
import os

# Connect to the database, execute the query and read the results into a dataframe.
# The path comes from the NATURE_RECORDER_DB environment variable (a KeyError is
# raised if it is not set). sqlite3 connections are not closed automatically, so
# close this one explicitly as soon as the dataframe is populated, even if the
# query fails, rather than leaving the database open for the kernel's lifetime
database_path = os.environ["NATURE_RECORDER_DB"]
connection = sqlite3.connect(database_path)
try:
    df = pd.read_sql_query(query, connection, parse_dates=["Date"])
finally:
    connection.close()

# Check there is some data - stop here with a clear message rather than
# letting the later cells fail on an empty dataframe
if not df.shape[0]:
    message = f"No data found for category '{category}' at location '{location}'"
    raise ValueError(message)

In [None]:
import pandas as pd
import re

# Total the number of individuals per species, largest total first
# (the ordering is only there to make the table and chart easier to read)
species_counts = (
    df.groupby("Species")["Count"]
      .sum()
      .sort_values(ascending=False)
)

# Make sure the folder that holds exported reports exists
export_folder_path = Path("exported")
export_folder_path.mkdir(parents=True, exist_ok=True)

# Build the export file name: drop everything except letters, digits and
# spaces from the category and location, then turn spaces into hyphens
clean_category, clean_location = (
    re.sub("[^0-9a-zA-Z ]+", "", name).replace(" ", "-")
    for name in (category, location)
)
export_file_name = f"{clean_category}-{clean_location}-Category-Composition"

# Export the data to Excel
export_file_path = export_folder_path / f"{export_file_name}.xlsx"
species_counts.to_excel(
    export_file_path.absolute(),
    sheet_name="Category Composition",
    index=True,
)

# Show the complete set of pie chart data, without row/column truncation
display_options = (
    'display.max_rows', None,
    'display.max_columns', None,
    'display.precision', 3,
)
with pd.option_context(*display_options):
    display(species_counts)

In [None]:

import matplotlib.pyplot as plt

# Prepare data: one legend entry per species, annotated with its share of the total
labels = species_counts.index
sizes = species_counts.values
percentages = sizes / sizes.sum() * 100
legend_labels = []
for label, pct in zip(labels, percentages):
    legend_labels.append(f"{label} ({pct:.1f}%)")

# Create the figure and draw the pie clockwise, starting from the top
fig, ax = plt.subplots(figsize=(8, 8))
wedges, _ = ax.pie(sizes, startangle=90, counterclock=False)

# Equal aspect ratio keeps the pie circular
ax.set_aspect('equal')
ax.set_title(f"Composition of '{category}' Category by Species")

# Layout is controlled manually rather than with tight_layout: reserve space
# under the axes for the legend (increase the value if more room is needed)
fig.subplots_adjust(bottom=0.3)

# Place legend fully outside the plot, centred just below the axes
legend_options = {
    "title": "Species",
    "loc": 'upper center',
    "bbox_to_anchor": (0.5, -0.005),
    "ncol": 3,
    "frameon": False,
}
ax.legend(wedges, legend_labels, **legend_options)

# Export the chart in the requested format, if any (matched case-insensitively)
requested_format = export_format.casefold()
if requested_format == "png":
    # Export to PNG
    export_file_path = export_folder_path / f"{export_file_name}.png"
    fig.savefig(export_file_path.absolute(), format="png", dpi=300, bbox_inches="tight")
elif requested_format == "pdf":
    # Export to PDF
    export_file_path = export_folder_path / f"{export_file_name}.pdf"
    fig.savefig(export_file_path.absolute(), format="pdf", bbox_inches="tight")

# Show the plot
plt.show()