# Category-Location Annual Heatmap

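This notebook generates and exports a heatmap of species sightings for a given category and year at one or more locations. The underlying heatmap data is also exported to an Excel workbook. To use it, set the year, locations, category and required export format in the first code cell below, then run the notebook. The path to the database is read from the `NATURE_RECORDER_DB` environment variable, which must be set before the notebook is run.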

In [None]:
# Year to report on, as a string (e.g. "2024"). It is substituted into the query
# text and also appears in the chart title and the export file names
year = ""

# List of names of the locations to report on. Each name is quoted and the
# resulting list is used in the IN clause of the query
locations = [""]

# Species category to report on. It is substituted into the query text and also
# appears in the chart title and the export file names
category = ""

# Export format for the heatmap chart (compared case-insensitively):
# PNG     - export as PNG image
# PDF     - export as PDF file
# <blank> - do not export
export_format = "PNG"

In [None]:
from pathlib import Path
import sqlparse

# Read the query template and flatten it onto a single line. The encoding is
# given explicitly so the result does not depend on the platform default
query_file_path = Path("sql") / "sightings.sql"
query = query_file_path.absolute().read_text(encoding="utf-8").replace("\n", " ")

# Generate a list of locations suitable for use in the IN clause in the query.
# Each name becomes a SQL string literal; embedded single quotes are doubled so
# that a name such as "St John's" cannot terminate the literal early
location_list = ", ".join(
    "'" + name.replace("'", "''") + "'" for name in locations
)

# Replace the year, location, category and species placeholders. The year is
# passed through str() so that a numeric year is accepted as well as a string.
# The species placeholder is blanked, presumably so that the query is not
# restricted to a single species - verify against sightings.sql
# NOTE(review): $CATEGORY is substituted verbatim; confirm how sightings.sql
# quotes it before using category names that contain quote characters
query = (
    query.replace("$YEAR", str(year))
    .replace("$LOCATION", location_list)
    .replace("$CATEGORY", category)
    .replace("$SPECIES", "")
)

# Show a pretty-printed form of the query
print(sqlparse.format(query, reindent=True, keyword_case='upper'))

In [None]:
import pandas as pd
import sqlite3
import os

# Connect to the database, execute the query and read the results into a dataframe.
# The database path comes from the NATURE_RECORDER_DB environment variable; a
# KeyError is raised here if it is not set
database_path = os.environ["NATURE_RECORDER_DB"]
connection = sqlite3.connect(database_path)
try:
    df = pd.read_sql_query(query, connection, parse_dates=["Date"])
finally:
    # Always release the database handle, even if the query fails, so the
    # connection is not left open for the lifetime of the kernel
    connection.close()

# Check there is some data - the later cells cannot build a heatmap from an
# empty result set, so stop here with an explanatory error
if df.empty:
    locations_list = ", ".join(locations)
    message = f"No data found for category '{category}' at locations '{locations_list}' during '{year}'"
    raise ValueError(message)

In [None]:
import calendar
import re

# Create the folder to hold exported reports
export_folder_path = Path("exported")
export_folder_path.mkdir(parents=True, exist_ok=True)

# Pre-process the data to provide a heatmap data source: one row per species,
# one column per month number, each cell holding the summed count for that
# species in that month (0 where there were no sightings)
df["Month"] = df["Date"].dt.month
heatmap_data = df.groupby(["Species", "Month"])["Count"].sum().unstack(fill_value=0)

# Make sure all twelve months are represented, in calendar order, even if there
# are no sightings in the data set for some of them. Months missing from the
# data are added as zero-filled columns
heatmap_data = heatmap_data.reindex(columns=range(1, 13), fill_value=0)

# Set the labels to the month abbreviations rather than month numbers
heatmap_data.columns = [calendar.month_abbr[m] for m in heatmap_data.columns]

# Export the heatmap data to Excel. The location names are joined with hyphens
# and stripped of everything except letters, digits and spaces (spaces then
# become hyphens) before being used in the file name
locations_list = "-".join(locations)
clean_locations = re.sub("[^0-9a-zA-Z ]+", "", locations_list).replace(" ", "-")
export_file_path = export_folder_path / f"{year}-{category}-{clean_locations}-Heatmap.xlsx"
heatmap_data.to_excel(export_file_path.absolute(), sheet_name="Sightings", index=True)

# Print the heatmap data in full, without truncating rows or columns
with pd.option_context('display.max_rows', None,
                       'display.max_columns', None,
                       'display.precision', 3,
                       ):
    display(heatmap_data)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Draw the heatmap on a figure whose height grows with the number of rows in
# the data (a third of an inch per row)
row_count = heatmap_data.shape[0]
plt.figure(figsize=(12, row_count / 3))
sns.heatmap(heatmap_data, cmap="YlOrRd", annot=False)

# Title the chart and suppress the axis labels
locations_list = ", ".join(locations)
plt.title(f"Number of Sightings of {category} at {locations_list} in {year}")
plt.xlabel("")
plt.ylabel("")

# Supported export formats, keyed by the case-folded format name. Each entry
# holds the export file name and the options passed to savefig for that format
export_targets = {
    "png": (
        f"{year}-{category}-{clean_locations}-Heatmap.png",
        {"format": "png", "dpi": 300, "bbox_inches": "tight"},
    ),
    "pdf": (
        f"{year}-{category}-{clean_locations}-Heatmap.pdf",
        {"format": "pdf", "bbox_inches": "tight"},
    ),
}

# Export the chart if a supported format was requested; any other value
# (including blank) skips the export
export_target = export_targets.get(export_format.casefold())
if export_target is not None:
    export_file_name, savefig_options = export_target
    export_file_path = export_folder_path / export_file_name
    plt.savefig(export_file_path.absolute(), **savefig_options)

# And show the plot
plt.show()