# Year-on-Year Species Trend Chart

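This notebook generates and exports year-on-year sighting trends for every species covered by the "trend" report definitions. For each species at each configured location and category, it writes an Excel workbook of yearly counts and a PNG bar chart with a fitted linear trend line.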

In [None]:
%run ../pathutils.ipynb
%run ../definitions.ipynb
%run ../database.ipynb
%run ../export.ipynb
%run database.ipynb
%run pathutils.ipynb

In [None]:
import matplotlib.pyplot as plt
from scipy.stats import linregress

def export_trend_chart(export_folder_path, data, filename, species, location):
    """
    Export a bar chart of yearly sighting counts with a linear trend line overlaid.

    The trend line is only drawn when the data spans at least two distinct years.
    linregress() cannot fit a line when every x value is identical and raises a
    ValueError in that case, which would otherwise stop the chart being exported.

    :param export_folder_path: Folder passed through to export_chart()
    :param data: Table-like object with "Year" and "Count" columns that supports
                 element-wise arithmetic (e.g. a pandas DataFrame)
    :param filename: Chart name passed to export_chart() together with the "png" format
    :param species: Species name shown in the chart title
    :param location: Location name shown in the chart title
    """
    x = data["Year"]
    y = data["Count"]

    fig = plt.figure(figsize=(10, 6))
    try:
        # Bar chart for actual data
        plt.bar(x, y, label='Sightings per Year', color='skyblue')

        # Trend line over bars - a regression needs at least two distinct years
        if len(set(x)) > 1:
            slope, intercept, *_ = linregress(x, y)
            trend_y = intercept + slope * x
            plt.plot(x, trend_y, color='red', linewidth=2, label='Trend Line')

        # Set labels and the chart title
        plt.title(f"Year On Year Trends for {species} at {location}")
        plt.xlabel("Year")
        plt.ylabel("Count")

        # Add the legend and grid lines
        plt.legend()
        plt.grid(True, linestyle='--', alpha=0.5)
        plt.tight_layout()

        # Export the chart
        export_chart(export_folder_path, filename, "png")
    finally:
        # Always release the figure, even if plotting or the export fails
        plt.close(fig)

In [None]:
# Load the definitions and keep only those requesting a "trend" report (case-insensitive match)
definitions = load_definitions("wildlife")
trend_definitions = [definition for definition in definitions if definition["Report"].casefold() == "trend"]

# Iterate over each definition to produce a report for each one
for definition in trend_definitions:
    # Retrieve the list of species for this category
    species_list = load_species(definition["Location"], definition["Category"])

    # Iterate over the species in the current category reported at the current location
    for species in species_list:
        # Construct the query
        query = construct_query("wildlife", "species_year_on_year.sql", {
            "LOCATION": definition["Location"],
            "START_YEAR": definition["From"],
            "END_YEAR": definition["To"],
            "SPECIES": species
        })

        # Run the query to load the data - this raises a ValueError if there's no data. Only the
        # query is guarded here so that failures in later steps aren't misreported as "no data"
        try:
            df = query_data("wildlife", query)
        except ValueError:
            print(f"WARNING: No data found for location = {definition['Location']}, species = {species}, years = {definition['From']} to {definition['To']}")
            continue

        try:
            # Specifically add a separate "year" column to the data frame and aggregate the data
            df["Year"] = df["Date"].dt.year
            yearly_species_counts = df.groupby(["Year", "Species"])["Count"].sum().reset_index()

            # Export the trend data to Excel
            export_folder_path = get_annual_trend_export_folder_path(definition["Country"], definition["Location"], definition["Category"])
            clean_species = clean_string(species)
            filename = f"{clean_species}.xlsx"
            export_to_spreadsheet(export_folder_path, filename, { "Year On Year Trends": yearly_species_counts })

            # Export the chart
            export_trend_chart(export_folder_path, yearly_species_counts, clean_species, species, definition["Location"])

        except ValueError as error:
            # Keep the run best-effort across species, but report what actually went wrong
            print(f"WARNING: Unable to export trend for location = {definition['Location']}, species = {species}: {error}")
