<h1 style="text-align: center; font-size:1vw">Environmental Insights Tutorial</h1>
<h3 style="text-align: center; font-size:1vw">Before starting the tutorial, please ensure that you read the README.md file for this python package. </h3>

<center><img src="_static/environmental_insights.png" width="400"></center>

# Installing prerequisites
### Please ensure that you have all the relevant packages installed. If not, code to install them for you is available in the file "package_installation.ipynb"

# Import relevant modules for the tutorial, both from the Environmental Insights package (air_pollution_functions, data, models) and auxiliary modules (numpy and matplotlib)

In [None]:
from environmental_insights import air_pollution_functions as ei_air_pollution_functions
from environmental_insights import data as ei_data
from environmental_insights import models as ei_models

import numpy as np
import matplotlib.pyplot as plt
import matplotlib

# Loading of the example dataset for the tutorial

## Load in the data that represents the gridded system used for both the global and the UK Model. 

In [None]:
# Load the grids that represent the UK Model. The result is reused by later
# cells: it is merged with concentration data on "UK Model Grid ID" and passed
# to the nearest-point and feature-vector loaders.
uk_grids = ei_data.get_uk_grids()
display(uk_grids)

In [None]:
# Load the grids that represent the Global Model. The result is merged with the
# global concentration data on "Global Model Grid ID" further down.
global_grids = ei_data.get_global_grids()
display(global_grids)

## Load in data for a particular timestamp for the global dataset for all of the grids.

For the global model the outputs produced are at the hourly level across all of 2022. As such the possible timestamps that can be used are 01-01-2022 000000 to 12-31-2022 230000.

In [None]:
# Global timestamps are written as "month-day-year HourMinuteSecond",
# so the value below is 2 July 2022 at 08:00:00.
global_timestamp = "07-02-2022 080000"

# Concentrations for every global grid at the chosen timestamp.
global_complete_dataset = ei_data.air_pollution_concentration_complete_set_real_time_global(
    global_timestamp
)
display(global_complete_dataset)

## Load in data for a particular timestamp for the England dataset for all of the grids, and for a single point (latitude and longitude) for a single timestamp. 

For the England model the outputs produced are at the hourly level across all of 2018. As such the possible timestamps that can be used are 2018-01-01 000000 to 2018-12-31 230000.

In [None]:
# UK timestamps are written as "year-month-day HourMinuteSecond".
uk_timestamp = "2018-01-01 080000"

# Concentrations for every UK grid at the chosen timestamp.
uk_complete_dataset = ei_data.air_pollution_concentration_complete_set_real_time_united_kingdom(
    uk_timestamp
)
display(uk_complete_dataset)

# Concentrations for a single point at the same timestamp. The pair is passed in
# the order given here (described as latitude and longitude in the section heading).
query_point = (51.5, 0.12)
uk_single_datapoint = ei_data.air_pollution_concentration_nearest_point_real_time_united_kingdom(
    *query_point, uk_timestamp, uk_grids
)
display(uk_single_datapoint)

# Visualise the different datapoints that have been loaded in, using both the [UK Daily Air Quality Index](https://uk-air.defra.gov.uk/air-pollution/daqi) and the higher-level Daily Air Quality Bands.

### UK Model Visualisation

In [None]:
# Convert the UK NO2 concentrations (column "no2 Prediction mean") into the
# UK Daily Air Quality Index.
uk_no2_air_quality = ei_air_pollution_functions.air_pollution_concentrations_to_UK_daily_air_quality_index(
    uk_complete_dataset, "no2", "no2 Prediction mean"
)

# Join the index values onto the UK grids via the shared grid identifier.
air_pollution_DF_daily_air_quality_index_uk = uk_grids.merge(
    uk_no2_air_quality, on="UK Model Grid ID"
)

# Visualise the numeric index first, then the air quality bands. The last
# argument labels each figure (presumably its output file name — confirm).
ei_air_pollution_functions.visualise_air_pollution_daily_air_quality_index(
    air_pollution_DF_daily_air_quality_index_uk,
    "no2 AQI",
    "uk_2018_01_01_080000_air_quality_index",
)
ei_air_pollution_functions.visualise_air_pollution_daily_air_quality_bands(
    air_pollution_DF_daily_air_quality_index_uk,
    "no2 Air Quality Index AQI Band",
    "uk_2018_01_01_080000_air_quality_bands",
)

### Global Model Visualisation

In [None]:
# Convert the global NO2 concentrations (column "no2") into the UK Daily Air
# Quality Index, then join the result onto the global grids.
air_pollution_DF_daily_air_quality_index_global = ei_air_pollution_functions.air_pollution_concentrations_to_UK_daily_air_quality_index(
    global_complete_dataset, "no2", "no2"
)
air_pollution_DF_daily_air_quality_index_global = global_grids.merge(
    air_pollution_DF_daily_air_quality_index_global, on="Global Model Grid ID"
)

# The global dataset was loaded for "07-02-2022 080000" (month-day-year), so the
# figure labels carry that timestamp (written year_month_day like the UK labels)
# instead of the UK example's 2018-01-01.
# The last argument labels each figure (presumably its output file name — confirm).
ei_air_pollution_functions.visualise_air_pollution_daily_air_quality_index(
    air_pollution_DF_daily_air_quality_index_global,
    "no2 AQI",
    "global_2022_07_02_080000_air_quality_index",
)
ei_air_pollution_functions.visualise_air_pollution_daily_air_quality_bands(
    air_pollution_DF_daily_air_quality_index_global,
    "no2 Air Quality Index AQI Band",
    "global_2022_07_02_080000_air_quality_bands",
)


# The code from this point onwards requires you to download the data from Google Drive while a more permanent solution for online downloading of the data is sought. 
# Load the typical day data for the UK
A core issue with the use of the data within this package is the amount of data that is available (TBs of data). As such, typical-day data (e.g. a typical Monday in January at 8AM) is provided to make conducting analysis more manageable. 
The dataset that is used in this tutorial is for Friday in January at midnight. 

In [None]:
# Typical-day selection shared by both loaders: month 1 (January), "Friday", hour 0 (midnight).
typical_day_selection = (1, "Friday", 0)

# Every UK grid for the selected typical day.
uk_complete_typical_day_january_friday_midnight = ei_data.air_pollution_concentration_typical_day_real_time_united_kingdom(
    *typical_day_selection
)

# A single point for the same typical day; the two numbers after the selection
# are the point's coordinates, passed in the order given.
uk_single_datapoint_typical_day_january_friday_midnight = ei_data.air_pollution_concentration_nearest_point_typical_day_united_kingdom(
    *typical_day_selection, 51.5, 0.12, uk_grids
)

# Show the complete dataset first, then the single datapoint.
display(
    uk_complete_typical_day_january_friday_midnight,
    uk_single_datapoint_typical_day_january_friday_midnight,
)

In [None]:
def _uk_no2_air_quality_index_with_grids(timestamp):
    """Load the UK dataset for `timestamp`, convert NO2 to the air quality index, and merge onto `uk_grids`.

    `timestamp` uses the UK format "year-month-day HourMinuteSecond".
    Returns `uk_grids` merged with the converted data on "UK Model Grid ID".
    """
    concentrations = ei_data.air_pollution_concentration_complete_set_real_time_united_kingdom(
        timestamp
    )
    no2_air_quality = ei_air_pollution_functions.air_pollution_concentrations_to_UK_daily_air_quality_index(
        concentrations, "no2", "no2 Prediction mean"
    )
    return uk_grids.merge(no2_air_quality, on="UK Model Grid ID")


# Two consecutive hours on 1st January 2018, compared in the cells that follow.
air_pollution_DF_8am = _uk_no2_air_quality_index_with_grids("2018-01-01 080000")
air_pollution_DF_9am = _uk_no2_air_quality_index_with_grids("2018-01-01 090000")
display(air_pollution_DF_8am)

# Visualise the change in the air pollution concentration and air quality index for NO2 between 8am and 9am on 1st January 2018.

In [None]:
# Visualise the change in the NO2 concentration column ("no2 Prediction mean")
# between the 8am and 9am dataframes. The last argument labels the figure
# (presumably its output file name — confirm).
ei_air_pollution_functions.change_in_concentrations_visulisation(
    air_pollution_DF_8am,
    air_pollution_DF_9am,
    "no2 Prediction mean",
    "uk_concentration_change_between_8_9_am",
)
# Same comparison for the air quality index column ("no2 AQI").
ei_air_pollution_functions.change_in_aqi_visulisation(
    air_pollution_DF_8am,
    air_pollution_DF_9am,
    "no2 AQI",
    "uk_aqi_change_between_8_9_am",
)

# Visualising the changes in the air pollution concentrations across a number of timestamps. 

Alongside being able to visualise the changes in air pollution spatially, there is the ability to visualise them temporally, with an aggregate across all of the desired locations.
The example below gives the simple hypothetical scenario of changing the values by simply doubling or halving the concentrations. However, a model could be plugged into this process, as will be seen later. 

In [None]:
# Show the change in concentration line example

# A single month should be used in the example code, with the list `days` holding the days to be analysed,
# chosen from ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
month = 1
days = ["Friday"]


def _scaled_predictions(source_DF, factor):
    """Return a deep copy of `source_DF` whose "Model Predicition" column is multiplied by `factor`."""
    scaled_DF = source_DF.copy(deep=True)
    scaled_DF["Model Predicition"] = scaled_DF["Model Predicition"] * factor
    return scaled_DF


# baseline_DFs holds the unchanged data (the baseline line in the graph). change_postive_DFs holds the
# doubled concentrations and change_negative_DFs the halved concentrations.
# Each dictionary is keyed by day of the week, then by hour.
baseline_DFs = {}
change_postive_DFs = {}
change_negative_DFs = {}
for day_of_week in days:

    # Per-hour dictionaries for the current day
    baseline_DFs_single_day = {}
    change_postive_DFs_single_day = {}
    change_negative_DFs_single_day = {}
    for hour in np.arange(0, 24):

        # Load the typical day data of interest and standardise the column name
        air_pollution_DF = ei_data.air_pollution_concentration_typical_day_real_time_united_kingdom(
            month, day_of_week, hour
        ).rename(columns={"nox Prediction mean": "Model Predicition"})
        baseline_DFs_single_day[hour] = air_pollution_DF

        # Doubled concentrations
        air_pollution_DF_change = _scaled_predictions(air_pollution_DF, 2)
        change_postive_DFs_single_day[hour] = air_pollution_DF_change

        # Halved concentrations
        air_pollution_DF_change = _scaled_predictions(air_pollution_DF, 0.5)
        change_negative_DFs_single_day[hour] = air_pollution_DF_change

    baseline_DFs[day_of_week] = baseline_DFs_single_day
    change_postive_DFs[day_of_week] = change_postive_DFs_single_day
    change_negative_DFs[day_of_week] = change_negative_DFs_single_day


display(change_postive_DFs["Friday"][0])

### Visualise the changes based on the dictionaries of dataframes.

In [None]:
def _plot_nox_change_line(change_DFs, figure_label):
    """Plot the NOx baseline against `change_DFs` for Friday across hours 0-23.

    Returns whatever `change_in_concentration_line` returns.
    """
    return ei_air_pollution_functions.change_in_concentration_line(
        "nox",
        baseline_DFs,
        change_DFs,
        ["Friday"],
        list(np.arange(0, 24)),
        figure_label,
    )


# Doubled concentrations first, then halved concentrations.
_plot_nox_change_line(change_postive_DFs, "nox_change_line_positive")
_plot_nox_change_line(change_negative_DFs, "nox_change_line_negative")

# Example of using the model to create new predictions based on a changing feature vector. 
Exploring the air pollution change when the wind gust doubles across all locations within the feature vector. 

In [None]:
# Load the model of interest ("Climate Projections" for no2) and the typical day
# feature vector for month 1, Friday, hour 8.
climate_projection_model = ei_models.load_model_united_kingdom(
    "0.5", "Climate Projections", "no2"
)
typical_day_feature_vector = ei_models.load_feature_vector_typical_day_united_kingdom(
    1, "Friday", 8, uk_grids
)
display(typical_day_feature_vector)

# Build the scenario on a deep copy so the baseline feature vector is left unchanged,
# then double the wind gust column.
wind_gust_column = "instantaneous_10m_wind_gust"
typical_day_feature_vector_climate_change = typical_day_feature_vector.copy(deep=True)
typical_day_feature_vector_climate_change[wind_gust_column] = (
    typical_day_feature_vector_climate_change[wind_gust_column] * 2
)

In [None]:
# Predict concentrations for the original and the modified feature vector, and
# describe each result so the two scenarios can be compared.
baseline_predictions = ei_models.make_concentration_predicitions_united_kingdom(
    climate_projection_model,
    typical_day_feature_vector,
    ei_models.get_model_feature_vector("Climate Projections"),
)
display(baseline_predictions.describe())

climate_change_predictions = ei_models.make_concentration_predicitions_united_kingdom(
    climate_projection_model,
    typical_day_feature_vector_climate_change,
    ei_models.get_model_feature_vector("Climate Projections"),
)
display(climate_change_predictions.describe())

# Access up-to-date OpenStreetMap data

In [None]:
# Access the amenities of interest, in this case hospitals.
# These coordinates (latitudes 51.29 to 51.69, longitudes -0.51 to 0.33) cover the
# Greater London area. The list is presumably ordered south, west, north, east —
# confirm against get_amenities_as_geodataframe.
bbox = [51.29, -0.51, 51.69, 0.33]
amenities_gdf = ei_data.get_amenities_as_geodataframe("hospital", *bbox)
display(amenities_gdf)

# Access the highways of interest, in this case motorways.
bbox = [49.8, -10.5, 60.9, 2.2]
highways_gdf = ei_data.get_highways_as_geodataframe("motorway", *bbox)
# Declare the coordinates as EPSG:4326 without transforming them. set_crs with
# allow_override=True is the supported way to (re)label a CRS; assigning to .crs
# directly is deprecated when a CRS is already set.
highways_gdf = highways_gdf.set_crs(4326, allow_override=True)
# Reproject to EPSG:3395 for the later distance and plotting steps.
highways_gdf = highways_gdf.to_crs(3395)
display(highways_gdf)

In [None]:
# Add a new motorway segment to the existing motorway network and calculate new
# distance/total metrics for it, for use as feature vector elements.
# NOTE(review): the points look like [longitude, latitude] pairs — confirm against
# calculate_new_metrics_distance_total.
start_point = [0.071113, 52.231664]
end_point = [1.3, 52.6]
# Deep copy of the grids with each geometry replaced by its centroid, so that
# uk_grids itself keeps its original geometries.
uk_grids_centroid = uk_grids.copy(deep=True)
uk_grids_centroid["geometry"] = uk_grids_centroid["geometry"].centroid
# new_data is later used to replace columns in the feature vector;
# highways_user_added is plotted later and has a "source" column.
new_data, highways_user_added = ei_data.calculate_new_metrics_distance_total(
    highways_gdf, "motorway", start_point, end_point, uk_grids_centroid, uk_grids
)

In [None]:
# Visualise the currently existing motorway network (blue) on its own, then the
# network including the new motorway segment (red).
color_map = {"osm": "blue", "User Added": "red"}

# Figure 1: the existing network only. highways_gdf holds the motorways fetched
# from OpenStreetMap, so a single colour is used. The per-row colours derived from
# highways_user_added["source"] belong to a different dataframe and would not
# line up with the rows plotted here.
fig, axes = plt.subplots(1, figsize=(15, 15))
highways_gdf.plot(ax=axes, color=color_map["osm"])
axes.axis("off")
# Create custom legend handles
legend_elements = [
    matplotlib.lines.Line2D([0], [0], color="blue", lw=2, label="Current\nMotorway"),
]

# Add the custom legend to the axis
axes.legend(
    handles=legend_elements, fontsize=20, bbox_to_anchor=(1.2, 0.95), markerscale=2
)


# Figure 2: the network with the user-added segment, coloured row by row from the
# "source" column of the dataframe being plotted.
fig, axes = plt.subplots(1, figsize=(15, 15))
highways_user_added.plot(ax=axes, color=highways_user_added["source"].map(color_map))
axes.axis("off")
# Create custom legend handles
legend_elements = [
    matplotlib.lines.Line2D([0], [0], color="red", lw=2, label="Proposed\nMotorway"),
    matplotlib.lines.Line2D([0], [0], color="blue", lw=2, label="Current\nMotorway"),
]

# Add the custom legend to the axis
axes.legend(
    handles=legend_elements, fontsize=20, bbox_to_anchor=(1.2, 0.95), markerscale=2
)

# Load one "Transport Infrastructure Policy" model per air pollutant, keyed by pollutant name.
air_pollutants = ["no2", "o3", "pm10", "pm2.5", "so2"]
complete_models = {
    pollutant: ei_models.load_model_united_kingdom(
        "0.5", "Transport Infrastructure Policy", pollutant
    )
    for pollutant in air_pollutants
}

# Load the typical day feature vector again (month 1, Friday, hour 8).
typical_day_feature_vector = ei_models.load_feature_vector_typical_day_united_kingdom(
    1, "Friday", 8, uk_grids
)

In [None]:
# The same process as above is conducted with real models: for every pollutant and every
# hour of the typical day, predictions from the unmodified feature vector are compared with
# predictions from a feature vector that includes the new motorway segment.
month = 1
days = ["Friday"]
model_category = "Transport Infrastructure Policy"


def _predict_concentrations(model, features, prediction_column):
    """Predict concentrations for `features` and rename "Model Predicition" to `prediction_column`."""
    predictions = ei_models.make_concentration_predicitions_united_kingdom(
        model,
        features,
        ei_models.get_model_feature_vector(model_category),
    )
    return predictions.rename(columns={"Model Predicition": prediction_column})


# Both result dictionaries are keyed by pollutant, then day of the week, then hour.
baseline_DFs_air_pollutant = {}
change_DFs_air_pollutant = {}
for air_pollutant in air_pollutants:
    baseline_DFs = {}
    changeDFs = {}
    for day_of_week in days:
        display(day_of_week)
        baseline_DFs_single_day = {}
        change_DFs_single_day = {}
        for hour in np.arange(0, 24):

            # Read in the relevant feature vector for the desired timestamp.
            feature_vector = ei_models.load_feature_vector_typical_day_united_kingdom(
                month, day_of_week, hour, uk_grids
            )

            # Baseline predictions from the current data.
            air_pollution_estimation_baseline = _predict_concentrations(
                complete_models[air_pollutant],
                feature_vector,
                "Model Predicition Baseline",
            )

            # Replace the two motorway columns with the values that include the new segment.
            feature_vector_modified = ei_data.replace_feature_vector_column(
                feature_vector, new_data, "Road Infrastructure Distance motorway"
            )
            feature_vector_modified = ei_data.replace_feature_vector_column(
                feature_vector_modified, new_data, "Total Length motorway"
            )

            # Predictions from the modified feature vector.
            air_pollution_estimation_modified = _predict_concentrations(
                complete_models[air_pollutant],
                feature_vector_modified,
                "Model Predicition Modified",
            )

            # Put both predictions side by side for each grid.
            air_pollution_estimation = air_pollution_estimation_modified.merge(
                air_pollution_estimation_baseline, on="UK Model Grid ID"
            )

            # Keep only the grids whose prediction differs between the two scenarios.
            prediction_changed = (
                air_pollution_estimation["Model Predicition Baseline"]
                != air_pollution_estimation["Model Predicition Modified"]
            )
            air_pollution_estimation_difference = air_pollution_estimation[
                prediction_changed
            ]

            # Store each scenario under the shared "Model Predicition" column name.
            baseline_DFs_single_day[hour] = air_pollution_estimation_difference[
                ["UK Model Grid ID", "Model Predicition Baseline"]
            ].rename(columns={"Model Predicition Baseline": "Model Predicition"})
            change_DFs_single_day[hour] = air_pollution_estimation_difference[
                ["UK Model Grid ID", "Model Predicition Modified"]
            ].rename(columns={"Model Predicition Modified": "Model Predicition"})

        baseline_DFs[day_of_week] = baseline_DFs_single_day
        changeDFs[day_of_week] = change_DFs_single_day
    baseline_DFs_air_pollutant[air_pollutant] = baseline_DFs
    change_DFs_air_pollutant[air_pollutant] = changeDFs

# Visualise the changes in air pollution across a typical Friday due to the placement of the new motorway segment 

In [None]:
# For each pollutant, plot the baseline against the motorway scenario for Friday across
# hours 0-23, and keep whatever change_in_concentration_line returns, keyed by pollutant.
chage_concentration_lines_figs = {
    pollutant: ei_air_pollution_functions.change_in_concentration_line(
        pollutant,
        baseline_DFs_air_pollutant[pollutant],
        change_DFs_air_pollutant[pollutant],
        ["Friday"],
        list(np.arange(0, 24)),
        "motorway_addition_" + pollutant,
    )
    for pollutant in air_pollutants
}