# NO2 Data Aggregation

Aggregates NO2 values by administrative boundaries and a regular hexagon grid.

To convert from mol/m<sup>2</sup> to molecules/cm<sup>2</sup>, multiply by 6.022140857e+19 (6.022140857 * 10<sup>19</sup>).

# Environment Setup

In [None]:
# Load Notebook formatter
# NOTE: `%load_ext` is an IPython magic, so this cell only runs inside a
# Jupyter/IPython session (it is not valid plain Python).
%load_ext nb_black
# Use the reload variant instead when the extension is already loaded
# %reload_ext nb_black

In [None]:
# Import packages
import os
import glob
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import seaborn as sns
import pandas as pd
import geopandas as gpd
import rasterio as rio
import rasterstats as rs

In [None]:
# Set Options
# Seaborn plot defaults with the font scale set to 1.5; the commented-out
# line is the same call with the "whitegrid" style added
# sns.set(font_scale=1.5, style="whitegrid")
sns.set(font_scale=1.5)
# Show every column when a dataframe is displayed
pd.set_option("display.max_columns", None)
# Use the fully qualified option name; the bare "precision" pattern is
# ambiguous in recent pandas releases (it also matches
# "styler.format.precision") and raises an OptionError there
pd.set_option("display.precision", 15)

In [None]:
# Set working directory
# Move one directory up so that the relative data paths used below
# ("02-raw-data", "03-processed-data") resolve against the new location.
# NOTE: re-running this cell moves up one more level each time.
os.chdir(os.pardir)
print(f"Working directory: {os.getcwd()}")

In [None]:
def aggregate_raster_data(
    raster_path, vector_path, zonal_statistics="count sum"
):
    """Computes zonal statistics of a raster for each polygon in a
    vector file.

    Parameters
    ----------
    raster_path : str
        Path to the raster file whose first band will be aggregated.

    vector_path : str
        Path to the vector file containing the polygons that the
        raster values are aggregated to.

    zonal_statistics : space-delimited str, optional
        Zonal statistics to calculate. Default value is 'count sum'.

    Returns
    -------
    aggregated_data : geopandas geodataframe
        Geodataframe with one row per polygon, holding the polygon
        geometry and properties along with the requested statistics.
        No CRS is passed when the geodataframe is built.

    Example
    -------
        >>> hexagon_means = aggregate_raster_data(
        ...     raster_path=no2_mean_mar_2020_path,
        ...     vector_path=south_korea_hexagon_grid_path,
        ...     zonal_statistics="count mean",
        ... )  # doctest: +SKIP
    """
    # Read the first band (masked) and keep the raster profile, which
    # supplies the nodata value and the affine transform
    with rio.open(raster_path) as raster:
        band = raster.read(1, masked=True)
        profile = raster.profile

    # Load the polygons that define the zones
    polygons = gpd.read_file(vector_path)

    # Compute the statistics per polygon, returned as GeoJSON-like features
    features = rs.zonal_stats(
        vectors=polygons,
        raster=band,
        nodata=profile.get("nodata"),
        affine=profile.get("transform"),
        geojson_out=True,
        copy_properties=True,
        stats=zonal_statistics,
    )

    # Convert the features into a geodataframe
    return gpd.GeoDataFrame.from_features(features)

In [None]:
def magnitude_change(pre_change, post_change):
    """Returns the difference between the post-change and pre-change
    values (post minus pre).
    """
    return post_change - pre_change

In [None]:
def percent_change(pre_change, post_change):
    """Returns the change from the pre-change to the post-change value
    as a percentage of the pre-change value.

    A pre-change value of 0 returns 0, since the percentage is
    undefined for a zero baseline.
    """
    # Zero baseline: report 0 rather than dividing by zero
    if pre_change == 0:
        return 0

    return (post_change - pre_change) / pre_change * 100

In [None]:
def get_geometry(shapefile_path, geometry_column="geometry"):
    """Reads a shapefile and returns it reduced to its geometry column
    (the index is kept).
    """
    # Read the full file, then keep only the geometry column
    shapes = gpd.read_file(shapefile_path)

    return shapes[[geometry_column]]

In [None]:
def clean_data(geodatframe, new_name):
    """Returns a new dataframe holding only the 'mean' column of the
    input, renamed to the specified name.
    """
    # Keep only the mean column
    mean_only = geodatframe[["mean"]]

    # Rename the mean column (e.g. to the month and year it represents)
    return mean_only.rename(columns={"mean": new_name}, copy=True)

In [None]:
def add_change(
    dataframe,
    pre_change_column,
    post_change_column,
    new_column,
    change_type="magnitude",
):
    """Calculates and adds a magnitude or percent change column
    to all dataframe rows, based on two input columns.

    Parameters
    ----------
    dataframe : pandas dataframe or geopandas geodataframe
        Dataframe to add the change column to (modified in place).

    pre_change_column : str
        Name of the column holding the pre-change (baseline) values.

    post_change_column : str
        Name of the column holding the post-change values.

    new_column : str
        Name of the column that will hold the calculated change.

    change_type : str, optional
        Either 'magnitude' (post minus pre) or 'percent' (change as a
        percentage of the pre-change value, with 0 used where the
        pre-change value is 0). Default value is 'magnitude'.

    Returns
    -------
    None
        The column is added to the input dataframe and a confirmation
        message is printed.

    Raises
    ------
    ValueError
        If change_type is neither 'magnitude' nor 'percent'.
    """
    # Reject unknown change types instead of silently computing percent
    if change_type not in ("magnitude", "percent"):
        raise ValueError(
            f"Invalid change type: {change_type!r}. "
            "Must be 'magnitude' or 'percent'."
        )

    pre_change = dataframe[pre_change_column]
    post_change = dataframe[post_change_column]

    # Column arithmetic replaces the row-by-row apply; it gives the same
    # values and also works when the dataframe has no rows
    change = post_change - pre_change

    if change_type == "percent":
        # Same convention as percent_change(): a zero baseline yields 0.
        # Zero baselines are swapped for 1 before dividing so that no
        # division by zero takes place; those rows are then set to 0.
        nonzero_baseline = pre_change != 0
        safe_baseline = pre_change.where(nonzero_baseline, 1)
        change = (change / safe_baseline * 100).where(nonzero_baseline, 0)

    dataframe[new_column] = change

    # Confirm the addition (the function itself returns None)
    print(f"Added new column: {new_column}")

# User-Defined Variables

In [None]:
# Set paths to South Korea shapefiles at levels 0, 1, and 2, and hexagon grid
# Folders holding the raw boundary files and the processed hexagon grid
raw_vector_dir = os.path.join("02-raw-data", "vector", "south-korea")
processed_vector_dir = os.path.join(
    "03-processed-data", "vector", "south-korea"
)

# Boundary shapefiles (levels 0, 1, and 2)
south_korea_level_0_path = os.path.join(
    raw_vector_dir, "gadm36_south_korea.shp"
)
south_korea_level_1_path = os.path.join(
    raw_vector_dir, "gadm36_south_korea_level_1.shp"
)
south_korea_level_2_path = os.path.join(
    raw_vector_dir, "gadm36_south_korea_level_2.shp"
)

# Hexagon grid shapefile
south_korea_hexagon_grid_path = os.path.join(
    processed_vector_dir, "south_korea_hexagon_grid.shp"
)

# Level 0 boundary geodataframe (overlaid on the hexagon grid plots)
south_korea_level_0 = gpd.read_file(south_korea_level_0_path)

In [None]:
# Set paths to mean NO2 rasters
# Folder that holds all monthly mean rasters
monthly_mean_dir = os.path.join(
    "03-processed-data", "raster", "south-korea", "statistics", "monthly"
)

# 2018 (July - December)
no2_mean_jul_2018_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20180701-20180731-MEAN-MOL-PER-M2.tif"
)
no2_mean_aug_2018_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20180801-20180831-MEAN-MOL-PER-M2.tif"
)
no2_mean_sep_2018_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20180901-20180930-MEAN-MOL-PER-M2.tif"
)
no2_mean_oct_2018_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20181001-20181031-MEAN-MOL-PER-M2.tif"
)
no2_mean_nov_2018_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20181101-20181130-MEAN-MOL-PER-M2.tif"
)
no2_mean_dec_2018_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20181201-20181231-MEAN-MOL-PER-M2.tif"
)

# 2019 (January - December)
no2_mean_jan_2019_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20190101-20190131-MEAN-MOL-PER-M2.tif"
)
no2_mean_feb_2019_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20190201-20190228-MEAN-MOL-PER-M2.tif"
)
no2_mean_mar_2019_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20190301-20190331-MEAN-MOL-PER-M2.tif"
)
no2_mean_apr_2019_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20190401-20190430-MEAN-MOL-PER-M2.tif"
)
no2_mean_may_2019_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20190501-20190531-MEAN-MOL-PER-M2.tif"
)
no2_mean_jun_2019_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20190601-20190630-MEAN-MOL-PER-M2.tif"
)
no2_mean_jul_2019_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20190701-20190731-MEAN-MOL-PER-M2.tif"
)
no2_mean_aug_2019_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20190801-20190831-MEAN-MOL-PER-M2.tif"
)
no2_mean_sep_2019_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20190901-20190930-MEAN-MOL-PER-M2.tif"
)
no2_mean_oct_2019_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20191001-20191031-MEAN-MOL-PER-M2.tif"
)
no2_mean_nov_2019_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20191101-20191130-MEAN-MOL-PER-M2.tif"
)
no2_mean_dec_2019_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20191201-20191231-MEAN-MOL-PER-M2.tif"
)

# 2020 (January - July)
no2_mean_jan_2020_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20200101-20200131-MEAN-MOL-PER-M2.tif"
)
no2_mean_feb_2020_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20200201-20200229-MEAN-MOL-PER-M2.tif"
)
no2_mean_mar_2020_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20200301-20200331-MEAN-MOL-PER-M2.tif"
)
no2_mean_apr_2020_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20200401-20200430-MEAN-MOL-PER-M2.tif"
)
no2_mean_may_2020_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20200501-20200531-MEAN-MOL-PER-M2.tif"
)
no2_mean_jun_2020_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20200601-20200630-MEAN-MOL-PER-M2.tif"
)
no2_mean_jul_2020_path = os.path.join(
    monthly_mean_dir, "S5P-OFFL-L3-NO2-20200701-20200731-MEAN-MOL-PER-M2.tif"
)

# Data Acquisition and Preprocessing

In [None]:
# Read shapefiles into geodataframes
# south_korea_level_0 = gpd.read_file(south_korea_level_0_path)
# south_korea_level_1 = gpd.read_file(south_korea_level_1_path)
# south_korea_level_2 = gpd.read_file(south_korea_level_2_path)
# south_korea_hexagon_grid = gpd.read_file(south_korea_hexagon_grid_path)

# Data Processing

Workflow:

* Rasterize/aggregate each monthly mean (by mean) into the administrative boundaries (level 1, level 2, hexagon grid)
    * Use a function to aggregate the raster to a geodataframe, drop all columns except the mean and geometry, and rename the mean column to the month and year (e.g., `mar_2020`)
* Combine/consolidate all months into a single geodataframe
    * Concatenate along axis 1 (columns) to combine, keeping only a single instance of the geometry column
* Create change/difference (magnitude and/or percent) for months as necessary within consolidated geodataframe
* Plot months and change on same figure

In [None]:
# Aggregate to hexagon grid
def _hexagon_stats(raster_path):
    """Returns the count and mean zonal statistics of a raster for the
    South Korea hexagon grid.
    """
    return aggregate_raster_data(
        raster_path=raster_path,
        vector_path=south_korea_hexagon_grid_path,
        zonal_statistics="count mean",
    )


# 2018
aggregated_hexagon_grid_jul_2018 = _hexagon_stats(no2_mean_jul_2018_path)
aggregated_hexagon_grid_aug_2018 = _hexagon_stats(no2_mean_aug_2018_path)
aggregated_hexagon_grid_sep_2018 = _hexagon_stats(no2_mean_sep_2018_path)
aggregated_hexagon_grid_oct_2018 = _hexagon_stats(no2_mean_oct_2018_path)
aggregated_hexagon_grid_nov_2018 = _hexagon_stats(no2_mean_nov_2018_path)
aggregated_hexagon_grid_dec_2018 = _hexagon_stats(no2_mean_dec_2018_path)

# 2019
aggregated_hexagon_grid_jan_2019 = _hexagon_stats(no2_mean_jan_2019_path)
aggregated_hexagon_grid_feb_2019 = _hexagon_stats(no2_mean_feb_2019_path)
aggregated_hexagon_grid_mar_2019 = _hexagon_stats(no2_mean_mar_2019_path)
aggregated_hexagon_grid_apr_2019 = _hexagon_stats(no2_mean_apr_2019_path)
aggregated_hexagon_grid_may_2019 = _hexagon_stats(no2_mean_may_2019_path)
aggregated_hexagon_grid_jun_2019 = _hexagon_stats(no2_mean_jun_2019_path)
aggregated_hexagon_grid_jul_2019 = _hexagon_stats(no2_mean_jul_2019_path)
aggregated_hexagon_grid_aug_2019 = _hexagon_stats(no2_mean_aug_2019_path)
aggregated_hexagon_grid_sep_2019 = _hexagon_stats(no2_mean_sep_2019_path)
aggregated_hexagon_grid_oct_2019 = _hexagon_stats(no2_mean_oct_2019_path)
aggregated_hexagon_grid_nov_2019 = _hexagon_stats(no2_mean_nov_2019_path)
aggregated_hexagon_grid_dec_2019 = _hexagon_stats(no2_mean_dec_2019_path)

# 2020
aggregated_hexagon_grid_jan_2020 = _hexagon_stats(no2_mean_jan_2020_path)
aggregated_hexagon_grid_feb_2020 = _hexagon_stats(no2_mean_feb_2020_path)
aggregated_hexagon_grid_mar_2020 = _hexagon_stats(no2_mean_mar_2020_path)
aggregated_hexagon_grid_apr_2020 = _hexagon_stats(no2_mean_apr_2020_path)
aggregated_hexagon_grid_may_2020 = _hexagon_stats(no2_mean_may_2020_path)
aggregated_hexagon_grid_jun_2020 = _hexagon_stats(no2_mean_jun_2020_path)
aggregated_hexagon_grid_jul_2020 = _hexagon_stats(no2_mean_jul_2020_path)

In [None]:
# Rasterize mean NO2 to boundaries
# Polygon sets to aggregate to, in the order the results are unpacked below
boundary_paths = (
    south_korea_level_1_path,
    south_korea_level_2_path,
    south_korea_hexagon_grid_path,
)

# 2018

# 2019
(
    aggregated_level_1_mar_2019,
    aggregated_level_2_mar_2019,
    aggregated_hexagon_grid_mar_2019,
) = [
    aggregate_raster_data(
        raster_path=no2_mean_mar_2019_path,
        vector_path=boundary_path,
        zonal_statistics="count mean",
    )
    for boundary_path in boundary_paths
]

# 2020
(
    aggregated_level_1_mar_2020,
    aggregated_level_2_mar_2020,
    aggregated_hexagon_grid_mar_2020,
) = [
    aggregate_raster_data(
        raster_path=no2_mean_mar_2020_path,
        vector_path=boundary_path,
        zonal_statistics="count mean",
    )
    for boundary_path in boundary_paths
]

In [None]:
# Get geodataframes with geometry only
# One geometry-only geodataframe per boundary set; the hexagon grid one is
# the base that the monthly NO2 columns are merged onto
south_korea_level_1_geometry = get_geometry(
    shapefile_path=south_korea_level_1_path, geometry_column="geometry"
)
south_korea_level_2_geometry = get_geometry(
    shapefile_path=south_korea_level_2_path, geometry_column="geometry"
)
south_korea_hexagon_grid_geometry = get_geometry(
    shapefile_path=south_korea_hexagon_grid_path, geometry_column="geometry"
)

In [None]:
# Clean NO2 dataframes
# Reduce each monthly aggregation to its mean column, named "<month>_<year>"
# 2018
cleaned_hexagon_grid_jul_2018 = clean_data(
    aggregated_hexagon_grid_jul_2018, "jul_2018"
)
cleaned_hexagon_grid_aug_2018 = clean_data(
    aggregated_hexagon_grid_aug_2018, "aug_2018"
)
cleaned_hexagon_grid_sep_2018 = clean_data(
    aggregated_hexagon_grid_sep_2018, "sep_2018"
)
cleaned_hexagon_grid_oct_2018 = clean_data(
    aggregated_hexagon_grid_oct_2018, "oct_2018"
)
cleaned_hexagon_grid_nov_2018 = clean_data(
    aggregated_hexagon_grid_nov_2018, "nov_2018"
)
cleaned_hexagon_grid_dec_2018 = clean_data(
    aggregated_hexagon_grid_dec_2018, "dec_2018"
)

# 2019
cleaned_hexagon_grid_jan_2019 = clean_data(
    aggregated_hexagon_grid_jan_2019, "jan_2019"
)
cleaned_hexagon_grid_feb_2019 = clean_data(
    aggregated_hexagon_grid_feb_2019, "feb_2019"
)
cleaned_hexagon_grid_mar_2019 = clean_data(
    aggregated_hexagon_grid_mar_2019, "mar_2019"
)
cleaned_hexagon_grid_apr_2019 = clean_data(
    aggregated_hexagon_grid_apr_2019, "apr_2019"
)
cleaned_hexagon_grid_may_2019 = clean_data(
    aggregated_hexagon_grid_may_2019, "may_2019"
)
cleaned_hexagon_grid_jun_2019 = clean_data(
    aggregated_hexagon_grid_jun_2019, "jun_2019"
)
cleaned_hexagon_grid_jul_2019 = clean_data(
    aggregated_hexagon_grid_jul_2019, "jul_2019"
)
cleaned_hexagon_grid_aug_2019 = clean_data(
    aggregated_hexagon_grid_aug_2019, "aug_2019"
)
cleaned_hexagon_grid_sep_2019 = clean_data(
    aggregated_hexagon_grid_sep_2019, "sep_2019"
)
cleaned_hexagon_grid_oct_2019 = clean_data(
    aggregated_hexagon_grid_oct_2019, "oct_2019"
)
cleaned_hexagon_grid_nov_2019 = clean_data(
    aggregated_hexagon_grid_nov_2019, "nov_2019"
)
cleaned_hexagon_grid_dec_2019 = clean_data(
    aggregated_hexagon_grid_dec_2019, "dec_2019"
)

# 2020
cleaned_hexagon_grid_jan_2020 = clean_data(
    aggregated_hexagon_grid_jan_2020, "jan_2020"
)
cleaned_hexagon_grid_feb_2020 = clean_data(
    aggregated_hexagon_grid_feb_2020, "feb_2020"
)
cleaned_hexagon_grid_mar_2020 = clean_data(
    aggregated_hexagon_grid_mar_2020, "mar_2020"
)
cleaned_hexagon_grid_apr_2020 = clean_data(
    aggregated_hexagon_grid_apr_2020, "apr_2020"
)
cleaned_hexagon_grid_may_2020 = clean_data(
    aggregated_hexagon_grid_may_2020, "may_2020"
)
cleaned_hexagon_grid_jun_2020 = clean_data(
    aggregated_hexagon_grid_jun_2020, "jun_2020"
)
cleaned_hexagon_grid_jul_2020 = clean_data(
    aggregated_hexagon_grid_jul_2020, "jul_2020"
)

In [None]:
# Get dataframe for all months of data
# Monthly dataframes in chronological order (July 2018 - July 2020); each
# holds a single column named after its month and year
monthly_frames = [
    cleaned_hexagon_grid_jul_2018,
    cleaned_hexagon_grid_aug_2018,
    cleaned_hexagon_grid_sep_2018,
    cleaned_hexagon_grid_oct_2018,
    cleaned_hexagon_grid_nov_2018,
    cleaned_hexagon_grid_dec_2018,
    cleaned_hexagon_grid_jan_2019,
    cleaned_hexagon_grid_feb_2019,
    cleaned_hexagon_grid_mar_2019,
    cleaned_hexagon_grid_apr_2019,
    cleaned_hexagon_grid_may_2019,
    cleaned_hexagon_grid_jun_2019,
    cleaned_hexagon_grid_jul_2019,
    cleaned_hexagon_grid_aug_2019,
    cleaned_hexagon_grid_sep_2019,
    cleaned_hexagon_grid_oct_2019,
    cleaned_hexagon_grid_nov_2019,
    cleaned_hexagon_grid_dec_2019,
    cleaned_hexagon_grid_jan_2020,
    cleaned_hexagon_grid_feb_2020,
    cleaned_hexagon_grid_mar_2020,
    cleaned_hexagon_grid_apr_2020,
    cleaned_hexagon_grid_may_2020,
    cleaned_hexagon_grid_jun_2020,
    cleaned_hexagon_grid_jul_2020,
]

# Place the monthly columns side by side, aligned on the index
south_korea_no2_df = pd.concat(monthly_frames, axis=1)

In [None]:
# Get geodataframe for all months of data
# Attach the monthly NO2 columns to the hexagon geometry by matching on the
# index of both frames (inner join, which is the merge default)
south_korea_no2_gdf = south_korea_hexagon_grid_geometry.merge(
    right=south_korea_no2_df,
    how="inner",
    left_index=True,
    right_index=True,
)

In [None]:
# Preview the first five rows of the merged geodataframe
south_korea_no2_gdf.head(n=5)

In [None]:
# Add magnitude change columns
# (month, baseline year) pairs; each month is compared with the same month
# one year later (July 2018 - July 2019 baselines)
year_over_year_months = [
    ("jul", 2018),
    ("aug", 2018),
    ("sep", 2018),
    ("oct", 2018),
    ("nov", 2018),
    ("dec", 2018),
    ("jan", 2019),
    ("feb", 2019),
    ("mar", 2019),
    ("apr", 2019),
    ("may", 2019),
    ("jun", 2019),
    ("jul", 2019),
]

for month, pre_year in year_over_year_months:
    post_year = pre_year + 1
    new_column = f"change_{month}_{pre_year}_{post_year}_magnitude"

    # Handle each column on its own, so that a failure is reported with the
    # column it belongs to and does not skip the remaining columns
    try:
        add_change(
            dataframe=south_korea_no2_gdf,
            pre_change_column=f"{month}_{pre_year}",
            post_change_column=f"{month}_{post_year}",
            new_column=new_column,
            change_type="magnitude",
        )
    except Exception as error:
        print(f"Could not add column {new_column}: {error!r}")

In [None]:
# Add percent change columns
# (month, baseline year) pairs; each month is compared with the same month
# one year later (July 2018 - July 2019 baselines)
year_over_year_months = [
    ("jul", 2018),
    ("aug", 2018),
    ("sep", 2018),
    ("oct", 2018),
    ("nov", 2018),
    ("dec", 2018),
    ("jan", 2019),
    ("feb", 2019),
    ("mar", 2019),
    ("apr", 2019),
    ("may", 2019),
    ("jun", 2019),
    ("jul", 2019),
]

for month, pre_year in year_over_year_months:
    post_year = pre_year + 1
    new_column = f"change_{month}_{pre_year}_{post_year}_percent"

    # Handle each column on its own, so that a failure is reported with the
    # column it belongs to and does not skip the remaining columns
    try:
        add_change(
            dataframe=south_korea_no2_gdf,
            pre_change_column=f"{month}_{pre_year}",
            post_change_column=f"{month}_{post_year}",
            new_column=new_column,
            change_type="percent",
        )
    except Exception as error:
        print(f"Could not add column {new_column}: {error!r}")

In [None]:
# Check contents of consolidated geodataframe
# (first five rows, now including the change columns)
south_korea_no2_gdf.head(n=5)

# Data Post-Processing

# Data Visualization

In [None]:
# Plot mean NO2 aggregated to level 1 boundaries (March 2019)
fig, ax = plt.subplots(figsize=(10, 10))
# Reserve a narrow axis to the right of the map for the colorbar
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)
# The aggregation cell stores this result as aggregated_level_1_mar_2019;
# no rasterized_* variable is defined in this notebook
aggregated_level_1_mar_2019.plot(
    column="mean", ax=ax, legend=True, cax=cax, cmap="inferno"
)

plt.show()

In [None]:
# Plot mean NO2 aggregated to level 2 boundaries (March 2019)
fig, ax = plt.subplots(figsize=(10, 10))
# Reserve a narrow axis to the right of the map for the colorbar
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)
# The aggregation cell stores this result as aggregated_level_2_mar_2019;
# no rasterized_* variable is defined in this notebook
aggregated_level_2_mar_2019.plot(
    column="mean", ax=ax, legend=True, cax=cax, cmap="inferno"
)

plt.show()

In [None]:
# Plot mean NO2 aggregated to the hexagon grid (March 2019)
fig, ax = plt.subplots(figsize=(10, 10))
# Reserve a narrow axis to the right of the map for the colorbar
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)
# The aggregation cells store this result as
# aggregated_hexagon_grid_mar_2019; no rasterized_* variable is defined in
# this notebook
aggregated_hexagon_grid_mar_2019.plot(
    column="mean",
    ax=ax,
    legend=True,
    cax=cax,
    cmap="inferno",
    #     edgecolor="white",
    linewidth=0.25,
)
# Overlay the level 0 boundary as an unfilled red outline
south_korea_level_0.plot(
    ax=ax, facecolor="None", edgecolor="red", linewidth=0.5
)

plt.show()

In [None]:
# Plot mean NO2 aggregated to the hexagon grid (March 2019), dark background
with plt.style.context("dark_background"):
    fig, ax = plt.subplots(figsize=(10, 10))
    # Reserve a narrow axis to the right of the map for the colorbar
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.1)
    # The aggregation cells store this result as
    # aggregated_hexagon_grid_mar_2019; no rasterized_* variable is defined
    # in this notebook
    aggregated_hexagon_grid_mar_2019.plot(
        column="mean",
        ax=ax,
        legend=True,
        cax=cax,
        cmap="inferno",
        edgecolor="white",
        linewidth=0.25,
    )
    # Overlay the level 0 boundary as an unfilled red outline
    south_korea_level_0.plot(
        ax=ax, facecolor="None", edgecolor="red", linewidth=0.5
    )

    # Show the figure explicitly, as the other plotting cells do
    plt.show()

In [None]:
# Plot percent change in mean NO2 on the hexagon grid (March 2019 to 2020)
fig, ax = plt.subplots(figsize=(10, 10))
# Reserve a narrow axis to the right of the map for the colorbar
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)
# The consolidated geodataframe is south_korea_no2_gdf and its percent
# change columns are named change_<month>_<year>_<year>_percent; neither
# consolidated_with_geom nor a diff_* column is created in this notebook
south_korea_no2_gdf.plot(
    column="change_mar_2019_2020_percent",
    ax=ax,
    legend=True,
    cax=cax,
    cmap="RdBu_r",
    #     edgecolor="white",
    linewidth=0.25,
)
# Overlay the level 0 boundary as an unfilled red outline
south_korea_level_0.plot(
    ax=ax, facecolor="None", edgecolor="red", linewidth=0.5
)

plt.show()

In [None]:
# Plot percent change in mean NO2 on the hexagon grid (March 2019 to 2020),
# dark background
with plt.style.context("dark_background"):
    fig, ax = plt.subplots(figsize=(10, 10))
    # Reserve a narrow axis to the right of the map for the colorbar
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.1)
    # The consolidated geodataframe is south_korea_no2_gdf and its percent
    # change columns are named change_<month>_<year>_<year>_percent; neither
    # consolidated_with_geom nor a diff_* column is created in this notebook
    south_korea_no2_gdf.plot(
        column="change_mar_2019_2020_percent",
        ax=ax,
        legend=True,
        cax=cax,
        cmap="RdBu_r",
        edgecolor="white",
        linewidth=0.25,
    )
    # Overlay the level 0 boundary as an unfilled red outline
    south_korea_level_0.plot(
        ax=ax, facecolor="None", edgecolor="red", linewidth=0.5
    )

    # Show the figure explicitly, as the other plotting cells do
    plt.show()

In [None]:
# Plot mean NO2 aggregated to level 1 boundaries (March 2020)
fig, ax = plt.subplots(figsize=(10, 10))
# Reserve a narrow axis to the right of the map for the colorbar
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)
# The aggregation cell stores this result as aggregated_level_1_mar_2020;
# no rasterized_* variable is defined in this notebook
aggregated_level_1_mar_2020.plot(
    column="mean", ax=ax, legend=True, cax=cax, cmap="inferno"
)

plt.show()

In [None]:
# Plot mean NO2 aggregated to level 2 boundaries (March 2020)
fig, ax = plt.subplots(figsize=(10, 10))
# Reserve a narrow axis to the right of the map for the colorbar
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)
# The aggregation cell stores this result as aggregated_level_2_mar_2020;
# no rasterized_* variable is defined in this notebook
aggregated_level_2_mar_2020.plot(
    column="mean", ax=ax, legend=True, cax=cax, cmap="inferno"
)

plt.show()

In [None]:
# Plot mean NO2 aggregated to the hexagon grid (March 2020)
fig, ax = plt.subplots(figsize=(10, 10))
# Reserve a narrow axis to the right of the map for the colorbar
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)
# The aggregation cells store this result as
# aggregated_hexagon_grid_mar_2020; no rasterized_* variable is defined in
# this notebook
aggregated_hexagon_grid_mar_2020.plot(
    column="mean",
    ax=ax,
    legend=True,
    cax=cax,
    cmap="inferno",
    #     edgecolor="white",
    #     linewidth=3,
)
# Overlay the level 0 boundary as an unfilled red outline
south_korea_level_0.plot(
    ax=ax, facecolor="None", edgecolor="red", linewidth=0.5
)

plt.show()

In [None]:
# Plot mean NO2 aggregated to the hexagon grid (March 2020), dark background
with plt.style.context("dark_background"):
    fig, ax = plt.subplots(figsize=(10, 10))
    # Reserve a narrow axis to the right of the map for the colorbar
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.1)
    # The aggregation cells store this result as
    # aggregated_hexagon_grid_mar_2020; no rasterized_* variable is defined
    # in this notebook
    aggregated_hexagon_grid_mar_2020.plot(
        column="mean",
        ax=ax,
        legend=True,
        cax=cax,
        cmap="inferno",
        edgecolor="white",
        linewidth=0.1,
    )
    # Overlay the level 0 boundary as an unfilled red outline
    south_korea_level_0.plot(
        ax=ax, facecolor="None", edgecolor="red", linewidth=0.5
    )

    # Show the figure explicitly, as the other plotting cells do
    plt.show()

# Data Export

In [None]:
# Export NO2 means and change (hexagon grid)
export_dir = os.path.join("03-processed-data", "csv", "south-korea")

# Create the output folder when it is missing; to_csv does not create
# directories and fails if the target folder does not exist
os.makedirs(export_dir, exist_ok=True)

# Write all columns (geometry, monthly means, change) without the index
south_korea_no2_gdf.to_csv(
    path_or_buf=os.path.join(
        export_dir,
        "no2-south-korea-aggregate-hexagon-grid-jul-2018-jul-2020.csv",
    ),
    sep=",",
    header=True,
    index=False,
)