# NO2 Data Aggregation

Aggregates NO2 values by administrative boundaries and a regular hexagon grid.

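To convert from mol/m<sup>2</sup> to molecules/cm<sup>2</sup>, multiply by 6.022140857e+19 (6.022140857 * 10<sup>19</sup>), i.e., Avogadro's number (about 6.022 * 10<sup>23</sup> molecules/mol) divided by 10<sup>4</sup> cm<sup>2</sup>/m<sup>2</sup>.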

# Environment Setup

In [None]:
# Load Notebook formatter (IPython magic: this line only runs inside an
# IPython/Jupyter session, not as plain Python)
%load_ext nb_black
# Use the reload variant instead when the extension is already loaded:
# %reload_ext nb_black

In [None]:
# Import packages
import os
import glob
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import seaborn as sns
import pandas as pd
import geopandas as gpd
import rasterio as rio
import rasterstats as rs
import sentinel as stl

In [None]:
# Set Options
# Plotting: scale all seaborn fonts by 1.5. The commented-out call below is
# the same setting combined with the "whitegrid" style, kept for reference.
# sns.set(font_scale=1.5, style="whitegrid")
sns.set(font_scale=1.5)
# Show every column when a dataframe is displayed (no truncation)
pd.set_option("display.max_columns", None)
# Display floats with 15 decimal places. The fully qualified key is required:
# the abbreviated "precision" pattern matches more than one option in recent
# pandas releases (e.g., "styler.format.precision") and raises OptionError.
pd.set_option("display.precision", 15)

In [None]:
# Move the working directory up one level (to the parent of the current
# directory) and report where we ended up.
# NOTE: re-running this cell moves up one more level each time.
os.chdir(os.pardir)
working_directory = os.getcwd()
print(f"Working directory: {working_directory}")

# User-Defined Variables

In [None]:
# Folders that hold the South Korea vector data
raw_vector_folder = os.path.join("02-raw-data", "vector", "south-korea")
processed_vector_folder = os.path.join(
    "03-processed-data", "vector", "south-korea"
)

# Paths to the South Korea shapefiles at levels 0, 1, and 2
south_korea_level_0_path = os.path.join(
    raw_vector_folder, "gadm36_south_korea.shp"
)
south_korea_level_1_path = os.path.join(
    raw_vector_folder, "gadm36_south_korea_level_1.shp"
)
south_korea_level_2_path = os.path.join(
    raw_vector_folder, "gadm36_south_korea_level_2.shp"
)

# Path to the hexagon grid shapefile
south_korea_hexagon_grid_path = os.path.join(
    processed_vector_folder, "south_korea_hexagon_grid.shp"
)

# Folder searched for the monthly NO2 mean rasters
# (the variable name keeps its original spelling because later cells use it)
no2_means_foler = os.path.join(
    "03-processed-data", "raster", "south-korea", "statistics", "monthly"
)

# Data Acquisition and Preprocessing

In [None]:
# Collect the paths of all NO2 mean rasters (files matching "*MEAN*tif" in
# the monthly statistics folder), sorted by path
mean_raster_pattern = os.path.join(no2_means_foler, "*MEAN*tif")
no2_means = glob.glob(mean_raster_pattern)
no2_means.sort()

# Data Processing

Workflow:

* Rasterize/aggregate each monthly mean (by mean) into the administrative boundaries (level 1, level 2, hexagon grid)
    * Use a function to aggregate each raster into a geodataframe, drop all columns except the mean and the geometry, and rename the mean column to the month and year (e.g., mar_2020)
* Combine/consolidate all months into single geodataframe
    * Concatenate along axis 1 (columns), keeping only a single instance of the geometry column
* Create change/difference (magnitude and/or percent) for months as necessary within consolidated geodataframe
* Plot months and change on same figure

In [None]:
# Load one geometry-only geodataframe per boundary set, in the order
# level 0, level 1, level 2, hexagon grid
# (presumably stl.get_geometry keeps just the geometry column -- confirm
# against the sentinel module)
(
    south_korea_level_0_geometry,
    south_korea_level_1_geometry,
    south_korea_level_2_geometry,
    south_korea_hexagon_grid_geometry,
) = (
    stl.get_geometry(shapefile_path)
    for shapefile_path in (
        south_korea_level_0_path,
        south_korea_level_1_path,
        south_korea_level_2_path,
        south_korea_hexagon_grid_path,
    )
)

In [None]:
def _aggregate_monthly_means(boundaries_path):
    """Aggregate every raster in ``no2_means`` over one set of zones.

    Calls ``stl.aggregate_raster_data`` once per raster, requesting the
    "count mean" zonal statistics for the vector file at ``boundaries_path``.

    Returns a list with one result per raster, in the order of ``no2_means``.
    """
    return [
        stl.aggregate_raster_data(
            raster_path=monthly_raster,
            vector_path=boundaries_path,
            zonal_statistics="count mean",
        )
        for monthly_raster in no2_means
    ]


# Aggregate to level 1 boundaries, level 2 boundaries, and hexagon grid
aggregated_level_1 = _aggregate_monthly_means(south_korea_level_1_path)
aggregated_level_2 = _aggregate_monthly_means(south_korea_level_2_path)
aggregated_hexagon_grids = _aggregate_monthly_means(
    south_korea_hexagon_grid_path
)

In [None]:
# Set new column names: "<month>_<year>" labels running from July 2018
# through July 2020 (25 consecutive months)
month_abbreviations = [
    "jan",
    "feb",
    "mar",
    "apr",
    "may",
    "jun",
    "jul",
    "aug",
    "sep",
    "oct",
    "nov",
    "dec",
]

new_names = (
    # July through December 2018
    [f"{month}_2018" for month in month_abbreviations[6:]]
    # All of 2019
    + [f"{month}_2019" for month in month_abbreviations]
    # January through July 2020
    + [f"{month}_2020" for month in month_abbreviations[:7]]
)

def _clean_monthly(geodataframes):
    """Clean each aggregated geodataframe and label it with its month.

    The geodataframe at position ``i`` is passed to ``stl.clean_data``
    together with ``new_names[i]``, so the input must be in the same order
    as ``new_names``. Returns the cleaned results as a list.
    """
    return [
        stl.clean_data(geodatframe=monthly_gdf, new_name=new_names[position])
        for position, monthly_gdf in enumerate(geodataframes)
    ]


# Clean NO2 dataframes
cleaned_level_1 = _clean_monthly(aggregated_level_1)
cleaned_level_2 = _clean_monthly(aggregated_level_2)
cleaned_hexagon_grids = _clean_monthly(aggregated_hexagon_grids)

In [None]:
# Get geodataframes with all months of data: place the cleaned monthly
# results side by side (column-wise), then attach them to the geometry by
# matching on the index

# Level 1 boundaries
level_1_all_months = pd.concat(cleaned_level_1, axis="columns")
concatenated_level_1_gdf = south_korea_level_1_geometry.merge(
    level_1_all_months, left_index=True, right_index=True
)

# Level 2 boundaries
level_2_all_months = pd.concat(cleaned_level_2, axis="columns")
concatenated_level_2_gdf = south_korea_level_2_geometry.merge(
    level_2_all_months, left_index=True, right_index=True
)

# Hexagon grid
hexagon_grid_all_months = pd.concat(cleaned_hexagon_grids, axis="columns")
concatenated_hexagon_grids_gdf = south_korea_hexagon_grid_geometry.merge(
    hexagon_grid_all_months, left_index=True, right_index=True
)

In [None]:
# Months compared year over year: each (month, year) entry is the pre-change
# month; the post-change month is the same month one year later
comparison_months = [
    ("jul", 2018),
    ("aug", 2018),
    ("sep", 2018),
    ("oct", 2018),
    ("nov", 2018),
    ("dec", 2018),
    ("jan", 2019),
    ("feb", 2019),
    ("mar", 2019),
    ("apr", 2019),
    ("may", 2019),
    ("jun", 2019),
    ("jul", 2019),
]

# Set months for pre- and post-change
pre_change_dates = [f"{month}_{year}" for month, year in comparison_months]

post_change_dates = [
    f"{month}_{year + 1}" for month, year in comparison_months
]

# Set new column names for magnitude and percent
new_columns_magnitude = [
    f"change_{month}_{year}_{year + 1}_magnitude"
    for month, year in comparison_months
]

new_columns_percent = [
    f"change_{month}_{year}_{year + 1}_percent"
    for month, year in comparison_months
]

In [None]:
# Add magnitude and percent change for level 1 boundaries: all magnitude
# columns first, then all percent columns. A failure on one column is printed
# and the loop moves on to the next column.
# (presumably stl.add_change adds the column to the dataframe in place, since
# its return value is not used -- confirm against the sentinel module)
for kind_of_change, change_columns in (
    ("magnitude", new_columns_magnitude),
    ("percent", new_columns_percent),
):
    for position, change_column in enumerate(change_columns):
        try:
            stl.add_change(
                dataframe=concatenated_level_1_gdf,
                pre_change_column=pre_change_dates[position],
                post_change_column=post_change_dates[position],
                new_column=change_column,
                change_type=kind_of_change,
            )
        except Exception as caught:
            print(caught)

In [None]:
# Add magnitude and percent change for level 2 boundaries: all magnitude
# columns first, then all percent columns. A failure on one column is printed
# and the loop moves on to the next column.
# (presumably stl.add_change adds the column to the dataframe in place, since
# its return value is not used -- confirm against the sentinel module)
for kind_of_change, change_columns in (
    ("magnitude", new_columns_magnitude),
    ("percent", new_columns_percent),
):
    for position, change_column in enumerate(change_columns):
        try:
            stl.add_change(
                dataframe=concatenated_level_2_gdf,
                pre_change_column=pre_change_dates[position],
                post_change_column=post_change_dates[position],
                new_column=change_column,
                change_type=kind_of_change,
            )
        except Exception as caught:
            print(caught)

In [None]:
# Add magnitude and percent change for hexagon grid: all magnitude columns
# first, then all percent columns. A failure on one column is printed and the
# loop moves on to the next column.
# (presumably stl.add_change adds the column to the dataframe in place, since
# its return value is not used -- confirm against the sentinel module)
for kind_of_change, change_columns in (
    ("magnitude", new_columns_magnitude),
    ("percent", new_columns_percent),
):
    for position, change_column in enumerate(change_columns):
        try:
            stl.add_change(
                dataframe=concatenated_hexagon_grids_gdf,
                pre_change_column=pre_change_dates[position],
                post_change_column=post_change_dates[position],
                new_column=change_column,
                change_type=kind_of_change,
            )
        except Exception as caught:
            print(caught)

# Data Post-Processing

# Data Visualization

# Data Export

In [None]:
# Export level 1 boundaries
# Output locations for the CSV and shapefile copies
level_1_csv_path = os.path.join(
    "03-processed-data",
    "csv",
    "south-korea",
    "aggregate-level-1-jul-2018-jul-2020.csv",
)
level_1_shapefile_path = os.path.join(
    "03-processed-data",
    "vector",
    "south-korea",
    "aggregate_level_1_jul_2018_jul_2020.shp",
)

# CSV (comma-separated, with a header row and without the index)
concatenated_level_1_gdf.to_csv(
    path_or_buf=level_1_csv_path, sep=",", header=True, index=False
)

# Shapefile (written without the index)
concatenated_level_1_gdf.to_file(
    filename=level_1_shapefile_path, driver="ESRI Shapefile", index=False
)

In [None]:
# Export level 2 boundaries
# Output locations for the CSV and shapefile copies
level_2_csv_path = os.path.join(
    "03-processed-data",
    "csv",
    "south-korea",
    "aggregate-level-2-jul-2018-jul-2020.csv",
)
level_2_shapefile_path = os.path.join(
    "03-processed-data",
    "vector",
    "south-korea",
    "aggregate_level_2_jul_2018_jul_2020.shp",
)

# CSV (comma-separated, with a header row and without the index)
concatenated_level_2_gdf.to_csv(
    path_or_buf=level_2_csv_path, sep=",", header=True, index=False
)

# Shapefile (written without the index)
concatenated_level_2_gdf.to_file(
    filename=level_2_shapefile_path, driver="ESRI Shapefile", index=False
)

In [None]:
# Export hexagon grid
# Output locations for the CSV and shapefile copies
hexagon_grid_csv_path = os.path.join(
    "03-processed-data",
    "csv",
    "south-korea",
    "aggregate-hexagon-grid-jul-2018-jul-2020.csv",
)
hexagon_grid_shapefile_path = os.path.join(
    "03-processed-data",
    "vector",
    "south-korea",
    "aggregate_hexagon_grid_jul_2018_jul_2020.shp",
)

# CSV (comma-separated, with a header row and without the index)
concatenated_hexagon_grids_gdf.to_csv(
    path_or_buf=hexagon_grid_csv_path, sep=",", header=True, index=False
)

# Shapefile (written without the index)
concatenated_hexagon_grids_gdf.to_file(
    filename=hexagon_grid_shapefile_path,
    driver="ESRI Shapefile",
    index=False,
)