# CO Data Aggregation Time Series

Aggregates CO values by administrative boundaries and a regular hexagon grid.

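To convert from mol/m<sup>2</sup> to molecules/cm<sup>2</sup>, multiply by 6.022140857e+19 (6.022140857 * 10<sup>19</sup>), i.e. Avogadro's number (6.022140857 * 10<sup>23</sup> molecules/mol) divided by 10<sup>4</sup> cm<sup>2</sup>/m<sup>2</sup>.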

# Environment Setup

In [None]:
# Load Notebook formatter
%load_ext nb_black
# %reload_ext nb_black

In [None]:
# Import packages
import os
import glob
import seaborn as sns
import pandas as pd
import geopandas as gpd
import sentinel as stl

In [None]:
# Set Options
# Plot styling: scale seaborn fonts up by 1.5 (whitegrid variant kept as an option)
# sns.set(font_scale=1.5, style="whitegrid")
sns.set(font_scale=1.5)
# Do not truncate columns when displaying dataframes
pd.set_option("display.max_columns", None)
# pd.set_option("display.max_rows", None)
# Use the fully-qualified option name: the abbreviated pattern "precision" is
# resolved by pattern matching and raises OptionError in pandas versions where
# it also matches "styler.format.precision"
pd.set_option("display.precision", 15)

In [None]:
# Move the working directory up one level, then report where we ended up
# NOTE: re-running this cell moves up one more level each time
os.chdir("..")
working_directory = os.getcwd()
print(f"Working directory: {working_directory}")

# User-Defined Variables

In [None]:
# Paths (relative to the working directory) to the folder holding the daily
# CO rasters and to the South Korea hexagon grid shapefile
co_rasters_folder = os.path.join(
    "03-processed-data",
    "raster",
    "south-korea",
    "carbon-monoxide",
)

south_korea_hexagon_grid_path = os.path.join(
    "03-processed-data", "vector", "south-korea", "south_korea_hexagon_grid.shp"
)

# Data Acquisition and Preprocessing

In [None]:
# Collect every .tif path in the CO rasters folder and order them by path
co_rasters = glob.glob(os.path.join(co_rasters_folder, "*.tif"))
co_rasters.sort()
# Preview the first ten paths
co_rasters[:10]

# Data Processing

In [None]:
# Aggregate each CO raster to the hexagon grid (count and mean statistics),
# keeping one result per raster in the same order as co_rasters
# Takes ~20-30 minutes
aggregated_hexagon_grids = []
for raster in co_rasters:
    aggregated = stl.aggregate_raster_data(
        raster_path=raster,
        vector_path=south_korea_hexagon_grid_path,
        zonal_statistics="count mean",
    )
    aggregated_hexagon_grids.append(aggregated)

In [None]:
# Build one single-column dataframe per raster, with the "mean" column
# renamed to that raster's acquisition date
renamed_data = []
for position, aggregated in enumerate(aggregated_hexagon_grids):
    raster_name = os.path.basename(co_rasters[position])
    # Characters 26-44 of the file name are used as the acquisition date
    # NOTE(review): relies on a fixed file-name layout — confirm if names change
    acquisition_date = raster_name[26:45]
    single_day = aggregated.rename(columns={"mean": acquisition_date})
    renamed_data.append(single_day[[acquisition_date]])

In [None]:
# Combine all per-date columns side by side, then attach them to the hexagon
# grid geometry by row index (the grid's "Id" column is dropped first)
daily_means = pd.concat(renamed_data, axis=1)
hexagon_grid_gdf = (
    gpd.read_file(south_korea_hexagon_grid_path)
    .drop(columns=["Id"])
    .merge(daily_means, left_index=True, right_index=True)
)

In [None]:
# hexagon_grid_gdf.head()

# Data Post-Processing

# Data Visualization

# Data Export

In [None]:
# Export hexagon grid
# CSV
# hexagon_grid_gdf.to_csv(
#     path_or_buf=os.path.join(
#         "03-processed-data",
#         "csv",
#         "south-korea",
#         "aggregate-daily-hexagon-grid-jul-2018-aug-2020.csv",
#     ),
#     sep=",",
#     na_rep="nan",
#     header=True,
#     index=False,
# )

# Shapefile
# hexagon_grid_gdf.to_file(
#     filename=os.path.join(
#         "03-processed-data",
#         "vector",
#         "south-korea",
#         "aggregate_daily_hexagon_grid_jul_2018_aug_2020.shp",
#     ),
#     driver="ESRI Shapefile",
#     index=False,
# )

# Geopackage (~5 minutes)
# Write the merged hexagon grid to a GeoPackage file, without the index
geopackage_path = os.path.join(
    "03-processed-data",
    "vector",
    "south-korea",
    "co_aggregate_daily_hexagon_grid_jul_2018_dec_2020.gpkg",
)
hexagon_grid_gdf.to_file(filename=geopackage_path, driver="GPKG", index=False)

In [None]:
# Read the exported geopackage back into a geodataframe and index it by GRID_ID
hexagon_grid_time_series = gpd.read_file(
    os.path.join(
        "03-processed-data",
        "vector",
        "south-korea",
        "co_aggregate_daily_hexagon_grid_jul_2018_dec_2020.gpkg",
    )
)
hexagon_grid_time_series = hexagon_grid_time_series.set_index("GRID_ID")

In [None]:
# Preview the first five rows of the re-loaded geodataframe
hexagon_grid_time_series.head(n=5)