# CO Data Aggregation Temporal Fluctuation

Observes how CO data aggregated to hexagon grid cells fluctuates up to 30 hours into the future.

To convert from mol/m<sup>2</sup> to molecules/cm<sup>2</sup>, multiply by 6.022140857e+19 (6.022140857 * 10<sup>19</sup>).

# Environment Setup

In [None]:
# Load the nb_black notebook formatter extension
%load_ext nb_black
# Alternative kept for re-running this cell once the extension is already loaded
# %reload_ext nb_black

In [None]:
# Import packages
import os
from datetime import timedelta
from math import sqrt
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
from scipy.interpolate import UnivariateSpline
import seaborn as sns
import pandas as pd
from pandas.plotting import register_matplotlib_converters
import radiance as rd
import sentinel as stl

In [None]:
# Set Options
# NumPy: print 15 decimal places and suppress scientific notation
np.set_printoptions(precision=15, suppress=True)
# sns.set(font_scale=1.5, style="whitegrid")
sns.set(font_scale=1.5)
# pandas: show every column when displaying a dataframe
pd.set_option("display.max_columns", None)
# pd.set_option("display.max_rows", None)
# Use the fully-qualified option key. The abbreviated "precision" pattern
# matches more than one option in newer pandas releases and raises
# OptionError there; "display.precision" is valid in old and new releases.
pd.set_option("display.precision", 15)
register_matplotlib_converters()

In [None]:
# Move up one directory level so the relative data and output paths used
# below resolve from the parent directory, then report where we ended up
os.chdir(os.pardir)
print(f"Working directory: {os.getcwd()}")

# User-Defined Variables

In [None]:
# Relative path to the daily CO hexagon-grid time series geopackage,
# assembled from its directory and file name
time_series_directory = os.path.join(
    "03-processed-data", "vector", "south-korea"
)
time_series_file_name = (
    "co_aggregate_daily_hexagon_grid_jul_2018_dec_2020.gpkg"
)
hexagon_grid_time_series_path = os.path.join(
    time_series_directory, time_series_file_name
)

# Data Acquisition and Preprocessing

In [None]:
# Get cleaned time series data into dataframe
# NOTE(review): later cells slice this frame by date strings and select
# columns by grid ID ("J-6", ...), so it is presumably date-indexed with one
# column per grid cell - confirm against stl.clean_time_series
time_series_df = stl.clean_time_series(hexagon_grid_time_series_path)

In [None]:
# Check dataframe - preview the first rows
time_series_df.head()

In [None]:
# Check dataframe - preview the last rows
time_series_df.tail()

# Data Processing

## Timestamp and CO Deltas

### Seoul Area - High

#### Grid J-6

In [None]:
# Calculate time and CO deltas for grid J-6.
# return_absolute=False keeps the raw values (not absolute magnitude).
# max_difference=30 presumably is in hours, per the notebook intro - confirm.
deltas_j6 = stl.calculate_deltas(
    time_series=time_series_df,
    grid_id="J-6",
    max_difference=30,
    return_absolute=False,
)
time_deltas_j6, co_deltas_magnitude_j6, co_deltas_percent_j6 = deltas_j6

In [None]:
# Check time deltas - first 10 entries, grid J-6
time_deltas_j6[:10]

In [None]:
# Check CO deltas (magnitude) - first 10 entries, grid J-6
co_deltas_magnitude_j6[:10]

In [None]:
# Check CO deltas (percent) - first 10 entries, grid J-6
co_deltas_percent_j6[:10]

In [None]:
# Convert the J-6 deltas to arrays.
# NOTE(review): stl.format_no2_delta is applied to CO deltas here; presumably
# the helper is product-agnostic despite its name - confirm.
time_deltas_j6_arr = stl.format_time_delta(time_deltas_j6)
co_deltas_magnitude_j6_arr, co_deltas_percent_j6_arr = (
    stl.format_no2_delta(co_deltas)
    for co_deltas in (co_deltas_magnitude_j6, co_deltas_percent_j6)
)

In [None]:
# Check time delta array - first 10 entries, grid J-6
time_deltas_j6_arr[:10]

In [None]:
# Check CO delta array (magnitude) - first 10 entries, grid J-6
co_deltas_magnitude_j6_arr[:10]

In [None]:
# Check CO delta array (percent) - first 10 entries, grid J-6
co_deltas_percent_j6_arr[:10]

#### Grid I-6

In [None]:
# Calculate time and CO deltas for grid I-6.
# return_absolute=False keeps the raw values (not absolute magnitude).
# max_difference=30 presumably is in hours, per the notebook intro - confirm.
deltas_i6 = stl.calculate_deltas(
    time_series=time_series_df,
    grid_id="I-6",
    max_difference=30,
    return_absolute=False,
)
time_deltas_i6, co_deltas_magnitude_i6, co_deltas_percent_i6 = deltas_i6

In [None]:
# Check time deltas - first 10 entries, grid I-6
time_deltas_i6[:10]

In [None]:
# Check CO deltas (magnitude) - first 10 entries, grid I-6
co_deltas_magnitude_i6[:10]

In [None]:
# Check CO deltas (percent) - first 10 entries, grid I-6
co_deltas_percent_i6[:10]

In [None]:
# Convert the I-6 deltas to arrays.
# NOTE(review): stl.format_no2_delta is applied to CO deltas here; presumably
# the helper is product-agnostic despite its name - confirm.
time_deltas_i6_arr = stl.format_time_delta(time_deltas_i6)
co_deltas_magnitude_i6_arr, co_deltas_percent_i6_arr = (
    stl.format_no2_delta(co_deltas)
    for co_deltas in (co_deltas_magnitude_i6, co_deltas_percent_i6)
)

In [None]:
# Check time delta array - first 10 entries, grid I-6
time_deltas_i6_arr[:10]

In [None]:
# Check CO delta array (magnitude) - first 10 entries, grid I-6
co_deltas_magnitude_i6_arr[:10]

In [None]:
# Check CO delta array (percent) - first 10 entries, grid I-6
co_deltas_percent_i6_arr[:10]

### Rural Area to the East - Low

#### Grid S-9

In [None]:
# Calculate time and CO deltas for grid S-9.
# return_absolute=False keeps the raw values (not absolute magnitude).
# max_difference=30 presumably is in hours, per the notebook intro - confirm.
deltas_s9 = stl.calculate_deltas(
    time_series=time_series_df,
    grid_id="S-9",
    max_difference=30,
    return_absolute=False,
)
time_deltas_s9, co_deltas_magnitude_s9, co_deltas_percent_s9 = deltas_s9

In [None]:
# Check time deltas - first 10 entries, grid S-9
time_deltas_s9[:10]

In [None]:
# Check CO deltas (magnitude) - first 10 entries, grid S-9
co_deltas_magnitude_s9[:10]

In [None]:
# Check CO deltas (percent) - first 10 entries, grid S-9
co_deltas_percent_s9[:10]

In [None]:
# Convert the S-9 deltas to arrays.
# NOTE(review): stl.format_no2_delta is applied to CO deltas here; presumably
# the helper is product-agnostic despite its name - confirm.
time_deltas_s9_arr = stl.format_time_delta(time_deltas_s9)
co_deltas_magnitude_s9_arr, co_deltas_percent_s9_arr = (
    stl.format_no2_delta(co_deltas)
    for co_deltas in (co_deltas_magnitude_s9, co_deltas_percent_s9)
)

In [None]:
# Check time delta array - first 10 entries, grid S-9
time_deltas_s9_arr[:10]

In [None]:
# Check CO delta array (magnitude) - first 10 entries, grid S-9
co_deltas_magnitude_s9_arr[:10]

In [None]:
# Check CO delta array (percent) - first 10 entries, grid S-9
co_deltas_percent_s9_arr[:10]

### Southern Coast - High

#### Grid N-17

In [None]:
# Calculate time and CO deltas for grid N-17.
# return_absolute=False keeps the raw values (not absolute magnitude).
# max_difference=30 presumably is in hours, per the notebook intro - confirm.
deltas_n17 = stl.calculate_deltas(
    time_series=time_series_df,
    grid_id="N-17",
    max_difference=30,
    return_absolute=False,
)
time_deltas_n17, co_deltas_magnitude_n17, co_deltas_percent_n17 = deltas_n17

In [None]:
# Check time deltas - first 10 entries, grid N-17
time_deltas_n17[:10]

In [None]:
# Check CO deltas (magnitude) - first 10 entries, grid N-17
co_deltas_magnitude_n17[:10]

In [None]:
# Check CO deltas (percent) - first 10 entries, grid N-17
co_deltas_percent_n17[:10]

In [None]:
# Convert the N-17 deltas to arrays.
# NOTE(review): stl.format_no2_delta is applied to CO deltas here; presumably
# the helper is product-agnostic despite its name - confirm.
time_deltas_n17_arr = stl.format_time_delta(time_deltas_n17)
co_deltas_magnitude_n17_arr, co_deltas_percent_n17_arr = (
    stl.format_no2_delta(co_deltas)
    for co_deltas in (co_deltas_magnitude_n17, co_deltas_percent_n17)
)

In [None]:
# Check time delta array - first 10 entries, grid N-17
time_deltas_n17_arr[:10]

In [None]:
# Check CO delta array (magnitude) - first 10 entries, grid N-17
co_deltas_magnitude_n17_arr[:10]

In [None]:
# Check CO delta array (percent) - first 10 entries, grid N-17
co_deltas_percent_n17_arr[:10]

Get means and standard deviations for all grid cells over the time series (exclude NaN). Plot std vs mean scatter plot to get a sense of the values.

### Rural Area in the Center - Medium/Low

#### Grid N-11

In [None]:
# Calculate time and CO deltas for grid N-11.
# return_absolute=False keeps the raw values (not absolute magnitude).
# max_difference=30 presumably is in hours, per the notebook intro - confirm.
deltas_n11 = stl.calculate_deltas(
    time_series=time_series_df,
    grid_id="N-11",
    max_difference=30,
    return_absolute=False,
)
time_deltas_n11, co_deltas_magnitude_n11, co_deltas_percent_n11 = deltas_n11

In [None]:
# Check time deltas - first 10 entries, grid N-11
time_deltas_n11[:10]

In [None]:
# Check CO deltas (magnitude) - first 10 entries, grid N-11
co_deltas_magnitude_n11[:10]

In [None]:
# Check CO deltas (percent) - first 10 entries, grid N-11
co_deltas_percent_n11[:10]

In [None]:
# Convert the N-11 deltas to arrays.
# NOTE(review): stl.format_no2_delta is applied to CO deltas here; presumably
# the helper is product-agnostic despite its name - confirm.
time_deltas_n11_arr = stl.format_time_delta(time_deltas_n11)
co_deltas_magnitude_n11_arr, co_deltas_percent_n11_arr = (
    stl.format_no2_delta(co_deltas)
    for co_deltas in (co_deltas_magnitude_n11, co_deltas_percent_n11)
)

In [None]:
# Check time delta array - first 10 entries, grid N-11
time_deltas_n11_arr[:10]

In [None]:
# Check CO delta array (magnitude) - first 10 entries, grid N-11
co_deltas_magnitude_n11_arr[:10]

In [None]:
# Check CO delta array (percent) - first 10 entries, grid N-11
co_deltas_percent_n11_arr[:10]

## Standard Deviation vs. Mean

### 2018

In [None]:
# Slice time series to 2018 (July 1 through December 31)
# assumes time_series_df has a date-like index so the string slice selects rows - TODO confirm
time_series_2018 = time_series_df["2018-07-01":"2018-12-31"]

#### All Grid Cells

In [None]:
# Per-grid-cell mean and standard deviation over the 2018 slice.
# Both arrays follow the column order of time_series_2018; all means are
# extracted first, then all standard deviations.
means_2018, standard_deviations_2018 = (
    np.array(
        [
            stl.extract_grid_statistic(time_series_2018, column, statistic)
            for column in time_series_2018.columns
        ]
    )
    for statistic in ("mean", "std")
)

#### Subset Regions

### 2019

In [None]:
# Slice time series to 2019 (January 1 through December 31)
# assumes time_series_df has a date-like index so the string slice selects rows - TODO confirm
time_series_2019 = time_series_df["2019-01-01":"2019-12-31"]

#### All Grid Cells

In [None]:
# Per-grid-cell mean and standard deviation over the 2019 slice.
# Both arrays follow the column order of time_series_2019; all means are
# extracted first, then all standard deviations.
means_2019, standard_deviations_2019 = (
    np.array(
        [
            stl.extract_grid_statistic(time_series_2019, column, statistic)
            for column in time_series_2019.columns
        ]
    )
    for statistic in ("mean", "std")
)

#### Subset Regions

### 2020

In [None]:
# Slice time series to 2020 (January 1 through November 16)
# assumes time_series_df has a date-like index so the string slice selects rows - TODO confirm
# NOTE(review): the 2020 plot titles in this notebook read
# "1/01/2020 - 12/08/2020", which does not match this 11/16 end date -
# confirm which range is intended
time_series_2020 = time_series_df["2020-01-01":"2020-11-16"]

#### All Grid Cells

In [None]:
# Per-grid-cell mean and standard deviation over the 2020 slice.
# Both arrays follow the column order of time_series_2020; all means are
# extracted first, then all standard deviations.
means_2020, standard_deviations_2020 = (
    np.array(
        [
            stl.extract_grid_statistic(time_series_2020, column, statistic)
            for column in time_series_2020.columns
        ]
    )
    for statistic in ("mean", "std")
)

#### Subset Regions

### 2018-2020

#### All Grid Cells

In [None]:
# Per-grid-cell mean and standard deviation over the full time series.
# Both arrays follow the column order of time_series_df; all means are
# extracted first, then all standard deviations.
means, standard_deviations = (
    np.array(
        [
            stl.extract_grid_statistic(time_series_df, column, statistic)
            for column in time_series_df.columns
        ]
    )
    for statistic in ("mean", "std")
)

#### Subset Regions

##### Seoul Area

In [None]:
# Select the Seoul-area grid cell columns from the full time series
seoul_grid_ids = [
    "H-6", "H-7",
    "I-5", "I-6", "I-7",
    "J-5", "J-6", "J-7", "J-8",
    "K-5", "K-6", "K-7",
    "L-6", "L-7",
]
subset_1_seoul = time_series_df[seoul_grid_ids]

In [None]:
# Per-grid-cell mean and standard deviation for the Seoul subset.
# Both arrays follow the column order of subset_1_seoul; all means are
# extracted first, then all standard deviations.
means_subset_1, standard_deviations_subset_1 = (
    np.array(
        [
            stl.extract_grid_statistic(subset_1_seoul, column, statistic)
            for column in subset_1_seoul.columns
        ]
    )
    for statistic in ("mean", "std")
)

In [None]:
# Set annotations (optional for plotting): the Seoul subset's column labels
# (grid cell IDs), in column order
subset_1_seoul_annotations = subset_1_seoul.columns.tolist()

##### Southern Coast

In [None]:
# Select the southern-coast grid cell columns from the full time series
south_coast_grid_ids = [
    "M-16", "M-17",
    "N-16", "N-17", "N-18",
    "O-16", "O-17",
]
subset_2_south = time_series_df[south_coast_grid_ids]

In [None]:
# Per-grid-cell mean and standard deviation for the southern-coast subset.
# Both arrays follow the column order of subset_2_south; all means are
# extracted first, then all standard deviations.
means_subset_2, standard_deviations_subset_2 = (
    np.array(
        [
            stl.extract_grid_statistic(subset_2_south, column, statistic)
            for column in subset_2_south.columns
        ]
    )
    for statistic in ("mean", "std")
)

In [None]:
# Set annotations (optional for plotting): the southern-coast subset's column
# labels (grid cell IDs), in column order
subset_2_south_annotations = subset_2_south.columns.tolist()

##### Rural Area to the East

In [None]:
# Select the eastern rural grid cell columns from the full time series
east_grid_ids = [
    "S-8", "S-9",
    "T-8", "T-9", "T-10",
    "U-8", "U-9",
]
subset_3_east = time_series_df[east_grid_ids]

In [None]:
# Per-grid-cell mean and standard deviation for the eastern rural subset.
# Both arrays follow the column order of subset_3_east; all means are
# extracted first, then all standard deviations.
means_subset_3, standard_deviations_subset_3 = (
    np.array(
        [
            stl.extract_grid_statistic(subset_3_east, column, statistic)
            for column in subset_3_east.columns
        ]
    )
    for statistic in ("mean", "std")
)

In [None]:
# Set annotations (optional for plotting): the eastern rural subset's column
# labels (grid cell IDs), in column order
subset_3_east_annotations = subset_3_east.columns.tolist()

##### Rural Area in the Center

In [None]:
# Select the central rural grid cell columns from the full time series
central_grid_ids = [
    "M-10", "M-11",
    "N-10", "N-11", "N-12",
    "O-10", "O-11",
]
subset_4_central = time_series_df[central_grid_ids]

In [None]:
# Per-grid-cell mean and standard deviation for the central rural subset.
# Both arrays follow the column order of subset_4_central; all means are
# extracted first, then all standard deviations.
means_subset_4, standard_deviations_subset_4 = (
    np.array(
        [
            stl.extract_grid_statistic(subset_4_central, column, statistic)
            for column in subset_4_central.columns
        ]
    )
    for statistic in ("mean", "std")
)

In [None]:
# Set annotations (optional for plotting): the central rural subset's column
# labels (grid cell IDs), in column order
subset_4_central_annotations = subset_4_central.columns.tolist()

# Data Post-Processing

# Data Visualization

## CO Delta Plots

**TO DO:** Get means and standard deviations for all grid cells over the time series (exclude NaN). Plot std vs mean scatter plot to get a sense of the values.

### Seoul Area - High

#### Grid J-6

Common time differences for J-6 (hours):

* 1.7
* 22.0
* 23.7
* 25.4

In [None]:
# Get unique values for the grid J-6 time deltas, rounded to 1 decimal place.
# This cell sits under the "Grid J-6" heading and backs the list of common
# J-6 time differences, so it must read the J-6 array (it previously read
# the I-6 array).
unique_time_deltas = np.unique(
    [round(value, 1) for value in np.unique(time_deltas_j6_arr)]
)
unique_time_deltas

In [None]:
# Plot CO deltas (magnitude and percent) for grid J-6 and save the figure
grid_cell = "J-6"
deltas_figure_name = (
    f"co-deltas-magnitude-percent-grid-{grid_cell.lower().replace('-', '')}.png"
)
deltas_figure_path = os.path.join(
    "04-graphics-outputs",
    "time-series",
    "south-korea",
    "carbon-monoxide",
    "deltas",
    deltas_figure_name,
)

# The no2_* keyword names belong to the stl.plot_deltas signature; the values
# passed here are CO deltas. The percent axis is pinned to +/-100; magnitude
# limits and x_max are not passed (previously tried: -1, 1, and 5).
fig, ax = stl.plot_deltas(
    time_delta=time_deltas_j6_arr,
    no2_delta_magnitude=co_deltas_magnitude_j6_arr,
    no2_delta_percent=co_deltas_percent_j6_arr,
    grid_id=grid_cell,
    product_type="CO",
    y_min_percent=-100,
    y_max_percent=100,
    y_label_magnitude=r"CO Delta ($\mathrm{mol \cdot m^{-2}}$)",
    y_label_percent="CO Delta (%)",
)

stl.save_figure(output_path=deltas_figure_path)

#### Grid I-6

In [None]:
# Plot CO deltas (magnitude and percent) for grid I-6 and save the figure
grid_cell = "I-6"
deltas_figure_name = (
    f"co-deltas-magnitude-percent-grid-{grid_cell.lower().replace('-', '')}.png"
)
deltas_figure_path = os.path.join(
    "04-graphics-outputs",
    "time-series",
    "south-korea",
    "carbon-monoxide",
    "deltas",
    deltas_figure_name,
)

# The no2_* keyword names belong to the stl.plot_deltas signature; the values
# passed here are CO deltas. The percent axis is pinned to +/-100; magnitude
# limits and x_max are not passed (previously tried: -1, 1, and 5).
fig, ax = stl.plot_deltas(
    time_delta=time_deltas_i6_arr,
    no2_delta_magnitude=co_deltas_magnitude_i6_arr,
    no2_delta_percent=co_deltas_percent_i6_arr,
    grid_id=grid_cell,
    product_type="CO",
    y_min_percent=-100,
    y_max_percent=100,
    y_label_magnitude=r"CO Delta ($\mathrm{mol \cdot m^{-2}}$)",
    y_label_percent="CO Delta (%)",
)

stl.save_figure(output_path=deltas_figure_path)

### Rural Area to the East - Low

#### Grid S-9

In [None]:
# Plot CO deltas (magnitude and percent) for grid S-9 and save the figure
grid_cell = "S-9"
deltas_figure_name = (
    f"co-deltas-magnitude-percent-grid-{grid_cell.lower().replace('-', '')}.png"
)
deltas_figure_path = os.path.join(
    "04-graphics-outputs",
    "time-series",
    "south-korea",
    "carbon-monoxide",
    "deltas",
    deltas_figure_name,
)

# The no2_* keyword names belong to the stl.plot_deltas signature; the values
# passed here are CO deltas. The percent axis is pinned to +/-100; magnitude
# limits and x_max are not passed (previously tried: -1, 1, and 5).
fig, ax = stl.plot_deltas(
    time_delta=time_deltas_s9_arr,
    no2_delta_magnitude=co_deltas_magnitude_s9_arr,
    no2_delta_percent=co_deltas_percent_s9_arr,
    grid_id=grid_cell,
    product_type="CO",
    y_min_percent=-100,
    y_max_percent=100,
    y_label_magnitude=r"CO Delta ($\mathrm{mol \cdot m^{-2}}$)",
    y_label_percent="CO Delta (%)",
)

stl.save_figure(output_path=deltas_figure_path)

### Southern Coast - High

#### Grid N-17

In [None]:
# Plot CO deltas (magnitude and percent) for grid N-17 and save the figure
grid_cell = "N-17"
deltas_figure_name = (
    f"co-deltas-magnitude-percent-grid-{grid_cell.lower().replace('-', '')}.png"
)
deltas_figure_path = os.path.join(
    "04-graphics-outputs",
    "time-series",
    "south-korea",
    "carbon-monoxide",
    "deltas",
    deltas_figure_name,
)

# The no2_* keyword names belong to the stl.plot_deltas signature; the values
# passed here are CO deltas. The percent axis is pinned to +/-100; magnitude
# limits and x_max are not passed (previously tried: -1, 1, and 5).
fig, ax = stl.plot_deltas(
    time_delta=time_deltas_n17_arr,
    no2_delta_magnitude=co_deltas_magnitude_n17_arr,
    no2_delta_percent=co_deltas_percent_n17_arr,
    grid_id=grid_cell,
    product_type="CO",
    y_min_percent=-100,
    y_max_percent=100,
    y_label_magnitude=r"CO Delta ($\mathrm{mol \cdot m^{-2}}$)",
    y_label_percent="CO Delta (%)",
)

stl.save_figure(output_path=deltas_figure_path)

### Rural Area in the Center - Medium/Low

#### Grid N-11

In [None]:
# Plot CO deltas (magnitude and percent) for grid N-11 and save the figure
grid_cell = "N-11"
deltas_figure_name = (
    f"co-deltas-magnitude-percent-grid-{grid_cell.lower().replace('-', '')}.png"
)
deltas_figure_path = os.path.join(
    "04-graphics-outputs",
    "time-series",
    "south-korea",
    "carbon-monoxide",
    "deltas",
    deltas_figure_name,
)

# The no2_* keyword names belong to the stl.plot_deltas signature; the values
# passed here are CO deltas. The percent axis is pinned to +/-100; magnitude
# limits and x_max are not passed (previously tried: -1, 1, and 5).
fig, ax = stl.plot_deltas(
    time_delta=time_deltas_n11_arr,
    no2_delta_magnitude=co_deltas_magnitude_n11_arr,
    no2_delta_percent=co_deltas_percent_n11_arr,
    grid_id=grid_cell,
    product_type="CO",
    y_min_percent=-100,
    y_max_percent=100,
    y_label_magnitude=r"CO Delta ($\mathrm{mol \cdot m^{-2}}$)",
    y_label_percent="CO Delta (%)",
)

stl.save_figure(output_path=deltas_figure_path)

## Standard Deviation vs. Mean Plots

### 2018

#### All Grid Cells

In [None]:
# Plot CO standard deviation vs mean for all grid cells (2018 slice)
figure_title = (
    "South Korea CO, 7/01/2018 - 12/31/2018\n"
    "Grid Cell Standard Deviation vs. Mean"
)
figure_path = os.path.join(
    "04-graphics-outputs",
    "statistics",
    "south-korea",
    "carbon-monoxide",
    "co-stddev-vs-mean-all-grid-cells-2018.png",
)

with plt.style.context("dark_background"):
    fig, ax = plt.subplots(figsize=(12, 12))

    # One point per grid cell
    ax.scatter(
        means_2018,
        standard_deviations_2018,
        marker="o",
        s=25,
        color="#ff7f00",
        label="Grid Cell",
    )

    # Title, axis labels, and fixed 0-0.05 limits on both axes
    ax.set_title(figure_title, fontsize=24)
    ax.set_xlabel(r"Mean ($\mathrm{mol \cdot m^{-2}}$)", fontsize=20)
    ax.set_ylabel(
        r"Standard Deviation ($\mathrm{mol \cdot m^{-2}}$)", fontsize=20
    )
    ax.set_xlim(0, 0.05)
    ax.set_ylim(0, 0.05)
    ax.legend(shadow=True, edgecolor="white", fontsize=16, loc="best")

    # Caption
    fig.text(
        0.5,
        0.025,
        "Data Source: European Space Agency",
        ha="center",
        fontsize=14,
    )

# Saved after the style context block has exited
stl.save_figure(output_path=figure_path)

#### Subset Regions

### 2019

#### All Grid Cells

In [None]:
# Plot CO standard deviation vs mean for all grid cells (2019 slice)
figure_title = (
    "South Korea CO, 1/01/2019 - 12/31/2019\n"
    "Grid Cell Standard Deviation vs. Mean"
)
figure_path = os.path.join(
    "04-graphics-outputs",
    "statistics",
    "south-korea",
    "carbon-monoxide",
    "co-stddev-vs-mean-all-grid-cells-2019.png",
)

with plt.style.context("dark_background"):
    fig, ax = plt.subplots(figsize=(12, 12))

    # One point per grid cell
    ax.scatter(
        means_2019,
        standard_deviations_2019,
        marker="o",
        s=25,
        color="#ff7f00",
        label="Grid Cell",
    )

    # Title, axis labels, and fixed 0-0.05 limits on both axes
    ax.set_title(figure_title, fontsize=24)
    ax.set_xlabel(r"Mean ($\mathrm{mol \cdot m^{-2}}$)", fontsize=20)
    ax.set_ylabel(
        r"Standard Deviation ($\mathrm{mol \cdot m^{-2}}$)", fontsize=20
    )
    ax.set_xlim(0, 0.05)
    ax.set_ylim(0, 0.05)
    ax.legend(shadow=True, edgecolor="white", fontsize=16, loc="best")

    # Caption
    fig.text(
        0.5,
        0.025,
        "Data Source: European Space Agency",
        ha="center",
        fontsize=14,
    )

# Saved after the style context block has exited
stl.save_figure(output_path=figure_path)

#### Subset Regions

### 2020

#### All Grid Cells

In [None]:
# Plot CO standard deviation vs mean for all grid cells (2020 slice)
# NOTE(review): the title's 12/08/2020 end date differs from the 2020-11-16
# end of the slice that builds time_series_2020 - confirm which is intended
figure_title = (
    "South Korea CO, 1/01/2020 - 12/08/2020\n"
    "Grid Cell Standard Deviation vs. Mean"
)
figure_path = os.path.join(
    "04-graphics-outputs",
    "statistics",
    "south-korea",
    "carbon-monoxide",
    "co-stddev-vs-mean-all-grid-cells-2020.png",
)

with plt.style.context("dark_background"):
    fig, ax = plt.subplots(figsize=(12, 12))

    # One point per grid cell
    ax.scatter(
        means_2020,
        standard_deviations_2020,
        marker="o",
        s=25,
        color="#ff7f00",
        label="Grid Cell",
    )

    # Title, axis labels, and fixed 0-0.05 limits on both axes
    ax.set_title(figure_title, fontsize=24)
    ax.set_xlabel(r"Mean ($\mathrm{mol \cdot m^{-2}}$)", fontsize=20)
    ax.set_ylabel(
        r"Standard Deviation ($\mathrm{mol \cdot m^{-2}}$)", fontsize=20
    )
    ax.set_xlim(0, 0.05)
    ax.set_ylim(0, 0.05)
    ax.legend(shadow=True, edgecolor="white", fontsize=16, loc="best")

    # Caption
    fig.text(
        0.5,
        0.025,
        "Data Source: European Space Agency",
        ha="center",
        fontsize=14,
    )

# Saved after the style context block has exited
stl.save_figure(output_path=figure_path)

#### Subset Regions

### 2018, 2019, 2020

In [None]:
# Linear regression for 2018, 2019, 2020: grid cell standard deviation
# regressed on grid cell mean, one fit per year. Each fit's fields are copied
# into the per-year names used by the cells below.
regression_2018 = stats.linregress(means_2018, standard_deviations_2018)
slope_2018 = regression_2018.slope
intercept_2018 = regression_2018.intercept
r_value_2018 = regression_2018.rvalue
p_value_2018 = regression_2018.pvalue
std_err_2018 = regression_2018.stderr

regression_2019 = stats.linregress(means_2019, standard_deviations_2019)
slope_2019 = regression_2019.slope
intercept_2019 = regression_2019.intercept
r_value_2019 = regression_2019.rvalue
p_value_2019 = regression_2019.pvalue
std_err_2019 = regression_2019.stderr

regression_2020 = stats.linregress(means_2020, standard_deviations_2020)
slope_2020 = regression_2020.slope
intercept_2020 = regression_2020.intercept
r_value_2020 = regression_2020.rvalue
p_value_2020 = regression_2020.pvalue
std_err_2020 = regression_2020.stderr

In [None]:
# Display regression numbers
def _regression_rmse(x_values, y_values, slope, intercept):
    """Return the root-mean-square error of the fitted line's residuals."""
    residuals = y_values - (intercept + slope * x_values)
    return sqrt(np.mean(residuals ** 2))


# RMSE is computed from the residuals of each fit. The linregress std_err
# value is the standard error of the slope estimate, so its square root
# (reported previously) is not an RMSE.
rmse_2018 = _regression_rmse(
    means_2018, standard_deviations_2018, slope_2018, intercept_2018
)
rmse_2019 = _regression_rmse(
    means_2019, standard_deviations_2019, slope_2019, intercept_2019
)
rmse_2020 = _regression_rmse(
    means_2020, standard_deviations_2020, slope_2020, intercept_2020
)

print(
    "2018 (7/01 - 12/31)\n"
    f"    Slope:     {slope_2018}\n"
    f"    Intercept: {intercept_2018}\n"
    f"    R-Squared: {r_value_2018**2}\n"
    f"    P-Value:   {p_value_2018}\n"
    f"    Std-Error: {std_err_2018}\n"
    f"    RMSE:      {rmse_2018}\n\n"
    "2019 (1/01 - 12/31)\n"
    f"    Slope:     {slope_2019}\n"
    f"    Intercept: {intercept_2019}\n"
    f"    R-Squared: {r_value_2019**2}\n"
    f"    P-Value:   {p_value_2019}\n"
    f"    Std-Error: {std_err_2019}\n"
    f"    RMSE:      {rmse_2019}\n\n"
    "2020 (1/01 - 11/16)\n"
    f"    Slope:     {slope_2020}\n"
    f"    Intercept: {intercept_2020}\n"
    f"    R-Squared: {r_value_2020**2}\n"
    f"    P-Value:   {p_value_2020}\n"
    f"    Std-Error: {std_err_2020}\n"
    f"    RMSE:      {rmse_2020}\n\n"
)

In [None]:
# Plot CO standard deviation vs mean, with linear regression and 1:1 lines,
# one panel per year (2018, 2019, 2020)
# NOTE(review): the 2020 panel title ends 12/08/2020 while the 2020 slice
# ends 2020-11-16 - confirm which is intended
dates = [
    "7/01/2018 - 12/31/2018",
    "1/01/2019 - 12/31/2019",
    "1/01/2020 - 12/08/2020",
]
yearly_panels = [
    (means_2018, standard_deviations_2018, slope_2018, intercept_2018),
    (means_2019, standard_deviations_2019, slope_2019, intercept_2019),
    (means_2020, standard_deviations_2020, slope_2020, intercept_2020),
]
figure_path = os.path.join(
    "04-graphics-outputs",
    "statistics",
    "south-korea",
    "carbon-monoxide",
    "co-stddev-vs-mean-all-grid-cells-individual-years.png",
)

with plt.style.context("dark_background"):
    fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(40, 10))

    for axis, date_range, panel in zip(ax, dates, yearly_panels):
        panel_means, panel_stds, panel_slope, panel_intercept = panel

        # Data: one point per grid cell
        axis.scatter(
            panel_means,
            panel_stds,
            marker="o",
            s=25,
            color="#ff7f00",
            label="Grid Cell",
        )

        # Regression line evaluated at the observed means
        axis.plot(
            panel_means,
            panel_intercept + panel_slope * panel_means,
            label="Regression Line",
            color="#984ea3",
            linewidth=2,
        )

        # Title, axis labels, and fixed 0-0.05 limits on both axes
        axis.set_title(
            f"South Korea CO, {date_range}\n"
            "Grid Cell Standard Deviation vs. Mean",
            fontsize=24,
        )
        axis.set_xlabel(r"Mean ($\mathrm{mol \cdot m^{-2}}$)", fontsize=20)
        axis.set_ylabel(
            r"Standard Deviation ($\mathrm{mol \cdot m^{-2}}$)", fontsize=20
        )
        axis.set_xlim(0, 0.05)
        axis.set_ylim(0, 0.05)

        # 1:1 line, drawn corner to corner in axes coordinates
        axis.plot(
            (0, 1),
            (0, 1),
            transform=axis.transAxes,
            ls="--",
            c="#4daf4a",
            label="1:1 Line",
            linewidth=2,
        )
        axis.legend(shadow=True, edgecolor="white", fontsize=16, loc="best")

    # Caption
    fig.text(
        0.5,
        0.025,
        "Data Source: European Space Agency",
        ha="center",
        fontsize=14,
    )

# Saved after the style context block has exited
stl.save_figure(output_path=figure_path)

#### All Grid Cells

### 2018-2020

#### All Grid Cells

In [None]:
# Plot CO standard deviation vs mean for all grid cells over the full time
# series, with the four subset regions highlighted in their own colors
highlighted_regions = [
    (means_subset_1, standard_deviations_subset_1, "#e41a1c", "Seoul"),
    (
        means_subset_2,
        standard_deviations_subset_2,
        "#ff7f00",
        "Southern Coast",
    ),
    (means_subset_3, standard_deviations_subset_3, "#f781bf", "Rural - East"),
    (
        means_subset_4,
        standard_deviations_subset_4,
        "#ffff33",
        "Rural - Center",
    ),
]
figure_title = (
    "South Korea CO, 7/01/2018 - 12/08/2020\n"
    "Grid Cell Standard Deviation vs. Mean"
)
figure_path = os.path.join(
    "04-graphics-outputs",
    "statistics",
    "south-korea",
    "carbon-monoxide",
    "co-stddev-vs-mean-all-grid-cells-with-subregions.png",
)

with plt.style.context("dark_background"):
    fig, ax = plt.subplots(figsize=(12, 12))

    # All grid cells, drawn first and left out of the legend (no label)
    ax.scatter(
        means,
        standard_deviations,
        marker="o",
        s=25,
        color="#377eb8",
    )

    # Subset regions drawn on top, in the order listed above
    for region_means, region_stds, region_color, region_label in (
        highlighted_regions
    ):
        ax.scatter(
            region_means,
            region_stds,
            marker="o",
            s=25,
            color=region_color,
            label=region_label,
        )

    # Title, axis labels, and fixed limits on each axis
    ax.set_title(figure_title, fontsize=24)
    ax.set_xlabel(r"Mean ($\mathrm{mol \cdot m^{-2}}$)", fontsize=20)
    ax.set_ylabel(
        r"Standard Deviation ($\mathrm{mol \cdot m^{-2}}$)", fontsize=20
    )
    ax.set_xlim(0.0325, 0.0475)
    ax.set_ylim(0.0025, 0.0125)
    ax.legend(shadow=True, edgecolor="white", fontsize=16, loc="best")

    # Caption
    fig.text(
        0.5,
        0.025,
        "Data Source: European Space Agency",
        ha="center",
        fontsize=14,
    )

# Saved after the style context block has exited
stl.save_figure(output_path=figure_path)

#### Subset Regions

In [None]:
# Plot CO standard deviation vs mean for subset regions (2x2 grid, one panel
# per region), with reference lines at the all-grid-cell minimum and maximum
# mean and standard deviation
with plt.style.context("dark_background"):
    # Create plot
    fig, ax = plt.subplots(2, 2, figsize=(20, 20))

    # Figure
    # Title names CO (this is the CO notebook; the text previously said NO2)
    # and uses the same date range as the all-grid-cell plot of the same
    # full time series ("7/01/2018 - 12/08/2020").
    plt.suptitle(
        "South Korea CO, 7/01/2018 - 12/08/2020\n"
        "Subset Regions Standard Deviation vs. Mean",
        size=28,
    )
    plt.subplots_adjust(wspace=0.25)
    plt.subplots_adjust(top=0.90)

    # Configure axes
    # Subplot x values, y values, and titles (same order as fig.axes)
    subplot_means = [
        means_subset_1,
        means_subset_2,
        means_subset_3,
        means_subset_4,
    ]
    subplot_standard_deviations = [
        standard_deviations_subset_1,
        standard_deviations_subset_2,
        standard_deviations_subset_3,
        standard_deviations_subset_4,
    ]
    subplot_titles = [
        "Seoul Area",
        "Southern Coast",
        "Rural Area to the East",
        "Rural Area in the Center",
    ]

    for index, axis in enumerate(fig.axes):
        # Scatter plot (zorder=2 keeps points above the reference lines)
        axis.scatter(
            x=subplot_means[index],
            y=subplot_standard_deviations[index],
            marker="o",
            s=25,
            color="#ff7f00",
            label="Grid Cell",
            zorder=2,
        )
        # Vertical and horizontal lines at the all-grid-cell extremes
        axis.axvline(
            means.max(),
            color="#4daf4a",
            label="Maximum Grid Cell Mean",
            linewidth=2,
            zorder=1,
        )
        axis.axvline(
            means.min(),
            color="#984ea3",
            label="Minimum Grid Cell Mean",
            linewidth=2,
            zorder=1,
        )
        axis.axhline(
            standard_deviations.max(),
            color="#e41a1c",
            label="Maximum Grid Cell StdDev",
            linewidth=2,
            zorder=1,
        )
        axis.axhline(
            standard_deviations.min(),
            color="#377eb8",
            label="Minimum Grid Cell StdDev",
            linewidth=2,
            zorder=1,
        )
        # Axes settings
        axis.set_title(subplot_titles[index], fontsize=24)
        axis.set_xlabel(r"Mean ($\mathrm{mol \cdot m^{-2}}$)", fontsize=20)
        axis.set_ylabel(
            r"Standard Deviation ($\mathrm{mol \cdot m^{-2}}$)", fontsize=20
        )
        # Every panel shares the same square limits, taken from the largest
        # Seoul-subset mean rounded to 4 decimal places
        axis.set_xlim(0, round(means_subset_1.max(), 4))
        axis.set_ylim(0, round(means_subset_1.max(), 4))
        # Legend
        axis.legend(shadow=True, edgecolor="white", fontsize=16, loc="best")

    # Caption
    fig.text(
        0.5,
        0.065,
        "Data Source: European Space Agency",
        ha="center",
        fontsize=14,
    )

# stl.save_figure(
#     output_path=os.path.join(
#         "04-graphics-outputs",
#         "statistics",
#         "south-korea",
#         "carbon-monoxide",
#         "co-stddev-vs-mean-subset-regions.png",
#     )
# )