# Calculate Baseline

Creates mean, standard deviation, variance, and median GeoTiffs for each month and for the specified date ranges.

Baseline years:

* 2012-2019 (Jan-Dec)

Comparison Year:

* 2020 (Jan-Dec)

# Environment Setup

In [None]:
# Load Notebook formatter (the nb_black IPython extension)
%load_ext nb_black
# Disabled alternative that reloads the extension instead of loading it:
# %reload_ext nb_black

In [None]:
# Import packages
import os
import glob
import re
import warnings
import viirs

In [None]:
# Set Options
# Install an "ignore" filter so warnings are not displayed from here on
warnings.filterwarnings("ignore")
# The display options below are disabled; they reference `sns` and `pd`,
# which the import cell of this notebook does not import
# sns.set(font_scale=1.5, style="whitegrid")
# sns.set(font_scale=1.5)
# pd.set_option("display.max_columns", None)
# pd.set_option("display.max_rows", None)
# pd.set_option("precision", 15)

In [None]:
# Move one level up so the relative data paths used below resolve from the
# parent of the directory the notebook was started in
os.chdir("..")
working_directory = os.getcwd()
print(f"Working directory: {working_directory}")

# User-Defined Variables

In [None]:
# Set paths (relative to the working directory)
# Shared root of the processed South Korea rasters
processed_raster_folder = os.path.join(
    "03-processed-data", "raster", "south-korea"
)

# Folder the input GeoTiffs are read from
geotiff_input_folder = os.path.join(processed_raster_folder, "vnp46a2-clipped")

# Folder the statistic GeoTiffs are written to
statistics_output_folder = os.path.join(
    processed_raster_folder, "statistics", "vnp46a2"
)

# Data Acquisition and Preprocessing

In [None]:
# Get list of radiance rasters
# radiance_geotiffs = glob.glob(os.path.join(geotiff_input_folder, "*.tif"))
# print(f"Found {len(radiance_geotiffs)} files")

In [None]:
# # Get export metadata (for exporting statistics, same for all files)
# metadata = viirs.extract_geotiff_metadata(
#     glob.glob(os.path.join(geotiff_input_folder, "*.tif"))[0]
# )
# metadata

# Data Processing

## Setup

In [None]:
# Get export metadata (for exporting statistics, same for all files)
# The first GeoTiff returned by the glob is used as the metadata source
geotiff_pattern = os.path.join(geotiff_input_folder, "*.tif")
metadata_source_geotiff = glob.glob(geotiff_pattern)[0]
metadata = viirs.extract_geotiff_metadata(metadata_source_geotiff)
metadata

In [None]:
# Set month numbers (for filtering data) and abbreviations (for output name)
# Zero-padded, two-digit month numbers "01" through "12"
month_numbers = [f"{month_number:02d}" for month_number in range(1, 13)]

# Lower-case, three-letter abbreviations in the same order as the numbers
month_abbreviations = [
    "jan", "feb", "mar", "apr", "may", "jun",
    "jul", "aug", "sep", "oct", "nov", "dec",
]

In [None]:
# Set date ranges of interest (use leap year to include 2/29)
# Start and end date of each range, in the order they are processed
date_range_bounds = [
    ("2020-03-15", "2020-04-14"),
    ("2020-04-15", "2020-05-14"),
    ("2020-05-15", "2020-06-14"),
]

date_ranges = []
for range_start, range_end in date_range_bounds:
    full_dates = viirs.create_date_range(
        start_date=range_start, end_date=range_end
    )
    # Drop the first four characters of every date (presumably the year,
    # leaving month and day -- TODO confirm create_date_range's format)
    date_ranges.append([full_date[4:] for full_date in full_dates])
date_ranges

## Baseline Data (2012-2019)

### Monthly

In [None]:
# Calculate statistics for each calendar month over the baseline years
# (every acquisition year in the input folder other than 2020)
# List the input files once; the folder listing does not depend on the month
geotiff_paths = glob.glob(os.path.join(geotiff_input_folder, "*.tif"))
# Statistics to compute; each name is passed to viirs.calculate_statistic
# and also embedded in the output file name
statistic_names = ("mean", "variance", "deviation", "median")

for index, baseline_month in enumerate(month_numbers):
    month_abbreviation = month_abbreviations[index]
    print(f"Started month: {month_abbreviation.capitalize()}")
    print("Gathering GeoTiffs within range...")
    # Get list of arrays for acquisition dates that match the month.
    # The slices assume the file name carries the acquisition date as
    # YYYYMMDD in characters 8-15 (year 8:12, month 12:14) -- TODO confirm
    array_list = [
        viirs.read_geotiff_into_array(geotiff)
        for geotiff in geotiff_paths
        # Acquisition month
        if os.path.basename(geotiff)[12:14] == baseline_month
        # Acquisition year
        and os.path.basename(geotiff)[8:12] != "2020"
    ]

    print(f"Number of arrays: {len(array_list)}")
    print("Calculating statistics...")
    # Calculate mean, variance, standard deviation, and median, pairing
    # each result array with the name of the GeoTiff it is exported to
    statistics = {
        statistic: {
            "array": viirs.calculate_statistic(
                array_list, statistic=statistic
            ),
            "file": (
                f"vnp46a2-south-korea-2012-2019-{statistic}-"
                f"{baseline_month}-{month_abbreviation}.tif"
            ),
        }
        for statistic in statistic_names
    }

    print("Exporting statistics to GeoTiffs...")
    # Export statistic arrays to GeoTiff
    for statistic, result in statistics.items():
        output_path = os.path.join(statistics_output_folder, result["file"])
        try:
            viirs.export_array(
                array=result["array"],
                output_path=output_path,
                metadata=metadata,
            )
        except Exception as error:
            # Best effort: report which export failed and continue with
            # the remaining statistics
            print(f"Failed to export {output_path}: {error}")

    # Output completion message
    print(f"Completed month: {month_abbreviation.capitalize()}\n")

### Inter-Month Ranges

In [None]:
# Calculate statistics for each defined date range over the baseline years
# (every acquisition year in the input folder other than 2020)
# List the input files once; the folder listing does not depend on the range
geotiff_paths = glob.glob(os.path.join(geotiff_input_folder, "*.tif"))
# Statistics to compute; each name is passed to viirs.calculate_statistic
# and also embedded in the output file name
statistic_names = ("mean", "variance", "deviation", "median")

for date_range in date_ranges:
    # First and last entries of the range, used in messages and file names
    range_label = f"{date_range[0]}-{date_range[-1]}"
    print(f"Started date range: {range_label}")

    print("Gathering GeoTiffs within range...")
    # Entries of the range as a set, so each file needs one membership test
    # instead of a scan over every day (assumes each entry is a four
    # character month-day string, "MMDD" -- TODO confirm)
    month_days = set(date_range)
    # Get data within date range over the baseline years. The slices assume
    # the file name carries the acquisition date as YYYYMMDD in characters
    # 8-15 (year 8:12, month and day 12:16) -- TODO confirm
    array_list = [
        viirs.read_geotiff_into_array(geotiff)
        for geotiff in geotiff_paths
        # Acquisition month and day
        if os.path.basename(geotiff)[12:16] in month_days
        # Acquisition year
        and os.path.basename(geotiff)[8:12] != "2020"
    ]

    print(f"Number of arrays: {len(array_list)}")
    print("Calculating statistics...")
    # Calculate mean, variance, standard deviation, and median, pairing
    # each result array with the name of the GeoTiff it is exported to
    statistics = {
        statistic: {
            "array": viirs.calculate_statistic(
                array_list, statistic=statistic
            ),
            "file": (
                f"vnp46a2-south-korea-2012-2019-{statistic}-"
                f"{range_label}.tif"
            ),
        }
        for statistic in statistic_names
    }

    print("Exporting statistics to GeoTiffs...")
    # Export statistic arrays to GeoTiff
    for statistic, result in statistics.items():
        output_path = os.path.join(statistics_output_folder, result["file"])
        try:
            viirs.export_array(
                array=result["array"],
                output_path=output_path,
                metadata=metadata,
            )
        except Exception as error:
            # Best effort: report which export failed and continue with
            # the remaining statistics
            print(f"Failed to export {output_path}: {error}")

    print(f"Completed date range: {range_label}\n")

## Comparison Data (2020)

### Monthly

In [None]:
# Calculate statistics for each calendar month of the comparison year (2020)
# List the input files once; the folder listing does not depend on the month
geotiff_paths = glob.glob(os.path.join(geotiff_input_folder, "*.tif"))
# Statistics to compute; each name is passed to viirs.calculate_statistic
# and also embedded in the output file name
statistic_names = ("mean", "variance", "deviation", "median")

for index, baseline_month in enumerate(month_numbers):
    month_abbreviation = month_abbreviations[index]
    print(f"Started month: {month_abbreviation.capitalize()}")
    print("Gathering GeoTiffs within range...")
    # Get list of arrays for acquisition dates that match the month.
    # The slices assume the file name carries the acquisition date as
    # YYYYMMDD in characters 8-15 (year 8:12, month 12:14) -- TODO confirm
    array_list = [
        viirs.read_geotiff_into_array(geotiff)
        for geotiff in geotiff_paths
        # Acquisition month
        if os.path.basename(geotiff)[12:14] == baseline_month
        # Acquisition year
        and os.path.basename(geotiff)[8:12] == "2020"
    ]

    print(f"Number of arrays: {len(array_list)}")
    print("Calculating statistics...")
    # Calculate mean, variance, standard deviation, and median, pairing
    # each result array with the name of the GeoTiff it is exported to
    statistics = {
        statistic: {
            "array": viirs.calculate_statistic(
                array_list, statistic=statistic
            ),
            "file": (
                f"vnp46a2-south-korea-2020-{statistic}-"
                f"{baseline_month}-{month_abbreviation}.tif"
            ),
        }
        for statistic in statistic_names
    }

    print("Exporting statistics to GeoTiffs...")
    # Export statistic arrays to GeoTiff
    for statistic, result in statistics.items():
        output_path = os.path.join(statistics_output_folder, result["file"])
        try:
            viirs.export_array(
                array=result["array"],
                output_path=output_path,
                metadata=metadata,
            )
        except Exception as error:
            # Best effort: report which export failed and continue with
            # the remaining statistics
            print(f"Failed to export {output_path}: {error}")

    # Output completion message
    print(f"Completed month: {month_abbreviation.capitalize()}\n")

### Inter-Month Ranges

In [None]:
# Calculate statistics for each defined date range of the comparison year
# (2020)
# List the input files once; the folder listing does not depend on the range
geotiff_paths = glob.glob(os.path.join(geotiff_input_folder, "*.tif"))
# Statistics to compute; each name is passed to viirs.calculate_statistic
# and also embedded in the output file name
statistic_names = ("mean", "variance", "deviation", "median")

for date_range in date_ranges:
    # First and last entries of the range, used in messages and file names
    range_label = f"{date_range[0]}-{date_range[-1]}"
    print(f"Started date range: {range_label}")

    print("Gathering GeoTiffs within range...")
    # Entries of the range as a set, so each file needs one membership test
    # instead of a scan over every day (assumes each entry is a four
    # character month-day string, "MMDD" -- TODO confirm)
    month_days = set(date_range)
    # Get data within date range for the comparison year. The slices assume
    # the file name carries the acquisition date as YYYYMMDD in characters
    # 8-15 (year 8:12, month and day 12:16) -- TODO confirm
    array_list = [
        viirs.read_geotiff_into_array(geotiff)
        for geotiff in geotiff_paths
        # Acquisition month and day
        if os.path.basename(geotiff)[12:16] in month_days
        # Acquisition year
        and os.path.basename(geotiff)[8:12] == "2020"
    ]

    print(f"Number of arrays: {len(array_list)}")
    print("Calculating statistics...")
    # Calculate mean, variance, standard deviation, and median, pairing
    # each result array with the name of the GeoTiff it is exported to
    statistics = {
        statistic: {
            "array": viirs.calculate_statistic(
                array_list, statistic=statistic
            ),
            "file": (
                f"vnp46a2-south-korea-2020-{statistic}-"
                f"{range_label}.tif"
            ),
        }
        for statistic in statistic_names
    }

    print("Exporting statistics to GeoTiffs...")
    # Export statistic arrays to GeoTiff
    for statistic, result in statistics.items():
        output_path = os.path.join(statistics_output_folder, result["file"])
        try:
            viirs.export_array(
                array=result["array"],
                output_path=output_path,
                metadata=metadata,
            )
        except Exception as error:
            # Best effort: report which export failed and continue with
            # the remaining statistics
            print(f"Failed to export {output_path}: {error}")

    print(f"Completed date range: {range_label}\n")