# Preservation potential grids

This notebook converts the cumulative erosion values at the grid point locations into gridded netCDF files of preservation potential, using the parametric distribution fitted in notebook `04`.

If training and grid data have been extracted from the source datasets by running the `00c-extract_training_data_global.ipynb` and `00b-extract_grid_data.ipynb` notebooks, set the `use_extracted_data` parameter to `True` in the configuration file named by `config_file` below. The notebook will then use the extracted data instead of the pre-prepared data from the [Zenodo repository](https://zenodo.org/record/8157691).

## Notebook options

These cells set some of the important variables and definitions used throughout the notebook.

In [1]:
# Name of the YAML configuration file handed to `get_params` in the next cell.
# NOTE(review): the filename suggests a test configuration — confirm this is
# the intended file before a full run.
config_file = "notebook_parameters_test.yml"

In [2]:
from lib.load_params import get_params

# Load the settings for this notebook (identified as "05") from the config file
params = get_params(config_file, notebook="05")

# Root directory for everything this notebook writes
outputs_dir = params["output_dir"]

# Input data source:
#   True  -> data extracted locally by the `00` notebooks
#   False -> pre-prepared data downloaded from Zenodo
use_extracted_data = params["use_extracted_data"]

# Number of processes to use
n_jobs = params["n_jobs"]

# Verbosity of the progress output
verbose = params["verbose"]

## Notebook setup

Imports, definitions, etc.

### Imports

In [3]:
import glob
import os

import joblib
import numpy as np
import pandas as pd
from gplately import Raster

from lib.check_files import check_prepared_data
from lib.pu import create_grids

### Input and output files

In [4]:
# --- Inputs ---------------------------------------------------------------
# Directory holding the grid point data: either the locally extracted data
# or the pre-prepared dataset.
# `check_prepared_data` is a project helper; presumably it verifies the
# pre-prepared data and returns its directory — TODO confirm.
data_dir = (
    params["extracted_data_dir"]
    if use_extracted_data
    else check_prepared_data("prepared_data")
)
data_filename = os.path.join(data_dir, "grid_data.csv")

# Global-scale outputs share one sub-directory of the output directory
global_outputs_dir = os.path.join(outputs_dir, "global")

# Parametric distribution saved to disk by an earlier notebook
dist_parametric_filename = os.path.join(global_outputs_dir, "erodep_dist.joblib")

# --- Outputs --------------------------------------------------------------
# Point-wise likelihood table
output_filename_points = os.path.join(
    global_outputs_dir,
    "grid_data_preservation_likelihood.csv",
)
# Gridded erosion and gridded preservation likelihood
erodep_grid_dir = os.path.join(outputs_dir, "erosion_grids")
likelihood_dir = os.path.join(global_outputs_dir, "preservation_likelihood_grids")

# Make sure both grid directories exist before anything is written to them
for directory in (erodep_grid_dir, likelihood_dir):
    os.makedirs(directory, exist_ok=True)

### Load data and calculate likelihood

The likelihood point values will be saved to a `.csv` file in the output directory.

In [5]:
# Read only the columns needed here: location, age and cumulative erosion
data = pd.read_csv(
    data_filename,
    usecols=["lon", "lat", "age (Ma)", "erosion (m)"],
)

if verbose:
    print(f"Loading parametric distribution from file: {dist_parametric_filename}")
dist_parametric = joblib.load(dist_parametric_filename)

# Likelihood is the fitted distribution's density evaluated at each point's
# erosion value; it is stored alongside the inputs as a new column
data["likelihood"] = dist_parametric.pdf(data["erosion (m)"])

# Persist the point-wise results (without the DataFrame index)
data.to_csv(output_filename_points, index=False)

## Create grids

In this section, the different types of output grids are created.

### Erosion

In [6]:
# Grid the "erosion (m)" column of the point data into `erodep_grid_dir`.
# `create_grids` is a project helper; presumably it writes one file per age,
# filling the `{}` in `filename_format` with the age value — TODO confirm.
create_grids(
    data,
    # What to grid
    column="erosion (m)",
    extent="global",
    # Where and how to write it
    output_dir=erodep_grid_dir,
    filename_format="erosion_grid_{}Ma.nc",
    # Execution options
    threads=n_jobs,
    verbose=verbose,
)


### Preservation likelihood

In [7]:
# Every erosion grid found in the erosion grid directory gets a matching
# preservation likelihood grid with the same age suffix.
erosion_grid_pattern = os.path.join(erodep_grid_dir, r"erosion_grid_*Ma.nc")

for erosion_path in glob.glob(erosion_grid_pattern):
    erosion_name = os.path.basename(erosion_path)
    if verbose:
        print(f"Reading file: {erosion_name}")

    # Output keeps the input's filename, with only the prefix swapped
    likelihood_name = erosion_name.replace(
        "erosion_grid", "preservation_likelihood_grid"
    )
    likelihood_path = os.path.join(likelihood_dir, likelihood_name)

    # Evaluate the fitted distribution's density on the erosion grid and
    # wrap the result in a new raster
    erosion_raster = Raster(erosion_path)
    likelihood_raster = Raster(dist_parametric.pdf(erosion_raster))

    if verbose:
        print(f" - Writing likelihood file: {likelihood_name}")
    likelihood_raster.save_to_netcdf4(likelihood_path)