## Probability grids

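This notebook uses a previously saved classifier to calculate time-dependent probabilities for the prepared grid data, saves them as a table (`grid_probabilities.csv.zip`), and then creates the time-dependent probability maps and writes them to file (`.nc` format).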

In [5]:
import os

import pandas as pd
from joblib import load

from lib.check_files import check_prepared_data
from lib.pu import (
    calculate_probabilities,
    create_probability_grids,
    COLUMNS_TO_DROP,
    CORRELATED_COLUMNS,
    PRESERVATION_COLUMNS,
)

# These columns are exempted from the drop set, so they stay in the frame
# that is later handed to `calculate_probabilities`.
retained_columns = {"lon", "lat", "present_lon", "present_lat", "age (Ma)"}

# Union of the three column groups imported from `lib.pu`, minus the
# retained columns above.
columns_to_drop = (
    COLUMNS_TO_DROP | CORRELATED_COLUMNS | PRESERVATION_COLUMNS
).difference(retained_columns)

### Load classifier and input data from file

In [2]:
# Directories for the saved classifier / results and for the prepared inputs.
output_dir, data_dir = "outputs", "prepared_data"

# Build both file paths up front; the files themselves are read below.
classifier_filename = os.path.join(output_dir, "pu_classifier.joblib")
data_filename = os.path.join(data_dir, "grid_data.csv.zip")

# NOTE: joblib files are pickle-based, so only load a classifier file that
# comes from a trusted source.
classifier = load(classifier_filename)

# Run the prepared-data check (see `lib.check_files`) before reading the
# grid data from that directory.
check_prepared_data(data_dir, verbose=True)
point_data = pd.read_csv(data_filename)

### Calculate probabilities

In [3]:
# Feed the grid points to the classifier with the unwanted columns removed.
# errors="ignore" means listed columns that are absent from `point_data`
# are skipped instead of raising.
probabilities = calculate_probabilities(
    point_data=point_data.drop(columns=columns_to_drop, errors="ignore"),
    classifier=classifier,
)

# Attach the `present_lon` / `present_lat` columns from the input frame.
# NOTE(review): pandas aligns this assignment on index, so this assumes
# `calculate_probabilities` preserves the index of `point_data` — confirm.
for axis in ("lon", "lat"):
    column = f"present_{axis}"
    probabilities[column] = point_data[column]

# Save the probabilities table alongside the other outputs.
probabilities_filename = os.path.join(output_dir, "grid_probabilities.csv.zip")
probabilities.to_csv(probabilities_filename, index=False)

### Create grids

In [9]:
# Value passed as `threads` to `create_probability_grids`.
n_jobs = 8

# Grids are written to a "grids" subdirectory of the output directory,
# which is created if it does not already exist.
grid_output_dir = os.path.join(output_dir, "grids")
os.makedirs(grid_output_dir, exist_ok=True)

# Presumably (min lon, max lon, min lat, max lat), i.e. the whole globe —
# TODO confirm against `create_probability_grids`.
grid_extent = (-180, 180, -90, 90)

create_probability_grids(
    probabilities=probabilities,
    output_dir=grid_output_dir,
    threads=n_jobs,
    extent=grid_extent,
)