# Unloading Manuscript
## Figures 7 and 8 
## Step 1: Manual Computation of the PDPs

created by Cassie Lumbrazo\
date: Jan 2026\
python environment: **randomforest**

latest: running on UAS linux machine


## Load Everything

In [1]:
# import packages 
%matplotlib inline

# plotting packages 
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns 

sns.set_theme()
# plt.rcParams['figure.figsize'] = [12,6] #overriding size

# data packages 
import pandas as pd
import numpy as np
import xarray as xr

import copy 
import pickle
import sklearn

import matplotlib

In [2]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.colors import Normalize
from sklearn.inspection import PartialDependenceDisplay

# Load model and set up for plotting

In [3]:
# joblib serialization helpers; only `load` is used in this notebook.
from joblib import dump
from joblib import load

# Directory holding the saved model and the CSV files read below.
filepath = '/home/cassie/data/unloading_regimes_analysis/'
# NOTE: joblib.load unpickles the file, which can execute arbitrary code --
# only load model files that come from a trusted source.
model = load(filepath + 'updated_analysis_random_forest_model_int_20_1000_flip.joblib') # load the random forest model

In [4]:
model

The model can also be loaded from the new pickle file, now that it works with the new Python environment:

In [5]:
# model_pickle = pickle.load(open(filepath + 'updated_analysis_random_forest_model_int_20_1000_flip.pkl', 'rb')) # rb, reading
# model_pickle

In [6]:
# Model results table (per the file name, results joined with met data -- TODO confirm).
# `df` is not referenced again in this notebook.
df = pd.read_csv(filepath + 'updated_analysis_random_forest_model_int_20_1000_results_with_metdata.csv')

In [7]:
# Load the feature table (X) used in the model and the accompanying y table.
local_path = '/home/cassie/data/unloading_regimes_analysis/'
X_features = pd.read_csv(f"{local_path}X_randomforestmodel_features.csv")
y_features = pd.read_csv(f"{local_path}y_randomforestmodel_features.csv")

In [8]:
# Report the observed range of every model feature (one line per column).
for col, values in X_features.items():
    print(f"{col}: min={values.min()}, max={values.max()}")

shortwave radiation (W/m2): min=0, max=1106
wind speed (m/s): min=0, max=14
air temperature (C): min=-36, max=7


In [9]:
# Quick check of the feature table's dimensions and column labels.
print(f"Shape of X_features: {X_features.shape}")
print(f"Columns: {X_features.columns}")

Shape of X_features: (1650, 3)
Columns: Index(['shortwave radiation (W/m2)', 'wind speed (m/s)',
       'air temperature (C)'],
      dtype='object')


In [10]:
# Display labels for figure axes (Unicode units). These are NOT column names
# and are not referenced elsewhere in this notebook.
nametemperature_fig = "air temperature (℃)"
namewindspeed_fig = "wind speed (ms⁻¹)"
nameshortwave_fig = "shortwave radiation (Wm⁻²)"

# Column names exactly as they appear in X_features; use these to index the data.
nametemperature = "air temperature (C)"
namewindspeed = "wind speed (m/s)"
nameshortwave = "shortwave radiation (W/m2)"

# Earlier axis limits, kept for reference (currently disabled).
# temperature_limits = (-25, 3)
# shortwave_limits = (-5, 700)
# windspeed_limits = (0, 20)

In [11]:
# Feature specification: two single features plus one feature pair.
# Presumably intended for sklearn's PartialDependenceDisplay (imported above);
# the manual computation below does not use it.
features_info = dict(
    features=[
        nameshortwave,
        nametemperature,
        [nameshortwave, nametemperature],  # 2-way PDP
    ],
    kind="average",
)


# Calculate the PDPs Manually

### Function for manual PDP computation

> add parallel in this script

In [12]:
from joblib import Parallel, delayed

In [13]:
import os 

In [14]:
os.cpu_count()

24

In [None]:
n_jobs_num = 15  # Set to the number of CPU cores you want to use

1-way PDP in parallel, 

In [None]:
def _pdp_1d_single_value(model, X, feature, val):
    """Average predicted probability with `feature` forced to `val` in every row.

    Works on a copy of `X`, so the caller's table is left unchanged. The
    probability is taken from column 1 of `model.predict_proba`.
    """
    overridden = X.copy()
    overridden[feature] = val
    second_class_proba = model.predict_proba(overridden)[:, 1]
    return second_class_proba.mean()


def compute_pdp_1d_parallel(
    model, X, feature, grid, n_jobs=n_jobs_num, backend="loky"
):
    """
    One-way partial dependence of `feature`, evaluated in parallel.

    Each value in `grid` becomes one joblib task that runs
    `_pdp_1d_single_value` on the full `X`.

    Parameters
    ----------
    model : fitted classifier exposing `predict_proba`
    X : feature table the predictions are averaged over
    feature : column of `X` to override
    grid : iterable of values substituted for `feature`
    n_jobs : worker count passed to joblib.Parallel (-1 uses all available
        cores). The default is the notebook-level `n_jobs_num`, captured when
        this cell runs -- re-run this cell after changing `n_jobs_num`.
        (The pc has 24 cores; 15 was used for the overnight run.)
    backend : joblib backend name

    Returns
    -------
    np.ndarray with one averaged probability per grid value.
    """
    evaluate = delayed(_pdp_1d_single_value)
    jobs = (evaluate(model, X, feature, grid_value) for grid_value in grid)
    averaged = Parallel(n_jobs=n_jobs, backend=backend)(jobs)
    return np.array(averaged)

2-way PDP in parallel, 

In [None]:
def _pdp_2d_single_point(model, X, fx, fy, xval, yval):
    """Average predicted probability with `fx` set to `xval` and `fy` set to `yval`.

    Both columns are overridden in a copy of `X`, so the caller's table is
    left unchanged. The probability is taken from column 1 of
    `model.predict_proba`.
    """
    overridden = X.copy()
    overridden[fx] = xval
    overridden[fy] = yval
    second_class_proba = model.predict_proba(overridden)[:, 1]
    return second_class_proba.mean()


def compute_pdp_2d_parallel(
    model, X, feature_x, feature_y, grid_x, grid_y,
    n_jobs=n_jobs_num, backend="loky"
):
    """
    Two-way partial dependence of (`feature_x`, `feature_y`), evaluated in parallel.

    Every (x, y) combination of the two grids becomes one joblib task that
    runs `_pdp_2d_single_point` on the full `X`, so the number of tasks is
    len(grid_x) * len(grid_y).

    Returns
    -------
    np.ndarray of shape (len(grid_y), len(grid_x)): rows follow `grid_y`,
    columns follow `grid_x`.
    """
    evaluate = delayed(_pdp_2d_single_point)
    # y is the outer loop and x the inner one, so the flat result list is
    # already in row-major order for the (y, x) reshape below.
    jobs = (
        evaluate(model, X, feature_x, feature_y, xval, yval)
        for yval in grid_y
        for xval in grid_x
    )
    flat = Parallel(n_jobs=n_jobs, backend=backend)(jobs)

    n_rows, n_cols = len(grid_y), len(grid_x)
    return np.array(flat).reshape(n_rows, n_cols)

### Define shared grids and limits between figures

> Ranges for the calculations (observed data range, then the range selected for the grid):\
temp: [-36,7], select [-35,5]\
sw:   [0,1106], select [0,1000]\
wind: [0,14], select [0,14]


In [None]:
# Axis limits (explicit, publication-controlled)
# One evenly spaced evaluation grid per feature; all three share `grid` points.
grid = 500 # points per axis; the first run used 80 and this was refined to 500. Per the original note, it needs to match the manual_compute script
temp_grid  = np.linspace(-35, 5, grid) # air temperature (C); observed range is -36 to 7
sw_grid    = np.linspace(0, 1000, grid) # shortwave radiation (W/m2); observed range is 0 to 1106
wind_grid  = np.linspace(0, 14, grid) # wind speed (m/s); observed range is 0 to 14

# Color scale (shared across all 2-way plots)
# Not referenced again in this notebook -- presumably mirrored in the plotting notebook.
vmin, vmax = 0.15, 0.60
levels = np.linspace(vmin, vmax, 12) # 12 evenly spaced values from vmin to vmax


### Compute all the PDPs for Figure 7

In [None]:
# 1-way PDPs: each result is a 1-D array with one averaged probability per grid value.
pdp_temp = compute_pdp_1d_parallel(
    model, X_features, "air temperature (C)", temp_grid
)

# progress messages between steps show where the run time is going
print("Computed 1-way PDP for temperature.")

pdp_sw = compute_pdp_1d_parallel(
    model, X_features, "shortwave radiation (W/m2)", sw_grid
)

print("Computed 1-way PDP for shortwave radiation.")

pdp_wind = compute_pdp_1d_parallel(
    model, X_features, "wind speed (m/s)", wind_grid
)

print("Computed 1-way PDP for wind speed.")

# 2-way PDPs: each call evaluates every combination of its two grids
# (grid * grid model evaluations over all of X_features), so these calls
# dominate the run time.
# Each returned array has shape (len(grid_y), len(grid_x)): rows follow the
# second grid argument and columns follow the first.
pdp_temp_sw = compute_pdp_2d_parallel(
    model, X_features,
    "air temperature (C)", "shortwave radiation (W/m2)",
    temp_grid, sw_grid
)

print("Computed 2-way PDP for temperature and shortwave radiation.")

pdp_temp_wind = compute_pdp_2d_parallel(
    model, X_features,
    "air temperature (C)", "wind speed (m/s)",
    temp_grid, wind_grid
)

print("Computed 2-way PDP for temperature and wind speed.")

pdp_sw_wind = compute_pdp_2d_parallel(
    model, X_features,
    "shortwave radiation (W/m2)", "wind speed (m/s)",
    sw_grid, wind_grid
)

print("Computed 2-way PDP for shortwave radiation and wind speed.")


Computed 1-way PDP for temperature.
Computed 1-way PDP for shortwave radiation.
Computed 1-way PDP for wind speed.
Computed 2-way PDP for temperature and shortwave radiation.
Computed 2-way PDP for temperature and wind speed.
Computed 2-way PDP for shortwave radiation and wind speed.


### Compute the PDPs for Figure 8

Set up subset of the data

In [19]:
# Split the feature table at 0 C: "cold" is strictly below zero, "warm" is zero and above.
# Independent copies are taken so later edits cannot touch X_features.
X_cold = X_features.loc[X_features["air temperature (C)"].lt(0)].copy()
X_warm = X_features.loc[X_features["air temperature (C)"].ge(0)].copy()

Then compute the stratified 2-way PDPs 

In [20]:
# Shortwave-by-wind 2-way PDP averaged over the cold rows only (X_cold),
# on the same sw_grid / wind_grid used for the full-data PDPs.
pdp_sw_wind_cold = compute_pdp_2d_parallel(
    model, X_cold,
    "shortwave radiation (W/m2)", "wind speed (m/s)",
    sw_grid, wind_grid
)

print("Computed 2-way PDP for shortwave radiation and wind speed (cold subset).")

# Same computation averaged over the warm rows only (X_warm).
pdp_sw_wind_warm = compute_pdp_2d_parallel(
    model, X_warm,
    "shortwave radiation (W/m2)", "wind speed (m/s)",
    sw_grid, wind_grid
)

print("Computed 2-way PDP for shortwave radiation and wind speed (warm subset).")


Computed 2-way PDP for shortwave radiation and wind speed (cold subset).
Computed 2-way PDP for shortwave radiation and wind speed (warm subset).


And save the output to open in another script for plotting, 

In [None]:
# Save every grid and PDP array in a single .npz archive; the arrays are read
# back by these keyword names (e.g. data["pdp_temp"]).
# The "grid<N>" suffix is taken from the length of the grid actually being saved
# (all three grids share it), so the file name cannot drift from the resolution
# if `grid` is changed. With the current 500-point grids the path is unchanged.
np.savez(
    "/home/cassie/python/repos/unloading_regimes_analysis/manuscript_figures/"
    f"manual_pdp_figure_7_8_grid{len(temp_grid)}.npz",
    temp_grid=temp_grid,
    sw_grid=sw_grid,
    wind_grid=wind_grid,
    pdp_temp=pdp_temp,
    pdp_sw=pdp_sw,
    pdp_wind=pdp_wind,
    pdp_temp_sw=pdp_temp_sw,
    pdp_temp_wind=pdp_temp_wind,
    pdp_sw_wind=pdp_sw_wind,
    pdp_sw_wind_cold=pdp_sw_wind_cold, # figure 8
    pdp_sw_wind_warm=pdp_sw_wind_warm  # figure 8
)

Then when we want to open that in another notebook, 

In [22]:
# data = np.load("manual_pdp_figure_7_8_grid500.npz")
# pdp_temp = data["pdp_temp"]

... doing this inside the next notebook, `Figures_7_8_manual_plot.ipynb`