# Generate Policies for Natural Disaster Simulation

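This notebook generates policies for the natural disaster relief experiment from the paper. For each generated policy (1000 per bonus configuration in the run shown below), we record the rewards for each of the 12 clusters used in the experiment. The baseline (no-bonus) rewards are stored in 'data/natural_disaster/policy_rewards.csv'; runs with a global bonus are written next to it as 'policy_rewards_bonus_<clusters>_<bonus>.csv'.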

In [10]:
import os
import sys
cwd = os.getcwd()
from pathlib import Path
project_root = os.path.join(Path.cwd(), '..', '..')
sys.path.insert(0, str(project_root))
import numpy as np

from src.environments.natural_disaster import (
    need_based_policy,
    per_capita_need_policy,
    population_based_policy,
    income_based_policy,
    proximity_based_policy,
    randomized_weighted_hybrid_policy,
    mixed_random_policy_k_increments,
    generate_action_space,
    simulate_policy_dynamic_with_tpm
)
import csv
import itertools as it
import multiprocessing as mp
import random

In [11]:
# --- Global bonus config (clusters are 1-based ids) ---
def make_global_bonus(favored_clusters, per_unit_bonus, unit='k-increments'):
    """
    Package the global-bonus settings into a plain dictionary.

    Parameters
    ----------
    favored_clusters : iterable
        1-based cluster IDs to favor (e.g., {2, 5}); duplicates collapse
        because the IDs are stored as a set.
    per_unit_bonus : float-convertible
        Bonus added to the reward per unit allocated; coerced with float().
    unit : str
        How a "unit" is counted, stored as given:
        'k-increments' -> units = allocation/k
        'absolute'     -> units = allocation

    Returns
    -------
    dict
        Keys "clusters" (set), "per_unit_bonus" (float) and "unit".
    """
    bonus_config = {}
    bonus_config["clusters"] = {cluster_id for cluster_id in favored_clusters}
    bonus_config["per_unit_bonus"] = float(per_unit_bonus)
    bonus_config["unit"] = unit
    return bonus_config

def _run_bonus(args):
    """
    Run all simulations for one (preferred_clusters, per_unit_bonus) config
    and write a single CSV. Returns (outfile, n_rows).

    Parameters
    ----------
    args : tuple
        Packed as a single tuple so the function can be mapped over a pool:
        (pref, inc, clusters, k, K, horizon, p, epsilon, num_simulations, out_dir)

        pref            : set of 1-based cluster IDs to favor; empty/falsy = no bonus
        inc             : per-unit bonus for the favored clusters
        clusters        : list of cluster dicts, each with an 'initial_need' key
        k, K            : allocation increment and total units to allocate
        horizon, p,
        epsilon         : forwarded unchanged to simulate_policy_dynamic_with_tpm
        num_simulations : number of simulated policies (= rows in the CSV)
        out_dir         : directory for the CSV (created if missing)

    Returns
    -------
    (str, int)
        Path of the written CSV and the number of reward rows it contains.
    """
    pref, inc, clusters, k, K, horizon, p, epsilon, num_simulations, out_dir = args

    # initial state from the ORIGINAL clusters
    initial_state = tuple(c['initial_need'] for c in clusters)

    # copies of the clusters whose 'initial_need' is raised by k * horizon;
    # these copies (not the originals) are what the simulator receives as `clusters`
    new_clusters = []
    for c in clusters:
        d = c.copy()
        d['initial_need'] = c['initial_need'] + k * horizon
        new_clusters.append(d)

    # action space per worker (cheap to recompute; avoids pickling a big list)
    num_clusters = len(clusters)
    action_space = generate_action_space(num_clusters, k, K)

    # global bonus & output suffix
    if pref:
        global_bonus = make_global_bonus(favored_clusters=pref, per_unit_bonus=inc, unit='k-increments')
        favored = "-".join(map(str, sorted(pref)))
        suffix = f"_bonus_{favored}_{inc}"
    else:
        global_bonus = None
        suffix = ""

    # base seed per config, independent of process scheduling order.
    # NOTE(review): hash() of an int/float tuple is not subject to str hash
    # randomization, but its value is not guaranteed to be identical across
    # Python versions/builds - confirm if cross-version reproducibility matters.
    base_seed = (hash((tuple(sorted(pref)) if pref else (), float(inc))) & 0xFFFFFFFF)

    # run simulations
    rewards_rows = []
    for i in range(num_simulations):
        # np.random.seed only accepts values in [0, 2**32 - 1]; base_seed can sit
        # near the top of that range, so wrap the sum instead of letting it overflow.
        seed = (base_seed + i) & 0xFFFFFFFF
        random.seed(seed)
        np.random.seed(seed)

        rewards, _policy = simulate_policy_dynamic_with_tpm(
            initial_state=initial_state,
            clusters=new_clusters,
            k=k, K=K, p=p, horizon=horizon,
            action_space=action_space,
            policy_functions={  # menu of candidate policy functions
                "need_based": need_based_policy,
                "per_capita": per_capita_need_policy,
                "population_based": population_based_policy,
                "income_based": income_based_policy,
                "proximity_based": proximity_based_policy,
                "weighted_hybrid": randomized_weighted_hybrid_policy,
                "mixed_random": mixed_random_policy_k_increments,
            },
            epsilon=epsilon,
            global_bonus=global_bonus
        )
        rewards_rows.append(rewards)

    # write CSV atomically: write to a temp file, then rename over the target
    os.makedirs(out_dir, exist_ok=True)
    outfile = os.path.join(out_dir, f"policy_rewards{suffix}.csv")
    tmpfile = outfile + ".tmp"

    try:
        with open(tmpfile, "w", newline="") as f:
            w = csv.writer(f)
            w.writerow([f"Cluster_{i+1}_Reward" for i in range(num_clusters)])
            w.writerows(rewards_rows)

        os.replace(tmpfile, outfile)
    finally:
        # after a successful replace the temp file is gone; if anything above
        # failed, remove the partial temp file rather than leaving it behind
        if os.path.exists(tmpfile):
            os.remove(tmpfile)

    return outfile, len(rewards_rows)

In [14]:
if __name__ == '__main__':
    # Driver cell: builds one task per (favored clusters, bonus) configuration and
    # runs each task in its own worker process; every task writes one CSV.

    # Use fork so workers inherit the main process state (Linux HPC friendly)
    try:
        mp.set_start_method('fork', force=True)
    except RuntimeError:
        pass

    ## Experimental setup for fixed population size and need
    clusters = [
        {"id": 1, "density": "High", "proximity": "Far", "income": "High-Income", "population": 148, "initial_need": 100},
        {"id": 2, "density": "High", "proximity": "Far", "income": "Low-Income", "population": 307, "initial_need": 300},
        {"id": 3, "density": "High", "proximity": "Far", "income": "Middle-Income", "population": 616, "initial_need": 200},
        {"id": 4, "density": "High", "proximity": "Near", "income": "High-Income", "population": 816, "initial_need": 50},
        {"id": 5, "density": "High", "proximity": "Near", "income": "Low-Income", "population": 1405, "initial_need": 200},
        {"id": 6, "density": "High", "proximity": "Near", "income": "Middle-Income", "population": 2782,
         "initial_need": 300},
        {"id": 7, "density": "Low", "proximity": "Far", "income": "High-Income", "population": 74, "initial_need": 100},
        {"id": 8, "density": "Low", "proximity": "Far", "income": "Low-Income", "population": 203, "initial_need": 500},
        {"id": 9, "density": "Low", "proximity": "Far", "income": "Middle-Income", "population": 396, "initial_need": 350},
        {"id": 10, "density": "Low", "proximity": "Near", "income": "High-Income", "population": 36, "initial_need": 50},
        {"id": 11, "density": "Low", "proximity": "Near", "income": "Low-Income", "population": 113, "initial_need": 50},
        {"id": 12, "density": "Low", "proximity": "Near", "income": "Middle-Income", "population": 230, "initial_need": 50}
    ]


    # choose to bias rewards, globally: every favored-cluster set is crossed
    # with every per-unit bonus below
    preferred_clusters = [{1},{2}, {5}]
    per_unit_bonuses = [.05, .10, .2]

    # True if we also want a baseline with no biasing, otherwise False
    no_bonus = True

    # Allocation Parameters
    K = 150  # Total additional units to allocate
    k = 50  # Allocation increment

    # MDP Parameters
    horizon = 3  # Number of time steps
    initial_state = tuple([cluster['initial_need'] for cluster in clusters])
    p = 0.7
    num_clusters = len(clusters)

    # NOTE: initial_state, new_clusters, policy_functions and action_space are
    # not passed to the workers (each worker rebuilds what it needs from
    # `clusters`); they are kept here as notebook-level variables.
    new_clusters = []
    for adict in clusters:
        adict2 = adict.copy()
        adict2['initial_need'] += k * horizon
        new_clusters.append(adict2)

    policy_functions = {
        "need_based": need_based_policy,
        "per_capita": per_capita_need_policy,
        "population_based": population_based_policy,
        "income_based": income_based_policy,
        "proximity_based": proximity_based_policy,
        "weighted_hybrid": randomized_weighted_hybrid_policy,
        "mixed_random": mixed_random_policy_k_increments
    }

    policy_functions_list = [i for i in policy_functions.values()]

    # Step 1: Generate Action Space
    print('generating actions')
    action_space = generate_action_space(num_clusters, k, K)

    # Define parameters
    epsilon = 0.01  # Include only states with probability > 0.01
    ################################################################

    # Number of policies simulated per bonus configuration (one CSV row each)
    num_simulations = 1000
    simulation_results = []

    all_bonuses = list(it.product(preferred_clusters, per_unit_bonuses))
    if no_bonus:
        all_bonuses.append((set(), 0))  # baseline (no bonus)

    out_dir = os.path.join('..', '..', 'data', 'natural_disaster')

    # Build tasks (each process handles one CSV end-to-end).
    # Pass num_simulations rather than a literal so the setting above is the
    # single source of truth for the number of rows per CSV.
    tasks = [
        (pref, inc, clusters, k, K, horizon, p, epsilon, num_simulations, out_dir)
        for (pref, inc) in all_bonuses
    ]

    N_WORKERS = int(os.environ.get("N_WORKERS", mp.cpu_count()))
    N_WORKERS = max(1, min(N_WORKERS, len(tasks)))  # don't spawn more workers than tasks

    # The pool is created from an explicit "fork" context, so children inherit
    # the main process state (no pickling of __main__).
    print(f"Launching {len(tasks)} bonus configs with {N_WORKERS} workers (fork)…")
    with mp.get_context("fork").Pool(processes=N_WORKERS) as pool:
        # results arrive in completion order, not submission order
        for outfile, n in pool.imap_unordered(_run_bonus, tasks, chunksize=1):
            print(f"✔ wrote {n} rows -> {outfile}")

generating actions
Launching 10 bonus configs with 10 workers (fork)…
✔ wrote 1000 rows -> ../../data/natural_disaster/policy_rewards.csv
✔ wrote 1000 rows -> ../../data/natural_disaster/policy_rewards_bonus_1_0.05.csv
✔ wrote 1000 rows -> ../../data/natural_disaster/policy_rewards_bonus_1_0.2.csv
✔ wrote 1000 rows -> ../../data/natural_disaster/policy_rewards_bonus_5_0.2.csv
✔ wrote 1000 rows -> ../../data/natural_disaster/policy_rewards_bonus_2_0.2.csv
✔ wrote 1000 rows -> ../../data/natural_disaster/policy_rewards_bonus_5_0.1.csv
✔ wrote 1000 rows -> ../../data/natural_disaster/policy_rewards_bonus_1_0.1.csv
✔ wrote 1000 rows -> ../../data/natural_disaster/policy_rewards_bonus_2_0.1.csv
✔ wrote 1000 rows -> ../../data/natural_disaster/policy_rewards_bonus_5_0.05.csv
✔ wrote 1000 rows -> ../../data/natural_disaster/policy_rewards_bonus_2_0.05.csv
