# Generate Policies for Natural Disaster Simulation

This notebook generates policies for the natural disaster relief experiment from the paper. For each generated policy (the number of policies is set by `num_simulations` in the code below, currently 1000), we record the rewards for each of the 12 clusters used in the experiment. The per-policy reward vectors are stored in 'data/natural_disaster/policy_rewards.csv' (relative to the project root).

In [1]:
import os
import sys
from pathlib import Path

# Directory the kernel is running in when this cell executes.
cwd = os.getcwd()

# Two levels above the working directory goes to the front of sys.path so the
# `src` package imported below can be resolved.
project_root = os.path.join(Path.cwd(), os.pardir, os.pardir)
sys.path.insert(0, str(project_root))

from src.environments.natural_disaster import (
    need_based_policy,
    per_capita_need_policy,
    population_based_policy,
    income_based_policy,
    proximity_based_policy,
    randomized_weighted_hybrid_policy,
    mixed_random_policy_k_increments,
    generate_action_space,
    simulate_policy_dynamic_with_tpm
)
import csv

In [8]:
## Experimental setup for fixed population size and need
# One row per cluster: (density, proximity, income, population, initial_need).
# Cluster ids are 1-based and follow the row order.
_cluster_rows = [
    ("High", "Far", "High-Income", 148, 100),
    ("High", "Far", "Low-Income", 307, 300),
    ("High", "Far", "Middle-Income", 616, 200),
    ("High", "Near", "High-Income", 816, 50),
    ("High", "Near", "Low-Income", 1405, 200),
    ("High", "Near", "Middle-Income", 2782, 300),
    ("Low", "Far", "High-Income", 74, 100),
    ("Low", "Far", "Low-Income", 203, 500),
    ("Low", "Far", "Middle-Income", 396, 350),
    ("Low", "Near", "High-Income", 36, 50),
    ("Low", "Near", "Low-Income", 113, 50),
    ("Low", "Near", "Middle-Income", 230, 50),
]

clusters = [
    {
        "id": cluster_id,
        "density": density,
        "proximity": proximity,
        "income": income,
        "population": population,
        "initial_need": need,
    }
    for cluster_id, (density, proximity, income, population, need) in enumerate(_cluster_rows, start=1)
]

# Optional global reward bias. Example configuration (left disabled):
# global_bonus = {
#     "clusters": [2],              # 1-based IDs to boost (optional)
#     "category": "Low-Income",     # or boost a category (optional)
#     "weight": 0.2,                # 20% boost
#     "max_boost": 1.0              # optional safety cap on total boost factor
# }
global_bonus = None

# Allocation parameters
K = 150  # total additional units to allocate
k = 50   # allocation increment

# MDP parameters
horizon = 3  # number of time steps
initial_state = tuple(entry["initial_need"] for entry in clusters)
p = 0.7  # forwarded to the simulator as `p`; presumably a transition probability -- TODO confirm
num_clusters = len(clusters)

# Copies of the clusters with initial_need raised by k * horizon; the dicts in
# `clusters` are left untouched.
new_clusters = [
    {**entry, "initial_need": entry["initial_need"] + k * horizon}
    for entry in clusters
]

# Allocation policy callables keyed by a short label; insertion order is the
# order in which they are listed here.
policy_functions = dict(
    need_based=need_based_policy,
    per_capita=per_capita_need_policy,
    population_based=population_based_policy,
    income_based=income_based_policy,
    proximity_based=proximity_based_policy,
    weighted_hybrid=randomized_weighted_hybrid_policy,
    mixed_random=mixed_random_policy_k_increments,
)

# The same callables as a plain list, in the dictionary's order.
policy_functions_list = list(policy_functions.values())

# Step 1: Generate Action Space
print('generating actions')
action_space = generate_action_space(num_clusters, k, K)

# Define parameters
epsilon = 0.01  # Include only states with probability > epsilon
################################################################
# Step 2: Run the simulator num_simulations times. Each run returns one policy
# together with its reward vector (one entry per cluster).
# NOTE(review): no random seed is set anywhere in this notebook. If the policy
# functions are stochastic (names such as `randomized_weighted_hybrid_policy`
# suggest so), results will differ between runs -- confirm and seed if needed.
num_simulations = 1000
simulation_results = []

for i in range(num_simulations):
    rewards, policy = simulate_policy_dynamic_with_tpm(
        initial_state=initial_state,
        clusters=new_clusters,
        k=k,
        K=K,
        p=p,
        horizon=horizon,
        action_space=action_space,
        policy_functions=policy_functions,
        # Use the constant defined above so that changing `epsilon` actually
        # takes effect (the call previously repeated the literal 0.01).
        epsilon=epsilon,
        global_bonus=global_bonus
    )
    # Simulations are numbered from 1 for display purposes.
    simulation_results.append({"simulation": i + 1, "rewards": rewards, "policy": policy})

# Print rewards for the first few simulations
for result in simulation_results[:10]:
    print(f"Simulation {result['simulation']} -> Rewards: {result['rewards']}")

# Runs with a global bonus get a file-name suffix so they are written to a
# different file than unbiased runs.
bonus = '_bonus' if global_bonus else ''

output_csv = os.path.join('..', '..', 'data', 'natural_disaster', f'policy_rewards{bonus}.csv')

# Make sure the target directory exists; on a fresh checkout open() would
# otherwise fail with FileNotFoundError.
os.makedirs(os.path.dirname(output_csv), exist_ok=True)

# Open the file for writing (newline='' as required by the csv module)
with open(output_csv, mode='w', newline='') as file:
    writer = csv.writer(file)

    # Write the header: one reward column per cluster, numbered from 1
    writer.writerow([f"Cluster_{i + 1}_Reward" for i in range(num_clusters)])

    # Write only the reward vectors (the policies themselves are not saved)
    for result in simulation_results:
        writer.writerow(result["rewards"])

# Report success only after the file has been closed and flushed.
print(f'Successfully wrote {output_csv}')

generating actions
Simulation 1 -> Rewards: [0.030108264462809967, 0.03707396694214897, 0.023983864620228217, 0.04095833333333349, 0.03004427390791023, 0.09198044077134944, 0.04316785123966903, 0.5453028183937295, 0.2310494765840213, 0.044355475206611664, 0.04095833333333349, 0.0420833333333335]
Simulation 2 -> Rewards: [0.030000000000000034, 0.039706336088154236, 0.02162467532467528, 0.04070833333333348, 0.02207378984651707, 0.07947396694214859, 0.03498870523415947, 0.5516439182030116, 0.0624540771349864, 0.3599519972451773, 0.04416666666666681, 0.037625000000000144]
Simulation 3 -> Rewards: [0.03032479338842978, 0.040418319559228776, 0.02152662337662333, 0.037500000000000144, 0.02262987012987008, 0.2597473370064273, 0.04676391184572965, 0.5516363742318289, 0.05910038567493133, 0.060242975206611704, 0.038250000000000145, 0.04579166666666682]
Simulation 4 -> Rewards: [0.030119090909090925, 0.03635275482093669, 0.021517709563164057, 0.040958333333333465, 0.03269933097205819, 0.306244719