# Generate Policies for Natural Disaster Simulation

This notebook generates policies for the natural disaster relief experiment from the paper. For each of the 10000 policies, we record the rewards for each of the 12 clusters used in the experiment. The resulting reward vectors (one row per policy, one column per cluster) are stored in 'data/natural_disaster/policy_rewards.csv'.

In [1]:
from src.environments.natural_disaster import (
    need_based_policy,
    per_capita_need_policy,
    population_based_policy,
    income_based_policy,
    proximity_based_policy,
    randomized_weighted_hybrid_policy,
    mixed_random_policy_k_increments,
    generate_action_space,
    simulate_policy_dynamic_with_tpm
)
import csv
import os

In [5]:
## Experimental setup for fixed population size and need
# The cluster table is kept as compact rows and expanded into dicts below;
# the field order here is the key order of every resulting cluster dict.
_CLUSTER_FIELDS = ("id", "density", "proximity", "income", "population", "initial_need")
_CLUSTER_ROWS = [
    (1, "High", "Far", "High-Income", 148, 100),
    (2, "High", "Far", "Low-Income", 307, 300),
    (3, "High", "Far", "Middle-Income", 616, 200),
    (4, "High", "Near", "High-Income", 816, 50),
    (5, "High", "Near", "Low-Income", 1405, 200),
    (6, "High", "Near", "Middle-Income", 2782, 300),
    (7, "Low", "Far", "High-Income", 74, 100),
    (8, "Low", "Far", "Low-Income", 203, 500),
    (9, "Low", "Far", "Middle-Income", 396, 350),
    (10, "Low", "Near", "High-Income", 36, 50),
    (11, "Low", "Near", "Low-Income", 113, 50),
    (12, "Low", "Near", "Middle-Income", 230, 50),
]
clusters = [dict(zip(_CLUSTER_FIELDS, row)) for row in _CLUSTER_ROWS]

# Allocation Parameters
K = 150  # Total additional units to allocate
k = 50  # Allocation increment

# MDP Parameters
horizon = 3  # Number of time steps
initial_state = tuple(entry["initial_need"] for entry in clusters)
p = 0.7
num_clusters = len(clusters)

# Copies of the clusters whose initial need is raised by k * horizon;
# the dicts in `clusters` themselves are left unmodified.
new_clusters = [
    {**entry, "initial_need": entry["initial_need"] + k * horizon}
    for entry in clusters
]

# Registry of the allocation policies, keyed by a short name.
policy_functions = dict(
    need_based=need_based_policy,
    per_capita=per_capita_need_policy,
    population_based=population_based_policy,
    income_based=income_based_policy,
    proximity_based=proximity_based_policy,
    weighted_hybrid=randomized_weighted_hybrid_policy,
    mixed_random=mixed_random_policy_k_increments,
)

policy_functions_list = list(policy_functions.values())

# Step 1: Generate Action Space
print('generating actions')
action_space = generate_action_space(num_clusters, k, K)

# Define parameters
# `epsilon` is forwarded to the simulator; per the original note it is meant
# to include only states with probability > 0.01.
epsilon = 0.01
################################################################
# Generate `num_simulations` (10000) different policies for the simulation
num_simulations = 10000
simulation_results = []

for i in range(num_simulations):
    # NOTE(review): `K` receives the increment `k` (50), not the total budget
    # `K` (150) that `action_space` was generated with. Left unchanged because
    # the stored results depend on it -- confirm this is intentional.
    rewards, policy = simulate_policy_dynamic_with_tpm(
        initial_state=initial_state,
        clusters=new_clusters,
        k=k,
        K=k,
        p=p,
        horizon=horizon,
        action_space=action_space,
        policy_functions=policy_functions,
        # Use the constant defined above instead of repeating the literal,
        # so that changing `epsilon` actually changes the simulation.
        epsilon=epsilon
    )
    # Simulations are numbered from 1 in the stored records.
    simulation_results.append({"simulation": i + 1, "rewards": rewards, "policy": policy})

# Print rewards for the first few simulations
for result in simulation_results[:10]:
    print(f"Simulation {result['simulation']} -> Rewards: {result['rewards']}")

# Destination of the reward table, relative to the notebook's working directory.
output_csv = os.path.join('..', '..', 'data', 'natural_disaster', 'policy_rewards.csv')

# Create the target directory if it is missing; otherwise open() would raise
# FileNotFoundError and the results of the long simulation run would be lost.
os.makedirs(os.path.dirname(output_csv), exist_ok=True)

# Open the file for writing (newline='' lets the csv module control line endings)
with open(output_csv, mode='w', newline='') as file:
    writer = csv.writer(file)

    # Write the header: one column per cluster, numbered from 1
    writer.writerow([f"Cluster_{cluster_idx + 1}_Reward" for cluster_idx in range(num_clusters)])

    # Write only the reward vectors, one row per simulated policy
    writer.writerows(result["rewards"] for result in simulation_results)

generating actions
Simulation 1 -> Rewards: [0.030101652892562048, 0.23323278236914724, 0.021092089728453337, 0.03691115702479354, 0.02177922077922075, 0.06620247933884309, 0.029910743801652956, 0.349394681076499, 0.02116694214876014, 0.0385723140495869, 0.03691115702479354, 0.03691115702479354]
Simulation 2 -> Rewards: [0.03050578512396701, 0.01668457300275475, 0.02124439197166467, 0.04479132231404977, 0.02416469893742617, 0.3822782369145995, 0.03731487603305747, 0.24935134562407316, 0.015185950413223171, 0.0452066115702481, 0.03738533057851255, 0.03717768595041338]
Simulation 3 -> Rewards: [0.0350239669421484, 0.02082093663911839, 0.021587957497048376, 0.03717768595041338, 0.026340613931522993, 0.3321046831955913, 0.030696694214876103, 0.07930959949141762, 0.22701005509641806, 0.0526126033057853, 0.04479132231404977, 0.04499896694214894]
Simulation 4 -> Rewards: [0.03015371900826453, 0.016701101928374585, 0.0212656434474616, 0.045451446280991896, 0.03266056670602123, 0.37627823691459