In [33]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from tqdm import tqdm

plt.rcParams['figure.dpi'] = 200

import os
import sys
cwd = os.getcwd()
from pathlib import Path
project_root = os.path.join(Path.cwd(), '..', '..')
sys.path.insert(0, str(project_root))

from src.environments.natural_disaster import (
    need_based_policy,
    per_capita_need_policy,
    population_based_policy,
    income_based_policy,
    proximity_based_policy,
    randomized_weighted_hybrid_policy,
    mixed_random_policy_k_increments,
    generate_action_space,
    simulate_policy_dynamic_with_tpm
)

from src.p_mean import generalized_p_mean, get_optimum_vector, generate_p_grid
from src.portfolio import Policy, Portfolio, budget_portfolio_with_suboptimalities, portfolio_with_line_search, compute_portfolio_worst_approx_ratio, portfolio_of_random_policies, portfolio_of_random_norms, portfolio_with_gpi, gpi


# Setup

## Load scores

In [17]:
scores = pd.read_csv(os.path.join('..', '..', 'data', 'natural_disaster', 'policy_rewards.csv'))
scores = np.array(scores)
scores = [scores[i] for i in range(len(scores))]

## Initialize parameters

In [34]:
alpha = 0.95
N = len(scores[0])
gridsize = 500

p_grid = generate_p_grid(N=N, alpha=alpha, grid_size=gridsize)

alpha_values = [0.05 * j for j in range(2, 20)] + [0.99]


## Helper functions

In [19]:
def get_optimum_policy(p):
    return get_optimum_vector(vectors=scores, p=p)

def get_performance(policy, p):
    return generalized_p_mean(x=policy, p=p)

optimal_values = {
    p: get_performance(get_optimum_policy(p), p) for p in tqdm(p_grid)
}

def get_optimal_value(p):
    if p in optimal_values.keys():
        return optimal_values[p]
    else:
        return get_performance(get_optimum_policy(p), p)


100%|██████████| 500/500 [00:19<00:00, 25.32it/s]


# Compute portfolios

## Heuristic portfolio

In [20]:
heuristic_results = pd.DataFrame(
    columns=['K', 'portfolio_size', 'approximation', 'p_values']
)
heuristic_results.set_index('K', inplace=True)

for K in range(1, 11):
    heuristic_portfolio = budget_portfolio_with_suboptimalities(
        initial_p=-100, K=K, get_optimum_policy=get_optimum_policy, get_performance=get_performance,
    )
    
    print('p values', [policy.p for policy in heuristic_portfolio])
    
    heuristic_approximation = compute_portfolio_worst_approx_ratio(
        portfolio=heuristic_portfolio, get_performance=get_performance,
        get_optimal_value=get_optimal_value, p_grid=p_grid
    )
    
    heuristic_results.at[K, 'portfolio_size'] = len(heuristic_portfolio)
    heuristic_results.at[K, 'approximation'] = heuristic_approximation
    heuristic_results.at[K, 'p_values'] = [round(policy.p, 3) for policy in heuristic_portfolio]
    
    print(heuristic_approximation)


p values [-100]
0.894121199407261
p values [-100, 1.0]
0.9701918471714172
p values [-100, 1.0, -49.5]
0.9701918471714172
p values [-100, 1.0, -49.5, -24.25]
0.9701918471714172
p values [-100, 1.0, -49.5, -24.25, -11.625]
0.9701918471714172
p values [-100, 1.0, -49.5, -24.25, -11.625, -5.3125]
0.9701918471714172
p values [-100, 1.0, -49.5, -24.25, -11.625, -5.3125, -2.15625]
0.9701918471714172
p values [-100, 1.0, -49.5, -24.25, -11.625, -5.3125, -2.15625, -0.578125]
0.9701918471714172
p values [-100, 1.0, -49.5, -24.25, -11.625, -5.3125, -2.15625, -0.578125, 0.2109375]
0.9838380819107201
p values [-100, 1.0, -49.5, -24.25, -11.625, -5.3125, -2.15625, -0.578125, 0.2109375, 0.60546875]
0.9983237634881492


In [21]:
heuristic_results

Unnamed: 0_level_0,portfolio_size,approximation,p_values
K,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1,0.894121,[-100]
2,2,0.970192,"[-100, 1.0]"
3,3,0.970192,"[-100, 1.0, -49.5]"
4,4,0.970192,"[-100, 1.0, -49.5, -24.25]"
5,5,0.970192,"[-100, 1.0, -49.5, -24.25, -11.625]"
6,6,0.970192,"[-100, 1.0, -49.5, -24.25, -11.625, -5.312]"
7,7,0.970192,"[-100, 1.0, -49.5, -24.25, -11.625, -5.312, -2..."
8,8,0.970192,"[-100, 1.0, -49.5, -24.25, -11.625, -5.312, -2..."
9,9,0.983838,"[-100, 1.0, -49.5, -24.25, -11.625, -5.312, -2..."
10,10,0.998324,"[-100, 1.0, -49.5, -24.25, -11.625, -5.312, -2..."


In [22]:
heuristic_results.to_csv('../../data/natural_disaster/portfolios/heuristic_portfolio.csv')

## Line search portfolio

In [23]:
line_search_results = pd.DataFrame(
    columns=['K', 'alpha', 'oracle_calls', 'approximation', 'p_values']
)
line_search_results.set_index('K', inplace=True)

for alpha in alpha_values:
    line_search_portfolio = portfolio_with_line_search(
        get_performance=get_performance, get_optimum_policy=get_optimum_policy, d=N, alpha=alpha,
    )

    print('alpha:', alpha)
    print('p values:', [policy.p for policy in line_search_portfolio])
    print('oracle calls:', line_search_portfolio.oracle_calls)
    print('portfolio size:', len(line_search_portfolio))

    K = len(line_search_portfolio)

    if not K in line_search_results.index:
        line_search_approximation = compute_portfolio_worst_approx_ratio(
            portfolio=line_search_portfolio, get_performance=get_performance,
            get_optimal_value=get_optimal_value, p_grid=p_grid
        )
        line_search_results.at[K, 'alpha'] = alpha
        line_search_results.at[K, 'oracle_calls'] = line_search_portfolio.oracle_calls
        line_search_results.at[K, 'approximation'] = line_search_approximation
        line_search_results.at[K, 'p_values'] = [round(policy.p, 3) for policy in line_search_portfolio]

        print('approximation:', line_search_approximation)
        
        if line_search_approximation == 1.0:
            print('Found optimal portfolio for alpha =', alpha)
            break
        

alpha: 0.1
p values: [-1.0791812460476249]
oracle calls: 1
portfolio size: 1
approximation: 0.894121199407261
alpha: 0.15000000000000002
p values: [-1.3098310436793363]
oracle calls: 1
portfolio size: 1
alpha: 0.2
p values: [-1.5439593106327716]
oracle calls: 1
portfolio size: 1
alpha: 0.25
p values: [-1.7924812503605783]
oracle calls: 1
portfolio size: 1
alpha: 0.30000000000000004
p values: [-2.0639225743800886]
oracle calls: 1
portfolio size: 1
alpha: 0.35000000000000003
p values: [-2.3669787403029057]
oracle calls: 1
portfolio size: 1
alpha: 0.4
p values: [-2.7119194414478502]
oracle calls: 1
portfolio size: 1
alpha: 0.45
p values: [-3.111938258777043]
oracle calls: 2
portfolio size: 1
alpha: 0.5
p values: [-3.5849625007211565]
oracle calls: 3
portfolio size: 1
alpha: 0.55
p values: [-4.15649524309681]
oracle calls: 3
portfolio size: 1
alpha: 0.6000000000000001
p values: [-4.864491000800574]
oracle calls: 4
portfolio size: 1
alpha: 0.65
p values: [-5.768350036552267]
oracle calls: 4

In [24]:
line_search_results

Unnamed: 0_level_0,alpha,oracle_calls,approximation,p_values
K,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,0.1,1,0.894121,[-1.079]
3,0.85,13,0.945846,"[-15.29, -0.018, 0.491]"
5,0.95,54,0.99966,"[-48.445, -0.1, 0.381, 0.536, 0.768]"
7,0.99,453,1.0,"[-247.246, -0.252, 0.32, 0.402, 0.507, 0.727, ..."


In [25]:
line_search_results.to_csv('../../data/natural_disaster/portfolios/line_search_portfolio.csv')

## Random norm portfolio

In [26]:
alpha_0 = 0.90
initial_p = - np.log(N)/np.log(1/alpha_0)

K_values = np.arange(1, 10)
random_norm_results = pd.DataFrame(columns=['K', 'approximation'])
random_norm_results.set_index('K', inplace=True)

T = 10

for K in K_values:
    avg_approximation = 0
    for t in range(T):
        portfolio_random_norm_sample = portfolio_of_random_norms(
            initial_p=initial_p,
            K=K,
            get_optimum_policy=get_optimum_policy,
            seed=t
        )
        actual_approximation_random_norm_sample = compute_portfolio_worst_approx_ratio(
            portfolio=portfolio_random_norm_sample,
            get_optimal_value=get_optimal_value,
            p_grid=p_grid,
            get_performance=get_performance
        )
        actual_approximation_random_norm_sample = np.round(actual_approximation_random_norm_sample, 4)
        avg_approximation += actual_approximation_random_norm_sample

    avg_approximation /= T
    random_norm_results.at[K, 'approximation'] = avg_approximation

    print('K = ' + str(K) + ', actual approximation = ' + str(avg_approximation))


K = 1, actual approximation = 0.86767
K = 2, actual approximation = 0.89746
K = 3, actual approximation = 0.8985
K = 4, actual approximation = 0.8985
K = 5, actual approximation = 0.90276
K = 6, actual approximation = 0.90276
K = 7, actual approximation = 0.9059799999999999
K = 8, actual approximation = 0.9059799999999999
K = 9, actual approximation = 0.9059799999999999


In [27]:
random_norm_results.to_csv('../../data/natural_disaster/portfolios/random_norm_portfolio.csv')

## Random policy portfolio

In [28]:
K_values = np.arange(1, 10)
random_policy_results = pd.DataFrame(columns=['K', 'approximation'])
random_policy_results.set_index('K', inplace=True)

T = 10

for K in K_values:
    avg_approximation = 0
    for t in range(T):
        portfolio_random_policy = portfolio_of_random_policies(
            policies=[Policy(scores[i]) for i in range(len(scores))], K=K
        )
        actual_approximation_random_policy = compute_portfolio_worst_approx_ratio(
            portfolio=portfolio_random_policy,
            get_optimal_value=get_optimal_value,
            p_grid=p_grid,
            get_performance=get_performance
        )
        avg_approximation += actual_approximation_random_policy
    avg_approximation = np.round(avg_approximation/T, 4)
    random_policy_results.loc[K, 'approximation'] = avg_approximation

    print('K = ' + str(K) + ', actual approximation = ' + str(avg_approximation))


K = 1, actual approximation = 0.6209
K = 2, actual approximation = 0.6927
K = 3, actual approximation = 0.6669
K = 4, actual approximation = 0.6989
K = 5, actual approximation = 0.7268
K = 6, actual approximation = 0.7177
K = 7, actual approximation = 0.7304
K = 8, actual approximation = 0.733
K = 9, actual approximation = 0.7504


In [29]:
random_policy_results.to_csv('../../data/natural_disaster/portfolios/random_policy_portfolio.csv')

## GPI portfolio

In [30]:
gpi_results = pd.DataFrame(
    columns=['K', 'approximation']
)

gpi_results.set_index('K', inplace=True)

gpi_portfolio = portfolio_with_gpi(vectors=scores, portfolio_size=10)
policies = list(gpi_portfolio.policies)

print('There are ' + str(len(policies)) + ' policies in the GPI portfolio.')

for K in range(1, len(gpi_portfolio) + 1):
    portfolio = Portfolio()
    for i in range(K):
        portfolio.add_policy(policies[i])
        
    gpi_approximation = compute_portfolio_worst_approx_ratio(
        portfolio=portfolio, get_performance=get_performance,
        get_optimal_value=get_optimal_value, p_grid=p_grid
    )
    
    print('K:', K)
    print('Approximation:', gpi_approximation)
    
    gpi_results.loc[K, 'approximation'] = gpi_approximation

Iteration 0
differential_evolution step 1: f(x)= -0.184991
differential_evolution step 2: f(x)= -0.20088
differential_evolution step 3: f(x)= -0.20088
differential_evolution step 4: f(x)= -0.229459
differential_evolution step 5: f(x)= -0.251975
differential_evolution step 6: f(x)= -0.296956
differential_evolution step 7: f(x)= -0.296956
differential_evolution step 8: f(x)= -0.296956
differential_evolution step 9: f(x)= -0.300202
differential_evolution step 10: f(x)= -0.300202
differential_evolution step 11: f(x)= -0.300202
differential_evolution step 12: f(x)= -0.316561
differential_evolution step 13: f(x)= -0.316561
differential_evolution step 14: f(x)= -0.321535
differential_evolution step 15: f(x)= -0.358264
differential_evolution step 16: f(x)= -0.358264
differential_evolution step 17: f(x)= -0.418341
differential_evolution step 18: f(x)= -0.418341
differential_evolution step 19: f(x)= -0.432906
differential_evolution step 20: f(x)= -0.432906
differential_evolution step 21: f(x)= -

In [31]:
gpi_results

Unnamed: 0_level_0,approximation
K,Unnamed: 1_level_1
1,0.628603
2,0.628696
3,0.629735
4,0.629735
5,0.735183
6,0.735183
7,0.735183
8,0.735183
9,0.735183
10,0.735183


In [32]:
gpi_results.to_csv('../../data/natural_disaster/portfolios/gpi_portfolio.csv')