In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import pandas as pd
from src.p_mean import generalized_p_mean, generate_p_grid, get_optimum_vector, get_optimum_value
from src.portfolio import portfolio_with_line_search, budget_portfolio_with_suboptimalities, compute_portfolio_worst_approx_ratio, portfolio_of_random_norms, portfolio_of_random_policies, Policy, Portfolio, portfolio_with_gpi

plt.rcParams['figure.dpi'] = 200


## Load data

In [2]:
df = pd.read_csv('../../data/sclm_real_world/policy_rewards.csv')

# Remove rows with zero scores
for id in df['Key']:
    if df.loc[df['Key'] == id, 'Score for reward function 0'].values[0] == 0.0:
        # remove the row
        df = df[df['Key'] != id]

df

Unnamed: 0,Key,Category,Score for reward function 0,Score for reward function 1,Score for reward function 2,Score for reward function 3,Score for reward function 4,Score for reward function 5,Score for reward function 6,Score for reward function 7,...,Score for reward function 275,Score for reward function 276,Score for reward function 277,Score for reward function 278,Score for reward function 279,Score for reward function 280,Score for reward function 281,Score for reward function 282,Score for reward function 283,Score for reward function 284
0,Enrollment gestational age (0-10),Enrollment gestational age,332.09,313.49,302.93,302.33,320.29,329.22,313.49,330.82,...,327.8,331.8,302.84,299.87,321.11,299.87,332.09,295.62,322.0,299.87
1,Enrollment gestational age (11-20),Enrollment gestational age,209.38,184.67,184.76,192.6,196.8,206.78,184.67,210.0,...,201.87,207.78,190.13,184.29,192.09,184.29,209.38,183.09,197.38,184.29
4,Enrollment delivery status (0-0),Enrollment delivery status,1005.38,938.73,921.09,933.6,966.87,993.36,938.73,1001.02,...,994.33,1001.2,930.84,908.4,951.93,908.4,1005.38,900.02,972.0,908.4
5,Enrollment delivery status (1-1),Enrollment delivery status,173.24,164.36,162.13,159.09,171.51,174.04,164.36,173.98,...,173.58,171.09,160.42,161.49,165.82,161.49,173.24,158.4,171.49,161.49
7,Gravidity (number of pregnancies) (1-1),Number of pregnancies,621.27,594.71,578.18,568.93,595.31,612.62,594.71,618.62,...,616.89,617.73,567.36,563.64,586.22,563.64,621.27,547.0,595.78,563.64
8,Gravidity (number of pregnancies) (2-4),Number of pregnancies,537.24,490.16,486.64,501.07,519.87,534.76,490.16,536.11,...,528.87,533.87,501.38,487.58,511.4,487.58,537.24,491.53,524.49,487.58
9,Gravidity (number of pregnancies) (5-10),Number of pregnancies,6.53,5.98,5.53,6.56,7.47,6.29,5.98,6.58,...,7.33,6.22,6.78,5.11,6.42,5.11,6.53,4.78,7.73,5.11
10,Parity (number of viable pregnancies) (0-0),Number of viable pregnancies,583.02,556.22,542.22,535.96,559.2,574.58,556.22,580.8,...,580.27,581.91,532.31,527.6,548.42,527.6,583.02,512.33,559.2,527.6
11,Parity (number of viable pregnancies) (1-1),Number of viable pregnancies,391.64,350.8,345.49,354.8,372.31,392.24,350.8,391.02,...,384.69,383.38,360.62,355.31,371.47,355.31,391.64,353.44,377.02,355.31
12,Parity (number of viable pregnancies) (2-2),Number of viable pregnancies,162.69,158.24,158.71,163.24,167.0,160.53,158.24,161.84,...,163.51,167.33,158.53,152.4,159.36,152.4,162.69,157.13,166.51,152.4


In [3]:
scores = [
    np.array((df['Score for reward function ' + str(i)])) for i in range(285)
]

## Initialize parameters

In [4]:
alpha = 0.60
N = 53
p_grid = generate_p_grid(N=N, alpha=alpha, grid_size=500)

alpha_values = [0.05 * j for j in range(2, 20)] + [0.99]

## Helper functions

In [5]:
def get_optimum_policy(p):
    return get_optimum_vector(vectors=scores, p=p)

def get_performance(policy, p):
    return generalized_p_mean(x=policy, p=p)

optimal_values = {
    p: get_performance(get_optimum_policy(p), p) for p in tqdm(p_grid)
}

def get_optimal_value(p):
    if p in optimal_values.keys():
        return optimal_values[p]
    else:
        return get_performance(Policy(get_optimum_policy(p)), p)


100%|██████████| 500/500 [00:03<00:00, 159.97it/s]


# Compute portfolios

## Heuristic portfolio

In [6]:
heuristic_results = pd.DataFrame(
    columns=['K', 'portfolio_size', 'approximation', 'p_values']
)
heuristic_results.set_index('K', inplace=True)

for K in range(1, 11):
    heuristic_portfolio = budget_portfolio_with_suboptimalities(
        initial_p=-100, K=K, get_optimum_policy=get_optimum_policy, get_performance=get_performance,
    )

    print('p values', [policy.p for policy in heuristic_portfolio])

    heuristic_approximation = compute_portfolio_worst_approx_ratio(
        portfolio=heuristic_portfolio, get_performance=get_performance,
        get_optimal_value=get_optimal_value, p_grid=p_grid
    )

    heuristic_results.at[K, 'portfolio_size'] = len(heuristic_portfolio)
    heuristic_results.at[K, 'approximation'] = heuristic_approximation
    heuristic_results.at[K, 'p_values'] = [round(policy.p, 3) for policy in heuristic_portfolio]

    print(heuristic_approximation)


p values [-100]
0.9383407578022731
p values [-100, 1.0]
0.9383407578022731
p values [-100, 1.0, -49.5]
0.9383407578022731
p values [-100, 1.0, -49.5, -24.25]
0.9383407578022731
p values [-100, 1.0, -49.5, -24.25, -11.625]
0.9383407578022731
p values [-100, 1.0, -49.5, -24.25, -11.625, -5.3125]
0.9822052471912309
p values [-100, 1.0, -49.5, -24.25, -11.625, -5.3125, -2.15625]
0.9822052471912309
p values [-100, 1.0, -49.5, -24.25, -11.625, -5.3125, -2.15625, -0.578125]
0.9858470422981619
p values [-100, 1.0, -49.5, -24.25, -11.625, -5.3125, -2.15625, -0.578125, 0.2109375]
0.9866862263519582
p values [-100, 1.0, -49.5, -24.25, -11.625, -5.3125, -2.15625, -0.578125, 0.2109375, -0.18359375]
0.9906965584829375


In [7]:
heuristic_results

Unnamed: 0_level_0,portfolio_size,approximation,p_values
K,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1,0.938341,[-100]
2,2,0.938341,"[-100, 1.0]"
3,3,0.938341,"[-100, 1.0, -49.5]"
4,4,0.938341,"[-100, 1.0, -49.5, -24.25]"
5,5,0.938341,"[-100, 1.0, -49.5, -24.25, -11.625]"
6,6,0.982205,"[-100, 1.0, -49.5, -24.25, -11.625, -5.312]"
7,7,0.982205,"[-100, 1.0, -49.5, -24.25, -11.625, -5.312, -2..."
8,8,0.985847,"[-100, 1.0, -49.5, -24.25, -11.625, -5.312, -2..."
9,9,0.986686,"[-100, 1.0, -49.5, -24.25, -11.625, -5.312, -2..."
10,10,0.990697,"[-100, 1.0, -49.5, -24.25, -11.625, -5.312, -2..."


In [8]:
heuristic_results.to_csv('../../data/sclm_real_world/portfolios/heuristic_portfolio.csv')

## p-Mean Portfolio with Line Search

In [9]:
line_search_results = pd.DataFrame(
    columns=['K', 'alpha', 'oracle_calls', 'approximation', 'p_values']
)
line_search_results.set_index('K', inplace=True)

for alpha in alpha_values:
    line_search_portfolio = portfolio_with_line_search(
        get_optimum_policy=get_optimum_policy, get_performance=get_performance, d=len(scores[0]), alpha=alpha
    )

    print('alpha:', alpha)
    print('p values:', [policy.p for policy in line_search_portfolio])
    print('oracle calls:', line_search_portfolio.oracle_calls)
    print('portfolio size:', len(line_search_portfolio))

    K = len(line_search_portfolio)

    if not K in line_search_results.index:
        line_search_approximation = compute_portfolio_worst_approx_ratio(
            portfolio=line_search_portfolio, get_performance=get_performance,
            get_optimal_value=get_optimal_value, p_grid=p_grid
        )
        line_search_results.at[K, 'alpha'] = alpha
        line_search_results.at[K, 'oracle_calls'] = line_search_portfolio.oracle_calls
        line_search_results.at[K, 'approximation'] = line_search_approximation
        line_search_results.at[K, 'p_values'] = [round(policy.p, 3) for policy in line_search_portfolio]
        
        if line_search_approximation == 1.0:
            print('Found optimal portfolio for alpha =', alpha)
            break

        print('approximation:', line_search_approximation)


alpha: 0.1
p values: [np.float64(-1.7242758696007894)]
oracle calls: 2
portfolio size: 1
approximation: 0.9235673961490934
alpha: 0.15000000000000002
p values: [np.float64(-2.0927995831486377)]
oracle calls: 2
portfolio size: 1
alpha: 0.2
p values: [np.float64(-2.4668810662894636)]
oracle calls: 3
portfolio size: 1
alpha: 0.25
p values: [np.float64(-2.8639602272816), np.float64(0.034009943179600044)]
oracle calls: 7
portfolio size: 2
approximation: 0.9820359281437125
alpha: 0.30000000000000004
p values: [np.float64(-3.2976591325706526), np.float64(-0.07441478314266314)]
oracle calls: 7
portfolio size: 2
alpha: 0.35000000000000003
p values: [np.float64(-3.7818710626317373), np.float64(-0.19546776565793433)]
oracle calls: 8
portfolio size: 2
alpha: 0.4
p values: [np.float64(-4.333004553512617), np.float64(-0.33325113837815423), np.float64(0.3333744308109229)]
oracle calls: 11
portfolio size: 3
approximation: 0.9820359281437125
alpha: 0.45
p values: [np.float64(-4.972139820765523), np.flo

In [10]:
line_search_results

Unnamed: 0_level_0,alpha,oracle_calls,approximation,p_values
K,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,0.1,2,0.923567,[-1.724]
2,0.25,7,0.982036,"[-2.864, 0.034]"
3,0.4,11,0.982036,"[-4.333, -0.333, 0.333]"
4,0.55,19,0.982036,"[-6.641, -0.433, -0.075, 0.463]"
5,0.65,23,0.99372,"[-9.216, -4.108, -0.437, -0.078, 0.461]"
6,0.75,46,0.999176,"[-13.801, -6.4, -0.594, -0.22, 0.085, 0.542]"
7,0.8,61,1.0,"[-17.793, -3.698, -0.549, -0.186, -0.038, 0.22..."


In [11]:
line_search_results.to_csv('../../data/sclm_real_world/portfolios/line_search_portfolio.csv')

## Random norm portfolio

In [12]:
alpha_0 = 0.90
initial_p = - np.log(N)/np.log(1/alpha_0)

K_values = np.arange(1, 10)
random_norm_results = pd.DataFrame(columns=['K', 'approximation'])
random_norm_results.set_index('K', inplace=True)

T = 10

for K in K_values:
    avg_approximation = 0
    for t in range(T):
        portfolio_random_norm_sample = portfolio_of_random_norms(
            initial_p=initial_p,
            K=K,
            get_optimum_policy=get_optimum_policy,
            seed=t
        )
        actual_approximation_random_norm_sample = compute_portfolio_worst_approx_ratio(
            portfolio=portfolio_random_norm_sample,
            get_optimal_value=get_optimal_value,
            p_grid=p_grid,
            get_performance=get_performance
        )
        actual_approximation_random_norm_sample = np.round(actual_approximation_random_norm_sample, 4)
        avg_approximation += actual_approximation_random_norm_sample

    avg_approximation /= T
    random_norm_results.at[K, 'approximation'] = avg_approximation

    print('K = ' + str(K) + ', actual approximation = ' + str(avg_approximation))


K = 1, actual approximation = 0.91339
K = 2, actual approximation = 0.9418300000000001
K = 3, actual approximation = 0.9418300000000001
K = 4, actual approximation = 0.9418300000000001
K = 5, actual approximation = 0.9468800000000002
K = 6, actual approximation = 0.9468800000000002
K = 7, actual approximation = 0.9471
K = 8, actual approximation = 0.9471
K = 9, actual approximation = 0.9471


In [13]:
random_norm_results.to_csv('../../data/sclm_real_world/portfolios/random_norm_portfolio.csv')

## Random policy portfolio

In [14]:
K_values = np.arange(1, 10)
random_policy_results = pd.DataFrame(columns=['K', 'approximation'])
random_policy_results.set_index('K', inplace=True)

T = 10

for K in K_values:
    avg_approximation = 0
    for t in range(T):
        portfolio_random_policy = portfolio_of_random_policies(
            policies=[Policy(scores[i]) for i in range(len(scores))], K=K
        )
        actual_approximation_random_policy = compute_portfolio_worst_approx_ratio(
            portfolio=portfolio_random_policy,
            get_optimal_value=get_optimal_value,
            p_grid=p_grid,
            get_performance=get_performance
        )
        avg_approximation += actual_approximation_random_policy
    avg_approximation = np.round(avg_approximation/T, 4)
    random_policy_results.loc[K, 'approximation'] = avg_approximation

    print('K = ' + str(K) + ', actual approximation = ' + str(avg_approximation))


K = 1, actual approximation = 0.5925
K = 2, actual approximation = 0.5381
K = 3, actual approximation = 0.5822
K = 4, actual approximation = 0.5912
K = 5, actual approximation = 0.5969
K = 6, actual approximation = 0.6547
K = 7, actual approximation = 0.6824
K = 8, actual approximation = 0.6234
K = 9, actual approximation = 0.6556


In [15]:
random_policy_results.to_csv('../../data/sclm_real_world/portfolios/random_policy_portfolio.csv')

## GPI Portfolio

In [16]:
gpi_results = pd.DataFrame(
    columns=['K', 'approximation']
)

gpi_results.set_index('K', inplace=True)

gpi_portfolio = portfolio_with_gpi(vectors=scores)
policies = list(gpi_portfolio.policies)

for K in range(1, len(gpi_portfolio) + 1):
    portfolio = Portfolio()
    for i in range(K):
        portfolio.add_policy(policies[i])

    gpi_approximation = compute_portfolio_worst_approx_ratio(
        portfolio=portfolio, get_performance=get_performance,
        get_optimal_value=get_optimal_value, p_grid=p_grid
    )

    print('K:', K)
    print('Approximation:', gpi_approximation)

    gpi_results.loc[K, 'approximation'] = gpi_approximation
    

Iteration 0
differential_evolution step 1: f(x)= -5.4513980937084625
differential_evolution step 2: f(x)= -5.4513980937084625
differential_evolution step 3: f(x)= -5.895473938657972
differential_evolution step 4: f(x)= -5.9707902024536565
differential_evolution step 5: f(x)= -5.9707902024536565
differential_evolution step 6: f(x)= -5.9707902024536565
differential_evolution step 7: f(x)= -5.9707902024536565
differential_evolution step 8: f(x)= -5.987761246136586
differential_evolution step 9: f(x)= -5.987761246136586
differential_evolution step 10: f(x)= -6.027808261971586
differential_evolution step 11: f(x)= -6.521153808260237
differential_evolution step 12: f(x)= -6.521153808260237
differential_evolution step 13: f(x)= -6.576372548918812
differential_evolution step 14: f(x)= -6.853563353821414
differential_evolution step 15: f(x)= -6.974905639316944
differential_evolution step 16: f(x)= -6.974905639316944
differential_evolution step 17: f(x)= -7.52346633794798
differential_evolution 

In [23]:
gpi_results

Unnamed: 0_level_0,approximation
K,Unnamed: 1_level_1
1,0.347305
2,0.640719
3,0.640719
4,0.640719
5,0.640719
6,0.640719
7,0.640719
8,0.640719
9,0.640719


In [23]:
gpi_results.to_csv('../../data/sclm_real_world/gpi_portfolio.csv')