In [None]:
from src.portfolio import compute_portfolio_worst_approx_ratio, Portfolio
from src.p_mean import generate_p_grid, generalized_p_mean
from src.environments.taxi.main import get_optimum
from src.portfolio import portfolio_with_line_search, portfolio_of_random_norms, portfolio_of_random_policies, budget_portfolio_with_suboptimalities
import time
import numpy as np


In [None]:
def precompute_optimal_values(get_optimum, N, alpha, grid_size=100):
    """
    Evaluate the optimum oracle at every p on a generated grid.

    The grid comes from ``generate_p_grid(N=N, alpha=alpha, grid_size=grid_size)``.
    (An earlier note described it as running from -log2(N) up to 1; that is a
    property of ``generate_p_grid`` and should be confirmed there.)

    :param get_optimum: callable ``p -> (optimal_value, vectors)``; it is
        called once per grid point, in the order the grid is generated.
    :param N: forwarded to ``generate_p_grid``.
    :param alpha: forwarded to ``generate_p_grid``.
    :param grid_size: forwarded to ``generate_p_grid`` (default 100).
    :return: a 3-tuple
       p_to_optval: dict mapping p -> first element returned by get_optimum(p)
       p_to_optvec: dict mapping p -> second element returned by get_optimum(p)
       p_grid:      the grid values as a sorted list
    """
    grid = generate_p_grid(N=N, alpha=alpha, grid_size=grid_size)
    print('grid: ', grid)

    optimal_value_by_p, optimal_vectors_by_p = {}, {}
    for p in grid:
        # Progress trace: each oracle call may be slow.
        print('p val: ', p)
        optimal_value_by_p[p], optimal_vectors_by_p[p] = get_optimum(p)

    # The dicts keep generation order; only the returned grid is sorted.
    return optimal_value_by_p, optimal_vectors_by_p, sorted(grid)


# Generate the p grid and precompute the optimal values

In [None]:
# Build the reference grid once: optimal values, optimal vectors and the
# sorted p grid. Later cells use `uniform_grid` to score each portfolio.
uniform_portfolio_dict, uniform_portfolio_vectors, uniform_grid = precompute_optimal_values(
    get_optimum=get_optimum,
    N=4,
    alpha=0.95,
)
# Persisting the optimal values is currently disabled:
# np.save(f'portfolios/{0.95}_uniform.npy', uniform_portfolio_dict)

# Visual separator between the grid trace above and later output.
print('\n\n')
print('#' * 100)

# p-Mean Portfolio with line search

In [None]:
# Wall-clock timer for the whole sweep; reported after the loop.
start = time.time()

# NOTE(review): 0.9 is not in this list, but later cells load
# 'portfolios/0.9_main.npy' — confirm that file is produced elsewhere.
for alpha in [0.5, 0.6, 0.7, 0.8, 0.95]:
    # Build the line-search portfolio for this alpha.
    portfolio = portfolio_with_line_search(
        alpha=alpha,
        get_performance=generalized_p_mean,
        get_optimum_policy=get_optimum,
        d=4,
    )
    oracle_calls = portfolio.oracle_calls

    # Summarise the portfolio: its size and the smallest p it contains.
    policies = portfolio.policies
    portfolio_p_vals = [policy.p for policy in policies]
    portfolio_K = len(portfolio_p_vals)
    initial_p = min(portfolio_p_vals)

    # Target path for the (currently disabled) save of this portfolio.
    file_path = f'portfolios/{alpha}_main.npy'
    # np.save(file_path, [(policy.p, policy) for policy in policies])

    print(f'alpha: {alpha}')
    print(f'\n\nFound {portfolio_K} policies. Minimum p: {initial_p}')
    print(f'Oracle calls: {oracle_calls}')

    # Score the portfolio against the precomputed uniform p grid.
    approx = compute_portfolio_worst_approx_ratio(
        portfolio=portfolio,
        get_optimal_value=get_optimum,
        p_grid=uniform_grid,
        get_performance=generalized_p_mean,
    )
    print('Worst Approximation Ratio: ', approx)

# The timer above was previously started but never read; report it.
print(f'Total elapsed time: {time.time() - start:.2f}s')


# Portfolio through uniform sampling of p values

In [None]:
# Fix the global NumPy seed so the random portfolios are reproducible.
# (numpy is already imported as `np` in the notebook's import cell.)
np.random.seed(0)

# NOTE(review): this list includes 0.9, which the line-search cell does not
# generate — confirm 'portfolios/0.9_main.npy' exists before running.
for alpha in [0.5, 0.6, 0.7, 0.8, 0.9, 0.95]:
    # allow_pickle=True unpickles arbitrary objects: only load files that
    # were produced locally by this project.
    portfolio_main = np.load(f'portfolios/{alpha}_main.npy', allow_pickle=True)
    portfolio_main = list(portfolio_main.flatten()[0])
    # A plain list cannot be indexed with [:, 0] (TypeError), so build a 2-D
    # array first, exactly as the heuristic-portfolio cell does.
    portfolio_main = np.array([list(i) for i in portfolio_main])
    # presumably column 0 holds each entry's p value — TODO confirm against
    # the code that wrote the file.
    initial_p = portfolio_main[:, 0].min()
    portfolio_K = len(portfolio_main)
    print(alpha, initial_p)

    # Random-norm baseline with the same size and starting p as the main portfolio.
    random_norm_portfolio = portfolio_of_random_norms(
        initial_p=initial_p,
        K=portfolio_K,
        get_optimum_policy=get_optimum,
    )

    # Score the baseline against the precomputed uniform p grid.
    approx = compute_portfolio_worst_approx_ratio(
        portfolio=random_norm_portfolio,
        get_optimal_value=get_optimum,
        p_grid=uniform_grid,
        get_performance=generalized_p_mean,
    )
    print('Worst Approximation Ratio: ', approx)

    np.save(f'portfolios/{alpha}_random.npy', random_norm_portfolio)

# Heuristic/binary search portfolio

In [None]:
def get_optimum_policy(p):
    """Return only the second element (the vectors) of ``get_optimum(p)``.

    ``get_optimum`` returns a 2-tuple; the first element (the optimal p-mean
    value) is discarded.
    """
    _optimal_value, optimal_vectors = get_optimum(p)
    return optimal_vectors

# NOTE(review): 0.8 is absent from this list although the other cells sweep
# it — confirm the omission is intentional.
for alpha in [0.5, 0.6, 0.7, 0.9, 0.95]:
    # Load the stored main portfolio and turn its entries into a 2-D array.
    # allow_pickle=True unpickles arbitrary objects: only load local files.
    portfolio_main = np.array([
        list(entry)
        for entry in np.load(f'portfolios/{alpha}_main.npy', allow_pickle=True).flatten()[0]
    ])

    # NOTE(review): the starting p is a fixed constant here, and the loaded
    # portfolio only contributes its size (the budget K) — confirm intended.
    initial_p = -13.15
    portfolio_K = len(portfolio_main)
    print(alpha, initial_p, portfolio_K)

    print(f'\talpha {alpha}, init p: {initial_p}, K {portfolio_K}')
    portfolio_heuristic = budget_portfolio_with_suboptimalities(
        initial_p,
        portfolio_K,
        get_optimum_policy,
        get_performance=generalized_p_mean,
    )

    # Score the heuristic portfolio against the precomputed uniform p grid.
    approx = compute_portfolio_worst_approx_ratio(
        portfolio=portfolio_heuristic,
        get_optimal_value=get_optimum,
        p_grid=uniform_grid,
        get_performance=generalized_p_mean,
    )
    print('Worst Approximation Ratio: ', approx)

    np.save(f'portfolios/{alpha}_heuristic.npy', portfolio_heuristic)
    print('#' * 10)