In [None]:
from src.portfolio import compute_portfolio_worst_approx_ratio, Portfolio, Policy
from src.p_mean import generate_p_grid, generalized_p_mean
from src.environments.taxi.main import get_optimum
from src.portfolio import portfolio_with_line_search, portfolio_of_random_norms, portfolio_of_random_policies, budget_portfolio_with_suboptimalities
import time
import numpy as np


In [None]:
def precompute_optimal_values(get_optimum, N, alpha, grid_size=100):
    """
    Query the optimum oracle at every p on the grid produced by
    ``generate_p_grid(N=N, alpha=alpha, grid_size=grid_size)``.

    Progress is printed: the whole grid once, then each p right before it is
    solved.

    :param get_optimum: callable ``p -> (optimal_value, vectors)``; called once
                        per grid point, in the order the grid yields them
    :param N:           forwarded unchanged to ``generate_p_grid``
    :param alpha:       forwarded unchanged to ``generate_p_grid``
    :param grid_size:   forwarded unchanged to ``generate_p_grid``
    :return: a 3-tuple
       optimal_value_by_p:   dict mapping p -> first element of get_optimum(p)
       optimal_vectors_by_p: dict mapping p -> second element of get_optimum(p)
       sorted grid:          the grid values as an ascending list
    """
    grid = generate_p_grid(N=N, alpha=alpha, grid_size=grid_size)
    print('grid: ', grid)

    optimal_value_by_p, optimal_vectors_by_p = {}, {}
    for p in grid:
        print('p val: ', p)
        # The oracle answers with a (value, vectors) pair; file each half
        # under the p it was solved for.
        optimal_value_by_p[p], optimal_vectors_by_p[p] = get_optimum(p)

    # Only the returned grid is sorted; the oracle was queried in grid order.
    return optimal_value_by_p, optimal_vectors_by_p, sorted(grid)


def get_optimum_policy(p):
    """
    Return only the policy part of the optimum oracle's answer for ``p``.

    ``get_optimum(p)`` yields a pair; its first element (the p-mean value) is
    discarded and the second one is returned unchanged.
    """
    _, optimal_vectors = get_optimum(p)
    return optimal_vectors


# Generate the p grid and precompute the optimal values

In [None]:
# Single source of truth for the alpha of the reference grid, so the grid that
# is computed and the file name it is saved under cannot drift apart.
UNIFORM_ALPHA = 0.95

# Solve the oracle on every grid point. `uniform_grid` is reused by the later
# cells as the p_grid passed to compute_portfolio_worst_approx_ratio.
uniform_portfolio_dict, uniform_portfolio_vectors, uniform_grid = precompute_optimal_values(
    get_optimum=get_optimum,
    N=4,
    alpha=UNIFORM_ALPHA,
)

# np.save wraps the dict in an object array and pickles it, so reading it back
# needs np.load(..., allow_pickle=True).item().
np.save(f'portfolios/{UNIFORM_ALPHA}_uniform.npy', uniform_portfolio_dict)
print('\n\n')
print('#' * 100)

# p-Mean Portfolio with Line Search

In [None]:
def get_optimal_value(p):
    """
    Return only the value part of the optimum oracle's answer for ``p``.

    ``get_optimum(p)`` yields a pair; its first element (the p-mean value) is
    returned and the second one (the vectors) is discarded.
    """
    optimal_value, _ = get_optimum(p)
    return optimal_value

# For each target alpha: build a portfolio by line search, save it, report its
# size and oracle cost, then score it on the shared reference grid.
for alpha in (0.5, 0.6, 0.7, 0.8, 0.95):
    portfolio = portfolio_with_line_search(
        alpha=alpha,
        get_performance=generalized_p_mean,
        get_optimum_policy=get_optimum_policy,
        d=4,
    )
    oracle_calls = portfolio.oracle_calls

    policies = portfolio.policies
    portfolio_p_vals = [member.p for member in policies]
    portfolio_K = len(portfolio_p_vals)
    initial_p = min(portfolio_p_vals)

    # Each saved entry pairs a policy's `p` attribute with the policy object.
    file_path = f'portfolios/{alpha}_main.npy'
    np.save(file_path, [(member.p, member) for member in policies])

    print(f'alpha: {alpha}')
    print(f'\n\nFound {portfolio_K} policies. Minimum p: {initial_p}')
    print(f'Oracle calls: {oracle_calls}')

    # The ratio is always evaluated on `uniform_grid`, whatever alpha the
    # portfolio itself was built for.
    approx = compute_portfolio_worst_approx_ratio(
        portfolio=portfolio,
        get_optimal_value=get_optimal_value,
        p_grid=uniform_grid,
        get_performance=generalized_p_mean,
    )
    print('Worst Approximation Ratio: ', approx)


# Random Norm Portfolio

In [None]:
# Seed NumPy's global RNG before the random-norm portfolios are built
# (presumably consumed inside portfolio_of_random_norms - confirm there).
np.random.seed(0)

N = 4
alpha_0 = 0.90
# initial_p = -ln(N) / ln(1 / alpha_0); handed to portfolio_of_random_norms.
initial_p = - np.log(N)/np.log(1/alpha_0)


# One portfolio per budget K = 1..10, each scored on the shared reference grid
# and then written to its own file.
for portfolio_K in range(1, 11):
    print(f'Portfolio K: {portfolio_K}')

    random_norm_portfolio = portfolio_of_random_norms(
        initial_p=initial_p,
        K=portfolio_K,
        get_optimum_policy=get_optimum_policy,
    )

    approx = compute_portfolio_worst_approx_ratio(
        portfolio=random_norm_portfolio,
        get_optimal_value=get_optimal_value,
        p_grid=uniform_grid,
        get_performance=generalized_p_mean,
    )
    print('Worst Approximation Ratio: ', approx)

    np.save(f'portfolios/{portfolio_K}_random_norm.npy', random_norm_portfolio)

# Heuristic/binary search portfolio

In [None]:
def get_optimum_policy(p):
    """
    Return the second element of ``get_optimum(p)`` (the policy vectors).

    Re-declared in this cell with the same body as the earlier definition of
    ``get_optimum_policy`` in this notebook.
    """
    _, optimal_vectors = get_optimum(p)
    return optimal_vectors


def get_optimal_value(p):
    """
    Return the first element of ``get_optimum(p)`` (the p-mean value).

    Re-declared in this cell with the same body as the earlier definition of
    ``get_optimal_value`` in this notebook.
    """
    optimal_value, _ = get_optimum(p)
    return optimal_value


# Starting point for the heuristic portfolio; the same for every budget K.
initial_p = -25

# One budgeted portfolio per K = 1..10, each scored on the shared reference
# grid and then written to its own file.
for portfolio_K in range(1, 11):
    print(f'Portfolio K: {portfolio_K}')
    portfolio_heuristic = budget_portfolio_with_suboptimalities(
        initial_p=initial_p,
        K=portfolio_K,
        get_optimum_policy=get_optimum_policy,
        get_performance=generalized_p_mean,
    )

    approx = compute_portfolio_worst_approx_ratio(
        portfolio=portfolio_heuristic,
        get_optimal_value=get_optimal_value,
        p_grid=uniform_grid,
        get_performance=generalized_p_mean,
    )
    print('Worst Approximation Ratio: ', approx)

    np.save(f'portfolios/{portfolio_K}_heuristic.npy', portfolio_heuristic)
    print('#'*10)

# Random Policy Portfolio

In [None]:
def get_random_policy():
    """
    Build a policy for a randomly drawn p.

    p is sampled uniformly from [-100, 1) with NumPy's global RNG, the oracle
    is queried as ``get_optimum(p, episodes=150)``, and the vectors it returns
    are wrapped in a ``Policy``. Neither the sampled p nor the oracle's p-mean
    value is kept.
    """
    sampled_p = np.random.uniform(-100, 1)
    _, policy_vectors = get_optimum(sampled_p, episodes=150)
    return Policy(policy_vectors)


# One portfolio of K random policies per K = 1..10, each scored on the shared
# reference grid and then written to its own file.
for portfolio_K in range(1, 11):
    print(f'Portfolio K: {portfolio_K}')

    # Start from an empty portfolio and add K independently drawn policies.
    portfolio = Portfolio()
    for _ in range(portfolio_K):
        policy = get_random_policy()
        portfolio.add_policy(policy)

    approx = compute_portfolio_worst_approx_ratio(
        portfolio=portfolio,
        get_optimal_value=get_optimal_value,
        p_grid=uniform_grid,
        get_performance=generalized_p_mean,
    )
    print('Worst Approximation Ratio: ', approx)

    # Save the portfolio built in this iteration. The original saved
    # `portfolio_heuristic`, a leftover from the heuristic cell, so every
    # *_random_policy.npy file held the wrong object.
    np.save(f'portfolios/{portfolio_K}_random_policy.npy', portfolio)
    print('#'*10)