This notebook computes the optimal non-robust task losses on the test split for each problem.

In [None]:
# Move the working directory up one level before running the cells below.
# NOTE(review): presumably this places the kernel at the repository root so the
# `storage` / `portfolio` imports and the relative 'out/' path resolve - confirm.
# The path is relative, so re-running this cell moves up one more level each time.
%cd ../

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## Storage problem

In [None]:
from storage.data import get_tensors
from storage.problems import StorageProblemNonRobust

# Forwarded as `log_prices=` to get_tensors() when loading the storage data.
LOG_PRICES = False

In [None]:
def get_optimal_task_losses(shuffle: bool) -> list[float]:
    """Collect the task loss of the non-robust storage problem on each test target.

    The storage tensors are loaded with the module-level ``LOG_PRICES`` setting,
    a ``StorageProblemNonRobust`` with ``T=24`` is built from the returned
    label statistics, and the problem is solved once per row of ``Y_test``.

    Args:
        shuffle: forwarded to ``get_tensors`` as its ``shuffle`` argument.

    Returns:
        The value of ``task_loss_np`` for every row of ``Y_test``, in row order.
    """
    tensors, y_info = get_tensors(shuffle=shuffle, log_prices=LOG_PRICES)
    # Narrow y_info to a tuple before unpacking it into (mean, std).
    assert isinstance(y_info, tuple)
    y_mean, y_std = y_info

    prob = StorageProblemNonRobust(T=24, y_mean=y_mean, y_std=y_std)

    def solve_and_score(target):
        # Solve using the true target itself, then evaluate the loss on that
        # same target. NOTE(review): is_standardized=True presumably marks the
        # target as being in standardized units - confirm against the problem class.
        prob.solve(target)
        return prob.task_loss_np(target, is_standardized=True)

    return [solve_and_score(target) for target in tensors['Y_test'].numpy()]

In [None]:
# Storage problem with shuffle=False: compute the per-sample test task losses,
# print their mean, and plot their distribution.
# NOTE: get_optimal_task_losses is redefined by later cells in this notebook;
# re-run the storage definition cell before re-running this one.
shuffle = False
task_losses = get_optimal_task_losses(shuffle=shuffle)
print(f'Shuffle: {shuffle}, mean test task loss: {np.mean(task_losses)}')

# Histogram of the losses with 100 bins; the return value is bound to `_`,
# presumably so the notebook does not echo it as cell output.
_ = plt.hist(task_losses, bins=100)

In [None]:
# Storage problem with shuffle=True: compute the per-sample test task losses,
# print their mean, and plot their distribution.
# NOTE: get_optimal_task_losses is redefined by later cells in this notebook;
# re-run the storage definition cell before re-running this one.
shuffle = True
task_losses = get_optimal_task_losses(shuffle=shuffle)
print(f'Shuffle: {shuffle}, mean test task loss: {np.mean(task_losses)}')

# Histogram of the losses with 100 bins; the return value is bound to `_`,
# presumably so the notebook does not echo it as cell output.
_ = plt.hist(task_losses, bins=100)

## Portfolio optimization (synthetic)

In [None]:
from portfolio import synthetic, yfinance
from portfolio.problems import PortfolioProblemNonRobust

In [None]:
def get_optimal_task_losses(seed: int) -> list[float]:
    """Collect the task loss of the non-robust portfolio problem on synthetic test data.

    Synthetic loaders are built with a batch size of 1000, ``alpha=0.9`` and
    ``phi=0.7``; a ``PortfolioProblemNonRobust`` with ``N=2`` is then solved
    once for every target in the ``'test'`` loader.

    Args:
        seed: forwarded to ``synthetic.get_loaders`` as its ``seed`` argument.

    Returns:
        The value of ``task_loss_np`` for every test target, in loader order.
    """
    batch_size = 1000
    alpha = 0.9  # the original note lists 0.1 here, presumably an alternative setting
    phi = 0.7  # the original note lists 0.1 here, presumably an alternative setting
    loaders, y_info = synthetic.get_loaders(batch_size, seed=seed, alpha=alpha, phi=phi)
    # Narrow y_info to a tuple before unpacking it into (mean, std).
    assert isinstance(y_info, tuple)
    y_mean, y_std = y_info

    prob = PortfolioProblemNonRobust(N=2, y_mean=y_mean, y_std=y_std)

    losses = []
    # Each loader item is a pair; only its second element (the targets) is used.
    for _, y_batch in loaders['test']:
        for target in y_batch.numpy():
            # Solve using the true target, then score the loss on that target.
            prob.solve(target)
            losses.append(prob.task_loss_np(target, is_standardized=True))
    return losses

In [None]:
import os

# Synthetic portfolio problem: for seeds 0-9, store the mean test task loss
# keyed by seed.
# NOTE: get_optimal_task_losses is redefined by other cells in this notebook;
# this cell needs the single-argument (seed) version defined for synthetic data.
results = {}
for seed in range(10):
    task_losses = get_optimal_task_losses(seed)
    results[seed] = np.mean(task_losses)

sr = pd.Series(results, name='test_task_loss')
sr.index.name = 'seed'

# Series.to_csv does not create missing parent directories; make sure 'out/'
# exists so the write cannot fail after all seeds have already been solved.
os.makedirs('out', exist_ok=True)
sr.to_csv('out/portfolio_syn_optimal.csv')

# Show the per-seed values, then their mean and standard deviation across seeds.
display(sr)
display(sr.agg(['mean', 'std']))

## Portfolio (yfinance)

In [None]:
def get_optimal_task_losses(seed: int, shuffle: bool) -> list[float]:
    """Collect the task loss of the non-robust portfolio problem on yfinance test data.

    Loaders are built with a batch size of 1000 and ``year=2013``; a
    ``PortfolioProblemNonRobust`` with ``N=15`` is then solved once for every
    target in the ``'test'`` loader.

    Args:
        seed: forwarded to ``yfinance.get_loaders`` as its ``seed`` argument.
        shuffle: forwarded to ``yfinance.get_loaders`` as its ``shuffled`` argument.

    Returns:
        The value of ``task_loss_np`` for every test target, in loader order.
    """
    batch_size = 1000
    loaders, y_info = yfinance.get_loaders(batch_size, year=2013, seed=seed, shuffled=shuffle)
    # Narrow y_info to a tuple before unpacking it into (mean, std).
    assert isinstance(y_info, tuple)
    y_mean, y_std = y_info

    prob = PortfolioProblemNonRobust(N=15, y_mean=y_mean, y_std=y_std)

    losses = []
    # Each loader item is a pair; only its second element (the targets) is used.
    for _, y_batch in loaders['test']:
        for target in y_batch.numpy():
            # Solve using the true target, then score the loss on that target.
            prob.solve(target)
            losses.append(prob.task_loss_np(target, is_standardized=True))
    return losses

In [None]:
# yfinance portfolio problem with shuffle=False: for seeds 0-9, store the mean
# test task loss keyed by seed, then print the mean and standard deviation
# across seeds.
results = {}
for seed in range(10):
    task_losses = get_optimal_task_losses(seed=seed, shuffle=False)
    results[seed] = np.mean(task_losses)
sr = pd.Series(results)
print(sr.agg(['mean', 'std']))

In [None]:
# yfinance portfolio problem with shuffle=True: for seeds 0-9, store the mean
# test task loss keyed by seed, then print the mean and standard deviation
# across seeds.
results = {}
for seed in range(10):
    task_losses = get_optimal_task_losses(seed=seed, shuffle=True)
    results[seed] = np.mean(task_losses)
sr = pd.Series(results)
print(sr.agg(['mean', 'std']))