In [None]:
import os
import numpy as np
import pandas as pd
import pandas as pd
from scipy.stats import skewnorm


def mix_skew_normal_dist(skewness, loc, scale, seq_len, low=0.01, high=0.99):
    """Draw samples from a skew-normal distribution truncated to ``[low, high]``.

    Sampling is done by inverse-CDF: uniform values are drawn between the CDF
    values of ``low`` and ``high`` and mapped back through the skew-normal
    quantile function, so the draws follow the skew-normal shape restricted to
    that interval.

    The uniform values come from NumPy's global random state
    (``np.random.uniform``); call ``np.random.seed`` beforehand to make the
    output reproducible.

    Parameters
    ----------
    skewness : float
        Shape parameter ``a`` of ``scipy.stats.skewnorm``.
    loc : float
        Location parameter of the skew-normal distribution.
    scale : float
        Scale parameter of the skew-normal distribution.
    seq_len : int or tuple of int
        Passed as ``size`` to ``np.random.uniform``; sets the output shape.
    low : float, optional
        Lower truncation bound (default ``0.01``).
    high : float, optional
        Upper truncation bound (default ``0.99``).

    Returns
    -------
    numpy.ndarray
        Samples, each clipped into ``[low, high]``.

    Raises
    ------
    ValueError
        If ``low`` is not strictly smaller than ``high``.
    """
    if not low < high:
        raise ValueError("low must be strictly smaller than high")

    # Probability mass lying below each truncation bound.
    cdf_low = skewnorm.cdf(low, a=skewness, loc=loc, scale=scale)
    cdf_high = skewnorm.cdf(high, a=skewness, loc=loc, scale=scale)

    # Uniform draws restricted to that mass, mapped back to sample values.
    uniform_draws = np.random.uniform(cdf_low, cdf_high, size=seq_len)
    samples = skewnorm.ppf(uniform_draws, a=skewness, loc=loc, scale=scale)

    # The quantile function can land slightly outside the bounds (or at
    # +/-inf when a CDF value is exactly 0 or 1), so clamp the result.
    return np.clip(samples, low, high)

In [None]:
%load_ext autoreload
%autoreload 2
import numpy as np

# Experiment configuration.
time_horizon = 200  # number of samples drawn for every (arm, trial) sequence
n_trails = 32  # number of seeded repetitions generated for every arm
quantile = 0.99  # quantile reported in the per-arm summary printout

# One (mu, sigma) pair per arm, used as the (loc, scale) of the sampler.
list_mu_sigma = [
    # sigma = 0.05
    (0.8, 0.05), (0.75, 0.05), (0.7, 0.05), (0.6, 0.05),
    # sigma = 0.1
    (0.7, 0.1), (0.6, 0.1), (0.5, 0.1),
]

# Build one array per skewness value, keyed by the skewness rendered as a
# string. Each array is indexed as [arm, trial, step].
dataset = {}
skew_values = range(-100, 100 + 1, 5)
for skew in skew_values:
    per_arm = []
    for mu, sigma in list_mu_sigma:
        per_trial = []
        for seed in range(n_trails):
            # Re-seed the global NumPy RNG with the trial number so that a
            # given trial is reproducible.
            np.random.seed(seed)
            sequence = mix_skew_normal_dist(skew, mu, sigma, time_horizon)
            per_trial.append(np.hstack(sequence))
        per_arm.append(per_trial)
        # Summary over all trials of this arm: configured quantile and maximum.
        print(skew, mu, sigma, np.quantile(per_trial, quantile), np.max(per_trial))
    dataset[str(skew)] = np.asarray(per_arm)

print(dataset.keys())

In [None]:
import matplotlib.pyplot as plt

# Plot the empirical CDF of every arm for the last skewness key in `dataset`,
# pooling all trials of an arm into a single sample.
last_skew = list(dataset.keys())[-1]

fig, ax = plt.subplots()
for i, arm in enumerate(dataset[last_skew]):
    ax.ecdf(arm.flatten(), label="Arm " + str(i))

# Label the figure so it can be read on its own.
ax.set_xlabel("Sampled value")
ax.set_ylabel("Cumulative probability")
ax.set_title(f"Empirical CDF per arm (skewness = {last_skew})")
ax.legend()

In [None]:
# Base name (without extension) of the exported CSV file.
dataset_name = "synth_ablation"
# One instance per key of `dataset`.
instances = dataset.keys()
# One arm per (mu, sigma) configuration.
number_of_arms = len(list_mu_sigma)

In [None]:
optimizers = ["RandomSearch"]
optimizer = optimizers[0]

# Flatten the per-instance [arm, trial, step] arrays into one long-format
# record per sample.
results_list = []
for instance in instances:
    for arm_index in range(number_of_arms):
        # Each arm is recorded under its index rendered as a string.
        classifier = str(arm_index)
        for trial in range(n_trails):
            # Stored samples are converted to losses as 1 - value.
            losses = 1 - dataset[instance][arm_index, trial]
            for iteration, loss in enumerate(losses):
                results_list.append(
                    {
                        "instance": instance,
                        "repetition": trial,
                        "arm_index": arm_index,
                        "iteration": iteration,
                        "loss": loss,
                        "optimizer": optimizer,
                        "classifier": classifier,
                    }
                )

df = pd.DataFrame(results_list)

# Create the output directory first; to_csv does not create missing
# directories and would otherwise fail on a fresh checkout.
output_dir = "./dataset"
os.makedirs(output_dir, exist_ok=True)
df.to_csv(output_dir + "/" + dataset_name + ".csv", index=False)