In [1]:
from simulate_module import *

import pandas as pd
from pathlib import Path
import numpy as np
# Seed NumPy's global (legacy) RNG so the notebook's random draws are reproducible.
# The data-generation loop further down re-seeds with the same value for every
# sigma setting.
np.random.seed(1234)

In [2]:
# Noise scenarios keyed by name. Each value is a two-element list laid out as
# [sigma_between, sigma_within]: the generation loop reads element [0] as
# `sigma_between` and element [-1] as `sigma_within` when calling random_functions.
# Only the between-sigma differs across scenarios; the within-sigma is .2 everywhere.
sigma_setting = {
    label: [sigma_between, .2]
    for label, sigma_between in (
        ("high_bw", 10),
        ("medium_bw", 1),
        ("low_bw", .5),
    )
}

In [3]:
# Run configuration shared by the simulation cell.
args = dict(
    # Number of simulated tasks; also the number of target tasks looped over.
    n_tasks=15,
    # Picks the output root ("conservative_derived_data" vs "derived_data") and is
    # forwarded to bandit_source_train as its `conservative` argument.
    conservative=True,
    # Forwarded to prepare_input and embedded in each per-target output directory name.
    target_test_size=0.8,
)

In [None]:
# Simulation driver: for every sigma setting, generate the synthetic tasks, save
# them, then run the bandit source selection once per target task and write the
# per-target results to CSV.

# The output root depends only on the `conservative` flag, so resolve it once.
data_path = Path("conservative_derived_data" if args["conservative"] else "derived_data")

for s, sigmas in sigma_setting.items():
    # One sub-directory per sigma setting.
    working_path = data_path / s
    working_path.mkdir(parents=True, exist_ok=True)

    # generate data ------------------------------------------------
    # Re-seed for each setting so every setting starts from the same RNG state.
    np.random.seed(1234)
    f, betas, zs = random_functions(
        args["n_tasks"],
        6,  # second positional argument; its meaning is defined by random_functions
        sigma_between=sigmas[0],
        sigma_within=sigmas[-1],
    )

    result = []
    for i, fi in enumerate(f):
        x = np.random.uniform(0, 1, 100)
        f_values = fi(x)
        # fi is deliberately called a second time instead of reusing f_values.
        # NOTE(review): if fi is a pure function the two calls can be merged.
        y_values = fi(x) + np.random.normal(0, .1, len(x))
        result.append({"task": i, "x": x, "f": f_values, "y": y_values})

    # save data
    data_df = pd.concat([pd.DataFrame(r) for r in result])
    data_df.to_csv(working_path / "tasks.csv", index=False)
    # reset_index() keeps the old per-task (repeating) row labels as an "index"
    # column, so that column is also part of data_dict built below.
    data_df = data_df.reset_index()

    # One row per task: task id, the cluster value from `zs`, then the beta columns.
    task_ids = np.arange(args["n_tasks"])[:, np.newaxis]
    clusters = np.array(zs)[:, np.newaxis]
    beta_columns = ["task", "cluster"] + [f"beta{i}" for i in range(betas.shape[1])]
    betas_df = pd.DataFrame(np.hstack([task_ids, clusters, betas]), columns=beta_columns)
    betas_df.to_csv(working_path / "betas.csv", index=False)

    # Column-name -> list-of-values mapping handed to prepare_input.
    data_dict = data_df.to_dict(orient="list")

    # bandit selection ------------------------------------------------
    for target_task in range(args["n_tasks"]):
        input_data = prepare_input(
            data_dict,
            target_task=target_task,
            target_test_size=args["target_test_size"],
            preprocess=True,
        )
        # NOTE(review): this path is the same for every target_task, so the file is
        # overwritten on each iteration and only the last target's version survives.
        pd.DataFrame.from_dict(input_data["data_dict"]).to_csv(
            working_path / "tasks_processed.csv", index=False
        )

        losses, alpha, beta, bandit_selects, pi, bl, bandit_weights = bandit_source_train(
            input_data,
            model=LinearRegression(),
            batch_size=8,
            decay_rate=.5,
            n_it=100,
            loss=mse,
            conservative=args["conservative"],
        )

        # One directory per (target task, test size) pair.
        output_dir = working_path / f"target_{target_task}_{args['target_test_size']}"
        output_dir.mkdir(exist_ok=True)

        # File name -> object to dump; written in insertion order, row index included.
        outputs = {
            "alpha.csv": alpha,
            "beta.csv": beta,
            "losses.csv": {"losses": losses, "bandit_selects": bandit_selects},
            "pi.csv": pi,
            "baseline.csv": bl,
            "bandit_weights.csv": bandit_weights,
        }
        for filename, payload in outputs.items():
            pd.DataFrame.from_dict(payload).to_csv(output_dir / filename)

  processed = (raw - raw.min(axis = 0)) / (raw.max(axis = 0) - raw.min(axis = 0))
  processed = (raw - raw.min(axis = 0)) / (raw.max(axis = 0) - raw.min(axis = 0))
  processed = (raw - raw.min(axis = 0)) / (raw.max(axis = 0) - raw.min(axis = 0))
  processed = (raw - raw.min(axis = 0)) / (raw.max(axis = 0) - raw.min(axis = 0))
  processed = (raw - raw.min(axis = 0)) / (raw.max(axis = 0) - raw.min(axis = 0))
  processed = (raw - raw.min(axis = 0)) / (raw.max(axis = 0) - raw.min(axis = 0))
  processed = (raw - raw.min(axis = 0)) / (raw.max(axis = 0) - raw.min(axis = 0))
  processed = (raw - raw.min(axis = 0)) / (raw.max(axis = 0) - raw.min(axis = 0))
  processed = (raw - raw.min(axis = 0)) / (raw.max(axis = 0) - raw.min(axis = 0))
  processed = (raw - raw.min(axis = 0)) / (raw.max(axis = 0) - raw.min(axis = 0))
  processed = (raw - raw.min(axis = 0)) / (raw.max(axis = 0) - raw.min(axis = 0))
  processed = (raw - raw.min(axis = 0)) / (raw.max(axis = 0) - raw.min(axis = 0))
  processed = (r