In [1]:
from simulate_module import *

# Imported explicitly (after the star import, so these bindings win) because the
# cells below call os.path / os.makedirs directly and should not depend on
# whatever names simulate_module happens to re-export.
import os

import pandas as pd
import numpy as np
# Fix the state of NumPy's global random generator so the np.random draws
# made later in this notebook are repeatable from a fresh kernel.
np.random.seed(seed=1234)

In [2]:
# Root directory under which every simulated data set and training output is written.
data_path = "derived_data"
# exist_ok=True makes this safe to re-run and removes the check-then-create race
# of a separate os.path.isdir() test; it still raises if the path is a regular file.
os.makedirs(data_path, exist_ok=True)

In [3]:
# Simulation scenarios, keyed by name. Each value is a two-element list:
# the first entry is passed as `sigma_between` and the last entry as
# `sigma_within` when the data are generated further down. Only the first
# entry varies across scenarios; the second is .2 everywhere.
sigma_setting = {
    scenario: [between, .2]
    for scenario, between in (
        ("high_bw", 10),
        ("medium_bw", 1),
        ("low_bw", .5),
    )
}


In [5]:
# For every scenario in `sigma_setting`:
#   1. simulate `n_tasks` random functions and noisy samples from each,
#   2. save the samples (tasks.csv) and the per-task coefficients (betas.csv),
#   3. treat each task in turn as the target and run `bandit_source_train`,
#      writing its outputs to <scenario>/target_<task>_<test size>/.
# random_functions, get_key, prepare_input, bandit_source_train,
# LinearRegression and mse are expected to come from `simulate_module`.
n_tasks = 15
# Passed to prepare_input as `target_test_size` and embedded in the output
# directory name. It is the same for every target, so it is set once here.
target_test_size = 0.9

for s in sigma_setting.keys():
    # set directory (exist_ok: no separate isdir() check, safe on re-runs)
    working_path = data_path + "/" + s
    os.makedirs(working_path, exist_ok=True)

    # generate data
    f, betas, zs = random_functions(n_tasks, 6,
                                    sigma_between = sigma_setting[s][0],
                                    sigma_within = sigma_setting[s][-1])
    result = []

    for i, fi in enumerate(f):
        x = np.random.uniform(0, 1, 100)
        # Evaluate the task function once so the saved noise-free values "f"
        # and the noise-free part of "y" are guaranteed to be the same numbers.
        fx = fi(x)
        result.append({
            "task": i,
            "x": x,
            "f": fx,
            "y": fx + np.random.normal(0, .1, len(x))
        })

    # save data
    data_df = pd.concat([pd.DataFrame(r) for r in result])
    data_df.to_csv(working_path + "/tasks.csv", index = False)
    # The concatenated frame repeats each task's row labels; reset_index()
    # makes them unique and (since drop is not set) keeps the old labels as an
    # "index" column. This happens after the CSV is written, so the extra
    # column only reaches `data_dict` below, not tasks.csv.
    data_df = data_df.reset_index()

    # One row per task: task id, cluster label (from `zs`) and that task's
    # coefficients. hstack yields a single array with one common dtype, so the
    # id columns take on the dtype shared with `betas` (presumably float).
    betas_df = np.hstack([np.arange(n_tasks)[:, np.newaxis], np.array(zs)[:, np.newaxis], betas])
    betas_df = pd.DataFrame(betas_df)
    betas_df.columns = ["task", "cluster"] + [f"beta{i}" for i in range(betas.shape[1])]
    betas_df.to_csv(working_path + "/betas.csv", index = False)

    # relationship between tasks (bandits) and their original clusters:
    # cluster label -> list of task ids, in order of appearance. Built with
    # setdefault so every cluster gets its own list (dict.fromkeys(keys, [])
    # would hand the same list object to every key).
    d = {}
    for k, v in zip(betas_df.cluster, betas_df.task):
        d.setdefault(k, []).append(v)

    data_dict = data_df.to_dict()
    # add key "cluster" to `data_dict`
    data_dict["cluster"] = []

    for task in data_dict["task"].values():
        cluster = get_key(d, task)
        # NOTE(review): get_key appears to return this sentinel string when the
        # task is in no cluster. Breaking here leaves data_dict["cluster"]
        # shorter than the other columns and training still proceeds -- confirm
        # whether this should raise instead.
        if(cluster == "There is no such key"):
            print("task = " + str(task))
            break
        data_dict["cluster"].append(cluster)

    for target_task in range(n_tasks):
        input_data = prepare_input(data_dict, target_task = target_task, target_test_size = target_test_size, d = d)
        losses, alpha, beta, bandit_selects, pi, bl, bandit_weights = bandit_source_train(
            input_data, model = LinearRegression(), batch_size = 8,
            decay_rate = 1, n_it = 100, loss = mse)

        output_dir = working_path + "/target_" + str(target_task) + "_" + str(target_test_size) + "/"
        os.makedirs(output_dir, exist_ok=True)

        # File name -> object to save; written in this order, each via
        # DataFrame.from_dict(...).to_csv(...) with the default index column.
        outputs = {
            "alpha.csv": alpha,
            "beta.csv": beta,
            "losses.csv": {"losses": losses, "bandit_selects": bandit_selects},
            "pi.csv": pi,
            "baseline.csv": bl,
            "bandit_weights.csv": bandit_weights,
        }
        for file_name, values in outputs.items():
            pd.DataFrame.from_dict(values).to_csv(output_dir + file_name)