In [1]:
from simulate_module import *

import pandas as pd
from pathlib import Path
import numpy as np
np.random.seed(1234)
import torch

import copy

## Preparing data

In [2]:
def basis(x, knots=None, degree = 10):
    if knots is None:
        knots = np.linspace(0, 1, 10)
        
    H = [np.ones(len(x))]
    for k in range(len(knots)):
        for j in range(1, degree + 1):
            H.append(pos(x - knots[k]) ** j)
    
    return np.vstack(H).T
def random_functions(n_tasks, k_clusters, sigma_between, sigma_within):
    np.random.seed(1234)
    betas, zs = gaussian_mixture(n_tasks, k_clusters, sigma_between, sigma_within)
    functions = []
    for beta in betas:
        functions.append(spline_fun(beta))
        
    return functions, betas, zs

def gaussian_mixture(n_samples, k_clusters, sigma_between, sigma_within, dim = 101):
    means = np.random.normal(0, sigma_between, (k_clusters, dim))
    cluster_props = np.random.dirichlet(k_clusters * [1.5])
    
    betas, zs = [], []
    for task in range(n_samples):
        z = np.random.choice(range(k_clusters), p = cluster_props)
        delta = np.random.normal(0, sigma_within, (1, dim))
        betas.append(means[z] + delta)
        zs.append(z)
        
    return np.vstack(betas), zs
        
def spline_fun(beta):
    def f(x):
        return np.matmul(basis(x), beta)
    return f
    


In [3]:
working_path = Path("debug_nn")
working_path.mkdir(exist_ok = True)

In [4]:

np.random.seed(123)
f, betas, zs = random_functions(15, 6,
                                sigma_between = 10,
                                sigma_within = .2)
result = []
for i, fi in enumerate(f):
    x = np.random.uniform(0, 1, 100)
    result.append({
        "task": i,
        "x": x,
        "f": fi(x),
        "y": fi(x) + np.random.normal(0, .1, len(x))
    })
# save data
data_df = pd.concat([pd.DataFrame(r) for r in result])
data_df = data_df.reset_index()
data_dict = data_df.to_dict(orient = "list")
data_df.to_csv(working_path / "tasks.csv", index = False)
betas_df = np.hstack([np.arange(15)[:, np.newaxis], np.array(zs)[:, np.newaxis], betas])
betas_df = pd.DataFrame(betas_df)
betas_df.columns = ["task", "cluster"] + [f"beta{i}" for i in range(betas.shape[1])]
betas_df.to_csv(working_path / "betas.csv", index = False)
    


In [26]:
input_data = prepare_input(data_dict,
                                   target_task = 1,
                                   target_train_size = .8,
                                  preprocess = True)

In [6]:
pd.DataFrame.from_dict(input_data["data_dict"]).to_csv(working_path / "tasks_processed.csv",
                                                               index = False)

In [7]:
loss_fn =  torch.nn.MSELoss()

In [9]:
for target_task in range(15):
    input_data = prepare_input(data_dict,
                                   target_task = target_task,
                                   target_test_size = .4,
                                  preprocess = True)
    losses, alpha, beta, bandit_selects, pi, bl = bandit_source_train(input_data = input_data,
                                                                          model_class = "nn",
                                                                          batch_size = 8,
                                                                          decay_rate = .5,
                                                                          n_it = 100,
                                                                          loss_fn =  loss_fn,
                                                                          conservative = True,
                                                                     save_path = working_path)
    print(str(target_task), str(bl["bandit_final"]), str(bl["target_train"]))

0 [0.9233148694038391] [0.7593525648117065]
1 [0.6882787346839905] [0.7515687942504883]
2 [0.383392870426178] [0.3881509602069855]
3 [0.6310557126998901] [0.5095764398574829]
4 [0.7076671123504639] [0.6813622713088989]
5 [0.1463281214237213] [0.1689569354057312]
6 [0.3074452579021454] [0.0767473503947258]
7 [0.3466690480709076] [0.32277312874794006]
8 [0.25931107997894287] [0.1485668122768402]
9 [0.19205568730831146] [0.2814365029335022]
10 [0.813493549823761] [0.6310582160949707]
11 [0.4050254225730896] [0.3989312946796417]
12 [0.1638627052307129] [0.14746856689453125]
13 [0.6245920062065125] [0.618249773979187]
14 [0.23450426757335663] [0.3704051971435547]


## Start running

In [7]:
base_dir = Path("degree10_scale_epochs50")
base_dir.mkdir(exist_ok = True)
metadata = []
i = 0
sigma_setting = {"high_bw": [10, .2],
                "medium_bw": [1, .2],
                "low_bw": [.5, .2]}

for target_test_size in [.8]:#np.linspace(0.2,0.8,4):
    for s in sigma_setting:
        for target_task in range(15):
            for decay_rate in [.2, 0.5, 1]:
                for conservative in [True]:
                    for model_type in ["nn"]:
                        metadata.append({
                            "path": "exp" + str(i),
                            "n_tasks": 15,
                            "conservative": conservative,
                            "target_test_size": target_test_size,
                            "model_type": model_type,
                            "s": s,
                            "target_task": target_task,
                            "decay_rate": decay_rate
                        })
                        i += 1

metadf = pd.concat([pd.DataFrame(m, index=[i]) for i, m in enumerate(metadata)])
metadf.to_csv( base_dir / "metadata2.csv")

In [8]:
for i, args in enumerate(metadata):
    working_path = Path(base_dir / args["path"])
    working_path.mkdir(parents = True, exist_ok = True)
    
    if args["model_type"] == "lm":
        loss_fn = mse
    elif args["model_type"] == "nn":
        loss_fn =  torch.nn.MSELoss()
    
    
    # generate data ------------------------------------------------
    np.random.seed(1234)
    f, betas, zs = random_functions(args["n_tasks"], 6,
                                    sigma_between = sigma_setting[args["s"]][0],
                                    sigma_within = sigma_setting[args["s"]][-1])
    result = []
    for i, fi in enumerate(f):
        x = np.random.uniform(0, 1, 100)
        result.append({
            "task": i,
            "x": x,
            "f": fi(x),
            "y": fi(x) + np.random.normal(0, .1, len(x))
        })
    # save data
    data_df = pd.concat([pd.DataFrame(r) for r in result])
    data_df.to_csv(working_path.joinpath("tasks.csv"), index = False)
    data_df = data_df.reset_index()
    betas_df = np.hstack([np.arange(args["n_tasks"])[:, np.newaxis], np.array(zs)[:, np.newaxis], betas])
    betas_df = pd.DataFrame(betas_df)
    betas_df.columns = ["task", "cluster"] + [f"beta{i}" for i in range(betas.shape[1])]
    betas_df.to_csv(working_path / "betas.csv", index = False)
    data_dict = data_df.to_dict(orient = "list")
    
    input_data = prepare_input(data_dict,
                               target_task = args["target_task"],
                                target_test_size = args["target_test_size"],
                                preprocess = True)
    pd.DataFrame.from_dict(input_data["data_dict"]).to_csv(working_path / "tasks_processed.csv",
                                                               index = False)
        # bandit selection
    losses, alpha, beta, bandit_selects, pi, bl = bandit_source_train(input_data = input_data,
                                                                          model_class = args["model_type"],
                                                                          batch_size = 8,
                                                                          decay_rate = args["decay_rate"],
                                                                          n_it = 100,
                                                                          loss_fn =  loss_fn,
                                                                          conservative = args["conservative"],
                                                                     save_path = working_path)
        # save outputs
    save_files(working_path, alpha, beta, losses, bandit_selects, pi, bl)
    print(args)
    print(str(bl["bandit_final"]), str(bl["target_train"]))

{'path': 'exp0', 'n_tasks': 15, 'conservative': True, 'target_test_size': 0.8, 'model_type': 'nn', 's': 'high_bw', 'target_task': 0, 'decay_rate': 0.2}
[0.6757275462150574] [1.8130533695220947]
{'path': 'exp1', 'n_tasks': 15, 'conservative': True, 'target_test_size': 0.8, 'model_type': 'nn', 's': 'high_bw', 'target_task': 0, 'decay_rate': 0.5}
[0.6020539999008179] [1.9853076934814453]
{'path': 'exp2', 'n_tasks': 15, 'conservative': True, 'target_test_size': 0.8, 'model_type': 'nn', 's': 'high_bw', 'target_task': 0, 'decay_rate': 1}
[0.582392692565918] [1.6535911560058594]
{'path': 'exp3', 'n_tasks': 15, 'conservative': True, 'target_test_size': 0.8, 'model_type': 'nn', 's': 'high_bw', 'target_task': 1, 'decay_rate': 0.2}
[0.6698150038719177] [0.5108088254928589]
{'path': 'exp4', 'n_tasks': 15, 'conservative': True, 'target_test_size': 0.8, 'model_type': 'nn', 's': 'high_bw', 'target_task': 1, 'decay_rate': 0.5}
[0.49688369035720825] [0.5980560183525085]
{'path': 'exp5', 'n_tasks': 15, 

{'path': 'exp43', 'n_tasks': 15, 'conservative': True, 'target_test_size': 0.8, 'model_type': 'nn', 's': 'high_bw', 'target_task': 14, 'decay_rate': 0.5}
[0.30553311109542847] [0.27728238701820374]
{'path': 'exp44', 'n_tasks': 15, 'conservative': True, 'target_test_size': 0.8, 'model_type': 'nn', 's': 'high_bw', 'target_task': 14, 'decay_rate': 1}
[0.2275719940662384] [0.19679197669029236]
{'path': 'exp45', 'n_tasks': 15, 'conservative': True, 'target_test_size': 0.8, 'model_type': 'nn', 's': 'medium_bw', 'target_task': 0, 'decay_rate': 0.2}
[0.8102046847343445] [1.3701298236846924]
{'path': 'exp46', 'n_tasks': 15, 'conservative': True, 'target_test_size': 0.8, 'model_type': 'nn', 's': 'medium_bw', 'target_task': 0, 'decay_rate': 0.5}
[0.9172632098197937] [1.4957354068756104]
{'path': 'exp47', 'n_tasks': 15, 'conservative': True, 'target_test_size': 0.8, 'model_type': 'nn', 's': 'medium_bw', 'target_task': 0, 'decay_rate': 1}
[0.8140246272087097] [1.4015593528747559]
{'path': 'exp48', 

{'path': 'exp85', 'n_tasks': 15, 'conservative': True, 'target_test_size': 0.8, 'model_type': 'nn', 's': 'medium_bw', 'target_task': 13, 'decay_rate': 0.5}
[0.5881219506263733] [0.6482661962509155]
{'path': 'exp86', 'n_tasks': 15, 'conservative': True, 'target_test_size': 0.8, 'model_type': 'nn', 's': 'medium_bw', 'target_task': 13, 'decay_rate': 1}
[0.6159874200820923] [0.49578922986984253]
{'path': 'exp87', 'n_tasks': 15, 'conservative': True, 'target_test_size': 0.8, 'model_type': 'nn', 's': 'medium_bw', 'target_task': 14, 'decay_rate': 0.2}
[0.33647245168685913] [0.3414643406867981]
{'path': 'exp88', 'n_tasks': 15, 'conservative': True, 'target_test_size': 0.8, 'model_type': 'nn', 's': 'medium_bw', 'target_task': 14, 'decay_rate': 0.5}
[0.30887356400489807] [0.25562453269958496]
{'path': 'exp89', 'n_tasks': 15, 'conservative': True, 'target_test_size': 0.8, 'model_type': 'nn', 's': 'medium_bw', 'target_task': 14, 'decay_rate': 1}
[0.26866698265075684] [0.27989575266838074]
{'path':

KeyboardInterrupt: 

`nn` class

## Train model

Bandit selection debugging

In [181]:
model = nn(n_inputs = 1)
n_it = 100
batch_size = 8
decay_rate = .5
conservative = True
loss_fn = torch.nn.MSELoss()
lr = 5e-4
n_epochs = 10

working_path = Path("debug_nn_combine")
working_path.mkdir(exist_ok = True)

In [182]:
bandit_selects = [None]
# initialize hyperparameters
alpha = dict.fromkeys(input_data["source_task"], [1])
beta = dict.fromkeys(input_data["source_task"], [1])
pi = dict.fromkeys(input_data["source_task"], [0])

# initialize model from target training data
mod = nn()
val_x, val_y = mod.prepare_data(input_data["X_target_val"], input_data["y_target_val"])
X_current, y_current = mod.prepare_data(input_data["X_target_train"], input_data["y_target_train"])
mod.fit( X_current, y_current, n_epochs = n_epochs, lr = lr)

l = mod.evaluate(val_x, val_y)
losses = [l]

model_old = copy.deepcopy(mod.model) # *********************

for t in range(n_it):
    
    # select bandit
    bandit_current, pi = get_bandit(input_data, alpha, beta,t, pi)
    bandit_selects.append(bandit_current)
    
    # set training data at this iteration
    X_current, y_current, _ = subset_data(input_data["source_dict"], 
                               key_value = bandit_current,
                               key_name = "task", test_size = 0)
    batch_id = random.choices(list(range(0, len(y_current))), k = batch_size)
    X_current, y_current = X_current[batch_id, :], y_current[batch_id]
    X_current = np.concatenate((X_current, input_data["X_target_val"]), axis = 0)
    y_current = np.concatenate((y_current, input_data["y_target_val"]), axis = 0)
    X_current, y_current = mod.prepare_data(X_current, y_current)
    
    # train model
    mod.initialize()
    #mod = nn()
    mod.fit(X_current, y_current, loss_fn = loss_fn, n_epochs = n_epochs, lr = lr)
    
    
    
    # combine parameters with previous model
    mod.combine_with_old(model_old, decay_rate = .9)
    #print(t, ", current = ", mod.model[4].weight[0,0].detach().numpy(),
         # ", old = ", model_old[4].weight[0,0].detach().numpy())
    
    model_old = copy.deepcopy(mod.model)
    
    
    # evaluate model
    l = mod.evaluate(val_x, val_y, loss_fn = loss_fn)
    losses += [l]
    
    
    model_old = copy.deepcopy(mod.model)
    
    # update bandit parameters
    if conservative:
        thres = 100000
    else:
        thres = avg_loss(bandit_selects, losses, bandit_current)
    alpha, beta = update_hyper_para(alpha, beta, t, losses,
                                    bandit_current,
                                    thres = thres
                                   )
    
    mod.save(path = working_path.joinpath("current" + str(t)), x_new = X_current, y_new = y_current, para = True)
    mod.save(path = working_path.joinpath(str(t)), x_new = val_x, y_new = val_y, para = True)

In [183]:
bl = baseline(input_data = input_data, alpha = alpha, beta = beta,
                  N = 10, model_class = "nn", loss_fn = loss_fn, bandit_final_model = mod)

In [184]:
bl

{'bandit_weighted': [0.2672058641910553],
 'bandit_final': [0.1665942668914795],
 'all_source': [0.74484783411026],
 'target_train': [0.21231599152088165],
 'random_source': [1.1200346]}

In [39]:
def baseline(input_data, alpha, beta, model_class,  loss_fn, N, bandit_final_model):
    """
    Baseline models of out-of-domain generalization
    """
    final_loss = dict.fromkeys(["bandit_weighted", "bandit_final", "all_source", "target_train", "random_source"], [])
    
    # weighted all source, by bandit selection parameters ----
    if model_class == "nn":
        mod = nn()
    elif model_class == "lm":
        mod = lm()
    X_end, y_end = draw_weighted_samples(input_data, alpha, beta)
    X_end, y_end = mod.prepare_data(X_end, y_end)

    mod.fit(X_end, y_end, loss_fn)
    test_x, test_y = mod.prepare_data(input_data["X_target_test"], input_data["y_target_test"])
    final_loss["bandit_weighted"] = [mod.evaluate(test_x, test_y, loss_fn = loss_fn).item()]
    
    # bandit_final ----------------
    final_loss["bandit_final"] = [bandit_final_model.evaluate(test_x, test_y, loss_fn = loss_fn).item()]
    
    # All sources----
    mod.initialize()
    X_sources, y_sources = mod.prepare_data(input_data["source_dict"]["x"], input_data["source_dict"]["y"])
    mod.fit(X_sources, y_sources, loss_fn)
    final_loss["all_source"] = [mod.evaluate(test_x, test_y, loss_fn = loss_fn).item()]
    
    # target train ---
    mod.initialize()
    X_train, y_train = mod.prepare_data(input_data["X_target_train"], input_data["y_target_train"])
    mod_train = mod.fit(X_train, y_train, loss_fn)
   
    final_loss["target_train"] =[ mod.evaluate(test_x, test_y, loss_fn = loss_fn).item()]

    # One random source + target train ----
    mod.initialize()
    for n in range(N):
        # one random source
        X_random, y_random, _ = subset_data(input_data["data_dict"],
                                            key_value = random.choice(input_data["source_task"]),
                                            key_name = "task", test_size = 0)
        X_random = np.concatenate((X_random, input_data["X_target_train"]), axis = 0)
        y_random = np.concatenate((y_random, input_data["y_target_train"]), axis = 0)
        X_random, y_random = mod.prepare_data(X_random, y_random)

        mod.fit(X_random, y_random, loss_fn)
        final_loss["random_source"] =[ mod.evaluate(test_x, test_y, loss_fn = loss_fn)]
    final_loss["random_source"] = [np.mean(final_loss["random_source"])]
    
    return(final_loss)


In [40]:
def bandit_source_train(input_data, model_class, batch_size, decay_rate, n_it, loss_fn, conservative = False,
                       save_path = None):
    bandit_selects = [None]
    # initialize hyperparameters
    alpha = dict.fromkeys(input_data["source_task"], [1])
    beta = dict.fromkeys(input_data["source_task"], [1])
    pi = dict.fromkeys(input_data["source_task"], [0])
    
    if model_class == "nn":
        mod = nn()
    elif model_class == "lm":
        mod = lm()
    val_x, val_y = mod.prepare_data(input_data["X_target_val"], input_data["y_target_val"])

    # initialize model from target training data ----------------
    X_current, y_current = mod.prepare_data(input_data["X_target_train"], input_data["y_target_train"])
    mod.fit( X_current, y_current)
    model_old = copy.deepcopy(mod.model)
    
    l = mod.evaluate(val_x, val_y, loss_fn = loss_fn)
    losses = [l]
    
    # bandit selection loop  ----------------
    for t in range(n_it):
        # select bandit
        bandit_current, pi = get_bandit(input_data, alpha, beta,t, pi)
        bandit_selects.append(bandit_current)

        # set training data at this iteration
        X_current, y_current, _ = subset_data(input_data["source_dict"], 
                                   key_value = bandit_current,
                                   key_name = "task", test_size = 0)
        batch_id = random.choices(list(range(0, len(y_current))), k = batch_size)
        X_current, y_current = X_current[batch_id, :], y_current[batch_id]
        X_current = np.concatenate((X_current, input_data["X_target_val"]), axis = 0)
        y_current = np.concatenate((y_current, input_data["y_target_val"]), axis = 0)
        X_current, y_current = mod.prepare_data(X_current, y_current)

        # train model
        mod.initialize()
        mod.fit(X_current, y_current, loss_fn = loss_fn)

        # combine parameters with previous model
        mod.combine_with_old(model_old, decay_rate = decay_rate)

        # evaluate model
        l = mod.evaluate(val_x, val_y, loss_fn = loss_fn)
        losses += [l]
        model_old = copy.deepcopy(mod.model)
        
        # update bandit parameters
        if conservative:
            thres = 100000
        else:
            thres = avg_loss(bandit_selects, losses, bandit_current)
        alpha, beta = update_hyper_para(alpha, beta, t, losses,
                                        bandit_current,
                                        thres = thres
                                       )
        if not save_path is None:
            mod.save(path = save_path.joinpath("current" + str(t)), x_new = X_current, y_new = y_current, para = True)
            mod.save(path = save_path.joinpath(str(t)), x_new = val_x, y_new = val_y, para = True)
    
    # baseline   ---------------- 
    bl = baseline(input_data = input_data, alpha = alpha, beta = beta,
                  N = 10, model_class = model_class, loss_fn = loss_fn, bandit_final_model = mod)
    
    return losses, alpha, beta, bandit_selects, pi, bl


In [185]:
losses, alpha, beta, bandit_selects, pi, bl = bandit_source_train(input_data = input_data,
                                                                          model_class ="nn",
                                                                          batch_size = 8,
                                                                          decay_rate = .5,
                                                                          n_it = 100,
                                                                          loss_fn =  loss_fn,
                                                                          conservative = True,
                                                                     save_path = None)

In [186]:
bl

{'bandit_weighted': [0.43148690462112427],
 'bandit_final': [0.1863572597503662],
 'all_source': [0.7830487489700317],
 'target_train': [0.28077125549316406],
 'random_source': [0.18895642]}

In [12]:
save_files(working_path, alpha, beta, losses, bandit_selects, pi, None)

## lm debug

In [16]:
model = lm()
n_it = 100
batch_size = 8
decay_rate = .5
conservative = False
loss_fn = mse

working_path = Path("debug_lm_combine")
working_path.mkdir(exist_ok = True)

In [17]:
bandit_selects = [None]
# initialize hyperparameters
alpha = dict.fromkeys(input_data["source_task"], [1])
beta = dict.fromkeys(input_data["source_task"], [1])
pi = dict.fromkeys(input_data["source_task"], [0])

# initialize model from target training data
mod = lm()
val_x, val_y = mod.prepare_data(input_data["X_target_val"], input_data["y_target_val"])
X_current, y_current = mod.prepare_data(input_data["X_target_train"], input_data["y_target_train"])
mod.fit( X_current, y_current)

l = mod.evaluate(val_x, val_y, loss_fn = loss_fn)
losses = [l]

model_old = copy.deepcopy(mod.model) # *********************

for t in range(n_it):
    
    # select bandit
    bandit_current, pi = get_bandit(input_data, alpha, beta,t, pi)
    bandit_selects.append(bandit_current)
    
    # set training data at this iteration
    X_current, y_current, _ = subset_data(input_data["source_dict"], 
                               key_value = bandit_current,
                               key_name = "task", test_size = 0)
    batch_id = random.choices(list(range(0, len(y_current))), k = batch_size)
    X_current, y_current = X_current[batch_id, :], y_current[batch_id]
    X_current = np.concatenate((X_current, input_data["X_target_val"]), axis = 0)
    y_current = np.concatenate((y_current, input_data["y_target_val"]), axis = 0)
    X_current, y_current = mod.prepare_data(X_current, y_current)
    
    # train model
    #mod = model.initialize(n_inputs = 1)
    mod = lm()
    mod.fit(X_current, y_current, loss_fn = loss_fn)
    
    
    
    # combine parameters with previous model
    mod.combine_with_old(model_old, decay_rate = .9)
    
    
    model_old = copy.deepcopy(mod.model)
    
    
    # evaluate model
    l = mod.evaluate(val_x, val_y, loss_fn = loss_fn)
    losses += [l]
    
    
    model_old = copy.deepcopy(mod.model)
    
    # update bandit parameters
    if conservative:
        thres = 100000
    else:
        thres = avg_loss(bandit_selects, losses, bandit_current)
    alpha, beta = update_hyper_para(alpha, beta, t, losses,
                                    bandit_current,
                                    thres = thres
                                   )
    
    mod.save(path = working_path.joinpath("current" + str(t)), x_new = X_current, y_new = y_current, para = True)
    mod.save(path = working_path.joinpath(str(t)), x_new = val_x, y_new = val_y, para = True)

In [22]:
t

99

In [113]:
working_path


PosixPath('debug_lm_combine')