Testing experiments

In [1]:
from simulate_module import *

import pandas as pd
from pathlib import Path
import numpy as np
np.random.seed(1234)
import torch

import copy

In [2]:
np.linspace(-1, 1, 20)[:, np.newaxis]

array([[-1.        ],
       [-0.89473684],
       [-0.78947368],
       [-0.68421053],
       [-0.57894737],
       [-0.47368421],
       [-0.36842105],
       [-0.26315789],
       [-0.15789474],
       [-0.05263158],
       [ 0.05263158],
       [ 0.15789474],
       [ 0.26315789],
       [ 0.36842105],
       [ 0.47368421],
       [ 0.57894737],
       [ 0.68421053],
       [ 0.78947368],
       [ 0.89473684],
       [ 1.        ]])

In [3]:
sigma_setting = {"high_bw": [10, .2],
                "medium_bw": [1, .2],
                "low_bw": [.5, .2]}

In [4]:
args = {
    "n_tasks": 15,
    "conservative": True,
    "target_test_size": 0.8,
    "model_type": "nn",
    "base_output_dir": "test"
}

In [5]:
if args["model_type"] == "lm":
    model_class = lm()
    loss_fn = mse
elif args["model_type"] == "nn":
    model_class = nn()
    loss_fn =  torch.nn.MSELoss()

In [6]:
s = "high_bw"
    # set directory
if args["conservative"]:
    data_path = Path(args["base_output_dir"]) / Path("model_" + args["model_type"] + "/conservative_derived_data")
else:
    data_path = Path(args["base_output_dir"]) / Path("model_" + args["model_type"] + "/derived_data")
data_path = Path(data_path)
working_path = data_path / s
working_path.mkdir(parents = True, exist_ok = True)
# generate data ------------------------------------------------
np.random.seed(1234)
f, betas, zs = random_functions(args["n_tasks"], 6,
                                sigma_between = sigma_setting[s][0],
                                sigma_within = sigma_setting[s][-1])
result = []
for i, fi in enumerate(f):
    x = np.random.uniform(0, 1, 100)
    result.append({
        "task": i,
        "x": x,
        "f": fi(x)
        
    })


In [7]:
for k , _ in enumerate(result):
    
    result[k]["p"] = 1 / (1 + np.exp(- result[k]["x"])) + np.random.normal(0, .1, len(result[k]["x"]))
    result[k]["y"] = np.zeros((len(result[k]["x"]), 2))
    for i, v in enumerate(result[k]["p"]):
        if v > np.mean(result[k]["p"]):
            result[k]["y"][i, 1] = 1
        else:
            result[k]["y"][i, 0] = 1

In [8]:
for k , _ in enumerate(result):
    
    result[k]["p"] = 1 / (1 + np.exp(- result[k]["x"])) + np.random.normal(0, .1, len(result[k]["x"]))
    result[k]["y"] = np.zeros(len(result[k]["x"]))
    for i, v in enumerate(result[k]["p"]):
        if v > np.median(result[k]["p"]):
            result[k]["y"][i] = 1  

In [9]:
data_df = pd.concat([pd.DataFrame(r) for r in result])
data_df.to_csv(working_path / "tasks.csv", index = False)
data_df = data_df.reset_index()
betas_df = np.hstack([np.arange(args["n_tasks"])[:, np.newaxis], np.array(zs)[:, np.newaxis], betas])
betas_df = pd.DataFrame(betas_df)
betas_df.columns = ["task", "cluster"] + [f"beta{i}" for i in range(betas.shape[1])]
betas_df.to_csv(working_path / "betas.csv", index = False)
data_dict = data_df.to_dict(orient = "list")

In [10]:
class nn():
    """
    Neural network
    """
    def __init__(self, n_inputs = 1, n_outputs = 2, H = 200):
        self.model = torch.nn.Sequential(
            torch.nn.Linear(n_inputs, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, n_outputs),
        )
    def initialize(self, n_inputs = 1, n_outputs = 2, H = 200):
        self.model = torch.nn.Sequential(
            torch.nn.Linear(n_inputs, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, n_outputs),
        )
        return self
    def prepare_data(self, x, y):
        if type(x) != torch.Tensor:
            if len(x.shape) > 1:
                x = torch.tensor(x[:, 1:]).float()
            else:
                x = torch.tensor(x).float()
        if type(y) != torch.Tensor:
            y = torch.tensor(y).float()
        return x, y
    def fit(self, x_train, y_train, loss_fn = torch.nn.MSELoss(), n_epochs = 10, lr = 1e-4):
        model = self.model
        optimizer = torch.optim.Adam(self.model.parameters(), lr = lr)
        for epoch in range(n_epochs):
            # get loss
            optimizer.zero_grad()
            y_hat = self.model(x_train[:, np.newaxis])
            loss = loss_fn(y_train, y_hat)

            # update weights
            loss.backward()
            optimizer.step()
        return self
            
        return model
    def evaluate(self, x_test, y_test, loss_fn = torch.nn.MSELoss()):
        with torch.no_grad():
            y_hat = self.model(x_test[:, np.newaxis])
            l = loss_fn(y_test, y_hat)
        return l
    def pred(self, x_new):
        with torch.no_grad():
            y_hat = self.model(x_new)
        return y_hat
    def combine_with_old(self, model_old, decay_rate = .5):
        for i in range(len(model_old)):
            if "weight" in dir(model_old[i]):
                self.model[i].weight = torch.nn.Parameter(decay_rate * model_old[i].weight + (1 - decay_rate) * self.model[i].weight)
                self.model[i].bias = torch.nn.Parameter(decay_rate * model_old[i].bias + (1 - decay_rate) * self.model[i].bias)
    def save(self, path = ".", x_new = None, y_new = None, para = True):
        x_new, y_new = self.prepare_data(x_new, y_new)
        path = Path(path)
        path.mkdir(parents = True, exist_ok = True)
        y_hat = self.pred(x_new)
        if not x_new is None:
            pd.DataFrame.from_dict({"x": [item[0] for item in x_new.tolist()], 
                        "y": y_new,
                        "y_hat": [item[0] for item in y_hat.tolist()]
                       }).to_csv(path / Path("fitted.csv"))

In [11]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

AttributeError: type object 'nn' has no attribute 'CrossEntropyLoss'

In [None]:
class nn():
    """
    Neural network
    """
    def __init__(self, n_inputs = 1, n_outputs = 2, H = 200):
        self.model = torch.nn.Sequential(
            torch.nn.Linear(n_inputs, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, n_outputs),
        )
    def initialize(self, n_inputs = 1, n_outputs = 2, H = 200):
        self.model = torch.nn.Sequential(
            torch.nn.Linear(n_inputs, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, n_outputs),
        )
        return self
    def prepare_data(self, x, y, binary = True):
        if type(x) != torch.Tensor:
            if len(x.shape) > 1:
                x = torch.tensor(x[:, 1:]).float()
            else:
                x = torch.tensor(x).float()
        if (binary is True) and (len(y.shape) == 1):
            y = np.array([y, 1-y]).T
        if type(y) != torch.Tensor:
            y = torch.tensor(y).float()
        return x, y
    def fit(self, x_train, y_train, loss_fn = torch.nn.MSELoss(), n_epochs = 10, lr = 1e-4):
        model = self.model
        #optimizer = torch.optim.Adam(self.model.parameters(), lr = lr)
        optimizer = torch.optim.SGD(self.model.parameters(), lr = lr, momentum=0.9)
        for epoch in range(n_epochs):
            # get loss
            optimizer.zero_grad()
            y_hat = self.model(x_train[:, np.newaxis])
            loss = loss_fn(y_train, y_hat)

            # update weights
            loss.backward()
            optimizer.step()
        return self
            
        return model
    def evaluate(self, x_test, y_test, loss_fn = torch.nn.MSELoss()):
        with torch.no_grad():
            y_hat = self.model(x_test[:, np.newaxis])
            l = loss_fn(y_test, y_hat)
        return l
    def pred(self, x_new):
        with torch.no_grad():
            y_hat = self.model(x_new)
        return y_hat
    def combine_with_old(self, model_old, decay_rate = .5):
        for i in range(len(model_old)):
            if "weight" in dir(model_old[i]):
                self.model[i].weight = torch.nn.Parameter(decay_rate * model_old[i].weight + (1 - decay_rate) * self.model[i].weight)
                self.model[i].bias = torch.nn.Parameter(decay_rate * model_old[i].bias + (1 - decay_rate) * self.model[i].bias)
    def save(self, path = ".", x_new = None, y_new = None, para = True, binary = True):
        x_new, y_new = self.prepare_data(x_new, y_new)
        path = Path(path)
        path.mkdir(parents = True, exist_ok = True)
        y_hat = self.pred(x_new)
        if not x_new is None:
            pd.DataFrame.from_dict({"x": [item[0] for item in x_new.tolist()], 
                        "y": y_new,
                        "y_hat": [item[0] for item in y_hat.tolist()]
                       }).to_csv(path / Path("fitted.csv"))
        if binary is True:
            
            pd.DataFrame.from_dict({"x": [item[0] for item in x_new.tolist()], 
                        "y": torch.max(y_new, 1).indices.tolist(),
                        "y_hat": torch.max(y_hat, 1).indices.tolist()
                       }).to_csv(path / Path("fitted.csv"))

In [None]:

def subset_data(data_dict, key_name = "task", key_value = 0, test_size = 0.33):
    """
    Subsetting data by the value of a key.
    
    Parameters
    ---
    data_dict: dict
        the dictionary one wants to subset
    key_name: str
        the key one wants to subset on
    key_value: list / int / str
        the value of the key desirable in the output subset
    test_size: float
        how to split the resulting subset; if set to zero, then the output won't be splitted

    Returns
    ---
    
    """
    if type(data_dict[key_name]) == list:
        values = data_dict[key_name]
    else:
        values = list(data_dict[key_name].values())
    
    n_task = max(values) + 1    
    if type(key_value) != list:
        idx_task = [i for (i, v) in enumerate(values) if v == key_value]
    else:
        idx_task = [i for (i, v) in enumerate(values) if v in key_value]
        
    tasks = [data_dict['task'][i] for i in idx_task]
    
    
    x = [data_dict['x'][i] for i in idx_task]
    y = np.array([data_dict['y'][i] for i in idx_task])
    X = np.array([np.ones(len(idx_task)), np.array(x)]).T
    
    if test_size == 0:
        return X, y, tasks
    else:
        X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                        test_size = test_size,
                                                        random_state = 123,
                                                           stratify = y)
    return X_train, X_test, y_train, y_test



In [None]:
def prepare_input(data_dict, target_task, target_test_size, preprocess = True):
    """
    Preparing input data for bandit selection
    
    Parameters
    ---
    data_dict: dict
        all data, including source and target
    target_task: int
        data with data_dict["task"] equals to target_task will be in the target
    target_test_size: float
        within [0, 1) indicating the proportion of the validation + test set.
    
    Returns
    ---
    input_data: dict
        keys including data_dict, source_dict,
                        source_task, source_cluster,
                        X_target_train, X_target_test, X_target_val, y_target_train, y_target_test, y_target_val
    """
    
    n_tasks = max(data_dict["task"]) + 1


    input_data = {"data_dict": data_dict}
    input_data["X_target_train"], input_data["X_target_test"], input_data["y_target_train"], input_data["y_target_test"] = subset_data(data_dict, key_value = target_task, key_name = "task",
                                                                                                                                      test_size = target_test_size)
    input_data["X_target_val"], input_data["X_target_test"], input_data["y_target_val"], input_data["y_target_test"] = train_test_split(input_data["X_target_test"], input_data["y_target_test"], 
                                                        test_size = .5,
                                                        random_state = 123, stratify = input_data["y_target_test"]  )
        
    input_data["source_task"] = [v for v in range(n_tasks) if v != target_task]
    
    
    idx_source = [i for (i, v) in enumerate(data_dict['task']) if v != target_task]
    
    # source data
    input_data["source_dict"] = {}
    for key_name in data_dict.keys():
        input_data["source_dict"][key_name] = [data_dict[key_name][i] for i in idx_source]
    
    
    if preprocess:
        input_data["data_dict"] = pre(raw_data = input_data["data_dict"]).pre_process(key_names = ["y", "x", "f"], by_key = "task")
        input_data["source_dict"] = pre(raw_data = input_data["source_dict"]).pre_process(key_names = ["y", "x"], by_key = "task")
        input_data = pre(raw_data = input_data).pre_process(key_names = ["X_target_test", "X_target_val", "X_target_train",
                                          "y_target_train", "y_target_val", "y_target_test"], by_key = None)
    
    return(input_data)



In [None]:
input_data = prepare_input(data_dict,
                               target_task = 4,
                               target_test_size = 0.6,
                              preprocess = True)
pd.DataFrame.from_dict(input_data["data_dict"]).to_csv(working_path / "tasks_processed.csv",
                                                           index = False)

In [None]:
optimizer = torch.optim.SGD(model.model.parameters(), lr = 1e-4, momentum=0.9)
for epoch in range(10):
    # get loss
    optimizer.zero_grad()
    y_hat = model.model(val_x[:, np.newaxis])
    loss = loss_fn(val_y, y_hat)

    # update weights
    loss.backward()
    optimizer.step()

In [None]:
input = torch.randn(3, 2, requires_grad=True)
target = torch.empty(3, dtype=torch.long).random_(2)
output = loss_fn(input, target)

In [None]:
y_hat = model.pred(val_x)

In [None]:
torch.max(y_hat,1).indices.type(torch.float32)

In [None]:
val_y.dtype

In [None]:
torch.nn.BCELoss()(val_y, val_y)

In [None]:
torch.max(y_hat,1).indices.type(torch.float32)

In [None]:
torch.nn.BCELoss()(val_y[:,0], val_y[:,0])

In [None]:
torch.nn.BCELoss()(y_hat, val_y)

In [None]:
?loss_fn

In [None]:
mod = nn()
mod.fit(torch.Tensor(result[0]["x"]),
        torch.Tensor(result[0]["f"]),
        n_epochs = 100, loss_fn = loss_fn)

In [None]:
yhat = mod.pred(val_x)

In [None]:
y

In [None]:
model = nn(n_inputs = 1)
n_it = 100
batch_size = 64
decay_rate = .5
conservative = False
loss_fn = torch.nn.CrossEntropyLoss()

In [None]:
bandit_selects = [None]
# initialize hyperparameters
alpha = dict.fromkeys(input_data["source_task"], [1])
beta = dict.fromkeys(input_data["source_task"], [1])
pi = dict.fromkeys(input_data["source_task"], [0])

mod = nn()
val_x, val_y = mod.prepare_data(input_data["X_target_val"], input_data["y_target_val"])

# initialize model from target training data
X_current, y_current = mod.prepare_data(input_data["X_target_train"], input_data["y_target_train"])
mod.fit( X_current, y_current, n_epochs = 100, loss_fn = loss_fn)
l = mod.evaluate(val_x, val_y, loss_fn = loss_fn)
losses = [l]
model_old = copy.deepcopy(mod.model)

for t in range(n_it):
    # select bandit
    bandit_current, pi = get_bandit(input_data, alpha, beta,t, pi)
    bandit_selects.append(bandit_current)
    
    # set training data at this iteration
    X_current, y_current, _ = subset_data(input_data["source_dict"], 
                               key_value = bandit_current,
                               key_name = "task", test_size = 0)
    batch_id = random.choices(list(range(0, len(y_current))), k = batch_size)
    X_current, y_current = X_current[batch_id, :], y_current[batch_id]

    #X_current = np.concatenate((X_current, input_data["X_target_val"]), axis = 0)
    #y_current = np.concatenate((y_current, input_data["y_target_val"]), axis = 0)
    
    X_current, y_current = mod.prepare_data(X_current, y_current)
    # train model
    #mod = model.initialize(n_inputs = 1)
    mod = nn()
    mod.fit(X_current, y_current, loss_fn = loss_fn, n_epochs = 200)
    
    
    
    # combine parameters with previous model
    #mod.combine_with_old(model_old, decay_rate = 1)
    
    # evaluate model
    l = mod.evaluate(val_x, val_y, loss_fn = loss_fn)
    losses += [l]
    model_old = copy.deepcopy(mod.model)
    print(t, ", current = ", mod.model[2].weight[0,0].detach().numpy(),
          ", old = ", model_old[2].weight[0,0].detach().numpy())
    # update bandit parameters
    if conservative:
        thres = 100000
    else:
        thres = avg_loss(bandit_selects, losses, bandit_current)
    alpha, beta = update_hyper_para(alpha, beta, t, losses,
                                    bandit_current,
                                    thres = thres
                                   )
    mod.save(path = working_path / ("current" + str(t)), x_new = X_current, y_new = y_current, para = True)
    mod.save(path = working_path / str(t), x_new = val_x, y_new = val_y, para = True)

In [None]:
save_files(working_path, alpha, beta, losses, bandit_selects, pi, pi)

In [None]:
model

In [None]:
"weight" in dir(model_old[1])

In [None]:
m1 = nn()
m1.initialize()
print(m1.model[2].weight[0,0].detach().numpy(), ", old = ", m2[2].weight[0,0].detach().numpy())


m1.fit(val_x, val_y)

m2 = copy.deepcopy(m1.model)
print(m1.model[2].weight[0,0].detach().numpy(), ", old = ", m2[2].weight[0,0].detach().numpy())


m1.fit(X_current, y_current)

print(m1.model[2].weight[0,0].detach().numpy(), ", old = ", m2[2].weight[0,0].detach().numpy())

m1.combine_with_old(m2)
print(m1.model[2].weight[0,0].detach().numpy(), ", old = ", m2[2].weight[0,0].detach().numpy())



In [None]:
m1 = nn()
#m1.initialize()
m2 = copy.deepcopy(m1.model)
print(m1.model[2].weight[0,0].detach().numpy(), ", old = ", m2[2].weight[0,0].detach().numpy())

m1.fit(X_current, y_current)
print(m1.model[2].weight[0,0].detach().numpy(), ", old = ", m2[2].weight[0,0].detach().numpy())



In [None]:
m = nn()
for i in range(len(model_old)):
    if "weight" in dir(model_old[i]):
        m.model[i].weight = torch.nn.Parameter(decay_rate * m2[i].weight + (1 - decay_rate) * m1.model[i].weight)
        m.model[i].bias = torch.nn.Parameter(decay_rate * m2[i].bias + (1 - decay_rate) * m1.model[i].bias)

In [None]:
print(m1.model[2].weight[0,0].detach().numpy(), ", old = ", m2[2].weight[0,0].detach().numpy(),
     ", combined = ", m.model[2].weight[0,0].detach().numpy())



In [None]:
print( m.model[2].weight[0,0].detach().numpy())



In [None]:
with torch.no_grad():
    m.model[i].weight = torch.nn.Parameter((1 - decay_rate) * m1.model[i].weight)
        

In [None]:
m2[0].weight



In [None]:
m1.model[0].weight


In [None]:
m1.model[0].weight = m2[0].weight

In [None]:
m1.model[0].weight

In [None]:
for name, param in m.model.named_parameters():
    param.copy_(m1.model[name])

In [None]:
m.model[2].weight[0,0] = torch.nn.Parameter(torch(0))
print(m.model[2].weight[0,0].detach().numpy())

In [None]:
m1.combine_with_old(m2, decay_rate = 0.5)

In [None]:
print(m1.model[2].weight[0,0].detach().numpy(), ", old = ", m2[2].weight[0,0].detach().numpy(),
     ", combined = ", m1.model[2].weight[0,0].detach().numpy())




In [None]:
model = nn()
model.fit(val_x, val_y)
yhat = model.pred(val_x)
pd.DataFrame.from_dict({"x": [item[0] for item in val_x.tolist()],
                        "y": val_y.tolist(),
                       "yhat": [item[0] for item in yhat.tolist()]}).to_csv(working_path / "subset_data.csv")

### Combining two nn models 

In [None]:
class nn():
    """
    Neural network
    """
    def __init__(self, n_inputs = 1, n_outputs = 2, H = 200):
        self.model = torch.nn.Sequential(
            torch.nn.Linear(n_inputs, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, n_outputs),
        )
    def initialize(self, n_inputs = 1, n_outputs = 2, H = 200):
        self.model = torch.nn.Sequential(
            torch.nn.Linear(n_inputs, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, n_outputs),
        )
        return self
    def prepare_data(self, x, y):
        if type(x) != torch.Tensor:
            if len(x.shape) > 1:
                x = torch.tensor(x[:, 1:]).float()
            else:
                x = torch.tensor(x).float()
        if type(y) != torch.Tensor:
            y = torch.tensor(y).float()
        return x, y
    def fit(self, x_train, y_train, loss_fn = torch.nn.MSELoss(), n_epochs = 10, lr = 1e-4):
        model = self.model
        optimizer = torch.optim.Adam(self.model.parameters(), lr = lr)
        for epoch in range(n_epochs):
            # get loss
            optimizer.zero_grad()
            y_hat = self.model(x_train[:, np.newaxis])
            loss = loss_fn(y_train, y_hat)

            # update weights
            loss.backward()
            optimizer.step()
        return self
            
        return model
    def evaluate(self, x_test, y_test, loss_fn = torch.nn.MSELoss()):
        with torch.no_grad():
            y_hat = self.model(x_test[:, np.newaxis])
            l = loss_fn(y_test, y_hat)
        return l
    def pred(self, x_new):
        with torch.no_grad():
            y_hat = self.model(x_new)
        return y_hat
    def combine_with_old(self, model_old, decay_rate = .5):
        for i in range(len(model_old)):
            if "weight" in dir(model_old[i]):
                self.model[i].weight = torch.nn.Parameter(decay_rate * model_old[i].weight + (1 - decay_rate) * self.model[i].weight)
                self.model[i].bias = torch.nn.Parameter(decay_rate * model_old[i].bias + (1 - decay_rate) * self.model[i].bias)
    def save(self, path = ".", x_new = None, y_new = None, para = True):
        x_new, y_new = self.prepare_data(x_new, y_new)
        path = Path(path)
        path.mkdir(parents = True, exist_ok = True)
        y_hat = self.pred(x_new)
        if not x_new is None:
            pd.DataFrame.from_dict({"x": [item[0] for item in x_new.tolist()], 
                        "y": y_new,
                        "y_hat": [item[0] for item in y_hat.tolist()]
                       }).to_csv(path / Path("fitted.csv"))

In [None]:
np.random.seed(1234)
f, betas, zs = random_functions(args["n_tasks"], 6,
                                sigma_between = sigma_setting[s][0],
                                sigma_within = sigma_setting[s][-1])
result = []
for i, fi in enumerate(f):
    x = np.random.uniform(0, 1, 100)
    result.append({
        "task": i,
        "x": x,
        "f": fi(x),
        "y": fi(x) + np.random.normal(0, .1, len(x))
    })
# save data
data_df = pd.concat([pd.DataFrame(r) for r in result])
data_df = data_df.reset_index()
data_dict = data_df.to_dict(orient = "list")

input_data = prepare_input(data_dict,
                                   target_task = 5,
                                   target_test_size = .4,
                                  preprocess = True)

In [None]:
val_x, val_y = mod.prepare_data(input_data["X_target_val"], input_data["y_target_val"])

# initialize model from target training data
X_current, y_current = mod.prepare_data(input_data["X_target_train"], input_data["y_target_train"])
mod.fit( X_current, y_current, n_epochs = 100, loss_fn = loss_fn)

In [None]:
m1 = nn()
m1.fit( X_current, y_current, n_epochs = 100)

In [None]:
m1.model[2].weight[2, 2]

In [None]:
m2 = nn()
m2.fit(val_x, val_y)

In [None]:
m2.model[2].weight[2, 2]

In [None]:
m1.combine_with_old(m2.model, decay_rate = .5)

In [None]:
m1.model[2].weight[2, 2]

In [None]:
bandit_selects = [None]
# initialize hyperparameters
alpha = dict.fromkeys(input_data["source_task"], [1])
beta = dict.fromkeys(input_data["source_task"], [1])
pi = dict.fromkeys(input_data["source_task"], [0])

mod = nn()
val_x, val_y = mod.prepare_data(input_data["X_target_val"], input_data["y_target_val"])

# initialize model from target training data
X_current, y_current = mod.prepare_data(input_data["X_target_train"], input_data["y_target_train"])
mod.fit( X_current, y_current, n_epochs = 100)
l = mod.evaluate(val_x, val_y)
losses = [l]

model_old = copy.deepcopy(mod.model)

for t in range(n_it):
    
    # select bandit
    bandit_current, pi = get_bandit(input_data, alpha, beta,t, pi)
    bandit_selects.append(bandit_current)
    
    # set training data at this iteration
    X_current, y_current, _ = subset_data(input_data["source_dict"], 
                               key_value = bandit_current,
                               key_name = "task", test_size = 0)
    batch_id = random.choices(list(range(0, len(y_current))), k = batch_size)
    X_current, y_current = X_current[batch_id, :], y_current[batch_id]
    X_current = np.concatenate((X_current, input_data["X_target_val"]), axis = 0)
    y_current = np.concatenate((y_current, input_data["y_target_val"]), axis = 0)
    X_current, y_current = mod.prepare_data(X_current, y_current)
    
    # train model
    #mod = model.initialize(n_inputs = 1)
    mod = nn()
    mod.fit(X_current, y_current, loss_fn = loss_fn, n_epochs = 200)
    
    
    
    # combine parameters with previous model
    #mod.combine_with_old(model_old, decay_rate = 1)
    
    # evaluate model
    l = mod.evaluate(val_x, val_y, loss_fn = loss_fn)
    losses += [l]
    model_old = copy.deepcopy(mod.model)
    print(t, ", current = ", mod.model[2].weight[0,0].detach().numpy(),
          ", old = ", model_old[2].weight[0,0].detach().numpy())
    # update bandit parameters
    if conservative:
        thres = 100000
    else:
        thres = avg_loss(bandit_selects, losses, bandit_current)
    alpha, beta = update_hyper_para(alpha, beta, t, losses,
                                    bandit_current,
                                    thres = thres
                                   )
    mod.save(path = working_path / ("current" + str(t)), x_new = X_current, y_new = y_current, para = True)
    mod.save(path = working_path / str(t), x_new = val_x, y_new = val_y, para = True)