In [1]:
from simulate_module import *

import pandas as pd
from pathlib import Path
import numpy as np
np.random.seed(1234)
import torch

import copy

## Preparing data

In [6]:
# Reset the NumPy RNG so the simulated tasks are identical on every run.
np.random.seed(1234)

# Task functions come from simulate_module; the positional arguments (15, 6)
# are forwarded unchanged -- see random_functions for their meaning.
f, betas, zs = random_functions(15, 6, sigma_between = 10, sigma_within = .2)

# One record per task: 100 uniform inputs on [0, 1], the function values at
# those inputs, and the same values with N(0, 0.1) noise added.
result = []
for task_id, task_fn in enumerate(f):
    inputs = np.random.uniform(0, 1, 100)
    clean = task_fn(inputs)
    noisy = task_fn(inputs) + np.random.normal(0, .1, len(inputs))
    result.append({"task": task_id, "x": inputs, "f": clean, "y": noisy})

# Stack all tasks into one long frame, then into a column -> list dictionary.
# reset_index() keeps the per-task row number as an "index" column.
per_task_frames = list(map(pd.DataFrame, result))
data_df = pd.concat(per_task_frames).reset_index()
data_dict = data_df.to_dict(orient = "list")

# Split into source tasks and the target task (task 5, 40% held out).
input_data = prepare_input(
    data_dict,
    target_task = 5,
    target_test_size = .4,
    preprocess = True,
)

## `nn` class

In [62]:
class nn():
    """
    Fully connected ReLU network with small training / evaluation helpers.

    The wrapped ``torch.nn.Sequential`` lives in ``self.model``. It consists of
    four ``Linear`` layers of width ``H``, each followed by ``ReLU``, and a final
    ``Linear`` output layer, so the ``Linear`` layers sit at indices 0, 2, 4, 6, 8.
    """
    def __init__(self, n_inputs = 1, n_outputs = 1, H = 200):
        """Create a freshly initialised network with hidden width ``H``."""
        self.model = self._build_model(n_inputs, n_outputs, H)

    @staticmethod
    def _build_model(n_inputs, n_outputs, H):
        """Return the Sequential architecture shared by __init__ and initialize."""
        return torch.nn.Sequential(
            torch.nn.Linear(n_inputs, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, n_outputs),
        )

    def initialize(self, n_inputs = 1, n_outputs = 1, H = 200):
        """Replace ``self.model`` with a freshly initialised network; returns self."""
        self.model = self._build_model(n_inputs, n_outputs, H)
        return self

    def prepare_data(self, x, y):
        """
        Convert array-like ``x`` and ``y`` to float tensors.

        A non-tensor ``x`` with more than one dimension loses its first column
        (presumably an intercept/ID column added by preprocessing -- TODO confirm).
        Values that are already tensors, or ``None``, are returned unchanged.

        Returns
        -------
        (x, y) tuple
        """
        if x is not None and not isinstance(x, torch.Tensor):
            x = np.asarray(x)
            if len(x.shape) > 1:
                x = torch.tensor(x[:, 1:]).float()
            else:
                x = torch.tensor(x).float()
        if y is not None and not isinstance(y, torch.Tensor):
            y = torch.tensor(y).float()
        return x, y

    def _predict_like(self, x, y):
        """
        Run the network on ``x`` and give the prediction the same shape as ``y``.

        Without this alignment a 1-D target and an (N, 1) prediction are silently
        broadcast against each other by the loss, which then averages over all
        N x N pairs instead of the N matching pairs.
        """
        if x.dim() == 1:
            # a vector of samples: add the feature axis expected by Linear
            x = x[:, None]
        y_hat = self.model(x)
        if y_hat.shape != y.shape and y_hat.numel() == y.numel():
            y_hat = y_hat.reshape(y.shape)
        return y_hat

    def fit(self, x_train, y_train, loss_fn = torch.nn.MSELoss(), n_epochs = 10, lr = 1e-4):
        """
        Train ``self.model`` with Adam for ``n_epochs`` full-batch steps.

        Parameters
        ----------
        x_train, y_train : torch.Tensor
            Inputs and targets (see ``prepare_data``).
        loss_fn : callable
            Called as ``loss_fn(prediction, target)``.
        n_epochs : int
            Number of optimisation steps.
        lr : float
            Adam learning rate.

        Returns
        -------
        self
        """
        optimizer = torch.optim.Adam(self.model.parameters(), lr = lr)
        for _ in range(n_epochs):
            # get loss
            optimizer.zero_grad()
            y_hat = self._predict_like(x_train, y_train)
            loss = loss_fn(y_hat, y_train)

            # update weights
            loss.backward()
            optimizer.step()
        return self

    def evaluate(self, x_test, y_test, loss_fn = torch.nn.MSELoss()):
        """Return ``loss_fn(prediction, y_test)`` computed without gradient tracking."""
        with torch.no_grad():
            y_hat = self._predict_like(x_test, y_test)
            l = loss_fn(y_hat, y_test)
        return l

    def pred(self, x_new):
        """Return the raw network output for ``x_new`` without gradient tracking."""
        with torch.no_grad():
            y_hat = self.model(x_new)
        return y_hat

    def combine_with_old(self, model_old, decay_rate = .5):
        """
        Blend this network's parameters with those of ``model_old`` in place.

        For every layer of ``model_old`` that has a ``weight``, the matching layer
        of ``self.model`` becomes ``decay_rate * old + (1 - decay_rate) * current``
        (same for ``bias`` when the layer has one).

        Parameters
        ----------
        model_old : torch.nn.Sequential
            Network with the same layer layout as ``self.model``.
        decay_rate : float
            Weight given to the old parameters.
        """
        # The blend is a plain value update, so keep it out of the autograd graph
        # and write into the existing Parameter objects.
        with torch.no_grad():
            for i in range(len(model_old)):
                old_layer, new_layer = model_old[i], self.model[i]
                if not hasattr(old_layer, "weight"):
                    continue
                new_layer.weight.copy_(
                    decay_rate * old_layer.weight + (1 - decay_rate) * new_layer.weight
                )
                if getattr(old_layer, "bias", None) is not None and new_layer.bias is not None:
                    new_layer.bias.copy_(
                        decay_rate * old_layer.bias + (1 - decay_rate) * new_layer.bias
                    )

    def save(self, path = ".", x_new = None, y_new = None, para = True):
        """
        Create ``path`` and, when ``x_new`` is given, write ``path/fitted.csv``.

        The CSV holds the first input feature (``x``), the targets (``y``, only
        when ``y_new`` is given) and the first network output (``y_hat``).

        Parameters
        ----------
        path : str or Path
            Output directory; created with parents if missing.
        x_new, y_new : array-like or torch.Tensor, optional
            Data to predict on; converted with ``prepare_data``.
        para : bool
            Accepted for interface compatibility; not used by this method.
        """
        path = Path(path)
        path.mkdir(parents = True, exist_ok = True)
        if x_new is None:
            # nothing to predict on: the directory is the only output
            return
        x_new, y_new = self.prepare_data(x_new, y_new)
        if x_new.dim() == 1:
            x_new = x_new[:, None]
        y_hat = self.pred(x_new)
        columns = {"x": x_new[:, 0].tolist()}
        if y_new is not None:
            # plain floats, so the CSV never contains tensor reprs
            columns["y"] = y_new.detach().reshape(-1).tolist()
        columns["y_hat"] = y_hat[:, 0].tolist()
        pd.DataFrame.from_dict(columns).to_csv(path / "fitted.csv")

## Test `combine_with_old`

In [63]:
# two batches of data: the target validation split and the target training split
# `mod` is created here so the cell also runs on a fresh kernel; it is only
# used for its data-conversion helper.
mod = nn()
val_x, val_y = mod.prepare_data(input_data["X_target_val"], input_data["y_target_val"])
# use the training split here, otherwise both "batches" are the same data
train_x, train_y = mod.prepare_data(input_data["X_target_train"], input_data["y_target_train"])

In [41]:
# First training pass (on the validation batch); fit() returns the wrapper itself.
m1 = nn().fit(val_x, val_y)

# Probe a single weight of the second Linear layer to track how it changes.
print(m1.model[2].weight[2, 2])

tensor(0.0030, grad_fn=<SelectBackward>)


In [42]:
# Snapshot of the once-trained network; its probed weight must equal m1's.
m2 = copy.deepcopy(m1)
snapshot_layer = m2.model[2]
print(snapshot_layer.weight[2, 2])

tensor(0.0030, grad_fn=<SelectBackward>)


In [43]:
# Second training pass on the other batch; the probed weight should move away
# from the snapshot value. fit() returns the same wrapper object.
m1 = m1.fit(train_x, train_y)
print(m1.model[2].weight[2, 2])

tensor(0.0020, grad_fn=<SelectBackward>)


In [44]:
# Record both probe values before blending so the result can be checked
# numerically instead of by eye.
probe_new = m1.model[2].weight[2, 2].detach().clone()
probe_old = m2.model[2].weight[2, 2].detach().clone()

m1.combine_with_old(m2.model, decay_rate = 0.5)
print(m1.model[2].weight[2, 2])

# with decay_rate = 0.5 the blended weight is the mean of old and new
assert torch.allclose(m1.model[2].weight[2, 2].detach(), 0.5 * probe_old + 0.5 * probe_new)

tensor(0.0025, grad_fn=<SelectBackward>)


## Train model

In [64]:
# --- experiment settings -------------------------------------------------
n_it = 10                      # number of bandit iterations in the training loop
batch_size = 64                # source samples drawn per iteration
decay_rate = .5                # intended weight of the previous model when blending
conservative = False           # True -> fixed threshold, False -> avg_loss threshold
loss_fn = torch.nn.MSELoss()   # loss used for training and validation

# network wrapper with a single input feature
model = nn(n_inputs = 1)

In [65]:
# Directory that receives the per-iteration outputs written by mod.save().
working_path = Path("debug_nn_combine")
working_path.mkdir(exist_ok=True)

In [66]:
bandit_selects = [None]
# initialize hyperparameters
# One independent list per source task. dict.fromkeys(keys, [1]) would make
# every task share a single list object, so an in-place update for one bandit
# would show up for all of them.
alpha = {task: [1] for task in input_data["source_task"]}
beta = {task: [1] for task in input_data["source_task"]}
pi = {task: [0] for task in input_data["source_task"]}

# initialize model from target training data
mod = nn()
val_x, val_y = mod.prepare_data(input_data["X_target_val"], input_data["y_target_val"])
X_current, y_current = mod.prepare_data(input_data["X_target_train"], input_data["y_target_train"])
mod.fit(X_current, y_current, n_epochs = 100)

l = mod.evaluate(val_x, val_y)
losses = [l]

# parameters of the previous iteration, blended into each newly trained model
model_old = copy.deepcopy(mod.model)

for t in range(n_it):

    # select bandit
    bandit_current, pi = get_bandit(input_data, alpha, beta, t, pi)
    bandit_selects.append(bandit_current)

    # set training data at this iteration
    X_current, y_current, _ = subset_data(input_data["source_dict"],
                                          key_value = bandit_current,
                                          key_name = "task", test_size = 0)
    # sample the source batch with replacement through NumPy so the draw is
    # covered by np.random.seed and does not rely on `random` being in scope
    batch_id = np.random.choice(len(y_current), size = batch_size, replace = True)
    X_current, y_current = X_current[batch_id, :], y_current[batch_id]
    # add the target *training* split; the validation split is reserved for
    # evaluate() below, training on it would leak into the bandit's reward
    X_current = np.concatenate((X_current, input_data["X_target_train"]), axis = 0)
    y_current = np.concatenate((y_current, input_data["y_target_train"]), axis = 0)
    X_current, y_current = mod.prepare_data(X_current, y_current)

    # train a fresh model on this iteration's data
    mod = nn()
    mod.fit(X_current, y_current, loss_fn = loss_fn, n_epochs = 200)

    # combine parameters with previous model
    mod.combine_with_old(model_old, decay_rate = decay_rate)
    print(t, ", current = ", mod.model[4].weight[0,0].detach().numpy(),
          ", old = ", model_old[4].weight[0,0].detach().numpy())

    # the blended model becomes the "old" model of the next iteration
    model_old = copy.deepcopy(mod.model)

    # evaluate model
    l = mod.evaluate(val_x, val_y, loss_fn = loss_fn)
    losses.append(l)

    # update bandit parameters
    if conservative:
        thres = 100000
    else:
        thres = avg_loss(bandit_selects, losses, bandit_current)
    alpha, beta = update_hyper_para(alpha, beta, t, losses,
                                    bandit_current,
                                    thres = thres
                                   )

    # write fitted values for this iteration's training data and for the validation data
    mod.save(path = working_path.joinpath("current" + str(t)), x_new = X_current, y_new = y_current, para = True)
    mod.save(path = working_path.joinpath(str(t)), x_new = val_x, y_new = val_y, para = True)

0 , current =  0.021639107 , old =  0.0062913853
1 , current =  0.04301337 , old =  0.021639107
2 , current =  0.033790406 , old =  0.04301337
3 , current =  0.012443873 , old =  0.033790406
4 , current =  -0.024848742 , old =  0.012443873
5 , current =  -0.00790301 , old =  -0.024848742
6 , current =  -0.01955099 , old =  -0.00790301
7 , current =  0.0025633778 , old =  -0.01955099
8 , current =  0.0033883443 , old =  0.0025633778
9 , current =  -0.029628009 , old =  0.0033883443
