# Multitask Clean

In [1]:
%load_ext autoreload
%autoreload 2

import pinot
from pinot.net import Net
from pinot import data
import torch
import dgl

Using backend: pytorch


In [3]:
def get_data(col=None, num_cols=9, device=None):
    """Load the moonshot meta dataset, move it to a device, split and batch it.

    Parameters
    ----------
    col : int, optional
        Index of a single target column to keep. When ``None`` (the default)
        the first ``num_cols`` target columns are kept instead.
    num_cols : int, optional
        Number of leading target columns to keep when ``col`` is ``None``.
        Ignored when ``col`` is given. Defaults to 9.
    device : torch.device or str, optional
        Device every element of every record is moved to. When ``None`` the
        data goes to ``cuda:0`` if CUDA is available and to the CPU otherwise.

    Returns
    -------
    ds_tr : list of tuple
        Training batches as ``(input, targets)`` pairs, with ``targets``
        restricted to the selected column(s).
    ds_te : list of tuple
        Test batches in the same format.
    num_data : torch.Tensor
        Zero-dimensional tensor holding the number of non-NaN target entries
        in the (column-filtered) training batches.
    """
    if device is None:
        # Fall back to the CPU so the function still works without a GPU.
        device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    if not isinstance(device, torch.device):
        device = torch.device(device)

    def filter_cols(ds):
        # Targets are indexed as 2-D tensors; the single-column case is
        # reshaped to (-1, 1) so both branches return 2-D targets.
        if col is None:
            return [(d[0], d[1][:, :num_cols]) for d in ds]
        return [(d[0], d[1][:, col].view(-1, 1)) for d in ds]

    ds = data.moonshot_meta()

    # Move every element of every record to the requested device.
    device_ds = [tuple(item.to(device) for item in d) for d in ds]

    # NOTE(review): [4, 1] is presumably a train:test ratio -- confirm against
    # pinot.data.utils.split.
    ds_tr, ds_te = data.utils.split(device_ds, [4, 1])

    # The batch size equals the split size, presumably yielding one batch per
    # split -- confirm against pinot.data.utils.batch.
    ds_tr = data.utils.batch(ds_tr, len(ds_tr))
    ds_te = data.utils.batch(ds_te, len(ds_te))

    ds_tr = filter_cols(ds_tr)
    ds_te = filter_cols(ds_te)

    # Count the observed (non-NaN) target entries across the training batches.
    num_data = torch.cat([~torch.isnan(d[1]) for d in ds_tr]).sum()
    return ds_tr, ds_te, num_data

# Multitask Train and Test

In [5]:
from pinot.multitask import MultitaskNet
from pinot.regressors import (VariationalGaussianProcessRegressor,
                              ExactGaussianProcessRegressor)

# Representation: a 'GraphConv' layer constructor from pinot's legacy DGL
# module, combined with a three-times-repeated (32, 'tanh') configuration.
representation = pinot.representation.Sequential(
    pinot.representation.dgl_legacy.gn(model_name='GraphConv'),
    [32, 'tanh'] * 3,
)

# Multitask network with an exact GP output regressor.
# Alternative left disabled by the author:
#   output_regressor=VariationalGaussianProcessRegressor, num_data=num_data
# NOTE(review): num_data is only produced by get_data(), which is called in a
# later cell, so enabling that variant needs the data to be loaded first.
mtn = MultitaskNet(
    representation,
    output_regressor=ExactGaussianProcessRegressor,
)
mtn = mtn.to(torch.device('cuda:0'))

# Adam over all network parameters, with no weight decay.
optimizer = torch.optim.Adam(
    mtn.parameters(),
    lr=1e-4,
    weight_decay=0.0,
)

## Run experiment.

In [6]:
from pinot.app.experiment import TrainAndTest
from pinot.multitask.metrics import r2, pearson, rmse, avg_nll
from pinot.multitask.experiment import MultitaskTrain

# Load the train/test batches with the default column selection.
ds_tr, ds_te, num_data = get_data()

# Single-epoch train-and-test run, using the multitask training class and
# reporting the four multitask metrics imported above.
tt = TrainAndTest(
    net=mtn,
    data_tr=ds_tr,
    data_te=ds_te,
    optimizer=optimizer,
    n_epochs=1,
    metrics=[r2, pearson, rmse, avg_nll],
    train_cls=MultitaskTrain,
)

# TODO: stratify results by task
results = tt.run()