# Multi-task learning for regression

## Motivation for MTL

<img src="http://nicksenger.com/onecatholiclife/wp-content/uploads/2016/08/KarateKid_WaxOnWaxOff.jpg" alt="drawing" width="400"/>

Knowledge in one task can give insight and assist in learning another

Multi-task learning means learning several tasks in parallel while using a shared representation.

### Insufficient Data

### Form of regularization

Multi-task learning works because regularization induced by requiring an algorithm to perform well on a related task can be superior to regularization that prevents overfitting by penalizing all complexity uniformly.

### Reduced Training Time 

Instead of training a separate model for each task, we can train a single model for all tasks.

## Problems with MTL

### negative transfer 

### dominating tasks

### more complicated loss function

as a result of multiple summed losses

## MTL for Classification

There are lots of examples: spam detection, computer vision, autonomous vehicles, ...




# Motivating Coding Example

From Richard Caruana's thesis paper on Multi-task learning 

## Tasks

* Task 1 = $B1      \vee Parity(B2-B8)$
* Task 2 = $\neg(B1) \vee  Parity(B2-B8)$
* Task 3 = $B1      \wedge Parity(B2-B8)$
* Task 4 = $\neg(B1) \wedge Parity(B2-B8)$

example

input: 10100101

Task 1: $B1 == 1 \vee Parity(B2-B8) \rightarrow B1 == 1 \rightarrow Task1 = True$

Task 2: $B1 == 0 \vee Parity(B2-B8) \rightarrow B1 \neq 0$, and bits 2 through 8 contain three 1s (an odd count), so $Parity(B2-B8) = False \rightarrow Task 2 = False$

Task 3: $B1 == 1 \wedge Parity(B2-B8) \rightarrow True \wedge False \rightarrow Task 3 = False$

Task 4: $Parity(B2-B8) == False \rightarrow Task 4 = False$

### shared information between tasks

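We can see that there is shared information between tasks. For example, Task 1 and Task 2 are both true whenever the parity of bits 2 through 8 is true; likewise, Task 3 and Task 4 are both false whenever the parity of bits 2 through 8 is false. In the remaining cases each task reduces to $B1$ or $\neg(B1)$ (in the example above the parity is false, so Task 1 is true while Task 2 is false).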




In [3]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

from make_targets import getParity, get_data
from model_training import train_model, test_model

################################################################################################################################
# DEFINE DATASET

class BinaryDataset(Dataset):
    """All 256 eight-bit binary inputs paired with four per-task targets.

    The inputs are generated in code: row ``i`` of ``x`` holds the digits of
    ``i`` written as a zero-padded 8-character binary string (most significant
    bit first).  The targets are read from the CSV columns ``'1'`` to ``'4'``.

    NOTE(review): the CSV is assumed to contain one row per generated input,
    in the same order (row ``i`` is the target for integer ``i``) -- confirm
    against the script that writes the file.

    Args:
        csv_path: Location of the targets CSV.  Defaults to ``./targets.csv``,
            the path that was previously hard-coded.
    """

    def __init__(self, csv_path: str = './targets.csv') -> None:
        # data loading
        df = pd.read_csv(csv_path)

        # One row per integer 0..255, one column per bit.
        bit_rows = [[int(bit) for bit in format(value, '08b')]
                    for value in range(256)]

        self.x = torch.FloatTensor(bit_rows)
        self.y1 = self._target_column(df, '1')
        self.y2 = self._target_column(df, '2')
        self.y3 = self._target_column(df, '3')
        self.y4 = self._target_column(df, '4')
        self.num_samples = self.x.shape[0]

    @staticmethod
    def _target_column(df, name):
        """Return CSV column ``name`` as a float32 tensor of shape (rows, 1)."""
        # Convert the ndarray directly; wrapping it in a Python list first
        # takes PyTorch's slow list-of-ndarrays path and emits a warning.
        return torch.tensor(df[name].to_numpy(), dtype=torch.float32).unsqueeze(1)

    def __getitem__(self, index):
        """Return ``(x, y1, y2, y3, y4)`` for the sample(s) at ``index``."""
        return self.x[index], self.y1[index], self.y2[index], self.y3[index], self.y4[index]

    def __len__(self) -> int:
        """Return the number of generated input rows."""
        return self.num_samples
    
################################################################################################################################
# DEFINE MODEL

class MultiTask(nn.Module):
    """Fully connected network: 8 inputs -> 100 -> 20 -> ``num_targets`` outputs.

    Both hidden layers use ReLU; the output layer uses a sigmoid, so every
    output value lies between 0 and 1.

    Args:
        num_targets: Number of output units (one per task being predicted).
    """

    def __init__(self, num_targets):
        super().__init__()
        self.hidden1 = nn.Linear(in_features=8, out_features=100)   # 8 input units, 100 hidden units
        self.hidden2 = nn.Linear(in_features=100, out_features=20)  # 100 -> 20 hidden units
        self.output = nn.Linear(in_features=20, out_features=num_targets)  # one unit per target

    def forward(self, x):
        """Run ``x`` through both ReLU hidden layers and the sigmoid output layer."""
        first_hidden = F.relu(self.hidden1(x))
        second_hidden = F.relu(self.hidden2(first_hidden))
        logits = self.output(second_hidden)
        # sigmoid squashes each logit into the range (0, 1)
        return torch.sigmoid(logits)
    
################################################################################################################################
# IMPORT DATA

# format and shuffle data
dataset = BinaryDataset()
dataset_size = len(dataset)
valid_split = 0.2   # fraction of the samples held out for validation
random_seed = 42

# Creating data indices for training and validation splits:
# the indices are shuffled with a fixed NumPy seed so the split is the same on
# every run; the first `split` shuffled indices become the validation set.
indices = list(range(dataset_size))
split = int(np.floor(valid_split * dataset_size))
np.random.seed(random_seed)
np.random.shuffle(indices)
train_indices, val_indices = indices[split:], indices[:split]

# Creating PT data samplers and loaders:
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

# batch_size equals the subset size, so each loader yields its whole subset
# as a single batch.
train_loader = DataLoader(dataset, batch_size=len(train_sampler),
                          sampler=train_sampler)
validation_loader = DataLoader(dataset, batch_size=len(valid_sampler),
                               sampler=valid_sampler)
dataloaders = [train_loader, validation_loader]

# initialize models: one single-output network per task.  All four are built
# before any training starts, exactly as before, so the order in which weights
# are initialised is unchanged.
single_task1 = MultiTask(1) # task 1
single_task2 = MultiTask(1) # task 2
single_task3 = MultiTask(1) # task 3
single_task4 = MultiTask(1) # task 4
single_task_models = {
    1: single_task1,
    2: single_task2,
    3: single_task3,
    4: single_task4,
}

# train individual models
# NOTE(review): make_graphs is neither defined nor imported in the visible part
# of this file -- confirm it is available before running this cell.
stl_results = {}
for task_id, stl_model in single_task_models.items():
    print(f"training task {task_id} (single task learning): ")
    print("----------------------------------------------------")
    train_acc, test_acc, target_acc = train_model(stl_model, dataloaders, [task_id])
    make_graphs(train_acc, test_acc, target_acc, f"task{task_id}_STL")
    stl_results[task_id] = (train_acc, test_acc, target_acc)

# Keep the original per-task result names bound for any code that uses them.
train1_acc, test1_acc, target1_acc = stl_results[1]
train2_acc, test2_acc, target2_acc = stl_results[2]
train3_acc, test3_acc, target3_acc = stl_results[3]
train4_acc, test4_acc, target4_acc = stl_results[4]

# save models
# Create the output directory first so a missing ./models folder cannot throw
# away the trained weights at the very last step.
model_dir = "./models"
os.makedirs(model_dir, exist_ok=True)
for task_id, stl_model in single_task_models.items():
    torch.save(stl_model.state_dict(), f"{model_dir}/single_task{task_id}.pth")