In [1]:
import sys,os,argparse,time
import numpy as np
import torch
import utils
from datetime import datetime
import pickle
# Wall-clock timestamp taken when the notebook starts; it is not read again in the cells visible here.
tstart=time.time()

In [2]:
# Arguments
# Command-line style configuration for the run. Numeric defaults are written as real
# numbers (not strings) so the declared `type` and the default always agree; the parsed
# values are the same as before (10, -3.0, 0.0, 6.0, 0.25).
parser=argparse.ArgumentParser(description='xxx')
parser.add_argument('--seed',               default=0,              type=int,     help='(default=%(default)d)')
parser.add_argument('--device',             default='cpu',          type=str,     help='torch device string, e.g. cpu or cuda:0')
parser.add_argument('--experiment',         default='16_task_groups',       type =str,    help='Mnist or dissertation')
parser.add_argument('--approach',           default='PUGCL',          type =str,    help='Method, always Lifelong Uncertainty-aware learning')
parser.add_argument('--data_path',          default='data/data.csv',     type=str,     help='path to the dataset CSV file')

# Training parameters
parser.add_argument('--output',             default='',             type=str,     help='')
parser.add_argument('--checkpoint_dir',     default='../checkpoints_16_tasks',    type=str,   help='')
parser.add_argument('--n_epochs',           default=100,              type=int,     help='')
parser.add_argument('--batch_size',         default=64,             type=int,     help='')
parser.add_argument('--lr',                 default=0.03,           type=float,   help='')
parser.add_argument('--hidden_size',        default=800,           type=int,     help='')
parser.add_argument('--parameter',          default='',             type=str,     help='')

# UCB HYPER-PARAMETERS
parser.add_argument('--MC_samples',         default=10,             type=int,     help='Number of Monte Carlo samples')
parser.add_argument('--rho',                default=-3.0,           type=float,   help='Initial rho')
parser.add_argument('--sigma1',             default=0.0,            type=float,   help='STD for the 1st prior pdf in scaled mixture Gaussian')
parser.add_argument('--sigma2',             default=6.0,            type=float,   help='STD for the 2nd prior pdf in scaled mixture Gaussian')
parser.add_argument('--pi',                 default=0.25,           type=float,   help='weighting factor for prior')

# Resume options: `--resume yes` reloads the checkpoint numbered `--sti` in a later cell.
parser.add_argument('--resume',             default='no',           type=str,     help='resume?')
parser.add_argument('--sti',                default=1,              type=int,     help='starting task?')

# Jupyter launches the kernel with `-f <connection-file>`; this dummy option absorbs it
# so that parse_args() does not abort on an unrecognised argument.
parser.add_argument("-f", "--fff", help="a dummy argument to fool ipython", default="1")

args=parser.parse_args()
utils.print_arguments(args)

Arguments =
	seed: 0
	device: cpu
	experiment: 16_task_groups
	approach: PUGCL
	data_path: data/data.csv
	output: 
	checkpoint_dir: ../checkpoints_16_tasks
	n_epochs: 100
	batch_size: 64
	lr: 0.03
	hidden_size: 800
	parameter: 
	MC_samples: 10
	rho: -3.0
	sigma1: 0.0
	sigma2: 6.0
	pi: 0.25
	resume: no
	sti: 1
	fff: /Users/jonastjomsland/Library/Jupyter/runtime/kernel-003dd4ae-5b1e-4b75-9dcd-182fa5aa8112.json


In [3]:
# Seed the NumPy and PyTorch RNGs so repeated runs give the same results
np.random.seed(args.seed)
torch.manual_seed(args.seed)

# When CUDA is available, seed every visible GPU (not just the current one) and
# force cuDNN to pick deterministic kernels instead of auto-tuned ones.
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

print("Using device:", args.device)

# Checkpoint
# utils.make_directories is a project helper; its return value is stored on args and
# later joined with file names, so it is presumably the checkpoint directory path.
checkpoint = utils.make_directories(args)
args.checkpoint = checkpoint
print()

# Data loader for the 16-task-group experiment:
from data import dataloader_16_tasks as dataloader

# Training approach:
from training_method import PUGCL

# Model used:
from bayesian_model.bayesian_network import BayesianNetwork

Using device: cpu
16_task_groups_PUGCL
Results will be saved in  ../checkpoints_16_tasks/16_task_groups_PUGCL



In [4]:
print()
print("Starting this session on: ")
print(datetime.now().strftime("%Y-%m-%d %H:%M"))

# Load data:
print("Loading data...")
data, task_outputs, input_size = dataloader.get(data_path=args.data_path)
print("Input size =", input_size, "\nTask info =", task_outputs)
print("Number of data samples: ", len(data[0]['train']['x']))
# Expose the dataset description on args so the model and approach can read it
args.num_tasks = len(task_outputs)
args.input_size = input_size
args.task_outputs = task_outputs
# Cache the loaded data on disk; the context manager guarantees the file is flushed
# and closed even if pickling raises.
with open("data/data.p", "wb") as data_file:
    pickle.dump(data, data_file)

# Initialize Bayesian network
print("Initializing network...")
model = BayesianNetwork(args).to(args.device)

# Initialize the training approach
print("Initialize Lifelong Uncertainty-aware Learning")
approach = PUGCL(model, args=args)
print("-"*100)

# Check whether resuming:
if args.resume == "yes":
    # map_location remaps saved tensors onto the requested device, so a checkpoint
    # written on a GPU machine can still be restored on a CPU-only one.
    # NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
    checkpoint = torch.load(
        os.path.join(args.checkpoint, 'model_{}.pth.tar'.format(args.sti)),
        map_location=args.device,
    )
    model.load_state_dict(checkpoint['model_state_dict'])
    model = model.to(device=args.device)
else:
    # Fresh run: start from the first task group
    args.sti = 0


Starting this session on: 
2020-05-15 10:33
Loading data...
Input size = [1, 29] 
Task info = [(0, 2), (1, 2), (2, 2), (3, 2), (4, 2), (5, 2), (6, 2), (7, 2), (8, 2), (9, 2), (10, 2), (11, 2), (12, 2), (13, 2), (14, 2), (15, 2)]
Number of data samples:  500
Initializing network...
Initialize Lifelong Uncertainty-aware Learning
----------------------------------------------------------------------------------------------------




In [None]:
# Iterate over the task groups, starting at index args.sti:
# loss[i, j] holds the test loss on task group j measured after training on group i.
results_path = os.path.join(args.checkpoint, '{}_{}_{}.txt'.format(args.experiment, args.approach, args.seed))
loss = np.zeros((len(task_outputs), len(task_outputs)), dtype=np.float32)

# When resuming, keep the rows written by the earlier run instead of overwriting
# them with zeros on the first save. The file is only reused if its shape matches.
if args.resume == "yes" and os.path.exists(results_path):
    previous_loss = np.loadtxt(results_path, dtype=np.float32, ndmin=2)
    if previous_loss.shape == loss.shape:
        loss = previous_loss


def _split_to_device(split):
    """Return the (x, y) tensors of one data split as float32 on args.device.

    The first column of x is dropped -- presumably an id/index column; confirm
    against the dataloader.
    """
    inputs = split['x'][:,1:].type(torch.float32).to(args.device)
    targets = split['y'].type(torch.float32).to(args.device)
    return inputs, targets


for task, n_class in task_outputs[args.sti:]:
    print('*'*100)
    print('Task {:2d} ({:s})'.format(task, data[task]['name']))
    print('*'*100)

    # Get data:
    xtrain, ytrain = _split_to_device(data[task]['train'])
    xvalid, yvalid = _split_to_device(data[task]['valid'])

    # Start training
    print("Starting training for the tasks in group: ", task)
    approach.train(task, xtrain, ytrain, xvalid, yvalid)
    print('_'*100)

    # Evaluate on the test split of every task group seen so far (0..task):
    for u in range(task+1):
        xtest, ytest = _split_to_device(data[u]['test'])
        test_loss = approach.eval(u, xtest, ytest, debug=True)
        print("Test on task {:2d} - {:15s}: loss={:.3f}".format(u, data[u]['name'], test_loss))
        loss[task, u] = test_loss

    # Save the loss matrix after every task group so partial results survive an interruption
    print("Saving at " + args.checkpoint)
    np.savetxt(results_path, loss, '%.5f')

****************************************************************************************************
Task  0 (Vacuum cleaning)
****************************************************************************************************
Starting training for the tasks in group:  0
Batch: 0/500 

	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, Number alpha)


Batch: 448/500 | Epoch   1, time=626.5ms/ 12.3ms | Training loss: 1.845 | Learning rate: 0.030 | *
Batch: 448/500 | Epoch   2, time=619.5ms/ 12.2ms | Training loss: 1.755 | Learning rate: 0.030 | *
Batch: 448/500 | Epoch   3, time=590.6ms/ 12.1ms | Training loss: 1.711 | Learning rate: 0.030 | *
Batch: 448/500 | Epoch   4, time=590.7ms/ 12.0ms | Training loss: 1.690 | Learning rate: 0.030 | *
Batch: 448/500 | Epoch   5, time=590.3ms/ 13.9ms | Training loss: 1.641 | Learning rate: 0.030 | *
Batch: 448/500 | Epoch   6, time=613.7ms/ 12.0ms | Training loss: 1.748 | Learning rate: 0.030 |
Batch: 448/500 | Epoch   7, time=589.4ms/ 12.1ms | Training loss: 1.702 | Learning rate: 0.030 |
Batch: 448/500 | Epoch   8, time=587.6ms/ 12.3ms | Training loss: 1.697 | Learning rate: 0.030 |
Batch: 448/500 | Epoch   9, time=587.1ms/ 12.0ms | Training loss: 1.585 | Learning rate: 0.030 | *
Batch: 448/500 | Epoch  10, time=591.2ms/ 12.2ms | Training loss: 1.700 | Learning rate: 0.030 |
Batch: 448/500 | E

Batch: 448/500 | Epoch  84, time=615.7ms/ 14.0ms | Training loss: 1.544 | Learning rate: 0.000 |
Batch: 448/500 | Epoch  85, time=611.1ms/ 12.3ms | Training loss: 1.481 | Learning rate: 0.000 |
Batch: 448/500 | Epoch  86, time=606.9ms/ 14.0ms | Training loss: 1.441 | Learning rate: 0.000 | *
Batch: 448/500 | Epoch  87, time=606.8ms/ 14.9ms | Training loss: 1.441 | Learning rate: 0.000 | *
Batch: 448/500 | Epoch  88, time=613.8ms/ 12.5ms | Training loss: 1.454 | Learning rate: 0.000 |
Batch: 448/500 | Epoch  89, time=598.7ms/ 15.4ms | Training loss: 1.477 | Learning rate: 0.000 |
Batch: 448/500 | Epoch  90, time=611.1ms/ 13.9ms | Training loss: 1.478 | Learning rate: 0.000 |
Batch: 448/500 | Epoch  91, time=602.5ms/ 14.1ms | Training loss: 1.441 | Learning rate: 0.000 |
Batch: 448/500 | Epoch  92, time=614.2ms/ 13.0ms | Training loss: 1.500 | Learning rate: 0.000 | lr=1.5e-06
Batch: 448/500 | Epoch  93, time=626.0ms/ 13.1ms | Training loss: 1.456 | Learning rate: 0.000 |
Batch: 448/500 

Batch: 448/500 | Epoch   3, time=588.4ms/ 12.3ms | Training loss: 1.486 | Learning rate: 0.050 | *
Batch: 448/500 | Epoch   4, time=622.6ms/ 12.7ms | Training loss: 1.449 | Learning rate: 0.050 | *
Batch: 448/500 | Epoch   5, time=613.7ms/ 13.1ms | Training loss: 1.475 | Learning rate: 0.050 |
Batch: 448/500 | Epoch   6, time=605.1ms/ 12.0ms | Training loss: 1.499 | Learning rate: 0.050 |
Batch: 448/500 | Epoch   7, time=588.9ms/ 12.4ms | Training loss: 1.494 | Learning rate: 0.050 |
Batch: 448/500 | Epoch   8, time=615.1ms/ 13.1ms | Training loss: 1.426 | Learning rate: 0.050 | *
Batch: 448/500 | Epoch   9, time=604.3ms/ 16.1ms | Training loss: 1.530 | Learning rate: 0.050 |
Batch: 448/500 | Epoch  10, time=619.4ms/ 12.8ms | Training loss: 1.395 | Learning rate: 0.050 | *
Batch: 448/500 | Epoch  11, time=603.8ms/ 12.4ms | Training loss: 1.390 | Learning rate: 0.050 | *
Batch: 448/500 | Epoch  12, time=595.2ms/ 12.8ms | Training loss: 1.422 | Learning rate: 0.050 |
Batch: 448/500 | Epo

Batch: 448/500 | Epoch   4, time=608.0ms/ 14.4ms | Training loss: 1.403 | Learning rate: 0.050 | *
Batch: 448/500 | Epoch   5, time=602.7ms/ 13.0ms | Training loss: 1.390 | Learning rate: 0.050 | *
Batch: 448/500 | Epoch   6, time=625.1ms/ 13.8ms | Training loss: 1.379 | Learning rate: 0.050 | *
Batch: 448/500 | Epoch   7, time=603.8ms/ 12.1ms | Training loss: 1.346 | Learning rate: 0.050 | *
Batch: 448/500 | Epoch   8, time=608.7ms/ 14.1ms | Training loss: 1.337 | Learning rate: 0.050 | *
Batch: 448/500 | Epoch   9, time=626.1ms/ 14.4ms | Training loss: 1.328 | Learning rate: 0.050 | *
Batch: 448/500 | Epoch  10, time=620.8ms/ 12.6ms | Training loss: 1.342 | Learning rate: 0.050 |
Batch: 448/500 | Epoch  11, time=621.3ms/ 13.1ms | Training loss: 1.358 | Learning rate: 0.050 |
Batch: 448/500 | Epoch  12, time=632.8ms/ 32.0ms | Training loss: 1.327 | Learning rate: 0.050 | *
Batch: 256/500 