In [1]:
import sys,os,argparse,time
import numpy as np
import torch
import utils
from datetime import datetime
import pickle
# Wall-clock timestamp taken when the notebook starts executing
# (not read by any of the cells visible in this notebook).
tstart=time.time()

In [2]:
# Command-line arguments.
# Every option has a default, so this cell also runs unchanged inside a notebook
# where no options are passed.
parser = argparse.ArgumentParser(description='xxx')
parser.add_argument('--seed',           default=0,                type=int,   help='(default=%(default)d)')
parser.add_argument('--device',         default='cpu',            type=str,   help='Device to run on, e.g. cpu or cuda:0')
parser.add_argument('--experiment',     default='16_task_groups', type=str,   help='Mnist or dissertation')
parser.add_argument('--approach',       default='PUGCL',          type=str,   help='Method, always Lifelong Uncertainty-aware learning')
parser.add_argument('--data_path',      default='data/data.csv',  type=str,   help='Path to the data file')

# Training parameters
parser.add_argument('--output',         default='',               type=str,   help='')
parser.add_argument('--checkpoint_dir', default='../checkpoints_16_tasks', type=str, help='')
parser.add_argument('--n_epochs',       default=200,              type=int,   help='')
parser.add_argument('--batch_size',     default=64,               type=int,   help='')
parser.add_argument('--lr',             default=0.03,             type=float, help='')
parser.add_argument('--hidden_size',    default=800,              type=int,   help='')
parser.add_argument('--parameter',      default='',               type=str,   help='')

# UCB HYPER-PARAMETERS
# Defaults are native numbers (not strings) so they match the declared `type`
# without relying on argparse converting string defaults.
parser.add_argument('--MC_samples',     default=10,               type=int,   help='Number of Monte Carlo samples')
parser.add_argument('--rho',            default=-3.0,             type=float, help='Initial rho')
# NOTE(review): a literal standard deviation of 0.0 would be degenerate; presumably
# the model transforms sigma1/sigma2 before use -- confirm in BayesianNetwork.
parser.add_argument('--sigma1',         default=0.0,              type=float, help='STD for the 1st prior pdf in scaled mixture Gaussian')
parser.add_argument('--sigma2',         default=6.0,              type=float, help='STD for the 2nd prior pdf in scaled mixture Gaussian')
parser.add_argument('--pi',             default=0.25,             type=float, help='weighting factor for prior')

# Resuming: the resume cell compares --resume against the exact string "yes", so
# restrict the accepted values instead of silently treating typos as "no".
parser.add_argument('--resume',         default='no',             type=str,   choices=['no', 'yes'], help='resume?')
parser.add_argument('--sti',            default=1,                type=int,   help='starting task?')

# The Jupyter kernel is launched with `-f <connection file>`; this dummy option
# absorbs it so that parse_args() does not reject the kernel's own argument.
parser.add_argument("-f", "--fff", help="a dummy argument to fool ipython", default="1")

args = parser.parse_args()
utils.print_arguments(args)

Arguments =
	seed: 0
	device: cpu
	experiment: 16_task_groups
	approach: PUGCL
	data_path: data/data.csv
	output: 
	checkpoint_dir: ../checkpoints_16_tasks
	n_epochs: 200
	batch_size: 64
	lr: 0.03
	hidden_size: 800
	parameter: 
	MC_samples: 10
	rho: -3.0
	sigma1: 0.0
	sigma2: 6.0
	pi: 0.25
	resume: no
	sti: 1
	fff: /Users/jonastjomsland/Library/Jupyter/runtime/kernel-003dd4ae-5b1e-4b75-9dcd-182fa5aa8112.json


In [3]:
# Reproducibility: seed NumPy and PyTorch from the --seed argument.
for seed_rng in (np.random.seed, torch.manual_seed):
    seed_rng(args.seed)

# When CUDA is present, seed it as well and switch cuDNN to its deterministic,
# non-benchmarking mode.
if torch.cuda.is_available():
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

print("Using device:", args.device)

# Create the output directories and remember the checkpoint location on `args`
# so that later cells can reach it through `args.checkpoint`.
checkpoint = args.checkpoint = utils.make_directories(args)
print()

# Project-local components, imported in the same order as before:
#   - the data loader for the 16 task groups,
#   - the PUGCL training approach,
#   - the Bayesian network model.
from data import dataloader_16_tasks as dataloader
from training_method import PUGCL
from bayesian_model.bayesian_network import BayesianNetwork

Using device: cpu
16_task_groups_PUGCL
Results will be saved in  ../checkpoints_16_tasks/16_task_groups_PUGCL



In [4]:
print()
print("Starting this session on: ")
print(datetime.now().strftime("%Y-%m-%d %H:%M"))

# Load data. `task_outputs` is iterated below as (task index, number of classes)
# pairs; `data` is indexed by task and then by split ('train' / 'valid' / 'test').
print("Loading data...")
data, task_outputs, input_size = dataloader.get(data_path=args.data_path)
print("Input size =", input_size, "\nTask info =", task_outputs)
print("Number of data samples: ", len(data[0]['train']['x']))

# Expose the dataset description on `args`, which is what the model and the
# training approach receive.
args.num_tasks = len(task_outputs)
args.input_size = input_size
args.task_outputs = task_outputs

# Keep a pickled copy of the loaded data. The context manager guarantees the
# file is flushed and closed even if pickling fails part-way.
with open("data/data.p", "wb") as data_file:
    pickle.dump(data, data_file)

# Initialize Bayesian network
print("Initializing network...")
model = BayesianNetwork(args).to(args.device)

# Initialize the PUGCL training approach
print("Initialize Lifelong Uncertainty-aware Learning")
approach = PUGCL(model, args=args)
print("-"*100)

# Check whether we are resuming from a saved checkpoint:
if args.resume == "yes":
    # Restore the weights stored under 'model_state_dict' in model_<sti>.pth.tar.
    # map_location remaps the stored tensors onto the requested device, so a
    # checkpoint written on a GPU can still be loaded on a CPU-only machine.
    checkpoint_path = os.path.join(args.checkpoint, 'model_{}.pth.tar'.format(args.sti))
    checkpoint = torch.load(checkpoint_path, map_location=args.device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model = model.to(device=args.device)
else:
    # Fresh run: always start from the first task group, whatever --sti says.
    args.sti = 0


Starting this session on: 
2020-05-14 21:22
Loading data...
Input size = [1, 29] 
Task info = [(0, 2), (1, 2), (2, 2), (3, 2), (4, 2), (5, 2), (6, 2), (7, 2), (8, 2), (9, 2), (10, 2), (11, 2), (12, 2), (13, 2), (14, 2), (15, 2)]
Number of data samples:  500
Initializing network...
Initialize Lifelong Uncertainty-aware Learning
----------------------------------------------------------------------------------------------------




In [5]:
# Train on each task group in turn (starting at args.sti) and, after each one,
# evaluate on every task group up to and including it.
# loss[i, j] holds the test loss on task group j measured after training on i.

def _split_tensors(split):
    """Return the (x, y) tensors of one data split as float32 on args.device.

    The first column of x is dropped (presumably an id/index column -- TODO
    confirm against the data loader).
    """
    x = split['x'][:, 1:].type(torch.float32).to(args.device)
    y = split['y'].type(torch.float32).to(args.device)
    return x, y


# The results file does not depend on the task, so build its path once.
results_path = os.path.join(args.checkpoint, '{}_{}_{}.txt'.format(args.experiment, args.approach, args.seed))

loss = np.zeros((len(task_outputs), len(task_outputs)), dtype=np.float32)

# When resuming from a later task, start from the previously saved matrix so the
# rows of already-trained task groups are not overwritten with zeros on save.
if args.sti > 0 and os.path.isfile(results_path):
    previous_loss = np.loadtxt(results_path, dtype=np.float32, ndmin=2)
    if previous_loss.shape == loss.shape:
        loss = previous_loss
    else:
        print("Ignoring previous results with unexpected shape in " + results_path)

for task, _n_class in task_outputs[args.sti:]:
    print('*'*100)
    print('Task {:2d} ({:s})'.format(task, data[task]['name']))
    print('*'*100)

    # Get data:
    xtrain, ytrain = _split_tensors(data[task]['train'])
    xvalid, yvalid = _split_tensors(data[task]['valid'])

    # Start training
    print("Starting training for the tasks in group: ", task)
    approach.train(task, xtrain, ytrain, xvalid, yvalid)
    print('_'*100)

    # Test on this task group and on all earlier ones:
    for u in range(task+1):
        xtest, ytest = _split_tensors(data[u]['test'])
        test_loss = approach.eval(u, xtest, ytest, debug=True)
        print("Test on task {:2d} - {:15s}: loss={:.3f}".format(u, data[u]['name'], test_loss))
        loss[task, u] = test_loss

    # Save after every task group so an interrupted run keeps its partial results
    print("Saving at " + args.checkpoint)
    np.savetxt(results_path, loss, '%.5f')

****************************************************************************************************
Task  0 (Vacuum cleaning)
****************************************************************************************************
Starting training for the tasks in group:  0
Batch: 0/500 

	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, Number alpha)


Batch: 448/500 | Epoch   1, time=621.6ms/ 11.6ms | Training loss: 1.699 | Learning rate: 0.030 | *
Batch: 448/500 | Epoch   2, time=590.5ms/ 11.7ms | Training loss: 1.627 | Learning rate: 0.030 | *
Batch: 448/500 | Epoch   3, time=683.0ms/ 36.5ms | Training loss: 1.594 | Learning rate: 0.030 | *
Batch: 448/500 | Epoch   4, time=787.5ms/ 18.5ms | Training loss: 1.557 | Learning rate: 0.030 | *
Batch: 448/500 | Epoch   5, time=630.3ms/ 13.0ms | Training loss: 1.514 | Learning rate: 0.030 | *
Batch: 448/500 | Epoch   6, time=615.3ms/ 11.8ms | Training loss: 1.608 | Learning rate: 0.030 |
Batch: 448/500 | Epoch   7, time=623.5ms/ 12.5ms | Training loss: 1.584 | Learning rate: 0.030 |
Batch: 448/500 | Epoch   8, time=605.6ms/ 13.2ms | Training loss: 1.592 | Learning rate: 0.030 |
Batch: 448/500 | Epoch   9, time=603.3ms/ 12.3ms | Training loss: 1.488 | Learning rate: 0.030 | *
Batch: 448/500 | Epoch  10, time=628.8ms/ 12.1ms | Training loss: 1.566 | Learning rate: 0.030 |
Batch: 448/500 | E

Batch: 448/500 | Epoch  85, time=569.4ms/ 11.6ms | Training loss: 1.333 | Learning rate: 0.000 |
Batch: 448/500 | Epoch  86, time=569.7ms/ 12.7ms | Training loss: 1.279 | Learning rate: 0.000 | *
Batch: 448/500 | Epoch  87, time=570.4ms/ 12.2ms | Training loss: 1.277 | Learning rate: 0.000 | *
Batch: 448/500 | Epoch  88, time=565.5ms/ 11.5ms | Training loss: 1.288 | Learning rate: 0.000 |
Batch: 448/500 | Epoch  89, time=565.6ms/ 11.8ms | Training loss: 1.314 | Learning rate: 0.000 |
Batch: 448/500 | Epoch  90, time=566.1ms/ 11.6ms | Training loss: 1.299 | Learning rate: 0.000 |
Batch: 448/500 | Epoch  91, time=565.3ms/ 11.6ms | Training loss: 1.276 | Learning rate: 0.000 | *
Batch: 448/500 | Epoch  92, time=565.3ms/ 11.7ms | Training loss: 1.325 | Learning rate: 0.000 |
Batch: 448/500 | Epoch  93, time=565.9ms/ 11.5ms | Training loss: 1.290 | Learning rate: 0.000 |
Batch: 448/500 | Epoch  94, time=565.7ms/ 11.5ms | Training loss: 1.306 | Learning rate: 0.000 |
Batch: 448/500 | Epoch  

Batch: 448/500 | Epoch  17, time=582.6ms/ 11.7ms | Training loss: 1.501 | Learning rate: 0.006 |
Batch: 448/500 | Epoch  18, time=577.7ms/ 11.7ms | Training loss: 1.501 | Learning rate: 0.006 |
Batch: 448/500 | Epoch  19, time=576.8ms/ 11.5ms | Training loss: 1.501 | Learning rate: 0.006 |
Batch: 448/500 | Epoch  20, time=575.7ms/ 11.5ms | Training loss: 1.501 | Learning rate: 0.006 |
Batch: 448/500 | Epoch  21, time=573.1ms/ 11.6ms | Training loss: 1.501 | Learning rate: 0.006 | lr=1.9e-03
Batch: 448/500 | Epoch  22, time=574.1ms/ 12.0ms | Training loss: 1.501 | Learning rate: 0.002 |
Batch: 448/500 | Epoch  23, time=575.8ms/ 11.5ms | Training loss: 1.501 | Learning rate: 0.002 |
Batch: 448/500 | Epoch  24, time=574.0ms/ 12.0ms | Training loss: 1.501 | Learning rate: 0.002 |
Batch: 448/500 | Epoch  25, time=571.9ms/ 11.9ms | Training loss: 1.501 | Learning rate: 0.002 |
Batch: 448/500 | Epoch  26, time=575.3ms/ 11.6ms | Training loss: 1.501 | Learning rate: 0.002 | lr=6.2e-04
Batch: 4

Batch: 448/500 | Epoch  39, time=597.6ms/ 12.4ms | Training loss: 1.370 | Learning rate: 0.002 |
Batch: 448/500 | Epoch  40, time=597.6ms/ 11.9ms | Training loss: 1.370 | Learning rate: 0.002 |
Batch: 448/500 | Epoch  41, time=596.8ms/ 12.2ms | Training loss: 1.370 | Learning rate: 0.002 | lr=6.2e-04
Batch: 448/500 | Epoch  42, time=597.6ms/ 12.7ms | Training loss: 1.370 | Learning rate: 0.001 |
Batch: 448/500 | Epoch  43, time=598.6ms/ 12.1ms | Training loss: 1.370 | Learning rate: 0.001 |
Batch: 448/500 | Epoch  44, time=599.9ms/ 12.2ms | Training loss: 1.370 | Learning rate: 0.001 |
Batch: 448/500 | Epoch  45, time=603.2ms/ 13.1ms | Training loss: 1.370 | Learning rate: 0.001 |
Batch: 448/500 | Epoch  46, time=594.6ms/ 12.1ms | Training loss: 1.370 | Learning rate: 0.001 | lr=2.1e-04
Batch: 448/500 | Epoch  47, time=596.1ms/ 12.1ms | Training loss: 1.370 | Learning rate: 0.000 |
Batch: 448/500 | Epoch  48, time=595.4ms/ 12.1ms | Training loss: 1.370 | Learning rate: 0.000 |
Batch: 4

Batch: 448/500 | Epoch  45, time=820.8ms/ 17.4ms | Training loss: 1.257 | Learning rate: 0.006 |
Batch: 448/500 | Epoch  46, time=841.3ms/ 17.7ms | Training loss: 1.257 | Learning rate: 0.006 |
Batch: 448/500 | Epoch  47, time=835.1ms/ 16.8ms | Training loss: 1.257 | Learning rate: 0.006 | lr=1.9e-03
Batch: 448/500 | Epoch  48, time=794.7ms/ 16.3ms | Training loss: 1.257 | Learning rate: 0.002 |
Batch: 448/500 | Epoch  49, time=772.8ms/ 16.3ms | Training loss: 1.257 | Learning rate: 0.002 |
Batch: 448/500 | Epoch  50, time=777.7ms/ 16.9ms | Training loss: 1.257 | Learning rate: 0.002 |
Batch: 448/500 | Epoch  51, time=772.4ms/ 16.2ms | Training loss: 1.257 | Learning rate: 0.002 |
Batch: 448/500 | Epoch  52, time=803.4ms/ 16.7ms | Training loss: 1.257 | Learning rate: 0.002 | lr=6.2e-04
Batch: 448/500 | Epoch  53, time=836.8ms/ 17.7ms | Training loss: 1.257 | Learning rate: 0.001 |
Batch: 448/500 | Epoch  54, time=842.2ms/ 18.1ms | Training loss: 1.257 | Learning rate: 0.001 |
Batch: 4

Batch: 448/500 | Epoch  40, time=820.9ms/ 18.5ms | Training loss: 1.460 | Learning rate: 0.080 |
Batch: 448/500 | Epoch  41, time=792.0ms/ 17.6ms | Training loss: 1.472 | Learning rate: 0.080 |
Batch: 448/500 | Epoch  42, time=772.8ms/ 16.6ms | Training loss: 1.422 | Learning rate: 0.080 | *
Batch: 448/500 | Epoch  43, time=786.8ms/ 16.8ms | Training loss: 1.569 | Learning rate: 0.080 |
Batch: 448/500 | Epoch  44, time=835.9ms/ 17.6ms | Training loss: 1.427 | Learning rate: 0.080 |
Batch: 448/500 | Epoch  45, time=838.9ms/ 18.2ms | Training loss: 1.467 | Learning rate: 0.080 |
Batch: 448/500 | Epoch  46, time=837.7ms/ 19.9ms | Training loss: 1.423 | Learning rate: 0.080 |
Batch: 448/500 | Epoch  47, time=832.6ms/ 16.9ms | Training loss: 1.380 | Learning rate: 0.080 | *
Batch: 448/500 | Epoch  48, time=799.5ms/ 16.2ms | Training loss: 1.310 | Learning rate: 0.080 | *
Batch: 448/500 | Epoch  49, time=773.7ms/ 16.1ms | Training loss: 1.455 | Learning rate: 0.080 |
Batch: 448/500 | Epoch  

Batch: 448/500 | Epoch  13, time=590.0ms/ 12.1ms | Training loss: 1.400 | Learning rate: 0.050 |
Batch: 448/500 | Epoch  14, time=597.2ms/ 12.0ms | Training loss: 1.408 | Learning rate: 0.050 |
Batch: 448/500 | Epoch  15, time=588.4ms/ 12.9ms | Training loss: 1.382 | Learning rate: 0.050 | *
Batch: 448/500 | Epoch  16, time=585.5ms/ 11.8ms | Training loss: 1.395 | Learning rate: 0.050 |
Batch: 448/500 | Epoch  17, time=585.2ms/ 12.2ms | Training loss: 1.361 | Learning rate: 0.050 | *
Batch: 448/500 | Epoch  18, time=588.6ms/ 11.8ms | Training loss: 1.360 | Learning rate: 0.050 | *
Batch: 448/500 | Epoch  19, time=584.7ms/ 13.3ms | Training loss: 1.370 | Learning rate: 0.050 |
Batch: 448/500 | Epoch  20, time=585.1ms/ 12.2ms | Training loss: 1.363 | Learning rate: 0.050 |
Batch: 448/500 | Epoch  21, time=587.5ms/ 11.8ms | Training loss: 1.348 | Learning rate: 0.050 | *
Batch: 448/500 | Epoch  22, time=585.1ms/ 12.6ms | Training loss: 1.343 | Learning rate: 0.050 | *
Batch: 448/500 | Epo


KeyboardInterrupt

