In [1]:
import time, datetime
import copy
import os
import sys
import warnings
#warnings.filterwarnings("ignore", category=UserWarning)
#warnings.filterwarnings("ignore", category=RuntimeWarning)

import numpy as np
from loguru import logger
import yaml
from utils import dumb_reward_plot
import gym

sys.path.append('./envs/cartpole-envs')
sys.path.append('./')
import cartpole_envs
#import highway_env

from utils import plot_reward, plot_index
from mpc.mpc_cp import MPC
from baselines.NN import NN
from baselines.NP import NP

def prepare_dynamics(gym_config):
    """Instantiate the configured gym environments and arrange them as a task sequence.

    Args:
        gym_config: mapping that provides
            - 'dynamics_name': iterable of gym environment ids, one per dynamics;
            - 'task_dynamics_list': iterable of indices into 'dynamics_name'
              giving the order in which the dynamics are visited.

    Returns:
        list: one environment per entry of 'task_dynamics_list'. An index that
        appears several times maps to the same environment object each time.

    Note:
        gym_config['seed'] is not applied to the environments here; the
        seeding call was already disabled in the previous version of this
        function.
    """
    # Build every environment exactly once. The previous version called
    # gym.make twice per id and threw the first instance away.
    dynamics_set = [gym.make(name) for name in gym_config['dynamics_name']]

    # use pre-defined env sequence
    return [dynamics_set[i] for i in gym_config['task_dynamics_list']]

def load_config(config_path="config.yml"):
    """Read a YAML configuration file and return its parsed content.

    Args:
        config_path: path of the YAML file to read.

    Returns:
        Whatever ``yaml.load`` produces for the file when called with
        ``yaml.FullLoader``.

    Raises:
        FileNotFoundError: if ``config_path`` is not an existing file. This is
            a subclass of ``Exception``, so callers catching ``Exception``
            keep working; the message is unchanged.
    """
    if not os.path.isfile(config_path):
        raise FileNotFoundError("Configuration file is not found in the path: "+config_path)
    # The context manager closes the handle; the previous version left it open.
    with open(config_path) as f:
        # NOTE(review): the config is assumed to be a trusted local file;
        # switch to yaml.safe_load if it may ever come from an untrusted source.
        return yaml.load(f, Loader=yaml.FullLoader)


In [2]:
# Load the experiment configuration and pull out the per-component sections.
config = load_config('config/config_test_np.yml')
nn_config, mpc_config, gym_config = (
    config[section] for section in ('NN_config', 'mpc_config', 'gym_config')
)
render = gym_config['render']
np_config = config['NP_config']

# NOTICE: Model
# Dynamics model used for this run. Other candidates that are currently
# disabled: DPGPMM, SingleSparseGP, SingleGP and NN(NN_config=nn_config).
model = NP(NP_config=np_config)
logger.info('Using model: {}', model.name)

# MPC controller that plans with the model.
mpc_controller = MPC(mpc_config=mpc_config)

# The task is the configured sequence of dynamics; it is solved once each
# dynamics is solved.
task = prepare_dynamics(gym_config)
print(gym_config)

# ---- start DPGP-MBRL: run bookkeeping ----
data_buffer, label_list, subtask_list = [], [], []
subtask_reward, task_reward = [], []
subtask_succ_count, comp_trainable = [0], [1]
subtask_solved = [False] * 4
trainable, task_solved = True, False
total_count = task_epi = 0
log_name = None

# Number of entries of `task` that the following cells iterate over.
total_tasks = 1  # 4  (NOTE(review): presumably the full number of dynamics)

2020-05-19 21:45:30.394 | INFO     | __main__:<module>:17 - Using model: NP


{'render': False, 'task_dynamics_list': [0, 1, 2, 3], 'subtask_episode': 3, 'subtask_episode_length': 200, 'task_episode': 100, 'seed': 1000, 'dynamics_name': ['CartPoleSwingUpEnvCm05Pm04Pl05-v0', 'CartPoleSwingUpEnvCm05Pm04Pl07-v0', 'CartPoleSwingUpEnvCm05Pm08Pl05-v0', 'CartPoleSwingUpEnvCm05Pm08Pl07-v0']}


In [3]:
"""NN pretrain"""
# Number of random-action episodes collected per task before the first fit.
pretrain_episodes = 10

# Data collection: roll out uniformly sampled actions and hand every
# transition to the model's buffer.
for task_idx in range(total_tasks):
    env = task[task_idx]
    for epi in range(pretrain_episodes):
        obs, done = env.reset(), False
        mpc_controller.reset()
        while not done:
            action = env.action_space.sample()
            obs_next, reward, done, state_next = env.step(action)
            # NOTICE: Model
            # Each record is [0, observation, action, observation delta].
            model.data_process([0, obs, action, obs_next - obs])
            obs = copy.deepcopy(obs_next)

# NOTICE: Model
# Train once on everything collected above. The call is kept as the last
# expression so the notebook displays its return value.
model.validation_flag = True
model.fit()

2020-05-19 21:45:30.606 | INFO     | baselines.NP:fit:207 - training epoch [0/400],loss train: 1.8099.


fitting model
X.size() = torch.Size([657, 6])
RESHAPED X.size() = torch.Size([1, 657, 6])


2020-05-19 21:45:30.623 | INFO     | baselines.NP:fit:207 - training epoch [1/400],loss train: 1.5247.
2020-05-19 21:45:30.641 | INFO     | baselines.NP:fit:207 - training epoch [2/400],loss train: 1.9261.
2020-05-19 21:45:30.658 | INFO     | baselines.NP:fit:207 - training epoch [3/400],loss train: 1.2790.
2020-05-19 21:45:30.675 | INFO     | baselines.NP:fit:207 - training epoch [4/400],loss train: 1.2314.
2020-05-19 21:45:30.691 | INFO     | baselines.NP:fit:207 - training epoch [5/400],loss train: 1.2662.
2020-05-19 21:45:30.708 | INFO     | baselines.NP:fit:207 - training epoch [6/400],loss train: 1.1938.
2020-05-19 21:45:30.725 | INFO     | baselines.NP:fit:207 - training epoch [7/400],loss train: 1.1724.
2020-05-19 21:45:30.742 | INFO     | baselines.NP:fit:207 - training epoch [8/400],loss train: 1.0666.
2020-05-19 21:45:30.759 | INFO     | baselines.NP:fit:207 - training epoch [9/400],loss train: 1.0441.
2020-05-19 21:45:30.776 | INFO     | baselines.NP:fit:207 - training epoc

2020-05-19 21:45:31.941 | INFO     | baselines.NP:fit:207 - training epoch [80/400],loss train: 0.4015.
2020-05-19 21:45:31.958 | INFO     | baselines.NP:fit:207 - training epoch [81/400],loss train: 0.3250.
2020-05-19 21:45:31.975 | INFO     | baselines.NP:fit:207 - training epoch [82/400],loss train: 0.4972.
2020-05-19 21:45:31.991 | INFO     | baselines.NP:fit:207 - training epoch [83/400],loss train: 0.4258.
2020-05-19 21:45:32.008 | INFO     | baselines.NP:fit:207 - training epoch [84/400],loss train: 0.3795.
2020-05-19 21:45:32.024 | INFO     | baselines.NP:fit:207 - training epoch [85/400],loss train: 0.3695.
2020-05-19 21:45:32.041 | INFO     | baselines.NP:fit:207 - training epoch [86/400],loss train: 0.4511.
2020-05-19 21:45:32.058 | INFO     | baselines.NP:fit:207 - training epoch [87/400],loss train: 0.3244.
2020-05-19 21:45:32.075 | INFO     | baselines.NP:fit:207 - training epoch [88/400],loss train: 0.4352.
2020-05-19 21:45:32.091 | INFO     | baselines.NP:fit:207 - trai

2020-05-19 21:45:33.257 | INFO     | baselines.NP:fit:207 - training epoch [159/400],loss train: 0.0692.
2020-05-19 21:45:33.274 | INFO     | baselines.NP:fit:207 - training epoch [160/400],loss train: 0.0500.
2020-05-19 21:45:33.290 | INFO     | baselines.NP:fit:207 - training epoch [161/400],loss train: -0.0009.
2020-05-19 21:45:33.307 | INFO     | baselines.NP:fit:207 - training epoch [162/400],loss train: 0.1024.
2020-05-19 21:45:33.324 | INFO     | baselines.NP:fit:207 - training epoch [163/400],loss train: 0.1982.
2020-05-19 21:45:33.340 | INFO     | baselines.NP:fit:207 - training epoch [164/400],loss train: 0.0875.
2020-05-19 21:45:33.357 | INFO     | baselines.NP:fit:207 - training epoch [165/400],loss train: 0.1030.
2020-05-19 21:45:33.373 | INFO     | baselines.NP:fit:207 - training epoch [166/400],loss train: 0.2143.
2020-05-19 21:45:33.390 | INFO     | baselines.NP:fit:207 - training epoch [167/400],loss train: 0.0694.
2020-05-19 21:45:33.406 | INFO     | baselines.NP:fit:

2020-05-19 21:45:34.554 | INFO     | baselines.NP:fit:207 - training epoch [237/400],loss train: -0.2127.
2020-05-19 21:45:34.570 | INFO     | baselines.NP:fit:207 - training epoch [238/400],loss train: -0.2390.
2020-05-19 21:45:34.587 | INFO     | baselines.NP:fit:207 - training epoch [239/400],loss train: 0.1470.
2020-05-19 21:45:34.604 | INFO     | baselines.NP:fit:207 - training epoch [240/400],loss train: 0.2026.
2020-05-19 21:45:34.621 | INFO     | baselines.NP:fit:207 - training epoch [241/400],loss train: 0.0033.
2020-05-19 21:45:34.638 | INFO     | baselines.NP:fit:207 - training epoch [242/400],loss train: -0.0965.
2020-05-19 21:45:34.654 | INFO     | baselines.NP:fit:207 - training epoch [243/400],loss train: -0.0127.
2020-05-19 21:45:34.671 | INFO     | baselines.NP:fit:207 - training epoch [244/400],loss train: -0.0620.
2020-05-19 21:45:34.688 | INFO     | baselines.NP:fit:207 - training epoch [245/400],loss train: -0.0328.
2020-05-19 21:45:34.704 | INFO     | baselines.NP

2020-05-19 21:45:35.853 | INFO     | baselines.NP:fit:207 - training epoch [315/400],loss train: -0.4227.
2020-05-19 21:45:35.870 | INFO     | baselines.NP:fit:207 - training epoch [316/400],loss train: -0.3872.
2020-05-19 21:45:35.886 | INFO     | baselines.NP:fit:207 - training epoch [317/400],loss train: -0.3169.
2020-05-19 21:45:35.903 | INFO     | baselines.NP:fit:207 - training epoch [318/400],loss train: -0.4106.
2020-05-19 21:45:35.920 | INFO     | baselines.NP:fit:207 - training epoch [319/400],loss train: -0.4837.
2020-05-19 21:45:35.937 | INFO     | baselines.NP:fit:207 - training epoch [320/400],loss train: -0.4549.
2020-05-19 21:45:35.953 | INFO     | baselines.NP:fit:207 - training epoch [321/400],loss train: -0.3951.
2020-05-19 21:45:35.970 | INFO     | baselines.NP:fit:207 - training epoch [322/400],loss train: -0.4929.
2020-05-19 21:45:35.986 | INFO     | baselines.NP:fit:207 - training epoch [323/400],loss train: -0.4898.
2020-05-19 21:45:36.003 | INFO     | baselines

2020-05-19 21:45:37.162 | INFO     | baselines.NP:fit:207 - training epoch [393/400],loss train: -0.5539.
2020-05-19 21:45:37.179 | INFO     | baselines.NP:fit:207 - training epoch [394/400],loss train: -0.5582.
2020-05-19 21:45:37.195 | INFO     | baselines.NP:fit:207 - training epoch [395/400],loss train: -0.5570.
2020-05-19 21:45:37.212 | INFO     | baselines.NP:fit:207 - training epoch [396/400],loss train: -0.5512.
2020-05-19 21:45:37.229 | INFO     | baselines.NP:fit:207 - training epoch [397/400],loss train: -0.5402.
2020-05-19 21:45:37.245 | INFO     | baselines.NP:fit:207 - training epoch [398/400],loss train: -0.5580.
2020-05-19 21:45:37.262 | INFO     | baselines.NP:fit:207 - training epoch [399/400],loss train: -0.5667.


-0.5666735768318176

In [4]:
"""testing the model with MPC while training """
# Evaluate the model with MPC while continuing to train it: for every epoch
# and task, run `test_episode` MPC-controlled episodes, log them, then refit
# the model on all data gathered so far.
test_episode = 2
test_epoch = 10
log = []

# np.save does not create missing directories, so make sure the log
# directory exists before the first episode is written.
log_dir = './misc/log/'
os.makedirs(log_dir, exist_ok=True)

# Environments handed to the MPC controller. They are built once and reset
# before every planning step. The previous version rebuilt every configured
# environment on each control step and never closed them.
planning_envs = prepare_dynamics(gym_config)

for ep in range(test_epoch):
    print('epoch: ', ep)
    for task_idx in range(total_tasks):
        env = task[task_idx]
        env_copy = planning_envs[task_idx]
        print('task: ', task_idx)
        for epi in range(test_episode):
            obs = env.reset()

            # O, A, R: per-step logs; acc_reward: episode return.
            O, A, R, acc_reward, done = [], [], [], 0, False
            mpc_controller.reset()
            i = 0  # number of control steps taken in this episode
            while not done:
                i += 1

                # env.render()
                env_copy.reset()
                # NOTICE: Model
                action = np.array([mpc_controller.act(task=env_copy, model=model, state=obs)])
                obs_next, reward, done, state_next = env.step(action)
                A.append(action)
                # NOTE(review): this stores the 4th value returned by env.step
                # under the "obs" key below; confirm that it is the quantity
                # meant to be logged rather than obs_next.
                O.append(state_next)
                R.append(reward)

                # append data but not training
                # NOTICE: Model
                model.data_process([0, obs, action, obs_next - obs])
                obs = copy.deepcopy(obs_next)
                acc_reward += reward
            print('task: ', task_idx,'step: ', i, 'acc_reward: ', acc_reward)
            env.close()

            # The loop above only exits once `done` is true, so the episode
            # is always logged (the former `if done:` guard was redundant).
            samples = {
                "obs": np.array(O),
                "actions": np.array(A),
                "rewards": np.array(R),
                "reward_sum": acc_reward,
            }
            print('******************')
            print('acc_reward', acc_reward)
            print('******************')
            log.append(samples)
            # The log file name is fixed at the first logged episode, so every
            # later save overwrites the same file with the growing log.
            if log_name is None:
                log_name = datetime.datetime.now()
            path = log_dir + log_name.strftime("%d-%H-%M") + '.npy'
            np.save(path, log, allow_pickle=True)
            dumb_reward_plot(path)

        # use the collected data to train model
        print('fitting the model...')

        # NOTICE: Model
        model.fit()

# Release the planning environments now that evaluation is finished.
for planning_env in planning_envs:
    planning_env.close()

epoch:  0
task:  0
task:  0 step:  200 acc_reward:  63.74971897832089
******************
acc_reward 63.74971897832089
******************


2020-05-19 21:49:30.610 | INFO     | baselines.NP:fit:207 - training epoch [0/400],loss train: -0.3748.
2020-05-19 21:49:30.626 | INFO     | baselines.NP:fit:207 - training epoch [1/400],loss train: -0.3020.
2020-05-19 21:49:30.643 | INFO     | baselines.NP:fit:207 - training epoch [2/400],loss train: -0.3652.
2020-05-19 21:49:30.660 | INFO     | baselines.NP:fit:207 - training epoch [3/400],loss train: -0.4350.
2020-05-19 21:49:30.676 | INFO     | baselines.NP:fit:207 - training epoch [4/400],loss train: -0.3502.


task:  0 step:  200 acc_reward:  73.32728319086564
******************
acc_reward 73.32728319086564
******************
fitting the model...
fitting model
X.size() = torch.Size([1057, 6])
RESHAPED X.size() = torch.Size([1, 1057, 6])


2020-05-19 21:49:30.693 | INFO     | baselines.NP:fit:207 - training epoch [5/400],loss train: -0.4864.
2020-05-19 21:49:30.710 | INFO     | baselines.NP:fit:207 - training epoch [6/400],loss train: -0.3328.
2020-05-19 21:49:30.727 | INFO     | baselines.NP:fit:207 - training epoch [7/400],loss train: -0.3676.
2020-05-19 21:49:30.743 | INFO     | baselines.NP:fit:207 - training epoch [8/400],loss train: -0.2993.
2020-05-19 21:49:30.760 | INFO     | baselines.NP:fit:207 - training epoch [9/400],loss train: -0.3452.
2020-05-19 21:49:30.777 | INFO     | baselines.NP:fit:207 - training epoch [10/400],loss train: -0.4934.
2020-05-19 21:49:30.794 | INFO     | baselines.NP:fit:207 - training epoch [11/400],loss train: -0.3155.
2020-05-19 21:49:30.811 | INFO     | baselines.NP:fit:207 - training epoch [12/400],loss train: -0.4081.
2020-05-19 21:49:30.827 | INFO     | baselines.NP:fit:207 - training epoch [13/400],loss train: -0.3601.
2020-05-19 21:49:30.844 | INFO     | baselines.NP:fit:207 - 

2020-05-19 21:49:32.016 | INFO     | baselines.NP:fit:207 - training epoch [84/400],loss train: -0.5775.
2020-05-19 21:49:32.032 | INFO     | baselines.NP:fit:207 - training epoch [85/400],loss train: -0.6482.
2020-05-19 21:49:32.049 | INFO     | baselines.NP:fit:207 - training epoch [86/400],loss train: -0.5510.
2020-05-19 21:49:32.066 | INFO     | baselines.NP:fit:207 - training epoch [87/400],loss train: -0.4888.
2020-05-19 21:49:32.083 | INFO     | baselines.NP:fit:207 - training epoch [88/400],loss train: -0.2580.
2020-05-19 21:49:32.099 | INFO     | baselines.NP:fit:207 - training epoch [89/400],loss train: -0.3122.
2020-05-19 21:49:32.116 | INFO     | baselines.NP:fit:207 - training epoch [90/400],loss train: -0.1161.
2020-05-19 21:49:32.133 | INFO     | baselines.NP:fit:207 - training epoch [91/400],loss train: -0.2126.
2020-05-19 21:49:32.149 | INFO     | baselines.NP:fit:207 - training epoch [92/400],loss train: -0.3522.
2020-05-19 21:49:32.166 | INFO     | baselines.NP:fit:2

2020-05-19 21:49:33.324 | INFO     | baselines.NP:fit:207 - training epoch [162/400],loss train: -0.7417.
2020-05-19 21:49:33.341 | INFO     | baselines.NP:fit:207 - training epoch [163/400],loss train: -0.6945.
2020-05-19 21:49:33.358 | INFO     | baselines.NP:fit:207 - training epoch [164/400],loss train: -0.6795.
2020-05-19 21:49:33.374 | INFO     | baselines.NP:fit:207 - training epoch [165/400],loss train: -0.7556.
2020-05-19 21:49:33.391 | INFO     | baselines.NP:fit:207 - training epoch [166/400],loss train: -0.6745.
2020-05-19 21:49:33.408 | INFO     | baselines.NP:fit:207 - training epoch [167/400],loss train: -0.7383.
2020-05-19 21:49:33.424 | INFO     | baselines.NP:fit:207 - training epoch [168/400],loss train: -0.7718.
2020-05-19 21:49:33.441 | INFO     | baselines.NP:fit:207 - training epoch [169/400],loss train: -0.7175.
2020-05-19 21:49:33.457 | INFO     | baselines.NP:fit:207 - training epoch [170/400],loss train: -0.7540.
2020-05-19 21:49:33.474 | INFO     | baselines

2020-05-19 21:49:34.628 | INFO     | baselines.NP:fit:207 - training epoch [240/400],loss train: -0.9607.
2020-05-19 21:49:34.645 | INFO     | baselines.NP:fit:207 - training epoch [241/400],loss train: -0.9321.
2020-05-19 21:49:34.662 | INFO     | baselines.NP:fit:207 - training epoch [242/400],loss train: -0.8392.
2020-05-19 21:49:34.678 | INFO     | baselines.NP:fit:207 - training epoch [243/400],loss train: -0.9108.
2020-05-19 21:49:34.695 | INFO     | baselines.NP:fit:207 - training epoch [244/400],loss train: -0.8841.
2020-05-19 21:49:34.712 | INFO     | baselines.NP:fit:207 - training epoch [245/400],loss train: -0.9440.
2020-05-19 21:49:34.729 | INFO     | baselines.NP:fit:207 - training epoch [246/400],loss train: -0.9604.
2020-05-19 21:49:34.746 | INFO     | baselines.NP:fit:207 - training epoch [247/400],loss train: -0.9579.
2020-05-19 21:49:34.762 | INFO     | baselines.NP:fit:207 - training epoch [248/400],loss train: -0.8632.
2020-05-19 21:49:34.779 | INFO     | baselines

2020-05-19 21:49:35.933 | INFO     | baselines.NP:fit:207 - training epoch [318/400],loss train: -0.9547.
2020-05-19 21:49:35.950 | INFO     | baselines.NP:fit:207 - training epoch [319/400],loss train: -0.9940.
2020-05-19 21:49:35.966 | INFO     | baselines.NP:fit:207 - training epoch [320/400],loss train: -1.0236.
2020-05-19 21:49:35.983 | INFO     | baselines.NP:fit:207 - training epoch [321/400],loss train: -0.9390.
2020-05-19 21:49:35.999 | INFO     | baselines.NP:fit:207 - training epoch [322/400],loss train: -0.8963.
2020-05-19 21:49:36.016 | INFO     | baselines.NP:fit:207 - training epoch [323/400],loss train: -0.9332.
2020-05-19 21:49:36.032 | INFO     | baselines.NP:fit:207 - training epoch [324/400],loss train: -0.8455.
2020-05-19 21:49:36.049 | INFO     | baselines.NP:fit:207 - training epoch [325/400],loss train: -0.9463.
2020-05-19 21:49:36.066 | INFO     | baselines.NP:fit:207 - training epoch [326/400],loss train: -0.9015.
2020-05-19 21:49:36.083 | INFO     | baselines

2020-05-19 21:49:37.239 | INFO     | baselines.NP:fit:207 - training epoch [396/400],loss train: -1.0405.
2020-05-19 21:49:37.256 | INFO     | baselines.NP:fit:207 - training epoch [397/400],loss train: -1.0347.
2020-05-19 21:49:37.273 | INFO     | baselines.NP:fit:207 - training epoch [398/400],loss train: -1.1511.
2020-05-19 21:49:37.290 | INFO     | baselines.NP:fit:207 - training epoch [399/400],loss train: -1.0880.


epoch:  1
task:  0
task:  0 step:  200 acc_reward:  133.21995335289316
******************
acc_reward 133.21995335289316
******************


2020-05-19 21:53:09.840 | INFO     | baselines.NP:fit:207 - training epoch [0/400],loss train: -1.1445.
2020-05-19 21:53:09.857 | INFO     | baselines.NP:fit:207 - training epoch [1/400],loss train: -1.0828.
2020-05-19 21:53:09.874 | INFO     | baselines.NP:fit:207 - training epoch [2/400],loss train: -1.0656.
2020-05-19 21:53:09.890 | INFO     | baselines.NP:fit:207 - training epoch [3/400],loss train: -1.1026.
2020-05-19 21:53:09.907 | INFO     | baselines.NP:fit:207 - training epoch [4/400],loss train: -1.1100.


task:  0 step:  200 acc_reward:  136.99097589237127
******************
acc_reward 136.99097589237127
******************
fitting the model...
fitting model
X.size() = torch.Size([1457, 6])
RESHAPED X.size() = torch.Size([1, 1457, 6])


2020-05-19 21:53:09.923 | INFO     | baselines.NP:fit:207 - training epoch [5/400],loss train: -1.0571.
2020-05-19 21:53:09.940 | INFO     | baselines.NP:fit:207 - training epoch [6/400],loss train: -1.0248.
2020-05-19 21:53:09.957 | INFO     | baselines.NP:fit:207 - training epoch [7/400],loss train: -0.7881.
2020-05-19 21:53:09.974 | INFO     | baselines.NP:fit:207 - training epoch [8/400],loss train: -0.9467.
2020-05-19 21:53:09.990 | INFO     | baselines.NP:fit:207 - training epoch [9/400],loss train: -1.0965.
2020-05-19 21:53:10.007 | INFO     | baselines.NP:fit:207 - training epoch [10/400],loss train: -0.9060.
2020-05-19 21:53:10.024 | INFO     | baselines.NP:fit:207 - training epoch [11/400],loss train: -1.0183.
2020-05-19 21:53:10.040 | INFO     | baselines.NP:fit:207 - training epoch [12/400],loss train: -0.9096.
2020-05-19 21:53:10.057 | INFO     | baselines.NP:fit:207 - training epoch [13/400],loss train: -1.0709.
2020-05-19 21:53:10.074 | INFO     | baselines.NP:fit:207 - 

2020-05-19 21:53:11.248 | INFO     | baselines.NP:fit:207 - training epoch [84/400],loss train: -1.0848.
2020-05-19 21:53:11.265 | INFO     | baselines.NP:fit:207 - training epoch [85/400],loss train: -1.0631.
2020-05-19 21:53:11.281 | INFO     | baselines.NP:fit:207 - training epoch [86/400],loss train: -1.1004.
2020-05-19 21:53:11.298 | INFO     | baselines.NP:fit:207 - training epoch [87/400],loss train: -1.0419.
2020-05-19 21:53:11.315 | INFO     | baselines.NP:fit:207 - training epoch [88/400],loss train: -1.0808.
2020-05-19 21:53:11.332 | INFO     | baselines.NP:fit:207 - training epoch [89/400],loss train: -0.9894.
2020-05-19 21:53:11.348 | INFO     | baselines.NP:fit:207 - training epoch [90/400],loss train: -1.0916.
2020-05-19 21:53:11.365 | INFO     | baselines.NP:fit:207 - training epoch [91/400],loss train: -1.1296.
2020-05-19 21:53:11.382 | INFO     | baselines.NP:fit:207 - training epoch [92/400],loss train: -1.0436.
2020-05-19 21:53:11.399 | INFO     | baselines.NP:fit:2

2020-05-19 21:53:12.559 | INFO     | baselines.NP:fit:207 - training epoch [162/400],loss train: -1.0041.
2020-05-19 21:53:12.575 | INFO     | baselines.NP:fit:207 - training epoch [163/400],loss train: -1.0992.
2020-05-19 21:53:12.592 | INFO     | baselines.NP:fit:207 - training epoch [164/400],loss train: -1.0860.
2020-05-19 21:53:12.609 | INFO     | baselines.NP:fit:207 - training epoch [165/400],loss train: -1.0983.
2020-05-19 21:53:12.626 | INFO     | baselines.NP:fit:207 - training epoch [166/400],loss train: -1.1932.
2020-05-19 21:53:12.643 | INFO     | baselines.NP:fit:207 - training epoch [167/400],loss train: -1.0515.
2020-05-19 21:53:12.660 | INFO     | baselines.NP:fit:207 - training epoch [168/400],loss train: -1.1148.
2020-05-19 21:53:12.677 | INFO     | baselines.NP:fit:207 - training epoch [169/400],loss train: -1.1017.
2020-05-19 21:53:12.693 | INFO     | baselines.NP:fit:207 - training epoch [170/400],loss train: -1.1313.
2020-05-19 21:53:12.710 | INFO     | baselines

2020-05-19 21:53:13.874 | INFO     | baselines.NP:fit:207 - training epoch [240/400],loss train: -1.0843.
2020-05-19 21:53:13.890 | INFO     | baselines.NP:fit:207 - training epoch [241/400],loss train: -1.0150.
2020-05-19 21:53:13.907 | INFO     | baselines.NP:fit:207 - training epoch [242/400],loss train: -1.0901.
2020-05-19 21:53:13.924 | INFO     | baselines.NP:fit:207 - training epoch [243/400],loss train: -1.0596.
2020-05-19 21:53:13.941 | INFO     | baselines.NP:fit:207 - training epoch [244/400],loss train: -0.9622.
2020-05-19 21:53:13.957 | INFO     | baselines.NP:fit:207 - training epoch [245/400],loss train: -0.7332.
2020-05-19 21:53:13.974 | INFO     | baselines.NP:fit:207 - training epoch [246/400],loss train: -1.0344.
2020-05-19 21:53:13.991 | INFO     | baselines.NP:fit:207 - training epoch [247/400],loss train: -1.0598.
2020-05-19 21:53:14.008 | INFO     | baselines.NP:fit:207 - training epoch [248/400],loss train: -1.0849.
2020-05-19 21:53:14.024 | INFO     | baselines

2020-05-19 21:53:15.187 | INFO     | baselines.NP:fit:207 - training epoch [318/400],loss train: -1.1981.
2020-05-19 21:53:15.203 | INFO     | baselines.NP:fit:207 - training epoch [319/400],loss train: -1.1964.
2020-05-19 21:53:15.220 | INFO     | baselines.NP:fit:207 - training epoch [320/400],loss train: -1.1322.
2020-05-19 21:53:15.237 | INFO     | baselines.NP:fit:207 - training epoch [321/400],loss train: -1.1314.
2020-05-19 21:53:15.254 | INFO     | baselines.NP:fit:207 - training epoch [322/400],loss train: -1.2072.
2020-05-19 21:53:15.271 | INFO     | baselines.NP:fit:207 - training epoch [323/400],loss train: -1.1633.
2020-05-19 21:53:15.288 | INFO     | baselines.NP:fit:207 - training epoch [324/400],loss train: -1.1184.
2020-05-19 21:53:15.304 | INFO     | baselines.NP:fit:207 - training epoch [325/400],loss train: -1.1936.
2020-05-19 21:53:15.321 | INFO     | baselines.NP:fit:207 - training epoch [326/400],loss train: -1.0314.
2020-05-19 21:53:15.338 | INFO     | baselines

2020-05-19 21:53:16.502 | INFO     | baselines.NP:fit:207 - training epoch [396/400],loss train: -1.2329.
2020-05-19 21:53:16.519 | INFO     | baselines.NP:fit:207 - training epoch [397/400],loss train: -1.2242.
2020-05-19 21:53:16.536 | INFO     | baselines.NP:fit:207 - training epoch [398/400],loss train: -1.2775.
2020-05-19 21:53:16.553 | INFO     | baselines.NP:fit:207 - training epoch [399/400],loss train: -1.1840.


epoch:  2
task:  0
task:  0 step:  200 acc_reward:  182.15963296245235
******************
acc_reward 182.15963296245235
******************


2020-05-19 21:56:51.184 | INFO     | baselines.NP:fit:207 - training epoch [0/400],loss train: -1.2405.
2020-05-19 21:56:51.202 | INFO     | baselines.NP:fit:207 - training epoch [1/400],loss train: -1.2469.
2020-05-19 21:56:51.219 | INFO     | baselines.NP:fit:207 - training epoch [2/400],loss train: -1.2595.
2020-05-19 21:56:51.235 | INFO     | baselines.NP:fit:207 - training epoch [3/400],loss train: -1.2455.
2020-05-19 21:56:51.252 | INFO     | baselines.NP:fit:207 - training epoch [4/400],loss train: -1.2743.


task:  0 step:  200 acc_reward:  177.16010820273237
******************
acc_reward 177.16010820273237
******************
fitting the model...
fitting model
X.size() = torch.Size([1857, 6])
RESHAPED X.size() = torch.Size([1, 1857, 6])


2020-05-19 21:56:51.269 | INFO     | baselines.NP:fit:207 - training epoch [5/400],loss train: -1.3005.
2020-05-19 21:56:51.286 | INFO     | baselines.NP:fit:207 - training epoch [6/400],loss train: -1.2042.
2020-05-19 21:56:51.303 | INFO     | baselines.NP:fit:207 - training epoch [7/400],loss train: -1.2228.
2020-05-19 21:56:51.320 | INFO     | baselines.NP:fit:207 - training epoch [8/400],loss train: -1.2891.
2020-05-19 21:56:51.336 | INFO     | baselines.NP:fit:207 - training epoch [9/400],loss train: -1.2561.
2020-05-19 21:56:51.353 | INFO     | baselines.NP:fit:207 - training epoch [10/400],loss train: -1.2565.
2020-05-19 21:56:51.370 | INFO     | baselines.NP:fit:207 - training epoch [11/400],loss train: -1.2513.
2020-05-19 21:56:51.387 | INFO     | baselines.NP:fit:207 - training epoch [12/400],loss train: -1.2528.
2020-05-19 21:56:51.404 | INFO     | baselines.NP:fit:207 - training epoch [13/400],loss train: -1.2441.
2020-05-19 21:56:51.421 | INFO     | baselines.NP:fit:207 - 

2020-05-19 21:56:52.603 | INFO     | baselines.NP:fit:207 - training epoch [84/400],loss train: -1.2665.
2020-05-19 21:56:52.619 | INFO     | baselines.NP:fit:207 - training epoch [85/400],loss train: -1.2001.
2020-05-19 21:56:52.636 | INFO     | baselines.NP:fit:207 - training epoch [86/400],loss train: -1.2590.
2020-05-19 21:56:52.653 | INFO     | baselines.NP:fit:207 - training epoch [87/400],loss train: -1.1349.
2020-05-19 21:56:52.670 | INFO     | baselines.NP:fit:207 - training epoch [88/400],loss train: -1.2450.
2020-05-19 21:56:52.687 | INFO     | baselines.NP:fit:207 - training epoch [89/400],loss train: -1.2292.
2020-05-19 21:56:52.704 | INFO     | baselines.NP:fit:207 - training epoch [90/400],loss train: -1.1239.
2020-05-19 21:56:52.720 | INFO     | baselines.NP:fit:207 - training epoch [91/400],loss train: -1.2641.
2020-05-19 21:56:52.737 | INFO     | baselines.NP:fit:207 - training epoch [92/400],loss train: -1.1683.
2020-05-19 21:56:52.754 | INFO     | baselines.NP:fit:2

2020-05-19 21:56:53.923 | INFO     | baselines.NP:fit:207 - training epoch [162/400],loss train: -1.1791.
2020-05-19 21:56:53.940 | INFO     | baselines.NP:fit:207 - training epoch [163/400],loss train: -1.1813.
2020-05-19 21:56:53.956 | INFO     | baselines.NP:fit:207 - training epoch [164/400],loss train: -1.2631.
2020-05-19 21:56:53.973 | INFO     | baselines.NP:fit:207 - training epoch [165/400],loss train: -1.2357.
2020-05-19 21:56:53.990 | INFO     | baselines.NP:fit:207 - training epoch [166/400],loss train: -1.2384.
2020-05-19 21:56:54.007 | INFO     | baselines.NP:fit:207 - training epoch [167/400],loss train: -1.2495.
2020-05-19 21:56:54.024 | INFO     | baselines.NP:fit:207 - training epoch [168/400],loss train: -1.2609.
2020-05-19 21:56:54.041 | INFO     | baselines.NP:fit:207 - training epoch [169/400],loss train: -1.2511.
2020-05-19 21:56:54.058 | INFO     | baselines.NP:fit:207 - training epoch [170/400],loss train: -1.2389.
2020-05-19 21:56:54.075 | INFO     | baselines

2020-05-19 21:56:55.241 | INFO     | baselines.NP:fit:207 - training epoch [240/400],loss train: -1.2155.
2020-05-19 21:56:55.257 | INFO     | baselines.NP:fit:207 - training epoch [241/400],loss train: -1.2228.
2020-05-19 21:56:55.274 | INFO     | baselines.NP:fit:207 - training epoch [242/400],loss train: -1.1816.
2020-05-19 21:56:55.291 | INFO     | baselines.NP:fit:207 - training epoch [243/400],loss train: -1.2298.
2020-05-19 21:56:55.308 | INFO     | baselines.NP:fit:207 - training epoch [244/400],loss train: -1.1890.
2020-05-19 21:56:55.325 | INFO     | baselines.NP:fit:207 - training epoch [245/400],loss train: -1.2469.
2020-05-19 21:56:55.342 | INFO     | baselines.NP:fit:207 - training epoch [246/400],loss train: -1.2133.
2020-05-19 21:56:55.358 | INFO     | baselines.NP:fit:207 - training epoch [247/400],loss train: -1.2045.
2020-05-19 21:56:55.375 | INFO     | baselines.NP:fit:207 - training epoch [248/400],loss train: -1.1338.
2020-05-19 21:56:55.392 | INFO     | baselines

2020-05-19 21:56:56.559 | INFO     | baselines.NP:fit:207 - training epoch [318/400],loss train: -1.2005.
2020-05-19 21:56:56.576 | INFO     | baselines.NP:fit:207 - training epoch [319/400],loss train: -1.2091.
2020-05-19 21:56:56.593 | INFO     | baselines.NP:fit:207 - training epoch [320/400],loss train: -1.1665.
2020-05-19 21:56:56.609 | INFO     | baselines.NP:fit:207 - training epoch [321/400],loss train: -1.2632.
2020-05-19 21:56:56.626 | INFO     | baselines.NP:fit:207 - training epoch [322/400],loss train: -1.1839.
2020-05-19 21:56:56.643 | INFO     | baselines.NP:fit:207 - training epoch [323/400],loss train: -1.1725.
2020-05-19 21:56:56.660 | INFO     | baselines.NP:fit:207 - training epoch [324/400],loss train: -1.2694.
2020-05-19 21:56:56.677 | INFO     | baselines.NP:fit:207 - training epoch [325/400],loss train: -1.1966.
2020-05-19 21:56:56.694 | INFO     | baselines.NP:fit:207 - training epoch [326/400],loss train: -1.1870.
2020-05-19 21:56:56.711 | INFO     | baselines

2020-05-19 21:56:57.877 | INFO     | baselines.NP:fit:207 - training epoch [396/400],loss train: -1.3036.
2020-05-19 21:56:57.894 | INFO     | baselines.NP:fit:207 - training epoch [397/400],loss train: -1.2245.
2020-05-19 21:56:57.911 | INFO     | baselines.NP:fit:207 - training epoch [398/400],loss train: -1.2723.
2020-05-19 21:56:57.928 | INFO     | baselines.NP:fit:207 - training epoch [399/400],loss train: -1.2968.


epoch:  3
task:  0
task:  0 step:  200 acc_reward:  163.44984841104414
******************
acc_reward 163.44984841104414
******************


2020-05-19 22:00:32.090 | INFO     | baselines.NP:fit:207 - training epoch [0/400],loss train: -1.2637.
2020-05-19 22:00:32.107 | INFO     | baselines.NP:fit:207 - training epoch [1/400],loss train: -1.2871.
2020-05-19 22:00:32.124 | INFO     | baselines.NP:fit:207 - training epoch [2/400],loss train: -1.2419.
2020-05-19 22:00:32.141 | INFO     | baselines.NP:fit:207 - training epoch [3/400],loss train: -1.2660.
2020-05-19 22:00:32.158 | INFO     | baselines.NP:fit:207 - training epoch [4/400],loss train: -1.2772.


task:  0 step:  200 acc_reward:  160.96041207482762
******************
acc_reward 160.96041207482762
******************
fitting the model...
fitting model
X.size() = torch.Size([2257, 6])
RESHAPED X.size() = torch.Size([1, 2257, 6])


2020-05-19 22:00:32.175 | INFO     | baselines.NP:fit:207 - training epoch [5/400],loss train: -1.2377.
2020-05-19 22:00:32.192 | INFO     | baselines.NP:fit:207 - training epoch [6/400],loss train: -1.2603.
2020-05-19 22:00:32.208 | INFO     | baselines.NP:fit:207 - training epoch [7/400],loss train: -1.2393.
2020-05-19 22:00:32.225 | INFO     | baselines.NP:fit:207 - training epoch [8/400],loss train: -1.2746.
2020-05-19 22:00:32.242 | INFO     | baselines.NP:fit:207 - training epoch [9/400],loss train: -1.2908.
2020-05-19 22:00:32.259 | INFO     | baselines.NP:fit:207 - training epoch [10/400],loss train: -1.3240.
2020-05-19 22:00:32.276 | INFO     | baselines.NP:fit:207 - training epoch [11/400],loss train: -1.2880.
2020-05-19 22:00:32.293 | INFO     | baselines.NP:fit:207 - training epoch [12/400],loss train: -1.3168.
2020-05-19 22:00:32.310 | INFO     | baselines.NP:fit:207 - training epoch [13/400],loss train: -1.2827.
2020-05-19 22:00:32.326 | INFO     | baselines.NP:fit:207 - 

2020-05-19 22:00:33.508 | INFO     | baselines.NP:fit:207 - training epoch [84/400],loss train: -1.2936.
2020-05-19 22:00:33.525 | INFO     | baselines.NP:fit:207 - training epoch [85/400],loss train: -1.2925.
2020-05-19 22:00:33.541 | INFO     | baselines.NP:fit:207 - training epoch [86/400],loss train: -1.3070.
2020-05-19 22:00:33.558 | INFO     | baselines.NP:fit:207 - training epoch [87/400],loss train: -1.2750.
2020-05-19 22:00:33.575 | INFO     | baselines.NP:fit:207 - training epoch [88/400],loss train: -1.2762.
2020-05-19 22:00:33.592 | INFO     | baselines.NP:fit:207 - training epoch [89/400],loss train: -1.3046.
2020-05-19 22:00:33.609 | INFO     | baselines.NP:fit:207 - training epoch [90/400],loss train: -1.2675.
2020-05-19 22:00:33.625 | INFO     | baselines.NP:fit:207 - training epoch [91/400],loss train: -1.2979.
2020-05-19 22:00:33.642 | INFO     | baselines.NP:fit:207 - training epoch [92/400],loss train: -1.2543.
2020-05-19 22:00:33.659 | INFO     | baselines.NP:fit:2

2020-05-19 22:00:34.823 | INFO     | baselines.NP:fit:207 - training epoch [162/400],loss train: -1.2581.
2020-05-19 22:00:34.840 | INFO     | baselines.NP:fit:207 - training epoch [163/400],loss train: -1.2973.
2020-05-19 22:00:34.857 | INFO     | baselines.NP:fit:207 - training epoch [164/400],loss train: -1.2679.
2020-05-19 22:00:34.873 | INFO     | baselines.NP:fit:207 - training epoch [165/400],loss train: -1.2967.
2020-05-19 22:00:34.890 | INFO     | baselines.NP:fit:207 - training epoch [166/400],loss train: -1.2830.
2020-05-19 22:00:34.907 | INFO     | baselines.NP:fit:207 - training epoch [167/400],loss train: -1.3018.
2020-05-19 22:00:34.924 | INFO     | baselines.NP:fit:207 - training epoch [168/400],loss train: -1.2810.
2020-05-19 22:00:34.941 | INFO     | baselines.NP:fit:207 - training epoch [169/400],loss train: -1.2688.
2020-05-19 22:00:34.958 | INFO     | baselines.NP:fit:207 - training epoch [170/400],loss train: -1.2672.
2020-05-19 22:00:34.975 | INFO     | baselines

2020-05-19 22:00:36.137 | INFO     | baselines.NP:fit:207 - training epoch [240/400],loss train: -1.3138.
2020-05-19 22:00:36.153 | INFO     | baselines.NP:fit:207 - training epoch [241/400],loss train: -1.2820.
2020-05-19 22:00:36.170 | INFO     | baselines.NP:fit:207 - training epoch [242/400],loss train: -1.2680.
2020-05-19 22:00:36.187 | INFO     | baselines.NP:fit:207 - training epoch [243/400],loss train: -1.2943.
2020-05-19 22:00:36.204 | INFO     | baselines.NP:fit:207 - training epoch [244/400],loss train: -1.3252.
2020-05-19 22:00:36.221 | INFO     | baselines.NP:fit:207 - training epoch [245/400],loss train: -1.3220.
2020-05-19 22:00:36.238 | INFO     | baselines.NP:fit:207 - training epoch [246/400],loss train: -1.3038.
2020-05-19 22:00:36.255 | INFO     | baselines.NP:fit:207 - training epoch [247/400],loss train: -1.2892.
2020-05-19 22:00:36.271 | INFO     | baselines.NP:fit:207 - training epoch [248/400],loss train: -1.2988.
2020-05-19 22:00:36.288 | INFO     | baselines

2020-05-19 22:00:37.451 | INFO     | baselines.NP:fit:207 - training epoch [318/400],loss train: -1.3074.
2020-05-19 22:00:37.468 | INFO     | baselines.NP:fit:207 - training epoch [319/400],loss train: -1.3012.
2020-05-19 22:00:37.485 | INFO     | baselines.NP:fit:207 - training epoch [320/400],loss train: -1.2912.
2020-05-19 22:00:37.501 | INFO     | baselines.NP:fit:207 - training epoch [321/400],loss train: -1.2104.
2020-05-19 22:00:37.518 | INFO     | baselines.NP:fit:207 - training epoch [322/400],loss train: -1.2284.
2020-05-19 22:00:37.535 | INFO     | baselines.NP:fit:207 - training epoch [323/400],loss train: -1.2754.
2020-05-19 22:00:37.553 | INFO     | baselines.NP:fit:207 - training epoch [324/400],loss train: -1.2041.
2020-05-19 22:00:37.569 | INFO     | baselines.NP:fit:207 - training epoch [325/400],loss train: -1.2163.
2020-05-19 22:00:37.586 | INFO     | baselines.NP:fit:207 - training epoch [326/400],loss train: -1.2747.
2020-05-19 22:00:37.603 | INFO     | baselines

2020-05-19 22:00:38.768 | INFO     | baselines.NP:fit:207 - training epoch [396/400],loss train: -1.3099.
2020-05-19 22:00:38.784 | INFO     | baselines.NP:fit:207 - training epoch [397/400],loss train: -1.2996.
2020-05-19 22:00:38.801 | INFO     | baselines.NP:fit:207 - training epoch [398/400],loss train: -1.2729.
2020-05-19 22:00:38.818 | INFO     | baselines.NP:fit:207 - training epoch [399/400],loss train: -1.2409.


epoch:  4
task:  0


KeyboardInterrupt: 