In [1]:
import time, datetime
import copy
import os
import sys
import warnings
#warnings.filterwarnings("ignore", category=UserWarning)
#warnings.filterwarnings("ignore", category=RuntimeWarning)

import numpy as np
from loguru import logger
import yaml
from utils import dumb_reward_plot
import gym

sys.path.append('./envs/cartpole-envs')
sys.path.append('./')
import cartpole_envs
#import highway_env

from utils import plot_reward, plot_index
from mpc.mpc_cp import MPC
from baselines.NN import NN

def prepare_dynamics(gym_config):
    """Build the ordered list of environments that make up the task.

    One environment is created with ``gym.make`` for every id listed in
    ``gym_config['dynamics_name']``. The returned task is assembled by picking
    from that pool in the order given by ``gym_config['task_dynamics_list']``.

    Args:
        gym_config: mapping that provides at least
            ``'dynamics_name'`` (sequence of ids accepted by ``gym.make``) and
            ``'task_dynamics_list'`` (sequence of indices into that pool).

    Returns:
        list: the environments in task order. An index that occurs more than
        once in ``task_dynamics_list`` yields the same environment object
        each time.

    Raises:
        KeyError: if one of the required keys is missing.
        IndexError: if ``task_dynamics_list`` contains an index beyond the pool.
    """
    # Create every environment exactly once. The previous version called
    # gym.make twice per id and threw the first instance away unclosed.
    dynamics_set = [gym.make(name) for name in gym_config['dynamics_name']]
    # NOTE(review): gym_config['seed'] is not applied to the environments (the
    # env.seed call was commented out) -- confirm whether that is intended.

    # use pre-defined env sequence
    return [dynamics_set[i] for i in gym_config['task_dynamics_list']]

def load_config(config_path="config.yml"):
    """Read a YAML configuration file and return its parsed content.

    Args:
        config_path: path of the YAML file to read.

    Returns:
        Whatever ``yaml.load`` produces for the file content.

    Raises:
        FileNotFoundError: if ``config_path`` is not an existing regular file.
            This is a subclass of ``Exception``, so callers that catch the
            generic exception raised previously keep working.
    """
    if not os.path.isfile(config_path):
        raise FileNotFoundError("Configuration file is not found in the path: "+config_path)
    # The context manager closes the handle even if parsing raises; the
    # previous version left the file open.
    with open(config_path) as f:
        # NOTE(review): FullLoader is not meant for untrusted input; only load
        # configuration files you control, or switch to yaml.safe_load.
        return yaml.load(f, Loader=yaml.FullLoader)


In [2]:
config = load_config('config/config_cp_nn_delay.yml')
# Split the configuration into the sections used below.
nn_config, mpc_config, gym_config = (
    config[section] for section in ('NN_config', 'mpc_config', 'gym_config')
)
render = gym_config['render']

# Dynamics model. Alternatives that were swapped in here before:
#   DPGPMM(dpgp_config=dpgp_config)
#   SingleSparseGP(sparse_gp_config=sparse_gp_config)
#   SingleGP(gp_config=gp_config)
model = NN(NN_config=nn_config)
logger.info('Using model: {}', model.name)

# MPC controller used for action selection
mpc_controller = MPC(mpc_config=mpc_config)

# Build the task: a sequence of environments; the task counts as solved
# once every dynamic in it is solved.
task = prepare_dynamics(gym_config)
print(gym_config)

"""start DPGP-MBRL"""
# Bookkeeping state. Of these, the cells visible in this notebook only read
# log_name, total_tasks and max_delay_step.
data_buffer, label_list = [], []
subtask_list, subtask_reward = [], []
subtask_succ_count, comp_trainable = [0], [1]
task_reward = []
trainable, task_solved = True, False
subtask_solved = [False] * 4
total_count = task_epi = 0
log_name = None

# Number of entries of `task` that are actually run
total_tasks = 1
# Sizes the action-history buffer (max_delay_step + 1 slots)
max_delay_step = gym_config["max_delay_step"]

2020-05-21 16:18:41.133 | INFO     | __main__:<module>:12 - Using model: NN


{'render': False, 'task_dynamics_list': [0, 1, 2, 3], 'subtask_episode': 3, 'subtask_episode_length': 200, 'task_episode': 100, 'seed': 1000, 'dynamics_name': ['CartPoleSwingUpEnvCm05Pm04Pl05-v0', 'CartPoleSwingUpEnvCm05Pm04Pl07-v0', 'CartPoleSwingUpEnvCm05Pm08Pl05-v0', 'CartPoleSwingUpEnvCm05Pm08Pl07-v0'], 'max_delay_step': 2}


In [3]:
"""NN pretrain"""
pretrain_episodes = 2

# Collect transitions with random actions while the command sent to the
# environment lags behind the sampled one, then fit the model on them.
# (The variant without delay fed model.data_process([0, obs, action, obs_next - obs])
# and stepped the environment with the sampled action directly.)
for task_idx in range(total_tasks):
    env = task[task_idx]
    for epi in range(pretrain_episodes):
        # History of commands; after each update the newest one sits in the last slot.
        act_buf = np.zeros(max_delay_step + 1)
        # Fixed delay; a randomised alternative was np.random.randint(0, max_delay_step)
        delay_step = 1
        obs = env.reset()
        done = False
        mpc_controller.reset()
        while not done:
            action = env.action_space.sample()
            # Drop the oldest command and append the freshly sampled one.
            act_buf = np.concatenate((act_buf[1:], action))
            # The environment receives the command issued `delay_step` steps ago.
            applied_action = act_buf[-1 - delay_step]
            obs_next, reward, done, state_next = env.step([applied_action])
            # Input: observation plus the commands preceding the new one.
            model_input = np.concatenate((obs, act_buf[:-1]))
            # Target: observation change plus the history shifted by one step.
            model_target = np.concatenate((obs_next - obs, act_buf[1:]))
            model.data_process([0, model_input, action, model_target])
            obs = copy.deepcopy(obs_next)

# Fit the model on the collected transitions
# (model.n_epochs = 20 was used here at some point).
model.validation_flag = True
model.fit()

2020-05-21 16:18:41.261 | INFO     | baselines.NN:fit:177 - Epoch [9/600], loss train: 0.9016, loss test  0.9040
2020-05-21 16:18:41.280 | INFO     | baselines.NN:fit:177 - Epoch [19/600], loss train: 0.7292, loss test  0.7210
2020-05-21 16:18:41.298 | INFO     | baselines.NN:fit:177 - Epoch [29/600], loss train: 0.4924, loss test  0.4871
2020-05-21 16:18:41.316 | INFO     | baselines.NN:fit:177 - Epoch [39/600], loss train: 0.3399, loss test  0.3670
2020-05-21 16:18:41.335 | INFO     | baselines.NN:fit:177 - Epoch [49/600], loss train: 0.2767, loss test  0.3444
2020-05-21 16:18:41.353 | INFO     | baselines.NN:fit:177 - Epoch [59/600], loss train: 0.2482, loss test  0.3322


[ 0.0000000e+00  0.0000000e+00 -1.0000000e+00  1.2246468e-16
  0.0000000e+00] [ 0.          0.         -0.24480611]
[ 0.00000000e+00 -2.40520631e-17 -1.00000000e+00  1.22464680e-16
  2.16468568e-16] [ 0.         -0.24480611  0.89021277]
[-9.62082525e-19 -3.26408148e-01 -1.00000000e+00  1.22464680e-16
  9.79224443e-01] [-0.24480611  0.89021277 -0.95559698]
[-0.01305633  0.86271827 -0.99923299 -0.03915896 -2.58815482] [ 0.89021277 -0.95559698  0.0058586 ]
[ 0.0214524  -0.40675553 -0.99792979  0.0643128   1.17120059] [-0.95559698  0.0058586  -0.63033533]
[ 0.00518218 -0.40941986 -0.99984672  0.0175083   1.25496322] [ 0.0058586  -0.63033533 -0.20717843]
[-0.01119461 -1.2506305  -0.99946575 -0.03268352  3.79884009] [-0.63033533 -0.20717843 -0.36971983]
[-0.06121983 -1.50883293 -0.98300187 -0.18359556  4.53451931] [-0.20717843 -0.36971983 -0.3554869 ]
[-0.12157315 -1.92412232 -0.93375786 -0.35790538  5.54286103] [-0.36971983 -0.3554869   0.35878307]
[-0.19853804 -2.22693931 -0.83219696 -0.55

2020-05-21 16:18:41.372 | INFO     | baselines.NN:fit:177 - Epoch [69/600], loss train: 0.2305, loss test  0.3127
2020-05-21 16:18:41.390 | INFO     | baselines.NN:fit:177 - Epoch [79/600], loss train: 0.2207, loss test  0.2931
2020-05-21 16:18:41.408 | INFO     | baselines.NN:fit:177 - Epoch [89/600], loss train: 0.2143, loss test  0.2879
2020-05-21 16:18:41.425 | INFO     | baselines.NN:fit:177 - Epoch [99/600], loss train: 0.2096, loss test  0.2836
2020-05-21 16:18:41.443 | INFO     | baselines.NN:fit:177 - Epoch [109/600], loss train: 0.2047, loss test  0.2793
2020-05-21 16:18:41.461 | INFO     | baselines.NN:fit:177 - Epoch [119/600], loss train: 0.1991, loss test  0.2741
2020-05-21 16:18:41.479 | INFO     | baselines.NN:fit:177 - Epoch [129/600], loss train: 0.1920, loss test  0.2668
2020-05-21 16:18:41.496 | INFO     | baselines.NN:fit:177 - Epoch [139/600], loss train: 0.1827, loss test  0.2569
2020-05-21 16:18:41.514 | INFO     | baselines.NN:fit:177 - Epoch [149/600], loss tr

0.0007005491061136127

In [4]:
"""testing the model with MPC while training """
test_episode = 2
test_epoch = 20
log = []

# Environments handed to the MPC controller as `task`. They are built once
# here; previously prepare_dynamics() ran on every control step, re-creating
# every environment each step without ever closing them.
planning_envs = prepare_dynamics(gym_config)

# np.save does not create missing parent directories.
log_dir = './misc/log/'
os.makedirs(log_dir, exist_ok=True)

for ep in range(test_epoch):
    print('epoch: ', ep)
    for task_idx in range(total_tasks):
        env = task[task_idx]
        env_copy = planning_envs[task_idx]
        print('task: ', task_idx)
        for epi in range(test_episode):
            obs = env.reset()
            # History of commands, max_delay_step + 1 slots, starting at zero.
            act_buf = np.zeros(max_delay_step+1)
            # Fixed delay; a randomised alternative was np.random.randint(0, max_delay_step)
            delay_step = 1

            O, A, R, acc_reward, done = [], [], [], 0, False
            mpc_controller.reset()
            i = 0
            while not done:
                i += 1

                # Same per-step reset as before, now on the reused planning env.
                env_copy.reset()

                # Drop the oldest command, plan on observation + remaining
                # history, then append the newly planned command.
                act_buf = act_buf[1:]
                action = np.array([mpc_controller.act(task=env_copy, model=model, state=np.concatenate((obs, act_buf)))])
                act_buf = np.concatenate((act_buf, action))
                # The environment receives the command issued `delay_step` steps ago.
                obs_next, reward, done, state_next = env.step([act_buf[-1-delay_step]])
                # Store the transition for the next fit (no training here).
                model.data_process([0, np.concatenate((obs, act_buf[:-1])), action, np.concatenate((obs_next - obs, act_buf[1:]))])

                A.append(action)
                # NOTE(review): this is the fourth value returned by env.step,
                # logged under "obs" -- confirm it carries the intended state.
                O.append(state_next)
                R.append(reward)

                obs = copy.deepcopy(obs_next)
                acc_reward += reward
            print('task: ', task_idx,'step: ', i, 'acc_reward: ', acc_reward)
            # NOTE(review): env is reset again for the next episode after this
            # close() -- confirm the environment supports that.
            env.close()

            # The loop above only exits once `done` is set, so the episode
            # is always logged.
            samples = {
                "obs": np.array(O),
                "actions": np.array(A),
                "rewards": np.array(R),
                "reward_sum": acc_reward,
            }
            print('******************')
            print('acc_reward', acc_reward)
            print('******************')
            log.append(samples)
            # One log file per run: the timestamp is fixed on the first save
            # and the file is rewritten with the growing log afterwards.
            if log_name is None:
                log_name = datetime.datetime.now()
            path = log_dir + log_name.strftime("%d-%H-%M") + '.npy'
            np.save(path, log, allow_pickle=True)
            dumb_reward_plot(path)

        # use the collected data to train the model
        print('fitting the model...')
        model.fit()

epoch:  0
task:  0
task:  0 step:  50 acc_reward:  13.175292036522713
******************
acc_reward 13.175292036522713
******************


2020-05-19 20:40:39.151 | INFO     | baselines.NN:fit:175 - Epoch [9/100], loss train: 0.1529, loss test  0.1346
2020-05-19 20:40:39.170 | INFO     | baselines.NN:fit:175 - Epoch [19/100], loss train: 0.0476, loss test  0.0654
2020-05-19 20:40:39.189 | INFO     | baselines.NN:fit:175 - Epoch [29/100], loss train: 0.0170, loss test  0.0335
2020-05-19 20:40:39.208 | INFO     | baselines.NN:fit:175 - Epoch [39/100], loss train: 0.0071, loss test  0.0209


task:  0 step:  49 acc_reward:  13.48922037363048
******************
acc_reward 13.48922037363048
******************
fitting the model...
data size:  159


2020-05-19 20:40:39.227 | INFO     | baselines.NN:fit:175 - Epoch [49/100], loss train: 0.0035, loss test  0.0161
2020-05-19 20:40:39.246 | INFO     | baselines.NN:fit:175 - Epoch [59/100], loss train: 0.0021, loss test  0.0127
2020-05-19 20:40:39.265 | INFO     | baselines.NN:fit:175 - Epoch [69/100], loss train: 0.0091, loss test  0.0261
2020-05-19 20:40:39.283 | INFO     | baselines.NN:fit:175 - Epoch [79/100], loss train: 0.0012, loss test  0.0165
2020-05-19 20:40:39.302 | INFO     | baselines.NN:fit:175 - Epoch [89/100], loss train: 0.0036, loss test  0.0127
2020-05-19 20:40:39.320 | INFO     | baselines.NN:fit:175 - Epoch [99/100], loss train: 0.0006, loss test  0.0112


epoch:  1
task:  0
task:  0 step:  99 acc_reward:  32.00315925748389
******************
acc_reward 32.00315925748389
******************


2020-05-19 20:40:54.621 | INFO     | baselines.NN:fit:175 - Epoch [9/100], loss train: 0.0412, loss test  0.0546
2020-05-19 20:40:54.643 | INFO     | baselines.NN:fit:175 - Epoch [19/100], loss train: 0.0161, loss test  0.0339
2020-05-19 20:40:54.665 | INFO     | baselines.NN:fit:175 - Epoch [29/100], loss train: 0.0077, loss test  0.0273


task:  0 step:  111 acc_reward:  28.29869358522366
******************
acc_reward 28.29869358522366
******************
fitting the model...
data size:  369


2020-05-19 20:40:54.688 | INFO     | baselines.NN:fit:175 - Epoch [39/100], loss train: 0.0043, loss test  0.0212
2020-05-19 20:40:54.710 | INFO     | baselines.NN:fit:175 - Epoch [49/100], loss train: 0.0025, loss test  0.0179
2020-05-19 20:40:54.733 | INFO     | baselines.NN:fit:175 - Epoch [59/100], loss train: 0.0018, loss test  0.0174
2020-05-19 20:40:54.755 | INFO     | baselines.NN:fit:175 - Epoch [69/100], loss train: 0.0070, loss test  0.0251
2020-05-19 20:40:54.777 | INFO     | baselines.NN:fit:175 - Epoch [79/100], loss train: 0.0057, loss test  0.0192
2020-05-19 20:40:54.799 | INFO     | baselines.NN:fit:175 - Epoch [89/100], loss train: 0.0035, loss test  0.0164
2020-05-19 20:40:54.822 | INFO     | baselines.NN:fit:175 - Epoch [99/100], loss train: 0.0013, loss test  0.0150


epoch:  2
task:  0
task:  0 step:  200 acc_reward:  115.66433860651836
******************
acc_reward 115.66433860651836
******************


2020-05-19 20:41:24.012 | INFO     | baselines.NN:fit:175 - Epoch [9/100], loss train: 0.0408, loss test  0.0273


task:  0 step:  200 acc_reward:  110.56737490546855
******************
acc_reward 110.56737490546855
******************
fitting the model...
data size:  769


2020-05-19 20:41:24.057 | INFO     | baselines.NN:fit:175 - Epoch [19/100], loss train: 0.0110, loss test  0.0135
2020-05-19 20:41:24.100 | INFO     | baselines.NN:fit:175 - Epoch [29/100], loss train: 0.0050, loss test  0.0088
2020-05-19 20:41:24.145 | INFO     | baselines.NN:fit:175 - Epoch [39/100], loss train: 0.0053, loss test  0.0075
2020-05-19 20:41:24.189 | INFO     | baselines.NN:fit:175 - Epoch [49/100], loss train: 0.0033, loss test  0.0056
2020-05-19 20:41:24.233 | INFO     | baselines.NN:fit:175 - Epoch [59/100], loss train: 0.0033, loss test  0.0079
2020-05-19 20:41:24.277 | INFO     | baselines.NN:fit:175 - Epoch [69/100], loss train: 0.0075, loss test  0.0098
2020-05-19 20:41:24.321 | INFO     | baselines.NN:fit:175 - Epoch [79/100], loss train: 0.0028, loss test  0.0060
2020-05-19 20:41:24.365 | INFO     | baselines.NN:fit:175 - Epoch [89/100], loss train: 0.0012, loss test  0.0049
2020-05-19 20:41:24.409 | INFO     | baselines.NN:fit:175 - Epoch [99/100], loss train: 

epoch:  3
task:  0
task:  0 step:  200 acc_reward:  123.77987962153477
******************
acc_reward 123.77987962153477
******************


2020-05-19 20:41:53.664 | INFO     | baselines.NN:fit:175 - Epoch [9/100], loss train: 0.0736, loss test  0.0431


task:  0 step:  200 acc_reward:  124.14795795814605
******************
acc_reward 124.14795795814605
******************
fitting the model...
data size:  1169


2020-05-19 20:41:53.715 | INFO     | baselines.NN:fit:175 - Epoch [19/100], loss train: 0.0116, loss test  0.0142
2020-05-19 20:41:53.766 | INFO     | baselines.NN:fit:175 - Epoch [29/100], loss train: 0.0034, loss test  0.0081
2020-05-19 20:41:53.816 | INFO     | baselines.NN:fit:175 - Epoch [39/100], loss train: 0.0016, loss test  0.0057
2020-05-19 20:41:53.867 | INFO     | baselines.NN:fit:175 - Epoch [49/100], loss train: 0.0010, loss test  0.0053
2020-05-19 20:41:53.918 | INFO     | baselines.NN:fit:175 - Epoch [59/100], loss train: 0.0008, loss test  0.0049
2020-05-19 20:41:53.969 | INFO     | baselines.NN:fit:175 - Epoch [69/100], loss train: 0.0007, loss test  0.0049
2020-05-19 20:41:54.019 | INFO     | baselines.NN:fit:175 - Epoch [79/100], loss train: 0.0008, loss test  0.0047
2020-05-19 20:41:54.070 | INFO     | baselines.NN:fit:175 - Epoch [89/100], loss train: 0.0005, loss test  0.0047
2020-05-19 20:41:54.121 | INFO     | baselines.NN:fit:175 - Epoch [99/100], loss train: 

epoch:  4
task:  0
task:  0 step:  200 acc_reward:  181.5861880485557
******************
acc_reward 181.5861880485557
******************


2020-05-19 20:42:23.451 | INFO     | baselines.NN:fit:175 - Epoch [9/100], loss train: 0.0078, loss test  0.0070


task:  0 step:  200 acc_reward:  182.25434261564573
******************
acc_reward 182.25434261564573
******************
fitting the model...
data size:  1569


2020-05-19 20:42:23.524 | INFO     | baselines.NN:fit:175 - Epoch [19/100], loss train: 0.0025, loss test  0.0024
2020-05-19 20:42:23.596 | INFO     | baselines.NN:fit:175 - Epoch [29/100], loss train: 0.0009, loss test  0.0019
2020-05-19 20:42:23.668 | INFO     | baselines.NN:fit:175 - Epoch [39/100], loss train: 0.0007, loss test  0.0023
2020-05-19 20:42:23.741 | INFO     | baselines.NN:fit:175 - Epoch [49/100], loss train: 0.0117, loss test  0.0114
2020-05-19 20:42:23.812 | INFO     | baselines.NN:fit:175 - Epoch [59/100], loss train: 0.0009, loss test  0.0025
2020-05-19 20:42:23.885 | INFO     | baselines.NN:fit:175 - Epoch [69/100], loss train: 0.0080, loss test  0.0067
2020-05-19 20:42:23.957 | INFO     | baselines.NN:fit:175 - Epoch [79/100], loss train: 0.0012, loss test  0.0029
2020-05-19 20:42:24.030 | INFO     | baselines.NN:fit:175 - Epoch [89/100], loss train: 0.0013, loss test  0.0021
2020-05-19 20:42:24.102 | INFO     | baselines.NN:fit:175 - Epoch [99/100], loss train: 

epoch:  5
task:  0
task:  0 step:  200 acc_reward:  180.90134672104367
******************
acc_reward 180.90134672104367
******************
task:  0 step:  200 acc_reward:  178.3536317702831
******************
acc_reward 178.3536317702831
******************
fitting the model...
data size:  1969


2020-05-19 20:42:56.720 | INFO     | baselines.NN:fit:175 - Epoch [9/100], loss train: 0.0325, loss test  0.0277
2020-05-19 20:42:56.815 | INFO     | baselines.NN:fit:175 - Epoch [19/100], loss train: 0.0128, loss test  0.0085
2020-05-19 20:42:56.911 | INFO     | baselines.NN:fit:175 - Epoch [29/100], loss train: 0.0088, loss test  0.0104
2020-05-19 20:42:57.007 | INFO     | baselines.NN:fit:175 - Epoch [39/100], loss train: 0.0284, loss test  0.0387
2020-05-19 20:42:57.101 | INFO     | baselines.NN:fit:175 - Epoch [49/100], loss train: 0.0164, loss test  0.0105
2020-05-19 20:42:57.196 | INFO     | baselines.NN:fit:175 - Epoch [59/100], loss train: 0.0062, loss test  0.0088
2020-05-19 20:42:57.292 | INFO     | baselines.NN:fit:175 - Epoch [69/100], loss train: 0.0026, loss test  0.0028
2020-05-19 20:42:57.387 | INFO     | baselines.NN:fit:175 - Epoch [79/100], loss train: 0.0099, loss test  0.0066
2020-05-19 20:42:57.482 | INFO     | baselines.NN:fit:175 - Epoch [89/100], loss train: 0

epoch:  6
task:  0
task:  0 step:  200 acc_reward:  164.98115636853234
******************
acc_reward 164.98115636853234
******************
task:  0 step:  200 acc_reward:  153.82545841727145
******************
acc_reward 153.82545841727145
******************
fitting the model...
data size:  2369


2020-05-19 20:43:27.033 | INFO     | baselines.NN:fit:175 - Epoch [9/100], loss train: 0.0017, loss test  0.0033
2020-05-19 20:43:27.134 | INFO     | baselines.NN:fit:175 - Epoch [19/100], loss train: 0.0100, loss test  0.0088
2020-05-19 20:43:27.236 | INFO     | baselines.NN:fit:175 - Epoch [29/100], loss train: 0.0014, loss test  0.0021
2020-05-19 20:43:27.337 | INFO     | baselines.NN:fit:175 - Epoch [39/100], loss train: 0.0012, loss test  0.0021
2020-05-19 20:43:27.438 | INFO     | baselines.NN:fit:175 - Epoch [49/100], loss train: 0.0032, loss test  0.0057
2020-05-19 20:43:27.539 | INFO     | baselines.NN:fit:175 - Epoch [59/100], loss train: 0.0108, loss test  0.0050
2020-05-19 20:43:27.641 | INFO     | baselines.NN:fit:175 - Epoch [69/100], loss train: 0.0009, loss test  0.0027
2020-05-19 20:43:27.743 | INFO     | baselines.NN:fit:175 - Epoch [79/100], loss train: 0.0019, loss test  0.0024
2020-05-19 20:43:27.844 | INFO     | baselines.NN:fit:175 - Epoch [89/100], loss train: 0

epoch:  7
task:  0
task:  0 step:  200 acc_reward:  74.11950268109298
******************
acc_reward 74.11950268109298
******************
task:  0 step:  200 acc_reward:  72.35935993671474
******************
acc_reward 72.35935993671474
******************
fitting the model...
data size:  2769


2020-05-19 20:43:57.457 | INFO     | baselines.NN:fit:175 - Epoch [9/100], loss train: 0.0112, loss test  0.0050
2020-05-19 20:43:57.581 | INFO     | baselines.NN:fit:175 - Epoch [19/100], loss train: 0.0038, loss test  0.0062
2020-05-19 20:43:57.705 | INFO     | baselines.NN:fit:175 - Epoch [29/100], loss train: 0.0021, loss test  0.0022
2020-05-19 20:43:57.830 | INFO     | baselines.NN:fit:175 - Epoch [39/100], loss train: 0.0075, loss test  0.0057
2020-05-19 20:43:57.954 | INFO     | baselines.NN:fit:175 - Epoch [49/100], loss train: 0.0089, loss test  0.0126
2020-05-19 20:43:58.081 | INFO     | baselines.NN:fit:175 - Epoch [59/100], loss train: 0.0113, loss test  0.0166
2020-05-19 20:43:58.206 | INFO     | baselines.NN:fit:175 - Epoch [69/100], loss train: 0.0109, loss test  0.0109
2020-05-19 20:43:58.331 | INFO     | baselines.NN:fit:175 - Epoch [79/100], loss train: 0.0038, loss test  0.0029
2020-05-19 20:43:58.457 | INFO     | baselines.NN:fit:175 - Epoch [89/100], loss train: 0

epoch:  8
task:  0
task:  0 step:  200 acc_reward:  149.46296929737528
******************
acc_reward 149.46296929737528
******************
task:  0 step:  200 acc_reward:  161.17431767701854
******************
acc_reward 161.17431767701854
******************
fitting the model...
data size:  3169


2020-05-19 20:44:28.052 | INFO     | baselines.NN:fit:175 - Epoch [9/100], loss train: 0.0075, loss test  0.0082
2020-05-19 20:44:28.185 | INFO     | baselines.NN:fit:175 - Epoch [19/100], loss train: 0.0126, loss test  0.0146
2020-05-19 20:44:28.316 | INFO     | baselines.NN:fit:175 - Epoch [29/100], loss train: 0.0025, loss test  0.0045
2020-05-19 20:44:28.448 | INFO     | baselines.NN:fit:175 - Epoch [39/100], loss train: 0.0008, loss test  0.0024
2020-05-19 20:44:28.579 | INFO     | baselines.NN:fit:175 - Epoch [49/100], loss train: 0.0241, loss test  0.0299
2020-05-19 20:44:28.710 | INFO     | baselines.NN:fit:175 - Epoch [59/100], loss train: 0.0013, loss test  0.0023
2020-05-19 20:44:28.841 | INFO     | baselines.NN:fit:175 - Epoch [69/100], loss train: 0.0100, loss test  0.0236
2020-05-19 20:44:28.971 | INFO     | baselines.NN:fit:175 - Epoch [79/100], loss train: 0.0011, loss test  0.0026
2020-05-19 20:44:29.102 | INFO     | baselines.NN:fit:175 - Epoch [89/100], loss train: 0

epoch:  9
task:  0
task:  0 step:  200 acc_reward:  174.98415535265528
******************
acc_reward 174.98415535265528
******************
task:  0 step:  200 acc_reward:  173.3962451705526
******************
acc_reward 173.3962451705526
******************
fitting the model...
data size:  3569


2020-05-19 20:44:58.694 | INFO     | baselines.NN:fit:175 - Epoch [9/100], loss train: 0.0063, loss test  0.0128
2020-05-19 20:44:58.846 | INFO     | baselines.NN:fit:175 - Epoch [19/100], loss train: 0.0084, loss test  0.0050
2020-05-19 20:44:58.999 | INFO     | baselines.NN:fit:175 - Epoch [29/100], loss train: 0.0066, loss test  0.0109
2020-05-19 20:44:59.153 | INFO     | baselines.NN:fit:175 - Epoch [39/100], loss train: 0.0061, loss test  0.0178
2020-05-19 20:44:59.305 | INFO     | baselines.NN:fit:175 - Epoch [49/100], loss train: 0.0027, loss test  0.0046
2020-05-19 20:44:59.458 | INFO     | baselines.NN:fit:175 - Epoch [59/100], loss train: 0.0029, loss test  0.0090
2020-05-19 20:44:59.611 | INFO     | baselines.NN:fit:175 - Epoch [69/100], loss train: 0.0045, loss test  0.0121
2020-05-19 20:44:59.763 | INFO     | baselines.NN:fit:175 - Epoch [79/100], loss train: 0.0041, loss test  0.0101
2020-05-19 20:44:59.916 | INFO     | baselines.NN:fit:175 - Epoch [89/100], loss train: 0

epoch:  10
task:  0
task:  0 step:  200 acc_reward:  99.77024468487285
******************
acc_reward 99.77024468487285
******************
task:  0 step:  200 acc_reward:  100.93635360491025
******************
acc_reward 100.93635360491025
******************
fitting the model...
data size:  3969


2020-05-19 20:45:29.652 | INFO     | baselines.NN:fit:175 - Epoch [9/100], loss train: 0.0323, loss test  0.0205
2020-05-19 20:45:29.828 | INFO     | baselines.NN:fit:175 - Epoch [19/100], loss train: 0.0090, loss test  0.0103
2020-05-19 20:45:30.004 | INFO     | baselines.NN:fit:175 - Epoch [29/100], loss train: 0.0034, loss test  0.0071
2020-05-19 20:45:30.179 | INFO     | baselines.NN:fit:175 - Epoch [39/100], loss train: 0.0049, loss test  0.0072
2020-05-19 20:45:30.355 | INFO     | baselines.NN:fit:175 - Epoch [49/100], loss train: 0.0033, loss test  0.0072
2020-05-19 20:45:30.531 | INFO     | baselines.NN:fit:175 - Epoch [59/100], loss train: 0.0025, loss test  0.0062
2020-05-19 20:45:30.707 | INFO     | baselines.NN:fit:175 - Epoch [69/100], loss train: 0.0053, loss test  0.0080
2020-05-19 20:45:30.883 | INFO     | baselines.NN:fit:175 - Epoch [79/100], loss train: 0.0030, loss test  0.0109
2020-05-19 20:45:31.059 | INFO     | baselines.NN:fit:175 - Epoch [89/100], loss train: 0

epoch:  11
task:  0
task:  0 step:  200 acc_reward:  178.0481704716096
******************
acc_reward 178.0481704716096
******************
task:  0 step:  200 acc_reward:  176.1180075951444
******************
acc_reward 176.1180075951444
******************
fitting the model...
data size:  4369


2020-05-19 20:46:00.737 | INFO     | baselines.NN:fit:175 - Epoch [9/100], loss train: 0.0029, loss test  0.0049
2020-05-19 20:46:00.919 | INFO     | baselines.NN:fit:175 - Epoch [19/100], loss train: 0.0015, loss test  0.0025
2020-05-19 20:46:01.102 | INFO     | baselines.NN:fit:175 - Epoch [29/100], loss train: 0.0025, loss test  0.0051
2020-05-19 20:46:01.284 | INFO     | baselines.NN:fit:175 - Epoch [39/100], loss train: 0.0056, loss test  0.0059
2020-05-19 20:46:01.467 | INFO     | baselines.NN:fit:175 - Epoch [49/100], loss train: 0.0057, loss test  0.0040
2020-05-19 20:46:01.650 | INFO     | baselines.NN:fit:175 - Epoch [59/100], loss train: 0.0030, loss test  0.0056
2020-05-19 20:46:01.833 | INFO     | baselines.NN:fit:175 - Epoch [69/100], loss train: 0.0020, loss test  0.0035
2020-05-19 20:46:02.017 | INFO     | baselines.NN:fit:175 - Epoch [79/100], loss train: 0.0021, loss test  0.0043
2020-05-19 20:46:02.200 | INFO     | baselines.NN:fit:175 - Epoch [89/100], loss train: 0

epoch:  12
task:  0
task:  0 step:  200 acc_reward:  184.7059338068831
******************
acc_reward 184.7059338068831
******************
task:  0 step:  200 acc_reward:  184.5048787140778
******************
acc_reward 184.5048787140778
******************
fitting the model...
data size:  4769


2020-05-19 20:46:36.429 | INFO     | baselines.NN:fit:175 - Epoch [9/100], loss train: 0.0553, loss test  0.0522
2020-05-19 20:46:36.634 | INFO     | baselines.NN:fit:175 - Epoch [19/100], loss train: 0.0053, loss test  0.0074
2020-05-19 20:46:36.839 | INFO     | baselines.NN:fit:175 - Epoch [29/100], loss train: 0.0026, loss test  0.0059
2020-05-19 20:46:37.044 | INFO     | baselines.NN:fit:175 - Epoch [39/100], loss train: 0.0047, loss test  0.0056
2020-05-19 20:46:37.251 | INFO     | baselines.NN:fit:175 - Epoch [49/100], loss train: 0.0063, loss test  0.0071
2020-05-19 20:46:37.457 | INFO     | baselines.NN:fit:175 - Epoch [59/100], loss train: 0.0019, loss test  0.0063
2020-05-19 20:46:37.662 | INFO     | baselines.NN:fit:175 - Epoch [69/100], loss train: 0.0012, loss test  0.0036
2020-05-19 20:46:37.868 | INFO     | baselines.NN:fit:175 - Epoch [79/100], loss train: 0.0010, loss test  0.0034
2020-05-19 20:46:38.074 | INFO     | baselines.NN:fit:175 - Epoch [89/100], loss train: 0

epoch:  13
task:  0
task:  0 step:  200 acc_reward:  173.03659760123315
******************
acc_reward 173.03659760123315
******************
task:  0 step:  200 acc_reward:  171.87354126299567
******************
acc_reward 171.87354126299567
******************
fitting the model...
data size:  5169


2020-05-19 20:47:07.919 | INFO     | baselines.NN:fit:175 - Epoch [9/100], loss train: 0.0256, loss test  0.0307
2020-05-19 20:47:08.147 | INFO     | baselines.NN:fit:175 - Epoch [19/100], loss train: 0.0566, loss test  0.0567
2020-05-19 20:47:08.376 | INFO     | baselines.NN:fit:175 - Epoch [29/100], loss train: 0.0130, loss test  0.0199
2020-05-19 20:47:08.605 | INFO     | baselines.NN:fit:175 - Epoch [39/100], loss train: 0.0092, loss test  0.0120
2020-05-19 20:47:08.835 | INFO     | baselines.NN:fit:175 - Epoch [49/100], loss train: 0.0040, loss test  0.0080
2020-05-19 20:47:09.065 | INFO     | baselines.NN:fit:175 - Epoch [59/100], loss train: 0.0041, loss test  0.0069
2020-05-19 20:47:09.293 | INFO     | baselines.NN:fit:175 - Epoch [69/100], loss train: 0.0114, loss test  0.0170
2020-05-19 20:47:09.523 | INFO     | baselines.NN:fit:175 - Epoch [79/100], loss train: 0.0050, loss test  0.0099
2020-05-19 20:47:09.752 | INFO     | baselines.NN:fit:175 - Epoch [89/100], loss train: 0

epoch:  14
task:  0
task:  0 step:  200 acc_reward:  180.67740401277217
******************
acc_reward 180.67740401277217
******************
task:  0 step:  200 acc_reward:  181.05739137611326
******************
acc_reward 181.05739137611326
******************
fitting the model...
data size:  5569


2020-05-19 20:47:39.562 | INFO     | baselines.NN:fit:175 - Epoch [9/100], loss train: 0.0060, loss test  0.0134
2020-05-19 20:47:39.798 | INFO     | baselines.NN:fit:175 - Epoch [19/100], loss train: 0.0093, loss test  0.0099
2020-05-19 20:47:40.033 | INFO     | baselines.NN:fit:175 - Epoch [29/100], loss train: 0.0021, loss test  0.0059
2020-05-19 20:47:40.267 | INFO     | baselines.NN:fit:175 - Epoch [39/100], loss train: 0.0038, loss test  0.0117
2020-05-19 20:47:40.503 | INFO     | baselines.NN:fit:175 - Epoch [49/100], loss train: 0.0021, loss test  0.0096
2020-05-19 20:47:40.737 | INFO     | baselines.NN:fit:175 - Epoch [59/100], loss train: 0.0048, loss test  0.0081
2020-05-19 20:47:40.973 | INFO     | baselines.NN:fit:175 - Epoch [69/100], loss train: 0.0110, loss test  0.0155
2020-05-19 20:47:41.209 | INFO     | baselines.NN:fit:175 - Epoch [79/100], loss train: 0.0018, loss test  0.0070
2020-05-19 20:47:41.445 | INFO     | baselines.NN:fit:175 - Epoch [89/100], loss train: 0

epoch:  15
task:  0
task:  0 step:  200 acc_reward:  181.23338348547045
******************
acc_reward 181.23338348547045
******************
task:  0 step:  200 acc_reward:  180.8376148353737
******************
acc_reward 180.8376148353737
******************
fitting the model...
data size:  5969


2020-05-19 20:48:11.344 | INFO     | baselines.NN:fit:175 - Epoch [9/100], loss train: 0.0157, loss test  0.0349
2020-05-19 20:48:11.603 | INFO     | baselines.NN:fit:175 - Epoch [19/100], loss train: 0.0182, loss test  0.0197
2020-05-19 20:48:11.861 | INFO     | baselines.NN:fit:175 - Epoch [29/100], loss train: 0.0026, loss test  0.0074
2020-05-19 20:48:12.121 | INFO     | baselines.NN:fit:175 - Epoch [39/100], loss train: 0.0029, loss test  0.0087
2020-05-19 20:48:12.380 | INFO     | baselines.NN:fit:175 - Epoch [49/100], loss train: 0.0020, loss test  0.0096
2020-05-19 20:48:12.639 | INFO     | baselines.NN:fit:175 - Epoch [59/100], loss train: 0.0085, loss test  0.0163
2020-05-19 20:48:12.898 | INFO     | baselines.NN:fit:175 - Epoch [69/100], loss train: 0.0018, loss test  0.0075
2020-05-19 20:48:13.158 | INFO     | baselines.NN:fit:175 - Epoch [79/100], loss train: 0.0045, loss test  0.0090
2020-05-19 20:48:13.416 | INFO     | baselines.NN:fit:175 - Epoch [89/100], loss train: 0

epoch:  16
task:  0
task:  0 step:  200 acc_reward:  182.92823047823887
******************
acc_reward 182.92823047823887
******************
task:  0 step:  200 acc_reward:  179.24102056430266
******************
acc_reward 179.24102056430266
******************
fitting the model...
data size:  6369


2020-05-19 20:48:43.396 | INFO     | baselines.NN:fit:175 - Epoch [9/100], loss train: 0.0076, loss test  0.0074
2020-05-19 20:48:43.663 | INFO     | baselines.NN:fit:175 - Epoch [19/100], loss train: 0.0056, loss test  0.0131
2020-05-19 20:48:43.931 | INFO     | baselines.NN:fit:175 - Epoch [29/100], loss train: 0.0089, loss test  0.0056
2020-05-19 20:48:44.199 | INFO     | baselines.NN:fit:175 - Epoch [39/100], loss train: 0.0067, loss test  0.0130
2020-05-19 20:48:44.464 | INFO     | baselines.NN:fit:175 - Epoch [49/100], loss train: 0.0038, loss test  0.0067
2020-05-19 20:48:44.729 | INFO     | baselines.NN:fit:175 - Epoch [59/100], loss train: 0.0046, loss test  0.0057
2020-05-19 20:48:44.992 | INFO     | baselines.NN:fit:175 - Epoch [69/100], loss train: 0.0043, loss test  0.0054
2020-05-19 20:48:45.258 | INFO     | baselines.NN:fit:175 - Epoch [79/100], loss train: 0.0036, loss test  0.0057
2020-05-19 20:48:45.523 | INFO     | baselines.NN:fit:175 - Epoch [89/100], loss train: 0

epoch:  17
task:  0
task:  0 step:  200 acc_reward:  150.5952336272659
******************
acc_reward 150.5952336272659
******************
task:  0 step:  200 acc_reward:  151.6061837026727
******************
acc_reward 151.6061837026727
******************
fitting the model...
data size:  6769


2020-05-19 20:49:15.331 | INFO     | baselines.NN:fit:175 - Epoch [9/100], loss train: 0.0170, loss test  0.0197
2020-05-19 20:49:15.619 | INFO     | baselines.NN:fit:175 - Epoch [19/100], loss train: 0.0162, loss test  0.0136
2020-05-19 20:49:15.906 | INFO     | baselines.NN:fit:175 - Epoch [29/100], loss train: 0.0035, loss test  0.0086
2020-05-19 20:49:16.195 | INFO     | baselines.NN:fit:175 - Epoch [39/100], loss train: 0.0052, loss test  0.0067
2020-05-19 20:49:16.482 | INFO     | baselines.NN:fit:175 - Epoch [49/100], loss train: 0.0089, loss test  0.0161
2020-05-19 20:49:16.770 | INFO     | baselines.NN:fit:175 - Epoch [59/100], loss train: 0.0066, loss test  0.0064
2020-05-19 20:49:17.059 | INFO     | baselines.NN:fit:175 - Epoch [69/100], loss train: 0.0018, loss test  0.0061
2020-05-19 20:49:17.350 | INFO     | baselines.NN:fit:175 - Epoch [79/100], loss train: 0.0050, loss test  0.0084
2020-05-19 20:49:17.638 | INFO     | baselines.NN:fit:175 - Epoch [89/100], loss train: 0

epoch:  18
task:  0
task:  0 step:  200 acc_reward:  177.20192455829658
******************
acc_reward 177.20192455829658
******************
task:  0 step:  200 acc_reward:  179.8938686508575
******************
acc_reward 179.8938686508575
******************
fitting the model...
data size:  7169


2020-05-19 20:49:47.527 | INFO     | baselines.NN:fit:175 - Epoch [9/100], loss train: 0.0201, loss test  0.0115
2020-05-19 20:49:47.837 | INFO     | baselines.NN:fit:175 - Epoch [19/100], loss train: 0.0218, loss test  0.0304
2020-05-19 20:49:48.155 | INFO     | baselines.NN:fit:175 - Epoch [29/100], loss train: 0.0196, loss test  0.0197
2020-05-19 20:49:48.467 | INFO     | baselines.NN:fit:175 - Epoch [39/100], loss train: 0.0114, loss test  0.0123
2020-05-19 20:49:48.779 | INFO     | baselines.NN:fit:175 - Epoch [49/100], loss train: 0.0086, loss test  0.0145
2020-05-19 20:49:49.092 | INFO     | baselines.NN:fit:175 - Epoch [59/100], loss train: 0.0057, loss test  0.0122
2020-05-19 20:49:49.404 | INFO     | baselines.NN:fit:175 - Epoch [69/100], loss train: 0.0066, loss test  0.0110
2020-05-19 20:49:49.717 | INFO     | baselines.NN:fit:175 - Epoch [79/100], loss train: 0.0058, loss test  0.0131
2020-05-19 20:49:50.030 | INFO     | baselines.NN:fit:175 - Epoch [89/100], loss train: 0

epoch:  19
task:  0
task:  0 step:  200 acc_reward:  156.33026905815817
******************
acc_reward 156.33026905815817
******************
task:  0 step:  200 acc_reward:  163.83733742435737
******************
acc_reward 163.83733742435737
******************
fitting the model...
data size:  7569


2020-05-19 20:50:25.724 | INFO     | baselines.NN:fit:175 - Epoch [9/100], loss train: 0.0048, loss test  0.0088
2020-05-19 20:50:26.039 | INFO     | baselines.NN:fit:175 - Epoch [19/100], loss train: 0.0061, loss test  0.0121
2020-05-19 20:50:26.355 | INFO     | baselines.NN:fit:175 - Epoch [29/100], loss train: 0.0030, loss test  0.0091
2020-05-19 20:50:26.670 | INFO     | baselines.NN:fit:175 - Epoch [39/100], loss train: 0.0075, loss test  0.0076
2020-05-19 20:50:26.985 | INFO     | baselines.NN:fit:175 - Epoch [49/100], loss train: 0.0039, loss test  0.0082
2020-05-19 20:50:27.302 | INFO     | baselines.NN:fit:175 - Epoch [59/100], loss train: 0.0036, loss test  0.0076
2020-05-19 20:50:27.617 | INFO     | baselines.NN:fit:175 - Epoch [69/100], loss train: 0.0028, loss test  0.0120
2020-05-19 20:50:27.933 | INFO     | baselines.NN:fit:175 - Epoch [79/100], loss train: 0.0041, loss test  0.0082
2020-05-19 20:50:28.251 | INFO     | baselines.NN:fit:175 - Epoch [89/100], loss train: 0

In [6]:
    # Draw a random context/target split over 1000 sample indices.
    # Both slices start at offset 0 of the same shuffled list, so the target
    # indices always contain the context indices as a prefix.
    indices = [*range(1000)]
    np.random.shuffle(indices)  # shuffles the list in place
    # Context size: low bound inclusive, high bound exclusive -> 10..99.
    num_context = np.random.randint(10, 100)
    # Target size is at least the context size and stays below 100.
    num_target = np.random.randint(0, 100 - num_context) + num_context
    rand_ind_ctt = indices[:num_context]
    rand_ind_tgt = indices[:num_target]

In [9]:
import torch

# Scratch check: display a random permutation of the integers 0..9.
torch.randperm(n=10)

tensor([4, 3, 7, 8, 2, 9, 6, 5, 0, 1])

In [10]:
# Display the first `num_context` shuffled indices as a tensor.
# NOTE(review): `indices`, `num_context` and `torch` are not defined in this
# cell; it only runs after the cells that define them have been executed.
torch.tensor(indices[0:num_context])

tensor([199, 138, 643, 754, 702, 874, 782, 494, 355,  24, 967, 670, 170, 471,
        725, 315, 942, 720, 223, 966, 304, 129, 137, 646, 206, 865, 210, 738,
        525,  66, 979, 787, 801, 831, 323, 929, 662, 827, 744, 361, 795, 622,
        568,  14, 976, 469, 299, 669,  78, 908, 441, 179, 251, 811, 500, 724,
        896, 444, 561, 642, 651,  17, 716, 969, 855, 205, 872, 794, 989, 166,
         20, 885, 728, 727, 346, 685, 680, 417, 912, 110, 488, 577, 834, 848,
        862, 352, 495, 609, 892, 455, 558,  83, 406])