In [1]:
# Extend the module search path so packages living in the project root can be
# imported from this notebook (presumably the `utils` package used below — confirm).
# NOTE(review): this is an absolute, machine-specific path; the notebook will not
# import its helpers on another machine unless this directory exists there.
import sys
sys.path.append('/data/ad181/RemoteDir/rl_robust_owc')

In [2]:
%matplotlib notebook
from utils.plot_functions import plot_s_animation, plot_s_snapshots

import numpy as np
import time
import pickle
import os
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from scipy.optimize import differential_evolution

from utils.env_evaluate_functions import eval_actions

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
# Run configuration: RNG seed handed to the optimiser, the label of this
# experiment case, and the directory (named after the case) that receives results.
seed = 1
case = 'case_2'
opt_dir = f'./{case}'
# Create the output directory up front; an already existing one is accepted.
os.makedirs(name=opt_dir, exist_ok=True)

In [4]:
# Load the pickled list of environments that the optimisation loop iterates over.
# NOTE(security): pickle.load can execute arbitrary code while deserialising —
# only point this at files produced by a trusted source.
# The handle is deliberately not called `input`, which would shadow the builtin
# for the remainder of the notebook session.
with open('../envs_params/env_data/env_list_eval.pkl', 'rb') as env_file:
    envs = pickle.load(env_file)
# Positions within `envs` that are optimised below (the first 16 entries).
rl_indices = list(range(16))

In [5]:
def objective_function(q_arg, *args):
    """Return the negated total reward of a flattened action sequence.

    The sign is flipped so that a minimiser maximises the summed reward.

    Parameters
    ----------
    q_arg : numpy.ndarray
        Flat vector holding ``env.terminal_step * env.action_space.shape[0]``
        values; it is reshaped to one row per step before evaluation.
    *args
        Extra positional arguments; the first one is the environment.

    Returns
    -------
    The negative of the sum of the rewards returned by ``eval_actions``.

    Raises
    ------
    AssertionError
        If ``q_arg`` does not have the expected number of elements.
    """
    env = args[0]
    expected_size = env.terminal_step * env.action_space.shape[0]
    assert q_arg.size == expected_size, 'invalid argument size'
    # One row per step, one column per action component.
    action_sequence = q_arg.reshape(env.terminal_step, -1)
    _first, _second, rewards = eval_actions(env, action_sequence)
    total_reward = sum(rewards)
    return -total_reward

In [6]:
# For every selected environment, search for the action sequence that minimises
# `objective_function` (i.e. maximises the summed reward) with differential
# evolution, and store the best sequence and its objective value in `opt_dir`.
for i, env in enumerate(envs):
    if i not in rl_indices:
        continue

    n_steps = env.terminal_step
    n_actions = env.action_space.shape[0]

    # Baseline for comparison: every action component set to 1 at every step.
    base_action = np.ones((n_steps, n_actions))
    _, _, rewards = eval_actions(env, base_action)
    # Printed with a flipped sign so it is directly comparable with the f(x)
    # values reported by the optimiser.
    print(f'case: {i} (base reward: {-round(sum(rewards), 2)})')

    # One (low, high) pair per action component, repeated for every step. The
    # flat parameter vector is reshaped row-major to (n_steps, n_actions) inside
    # `objective_function`, so the per-step bounds are simply tiled in order.
    # Using each component's own limits (instead of only the first component's)
    # keeps the search inside the action space when the limits are not uniform.
    per_step_bounds = [
        (float(low), float(high))
        for low, high in zip(env.action_space.low, env.action_space.high)
    ]
    bounds = per_step_bounds * n_steps

    result = differential_evolution(objective_function, bounds, args=(env,),
                                    maxiter=750, popsize=1, tol=1e-5,
                                    disp=True, polish=False,
                                    recombination=0.9, mutation=(0.5, 1),
                                    updating='deferred', workers=64, seed=seed)

    # `result.x` is the best flat action sequence found; `result.fun` is the
    # minimised objective, i.e. the negative of the best summed reward.
    np.save(opt_dir+'/ck_argmax_'+str(i)+'.npy', result.x)
    np.save(opt_dir+'/ck_max_'+str(i)+'.npy', result.fun)
    print(result.fun, result.x)

case: 0 (base reward: -0.58)
differential_evolution step 1: f(x)= -0.601596
differential_evolution step 2: f(x)= -0.601596
differential_evolution step 3: f(x)= -0.610637
differential_evolution step 4: f(x)= -0.610737
differential_evolution step 5: f(x)= -0.617322
differential_evolution step 6: f(x)= -0.61861
differential_evolution step 7: f(x)= -0.621134
differential_evolution step 8: f(x)= -0.621134
differential_evolution step 9: f(x)= -0.624959
differential_evolution step 10: f(x)= -0.624959
differential_evolution step 11: f(x)= -0.624959
differential_evolution step 12: f(x)= -0.624959
differential_evolution step 13: f(x)= -0.624959
differential_evolution step 14: f(x)= -0.624959
differential_evolution step 15: f(x)= -0.624959
differential_evolution step 16: f(x)= -0.624959
differential_evolution step 17: f(x)= -0.6259
differential_evolution step 18: f(x)= -0.6259
differential_evolution step 19: f(x)= -0.625926
differential_evolution step 20: f(x)= -0.626612
differential_evolution st

differential_evolution step 61: f(x)= -0.63592
differential_evolution step 62: f(x)= -0.635941
differential_evolution step 63: f(x)= -0.635979
differential_evolution step 64: f(x)= -0.635981
differential_evolution step 65: f(x)= -0.636016
differential_evolution step 66: f(x)= -0.636021
differential_evolution step 67: f(x)= -0.636036
differential_evolution step 68: f(x)= -0.636047
differential_evolution step 69: f(x)= -0.636047
differential_evolution step 70: f(x)= -0.636053
differential_evolution step 71: f(x)= -0.636092
differential_evolution step 72: f(x)= -0.636108
differential_evolution step 73: f(x)= -0.636108
differential_evolution step 74: f(x)= -0.636142
differential_evolution step 75: f(x)= -0.636142
differential_evolution step 76: f(x)= -0.636161
differential_evolution step 77: f(x)= -0.636161
differential_evolution step 78: f(x)= -0.636163
differential_evolution step 79: f(x)= -0.636184
differential_evolution step 80: f(x)= -0.636186
differential_evolution step 81: f(x)= -0.

differential_evolution step 30: f(x)= -0.592352
differential_evolution step 31: f(x)= -0.592352
differential_evolution step 32: f(x)= -0.592603
differential_evolution step 33: f(x)= -0.59267
differential_evolution step 34: f(x)= -0.592832
differential_evolution step 35: f(x)= -0.592854
differential_evolution step 36: f(x)= -0.592854
differential_evolution step 37: f(x)= -0.592854
differential_evolution step 38: f(x)= -0.593245
differential_evolution step 39: f(x)= -0.593425
differential_evolution step 40: f(x)= -0.593425
differential_evolution step 41: f(x)= -0.593984
differential_evolution step 42: f(x)= -0.593984
differential_evolution step 43: f(x)= -0.593984
differential_evolution step 44: f(x)= -0.593984
differential_evolution step 45: f(x)= -0.593984
differential_evolution step 46: f(x)= -0.593984
differential_evolution step 47: f(x)= -0.593984
differential_evolution step 48: f(x)= -0.594369
differential_evolution step 49: f(x)= -0.594369
differential_evolution step 50: f(x)= -0.

differential_evolution step 103: f(x)= -0.608056
differential_evolution step 104: f(x)= -0.608062
differential_evolution step 105: f(x)= -0.608064
differential_evolution step 106: f(x)= -0.608066
differential_evolution step 107: f(x)= -0.608072
differential_evolution step 108: f(x)= -0.60808
differential_evolution step 109: f(x)= -0.60808
differential_evolution step 110: f(x)= -0.608082
differential_evolution step 111: f(x)= -0.608083
-0.6080828954784633 [0.86528829 0.04310412 0.99169504 0.06023145 0.35923302 0.62016391
 0.18151028 0.15122769 0.92278634 0.00236438 0.56269994 0.0946216
 0.02156381 0.87246496 0.12171052 0.37528277 0.22243171 0.12199738
 0.90426131 0.47079502 0.51473976 0.31395204 0.36849595 0.08943462
 0.40975097]
case: 5 (base reward: -0.63)
differential_evolution step 1: f(x)= -0.62968
differential_evolution step 2: f(x)= -0.629792
differential_evolution step 3: f(x)= -0.630457
differential_evolution step 4: f(x)= -0.632117
differential_evolution step 5: f(x)= -0.63211

case: 7 (base reward: -0.55)
differential_evolution step 1: f(x)= -0.569591
differential_evolution step 2: f(x)= -0.580397
differential_evolution step 3: f(x)= -0.580397
differential_evolution step 4: f(x)= -0.58467
differential_evolution step 5: f(x)= -0.58467
differential_evolution step 6: f(x)= -0.58467
differential_evolution step 7: f(x)= -0.586555
differential_evolution step 8: f(x)= -0.588389
differential_evolution step 9: f(x)= -0.588389
differential_evolution step 10: f(x)= -0.589212
differential_evolution step 11: f(x)= -0.589212
differential_evolution step 12: f(x)= -0.591242
differential_evolution step 13: f(x)= -0.591242
differential_evolution step 14: f(x)= -0.592253
differential_evolution step 15: f(x)= -0.592484
differential_evolution step 16: f(x)= -0.592484
differential_evolution step 17: f(x)= -0.592484
differential_evolution step 18: f(x)= -0.592484
differential_evolution step 19: f(x)= -0.59402
differential_evolution step 20: f(x)= -0.594387
differential_evolution s

differential_evolution step 59: f(x)= -0.630854
differential_evolution step 60: f(x)= -0.630854
differential_evolution step 61: f(x)= -0.630854
differential_evolution step 62: f(x)= -0.631129
differential_evolution step 63: f(x)= -0.631129
differential_evolution step 64: f(x)= -0.631273
differential_evolution step 65: f(x)= -0.631315
differential_evolution step 66: f(x)= -0.631508
differential_evolution step 67: f(x)= -0.63159
differential_evolution step 68: f(x)= -0.632096
differential_evolution step 69: f(x)= -0.632096
differential_evolution step 70: f(x)= -0.632096
differential_evolution step 71: f(x)= -0.632096
differential_evolution step 72: f(x)= -0.63238
differential_evolution step 73: f(x)= -0.63238
differential_evolution step 74: f(x)= -0.63238
differential_evolution step 75: f(x)= -0.63238
differential_evolution step 76: f(x)= -0.63238
differential_evolution step 77: f(x)= -0.63238
differential_evolution step 78: f(x)= -0.63238
differential_evolution step 79: f(x)= -0.63238
d

differential_evolution step 29: f(x)= -0.626613
differential_evolution step 30: f(x)= -0.627284
differential_evolution step 31: f(x)= -0.627506
differential_evolution step 32: f(x)= -0.627634
differential_evolution step 33: f(x)= -0.627653
differential_evolution step 34: f(x)= -0.627653
differential_evolution step 35: f(x)= -0.627653
differential_evolution step 36: f(x)= -0.627671
differential_evolution step 37: f(x)= -0.627671
differential_evolution step 38: f(x)= -0.627832
differential_evolution step 39: f(x)= -0.627832
differential_evolution step 40: f(x)= -0.628015
differential_evolution step 41: f(x)= -0.628509
differential_evolution step 42: f(x)= -0.628509
differential_evolution step 43: f(x)= -0.628593
differential_evolution step 44: f(x)= -0.628593
differential_evolution step 45: f(x)= -0.628593
differential_evolution step 46: f(x)= -0.628593
differential_evolution step 47: f(x)= -0.628593
differential_evolution step 48: f(x)= -0.628593
differential_evolution step 49: f(x)= -0

differential_evolution step 23: f(x)= -0.583876
differential_evolution step 24: f(x)= -0.584455
differential_evolution step 25: f(x)= -0.586217
differential_evolution step 26: f(x)= -0.586217
differential_evolution step 27: f(x)= -0.586217
differential_evolution step 28: f(x)= -0.586217
differential_evolution step 29: f(x)= -0.586217
differential_evolution step 30: f(x)= -0.588008
differential_evolution step 31: f(x)= -0.588008
differential_evolution step 32: f(x)= -0.588295
differential_evolution step 33: f(x)= -0.589851
differential_evolution step 34: f(x)= -0.590704
differential_evolution step 35: f(x)= -0.59389
differential_evolution step 36: f(x)= -0.594764
differential_evolution step 37: f(x)= -0.594764
differential_evolution step 38: f(x)= -0.596635
differential_evolution step 39: f(x)= -0.596635
differential_evolution step 40: f(x)= -0.596635
differential_evolution step 41: f(x)= -0.59737
differential_evolution step 42: f(x)= -0.599846
differential_evolution step 43: f(x)= -0.6

case: 11 (base reward: -0.58)
differential_evolution step 1: f(x)= -0.584548
differential_evolution step 2: f(x)= -0.58773
differential_evolution step 3: f(x)= -0.58773
differential_evolution step 4: f(x)= -0.58773
differential_evolution step 5: f(x)= -0.58773
differential_evolution step 6: f(x)= -0.58773
differential_evolution step 7: f(x)= -0.58773
differential_evolution step 8: f(x)= -0.58773
differential_evolution step 9: f(x)= -0.587856
differential_evolution step 10: f(x)= -0.587856
differential_evolution step 11: f(x)= -0.590529
differential_evolution step 12: f(x)= -0.590529
differential_evolution step 13: f(x)= -0.590529
differential_evolution step 14: f(x)= -0.590529
differential_evolution step 15: f(x)= -0.590529
differential_evolution step 16: f(x)= -0.590529
differential_evolution step 17: f(x)= -0.590537
differential_evolution step 18: f(x)= -0.592364
differential_evolution step 19: f(x)= -0.592364
differential_evolution step 20: f(x)= -0.592364
differential_evolution ste

differential_evolution step 46: f(x)= -0.63517
differential_evolution step 47: f(x)= -0.63518
differential_evolution step 48: f(x)= -0.635189
differential_evolution step 49: f(x)= -0.635203
differential_evolution step 50: f(x)= -0.63521
differential_evolution step 51: f(x)= -0.635227
differential_evolution step 52: f(x)= -0.635232
differential_evolution step 53: f(x)= -0.635247
differential_evolution step 54: f(x)= -0.635254
differential_evolution step 55: f(x)= -0.635274
differential_evolution step 56: f(x)= -0.635274
differential_evolution step 57: f(x)= -0.635275
differential_evolution step 58: f(x)= -0.635283
differential_evolution step 59: f(x)= -0.635283
differential_evolution step 60: f(x)= -0.635283
differential_evolution step 61: f(x)= -0.635284
differential_evolution step 62: f(x)= -0.635284
differential_evolution step 63: f(x)= -0.63529
-0.6352896916346928 [0.23421222 0.62236367 0.49978489 0.56532172 0.54243816 0.45646705
 0.87118626 0.31225223 0.31691339 0.26961761 0.645352

differential_evolution step 62: f(x)= -0.575533
differential_evolution step 63: f(x)= -0.575533
differential_evolution step 64: f(x)= -0.575533
differential_evolution step 65: f(x)= -0.575533
differential_evolution step 66: f(x)= -0.575538
differential_evolution step 67: f(x)= -0.575538
differential_evolution step 68: f(x)= -0.575538
differential_evolution step 69: f(x)= -0.575538
-0.5755375470951987 [0.27084599 0.43772188 0.2794237  0.21623764 0.58218342 0.36051388
 0.87480333 0.86880329 0.15292986 0.70448297 0.94174931 0.19144033
 0.57440673 0.83260846 0.34976614 0.40844418 0.15966857 0.38471182
 0.76843718 0.23550476 0.62979423 0.26718104 0.45219746 0.93469898
 0.40725389]
case: 15 (base reward: -0.54)
differential_evolution step 1: f(x)= -0.551376
differential_evolution step 2: f(x)= -0.552394
differential_evolution step 3: f(x)= -0.552394
differential_evolution step 4: f(x)= -0.553556
differential_evolution step 5: f(x)= -0.553992
differential_evolution step 6: f(x)= -0.553992
dif