In [1]:
import sys
sys.path.insert(0, "D:/dip/Adaptation-of-Action-Space-for-Reinforcement-Learning/")
import os
import re
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('bmh')
%matplotlib inline

import src.util.data as data_lib

# Directory the notebook runs in; data files are looked up and reported relative to it.
CURRENT_DIR = os.getcwd()
# Regex selecting the raw data archives. Raw string so that "\." is a regex
# escape rather than an (invalid) Python string escape; both dots are literal.
DATA_TEMPLATE = r"data_.*\.json\.zip"

## user must set BATCH_SIZE and implement convert_episode to improve performance
# Number of episodes per batch; used for windowing, tick spacing and batch slicing below.
BATCH_SIZE = 100
def convert_episode(episode):
    '''
    Shrink one episode record in place so the depleted file stays small.

    Passed as the per-episode callback to data_lib.deplete. The incoming
    episode follows this template:
    {
        "id":0,
        "states":[],
        "actions":[],
        "actors_actions":[],
        "ndn_actions":[],
        "rewards":[],
        "action_space_sizes":[]
    }

    "states", "actors_actions", "ndn_actions" and "action_space_sizes" are
    removed, "rewards" is replaced by its sum, and every other key (e.g. "id",
    "actions") is left untouched.

    Missing keys are ignored, so the function is safe to run on an episode
    that was already converted.

    :param episode: dict describing one episode; modified in place.
    :return: None
    '''
    for key in ("states", "actors_actions", "ndn_actions", "action_space_sizes"):
        # pop with a default instead of del: no KeyError when the field is absent
        episode.pop(key, None)
    if "rewards" in episode:
        # np.sum accepts both a list of step rewards and an already-summed scalar
        episode["rewards"] = np.sum(episode["rewards"])

    
## -----------------------------------------------------------------------------
    
def get_data_filenames(deplete=True):
    '''
    Collect the data files found in the working directory.

    Every directory entry matching DATA_TEMPLATE is considered. For each one
    the "light" companion name is "light_" + the entry name without its last
    four characters (the ".zip" suffix).

    - If the light file is missing and `deplete` is true, data_lib.deplete is
      called with convert_episode and the light file's path is returned
      (assumes deplete writes the file under that name -- TODO confirm).
    - If the light file already exists, its path is returned.
    - Otherwise the original file's path is returned.

    :param deplete: whether to create missing light files.
    :return: list of "<CURRENT_DIR>/<name>" strings, sorted by the original
             file name so the result is reproducible between runs.
    '''
    result_filenames = []

    # os.listdir() returns entries in arbitrary order; sort so that callers
    # indexing the result (e.g. taking element 0) always get the same file.
    for filename in sorted(os.listdir()):
        if re.search(DATA_TEMPLATE, filename) is None:
            continue

        depleted_file = "light_" + filename[:-4]
        if deplete and not os.path.exists(depleted_file):
            data_lib.deplete(filename, convert_episode)
            chosen = depleted_file
        elif os.path.exists(depleted_file):
            chosen = depleted_file
        else:
            chosen = filename
        result_filenames.append("{}/{}".format(CURRENT_DIR, chosen))
    return result_filenames

def load_data(filename):
    '''
    Load a data file through data_lib.

    :param filename: path handed to data_lib.load.
    :return: the `.data` attribute of the object returned by data_lib.load.
    '''
    loaded = data_lib.load(filename)
    return loaded.data


def get_episode_data(data_dict, field):
    '''
    Extract one field from every episode of a loaded data dictionary.

    :param data_dict: dict containing data_dict['simulation']['episodes'],
                      an iterable of per-episode dicts.
    :param field: key to read from each episode.
    :return: numpy array with one entry per episode. When the per-episode
             values have different lengths (so no rectangular array exists)
             a 1-D object array holding the original values is returned.
    '''
    episodes = data_dict['simulation']['episodes']
    values = [episode[field] for episode in episodes]
    try:
        return np.array(values)
    except ValueError:
        # Recent NumPy refuses to build ragged arrays implicitly; build the
        # object array explicitly, one episode per slot.
        ragged = np.empty(len(values), dtype=object)
        for index, value in enumerate(values):
            ragged[index] = value
        return ragged

def apply_func_to_window(data, window_size, func):
    '''
    Apply `func` to a sliding window centred on each position of `data`.

    For position i the window is data[i - window_size/2 : i + window_size/2],
    truncated at both ends of the sequence.

    :param data: sliceable sequence (list or numpy array).
    :param window_size: window width; capped at len(data). A value of 0 is
                        replaced by 10% of len(data).
    :param func: callable applied to each window slice.
    :return: numpy array of the results. Positions whose window is empty are
             skipped, so the result can be shorter than `data` when the
             window is narrower than 2.
    '''
    data_length = len(data)
    window_size = min(window_size, data_length)
    if window_size == 0:
        window_size = (data_length * .1)
    half_window = window_size / 2

    res = []
    for i in range(data_length):
        start = int(max(i - half_window, 0))
        # Slice ends are exclusive: clamp to data_length (not data_length - 1)
        # so that the final element can be part of a window.
        end = int(min(i + half_window, data_length))
        if start == end:
            continue
        res.append(func(data[start:end]))

    return np.array(res)



In [1]:
# Load the first file reported by get_data_filenames (light files are created on demand).
data = load_data(get_data_filenames(deplete=True)[0])
# Offset of the first batch: added to the batch axis of the percentile plot
# and subtracted from best_batch when slicing episodes.
first_batch = 0

In [None]:
# Identify the loaded run.
print(data['id'])
print(data['agent'])
print(data['experiment'])

# Total reward per episode. np.sum works both on per-step reward lists and on
# rewards already reduced to a scalar by convert_episode.
arr = get_episode_data(data, 'rewards')
rewards = np.array(list(np.sum(subarr) for subarr in arr))

plt.figure(figsize=(15, 8))
plt.axes().xaxis.grid()
plt.xticks(np.arange(0,len(rewards)+1, BATCH_SIZE), rotation=-45)

# Smooth trend: moving average over two batches.
avg_rewards = apply_func_to_window(rewards, 2*BATCH_SIZE, np.average)
plt.plot(avg_rewards)

# Fine trend: moving average over a quarter of a batch.
avg_rewards = apply_func_to_window(rewards, BATCH_SIZE/4, np.average)
plt.plot(avg_rewards)

# Tick marks on the zero line at every batch boundary.
x = np.arange(0, len(rewards)+1, BATCH_SIZE)
plt.plot(x, np.zeros(x.shape), '|')

# Mark the fine moving average at the middle of every batch. The x positions
# and the sampled values are derived from the same batch starts, so both always
# have the same length, even when the last batch is shorter than half a batch.
batch_starts = np.arange(0, len(rewards), BATCH_SIZE)
batch_centers = batch_starts + int(BATCH_SIZE/2)
has_center = batch_centers < len(avg_rewards)
batch_avg = avg_rewards[batch_centers[has_center]]
plt.plot(batch_starts[has_center]+BATCH_SIZE/2, batch_avg, "g_")

plt.show()


In [None]:
TOP_PERC = 0.1
# Width of one reward band, as a fraction of a batch (0.1 -> ten bands).
PERC_ANALYSIS = 0.1
batch_size = int(BATCH_SIZE)
top = []
x_axis = np.arange(0,len(rewards), batch_size)
avg_per_percentage = []
limit = []

# Number of bands every batch is divided into.
n_bands = max(int(round(1 / PERC_ANALYSIS)), 1)

for i in x_axis:
    batch = rewards[i:i+batch_size]
    sorted_batch = np.sort(batch)
    # np.array_split always yields exactly n_bands chunks, so a shorter final
    # batch still produces one value per band (NaN for a band left empty)
    # instead of a ragged row or a zero-sized step.
    bands = np.array_split(sorted_batch, n_bands)
    avg_per_percentage.append([np.average(band) if len(band) else np.nan for band in bands])
    limit.append([np.min(band) if len(band) else np.nan for band in bands])

avg_per_percentage = np.array(avg_per_percentage)

# One x position per batch, shifted by first_batch.
x_ax = first_batch+np.arange(0, len(avg_per_percentage))

plt.figure(figsize=(15, 8))
plt.grid(True)
plt.xticks(x_ax, rotation=-45)

# After transposing, row k holds band k across all batches.
avg_per_percentage = np.transpose(avg_per_percentage)
limit = np.transpose(limit)
count = 0
# Bands are drawn from the highest down to band 1.
# NOTE(review): band 0 (the lowest rewards) is never drawn because the range
# stops before 0 -- confirm whether that is intended.
for count in range(len(avg_per_percentage)-1, 0, -1):
    b = avg_per_percentage[count]
    # Colour runs from blue (low bands) to red (high bands).
    c = '#{:02X}00{:02X}'.format(int(np.interp(count, [0, (1/PERC_ANALYSIS)-1], [0, 255])),
                                 int(np.interp(count, [0, (1/PERC_ANALYSIS)-1], [255, 0])))
    plt.plot(x_ax, b, "--o", color=c, label="{}-{} %".format(int((count)*PERC_ANALYSIS*100),
                                                                        int((count+1)*PERC_ANALYSIS*100)))
    # Lower limit of each band; band 5 is drawn thicker (with ten bands its
    # lower limit is approximately the batch median).
    plt.plot(x_ax, limit[count], "--", color="#000000", linewidth=0.2 if count!=5 else 1)


plt.legend()

plt.show()

# Set best_batch according to the previous graphs

In [None]:
# Batch whose actions are inspected in the histogram cell below (first_batch is
# subtracted from it before the episodes are sliced); choose it from the plots above.
best_batch = 0

In [None]:
# Position of the selected batch inside the loaded episodes.
best_batch_index = best_batch-first_batch
ep_actions = get_episode_data(data, "actions")[best_batch_index*BATCH_SIZE: (best_batch_index+1)*BATCH_SIZE]
print(ep_actions.shape)

# Flatten the per-episode action lists into one list of actions.
actions = []
for ep in ep_actions:
    actions.extend(ep)
actions = np.array(actions)
print(actions.shape)

# One [high, low] pair per action dimension, taken from the experiment settings.
ranges = list([data["experiment"]["actions_high"][i], data["experiment"]["actions_low"][i]] for i in range(len(data["experiment"]["actions_high"])))
print(ranges)

for dim in range(len(ranges)):
    dim_actions = np.array(list(a[dim] for a in actions))

    plt.figure(figsize=(15, 6))
    plt.title("action space dimension {}".format(dim))
    # Pad the axis by 5% of the range on each side. Scaling the bounds
    # themselves would cut into the range whenever both bounds share a sign;
    # for symmetric ranges both approaches give the same limits.
    lower, upper = sorted(ranges[dim])
    margin = 0.05 * (upper - lower)
    plt.xlim(lower - margin, upper + margin)
    plt.hist(dim_actions, bins=100)
    plt.show()