# SETUP

In [1]:
import os
import json
from matplotlib.image import imread, imsave
from matplotlib.pyplot import imshow, show, close
from matplotlib import pyplot as plt
from collections import OrderedDict
import math
import matplotlib.image as mpimg
import cv2
import numpy as np
import matplotlib.patches as patches
from matplotlib.collections import PatchCollection, PolyCollection
import matplotlib.transforms as mtrans
import pandas as pd
import seaborn as sns

In [2]:
def _unit_rgb(red, green, blue):
    """Scale 0-255 colour channels to the 0-1 floats that matplotlib expects."""
    return (red / 255, green / 255, blue / 255)


# uci primary colors
uci_blue = _unit_rgb(0, 62, 120)
uci_gold = _unit_rgb(253, 185, 19)

# uci secondary color palette
uci_light_blue = _unit_rgb(106, 162, 184)
uci_light_gray = _unit_rgb(197, 190, 181)
uci_dark_blue = _unit_rgb(27, 61, 109)
uci_orange = _unit_rgb(247, 141, 45)
uci_light_yellow = _unit_rgb(247, 235, 95)
uci_dark_gray = _unit_rgb(85, 87, 89)
uci_lime_green = _unit_rgb(122, 184, 0)

# color blind friendly colors, as 0-255 channel triplets
# https://gist.github.com/thriveth/8560036
_color_blind_channels = (
    ('blue',   (55, 126, 184)),   #377eb8
    ('orange', (255, 127, 0)),    #ff7f00
    ('green',  (77, 175, 74)),    #4daf4a
    ('pink',   (247, 129, 191)),  #f781bf
    ('brown',  (166, 86, 40)),    #a65628
    ('purple', (152, 78, 163)),   #984ea3
    ('gray',   (153, 153, 153)),  #999999
    ('red',    (228, 26, 28)),    #e41a1c
    ('yellow', (222, 222, 0)),    #dede00
)
# name -> [r, g, b] list of 0-1 floats, in the order listed above
color_blinds = {
    color_name: list(_unit_rgb(*channels))
    for color_name, channels in _color_blind_channels
}
# the same colours as a plain list, for indexing by episode position
color_blinds_list = list(color_blinds.values())

# default colour used for map objects
object_color = uci_blue

# BINVOX

In [3]:
# Load the voxel grid once; plot_map reads voxels_data and voxels_scale from here.
# NOTE(review): this import lives outside the setup cell, so the dependency is easy to miss.
import binvox as bv
# NOTE(review): absolute, machine-specific path - the cell fails on any other machine until it is edited.
binvox_path = 'C:/Users/timkj/OneDrive/Desktop/repo/drone_rl/local/voxel_grids/blocksmap.binvox'
# 'dense' presumably requests a full 3-D occupancy array (plot_map indexes it as [x, y, z]) - confirm against the binvox package.
voxels = bv.Binvox.read(binvox_path, 'dense')
voxels_data = voxels.data
voxels_scale = voxels.scale 
voxels_trans = voxels.translate
# twice the absolute translation times the scale; presumably the physical extent per axis - TODO confirm units.
voxels_res = (np.absolute(voxels_trans)) * 2 * voxels_scale

In [4]:
# Cache of the unit-square patches (geometry only) for every occupied map cell.
# Built on the first plot_map call because scanning the voxel grid is slow.
_global_plt_patches = None

def plot_map(axis, object_color=uci_blue):
    """Draw a top-down view of the voxel map onto a matplotlib axes.

    Every (x, y) column of ``voxels_data`` that contains an occupied voxel
    above the floor is drawn as a 1x1 square.

    Parameters
    ----------
    axis : matplotlib axes
        Axes that receives the patch collection.
    object_color : matplotlib colour
        Fill and edge colour of the squares. The colour is applied on every
        call, so the cached geometry can be reused with different colours.

    Notes
    -----
    The floor level is taken as the highest occupied voxel in the centre
    column of the grid. If that column is empty, every occupied voxel is
    treated as an object.
    """
    global _global_plt_patches
    if _global_plt_patches is None:
        # centre column: axis 0 is indexed with shape[0], axis 1 with shape[1]
        centre_column = voxels_data[
            voxels_data.shape[0] // 2,
            voxels_data.shape[1] // 2,
            :,
        ]
        occupied_levels = np.flatnonzero(centre_column)
        floor_dim = int(occupied_levels.max()) if occupied_levels.size else -1
        # offset that moves grid indices to map coordinates
        # NOTE(review): hard-coded; presumably assumes a 200-cell grid centred on the origin - confirm
        shift = 100
        # True for each (x, y) column holding at least one voxel above the floor
        above_floor = np.any(voxels_data[:, :, floor_dim + 1:], axis=2)
        _global_plt_patches = [
            patches.Rectangle((x - shift, y - shift), 1, 1)
            for x, y in np.argwhere(above_floor)
        ]
    # add list of patches (much quicker than iteratively drawing)
    map_stuff = PatchCollection(
        _global_plt_patches,
        facecolors=object_color,
        edgecolors=object_color,
    )
    axis.add_collection(map_stuff)

In [5]:
def plot_spawns_goals(
    drone_radius = 0.5,
    goal_tolerance = 4,
    goal_distance = 100,
    spawns = None,
    goals = None,
    spawn_color = color_blinds['red'],
    goal_color = color_blinds['orange'],
    goal_wedge = True,
):
    """Draw spawn markers and goal regions on the current matplotlib axes.

    Parameters
    ----------
    drone_radius : float
        Side length of the square drawn at each spawn (lower-left corner
        anchored at the spawn coordinates).
    goal_tolerance : float
        Radius of a goal circle, or half the radial thickness of a goal wedge.
    goal_distance : float
        Distance from the spawn to the middle of the wedge band (wedge mode).
    spawns, goals : sequences, optional
        Paired by position. Each spawn supplies ``[0]`` and ``[1]`` as
        coordinates. In wedge mode ``goal[0]`` and ``goal[1]`` are passed to
        ``Wedge`` as its start and end angles; otherwise they are the centre
        of a circle. ``None`` means no entries.
    spawn_color, goal_color : matplotlib colours
    goal_wedge : bool
        Draw goals as wedges around the spawn (True) or as circles (False).

    Raises
    ------
    IndexError
        If fewer goals than spawns are supplied.
    """
    spawns = [] if spawns is None else spawns
    goals = [] if goals is None else goals
    if len(goals) < len(spawns):
        raise IndexError('plot_spawns_goals needs one goal for every spawn')
    spawn_patches = []
    goal_patches = []
    # zip stops at the last spawn, so surplus goals are ignored as before
    for spawn, goal in zip(spawns, goals):
        spawn_patches.append(patches.Rectangle(
            (spawn[0], spawn[1]),
            drone_radius,
            drone_radius,
            color=spawn_color,
        ))
        if goal_wedge:
            # annular band of thickness 2*goal_tolerance centred on goal_distance
            goal_patch = patches.Wedge(
                (spawn[0], spawn[1]),
                goal_distance+goal_tolerance,
                goal[0], goal[1],
                width=2*goal_tolerance,
                color=goal_color,
            )
        else:
            goal_patch = patches.Circle(
                (goal[0], goal[1]),
                goal_tolerance,
                color=goal_color,
            )
        goal_patches.append(goal_patch)
    # add list of patches (much quicker than iteratively drawing)
    spawn_collection = PatchCollection(spawn_patches, match_original=True)
    goal_collection = PatchCollection(goal_patches, match_original=True)
    # goals are added first so the spawn markers are drawn on top of them
    plt.gca().add_collection(goal_collection)
    plt.gca().add_collection(spawn_collection)

In [6]:
def plot_environment(
    name='Learning Environment (to scale)',
    spawn_color=color_blinds['red'],
    goal_color=color_blinds['orange'],
):
    """Create a square, labelled figure for the environment map, with a legend.

    Parameters
    ----------
    name : str
        Figure title.
    spawn_color, goal_color : matplotlib colours
        Colours of the 'Spawn' and 'Goal' legend entries. These used to be
        read from the globals ``train_spawn_color`` / ``train_goal_color``,
        which this notebook never defines, so calling the function raised
        NameError. The defaults match those of ``plot_spawns_goals``.

    The new figure becomes the current pyplot figure; nothing is returned.
    """
    fig = plt.figure()
    fig.set_size_inches((6, 6))
    plt.title(name)
    # the map is drawn with y on the horizontal axis and x on the vertical axis
    plt.xlabel('y [meters]')
    plt.ylabel('x [meters]')
    plt.xlim(-120, 120)
    plt.ylim(-120, 120)
    # custom legend hack: points far outside the axis limits exist only to
    # give the legend one handle per entry
    plt.scatter(-999, -999, marker='s', color=object_color)
    plt.scatter(-999, -999, marker='s', color=spawn_color)
    plt.scatter(-999, -999, color=goal_color)
    plt.legend(
        ['Objects', 'Spawn', 'Goal'],
        loc='center left',
        bbox_to_anchor=(1, 0.5),
    )

# EVALUATIONS

In [7]:
# json files output with all string key names
# process so that the evaluation dictionary structure is such:
    # episode # - int
        # step # - int
            # state - dictionary of misc key, value pairs for that state
def process_episodes(json_evaluation):
    """Convert one evaluation's JSON dictionary into nested lists.

    Parameters
    ----------
    json_evaluation : dict
        Maps string keys to episode dictionaries. Only keys containing
        ``'episode_'`` are treated as episodes; any other key is skipped.
        Each episode dictionary maps step keys, whose second ``'_'``-separated
        field is the integer step number, to a state dictionary.

    Returns
    -------
    list
        One entry per episode, in the order the keys appear in
        ``json_evaluation``. Each entry is a list of state dictionaries
        indexed by step number.

    Raises
    ------
    IndexError
        If a step number is not smaller than the number of steps in its episode.
    """
    # Append per episode instead of pre-sizing by len(json_evaluation):
    # skipped non-episode keys used to leave trailing None entries that broke
    # every later `enumerate(episodes)`.
    episodes = []
    for episode_str, json_episode in json_evaluation.items():
        if 'episode_' not in episode_str:
            continue
        states = [None] * len(json_episode)
        for step_str, state in json_episode.items():
            step_num = int(step_str.split('_')[1])
            states[step_num] = state
        episodes.append(states)
    return episodes
def read_evaluations(evaluation_folder):
    """Load every evaluation 'states' JSON file found in a folder.

    A file is used when its name contains both ``'states'`` and ``'.json'``.
    The last two ``'_'``-separated fields of the name (before the first
    ``'.'``) are read as the first and last episode numbers stored in the
    file. The evaluation number is the first episode number divided by the
    number of episodes per file.

    Parameters
    ----------
    evaluation_folder : str
        Folder to scan.

    Returns
    -------
    list
        ``process_episodes`` results, indexed by evaluation number.

    Raises
    ------
    IndexError
        If an evaluation number is not smaller than the number of files
        (for example when an evaluation file is missing).
    """
    # Filter before sizing the result list, so non-JSON 'states' files no
    # longer leave None holes in it.
    evaluation_files = [
        file for file in sorted(os.listdir(evaluation_folder))
        if 'states' in file and '.json' in file
    ]
    evaluations = [None] * len(evaluation_files)
    for evaluation_file in evaluation_files:
        parts = evaluation_file.split('.')[0].split('_')
        left_episode = int(parts[-2])
        right_episode = int(parts[-1])
        nEpisodes = 1 + right_episode - left_episode
        evaluation_number = left_episode // nEpisodes
        print(evaluation_file, evaluation_number)
        # the context manager closes the file even if parsing fails
        with open(os.path.join(evaluation_folder, evaluation_file), 'r') as json_file:
            json_evaluation = json.load(json_file)
        evaluations[evaluation_number] = process_episodes(json_evaluation)
    return evaluations
# architecture for evaluations:
# evaluations - list of evaluations (indexed by evaluation number) - index 0 is the first evaluation, run before any training
    # episodes - list of episodes in that evaluation; each episode is a list of states (indexed by step number)
        # states - dict of (key, value) pairs for the state at all_evaluations[instance][evaluation][episode][step]

In [8]:
# read evaluations from folders
# Every sub-folder of the working directory whose name has no '.' is one
# instance (for example 'hacknet4', 'mlserver2019', 'mlserver2021',
# 'testbed1', 'timpc', 'timsurface'). Each may hold an EvaluateEnvironment/
# and/or a TrainEnvironment/ folder, registered as '<folder>_evaluate' and
# '<folder>_train'.
parent_path = os.getcwd() + '/'
instances = []
instance_paths = {}
# sorted: os.listdir order is arbitrary, and the plots below follow this order
for folder in sorted(os.listdir(parent_path)):
    if '.' in folder or not os.path.isdir(os.path.join(parent_path, folder)):
        continue
    for environment, suffix in (
        ('EvaluateEnvironment', '_evaluate'),
        ('TrainEnvironment', '_train'),
    ):
        # keep the trailing '/': later cells append file names directly
        environment_path = parent_path + folder + '/' + environment + '/'
        if os.path.exists(environment_path):
            instances.append(folder + suffix)
            instance_paths[folder + suffix] = environment_path
all_evaluations = {}
for instance in instance_paths:
    all_evaluations[instance] = read_evaluations(instance_paths[instance])

In [9]:
# process evaluations: count how often each action was taken, and record the
# final state of every episode in every evaluation
_moves = {}
final_variables = {name: {} for name in instances}
for instance in instances:
    for set_num, episodes in enumerate(all_evaluations[instance]):
        for episode, states in enumerate(episodes):
            # step 0 is the initial state and is skipped when counting actions
            for state in states[1:]:
                move = state['transcribed_action']
                _moves[move] = _moves.get(move, 0) + 1
            final_state = states[-1]
            for variable, final_value in final_state.items():
                per_episode = final_variables[instance].setdefault(variable, {})
                per_episode.setdefault(episode, []).append(final_value)
_moves
# architecture of final_variables:
# instance - str name
    # variable - str name
        # episode - int number
            # final values - list of final values with increased # of training evaluations 

{}

In [10]:

# select which state variables to fetch
numerical_variables = [
        #'nSteps', 
        'total_reward', 
]
string_variables = [
    #'transcribed_action', 
    'termination_reason',
]
all_variables = numerical_variables + string_variables
# plots[variable][instance] holds the aggregate that was drawn:
#   numerical variable -> list of per-evaluation means
#   string variable    -> {value: {evaluation: count}}
plots = {}
for variable in all_variables:
    plots[variable] = {}
    for instance in instances:
        plots[variable][instance] = {}

        per_episode = final_variables[instance].get(variable)
        if not per_episode:
            # nothing was recorded for this instance (e.g. no evaluation
            # files); keep the empty placeholder instead of raising KeyError
            continue

        # regroup: evaluation number -> final values of all episodes
        values = {}
        for episode in per_episode:
            for evaluation, final_value in enumerate(per_episode[episode]):
                values.setdefault(evaluation, []).append(final_value)

        if variable in numerical_variables:
            # mean final value per evaluation
            aggregates = [
                sum(evaluation_values) / len(evaluation_values)
                for evaluation_values in values.values()
            ]
            plt.title(instance + ' Evaluation Learning Curve')
            plt.xlabel('After # of Training Episodes [100s]')
            plt.ylabel(variable + ' at Final State')
            plt.plot(aggregates)
            plt.show()

        if variable in string_variables:
            # count how often each distinct value occurs per evaluation
            aggregates = {}
            for evaluation, names in values.items():
                for name in names:
                    counts = aggregates.setdefault(name, {})
                    counts[evaluation] = counts.get(evaluation, 0) + 1
            df_data = {'count':[], 'name':[], 'evaluation':[]}
            for name in aggregates:
                for evaluation in values:
                    # fill in explicit zeros so the heatmap has no gaps
                    count = aggregates[name].setdefault(evaluation, 0)
                    df_data['count'].append(count)
                    df_data['name'].append(name)
                    df_data['evaluation'].append(evaluation)
            df = pd.DataFrame(df_data)
            heatmap_data = pd.pivot_table(df, values='count', index=['name'], columns='evaluation')
            sns.heatmap(heatmap_data)
            plt.title(instance + ' Evaluation Learning Curve: ' + variable)
            plt.show()

        plots[variable][instance] = aggregates

In [11]:
# For each evaluation, plot the reward at every step averaged over its
# episodes. Curves accumulate on one figure, which is shown after every 8th
# evaluation and after the last one.
for instance in instances:
    evaluations = all_evaluations[instance]
    last_evaluation = len(evaluations) - 1
    for evaluation, episodes in enumerate(evaluations):
        # step number -> rewards seen at that step over all episodes
        rewards_path = {}
        for states in episodes:
            for step, state in enumerate(states):
                if step > 0:  # step 0 is the initial state
                    rewards_path.setdefault(step, []).append(state['total_reward'])
        mean_reward_path = [
            sum(step_rewards) / len(step_rewards)
            for step_rewards in rewards_path.values()
        ]
        plt.plot(mean_reward_path, label='Eval ' + str(evaluation))
        batch_complete = evaluation > 0 and evaluation % 8 == 0
        if evaluation == last_evaluation or batch_complete:
            plt.title(instance + ' Immediate Reward Over all Evaluations')
            plt.xlabel('Step')
            plt.ylabel('Mean Reward')
            plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
            plt.show()

In [12]:
# SET DISCOUNT FACTOR FOR Q-PATHS
gamma = 0.99

# marker sizes, in map units
goal_marker_radius = 4
# using generic quad copter size for point sizes (set this to your drone value)
drone_marker_radius = 0.5


def _discounted_returns(rewards, discount):
    """Return the discounted reward-to-go for every step of one episode.

    Entry ``t`` is ``rewards[t] + discount * rewards[t+1] +
    discount**2 * rewards[t+2] + ...``. The previous inline loop used the
    exponent ``step2 - step``, which is negative, so future rewards were
    amplified instead of discounted.
    """
    returns = [0.0] * len(rewards)
    running = 0.0
    # walk backwards so every entry reuses the return of the step after it
    for idx in range(len(rewards) - 1, -1, -1):
        running = rewards[idx] + discount * running
        returns[idx] = running
    return returns


# plot final paths (the BINVOX cells must have been run so that plot_map works)
for instance in instances:
    evaluations = all_evaluations[instance]
    if 'train' in instance:
        # only the last training set is plotted; slicing tolerates an empty list
        evaluations = evaluations[-1:]
    for set_num, episodes in enumerate(evaluations):
        paths = []
        reward_paths = []
        Q_paths = []
        goals = []
        spawns = []
        termination_reasons = []
        episode_numbers = []
        for episode, states in enumerate(episodes):
            episode_numbers.append(episode)
            # get initial values
            init_state = states[0]
            spawns.append(init_state['drone_position'])
            goals.append(init_state['goal_position'])
            # get intermediate values (step 0 is the initial state)
            path = [state['drone_position'] for state in states[1:]]
            reward_path = [state['total_reward'] for state in states[1:]]
            paths.append(path)
            reward_paths.append(reward_path)
            Q_paths.append(_discounted_returns(reward_path, gamma))
            # get final values
            termination_reasons.append(states[-1]['termination_reason'])
            # plot after every 8th episode and after the last one
            is_last_episode = episode == len(episodes) - 1
            if not (is_last_episode or (episode > 0 and episode % 8 == 0)):
                continue
            # One figure with three axes. The previous plt.subplots(2, 2) call
            # created four extra axes that stayed underneath these.
            fig = plt.figure(figsize=(16, 8))
            if 'train' in instance:
                fig.suptitle(instance + ' Final Set')
            else:
                fig.suptitle(instance + ' Set #' + str(set_num))
            ax1 = plt.subplot2grid((2, 2), (0, 0), rowspan=2)
            ax2 = plt.subplot2grid((2, 2), (0, 1), colspan=1)
            ax3 = plt.subplot2grid((2, 2), (1, 1), colspan=1)
            # PLOT DRONE PATH
            ax1.set_title('Drone Path (to scale)')
            ax1.set_xlabel('y [meters]')
            ax1.set_ylabel('x [meters]')
            ax1.set_xlim(-100, 100)
            ax1.set_ylim(-100, 100)
            # show objects on map from binvox
            plot_map(ax1)
            # legend hack
            ax1.scatter(-999, -999, marker='s', color=object_color) # off map just for legend
            legend = []
            plt_patches = []
            for idx in range(len(paths)):
                # wrap around so a long batch cannot run past the palette
                color = color_blinds_list[idx % len(color_blinds_list)]
                # positions are stored as (x, y, ...); the map shows y horizontally
                ax1.scatter(spawns[idx][1], spawns[idx][0], marker='x', color=color) # spawn location
                legend.append('E' + str(episode_numbers[idx]) + ':' + termination_reasons[idx])
                # plot goal
                plt_patches.append(
                    plt.Circle((goals[idx][1], goals[idx][0]), goal_marker_radius, color=color, alpha=0.4)
                )
                # plot points
                for point in paths[idx]:
                    plt_patches.append(
                        plt.Circle((point[1], point[0]), drone_marker_radius, color=color)
                    )
                # plot end point (an episode with only its initial state has none)
                if paths[idx]:
                    ax1.scatter(paths[idx][-1][1], paths[idx][-1][0], marker='*', s=64, color=color) # end location
            map_stuff = PatchCollection(plt_patches, match_original=True)
            ax1.add_collection(map_stuff)
            # PLOT REWARDS
            ax2.set_title('Reward Path')
            ax2.set_ylabel('Immediate Reward')
            for idx in range(len(reward_paths)):
                color = color_blinds_list[idx % len(color_blinds_list)]
                ax2.plot(range(1, len(reward_paths[idx]) + 1), reward_paths[idx], color=color, marker='.')
            # PLOT Q-VALUES
            ax3.set_title('Q-value Path')
            ax3.set_xlabel('Step')
            ax3.set_ylabel('Accumlated Reward')
            for idx in range(len(Q_paths)):
                color = color_blinds_list[idx % len(color_blinds_list)]
                ax3.plot(range(1, len(Q_paths[idx]) + 1), Q_paths[idx], color=color, marker='.')
            # plot legend (labels pair up with the ax2 lines, one per episode)
            ax2.legend(legend, loc='center left', bbox_to_anchor=(1, 0.5))
            # Show the graph
            plt.show()
            # reset path arrays for the next batch of episodes
            paths = []
            reward_paths = []
            Q_paths = []
            goals = []
            spawns = []
            termination_reasons = []
            episode_numbers = []

In [13]:
# view moves for each evaluation for each episode
# one-letter code printed per action; any other action prints '-'
_action_markers = {
    'MoveForward': 'F',
    'RotateLeft': 'L',
    'RotateRight': 'R',
    'RotateLeft2': 'L',
    'RotateRight2': 'R',
}
# (instance, set, episode, position) for every state whose third position
# component is positive
bugs = []
for instance in instances:
    evaluations = all_evaluations[instance]
    is_train = 'train' in instance
    for set_num, episodes in enumerate(evaluations):
        # train instances only print their final set
        show_moves = not is_train or set_num == len(evaluations)-1
        for episode, states in enumerate(episodes):
            bugs.extend(
                (instance, set_num, episode, state['drone_position'])
                for state in states
                if state['drone_position'][2] > 0
            )
            if not show_moves:
                continue
            if is_train:
                print('Instance', instance, 'Final Set', 'Epsode', episode)
            else:
                print('Instance', instance, 'Set#', set_num, 'Epsode', episode)
            # step 0 is the initial state and has no action
            for state in states[1:]:
                print(_action_markers.get(state['transcribed_action'], '-'), end=' ')
            print(states[-1]['termination_reason'])

In [14]:
bugs

[]

In [None]:
# Render every recorded observation of the first evaluation as a PNG slide,
# then stitch the slides of each instance into a 1-frame-per-second video.
vec_names = ['DG', 'YG', 'D1', 'D2', 'D3', 'D4', 'D5']
nVec = len(vec_names)
nTimesteps = 4
vecPad = 10
for instance in instances:
    print(instance)
    directory = instance_paths[instance]
    slideshow_path = directory + 'slideshow/'
    # exist_ok avoids the check-then-create race of exists() + mkdir()
    os.makedirs(slideshow_path, exist_ok=True)
    # read observations
    files = [file for file in os.listdir(directory) if 'observations' in file]
    observations = {}
    for file in files:
        # NOTE(review): allow_pickle=True can execute arbitrary code when the
        # file is loaded - only use it on observation files you produced.
        # Presumably these are .npz archives (update() needs a mapping); the
        # context manager closes the archive once its arrays are copied out.
        with np.load(directory + file, allow_pickle=True) as observation_set:
            observations.update(observation_set)
    # plot slides and save as png files
    slides = []
    # only the first evaluation is rendered; slicing tolerates an empty list
    evaluations = all_evaluations[instance][:1]
    for evaluation, episodes in enumerate(evaluations):
        if 'train' in instance:
            episodes = episodes[0:10]
        for episode, states in enumerate(episodes):
            for step, state in enumerate(states):
                if step == 0:
                    continue
                # get state values
                transcribed_action = state['transcribed_action']
                done = state['done']
                if done:
                    termination_reason = state['termination_reason']
                observation_key = state['observation_component']
                # get observation and params
                if observation_key not in observations:
                    continue
                observation = observations[observation_key]
                # check if observation is multi, vec, or img
                if len(observation.shape) == 0:
                    # 0-d array wrapping a dict with 'img' and 'vec' entries
                    obs_type = 'multi'
                    img_array = observation.item()['img']
                    vec_array = observation.item()['vec']
                elif len(observation.shape) == 1:
                    obs_type = 'vec'
                    # blank strip as wide as an image slide, so the text below fits
                    nRows = 1
                    nCols = 84
                    empty_array = np.zeros((nRows, nCols*nTimesteps + nTimesteps - 1), dtype=np.int16)
                    vec_array = observation
                else:
                    obs_type = 'img'
                    img_array = observation
                # show image array
                if obs_type in ['multi', 'img']:
                    nBands = img_array.shape[0]
                    nRows = img_array.shape[1]
                    nCols = img_array.shape[2]
                    view_shape = (nRows, nBands * nCols + nBands - 1)
                    # view side by side, with a one-pixel gap between bands
                    side_by_side = np.zeros(view_shape, dtype=np.int16)
                    for band in range(nBands):
                        left = band*nCols + band
                        side_by_side[0:nRows, left:left+nCols] = img_array[band]
                    # horizontal guide line at row 42
                    plt.plot([0, view_shape[1]], [42, 42], color='green')
                    plt.imshow(side_by_side, cmap='gray')
                # show vector array
                if obs_type in ['multi', 'vec']:
                    if obs_type == 'vec':
                        plt.imshow(empty_array, cmap='gray')
                    x = 0
                    for time in range(nTimesteps):
                        y = nRows + vecPad
                        for i, name in enumerate(vec_names):
                            value = vec_array[time*nVec + i]
                            plt.text(x, y, name + ':' + str(round(value, 2)))
                            y += vecPad
                        x += nCols + 1
                plt.axis('off')
                title = 'Eps ' + str(episode) + '  Step ' + str(step) + '  Act ' + transcribed_action
                title += '' if not done else '  Term ' + termination_reason
                plt.title(title)
                slide_name = title + '.png'
                plt.savefig(slideshow_path + slide_name)
                slides.append(slide_name)
                plt.show()
                # free the figure so a long slideshow does not pile up open figures
                plt.close()

    # make video from frames
    video_path = slideshow_path + instance + '_slideshow.avi'
    # The writer is created from the first readable frame, so an instance
    # without slides no longer fails on an undefined frame size, and frames
    # are written as they are read instead of all being held in memory.
    video = None
    for slide_name in slides:
        frame = cv2.imread(slideshow_path + slide_name)
        if frame is None:
            # cv2.imread returns None when a file cannot be read
            continue
        if video is None:
            video = cv2.VideoWriter(
                filename = video_path,
                fourcc = 0,
                fps = 1,
                frameSize = (frame.shape[1], frame.shape[0]),
                isColor = True,
            )
        video.write(frame)
    if video is not None:
        video.release()