# Navigation Task with Multiple Targets

In [1]:
import torch
import numpy as np
import math
import random
import matplotlib.pyplot as plt
from model.kohonen_som import KohonenSOM
from model.som_q_learner import SOMQLearnerAllNeighbor
from custom_env.navigation_task import NavigationTaskMultiTarget # distance of 0.2
import pickle
%matplotlib inline

path exception


## Training SOM Q-Learner (Selector)

### Both the state space and the Q-table are updated using self-organization

In [2]:
# Training configuration.
maxitr = 10 ** 3  # number of training epochs
maxtime = 100  # environment steps per epoch
epsilon = 0.2  # probability of taking a random pose instead of the greedy one
torch.manual_seed(0)
random.seed(0)

# The selector picks a node of `pose_som` as its action; the node's weight
# vector is what gets passed to the environment.
pose_som = KohonenSOM(total_nodes=100, node_size=2, update_iterations=maxitr)
selector = SOMQLearnerAllNeighbor(total_nodes=100, state_dim = 2, action_som = pose_som, update_iterations=maxitr)
task = NavigationTaskMultiTarget()

for epoch in range(maxitr):
    # for visualization
    total_return = 0
    state_trajectory = []
    obs = task.state()

    for t in range(maxtime):
        obs = task.state()

        # epsilon greedy: greedy action unless the random draw falls below epsilon
        if random.random() > epsilon:
            pose_index = selector.get_action(obs) # deterministic
        else:
            pose_index = random.randrange(pose_som.total_nodes)

        # step forward
        reward, next_obs = task.step(pose_som.w[pose_index])

        # online training
        # TODO: How do you train the pose SOM?
        # FIXME: `X` is not defined anywhere in this notebook, so this call
        # raises NameError (see the recorded cell output). The training sample
        # for the pose SOM still has to be decided before this cell can run.
        pose_som.update(X, t, 0)

        selector.action_q_learning(
            current_state = obs,
            next_state = next_obs,
            action_index = pose_index,
            reward = reward,
            t = epoch)

        obs = next_obs
        state_trajectory.append(np.array(obs))
        # discounted return with a discount factor of 0.9
        total_return += (0.9 ** t) * reward

    # Every 100th epoch: plot the trajectory (blue), the origin (orange)
    # and every goal of the task (red), then report progress.
    if epoch % 100 == 99:
        plt.plot(np.array(state_trajectory)[:, 0], np.array(state_trajectory)[:, 1], marker='.', linestyle='-', color='blue')
        plt.plot(0.0, 0.0, marker='v', linestyle='None', color='orange')

        for i in range(task.all_goals.shape[0]):
            plt.plot(np.array(task.all_goals[i])[0], np.array(task.all_goals[i])[1], marker='v', linestyle='None', color='red')

        plt.show()
        print(task.goal_completed)
        print(epoch, total_return)

    task.reset()

# Persist both models. The `with` blocks guarantee the files are flushed and
# closed before any later cell reads them back.
with open("../data/selector_pose_ik/navigation_task_multi_target_online/pose_som.obj", 'wb') as pose_som_filehandler:
    pickle.dump(pose_som, pose_som_filehandler)

with open("../data/selector_pose_ik/navigation_task_multi_target_online/som_q_learner_all_neighbor.obj", 'wb') as som_q_learner_all_neighbor_filehandler:
    pickle.dump(selector, som_q_learner_all_neighbor_filehandler)

  x = torch.tensor(x)


NameError: name 'X' is not defined

In [None]:
# Greedy roll-out of the saved selector for `maxtime` steps, with a plot of
# the resulting trajectory.
# for visualization
total_return = 0
state_trajectory = []

# Reload the selector written by the training cell. The `with` block closes
# the file as soon as unpickling is done.
# NOTE: pickle must only be used on files this notebook produced itself.
with open("../data/selector_pose_ik/navigation_task_multi_target_online/som_q_learner_all_neighbor.obj", 'rb') as som_q_learner_all_neighbor_filehandler:
    som_q_learner_all_neighbor = pickle.load(som_q_learner_all_neighbor_filehandler)

for t in range(maxtime):
    obs = task.state()
    pose_index = som_q_learner_all_neighbor.get_action(obs) # deterministic

    # step forward; `pose_som` is the instance currently bound in the session
    reward, next_obs = task.step(pose_som.w[pose_index])

    obs = next_obs
    state_trajectory.append(np.array(obs))
    # discounted return with a discount factor of 0.9
    total_return += (0.9 ** t) * reward

# Trajectory in blue, origin in orange, every goal of the task in red.
plt.plot(np.array(state_trajectory)[:, 0], np.array(state_trajectory)[:, 1], marker='.', linestyle='-', color='blue')
plt.plot(0.0, 0.0, marker='v', linestyle='None', color='orange')

for i in range(task.all_goals.shape[0]):
    plt.plot(np.array(task.all_goals[i])[0], np.array(task.all_goals[i])[1], marker='v', linestyle='None', color='red')

plt.show()
print(task.goal_completed)
print(total_return)

task.reset()

## Pose SOM

In [None]:
"""
Visualization of Trained Pose Model
"""
# NOTE(review): `x` is not assigned anywhere in the visible notebook —
# presumably the pose training samples from another notebook; confirm, because
# this line raises NameError unless `x` is defined first.
plt.plot(x[:, 0].numpy(), x[:, 1].numpy(), marker='v', linestyle='None', color='orange')

# View the 100 pose nodes as a 10 x 10 lattice and draw its grid lines.
reshaped = pose_som.w.reshape(10, 10, 2)
for v in range(10):
    plt.plot(reshaped[:, v, 0].numpy(), reshaped[:, v, 1].numpy(), marker='None', linestyle='-', color='blue')
    plt.plot(reshaped[v, :, 0].numpy(), reshaped[v, :, 1].numpy(), marker='None', linestyle='-', color='blue')

# Both coordinates come from `pose_som`; the original read the y-values from
# an undefined name `som`.
plt.plot(pose_som.w[:, 0].numpy(), pose_som.w[:, 1].numpy(), marker='o', color='blue')
plt.show()
pose_som.w

## Visualization of the Selector and its associated SOMs

In [None]:
from IPython.display import Image

def visualize_som(som, columns, labels=("", ""), dim=10, location=False):
    """Plot two columns of a SOM's node matrix as a ``dim`` x ``dim`` lattice.

    Args:
        som: Object exposing ``w`` (and ``location`` when ``location`` is
            true) as a matrix with ``dim * dim`` rows that supports
            ``reshape`` and ``numpy()``.
        columns: Two column indices; the first is drawn on the x-axis and the
            second on the y-axis.
        labels: Two axis labels, for x and y respectively.
        dim: Side length of the square lattice.
        location: When true, plot ``som.location`` instead of ``som.w``.
    """
    # The two modes differ only in which matrix is drawn.
    nodes = som.location if location else som.w
    xs = nodes[:, columns[0]]
    ys = nodes[:, columns[1]]

    grid_x = xs.reshape(dim, dim)
    grid_y = ys.reshape(dim, dim)
    for v in range(dim):
        # One line along each lattice column and one along each lattice row.
        plt.plot(grid_x[:, v].numpy(), grid_y[:, v].numpy(), marker='None', linestyle='-', color='blue')
        plt.plot(grid_x[v, :].numpy(), grid_y[v, :].numpy(), marker='None', linestyle='-', color='blue')

    plt.plot(xs.numpy(), ys.numpy(), marker='o', color='blue')
    plt.xlabel(labels[0])
    plt.ylabel(labels[1])
    plt.show()
        
# Fresh task instance; the per-state-node plotting cell below reads `task.goal` from it.
task = NavigationTaskMultiTarget()

In [None]:
from torch.nn.functional import softmax

def softmax_matrix(torch_matrix):
    """Return the row-wise softmax of a matrix.

    Args:
        torch_matrix: Floating-point tensor whose last dimension holds the
            values of one row (e.g. shape ``(rows, columns)``).

    Returns:
        A new tensor of the same shape in which every row sums to 1.
    """
    # An explicit `dim` avoids the deprecated implicit-dimension behaviour of
    # `softmax`, and one vectorised call replaces the Python loop over rows.
    return softmax(torch_matrix, dim=-1)

In [None]:
# Reload both trained models from disk. The `with` blocks close each file as
# soon as it has been unpickled instead of leaving the handles open.
# NOTE: pickle must only be used on files this notebook produced itself.
with open("../data/selector_pose_ik/navigation_task_multi_target_online/pose_som.obj", 'rb') as pose_som_filehandler:
    pose_som = pickle.load(pose_som_filehandler)

with open("../data/selector_pose_ik/navigation_task_multi_target_online/som_q_learner_all_neighbor.obj", 'rb') as som_q_learner_filehandler:
    som_q_learner = pickle.load(som_q_learner_filehandler)

### Pose Selection per State Node

In [None]:
# For each of the 100 selector nodes: one scatter plot of the node and its
# preferred pose, followed by a heat map of that node's action values.
for node in range(100):
    print(node)
    node_weights = som_q_learner.w[node]

    # Pose node that `select_winner` returns for this node's state part.
    rep_index = pose_som.select_winner(node_weights[:som_q_learner.state_dim])
    winner_pose = pose_som.w[rep_index]
    plt.plot(winner_pose[0], winner_pose[1], marker='.', linestyle='None', color='blue')

    # Columns from index 2 onward hold one value per pose; the largest one
    # identifies the pose drawn as a blue triangle.
    action_values = node_weights[2:]
    best_pose = torch.argmax(action_values, dim=0)
    action_position = pose_som.w[best_pose]
    plt.plot(action_position[0], action_position[1], marker='v', linestyle='None', color='blue')

    # Reference markers: origin (orange), current task goal and (0.5, 0.5) (red).
    plt.plot(0.0, 0.0, marker='v', linestyle='None', color='orange')
    goal_xy = np.array(task.goal)
    plt.plot(goal_xy[0], np.array(task.goal)[1], marker='v', linestyle='None', color='red')
    plt.plot(0.5, 0.5, marker='v', linestyle='None', color='red')
    plt.show()

    plt.imshow(action_values.reshape(10, 10), cmap='hot', interpolation='nearest')
    plt.colorbar()
    plt.show()

### Distribution of State Space within the SOM

In [None]:
# First the pose SOM drawn from its `location` matrix, then the selector's
# SOM drawn from columns 0 and 1 of its weight matrix.
xy_columns = [0, 1]
visualize_som(pose_som, xy_columns, location=True)
visualize_som(som_q_learner, xy_columns)

### Poses used by the Selector

In [None]:
# Row-wise softmax over the columns from index 2 onward, computed once and
# reused for both print-outs.
action_probabilities = softmax_matrix(som_q_learner.w[:, 2:])

print(som_q_learner.w.shape)
print("Softmax Matrix")
print(action_probabilities)
print("Action Matrix")
# Index of the highest-probability action per state node, laid out as 10 x 10.
print(torch.argmax(action_probabilities, dim=1).reshape(10, 10))

### Which state nodes are more likely to use the specific actions?

In [None]:
# The softmax over the action columns does not depend on the action being
# plotted, so it is evaluated once before the loop.
action_probabilities = softmax_matrix(som_q_learner.w[:, 2:])

for action_index in [0, 10, 20, 30, 61]:  # range(selector.w.shape[1]):
    print("Action Index: ", action_index)
    # Probability of this action at every state node, shown as a 10 x 10 map.
    per_node = action_probabilities[:, action_index].reshape(10, 10)
    plt.imshow(per_node, cmap='hot', interpolation='nearest')
    plt.colorbar()
    plt.show()

In [None]:
# Index of the highest-probability action for every state node (cell output).
softmax_matrix(som_q_learner.w[:, 2:]).argmax(dim=1)

In [None]:
# Highest-probability action per state node, then the distinct actions and
# how many state nodes select each of them.
greedy_actions = torch.argmax(softmax_matrix(som_q_learner.w[:, 2:]), dim=1)
pose_usage = torch.unique(greedy_actions, return_counts=True)
print("Pose in Use", pose_usage[0])
print("Frequency of Pose", pose_usage[1])

In [None]:
from torch.nn.functional import softmax

In [None]:
# One slot per pose node; poses that no state node selects stay at 0.
pose_heat_map = torch.zeros(100)

# `pose_usage` is the (values, counts) pair produced by torch.unique above.
poses_in_use, pose_counts = pose_usage
for pose, count in zip(poses_in_use, pose_counts):
    pose_heat_map[pose] = count

print(pose_heat_map)
# Explicit `dim` avoids the deprecated implicit-dimension softmax call; the
# tensor is 1-D, so dim=0 is the only axis.
pose_heat_map = softmax(pose_heat_map.float(), dim=0)

print("Pose Usage")
plt.imshow(pose_heat_map.reshape(10, 10), cmap='hot', interpolation='nearest')
plt.colorbar()
plt.show()

In [None]:
from custom_env.navigation_task import NavigationTaskMultiTarget

# Baseline: pass the task's own goal to `step` at every time step and
# accumulate the discounted reward.
baseline_maxtime = 100
baseline_gamma = 0.9
baseline_return = 0
task = NavigationTaskMultiTarget()

for t in range(baseline_maxtime):
    reward, _next_obs = task.step(task.goal)
    discount = baseline_gamma ** t
    baseline_return += discount * reward

task.reset()
print(baseline_return)