# Continuous Multi H-MCTS in hand-designed grid

In [None]:
import wandb
# Authenticate with Weights & Biases up front; the experiment cells below
# create runs with wandb.init() and log images/metrics to them.
wandb.login()

In [None]:
import math
import json
from itertools import product
import os
import sys

from hd_grid import hd_grids, number_grids, maze_grids

In [None]:
# Put the parent directory on the import path exactly once, so re-running this
# cell does not keep appending duplicates to sys.path.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

from utils import *
sys.path.append(ROOT_DIR)

from src.Env.Grid.Cont_Grid import Continuous_Grid
from src.Env.Grid.Higher_Grids_HW import HighLevelGrids2
from src.Planners.H_MCTS_continuous.version.H_MCTS_Cont_multi import H_MCTS_Cont

In [None]:
# Basic Setup for environment
# The six values below are packed, in this order, into `grid_setting`, which is
# handed to H_MCTS_Cont as its first positional argument.
l1_rows = 8
l1_cols = 8
l1_width = 2
l1_height = 2
goal_distance = 3
# NOTE(review): the meaning of the trailing `1` is not visible in this file —
# confirm against H_MCTS_Cont / the grid environment before changing it.
grid_setting = (l1_rows, l1_cols, l1_width, l1_height, goal_distance, 1)
# Forwarded to H_MCTS_Cont as the `cont_action_radius` keyword argument.
cont_action_radius=2
# Forwarded to H_MCTS_Cont as `random_seed`; the same seed is used for every grid.
random_seed = 0

In [None]:
# # Multi H-MCTS
# # Hyperparameters from the best result so far
# explorationConstant = 0.25
# alpha = 0.09
# constant_c = 2
# gamma = 1
# iter_Limit = 10000
# reward_function_weight=0.2

# new_grid_setting = list(grid_setting)
# new_grid_setting.append(reward_function_weight)
# grid_setting = tuple(new_grid_setting)

# folder_name = os.path.join(HD_MULTI_H_MCTS_EXPERIMENT_DIR, \
#                             'maze_grids_test_{}_{}_{}_{}_{}_{}'.format(explorationConstant, iter_Limit, alpha, constant_c, gamma, reward_function_weight))
# tree_folder_name = os.path.join(folder_name, 'tree')
# traj_folder_name = os.path.join(folder_name, 'traj')

# exists = make_param_dir(folder_name)
# if not exists:
#     make_param_dir(tree_folder_name)
#     make_param_dir(traj_folder_name)

def test_h_mcts_multi(param, grid_setting, folder_name):
    """Run the hierarchical MCTS planner once per grid in ``number_grids``.

    For every grid a fresh ``H_MCTS_Cont`` planner is built and searched, the
    resulting trajectory is plotted, and the tree/trajectory images are logged
    to Weights & Biases. A summary is written to ``<folder_name>/result.json``
    and an iteration-vs-success plot is logged at the end.

    Besides its arguments, the function reads the module-level names
    ``number_grids``, ``random_seed`` and ``cont_action_radius``. It calls
    ``wandb.log``, so a wandb run must already be active.

    Args:
        param: Sequence whose first eight entries are, in order:
            explorationConstant, iter_Limit, alpha, constant_c, gamma,
            exploration_constant_scale, l1_goal_reward, l1_subgoal_reward.
            Any further entries are ignored.
        grid_setting: Environment settings, forwarded unchanged as the first
            positional argument of ``H_MCTS_Cont``.
        folder_name: Output directory. Files are written under its ``tree/``,
            ``traj/`` and ``found_path/`` subfolders; this function does not
            create them.

    Returns:
        dict: ``{"iter_cnt": {grid_index: iterations}, "success_rate": n}``.
        ``iterations`` is the value returned by ``search`` plus one.
        Despite its key, ``"success_rate"`` holds the *number* of grids solved,
        not a ratio.
    """
    (exploration_constant, iter_limit, alpha, constant_c, gamma,
     exploration_constant_scale, l1_goal_reward, l1_subgoal_reward, *_) = param

    # The high and low level both use the same derived exploration weight.
    exploration_weight = 1 / math.sqrt(exploration_constant)

    iter_counts = {}
    success_count = 0

    # Indexed access is kept on purpose: the container type of `number_grids`
    # is not visible here, so only len() and integer indexing are relied upon.
    for gi in range(len(number_grids)):
        grid = number_grids[gi]
        tree_png = "{}/tree/{}.png".format(folder_name, gi)
        traj_png = "{}/traj/{}.png".format(folder_name, gi)
        found_path_txt = "{}/found_path/{}.txt".format(folder_name, gi)

        planner = H_MCTS_Cont(grid_setting, random_seed=random_seed,
                              H_level=2,
                              explorationConstant_h=exploration_weight,
                              explorationConstant_l=exploration_weight,
                              alpha=alpha,
                              constant_c=constant_c,
                              gamma=gamma,
                              iter_Limit=iter_limit,
                              l1_goal_reward=l1_goal_reward,
                              l1_subgoal_reward=l1_subgoal_reward,
                              assigned_barrier=grid.barriers,
                              assigned_start_goal=grid.start_goal,
                              cont_action_radius=cont_action_radius,
                              exploration_constant_scale=exploration_constant_scale)
        traj, success, iterations = planner.search(
            tree_save_path=tree_png,
            traj_save_path=found_path_txt)
        # NOTE(review): presumably converts a 0-based iteration index into a
        # count — confirm against H_MCTS_Cont.search.
        iterations += 1
        iter_counts[gi] = iterations

        # The trajectory is plotted whether or not the goal was reached.
        planner.cont_env.plot_grid(0, traj, traj_png)
        if success:
            success_count += 1
            print(f'success with {iterations}')
        else:
            print('Failed')

        wandb.log({f"tree_{gi}": wandb.Image(tree_png)})
        wandb.log({f"traj_{gi}": wandb.Image(traj_png)})

    mcts_result = {"iter_cnt": iter_counts, "success_rate": success_count}
    with open("{}/result.json".format(folder_name), 'w') as result_file:
        json.dump(mcts_result, result_file)

    # NOTE(review): cumul_plot is expected to have written success_rate.png into
    # folder_name, since that file is logged right after — confirm in utils.
    x_values, y_values = cumul_plot(iter_limit, mcts_result, folder_name)
    wandb.log({"iteration_plot": wandb.Image("{}/success_rate.png".format(folder_name))})
    data = [[x, y] for (x, y) in zip(x_values, y_values)]
    table = wandb.Table(data=data, columns=["x", "y"])
    wandb.log(
        {"Iteration_Plot": wandb.plot.line(table, "x", "y",
            title="Iteration vs Success Rate Plot")})
    return mcts_result


In [None]:
# Launch Experiment
# Sweeps every hyperparameter combination; each combination gets its own output
# folder and its own wandb run.
total_runs = 1

# Hyperparameter grid. It does not depend on the run index, so it is built once
# instead of on every pass of the run loop.
explorationConstant = [0.16, 0.25, 0.5, 1.0]
iter_Limit = [10000]
alpha = [0.01, 0.025, 0.05, 0.09]
constant_c = [2]
gamma = [1]
exploration_constant_scale = [16]
l1_goal_reward = [100]
l1_subgoal_reward = [100]

# Order matters: test_h_mcts_multi reads these values by position.
parameters = [explorationConstant, iter_Limit, alpha, constant_c, gamma, exploration_constant_scale, l1_goal_reward, l1_subgoal_reward]

# wandb config keys, listed in the same order as `parameters`.
param_names = ("explorationConstant", "iter_Limit", "alpha", "constant_c", "gamma",
               "exploration_constant_scale", "l1_goal_reward", "l1_subgoal_reward")

# Generate all possible combinations
param_combinations = list(product(*parameters))

for run in range(total_runs):
    for param in param_combinations:
        print("Param to check", param)

        # The folder name does not include the run index, so a combination whose
        # folder is already present is skipped on later runs as well.
        folder_name = os.path.join(
            HD_MULTI_H_MCTS_EXPERIMENT_DIR,
            'wandb_num_grids_test_{}_{}_{}_{}_{}_{}_g{}_sg{}'.format(*param))
        tree_folder_name = os.path.join(folder_name, 'tree')
        traj_folder_name = os.path.join(folder_name, 'traj')
        path_folder_name = os.path.join(folder_name, 'found_path')

        # NOTE(review): make_param_dir is assumed to return True when the
        # folder was already there — confirm in utils.
        exists = make_param_dir(folder_name)
        if exists:
            print("Skipping, output folder already present:", folder_name)
            continue

        make_param_dir(tree_folder_name)
        make_param_dir(traj_folder_name)
        make_param_dir(path_folder_name)

        # 1. Start a new run to track this parameter combination
        wandb.init(
            # Project_name
            project='num-grid',
            # Run_name
            name="multi_h_mcts_{}_{}_{}_{}_{}_{}_{}_g{}_sg{}".format(run, *param),
            # Track hyperparameters and run metadata
            config={"H_level": 2, **dict(zip(param_names, param))},
        )
        try:
            wandb.run.log_code(".")

            mcts_result = test_h_mcts_multi(param, grid_setting, folder_name)
            wandb.log(mcts_result)
        except BaseException:
            # Close the run (marked as failed) even on errors or a notebook
            # interrupt, so it does not stay open in the kernel and leak into
            # the next combination.
            wandb.finish(exit_code=1)
            raise
        else:
            # 2. Close the run so the next combination starts a fresh one
            wandb.finish()