In [None]:
# import shutil

# # Remove a directory and its contents
# shutil.rmtree('/kaggle/working/kaggle')
# Execution environment selector; later cells branch on "kaggle" vs "db".
cluster = "kaggle"

# Custom grid layouts, keyed as "<rows>x<cols>_<holes>h_<difficulty>d".
# Each value lists the grid rows from top to bottom, one character per cell.
# Difficulty suffix: 1 = easy, 2 = medium, 3 = hard (0 is used for hole-free maps).
# Insertion order matters: the run cell selects maps by index into the key list.
maps = {
    # --- no holes ---
    "2x2_0h_0d": ["SF", "FG"],
    "3x3_0h_0d": ["SFF", "FFF", "FGF"],
    "4x4_0h_0d": ["SFFF", "FFFF", "FFFG", "FFFF"],
    # --- 2x2 / 3x3 with holes ---
    "2x2_1h_1d": ["SF", "HG"],
    "3x3_1h_1d": ["SFH", "FFF", "FGF"],
    "3x3_1h_2d": ["SFF", "HFF", "GFF"],
    "3x3_2h_1d": ["SFH", "FFF", "HFG"],
    "3x3_2h_2d": ["SFH", "FHF", "FFG"],
    "3x3_3h_2d": ["SFH", "HFF", "FHG"],
    # --- 4x4 with holes ---
    "4x4_1h_1d": ["SFFF", "FFFF", "FFFG", "FFFH"],
    "4x4_2h_1d": ["SFFF", "FFHF", "FFFF", "FGFH"],
    "4x4_2h_2d": ["SFFF", "FFFF", "FFFH", "FHFG"],
    "4x4_3h_1d": ["SFHF", "FFFF", "HFFF", "FHFG"],
    "4x4_3h_2d": ["SFHF", "FFHF", "FFFF", "FHGF"],
    # almost impossible with slipperiness
    "4x4_5h_3d": ["SFFF", "HHHF", "FFFF", "HGHF"],
    # --- 5x5 ---
    "5x5_3h_2d": ["SFHFF", "FFFFF", "FFFFF", "FFHFF", "HFFGF"],
}

In [None]:
!pip install nevergrad 

In [None]:
import os
if cluster == "kaggle":
    from kaggle_secrets import UserSecretsClient
    GITHUB_TOKEN = UserSecretsClient().get_secret("gh-token")
elif cluster == "db":
    GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
    
USER = "emalmsten"
NAME = "TransZero"
BRANCH = "trans_backward_improvement"
CLONE_URL = f"https://{USER}:{GITHUB_TOKEN}@github.com/{USER}/{NAME}.git"

working_path = '/kaggle/working'
repo_path = f"{working_path}/{NAME}"

# Remove the existing directory if it exists
if os.path.exists(repo_path):
    # If it exists, pull the latest changes
    os.chdir(repo_path)
    get_ipython().system(f"git fetch origin {BRANCH}")
    get_ipython().system(f"git reset --hard origin/{BRANCH}")
else:
    print(f"{repo_path} does not exist")
    os.chdir(working_path)
    # If it doesn't exist, clone the repository
    get_ipython().system(f"git clone -b {BRANCH} {CLONE_URL}")
    os.chdir(repo_path)
!git log -1 --pretty=format:"%h %an %s %ad"
os.makedirs("/results", exist_ok=True)

In [None]:
# Cluster selector ("kaggle" or "db" are the values other cells branch on).
# This re-declares the value already assigned in the first cell.
cluster = "kaggle"

In [None]:
import datetime
import torch
from pathlib import Path

class MuZeroConfig:
    """Hyper-parameters and paths for a MuZero run on the ``frozen_lake`` game.

    Every setting is stored as a plain instance attribute, so the whole
    configuration can be exported through ``__dict__``.
    """

    def __init__(self, root=None):
        """Populate all configuration attributes.

        Args:
            root: Base directory under which ``results/...`` is placed. May be
                a ``pathlib.Path`` or a string. When falsy (e.g. ``None``) it
                defaults to the grandparent of this file, or to the current
                working directory when ``__file__`` is not defined (as in a
                notebook kernel).
        """
        if not root:
            try:
                root = Path(__file__).resolve().parents[1]
            except NameError:
                # Notebook kernels do not define __file__.
                root = Path.cwd()
        # Accept plain strings as well as Path objects.
        root = Path(root)
        cuda = torch.cuda.is_available()

        self.trans_loss_weight = 1

        self.game_name = "frozen_lake"
        self.logger = "wandb"
        self.debug_mode = False

        self.custom_map = "2x2_no_hole"
        self.checkpoint_interval = 500

        # fmt: off
        self.seed = 42
        self.max_num_gpus = 1

        ### Game
        # Frozen Lake observation is a single integer representing the agent's position
        self.observation_shape = (1, 1, 1)  # (1, 1, 1) for a single integer observation
        self.action_space = list(range(4))  # 4 possible actions (0: left, 1: down, 2: right, 3: up)
        self.players = list(range(1))
        self.stacked_observations = 0

        self.muzero_player = 0
        self.opponent = None

        ### Self-Play
        self.num_workers = 4
        # GPU is only used when available and not in debug mode.
        self.selfplay_on_gpu = cuda and not self.debug_mode
        self.max_moves = 50  # Reduced max moves for Frozen Lake
        self.num_simulations = 25
        self.discount = 0.997
        self.temperature_threshold = None

        self.root_dirichlet_alpha = 0.25
        self.root_exploration_fraction = 0.25

        self.pb_c_base = 19652
        self.pb_c_init = 1.25

        # Transformer
        self.transformer_layers = 2
        self.transformer_heads = 2
        self.transformer_hidden_size = 16
        self.max_seq_length = 50
        self.positional_embedding_type = 'sinus'  # sinus or learned
        self.value_network = "transformer"
        self.policy_network = "transformer"
        self.reward_network = "transformer"

        ### Network
        self.network = "transformer"  # fullyconnected or resnet
        self.support_size = 10

        self.downsample = False
        self.blocks = 1
        self.channels = 2
        self.reduced_channels_reward = 2
        self.reduced_channels_value = 2
        self.reduced_channels_policy = 2
        self.resnet_fc_reward_layers = []
        self.resnet_fc_value_layers = []
        self.resnet_fc_policy_layers = []

        self.encoding_size = 8
        self.fc_representation_layers = []
        self.fc_dynamics_layers = [16]
        self.fc_reward_layers = [16]
        self.fc_value_layers = [16]
        self.fc_policy_layers = [16]

        self.norm_layer = True

        ### Training
        self.append = "_" + "newLoss"
        # Results live under <root>/results/<game>/<map>/<network>/<timestamp><append>.
        path = root / "results" / self.game_name / self.custom_map / self.network
        self.name = f'{datetime.datetime.now().strftime("%Y%m%d_%H%M%S")}{self.append}'
        self.results_path = path / self.name

        self.save_model = True
        self.training_steps = 10000
        self.batch_size = 128
        self.value_loss_weight = 1
        self.train_on_gpu = cuda and not self.debug_mode

        self.optimizer = "Adam"
        self.weight_decay = 1e-4
        self.momentum = 0.9

        self.lr_init = 0.02
        self.lr_decay_rate = 0.8
        self.lr_decay_steps = 1000

        ### Replay Buffer
        self.replay_buffer_size = 10001
        self.num_unroll_steps = 10
        self.td_steps = 50
        self.PER = True
        self.PER_alpha = 0.5

        self.use_last_model_value = True
        self.reanalyse_on_gpu = cuda and not self.debug_mode

        self.self_play_delay = 0
        self.training_delay = 0
        self.ratio = 1.5

# Shared configuration instance; with root=Path("") the results path is relative.
cfg = MuZeroConfig(root = Path("")) # TODO: move this instantiation further up

In [None]:
import os
import importlib
import json
import wandb
from pathlib import Path


# `root` is a relative path, so reset the working directory to "/" before using it.
os.chdir("/")
root = "kaggle/working/TransZero"
script_path = f"{root}/muzero.py"

# Load the module dynamically
# spec = importlib.util.spec_from_file_location("main", script_path)
# main_module = importlib.util.module_from_spec(spec)
# spec.loader.exec_module(main_module)
# os.chdir("/kaggle/working/TransZero")
os.chdir(root)

# Resolve the Weights & Biases API key for the active cluster. An unrecognised
# cluster is rejected explicitly; previously wandb_key was left undefined and
# wandb.login failed with a NameError.
if cluster == "kaggle":
    # Imported here so this cell does not depend on an earlier cell's import.
    from kaggle_secrets import UserSecretsClient
    wandb_key = UserSecretsClient().get_secret("wandb_api")
elif cluster == "db":
    wandb_key = os.getenv("WANDB_API_KEY")
else:
    raise ValueError(f"Unknown cluster {cluster!r}; expected 'kaggle' or 'db'")

wandb.login(key=wandb_key, relogin=False)

# JSON-serialisable snapshot of the config: Path values become strings.
cfg_dict = {
    key: str(value) if isinstance(value, Path) else value
    for key, value in cfg.__dict__.items()
}

def run_script(cfg_dict):
    """Write ``cfg_dict`` to ``kaggle_cfg.json`` and launch ``muzero.py`` with it.

    The JSON file is written to the current working directory, and the
    training script is started through the IPython shell from that directory.

    Args:
        cfg_dict: JSON-serialisable configuration mapping.
    """
    file_name = "kaggle_cfg.json"
    with open(file_name, "w") as cfg_file:
        json.dump(cfg_dict, cfg_file, indent=4)

    # Same command the `!python ...` shell escape would run.
    get_ipython().system(f'python muzero.py -rfc "kaggle" -game "frozen_lake" -c "{file_name}"')
    
#wandb.finish()
#main_module.main(choice=6, option=0, seq_mode=True, game_name = config.game_name, config=config)


In [None]:
# Index groups into the ordered key list of `maps`, for choosing which maps to run.
run1 = [0, 1, 2]
run2 = [3, 4, 5]
run3 = [6, 7, 8]
run4 = [9, 10, 11]
run5 = [12, 13, 14]

first_runs = [0, 3, 6, 9, 12]
second_runs = [1, 4, 7, 10, 13]
last_runs = [2, 5, 8, 11, 14]

all_runs = run1 + run2 + run3 + run4 + run5

base_runs = [7, 12, 15]
hp_runs = [7]    # map indices actually trained in this cell
num_runs = 4     # number of repetitions per selected map

ks = list(maps.keys())

# Take the base seed once. The previous code wrote the new seed back into
# cfg_dict on every iteration, so the offsets accumulated (43, 45, 48, 52)
# and the seeds drifted further each time the cell was re-run.
base_seed = cfg_dict["seed"]

for i in range(num_runs):
    for idx in hp_runs:
        print("################")
        print(f"{i+1} run on {ks[idx]}")
        print("################")

        # Per-run copy: the shared cfg_dict is left untouched so that
        # re-running this cell reproduces the same seeds (base_seed + 1 + i).
        run_cfg = {
            **cfg_dict,
            # "training_steps": 100,  # uncomment for a quick smoke test
            "seed": base_seed + 1 + i,
            "custom_map": ks[idx],
        }
        run_script(run_cfg)
    