# Preparation

In [None]:
## NOTE: please install any packages if necessary; environment.yml is provided.

In [6]:
## Import relevant packages.
import copy
from munch import Munch
from src.train import train


In [2]:

# Switch for Weights & Biases logging: 0 keeps it off, any truthy value turns it on.
USE_WANDB = 0

# wandb is optional, so it is imported and authenticated only when the switch is on.
if USE_WANDB:
    import wandb

    wandb.login()

In [14]:
# Default training arguments. Each experiment cell below deep-copies this dict,
# overrides a handful of keys, wraps the result in Munch and passes it to train().
default_args = {
    "random_seed": 0,
    "max_steps": 30000,
    "graph_dir": "graphs",
    "train_type": "Two-levelGraph_SFT2",
    "graph_type": "Two-levelGraph",
    "eval_rate": 0.999,
    "graph_data_dir": None,  # filled in per experiment with the path of the graph JSON
    "max_generation_length": 100,
    "num_hidden_layers": 3,
    "num_attention_heads": 3,
    "vocab_size": 5100,  # overridden by experiments that use a different graph
    "hidden_size": 768,
    "position_embedding": "learned",
    "lr": 3e-4,
    "batch_size": 256,
    "log_steps": 128,
    "save_steps": 128,
    "eval_steps": 128,
    "eval_size": 1024,
    "weight_decay": 0,
    "warmup_ratio": 0,
    "model_dir": None,
    "output_dir": "model",
    "model_config_path": "config/gpt2_tiny_wpetrain.py",
    "world_size": 1,
    "report_to_wandb": USE_WANDB,  # follows the USE_WANDB toggle defined earlier
    # Anchoring/planning switches: all off by default; some experiment cells override them.
    "random_planning": False,
    "fix_interval": False,
    "planning_with_cluster_token": False,
    "onehot_embed": False,
    "provide_planning": False,
    "planning_with_ST": False,
}

# Anchoring helps with pathfinding

## Comparing w/ and w/o Anchoring in k-partite graph (Figure 1(a)) 

In [None]:
"""
Generating the Graphs
- K-partite Graph (Bernoulli edges): K=9, N=2500, edge_probability=0.001, directed
"""
from src.gen_graphs.kpartite_graph import gen_graph_kpartite

# NOTE(review): the training cells load
# KPartiteGraph_Bernoulli_9_2500_0.001_rs0.json, whose name appears to encode
# Type, K, N, edge_probability and random_seed -- keep these values in sync with
# that path (TODO confirm the naming scheme in gen_graph_kpartite).
config = {
    "Type": "KPartiteGraph_Bernoulli",
    "K": 9,
    "N": 2500,
    "edge_probability": 0.001,
    "directed": True,
    "random_seed": 0,
}
gen_graph_kpartite(config)

In [None]:
"""
Pathfinding without anchoring (k-partite graph)
"""

# Start from a private copy of the defaults and layer this run's overrides on
# top in a single expression; the merged dict is wrapped in Munch for train().
args = Munch(
    {
        **copy.deepcopy(default_args),
        "vocab_size": 22600,
        "max_steps": 60000,
        "eval_rate": 0.9,
        "train_type": "KPartiteGraph1",
        "graph_type": "KPartiteGraph_Bernoulli",
        "graph_data_dir": "data/Exp2_GeneralPathFinding/Graphs/KPartiteGraph_Bernoulli_9_2500_0.001_rs0.json",
    }
)
train(args)

In [None]:
"""
Pathfinding with anchoring (k-partite graph)
"""

# Same setup as the run without anchoring, but with the planning train type and
# an extra "split_layer" entry that selects the anchor layer(s).
args = Munch(
    {
        **copy.deepcopy(default_args),
        "vocab_size": 22600,
        "max_steps": 60000,
        "eval_rate": 0.9,
        "train_type": "KPartiteGraph1_planning2",
        "graph_type": "KPartiteGraph_Bernoulli",
        "graph_data_dir": "data/Exp2_GeneralPathFinding/Graphs/KPartiteGraph_Bernoulli_9_2500_0.001_rs0.json",
        # Anchor at layer x+1. Multiple anchoring tokens are allowed, e.g., [3, 5].
        "split_layer": [4],
    }
)
train(args)

## Comparing w/ and w/o Anchoring in two-level graph (Figure 1(b)) 

In [None]:
"""
Graph generation for the two-level comparison
- Two-level Graph with N1=50, N2=100, p1=1, p2=0.2
"""
from src.gen_graphs.twolevel_graph import gen_graph_twolevel

# No upper_edge_probability is set in this config: graph_type1 is "Clique" and
# the header above lists p1=1. Only the second-level probability p2 is given.
config = {
    "Type": "Two-levelGraph",
    "N1": 50,
    "N2": 100,
    "graph_type1": "Clique",
    "graph_type2": "TAE",
    "additional_edge_probability": 0.2,  # p2
    "random_seed": 0,
    "directed": False,
}
gen_graph_twolevel(config)

In [9]:
# Default training arguments for the two-level graph experiments below.
# This rebinds the `default_args` name defined earlier in the notebook. Compared
# with that first set, max_generation_length is 40 (instead of 100) and
# fix_interval defaults to None (instead of False).
default_args = {
    "random_seed": 0,
    "max_steps": 30000,
    "graph_dir": "graphs",
    "train_type": "Two-levelGraph_SFT2",
    "graph_type": "Two-levelGraph",
    "eval_rate": 0.999,
    "graph_data_dir": None,  # filled in per experiment with the path of the graph JSON
    "max_generation_length": 40,
    "num_hidden_layers": 3,
    "num_attention_heads": 3,
    "vocab_size": 5100,  # overridden by experiments that use a different graph
    "hidden_size": 768,
    "position_embedding": "learned",
    "lr": 3e-4,
    "batch_size": 256,
    "log_steps": 128,
    "save_steps": 128,
    "eval_steps": 128,
    "eval_size": 1024,
    "weight_decay": 0,
    "warmup_ratio": 0,
    "model_dir": None,
    "output_dir": "model",
    "model_config_path": "config/gpt2_tiny_wpetrain.py",
    "world_size": 1,
    "report_to_wandb": USE_WANDB,  # follows the USE_WANDB toggle defined earlier
    # Anchoring/planning switches: off by default; some experiment cells override them.
    "random_planning": False,
    "fix_interval": None,
    "planning_with_cluster_token": False,
    "onehot_embed": False,
    "provide_planning": False,
    # Present in the first default set but missing here; restored with the same
    # default so train() receives the same argument schema in every experiment.
    "planning_with_ST": False,
}

In [None]:
"""
Training (without anchoring) -- Each run takes about 0.5 hours on a single 3090 GPU.
"""

# `train` comes from the notebook's import cell, so it is not re-imported here.
# Work on a private copy of the defaults so this run's overrides do not leak
# into later cells.
args = copy.deepcopy(default_args)
args["train_type"] = "Two-levelGraph_SFT2"
args["eval_rate"] = 0.999
args["graph_data_dir"] = "data/Exp2_GeneralPathFinding/Graphs/Two-levelGraph_Clique_TAE_0.2_50_100.json"
args = Munch(args)  # wrap the dict in Munch before handing it to train()
train(args)


In [None]:
"""
Training (with anchoring) -- Each run takes about 0.5 hours on a single 3090 GPU.
"""

# `train` comes from the notebook's import cell, so it is not re-imported here.
# Identical to the run without anchoring except for the "_planning1" train type.
args = copy.deepcopy(default_args)
args["train_type"] = "Two-levelGraph_SFT2_planning1"
args["eval_rate"] = 0.999
args["graph_data_dir"] = "data/Exp2_GeneralPathFinding/Graphs/Two-levelGraph_Clique_TAE_0.2_50_100.json"
args = Munch(args)  # wrap the dict in Munch before handing it to train()
train(args)

# Comparing Different Anchoring Strategies for Two-level Path-Finding

In [None]:
"""
Graph generation for the anchoring-strategy comparison
- Two-level Graph with N1=200, N2=50, p1=0.1, p2=0.4
"""

from src.gen_graphs.twolevel_graph import gen_graph_twolevel

# Both levels use graph type "TAE" here, so both edge probabilities are given.
config = {
    "Type": "Two-levelGraph",
    "N1": 200,
    "N2": 50,
    "graph_type1": "TAE",
    "graph_type2": "TAE",
    "upper_edge_probability": 0.1,  # p1
    "additional_edge_probability": 0.4,  # p2
    "random_seed": 0,
    "upper_directed": False,
    "directed": False,
}
gen_graph_twolevel(config)

In [None]:
"""
Training (without anchoring)
"""

# Copy the defaults and apply this run's overrides in one merged dict, then wrap
# the result in Munch for train().
args = Munch(
    {
        **copy.deepcopy(default_args),
        "train_type": "Two-levelGraph_SFT3",
        "eval_rate": 0.9995,
        "graph_data_dir": "data/Exp2_GeneralPathFinding/Graphs/Two-levelGraph_TAE_0.1_TAE_0.4_200_50.json",
        "vocab_size": 10100,
    }
)
train(args)

## Comparing 3 different anchoring strategies
- Inter-Cluster Anchoring: the anchors are endpoints of inter-cluster edges.
- Fixed-interval Anchoring: the anchors are placed at a fixed interval; in this case, every other step.
- Random anchoring: the anchors are randomly selected.

In [None]:
"""
Inter-Cluster Anchoring
"""
# `train` comes from the notebook's import cell, so it is not re-imported here.
# This run leaves the fix_interval and random_planning defaults untouched.
args = copy.deepcopy(default_args)
args["train_type"] = "Two-levelGraph_SFT3_planning2"
args["eval_rate"] = 0.9995
args["graph_data_dir"] = "data/Exp2_GeneralPathFinding/Graphs/Two-levelGraph_TAE_0.1_TAE_0.4_200_50.json"
args["vocab_size"] = 10100
args = Munch(args)  # wrap the dict in Munch before handing it to train()
train(args)

In [None]:
"""
Fixed-interval Anchoring
"""
# `train` comes from the notebook's import cell, so it is not re-imported here.
args = copy.deepcopy(default_args)
args["train_type"] = "Two-levelGraph_SFT3_planning2"
args["eval_rate"] = 0.9995
args["graph_data_dir"] = "data/Exp2_GeneralPathFinding/Graphs/Two-levelGraph_TAE_0.1_TAE_0.4_200_50.json"
args["vocab_size"] = 10100
# The only difference from the inter-cluster anchoring run: fix_interval is switched on.
args["fix_interval"] = True
args = Munch(args)  # wrap the dict in Munch before handing it to train()
train(args)

In [None]:
"""
Random anchoring
"""
# `train` comes from the notebook's import cell, so it is not re-imported here.
args = copy.deepcopy(default_args)
args["train_type"] = "Two-levelGraph_SFT3_planning2"
args["eval_rate"] = 0.9995
args["graph_data_dir"] = "data/Exp2_GeneralPathFinding/Graphs/Two-levelGraph_TAE_0.1_TAE_0.4_200_50.json"
args["vocab_size"] = 10100
# The only difference from the inter-cluster anchoring run: random_planning is switched on.
args["random_planning"] = True
args = Munch(args)  # wrap the dict in Munch before handing it to train()
train(args)