In [1]:
from gfn.samplers import Sampler
from gfn.states import States
from gfn.utils.common import set_seed
from gfn.utils.modules import MLP  # is a simple multi-layer perceptron (MLP)
from torch.distributions import Distribution, Normal  # TODO: extend to Beta
from torch.distributions.independent import Independent
from tqdm import tqdm, trange
from gfn.modules import DiscretePolicyEstimator
from gfn.gflownet import TBGFlowNet
import torch
from common.env import DistrictEnv


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.0 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/opt/anaconda3/envs/gfn/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/opt/anaconda3/envs/gfn/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/Users/arnaudbergeron/.local/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "/Users/arnaudbergeron/.local/lib/python3.10/site-packages/traitlets/config/application.py", line 1043, in launch_instance
    app.sta

In [2]:
env = DistrictEnv(json_file="data/IA_raw_data.json", device_str="cpu")

In [3]:
fw_dim = env.state_shape[0]
n_actions = env.n_actions[0]

module_PF = MLP(
    input_dim=fw_dim,
    output_dim=n_actions
)  # Neural network for the forward policy, with as many outputs as there are actions

module_PB = MLP(
    input_dim=fw_dim,
    output_dim=n_actions - 1,
    trunk=module_PF.trunk  # We share all the parameters of P_F and P_B, except for the last layer
)

In [None]:
pf_estimator = DiscretePolicyEstimator(module_PF, env.n_actions, is_backward=False, preprocessor=env.preprocessor)
pb_estimator = DiscretePolicyEstimator(module_PB, env.n_actions, is_backward=True, preprocessor=env.preprocessor)

In [None]:
gfn = TBGFlowNet(logZ=0., pf=pf_estimator, pb=pb_estimator)  # We initialize logZ to 0

# 5 - We define the sampler and the optimizer.
sampler = Sampler(estimator=pf_estimator)  # We use an on-policy sampler, based on the forward policy

# Different policy parameters can have their own LR.
# Log Z gets dedicated learning rate (typically higher).
optimizer = torch.optim.Adam(gfn.pf_pb_parameters(), lr=1e-3)
optimizer.add_param_group({"params": gfn.logz_parameters(), "lr": 1e-1})

# 6 - We train the GFlowNet for 1000 iterations, with 16 trajectories per iteration
for i in (pbar := tqdm(range(1000))):
    trajectories = sampler.sample_trajectories(env=env, n=16)
    optimizer.zero_grad()
    loss = gfn.loss(env, trajectories)
    loss.backward()
    optimizer.step()
    if i % 25 == 0:
        pbar.set_postfix({"loss": loss.item()})

  0%|          | 0/1000 [00:00<?, ?it/s]


ValueError: DiscretePolicyEstimator output dimension should be ((3,),) but is 3.

In [None]:
env.preprocessor.output_dim

torch.Size([1188])