alexhernandezgarcia · alexhernandezgarcia · Mar 31, 2023 · Feb 24, 2023 · Feb 28, 2023 · Feb 28, 2023
diff --git a/config/env/base.yaml b/config/env/base.yaml
@@ -3,14 +3,17 @@ _target_: gflownet.envs.base.GFlowNetEnv
 # Reward function: power or boltzmann
 # boltzmann: exp(-1.0 * reward_beta * proxy)
 # power: (-1.0 * proxy / reward_norm) ** self.reward_beta
-reward_func: boltzmann
+# identity: proxy
+reward_func: identity
+# Minimum reward
+reward_min: 1e-8
 # Beta parameter of the reward function
 reward_beta: 1.0
 # Reward normalization for "power" reward function
 reward_norm: 1.0
 # If > 0, reward_norm = reward_norm_std_mult * std(energies)
+reward_norm_std_mult: 0.0
 proxy_state_format: oracle
-reward_norm_std_mult: 8
 # Buffer
 buffer:
   replay_capacity: 10

diff --git a/config/env/grid.yaml b/config/env/grid.yaml
@@ -9,9 +9,10 @@ func: corners
 n_dim: 2
 # Number of cells per dimension
 length: 3
-# Minimum and maximum number of steps in the action space
-min_step_len: 1
-max_step_len: 1
+# Maximum increment that a single action can apply to each dimension
+max_increment: 1
+# Maximum number of dimensions that can be incremented by one action
+max_dim_per_action: 1
 # Mapping coordinates
 cell_min: -1
 cell_max: 1

diff --git a/config/env/torus.yaml b/config/env/torus.yaml
@@ -11,9 +11,10 @@ n_dim: 2
 n_angles: 8
 # Maximum number of rounds
 length_traj: 12
-# Minimum and maximum number of steps in the action space
-min_step_len: 1
-max_step_len: 1
+# Maximum increment that a single action can apply to each dimension
+max_increment: 1
+# Maximum number of dimensions that can be incremented by one action
+max_dim_per_action: 1
 # Buffer
 buffer:
   data_path: null

diff --git a/config/env/torus_rounds.yaml b/config/env/torus_rounds.yaml
diff --git a/config/logger/base.yaml b/config/logger/base.yaml
@@ -1,9 +1,11 @@
-_target_: logger.Logger
+_target_: gflownet.utils.logger.Logger
 
 do:
   online: False
   times: False
 
+project_name: "GFlowNet"
+
 # Train metrics
 train:
   period: 1

diff --git a/config/logger/wandb.yaml b/config/logger/wandb.yaml
@@ -6,7 +6,5 @@ _target_: gflownet.utils.logger.Logger
 do:
   online: True
 
-project_name: "GFlowNet"
-
 tags: 
     - gflownet
diff --git a/config/tests.yaml b/config/tests.yaml
@@ -0,0 +1,27 @@
+defaults:
+  - _self_
+  - env: grid
+  - gflownet: flowmatch
+  - proxy: uniform
+  - logger: base
+  - user: alex
+
+# Device
+device: cpu
+# Float precision
+float_precision: 32
+# Number of objects to sample at the end of training
+n_samples: 1
+# Random seeds
+seed: 0
+
+# Hydra config
+hydra:
+  # See: https://hydra.cc/docs/configure_hydra/workdir/
+  run:
+    dir: ${user.logdir.root}/${now:%Y-%m-%d_%H-%M-%S}_tests
+  job:
+    # See: https://hydra.cc/docs/upgrades/1.1_to_1.2/changes_to_job_working_dir/
+    # See: https://hydra.cc/docs/tutorials/basic/running_your_app/working_directory/#disable-changing-current-working-dir-to-jobs-output-dir
+    chdir: True
+
diff --git a/gflownet/envs/alaninedipeptide.py b/gflownet/envs/alaninedipeptide.py
@@ -1,9 +1,9 @@
+from copy import deepcopy
+from typing import List, Tuple
+
 import numpy as np
 import numpy.typing as npt
 import torch
-
-from copy import deepcopy
-from typing import List, Tuple
 from torchtyping import TensorType
 
 from gflownet.envs.ctorus import ContinuousTorus
@@ -20,48 +20,17 @@ def __init__(
         self,
         path_to_dataset,
         url_to_dataset,
-        length_traj=1,
-        fixed_distribution=dict,
-        random_distribution=dict,
-        vonmises_min_concentration=1e-3,
-        env_id=None,
-        reward_beta=1,
-        reward_norm=1.0,
-        reward_norm_std_mult=0,
-        reward_func="boltzmann",
-        denorm_proxy=False,
-        energies_stats=None,
-        proxy=None,
-        oracle=None,
-        policy_encoding_dim_per_angle=None,
-        n_comp=3,
         **kwargs,
     ):
-        self.atom_positions_dataset = AtomPositionsDataset(path_to_dataset, url_to_dataset)
+        self.atom_positions_dataset = AtomPositionsDataset(
+            path_to_dataset, url_to_dataset
+        )
         atom_positions = self.atom_positions_dataset.sample()
         self.conformer = ConformerBase(
             atom_positions, constants.ad_smiles, constants.ad_free_tas
         )
         n_dim = len(self.conformer.freely_rotatable_tas)
-        super(AlanineDipeptide, self).__init__(
-            n_dim=n_dim,
-            length_traj=length_traj,
-            fixed_distribution=fixed_distribution,
-            random_distribution=random_distribution,
-            vonmises_min_concentration=vonmises_min_concentration,
-            env_id=env_id,
-            reward_beta=reward_beta,
-            reward_norm=reward_norm,
-            reward_norm_std_mult=reward_norm_std_mult,
-            reward_func=reward_func,
-            denorm_proxy=denorm_proxy,
-            energies_stats=energies_stats,
-            proxy=proxy,
-            oracle=oracle,
-            policy_encoding_dim_per_angle=policy_encoding_dim_per_angle,
-            n_comp=n_comp,
-            **kwargs,
-        )
+        super().__init__(**kwargs)
         self.sync_conformer_with_state()
 
     def sync_conformer_with_state(self, state: List = None):
@@ -71,13 +40,7 @@ def sync_conformer_with_state(self, state: List = None):
             self.conformer.set_torsion_angle(ta, state[idx])
         return self.conformer
 
-    def copy(self):
-        # return an instance of the environment
-        return deepcopy(self)
-
-    def statetorch2proxy(
-        self, states: TensorType["batch", "state_dim"]
-    ) -> npt.NDArray:
+    def statetorch2proxy(self, states: TensorType["batch", "state_dim"]) -> npt.NDArray:
         """
         Prepares a batch of states in torch "GFlowNet format" for the oracle.
         """
@@ -88,16 +51,14 @@ def statetorch2proxy(
             np_states = states.cpu().numpy()
         return np_states[:, :-1]
 
-    def statebatch2proxy(
-        self, states: List[List]
-    ) -> npt.NDArray:
+    def statebatch2proxy(self, states: List[List]) -> npt.NDArray:
         """
         Prepares a batch of states in "GFlowNet format" for the proxy: a tensor where
         each state is a row of length n_dim with an angle in radians. The n_actions
         item is removed.
         """
         return np.array(states)[:, :-1]
-    
+
     def statetorch2oracle(
         self, states: TensorType["batch", "state_dim"]
     ) -> List[Tuple[npt.NDArray, npt.NDArray]]:

diff --git a/gflownet/envs/aptamers.py b/gflownet/envs/aptamers.py
@@ -1,13 +1,15 @@
 """
 Classes to represent aptamers environments
 """
-from typing import List
 import itertools
+import time
+from typing import List
+
 import numpy as np
 import numpy.typing as npt
 import pandas as pd
+
 from gflownet.envs.base import GFlowNetEnv
-import time
 
 
 class AptamerSeq(GFlowNetEnv):
@@ -51,47 +53,27 @@ def __init__(
         n_alphabet=4,
         min_word_len=1,
         max_word_len=1,
-        proxy=None,
-        oracle=None,
-        reward_beta=1,
-        env_id=None,
-        energies_stats=None,
-        reward_norm=1.0,
-        reward_norm_std_mult=0.0,
-        reward_func="power",
-        denorm_proxy=False,
         **kwargs,
     ):
-        super(AptamerSeq, self).__init__(
-            env_id,
-            reward_beta,
-            reward_norm,
-            reward_norm_std_mult,
-            reward_func,
-            energies_stats,
-            denorm_proxy,
-            proxy,
-            oracle,
-            **kwargs,
-        )
+        super().__init__()
         self.source = []
         self.min_seq_length = min_seq_length
         self.max_seq_length = max_seq_length
         self.n_alphabet = n_alphabet
         self.min_word_len = min_word_len
         self.max_word_len = max_word_len
-        self.action_space = self.get_actions_space()
-        self.eos = len(self.action_space)
+        self.action_space = self.get_action_space()
+        self.eos = self.action_space_dim
         self.reset()
         self.fixed_policy_output = self.get_fixed_policy_output()
         self.random_policy_output = self.get_fixed_policy_output()
         self.policy_output_dim = len(self.fixed_policy_output)
         self.policy_input_dim = len(self.state2policy())
-        self.max_traj_len = self.get_max_traj_len()
+        self.max_traj_len = self.get_max_traj_length()
         # Set up proxy
-        self.proxy.setup(self.max_seq_length)
+        self.setup_proxy()
 
-    def get_actions_space(self):
+    def get_action_space(self):
         """
         Constructs list with all possible actions
         """
@@ -104,7 +86,7 @@ def get_actions_space(self):
             actions += actions_r
         return actions
 
-    def get_max_traj_len(
+    def get_max_traj_length(
         self,
     ):
         return self.max_seq_length / self.min_word_len + 1
@@ -324,8 +306,8 @@ def get_mask_invalid_actions_forward(self, state=None, done=None):
         if done is None:
             done = self.done
         if done:
-            return [True for _ in range(len(self.action_space) + 1)]
-        mask = [False for _ in range(len(self.action_space) + 1)]
+            return [True for _ in range(self.action_space_dim + 1)]
+        mask = [False for _ in range(self.action_space_dim + 1)]
         seq_length = len(state)
         if seq_length < self.min_seq_length:
             mask[self.eos] = True
@@ -334,50 +316,6 @@ def get_mask_invalid_actions_forward(self, state=None, done=None):
                 mask[idx] = True
         return mask
 
-    def no_eos_mask(self, state=None):
-        """
-        Returns True if no eos action is allowed given state
-        """
-        if state is None:
-            state = self.state.copy()
-        return len(state) < self.min_seq_length
-
-    def true_density(self, max_states=1e6):
-        """
-        Computes the reward density (reward / sum(rewards)) of the whole space, if the
-        dimensionality is smaller than specified in the arguments.
-
-        Returns
-        -------
-        Tuple:
-          - normalized reward for each state
-          - states
-          - (un-normalized) reward)
-        """
-        if self._true_density is not None:
-            return self._true_density
-        if self.n_alphabet**self.max_seq_length > max_states:
-            return (None, None, None)
-        state_all = np.int32(
-            list(
-                itertools.product(*[list(range(self.n_alphabet))] * self.max_seq_length)
-            )
-        )
-        traj_rewards, state_end = zip(
-            *[
-                (self.proxy(state), state)
-                for state in state_all
-                if len(self.get_parents(state, False)[0]) > 0 or sum(state) == 0
-            ]
-        )
-        traj_rewards = np.array(traj_rewards)
-        self._true_density = (
-            traj_rewards / traj_rewards.sum(),
-            list(map(tuple, state_end)),
-            traj_rewards,
-        )
-        return self._true_density
-
     def make_train_set(
         self,
         ntrain,
@@ -491,36 +429,3 @@ def make_test_set(
         t1_all = time.time()
         times["all"] += t1_all - t0_all
         return df_test, times
-
-    @staticmethod
-    def np2df(test_path, al_init_length, al_queries_per_iter, pct_test, data_seed):
-        data_dict = np.load(test_path, allow_pickle=True).item()
-        letters = numbers2letters(data_dict["samples"])
-        df = pd.DataFrame(
-            {
-                "samples": letters,
-                "energies": data_dict["energies"],
-                "train": [False] * len(letters),
-                "test": [False] * len(letters),
-            }
-        )
-        # Split train and test section of init data set
-        rng = np.random.default_rng(data_seed)
-        indices = rng.permutation(al_init_length)
-        n_tt = int(pct_test * len(indices))
-        indices_tt = indices[:n_tt]
-        indices_tr = indices[n_tt:]
-        df.loc[indices_tt, "test"] = True
-        df.loc[indices_tr, "train"] = True
-        # Split train and test the section of each iteration to preserve splits
-        idx = al_init_length
-        iters_remaining = (len(df) - al_init_length) // al_queries_per_iter
-        indices = rng.permutation(al_queries_per_iter)
-        n_tt = int(pct_test * len(indices))
-        for it in range(iters_remaining):
-            indices_tt = indices[:n_tt] + idx
-            indices_tr = indices[n_tt:] + idx
-            df.loc[indices_tt, "test"] = True
-            df.loc[indices_tr, "train"] = True
-            idx += al_queries_per_iter
-        return df