In [None]:
import ecole

import time
import numpy as np
import random
import torch

# Utils

In [None]:
def seed_stochastic_modules_globally(default_seed=0,
                                     numpy_seed=None,
                                     random_seed=None,
                                     torch_seed=None,
                                     ecole_seed=None):
    '''Seed numpy, random, torch and ecole so that runs are reproducible.

    Args:
        default_seed: Fallback used for every module whose own seed
            argument is left as None.
        numpy_seed: Seed handed to np.random.seed.
        random_seed: Seed handed to random.seed.
        torch_seed: Seed handed to torch.manual_seed and to the two
            torch.cuda seeding calls.
        ecole_seed: Seed handed to ecole.seed.

    Besides seeding, this sets torch.backends.cudnn.benchmark to False and
    torch.backends.cudnn.deterministic to True.
    '''
    def _resolve(module_seed):
        # an explicitly supplied per-module seed wins over the shared default
        return default_seed if module_seed is None else module_seed

    np.random.seed(_resolve(numpy_seed))

    random.seed(_resolve(random_seed))

    seed_for_torch = _resolve(torch_seed)
    torch.manual_seed(seed_for_torch)
    torch.cuda.manual_seed(seed_for_torch)
    torch.cuda.manual_seed_all(seed_for_torch)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    ecole.seed(_resolve(ecole_seed))

# Agent

In [None]:
class RandomAgent:
    '''Agent that selects an entry of the action set at random.'''

    def __init__(self, name='random'):
        self.name = name

    def before_reset(self, model):
        '''Hook invoked before the environment is reset; a no-op for this agent.'''
        return None

    def action_select(self, action_set, **kwargs):
        '''Pick a random position in action_set using np.random.choice.

        Any extra keyword arguments are accepted and ignored.

        Returns:
            A pair (action, action_idx): the chosen element of action_set
            and the position it was taken from.
        '''
        candidate_positions = np.arange(len(action_set))
        action_idx = np.random.choice(candidate_positions)
        return action_set[action_idx], action_idx

# Environment

In [None]:
class EcoleBranching(ecole.environment.Branching):
    '''ecole.environment.Branching with this notebook's own default settings.

    observation_function, information_function, reward_function and
    scip_params may each be given as the string 'default', in which case the
    value built in __init__ is substituted; any other value is forwarded to
    the parent constructor unchanged.
    '''

    def __init__(
        self,
        observation_function='default',
        information_function='default',
        reward_function='default',
        scip_params='default',
        pseudo_candidates=False,
    ):
        '''Resolve 'default' placeholders, then initialise the parent environment.

        Args:
            observation_function: 'default' selects ecole.observation.NodeBipartite().
            information_function: 'default' selects a dict of cumulative-sum
                num_nodes / lp_iterations / solving_time trackers.
            reward_function: 'default' selects a dict of the negated
                num_nodes / lp_iterations / solving_time rewards.
            scip_params: 'default' selects the SCIP parameter dict defined below.
            pseudo_candidates: stored on the instance and forwarded to the parent.
        '''
        self.pseudo_candidates = pseudo_candidates

        metrics = ecole.reward

        # replace each 'default' placeholder with its concrete object
        if observation_function == 'default':
            observation_function = ecole.observation.NodeBipartite()

        if information_function == 'default':
            information_function = {
                'num_nodes': metrics.NNodes().cumsum(),
                'lp_iterations': metrics.LpIterations().cumsum(),
                'solving_time': metrics.SolvingTime().cumsum(),
            }

        if reward_function == 'default':
            reward_function = {
                'num_nodes': -metrics.NNodes(),
                'lp_iterations': -metrics.LpIterations(),
                'solving_time': -metrics.SolvingTime(),
            }

        if scip_params == 'default':
            # separating / presolving / propagating limits all set to 0,
            # both LP algorithm settings set to 'd', time limit set to 3600
            scip_params = {
                'separating/maxrounds': 0,
                'separating/maxroundsroot': 0,
                'separating/maxcuts': 0,
                'separating/maxcutsroot': 0,
                'presolving/maxrounds': 0,
                'presolving/maxrestarts': 0,
                'propagating/maxrounds': 0,
                'propagating/maxroundsroot': 0,
                'lp/initalgorithm': 'd',
                'lp/resolvealgorithm': 'd',
                'limits/time': 3600,
            }

        super().__init__(
            observation_function=observation_function,
            information_function=information_function,
            reward_function=reward_function,
            scip_params=scip_params,
            pseudo_candidates=pseudo_candidates,
        )

# Example run

In [None]:
# seeding
seed = 0
seed_stochastic_modules_globally(seed)


# init set covering MILP instances, branch-and-bound env, and branching agent
instances = ecole.instance.SetCoverGenerator(n_rows=100, n_cols=100, density=0.05)
env = EcoleBranching(observation_function='default',
                     information_function='default',
                     reward_function='default',
                     scip_params='default')
agent = RandomAgent()

# solve MILP instances
instances_to_solve = 5
instance_stats = {'num_nodes': [], 'solving_time': []}
instances_solved, done = 0, True
while instances_solved < instances_to_solve:
    # reset env; keep drawing new instances until a reset comes back with
    # done == False (done starts as True so the first pass always resets)
    while done:
        env.seed(seed)
        instance = next(instances)
        agent.before_reset(instance)
        obs, action_set, reward, done, info = env.reset(instance)

    # solve instance: let the agent pick an action until the env reports done
    while not done:
        action, action_idx = agent.action_select(action_set)
        obs, action_set, reward, done, info = env.step(action)
    instances_solved += 1
    print(f'Solved instance {instances_solved} of {instances_to_solve} | num_nodes: {info["num_nodes"]} | solving_time: {info["solving_time"]:.3f} s')

    # record stats from the info dict returned by the final step
    instance_stats['num_nodes'].append(info['num_nodes'])
    instance_stats['solving_time'].append(info['solving_time'])

# averages over all solved instances
print(f'mean num_nodes: {np.mean(instance_stats["num_nodes"])} | {np.mean(instance_stats["solving_time"]):.3f} s')