In [None]:
import ecole

In [None]:
# SCIP parameter overrides used when an environment is created with
# scip_params='default'.
default_scip_params = {
    # cutting-plane separation limits (all set to 0)
    'separating/maxrounds': 0,
    'separating/maxroundsroot': 0,
    'separating/maxcuts': 0,
    'separating/maxcutsroot': 0,
    # presolving limits (rounds and restarts set to 0)
    'presolving/maxrounds': 0,
    'presolving/maxrestarts': 0,
    # propagation limits (all set to 0)
    'propagating/maxrounds': 0,
    'propagating/maxroundsroot': 0,
    # LP algorithm selection for the initial solve and for re-solves
    # ('d' is presumably dual simplex -- confirm against the SCIP docs)
    'lp/initalgorithm': 'd',
    'lp/resolvealgorithm': 'd',
    # solver time limit (presumably seconds -- confirm against the SCIP docs)
    'limits/time': 3600,
}
class EcoleBranching(ecole.environment.Branching):
    """Ecole branching environment whose components can be chosen by name.

    Each of the observation, information and reward functions, as well as the
    SCIP parameters, may be given either as a ready-made object or as the
    string 'default', in which case the defaults built in ``__init__`` are
    used. When a component is given as a string, that string is kept on the
    instance (``str_<component>``) so an equivalent environment can be
    re-created later; otherwise the corresponding attribute is ``None``.
    """

    def __init__(
        self,
        observation_function='default',
        information_function='default',
        reward_function='default',
        scip_params='default',
        pseudo_candidates=False,
    ):
        '''
        Args:
            observation_function: Observation function object, or 'default'
                for ``ecole.observation.NodeBipartite()``.
            information_function: Information function object, or 'default'
                for a dict of cumulative node count, LP iterations and
                solving time.
            reward_function: Reward function object, or 'default' for a dict
                of rewards keyed by 'num_nodes', 'lp_iterations',
                'primal_integral', 'dual_integral', 'primal_dual_integral'
                and 'solving_time'.
            scip_params (dict, str): Custom dictionary or one of: 'default',
                'ml4co_item_placement', 'ml4co_load_balancing', 'ml4co_anonymous'.
                Only 'default' is resolved here (to a copy of
                ``default_scip_params``); any other value is forwarded to the
                parent class unchanged.
            pseudo_candidates (bool): Forwarded to the parent branching
                environment.
        '''
        # Save string names so it is easy to initialise new environments.
        self.str_observation_function = (
            observation_function if isinstance(observation_function, str) else None
        )
        self.str_information_function = (
            information_function if isinstance(information_function, str) else None
        )
        self.str_reward_function = (
            reward_function if isinstance(reward_function, str) else None
        )
        self.str_scip_params = (
            scip_params if isinstance(scip_params, str) else None
        )

        # Resolve 'default' using the saved string names, so that arbitrary
        # objects (which may overload ==) are never compared against a string.
        if self.str_reward_function == 'default':
            # NOTE(review): 'dual_integral' is the only entry that is not
            # negated -- confirm this is intentional.
            reward_function = {
                'num_nodes': -ecole.reward.NNodes(),
                'lp_iterations': -ecole.reward.LpIterations(),
                'primal_integral': -ecole.reward.PrimalIntegral(),
                'dual_integral': ecole.reward.DualIntegral(),
                'primal_dual_integral': -ecole.reward.PrimalDualIntegral(),
                'solving_time': -ecole.reward.SolvingTime(),
            }
        if self.str_information_function == 'default':
            information_function = {
                'num_nodes': ecole.reward.NNodes().cumsum(),
                'lp_iterations': ecole.reward.LpIterations().cumsum(),
                'solving_time': ecole.reward.SolvingTime().cumsum(),
            }
        if self.str_observation_function == 'default':
            observation_function = ecole.observation.NodeBipartite()
        if self.str_scip_params == 'default':
            # Copy so later mutation of one environment's parameters cannot
            # leak into the shared module-level defaults.
            scip_params = dict(default_scip_params)

        super().__init__(
            observation_function=observation_function,
            information_function=information_function,
            reward_function=reward_function,
            scip_params=scip_params,
            # Honour the caller's choice (previously hard-coded to False).
            pseudo_candidates=pseudo_candidates,
        )

In [None]:
class StrongBranchingAgent:
    """Branching agent that picks the candidate with the best strong branching score.

    Scores are obtained from ``ecole.observation.StrongBranchingScores``.
    """

    def __init__(self, pseudo_candidates=False, name='sb'):
        """
        Args:
            pseudo_candidates (bool): Forwarded to
                ``ecole.observation.StrongBranchingScores``.
            name (str): Identifier stored on the agent as ``self.name``.
        """
        self.name = name
        # Honour the caller's choice (previously hard-coded to False).
        self.strong_branching_function = ecole.observation.StrongBranchingScores(
            pseudo_candidates=pseudo_candidates
        )

    def before_reset(self, model):
        """Forward the pre-reset hook to the underlying score function."""
        self.strong_branching_function.before_reset(model)

    def extract(self, model, done, *args):
        """Return the raw strong branching scores for ``model``.

        Extra positional arguments are accepted and ignored.
        """
        return self.strong_branching_function.extract(model, done)

    def action_select(self, action_set, model, done):
        """Choose the best-scoring candidate from ``action_set``.

        Returns:
            tuple: ``(action, scores)`` where ``scores`` are the strong
            branching scores restricted to ``action_set`` and ``action`` is
            the position of the highest score *within* ``scores`` (i.e. an
            index into ``action_set``, not the candidate itself).
        """
        scores = self.extract(model, done)[action_set]
        action = scores.argmax()
        return action, scores
    
    
# Build the strong branching agent and a branching environment that uses the
# default observation / information / reward functions and SCIP parameters.
agent = StrongBranchingAgent()
env = EcoleBranching(
    observation_function='default',
    information_function='default',
    reward_function='default',
    scip_params='default',
)
env.seed(0)

# Generator of set cover instances (100 rows, 100 columns, density 0.05).
instances = ecole.instance.SetCoverGenerator(
    n_rows=100,
    n_cols=100,
    density=0.05,
)

In [None]:
# Run the strong branching agent for `num_episodes` episodes and, at every
# branching step, print the chosen action, its strong branching score and the
# resulting change in the solver's dual bound.
num_episodes = 1
for ep in range(num_episodes):
    # find an instance not pre-solved by environment
    # (keep drawing instances until env.reset returns a non-None observation)
    obs = None
    while obs is None:
        # re-seed the environment before every reset
        env.seed(0)
        instance = next(instances)
        # copy taken before the reset; not used again in this visible section
        instance_before_reset = instance.copy_orig()
        agent.before_reset(instance)
        obs, action_set, reward, done, info = env.reset(instance)
    
    # solve instances
    t = 1  # 1-based step counter, used only for printing
    while not done:
        # read the dual bound before branching
        m = env.model.as_pyscipopt()
        prev_dual = m.getDualbound()
        # action_idx is a position within action_set; map it to the actual action
        action_idx, scores = agent.action_select(action_set, env.model, done)
        action = action_set[action_idx]
        obs, action_set, reward, done, info = env.step(action)
        # read the dual bound again after the step to measure its change
        m = env.model.as_pyscipopt()
        curr_dual = m.getDualbound()
        print(f'Step {t} action {action} | SB score: {scores[action_idx]} | Change in dual bound: {curr_dual-prev_dual}')
        t += 1