In [None]:
%load_ext autoreload
%autoreload
from retro_branching.environments import EcoleBranching, EcoleConfiguring
from retro_branching.agents import StrongBranchingAgent, PseudocostBranchingAgent, RandomAgent
from retro_branching.utils import seed_stochastic_modules_globally
from retro_branching.rewards import NormalisedLPGain

import ecole
import numpy as np
import random
import copy
import pyscipopt

import networkx as nx
from networkx.algorithms.shortest_paths.generic import shortest_path
from networkx.algorithms.traversal.depth_first_search import dfs_tree
from networkx.drawing.nx_pydot import graphviz_layout
import matplotlib.pyplot as plt
from ordered_set import OrderedSet
import math

# Seed shared by the cells below (it is also passed to env.seed).
# NOTE(review): the helper presumably seeds the stochastic modules used in this
# notebook; it is defined in retro_branching.utils — confirm exactly what it seeds.
seed = 0
seed_stochastic_modules_globally(default_seed=seed)

In [None]:
%autoreload
def get_path_node_scores(tree, path):
    '''Returns the 'score' attribute of every node in path, in path order.'''
    scores = []
    for node_id in path:
        scores.append(tree.nodes[node_id]['score'])
    return scores

def render(tree):
    '''Draws the given B&B search tree (graphviz 'dot' layout) and shows the figure.'''
    # open a fresh figure so that each call draws onto its own canvas
    plt.figure()

    layout = graphviz_layout(tree, prog='dot')

    # every node is labelled with its own identifier
    labels = {n: n for n in tree.nodes}

    nx.draw_networkx_nodes(tree, layout, label=labels)
    nx.draw_networkx_edges(tree, layout)
    nx.draw_networkx_labels(tree, layout, labels=labels)

    plt.show()

In [None]:
%autoreload



class SearchTree:
    '''
    Tracks the SCIP search tree as a networkx DiGraph. Call
    SearchTree.update_tree(ecole.Model) each time the ecole environment (and
    therefore the ecole.Model) is updated.

    N.B. SCIP does not store nodes which were pruned, infeasible, outside
    the search tree's optimality bounds, or which were optimal, therefore these
    nodes will not be stored in the SearchTree. This is why m.getNTotalNodes()
    (the total number of nodes processed by SCIP) will likely be more than the
    number of nodes in the search tree when an instance is solved.

    Graph-level attributes kept in self.tree.graph:
        root_node: {node_id: node} for the node without a parent, or None
            until such a node has been added.
        visited_nodes: List of {node_id: node} dicts, one per distinct current
            node seen by update_tree(), in the order they were first seen.
        visited_node_ids: OrderedSet of the ids stored in visited_nodes.

    Each tree node stores the attributes '_id' and 'lower_bound'.
    '''
    def __init__(self, model):
        '''
        Args:
            model (ecole.Model): Model whose current search tree state is used
                to initialise the tracker.
        '''
        self.tree = nx.DiGraph()

        self.tree.graph['root_node'] = None
        self.tree.graph['visited_nodes'] = []
        self.tree.graph['visited_node_ids'] = OrderedSet()

        self.update_tree(model)

    def update_tree(self, model):
        '''
        Call this method after each update to the ecole environment. Pass
        the updated ecole.Model, and the B&B tree tracker will be updated accordingly.

        Sets self.curr_node and self.parent_node (single-entry {node_id: node}
        dicts, {None: None} when there is no such node) as well as
        self.open_leaves, self.open_children and self.open_siblings
        ({node_id: node} dicts), and adds any node not yet in the tree.
        '''
        m = model.as_pyscipopt()

        _curr_node = m.getCurrentNode()
        # no current node means branching has finished
        curr_node_id = _curr_node.getNumber() if _curr_node is not None else None
        self.curr_node = {curr_node_id: _curr_node}

        if curr_node_id is not None:
            if curr_node_id not in self.tree.graph['visited_node_ids']:
                # first time this node is the current node -> register the visit
                self._add_nodes(self.curr_node)
                self.tree.graph['visited_nodes'].append(self.curr_node)
                self.tree.graph['visited_node_ids'].add(curr_node_id)
                self._print_node_diagnostics(m, curr_node_id, _curr_node)

            _parent_node = _curr_node.getParent()
            # a current node without a parent is the root node
            parent_node_id = _parent_node.getNumber() if _parent_node is not None else None
            self.parent_node = {parent_node_id: _parent_node}
        else:
            self.parent_node = {None: None}

        open_leaves, open_children, open_siblings = m.getOpenNodes()
        self.open_leaves = {node.getNumber(): node for node in open_leaves}
        self.open_children = {node.getNumber(): node for node in open_children}
        self.open_siblings = {node.getNumber(): node for node in open_siblings}

        self._add_nodes(self.open_leaves)
        self._add_nodes(self.open_children)
        self._add_nodes(self.open_siblings)

    def _print_node_diagnostics(self, m, curr_node_id, curr_node):
        '''
        Prints solution and LP statistics for a newly visited current node.

        NOTE(review): the values labelled 'dual' below are read with
        Variable.getLPSol(), which per the PySCIPOpt documentation is the
        variable's LP solution value rather than a dual value -- confirm
        whether the label is intended.
        '''
        print(f'Curr node {curr_node_id} estimate: {curr_node.getEstimate():.3f}')
        print(f'# getSols: {len(m.getSols())}')
        sol_obj_vals = [m.getSolObjVal(sol) for sol in m.getSols()]
        print(f'sol obj vals: {sol_obj_vals}')
        print(f'lp obj val: {m.getLPObjVal()}')

        # get lp cols
        lp_cols = m.getLPColsData()
        lp_vars = [col.getVar() for col in lp_cols]
        print(f'lp_vars: {lp_vars}')

        # get lp primal stats
        lp_primal_sol = [col.getPrimsol() for col in lp_cols]
        print(f'lp_primal_sol: {lp_primal_sol}')
        # math.modf(x)[0] is the fractional part of x
        lp_primal_sol_frac = [math.modf(val)[0] for val in lp_primal_sol]
        print(f'lp primal sol frac: {lp_primal_sol_frac}')

        # get lp 'dual' stats (see NOTE in docstring)
        lp_dual_sol = [var.getLPSol() for var in lp_vars]
        print(f'lp_dual_sol: {lp_dual_sol}')

        is_integral = [col.isIntegral() for col in lp_cols]
        for dual_val, primal_val, integral in zip(lp_dual_sol, lp_primal_sol, is_integral):
            print(f'dual val: {dual_val} | primal val: {primal_val} | equal: {dual_val == primal_val} | integral: {integral}')

        sol = m.createSol()
        print(sol)

    def _add_nodes(self, nodes, parent_node_id=None):
        '''
        Adds nodes if not already in tree.

        Args:
            nodes (dict): Maps node id to node object. Each node must provide
                getLowerbound() and getParent().
            parent_node_id: If given, every newly added node which has a parent
                is attached to this id. If None (default), each node is attached
                to the id reported by its own parent node.
        '''
        for node_id, node in nodes.items():
            if node_id in self.tree:
                continue

            # add node
            self.tree.add_node(node_id,
                               _id=node_id,
                               lower_bound=node.getLowerbound())

            _parent_node = node.getParent()
            if _parent_node is None:
                # is root node, has no parent
                self.tree.graph['root_node'] = {node_id: node}
                continue

            # add edge; resolve the parent per node so that one node's parent
            # is never carried over to the following nodes in the same call
            if parent_node_id is not None:
                edge_parent_id = parent_node_id
            else:
                edge_parent_id = _parent_node.getNumber()
            self.tree.add_edge(edge_parent_id, node_id)

    def render(self):
        '''Renders B&B search tree.'''
        plt.figure()

        pos = graphviz_layout(self.tree, prog='dot')
        node_labels = {node: node for node in self.tree.nodes}
        nx.draw_networkx_nodes(self.tree,
                               pos,
                               label=node_labels)
        nx.draw_networkx_edges(self.tree,
                               pos)

        nx.draw_networkx_labels(self.tree, pos, labels=node_labels)

        plt.show()


In [None]:
class RetroBranching:
    def __init__(self, 
                 normaliser='init_primal_bound', 
                 min_subtree_depth=1, 
                 retro_trajectory_construction='deepest',
                 debug_mode=False):
        '''
        Waits until end of episode to calculate rewards for each step, then retrospectively
        goes back through each step in the episode and calculates reward for that step.
        I.e. reward returned will be None until the end of the episode, at which
        point a list of dicts mapping episode_step_idx to reward will be returned.

        At the end of the episode, an episode is first retrospectively constructed from the
        root node to the last visited node (treated as the 'optimal path'). Then, all other
        nodes in the B&B tree not already included in a retrospective sub-tree episode path
        are iteratively grouped into sub-tree episodes until all nodes experienced by the agent
        have been considered. A list is returned, where each element is a dict
        mapping the step index in the original episode to the corresponding reward received by the agent.
        
        Args:
            normaliser ('init_primal_bound', 'curr_primal_bound'): What to normalise
                with respect to in the numerator and denominator to calculate
                the per-step normalised LP gain reward.
            min_subtree_depth (int): Minimum depth of sub-tree (i.e. minimum length of sub-tree episode).
            retro_trajectory_construction ('random', 'deepest'): Which policy to use when choosing a leaf node as the
                final node to construct a sub-tree.
            debug_mode (bool): If True, renders the B&B tree and prints the constructed
                episodes when the episode finishes.
        '''
        self.min_subtree_depth = min_subtree_depth
        self.retro_trajectory_construction = retro_trajectory_construction
        self.normalised_lp_gain = NormalisedLPGain(normaliser=normaliser) # normalised lp gain reward tracker
        self.debug_mode = debug_mode

    def before_reset(self, model):
        '''Resets per-episode state and forwards the reset to the normalised LP gain tracker.'''
        self.started = False
        self.normalised_lp_gain.before_reset(model)
        
    def get_path_node_scores(self, tree, path):
        '''Returns the 'score' attribute of each node in path, in path order.'''
        return [tree.nodes[node]['score'] for node in path]
        
    def conv_root_final_pair_to_step_idx_reward_map(self, root_node, final_node, check_depth=True):
        '''
        Converts the tree path from root_node to final_node into a dict mapping
        episode step index to reward.

        All nodes on the path are registered in self.nodes_added, even when the
        path is rejected for being too short.

        Args:
            root_node: Id of the first node of the path.
            final_node: Id of the last node of the path.
            check_depth (bool): If True, paths with fewer than
                self.min_subtree_depth nodes are rejected.

        Returns:
            dict mapping step index to reward, or None if the path was rejected.
        '''
        tree = self.normalised_lp_gain.tree.tree
        path = shortest_path(tree, source=root_node, target=final_node)
        
        # register which nodes have been directly included in the sub-tree
        self.nodes_added.update(path)
            
        if check_depth and len(path) < self.min_subtree_depth:
            # subtree not deep enough, do not use episode (but path nodes still count as added)
            return None
        
        # get rewards at each step in sub-tree episode
        path_node_rewards = self.get_path_node_scores(tree, path)

        # map the episode step idx at which each path node was visited to its reward
        return {self.visited_nodes_to_step_idx[node]: reward
                for node, reward in zip(path, path_node_rewards)}

    def _choose_final_node(self, subtree, root_node, leaf_nodes):
        '''
        Picks the leaf node which ends a sub-tree episode starting at root_node.

        Raises:
            ValueError: If self.retro_trajectory_construction is not
                'random' or 'deepest'.
        '''
        policy = self.retro_trajectory_construction
        if policy == 'random':
            # randomly choose leaf node as final node
            return random.choice(leaf_nodes)
        if policy == 'deepest':
            # choose leaf node which would lead to deepest subtree as final node
            # (max() keeps the first leaf on ties)
            return max(leaf_nodes,
                       key=lambda leaf_node: len(shortest_path(subtree, source=root_node, target=leaf_node)))
        raise ValueError(f'Unrecognised retro_trajectory_construction {policy}')

    def extract(self, model, done):
        '''
        Updates the normalised LP gain tracker and, once the episode is done,
        retrospectively constructs the sub-tree episodes.

        Args:
            model: Model passed on to the normalised LP gain tracker.
            done (bool): Whether the episode has finished.

        Returns:
            None while the episode is not done. [{0: 0}] if the tracked tree has
            no root node. Otherwise a list of dicts, each mapping episode step
            index to reward; the first element corresponds to the path from the
            root node to the last visited node.
        '''
        # update normalised LP gain tracker
        _ = self.normalised_lp_gain.extract(model, done)

        if not done:
            return None

        # NOTE(review): presumably a SearchTree-like tracker wrapping a networkx
        # DiGraph whose visited nodes carry a 'score' attribute -- confirm
        # against NormalisedLPGain.
        tree = self.normalised_lp_gain.tree.tree

        if tree.graph['root_node'] is None:
            # instance was pre-solved
            return [{0: 0}]

        # instance finished, retrospectively create subtree episode paths
        subtrees_step_idx_to_reward = []

        # keep track of which nodes have been added to a sub-tree
        self.nodes_added = set()
        
        if self.debug_mode:
            print('\nB&B tree:')
            self.normalised_lp_gain.tree.render()

        # remove nodes which were never visited by the brancher and therefore do not have a score or next state
        for node in list(tree.nodes):
            if 'score' not in tree.nodes[node]:
                tree.remove_node(node)

        # map which nodes were visited at which step in episode
        visited_nodes = [list(node.keys())[0] for node in tree.graph['visited_nodes']]
        self.visited_nodes_to_step_idx = {node: idx for idx, node in enumerate(visited_nodes)}

        # get 'optimal path' (root node to last visited node); never depth-checked
        root_node = list(tree.graph['root_node'].keys())[0]
        final_node = list(tree.graph['visited_nodes'][-1].keys())[0]
        subtrees_step_idx_to_reward.append(
            self.conv_root_final_pair_to_step_idx_reward_map(root_node, final_node, check_depth=False))

        # create sub-tree episodes from remaining B&B nodes visited by agent
        while True:
            # children of already-added nodes which have not been added themselves
            # are the roots of the sub-trees still left over
            subtree_roots = [child
                             for node in self.nodes_added
                             for child in tree.successors(node)
                             if child not in self.nodes_added]
            if not subtree_roots:
                # all sub-trees added
                break

            for subtree_root in subtree_roots:
                # depth first search tree of everything below (and including) subtree_root
                subtree = dfs_tree(tree, subtree_root)

                # get a path by choosing a leaf node as the final node in the path
                leaf_nodes = [node for node in subtree.nodes() if subtree.out_degree(node) == 0]
                final_node = self._choose_final_node(subtree, subtree_root, leaf_nodes)

                subtree_step_idx_to_reward = self.conv_root_final_pair_to_step_idx_reward_map(
                    subtree_root, final_node, check_depth=True)
                if subtree_step_idx_to_reward is not None:
                    subtrees_step_idx_to_reward.append(subtree_step_idx_to_reward)
                
        if self.debug_mode:
            print(f'visited_nodes_to_step_idx: {self.visited_nodes_to_step_idx}')
            step_idx_to_visited_nodes = {val: key for key, val in self.visited_nodes_to_step_idx.items()}
            for i, exp in enumerate(subtrees_step_idx_to_reward):
                print(f'>>> exp {i+1}: {exp}')
                exp_path = [step_idx_to_visited_nodes[idx] for idx in exp.keys()]
                print(f'path: {exp_path}')
        
        return subtrees_step_idx_to_reward

In [None]:
%autoreload
# branching agent whose action_select() picks the branching action in the rollout cell below
agent = PseudocostBranchingAgent()

# branching environment configured with the 'default' observation, information
# and reward functions and 'default' SCIP parameters (what 'default' resolves to
# is decided inside retro_branching's EcoleBranching)
env = EcoleBranching(observation_function='default',
                      information_function='default',
                      reward_function='default',
                      scip_params='default')
env.seed(seed)

# set cover instance generator: 200 rows, 200 columns, density 0.05
instances = ecole.instance.SetCoverGenerator(n_rows=200, n_cols=200, density=0.05)

In [None]:
%autoreload
# Roll out one branching episode with `agent` in `env`, recording transitions,
# the retrospective custom reward and a rendered search tree after every step.
obs = None
custom_reward = RetroBranching(normaliser='init_primal_bound', 
                                         min_subtree_depth=30, 
                                         retro_trajectory_construction='deepest',
                                         debug_mode=True)
# keep drawing instances until reset returns an observation
# (presumably obs is None when the instance needs no branching decision -- confirm)
while obs is None:
    env.seed(seed)
    instance = next(instances)
    custom_reward.before_reset(instance)
    agent.before_reset(instance)
    obs, action_set, reward, done, info = env.reset(instance)
    _custom_reward = custom_reward.extract(env.model, done)
    
t = 1  # step counter used only in the printed log
# NOTE(review): this SearchTree is a separate tracker from the one held by
# custom_reward.normalised_lp_gain (see the commented-out alternative below)
tree = SearchTree(env.model)
tree.render()
instance_transitions = []
prev_obs = copy.deepcopy(obs)
# tree = custom_reward.normalised_lp_gain.tree
while not done:
    # select branching action
    action, action_idx = agent.action_select(action_set, model=env.model, done=done)
    obs, action_set, reward, done, info = env.step(action)
    # returns None until the episode is done (see RetroBranching.extract)
    _custom_reward = custom_reward.extract(env.model, done)
    
    # on the terminal step, reuse the previous observation as next_obs
    # (presumably because no observation is returned once done -- confirm)
    if done:
        obs = copy.deepcopy(prev_obs)
    
    # store transition
    instance_transitions.append({'obs': prev_obs,
                               'action': action,
#                                'reward': reward['normalised_lp_gain'],
                               'reward': reward['num_nodes'],
                               'done': done,
                               'next_obs': obs})
    
    # update prev obs
    prev_obs = copy.deepcopy(obs)
    
    m = env.model.as_pyscipopt()
    print(f'Step {t} | Reward: {reward["num_nodes"]:.3f} | primal bound: {m.getPrimalbound()} | dual bound: {m.getDualbound()}')
    print(f'Custom reward: {_custom_reward}')
    
    # update search tree and analyse branching action
    tree.update_tree(env.model)
    tree.render()
    
    print('')
    
    t += 1
    
# episode finished: report final bounds, SCIP's total node count and the last visited node
m = env.model.as_pyscipopt()
print(f'\nFinished | primal bound: {m.getPrimalbound()} | dual bound: {m.getDualbound()} | # nodes: {m.getNTotalNodes()} | Final node: {tree.tree.graph["visited_nodes"][-1]}')
print(f'Custom reward: {_custom_reward}')
tree.render()

In [None]:
# Step-by-step, verbose version of the sub-tree episode construction performed in
# RetroBranching.extract, applied to the global `tree`. Unlike extract, no minimum
# sub-tree depth is enforced and the final leaf is always chosen at random.
# retrospectively create subtree episode paths
subtrees_step_idx_to_reward = []

# keep track of which nodes have been added to a sub-tree
nodes_added = set()

# remove nodes which were never visited by the brancher and therefore do not have a score or next state
# NOTE(review): when `tree` is the SearchTree built in the rollout cell, no node
# carries a 'score' attribute (SearchTree._add_nodes only stores '_id' and
# 'lower_bound'), so this loop would remove every node. The commented-out
# `tree = custom_reward.normalised_lp_gain.tree` in the rollout cell suggests that
# tracker's tree was the intended input -- confirm.
nodes = [node for node in tree.tree.nodes]
for node in nodes:
    if 'score' not in tree.tree.nodes[node]:
        # node never visited by brancher -> do not consider
        tree.tree.remove_node(node)

# map which nodes were visited at which step in episode
visited_nodes = [list(node.keys())[0] for node in tree.tree.graph['visited_nodes']]
visited_nodes_to_step_idx = {node: idx for idx, node in enumerate(visited_nodes)}
print(f'visited_nodes_to_step_idx: {visited_nodes_to_step_idx}')



print('\n>> optimal path <<')
render(tree.tree)
# get optimal path
root_node = list(tree.tree.graph['root_node'].keys())[0]
print(f'root_node: {root_node}')

# the last visited node is used as the end of the 'optimal path'
final_node = list(tree.tree.graph['visited_nodes'][-1].keys())[0]
print(f'final_node: {final_node}')

path = shortest_path(tree.tree, source=root_node, target=final_node)
print(f'optimal_path: {path}')

path_node_rewards = get_path_node_scores(tree.tree, path)
print(f'node rewards in path: {path_node_rewards}')

# get episode step indices at which each node in sub-tree was visited
path_to_step_idx = {node: visited_nodes_to_step_idx[node] for node in path}

# map each path node episode step idx to its corresponding reward
step_idx_to_reward = {step_idx: r for step_idx, r in zip(list(path_to_step_idx.values()), path_node_rewards)}
subtrees_step_idx_to_reward.append(step_idx_to_reward)
print(f'episode step idx to reward for nodes in path: {step_idx_to_reward}')

# register which nodes have been directly included in the sub-tree
for node in path:
    nodes_added.add(node)
print(f'nodes added so far: {nodes_added}')


# create episodes from remaining B&B nodes visited by agent
num_iters = 1
while True:
    print(f'\n >> iteration {num_iters} of creating other sub-tree episodes <<')
    # create depth first search sub-trees from nodes still leftover
    # (one sub-tree per not-yet-added child of an already-added node)
    nx_subtrees = []
    for node in nodes_added:
        children = [child for child in tree.tree.successors(node)]
        for child in children:
            if child not in nodes_added:
                nx_subtrees.append(dfs_tree(tree.tree, child))
    print(f'num sub-trees left: {len(nx_subtrees)}')
    for i, subtree in enumerate(nx_subtrees):
        print(f'> sub-tree {i+1} of {len(nx_subtrees)}')
        render(subtree)
        
        # init node scores for nodes in subtree (since these are not transferred into new subtree)
        for node in subtree.nodes:
            subtree.nodes[node]['score'] = tree.tree.nodes[node]['score']
        
        # get root of sub-tree
        # (the loop variable keeps the first node with no incoming edge after the break)
        for root_node in subtree.nodes:
            if subtree.in_degree(root_node) == 0:
                # node is root
                break
                
        # get a path by randomly choosing a leaf node as the final node in the path
        leaf_nodes = [node for node in subtree.nodes() if subtree.out_degree(node) == 0]
        final_node = leaf_nodes[random.choice(range(len(leaf_nodes)))]
        path = shortest_path(subtree, source=root_node, target=final_node)
        print(f'subtree path: {path}')
        
        # get rewards of each node in sub-tree
        path_node_rewards = get_path_node_scores(subtree, path)
        print(f'node rewards in subtree path: {path_node_rewards}')
        
        # get episode step indices at which each node in sub-tree was visited
        path_to_step_idx = {node: visited_nodes_to_step_idx[node] for node in path}
        
        # map each path node episode step idx to its corresponding reward
        step_idx_to_reward = {step_idx: r for step_idx, r in zip(list(path_to_step_idx.values()), path_node_rewards)}
        subtrees_step_idx_to_reward.append(step_idx_to_reward)
        print(f'episode step idx to reward for nodes in path: {step_idx_to_reward}')
        
        # register which nodes have been included in the sub-tree
        # (only the chosen path; other sub-tree nodes are picked up in later iterations)
        for node in path:
            nodes_added.add(node)
        print(f'nodes added so far: {nodes_added}')
        
    num_iters += 1
    if len(nx_subtrees) == 0:
        # all sub-trees added
        break

print(f'\nNum episodes retrospectively re-constructed: {len(subtrees_step_idx_to_reward)} | Episode step indices to rewards: {subtrees_step_idx_to_reward}')

In [None]:
# print(env.reward_function.extract(model=env.model, done=done))
# print(help(type(env.reward_function)))
# print(env.reward_function)
# print(dir(env.reward_function.extract.__getattribute__))