Checking whether we can get the LP solution value at the current node in SCIP.

**Update**: We can track and plot the B&B search tree, *but* we are only able to track the nodes which were not pruned from the tree. I.e. if a branching decision creates an infeasible node, that node is never added to the SCIP search tree memory, so we never see it, and therefore we don't get to reward the agent for finding infeasible nodes and removing them from the tree. Perhaps this is okay though: if we don't include the LP gain of a pruned node in the reward function, then its contribution is effectively 0, so as long as our rewards are negative (i.e. they punish the agent), the agent will not be punished for pruning nodes from the tree?

In [None]:
%load_ext autoreload
%autoreload
from retro_branching.environments import EcoleBranching, EcoleConfiguring
from retro_branching.agents import StrongBranchingAgent, PseudocostBranchingAgent, RandomAgent
from retro_branching.utils import seed_stochastic_modules_globally

import ecole
import numpy as np
import copy

import networkx as nx
import matplotlib.pyplot as plt
from networkx.drawing.nx_pydot import graphviz_layout
# from networkx.drawing.nx_agraph import graphviz_layout

# Seed value for this notebook: handed to the global seeding helper here and
# reused by later cells when calling env.seed(seed).
seed = 0 # 1
seed_stochastic_modules_globally(default_seed=seed)

In [None]:
%autoreload
# Branching policy used for the first experiment.
agent = StrongBranchingAgent()

# Branching environment with every configurable component left at 'default'.
env = EcoleBranching(observation_function='default',
                      information_function='default',
                      reward_function='default',
                      scip_params='default')
env.seed(seed)

# Set-cover instance generator: 400 rows x 400 columns, density 0.05.
instances = ecole.instance.SetCoverGenerator(n_rows=400, n_cols=400, density=0.05)

In [None]:
%autoreload
class SearchTree:
    '''
    Keeps a networkx.DiGraph copy of the B&B nodes that the solver reports
    as either the current node or an open node (leaf / child / sibling).

    Every tracked node is keyed by its node number and stores `_id` and the
    `lower_bound` it had when it was first seen; an edge parent -> node is
    added for every node that has a parent.
    '''
    def __init__(self, model):
        self.tree = nx.DiGraph()
        self.update_tree(model)

    def update_tree(self, model):
        '''
        Refresh the tracker from `model` (an object exposing as_pyscipopt()).

        Sets self.curr_node and self.parent_node (single-entry {id: node}
        dicts, {None: None} when there is nothing to record) as well as
        self.open_leaves / self.open_children / self.open_siblings, and
        adds any node not seen before to self.tree.
        '''
        scip_model = model.as_pyscipopt()

        # current node (None once branching has finished)
        focus = scip_model.getCurrentNode()
        focus_id = None if focus is None else focus.getNumber()
        self.curr_node = {focus_id: focus}

        if focus_id is None:
            self.parent_node = {None: None}
        else:
            self.add_nodes(self.curr_node)
            # the root node has no parent, in which case the id stays None
            focus_parent = focus.getParent()
            focus_parent_id = None if focus_parent is None else focus_parent.getNumber()
            self.parent_node = {focus_parent_id: focus_parent}

        # open nodes, in the order returned by getOpenNodes()
        leaves, children, siblings = scip_model.getOpenNodes()
        self.open_leaves = {n.getNumber(): n for n in leaves}
        self.open_children = {n.getNumber(): n for n in children}
        self.open_siblings = {n.getNumber(): n for n in siblings}

        for group in (self.open_leaves, self.open_children, self.open_siblings):
            self.add_nodes(group)

    def add_nodes(self, nodes):
        '''Adds each {id: node} entry (plus its parent edge) unless the id is already tracked.'''
        for node_id, node in nodes.items():
            if node_id in self.tree:
                continue

            self.tree.add_node(node_id,
                               _id=node_id,
                               lower_bound=node.getLowerbound())

            # a node without a parent is the root, so there is no edge to add
            parent = node.getParent()
            if parent is not None:
                self.tree.add_edge(parent.getNumber(), node_id)

    def render(self):
        '''Draws the tracked tree in a new matplotlib figure using the graphviz 'dot' layout.'''
        plt.figure()

        layout = graphviz_layout(self.tree, prog='dot')
        labels = {n: n for n in self.tree.nodes}

        nx.draw_networkx_nodes(self.tree, layout, label=labels)
        nx.draw_networkx_edges(self.tree, layout)
        nx.draw_networkx_labels(self.tree, layout, labels=labels)

        plt.show()

In [None]:
%autoreload
# Keep drawing instances until env.reset() returns an observation.
# (presumably obs is None when the instance needs no branching -- TODO confirm)
obs = None
while obs is None:
    env.seed(seed)
    instance = next(instances)
    agent.before_reset(instance)
    obs, action_set, reward, done, info = env.reset(instance)
    
# t is the 1-based step counter; prev_node / prev_node_id always refer to the
# node that the most recent branching decision was taken at.
t = 1
prev_node = env.model.as_pyscipopt().getCurrentNode()
prev_node_id = prev_node.getNumber()
tree = SearchTree(env.model)
# print(f'agent score: {agent.extract(env.model, done)[action_set].max()}')
scores = []
while not done:
    # select branching action
    action, action_idx = agent.action_select(action_set, env.model, done)
    obs, action_set, reward, done, info = env.step(action)
    
    # update search tree and analyse branching action
    tree.update_tree(env.model)
    tree.render()
    
    m = env.model.as_pyscipopt()
    print(f'\nStep {t} | primal bound: {m.getPrimalbound()} | dual bound: {m.getDualbound()}')
#     if not done:
#         print(f'agent score: {agent.extract(env.model, done)[action_set].max()}')
    
    curr_primal_bound = m.getPrimalbound()
    curr_node = m.getCurrentNode()
    if curr_node is not None:
        if curr_node.getParent() is not None:
            # NOTE(review): this branch fires when the current node is a direct
            # child of the node just branched on; the two messages below may be
            # swapped relative to what was intended -- confirm.
            if curr_node.getParent().getNumber() == prev_node_id:
                print('Moved to new sub-tree.')
            else:
                print('Still on same sub-tree as previous.')
    else:
        # finished instance
        curr_node = None
    # parent_node is only (re)assigned while a current node exists; it is only
    # read further down inside the same `curr_node is not None` guard.
    if curr_node is not None:
        parent_node = curr_node.getParent()
        
    # Look up what the tracker recorded for the node we just branched on and
    # for whichever of its children made it into the tracked tree.
    print(f'prev_node: {prev_node} | id: {prev_node_id}')
    prev_node_lb = tree.tree.nodes[prev_node_id]['lower_bound']
    print(f'prev_node_lb: {prev_node_lb}')
    prev_node_child_ids = [child for child in tree.tree.successors(prev_node_id)]
    print(f'prev_node_child_ids: {prev_node_child_ids}')
    prev_node_child_lbs = [tree.tree.nodes[child]['lower_bound'] for child in prev_node_child_ids]
    print(f'prev_node_child_lbs: {prev_node_child_lbs}')
    prev_node_child_lp_gains = [(child_lb - prev_node_lb) for child_lb in prev_node_child_lbs]
    print(f'prev_node_child_lp_gains: {prev_node_child_lp_gains}')
    
    if len(prev_node_child_lbs) > 0:
        # use child lp gains to retrospectively calculate a score for the previous branching decision
        # score = -1 * product over children of
        #         (primal bound - child lower bound) / (primal bound - parent lower bound)
        # NOTE(review): the denominator is zero when curr_primal_bound equals
        # prev_node_lb, which would raise ZeroDivisionError for plain floats.
        score = -1
        for child_node_lb in prev_node_child_lbs:
            score *= (curr_primal_bound - child_node_lb) / (curr_primal_bound - prev_node_lb)
    else:
        # previous branching decision led to all child nodes being pruned, infeasible, or outside bounds -> don't punish brancher
        score = 0
    print(f'custom score: {score}')
    scores.append(score)
    
    if curr_node is not None:
        print(f'curr_node: {curr_node} | id: {curr_node.getNumber()}')
        print(f'curr_node_lb: {curr_node.getLowerbound()}')
        if parent_node is not None:
            print(f'parent_node: {parent_node} | id: {parent_node.getNumber()}')
    
        # the current node becomes the "previous" node for the next step
        # (presumably getNumber() returns a plain int, making the deepcopy
        # redundant -- TODO confirm)
        prev_node = curr_node
        prev_node_id = copy.deepcopy(curr_node.getNumber())
    
    t += 1
    
    
    
    
# Episode summary: final bounds, SCIP's total node count and the summed custom scores.
m = env.model.as_pyscipopt()
print(f'\nFinished | primal bound: {m.getPrimalbound()} | dual bound: {m.getDualbound()} | nodes: {m.getNTotalNodes()} | total scores: {sum(scores)}')
# tree.render()

In [None]:
%autoreload

# Re-seed so this cell can be run independently of the cells above.
seed = 0
seed_stochastic_modules_globally(default_seed=seed)

# Trying above reward now that have implemented in retro_branching
agent = PseudocostBranchingAgent()
# agent = StrongBranchingAgent()

env = EcoleBranching(observation_function='default',
                      information_function='default',
                      reward_function='default',
                      scip_params='default')
env.seed(seed)

# Set-cover instance generator: 500 rows x 1000 columns, density 0.05.
instances = ecole.instance.SetCoverGenerator(n_rows=500, n_cols=1000, density=0.05)

# Key used below to pick one entry out of the reward returned by env.step().
agent_reward = 'normalised_lp_gain'

# Keep drawing instances until env.reset() returns an observation.
obs = None
while obs is None:
    env.seed(seed)
    instance = next(instances)
    agent.before_reset(instance)
    obs, action_set, reward, done, info = env.reset(instance)
    
t = 1
# NOTE(review): prev_node is assigned here but never read in this cell.
prev_node = env.model.as_pyscipopt().getCurrentNode()
rewards = []
# Run the episode, printing and collecting the selected reward at every step.
while not done:
    action, action_idx = agent.action_select(action_set, env.model, done)
    obs, action_set, reward, done, info = env.step(action)
    print(f'Step {t} reward: {reward[agent_reward]}')
    rewards.append(reward[agent_reward])
    
    t += 1
# Episode summary: final bounds, SCIP's total node count and the summed rewards.
m = env.model.as_pyscipopt()
print(f'\nFinished | primal bound: {m.getPrimalbound()} | dual bound: {m.getDualbound()} | nodes: {m.getNTotalNodes()} | return: {sum(rewards)}')

# Episodic sub-trees

Can we use the search tree built above to retrospectively construct episodes within sub-trees and reward the agent accordingly?

In [None]:
%autoreload
class SearchTree:
    '''
    Tracks SCIP search tree. Call SearchTree.update_tree(ecole.Model) each
    time the ecole environment (and therefore the ecole.Model) is updated.

    N.B. SCIP does not store nodes which were pruned, infeasible, outside
    the search tree's optimality bounds, or at which the optimal solution
    was found, therefore these nodes will not be stored in the SearchTree.
    This is why m.getNTotalNodes() (the total number of nodes processed by
    SCIP) will likely be more than the number of nodes in the search tree
    when an instance is solved.

    If add_optimal_node is True, a stand-in OptimalNode carrying the global
    dual bound is created once the search has finished and is added as a
    child of the last node visited before the search finished.

    Attributes:
        tree: networkx.DiGraph keyed by node number. Node attributes are
            `_id` and `lower_bound` (as first seen). Graph attributes are
            'root_node', 'optimal_node_parent', 'optimal_node' (each either
            None or a single-entry {id: node} dict) and 'visited_nodes'
            (list of {id: node} dicts, one per update with a current node).
        curr_node, parent_node: single-entry {id: node} dicts for the latest
            update ({None: None} when there is nothing to record).
        open_leaves, open_children, open_siblings: {id: node} dicts built
            from the latest getOpenNodes() call.
    '''
    def __init__(self, model, add_optimal_node=True):
        self.add_optimal_node = add_optimal_node

        self.tree = nx.DiGraph()

        self.tree.graph['root_node'] = None
        self.tree.graph['optimal_node_parent'] = None
        self.tree.graph['optimal_node'] = None
        self.tree.graph['visited_nodes'] = []

        self.update_tree(model)

    def update_tree(self, model):
        '''
        Call this method after each update to the ecole environment. Pass
        the updated ecole.Model, and the B&B tree tracker will be updated accordingly.
        '''
        m = model.as_pyscipopt()

        _curr_node = m.getCurrentNode()
        if _curr_node is not None:
            self.curr_node = {_curr_node.getNumber(): _curr_node}
            self._add_nodes(self.curr_node)
            self.tree.graph['visited_nodes'].append(self.curr_node)

            _parent_node = _curr_node.getParent()
            # curr node is the root node when it has no parent
            parent_node_id = None if _parent_node is None else _parent_node.getNumber()
            self.parent_node = {parent_node_id: _parent_node}
        else:
            # branching finished, no curr node
            self.curr_node = {None: None}
            self.parent_node = {None: None}
            self._record_optimal_node(m)

        open_leaves, open_children, open_siblings = m.getOpenNodes()
        self.open_leaves = {node.getNumber(): node for node in open_leaves}
        self.open_children = {node.getNumber(): node for node in open_children}
        self.open_siblings = {node.getNumber(): node for node in open_siblings}

        self._add_nodes(self.open_leaves)
        self._add_nodes(self.open_children)
        self._add_nodes(self.open_siblings)

    def _record_optimal_node(self, m):
        '''Records the last visited node as the optimal node's parent and, if enabled, adds the stand-in optimal node.'''
        visited_nodes = self.tree.graph['visited_nodes']
        if not visited_nodes:
            # Nothing was ever visited (e.g. tracker built on a finished
            # model), so there is no node to hang the optimal node from.
            return
        if self.tree.graph['optimal_node_parent'] is not None:
            # Already finalised by an earlier call; adding again would create
            # a second stand-in node with a fresh id.
            return

        last_visited = visited_nodes[-1]
        self.tree.graph['optimal_node_parent'] = last_visited
        if self.add_optimal_node:
            _optimal_node = OptimalNode(number=max(self.tree.nodes) + 1,
                                        parent=next(iter(last_visited)),
                                        lower_bound=m.getDualbound())
            optimal_node = {_optimal_node.getNumber(): _optimal_node}
            self.tree.graph['optimal_node'] = optimal_node
            # OptimalNode.getParent() returns the parent *id*, so pass it explicitly.
            self._add_nodes(optimal_node, parent_node_id=_optimal_node.getParent())

    def _add_nodes(self, nodes, parent_node_id=None):
        '''
        Adds nodes (and the edge from their parent) if not already in tree.

        Args:
            nodes: dict mapping node id -> node object exposing
                getLowerbound() and getParent().
            parent_node_id: optional id to use as the parent of every newly
                added node instead of node.getParent().getNumber().
        '''
        for node_id, node in nodes.items():
            if node_id in self.tree:
                continue

            # add node
            self.tree.add_node(node_id,
                               _id=node_id,
                               lower_bound=node.getLowerbound())

            _parent_node = node.getParent()
            if _parent_node is None:
                # is root node, has no parent
                self.tree.graph['root_node'] = {node_id: node}
                continue

            # add edge. The parent id is resolved per node in a separate
            # variable: overwriting `parent_node_id` here would leak the first
            # node's parent onto every later node handled in the same call.
            if parent_node_id is None:
                edge_parent_id = _parent_node.getNumber()
            else:
                # parent node id already given
                edge_parent_id = parent_node_id
            self.tree.add_edge(edge_parent_id, node_id)

    def render(self):
        '''Renders B&B search tree.'''
        plt.figure()

        pos = graphviz_layout(self.tree, prog='dot')
        node_labels = {node: node for node in self.tree.nodes}
        # `label` of draw_networkx_nodes is a legend string, not per-node
        # labels; the node labels are drawn by draw_networkx_labels below.
        nx.draw_networkx_nodes(self.tree, pos)
        nx.draw_networkx_edges(self.tree, pos)
        nx.draw_networkx_labels(self.tree, pos, labels=node_labels)

        plt.show()

class OptimalNode:
    '''
    Stand-in for the optimal node, which SCIP neither tracks nor records.

    Exposes the same three accessors the search tree tracker calls on real
    nodes (getNumber / getParent / getLowerbound); each simply returns the
    value given at construction time.
    '''
    def __init__(self, number, parent, lower_bound):
        self.number, self.parent, self.lower_bound = number, parent, lower_bound

    def getLowerbound(self):
        '''Returns the stored lower bound.'''
        return self.lower_bound

    def getParent(self):
        '''Returns the stored parent exactly as it was passed in.'''
        return self.parent

    def getNumber(self):
        '''Returns the stored node number.'''
        return self.number

In [None]:
%autoreload
# Branching policy used for the sub-tree episode experiment.
agent = RandomAgent()

# Branching environment with every configurable component left at 'default'.
env = EcoleBranching(observation_function='default',
                      information_function='default',
                      reward_function='default',
                      scip_params='default')
env.seed(seed)

# Set-cover instance generator: 500 rows x 1000 columns, density 0.05.
instances = ecole.instance.SetCoverGenerator(n_rows=500, n_cols=1000, density=0.05) # 400x400

In [None]:
%autoreload
# Keep drawing instances until env.reset() returns an observation.
obs = None
while obs is None:
    env.seed(seed)
    instance = next(instances)
    agent.before_reset(instance)
    obs, action_set, reward, done, info = env.reset(instance)
    
# Run one episode, recording one transition dict per branching step and
# updating the search tree tracker after every step.
t = 1
tree = SearchTree(env.model)
instance_transitions = []
prev_obs = copy.deepcopy(obs)
while not done:
    # select branching action
    action, action_idx = agent.action_select(action_set, model=env.model, done=done)
    obs, action_set, reward, done, info = env.step(action)
    
    # on the terminal step, reuse the previous observation as next_obs
    # (presumably because env.step() returns no observation once done -- TODO confirm)
    if done:
        obs = copy.deepcopy(prev_obs)
    
    # store transition
    instance_transitions.append({'obs': prev_obs,
                               'action': action,
                               'reward': reward['normalised_lp_gain'],
                               'done': done,
                               'next_obs': obs})
    
    # update prev obs
    prev_obs = copy.deepcopy(obs)
    
    # update search tree and analyse branching action
    tree.update_tree(env.model)
    tree.render()
    
    print(f'Step {t} | Reward: {reward["normalised_lp_gain"]:.3f}')
    t += 1
    
# Episode summary, including the last visited node and the optimal node
# entries stored on the tracker's graph.
m = env.model.as_pyscipopt()
print(f'\nFinished | primal bound: {m.getPrimalbound()} | dual bound: {m.getDualbound()} | # nodes: {m.getNTotalNodes()} | Final node: {tree.tree.graph["visited_nodes"][-1]} | Optimal node: {tree.tree.graph["optimal_node"]}')
tree.render()

In [None]:
%autoreload
from networkx.algorithms.shortest_paths.generic import shortest_path

# End points of the path of interest. Both are stored on the graph as
# single-entry {id: node} dicts, so the id is the dict's only key.
root_node = list(tree.tree.graph['root_node'])[0]
print(f'Root node: {root_node}')

optimal_node = list(tree.tree.graph['optimal_node'])[0]
print(f'Optimal node: {optimal_node}')

# Path through the tracked tree from the root to the optimal node.
optimal_path = shortest_path(tree.tree, source=root_node, target=optimal_node)
print(f'Optimal path: {optimal_path}')

# Everything on that path except the final (optimal) node.
visited_optimal_nodes = optimal_path[:-1]
print(f'Visited optimal nodes seen by agent: {visited_optimal_nodes}')

# Stored lower bound of every node on the path.
optimal_path_lps = [tree.tree.nodes[n]['lower_bound'] for n in optimal_path]
print(f'Optimal path LPs: {optimal_path_lps}')

# Node id recorded as current on each tracker update, in order.
visited_nodes = [list(entry)[0] for entry in tree.tree.graph['visited_nodes']]
print(f'Visited nodes: {visited_nodes}')

# Node id -> position in the visit order (used as the episode step index).
visited_nodes_to_step_idx = dict(zip(visited_nodes, range(len(visited_nodes))))
print(f'Visited nodes to instance episode step idx: {visited_nodes_to_step_idx}')

# Same mapping restricted to the visited nodes on the optimal path.
optimal_path_to_step_idx = {n: visited_nodes_to_step_idx[n] for n in visited_optimal_nodes}
print(f'Optimal path nodes to instance episode step idx: {optimal_path_to_step_idx}')

# Stored transitions belonging to those step indices.
optimal_path_transitions = [instance_transitions[step] for step in optimal_path_to_step_idx.values()]
print(f'Optimal path transitions: {optimal_path_transitions}')

# Retrospective per-node reward along the optimal path: the lower-bound gain
# from a node to its successor on the path, normalised by the total gain
# between the root and the optimal node.
# (alternatives tried earlier used (final_dual - child_lb) or
#  (final_dual - (child_lb - parent_lb)) in the numerator, and
#  (final_dual - parent_lb) or final_dual alone in the denominator)
final_dual = tree.tree.nodes[optimal_node]['lower_bound']
init_dual = tree.tree.nodes[root_node]['lower_bound']
rewards = []
for idx, node in enumerate(visited_optimal_nodes):
    # only the successor on the optimal path is considered, not every child
    children = [optimal_path[idx+1]]
    print(f'Parent: {node}')
    print(f'Children: {children}')
    if not children or optimal_node in children:
        # no child to score, or the child is the optimal node itself -> score of 0
        score = 0
    else:
        parent_lb = tree.tree.nodes[node]['lower_bound']
        print(f'Parent {node} LB: {parent_lb}')
        score = 1
        for child in children:
            child_lb = tree.tree.nodes[child]['lower_bound']
            print(f'Child {child} LB: {child_lb}')
            score *= (child_lb - parent_lb) / (final_dual - init_dual)
    print(f'Score: {score}')
    rewards.append(score)
print(f'Visited optimal node rewards: {rewards} | Total return: {sum(rewards):.3f}')
        




# # get leaf nodes
# leaf_nodes = [node for node in tree.tree.nodes() if tree.tree.in_degree(node) != 0 and tree.tree.out_degree(node) == 0]
# print(f'Leaf nodes: {leaf_nodes}')

# # get paths from root to leaf
# paths = [shortest_path(tree.tree, source=1, target=node)]

In [None]:
# Collect the stored lower bound of every tracked node, then print them all.
node_to_lp = {}
for node in tree.tree.nodes():
    node_to_lp[node] = tree.tree.nodes[node]['lower_bound']

for node, lp in node_to_lp.items():
    print(f'Node {node} LP: {lp}')