Create a policy evaluation table similar to Table 2 in Gasse et al. 2019 https://arxiv.org/pdf/1906.01629.pdf

- FSB
- RPB
- PCB
- SL
- RL

In [119]:
%load_ext autoreload
%autoreload

from retro_branching.utils import get_most_recent_checkpoint_foldername

import glob
import gzip
import pickle
from collections import defaultdict
import copy
import numpy as np
import scipy.stats as st

import pandas as pd
from tabulate import tabulate

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Config

In [112]:
%autoreload

difficulty_levels = ['easy'] # ['easy', 'medium', 'hard']
problem_classes = ['sc'] # ['sc', 'ca', 'cfl', 'mis']
rl_type = 'dqn_gnn'
rl_id = 1236
rl_cp = 224
win_determinator = 'Nodes' # 'Time' 'Nodes'

# Setup

In [113]:
%autoreload

# get rl agent name
rl_agent_name = f'{rl_type}_{rl_id}_checkpoint_{rl_cp}'

# map difficulty to problem type and size
difficulty_to_problem_to_size = {'easy': 
                                     {'sc': [500, 1000],
                                      'ca': [100, 500],
                                      'cfl': [100],
                                      'mis': [500]},
                                 'medium': 
                                     {'sc': [1000, 1000],
                                      'ca': [200, 1000],
                                      'cfl': [200],
                                      'mis': [1000]},
                                 'hard': 
                                     {'sc': [2000, 1000],
                                      'ca': [300, 1500],
                                      'cfl': [400],
                                      'mis': [1500]}}

# map problem class to saved imitation agent
problem_to_imitation_agent = {'sc': 'gnn_343_checkpoint_233',
                              'ca': None,
                              'cfl': None,
                              'mis': None}

# map saved agent names to their acronyms to display in table
agent_name_to_acronym = {'strong_branching': 'FSB',
                         'scip_branching': 'RPB',
                         'pseudocost': 'PCB',
                         f'{rl_agent_name}': 'RL'}
for imitation_agent in problem_to_imitation_agent.values():
    if imitation_agent is not None:
        agent_name_to_acronym[imitation_agent] = 'SL'

# get base path(s) for where validation data is stored for each benchmark
def get_problem_validation_path(difficulty_level, problem_class):
    params = difficulty_to_problem_to_size[difficulty_level][problem_class]
    if problem_class == 'sc':
        return f'/scratch/datasets/retro_branching/instances/set_cover_nrows_{params[0]}_ncols_{params[1]}_density_005_threshold_None/baselines/'
    else:
        raise NotImplemented(f'Not yet implemented validation path retrieval for problem_class {problem_class}')

validation_paths = {difficulty_level: {problem_class: [] for problem_class in problem_classes} for difficulty_level in difficulty_levels}
for difficulty_level in difficulty_levels:
    for problem_class in problem_classes:
        problem_baseline_path = get_problem_validation_path(difficulty_level, problem_class)
        validation_paths[difficulty_level][problem_class].append(problem_baseline_path)

# initialise table
headers = ['Method', 'Time', 'Wins', 'Nodes']
policy_evaluation_dict = {problem_class: 
                              {difficulty_level: 
                                   {header: [] for header in headers} 
                               for difficulty_level in difficulty_levels} 
                          for problem_class in problem_classes}
for difficulty_level in difficulty_levels:
    if difficulty_level != 'easy':
        # pop model since can share model column across different problem classes in table
        _ = policy_evaluation_dict[difficulty_level].pop('Method', None)

# Load Data

In [114]:
%autoreload

# get agent paths
difficulty_to_problem_to_agent_to_path = {difficulty_level:
                                             {problem_class: {}
                                             for problem_class in problem_classes}
                                          for difficulty_level in difficulty_levels}
for difficulty_level in validation_paths.keys():
    for problem_class in validation_paths[difficulty_level].keys():
        for validation_path in validation_paths[difficulty_level][problem_class]:
            agent_names = [path.split('/')[-1] for path in glob.glob(validation_path+'/*')]
            for agent_name in agent_names:
                difficulty_to_problem_to_agent_to_path[difficulty_level][problem_class][agent_name] = validation_path+f'/{agent_name}/'

# get agent names to show
agent_names = [name for name in agent_names if name in agent_name_to_acronym.keys()]
all_names = list(difficulty_to_problem_to_agent_to_path[difficulty_level][problem_class].keys())
for difficulty_level in difficulty_levels:
    for problem_class in problem_classes:
        for agent_name in all_names:
            if agent_name not in agent_names:
                _ = difficulty_to_problem_to_agent_to_path[difficulty_level][problem_class].pop(agent_name, None)
        
# load data
all_data = {problem_class: 
              {difficulty_level: 
                   {'raw_data': {header: [] for header in headers},
                     'mean_data': {header: [] for header in headers},
                     'ci_data': {header: [] for header in headers}} 
               for difficulty_level in difficulty_levels} 
          for problem_class in problem_classes}

for problem_class in problem_classes:
    for difficulty_level in difficulty_levels:
        for agent_name in agent_names:
            # load data
            path = difficulty_to_problem_to_agent_to_path[difficulty_level][problem_class][agent_name] + 'rl_validator/rl_validator_1/'
            path += get_most_recent_checkpoint_foldername(path)
            with gzip.open(*glob.glob(path+'/*log.pkl'), 'rb') as f:
                log = pickle.load(f)
                
            # get method acronym
            try:
                all_data[problem_class][difficulty_level]['raw_data']['Method'].append(agent_name_to_acronym[agent_name])
                all_data[problem_class][difficulty_level]['mean_data']['Method'].append(agent_name_to_acronym[agent_name])
                all_data[problem_class][difficulty_level]['ci_data']['Method'].append(agent_name_to_acronym[agent_name])
            except KeyError:
                # sharing Method column across difficulty levels in table
                pass
                
            # collect solving time data
            solving_times = [abs(np.sum(times)) for times in log[agent_name]['solving_time']]
            all_data[problem_class][difficulty_level]['raw_data']['Time'].append(solving_times)
            all_data[problem_class][difficulty_level]['mean_data']['Time'].append(np.mean(solving_times))
            ci = st.norm.interval(alpha=0.68, loc=np.mean(solving_times), scale=st.sem(solving_times))
            all_data[problem_class][difficulty_level]['ci_data']['Time'].append(ci)
        
            # collect num nodes data
            num_nodes = [abs(np.sum(nodes)) for nodes in log[agent_name]['num_nodes']]
            all_data[problem_class][difficulty_level]['raw_data']['Nodes'].append(num_nodes)
            all_data[problem_class][difficulty_level]['mean_data']['Nodes'].append(np.mean(num_nodes))
            ci = st.norm.interval(alpha=0.68, loc=np.mean(num_nodes), scale=st.sem(num_nodes))
            all_data[problem_class][difficulty_level]['ci_data']['Nodes'].append(ci)
            
        # collect % wins data (in terms of win_determinator, assume lower is better)
        win_counter = {name: 0 for name in agent_names}
        agent_idx_to_name = {idx: agent_name for idx, agent_name in enumerate(agent_names)}
        for instance_idx in range(len(solving_times)):
            agent_solving_times = [all_data[problem_class][difficulty_level]['raw_data'][win_determinator][agent_idx][instance_idx] for agent_idx in agent_idx_to_name.keys()]
            winner_agent = agent_idx_to_name[np.argmin(agent_solving_times)]
            win_counter[winner_agent] += 1
        for agent_name in agent_names:
            all_data[problem_class][difficulty_level]['raw_data']['Wins'].append(win_counter[agent_name])
            all_data[problem_class][difficulty_level]['mean_data']['Wins'].append(win_counter[agent_name])
            all_data[problem_class][difficulty_level]['ci_data']['Wins'].append(win_counter[agent_name])

# Construct Table

In [118]:
for problem_class in problem_classes:
    print(f'\n>>> {problem_class} <<<')
    for difficulty_level in difficulty_levels:
        print(all_data[problem_class][difficulty_level]['mean_data'])
        df = pd.DataFrame(all_data[problem_class][difficulty_level]['mean_data'])
        print(tabulate(df, headers='keys', tablefmt='psql', showindex=False))
        
        latex_df = copy.deepcopy(df)
#         latex_df.to_latex(index=False, multicolumn=True, buf=self.path_to_save+'/latex_summary_table', escape=False)
        latex_df.to_latex(index=False, multicolumn=True, escape=False)
        print(latex_df)


>>> sc <<<
{'Method': ['SL', 'RL', 'FSB', 'PCB'], 'Time': [4.31815555247, 6.23711978466, 11.50239698041, 0.8240305886200001], 'Wins': [19, 3, 77, 1], 'Nodes': [66.2, 88.85, 48.84, 121.14]}
+----------+-----------+--------+---------+
| Method   |      Time |   Wins |   Nodes |
|----------+-----------+--------+---------|
| SL       |  4.31816  |     19 |   66.2  |
| RL       |  6.23712  |      3 |   88.85 |
| FSB      | 11.5024   |     77 |   48.84 |
| PCB      |  0.824031 |      1 |  121.14 |
+----------+-----------+--------+---------+
  Method       Time  Wins   Nodes
0     SL   4.318156    19   66.20
1     RL   6.237120     3   88.85
2    FSB  11.502397    77   48.84
3    PCB   0.824031     1  121.14
