# V-polyhedral disjunctive cuts plotting worksheet
1. Table 1: Summary statistics for percent gap closed by VPCs --- avg (%) and number of strict wins (best by at least `EPS`), including set of all instances and set of ≥ 10% gap closed instances
2. Table 2: Average percent gap closed by num disj terms
3. Table 3: Summary statistics for time to solve instances with branch-and-bound

We select instances that meet the following criteria:
1. Belong to MIPLIB, NEOS, or COR@L
2. IP optimal value is known
3. ≤ 5000 variables and 5000 constraints (in presolved instance)
4. The partial branch-and-bound tree with 64 leaves does not find an IP optimal solution
5. The disjunctive lower bound is strictly less than the maximum objective value on any leaf node

There are some instances for which we do not have data for all 6 partial tree sizes. We include these instances in most tables, except if we are showing how some statistic changes as the disjunction increases in size.

# Section 0: Set variables, import whatever is needed, and read in data

### Global variables

In [1]:
## Global variables
EPS = 1e-7

## Set up variables containing relevant directories
import os
repos_key = 'REPOS_DIR'
try:
    REPOS_DIR = os.environ[repos_key]
    print("REPOS_DIR set to \"%s\"." % REPOS_DIR)
except KeyError:
    print("*** ERROR: %s not found!" % repos_key)

VPC_DIR = REPOS_DIR + "/vpc/"
RESULTS_DIR = VPC_DIR + "results/saved/"
DATA_DIR = VPC_DIR + "data/"

REPOS_DIR set to "/Users/akazachk/repos".


### Import data processing, plotting, and export packages and functions

In [2]:
## Import data processing, plotting, and export packages and functions
from IPython.display import display

import pandas as pd
pd.set_option("multi_sparse", True)

import numpy as np
import matplotlib.lines as mlines
from matrix2latex import matrix2latex

import matplotlib.pyplot as plt
scale=2
DPI = 200
plt.rc('text', usetex=True)
plt.rc('font', family='serif')
plt.rc('axes.spines', **{'bottom':True, 'left':True, 'right':False, 'top':False})
plt.rc('axes', titlesize=12*scale)
plt.rc('axes', labelsize=8*scale)
plt.rc('xtick', labelsize=8*scale)
plt.rc('ytick', labelsize=8*scale)
plt.rc("legend", fontsize=8*scale)
plt.rc("figure", figsize=[6*scale,4*scale])
# plt.rc("figure", figsize=[6,4])
#plt.rc("figure", figsize=[3,2])
plt.rc("savefig", dpi=DPI)

### LaTeX helper functions

In [3]:
# LaTeX helper functions
import re

def tex_escape(text):
    """
        :param text: a plain text message
        :return: the message escaped to appear correctly in LaTeX
    """
    conv = {
        '&': r'\&',
        '%': r'\%',
        '$': r'\$',
        '#': r'\#',
        '_': r'\_',
        '{': r'\{',
        '}': r'\}',
        '~': r'\textasciitilde{}',
        '^': r'\^{}',
        '\\': r'\textbackslash{}',
        '<': r'\textless{}',
        '>': r'\textgreater{}',
        '≥': r'$\ge$'
    }
    regex = re.compile('|'.join(re.escape(str(key)) for key in sorted(conv.keys(), key = lambda item: - len(item))))
    return regex.sub(lambda match: conv[match.group()], text)


def remove_presolved_from_name(name:str) -> str:
    """Remove _presolved from instance names"""
    return name.removesuffix("_presolved")




def create_multirow_string(strval: str, num_rows: int = 2, alignment: str = 'c', extra_format: str = ""):
    """
    Wrap \p strval in a multirow environment for a table.
    """
    return \
        "{" + \
        "\\multirow[" + alignment + "]{"+ str(num_rows) + "}{*}{" + \
        (extra_format + "{" if extra_format != "" else "") + \
        str(strval) + \
        ("}" if extra_format != "" else "") + \
        "}" + "}"


def format_col_as_multirow(curr_series: pd.core.series.Series):
    start_val = ''
    start_row = -1
    end_row = -1
    for val in curr_series:
        end_row += 1
        is_last_row = end_row == len(curr_series)-1
        if val != start_val or is_last_row:
            num_rows = (end_row - start_row) + is_last_row
            if start_row >= 0 and num_rows > 1:
                multirow_string = create_multirow_string(str(start_val), num_rows = num_rows)
                curr_series[start_row] = multirow_string
                if is_last_row:
                    curr_series[end_row] = ""
            start_row = end_row
            start_val = val
        else:
            curr_series[end_row] = ""


# Some columns report both floats and ints
# This is a problem for siunitx that we need to fix explicitly
# We check for any int values in the table and apply a format to all of them
from math import floor, ceil


def is_val(val1: float, val2: float) -> bool:
    return abs(val1 - val2) < 1e-7


def is_int(val):
    """
    Checks whether given value should be treated as an int.

    Currently treats zero as a float always which is not ideal.
    """
    if isinstance(val, str) and val == '':
        return False
    try:
        floatval = float(val)
    except ValueError:
        # print("ValueError: ", val)
        return False
    # print("DEBUG:", val, ":", type(val))
    rounds_to_int = is_val(floatval, floor(floatval)) and is_val(floatval, ceil(floatval))
    is_zero = is_val(floatval, 0.0)
    # is_float_zero = (isinstance(val,str) and val.find('.') >= 0 and is_zero)
    return rounds_to_int and (not is_zero)


# def is_int_style(col : pd.core.series.Series):
#     # return ['background-color: green' if is_int(v) else '' for v in col]
#     return ['background-color: green' if is_int(v) else '' for v in col]


def int_format(val, num_digits = 3, add_phantom = False):
    if is_int(val):
        new_str = "{\\tablenum[table-format=" + str(num_digits) + "]{" + str(val) + "}"
        new_str += "\\phantom{.00}" if add_phantom else ''
        new_str += "}"
        return new_str
    else:
        return val


def enclose_in_braces(val):
    return "{" + str(val) + "}"


# The styler from pandas has some limitations, particularly no way to add \midrule at arbitrary places
def add_midrule(latex: str, index: int) -> str:
    """
    Adds a midrule either `index` lines after the start or -index lines before the last line of the table

    Args:
        latex: latex table
        index: index of horizontal line insertion (in lines)
    """
    lines = latex.splitlines()
    if index >= 0:
        lines.insert(index, r'\midrule')
    else:
        index_from_bottom = -index
        lines.insert(len(lines) - index_from_bottom - 2, r'\midrule')
    return '\n'.join(lines).replace('NaN', '')


# To add adjustbox, needs to be done after LaTeX string has been generated
def add_adjustbox_environment(latex: str) -> str:
    lines = latex.splitlines()
    start_env_ind = -1
    end_env_ind = -1
    curr_ind = -1
    for line in lines:
        curr_ind += 1
        if line.startswith(r"\begin{tabular}"):
            start_env_ind = curr_ind
        if line.startswith(r"\end{tabular}"):
            end_env_ind = curr_ind+1
    if (start_env_ind >= 0 and end_env_ind >= 0):
        lines.insert(start_env_ind, r'\begin{adjustbox}{width=1\textwidth}')
        lines.insert(end_env_ind+1, r'\end{adjustbox}')
    return '\n'.join(lines).replace('NaN', '')


def add_sisetup(latex: str, table_format = "2.2") -> str:
    latex = \
"""
{
\sisetup{
    table-alignment-mode = format,
    table-number-alignment = center,
    table-format = """ + \
    table_format + ",\n" + \
        latex + \
        "\n}"
    return latex

### `initialize_df`: common way to process each data frame that we need

In [4]:
## Common way to process each data frame that we need
def initialize_df(filename):
    """
    Create a multilevel index df out of data from file `filename`.
    """
    df = pd.read_csv(filename, sep=',', index_col=False, skiprows=1)
    df.sort_values(by = ['INSTANCE','disj_terms'], inplace=True)
    df.set_index(['INSTANCE','disj_terms'], inplace=True)
    df.replace({"\'-inf\'": -np.inf, "\'inf\'": np.inf}, inplace=True)
    return df

### `df_ipopt`: Retrieve best known IP objective values

In [5]:
## Best known IP objective values
df_ipopt = pd.read_csv(DATA_DIR + "ip_obj.csv")
df_ipopt = df_ipopt.set_index(df_ipopt[df_ipopt.columns[0]])
df_ipopt.rename(columns = {'IP_OBJ' : 'IP OBJ'}, inplace=True) # for consistency with other dfs
df_ipopt = df_ipopt[~df_ipopt.index.duplicated()]
display(df_ipopt.head())
display(df_ipopt['IP OBJ']['bm23_presolved'])

Unnamed: 0_level_0,INSTANCE,IP OBJ,SET
INSTANCE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
22433,22433,21477.0,miplib2017
23588,23588,8090.0,miplib2017
10teams,10teams,924.0,miplib2017
50v-10,50v-10,3311.179984,miplib2017
a1c1s1,a1c1s1,11503.44413,miplib2017


'34'

### `df_preprocess`: Results from preprocessing instances

In [6]:
## Results from preprocessing instances
df_preprocess = pd.read_csv(RESULTS_DIR + "vpc-preprocess.csv", sep=',', index_col=False, skiprows=1)
df_preprocess = df_preprocess.set_index(df_preprocess[df_preprocess.columns[0]])
display(df_preprocess.head())
display(df_preprocess.loc['bm23','CLEANED LP OBJ'])

Unnamed: 0_level_0,INSTANCE,STRATEGY,ORIG LP OBJ,CLEANED LP OBJ,ORIG FIRST GUR NODES,CLEANED FIRST GUR NODES,ORIG BEST GUR NODES,CLEANED BEST GUR NODES,ORIG FIRST GUR TIME,CLEANED FIRST GUR TIME,...,vpc_version,cbc_version,clp_version,gurobi_version,cplex_version,ExitReason,end_time_string,time elapsed,instname,Unnamed: 137
INSTANCE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
22433,22433,536,21240.52617,21240.52617,18,34,18,34,0.344,0.281,...,#78d6a45,#0152c5f,#8bd9396,9.11,20.1.0,SUCCESS,Mon Jun 14 00:07:29 2021,0,22433,DONE
23588,23588,536,7649.866134,7649.866134,2951,940,2951,940,3.545,1.022,...,#78d6a45,#0152c5f,#8bd9396,9.11,20.1.0,SUCCESS,Mon Jun 14 00:07:33 2021,4,23588,DONE
10teams,10teams,536,917.0,917.0,130,794,130,794,2.621,12.546,...,#78d6a45,#0152c5f,#8bd9396,9.11,20.1.0,SUCCESS,Mon Jun 14 00:07:44 2021,15,10teams,DONE
2club200v15p5scn,2club200v15p5scn,536,-121.222222,-120.076923,94301,104414,94301,104414,7200.001,7200.002,...,#78d6a45,#0152c5f,#8bd9396,9.11,20.1.0,SUCCESS,Mon Jun 14 04:07:30 2021,14401,2club200v15p5scn,DONE
30_70_45_05_100,30_70_45_05_100,536,8.1,8.1,1,1,1,1,7.255,5.671,...,#78d6a45,#0152c5f,#8bd9396,9.11,20.1.0,SUCCESS,Mon Jun 14 00:07:54 2021,25,30_70_45_05_100,DONE


20.57092176

### `df_bb`: Results from generating VPCs for various number of disjunctive terms

In [7]:
## Results from generating VPCs for various number of disjunctive terms
df_bb = initialize_df(RESULTS_DIR + "vpc-bb.csv")
display(df_bb.head())

Unnamed: 0_level_0,Unnamed: 1_level_0,cutlimit,gomory,mode,partial_bb_strategy,partial_bb_num_strong,preprocess,prlp_flip_beta,rounds,strengthen,temp,...,vpc_version,cbc_version,clp_version,gurobi_version,cplex_version,ExitReason,end_time_string,time elapsed,instname,Unnamed: 273
INSTANCE,disj_terms,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
10teams_presolved,2,-1,-1,0,4,5,0,0,1,1,0,...,#fc3db01,#0152c5f,#8bd9396,9.11,20.1.0,SUCCESS,Sat Jun 26 14:14:37 2021,27,10teams_presolved,DONE
10teams_presolved,4,-1,-1,0,4,5,0,0,1,1,0,...,#fc3db01,#0152c5f,#8bd9396,9.11,20.1.0,FAIL_LIMIT,Sat Jun 26 21:40:25 2021,52,10teams_presolved,DONE
10teams_presolved,8,-1,-1,0,4,5,0,0,1,1,0,...,#fc3db01,#0152c5f,#8bd9396,9.11,20.1.0,SUCCESS,Sun Jun 27 06:03:06 2021,736,10teams_presolved,DONE
10teams_presolved,16,-1,-1,0,4,5,0,0,1,1,0,...,#fc3db01,#0152c5f,#8bd9396,9.11,20.1.0,FAIL_LIMIT,Sun Jun 27 13:14:10 2021,264,10teams_presolved,DONE
10teams_presolved,32,-1,-1,0,4,5,0,0,1,1,0,...,#fc3db01,#0152c5f,#8bd9396,9.11,20.1.0,FAIL_LIMIT,Sun Jun 27 21:37:55 2021,707,10teams_presolved,DONE


### `df`: Append to `df_bb` results from running baseline solver 7 times

In [8]:
## Append results from running baseline solver 7 times
#df = df_bb.append(initialize_df(RESULTS_DIR + "vpc-bb0.csv")) # deprecated
df = pd.concat([df_bb, initialize_df(RESULTS_DIR + "vpc-bb0.csv")])
df.sort_values(by = ['INSTANCE','disj_terms'], inplace=True)

col_list = ["BEST DISJ OBJ", "WORST DISJ OBJ"]
for col in col_list:
    df[col] = pd.to_numeric(df[col])

df['NUM DISJ TERMS'] = df.index.get_level_values(1)
# col_list = ['NUM DISJ TERMS']
# for col in col_list:
#     df[col] = pd.to_numeric(df[col])

# start = 220
# end = start + 15
# print(df.columns[start:end])
# print(df.dtypes[start:end])

display(df.head())

Unnamed: 0_level_0,Unnamed: 1_level_0,cutlimit,gomory,mode,partial_bb_strategy,partial_bb_num_strong,preprocess,prlp_flip_beta,rounds,strengthen,temp,...,vpc_version,cbc_version,clp_version,gurobi_version,cplex_version,ExitReason,end_time_string,time elapsed,instname,Unnamed: 273
INSTANCE,disj_terms,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
10teams_presolved,0,-1,0,0,4,5,0,0,1,1,0,...,#fc3db01,#0152c5f,#8bd9396,9.11,20.1.0,NO_DISJUNCTION,Mon Jun 28 17:00:24 2021,56,10teams_presolved,DONE
10teams_presolved,2,-1,-1,0,4,5,0,0,1,1,0,...,#fc3db01,#0152c5f,#8bd9396,9.11,20.1.0,SUCCESS,Sat Jun 26 14:14:37 2021,27,10teams_presolved,DONE
10teams_presolved,4,-1,-1,0,4,5,0,0,1,1,0,...,#fc3db01,#0152c5f,#8bd9396,9.11,20.1.0,FAIL_LIMIT,Sat Jun 26 21:40:25 2021,52,10teams_presolved,DONE
10teams_presolved,8,-1,-1,0,4,5,0,0,1,1,0,...,#fc3db01,#0152c5f,#8bd9396,9.11,20.1.0,SUCCESS,Sun Jun 27 06:03:06 2021,736,10teams_presolved,DONE
10teams_presolved,16,-1,-1,0,4,5,0,0,1,1,0,...,#fc3db01,#0152c5f,#8bd9396,9.11,20.1.0,FAIL_LIMIT,Sun Jun 27 13:14:10 2021,264,10teams_presolved,DONE


### Remove `stein*` instances (keep modified `stein*_nocard` instances)

In [9]:
# Remove unmodified stein instances from consideration
df.drop(index = ['stein09_presolved', 'stein15_presolved', 'stein27_presolved', 'stein45_presolved'], inplace=True)
df.index = df.index.remove_unused_levels()

### Fix mistake in code for one root pass containing wrong bound

In [492]:
inst_set = df["FIRST REF+V ROOT_PASSES"] == 1
tmp_df = df[inst_set]
tmp_df = tmp_df[["LP OBJ", "FIRST REF+V BOUND", "FIRST REF+V FIRST_CUT_PASS", "FIRST REF+V LAST_CUT_PASS"]]
tmp_df.tail(30)

tmp_tmp_df = tmp_df["LP OBJ"] - tmp_df["FIRST REF+V FIRST_CUT_PASS"]
assert(tmp_tmp_df.max() < EPS)

refcol = "FIRST REF+V BOUND"
col = "FIRST REF+V FIRST_CUT_PASS"
df.loc[inst_set,col] = tmp_df[refcol].values
col = "FIRST REF+V LAST_CUT_PASS"
df.loc[inst_set,col] = tmp_df[refcol].values

df.loc["misc02_presolved",[refcol, "FIRST REF+V FIRST_CUT_PASS","FIRST REF+V LAST_CUT_PASS"]]


Unnamed: 0_level_0,FIRST REF+V BOUND,FIRST REF+V FIRST_CUT_PASS,FIRST REF+V LAST_CUT_PASS
disj_terms,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.0,0.0,0.0
2,1690.0,1045.0,1231.017442
4,1690.0,1056.111111,1271.879433
8,1690.0,1690.0,1690.0
16,1690.0,1690.0,1690.0
32,1690.0,1690.0,1690.0
64,1690.0,1690.0,1690.0


### `instances`: get unique instance list

In [493]:
# Get unique instance list
instances = df.index.levels[0]
instances.set_names(names = 'Instance', inplace=True)

### `df_rejection_reason`: Track why instances were not selected for our statistics

In [494]:
rejection_reasons = [
    'SELECTED_GAP', # *not* rejected for gap experiments; _must_ be first column
    'SELECTED_TIME', # *not* rejected for time experiments; _must_ be second column
    'SELECTED_6TREES', # *not* rejected for 6trees set for time experiments; _must_ be third column
    'NUM_WITH_OBJS', # number of attempts that successfully tried solving the PRLP
    'NUM_WITH_CUTS', # number of attempts that successfully yielded cuts
    'IP_OPT_UNKNOWN', # ip opt val must be known
    'TOO_MANY_ROWS_OR_COLS', # require max(nrows, ncols) ≤ 5K
    'OPTIMAL_SOLUTION_FOUND', # optimal solution should not be found by any of the partial trees
    'LP_OPT_IS_NOT_CUT', # check if lp opt < ip opt
    'DLB=DUB', # check if disj lb < disj ub
    'LP=DLB=DUB', # require either lp opt < disj lb or disj lb < disj ub
    'PRLP_INFEASIBLE', # require PRLP is feasible and solves within timelimit for at least one of the attempts
    'PRLP_TIME_LIMIT', # require PRLP solves within timelimit for at least one of the attempts
    'NO_CUTS', # there must be cuts from at least one of the partial b&b trees
    'NO_GAP', # require that ip opt != lp opt
    'GUR_TIMEOUT', # require Gur7 < 3600 (Gurobi is able to solve the instance to optimality within an hour either with or without using VPCs)'
    '<7_ATTEMPTS', # indicates not all partial trees were successfully run
]
df_rejection_reason = pd.DataFrame(index = instances, columns = rejection_reasons, dtype=bool)
df_rejection_reason.iloc[:,3:] = False # no rejection criteria at true

for col in ['OPTIMAL_SOLUTION_FOUND']:
    df_rejection_reason[col] = df_rejection_reason[col].astype(np.int64)
for col in ['NUM_WITH_OBJS', 'NUM_WITH_CUTS', 'LP_OPT_IS_NOT_CUT', 'DLB=DUB', 'LP=DLB=DUB', 'PRLP_INFEASIBLE', 'PRLP_TIME_LIMIT']:
    df_rejection_reason[col] = df_rejection_reason[col].astype(np.int8)
display(df_rejection_reason.head())

Unnamed: 0_level_0,SELECTED_GAP,SELECTED_TIME,SELECTED_6TREES,NUM_WITH_OBJS,NUM_WITH_CUTS,IP_OPT_UNKNOWN,TOO_MANY_ROWS_OR_COLS,OPTIMAL_SOLUTION_FOUND,LP_OPT_IS_NOT_CUT,DLB=DUB,LP=DLB=DUB,PRLP_INFEASIBLE,PRLP_TIME_LIMIT,NO_CUTS,NO_GAP,GUR_TIMEOUT,<7_ATTEMPTS
Instance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
10teams_presolved,True,True,True,0,0,False,False,0,0,0,0,0,0,False,False,False,False
22433_presolved,True,True,True,0,0,False,False,0,0,0,0,0,0,False,False,False,False
23588_presolved,True,True,True,0,0,False,False,0,0,0,0,0,0,False,False,False,False
30n20b8_presolved,True,True,True,0,0,False,False,0,0,0,0,0,0,False,False,False,False
50v-10_presolved,True,True,True,0,0,False,False,0,0,0,0,0,0,False,False,False,False


### `map_rejection_reason_to_number`: Reference paper's rejection criteria

In [495]:
# map_rejection_reason_to_number = {
#     'OPTIMAL_SOLUTION_FOUND':   '(3)',
#     'LP=DLB=DUB':               '(4a)',
#     'PRLP_INFEASIBLE':          '(4b)',
#     'PRLP_TIME_LIMIT':          '(4c)',
#     '<7_ATTEMPTS':              '(?)',
# }
map_rejection_reason_to_number = {
    'IP_OPT_UNKNOWN':           '\\ref{selection-criterion:ip-opt-known}',
    'NO_GAP':                   '\\ref{selection-criterion:ip-opt-known}',
    'TOO_MANY_ROWS_OR_COLS':    '\\ref{selection-criterion:max-instance-size}',
    'OPTIMAL_SOLUTION_FOUND':   '\\ref{selection-criterion:partial-tree-does-not-find-opt}',
    'LP=DLB=DUB':               '\\ref{selection-criterion:cuts-are-generated:not_lp=dlb=dub}',
    'PRLP_INFEASIBLE':          '\\ref{selection-criterion:cuts-are-generated:PRLP-primal-feasible}',
    'PRLP_TIME_LIMIT':          '\\ref{selection-criterion:cuts-are-generated:PRLP-time-limit}',
    'NO_CUTS':                  '\\ref{selection-criterion:cuts-are-generated:cuts-are-generated}',
    'GUR_TIMEOUT':              'G',
    '<7_ATTEMPTS':              '?',
}

### `df_status_by_depth`: Track success or failure reason by depth

In [496]:
sizes = [2, 4, 8, 16, 32, 64]
df_status_by_depth = pd.DataFrame(index = instances, columns = sizes, dtype=str)

DEFAULT_STATUS = map_rejection_reason_to_number['<7_ATTEMPTS']

df_status_by_depth[:] = DEFAULT_STATUS

display(df_status_by_depth.head())

Unnamed: 0_level_0,2,4,8,16,32,64
Instance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
10teams_presolved,?,?,?,?,?,?
22433_presolved,?,?,?,?,?,?
23588_presolved,?,?,?,?,?,?
30n20b8_presolved,?,?,?,?,?,?
50v-10_presolved,?,?,?,?,?,?


### DEBUG

In [497]:
# col = "REF+V FIRST_CUT_PASS"
# tmp = df[col]
# display(tmp)

# for col in df.columns:
#     if str(col).endswith("FIRST_CUT_PASS"):
#         print("{}".format(col))

# inst = 'neos22_presolved'
# col = 'NUM DISJ TERMS'
# df.loc[inst][col]

# display(df.loc[('bppc4-08_presolved',2)]['LP OBJ'])
# display(df.loc[('bppc4-08_presolved',2)]['BEST DISJ OBJ'])
# display(df.loc[('bppc4-08_presolved',2)]['WORST DISJ OBJ'])
# display(df['BEST DISJ OBJ'])

# Section 1: Select instances

### `selected_gap_instances_dict` (original index, instance): Select instances for gap closed calculations

Criteria to filter gap closed instances:
* ip opt val is known
* lp opt < ip opt
* max(nrows, ncols) ≤ 5K
* optimal solution should not be found by any of the partial trees
* either lp opt < disj lb or disj lb < disj ub
* PRLP is feasible and solves within timelimit for at least one of the attempts

In [498]:
## Select instances for gap closed calculations
#
# Criteria to filter gap closed instances:
# * ip opt val is known
# * lp opt < ip opt
# * max(nrows, ncols) ≤ 5K
# * optimal solution should not be found by any of the partial trees
# * either lp opt < disj lb or disj lb < disj ub
# * PRLP is feasible and solves within timelimit for at least one of the attempts

# Constants
MAX_ROWS = 5000
MAX_COLS = MAX_ROWS
PRINT_SKIP_REASON = False

# Information to save
selected_gap_instances_dict = {} # dictionary of (original index, instance)
#selected_indices = []
num_gap_errors = 0

inst_set = instances
num_attempts = np.zeros(len(inst_set), dtype=int)

for i, inst in enumerate(inst_set):
    print("{}/{}".format(i+1,len(inst_set)), end='\r', flush=True)
    skip_instance = False
    curr_df = df.loc[inst]
    
    # Count number of times instance appears
    num_attempts[i] = len(curr_df)

    if num_attempts[i] < 7:
        print("*** ERROR: Instance {:d} -- {}: {:d} < 7 attempts.".format(i, inst, num_attempts[i]))
        skip_instance = True
        num_gap_errors += 1
        df_rejection_reason.loc[inst, '<7_ATTEMPTS'] = True

    # Check that LP opt < IP opt
    lp_obj = np.float64(df_preprocess.loc[remove_presolved_from_name(inst),'CLEANED LP OBJ'])
    ip_obj = np.float64(df_ipopt.loc[inst,'IP OBJ'])
    YES_GAP = abs(ip_obj - lp_obj) >= 1e-7
    if not YES_GAP:
        print("*** ERROR: Instance {:d} -- {}: not YES GAP (lp = {:.10f}; ip = {:.10f})".format(i, inst, lp_obj, ip_obj))
        skip_instance = True
        num_gap_errors += 1
        df_rejection_reason.loc[inst, 'NO_GAP'] = True
        
    # Check that ExitReason != OPTIMAL_SOLUTION_FOUND
    OPT_SOL_FOUND = False
    for curr_index, row in curr_df.iterrows():
        #print(i,j, curr_df['ExitReason'])
        curr_depth = int(curr_index)
        if curr_depth == 0:
            continue
        exitreason = row['ExitReason']
        if exitreason == 'OPTIMAL_SOLUTION_FOUND' and not OPT_SOL_FOUND:
            if PRINT_SKIP_REASON:
                print("Skipping instance {:d} -- {}: optimal IP solution found at depth {:d}.".format(
                    i, inst, curr_depth
                ))
            skip_instance = True
            OPT_SOL_FOUND = True
            df_rejection_reason.loc[inst, 'OPTIMAL_SOLUTION_FOUND'] = curr_depth
        if OPT_SOL_FOUND:
            df_status_by_depth.loc[inst, curr_depth] = map_rejection_reason_to_number['OPTIMAL_SOLUTION_FOUND']
        else:
            df_status_by_depth.loc[inst, curr_depth] = ''

    # Check that best and worst bound on leaf nodes is not same (likely cause of primal infeasible PRLP)
    num_successful_attempts = 0
    has_zero = False
    terms = curr_df.index
    for curr_index in terms:
        if curr_df['NUM DISJ TERMS'][curr_index] == 0:
            has_zero = True
            continue
            
        lp_obj = curr_df['LP OBJ'][curr_index]
        ip_obj = curr_df['IP OBJ'][curr_index]
        best_disj_obj = curr_df['BEST DISJ OBJ'][curr_index]
        worst_disj_obj = curr_df['WORST DISJ OBJ'][curr_index]
        num_frac = curr_df['NUM FRAC'][curr_index]
        num_obj_tried = curr_df['NUM OBJ'][curr_index]
        num_cuts = curr_df['NUM VPC'][curr_index] # can be > 0 even if num_obj_tried = 0, b/c of OPTIMAL_SOLUTION_FOUND exit reason
        exitreason = curr_df['ExitReason'][curr_index]

        YES_GAP = abs(ip_obj - lp_obj) >= 1e-7
        LP_OPT_IS_CUT = (num_frac > 0) and YES_GAP and abs(lp_obj - worst_disj_obj) >= 1e-7
        DLB_NE_DUB = (num_frac > 0) and abs(best_disj_obj - worst_disj_obj) >= 1e-7
        df_rejection_reason.loc[inst, 'NO_GAP'] += (not YES_GAP)
        df_rejection_reason.loc[inst, 'LP_OPT_IS_NOT_CUT'] += (not LP_OPT_IS_CUT)
        df_rejection_reason.loc[inst, 'DLB=DUB'] += (not DLB_NE_DUB)
        df_rejection_reason.loc[inst, 'PRLP_INFEASIBLE'] += (exitreason == 'PRLP_INFEASIBLE')
        df_rejection_reason.loc[inst, 'PRLP_TIME_LIMIT'] += (exitreason == 'PRLP_TIME_LIMIT')
        # if not DLB_NE_DUB and num_obj_tried > 0:
        #     raise ValueError(
        #         "*** ERROR: Instance {:d} -- {}: at depth {:d}, num obj tried = {:d} (num cuts = {:d}) but lp opj {:.10f}, best_disj_obj {:.10f} = worst_disj_obj {:.10f} with exit reason {}".format(
        #             i, inst, curr_index, num_obj_tried, num_cuts, lp_obj, best_disj_obj, worst_disj_obj, curr_df['ExitReason'][curr_index]
        #         )
        #     )
        if LP_OPT_IS_CUT or DLB_NE_DUB:
            if (num_obj_tried == 0) and (exitreason not in ['PRLP_TIME_LIMIT','PRLP_INFEASIBLE','OPTIMAL_SOLUTION_FOUND']):
                # We should be trying objectives at this point, unless the initial PRLP timed out or was infeasible or an optimal solution was found
                raise ValueError(
                    "*** ERROR: Instance {:d} -- {}: at depth {:d}, num obj tried = 0 but lp opj {:.10f} < best_disj_obj {:.10f} < worst_disj_obj {:.10f} with exit reason {}".format(
                        i, inst, curr_index, lp_obj, best_disj_obj, worst_disj_obj, curr_df['ExitReason'][curr_index]
                    )
                )
            if num_obj_tried > 0:
                df_rejection_reason.loc[inst, 'NUM_WITH_OBJS'] += 1
                if num_cuts > 0:
                    num_successful_attempts += 1
                    df_rejection_reason.loc[inst, 'NUM_WITH_CUTS'] += 1
                else:
                    df_status_by_depth.loc[inst, curr_index] = map_rejection_reason_to_number['NO_CUTS']
            elif exitreason == 'PRLP_INFEASIBLE':
                df_status_by_depth.loc[inst, curr_index] = map_rejection_reason_to_number[exitreason]
            elif exitreason == 'PRLP_TIME_LIMIT':
                df_status_by_depth.loc[inst, curr_index] = map_rejection_reason_to_number[exitreason]
        else:
            # check that num obj tried is 0
            if (num_obj_tried > 0):
                raise ValueError(
                    "*** ERROR: Instance {:d} -- {}: at depth {:d}, num obj tried = {:d} > 0 but best_disj_obj {:f} = worst_disj_obj {:f}".format(
                        i, inst, curr_index, num_obj_tried, best_disj_obj, worst_disj_obj
                    )
                )
            df_rejection_reason.loc[inst, 'LP=DLB=DUB'] += 1
            df_status_by_depth.loc[inst, int(curr_index)] = map_rejection_reason_to_number['LP=DLB=DUB']

    if not has_zero:
        raise ValueError(
            "*** ERROR: Instance {:d} -- {}: has no bb0 entry.".format(
                i, inst, curr_index
            )
        )        
    
    if num_successful_attempts == 0 and not skip_instance:
        if PRINT_SKIP_REASON:
            print("Skipping instance {:d} -- {}: best and worst bound on leaf nodes coincide for all trees, no objectives ever tried, or no objectives successfully produced cuts.".format(
                i, inst, num_attempts[i]))
        skip_instance = True
        exitreason = 'NO_CUTS'
        df_rejection_reason.loc[inst, exitreason] = True
    else:        
        # Ensure IP objective value is known
        ip_obj = curr_df['IP OBJ'][curr_df.index[0]]
        if not isinstance(ip_obj,float):
            if PRINT_SKIP_REASON:
                print(
                    "Skipping instance {:d} -- {}: IP objective value ({}) is not detected to be a float value.".format(
                    i, inst, ip_obj))
            skip_instance = True
            df_rejection_reason.loc[inst, 'IP_OPT_UNKNOWN'] = True
            
        # Ensure nrows and ncols is not too many
        nrows = curr_df['ROWS'][curr_df.index[0]]
        ncols = curr_df['COLS'][curr_df.index[0]]
        if (nrows > MAX_ROWS) or (ncols > MAX_COLS):
            if PRINT_SKIP_REASON:
                print("Skipping instance {:d} -- {}: nrows = {:d} > {:d} or ncols = {:d} > {:d}.".format(
                        i, inst, nrows, ncols, MAX_ROWS, MAX_COLS))
            skip_instance = True
            df_rejection_reason.loc[inst, 'TOO_MANY_ROWS_OR_COLS'] = True
    
    if not skip_instance:
        #selected_gap_instances_dict[len(selected_gap_instances_dict)] = inst
        selected_gap_instances_dict[inst] = i
    else:
        df_rejection_reason.loc[inst, 'SELECTED_GAP'] = False

num_selected_gap_instances = len(selected_gap_instances_dict)
print("Total number of errors: {}".format(num_gap_errors))
print("Total number of selected instances for gap closed reporting: {}/{:d}".format(num_selected_gap_instances,len(instances)))

*** ERROR: Instance 116 -- lrn_presolved: 6 < 7 attempts.
*** ERROR: Instance 207 -- neos-3048764-nadi_presolved: not YES GAP (lp = -3883988306.0000000000; ip = -3883988306.0000000000)
*** ERROR: Instance 211 -- neos-3214367-sovi_presolved: 6 < 7 attempts.
*** ERROR: Instance 228 -- neos-3734794-moppy_presolved: 1 < 7 attempts.
Total number of errors: 4
Total number of selected instances for gap closed reporting: 332/431


### `selected_time_instances_dict` and `all6_instances_dict` (original index, instance): Select instances for time tables

Criteria to filter instances for reporting time:
* ip opt val is known
* lp opt < ip opt
* max(nrows, ncols) ≤ 5K
* optimal solution should not be found by any of the partial trees
* either lp opt < disj lb or disj lb < disj ub
* PRLP is feasible and solves within timelimit for at least one of the attempts
* Gur7 < 3600 (Gurobi is able to solve the instance to optimality within an hour either with or without using VPCs)

6 trees set
* all six partial tree sizes produced VPCs

In [499]:
## Select instances for time tables
#
# Criteria to filter instances for reporting time:
# * ip opt val is known
# * lp opt < ip opt 
# * max(nrows, ncols) ≤ 5K
# * optimal solution should not be found by any of the partial trees
# * either lp opt < disj lb or disj lb < disj ub
# * PRLP is feasible and solves within timelimit for at least one of the attempts
# * Gur7 < 3600 (Gurobi is able to solve the instance to optimality within an hour either with or without using VPCs)
#
# 6 trees set
# * all six partial tree sizes produced VPCs

# Constants
MAX_TIME = 3600
PRINT_SKIP_REASON = False

# Information to save
selected_time_instances_dict = {}   # dictionary of (original index, instance)
all6_instances_dict = {}            # dictionary of (original index, instance)
skipped_instances_dict = {}         # dictionary of (original index, instance)
error_instances_dict = {}           # dictionary of (original index, instance)

num_timeouts = 0
num_time_errors = 0

inst_set = list(selected_gap_instances_dict.keys())
# inst_set = ['lotsize_presolved']
for i, inst in enumerate(inst_set):
    print("{}/{}".format(i+1,len(inst_set)), end='\r', flush=True)
    skip_instance = False
    curr_df = df.loc[inst]

    # Check Gur < 3600 (Gurobi is able to solve the instance to optimality within an hour without using VPCs)
    col = 'BEST REF TIME'
    mintime_gur = float(curr_df.loc[0,col].min())
    
    # Check Gur7 < 3600 (Gurobi is able to solve the instance to optimality within an hour either with or without using VPCs)
    col = 'BEST REF+V TIME'
    mintime_gur7 = float(curr_df.loc[2:64,col].min())

    mintime = min(mintime_gur, mintime_gur7)
    if mintime > MAX_TIME - EPS:
        if PRINT_SKIP_REASON:
            print("{:d}: Skipping instance {:d} -- {}: Gurobi's best time (with or without VPCs) is {:.7f} >= {:.7f}.".format(
                    len(skipped_instances_dict), i, inst, mintime, MAX_TIME-EPS
                ))
        skip_instance = True
        skipped_instances_dict[inst] = i
        num_timeouts += 1
        df_rejection_reason.loc[inst, 'GUR_TIMEOUT'] += 1
        # df_status_by_depth.loc[inst, int(curr_index)] = map_rejection_reason_to_number['GUR_TIMEOUT']

    # Check how many times VPCs were successfully generated
    num_successful_attempts = 0
    has_zero = False
    for curr_index, row in curr_df.iterrows():
        if row['NUM DISJ TERMS'] == 0:
            has_zero = True
            continue

        num_vpc = float(row['NUM VPC'])
        num_successful_attempts += (num_vpc > 0)

        if df_status_by_depth.loc[inst, int(curr_index)] == DEFAULT_STATUS:
            curr_time = float(curr_df.loc[curr_index,col])
            if curr_time > MAX_TIME - EPS:
                df_status_by_depth.loc[inst, int(curr_index)] = map_rejection_reason_to_number['GUR_TIMEOUT']

    if not has_zero:
        raise ValueError(
            "*** ERROR: Instance {:d} -- {}: has no bb0 entry.".format(
                i, inst, curr_index
            )
        )        
    
    # if num_successful_attempts == 0 and not skip_instance:
    #     if PRINT_SKIP_REASON:
    #         print("Skipping instance {:d} -- {}: no VPCs generated successfully for any number of terms.".format(i, inst, num_attempts[i]))
    #     skip_instance = True
    #     skipped_instances_dict[inst] = i

    if not skip_instance:
        if num_successful_attempts == 6:
            all6_instances_dict[inst] = i
        #selected_time_instances_dict[len(selected_time_instances_dict)] = inst
        selected_time_instances_dict[inst] = i

num_selected_time_instances = len(selected_time_instances_dict)
num_all6_instances = len(all6_instances_dict)
print("Total number of errors: {}".format(num_time_errors))
print("Total number of timeouts: {}".format(num_timeouts))
print("Total number of instances for time reporting: {}".format(num_selected_time_instances))
print("Total number of \"6 trees\" instances: {}".format(num_all6_instances))

Total number of errors: 0
Total number of timeouts: 34
Total number of instances for time reporting: 298
Total number of "6 trees" instances: 205


#### DEBUG (check which instances were selected but do not have all six runs)

In [500]:
## DEBUG (check which instances were selected but do not have all six runs)
not_all_6 = [key for key in selected_time_instances_dict.keys() if key not in all6_instances_dict.keys()]
not_all_6

['10teams_presolved',
 '30n20b8_presolved',
 'a1c1s1_presolved',
 'berlin_5_8_0_presolved',
 'bg512142_presolved',
 'bppc4-08_presolved',
 'cod105_presolved',
 'cvs08r139-94_presolved',
 'cvs16r106-72_presolved',
 'cvs16r128-89_presolved',
 'cvs16r70-62_presolved',
 'cvs16r89-60_presolved',
 'danoint_presolved',
 'eilB101_presolved',
 'eild76_presolved',
 'f2gap801600_presolved',
 'graph20-20-1rand_presolved',
 'graphdraw-domain_presolved',
 'hgms-det_presolved',
 'ic97_potential_presolved',
 'ic97_tension_presolved',
 'icir97_tension_presolved',
 'mine-90-10_presolved',
 'mkc1_presolved',
 'n2seq36f_presolved',
 'n4-3_presolved',
 'n6-3_presolved',
 'neos-1058477_presolved',
 'neos-1112782_presolved',
 'neos-1112787_presolved',
 'neos-1200887_presolved',
 'neos-1215259_presolved',
 'neos-1225589_presolved',
 'neos-1330346_presolved',
 'neos-1396125_presolved',
 'neos-1413153_presolved',
 'neos-1415183_presolved',
 'neos-1480121_presolved',
 'neos-1582420_presolved',
 'neos-1595230_pre

# Section 2: Gap closed tables

### `gap_df`: Calculate gap closed for GMICs, Gurobi, and VPCs

In [501]:
## Calculate gap closed for GMICs, Gurobi, and VPCs
def calc_gap_closed(gap_df, col):
    return np.where(
        gap_df[col] > EPS, # condition
        100. * (gap_df[col] - gap_df["LP OBJ"]) / (gap_df["IP OBJ"] - gap_df["LP OBJ"]), # if condition is true
        0.0 # if condition is false
    )


def calc_gap_closed2(gap_df, col):
    conditions = gap_df[col] > EPS & np.isfinite(gap_df[col])
    choices = 100. * (gap_df[col] - gap_df["LP OBJ"]) / (gap_df["IP OBJ"] - gap_df["LP OBJ"])
    return np.select(conditions, choices, default=0.0)


# Create subset of dataframe relevant to gap closed
gap_df = df.loc[:, 
                [
                    'NUM DISJ TERMS',
                    'ROWS',
                    'COLS',
                    'LP OBJ',
                    'BEST DISJ OBJ',
                    'WORST DISJ OBJ',
                    'IP OBJ',
                    'GMIC OBJ',
                    'VPC OBJ',
                    'VPC+GMIC OBJ',
                    'FIRST REF FIRST_CUT_PASS',
                    'FIRST REF+V FIRST_CUT_PASS',
                    'FIRST REF LAST_CUT_PASS',
                    'FIRST REF+V LAST_CUT_PASS',
                    'NUM GMIC',
                    'NUM VPC',
                    'NUM OBJ',
                    'ExitReason']
               ]

# Calculate some missing % gap closed columns
# gap closed = 100 * (post_cut_opt_val - lp_opt_val) / (ip_opt_val - lp_opt_val)
cut_type = "GMIC"
col = cut_type + " OBJ"
gap_df[cut_type + " % GAP CLOSED"] = calc_gap_closed(gap_df, col)

cut_type = "BEST DISJ"
col = cut_type + " OBJ"
gap_df[cut_type + " % GAP CLOSED"] = calc_gap_closed(gap_df, col)

cut_type = "VPC"
col = cut_type + " OBJ"
gap_df[cut_type + " % GAP CLOSED"] = calc_gap_closed(gap_df, col)

cut_type = "VPC+GMIC"
col = cut_type + " OBJ"
gap_df[cut_type + " % GAP CLOSED"] = calc_gap_closed(gap_df, col)

col = "REF FIRST_CUT_PASS"
gap_df[col + " % GAP CLOSED"] = calc_gap_closed(gap_df, "FIRST " + col)
col = "REF+V FIRST_CUT_PASS"
gap_df[col + " % GAP CLOSED"] = calc_gap_closed(gap_df, "FIRST " + col)
col = "REF LAST_CUT_PASS"
gap_df[col + " % GAP CLOSED"] = calc_gap_closed(gap_df, "FIRST " + col)
col = "REF+V LAST_CUT_PASS"
gap_df[col + " % GAP CLOSED"] = calc_gap_closed(gap_df, "FIRST " + col)

display(gap_df.loc['bm23_presolved'])
display(gap_df.loc[("bm23_presolved",2)])

Unnamed: 0_level_0,NUM DISJ TERMS,ROWS,COLS,LP OBJ,BEST DISJ OBJ,WORST DISJ OBJ,IP OBJ,GMIC OBJ,VPC OBJ,VPC+GMIC OBJ,...,NUM OBJ,ExitReason,GMIC % GAP CLOSED,BEST DISJ % GAP CLOSED,VPC % GAP CLOSED,VPC+GMIC % GAP CLOSED,REF FIRST_CUT_PASS % GAP CLOSED,REF+V FIRST_CUT_PASS % GAP CLOSED,REF LAST_CUT_PASS % GAP CLOSED,REF+V LAST_CUT_PASS % GAP CLOSED
disj_terms,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0,20,27,20.570922,-inf,-inf,34.0,,,,...,0,NO_DISJUNCTION,0.0,0.0,0.0,0.0,20.867452,0.0,37.49489,0.0
2,2,20,27,20.570922,21.483725,27.267238,34.0,22.828302,21.483725,22.960493,...,7,CUT_LIMIT,16.809643,6.797215,6.797215,17.794009,0.0,20.97728,0.0,34.778873
4,4,20,27,20.570922,22.53788,29.935573,34.0,22.828302,22.53788,23.250549,...,6,CUT_LIMIT,16.809643,14.647006,14.647006,19.953919,0.0,21.850177,0.0,39.502918
8,8,20,27,20.570922,22.936206,35.613171,34.0,22.828302,22.936206,23.250033,...,6,CUT_LIMIT,16.809643,17.61315,17.61315,19.950078,0.0,21.960083,0.0,43.457377
16,16,20,27,20.570922,25.881188,35.613171,34.0,22.828302,25.708479,25.708479,...,6,CUT_LIMIT,16.809643,39.543044,38.256964,38.256964,0.0,38.980155,0.0,47.717947
32,32,20,27,20.570922,28.16187,44.56683,34.0,22.828302,28.16187,28.16187,...,6,CUT_LIMIT,16.809643,56.526208,56.526208,56.526208,0.0,57.046658,0.0,61.994121
64,64,20,27,20.570922,30.168224,56.839578,34.0,22.828302,29.920824,29.920824,...,6,CUT_LIMIT,16.809643,71.466577,69.624303,69.624303,0.0,66.200094,0.0,71.325798


NUM DISJ TERMS                               2
ROWS                                        20
COLS                                        27
LP OBJ                               20.570922
BEST DISJ OBJ                        21.483725
WORST DISJ OBJ                       27.267238
IP OBJ                                    34.0
GMIC OBJ                             22.828302
VPC OBJ                              21.483725
VPC+GMIC OBJ                         22.960493
FIRST REF FIRST_CUT_PASS                   0.0
FIRST REF+V FIRST_CUT_PASS           23.387977
FIRST REF LAST_CUT_PASS                    0.0
FIRST REF+V LAST_CUT_PASS            25.241404
NUM GMIC                                     6
NUM VPC                                      6
NUM OBJ                                      7
ExitReason                           CUT_LIMIT
GMIC % GAP CLOSED                    16.809643
BEST DISJ % GAP CLOSED                6.797215
VPC % GAP CLOSED                      6.797215
VPC+GMIC % GA

### `selected_gap_df`: Gap closed for selected instances, adding 0-row that has best for `V+` cols

In [502]:
## `selected_gap_df`: Gap closed for selected instances, adding 0-row that has best for `V+` cols
## Show the instances that have been selected (and their original index)
## and then set the selected_gap_df as the selected instances from gap_df
## We also set the '0' row to contain the best result for each method
## (including the option of not using VPCs at all)
## and we replace any runs with no VPCs with the values obtained without them
selected_gap_df = gap_df.loc[selected_gap_instances_dict.keys()]

# From https://pandas.pydata.org/docs/user_guide/advanced.html#defined-levels
# "The MultiIndex keeps all the defined levels of an index, even if they are not actually used.
# When slicing an index, you may notice this."
# Even without using remove_unused_levels, index was correct with selected_gap_df.index.get_level_values(0).unique()
selected_gap_df.index = selected_gap_df.index.remove_unused_levels()

#display(selected_gap_df.index.difference(gap_df.index))
#selected_gap_df.drop(['22433_presolved'])

# # Check what the selected_gap_df contains for bm23
# inst = "bm23_presolved"
# display(selected_gap_df.loc[inst])

#inst = "10teams_presolved"
# inst = '22433_presolved'
# curr_df = selected_gap_df.loc[inst]
# display(curr_df)
# # for i in curr_df.index:
# #     display(curr_df.loc[i])

#display(selected_gap_df.index.get_level_values(0).unique())

col_gmic        = 'GMIC % GAP CLOSED'
col_best_disj   = 'BEST DISJ % GAP CLOSED'
col_vpc         = 'VPC % GAP CLOSED'
col_vpc_gmic    = 'VPC+GMIC % GAP CLOSED'
col_first_ref   = 'REF FIRST_CUT_PASS % GAP CLOSED'
col_first_ref_v = 'REF+V FIRST_CUT_PASS % GAP CLOSED'
col_last_ref    = 'REF LAST_CUT_PASS % GAP CLOSED'
col_last_ref_v  = 'REF+V LAST_CUT_PASS % GAP CLOSED'
col_num_vpcs    = 'NUM VPC'
gap_cols = [
    col_gmic,
    col_best_disj,
    col_vpc,
    col_vpc_gmic,
    col_first_ref,
    col_first_ref_v,
    col_last_ref,
    col_last_ref_v,
]

# Do we update the value of the "best" in each column when no VPCs are generated for a run and we use the "no-VPCs" data?
# This may cause the stats in the "best" row to improve
# For example, we replace V+GurF with GurF when no VPCs are generated, since that is what would occur without VPCs
# But if GurF is better than any V+GurF when VPCs are produced, then the average in the max-row is inflated
SHOULD_UPDATE_MAX_WHEN_NO_VPCS = True

# inst_set = selected_gap_df.index.get_level_values(0).unique()
inst_set = selected_gap_df.index.levels[0]
num_inst = len(inst_set)
for curr_inst_ind, inst in enumerate(inst_set):
    print("{}/{}".format(curr_inst_ind+1,num_inst), end='\r', flush=True)
    curr_df = selected_gap_df.loc[inst].copy() # copy needed to not throw SettingWithCopyWarning

    # Set 0-row to have max values across all rows for this instance
    max_vals = curr_df[gap_cols].max()
    selected_gap_df.loc[(inst,0),gap_cols] = max_vals

    for ind in curr_df.index:
        if ind == 0:
            continue

        # Propogate GurF and GurL down
        sel_gap = [col_first_ref, col_last_ref]
        selected_gap_df.loc[(inst,ind),sel_gap] = curr_df.loc[0,sel_gap]

        # If no VPCs produced, the values for V+GurF and V+GurL have not been provided
        # We replace these by GurF and GurL
        # Currently disabled: update max for that column too (if disabled, we instead keep max as the value among those that generated VPCs)
        num_vpc = curr_df.loc[ind,col_num_vpcs]
        if num_vpc == 0:
            # print("Zero cuts for inst {} at depth {:d}".format(inst, ind))
            ref_gap = [col_first_ref, col_last_ref] # this is where we pull info from
            refinds = [gap_cols.index(colname) for colname in ref_gap] 
            sel_gap = [col_first_ref_v, col_last_ref_v] # this is where we put the info
            selected_gap_df.loc[(inst,ind),sel_gap] = curr_df.loc[0,ref_gap].to_numpy()

            if SHOULD_UPDATE_MAX_WHEN_NO_VPCS:
                for i in refinds:
                    if curr_df.loc[0,gap_cols[i]] > selected_gap_df.loc[(inst,0),gap_cols[i+1]]:
                        # if curr_df.loc[0,gap_cols[i]] > 0:
                            # print("DEBUG: Updating {} for inst {} from {:f} to {:f}".format(
                            #     gap_cols[i+1], 
                            #     inst, 
                            #     selected_gap_df.loc[(inst,0),gap_cols[i+1]], 
                            #     curr_df.loc[0,gap_cols[i]]))
                        selected_gap_df.loc[(inst,0),gap_cols[i+1]] = curr_df.loc[0,gap_cols[i]]

display(selected_gap_df.head(21).loc[:,[col_num_vpcs]+gap_cols])

332/332

Unnamed: 0_level_0,Unnamed: 1_level_0,NUM VPC,GMIC % GAP CLOSED,BEST DISJ % GAP CLOSED,VPC % GAP CLOSED,VPC+GMIC % GAP CLOSED,REF FIRST_CUT_PASS % GAP CLOSED,REF+V FIRST_CUT_PASS % GAP CLOSED,REF LAST_CUT_PASS % GAP CLOSED,REF+V LAST_CUT_PASS % GAP CLOSED
INSTANCE,disj_terms,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
10teams_presolved,0,0,100.0,0.0,0.0,100.0,100.0,100.0,100.0,100.0
10teams_presolved,2,74,100.0,0.0,0.0,100.0,100.0,100.0,100.0,100.0
10teams_presolved,4,4,100.0,0.0,0.0,100.0,100.0,100.0,100.0,100.0
10teams_presolved,8,74,100.0,0.0,0.0,100.0,100.0,100.0,100.0,100.0
10teams_presolved,16,1,100.0,0.0,0.0,100.0,100.0,100.0,100.0,100.0
10teams_presolved,32,1,100.0,0.0,0.0,100.0,100.0,100.0,100.0,100.0
10teams_presolved,64,0,100.0,0.0,0.0,100.0,100.0,100.0,100.0,100.0
23588_presolved,0,0,5.77283,72.182376,71.597382,71.59924,14.222797,70.957296,26.386033,71.826753
23588_presolved,2,34,5.77283,21.88689,18.918235,20.044423,14.222797,23.442264,26.386033,30.475698
23588_presolved,4,75,5.77283,34.091089,27.647967,27.647967,14.222797,27.457404,26.386033,32.066123


#### DEBUG: Why REF+V is less than REF

In [503]:
### DEBUG
# Why REF+V < REF

inst = 'f2gap801600_presolved'

tmp_df = gap_df.loc[inst,['NUM VPC']+['FIRST REF FIRST_CUT_PASS']+['FIRST REF+V FIRST_CUT_PASS']+gap_cols+['LP OBJ','IP OBJ']]

# display(tmp_df)
# display(gap_df.loc[inst,['NUM VPC']+['FIRST REF FIRST_CUT_PASS']+['FIRST REF+V FIRST_CUT_PASS']+gap_cols])

# display(selected_gap_df.loc[inst,['NUM VPC']+['FIRST REF FIRST_CUT_PASS']+['FIRST REF+V FIRST_CUT_PASS']+gap_cols])

col = "REF+V FIRST_CUT_PASS"
tmp_df[col + " % GAP CLOSED"] = calc_gap_closed(tmp_df, "FIRST " + col)
display(tmp_df)

Unnamed: 0_level_0,NUM VPC,FIRST REF FIRST_CUT_PASS,FIRST REF+V FIRST_CUT_PASS,GMIC % GAP CLOSED,BEST DISJ % GAP CLOSED,VPC % GAP CLOSED,VPC+GMIC % GAP CLOSED,REF FIRST_CUT_PASS % GAP CLOSED,REF+V FIRST_CUT_PASS % GAP CLOSED,REF LAST_CUT_PASS % GAP CLOSED,REF+V LAST_CUT_PASS % GAP CLOSED,LP OBJ,IP OBJ
disj_terms,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,0,86624.336832,0.0,0.0,0.0,0.0,0.0,49.794634,0.0,49.794634,0.0,86570.120867,86679.0
2,66,0.0,86679.0,78.591875,1.056924,0.843201,78.591875,0.0,100.0,0.0,100.0,86570.12087,86679.0
4,33,0.0,86679.0,78.591875,2.660684,0.98016,78.591875,0.0,100.0,0.0,100.0,86570.12087,86679.0
8,5,0.0,86679.0,78.591875,3.653143,1.107604,78.607287,0.0,100.0,0.0,100.0,86570.12087,86679.0
16,0,0.0,0.0,78.591875,4.498594,0.0,78.591875,0.0,0.0,0.0,0.0,86570.12087,86679.0
32,66,0.0,86679.0,78.591875,5.173443,1.056924,78.591875,0.0,100.0,0.0,100.0,86570.12087,86679.0
64,0,0.0,0.0,78.591875,6.077675,0.0,78.591875,0.0,0.0,0.0,0.0,86570.12087,86679.0


In [504]:
inst = 'f2gap801600_presolved'

display(gap_df.loc[inst,['NUM VPC']+['FIRST REF FIRST_CUT_PASS']+['FIRST REF+V FIRST_CUT_PASS']+gap_cols])

display(selected_gap_df.loc[inst,['NUM VPC']+['FIRST REF FIRST_CUT_PASS']+['FIRST REF+V FIRST_CUT_PASS']+gap_cols])

Unnamed: 0_level_0,NUM VPC,FIRST REF FIRST_CUT_PASS,FIRST REF+V FIRST_CUT_PASS,GMIC % GAP CLOSED,BEST DISJ % GAP CLOSED,VPC % GAP CLOSED,VPC+GMIC % GAP CLOSED,REF FIRST_CUT_PASS % GAP CLOSED,REF+V FIRST_CUT_PASS % GAP CLOSED,REF LAST_CUT_PASS % GAP CLOSED,REF+V LAST_CUT_PASS % GAP CLOSED
disj_terms,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,0,86624.336832,0.0,0.0,0.0,0.0,0.0,49.794634,0.0,49.794634,0.0
2,66,0.0,86679.0,78.591875,1.056924,0.843201,78.591875,0.0,100.0,0.0,100.0
4,33,0.0,86679.0,78.591875,2.660684,0.98016,78.591875,0.0,100.0,0.0,100.0
8,5,0.0,86679.0,78.591875,3.653143,1.107604,78.607287,0.0,100.0,0.0,100.0
16,0,0.0,0.0,78.591875,4.498594,0.0,78.591875,0.0,0.0,0.0,0.0
32,66,0.0,86679.0,78.591875,5.173443,1.056924,78.591875,0.0,100.0,0.0,100.0
64,0,0.0,0.0,78.591875,6.077675,0.0,78.591875,0.0,0.0,0.0,0.0


Unnamed: 0_level_0,NUM VPC,FIRST REF FIRST_CUT_PASS,FIRST REF+V FIRST_CUT_PASS,GMIC % GAP CLOSED,BEST DISJ % GAP CLOSED,VPC % GAP CLOSED,VPC+GMIC % GAP CLOSED,REF FIRST_CUT_PASS % GAP CLOSED,REF+V FIRST_CUT_PASS % GAP CLOSED,REF LAST_CUT_PASS % GAP CLOSED,REF+V LAST_CUT_PASS % GAP CLOSED
disj_terms,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,0,86624.336832,0.0,78.591875,6.077675,1.107604,78.607287,49.794634,100.0,49.794634,100.0
2,66,0.0,86679.0,78.591875,1.056924,0.843201,78.591875,49.794634,100.0,49.794634,100.0
4,33,0.0,86679.0,78.591875,2.660684,0.98016,78.591875,49.794634,100.0,49.794634,100.0
8,5,0.0,86679.0,78.591875,3.653143,1.107604,78.607287,49.794634,100.0,49.794634,100.0
16,0,0.0,0.0,78.591875,4.498594,0.0,78.591875,49.794634,49.794634,49.794634,49.794634
32,66,0.0,86679.0,78.591875,5.173443,1.056924,78.591875,49.794634,100.0,49.794634,100.0
64,0,0.0,0.0,78.591875,6.077675,0.0,78.591875,49.794634,49.794634,49.794634,49.794634


### `best_gap_df`: For each instance, what the best gap closed is (and how that was obtained)

In [505]:
## Create best df = for each instance, what the best gap closed is (and how that was obtained)
map_short_to_cols = {
    'G'      : col_gmic,
    'DB'     : col_best_disj,
    'V'      : col_vpc,
    'V+G'    : col_vpc_gmic,
    'GurF'   : col_first_ref,
    'V+GurF' : col_first_ref_v,
    'GurL'   : col_last_ref,
    'V+GurL' : col_last_ref_v,
}
map_cols_to_short = {v: k for k, v in map_short_to_cols.items()}

gap_cols_short = list(map_short_to_cols.keys())

# inst_set = selected_gap_instances_dict.keys()
inst_set = selected_gap_df.index.levels[0]
# inst_set = ['neos22_presolved']
# inst_set = ['usAbbrv-8-25_70_presolved']

best_gap_df = pd.DataFrame(
    columns = gap_cols_short+[
        'BEST VPC DISJ',
        'BEST GMIC+VPC DISJ',
        'BEST V+GurF DISJ',
        'BEST V+GurL DISJ',
        'NUM VPC',
        'NUM GMIC',
    ],
    index = inst_set,
    dtype = float,
)

num_inst = len(inst_set)
for i, inst in enumerate(inst_set):
    print("{}/{}".format(i+1,num_inst), end='\r', flush=True)
    # print("Processing instance {:d} with name {}.".format(i, inst))
    best_vpc = -1.
    best_vpc_disj = -1
    best_vpcgmic = -1.
    best_vpcgmic_disj = -1
    best_VGurF = -1.
    best_VGurF_disj = -1
    best_VGurL = -1.
    best_VGurL_disj = -1
    best_num_vpc = -1
    best_num_gmic = -1
    
    curr_df = selected_gap_df.loc[inst]
    
    # Get info for GurF and GurL from the no-VPC row
    row = curr_df.loc[0]
    GurF_gap = float(row['REF FIRST_CUT_PASS % GAP CLOSED'])
    GurL_gap = float(row['REF LAST_CUT_PASS % GAP CLOSED'])
    gmic_gap = float(row['GMIC % GAP CLOSED'])
    disj_gap = float(row['BEST DISJ % GAP CLOSED'])

    for index, row in curr_df.iterrows():
        num_disj_terms = int(row['NUM DISJ TERMS'])
        # num_obj_tried  = float(row['NUM OBJ'])
        num_vpc        = float(row['NUM VPC'])
        if num_disj_terms <= 0 or num_vpc == 0:
            continue
            
        # print("Index {:d}: Processing instance {} with {:d} disj terms.".format(index, inst, num_disj_terms))
        vpc_gap     = float(row['VPC % GAP CLOSED'])
        vpcgmic_gap = float(row['VPC+GMIC % GAP CLOSED'])
        VGurF_gap   = float(row['REF+V FIRST_CUT_PASS % GAP CLOSED'])
        VGurL_gap   = float(row['REF+V LAST_CUT_PASS % GAP CLOSED'])
        # num_vpc     = float(row['NUM VPC'])
        num_gmic    = float(row['NUM GMIC'])
        
        if (best_vpc < vpc_gap): #or (is_val(best_vpc, vpc_gap) and best_num_vpc == 0):
            best_vpc = vpc_gap
            best_vpc_disj = index
            best_num_vpc = num_vpc
            best_num_gmic = num_gmic
        if best_vpcgmic < vpcgmic_gap:
            best_vpcgmic = vpcgmic_gap
            best_vpcgmic_disj = index
        if best_VGurF < VGurF_gap:
            best_VGurF = VGurF_gap
            best_VGurF_disj = index
        if best_VGurL < VGurL_gap:
            best_VGurL = VGurL_gap
            best_VGurL_disj = index

    best_gap_df.iloc[i] = [
        gmic_gap if gmic_gap >= EPS else 0.,
        disj_gap if disj_gap >= EPS else 0.,
        best_vpc if best_vpc >= EPS else 0.,
        best_vpcgmic if best_vpcgmic >= EPS else 0.,
        GurF_gap if GurF_gap >= EPS else 0.,
        best_VGurF if best_VGurF >= EPS else 0.,
        GurL_gap if GurL_gap >= EPS else 0.,
        best_VGurL if best_VGurL >= EPS else 0.,
        best_vpc_disj,
        best_vpcgmic_disj,
        best_VGurF_disj,
        best_VGurL_disj,
        best_num_vpc,
        best_num_gmic,
    ]

col_list = ['BEST VPC DISJ', 'BEST GMIC+VPC DISJ', 'BEST V+GurF DISJ', 'BEST V+GurL DISJ', 'NUM VPC', 'NUM GMIC']
for col in col_list:
    best_gap_df[col] = best_gap_df[col].astype(np.int64)

display(best_gap_df)

332/332

Unnamed: 0_level_0,G,DB,V,V+G,GurF,V+GurF,GurL,V+GurL,BEST VPC DISJ,BEST GMIC+VPC DISJ,BEST V+GurF DISJ,BEST V+GurL DISJ,NUM VPC,NUM GMIC
INSTANCE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
10teams_presolved,100.000000,0.000000,0.000000,100.000000,100.000000,100.000000,100.000000,100.000000,2,2,2,2,74,153
23588_presolved,5.772830,72.182376,71.597382,71.599240,14.222797,70.957296,26.386033,71.826753,64,64,64,64,75,74
30n20b8_presolved,11.513514,1.223891,0.017716,11.513514,1.234311,1.312801,17.285869,28.956262,4,2,2,4,190,187
50v-10_presolved,45.753596,18.008191,6.836095,45.823184,50.218750,50.861824,70.906623,74.653591,64,16,2,4,29,29
a1c1s1_presolved,25.100614,4.895611,1.820497,25.386388,45.998106,47.072835,88.344774,88.650009,64,8,2,2,4,154
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
umts_presolved,0.973181,0.209040,0.109528,0.973181,1.302534,1.368979,4.731895,5.672506,32,2,2,16,276,275
usAbbrv-8-25_70_presolved,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,8,8,8,8,4,722
vpm1_presolved,29.906542,7.242991,7.009346,29.906542,41.121495,50.934579,50.934579,50.934579,64,2,2,2,9,14
vpm2_presolved,17.849671,14.293216,8.125084,20.006451,42.919339,50.063401,75.669712,71.193532,32,32,2,2,25,25


#### DEBUG: In `best_gap_df`, can get V > V+G due to numerical issues

In [506]:
## DEBUG: You can get V > V+G due to numerical issues

col1 = best_gap_df['V']
col2 = best_gap_df['V+G']

display(best_gap_df[(col1 > col2 + EPS) == True])

df.loc['neos-1058477_presolved'] #.to_csv("neos-1058477_presolved_data.csv")

Unnamed: 0_level_0,G,DB,V,V+G,GurF,V+GurF,GurL,V+GurL,BEST VPC DISJ,BEST GMIC+VPC DISJ,BEST V+GurF DISJ,BEST V+GurL DISJ,NUM VPC,NUM GMIC
INSTANCE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
neos-1058477_presolved,0.759153,27.825376,20.843319,20.832364,25.419863,25.42122,99.212298,99.21295,4,4,2,4,28,28


Unnamed: 0_level_0,cutlimit,gomory,mode,partial_bb_strategy,partial_bb_num_strong,preprocess,prlp_flip_beta,rounds,strengthen,temp,...,vpc_version,cbc_version,clp_version,gurobi_version,cplex_version,ExitReason,end_time_string,time elapsed,instname,Unnamed: 273
disj_terms,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,-1,0,0,4,5,0,0,1,1,0,...,#fc3db01,#0152c5f,#8bd9396,9.11,20.1.0,NO_DISJUNCTION,Tue Jun 29 19:31:08 2021,1,neos-1058477_presolved,DONE
2,-1,-1,0,4,5,0,0,1,1,0,...,#fc3db01,#0152c5f,#8bd9396,9.11,20.1.0,CUT_LIMIT,Sat Jun 26 17:11:28 2021,1,neos-1058477_presolved,DONE
4,-1,-1,0,4,5,0,0,1,1,0,...,#fc3db01,#0152c5f,#8bd9396,9.11,20.1.0,CUT_LIMIT,Sun Jun 27 01:11:39 2021,3,neos-1058477_presolved,DONE
8,-1,-1,0,4,5,0,0,1,1,0,...,#fc3db01,#0152c5f,#8bd9396,9.11,20.1.0,FAIL_LIMIT,Sun Jun 27 08:41:23 2021,222,neos-1058477_presolved,DONE
16,-1,-1,0,4,5,0,0,1,1,0,...,#fc3db01,#0152c5f,#8bd9396,9.11,20.1.0,PRLP_TIME_LIMIT,Sun Jun 27 16:11:29 2021,64,neos-1058477_presolved,DONE
32,-1,-1,0,4,5,0,0,1,1,0,...,#fc3db01,#0152c5f,#8bd9396,9.11,20.1.0,PRLP_TIME_LIMIT,Mon Jun 28 00:37:33 2021,68,neos-1058477_presolved,DONE
64,-1,-1,0,4,5,0,0,1,1,0,...,#fc3db01,#0152c5f,#8bd9396,9.11,20.1.0,PRLP_TIME_LIMIT,Mon Jun 28 08:58:25 2021,85,neos-1058477_presolved,DONE


#### DEBUG: Find instances in which V+GurF max does not match up

In [507]:
## DEBUG: Find instances in which V+GurF max does not match up
# This causes the value in Table 2 'Best' row to not match Table 1 'All'

# For instance f2gap801600_presolved, the gap closed at the end of the root node is 0% whenever VPCs are used,
# but without VPCs, the gap closed is 50%
# In `best_gap_df`, for an instance in which no VPCs were generated,
# we use the value of GurF/GurL for V+GurF/V+GurL
# In `selected_gap_df`, the "zero" row contains

num_inst = len(best_gap_df.index)
col = 'V+GurF'
origcol = map_short_to_cols[col]
num_errors = 0
avg1 = 0
avg2 = 0
for inst in best_gap_df.index:
    val1 = best_gap_df.loc[inst,col]
    val2 = selected_gap_df.loc[(inst,0),origcol]
    if abs(val1-val2) > EPS:
        print("{} has best_gap_df = {:f} and selected_gap_df = {:f} for col {}".format(inst,val1,val2,col))
        num_errors += 1
    avg1 += val1 / num_inst
    avg2 += val2 / num_inst

print("Average from best_gap_df = {}".format(avg1))
print("Average from selected_gap_df = {}".format(avg2))
print("Total # of errors =", num_errors, flush=True)

neos-1112787_presolved has best_gap_df = 13.846154 and selected_gap_df = 14.212442 for col V+GurF
neos-1582420_presolved has best_gap_df = 13.159762 and selected_gap_df = 13.159762 for col V+GurF
Average from best_gap_df = 30.360311183322914
Average from selected_gap_df = 30.361414460749128
Total # of errors = 2


#### DEBUG: Print relevant info from `selected_gap_df` and `best_gap_df` to further debug

In [508]:
## DEBUG
inst = 'f2gap801600_presolved'
# inst = 'neos22_presolved'
display(best_gap_df.loc[inst])
display(selected_gap_df.loc[inst,[col_num_vpcs]+gap_cols])

G                      78.591875
DB                      6.077675
V                       1.107604
V+G                    78.607287
GurF                   49.794634
V+GurF                100.000000
GurL                   49.794634
V+GurL                100.000000
BEST VPC DISJ           8.000000
BEST GMIC+VPC DISJ      8.000000
BEST V+GurF DISJ        2.000000
BEST V+GurL DISJ        2.000000
NUM VPC                 5.000000
NUM GMIC               66.000000
Name: f2gap801600_presolved, dtype: float64

Unnamed: 0_level_0,NUM VPC,GMIC % GAP CLOSED,BEST DISJ % GAP CLOSED,VPC % GAP CLOSED,VPC+GMIC % GAP CLOSED,REF FIRST_CUT_PASS % GAP CLOSED,REF+V FIRST_CUT_PASS % GAP CLOSED,REF LAST_CUT_PASS % GAP CLOSED,REF+V LAST_CUT_PASS % GAP CLOSED
disj_terms,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,0,78.591875,6.077675,1.107604,78.607287,49.794634,100.0,49.794634,100.0
2,66,78.591875,1.056924,0.843201,78.591875,49.794634,100.0,49.794634,100.0
4,33,78.591875,2.660684,0.98016,78.591875,49.794634,100.0,49.794634,100.0
8,5,78.591875,3.653143,1.107604,78.607287,49.794634,100.0,49.794634,100.0
16,0,78.591875,4.498594,0.0,78.591875,49.794634,49.794634,49.794634,49.794634
32,66,78.591875,5.173443,1.056924,78.591875,49.794634,100.0,49.794634,100.0
64,0,78.591875,6.077675,0.0,78.591875,49.794634,49.794634,49.794634,49.794634


In [509]:
# ## DEBUG
# gap_cols = [
#     'GMIC % GAP CLOSED',
#     'BEST DISJ % GAP CLOSED',
#     'VPC % GAP CLOSED',
#     'VPC+GMIC % GAP CLOSED',
#     'REF FIRST_CUT_PASS % GAP CLOSED',
#     'REF+V FIRST_CUT_PASS % GAP CLOSED',
#     'REF LAST_CUT_PASS % GAP CLOSED',
#     'REF+V LAST_CUT_PASS % GAP CLOSED',
# ]
# col_num_vpcs = 'NUM VPC'

inst = 'f2gap801600_presolved'
tmp_selected_gap_df = gap_df.loc[selected_gap_instances_dict.keys()]
curr_df = tmp_selected_gap_df.loc[inst].copy() # copy needed to not throw SettingWithCopyWarning

# Set 0-row to have max values across all rows for this instance
max_vals = curr_df[gap_cols].max()
# selected_gap_df.loc[(inst,0),gap_cols] = max_vals

display(tmp_selected_gap_df.loc[inst])
display(max_vals)

Unnamed: 0_level_0,NUM DISJ TERMS,ROWS,COLS,LP OBJ,BEST DISJ OBJ,WORST DISJ OBJ,IP OBJ,GMIC OBJ,VPC OBJ,VPC+GMIC OBJ,...,NUM OBJ,ExitReason,GMIC % GAP CLOSED,BEST DISJ % GAP CLOSED,VPC % GAP CLOSED,VPC+GMIC % GAP CLOSED,REF FIRST_CUT_PASS % GAP CLOSED,REF+V FIRST_CUT_PASS % GAP CLOSED,REF LAST_CUT_PASS % GAP CLOSED,REF+V LAST_CUT_PASS % GAP CLOSED
disj_terms,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0,80,1600,86570.120867,-inf,-inf,86679.0,,,,...,0,NO_DISJUNCTION,0.0,0.0,0.0,0.0,49.794634,0.0,49.794634,0.0
2,2,80,1600,86570.12087,86571.27164,86578.45463,86679.0,86655.69102,86571.03894,86655.69102,...,75,CUT_LIMIT,78.591875,1.056924,0.843201,78.591875,0.0,100.0,0.0,100.0
4,4,80,1600,86570.12087,86573.0178,86582.74713,86679.0,86655.69102,86571.18806,86655.69102,...,34,SUCCESS,78.591875,2.660684,0.98016,78.591875,0.0,100.0,0.0,100.0
8,8,80,1600,86570.12087,86574.09838,86619.55332,86679.0,86655.69102,86571.32682,86655.7078,...,6,SUCCESS,78.591875,3.653143,1.107604,78.607287,0.0,100.0,0.0,100.0
16,16,80,1600,86570.12087,86575.0189,86619.55332,86679.0,86655.69102,,86655.69102,...,20,FAIL_LIMIT,78.591875,4.498594,0.0,78.591875,0.0,0.0,0.0,0.0
32,32,80,1600,86570.12087,86575.75367,86619.55332,86679.0,86655.69102,86571.27164,86655.69102,...,87,CUT_LIMIT,78.591875,5.173443,1.056924,78.591875,0.0,100.0,0.0,100.0
64,64,80,1600,86570.12087,86576.73819,86622.23104,86679.0,86655.69102,,86655.69102,...,20,FAIL_LIMIT,78.591875,6.077675,0.0,78.591875,0.0,0.0,0.0,0.0


GMIC % GAP CLOSED                     78.591875
BEST DISJ % GAP CLOSED                 6.077675
VPC % GAP CLOSED                       1.107604
VPC+GMIC % GAP CLOSED                 78.607287
REF FIRST_CUT_PASS % GAP CLOSED       49.794634
REF+V FIRST_CUT_PASS % GAP CLOSED    100.000000
REF LAST_CUT_PASS % GAP CLOSED        49.794634
REF+V LAST_CUT_PASS % GAP CLOSED     100.000000
dtype: float64

### Table 1: `avg_gap_df`: average percent gap closed across different combinations of cuts

In [510]:
## TABLE 1: average percent gap closed across different combinations of cuts
## Create avg_gap_df = average gap closed across instances
all_set_name = 'All'
good_vpc_set_name = tex_escape('≥10%')
avg_row_name = tex_escape('Avg (%)')
wins_row_name = 'Wins'

idx = pd.MultiIndex.from_product(
    [ [all_set_name, good_vpc_set_name], [avg_row_name, wins_row_name] ],
    names = ['Set', '']
)
    
ncols = len(best_gap_df.columns)
nrows = len(idx)

col = best_gap_df['V'].astype(float)
good_vpc_df = best_gap_df[col >= 10.]

data = np.zeros((nrows, ncols), dtype=float)
data[0,:] = [best_gap_df[col].mean() for col in best_gap_df.columns]
data[2,:] = [good_vpc_df[col].mean() for col in best_gap_df.columns]

# display(best_gap_df.head())
avg_gap_df = pd.DataFrame(
    data,
    columns = best_gap_df.columns,
    index = idx,
    dtype = object
)

inst_col_name = '# inst'
avg_gap_df[inst_col_name] = [len(best_gap_df), 0, len(good_vpc_df), 0]

avg_gap_df.iloc[1] = ["" for i in range(ncols+1)]
avg_gap_df.iloc[3] = ["" for i in range(ncols+1)]

display(avg_gap_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,G,DB,V,V+G,GurF,V+GurF,GurL,V+GurL,BEST VPC DISJ,BEST GMIC+VPC DISJ,BEST V+GurF DISJ,BEST V+GurL DISJ,NUM VPC,NUM GMIC,# inst
Set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
All,Avg (\%),14.12975,15.28108,9.673803,19.989592,23.826539,30.360311,41.489612,46.616962,31.433735,25.759036,16.006024,17.710843,57.177711,169.560241,332.0
All,Wins,,,,,,,,,,,,,,,
$\ge$10\%,Avg (\%),16.063619,36.257255,28.649874,34.278529,27.810775,40.418214,48.508253,58.901068,52.43299,48.556701,31.42268,29.587629,58.927835,82.597938,97.0
$\ge$10\%,Wins,,,,,,,,,,,,,,,


### `wins_df`: num wins

In [511]:
## Create num wins df
# x wins over y for an instance if x > y + EPS
#shortcols = avg_gap_df.columns[0:-1]
wins_df = pd.DataFrame(
    np.zeros((len(gap_cols_short), len(gap_cols_short)), dtype=int),
    columns = gap_cols_short,
    index = gap_cols_short,
    dtype = int,
)

from itertools import permutations
for (ind1, ind2) in permutations(range(len(gap_cols_short)), 2):
    wins_df.at[gap_cols_short[ind1],gap_cols_short[ind2]] =\
        int(sum(best_gap_df[gap_cols_short[ind1]] > best_gap_df[gap_cols_short[ind2]] + EPS))
    wins_df.at[gap_cols_short[ind2],gap_cols_short[ind1]] =\
        int(sum(best_gap_df[gap_cols_short[ind2]] > best_gap_df[gap_cols_short[ind1]] + EPS))

# Sets we are considering
# all_set = 'Wins (All)'
# good_vpc_set = 'Wins (V ≥ 10%)'
all_set = (all_set_name,wins_row_name)
good_vpc_set = (good_vpc_set_name,wins_row_name)

# "G" are wins relative to "V"
shortrefcol = 'V'
#refcol = 'VPC % GAP CLOSED'
#refcol = map_short_to_cols[shortrefcol]
refcol = shortrefcol
shortdestcol = 'G'
#col = 'GMIC % GAP CLOSED'
#col = map_short_to_cols[shortcol]
destcol = shortdestcol
avg_gap_df.at[all_set,shortdestcol] = wins_df.at[shortdestcol,shortrefcol]
avg_gap_df.at[good_vpc_set,shortdestcol] = sum(good_vpc_df[destcol] > good_vpc_df[refcol] + EPS)

# "DB", "V", "V+G": wins are relative to "G"
shortrefcol = 'G'
#refcol = 'GMIC % GAP CLOSED'
#refcol = map_short_to_cols[shortrefcol]
refcol = shortrefcol
shortdestcol = 'DB'
#col = 'BEST DISJ % GAP CLOSED'
#col = map_short_to_cols[shortcol]
destcol = shortdestcol
avg_gap_df.at[all_set,shortdestcol] = wins_df.at[shortdestcol,shortrefcol]
avg_gap_df.at[good_vpc_set,shortdestcol] = sum(good_vpc_df[destcol] > good_vpc_df[refcol] + EPS)

shortdestcol = 'V'
#col = 'VPC % GAP CLOSED'
#col = map_short_to_cols[shortcol]
destcol = shortdestcol
avg_gap_df.at[all_set,shortdestcol] = wins_df.at[shortdestcol,shortrefcol]
avg_gap_df.at[good_vpc_set,shortdestcol] = sum(good_vpc_df[destcol] > good_vpc_df[refcol] + EPS)

shortdestcol = 'V+G'
#col = 'VPC+GMIC % GAP CLOSED'
#col = map_short_to_cols[shortcol]
destcol = shortdestcol
avg_gap_df.at[all_set,shortdestcol] = wins_df.at[shortdestcol,shortrefcol]
avg_gap_df.at[good_vpc_set,shortdestcol] = sum(good_vpc_df[destcol] > good_vpc_df[refcol] + EPS)

# "V+GurF" are wins relative to "GurF"
shortrefcol = 'GurF'
refcol = shortrefcol
shortdestcol = 'V+GurF'
destcol = shortdestcol
#col = map_short_to_cols[shortcol]
avg_gap_df.at[all_set,shortdestcol] = wins_df.at[shortdestcol,shortrefcol]
avg_gap_df.at[good_vpc_set,shortdestcol] = sum(good_vpc_df[destcol] > good_vpc_df[refcol] + EPS)

# "V+GurL" are wins relative to "GurL"
shortrefcol = 'GurL'
refcol = shortrefcol
shortdestcol = 'V+GurL'
destcol = shortdestcol
wins_df.at[shortdestcol,shortrefcol] = int(sum(best_gap_df[destcol] > best_gap_df[refcol] + EPS))
wins_df.at[shortrefcol,shortdestcol] = int(sum(best_gap_df[refcol] > best_gap_df[destcol] + EPS))
avg_gap_df.at[all_set,shortdestcol] = wins_df.at[shortdestcol,shortrefcol]
avg_gap_df.at[good_vpc_set,shortdestcol] = sum(good_vpc_df[destcol] > good_vpc_df[refcol] + EPS)

# Count number of instances that have V+G > 0
shortdestcol = inst_col_name
#col = 'V+GurL'
destcol = 'V+G'
avg_gap_df.at[all_set,shortdestcol] = sum(best_gap_df[destcol] > EPS)
avg_gap_df.at[good_vpc_set,shortdestcol] = sum(good_vpc_df[destcol] > EPS)

display(avg_gap_df)
display(wins_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,G,DB,V,V+G,GurF,V+GurF,GurL,V+GurL,BEST VPC DISJ,BEST GMIC+VPC DISJ,BEST V+GurF DISJ,BEST V+GurL DISJ,NUM VPC,NUM GMIC,# inst
Set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
All,Avg (\%),14.12975,15.28108,9.673803,19.989592,23.826539,30.360311,41.489612,46.616962,31.433735,25.759036,16.006024,17.710843,57.177711,169.560241,332
All,Wins,147.0,147.0,108.0,191.0,,221.0,,191.0,,,,,,,256
$\ge$10\%,Avg (\%),16.063619,36.257255,28.649874,34.278529,27.810775,40.418214,48.508253,58.901068,52.43299,48.556701,31.42268,29.587629,58.927835,82.597938,97
$\ge$10\%,Wins,20.0,83.0,77.0,93.0,,87.0,,77.0,,,,,,,97


Unnamed: 0,G,DB,V,V+G,GurF,V+GurF,GurL,V+GurL
G,0,111,147,0,67,32,18,7
DB,147,0,224,122,112,93,74,49
V,108,0,0,1,78,43,52,6
V+G,191,125,221,0,116,73,61,19
GurF,183,150,180,141,0,4,0,1
V+GurF,222,164,210,174,221,0,53,0
GurL,240,190,211,200,239,200,0,42
V+GurL,251,212,253,235,254,234,191,0


### Analyze instances in which DB > G but V <= G

In [512]:
col1 = 'DB'
col2 = 'G'
tmp_df = best_gap_df.loc[best_gap_df[col1] > best_gap_df[col2] + EPS]

col1 = 'V'
tmp_df = tmp_df[tmp_df[col1] <= tmp_df[col2] + EPS]
display(tmp_df.head())

# inst_set = tmp_df.index
inst_depth_set = [(inst,tmp_df.at[inst,'BEST VPC DISJ']) for inst in tmp_df.index]

print("Total num inst with DB > G >= V is {:d}".format(len(tmp_df)))
print("Num times hit cut limit = {:d}".format(sum(df.loc[inst_depth_set,'ExitReason'] == 'CUT_LIMIT')))

# display(df.loc[inst_depth_set])


Unnamed: 0_level_0,G,DB,V,V+G,GurF,V+GurF,GurL,V+GurL,BEST VPC DISJ,BEST GMIC+VPC DISJ,BEST V+GurF DISJ,BEST V+GurL DISJ,NUM VPC,NUM GMIC
INSTANCE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
app3_presolved,20.407763,53.919689,14.437037,25.785417,28.605491,32.028038,84.562025,85.320587,32,32,16,16,15,18
beasleyC2_presolved,11.37931,13.103448,7.459991,14.311773,49.14751,49.927984,95.931121,98.883005,64,32,8,32,32,32
blend2_presolved,5.456519,29.371553,1.737554,8.690768,5.456514,5.456514,20.467636,20.594188,2,32,2,2,13,11
bppc8-09_presolved,3.078078,3.301802,0.561924,3.078078,1.699186,2.078451,2.774698,2.990956,64,2,64,64,1,30
eilB101_presolved,2.465774,13.940965,0.765686,2.701124,11.899547,12.028531,42.77363,44.932902,16,16,2,2,89,71


Total num inst with DB > G >= V is 39
Num times hit cut limit = 18


### Analyze instances in which V+G <= G

In [513]:
col1 = 'V+G'
col2 = 'G'
tmp_df = best_gap_df.loc[best_gap_df[col1] <= best_gap_df[col2] + EPS]

display(tmp_df)

inst_depth_set = [(inst,tmp_df.at[inst,'BEST VPC DISJ']) for inst in tmp_df.index]

print("Total num inst with V+G <= G is {:d}".format(len(tmp_df)))
print("Num times with G = 100% gap closed = {:d}".format(sum(tmp_df['G'] == 100.)))
print("Num times with V+G = 0% gap closed = {:d}".format(sum(tmp_df['V+G'] == 0.)))
print("Num times hit cut limit = {:d}".format(sum(df.loc[inst_depth_set,'ExitReason'] == 'CUT_LIMIT')))

# display(df.loc[inst_depth_set])

Unnamed: 0_level_0,G,DB,V,V+G,GurF,V+GurF,GurL,V+GurL,BEST VPC DISJ,BEST GMIC+VPC DISJ,BEST V+GurF DISJ,BEST V+GurL DISJ,NUM VPC,NUM GMIC
INSTANCE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
10teams_presolved,100.000000,0.000000,0.000000,100.000000,100.000000,100.000000,100.000000,100.000000,2,2,2,2,74,153
30n20b8_presolved,11.513514,1.223891,0.017716,11.513514,1.234311,1.312801,17.285869,28.956262,4,2,2,4,190,187
a2c1s1_presolved,24.762989,3.307002,0.551438,24.762989,43.944996,45.219809,90.302139,91.366463,2,2,2,16,17,157
b2c1s1_presolved,19.697967,1.493952,0.085124,19.697967,17.865599,23.546836,72.288503,72.968249,32,2,2,64,4,238
berlin_5_8_0_presolved,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,16,16,16,16,8,236
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
timtab2_presolved,13.664747,9.246145,2.358475,13.664747,24.495378,24.823847,47.015363,49.114298,4,2,2,32,3,214
tr12-30_presolved,58.360211,1.743446,0.477979,58.360211,60.331161,60.423920,99.318705,99.678262,32,2,2,2,2,321
umts_presolved,0.973181,0.209040,0.109528,0.973181,1.302534,1.368979,4.731895,5.672506,32,2,2,16,276,275
usAbbrv-8-25_70_presolved,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,8,8,8,8,4,722


Total num inst with V+G <= G is 141
Num times with G = 100% gap closed = 4
Num times with V+G = 0% gap closed = 76
Num times hit cut limit = 25


### Analyze when G > V

In [514]:
col1 = 'G'
col2 = 'V'
tmp_df = best_gap_df.loc[best_gap_df[col1] > best_gap_df[col2] + EPS]

display(tmp_df)

inst_depth_set = [(inst,tmp_df.at[inst,'BEST VPC DISJ']) for inst in tmp_df.index]

print("Total num inst with G > V is {:d}".format(len(tmp_df)))
print("Num times with #V < 10 is {:d}".format(len(tmp_df[(tmp_df['NUM VPC'] < 10)])))
print("Num times with #V < 10 while #G > 10 is {:d}".format(len(tmp_df[(tmp_df['NUM VPC'] < 10) & (tmp_df['NUM GMIC'] > 10)])))
# print("Num times with #V < 10 is {:d}".format(sum(tmp_df['NUM VPC'] < 10)))

# print("Num times with V+G = 0% gap closed = {:d}".format(sum(tmp_df['V+G'] == 0.)))
print("Num times hit cut limit = {:d}".format(sum(df.loc[inst_depth_set,'ExitReason'] == 'CUT_LIMIT')))

tmp_inst_set = tmp_df[(tmp_df['NUM VPC'] < 10) & (tmp_df['NUM GMIC'] > 10)].index
tmp_inst_depth_set = [(inst,tmp_df.at[inst,'BEST VPC DISJ']) for inst in tmp_inst_set]
print("Num times hit cut limit when #G > #V = {:d} (should be 0)".format(sum(df.loc[tmp_inst_depth_set,'ExitReason'] == 'CUT_LIMIT')))

# display(df.loc[inst_depth_set])

Unnamed: 0_level_0,G,DB,V,V+G,GurF,V+GurF,GurL,V+GurL,BEST VPC DISJ,BEST GMIC+VPC DISJ,BEST V+GurF DISJ,BEST V+GurL DISJ,NUM VPC,NUM GMIC
INSTANCE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
10teams_presolved,100.000000,0.000000,0.000000,100.000000,100.000000,100.000000,100.000000,100.000000,2,2,2,2,74,153
30n20b8_presolved,11.513514,1.223891,0.017716,11.513514,1.234311,1.312801,17.285869,28.956262,4,2,2,4,190,187
50v-10_presolved,45.753596,18.008191,6.836095,45.823184,50.218750,50.861824,70.906623,74.653591,64,16,2,4,29,29
a1c1s1_presolved,25.100614,4.895611,1.820497,25.386388,45.998106,47.072835,88.344774,88.650009,64,8,2,2,4,154
a2c1s1_presolved,24.762989,3.307002,0.551438,24.762989,43.944996,45.219809,90.302139,91.366463,2,2,2,16,17,157
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
tr12-30_presolved,58.360211,1.743446,0.477979,58.360211,60.331161,60.423920,99.318705,99.678262,32,2,2,2,2,321
umts_presolved,0.973181,0.209040,0.109528,0.973181,1.302534,1.368979,4.731895,5.672506,32,2,2,16,276,275
vpm1_presolved,29.906542,7.242991,7.009346,29.906542,41.121495,50.934579,50.934579,50.934579,64,2,2,2,9,14
vpm2_presolved,17.849671,14.293216,8.125084,20.006451,42.919339,50.063401,75.669712,71.193532,32,32,2,2,25,25


Total num inst with G > V is 147
Num times with #V < 10 is 65
Num times with #V < 10 while #G > 10 is 60
Num times hit cut limit = 47
Num times hit cut limit when #G > #V = 0 (should be 0)


### Table 2: `gap_by_size_df`: gap closed by num leaves

In [515]:
## TABLE 2: gap closed by num leaves
## Note that ``best'' can be worse than for a single row
## because when no VPCs are generated, we assume the "no VPCs" results hold for Gurobi,
## but we do not count that for the ``best'' calculation,
## since otherwise there is potential bias, as sometimes Gurobi does better without VPCs
sizes = [0, 2, 4, 8, 16, 32, 64]

shortcols = [
        'DB',
        'V',
        'V+G',
        #'GurF',
        'V+GurF',
        #'GurL',
        'V+GurL',
    ]

gap_by_size_df = pd.DataFrame(
    columns = shortcols,
    index = sizes + ['Best'],
    # index = [str(size) + " leaves" for size in sizes]+['Best'],
    dtype = float,
)
zero_row_name = 0

# `grouped_df` will collect gap closed across instances, grouped by num terms
grouped_df = selected_gap_df.groupby(level='disj_terms').mean()
ungrouped_df = best_gap_df.mean()

# For each of the columns (in shortcols),
# save the average value for each size
# (this will put in the right place as the index is based on sizes for both)
for col in shortcols:
    orig_col = map_short_to_cols[col]
    #gap_by_size_df.loc[2]['DB'] = best_gap_df[orig_col].mean()
    gap_by_size_df[col] = grouped_df[orig_col]

# Fill in the 'Best' row, since that is currently stored in `gap_by_size_df` in the "0" row
gap_by_size_df.loc['Best'] = gap_by_size_df.loc[zero_row_name]

# Now update the zero row with correct values
col = 'DB'
gap_by_size_df[col][zero_row_name] = 0.

col = 'V'
gap_by_size_df[col][zero_row_name] = 0.

stubs = ['G', 'GurF', 'GurL']
for stub in stubs:
    col = 'V+'+stub
    # orig_col = map_short_to_cols[stub]
    gap_by_size_df[col][0] = ungrouped_df[stub]

# Reindex to add "leaves" to index
idx = [str(size) + " leaves" for size in sizes]+['Best']
reidx = {old_id : new_id for old_id, new_id in zip(gap_by_size_df.index,idx)}
gap_by_size_df.rename(reidx, inplace=True)

# display(grouped_df[gap_cols])
display(ungrouped_df)
display(gap_by_size_df)

G                      14.129750
DB                     15.281080
V                       9.673803
V+G                    19.989592
GurF                   23.826539
V+GurF                 30.360311
GurL                   41.489612
V+GurL                 46.616962
BEST VPC DISJ          31.433735
BEST GMIC+VPC DISJ     25.759036
BEST V+GurF DISJ       16.006024
BEST V+GurL DISJ       17.710843
NUM VPC                57.177711
NUM GMIC              169.560241
dtype: float64

Unnamed: 0,DB,V,V+G,V+GurF,V+GurL
0 leaves,0.0,0.0,14.12975,23.826539,41.489612
2 leaves,2.278573,1.746209,14.757409,27.065254,42.858194
4 leaves,4.086872,2.787291,15.226602,27.269124,43.391265
8 leaves,6.219857,3.585201,15.729267,27.674987,43.543924
16 leaves,8.855139,4.994371,16.824188,28.064522,43.886622
32 leaves,11.915676,6.816493,18.145808,29.069201,44.77662
64 leaves,15.281074,8.11412,19.149979,29.894078,45.411474
Best,15.28108,9.673803,19.989592,30.361414,46.760653


### Table 5: `all_gap_results_df`: complete gap closed results

In [516]:
inst_set = selected_gap_df.index.levels[0]
inst_set.set_names("Instance",inplace=True)

col_idx = pd.MultiIndex.from_arrays(
    [
        ['', '', '# cuts', '# cuts'] + ['% gap closed']*len(gap_cols_short),
        ['Rows', 'Cols', 'G', 'V'] + gap_cols_short
    ],
)

all_gap_results_df = pd.DataFrame(
    columns = col_idx,
    index = inst_set,
    dtype = object,
)

# Enter number of rows and cols
tmp_df = df.xs(0, level='disj_terms').loc[inst_set,['ROWS','COLS']]
tmp_df.columns = pd.MultiIndex.from_product([[''],['Rows','Cols']])
all_gap_results_df.loc[:,tmp_df.columns] = tmp_df

# Enter number of cuts
# tmp_df = best_gap_df.xs(0, level='disj_terms').loc[inst_set,['NUM GMIC', 'NUM VPC']]
tmp_df = best_gap_df.loc[inst_set, ['NUM GMIC', 'NUM VPC']]
tmp_df.columns = pd.MultiIndex.from_product([['# cuts'],['G','V']])
all_gap_results_df.loc[:,tmp_df.columns] = tmp_df

# Enter gap closed
tmp_df = best_gap_df.loc[inst_set, gap_cols_short]
tmp_df.columns = pd.MultiIndex.from_product([['% gap closed'],gap_cols_short])
all_gap_results_df.loc[:,tmp_df.columns] = tmp_df

# Add average row
all_gap_results_df.loc["Average"] = all_gap_results_df.loc[:,('% gap closed',gap_cols_short)].mean()

# Now convert the % gap closed columns to objects so we can add an int row
all_gap_results_df.loc[:,('% gap closed',gap_cols_short)] = all_gap_results_df.loc[:,('% gap closed',gap_cols_short)].astype(object)

# Add wins row
win_gap_cols_short = ['DB', 'V', 'V+G', 'V+GurF', 'V+GurL']
all_gap_results_df.loc['Wins',('% gap closed',win_gap_cols_short)] = avg_gap_df.loc[all_set,win_gap_cols_short].values.tolist()
# all_gap_results_df.loc['Wins',('% gap closed',win_gap_cols_short)] = avg_gap_df.loc[all_set,gap_cols_short].astype(np.int64).values.tolist()
# all_gap_results_df.loc["Wins"] = avg_gap_df.loc[all_set,gap_cols_short]
# wins_df.at[cols[ind1],cols[ind2]] = int(sum(best_gap_df[cols[ind1]] > best_gap_df[cols[ind2]] + EPS))

# Replace missing entries with empty string
all_gap_results_df = all_gap_results_df.fillna('',downcast=False)

# Convert rows, cols, # cuts to int values
tmp_cols = pd.MultiIndex.from_product([[''],['Rows','Cols']])
all_gap_results_df.loc[inst_set,tmp_cols] = all_gap_results_df.loc[inst_set,tmp_cols].astype(np.int64)
tmp_cols = pd.MultiIndex.from_product([['# cuts'],['G','V']])
all_gap_results_df.loc[inst_set,tmp_cols] = all_gap_results_df.loc[inst_set,tmp_cols].astype(np.int64)

all_gap_results_df.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,# cuts,# cuts,% gap closed,% gap closed,% gap closed,% gap closed,% gap closed,% gap closed,% gap closed,% gap closed
Unnamed: 0_level_1,Rows,Cols,G,V,G,DB,V,V+G,GurF,V+GurF,GurL,V+GurL
Instance,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
vpm1_presolved,128.0,188.0,14.0,9.0,29.906542,7.242991,7.009346,29.906542,41.121495,50.934579,50.934579,50.934579
vpm2_presolved,127.0,187.0,25.0,25.0,17.849671,14.293216,8.125084,20.006451,42.919339,50.063401,75.669712,71.193532
zib54-UUE_presolved,1114.0,3726.0,56.0,56.0,10.696523,17.642953,7.604164,15.377159,28.264404,54.830159,68.989315,69.241224
Average,,,,,14.12975,15.28108,9.673803,19.989592,23.826539,30.360311,41.489612,46.616962
Wins,,,,,,147.0,108.0,191.0,,221.0,,191.0


# Section 3: Time tables

## `time_df`: Create subset of dataframe relevant to time

In [434]:
## Create subset of dataframe relevant to time
time_df = df.loc[:, 
                [
                    'NUM DISJ TERMS',
                    'ROWS',
                    'COLS',
                    'LP OBJ',
                    'IP OBJ',
                    'FIRST REF OBJ',
                    'BEST REF OBJ',
                    'FIRST REF+V OBJ',
                    'FIRST REF BOUND',
                    'BEST REF BOUND',
                    'FIRST REF+V BOUND',
                    'FIRST REF ITERS',
                    'BEST REF ITERS',
                    'FIRST REF+V ITERS',
                    'FIRST REF NODES',
                    'BEST REF NODES',
                    'FIRST REF+V NODES',
                    'FIRST REF TIME',
                    'BEST REF TIME',
                    'AVG REF TIME',
                    'FIRST REF+V TIME',
                    'VPC_GEN_TIME',
                    'NUM GMIC',
                    'NUM VPC',
                    'NUM OBJ',
                    'ALL REF TIME',
                    'ExitReason']
               ]
#display(time_df.loc[("bm23_presolved",2)])

## Prepare short/long column names for time dfs
1. First run of Gurobi without VPCs
2. Best among 7 runs of Gurobi without VPCs
3. First run of Gurobi with VPCs for each disjunction size
4. First run of Gurobi with VPCs for each disjunction size, adding cut generation time
5. Best run across first Gurobi without VPCs and first Gurobi with VPCs (across all terms)

In [435]:
col_num_vpcs = 'NUM VPC'
col_vpc_gen_time = 'VPC_GEN_TIME'

# Gur1/Gur7 names
gur1_col_stub = 'FIRST REF'
gur7_col_stub = 'BEST REF'
gur1v_col_stub = gur1_col_stub + '+V'
gur1v_w_cut_col_stub = gur1v_col_stub + ' W/CUTGEN'

# gur1time: first run of Gurobi without VPCs
gur1time_col = gur1_col_stub + ' TIME'
gur1nodes_col = gur1_col_stub + ' NODES'

# gur7time: best among 7 runs of Gurobi without VPCs
gur7time_col = gur7_col_stub + ' TIME'
gur7nodes_col = gur7_col_stub + ' NODES'

# gur1vtime: first run of Gurobi w/VPCs for each disj size
gur1vtime_col = gur1v_col_stub + ' TIME'
gur1vnodes_col = gur1v_col_stub + ' NODES'

# gur1v_w_cut_time: first run of Gurobi w/VPCs for each disj size, counting cut generation time
gur1v_w_cut_time_col = gur1v_w_cut_col_stub + ' TIME'

# Track best disjunction used in 0-row
gurv_disj_col = gur1v_col_stub + ' DISJ'
gurv_w_cut_disj_col = gur1v_w_cut_col_stub + ' DISJ'

# Best Gurobi run across the first without VPCs and first w/VPCs for each disj size
mintime_col       = 'MIN BB TIME'
mintime_w_cut_col = 'MIN BB W/CUTGEN TIME'
mintime_disj_col  = 'MIN BB TIME DISJ'
minnodes_col      = 'MIN BB NODES'

map_cols_to_short_time = {
    gur1time_col         : 'Gur1',
    gur7time_col         : 'Gur7',
    gur1vtime_col        : 'V',
    gur1v_w_cut_time_col : 'Total',
    mintime_col          : 'V7',
    mintime_w_cut_col    : 'Total7',
}

map_cols_to_short_nodes = {
    gur1nodes_col        : 'Gur1',
    gur7nodes_col        : 'Gur7',
    gur1vnodes_col       : 'V',
    minnodes_col         : 'V7',
}

map_short_to_cols_time = {v: k for k, v in map_cols_to_short_time.items()}
map_short_to_cols_nodes = {v: k for k, v in map_cols_to_short_nodes.items()}

time_cols_short = list(map_short_to_cols_time.keys())
node_cols_short = list(map_short_to_cols_nodes.keys())
# display(time_cols, node_cols)

# Select a subset of columns for the "long" list used when updating the 0-row
time_cols_long = [map_short_to_cols_time[col] for col in time_cols_short]
node_cols_long = [map_short_to_cols_nodes[col] for col in node_cols_short]

# # Update list of columns with mintime cols
# newshortcol1 = 'V7'
# newshortcol2 = 'Total7'
# newshortcol3 = 'V7'
# map_cols_to_short_time [mintime_col]       = newshortcol1
# map_cols_to_short_time [mintime_w_cut_col] = newshortcol2
# map_cols_to_short_nodes[minnodes_col]      = newshortcol3

# map_short_to_cols_time [newshortcol1]      = mintime_col
# map_short_to_cols_time [newshortcol2]      = mintime_w_cut_col
# map_short_to_cols_nodes[newshortcol3]      = minnodes_col

# time_cols_short.append(newshortcol1)
# time_cols_short.append(newshortcol2)
# node_cols_short.append(newshortcol3)

## Add total time for running solver + generating cuts

In [436]:
# Add total time for running solver + generating cuts
time_df[gur1v_w_cut_time_col] = time_df[gur1vtime_col] + time_df[col_vpc_gen_time]

display(time_df.loc['bm23_presolved'])

Unnamed: 0_level_0,NUM DISJ TERMS,ROWS,COLS,LP OBJ,IP OBJ,FIRST REF OBJ,BEST REF OBJ,FIRST REF+V OBJ,FIRST REF BOUND,BEST REF BOUND,...,BEST REF TIME,AVG REF TIME,FIRST REF+V TIME,VPC_GEN_TIME,NUM GMIC,NUM VPC,NUM OBJ,ALL REF TIME,ExitReason,FIRST REF+V W/CUTGEN TIME
disj_terms,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0,20,27,20.570922,34.0,34.0,34.0,0.0,34.0,34.0,...,0.07,0.076,0.0,0.0,0,0,0,0.070728;0.090601;0.072226;0.075089;0.080344;0...,NO_DISJUNCTION,0.0
2,2,20,27,20.570922,34.0,0.0,0.0,34.0,0.0,0.0,...,0.0,0.0,0.063,0.0,6,6,7,,CUT_LIMIT,0.063
4,4,20,27,20.570922,34.0,0.0,0.0,34.0,0.0,0.0,...,0.0,0.0,0.076,0.0,6,6,6,,CUT_LIMIT,0.076
8,8,20,27,20.570922,34.0,0.0,0.0,34.0,0.0,0.0,...,0.0,0.0,0.08,0.02,6,6,6,,CUT_LIMIT,0.1
16,16,20,27,20.570922,34.0,0.0,0.0,34.0,0.0,0.0,...,0.0,0.0,0.076,0.02,6,6,6,,CUT_LIMIT,0.096
32,32,20,27,20.570922,34.0,0.0,0.0,34.0,0.0,0.0,...,0.0,0.0,0.068,0.05,6,6,6,,CUT_LIMIT,0.118
64,64,20,27,20.570922,34.0,0.0,0.0,34.0,0.0,0.0,...,0.0,0.0,0.07,0.13,6,6,6,,CUT_LIMIT,0.2


## `selected_time_df`: Solving and cut-generation time for instances selected for time reporting; 0-row with min values across all rows

In [437]:
## Solving and cut-generation time for instances selected for time reporting
selected_time_df = time_df.loc[selected_time_instances_dict.keys()]
selected_time_df.index = selected_time_df.index.remove_unused_levels()
selected_time_df[minnodes_col] = 0

## Fill in 0-row with min values across all rows
## Also fill in gur1 values (present only in 0 row currently) for all disj terms
comparison_time_cols = [gur1vtime_col, gur1v_w_cut_time_col]
comparison_node_cols = [gur1vnodes_col]
cols_to_display = [col_num_vpcs]+[gur1time_col,gur1vtime_col]+[gur1nodes_col,gur1vnodes_col]+[mintime_col,mintime_w_cut_col,minnodes_col,gurv_disj_col,gurv_w_cut_disj_col,mintime_disj_col]
inst_set = selected_time_df.index.levels[0]
# tmp_inst = '23588_presolved'
# inst_set = ['10teams_presolved',tmp_inst]
for i, inst in enumerate(inst_set):
    print("{}/{}".format(i+1,len(inst_set)), end='\r', flush=True)
    curr_df = selected_time_df.loc[inst].copy() # copy needed to not throw SettingWithCopyWarning
    
    # Select only the rows in which VPCs were generated
    curr_df_with_vpcs = curr_df[curr_df[col_num_vpcs] > 0]
    
    # display(inst)
    # display(curr_df_with_vpcs[[col_num_vpcs]+[gur1time_col,gur1vtime_col]])

    # Set 0-row to have min time values across all (non-0-vpc) rows for this instance
    # best_vals = curr_df_with_vpcs[comparison_time_cols].min()
    # selected_time_df.loc[(inst,0),comparison_time_cols] = best_vals
    best_vals_idx = curr_df_with_vpcs[comparison_time_cols].idxmin()
    for curr_col, curr_disj_id in zip(comparison_time_cols, best_vals_idx):
        selected_time_df.at[(inst,0),curr_col] = curr_df_with_vpcs.at[curr_disj_id, curr_col]
    
    # display(best_vals_idx)
    # print("selected_time_df.at[('{}',0),gur1vtime_col] = {}".format(inst,selected_time_df.at[(inst,0),gur1vtime_col]))
    # display(selected_time_df[[col_num_vpcs]+[gur1time_col,gur1vtime_col]].head(14))
    # print("selected_time_df.at[('{}',0),gur1vtime_col] = {}".format(inst,selected_time_df.at[(inst,0),gur1vtime_col]))


    # Also add id of the best disj to the 0-row
    selected_time_df.at[(inst,0),gurv_disj_col]       = int(best_vals_idx[0])
    selected_time_df.at[(inst,0),gurv_w_cut_disj_col] = int(best_vals_idx[1])

    # Update 0-row of mintime (V7) entries
    curr_gur1time       = selected_time_df.at[(inst,0),gur1time_col]
    curr_gur1vtime      = selected_time_df.at[(inst,0),gur1vtime_col]
    curr_gur1vcuts_time = selected_time_df.at[(inst,0),gur1v_w_cut_time_col]
    
    curr_vals = [curr_gur1time, curr_gur1vtime]
    min_id = np.argmin(curr_vals)

    # If min_id is 0, then no cuts are used and we report the gur1 time
    # If min_id is 1, then gur1v < gur1 and we can report the number of cuts used
    selected_time_df.at[(inst,0),mintime_col] = curr_vals[min_id]

    # Add num cuts from mintime disj into num vpc col
    best_disj_size = 0 if min_id == 0 else best_vals_idx[0]
    selected_time_df.at[(inst,0),mintime_disj_col] = best_disj_size
    best_num_cuts = selected_time_df.at[(inst,best_disj_size),col_num_vpcs]
    selected_time_df.at[(inst,0),col_num_vpcs] = best_num_cuts

    # Update with cuts into Total7 column
    curr_vals = [curr_gur1time, curr_gur1vcuts_time]
    selected_time_df.at[(inst,0),mintime_w_cut_col] = min(curr_vals)

    # Repeat for nodes
    best_vals = curr_df_with_vpcs[comparison_node_cols].min()
    selected_time_df.loc[(inst,0),comparison_node_cols] = best_vals
    # selected_time_df.at[(inst,0),minnodes_col] = int(selected_time_df.loc[(inst,0),[gur1nodes_col,gur1vnodes_col]].min())

    curr_gur1nodes       = selected_time_df.at[(inst,0),gur1nodes_col]
    curr_gur1vnodes      = selected_time_df.at[(inst,0),gur1vnodes_col]
    curr_vals = [curr_gur1nodes, curr_gur1vnodes]
    min_id = np.argmin(curr_vals)
    selected_time_df.at[(inst,0),minnodes_col] = int(curr_vals[min_id])

    # Propogate down 0-row values for gur1 columns
    selected_time_df.loc[inst, gur1time_col] = curr_gur1time
    selected_time_df.loc[inst, gur1nodes_col] = curr_gur1nodes

    #### FOR SOME REASON, THE BELOW ZEROES OUT selected_time_df.loc[[(inst,0)]][gur1vtime_col]
    # display(selected_time_df.loc[(inst,0),[gur1nodes_col,gur1vnodes_col]])

    ## OLD CODE BELOW
    # best_vals_idx = curr_df_with_vpcs[comparison_node_cols].idxmin()
    # for curr_col, curr_disj_id in zip(comparison_node_cols, best_vals_idx):
    #     selected_time_df.at[(inst,0),curr_col] = curr_df_with_vpcs.loc[curr_disj_id, curr_col]

    # # Also add id of the best disj to the 0-row
    # selected_time_df.at[(inst,0),gurv_disj_col + ' (NODES)'] = int(best_vals_idx[0])
    # selected_time_df.at[(inst,0),gurv_w_cut_disj_col+ ' (NODES)'] = int(best_vals_idx[1])

    # for ind in curr_df.index:
    #     if ind == 0:
    #         continue

    #     # Propogate GurF and GurL down
    #     subinds = [4,6]
    #     sel_gap = [gap_cols[i] for i in subinds]
    #     selected_gap_df.loc[(inst,ind),sel_gap] = curr_df.loc[0,sel_gap]

    #     # If no VPCs produced, the values for V+GurF and V+GurL have not been provided
    #     # We replace these by GurF and GurL
    #     # Currently disabled: update max for that column too (if disabled, we instead keep max as the value among those that generated VPCs)
    #     num_vpc = curr_df.loc[ind,col_num_vpcs]
    #     if num_vpc == 0:
    #         # print("Zero cuts for inst {} at depth {:d}".format(inst, ind))
    #         subinds = [5,7]
    #         refinds = [4,6]
    #         sel_gap = [gap_cols[i] for i in subinds]
    #         selected_gap_df.loc[(inst,ind),sel_gap] = curr_df.loc[0,[gap_cols[i] for i in refinds]].to_numpy()

    #         # for i in refinds:
    #         #     if curr_df.loc[0,gap_cols[i]] > selected_gap_df.loc[(inst,0),gap_cols[i+1]]:
    #         #         if curr_df.loc[0,gap_cols[i]] > 0:
    #         #             # print("DEBUG: Updating {} for inst {} from {:f} to {:f}".format(
    #         #             #     gap_cols[i+1], 
    #         #             #     inst, 
    #         #             #     selected_gap_df.loc[(inst,0),gap_cols[i+1]], 
    #         #             #     curr_df.loc[0,gap_cols[i]]))
    #         #         selected_gap_df.loc[(inst,0),gap_cols[i+1]] = curr_df.loc[0,gap_cols[i]]

# Add minimum time when using cuts and when not using cuts
# selected_time_df[mintime_col] = selected_time_df[[gur1time_col, gur1vtime_col]].min(axis=1)
# selected_time_df[mintime_w_cut_col] = selected_time_df[[gur1time_col, gur1v_w_cut_time_col]].min(axis=1)
# selected_time_df[minnodes_col] = selected_time_df[[gur1nodes_col,gur1vnodes_col]].min(axis=1)

display(selected_time_df.head(35).loc[:,[col_num_vpcs]+[gur1time_col,gur1vtime_col]+[gur1nodes_col,gur1vnodes_col]+[mintime_col,mintime_w_cut_col,minnodes_col,gurv_disj_col,gurv_w_cut_disj_col,mintime_disj_col]])
# display(selected_time_df.loc['10teams_presolved',[col_num_vpcs]+[gur1time_col,gur1vtime_col]+[gur1nodes_col,gur1vnodes_col]+[mintime_col,mintime_w_cut_col,minnodes_col,gurv_disj_col,gurv_w_cut_disj_col,mintime_disj_col]])
# display(selected_time_df.loc[inst_set,cols_to_display])

298/298

Unnamed: 0_level_0,Unnamed: 1_level_0,NUM VPC,FIRST REF TIME,FIRST REF+V TIME,FIRST REF NODES,FIRST REF+V NODES,MIN BB TIME,MIN BB W/CUTGEN TIME,MIN BB NODES,FIRST REF+V DISJ,FIRST REF+V W/CUTGEN DISJ,MIN BB TIME DISJ
INSTANCE,disj_terms,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
10teams_presolved,0,0,12.469,13.946,794,1235,12.469,12.469,794,2.0,2.0,0.0
10teams_presolved,2,74,12.469,13.946,794,1235,,,0,,,
10teams_presolved,4,4,12.469,15.161,794,1690,,,0,,,
10teams_presolved,8,74,12.469,15.285,794,1690,,,0,,,
10teams_presolved,16,1,12.469,15.091,794,1690,,,0,,,
10teams_presolved,32,1,12.469,15.087,794,1690,,,0,,,
10teams_presolved,64,0,12.469,0.0,794,0,,,0,,,
23588_presolved,0,34,1.023,0.745,940,788,0.745,1.023,788,2.0,2.0,2.0
23588_presolved,2,34,1.023,0.745,940,788,,,0,,,
23588_presolved,4,75,1.023,0.904,940,1008,,,0,,,


In [438]:
# ### DEBUGGING that first ref+v time gets zeroed out for some reason?
# tmp_df = selected_time_df[[col_num_vpcs]+[gur1time_col,gur1vtime_col]].head(14).copy(deep=True)
# display(tmp_df)

# print(tmp_df.loc[('23588_presolved',0),gur1vtime_col])
# display(tmp_df.loc[[('23588_presolved',0)]][gur1vtime_col])

# tmp_df = selected_time_df
# print(tmp_df.loc[('23588_presolved',0),gur1vtime_col])
# display(tmp_df.loc[[('23588_presolved',0)]][gur1vtime_col])
# display(tmp_df.loc['23588_presolved'])

## Table 3: `avg_bb_df`: average time/nodes taken

### Prepare variables for row/col names

In [439]:
## Prepare variables for row/col names

bb_classes = ['All', '6 wins']
num_bb_classes = len(bb_classes)

bucket_min = [0, 10, 100, 1000]
bucket_max = [3600, 3600, 3600, 3600]
num_buckets = len(bucket_min)
assert(len(bucket_max) == num_buckets)
bb_buckets = ['[' + str(bucket_min[j]) + ',' + str(bucket_max[j]) + ')' for j in range(num_buckets)]
# bucket_names = [classes[i] + ' [' + str(bucket_min[j]) + ',' + str(bucket_max[j]) + ')' for i in range(num_classes) for j in range(num_buckets)]
# display(bucket_names)

bb_metrics = ['Gmean', 'Wins1', 'Wins7']

time_col_header = 'Time (s)'
node_col_header = 'Nodes (\\#)'

### Set up empty `avg_bb_df`

In [440]:
## Prepare avg_bb_df

avg_bb_cols = pd.MultiIndex.from_arrays(
    [[time_col_header]*len(time_cols_short) + [node_col_header]*len(node_cols_short), time_cols_short + node_cols_short],
    names = ['criterion', 'type'])

#bb_row_names = pd.MultiIndex.from_product([bb_buckets, bb_row_names], names=['bucket', 'metric'])
bb_row_names = pd.MultiIndex.from_product(
    [bb_classes, bb_buckets, bb_metrics],
    names=['class', 'bucket', 'metric'])

avg_bb_df = pd.DataFrame(
    columns = avg_bb_cols,
    index = bb_row_names,
    dtype = float
)

display(avg_bb_df.loc[:,avg_bb_cols.get_level_values(0)==node_col_header].head(6))
#display(avg_bb_df.loc[(bb_classes[0], bb_buckets[1], bb_metrics[0]),:])
display(avg_bb_df.loc[(bb_classes[0], bb_buckets, bb_metrics[0]),:])

Unnamed: 0_level_0,Unnamed: 1_level_0,criterion,Nodes (\#),Nodes (\#),Nodes (\#),Nodes (\#)
Unnamed: 0_level_1,Unnamed: 1_level_1,type,Gur1,Gur7,V,V7
class,bucket,metric,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
All,"[0,3600)",Gmean,,,,
All,"[0,3600)",Wins1,,,,
All,"[0,3600)",Wins7,,,,
All,"[10,3600)",Gmean,,,,
All,"[10,3600)",Wins1,,,,
All,"[10,3600)",Wins7,,,,


Unnamed: 0_level_0,Unnamed: 1_level_0,criterion,Time (s),Time (s),Time (s),Time (s),Time (s),Time (s),Nodes (\#),Nodes (\#),Nodes (\#),Nodes (\#)
Unnamed: 0_level_1,Unnamed: 1_level_1,type,Gur1,Gur7,V,Total,V7,Total7,Gur1,Gur7,V,V7
class,bucket,metric,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
All,"[0,3600)",Gmean,,,,,,,,,,
All,"[10,3600)",Gmean,,,,,,,,,,
All,"[100,3600)",Gmean,,,,,,,,,,
All,"[1000,3600)",Gmean,,,,,,,,,,


### `avg_bb_df`: shifted geometric mean of time taken across instances, in various buckets, and geomean of nodes too

In [441]:
## Create gmean_df
#   = shifted geometric mean of time taken across instances, in various buckets
#     and geomean of nodes too

# Custom functions for prior to python 3.8
# def geo_mean(iterable):
#     a = np.array(iterable)
#     return a.prod()**(1.0/len(a))
# def geo_mean_overflow(iterable):
#     return np.exp(np.log(iterable).mean())
from statistics import geometric_mean
SHIFT_TIME  = 60
SHIFT_NODES = 1000

num_inst = np.zeros(len(avg_bb_df),dtype = np.int64)
row_ind = 0

#avg_bb_df.loc[(bb_classes[0], bb_buckets, bb_metrics[0]),:] = \
shortcols_time = time_cols_short
cols_time = [map_short_to_cols_time[shortcol] for shortcol in shortcols_time]
shortcols_nodes = node_cols_short
cols_nodes = [map_short_to_cols_nodes[shortcol] for shortcol in shortcols_nodes]

cols = cols_time + cols_nodes
shortcols = shortcols_time + shortcols_nodes

# First calculate stats for "all" instances
curr_df = selected_time_df.loc[:,cols]
curr_df = curr_df[curr_df.index.get_level_values(1) == 0] # take only best values

for i in range(num_buckets):
    curr_df = curr_df[curr_df[gur1time_col] > bucket_min[i]]
    
    avg_bb_df.loc[(bb_classes[0], bb_buckets[i], bb_metrics[0]),(time_col_header,shortcols_time)] = \
        [geometric_mean(curr_df[col] + SHIFT_TIME) - SHIFT_TIME for col in cols_time]
    avg_bb_df.loc[(bb_classes[0], bb_buckets[i], bb_metrics[0]),(node_col_header,shortcols_nodes)] = \
        [geometric_mean(curr_df[col] + SHIFT_NODES) - SHIFT_NODES for col in cols_nodes]
    
    print("row {:d}: {:d}".format(row_ind,len(curr_df)))
    
    num_inst[row_ind:row_ind+len(bb_metrics)] = len(bb_metrics)*[len(curr_df)]
    row_ind += len(bb_metrics)

# Now calculate stats for "6 trees" instances
curr_df = selected_time_df.loc[all6_instances_dict.keys(),cols]
curr_df = curr_df[curr_df.index.get_level_values(1) == 0] # take only best values

for i in range(num_buckets):
    curr_df = curr_df[curr_df[gur1time_col] > bucket_min[i]]
    
    avg_bb_df.loc[(bb_classes[1], bb_buckets[i], bb_metrics[0]),(time_col_header,shortcols_time)] = \
        [geometric_mean(curr_df[col] + SHIFT_TIME) - SHIFT_TIME for col in cols_time]
    avg_bb_df.loc[(bb_classes[1], bb_buckets[i], bb_metrics[0]),(node_col_header,shortcols_nodes)] = \
        [geometric_mean(curr_df[col] + SHIFT_NODES) - SHIFT_NODES for col in cols_nodes]
    
    print("row {:d}: {:d}".format(row_ind,len(curr_df)))

    num_inst[row_ind:row_ind+len(bb_metrics)] = len(bb_metrics)*[len(curr_df)]
    row_ind += len(bb_metrics)

avg_bb_df[inst_col_name] = num_inst
# avg_bb_df['NUM INST'] = avg_bb_df['NUM INST'].astype(np.int64)

display(avg_bb_df.loc[(bb_classes, bb_buckets, bb_metrics[0]),:])

row 0: 298
row 3: 156
row 6: 90
row 9: 47
row 12: 205
row 15: 88
row 18: 48
row 21: 16


Unnamed: 0_level_0,Unnamed: 1_level_0,criterion,Time (s),Time (s),Time (s),Time (s),Time (s),Time (s),Nodes (\#),Nodes (\#),Nodes (\#),Nodes (\#),# inst
Unnamed: 0_level_1,Unnamed: 1_level_1,type,Gur1,Gur7,V,Total,V7,Total7,Gur1,Gur7,V,V7,Unnamed: 13_level_1
class,bucket,metric,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
All,"[0,3600)",Gmean,102.496832,77.664428,86.197471,99.422309,83.747398,87.386714,8936.354957,6284.648795,7035.125878,6760.348067,298
All,"[10,3600)",Gmean,331.965998,227.642042,261.353078,310.380952,251.89653,266.050944,49657.623417,29633.668408,34473.200729,32969.658511,156
All,"[100,3600)",Gmean,1064.11437,657.111083,783.440846,851.758724,753.885649,796.218992,193771.965544,104885.368102,129681.139688,122584.325945,90
All,"[1000,3600)",Gmean,2812.234604,1785.485418,2126.627947,2217.464846,2103.639733,2149.405542,275743.148245,143534.4081,184851.244683,181579.582204,47
6 wins,"[0,3600)",Gmean,59.209529,45.689506,47.66877,49.772969,46.821584,48.158516,6895.270957,5094.019302,5277.309248,5188.095623,205
6 wins,"[10,3600)",Gmean,227.322365,158.87007,167.976409,175.357124,164.075363,169.812492,63361.97256,38173.056421,40072.183944,39368.21823,88
6 wins,"[100,3600)",Gmean,666.903984,431.513173,451.84641,468.509191,441.471857,455.115861,249369.24813,146695.868541,145878.740612,144457.213803,48
6 wins,"[1000,3600)",Gmean,2228.979723,1356.379267,1452.910371,1518.803496,1418.906952,1483.048826,325503.868603,167756.007009,172610.954525,171879.760133,16


### Update wins1 rows

In [442]:
## Update wins1 rows
# A win in terms of time is counted when the ``Gur1'' baseline seconds taken 
# is at least 10\% slower, to account for some variability in runtimes.
# A win in terms of nodes is when the ``Gur1'' baseline number of nodes is higher.

# Make all columns "object" type to allow for integer values
avg_bb_df.loc[:,(time_col_header,shortcols_time)] = avg_bb_df.loc[:,(time_col_header,shortcols_time)].astype(object)
avg_bb_df.loc[:,(node_col_header,shortcols_nodes)] = avg_bb_df.loc[:,(node_col_header,shortcols_nodes)].astype(object)

# First calculate stats for "all" instances
curr_df = selected_time_df.loc[:,cols]
curr_df = curr_df[curr_df.index.get_level_values(1) == 0] # take only best values

for i in range(num_buckets):
    curr_df = curr_df[curr_df[gur1time_col] > bucket_min[i]]
    
    refcol = gur1time_col
    avg_bb_df.loc[(bb_classes[0], bb_buckets[i], bb_metrics[1]),(time_col_header,shortcols_time)] = \
        [ int(sum(curr_df[refcol] > 1.1*curr_df[col])) for col in cols_time ]

    refcol = gur1nodes_col
    avg_bb_df.loc[(bb_classes[0], bb_buckets[i], bb_metrics[1]),(node_col_header,shortcols_nodes)] = \
        [ int(sum(curr_df[refcol] > curr_df[col])) for col in cols_nodes ]

# Now calculate stats for "6 trees" instances
curr_df = selected_time_df.loc[all6_instances_dict.keys(),cols]
curr_df = curr_df[curr_df.index.get_level_values(1) == 0] # take only best values

for i in range(num_buckets):
    curr_df = curr_df[curr_df[gur1time_col] > bucket_min[i]]
    
    refcol = gur1time_col
    avg_bb_df.loc[(bb_classes[1], bb_buckets[i], bb_metrics[1]),(time_col_header,shortcols_time)] = \
        [ int(sum(curr_df[refcol] > 1.1*curr_df[col])) for col in cols_time ]

    refcol = gur1nodes_col
    avg_bb_df.loc[(bb_classes[1], bb_buckets[i], bb_metrics[1]),(node_col_header,shortcols_nodes)] = \
        [ int(sum(curr_df[refcol] > curr_df[col])) for col in cols_nodes ]

### Update wins7 rows

In [443]:
## Update wins7 rows
# A win in terms of time is counted when the ``Gur1'' baseline seconds taken 
# is at least 10\% slower, to account for some variability in runtimes.
# A win in terms of nodes is when the ``Gur1'' baseline number of nodes is higher.

# First calculate stats for "all" instances
curr_df = selected_time_df.loc[:,cols]
curr_df = curr_df[curr_df.index.get_level_values(1) == 0] # take only best values
for i in range(num_buckets):
    curr_df = curr_df[curr_df[gur1time_col] > bucket_min[i]]
    
    refcol = gur7time_col
    avg_bb_df.loc[(bb_classes[0], bb_buckets[i], bb_metrics[2]),(time_col_header,shortcols_time)] = \
        [ int(sum(curr_df[refcol] > 1.1*curr_df[col])) for col in cols_time ]

    refcol = gur7nodes_col
    avg_bb_df.loc[(bb_classes[0], bb_buckets[i], bb_metrics[2]),(node_col_header,shortcols_nodes)] = \
        [ int(sum(curr_df[refcol] > curr_df[col])) for col in cols_nodes ]

# Now calculate stats for "6 trees" instances
curr_df = selected_time_df.loc[all6_instances_dict.keys(),cols]
curr_df = curr_df[curr_df.index.get_level_values(1) == 0] # take only best values

for i in range(num_buckets):
    curr_df = curr_df[curr_df[gur1time_col] > bucket_min[i]]
    
    refcol = gur7time_col
    avg_bb_df.loc[(bb_classes[1], bb_buckets[i], bb_metrics[2]),(time_col_header,shortcols_time)] = \
        [ int(sum(curr_df[refcol] > 1.1*curr_df[col])) for col in cols_time ]

    refcol = gur7nodes_col
    avg_bb_df.loc[(bb_classes[1], bb_buckets[i], bb_metrics[2]),(node_col_header,shortcols_nodes)] = \
        [ int(sum(curr_df[refcol] > curr_df[col])) for col in cols_nodes ]

In [444]:
# display(avg_bb_df.loc[:,cols.get_level_values(0)=='Nodes'].head(6))
display(avg_bb_df.loc[(bb_classes[0:2], bb_buckets, bb_metrics[0:3]),:])

Unnamed: 0_level_0,Unnamed: 1_level_0,criterion,Time (s),Time (s),Time (s),Time (s),Time (s),Time (s),Nodes (\#),Nodes (\#),Nodes (\#),Nodes (\#),# inst
Unnamed: 0_level_1,Unnamed: 1_level_1,type,Gur1,Gur7,V,Total,V7,Total7,Gur1,Gur7,V,V7,Unnamed: 13_level_1
class,bucket,metric,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
All,"[0,3600)",Gmean,102.496832,77.664428,86.197471,99.422309,83.747398,87.386714,8936.354957,6284.648795,7035.125878,6760.348067,298
All,"[0,3600)",Wins1,0.0,195.0,164.0,93.0,164.0,93.0,0.0,222.0,214.0,214.0,298
All,"[0,3600)",Wins7,0.0,0.0,49.0,22.0,49.0,22.0,0.0,0.0,102.0,102.0,298
All,"[10,3600)",Gmean,331.965998,227.642042,261.353078,310.380952,251.89653,266.050944,49657.623417,29633.668408,34473.200729,32969.658511,156
All,"[10,3600)",Wins1,0.0,109.0,94.0,76.0,94.0,76.0,0.0,131.0,130.0,130.0,156
All,"[10,3600)",Wins7,0.0,0.0,26.0,17.0,26.0,17.0,0.0,0.0,62.0,62.0,156
All,"[100,3600)",Gmean,1064.11437,657.111083,783.440846,851.758724,753.885649,796.218992,193771.965544,104885.368102,129681.139688,122584.325945,90
All,"[100,3600)",Wins1,0.0,61.0,52.0,46.0,52.0,46.0,0.0,76.0,75.0,75.0,90
All,"[100,3600)",Wins7,0.0,0.0,12.0,10.0,12.0,10.0,0.0,0.0,30.0,30.0,90
All,"[1000,3600)",Gmean,2812.234604,1785.485418,2126.627947,2217.464846,2103.639733,2149.405542,275743.148245,143534.4081,184851.244683,181579.582204,47


## Table 6: `all_bb_results_df`: all time/nodes results

In [445]:
inst_set = selected_time_df.index.levels[0]
inst_set.set_names("Instance",inplace=True)
numcuts_col_header = '# cuts'

col_idx = pd.MultiIndex.from_arrays(
    [
        ['', '', numcuts_col_header] + [time_col_header]*len(time_cols_short) + [node_col_header]*len(node_cols_short),
        ['Rows', 'Cols', map_cols_to_short_time[gur1vtime_col]] + time_cols_short + node_cols_short
    ],
)

all_bb_results_df = pd.DataFrame(
    columns = col_idx,
    index = inst_set,
    dtype = object,
)

# Enter number of rows and cols
tmp_df = df.xs(0, level='disj_terms').loc[inst_set,['ROWS','COLS']]
tmp_df.columns = pd.MultiIndex.from_product([[''],['Rows','Cols']])
all_bb_results_df.loc[:,tmp_df.columns] = tmp_df

# Enter number of cuts
# tmp_df = selected_time_df.loc[(inst_set,0), ['NUM VPC']]
tmp_df = selected_time_df.xs(0, level='disj_terms')['NUM VPC']
tmp_df.columns = pd.MultiIndex.from_product([[numcuts_col_header],[map_cols_to_short_time[gur1vtime_col]]])
all_bb_results_df.loc[:,tmp_df.columns] = tmp_df

# Enter time
tmp_df = selected_time_df.xs(0, level='disj_terms')[time_cols_long]
tmp_df.columns = pd.MultiIndex.from_product([[time_col_header],time_cols_short])
all_bb_results_df.loc[:,tmp_df.columns] = tmp_df

# Enter nodes
tmp_df = selected_time_df.xs(0, level='disj_terms')[node_cols_long]
tmp_df.columns = pd.MultiIndex.from_product([[node_col_header],node_cols_short])
all_bb_results_df.loc[:,tmp_df.columns] = tmp_df

all_bb_results_df = all_bb_results_df.sort_values(by=[(time_col_header, map_cols_to_short_time[mintime_col])])

# Add average + wins rows
# Replace missing entries with empty string
tmp_df = avg_bb_df.xs((bb_classes[0],bb_buckets[0])).copy(deep=True)
tmp_df.drop(inst_col_name, axis=1, level=0, inplace=True)
all_bb_results_df = pd.concat([all_bb_results_df, tmp_df]).fillna('',downcast=False)
# all_bb_results_df = all_bb_results_df.fillna('',downcast=False)

# Convert rows, cols, # cuts to int values
tmp_cols = pd.MultiIndex.from_product([[''],['Rows','Cols']])
all_bb_results_df.loc[inst_set,tmp_cols] = all_bb_results_df.loc[inst_set,tmp_cols].astype(np.int64)
tmp_cols = pd.MultiIndex.from_product([[numcuts_col_header],[map_cols_to_short_time[gur1vtime_col]]])
all_bb_results_df.loc[inst_set,tmp_cols] = all_bb_results_df.loc[inst_set,tmp_cols].astype(np.int64)

# Rename inst col back to inst_row_name
all_bb_results_df.index.set_names("Instance",inplace=True)

display(all_bb_results_df.head(15))
display(all_bb_results_df.tail(10))

  tmp_df = avg_bb_df.xs((bb_classes[0],bb_buckets[0])).copy(deep=True)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,# cuts,Time (s),Time (s),Time (s),Time (s),Time (s),Time (s),Nodes (\#),Nodes (\#),Nodes (\#),Nodes (\#)
Unnamed: 0_level_1,Rows,Cols,V,Gur1,Gur7,V,Total,V7,Total7,Gur1,Gur7,V,V7
Instance,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
neos-796608_presolved,64,104,0,0.002,0.001,0.002,0.022,0.002,0.002,1,1,1,1
neos-530627_presolved,60,70,0,0.003,0.002,0.003,0.003,0.003,0.003,1,1,1,1
neos-501453_presolved,13,52,0,0.004,0.002,0.004,0.004,0.004,0.004,1,1,1,1
gt2_presolved,28,173,0,0.006,0.005,0.006,0.006,0.006,0.006,1,1,1,1
vpm1_presolved,128,188,0,0.007,0.007,0.007,0.017,0.007,0.007,1,1,1,1
set1cl_presolved,431,651,0,0.012,0.01,0.012,0.052,0.012,0.012,1,1,1,1
nexp-50-20-1-1_presolved,267,443,2,0.018,0.009,0.015,0.059,0.015,0.018,1,1,1,1
sp150x300d_presolved,269,419,0,0.017,0.016,0.017,0.038,0.017,0.017,1,1,1,1
p0548_presolved,117,365,2,0.02,0.017,0.019,0.04,0.019,0.02,1,1,1,1
pipex_presolved,25,48,6,0.023,0.022,0.02,0.025,0.02,0.023,1,1,1,1


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,# cuts,Time (s),Time (s),Time (s),Time (s),Time (s),Time (s),Nodes (\#),Nodes (\#),Nodes (\#),Nodes (\#)
Unnamed: 0_level_1,Rows,Cols,V,Gur1,Gur7,V,Total,V7,Total7,Gur1,Gur7,V,V7
Instance,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
cvs16r89-60_presolved,3068.0,2384.0,0.0,3600.0,3600.0,3600.0,4874.35,3600.0,3600.0,43027.0,43027.0,47102.0,43027.0
protfold_presolved,2110.0,1835.0,33.0,3600.001,3600.0,3600.0,3952.85,3600.0,3600.001,24622.0,24622.0,24435.0,24435.0
cvs16r70-62_presolved,3278.0,2112.0,0.0,3600.0,3600.0,3600.001,3873.921,3600.0,3600.0,22665.0,17109.0,21834.0,21834.0
cvs08r139-94_presolved,2398.0,1864.0,545.0,3600.001,3600.0,3600.0,3656.591,3600.0,3600.001,61547.0,61547.0,63127.0,61547.0
shiftreg1-4_presolved,2340.0,3312.0,275.0,3600.001,3600.0,3600.0,3660.65,3600.0,3600.001,115600.0,76566.0,78444.0,78444.0
berlin_5_8_0_presolved,1330.0,982.0,0.0,3600.0,3600.0,3600.0,3600.96,3600.0,3600.0,6717625.0,5882583.0,5929425.0,5929425.0
hgms-det_presolved,4599.0,950.0,1.0,3600.003,3600.0,3600.001,3602.871,3600.001,3600.003,256883.0,231087.0,246875.0,246875.0
Gmean,,,,102.496832,77.664428,86.197471,99.422309,83.747398,87.386714,8936.354957,6284.648795,7035.125878,6760.348067
Wins1,,,,0.0,195.0,164.0,93.0,164.0,93.0,0.0,222.0,214.0,214.0
Wins7,,,,0.0,0.0,49.0,22.0,49.0,22.0,0.0,0.0,102.0,102.0


## Table 7: ``all6_bb_results_df``: 6-trees time/nodes results

In [446]:
inst_set = all6_instances_dict.keys()
all6_bb_results_df = all_bb_results_df.loc[inst_set]

all6_bb_results_df = all6_bb_results_df.sort_values(by=[(time_col_header, map_cols_to_short_time[mintime_col])])

# Add average + wins rows
# Replace missing entries with empty string
tmp_df = avg_bb_df.xs((bb_classes[1],bb_buckets[0])).copy(deep=True)
tmp_df.drop(inst_col_name, axis=1, level=0, inplace=True)
all6_bb_results_df = pd.concat([all6_bb_results_df, tmp_df]).fillna('',downcast=False)
# all_bb_results_df = all_bb_results_df.fillna('',downcast=False)

# Convert rows, cols, # cuts to int values
tmp_cols = pd.MultiIndex.from_product([[''],['Rows','Cols']])
all6_bb_results_df.loc[inst_set,tmp_cols] = all6_bb_results_df.loc[inst_set,tmp_cols].astype(np.int64)
tmp_cols = pd.MultiIndex.from_product([[numcuts_col_header],[map_cols_to_short_time[gur1vtime_col]]])
all6_bb_results_df.loc[inst_set,tmp_cols] = all6_bb_results_df.loc[inst_set,tmp_cols].astype(np.int64)

# Rename inst col back to inst_row_name
all6_bb_results_df.index.set_names("Instance",inplace=True)

display(all6_bb_results_df.head(15))
display(all6_bb_results_df.tail(10))

  tmp_df = avg_bb_df.xs((bb_classes[1],bb_buckets[0])).copy(deep=True)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,# cuts,Time (s),Time (s),Time (s),Time (s),Time (s),Time (s),Nodes (\#),Nodes (\#),Nodes (\#),Nodes (\#)
Unnamed: 0_level_1,Rows,Cols,V,Gur1,Gur7,V,Total,V7,Total7,Gur1,Gur7,V,V7
Instance,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
neos-530627_presolved,60,70,0,0.003,0.002,0.003,0.003,0.003,0.003,1,1,1,1
neos-501453_presolved,13,52,0,0.004,0.002,0.004,0.004,0.004,0.004,1,1,1,1
gt2_presolved,28,173,0,0.006,0.005,0.006,0.006,0.006,0.006,1,1,1,1
vpm1_presolved,128,188,0,0.007,0.007,0.007,0.017,0.007,0.007,1,1,1,1
set1cl_presolved,431,651,0,0.012,0.01,0.012,0.052,0.012,0.012,1,1,1,1
nexp-50-20-1-1_presolved,267,443,2,0.018,0.009,0.015,0.059,0.015,0.018,1,1,1,1
sp150x300d_presolved,269,419,0,0.017,0.016,0.017,0.038,0.017,0.017,1,1,1,1
p0548_presolved,117,365,2,0.02,0.017,0.019,0.04,0.019,0.02,1,1,1,1
pipex_presolved,25,48,6,0.023,0.022,0.02,0.025,0.02,0.023,1,1,1,1
roy_presolved,147,139,7,0.031,0.026,0.023,0.043,0.023,0.031,1,1,1,1


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,# cuts,Time (s),Time (s),Time (s),Time (s),Time (s),Time (s),Nodes (\#),Nodes (\#),Nodes (\#),Nodes (\#)
Unnamed: 0_level_1,Rows,Cols,V,Gur1,Gur7,V,Total,V7,Total7,Gur1,Gur7,V,V7
Instance,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
neos-1616732_presolved,1026.0,200.0,114.0,2102.857,1556.932,1752.301,1758.291,1752.301,1758.291,1215504.0,763102.0,950349.0,950349.0
nobel-eu-DBE_presolved,726.0,3460.0,48.0,3600.0,2030.723,1816.245,1839.355,1816.245,1839.355,736303.0,300205.0,276220.0,276220.0
cov1075_presolved,637.0,120.0,1.0,3025.184,2738.625,2619.14,2633.56,2619.14,2633.56,1681466.0,1544971.0,1447848.0,1447848.0
toll-like_presolved,4038.0,2570.0,0.0,2623.447,2448.353,2990.052,2997.692,2623.447,2623.447,88098.0,32021.0,40279.0,40279.0
maxgasflow_presolved,4127.0,4221.0,6.0,3600.006,2670.426,2736.648,2743.328,2736.648,2743.328,251967.0,140953.0,168603.0,168603.0
cost266-UUE_presolved,1302.0,3882.0,56.0,3600.001,2694.31,2840.171,2878.078,2840.171,2878.078,76856.0,64179.0,72488.0,72488.0
iis-bupa-cov_presolved,4796.0,337.0,153.0,3600.0,3107.096,3043.204,3124.843,3043.204,3124.843,217847.0,197525.0,149842.0,149842.0
Gmean,,,,59.209529,45.689506,47.66877,49.772969,46.821584,48.158516,6895.270957,5094.019302,5277.309248,5188.095623
Wins1,,,,0.0,138.0,120.0,72.0,120.0,72.0,0.0,148.0,147.0,147.0
Wins7,,,,0.0,0.0,37.0,20.0,37.0,20.0,0.0,0.0,74.0,74.0


## Table 8: `avg_bb_by_depth_df`: average time/nodes by depth

In [237]:
## Prepare avg_bb_by_depth_df
## Prepare variables for row/col names

bb_classes_by_depth = [str(t) + ' leaves' for t in sizes[1:]]
num_bb_classes_by_depth = len(bb_classes_by_depth)

bb_buckets_by_depth = bb_buckets
bb_metrics_by_depth = bb_metrics[0:2]

cols_time_by_depth       = [gur1time_col, gur1vtime_col, gur1v_w_cut_time_col]
shortcols_time_by_depth  = [map_cols_to_short_time[col] for col in cols_time_by_depth]
cols_nodes_by_depth      = [gur1nodes_col, gur1vnodes_col]
shortcols_nodes_by_depth = [map_cols_to_short_nodes[col] for col in cols_nodes_by_depth]

avg_bb_cols_by_depth = pd.MultiIndex.from_arrays(
    [[time_col_header]*len(shortcols_time_by_depth) + 
     [node_col_header]*len(shortcols_nodes_by_depth), 
     shortcols_time_by_depth + shortcols_nodes_by_depth],
    names = ['criterion', 'type'])

# bucket_min = [0, 10, 100, 1000]
# bucket_max = [3600, 3600, 3600, 3600]
# num_buckets = len(bucket_min)
# assert(len(bucket_max) == num_buckets)
# bb_buckets = ['[' + str(bucket_min[j]) + ',' + str(bucket_max[j]) + ')' for j in range(num_buckets)]
# # bucket_names = [classes[i] + ' [' + str(bucket_min[j]) + ',' + str(bucket_max[j]) + ')' for i in range(num_classes) for j in range(num_buckets)]
# # display(bucket_names)

# bb_metrics = ['Gmean', 'Wins1', 'Wins7']

# time_col_header = 'Time (s)'
# node_col_header = 'Nodes (\\#)'

#bb_row_names = pd.MultiIndex.from_product([bb_buckets, bb_row_names], names=['bucket', 'metric'])
bb_row_names_by_depth = pd.MultiIndex.from_product(
    [bb_classes_by_depth, bb_buckets_by_depth, bb_metrics_by_depth],
    names=['class', 'bucket', 'metric'])

avg_bb_by_depth_df = pd.DataFrame(
    columns = avg_bb_cols_by_depth,
    index = bb_row_names_by_depth,
    dtype = float
)

# Fill in values for Gur1 from avg_bb_df
# display(
#     avg_bb_df.loc[
#         (bb_classes[1], bb_buckets, bb_metrics[0:2]),
#         [(time_col_header,map_cols_to_short_time[gur1time_col]),
#         (node_col_header,map_cols_to_short_nodes[gur1nodes_col])]
#     ]
# )

# Make all columns "object" type to allow for integer values
avg_bb_by_depth_df.loc[:,(time_col_header,shortcols_time_by_depth)] = avg_bb_by_depth_df.loc[:,(time_col_header,shortcols_time_by_depth)].astype(object)
avg_bb_by_depth_df.loc[:,(node_col_header,shortcols_nodes_by_depth)] = avg_bb_by_depth_df.loc[:,(node_col_header,shortcols_nodes_by_depth)].astype(object)

## Create gmean_df by depth
#   = shifted geometric mean of time taken across instances, in various buckets
#     and geomean of nodes too

num_inst_by_depth = np.zeros(len(avg_bb_by_depth_df),dtype = np.int64)
row_ind = 0

cols = cols_time_by_depth + cols_nodes_by_depth
shortcols = shortcols_time_by_depth + shortcols_nodes_by_depth

# Calculate stats for 6 trees instances by depth
curr_df = selected_time_df.loc[all6_instances_dict.keys(),cols]
for curr_size_ind in range(0,len(bb_classes_by_depth)):
    # print("{}".format(bb_classes_by_depth[curr_size_ind]))
    curr_by_depth_df = curr_df[curr_df.index.get_level_values(1) == sizes[curr_size_ind+1]] # take only best values

    for i in range(num_buckets):
        curr_by_depth_df = curr_by_depth_df[curr_by_depth_df[gur1time_col] > bucket_min[i]]
        avg_bb_by_depth_df.loc[
                (bb_classes_by_depth[curr_size_ind], bb_buckets_by_depth[i], bb_metrics_by_depth[0]),
                (time_col_header,shortcols_time_by_depth)] = \
            [geometric_mean(curr_by_depth_df[col] + SHIFT_TIME) - SHIFT_TIME for col in cols_time_by_depth]

        # display(avg_bb_by_depth_df.loc[
        #         (bb_classes_by_depth[curr_size_ind], bb_buckets_by_depth[i], bb_metrics_by_depth[0]),
        #         (time_col_header,shortcols_time_by_depth)].head())
        avg_bb_by_depth_df.loc[
                (bb_classes_by_depth[curr_size_ind], bb_buckets_by_depth[i], bb_metrics_by_depth[0]),
                (node_col_header,shortcols_nodes_by_depth)] = \
            [geometric_mean(curr_by_depth_df[col] + SHIFT_NODES) - SHIFT_NODES for col in cols_nodes_by_depth]
        
        # print("row {:d}: {:d}".format(row_ind,len(curr_by_depth_df)))

        num_inst_by_depth[row_ind:row_ind+len(bb_metrics_by_depth)] = len(bb_metrics_by_depth)*[len(curr_by_depth_df)]
        row_ind += len(bb_metrics_by_depth)

        ## Update wins1 rows
        # A win in terms of time is counted when the ``Gur1'' baseline seconds taken 
        # is at least 10\% slower, to account for some variability in runtimes.
        # A win in terms of nodes is when the ``Gur1'' baseline number of nodes is higher.
        refcol = gur1time_col
        avg_bb_by_depth_df.loc[
                (bb_classes_by_depth[curr_size_ind], bb_buckets_by_depth[i], bb_metrics_by_depth[1]),
                (time_col_header,shortcols_time_by_depth)] = \
            [ int(sum(curr_by_depth_df[refcol] > 1.1*curr_by_depth_df[col])) for col in cols_time_by_depth ]

        refcol = gur1nodes_col
        avg_bb_by_depth_df.loc[
                (bb_classes_by_depth[curr_size_ind], bb_buckets_by_depth[i], bb_metrics_by_depth[1]),
                (node_col_header,shortcols_nodes_by_depth)] = \
            [ int(sum(curr_by_depth_df[refcol] > curr_by_depth_df[col])) for col in cols_nodes_by_depth ]

avg_bb_by_depth_df[inst_col_name] = num_inst_by_depth

# for i in range(num_buckets):
#     curr_df = curr_df[curr_df[gur1time_col] > bucket_min[i]]
    
display(avg_bb_by_depth_df.loc[(bb_classes_by_depth, bb_buckets_by_depth, bb_metrics_by_depth),:])

Unnamed: 0_level_0,Unnamed: 1_level_0,criterion,Time (s),Time (s),Time (s),Nodes (\#),Nodes (\#),# inst
Unnamed: 0_level_1,Unnamed: 1_level_1,type,Gur1,V,Total,Gur1,V,Unnamed: 8_level_1
class,bucket,metric,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
2 leaves,"[0,3600)",Gmean,59.209529,60.741787,61.39609,6895.270957,7066.365171,205
2 leaves,"[0,3600)",Wins1,0.0,58.0,42.0,0.0,83.0,205
2 leaves,"[10,3600)",Gmean,227.322365,235.350047,236.44707,63361.97256,64518.719785,88
2 leaves,"[10,3600)",Wins1,0.0,33.0,33.0,0.0,49.0,88
2 leaves,"[100,3600)",Gmean,666.903984,611.063147,612.293569,249369.24813,220617.784595,48
2 leaves,"[100,3600)",Wins1,0.0,21.0,21.0,0.0,33.0,48
2 leaves,"[1000,3600)",Gmean,2228.979723,2082.886399,2085.305034,325503.868603,286491.795016,16
2 leaves,"[1000,3600)",Wins1,0.0,7.0,7.0,0.0,12.0,16
4 leaves,"[0,3600)",Gmean,59.209529,63.799957,65.797478,6895.270957,7308.959531,205
4 leaves,"[0,3600)",Wins1,0.0,60.0,36.0,0.0,83.0,205


# Section 4: Objective and time analysis

## `obj_and_time_df`: objectives, successes, fails, and time per obj or cut

In [388]:
inst_set = best_gap_df.index
# inst_set = ['10teams_presolved', 'bm23_presolved', 'vpm1_presolved']

# Define rows to add
inst_depth_set = [(inst, best_gap_df.loc[inst, 'BEST VPC DISJ']) for inst in inst_set]

# Define columns to add
fail_rate_col_name = 'Fail rate (%)'
time_col_name = 'Time (s)'
sec_per_obj_col_name = '(s) / obj'
sec_per_cut_col_name = '(s) / cut'
obj_and_time_new_cols = [
    fail_rate_col_name,
    time_col_name,
    sec_per_obj_col_name,
    sec_per_cut_col_name,
]

obj_and_time_df = df.loc[inst_depth_set,['NUM OBJ', 'NUM CUTS', 'NUM FAILS']].copy(deep=True)
obj_and_time_df[fail_rate_col_name] = 100. * obj_and_time_df['NUM FAILS'] / obj_and_time_df['NUM OBJ']
obj_and_time_df[time_col_name] = df['VPC_GEN_TIME']
obj_and_time_df[sec_per_obj_col_name] = obj_and_time_df[time_col_name] / obj_and_time_df['NUM OBJ']
obj_and_time_df[sec_per_cut_col_name] = obj_and_time_df[time_col_name] / obj_and_time_df['NUM CUTS']

# Replace Fail rate = NaN when all cuts are one-sided cuts
SKIP_CHAR = '-'
obj_and_time_df.fillna(SKIP_CHAR, inplace = True)
obj_and_time_df.replace(np.inf, SKIP_CHAR, inplace = True)

# Add average row
# obj_and_time_df.loc['Average'] = 0
obj_and_time_df.loc['Average', obj_and_time_new_cols] =\
    [obj_and_time_df[obj_and_time_df[col] != SKIP_CHAR][col].mean() for col in obj_and_time_new_cols]
# for col in obj_and_time_new_cols:
#     obj_and_time_df.at[('Average',0),col] =\
#         obj_and_time_df[obj_and_time_df[col] != SKIP_CHAR][col].mean()

display(obj_and_time_df)
# obj_and_time_df[obj_and_time_df['NUM CUTS'] == 0]
# obj_and_time_df[obj_and_time_df['(s) / obj'] > 100000]
# obj_and_time_df.loc['neos18_presolved']
# obj_and_time_df[obj_and_time_df['(s) / obj'] != SKIP_CHAR]['(s) / obj'].max()

Unnamed: 0_level_0,Unnamed: 1_level_0,NUM OBJ,NUM CUTS,NUM FAILS,Fail rate (%),Time (s),(s) / obj,(s) / cut
INSTANCE,disj_terms,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
10teams_presolved,2,133.0,74.0,59.0,44.360902,13.020000,0.097895,0.175946
23588_presolved,64,75.0,75.0,0.0,0.0,73.490000,0.979867,0.979867
30n20b8_presolved,4,198.0,190.0,8.0,4.040404,378.320000,1.910707,1.991158
50v-10_presolved,64,29.0,29.0,0.0,0.0,10.280000,0.354483,0.354483
a1c1s1_presolved,64,10.0,4.0,6.0,60.0,10.430000,1.043,2.607500
...,...,...,...,...,...,...,...,...
usAbbrv-8-25_70_presolved,8,5.0,4.0,1.0,20.0,1.310000,0.262,0.327500
vpm1_presolved,64,29.0,9.0,20.0,68.965517,0.320000,0.011034,0.035556
vpm2_presolved,32,30.0,25.0,5.0,16.666667,0.210000,0.007,0.008400
zib54-UUE_presolved,64,72.0,56.0,16.0,22.222222,143.970000,1.999583,2.570893


In [143]:
# cuts_cols = [col for col in df.columns if col.startswith('NUM CUTS')]
# time_cols = [
#     'INIT_SOLVE_TIME',
#     'VPC_GEN_TIME',
#     'VPC_APPLY_TIME',
#     'BB_TIME',
#     'TOTAL_TIME'
# ]
# display(df.loc['bell3b_presolved',['NUM OBJ', 'NUM FAILS'] + cuts_cols])

# obj_and_time_df = df.loc[inst_depth_set].copy(deep = True)['NUM OBJ', 'NUM CUTS', 'NUM FAILS', 'VPC_GEN_TIME']
# display(obj_and_time_df)

# max_diff_time = 0.
# max_diff_inst = ''
# for inst in best_gap_df.index:
#     depth = best_gap_df.loc[inst, 'BEST VPC DISJ']
#     curr_num_obj   = df.loc[(inst,depth)]['NUM OBJ']
#     curr_num_vpc   = df.loc[(inst,depth)]['NUM VPC']
#     curr_num_1side = df.loc[(inst,depth)]['NUM CUTS ONE_SIDED']
#     curr_num_fails = df.loc[(inst,depth)]['NUM FAILS']
#     if curr_num_vpc + curr_num_fails != curr_num_obj + curr_num_1side:
#         raise ValueError("{}: curr_num_vpc ({:d}) + curr_num_fails ({:d}) != curr_num_obj ({:d}) + curr_num_1side ({:d})".format(inst, curr_num_vpc, curr_num_fails, curr_num_obj, curr_num_1side))
    
#     curr_fail_pct = 100. * curr_num_fails / curr_num_obj
#     curr_init_solve = df.loc[(inst,depth)]['INIT_SOLVE_TIME']
#     curr_vpc_gen = df.loc[(inst,depth)]['VPC_GEN_TIME']
#     curr_vpc_apply = df.loc[(inst,depth)]['VPC_APPLY_TIME']
#     curr_bb_time = df.loc[(inst,depth)]['BB_TIME']
#     curr_total_time = df.loc[(inst,depth)]['TOTAL_TIME']

#     curr_diff_time = curr_total_time - (curr_init_solve + curr_vpc_gen + curr_vpc_apply + curr_bb_time)
#     if curr_diff_time < -EPS:
#         display(df.loc[inst,time_cols])
#         raise ValueError("{} (depth {:d}): curr_diff_time {} < 0.".format(inst,depth,curr_diff_time))
    
#     if max_diff_time < curr_diff_time:
#         max_diff_inst = inst
#         max_diff_time = curr_diff_time

# print("Max diff time = {} for inst {}".format(max_diff_time,max_diff_inst))
# display(df.loc[(max_diff_inst,best_gap_df.loc[max_diff_inst, 'BEST VPC DISJ']),time_cols])


Unnamed: 0_level_0,Unnamed: 1_level_0,NUM OBJ,NUM CUTS,NUM FAILS,VPC_GEN_TIME
INSTANCE,disj_terms,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
10teams_presolved,2,133,74,59,13.02
bm23_presolved,64,6,6,0,0.13
vpm1_presolved,64,29,9,20,0.32


# Section 5: Export tables to LaTeX

## Format Table 1: gap closed and num wins

In [517]:
# Format Table 1: gap closed and num wins

# Create copy of table then remove values we do not want (wins for 'G)
# TABLE1 = avg_df.copy(deep=True)[[inst_col_name, 'G', 'DB', 'V', 'V+G', 'GurF', 'V+GurF', 'GurL', 'V+GurL']]
TABLE1 = avg_gap_df.copy(deep=True)[[inst_col_name]+gap_cols_short]

TABLE1['G'].loc[:,wins_row_name] = ""

# Process the column with # inst to only report number of instances for each set
TABLE1[inst_col_name].loc[:,wins_row_name] = ""
val = TABLE1[inst_col_name].loc[all_set_name,avg_row_name]
TABLE1[inst_col_name].loc[all_set_name,avg_row_name] = \
    create_multirow_string(str(val), extra_format=r"\tablenum[table-format=3]")
val = TABLE1[inst_col_name].loc[good_vpc_set_name,avg_row_name]
TABLE1[inst_col_name].loc[good_vpc_set_name,avg_row_name] = \
    create_multirow_string(str(val), extra_format=r"\tablenum[table-format=3]")

# Reset index to appear as cols
TABLE1.reset_index(inplace=True)

# Place column with # inst as second column
inst_col = TABLE1[inst_col_name]
TABLE1.drop(columns=[inst_col_name], inplace=True)
TABLE1.insert(loc=1, column=inst_col_name, value=inst_col)

# Set column should have multirow
setseries = TABLE1['Set']
format_col_as_multirow(setseries)

# for i in TABLE1.index:
#     curr_name = tex_escape(str(i))
#     print("Changing {} to {}".format(i, curr_name))
#     TABLE1.rename({i: curr_name}, inplace=True)
# print("")

# If we are not using the automatic tex-escaper, we need to do it ourselves
for col in TABLE1.columns:
    # curr_col = '{' + tex_escape(col) + '}'
    curr_col = tex_escape(str(col))
    TABLE1.rename({col: curr_col}, inplace=True, axis=1)

# Finally, apply the desired style
# styler.format({
#     ("Numeric", "Integers"): '\${}',
#     ("Numeric", "Floats"): '{:.3f}',
#     ("Non-Numeric", "Strings"): str.upper
# })
# styler.format_index(escape="latex", axis=0).format_index(escape="latex", axis=1)
# styler.hide(level=0,axis=0)
table1_str = TABLE1.style.\
    hide(axis=0).\
    format(formatter = int_format).\
    to_latex(
        #@{}l@{\hskip 5pt}
        column_format="""
        @{}l@{}
        S[table-format=2.0,table-auto-round,table-number-alignment=center]
        l
        *{1}{S[table-auto-round]}
        *{7}{S[table-auto-round]}
        @{}""",
        hrules = True,
        #clines = "skip-last;data",
        sparse_index = True,
        multirow_align = "c",
        # float_format="%.2f", 
        # escape=False, 
        siunitx=True,
        # index_names=False,
        #columns=['\# inst', 'G', 'DB', 'V', 'V+G', 'GurF', 'V+GurF', 'GurL', 'V+GurL']
        convert_css = True,
        environment = "table",
        position_float = "centering",
        label = "tab:gap-closed-summary",
        caption = """
            Summary statistics for percent gap closed by VPCs.
            The wins row reports how many instances close at least $\epsilon$ more gap using DB, V, V+G compared to G on its own, V+GurF compared to GurF, and V+GurL compared to GurL.
        """,
        )

# Add a midrule between the two sets; the "3" is hand-coded but can be automated
table1_str = add_midrule(table1_str, -3)

# Adjustbox environment sets width to pagewidth
table1_str = add_adjustbox_environment(table1_str)

# Set default siunitx options for this table
table1_str = add_sisetup(table1_str)

print(table1_str)


{
\sisetup{
    table-alignment-mode = format,
    table-number-alignment = center,
    table-format = 2.2,
\begin{table}
\centering
\caption{
            Summary statistics for percent gap closed by VPCs.
            The wins row reports how many instances close at least $\epsilon$ more gap using DB, V, V+G compared to G on its own, V+GurF compared to GurF, and V+GurL compared to GurL.
        }
\label{tab:gap-closed-summary}
\begin{adjustbox}{width=1\textwidth}
\begin{tabular}{@{}l@{}
        S[table-format=2.0,table-auto-round,table-number-alignment=center]
        l
        *{1}{S[table-auto-round]}
        *{7}{S[table-auto-round]}
        @{}}
\toprule
{Set} & {\# inst} & {} & {G} & {DB} & {V} & {V+G} & {GurF} & {V+GurF} & {GurL} & {V+GurL} \\
\midrule
{\multirow[c]{2}{*}{All}} & {\multirow[c]{2}{*}{\tablenum[table-format=3]{332}}} & Avg (\%) & 14.12974966814919 & 15.28107991803026 & 9.673803234934525 & 19.98959181405679 & 23.826538838783573 & 30.360311183322917 & 41.48961234419

## Format Table 2: depth x gap

In [518]:
# Format Table 2: percent gap closed by depth
TABLE2 = gap_by_size_df.copy(deep=True)

# If we are not using the automatic tex-escaper, we need to do it ourselves
for col in TABLE2.columns:
    # curr_col = '{' + tex_escape(col) + '}'
    curr_col = tex_escape(str(col))
    TABLE2.rename({col: curr_col}, inplace=True, axis=1)

# Finally, apply the desired style
table2_str = TABLE2.style.\
    format(formatter = int_format).\
    to_latex(
        column_format="""
        @{}l
        *{5}{S[table-auto-round]}
        @{}""",
        hrules = True,
        sparse_index = True,
        multirow_align = "c",
        siunitx=True,
        convert_css = True,
        environment = "table",
        position_float = "centering",
        label = "tab:depth",
        caption = """
            Average percent gap closed broken down by the number of leaf nodes used to construct the partial branch-and-bound tree,
            for VPCs with and without GMICs, as well as at the root by \Gurobi{} after the first and last round of cuts. 
            ``0 leaves'' refers to the percent gap closed when no VPCs are used.
            ``Best'' refers to the maximum gap closed across all partial tree sizes.
        """,
        )

# Adjustbox environment sets width to pagewidth
# table2_str = add_adjustbox_environment(table2_str)

# Set default siunitx options for this table
table2_str = add_sisetup(table2_str)

print(table2_str)


{
\sisetup{
    table-alignment-mode = format,
    table-number-alignment = center,
    table-format = 2.2,
\begin{table}
\centering
\caption{
            Average percent gap closed broken down by the number of leaf nodes used to construct the partial branch-and-bound tree,
            for VPCs with and without GMICs, as well as at the root by \Gurobi{} after the first and last round of cuts. 
            ``0 leaves'' refers to the percent gap closed when no VPCs are used.
            ``Best'' refers to the maximum gap closed across all partial tree sizes.
        }
\label{tab:depth}
\begin{tabular}{@{}l
        *{5}{S[table-auto-round]}
        @{}}
\toprule
{} & {DB} & {V} & {V+G} & {V+GurF} & {V+GurL} \\
\midrule
0 leaves & 0.0 & 0.0 & 14.12974966814919 & 23.826538838783573 & 41.48961234419237 \\
2 leaves & 2.27857304557643 & 1.7462086510922106 & 14.75740921790148 & 27.065253626811735 & 42.85819360664284 \\
4 leaves & 4.086871715069517 & 2.7872907006953263 & 15.226601874455058 & 27

## Format Table 3: summary of b&b results

In [49]:
# Format Table 3: summary of b&b results
TABLE3 = avg_bb_df.copy(deep=True)

# Remove unnecessary entries
TABLE3.loc[(slice(None), slice(None), bb_metrics[1:3]),([time_col_header,node_col_header],map_cols_to_short_time[gur1time_col])] = ""
TABLE3.loc[(slice(None), slice(None), bb_metrics[2]),([time_col_header,node_col_header],map_cols_to_short_time[gur7time_col])] = ""

# Process the column with # inst to only report number of instances for each set
TABLE3.loc[(slice(None), slice(None), bb_metrics[1:3]), inst_col_name] = ""

for curr_class in bb_classes:
    for curr_bucket in bb_buckets:
        curr_name = (curr_class, curr_bucket, bb_metrics[0])
        val = TABLE3.loc[curr_name, inst_col_name]
        TABLE3.loc[curr_name, inst_col_name] = \
            create_multirow_string(str(val), num_rows = 3, extra_format=r"\tablenum[table-format=3]")

# Set num wins in int format or enclose in braces (center)
# tmp_df = TABLE3.loc[(slice(None), slice(None), bb_metrics[1:3]),time_col_header].applymap(int_format, num_digits=6)
tmp_df = TABLE3.loc[(slice(None), slice(None), bb_metrics[1:3]),time_col_header].applymap(int_format, num_digits=4, add_phantom=True)
# tmp_df = TABLE3.loc[(slice(None), slice(None), bb_metrics[1:3]),time_col_header].applymap(enclose_in_braces)
tmp_df.columns = pd.MultiIndex.from_product([[time_col_header],tmp_df.columns])
TABLE3.loc[(slice(None), slice(None), bb_metrics[1:3]),time_col_header] = tmp_df

# tmp_df = TABLE3.loc[(slice(None), slice(None), bb_metrics[1:3]),node_col_header].applymap(int_format, num_digits=6)
tmp_df = TABLE3.loc[(slice(None), slice(None), bb_metrics[1:3]),node_col_header].applymap(int_format, num_digits=6, add_phantom=False)
# tmp_df = TABLE3.loc[(slice(None), slice(None), bb_metrics[1:3]),node_col_header].applymap(enclose_in_braces)
tmp_df.columns = pd.MultiIndex.from_product([[node_col_header],tmp_df.columns])
TABLE3.loc[(slice(None), slice(None), bb_metrics[1:3]),node_col_header] = tmp_df

# Reset index to appear as cols
TABLE3.reset_index(inplace=True)

# Add new col combining class and bucket in one
class_bucket_col = "\multirow{3}{*}{\shortstack[l]{" + TABLE3['class'] + "\\\\\\relax " + TABLE3['bucket'] + "}}"
for i in range(len(class_bucket_col)):
    if i%3!=0:
        class_bucket_col[i] = ""
TABLE3.drop(columns = ['class', 'bucket'], inplace = True, level = 0)
TABLE3.insert(loc=0, column="Set", value=class_bucket_col)

# Place column with # inst as second column
inst_col = TABLE3[inst_col_name]
TABLE3.drop(columns=[inst_col_name], inplace=True, level=0)
TABLE3.insert(loc=1, column=inst_col_name, value=inst_col)

# If we are not using the automatic tex-escaper, we need to do it ourselves
for col in TABLE3.columns:
    if isinstance(col, tuple):
        for lvl_ind, lvl_col in enumerate(col):
            curr_col = tex_escape(str(lvl_col))
            TABLE3.rename({lvl_col: curr_col}, inplace=True, axis=1, level=lvl_ind)
    else:
        # curr_col = '{' + tex_escape(col) + '}'
        curr_col = tex_escape(str(col))
        TABLE3.rename({col: curr_col}, inplace=True, axis=1)

# Finally, apply the desired style
    # format(formatter = int_format).\
table3_str = TABLE3.style.\
    hide(axis=0).\
    to_latex(
        column_format="""
        @{}l    % set
        c       % inst
        l       % stat
        *{6}{S[table-auto-round,table-format=4.2]}
        *{4}{S[table-auto-round,table-format=6.0]}
        @{}""",
        hrules = True,
        sparse_index = True,
        multirow_align = "c",
        siunitx=True,
        convert_css = True,
        environment = "table",
        position_float = "centering",
        label = "tab:bb-summary",
        caption = """
            Summary statistics for time to solve instances with branch-and-bound.
        """,
        )

# Add a midrule between the two sets; the "9" is hand-coded but can be automated
table3_str = add_midrule(table3_str, -13)

# Adjustbox environment sets width to pagewidth
table3_str = add_adjustbox_environment(table3_str)

# Set default siunitx options for this table
table3_str = add_sisetup(table3_str, table_format="4.2")

print(table3_str)


{
\sisetup{
    table-alignment-mode = format,
    table-number-alignment = center,
    table-format = 4.2,
\begin{table}
\centering
\caption{
            Summary statistics for time to solve instances with branch-and-bound.
        }
\label{tab:bb-summary}
\begin{adjustbox}{width=1\textwidth}
\begin{tabular}{@{}l    % set
        c       % inst
        l       % stat
        *{6}{S[table-auto-round,table-format=4.2]}
        *{4}{S[table-auto-round,table-format=6.0]}
        @{}}
\toprule
{Set} & {\# inst} & {metric} & \multicolumn{6}{r}{Time (s)} & \multicolumn{4}{r}{Nodes (\textbackslash{}\#)} \\
{} & {} & {} & {Gur1} & {Gur7} & {V} & {Total} & {V7} & {Total7} & {Gur1} & {Gur7} & {V} & {V7} \\
\midrule
\multirow{3}{*}{\shortstack[l]{All\\\relax [0,3600)}} & {\multirow[c]{3}{*}{\tablenum[table-format=3]{298}}} & Gmean & 102.496832 & 77.664428 & 86.197471 & 99.422309 & 83.747398 & 87.386714 & 8936.354957 & 6284.648795 & 7035.125878 & 6760.348067 \\
 &  & Wins1 &  & {\tablenum[table-f

  TABLE3.loc[curr_name, inst_col_name] = \


## Prepare Table 4: rejected instances

#### Verbose version

In [50]:
## *Verbose version*: For each instance that was not selected, print the reason
df_rejection_reason_rejected = df_rejection_reason[df_rejection_reason['SELECTED_GAP'] == False]
rejected_instance_list = df_rejection_reason_rejected.index
rejected_instance_list.name = 'Instance'
cols = ['Set', 'Reason']
df_rejected_instances = pd.DataFrame(columns=cols, index=rejected_instance_list)
df_rejected_instances['Set'] = df_ipopt.loc[rejected_instance_list,'SET']
df_rejected_instances.loc[df_rejection_reason_rejected['OPTIMAL_SOLUTION_FOUND'] > 0, 
                            'Reason'] = "Integer-optimal solution found constructing partial tree"
df_rejected_instances.loc[(df_rejection_reason_rejected['OPTIMAL_SOLUTION_FOUND'] == 0) 
                                & (df_rejection_reason_rejected['LP=DLB=DUB'] == 6), 
                            'Reason'] = "Max leaf value = LP value"
df_rejected_instances.loc[(df_rejection_reason_rejected['OPTIMAL_SOLUTION_FOUND'] == 0) 
                                & (df_rejection_reason_rejected['LP=DLB=DUB'] < 6) 
                                & (df_rejection_reason_rejected['LP=DLB=DUB'] + df_rejection_reason_rejected['PRLP_INFEASIBLE'] == 6), 
                            'Reason'] = "Max leaf value = LP value or PRLP primal infeasible"
df_rejected_instances.loc[(df_rejection_reason_rejected['OPTIMAL_SOLUTION_FOUND'] == 0)
                                & (df_rejection_reason_rejected['LP=DLB=DUB'] < 6) 
                                & (df_rejection_reason_rejected['LP=DLB=DUB'] + df_rejection_reason_rejected['PRLP_INFEASIBLE'] < 6)
                                & (df_rejection_reason_rejected['LP=DLB=DUB'] + df_rejection_reason_rejected['PRLP_INFEASIBLE'] + df_rejection_reason_rejected['PRLP_TIME_LIMIT'] == 6), 
                            'Reason'] = "Max leaf value = LP value or PRLP primal infeasible / hits time limit"
df_rejected_instances.loc[df_rejection_reason_rejected['<7_ATTEMPTS'] > 0, 
                            'Reason'] = "Numerical issues"
display(df_rejected_instances.head())
col_format = """@{}*{2}{l}X@{}"""

tmp_df_remaining_rejected_instances = df_rejection_reason.loc[df_rejected_instances[df_rejected_instances['Reason'].isna()].index]
if len(tmp_df_remaining_rejected_instances) > 0:
    display(tmp_df_remaining_rejected_instances)

Unnamed: 0_level_0,Set,Reason
Instance,Unnamed: 1_level_1,Unnamed: 2_level_1
22433_presolved,miplib2017,Integer-optimal solution found constructing pa...
air01_presolved,miplib2,Integer-optimal solution found constructing pa...
app1-1_presolved,miplib2017,Integer-optimal solution found constructing pa...
b-ball_presolved,miplib2017,Max leaf value = LP value or PRLP primal infea...
bnatt400_presolved,miplib2017,Max leaf value = LP value


Unnamed: 0_level_0,SELECTED_GAP,SELECTED_TIME,SELECTED_6TREES,NUM_WITH_OBJS,NUM_WITH_CUTS,IP_OPT_UNKNOWN,TOO_MANY_ROWS_OR_COLS,OPTIMAL_SOLUTION_FOUND,LP_OPT_IS_NOT_CUT,DLB=DUB,LP=DLB=DUB,PRLP_INFEASIBLE,PRLP_TIME_LIMIT,NO_CUTS,NO_GAP,GUR_TIMEOUT,<7_ATTEMPTS
Instance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
milo-v12-6-r2-40-1_presolved,False,True,True,6,0,False,False,0,0,0,0,0,0,True,False,False,False


#### Succinct version

In [51]:
## *Succinct version*: For each instance that was not selected, print the reason
df_rejected_instances = df_status_by_depth.loc[df_rejection_reason[df_rejection_reason['SELECTED_GAP'] == False].index]
df_rejected_instances.insert(loc = 0, column = 'Set', value = df_ipopt.loc[rejected_instance_list,'SET'])
col_format="""@{}*{2}{l}*{6}{c}@{}"""

### Print Table 4

In [52]:
# Format Table 4: rejected instances reasons
TABLE4 = df_rejected_instances.copy(deep=True)
TABLE4.reset_index(inplace=True)

TABLE4["Instance"] = TABLE4["Instance"].apply(remove_presolved_from_name)
TABLE4["Instance"] = TABLE4["Instance"].apply(tex_escape)

# If we are not using the automatic tex-escaper, we need to do it ourselves
for col in TABLE4.columns:
    # curr_col = '{' + tex_escape(col) + '}'
    curr_col = tex_escape(str(col))
    TABLE4.rename({col: curr_col}, inplace=True, axis=1)

# Finally, apply the desired style
    # format_index(escape="latex", axis=0).\
table4_str = TABLE4.style.\
    hide(axis=0).\
    to_latex(
        column_format=col_format,
        hrules = True,
        sparse_index = True,
        multirow_align = "c",
        siunitx=False,
        convert_css = True,
        environment = "table",
        position_float = "centering",
        label = "app:tab:discarded-instances",
        caption = """
            Instances that were not considered with reason for being discarded.
        """,
        )
        
print(table4_str)

\begin{table}
\centering
\caption{
            Instances that were not considered with reason for being discarded.
        }
\label{app:tab:discarded-instances}
\begin{tabular}{@{}*{2}{l}*{6}{c}@{}}
\toprule
Instance & Set & 2 & 4 & 8 & 16 & 32 & 64 \\
\midrule
22433 & miplib2017 &  &  & \ref{selection-criterion:partial-tree-does-not-find-opt} & \ref{selection-criterion:partial-tree-does-not-find-opt} & \ref{selection-criterion:partial-tree-does-not-find-opt} & \ref{selection-criterion:partial-tree-does-not-find-opt} \\
air01 & miplib2 & \ref{selection-criterion:partial-tree-does-not-find-opt} & \ref{selection-criterion:partial-tree-does-not-find-opt} & \ref{selection-criterion:partial-tree-does-not-find-opt} & \ref{selection-criterion:partial-tree-does-not-find-opt} & \ref{selection-criterion:partial-tree-does-not-find-opt} & \ref{selection-criterion:partial-tree-does-not-find-opt} \\
app1-1 & miplib2017 &  &  &  &  & \ref{selection-criterion:partial-tree-does-not-find-opt} & \ref{sel

### DEBUG: Test Table 4 code and make sure "set" is properly identified

In [384]:
#### DEBUG
# df_rejection_reason[df_rejection_reason['NUM_WITH_OBJS'] != df_rejection_reason['NUM_WITH_CUTS']]
# df_rejection_reason[(df_rejection_reason['NUM_WITH_CUTS'] > 0) & (df_rejection_reason['DLB=DUB'] > 0) & (df_rejection_reason['OPTIMAL_SOLUTION_FOUND'] == 0)]
# df_rejection_reason[(df_rejection_reason['LP=DLB=DUB'] == 6)]

# inst = 'chromaticindex32-8_presolved'
# # df_rejection_reason.loc[inst]
# tmp = df_bb.loc[(inst,64)]
# tmp[25:50]

# len(df_rejection_reason[df_rejection_reason['SELECTED'] == True])
# inst = 'berlin_5_8_0_presolved'
# gap_df.loc[inst]
#df_rejection_reason.loc['bnatt400_presolved']

Unnamed: 0_level_0,SELECTED,NUM_WITH_OBJS,NUM_WITH_CUTS,IP_OPT_UNKNOWN,TOO_MANY_ROWS_OR_COLS,OPT_SOL_FOUND_BY_PARTIAL,LP_OPT_IS_NOT_CUT,DLB=DUB,LP=DLB=DUB,PRLP_INFEASIBLE,PRLP_TIMELIMIT,NO_CUTS,GUR_TIMEOUT,<7_ATTEMPTS
Instance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1


In [53]:
##### DEBUG: Verify "Set" col is correct
for inst in rejected_instance_list:
    curr_set = df_ipopt.loc[inst,'SET']
    has_error = False
    if isinstance(curr_set, pd.Series):
        # check that all sets are same, then just take first
        first_set = curr_set[0]
        for tmp_set in curr_set:
            if tmp_set != first_set:
                print("*** ERROR: not all sets are equal ({} != {})".format(first_set, tmp_set))
                has_error = True
                break
        curr_set = first_set
    ref_set = df_rejected_instances.loc[inst, 'Set']
    if ref_set != curr_set:
        print("*** ERROR: for inst {}, df_rej_inst set {} != df_ipopt set {}".format(inst, ref_set, curr_set))
        has_error = True
    
    if has_error:
        break

## Format Table 5: full gap closed results

In [54]:
# Format Table 5: full gap closed results
TABLE5 = all_gap_results_df.copy(deep=True)

# Set wins row to be integer valued
TABLE5.loc['Wins'] = TABLE5.loc['Wins'].apply(int_format)
# TABLE5.iloc[len(TABLE5)-1] = TABLE5.iloc[len(TABLE5)-1].apply(int_format)

# Move instance names into a column
TABLE5.reset_index(inplace=True, col_level=1)

TABLE5[('',"Instance")] = TABLE5[('',"Instance")].apply(remove_presolved_from_name)
TABLE5[('',"Instance")] = TABLE5[('',"Instance")].apply(tex_escape)

# If we are not using the automatic tex-escaper, we need to do it ourselves
for col in TABLE5.columns:
    # curr_col = '{' + tex_escape(col) + '}'
    curr_col = tex_escape(str(col))
    TABLE5.rename({col: curr_col}, inplace=True, axis=1)

# Finally, apply the desired style
    # format(formatter = int_format).\
table5_str = TABLE5.style.\
    hide(axis=0).\
    to_latex(
        column_format="""@{}l*{2}{c}*{2}{c}H*{8}{c}@{}""",
        hrules = True,
        sparse_index = True,
        multirow_align = "c",
        siunitx=True,
        convert_css = True,
        environment = "table",
        position_float = "centering",
        label = "app:tab:gap-closed",
        caption = """
            Percent gap closed by instance for GMICs (G), VPCs (V), both VPCs and GMICs used together, 
            and the bound implied by the partial branch-and-bound tree with 64 leaf nodes (DB).
            Also shown are the sizes of the instances, the number of cuts added, and the percent gap closed by 
            \Gurobi{} at the root (after one round (GurF) and after the last round (GurL)). 
            Entries in which DB appears to be 0.00 are actually small strictly positive numbers.
        """,
        )

# Adjustbox environment sets width to pagewidth
# table2_str = add_adjustbox_environment(table2_str)

# Set default siunitx options for this table
table5_str = add_sisetup(table5_str)

# Add a midrule between the instances and 3 summary rows; the "5" is hand-coded but can be automated
table5_str = add_midrule(table5_str, -5)

print(table5_str)


{
\sisetup{
    table-alignment-mode = format,
    table-number-alignment = center,
    table-format = 2.2,
\begin{table}
\centering
\caption{
            Percent gap closed by instance for GMICs (G), VPCs (V), both VPCs and GMICs used together, 
            and the bound implied by the partial branch-and-bound tree with 64 leaf nodes (DB).
            Also shown are the sizes of the instances, the number of cuts added, and the percent gap closed by 
            \Gurobi{} at the root (after one round (GurF) and after the last round (GurL)). 
            Entries in which DB appears to be 0.00 are actually small strictly positive numbers.
        }
\label{app:tab:gap-closed}
\begin{tabular}{@{}l*{2}{c}*{2}{c}H*{8}{c}@{}}
\toprule
\multicolumn{3}{r}{} & \multicolumn{2}{r}{# cuts} & \multicolumn{8}{r}{% gap closed} \\
{Instance} & {Rows} & {Cols} & {G} & {V} & {G} & {DB} & {V} & {V+G} & {GurF} & {V+GurF} & {GurL} & {V+GurL} \\
\midrule
10teams & 210 & 1600 & 153 & 74 & 100.000000 & 0.0000

## Format Table 6: "all" time/nodes results

In [55]:
# Format Table 6: "all" time/nodes results
TABLE6 = all_bb_results_df.copy(deep=True)

# Set wins row to be integer valued
TABLE6.loc['Wins1'] = TABLE6.loc['Wins1'].apply(int_format)
TABLE6.loc['Wins7'] = TABLE6.loc['Wins7'].apply(int_format)
# TABLE6.iloc[len(TABLE6)-1] = TABLE6.iloc[len(TABLE6)-1].apply(int_format)

# Move instance names into a column
TABLE6.reset_index(inplace=True, col_level=1)

TABLE6[('',"Instance")] = TABLE6[('',"Instance")].apply(remove_presolved_from_name)
TABLE6[('',"Instance")] = TABLE6[('',"Instance")].apply(tex_escape)

# If we are not using the automatic tex-escaper, we need to do it ourselves
for col in TABLE6.columns:
    # curr_col = '{' + tex_escape(col) + '}'
    curr_col = tex_escape(str(col))
    TABLE6.rename({col: curr_col}, inplace=True, axis=1)

# Finally, apply the desired style
    # format(formatter = int_format).\
table6_str = TABLE6.style.\
    hide(axis=0).\
    to_latex(
        column_format="""@{}l*{2}{c}*{2}{c}H*{8}{c}@{}""",
        hrules = True,
        sparse_index = True,
        multirow_align = "c",
        siunitx=True,
        convert_css = True,
        environment = "table",
        position_float = "centering",
        label = "app:tab:bb",
        caption = """
            Time (in seconds) and number nodes taken to solve each instance.
            The table is sorted by column 4 (``V'' under ``Time (s)'').
            ``Gur1'' indicates \Gurobi{} run with one random seed.
            ``Gur7'' indicates the minimum from seven runs of \Gurobi{} with different random seeds.
        """,
        )

# Adjustbox environment sets width to pagewidth
# table6_str = add_adjustbox_environment(table6_str)

# Set default siunitx options for this table
table6_str = add_sisetup(table6_str)

# Add a midrule between the instances and 3 summary rows; the "6" is hand-coded but can be automated
table6_str = add_midrule(table6_str, -6)

print(table6_str)


{
\sisetup{
    table-alignment-mode = format,
    table-number-alignment = center,
    table-format = 2.2,
\begin{table}
\centering
\caption{
            Time (in seconds) and number nodes taken to solve each instance.
            The table is sorted by column 4 (``V'' under ``Time (s)'').
            ``Gur1'' indicates \Gurobi{} run with one random seed.
            ``Gur7'' indicates the minimum from seven runs of \Gurobi{} with different random seeds.
        }
\label{app:tab:bb}
\begin{tabular}{@{}l*{2}{c}*{2}{c}H*{8}{c}@{}}
\toprule
\multicolumn{3}{r}{} & {# cuts} & \multicolumn{6}{r}{Time (s)} & \multicolumn{4}{r}{Nodes (\#)} \\
{Instance} & {Rows} & {Cols} & {V} & {Gur1} & {Gur7} & {V} & {Total} & {V7} & {Total7} & {Gur1} & {Gur7} & {V} & {V7} \\
\midrule
neos-796608 & 64 & 104 & 0 & 0.002000 & 0.001000 & 0.002000 & 0.022000 & 0.002000 & 0.002000 & 1 & 1 & 1 & 1 \\
neos-530627 & 60 & 70 & 0 & 0.003000 & 0.002000 & 0.003000 & 0.003000 & 0.003000 & 0.003000 & 1 & 1 & 1 & 1 \\
ne

## Format Table 7: "6 trees" time/nodes results

In [56]:
# Format Table 7: "6 trees" time/nodes results
TABLE7 = all6_bb_results_df.copy(deep=True)

# Set wins row to be integer valued
TABLE7.loc['Wins1'] = TABLE7.loc['Wins1'].apply(int_format)
TABLE7.loc['Wins7'] = TABLE7.loc['Wins7'].apply(int_format)
# TABLE7.iloc[len(TABLE7)-1] = TABLE7.iloc[len(TABLE7)-1].apply(int_format)

# Move instance names into a column
TABLE7.reset_index(inplace=True, col_level=1)

TABLE7[('',"Instance")] = TABLE7[('',"Instance")].apply(remove_presolved_from_name)
TABLE7[('',"Instance")] = TABLE7[('',"Instance")].apply(tex_escape)

# If we are not using the automatic tex-escaper, we need to do it ourselves
for col in TABLE7.columns:
    # curr_col = '{' + tex_escape(col) + '}'
    curr_col = tex_escape(str(col))
    TABLE7.rename({col: curr_col}, inplace=True, axis=1)

# Finally, apply the desired style
    # format(formatter = int_format).\
table7_str = TABLE7.style.\
    hide(axis=0).\
    to_latex(
        column_format="""@{}l*{2}{c}*{2}{c}H*{8}{c}@{}""",
        hrules = True,
        sparse_index = True,
        multirow_align = "c",
        siunitx=True,
        convert_css = True,
        environment = "table",
        position_float = "centering",
        label = "app:tab:bb-7trees",
        caption = """
  Time (in seconds) and number nodes taken to solve each of the instances for which all six branch-and-bound trees successfully yielded VPCs.
  %The columns with V1x are those in which we do not terminate the VPC computation as soon as the time exceeds \Gurobi{}'s time.  
  The table is sorted by column 4 (``V7'' under ``Time (s)'').
  ``Gur1'' indicates Gurobi run with one random seed.
  ``Gur7'' indicates the minimum from seven runs of Gurobi with different random seeds.
        """,
        )

# Adjustbox environment sets width to pagewidth
# table7_str = add_adjustbox_environment(table7_str)

# Set default siunitx options for this table
table7_str = add_sisetup(table7_str)

# Add a midrule between the instances and 3 summary rows; the "6" is hand-coded but can be automated
table7_str = add_midrule(table7_str, -6)

print(table7_str)


{
\sisetup{
    table-alignment-mode = format,
    table-number-alignment = center,
    table-format = 2.2,
\begin{table}
\centering
\caption{
  Time (in seconds) and number nodes taken to solve each of the instances for which all six branch-and-bound trees successfully yielded VPCs.
  %The columns with V1x are those in which we do not terminate the VPC computation as soon as the time exceeds \Gurobi{}'s time.  
  The table is sorted by column 4 (``V7'' under ``Time (s)'').
  ``Gur1'' indicates Gurobi run with one random seed.
  ``Gur7'' indicates the minimum from seven runs of Gurobi with different random seeds.
        }
\label{app:tab:bb-7trees}
\begin{tabular}{@{}l*{2}{c}*{2}{c}H*{8}{c}@{}}
\toprule
\multicolumn{3}{r}{} & {# cuts} & \multicolumn{6}{r}{Time (s)} & \multicolumn{4}{r}{Nodes (\#)} \\
{Instance} & {Rows} & {Cols} & {V} & {Gur1} & {Gur7} & {V} & {Total} & {V7} & {Total7} & {Gur1} & {Gur7} & {V} & {V7} \\
\midrule
neos-530627 & 60 & 70 & 0 & 0.003000 & 0.002000 & 0.00300

## Format Table 8: b&b summary by depth

In [72]:
# Format Table 8: summary of b&b results
TABLE8 = avg_bb_by_depth_df.copy(deep=True)

# Remove unnecessary entries
TABLE8.loc[(slice(None), slice(None), bb_metrics_by_depth[1:]),([time_col_header,node_col_header],map_cols_to_short_time[gur1time_col])] = ""
# TABLE8.loc[(slice(None), slice(None), bb_metrics_by_depth[2]),([time_col_header,node_col_header],map_cols_to_short_time[gur7time_col])] = ""

# Process the column with # inst to only report number of instances for each set
TABLE8.loc[(slice(None), slice(None), bb_metrics_by_depth[1:]), inst_col_name] = ""

for curr_class in bb_classes_by_depth:
    for curr_bucket in bb_buckets_by_depth:
        curr_name = (curr_class, curr_bucket, bb_metrics_by_depth[0])
        val = TABLE8.loc[curr_name, inst_col_name]
        TABLE8.loc[curr_name, inst_col_name] = \
            create_multirow_string(str(val), num_rows = 2, extra_format=r"\tablenum[table-format=3]")

# Set num wins in int format or enclose in braces (center)
# tmp_df = TABLE8.loc[(slice(None), slice(None), bb_metrics_by_depth[1:3]),time_col_header].applymap(int_format, num_digits=6)
tmp_df = TABLE8.loc[(slice(None), slice(None), bb_metrics_by_depth[1:3]),time_col_header].applymap(int_format, num_digits=4, add_phantom=True)
# tmp_df = TABLE8.loc[(slice(None), slice(None), bb_metrics_by_depth[1:3]),time_col_header].applymap(enclose_in_braces)
tmp_df.columns = pd.MultiIndex.from_product([[time_col_header],tmp_df.columns])
TABLE8.loc[(slice(None), slice(None), bb_metrics_by_depth[1:3]),time_col_header] = tmp_df

# tmp_df = TABLE8.loc[(slice(None), slice(None), bb_metrics_by_depth[1:3]),node_col_header].applymap(int_format, num_digits=6)
tmp_df = TABLE8.loc[(slice(None), slice(None), bb_metrics_by_depth[1:3]),node_col_header].applymap(int_format, num_digits=6, add_phantom=False)
# tmp_df = TABLE8.loc[(slice(None), slice(None), bb_metrics_by_depth[1:3]),node_col_header ].applymap(enclose_in_braces)
tmp_df.columns = pd.MultiIndex.from_product([[node_col_header],tmp_df.columns])
TABLE8.loc[(slice(None), slice(None), bb_metrics_by_depth[1:3]),node_col_header] = tmp_df

# Reset index to appear as cols
TABLE8.reset_index(inplace=True)

# Add new col combining class and bucket in one
class_bucket_col = "\multirow{2}{*}{\shortstack[l]{" + TABLE8['class'] + "\\\\\\relax " + TABLE8['bucket'] + "}}"
for i in range(len(class_bucket_col)):
    if i%len(bb_metrics_by_depth)!=0:
        class_bucket_col[i] = ""
TABLE8.drop(columns = ['class', 'bucket'], inplace = True, level = 0)
TABLE8.insert(loc=0, column="Set", value=class_bucket_col)

# Place column with # inst as second column
inst_col = TABLE8[inst_col_name]
TABLE8.drop(columns=[inst_col_name], inplace=True, level=0)
TABLE8.insert(loc=1, column=inst_col_name, value=inst_col)

# If we are not using the automatic tex-escaper, we need to do it ourselves
for col in TABLE8.columns:
    if isinstance(col, tuple):
        for lvl_ind, lvl_col in enumerate(col):
            curr_col = tex_escape(str(lvl_col))
            TABLE8.rename({lvl_col: curr_col}, inplace=True, axis=1, level=lvl_ind)
    else:
        # curr_col = '{' + tex_escape(col) + '}'
        curr_col = tex_escape(str(col))
        TABLE8.rename({col: curr_col}, inplace=True, axis=1)

# Finally, apply the desired style
    # format(formatter = int_format).\
table8_str = TABLE8.style.\
    hide(axis=0).\
    to_latex(
        column_format="""
        @{}l    % set
        c       % inst
        l       % stat
        *{6}{S[table-auto-round,table-format=4.2]}
        *{4}{S[table-auto-round,table-format=6.0]}
        @{}""",
        hrules = True,
        sparse_index = True,
        multirow_align = "c",
        siunitx=True,
        convert_css = True,
        environment = "table",
        position_float = "centering",
        label = "tab:bb-summary",
        caption = """
            Summary statistics for time to solve instances with branch-and-bound.
        """,
        )

# Add a midrule between the two sets; the "9" is hand-coded but can be automated
table8_str = add_midrule(table8_str, -41)
table8_str = add_midrule(table8_str, -33)
table8_str = add_midrule(table8_str, -25)
table8_str = add_midrule(table8_str, -17)
table8_str = add_midrule(table8_str, -9)

# Adjustbox environment sets width to pagewidth
table8_str = add_adjustbox_environment(table8_str)

# Set default siunitx options for this table
table8_str = add_sisetup(table8_str, table_format="4.2")

print(table8_str)

  TABLE8.loc[curr_name, inst_col_name] = \



{
\sisetup{
    table-alignment-mode = format,
    table-number-alignment = center,
    table-format = 4.2,
\begin{table}
\centering
\caption{
            Summary statistics for time to solve instances with branch-and-bound.
        }
\label{tab:bb-summary}
\begin{adjustbox}{width=1\textwidth}
\begin{tabular}{@{}l    % set
        c       % inst
        l       % stat
        *{6}{S[table-auto-round,table-format=4.2]}
        *{4}{S[table-auto-round,table-format=6.0]}
        @{}}
\toprule
{Set} & {\# inst} & {metric} & \multicolumn{3}{r}{Time (s)} & \multicolumn{2}{r}{Nodes (\textbackslash{}\#)} \\
{} & {} & {} & {Gur1} & {V} & {Total} & {Gur1} & {V} \\
\midrule
\multirow{2}{*}{\shortstack[l]{2 leaves\\\relax [0,3600)}} & {\multirow[c]{2}{*}{\tablenum[table-format=3]{205}}} & Gmean & 59.209529 & 60.741787 & 61.396090 & 6895.270957 & 7066.365171 \\
 &  & Wins1 &  & {\tablenum[table-format=4]{58}\phantom{.00}} & {\tablenum[table-format=4]{42}\phantom{.00}} &  & {\tablenum[table-format=

## Format Table 9: objective + time analysis per instance

In [390]:
# Format Table 9: obj and time analysis
TABLE9 = obj_and_time_df.copy(deep=True)

# Move instance names into a column
TABLE9.reset_index(inplace=True)
TABLE9.drop('disj_terms',axis=1,inplace=True)

# Create new column index
TABLE9.columns = pd.MultiIndex.from_tuples(
    [('','Instance'),
    ('Objectives','Obj'),
    ('Objectives','Succ'),
    ('Objectives','Fails'),
    ('Objectives','\% fails'),
    ('Time (s)','Total'),
    ('Time (s)','(s) / obj'),
    ('Time (s)','(s) / cut')]
)

# Format instance column correctly
TABLE9[('',"Instance")] = TABLE9[('',"Instance")].apply(remove_presolved_from_name)
TABLE9[('',"Instance")] = TABLE9[('',"Instance")].apply(tex_escape)

# If we are not using the automatic tex-escaper, we need to do it ourselves
for col in TABLE9.columns:
    # curr_col = '{' + tex_escape(col) + '}'
    curr_col = tex_escape(str(col))
    TABLE9.rename({col: curr_col}, inplace=True, axis=1)

# Finally, apply the desired style
table9_str = TABLE9.style.\
    hide(axis=0).\
    to_latex(
        column_format="""
        @{}
        l
        *{3}{S[table-format=3.0,table-auto-round,table-number-alignment=center]}
        *{1}{S[table-format=2.1,table-auto-round,table-number-alignment=center]}
        *{1}{S[table-format=4.1,table-auto-round,table-number-alignment=center]}
        *{2}{S[table-format=4.1,table-auto-round,table-number-alignment=center]}
        @{}""",
        hrules = True,
        sparse_index = True,
        multirow_align = "c",
        siunitx=True,
        convert_css = True,
        environment = "table",
        position_float = "centering",
        label = "app:tab:obj-and-time-best",
        caption = """
            Information about objectives and time to generate cuts corresponding to the results in Table~\ref{app:tab:gap-closed}.
        """,
        )

# Adjustbox environment sets width to pagewidth
# table9_str = add_adjustbox_environment(table9_str)

table9_str = add_midrule(table9_str, -2)

# Set default siunitx options for this table
table9_str = add_sisetup(table9_str)

print(table9_str)


{
\sisetup{
    table-alignment-mode = format,
    table-number-alignment = center,
    table-format = 2.2,
\begin{table}
\centering
\caption{
            Information about objectives and time to generate cuts corresponding to the results in Table~
ef{app:tab:gap-closed}.
        }
\label{app:tab:obj-and-time-best}
\begin{tabular}{@{}
        l
        *{3}{S[table-format=3.0,table-auto-round,table-number-alignment=center]}
        *{1}{S[table-format=2.1,table-auto-round,table-number-alignment=center]}
        *{1}{S[table-format=4.1,table-auto-round,table-number-alignment=center]}
        *{2}{S[table-format=4.1,table-auto-round,table-number-alignment=center]}
        @{}}
\toprule
{} & \multicolumn{4}{r}{Objectives} & \multicolumn{3}{r}{Time (s)} \\
{Instance} & {Obj} & {Succ} & {Fails} & {\% fails} & {Total} & {(s) / obj} & {(s) / cut} \\
\midrule
10teams & 133.000000 & 74.000000 & 59.000000 & 44.360902 & 13.020000 & 0.097895 & 0.175946 \\
23588 & 75.000000 & 75.000000 & 0.000000 

# Old table code

In [None]:
# ## DEBUG
# from statistics import geometric_mean
# tmp = [54, 24, 36]
# tmp = np.array(tmp)
# shift = 0

# def geo_mean(iterable):
#     a = np.array(iterable)
#     return a.prod()**(1.0/len(a))
# def geo_mean_overflow(iterable):
#     return np.exp(np.log(iterable).mean())

# display(geometric_mean(tmp+shift)-shift)
# display(geo_mean(tmp+shift)-shift)
# display(geo_mean_overflow(tmp+shift)-shift)

In [None]:
# inst_families = ['boxqp', 'biq', 'maxcut']
# fam_name = {'boxqp': 'BoxQP', 'biq': 'Biq', 'maxcut': 'MaxCut'}
# ranges = [
#     [[20,30], [40,50], [60,90], [100,125], [200,250]],
#     #[[20,50], [60,80], [90,125], [200,250]],
#     [[20,90], [100,100], [120,150], [200,250]],
#     [[60,60], [80,80], [100,100], [150,225]]
# ]

# def find_zero_gap_instances(dfs):
# #     return [ 'gka6c',
# #              'gka3a',
# #              'gka7c',
# #              'gka8a',
# #              'bqp50-8',
# #              'bqp50-7',
# #              'bqp50-5',
# #              'gka1a',
# #              'bqp50-6',
# #              'bqp50-2',
# #              'gka2a',
# #              'bqp50-1',
# #              'bqp50-3',
# #              'bqp50-4',
# #              'bqp50-9']
#     tmpnames = dfs[0].index
#     tmpnames = tmpnames.intersection(dfs[1].index)
#     tmpnames = tmpnames.intersection(dfs[2].index)
#     zero_instances = []
#     for inst_name in tmpnames:
#         gaps = []
#         for i in range(len(dfs)):
#             gaps.append(dfs[i]['Gap'][inst_name])
#         if max(gaps) < 1e-7:
#             zero_instances.append(inst_name)
#     # Add some instances that we manually have detected should be there
#     if "bqp50-9" not in zero_instances:
#         zero_instances.append("bqp50-9")
#     return zero_instances

# def print_gap_and_time_table(ranges, fam, dfs, target_time):
#     # Exclude zero-gap instances
#     zero_gap_instances = find_zero_gap_instances(dfs)
    
#     # Ensure only instances common to all sets are taken
#     tmpnames = dfs[0].index
#     tmpnames = tmpnames.intersection(dfs[1].index)
#     tmpnames = tmpnames.intersection(dfs[2].index)
#     common_names = tmpnames
    
#     tab = []
#     total_num_inst = 0
#     for curr_range in ranges:
#         curr_row = []
#         num_inst = -1
#         stats = []
#         for curr_df in dfs:
#             # instances from max cut are off by one due to constant term in objective encoded as C
#             lower_range = (curr_df['n'] >= curr_range[0])
#             upper_range = (curr_df['n'] <= curr_range[1] + (1 if fam == 'maxcut' else 0))
#             in_fam = df_instances['set'][curr_df.index] == fam
#             nonzero_inst = ~curr_df.index.isin(zero_gap_instances)
#             common_inst = curr_df.index.isin(common_names)
#             curr_df = curr_df[lower_range & upper_range & in_fam & nonzero_inst & common_inst]
#             curr_num_inst = len(curr_df)
#             if num_inst >= 0:
#                 assert(curr_num_inst == num_inst)
#             else:
#                 num_inst = curr_num_inst
#             stats.append([curr_df['Gap'].mean(), curr_df['Gurobi time'].mean()])
#         total_num_inst += num_inst
#         if curr_range[0] != curr_range[1]:
#             curr_row.append("$n \in [%d,%d]$"%(curr_range[0], curr_range[1]))
#         else:
#             curr_row.append("$n = %d$"%(curr_range[0]))
#         curr_row.append("%d"%(num_inst))
#         curr_row.extend([stats[i][0] for i in range(3)])
#         curr_row.extend([stats[i][1] for i in range(3)])
#         tab.append(curr_row)

#     caption = (r"Results on %d %s instances for \SPARSE, \DENSE, and \HYBRID." % (total_num_inst, fam_name[fam])
#                + " Results are averages over instances grouped by size, under a time limit of %s." % (target_time))
#     return matrix2latex(
#         tab, 
#         None,
#         "table", "center", "tabular",
#         headerRow=[
#             ["","",r"Gap closed (%)",r"Gap closed (%)",r"Gap closed (%)", "Last LP time (s)", "Last LP time (s)", "Last LP time (s)"],
#             [r"Instance group",r"#",r"\SPARSE",r"\DENSE",r"\HYBRID",r"\SPARSE",r"\DENSE",r"\HYBRID"]
#         ],
#         alignment=r"@{} lc *{3}{c} *{3}{c} @{}",
#         label="table:%s"%fam,
#         formatColumn=["%s","%d","%.2f","%.2f","%.2f","%.2f","%.2f","%.2f"],
#         summaryrows = 0,
#         midruleIndex = [],
#         caption=caption,
#         position="t"
#     )


# full_dfs = [df_sparse, df_dense, df_hybrid]

# print("\n## family: %s" % inst_families[0])
# print(print_gap_and_time_table(ranges[0], inst_families[0], full_dfs, "1 day"))
# print(print_gap_and_time_table(ranges[0], inst_families[0], new_dfs, "1 hour"))

# print("\n## family: %s" % inst_families[1])
# print(print_gap_and_time_table(ranges[1], inst_families[1], full_dfs, "1 day"))
# print(print_gap_and_time_table(ranges[1], inst_families[1], new_dfs, "1 hour"))

# print("\n## family: %s" % inst_families[2])
# print(print_gap_and_time_table(ranges[2], inst_families[2], full_dfs, "1 day"))
# print(print_gap_and_time_table(ranges[2], inst_families[2], new_dfs, "1 hour"))