# V-polyhedral disjunctive cuts plotting worksheet
1. Table 1: Summary statistics for percent gap closed by VPCs --- avg (%) and number of strict wins (best by at least `EPS`), including set of all instances and set of ≥ 10% gap closed instances
2. Table 2: Average percent gap closed by num disj terms
3. Table 3: Summary statistics for time to solve instances with branch-and-bound

We select instances that meet the following criteria:
1. Belong to MIPLIB, NEOS, or COR@L
2. IP optimal value is known
3. ≤ 5000 variables and 5000 constraints (in presolved instance)
4. The partial branch-and-bound tree with 64 leaves does not find an IP optimal solution
5. The disjunctive lower bound is strictly less than the maximum objective value on any leaf node

There are some instances for which we do not have data for all 6 partial tree sizes. We include these instances in most tables, except if we are showing how some statistic changes as the disjunction increases in size.

# Section 0: Set variables, import whatever is needed, and read in data

### Global variables

In [1]:
## Global variables
EPS = 1e-7
INFINITY = 1e+100

## Set up variables containing relevant directories
import os
repos_key = 'REPOS_DIR'
try:
    REPOS_DIR = os.environ[repos_key]
    print("REPOS_DIR set to \"%s\"." % REPOS_DIR)
    HOME_DIR = os.environ['HOME']
    print("HOME_DIR set to \"%s\"." % HOME_DIR)
except KeyError:
    print("*** ERROR: %s not found!" % repos_key)

VPC_DIR = REPOS_DIR + "/vpc/"
#RESULTS_DIR = VPC_DIR + "results/saved/"
#RESULTS_DIR = VPC_DIR + "results/2023-06-25/"
RESULTS_DIR = HOME_DIR + '/' + "results/saved/"
DATA_DIR = VPC_DIR + "data/"

ONLY_PURE_BINARY = False
ONLY_MIXED_BINARY = False

REPOS_DIR set to "/Users/akazachkov/repos".
HOME_DIR set to "/Users/akazachkov".


### Import data processing, plotting, and export packages and functions

In [2]:
## Import data processing, plotting, and export packages and functions
from IPython.display import display

from plots_helper import * # this includes matplotlib (+ params), pandas, and custom LaTeX helper functions

### `initialize_df`: common way to process each data frame that we need

In [3]:
## Common way to process each data frame that we need
def initialize_df(filename):
    """
    Create a multilevel index df out of data from file `filename`.
    """
    df = pd.read_csv(filename, sep=',', index_col=False, skiprows=1)
    df.sort_values(by = ['INSTANCE','disj_terms'], inplace=True)
    df.set_index(['INSTANCE','disj_terms'], inplace=True)
    df.replace({"\'-inf\'": -np.inf, "\'inf\'": np.inf}, inplace=True)
    return df

### `df_ipopt`: Retrieve best known IP objective values

In [4]:
## Best known IP objective values
df_ipopt = pd.read_csv(DATA_DIR + "ip_obj.csv")
df_ipopt = df_ipopt.set_index(df_ipopt[df_ipopt.columns[0]])
# df_ipopt.rename(columns = {'IP_OBJ' : 'IP OBJ'}, inplace=True) # for consistency with other dfs
# df_ipopt.rename(columns = {'IP Objective' : 'IP OBJ'}, inplace=True) # for consistency with other dfs
df_ipopt = df_ipopt[~df_ipopt.index.duplicated()]
display(df_ipopt.head())
display(df_ipopt['IP OBJ']['bm23_presolved'])

Unnamed: 0_level_0,INSTANCE,IP OBJ,SET
INSTANCE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
22433,22433,21477,miplib2017
23588,23588,8090,miplib2017
10teams,10teams,924,miplib2017
2club200v15p5scn,2club200v15p5scn,-70,miplib2017
30_70_45_05_100,30_70_45_05_100,9,miplib2017


'34'

### `df_preprocess`: Results from preprocessing instances

In [5]:
## Results from preprocessing instances
df_preprocess = pd.read_csv(RESULTS_DIR + "vpc-preprocess.csv", sep=',', index_col=False, skiprows=1)
df_preprocess = df_preprocess.set_index(df_preprocess[df_preprocess.columns[0]])
display(df_preprocess.head())
display(df_preprocess.loc['bm23','CLEANED LP OBJ'])

Unnamed: 0_level_0,INSTANCE,STRATEGY,ORIG LP OBJ,CLEANED LP OBJ,ORIG FIRST GUR NODES,CLEANED FIRST GUR NODES,ORIG BEST GUR NODES,CLEANED BEST GUR NODES,ORIG FIRST GUR TIME,CLEANED FIRST GUR TIME,...,vpc_version,cbc_version,clp_version,gurobi_version,cplex_version,ExitReason,end_time_string,time elapsed,instname,Unnamed: 137
INSTANCE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
22433,22433,536,21240.526171,21240.526170798898,9,12,9,12,0.236,0.137,...,#e5b66ee,#d4272be,#8294096,10.02,22.1.1,SUCCESS,Thu Nov 9 16:36:45 2023,1,22433,DONE
23588,23588,536,7649.866134,7649.866133822502,1612,654,1612,654,1.463,0.807,...,#e5b66ee,#d4272be,#8294096,10.02,22.1.1,SUCCESS,Wed Nov 8 23:51:27 2023,3,23588,DONE
10teams,10teams,536,917.0,917.0000000000003,1,1,1,1,0.723,0.526,...,#e5b66ee,#d4272be,#8294096,10.02,22.1.1,SUCCESS,Wed Nov 8 22:25:16 2023,1,10teams,DONE
2club200v15p5scn,2club200v15p5scn,536,-121.222222,-120.07692307692302,231910,137774,231910,137774,7200.002,7200.001,...,#e5b66ee,#d4272be,#8294096,10.02,22.1.1,SUCCESS,Thu Nov 9 12:06:26 2023,14401,2club200v15p5scn,DONE
30_70_45_05_100,30_70_45_05_100,536,8.1,8.09999999998854,1,1,1,1,4.036,4.394,...,#e5b66ee,#d4272be,#8294096,10.02,22.1.1,SUCCESS,Wed Nov 8 23:26:24 2023,18,30_70_45_05_100,DONE


'20.57092176323557097817'

### `df_bb`: Results from generating VPCs for various number of disjunctive terms

In [6]:
## Results from generating VPCs for various number of disjunctive terms
df_bb = initialize_df(RESULTS_DIR + "vpc-bb0bb.csv")
display(df_bb.head())

Unnamed: 0_level_0,Unnamed: 1_level_0,cutlimit,gomory,mode,partial_bb_strategy,partial_bb_keep_pruned_nodes,partial_bb_num_strong,preprocess,prlp_flip_beta,rounds,bb_mode,...,gurobi_version,cplex_version,hostname,cpu_model,cpu_id,ExitReason,end_time_string,time elapsed,instname,Unnamed: 294
INSTANCE,disj_terms,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
10teams_presolved,2,-1,-1,0,4,0,5,0,0,1,11,...,10.03,22.1.1,rupert4,13th Gen Intel(R) Core(TM) i9-13900K,6,SUCCESS,Sun Nov 26 23:53:24 2023,31,10teams_presolved,DONE
10teams_presolved,4,-1,-1,0,4,0,5,0,0,1,11,...,10.03,22.1.1,rupert1,13th Gen Intel(R) Core(TM) i9-13900K,12,FAIL_LIMIT,Wed Nov 29 10:59:53 2023,30,10teams_presolved,DONE
10teams_presolved,8,-1,-1,0,4,0,5,0,0,1,11,...,10.03,22.1.1,rupert3,13th Gen Intel(R) Core(TM) i9-13900K,8,SUCCESS,Tue Nov 28 23:41:07 2023,400,10teams_presolved,DONE
10teams_presolved,16,-1,-1,0,4,0,5,0,0,1,11,...,10.03,22.1.1,rupert2,13th Gen Intel(R) Core(TM) i9-13900K,14,SUCCESS,Sun Nov 26 09:35:13 2023,1496,10teams_presolved,DONE
10teams_presolved,32,-1,-1,0,4,0,5,0,0,1,11,...,10.03,22.1.1,rupert4,13th Gen Intel(R) Core(TM) i9-13900K,2,TIME_LIMIT,Tue Nov 28 10:35:33 2023,3636,10teams_presolved,DONE


### `df`: Append to `df_bb` results from running baseline solver 7 times

In [7]:
## Append results from running baseline solver 7 times
#df = df_bb.append(initialize_df(RESULTS_DIR + "vpc-bb0.csv")) # deprecated
df = pd.concat([df_bb, initialize_df(RESULTS_DIR + "vpc-bb0.csv")])
df.sort_values(by = ['INSTANCE','disj_terms'], inplace=True)

col_list = ["BEST DISJ OBJ", "WORST DISJ OBJ"]
for col in col_list:
    df[col] = pd.to_numeric(df[col])

df['NUM DISJ TERMS'] = df.index.get_level_values(1)

## Identify pure binary instances, which are those where 'CLEANED BINARY' column equals 'CLEANED COLS'
df['IS PURE BINARY'] = (df['BINARY'] == df['COLS'])

## Identify mixed binary instances, which are those where 'CLEANED GEN INT' column = 0
df['IS MIXED BINARY'] = (df['GEN INT'] == 0)

# col_list = ['NUM DISJ TERMS']
# for col in col_list:
#     df[col] = pd.to_numeric(df[col])

# start = 220
# end = start + 15
# print(df.columns[start:end])
# print(df.dtypes[start:end])

display(df.head())

Unnamed: 0_level_0,Unnamed: 1_level_0,cutlimit,gomory,mode,partial_bb_strategy,partial_bb_keep_pruned_nodes,partial_bb_num_strong,preprocess,prlp_flip_beta,rounds,bb_mode,...,cpu_model,cpu_id,ExitReason,end_time_string,time elapsed,instname,Unnamed: 294,end,IS PURE BINARY,IS MIXED BINARY
INSTANCE,disj_terms,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
10teams_presolved,0,-1,0,0,4,0,5,0,0,1,1,...,13th Gen Intel(R) Core(TM) i9-13900K,2,SUCCESS,Sat Dec 2 13:44:10 2023,15,10teams_presolved,,DONE,True,True
10teams_presolved,2,-1,-1,0,4,0,5,0,0,1,11,...,13th Gen Intel(R) Core(TM) i9-13900K,6,SUCCESS,Sun Nov 26 23:53:24 2023,31,10teams_presolved,DONE,,True,True
10teams_presolved,4,-1,-1,0,4,0,5,0,0,1,11,...,13th Gen Intel(R) Core(TM) i9-13900K,12,FAIL_LIMIT,Wed Nov 29 10:59:53 2023,30,10teams_presolved,DONE,,True,True
10teams_presolved,8,-1,-1,0,4,0,5,0,0,1,11,...,13th Gen Intel(R) Core(TM) i9-13900K,8,SUCCESS,Tue Nov 28 23:41:07 2023,400,10teams_presolved,DONE,,True,True
10teams_presolved,16,-1,-1,0,4,0,5,0,0,1,11,...,13th Gen Intel(R) Core(TM) i9-13900K,14,SUCCESS,Sun Nov 26 09:35:13 2023,1496,10teams_presolved,DONE,,True,True


In [8]:
# # get the count of each instance-disj_terms pair
# counts = df.groupby(level=[0]).size()

# # get the instances that have only one occurrence
# instances_with_one_occurrence = counts[counts == 1].index.get_level_values(0).unique()

# # filter df_bb to only include instances with one occurrence
# df_bb_one_occurrence = df.loc[instances_with_one_occurrence]

# # display the filtered dataframe
# display(df_bb_one_occurrence)

### Remove `stein*` instances (keep modified `stein*_nocard` instances)

In [9]:
# Remove unmodified stein instances from consideration
df.drop(index = ['stein09_presolved', 'stein15_presolved', 'stein27_presolved', 'stein45_presolved'], inplace=True)
df.index = df.index.remove_unused_levels()

### Modify `mas` instances

In [10]:
df_preprocess.rename(index={'mas74': 'mas074', 'mas76': 'mas076'}, inplace=True)

### Fix mistake in code for one root pass containing wrong bound

In [11]:
prefix_list = ["FIRST", "AVG", "BEST"]
for prefix in prefix_list:
    inst_set = df[prefix + " REF+V ROOT_PASSES"] == 1
    tmp_df = df[inst_set]
    tmp_df = tmp_df[["LP OBJ",
                     prefix + " REF+V BOUND", 
                     prefix + " REF+V FIRST_CUT_PASS",
                     prefix + " REF+V LAST_CUT_PASS"]]
    tmp_df.tail(30)

    tmp_tmp_df = tmp_df["LP OBJ"] - tmp_df[prefix + " REF+V FIRST_CUT_PASS"]
    assert(tmp_tmp_df.max() < EPS)

    refcol = prefix + " REF+V BOUND"
    col = prefix + " REF+V FIRST_CUT_PASS"
    df.loc[inst_set,col] = tmp_df[refcol].values
    col = prefix + " REF+V LAST_CUT_PASS"
    df.loc[inst_set,col] = tmp_df[refcol].values


df.loc  ["misc02_presolved",
         [refcol,
          "FIRST REF+V FIRST_CUT_PASS",
          "FIRST REF+V LAST_CUT_PASS"
         ]
        ]

Unnamed: 0_level_0,BEST REF+V BOUND,FIRST REF+V FIRST_CUT_PASS,FIRST REF+V LAST_CUT_PASS
disj_terms,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.0,0.0,0.0
2,1690.0,1045.0,1252.692308
4,1690.0,1052.692308,1250.235294
8,1690.0,1690.0,1690.0
16,1690.0,1690.0,1690.0
32,1690.0,1690.0,1690.0
64,1690.0,1690.0,1690.0


### `instances`: get unique instance list

In [12]:
# Get unique instance list
if (ONLY_PURE_BINARY):
    # Select only instances in df in which column ['IS PURE BINARY'] is True
    tmp_df = df[df['IS PURE BINARY'] == True]
    tmp_df.index = tmp_df.index.remove_unused_levels()
    instances = tmp_df.index.levels[0]
elif (ONLY_MIXED_BINARY):
    tmp_df = df[df['IS MIXED BINARY'] == True]
    tmp_df.index = tmp_df.index.remove_unused_levels()
    instances = tmp_df.index.levels[0]
else:
    instances = df.index.levels[0]

instances.set_names(names = 'Instance', inplace=True)

print("Number of selected instances: ", len(instances))

Number of selected instances:  438


### `df_rejection_reason`: Track why instances were not selected for our statistics

In [13]:
rejection_reasons = [
    'SELECTED_GAP', # *not* rejected for gap experiments; _must_ be first column
    'SELECTED_TIME', # *not* rejected for time experiments; _must_ be second column
    'SELECTED_6TREES', # *not* rejected for 6trees set for time experiments; _must_ be third column
    'NUM_WITH_OBJS', # number of attempts that successfully tried solving the PRLP
    'NUM_WITH_CUTS', # number of attempts that successfully yielded cuts
    'IP_OPT_UNKNOWN', # ip opt val must be known
    'TOO_MANY_ROWS_OR_COLS', # require max(nrows, ncols) ≤ 5K
    'OPTIMAL_SOLUTION_FOUND', # optimal solution should not be found by any of the partial trees
    'LP_OPT_IS_NOT_CUT', # check if lp opt < ip opt
    'DLB=DUB', # check if disj lb < disj ub
    'LP=DLB=DUB', # require either lp opt < disj lb or disj lb < disj ub
    'PRLP_INFEASIBLE', # require PRLP is feasible and solves within timelimit for at least one of the attempts
    'PRLP_TIME_LIMIT', # require PRLP solves within timelimit for at least one of the attempts
    'NO_CUTS', # there must be cuts from at least one of the partial b&b trees
    'NO_GAP', # require that ip opt != lp opt
    'GUR_TIMEOUT', # require Gur7 < 3600 (Gurobi is able to solve the instance to optimality within an hour either with or without using VPCs)'
    '<7_ATTEMPTS', # indicates not all partial trees were successfully run
]
df_rejection_reason = pd.DataFrame(index = instances, columns = rejection_reasons, dtype=bool)
df_rejection_reason.iloc[:,3:] = False # no rejection criteria at true

for col in ['OPTIMAL_SOLUTION_FOUND']:
    df_rejection_reason[col] = df_rejection_reason[col].astype(np.int64)
for col in ['NUM_WITH_OBJS', 'NUM_WITH_CUTS', 'LP_OPT_IS_NOT_CUT', 'DLB=DUB', 'LP=DLB=DUB', 'PRLP_INFEASIBLE', 'PRLP_TIME_LIMIT', 'GUR_TIMEOUT']:
    df_rejection_reason[col] = df_rejection_reason[col].astype(np.int8)
display(df_rejection_reason.head())

Unnamed: 0_level_0,SELECTED_GAP,SELECTED_TIME,SELECTED_6TREES,NUM_WITH_OBJS,NUM_WITH_CUTS,IP_OPT_UNKNOWN,TOO_MANY_ROWS_OR_COLS,OPTIMAL_SOLUTION_FOUND,LP_OPT_IS_NOT_CUT,DLB=DUB,LP=DLB=DUB,PRLP_INFEASIBLE,PRLP_TIME_LIMIT,NO_CUTS,NO_GAP,GUR_TIMEOUT,<7_ATTEMPTS
Instance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
10teams_presolved,True,True,True,0,0,False,False,0,0,0,0,0,0,False,False,0,False
22433_presolved,True,True,True,0,0,False,False,0,0,0,0,0,0,False,False,0,False
23588_presolved,True,True,True,0,0,False,False,0,0,0,0,0,0,False,False,0,False
30n20b8_presolved,True,True,True,0,0,False,False,0,0,0,0,0,0,False,False,0,False
50v-10_presolved,True,True,True,0,0,False,False,0,0,0,0,0,0,False,False,0,False


### `map_rejection_reason_to_number`: Reference paper's rejection criteria

In [14]:
# map_rejection_reason_to_number = {
#     'OPTIMAL_SOLUTION_FOUND':   '(3)',
#     'LP=DLB=DUB':               '(4a)',
#     'PRLP_INFEASIBLE':          '(4b)',
#     'PRLP_TIME_LIMIT':          '(4c)',
#     '<7_ATTEMPTS':              '(?)',
# }
map_rejection_reason_to_number = {
    'IP_OPT_UNKNOWN':           '\\ref{selection-criterion:ip-opt-known}',
    'NO_GAP':                   '\\ref{selection-criterion:ip-opt-known}',
    'TOO_MANY_ROWS_OR_COLS':    '\\ref{selection-criterion:max-instance-size}',
    'OPTIMAL_SOLUTION_FOUND':   '\\ref{selection-criterion:partial-tree-does-not-find-opt}',
    'LP=DLB=DUB':               '\\ref{selection-criterion:cuts-are-generated:not_lp=dlb=dub}',
    'PRLP_INFEASIBLE':          '\\ref{selection-criterion:cuts-are-generated:PRLP-primal-feasible}',
    'PRLP_TIME_LIMIT':          '\\ref{selection-criterion:cuts-are-generated:PRLP-time-limit}',
    'NO_CUTS':                  '\\ref{selection-criterion:cuts-are-generated:cuts-are-generated}',
    'GUR_TIMEOUT':              'G',
    '<7_ATTEMPTS':              '?',
}

### `df_status_by_depth`: Track success or failure reason by depth

In [15]:
sizes = [2, 4, 8, 16, 32, 64]
df_status_by_depth = pd.DataFrame(index = instances, columns = sizes, dtype=str)

DEFAULT_STATUS = map_rejection_reason_to_number['<7_ATTEMPTS']

df_status_by_depth[:] = DEFAULT_STATUS

display(df_status_by_depth.head())

Unnamed: 0_level_0,2,4,8,16,32,64
Instance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
10teams_presolved,?,?,?,?,?,?
22433_presolved,?,?,?,?,?,?
23588_presolved,?,?,?,?,?,?
30n20b8_presolved,?,?,?,?,?,?
50v-10_presolved,?,?,?,?,?,?


### DEBUG

In [16]:
# col = "REF+V FIRST_CUT_PASS"
# tmp = df[col]
# display(tmp)

# for col in df.columns:
#     if str(col).endswith("FIRST_CUT_PASS"):
#         print("{}".format(col))

# inst = 'neos22_presolved'
# col = 'NUM DISJ TERMS'
# df.loc[inst][col]

# display(df.loc[('bppc4-08_presolved',2)]['LP OBJ'])
# display(df.loc[('bppc4-08_presolved',2)]['BEST DISJ OBJ'])
# display(df.loc[('bppc4-08_presolved',2)]['WORST DISJ OBJ'])
# display(df['BEST DISJ OBJ'])

# Section 1: Select instances

### `selected_gap_instances_dict` (original index, instance): Select instances for gap closed calculations

Criteria to filter gap closed instances:
* ip opt val is known
* lp opt < ip opt
* max(nrows, ncols) ≤ 5K
* optimal solution should not be found by any of the partial trees
* either lp opt < disj lb or disj lb < disj ub
* PRLP is feasible and solves within timelimit for at least one of the attempts

In [17]:
## Select instances for gap closed calculations
#
# Criteria to filter gap closed instances:
# * ip opt val is known
# * lp opt < ip opt
# * max(nrows, ncols) ≤ 5K
# * optimal solution should not be found by any of the partial trees
# * either lp opt < disj lb or disj lb < disj ub
# * PRLP is feasible and solves within timelimit for at least one of the attempts

# Constants
MAX_ROWS = 5000
MAX_COLS = MAX_ROWS
PRINT_SKIP_REASON = False

# Information to save
selected_gap_instances_dict = {} # dictionary of (original index, instance)
#selected_indices = []
num_gap_errors = 0

inst_set = instances
num_attempts = np.zeros(len(inst_set), dtype=int)

for i, inst in enumerate(inst_set):
    print("{}/{}".format(i+1,len(inst_set)), end='\r', flush=True)
    skip_instance = False
    curr_df = df.loc[inst]
    
    # Count number of times instance appears
    num_attempts[i] = len(curr_df)

    if num_attempts[i] < 7:
        print("*** ERROR: Instance {:d} -- {}: {:d} < 7 attempts.".format(i, inst, num_attempts[i]))
        skip_instance = True
        num_gap_errors += 1
        df_rejection_reason.loc[inst, '<7_ATTEMPTS'] = True

    # Check that LP opt < IP opt
    lp_obj = np.float64(df_preprocess.loc[remove_presolved_from_name(inst),'CLEANED LP OBJ'])
    ip_obj = np.float64(df_ipopt.loc[inst,'IP OBJ'])
    YES_GAP = abs(ip_obj - lp_obj) >= 1e-7
    if not YES_GAP:
        print("*** ERROR: Instance {:d} -- {}: not YES GAP (lp = {:.10f}; ip = {:.10f})".format(i, inst, lp_obj, ip_obj))
        skip_instance = True
        num_gap_errors += 1
        df_rejection_reason.loc[inst, 'NO_GAP'] = True
        
    # Check that ExitReason != OPTIMAL_SOLUTION_FOUND
    OPT_SOL_FOUND = False
    for curr_index, row in curr_df.iterrows():
        #print(i,j, curr_df['ExitReason'])
        curr_depth = int(curr_index)
        if curr_depth == 0:
            continue
        exitreason = row['ExitReason']
        if exitreason == 'OPTIMAL_SOLUTION_FOUND' and not OPT_SOL_FOUND:
            if PRINT_SKIP_REASON:
                print("Skipping instance {:d} -- {}: optimal IP solution found at depth {:d}.".format(
                    i, inst, curr_depth
                ))
            skip_instance = True
            OPT_SOL_FOUND = True
            df_rejection_reason.loc[inst, 'OPTIMAL_SOLUTION_FOUND'] = curr_depth
        if OPT_SOL_FOUND:
            df_status_by_depth.loc[inst, curr_depth] = map_rejection_reason_to_number['OPTIMAL_SOLUTION_FOUND']
        else:
            df_status_by_depth.loc[inst, curr_depth] = ''

    # Check that best and worst bound on leaf nodes is not same (likely cause of primal infeasible PRLP)
    num_successful_attempts = 0
    has_zero = False
    terms = curr_df.index
    for curr_index in terms:
        if curr_df['NUM DISJ TERMS'][curr_index] == 0:
            has_zero = True
            continue
            
        lp_obj = curr_df['LP OBJ'][curr_index]
        ip_obj = curr_df['IP OBJ'][curr_index]
        best_disj_obj = curr_df['BEST DISJ OBJ'][curr_index]
        worst_disj_obj = curr_df['WORST DISJ OBJ'][curr_index]
        num_frac = curr_df['NUM FRAC'][curr_index]
        num_obj_tried = curr_df['NUM OBJ'][curr_index]
        num_cuts = curr_df['NUM VPC'][curr_index] # can be > 0 even if num_obj_tried = 0, b/c of OPTIMAL_SOLUTION_FOUND exit reason
        exitreason = curr_df['ExitReason'][curr_index]

        YES_GAP = abs(ip_obj - lp_obj) >= 1e-7
        LP_OPT_IS_CUT = (num_frac > 0) and YES_GAP and abs(lp_obj - worst_disj_obj) >= 1e-7
        DLB_NE_DUB = (num_frac > 0) and abs(best_disj_obj - worst_disj_obj) >= 1e-7
        df_rejection_reason.loc[inst, 'NO_GAP'] += (not YES_GAP)
        df_rejection_reason.loc[inst, 'LP_OPT_IS_NOT_CUT'] += (not LP_OPT_IS_CUT)
        df_rejection_reason.loc[inst, 'DLB=DUB'] += (not DLB_NE_DUB)
        df_rejection_reason.loc[inst, 'PRLP_INFEASIBLE'] += (exitreason == 'PRLP_INFEASIBLE')
        df_rejection_reason.loc[inst, 'PRLP_TIME_LIMIT'] += (exitreason == 'PRLP_TIME_LIMIT')
        # if not DLB_NE_DUB and num_obj_tried > 0:
        #     raise ValueError(
        #         "*** ERROR: Instance {:d} -- {}: at depth {:d}, num obj tried = {:d} (num cuts = {:d}) but lp opj {:.10f}, best_disj_obj {:.10f} = worst_disj_obj {:.10f} with exit reason {}".format(
        #             i, inst, curr_index, num_obj_tried, num_cuts, lp_obj, best_disj_obj, worst_disj_obj, curr_df['ExitReason'][curr_index]
        #         )
        #     )
        if LP_OPT_IS_CUT or DLB_NE_DUB:
            if (num_obj_tried == 0) and (exitreason not in ['PRLP_TIME_LIMIT','PRLP_INFEASIBLE','OPTIMAL_SOLUTION_FOUND','TIME_LIMIT']):
                # We should be trying objectives at this point, unless the initial PRLP timed out or was infeasible or an optimal solution was found
                raise ValueError(
                    "*** ERROR: Instance {:d} -- {}: at depth {:d}, num obj tried = 0 but lp opj {:.10f} < best_disj_obj {:.10f} < worst_disj_obj {:.10f} with exit reason {}".format(
                        i, inst, curr_index, lp_obj, best_disj_obj, worst_disj_obj, curr_df['ExitReason'][curr_index]
                    )
                )
            if num_obj_tried > 0:
                df_rejection_reason.loc[inst, 'NUM_WITH_OBJS'] += 1
                if num_cuts > 0:
                    num_successful_attempts += 1
                    df_rejection_reason.loc[inst, 'NUM_WITH_CUTS'] += 1
                else:
                    df_status_by_depth.loc[inst, curr_index] = map_rejection_reason_to_number['NO_CUTS']
            elif exitreason == 'PRLP_INFEASIBLE':
                df_status_by_depth.loc[inst, curr_index] = map_rejection_reason_to_number[exitreason]
            elif exitreason == 'PRLP_TIME_LIMIT':
                df_status_by_depth.loc[inst, curr_index] = map_rejection_reason_to_number[exitreason]
        else:
            # check that num obj tried is 0
            if (num_obj_tried > 0):
                raise ValueError(
                    "*** ERROR: Instance {:d} -- {}: at depth {:d}, num obj tried = {:d} > 0 but best_disj_obj {:f} = worst_disj_obj {:f}".format(
                        i, inst, curr_index, num_obj_tried, best_disj_obj, worst_disj_obj
                    )
                )
            df_rejection_reason.loc[inst, 'LP=DLB=DUB'] += 1
            df_status_by_depth.loc[inst, int(curr_index)] = map_rejection_reason_to_number['LP=DLB=DUB']

    if not has_zero:
        raise ValueError(
            "*** ERROR: Instance {:d} -- {}: has no bb0 entry.".format(
                i, inst, curr_index
            )
        )        
    
    if num_successful_attempts == 0 and not skip_instance:
        if PRINT_SKIP_REASON:
            print("Skipping instance {:d} -- {}: best and worst bound on leaf nodes coincide for all trees, no objectives ever tried, or no objectives successfully produced cuts.".format(
                i, inst, num_attempts[i]))
        skip_instance = True
        exitreason = 'NO_CUTS'
        df_rejection_reason.loc[inst, exitreason] = True
    else:        
        # Ensure IP objective value is known
        ip_obj = curr_df['IP OBJ'][curr_df.index[0]]
        if not isinstance(ip_obj,float):
            if PRINT_SKIP_REASON:
                print(
                    "Skipping instance {:d} -- {}: IP objective value ({}) is not detected to be a float value.".format(
                    i, inst, ip_obj))
            skip_instance = True
            df_rejection_reason.loc[inst, 'IP_OPT_UNKNOWN'] = True
            
        # Ensure nrows and ncols is not too many
        nrows = curr_df['ROWS'][curr_df.index[0]]
        ncols = curr_df['COLS'][curr_df.index[0]]
        if (nrows > MAX_ROWS) or (ncols > MAX_COLS):
            if PRINT_SKIP_REASON:
                print("Skipping instance {:d} -- {}: nrows = {:d} > {:d} or ncols = {:d} > {:d}.".format(
                        i, inst, nrows, ncols, MAX_ROWS, MAX_COLS))
            skip_instance = True
            df_rejection_reason.loc[inst, 'TOO_MANY_ROWS_OR_COLS'] = True
    
    if not skip_instance:
        #selected_gap_instances_dict[len(selected_gap_instances_dict)] = inst
        selected_gap_instances_dict[inst] = i
    else:
        df_rejection_reason.loc[inst, 'SELECTED_GAP'] = False
        df_rejection_reason.loc[inst, 'SELECTED_TIME'] = False
        df_rejection_reason.loc[inst, 'SELECTED_6TREES'] = False

num_selected_gap_instances = len(selected_gap_instances_dict)
print("Total number of errors: {}".format(num_gap_errors))
print("Total number of selected instances for gap closed reporting: {}/{:d}".format(num_selected_gap_instances,len(instances)))

*** ERROR: Instance 213 -- neos-3214367-sovi_presolved: 4 < 7 attempts.
*** ERROR: Instance 230 -- neos-3734794-moppy_presolved: 1 < 7 attempts.
*** ERROR: Instance 253 -- neos-530627_presolved: 5 < 7 attempts.
Total number of errors: 3
Total number of selected instances for gap closed reporting: 332/438


In [18]:
# Retrieve all instances from df with df_rejection_reason '<7_ATTEMPTS' == True
instances_with_less_than_7_attempts = df_rejection_reason[df_rejection_reason['<7_ATTEMPTS'] == True].index.tolist()
display(instances_with_less_than_7_attempts)

# Get df_bb entries for instances_with_less_than_7_attempts
df_bb_with_less_than_7_attempts = df.loc[instances_with_less_than_7_attempts]

df_bb_with_less_than_7_attempts.loc[instances_with_less_than_7_attempts[0]]

['neos-3214367-sovi_presolved',
 'neos-3734794-moppy_presolved',
 'neos-530627_presolved']

Unnamed: 0_level_0,cutlimit,gomory,mode,partial_bb_strategy,partial_bb_keep_pruned_nodes,partial_bb_num_strong,preprocess,prlp_flip_beta,rounds,bb_mode,...,cpu_model,cpu_id,ExitReason,end_time_string,time elapsed,instname,Unnamed: 294,end,IS PURE BINARY,IS MIXED BINARY
disj_terms,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,-1,0,0,4,0,5,0,0,1,1,...,13th Gen Intel(R) Core(TM) i9-13900K,0,SUCCESS,Sun Dec 3 04:18:36 2023,25205,neos-3214367-sovi_presolved,,DONE,False,False
2,-1,-1,0,4,0,5,0,0,1,11,...,13th Gen Intel(R) Core(TM) i9-13900K,6,FAIL_LIMIT,Mon Nov 27 19:23:00 2023,68395,neos-3214367-sovi_presolved,DONE,,False,False
4,-1,-1,0,4,0,5,0,0,1,11,...,13th Gen Intel(R) Core(TM) i9-13900K,6,PRLP_TIME_LIMIT,Tue Nov 28 23:34:27 2023,67516,neos-3214367-sovi_presolved,DONE,,False,False
8,-1,-1,0,4,0,5,0,0,1,11,...,13th Gen Intel(R) Core(TM) i9-13900K,4,PRLP_TIME_LIMIT,Sun Nov 26 09:14:17 2023,69828,neos-3214367-sovi_presolved,DONE,,False,False


### `selected_time_instances_dict` and `all6_instances_dict` (original index, instance): Select instances for time tables

Criteria to filter instances for reporting time:
* ip opt val is known
* lp opt < ip opt
* max(nrows, ncols) ≤ 5K
* optimal solution should not be found by any of the partial trees
* either lp opt < disj lb or disj lb < disj ub
* PRLP is feasible and solves within timelimit for at least one of the attempts
* Gur7 < 3600 (Gurobi is able to solve the instance to optimality within an hour either with or without using VPCs)

6 trees set
* all six partial tree sizes produced VPCs

In [19]:
## Select instances for time tables
#
# Criteria to filter instances for reporting time:
# * ip opt val is known
# * lp opt < ip opt 
# * max(nrows, ncols) ≤ 5K
# * optimal solution should not be found by any of the partial trees
# * either lp opt < disj lb or disj lb < disj ub
# * PRLP is feasible and solves within timelimit for at least one of the attempts
# * Gur7 < 3600 (Gurobi is able to solve the instance to optimality within an hour either with or without using VPCs)
#
# 6 trees set
# * all six partial tree sizes produced VPCs

# Constants
MAX_TIME = 3600
PRINT_SKIP_REASON = False

# Information to save
selected_time_instances_dict = {}   # dictionary of (original index, instance)
all6_instances_dict = {}            # dictionary of (original index, instance)
skipped_instances_dict = {}         # dictionary of (original index, instance)
error_instances_dict = {}           # dictionary of (original index, instance)

num_timeouts = 0
num_time_errors = 0

inst_set = list(selected_gap_instances_dict.keys())
# inst_set = ['lotsize_presolved']
for i, inst in enumerate(inst_set):
    print("{}/{}".format(i+1,len(inst_set)), end='\r', flush=True)
    skip_instance = False
    curr_df = df.loc[inst]

    # Check Gur < 3600 (Gurobi is able to solve the instance to optimality within an hour without using VPCs)
    col = 'BEST REF TIME'
    mintime_gur = float(curr_df.loc[0,col].min())
    
    # Check Gur7 < 3600 (Gurobi is able to solve the instance to optimality within an hour either with or without using VPCs)
    col = 'BEST REF+V TIME'
    mintime_gur7 = float(curr_df.loc[2:64,col].min())

    mintime = min(mintime_gur, mintime_gur7)
    if mintime > MAX_TIME - EPS:
        if PRINT_SKIP_REASON:
            print("{:d}: Skipping instance {:d} -- {}: Gurobi's best time (with or without VPCs) is {:.7f} >= {:.7f}.".format(
                    len(skipped_instances_dict), i, inst, mintime, MAX_TIME-EPS
                ))
        skip_instance = True
        skipped_instances_dict[inst] = i
        num_timeouts += 1
        df_rejection_reason.loc[inst, 'GUR_TIMEOUT'] += 1
        # df_status_by_depth.loc[inst, int(curr_index)] = map_rejection_reason_to_number['GUR_TIMEOUT']

    # Check how many times VPCs were successfully generated
    num_successful_attempts = 0
    has_zero = False
    for curr_index, row in curr_df.iterrows():
        if row['NUM DISJ TERMS'] == 0:
            has_zero = True
            continue

        num_vpc = float(row['NUM VPC'])
        num_successful_attempts += (num_vpc > 0)

        if df_status_by_depth.loc[inst, int(curr_index)] == DEFAULT_STATUS:
            curr_time = float(curr_df.loc[curr_index,col])
            if curr_time > MAX_TIME - EPS:
                df_status_by_depth.loc[inst, int(curr_index)] = map_rejection_reason_to_number['GUR_TIMEOUT']

    if not has_zero:
        raise ValueError(
            "*** ERROR: Instance {:d} -- {}: has no bb0 entry.".format(
                i, inst, curr_index
            )
        )        
    
    # if num_successful_attempts == 0 and not skip_instance:
    #     if PRINT_SKIP_REASON:
    #         print("Skipping instance {:d} -- {}: no VPCs generated successfully for any number of terms.".format(i, inst, num_attempts[i]))
    #     skip_instance = True
    #     skipped_instances_dict[inst] = i

    if not skip_instance:
        if num_successful_attempts == 6:
            all6_instances_dict[inst] = i
        else:
            df_rejection_reason.loc[inst, 'SELECTED_6TREES'] = False
        #selected_time_instances_dict[len(selected_time_instances_dict)] = inst
        selected_time_instances_dict[inst] = i
    else:
        df_rejection_reason.loc[inst, 'SELECTED_TIME'] = False
        df_rejection_reason.loc[inst, 'SELECTED_6TREES'] = False

num_selected_time_instances = len(selected_time_instances_dict)
num_all6_instances = len(all6_instances_dict)
print("Total number of errors: {}".format(num_time_errors))
print("Total number of timeouts: {}".format(num_timeouts))
print("Total number of instances for time reporting: {}".format(num_selected_time_instances))
print("Total number of \"6 trees\" instances: {}".format(num_all6_instances))

Total number of errors: 0
Total number of timeouts: 26
Total number of instances for time reporting: 306
Total number of "6 trees" instances: 221


#### DEBUG (check which instances were selected but do not have all six runs)

In [20]:
## DEBUG (check which instances were selected but do not have all six runs)
not_all_6 = [key for key in selected_time_instances_dict.keys() if key not in all6_instances_dict.keys()]
not_all_6

['10teams_presolved',
 '30n20b8_presolved',
 'a1c1s1_presolved',
 'app3_presolved',
 'berlin_5_8_0_presolved',
 'bg512142_presolved',
 'bnatt400_presolved',
 'bppc8-02_presolved',
 'cod105_presolved',
 'cvs08r139-94_presolved',
 'cvs16r106-72_presolved',
 'cvs16r128-89_presolved',
 'cvs16r70-62_presolved',
 'cvs16r89-60_presolved',
 'danoint_presolved',
 'dg012142_presolved',
 'eilB101_presolved',
 'eild76_presolved',
 'f2gap801600_presolved',
 'graph20-20-1rand_presolved',
 'graphdraw-domain_presolved',
 'hgms-det_presolved',
 'ic97_potential_presolved',
 'ic97_tension_presolved',
 'icir97_tension_presolved',
 'lrn_presolved',
 'milo-v12-6-r2-40-1_presolved',
 'mkc1_presolved',
 'n2seq36f_presolved',
 'n4-3_presolved',
 'neos-1058477_presolved',
 'neos-1215259_presolved',
 'neos-1330346_presolved',
 'neos-1396125_presolved',
 'neos-1413153_presolved',
 'neos-1415183_presolved',
 'neos-1480121_presolved',
 'neos-1601936_presolved',
 'neos-1605061_presolved',
 'neos-1605075_presolved',


In [21]:
import csv

# Define the file paths
gap_file_path = 'selected_gap_instances.csv'
time_file_path = 'selected_time_instances.csv'

# Get the keys from selected_gap_instances_dict and selected_time_instances_dict
selected_gap_keys = list(selected_gap_instances_dict.keys())
selected_time_keys = list(selected_time_instances_dict.keys())

# Write the keys to the CSV files
with open(gap_file_path, 'w', newline='') as gap_file:
  gap_writer = csv.writer(gap_file)
  gap_writer.writerow(['Selected Gap Instances'])
  gap_writer.writerows([[key] for key in selected_gap_keys])

with open(time_file_path, 'w', newline='') as time_file:
  time_writer = csv.writer(time_file)
  time_writer.writerow(['Selected Time Instances'])
  time_writer.writerows([[key] for key in selected_time_keys])

print('Selected instances have been written to the CSV files.')


Selected instances have been written to the CSV files.


# Section 2: Gap closed tables

### Prepare gap short/long column names

In [22]:
reg_cut_type_list = ["GMIC", "ROOT", "BEST DISJ", "VPC", "VPC+GMIC"]
solver_cut_type_list = ["REF FIRST_CUT_PASS", "REF+V FIRST_CUT_PASS", "REF LAST_CUT_PASS", "REF+V LAST_CUT_PASS"]

REF_TYPE1 = 'AVG'
REF_TYPE2 = 'BEST'
REFV_TYPE1 = 'AVG'
col_gmic            = 'GMIC % GAP CLOSED'
col_root            = 'ROOT % GAP CLOSED'
col_best_disj       = 'BEST DISJ % GAP CLOSED'
col_vpc             = 'VPC % GAP CLOSED'
col_vpc_gmic        = 'VPC+GMIC % GAP CLOSED'
col_max_gmic_vpc    = 'MAX(GMIC,VPC) % GAP CLOSED'
col_first_ref_first = 'FIRST' + ' ' + 'REF FIRST_CUT_PASS % GAP CLOSED'
col_first_ref_avg   = 'AVG' + ' ' + 'REF FIRST_CUT_PASS % GAP CLOSED'
col_first_ref_best  = 'BEST' + ' ' + 'REF FIRST_CUT_PASS % GAP CLOSED'
col_first_ref       = col_first_ref_avg
col_first_ref_v     = REFV_TYPE1 + ' ' + 'REF+V FIRST_CUT_PASS % GAP CLOSED'
col_last_ref_first  = 'FIRST' + ' ' + 'REF LAST_CUT_PASS % GAP CLOSED'
col_last_ref_avg    = 'AVG' + ' ' + 'REF LAST_CUT_PASS % GAP CLOSED'
col_last_ref_best   = 'BEST' + ' ' + 'REF LAST_CUT_PASS % GAP CLOSED'
col_last_ref        = col_last_ref_avg
col_last_ref_v      = REFV_TYPE1 + ' ' + 'REF+V LAST_CUT_PASS % GAP CLOSED'
col_num_vpcs        = 'NUM VPC'
col_num_gmic        = 'NUM GMIC'
gap_cols = [
    col_gmic,
    col_root,
    col_best_disj,
    col_vpc,
    col_vpc_gmic,
    col_max_gmic_vpc,
    col_first_ref_first,
    col_first_ref_avg,
    col_first_ref_best,
    col_first_ref_v,
    col_last_ref_first,
    col_last_ref_avg,
    col_last_ref_best,
    col_last_ref_v,
]

map_short_to_cols_gap = {
    'G'      : col_gmic,
    'R'      : col_root,
    'DB'     : col_best_disj,
    'V'      : col_vpc,
    'V+G'    : col_vpc_gmic,
    'max(G,V)': col_max_gmic_vpc,
    'GurF'   : col_first_ref,
    'V+GurF' : col_first_ref_v,
    'GurL'   : col_last_ref,
    'V+GurL' : col_last_ref_v,
}
map_cols_to_short_gap = {v: k for k, v in map_short_to_cols_gap.items()}

gap_cols_short = list(map_short_to_cols_gap.keys())

### `calc_gap_closed` function

In [23]:
## Calculate gap closed for GMICs, Gurobi, and VPCs
def calc_gap_closed(gap_df, col):
    return np.where(
        ((gap_df[col] > EPS) & (gap_df[col] < INFINITY)) | ((gap_df[col] < -EPS) & (gap_df[col] > -INFINITY)), # condition
        100. * (gap_df[col] - gap_df["LP OBJ"]) / (gap_df["IP OBJ"] - gap_df["LP OBJ"]), # if condition is true
        0.0 # if condition is false
    )


def calc_gap_closed2(gap_df, col):
    conditions = gap_df[col] > EPS & np.isfinite(gap_df[col])
    choices = 100. * (gap_df[col] - gap_df["LP OBJ"]) / (gap_df["IP OBJ"] - gap_df["LP OBJ"])
    return np.select(conditions, choices, default=0.0)



### `gap_df`: Calculate gap closed for GMICs, Gurobi, and VPCs

In [24]:
# Create subset of dataframe relevant to gap closed
gap_df = df.loc[:, 
                [
                    'NUM DISJ TERMS',
                    'ROWS',
                    'COLS',
                    'LP OBJ',
                    'WORST DISJ OBJ',
                    'IP OBJ'
                ]
                +
                [ cut_type + ' OBJ' for cut_type in reg_cut_type_list ]
                +
                [ 'FIRST ' + cut_type for cut_type in solver_cut_type_list ]
                +
                [ 'AVG ' + cut_type for cut_type in solver_cut_type_list ]
                +
                [ 'BEST ' + cut_type for cut_type in solver_cut_type_list ]
                # [
                #     'GMIC OBJ',
                #     'ROOT OBJ',
                #     'VPC OBJ',
                #     'VPC+GMIC OBJ',
                #     REF_TYPE1 + ' ' + 'REF FIRST_CUT_PASS',
                #     REF_TYPE2 + ' ' + 'REF FIRST_CUT_PASS',
                #     REFV_TYPE1 + ' ' + 'REF+V FIRST_CUT_PASS',
                #     REF_TYPE1 + ' ' + 'REF LAST_CUT_PASS',
                #     REF_TYPE2 + ' ' + 'REF LAST_CUT_PASS',
                #     REFV_TYPE1 + ' ' + 'REF+V LAST_CUT_PASS'
                # ]
                +
                [
                    'NUM GMIC',
                    'NUM VPC',
                    'NUM OBJ',
                    'ExitReason'
                ]
               ]

# Calculate some missing % gap closed columns
# gap closed = 100 * (post_cut_opt_val - lp_opt_val) / (ip_opt_val - lp_opt_val)
cut_type = "GMIC"
col = cut_type + " OBJ"
gap_df[cut_type + " % GAP CLOSED"] = calc_gap_closed(gap_df, col)

cut_type = "ROOT"
col = cut_type + " OBJ"
gap_df[cut_type + " % GAP CLOSED"] = calc_gap_closed(gap_df, col)

cut_type = "BEST DISJ"
col = cut_type + " OBJ"
gap_df[cut_type + " % GAP CLOSED"] = calc_gap_closed(gap_df, col)

cut_type = "VPC"
col = cut_type + " OBJ"
gap_df[cut_type + " % GAP CLOSED"] = calc_gap_closed(gap_df, col)

cut_type = "VPC+GMIC"
col = cut_type + " OBJ"
gap_df[cut_type + " % GAP CLOSED"] = calc_gap_closed(gap_df, col)

# Add max(G,V) column
gap_df["MAX(GMIC,VPC) % GAP CLOSED"] = np.maximum(gap_df["GMIC % GAP CLOSED"], gap_df["VPC % GAP CLOSED"])

# Compare against Gurobi
for ref_type in ['FIRST', 'AVG', 'BEST']:
    col = "REF FIRST_CUT_PASS"
    gap_df[ref_type + " " + col + " % GAP CLOSED"] = calc_gap_closed(gap_df, ref_type + " " + col)
    col = "REF+V FIRST_CUT_PASS"
    gap_df[ref_type + " " + col + " % GAP CLOSED"] = calc_gap_closed(gap_df, ref_type + " " + col)
    # gap_df[REFV_TYPE1 + " " + col + " % GAP CLOSED"] = calc_gap_closed(gap_df, REFV_TYPE1 + " " + col)
    col = "REF LAST_CUT_PASS"
    gap_df[ref_type + " " + col + " % GAP CLOSED"] = calc_gap_closed(gap_df, ref_type + " " + col)
    # gap_df[REF_TYPE1 + " " + col + " % GAP CLOSED"] = calc_gap_closed(gap_df, REF_TYPE1 + " " + col)
    # gap_df[REF_TYPE2 + " " + col + " % GAP CLOSED"] = calc_gap_closed(gap_df, REF_TYPE2 + " " + col)
    col = "REF+V LAST_CUT_PASS"
    gap_df[ref_type + " " + col + " % GAP CLOSED"] = calc_gap_closed(gap_df, ref_type + " " + col)
    # gap_df[REFV_TYPE1 + " " + col + " % GAP CLOSED"] = calc_gap_closed(gap_df, REFV_TYPE1 + " " + col)

display(gap_df.loc[['bm23_presolved','maxgasflow_presolved']][gap_cols])
display(gap_df.loc[("bm23_presolved",2)])

Unnamed: 0_level_0,Unnamed: 1_level_0,GMIC % GAP CLOSED,ROOT % GAP CLOSED,BEST DISJ % GAP CLOSED,VPC % GAP CLOSED,VPC+GMIC % GAP CLOSED,"MAX(GMIC,VPC) % GAP CLOSED",FIRST REF FIRST_CUT_PASS % GAP CLOSED,AVG REF FIRST_CUT_PASS % GAP CLOSED,BEST REF FIRST_CUT_PASS % GAP CLOSED,AVG REF+V FIRST_CUT_PASS % GAP CLOSED,FIRST REF LAST_CUT_PASS % GAP CLOSED,AVG REF LAST_CUT_PASS % GAP CLOSED,BEST REF LAST_CUT_PASS % GAP CLOSED,AVG REF+V LAST_CUT_PASS % GAP CLOSED
INSTANCE,disj_terms,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
bm23_presolved,0,0.0,0.0,0.0,0.0,0.0,0.0,20.867452,21.16006,22.152648,0.0,37.49489,39.057502,41.913096,0.0
bm23_presolved,2,16.809643,0.0,6.797215,6.797215,19.460606,16.80964,20.867452,21.16006,22.152648,21.653104,37.49489,39.057502,41.913096,40.835719
bm23_presolved,4,16.809643,0.0,14.64701,14.64701,19.953919,16.80964,20.867452,21.16006,22.152648,22.621971,37.49489,39.057502,41.913096,43.497066
bm23_presolved,8,16.809643,0.0,17.61315,17.61315,19.950078,17.61315,20.867452,21.16006,22.152648,23.062444,37.49489,39.057502,41.913096,42.020223
bm23_presolved,16,16.809643,0.0,39.54304,38.93925,38.944835,38.93925,20.867452,21.16006,22.152648,39.138007,37.49489,39.057502,41.913096,49.398341
bm23_presolved,32,16.809643,0.0,56.52621,56.52621,56.526208,56.52621,20.867452,21.16006,22.152648,56.988833,37.49489,39.057502,41.913096,62.106093
bm23_presolved,64,16.809643,0.0,71.46658,69.64211,69.642109,69.64211,20.867452,21.16006,22.152648,66.200094,37.49489,39.057502,41.913096,71.147067
maxgasflow_presolved,0,0.0,0.0,0.0,0.0,-0.0,0.0,-0.004339,-0.004347,-0.004356,0.0,-0.005587,-0.005559,-0.006117,0.0
maxgasflow_presolved,2,-0.004342,6.22658e-15,-5.808108e-08,-5.808108e-08,-0.004342,-5.808108e-08,-0.004339,-0.004347,-0.004356,-0.004351,-0.005587,-0.005559,-0.006117,-0.00546
maxgasflow_presolved,4,-0.004342,6.22658e-15,-7.878622e-08,-7.878622e-08,-0.004343,-7.878622e-08,-0.004339,-0.004347,-0.004356,-0.004351,-0.005587,-0.005559,-0.006117,-0.005358


NUM DISJ TERMS                                     2
ROWS                                              20
COLS                                              27
LP OBJ                                     20.570922
WORST DISJ OBJ                             27.267238
IP OBJ                                          34.0
GMIC OBJ                                   22.828302
ROOT OBJ                                   20.570922
BEST DISJ OBJ                              21.483725
VPC OBJ                                    21.483725
VPC+GMIC OBJ                               23.184302
FIRST REF FIRST_CUT_PASS                   23.373228
FIRST REF+V FIRST_CUT_PASS                 23.392329
FIRST REF LAST_CUT_PASS                     25.60614
FIRST REF+V LAST_CUT_PASS                  25.906639
AVG REF FIRST_CUT_PASS                     23.412523
AVG REF+V FIRST_CUT_PASS                   23.478734
AVG REF LAST_CUT_PASS                      25.815984
AVG REF+V LAST_CUT_PASS                    26.

In [25]:
# tmp_df = gap_df.loc['sentoy_presolved']
# tmp_df[gap_cols]

### `selected_gap_df`: Gap closed for selected instances, adding 0-row that has best for `V+` cols

In [26]:
## `selected_gap_df`: Gap closed for selected instances, adding 0-row that has best for `V+` cols
## Show the instances that have been selected (and their original index)
## and then set the selected_gap_df as the selected instances from gap_df
## We also set the '0' row to contain the best result for each method
## (including the option of not using VPCs at all)
## and we replace any runs with no VPCs with the values obtained without them
selected_gap_df = gap_df.loc[selected_gap_instances_dict.keys()]

# From https://pandas.pydata.org/docs/user_guide/advanced.html#defined-levels
# "The MultiIndex keeps all the defined levels of an index, even if they are not actually used.
# When slicing an index, you may notice this."
# Even without using remove_unused_levels, index was correct with selected_gap_df.index.get_level_values(0).unique()
selected_gap_df.index = selected_gap_df.index.remove_unused_levels()

#display(selected_gap_df.index.difference(gap_df.index))
#selected_gap_df.drop(['22433_presolved'])

# # Check what the selected_gap_df contains for bm23
# inst = "bm23_presolved"
# display(selected_gap_df.loc[inst])

#inst = "10teams_presolved"
# inst = '22433_presolved'
# curr_df = selected_gap_df.loc[inst]
# display(curr_df)
# # for i in curr_df.index:
# #     display(curr_df.loc[i])

#display(selected_gap_df.index.get_level_values(0).unique())

# Do we update the value of the "best" in each column when no VPCs are generated for a run and we use the "no-VPCs" data?
# This may cause the stats in the "best" row to improve
# For example, we replace V+GurF with GurF when no VPCs are generated, since that is what would occur without VPCs
# But if GurF is better than any V+GurF when VPCs are produced, then the average in the max-row is inflated
SHOULD_UPDATE_MAX_WHEN_NO_VPCS = False

# inst_set = selected_gap_df.index.get_level_values(0).unique()
inst_set = selected_gap_df.index.levels[0]
num_inst = len(inst_set)
for curr_inst_ind, inst in enumerate(inst_set):
    print("{}/{}".format(curr_inst_ind+1,num_inst), end='\r', flush=True)
    curr_df = selected_gap_df.loc[inst].copy() # copy needed to not throw SettingWithCopyWarning

    # Set 0-row to have max values across all rows for this instance
    max_vals = curr_df[gap_cols].max()
    selected_gap_df.loc[(inst,0),gap_cols] = max_vals

    for ind in curr_df.index:
        if ind == 0:
            continue

        # Propogate GurF and GurL down
        sel_gap = [col_first_ref_first, col_first_ref, col_last_ref_first, col_last_ref]
        selected_gap_df.loc[(inst,ind),sel_gap] = curr_df.loc[0,sel_gap]

        # If no VPCs produced, the values for V+GurF and V+GurL have not been provided
        # We replace these by GurF and GurL
        # Currently disabled: update max for that column too (if disabled, we instead keep max as the value among those that generated VPCs)
        num_vpc = curr_df.loc[ind,col_num_vpcs]
        if num_vpc == 0:
            # print("Zero cuts for inst {} at depth {:d}".format(inst, ind))
            ref_gap = [col_first_ref_first, col_last_ref_first] # this is where we pull info from
            refinds = [gap_cols.index(colname) for colname in ref_gap] 
            sel_gap = [col_first_ref_v, col_last_ref_v] # this is where we put the info
            selected_gap_df.loc[(inst,ind),sel_gap] = curr_df.loc[0,ref_gap].to_numpy()

            if SHOULD_UPDATE_MAX_WHEN_NO_VPCS:
                for i in refinds:
                    if curr_df.loc[0,gap_cols[i]] > selected_gap_df.loc[(inst,0),gap_cols[i+1]]:
                        # if curr_df.loc[0,gap_cols[i]] > 0:
                            # print("DEBUG: Updating {} for inst {} from {:f} to {:f}".format(
                            #     gap_cols[i+1], 
                            #     inst, 
                            #     selected_gap_df.loc[(inst,0),gap_cols[i+1]], 
                            #     curr_df.loc[0,gap_cols[i]]))
                        selected_gap_df.loc[(inst,0),gap_cols[i+1]] = curr_df.loc[0,gap_cols[i]]

display(selected_gap_df.head(21).loc[:,[col_num_vpcs]+gap_cols])

332/332

Unnamed: 0_level_0,Unnamed: 1_level_0,NUM VPC,GMIC % GAP CLOSED,ROOT % GAP CLOSED,BEST DISJ % GAP CLOSED,VPC % GAP CLOSED,VPC+GMIC % GAP CLOSED,"MAX(GMIC,VPC) % GAP CLOSED",FIRST REF FIRST_CUT_PASS % GAP CLOSED,AVG REF FIRST_CUT_PASS % GAP CLOSED,BEST REF FIRST_CUT_PASS % GAP CLOSED,AVG REF+V FIRST_CUT_PASS % GAP CLOSED,FIRST REF LAST_CUT_PASS % GAP CLOSED,AVG REF LAST_CUT_PASS % GAP CLOSED,BEST REF LAST_CUT_PASS % GAP CLOSED,AVG REF+V LAST_CUT_PASS % GAP CLOSED
INSTANCE,disj_terms,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
10teams_presolved,0,0,100.0,0.0,8.120488e-12,1.864464e-09,100.0,100.0,100.0,93.877551,100.0,93.877551,100.0,100.0,100.0,100.0
10teams_presolved,2,77,100.0,0.0,-4.872293e-12,-1.689062e-10,100.0,100.0,100.0,93.877551,100.0,93.877551,100.0,100.0,100.0,100.0
10teams_presolved,4,1,100.0,0.0,8.120488e-12,-2.744725e-10,100.0,100.0,100.0,93.877551,100.0,93.877551,100.0,100.0,100.0,100.0
10teams_presolved,8,34,100.0,0.0,-1.624098e-11,1.864464e-09,100.0,100.0,100.0,93.877551,100.0,93.877551,100.0,100.0,100.0,100.0
10teams_presolved,16,68,100.0,0.0,-9.744586e-12,-3.897834e-11,100.0,100.0,100.0,93.877551,100.0,93.877551,100.0,100.0,100.0,100.0
10teams_presolved,32,12,100.0,0.0,-9.744586e-12,-2.848667e-09,100.0,100.0,100.0,93.877551,100.0,93.877551,100.0,100.0,100.0,100.0
10teams_presolved,64,0,100.0,0.0,-9.744586e-12,0.0,100.0,100.0,100.0,93.877551,0.0,100.0,100.0,100.0,0.0,100.0
23588_presolved,0,0,5.772831,0.0,72.18238,71.61482,71.624582,71.614817,15.995738,15.591428,16.055009,68.936554,24.929159,24.38133,26.063355,71.775864
23588_presolved,2,11,5.772831,0.0,21.88689,17.45981,18.670949,17.45981,15.995738,15.591428,16.055009,24.005256,24.929159,24.38133,26.063355,31.657072
23588_presolved,4,75,5.772831,0.0,34.09109,27.49366,27.49366,27.49366,15.995738,15.591428,16.055009,26.212873,24.929159,24.38133,26.063355,31.887967


#### DEBUG: Why REF+V is less than REF

In [27]:
### DEBUG
# Why REF+V < REF

# inst = 'f2gap801600_presolved'
inst = 'neos-1112787_presolved'

tmp_df = gap_df.loc[inst,['NUM VPC']+[REF_TYPE2 + ' ' + 'REF FIRST_CUT_PASS']+[REFV_TYPE1 + ' ' + 'REF+V FIRST_CUT_PASS']+gap_cols+['LP OBJ','IP OBJ']]

# display(tmp_df)
# display(gap_df.loc[inst,['NUM VPC']+['BEST REF FIRST_CUT_PASS']+['FIRST REF+V FIRST_CUT_PASS']+gap_cols])

# display(selected_gap_df.loc[inst,['NUM VPC']+['BEST REF FIRST_CUT_PASS']+['FIRST REF+V FIRST_CUT_PASS']+gap_cols])

col = "REF+V FIRST_CUT_PASS"
tmp_df[REFV_TYPE1 + " " + col + " % GAP CLOSED"] = calc_gap_closed(tmp_df, REFV_TYPE1 + " " + col)
display(tmp_df)

Unnamed: 0_level_0,NUM VPC,BEST REF FIRST_CUT_PASS,AVG REF+V FIRST_CUT_PASS,GMIC % GAP CLOSED,ROOT % GAP CLOSED,BEST DISJ % GAP CLOSED,VPC % GAP CLOSED,VPC+GMIC % GAP CLOSED,"MAX(GMIC,VPC) % GAP CLOSED",FIRST REF FIRST_CUT_PASS % GAP CLOSED,AVG REF FIRST_CUT_PASS % GAP CLOSED,BEST REF FIRST_CUT_PASS % GAP CLOSED,AVG REF+V FIRST_CUT_PASS % GAP CLOSED,FIRST REF LAST_CUT_PASS % GAP CLOSED,AVG REF LAST_CUT_PASS % GAP CLOSED,BEST REF LAST_CUT_PASS % GAP CLOSED,AVG REF+V LAST_CUT_PASS % GAP CLOSED,LP OBJ,IP OBJ
disj_terms,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
0,0,509205800000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.212392,14.212392,14.212392,0.0,51.656914,51.641457,51.860079,0.0,500000000000.0,564773000000.0
2,0,0.0,0.0,23.423909,0.0,1.183228,0.0,23.423909,23.423909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,500000000000.0,564773000000.0
4,0,0.0,0.0,23.423909,0.0,3.188024,0.0,23.423909,23.423909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,500000000000.0,564773000000.0
8,0,0.0,0.0,23.423909,0.0,5.09975,0.0,23.423909,23.423909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,500000000000.0,564773000000.0
16,0,0.0,0.0,23.423909,0.0,9.931782,0.0,23.423909,23.423909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,500000000000.0,564773000000.0
32,0,0.0,0.0,23.423909,0.0,23.324585,0.0,23.423909,23.423909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,500000000000.0,564773000000.0
64,0,0.0,0.0,23.423909,0.0,75.121684,0.0,23.423909,23.423909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,500000000000.0,564773000000.0


In [28]:
# inst = 'neos-1112787_presolved'

# display(gap_df.loc[inst,['NUM VPC']+['BEST REF FIRST_CUT_PASS']+['FIRST REF+V FIRST_CUT_PASS']+gap_cols])

# display(selected_gap_df.loc[inst,['NUM VPC']+['BEST REF FIRST_CUT_PASS']+['FIRST REF+V FIRST_CUT_PASS']+gap_cols])

### `best_gap_df`: For each instance, what the best gap closed is (and how that was obtained)

In [29]:
## Create best df = for each instance, what the best gap closed is (and how that was obtained)

# inst_set = selected_gap_instances_dict.keys()
inst_set = selected_gap_df.index.levels[0]
# inst_set = ['neos22_presolved']
# inst_set = ['usAbbrv-8-25_70_presolved']

best_gap_df = pd.DataFrame(
    columns = gap_cols_short+[
        'BEST VPC DISJ',
        'BEST GMIC+VPC DISJ',
        'BEST V+GurF DISJ',
        'BEST V+GurL DISJ',
        'NUM VPC',
        'NUM GMIC',
    ],
    index = inst_set,
    dtype = float,
)

num_inst = len(inst_set)
for i, inst in enumerate(inst_set):
    print("{}/{}".format(i+1,num_inst), end='\r', flush=True)
    # print("Processing instance {:d} with name {}.".format(i, inst))
    best_vpc = -1.
    best_vpc_disj = -1
    best_vpcgmic = -1.
    best_vpcgmic_disj = -1
    best_max_gmic_vpc = -1.
    best_VGurF = -1.
    best_VGurF_disj = -1
    best_VGurL = -1.
    best_VGurL_disj = -1
    best_num_vpc = -1
    best_num_gmic = -1
    
    curr_df = selected_gap_df.loc[inst]
    
    # Get info for GurF and GurL from the no-VPC row
    row = curr_df.loc[0]
    GurF_gap = float(row[col_first_ref])
    GurL_gap = float(row[col_last_ref])
    root_gap = float(row[col_root])
    gmic_gap = float(row[col_gmic])
    disj_gap = float(row[col_best_disj])

    for index, row in curr_df.iterrows():
        num_disj_terms = int(row['NUM DISJ TERMS'])
        # num_obj_tried  = float(row['NUM OBJ'])
        num_vpc        = float(row[col_num_vpcs])
        if num_disj_terms <= 0 or num_vpc == 0:
            continue
            
        # print("Index {:d}: Processing instance {} with {:d} disj terms.".format(index, inst, num_disj_terms))
        vpc_gap     = float(row[col_vpc])
        vpcgmic_gap = float(row[col_vpc_gmic])
        VGurF_gap   = float(row[col_first_ref_v])
        VGurL_gap   = float(row[col_last_ref_v])
        # num_vpc     = float(row['NUM VPC'])
        num_gmic    = float(row[col_num_gmic])
        
        if (best_vpc < vpc_gap): #or (is_val(best_vpc, vpc_gap) and best_num_vpc == 0):
            best_vpc = vpc_gap
            best_vpc_disj = index
            best_num_vpc = num_vpc
            best_num_gmic = num_gmic
        if best_vpcgmic < vpcgmic_gap:
            best_vpcgmic = vpcgmic_gap
            best_vpcgmic_disj = index
        if best_max_gmic_vpc < max(vpc_gap, gmic_gap):
            best_max_gmic_vpc = max(vpc_gap, gmic_gap)
        if best_VGurF < VGurF_gap:
            best_VGurF = VGurF_gap
            best_VGurF_disj = index
        if best_VGurL < VGurL_gap:
            best_VGurL = VGurL_gap
            best_VGurL_disj = index

    best_gap_df.iloc[i] = [
        gmic_gap if gmic_gap >= EPS else 0.,
        root_gap if root_gap >= EPS else 0.,
        disj_gap if disj_gap >= EPS else 0.,
        best_vpc if best_vpc >= EPS else 0.,
        best_vpcgmic if best_vpcgmic >= EPS else 0.,
        best_max_gmic_vpc if best_max_gmic_vpc >= EPS else 0.,
        GurF_gap if GurF_gap >= EPS else 0.,
        best_VGurF if best_VGurF >= EPS else 0.,
        GurL_gap if GurL_gap >= EPS else 0.,
        best_VGurL if best_VGurL >= EPS else 0.,
        best_vpc_disj,
        best_vpcgmic_disj,
        best_VGurF_disj,
        best_VGurL_disj,
        best_num_vpc,
        best_num_gmic,
    ]

col_list = ['BEST VPC DISJ', 'BEST GMIC+VPC DISJ', 'BEST V+GurF DISJ', 'BEST V+GurL DISJ', 'NUM VPC', 'NUM GMIC']
for col in col_list:
    best_gap_df[col] = best_gap_df[col].astype(np.int64)

display(best_gap_df)

332/332

Unnamed: 0_level_0,G,R,DB,V,V+G,"max(G,V)",GurF,V+GurF,GurL,V+GurL,BEST VPC DISJ,BEST GMIC+VPC DISJ,BEST V+GurF DISJ,BEST V+GurL DISJ,NUM VPC,NUM GMIC
INSTANCE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
10teams_presolved,100.000000,0.0,0.000000,0.000000,100.000000,100.000000,93.877551,93.877551,100.000000,100.000000,8,32,4,2,34,153
23588_presolved,5.772831,0.0,72.182376,71.614817,71.624582,71.614817,15.591428,68.936554,24.381330,71.775864,64,64,64,64,75,75
30n20b8_presolved,11.099235,0.0,1.564622,0.033334,11.099235,11.099235,0.926468,1.331188,16.910036,16.300723,2,2,2,4,190,184
50v-10_presolved,45.753596,0.0,18.008191,11.184101,45.823184,45.753596,49.858925,50.152252,73.169319,74.444520,64,16,8,64,29,29
a1c1s1_presolved,25.105518,0.0,4.895611,1.094331,25.382401,25.105518,45.584474,46.254097,88.806373,88.730339,8,2,4,4,3,154
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
umts_presolved,0.973181,0.0,0.209040,0.108872,0.973181,0.973181,1.412379,1.463163,4.793132,4.993099,64,64,2,32,276,275
usAbbrv-8-25_70_presolved,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,8,8,8,8,4,722
vpm1_presolved,16.930266,0.0,7.788162,4.672897,16.930266,16.930266,45.861148,56.942590,68.157543,72.129506,32,2,2,64,10,16
vpm2_presolved,17.849671,0.0,14.293216,7.834301,19.708285,17.849671,43.301829,50.632749,73.261050,73.527940,64,32,8,32,25,25


#### DEBUG: Look at `best_gap_df` entries

In [30]:
#best_gap_df.to_csv('best_gap.csv')

#### DEBUG: In `best_gap_df`, can get V > V+G due to numerical issues

In [31]:
## DEBUG: You can get V > V+G due to numerical issues

col1 = best_gap_df['V']
col2 = best_gap_df['V+G']

display(best_gap_df[(col1 > col2 + EPS) == True])

#df.loc['neos-1058477_presolved'] #.to_csv("neos-1058477_presolved_data.csv")
df.loc['seymour-disj-10_presolved']

Unnamed: 0_level_0,G,R,DB,V,V+G,"max(G,V)",GurF,V+GurF,GurL,V+GurL,BEST VPC DISJ,BEST GMIC+VPC DISJ,BEST V+GurF DISJ,BEST V+GurL DISJ,NUM VPC,NUM GMIC
INSTANCE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
seymour-disj-10_presolved,0.212331,0.000224,5.987611,0.89352,0.89352,0.89352,0.168905,0.788231,0.368672,1.130276,8,8,8,8,621,614


Unnamed: 0_level_0,cutlimit,gomory,mode,partial_bb_strategy,partial_bb_keep_pruned_nodes,partial_bb_num_strong,preprocess,prlp_flip_beta,rounds,bb_mode,...,cpu_model,cpu_id,ExitReason,end_time_string,time elapsed,instname,Unnamed: 294,end,IS PURE BINARY,IS MIXED BINARY
disj_terms,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,-1,0,0,4,0,5,0,0,1,1,...,13th Gen Intel(R) Core(TM) i9-13900K,4,SUCCESS,Sat Dec 2 22:16:05 2023,25202,seymour-disj-10_presolved,,DONE,False,False
2,-1,-1,0,4,0,5,0,0,1,11,...,13th Gen Intel(R) Core(TM) i9-13900K,6,SUCCESS,Wed Nov 29 17:42:54 2023,50414,seymour-disj-10_presolved,DONE,,False,False
4,-1,-1,0,4,0,5,0,0,1,11,...,13th Gen Intel(R) Core(TM) i9-13900K,4,SUCCESS,Tue Nov 28 04:56:34 2023,50518,seymour-disj-10_presolved,DONE,,False,False
8,-1,-1,0,4,0,5,0,0,1,11,...,13th Gen Intel(R) Core(TM) i9-13900K,12,CUT_LIMIT,Mon Nov 27 06:12:37 2023,51167,seymour-disj-10_presolved,DONE,,False,False
16,-1,-1,0,4,0,5,0,0,1,11,...,13th Gen Intel(R) Core(TM) i9-13900K,12,PRLP_INFEASIBLE,Wed Nov 29 01:13:35 2023,50458,seymour-disj-10_presolved,DONE,,False,False
32,-1,-1,0,4,0,5,0,0,1,11,...,13th Gen Intel(R) Core(TM) i9-13900K,2,PRLP_INFEASIBLE,Sun Nov 26 05:07:29 2023,50495,seymour-disj-10_presolved,DONE,,False,False
64,-1,-1,0,4,0,5,0,0,1,11,...,13th Gen Intel(R) Core(TM) i9-13900K,2,PRLP_TIME_LIMIT,Sun Nov 26 09:38:08 2023,50564,seymour-disj-10_presolved,DONE,,False,False


#### DEBUG: Find instances in which V+GurF max does not match up

In [32]:
## DEBUG: Find instances in which V+GurF max does not match up
# This causes the value in Table 2 'Best' row to not match Table 1 'All'

# For instance f2gap801600_presolved, the gap closed at the end of the root node is 0% whenever VPCs are used,
# but without VPCs, the gap closed is 50%
# In `best_gap_df`, for an instance in which no VPCs were generated,
# we use the value of GurF/GurL for V+GurF/V+GurL
# In `selected_gap_df`, the "zero" row contains

num_inst = len(best_gap_df.index)
col = 'V+GurF'
origcol = map_short_to_cols_gap[col]
num_errors = 0
avg1 = 0
avg2 = 0
for inst in best_gap_df.index:
    val1 = best_gap_df.loc[inst,col]
    val2 = selected_gap_df.loc[(inst,0),origcol]
    if abs(val1-val2) > EPS:
        print("{} has best_gap_df = {:f} and selected_gap_df = {:f} for col {} (diff = {:e})".format(inst,val1,val2,col,abs(val1-val2)))
        num_errors += 1
    avg1 += val1 / num_inst
    avg2 += val2 / num_inst

print("Average from best_gap_df = {}".format(avg1))
print("Average from selected_gap_df = {}".format(avg2))
print("Total # of errors =", num_errors, flush=True)

Average from best_gap_df = 34.7613945195311
Average from selected_gap_df = 34.761394519531734
Total # of errors = 0


#### DEBUG: Print relevant info from `selected_gap_df` and `best_gap_df` to further debug

In [33]:
## DEBUG
# inst = 'f2gap801600_presolved'
# inst = 'neos22_presolved'
# inst = 'neos-1112787_presolved'
# display(best_gap_df.loc[inst])
# display(selected_gap_df.loc[inst,[col_num_vpcs]+gap_cols])

In [34]:
# ## DEBUG
# gap_cols = [
#     'GMIC % GAP CLOSED',
#     'BEST DISJ % GAP CLOSED',
#     'VPC % GAP CLOSED',
#     'VPC+GMIC % GAP CLOSED',
#     'REF FIRST_CUT_PASS % GAP CLOSED',
#     'REF+V FIRST_CUT_PASS % GAP CLOSED',
#     'REF LAST_CUT_PASS % GAP CLOSED',
#     'REF+V LAST_CUT_PASS % GAP CLOSED',
# ]
# col_num_vpcs = 'NUM VPC'

inst = 'f2gap801600_presolved'
tmp_selected_gap_df = gap_df.loc[selected_gap_instances_dict.keys()]
# Check if inst is in tmp_selected_gap_df
if inst not in tmp_selected_gap_df.index.get_level_values(0).unique():
    print(ValueError("Instance {} is not in tmp_selected_gap_df".format(inst)))
else:
    curr_df = tmp_selected_gap_df.loc[inst].copy() # copy needed to not throw SettingWithCopyWarning

    # Set 0-row to have max values across all rows for this instance
    max_vals = curr_df[gap_cols].max()
    # selected_gap_df.loc[(inst,0),gap_cols] = max_vals

    display(tmp_selected_gap_df.loc[inst])
    display(max_vals)

Unnamed: 0_level_0,NUM DISJ TERMS,ROWS,COLS,LP OBJ,WORST DISJ OBJ,IP OBJ,GMIC OBJ,ROOT OBJ,BEST DISJ OBJ,VPC OBJ,...,FIRST REF LAST_CUT_PASS % GAP CLOSED,FIRST REF+V LAST_CUT_PASS % GAP CLOSED,AVG REF FIRST_CUT_PASS % GAP CLOSED,AVG REF+V FIRST_CUT_PASS % GAP CLOSED,AVG REF LAST_CUT_PASS % GAP CLOSED,AVG REF+V LAST_CUT_PASS % GAP CLOSED,BEST REF FIRST_CUT_PASS % GAP CLOSED,BEST REF+V FIRST_CUT_PASS % GAP CLOSED,BEST REF LAST_CUT_PASS % GAP CLOSED,BEST REF+V LAST_CUT_PASS % GAP CLOSED
disj_terms,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0,80,1600,86570.120867,-inf,86678.99998,,,-inf,,...,49.794643,0.0,49.794643,0.0,49.794643,0.0,49.794643,0.0,49.794643,0.0
2,2,80,1600,86570.120867,86578.45,86678.99998,86655.691018,86570.120867,86571.27,86571.026383,...,49.794643,95.407769,49.794643,95.407769,49.794643,95.407769,49.794643,95.407769,49.794643,95.407769
4,4,80,1600,86570.120867,86582.75,86678.99998,86655.691018,86570.120867,86573.02,86571.188056,...,49.794643,95.407769,49.794643,95.407769,49.794643,95.407769,49.794643,95.407769,49.794643,95.407769
8,8,80,1600,86570.120867,86619.55,86678.99998,86655.691018,86570.120867,86574.1,86571.326821,...,49.794643,95.407769,49.794643,95.407769,49.794643,95.407769,49.794643,95.407769,49.794643,95.407769
16,16,80,1600,86570.120867,86619.55,86678.99998,86655.691018,86570.120867,86575.05,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
32,32,80,1600,86570.120867,86619.55,86678.99998,86655.691018,86570.120867,86575.88,86571.271636,...,49.794643,95.407769,49.794643,95.407769,49.794643,95.407769,49.794643,95.407769,49.794643,95.407769
64,64,80,1600,86570.120867,86622.23,86678.99998,86655.691018,86570.120867,86576.75,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


GMIC % GAP CLOSED                        78.591888
ROOT % GAP CLOSED                         0.000000
BEST DISJ % GAP CLOSED                    6.086315
VPC % GAP CLOSED                          1.107609
VPC+GMIC % GAP CLOSED                    78.607302
MAX(GMIC,VPC) % GAP CLOSED               78.591888
FIRST REF FIRST_CUT_PASS % GAP CLOSED    49.794643
AVG REF FIRST_CUT_PASS % GAP CLOSED      49.794643
BEST REF FIRST_CUT_PASS % GAP CLOSED     49.794643
AVG REF+V FIRST_CUT_PASS % GAP CLOSED    95.407769
FIRST REF LAST_CUT_PASS % GAP CLOSED     49.794643
AVG REF LAST_CUT_PASS % GAP CLOSED       49.794643
BEST REF LAST_CUT_PASS % GAP CLOSED      49.794643
AVG REF+V LAST_CUT_PASS % GAP CLOSED     95.407769
dtype: float64

### Table 1: `avg_gap_df`: average percent gap closed across different combinations of cuts

In [35]:
## TABLE 1: average percent gap closed across different combinations of cuts
## Create avg_gap_df = average gap closed across instances
all_set_name = 'All'
good_vpc_set_name = tex_escape('≥10%')
binary_set_name = 'Binary'
avg_row_name = tex_escape('Avg (%)')
wins_row_name = 'Wins'

idx = pd.MultiIndex.from_product(
    [ [all_set_name, good_vpc_set_name, binary_set_name], [avg_row_name, wins_row_name] ],
    names = ['Set', '']
)
    
ncols = len(best_gap_df.columns)
nrows = len(idx)

col = best_gap_df['V'].astype(float)
good_vpc_df = best_gap_df[col >= 10.]

# intersect the pure binary instances with the selected gap instances
pure_binary_instances = df[df["IS PURE BINARY"] == True].index.get_level_values(0).unique().to_list()
selected_pure_binary_instances = list(set(selected_gap_instances_dict.keys()).intersection(pure_binary_instances))
selected_pure_binary_instances.sort()
print("Found {:d} pure binary instances".format(len(selected_pure_binary_instances)))
binary_instances_df = best_gap_df.loc[selected_pure_binary_instances]

data = np.zeros((nrows, ncols), dtype=float)
data[0,:] = [best_gap_df[col].mean() for col in best_gap_df.columns]
data[2,:] = [good_vpc_df[col].mean() for col in best_gap_df.columns]
data[4,:] = [binary_instances_df[col].mean() for col in best_gap_df.columns]

# display(best_gap_df.head())
avg_gap_df = pd.DataFrame(
    data,
    columns = best_gap_df.columns,
    index = idx,
    dtype = object
)

inst_col_name = '# inst'
avg_gap_df[inst_col_name] = [len(best_gap_df), 0, len(good_vpc_df), 0, len(binary_instances_df), 0]

avg_gap_df.iloc[1] = ["" for i in range(ncols+1)]
avg_gap_df.iloc[3] = ["" for i in range(ncols+1)]
avg_gap_df.iloc[5] = ["" for i in range(ncols+1)]

display(avg_gap_df)

Found 65 pure binary instances


  avg_gap_df.iloc[1] = ["" for i in range(ncols+1)]


Unnamed: 0_level_0,Unnamed: 1_level_0,G,R,DB,V,V+G,"max(G,V)",GurF,V+GurF,GurL,V+GurL,BEST VPC DISJ,BEST GMIC+VPC DISJ,BEST V+GurF DISJ,BEST V+GurL DISJ,NUM VPC,NUM GMIC,# inst
Set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
All,Avg (\%),16.440442,0.256744,18.369376,12.046781,23.871485,22.962066,28.861973,34.761395,48.232477,52.706725,39.933735,35.819277,23.445783,24.46988,72.087349,175.879518,332.0
All,Wins,,,,,,,,,,,,,,,,,
$\ge$10\%,Avg (\%),16.804984,0.692761,38.100165,29.886305,35.981221,34.310278,26.410749,38.66664,45.268305,55.431562,54.413793,51.827586,36.758621,34.637931,64.232759,92.043103,116.0
$\ge$10\%,Wins,,,,,,,,,,,,,,,,,
Binary,Avg (\%),13.641808,0.0,21.798838,17.32117,26.383819,25.48986,19.229524,31.363912,32.935918,42.752544,40.738462,40.0,28.4,28.307692,163.646154,380.323077,65.0
Binary,Wins,,,,,,,,,,,,,,,,,


### `wins_df`: num wins

In [88]:
## Create num wins df
# x wins over y for an instance if x > y + EPS
#shortcols = avg_gap_df.columns[0:-1]
wins_df = pd.DataFrame(
    np.zeros((len(gap_cols_short), len(gap_cols_short)), dtype=int),
    columns = gap_cols_short,
    index = gap_cols_short,
    dtype = int,
)

from itertools import permutations
for (ind1, ind2) in permutations(range(len(gap_cols_short)), 2):
    wins_df.at[gap_cols_short[ind1],gap_cols_short[ind2]] =\
        int(sum(best_gap_df[gap_cols_short[ind1]] > best_gap_df[gap_cols_short[ind2]] + EPS))
    wins_df.at[gap_cols_short[ind2],gap_cols_short[ind1]] =\
        int(sum(best_gap_df[gap_cols_short[ind2]] > best_gap_df[gap_cols_short[ind1]] + EPS))

# Sets we are considering
# all_set = 'Wins (All)'
# good_vpc_set = 'Wins (V ≥ 10%)'
all_set = (all_set_name,wins_row_name)
good_vpc_set = (good_vpc_set_name,wins_row_name)
binary_set = (binary_set_name,wins_row_name)

# "G" are wins relative to "V"
shortrefcol = 'V'
#refcol = 'VPC % GAP CLOSED'
#refcol = map_short_to_cols[shortrefcol]
refcol = shortrefcol
shortdestcol = 'G'
#col = 'GMIC % GAP CLOSED'
#col = map_short_to_cols[shortcol]
destcol = shortdestcol
avg_gap_df.at[all_set,shortdestcol] = wins_df.at[shortdestcol,shortrefcol]
avg_gap_df.at[good_vpc_set,shortdestcol] = sum(good_vpc_df[destcol] > good_vpc_df[refcol] + EPS)
avg_gap_df.at[binary_set,shortdestcol] = sum(binary_instances_df[destcol] > binary_instances_df[refcol] + EPS)

# "DB", "V", "V+G": wins are relative to "G"
shortrefcol = 'G'
#refcol = 'GMIC % GAP CLOSED'
#refcol = map_short_to_cols[shortrefcol]
refcol = shortrefcol
shortdestcol = 'DB'
#col = 'BEST DISJ % GAP CLOSED'
#col = map_short_to_cols[shortcol]
destcol = shortdestcol
avg_gap_df.at[all_set,shortdestcol] = wins_df.at[shortdestcol,shortrefcol]
avg_gap_df.at[good_vpc_set,shortdestcol] = sum(good_vpc_df[destcol] > good_vpc_df[refcol] + EPS)
avg_gap_df.at[binary_set,shortdestcol] = sum(binary_instances_df[destcol] > binary_instances_df[refcol] + EPS)

shortdestcol = 'V'
#col = 'VPC % GAP CLOSED'
#col = map_short_to_cols[shortcol]
destcol = shortdestcol
avg_gap_df.at[all_set,shortdestcol] = wins_df.at[shortdestcol,shortrefcol]
avg_gap_df.at[good_vpc_set,shortdestcol] = sum(good_vpc_df[destcol] > good_vpc_df[refcol] + EPS)
avg_gap_df.at[binary_set,shortdestcol] = sum(binary_instances_df[destcol] > binary_instances_df[refcol] + EPS)

shortdestcol = 'V+G'
#col = 'VPC+GMIC % GAP CLOSED'
#col = map_short_to_cols[shortcol]
destcol = shortdestcol
avg_gap_df.at[all_set,shortdestcol] = wins_df.at[shortdestcol,shortrefcol]
avg_gap_df.at[good_vpc_set,shortdestcol] = sum(good_vpc_df[destcol] > good_vpc_df[refcol] + EPS)
avg_gap_df.at[binary_set,shortdestcol] = sum(binary_instances_df[destcol] > binary_instances_df[refcol] + EPS)

# "V+GurF" are wins relative to "GurF"
shortrefcol = 'GurF'
refcol = shortrefcol
shortdestcol = 'V+GurF'
destcol = shortdestcol
#col = map_short_to_cols[shortcol]
avg_gap_df.at[all_set,shortdestcol] = wins_df.at[shortdestcol,shortrefcol]
avg_gap_df.at[good_vpc_set,shortdestcol] = sum(good_vpc_df[destcol] > good_vpc_df[refcol] + EPS)
avg_gap_df.at[binary_set,shortdestcol] = sum(binary_instances_df[destcol] > binary_instances_df[refcol] + EPS)

# "V+GurL" are wins relative to "GurL"
shortrefcol = 'GurL'
refcol = shortrefcol
shortdestcol = 'V+GurL'
destcol = shortdestcol
wins_df.at[shortdestcol,shortrefcol] = int(sum(best_gap_df[destcol] > best_gap_df[refcol] + EPS))
wins_df.at[shortrefcol,shortdestcol] = int(sum(best_gap_df[refcol] > best_gap_df[destcol] + EPS))
avg_gap_df.at[all_set,shortdestcol] = wins_df.at[shortdestcol,shortrefcol]
avg_gap_df.at[good_vpc_set,shortdestcol] = sum(good_vpc_df[destcol] > good_vpc_df[refcol] + EPS)
avg_gap_df.at[binary_set,shortdestcol] = sum(binary_instances_df[destcol] > binary_instances_df[refcol] + EPS)

# Count number of instances that have V+G > 0
shortdestcol = inst_col_name
#col = 'V+GurL'
destcol = 'V+G'
avg_gap_df.at[all_set,shortdestcol] = sum(best_gap_df[destcol] > EPS)
avg_gap_df.at[good_vpc_set,shortdestcol] = sum(good_vpc_df[destcol] > EPS)
avg_gap_df.at[binary_set,shortdestcol] = sum(binary_instances_df[destcol] > EPS)

display(avg_gap_df)
display(wins_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,G,R,DB,V,V+G,"max(G,V)",GurF,V+GurF,GurL,V+GurL,BEST VPC DISJ,BEST GMIC+VPC DISJ,BEST V+GurF DISJ,BEST V+GurL DISJ,NUM VPC,NUM GMIC,# inst
Set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
All,Avg (\%),16.440442,0.256744,18.369376,12.046781,23.871485,22.962066,28.861973,34.761395,48.232477,52.706725,39.933735,35.819277,23.445783,24.46988,72.087349,175.879518,332
All,Wins,163.0,,181.0,135.0,233.0,,,258.0,,237.0,,,,,,,298
$\ge$10\%,Avg (\%),16.804984,0.692761,38.100165,29.886305,35.981221,34.310278,26.410749,38.66664,45.268305,55.431562,54.413793,51.827586,36.758621,34.637931,64.232759,92.043103,116
$\ge$10\%,Wins,24.0,,102.0,92.0,112.0,,,103.0,,107.0,,,,,,,116
Binary,Avg (\%),13.641808,0.0,21.798838,17.32117,26.383819,25.48986,19.229524,31.363912,32.935918,42.752544,40.738462,40.0,28.4,28.307692,163.646154,380.323077,65
Binary,Wins,22.0,,50.0,37.0,53.0,,,50.0,,49.0,,,,,,,59


Unnamed: 0,G,R,DB,V,V+G,"max(G,V)",GurF,V+GurF,GurL,V+GurL
G,0,288,121,163,0,0,69,33,16,9
R,1,0,0,0,0,0,1,0,1,0
DB,181,284,0,267,153,167,122,109,81,63
V,135,265,0,0,1,0,93,58,59,14
V+G,233,298,137,251,0,186,136,97,69,25
"max(G,V)",135,298,121,163,1,0,125,83,67,20
GurF,224,300,184,213,167,174,0,0,0,0
V+GurF,269,306,191,242,199,213,258,0,55,0
GurL,291,308,232,255,241,243,295,248,0,52
V+GurL,301,313,244,294,279,284,303,291,237,0


### Analyze instances in which DB > G but V <= G

In [89]:
col1 = 'DB'
col2 = 'G'
tmp_df = best_gap_df.loc[best_gap_df[col1] > best_gap_df[col2] + EPS]

col1 = 'V'
tmp_df = tmp_df[tmp_df[col1] <= tmp_df[col2] + EPS]
display(tmp_df.head())

# inst_set = tmp_df.index
inst_depth_set = [(inst,tmp_df.at[inst,'BEST VPC DISJ']) for inst in tmp_df.index]

print("Total num inst with DB > G >= V is {:d}".format(len(tmp_df)))
print("Num times hit cut limit = {:d}".format(sum(df.loc[inst_depth_set,'ExitReason'] == 'CUT_LIMIT')))

# display(df.loc[inst_depth_set])


Unnamed: 0_level_0,G,R,DB,V,V+G,"max(G,V)",GurF,V+GurF,GurL,V+GurL,BEST VPC DISJ,BEST GMIC+VPC DISJ,BEST V+GurF DISJ,BEST V+GurL DISJ,NUM VPC,NUM GMIC
Instance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
app3_presolved,20.407763,0.0,55.66602,15.523885,26.668549,20.407763,28.42703,34.82387,83.904311,84.426687,32,32,32,16,15,18
beasleyC3_presolved,1.296774,0.0,2.810011,1.236898,1.796111,1.296774,57.739584,63.229491,97.718222,97.873228,16,32,2,32,124,124
bppc8-09_presolved,3.078078,0.0,3.301803,0.593229,3.078078,3.078078,1.187913,1.588956,3.611834,3.167295,4,32,4,4,30,30
cvs16r106-72_presolved,0.75258,0.0,1.981538,0.0,0.828432,0.75258,9.27381,9.602921,13.26718,13.430216,4,2,2,2,675,2440
cvs16r128-89_presolved,0.858758,0.0,3.019355,0.168238,0.964516,0.858758,5.134957,5.134957,6.691533,6.525244,4,4,2,2,962,3200


Total num inst with DB > G >= V is 46
Num times hit cut limit = 23


### Analyze instances in which V+G <= G

In [90]:
col1 = 'V+G'
col2 = 'G'
tmp_df = best_gap_df.loc[best_gap_df[col1] <= best_gap_df[col2] + EPS]

display(tmp_df)

inst_depth_set = [(inst,tmp_df.at[inst,'BEST VPC DISJ']) for inst in tmp_df.index]

print("Total num inst with V+G <= G is {:d}".format(len(tmp_df)))
print("Num times with G = 100% gap closed = {:d}".format(sum(tmp_df['G'] > 100. - EPS)))
print("Num times with V+G = 0% gap closed = {:d}".format(sum(tmp_df['V+G'] == 0.)))
print("Num times hit cut limit = {:d}".format(sum(df.loc[inst_depth_set,'ExitReason'] == 'CUT_LIMIT')))

# display(df.loc[inst_depth_set])

Unnamed: 0_level_0,G,R,DB,V,V+G,"max(G,V)",GurF,V+GurF,GurL,V+GurL,BEST VPC DISJ,BEST GMIC+VPC DISJ,BEST V+GurF DISJ,BEST V+GurL DISJ,NUM VPC,NUM GMIC
Instance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
10teams_presolved,100.000000,0.0,0.000000,0.000000,100.000000,100.000000,93.877551,93.877551,100.000000,100.000000,8,32,4,2,34,153
30n20b8_presolved,11.099235,0.0,1.564622,0.033334,11.099235,11.099235,0.926468,1.331188,16.910036,16.300723,2,2,2,4,190,184
a2c1s1_presolved,24.372660,0.0,3.337449,0.461882,24.372660,24.372660,42.797493,44.248500,91.529658,92.019454,2,4,16,32,18,157
b2c1s1_presolved,19.812503,0.0,2.007490,0.085124,19.812503,19.812503,23.068346,23.399644,72.082156,72.319120,64,32,16,4,4,238
berlin_5_8_0_presolved,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,16,16,16,16,9,235
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
tr12-30_presolved,58.360211,0.0,1.743446,0.477979,58.360211,58.360211,60.236075,60.438883,98.547975,98.689839,64,2,2,4,2,321
traininstance6_presolved,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,2,2,2,2,2,21
umts_presolved,0.973181,0.0,0.209040,0.108872,0.973181,0.973181,1.412379,1.463163,4.793132,4.993099,64,64,2,32,276,275
usAbbrv-8-25_70_presolved,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,8,8,8,8,4,722


Total num inst with V+G <= G is 99
Num times with G = 100% gap closed = 3
Num times with V+G = 0% gap closed = 34
Num times hit cut limit = 23


### Analyze when G > V

In [91]:
col1 = 'G'
col2 = 'V'
tmp_df = best_gap_df.loc[best_gap_df[col1] > best_gap_df[col2] + EPS]

display(tmp_df)

inst_depth_set = [(inst,tmp_df.at[inst,'BEST VPC DISJ']) for inst in tmp_df.index]

print("Total num inst with G > V is {:d}".format(len(tmp_df)))
print("Num times with #V < 10 is {:d}".format(len(tmp_df[(tmp_df['NUM VPC'] < 10)])))
print("Num times with #V < 10 while #G > 10 is {:d}".format(len(tmp_df[(tmp_df['NUM VPC'] < 10) & (tmp_df['NUM GMIC'] > 10)])))
# print("Num times with #V < 10 is {:d}".format(sum(tmp_df['NUM VPC'] < 10)))

# print("Num times with V+G = 0% gap closed = {:d}".format(sum(tmp_df['V+G'] == 0.)))
print("Num times hit cut limit = {:d}".format(sum(df.loc[inst_depth_set,'ExitReason'] == 'CUT_LIMIT')))

tmp_inst_set = tmp_df[(tmp_df['NUM VPC'] < 10) & (tmp_df['NUM GMIC'] > 10)].index
tmp_inst_depth_set = [(inst,tmp_df.at[inst,'BEST VPC DISJ']) for inst in tmp_inst_set]
print("Num times hit cut limit when #G > #V = {:d} (should be 0)".format(sum(df.loc[tmp_inst_depth_set,'ExitReason'] == 'CUT_LIMIT')))

# display(df.loc[inst_depth_set])

Unnamed: 0_level_0,G,R,DB,V,V+G,"max(G,V)",GurF,V+GurF,GurL,V+GurL,BEST VPC DISJ,BEST GMIC+VPC DISJ,BEST V+GurF DISJ,BEST V+GurL DISJ,NUM VPC,NUM GMIC
Instance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
10teams_presolved,100.000000,0.0,0.000000,0.000000,100.000000,100.000000,93.877551,93.877551,100.000000,100.000000,8,32,4,2,34,153
30n20b8_presolved,11.099235,0.0,1.564622,0.033334,11.099235,11.099235,0.926468,1.331188,16.910036,16.300723,2,2,2,4,190,184
50v-10_presolved,45.753596,0.0,18.008191,11.184101,45.823184,45.753596,49.858925,50.152252,73.169319,74.444520,64,16,8,64,29,29
a1c1s1_presolved,25.105518,0.0,4.895611,1.094331,25.382401,25.105518,45.584474,46.254097,88.806373,88.730339,8,2,4,4,3,154
a2c1s1_presolved,24.372660,0.0,3.337449,0.461882,24.372660,24.372660,42.797493,44.248500,91.529658,92.019454,2,4,16,32,18,157
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
tr12-30_presolved,58.360211,0.0,1.743446,0.477979,58.360211,58.360211,60.236075,60.438883,98.547975,98.689839,64,2,2,4,2,321
umts_presolved,0.973181,0.0,0.209040,0.108872,0.973181,0.973181,1.412379,1.463163,4.793132,4.993099,64,64,2,32,276,275
vpm1_presolved,16.930266,0.0,7.788162,4.672897,16.930266,16.930266,45.861148,56.942590,68.157543,72.129506,32,2,2,64,10,16
vpm2_presolved,17.849671,0.0,14.293216,7.834301,19.708285,17.849671,43.301829,50.632749,73.261050,73.527940,64,32,8,32,25,25


Total num inst with G > V is 163
Num times with #V < 10 is 64
Num times with #V < 10 while #G > 10 is 60
Num times hit cut limit = 61
Num times hit cut limit when #G > #V = 0 (should be 0)


### Analyze when DB % gap closed nontrivial

In [92]:
# Select instances in selected_gap_df for which value in col_best_disj is at least MIN_DISJ_GAP
MIN_DISJ_GAP = 0.
tmp_df = selected_gap_df.loc[selected_gap_df[col_best_disj] >= MIN_DISJ_GAP]

# Collect instance names with best disj gap >= MIN_DISJ_GAP
tmp_df.index = tmp_df.index.remove_unused_levels()
inst_set_db = tmp_df.index.get_level_values(0).unique()
num_inst_db = len(inst_set_db)
inst_set_orig = selected_gap_df.index.levels[0]
num_inst_orig = len(inst_set)
print("Total num inst with best disj gap >= {:f} is {:d} (out of {:d} total instances).".format(MIN_DISJ_GAP, num_inst_db, num_inst_orig))

# Report average in each column broken down by depth
tmp_df_grouped = tmp_df.groupby(level='disj_terms').mean(numeric_only=True)
display(tmp_df_grouped[gap_cols])

# Repeat with MIN_DISJ_GAP = 1.0
MIN_DISJ_GAP = 1.
tmp_df = selected_gap_df.loc[selected_gap_df[col_best_disj] >= MIN_DISJ_GAP]

# Collect instance names with best disj gap >= MIN_DISJ_GAP
tmp_df.index = tmp_df.index.remove_unused_levels()
inst_set_db = tmp_df.index.get_level_values(0).unique()
num_inst_db = len(inst_set_db)
inst_set_orig = selected_gap_df.index.levels[0]
num_inst_orig = len(inst_set)
print("Total num inst with best disj gap >= {:f} is {:d} (out of {:d} total instances).".format(MIN_DISJ_GAP, num_inst_db, num_inst_orig))

# Report average in each column broken down by depth
tmp_df_grouped = tmp_df.groupby(level='disj_terms').mean(numeric_only=True)
display(tmp_df_grouped[gap_cols])

# Repeat with MIN_DISJ_GAP = 10.
MIN_DISJ_GAP = 10.
tmp_df = selected_gap_df.loc[selected_gap_df[col_best_disj] >= MIN_DISJ_GAP]

# Collect instance names with best disj gap >= MIN_DISJ_GAP
tmp_df.index = tmp_df.index.remove_unused_levels()
inst_set_db = tmp_df.index.get_level_values(0).unique()
num_inst_db = len(inst_set_db)
inst_set_orig = selected_gap_df.index.levels[0]
num_inst_orig = len(inst_set)
print("Total num inst with best disj gap >= {:f} is {:d} (out of {:d} total instances).".format(MIN_DISJ_GAP, num_inst_db, num_inst_orig))

# Report average in each column broken down by depth
tmp_df_grouped = tmp_df.groupby(level='disj_terms').mean(numeric_only=True)
display(tmp_df_grouped[gap_cols])


Total num inst with best disj gap >= 0.000000 is 332 (out of 332 total instances).


Unnamed: 0_level_0,GMIC % GAP CLOSED,ROOT % GAP CLOSED,BEST DISJ % GAP CLOSED,VPC % GAP CLOSED,VPC+GMIC % GAP CLOSED,"MAX(GMIC,VPC) % GAP CLOSED",FIRST REF FIRST_CUT_PASS % GAP CLOSED,AVG REF FIRST_CUT_PASS % GAP CLOSED,BEST REF FIRST_CUT_PASS % GAP CLOSED,AVG REF+V FIRST_CUT_PASS % GAP CLOSED,FIRST REF LAST_CUT_PASS % GAP CLOSED,AVG REF LAST_CUT_PASS % GAP CLOSED,BEST REF LAST_CUT_PASS % GAP CLOSED,AVG REF+V LAST_CUT_PASS % GAP CLOSED
disj_terms,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,16.440442,0.256744,18.369376,12.046781,23.871485,22.962066,28.777295,28.86196,30.557867,34.761395,48.268423,48.23246,50.998902,52.706725
2,16.81659,0.271462,3.187312,2.338625,17.552359,17.114105,29.51606,29.629626,29.794142,31.731929,49.60489,49.77657,49.622293,50.530618
4,16.88398,0.268047,5.502264,3.750756,18.218311,17.635035,29.742792,29.823835,31.028692,32.083393,49.979826,49.893666,51.220159,50.742802
8,16.563539,0.271462,8.583211,5.145331,18.70753,17.961435,29.564349,29.642072,29.221818,32.380648,49.910656,49.733513,48.9748,50.795753
16,16.634153,0.271462,11.813938,6.835819,20.362324,19.666793,29.341258,29.423419,28.321713,33.144299,49.631617,49.445878,47.898063,51.43509
32,16.865828,0.275855,15.763571,9.430012,22.552764,21.676923,29.551504,29.560056,27.641877,34.442462,49.407011,49.249751,46.62279,52.176809
64,16.741323,0.274081,19.609752,10.939689,23.558656,22.766893,29.406976,29.39943,27.225228,34.630077,49.334182,48.94916,44.961847,52.487831


Total num inst with best disj gap >= 1.000000 is 260 (out of 332 total instances).


Unnamed: 0_level_0,GMIC % GAP CLOSED,ROOT % GAP CLOSED,BEST DISJ % GAP CLOSED,VPC % GAP CLOSED,VPC+GMIC % GAP CLOSED,"MAX(GMIC,VPC) % GAP CLOSED",FIRST REF FIRST_CUT_PASS % GAP CLOSED,AVG REF FIRST_CUT_PASS % GAP CLOSED,BEST REF FIRST_CUT_PASS % GAP CLOSED,AVG REF+V FIRST_CUT_PASS % GAP CLOSED,FIRST REF LAST_CUT_PASS % GAP CLOSED,AVG REF LAST_CUT_PASS % GAP CLOSED,BEST REF LAST_CUT_PASS % GAP CLOSED,AVG REF+V LAST_CUT_PASS % GAP CLOSED
disj_terms,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,18.227744,0.327843,23.424177,15.364223,27.711104,26.552123,30.447021,30.44314,31.916892,37.672142,50.476574,50.216553,52.708422,55.889712
2,19.70366,0.575591,6.569134,4.810681,21.164194,20.334465,31.905676,31.748461,32.011063,34.828815,50.825198,50.683747,51.784477,52.253868
4,19.380455,0.439116,8.922722,6.08816,21.551279,20.608501,32.167897,32.143303,33.563055,35.230839,52.306665,52.195128,54.373568,53.780922
8,19.838527,0.396461,12.472576,7.484633,22.959018,21.877153,32.415086,32.394054,32.298629,35.894962,52.844445,52.731761,52.914118,54.409681
16,19.289088,0.372223,16.14384,9.347157,24.392653,23.443893,31.711214,31.734877,30.742417,36.423521,51.971993,51.780105,50.803904,54.674929
32,19.041903,0.350778,20.005784,11.976958,26.267993,25.1581,31.438156,31.519871,29.975556,37.300792,51.554022,51.454553,49.559866,55.296133
64,18.227744,0.327843,23.424177,13.068023,26.376989,25.43202,30.447021,30.44314,28.422874,36.483809,50.476574,50.216553,46.895427,54.376022


Total num inst with best disj gap >= 10.000000 is 173 (out of 332 total instances).


Unnamed: 0_level_0,GMIC % GAP CLOSED,ROOT % GAP CLOSED,BEST DISJ % GAP CLOSED,VPC % GAP CLOSED,VPC+GMIC % GAP CLOSED,"MAX(GMIC,VPC) % GAP CLOSED",FIRST REF FIRST_CUT_PASS % GAP CLOSED,AVG REF FIRST_CUT_PASS % GAP CLOSED,BEST REF FIRST_CUT_PASS % GAP CLOSED,AVG REF+V FIRST_CUT_PASS % GAP CLOSED,FIRST REF LAST_CUT_PASS % GAP CLOSED,AVG REF LAST_CUT_PASS % GAP CLOSED,BEST REF LAST_CUT_PASS % GAP CLOSED,AVG REF+V LAST_CUT_PASS % GAP CLOSED
disj_terms,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,18.320578,0.49271,32.579625,21.776063,31.901435,30.402559,29.440204,29.485523,31.137317,38.395949,50.074118,50.001361,52.446508,57.139024
2,30.605842,4.229435,25.987925,18.31223,36.275594,34.303646,43.921009,42.456622,44.141253,46.945514,64.871026,64.401773,67.02913,66.459897
4,22.382298,1.575672,21.67584,15.406007,28.007343,25.9471,34.670847,34.136859,35.504004,37.360304,56.065492,56.207543,58.824663,58.14814
8,18.918868,0.92367,24.013431,14.610736,25.208134,23.228734,31.117014,30.883212,30.389245,35.16893,52.902363,52.883089,53.433311,54.605975
16,18.81644,0.69258,25.831705,15.311312,27.491245,26.041263,30.230217,30.187669,30.382105,36.552548,51.666548,51.613845,52.043302,55.533319
32,18.171154,0.575597,29.438205,18.110155,29.406812,27.826986,28.93884,28.934191,28.925375,36.403282,50.065183,49.996045,49.989243,55.037541
64,18.320578,0.49271,32.579625,18.516932,29.983077,28.758728,29.440204,29.485523,28.002424,36.934508,50.074118,50.001361,47.007296,55.310255


### Table 2: `gap_by_size_df`: gap closed by num leaves

In [93]:
## TABLE 2: gap closed by num leaves
## Note that ``best'' can be worse than for a single row
## because when no VPCs are generated, we assume the "no VPCs" results hold for Gurobi,
## but we do not count that for the ``best'' calculation,
## since otherwise there is potential bias, as sometimes Gurobi does better without VPCs
shortcols = [
        'DB',
        'V',
        'V+G',
        'max(G,V)',
        #'GurF',
        'V+GurF',
        #'GurL',
        'V+GurL',
    ]

gap_by_size_df = pd.DataFrame(
    columns = shortcols,
    index = [0] + sizes + ['Best'],
    # index = [str(size) + " leaves" for size in sizes]+['Best'],
    dtype = float,
)
zero_row_name = 0

# `grouped_df` will collect gap closed across instances, grouped by num terms
grouped_df = selected_gap_df.groupby(level='disj_terms').mean(numeric_only=True)
ungrouped_df = best_gap_df.mean(numeric_only=True)

# For each of the columns (in shortcols),
# save the average value for each size
# (this will put in the right place as the index is based on sizes for both)
for col in shortcols:
    orig_col = map_short_to_cols_gap[col]
    #gap_by_size_df.loc[2]['DB'] = best_gap_df[orig_col].mean()
    gap_by_size_df[col] = grouped_df[orig_col]

# Fill in the 'Best' row, since that is currently stored in `gap_by_size_df` in the "0" row
gap_by_size_df.loc['Best'] = gap_by_size_df.loc[zero_row_name]

# Now update the zero row with correct values
col = 'DB'
gap_by_size_df[col][zero_row_name] = 0.

col = 'V'
gap_by_size_df[col][zero_row_name] = 0.

stubs = ['G', 'GurF', 'GurL']
for stub in stubs:
    col = 'V+'+stub
    # orig_col = map_short_to_cols[stub]
    gap_by_size_df[col][0] = ungrouped_df[stub]

# Also replace the 0-row of the "max(G,V)" column with the value of G, since that corresponds to no VPCs
gap_by_size_df['max(G,V)'][0] = gap_by_size_df['V+G'][0]

# Reindex to add "leaves" to index
idx = ['VPCs disabled']+[str(size) + " leaves" for size in sizes]+['Best']
reidx = {old_id : new_id for old_id, new_id in zip(gap_by_size_df.index,idx)}
gap_by_size_df.rename(reidx, inplace=True)

# display(grouped_df[gap_cols])
display(ungrouped_df)
display(gap_by_size_df)

# Create new df with additional columns:
# (1) the ratio 'V'/'DB'
# (2) the ratio 'max(G,V)'/'V+G'
gap_by_size_df_new = gap_by_size_df.copy()
gap_by_size_df_new['V/DB'] = gap_by_size_df_new['V'] / gap_by_size_df_new['DB']
gap_by_size_df_new['max(G,V)/V+G'] = gap_by_size_df_new['max(G,V)'] / gap_by_size_df_new['V+G']
display(gap_by_size_df_new)

G                      16.440442
R                       0.256744
DB                     18.369376
V                      12.046781
V+G                    23.871485
max(G,V)               22.962066
GurF                   28.861973
V+GurF                 34.761395
GurL                   48.232477
V+GurL                 52.706725
BEST VPC DISJ          39.933735
BEST GMIC+VPC DISJ     35.819277
BEST V+GurF DISJ       23.445783
BEST V+GurL DISJ       24.469880
NUM VPC                72.087349
NUM GMIC              175.879518
dtype: float64

Unnamed: 0,DB,V,V+G,"max(G,V)",V+GurF,V+GurL
VPCs disabled,0.0,0.0,16.440442,16.440442,28.861973,48.232477
2 leaves,3.014506,2.211832,17.138744,16.721827,30.879922,48.985705
4 leaves,5.27024,3.592592,17.718585,17.159826,31.028714,49.023677
8 leaves,8.117856,4.866368,18.468179,17.762548,31.446474,49.217817
16 leaves,11.173424,6.465202,19.966469,19.308661,32.38715,50.088648
32 leaves,14.671516,8.776728,21.73339,20.918238,33.351888,50.824167
64 leaves,18.369376,10.247721,22.826544,22.084876,33.794465,51.350585
Best,18.369376,12.046781,23.871485,22.962066,34.761395,52.706725


Unnamed: 0,DB,V,V+G,"max(G,V)",V+GurF,V+GurL,V/DB,"max(G,V)/V+G"
VPCs disabled,0.0,0.0,16.440442,16.440442,28.861973,48.232477,,1.0
2 leaves,3.014506,2.211832,17.138744,16.721827,30.879922,48.985705,0.733729,0.975674
4 leaves,5.27024,3.592592,17.718585,17.159826,31.028714,49.023677,0.681675,0.968465
8 leaves,8.117856,4.866368,18.468179,17.762548,31.446474,49.217817,0.599465,0.961792
16 leaves,11.173424,6.465202,19.966469,19.308661,32.38715,50.088648,0.578623,0.967054
32 leaves,14.671516,8.776728,21.73339,20.918238,33.351888,50.824167,0.598215,0.962493
64 leaves,18.369376,10.247721,22.826544,22.084876,33.794465,51.350585,0.55787,0.967509
Best,18.369376,12.046781,23.871485,22.962066,34.761395,52.706725,0.655808,0.961904


### Table 5: `all_gap_results_df`: complete gap closed results

In [94]:
inst_set = selected_gap_df.index.levels[0]
inst_set.set_names("Instance",inplace=True)

col_idx = pd.MultiIndex.from_arrays(
    [
        ['', '', '# cuts', '# cuts'] + ['% gap closed']*len(gap_cols_short),
        ['Rows', 'Cols', 'G', 'V'] + gap_cols_short
    ],
)

all_gap_results_df = pd.DataFrame(
    columns = col_idx,
    index = inst_set,
    dtype = object,
)

# Enter number of rows and cols
tmp_df = df.xs(0, level='disj_terms').loc[inst_set,['ROWS','COLS']]
tmp_df.columns = pd.MultiIndex.from_product([[''],['Rows','Cols']])
all_gap_results_df.loc[:,tmp_df.columns] = tmp_df

# Enter number of cuts
# tmp_df = best_gap_df.xs(0, level='disj_terms').loc[inst_set,['NUM GMIC', 'NUM VPC']]
tmp_df = best_gap_df.loc[inst_set, ['NUM GMIC', 'NUM VPC']]
tmp_df.columns = pd.MultiIndex.from_product([['# cuts'],['G','V']])
all_gap_results_df.loc[:,tmp_df.columns] = tmp_df

# Enter gap closed
tmp_df = best_gap_df.loc[inst_set, gap_cols_short]
tmp_df.columns = pd.MultiIndex.from_product([['% gap closed'],gap_cols_short])
all_gap_results_df.loc[:,tmp_df.columns] = tmp_df

# Add average row
all_gap_results_df.loc["Average"] = all_gap_results_df.loc[:,('% gap closed',gap_cols_short)].mean()

# Now convert the % gap closed columns to objects so we can add an int row
all_gap_results_df.loc[:,('% gap closed',gap_cols_short)] = all_gap_results_df.loc[:,('% gap closed',gap_cols_short)].astype(object)

# Add wins row
win_gap_cols_short = ['DB', 'V', 'V+G', 'V+GurF', 'V+GurL']
all_gap_results_df.loc['Wins',('% gap closed',win_gap_cols_short)] = avg_gap_df.loc[all_set,win_gap_cols_short].values.tolist()
# all_gap_results_df.loc['Wins',('% gap closed',win_gap_cols_short)] = avg_gap_df.loc[all_set,gap_cols_short].astype(np.int64).values.tolist()
# all_gap_results_df.loc["Wins"] = avg_gap_df.loc[all_set,gap_cols_short]
# wins_df.at[cols[ind1],cols[ind2]] = int(sum(best_gap_df[cols[ind1]] > best_gap_df[cols[ind2]] + EPS))

# Replace missing entries with empty string
all_gap_results_df = all_gap_results_df.fillna('',downcast=False)

# Convert rows, cols, # cuts to int values
tmp_cols = pd.MultiIndex.from_product([[''],['Rows','Cols']])
all_gap_results_df.loc[inst_set,tmp_cols] = all_gap_results_df.loc[inst_set,tmp_cols].astype(np.int64)
tmp_cols = pd.MultiIndex.from_product([['# cuts'],['G','V']])
all_gap_results_df.loc[inst_set,tmp_cols] = all_gap_results_df.loc[inst_set,tmp_cols].astype(np.int64)

display(all_gap_results_df.tail())

print("Num instances =",len(all_gap_results_df)-2)

  all_gap_results_df = all_gap_results_df.fillna('',downcast=False)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,# cuts,# cuts,% gap closed,% gap closed,% gap closed,% gap closed,% gap closed,% gap closed,% gap closed,% gap closed,% gap closed,% gap closed
Unnamed: 0_level_1,Rows,Cols,G,V,G,R,DB,V,V+G,"max(G,V)",GurF,V+GurF,GurL,V+GurL
Instance,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
vpm1_presolved,128.0,188.0,16.0,10.0,16.930266,0.0,7.788162,4.672897,16.930266,16.930266,45.861148,56.94259,68.157543,72.129506
vpm2_presolved,127.0,187.0,25.0,25.0,17.849671,0.0,14.293216,7.834301,19.708285,17.849671,43.301829,50.632749,73.26105,73.52794
zib54-UUE_presolved,1114.0,3726.0,56.0,56.0,10.696523,0.0,17.642953,8.662292,15.689964,10.696523,43.113981,54.09513,68.331458,68.225269
Average,,,,,16.440442,0.256744,18.369376,12.046781,23.871485,22.962066,28.861973,34.761395,48.232477,52.706725
Wins,,,,,,,181.0,135.0,233.0,,,258.0,,237.0


Num instances = 332


# Section 3: Time tables

## `time_df`: Create subset of dataframe relevant to time

In [95]:
## Create subset of dataframe relevant to time
time_df = df.loc[:, 
                [
                    'NUM DISJ TERMS',
                    'ROWS',
                    'COLS',
                    'LP OBJ',
                    'IP OBJ',
                    'FIRST REF OBJ',
                    'AVG REF OBJ',
                    'BEST REF OBJ',
                    'FIRST REF+V OBJ',
                    'AVG REF+V OBJ',
                    'FIRST REF BOUND',
                    'AVG REF BOUND',
                    'BEST REF BOUND',
                    'FIRST REF+V BOUND',
                    'AVG REF+V BOUND',
                    'FIRST REF ITERS',
                    'AVG REF ITERS',
                    'BEST REF ITERS',
                    'FIRST REF+V ITERS',
                    'AVG REF+V ITERS',
                    'FIRST REF NODES',
                    'AVG REF NODES',
                    'BEST REF NODES',
                    'FIRST REF+V NODES',
                    'AVG REF+V NODES',
                    'FIRST REF TIME',
                    'AVG REF TIME',
                    'BEST REF TIME',
                    'FIRST REF+V TIME',
                    'AVG REF+V TIME',
                    'VPC_GEN_TIME',
                    'NUM GMIC',
                    'NUM VPC',
                    'NUM OBJ',
                    'ALL REF TIME',
                    'ALL REF+V TIME',
                    'ExitReason']
               ]
#display(time_df.loc[("bm23_presolved",2)])

## Prepare short/long column names for time dfs
1. First run of Gurobi without VPCs
2. Best among 7 runs of Gurobi without VPCs
3. First run of Gurobi with VPCs for each disjunction size
4. First run of Gurobi with VPCs for each disjunction size, adding cut generation time
5. Best run across first Gurobi without VPCs and first Gurobi with VPCs (across all terms)

In [96]:
col_num_vpcs = 'NUM VPC'
col_vpc_gen_time = 'VPC_GEN_TIME'

# Gur1/Gur7 names
gur1_col_stub = 'AVG REF' # Should we change to AVG?
gur7_col_stub = 'BEST REF'
# gur_w_v_col_stub = 'AVG REF'
gur1v_col_stub = gur1_col_stub + '+V'
gur1v_w_cut_col_stub = gur1v_col_stub + ' W/CUTGEN'

# gur1time: first run of Gurobi without VPCs
gur1time_col = gur1_col_stub + ' TIME'
gur1nodes_col = gur1_col_stub + ' NODES'

# gur7time: best among 7 runs of Gurobi without VPCs
gur7time_col = gur7_col_stub + ' TIME'
gur7nodes_col = gur7_col_stub + ' NODES'

# gur1vtime: first run of Gurobi w/VPCs for each disj size
gur1vtime_col = gur1v_col_stub + ' TIME'
gur1vnodes_col = gur1v_col_stub + ' NODES'

# gur1v_w_cut_time: first run of Gurobi w/VPCs for each disj size, counting cut generation time
gur1v_w_cut_time_col = gur1v_w_cut_col_stub + ' TIME'

# Track best disjunction used in 0-row
gurv_disj_col = gur1v_col_stub + ' DISJ'
gurv_w_cut_disj_col = gur1v_w_cut_col_stub + ' DISJ'

# Best Gurobi run across the first without VPCs and first w/VPCs for each disj size
mintime_col       = 'MIN BB TIME'
mintime_w_cut_col = 'MIN BB W/CUTGEN TIME'
mintime_disj_col  = 'MIN BB TIME DISJ'
minnodes_col      = 'MIN BB NODES'

map_cols_to_short_time = {
    gur1time_col         : 'Gur1',
    gur7time_col         : 'Gur7',
    gur1vtime_col        : 'V',
    gur1v_w_cut_time_col : 'Total',
    mintime_col          : 'V7',
    mintime_w_cut_col    : 'Total7',
}

map_cols_to_short_nodes = {
    gur1nodes_col        : 'Gur1',
    gur7nodes_col        : 'Gur7',
    gur1vnodes_col       : 'V',
    minnodes_col         : 'V7',
}

map_short_to_cols_time = {v: k for k, v in map_cols_to_short_time.items()}
map_short_to_cols_nodes = {v: k for k, v in map_cols_to_short_nodes.items()}

time_cols_short = list(map_short_to_cols_time.keys())
node_cols_short = list(map_short_to_cols_nodes.keys())
# display(time_cols, node_cols)

# Select a subset of columns for the "long" list used when updating the 0-row
time_cols_long = [map_short_to_cols_time[col] for col in time_cols_short]
node_cols_long = [map_short_to_cols_nodes[col] for col in node_cols_short]

# # Update list of columns with mintime cols
# newshortcol1 = 'V7'
# newshortcol2 = 'Total7'
# newshortcol3 = 'V7'
# map_cols_to_short_time [mintime_col]       = newshortcol1
# map_cols_to_short_time [mintime_w_cut_col] = newshortcol2
# map_cols_to_short_nodes[minnodes_col]      = newshortcol3

# map_short_to_cols_time [newshortcol1]      = mintime_col
# map_short_to_cols_time [newshortcol2]      = mintime_w_cut_col
# map_short_to_cols_nodes[newshortcol3]      = minnodes_col

# time_cols_short.append(newshortcol1)
# time_cols_short.append(newshortcol2)
# node_cols_short.append(newshortcol3)

## Add total time for running solver + generating cuts

In [97]:
# Add total time for running solver + generating cuts
time_df[gur1v_w_cut_time_col] = time_df[gur1vtime_col] + time_df[col_vpc_gen_time]

display(time_df.loc['bm23_presolved'])

Unnamed: 0_level_0,NUM DISJ TERMS,ROWS,COLS,LP OBJ,IP OBJ,FIRST REF OBJ,AVG REF OBJ,BEST REF OBJ,FIRST REF+V OBJ,AVG REF+V OBJ,...,FIRST REF+V TIME,AVG REF+V TIME,VPC_GEN_TIME,NUM GMIC,NUM VPC,NUM OBJ,ALL REF TIME,ALL REF+V TIME,ExitReason,AVG REF+V W/CUTGEN TIME
disj_terms,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0,20,27,20.570922,34.0,34.0,34.0,34.0,0.0,0.0,...,0.0,0.0,0.0,0,0,0,0.039986;0.038486;0.039172;0.051573;0.040937;0...,,SUCCESS,0.0
2,2,20,27,20.570922,34.0,34.0,34.0,34.0,34.0,34.0,...,0.048,0.042,0.0,6,6,8,0.040495;0.038867;0.038812;0.052034;0.041164;0...,0.047853;0.034036;0.037246;0.044523;0.043889;0...,CUT_LIMIT,0.042
4,4,20,27,20.570922,34.0,34.0,34.0,34.0,34.0,34.0,...,0.037,0.043,0.0,6,6,7,0.040525;0.038924;0.039546;0.052120;0.042080;0...,0.036886;0.048001;0.041653;0.044801;0.040121;0...,CUT_LIMIT,0.043
8,8,20,27,20.570922,34.0,34.0,34.0,34.0,34.0,34.0,...,0.048,0.046,0.01,6,6,7,0.039684;0.038217;0.038999;0.051653;0.040933;0...,0.048114;0.041782;0.046991;0.037966;0.049075;0...,CUT_LIMIT,0.056
16,16,20,27,20.570922,34.0,34.0,34.0,34.0,34.0,34.0,...,0.042,0.039,0.01,6,6,7,0.039156;0.037640;0.038380;0.050683;0.040228;0...,0.042212;0.036440;0.040171;0.038601;0.040167;0...,CUT_LIMIT,0.049
32,32,20,27,20.570922,34.0,34.0,34.0,34.0,34.0,34.0,...,0.034,0.035,0.03,6,6,7,0.039639;0.038239;0.038946;0.051488;0.040839;0...,0.034126;0.034393;0.034666;0.037021;0.037501;0...,CUT_LIMIT,0.065
64,64,20,27,20.570922,34.0,34.0,34.0,34.0,34.0,34.0,...,0.039,0.04,0.06,6,6,7,0.039900;0.038785;0.039532;0.052361;0.041305;0...,0.039412;0.044548;0.038914;0.042464;0.037830;0...,CUT_LIMIT,0.1


## `selected_time_df`: Solving and cut-generation time for instances selected for time reporting; 0-row with min values across all rows

In [98]:
## Solving and cut-generation time for instances selected for time reporting
selected_time_df = time_df.loc[selected_time_instances_dict.keys()]
selected_time_df.index = selected_time_df.index.remove_unused_levels()
selected_time_df[minnodes_col] = 0

## Fill in 0-row with min values across all rows
## Also fill in gur1 values (present only in 0 row currently) for all disj terms
comparison_time_cols = [gur1vtime_col, gur1v_w_cut_time_col]
comparison_node_cols = [gur1vnodes_col]
cols_to_display = [col_num_vpcs]+[gur1time_col,gur1vtime_col]+[gur1nodes_col,gur1vnodes_col]+[mintime_col,mintime_w_cut_col,minnodes_col,gurv_disj_col,gurv_w_cut_disj_col,mintime_disj_col]
inst_set = selected_time_df.index.levels[0]
# tmp_inst = '23588_presolved'
# inst_set = ['10teams_presolved',tmp_inst]
for i, inst in enumerate(inst_set):
    print("{}/{}".format(i+1,len(inst_set)), end='\r', flush=True)
    curr_df = selected_time_df.loc[inst].copy() # copy needed to not throw SettingWithCopyWarning
    
    # Select only the rows in which VPCs were generated
    curr_df_with_vpcs = curr_df[curr_df[col_num_vpcs] > 0]
    
    # display(inst)
    # display(curr_df_with_vpcs[[col_num_vpcs]+[gur1time_col,gur1vtime_col]])

    # Set 0-row to have min time values across all (non-0-vpc) rows for this instance
    # best_vals = curr_df_with_vpcs[comparison_time_cols].min()
    # selected_time_df.loc[(inst,0),comparison_time_cols] = best_vals
    best_vals_idx = curr_df_with_vpcs[comparison_time_cols].idxmin()
    for curr_col, curr_disj_id in zip(comparison_time_cols, best_vals_idx):
        selected_time_df.at[(inst,0),curr_col] = curr_df_with_vpcs.at[curr_disj_id, curr_col]
    
    # display(best_vals_idx)
    # print("selected_time_df.at[('{}',0),gur1vtime_col] = {}".format(inst,selected_time_df.at[(inst,0),gur1vtime_col]))
    # display(selected_time_df[[col_num_vpcs]+[gur1time_col,gur1vtime_col]].head(14))
    # print("selected_time_df.at[('{}',0),gur1vtime_col] = {}".format(inst,selected_time_df.at[(inst,0),gur1vtime_col]))


    # Also add id of the best disj to the 0-row
    selected_time_df.at[(inst,0),gurv_disj_col]       = int(best_vals_idx.iloc[0])
    selected_time_df.at[(inst,0),gurv_w_cut_disj_col] = int(best_vals_idx.iloc[1])

    # Update 0-row of mintime (V7) entries
    curr_gur1time       = selected_time_df.at[(inst,0),gur1time_col]
    curr_gur1vtime      = selected_time_df.at[(inst,0),gur1vtime_col]
    curr_gur1vcuts_time = selected_time_df.at[(inst,0),gur1v_w_cut_time_col]
    
    curr_vals = [curr_gur1time, curr_gur1vtime]
    min_id = np.argmin(curr_vals)

    # If min_id is 0, then no cuts are used and we report the gur1 time
    # If min_id is 1, then gur1v < gur1 and we can report the number of cuts used
    selected_time_df.at[(inst,0),mintime_col] = curr_vals[min_id]

    # Add num cuts from mintime disj into num vpc col
    best_disj_size = 0 if min_id == 0 else best_vals_idx.iloc[0]
    selected_time_df.at[(inst,0),mintime_disj_col] = best_disj_size
    best_num_cuts = selected_time_df.at[(inst,best_disj_size),col_num_vpcs]
    selected_time_df.at[(inst,0),col_num_vpcs] = best_num_cuts

    # Update with cuts into Total7 column
    curr_vals = [curr_gur1time, curr_gur1vcuts_time]
    selected_time_df.at[(inst,0),mintime_w_cut_col] = min(curr_vals)

    # Repeat for nodes
    best_vals = curr_df_with_vpcs[comparison_node_cols].min()
    selected_time_df.loc[(inst,0),comparison_node_cols] = best_vals
    # selected_time_df.at[(inst,0),minnodes_col] = int(selected_time_df.loc[(inst,0),[gur1nodes_col,gur1vnodes_col]].min())

    curr_gur1nodes       = selected_time_df.at[(inst,0),gur1nodes_col]
    curr_gur1vnodes      = selected_time_df.at[(inst,0),gur1vnodes_col]
    curr_vals = [curr_gur1nodes, curr_gur1vnodes]
    min_id = np.argmin(curr_vals)
    selected_time_df.at[(inst,0),minnodes_col] = int(curr_vals[min_id])

    # Propogate down 0-row values for gur1 columns
    selected_time_df.loc[inst, gur1time_col] = curr_gur1time
    selected_time_df.loc[inst, gur1nodes_col] = curr_gur1nodes

    #### FOR SOME REASON, THE BELOW ZEROES OUT selected_time_df.loc[[(inst,0)]][gur1vtime_col]
    # display(selected_time_df.loc[(inst,0),[gur1nodes_col,gur1vnodes_col]])

    ## OLD CODE BELOW
    # best_vals_idx = curr_df_with_vpcs[comparison_node_cols].idxmin()
    # for curr_col, curr_disj_id in zip(comparison_node_cols, best_vals_idx):
    #     selected_time_df.at[(inst,0),curr_col] = curr_df_with_vpcs.loc[curr_disj_id, curr_col]

    # # Also add id of the best disj to the 0-row
    # selected_time_df.at[(inst,0),gurv_disj_col + ' (NODES)'] = int(best_vals_idx[0])
    # selected_time_df.at[(inst,0),gurv_w_cut_disj_col+ ' (NODES)'] = int(best_vals_idx[1])

    # for ind in curr_df.index:
    #     if ind == 0:
    #         continue

    #     # Propogate GurF and GurL down
    #     subinds = [4,6]
    #     sel_gap = [gap_cols[i] for i in subinds]
    #     selected_gap_df.loc[(inst,ind),sel_gap] = curr_df.loc[0,sel_gap]

    #     # If no VPCs produced, the values for V+GurF and V+GurL have not been provided
    #     # We replace these by GurF and GurL
    #     # Currently disabled: update max for that column too (if disabled, we instead keep max as the value among those that generated VPCs)
    #     num_vpc = curr_df.loc[ind,col_num_vpcs]
    #     if num_vpc == 0:
    #         # print("Zero cuts for inst {} at depth {:d}".format(inst, ind))
    #         subinds = [5,7]
    #         refinds = [4,6]
    #         sel_gap = [gap_cols[i] for i in subinds]
    #         selected_gap_df.loc[(inst,ind),sel_gap] = curr_df.loc[0,[gap_cols[i] for i in refinds]].to_numpy()

    #         # for i in refinds:
    #         #     if curr_df.loc[0,gap_cols[i]] > selected_gap_df.loc[(inst,0),gap_cols[i+1]]:
    #         #         if curr_df.loc[0,gap_cols[i]] > 0:
    #         #             # print("DEBUG: Updating {} for inst {} from {:f} to {:f}".format(
    #         #             #     gap_cols[i+1], 
    #         #             #     inst, 
    #         #             #     selected_gap_df.loc[(inst,0),gap_cols[i+1]], 
    #         #             #     curr_df.loc[0,gap_cols[i]]))
    #         #         selected_gap_df.loc[(inst,0),gap_cols[i+1]] = curr_df.loc[0,gap_cols[i]]

# Add minimum time when using cuts and when not using cuts
# selected_time_df[mintime_col] = selected_time_df[[gur1time_col, gur1vtime_col]].min(axis=1)
# selected_time_df[mintime_w_cut_col] = selected_time_df[[gur1time_col, gur1v_w_cut_time_col]].min(axis=1)
# selected_time_df[minnodes_col] = selected_time_df[[gur1nodes_col,gur1vnodes_col]].min(axis=1)

display(selected_time_df.head(35).loc[:,[col_num_vpcs]+[gur1time_col,gur1vtime_col]+[gur1nodes_col,gur1vnodes_col]+[mintime_col,mintime_w_cut_col,minnodes_col,gurv_disj_col,gurv_w_cut_disj_col,mintime_disj_col]])
# display(selected_time_df.loc['10teams_presolved',[col_num_vpcs]+[gur1time_col,gur1vtime_col]+[gur1nodes_col,gur1vnodes_col]+[mintime_col,mintime_w_cut_col,minnodes_col,gurv_disj_col,gurv_w_cut_disj_col,mintime_disj_col]])
# display(selected_time_df.loc[inst_set,cols_to_display])

306/306

Unnamed: 0_level_0,Unnamed: 1_level_0,NUM VPC,AVG REF TIME,AVG REF+V TIME,AVG REF NODES,AVG REF+V NODES,MIN BB TIME,MIN BB W/CUTGEN TIME,MIN BB NODES,AVG REF+V DISJ,AVG REF+V W/CUTGEN DISJ,MIN BB TIME DISJ
INSTANCE,disj_terms,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
10teams_presolved,0,77,2.146,1.755,280,218,1.755,2.146,218,2.0,4.0,2.0
10teams_presolved,2,77,2.146,1.755,280,218,,,0,,,
10teams_presolved,4,1,2.146,1.773,280,221,,,0,,,
10teams_presolved,8,34,2.146,1.994,280,337,,,0,,,
10teams_presolved,16,68,2.146,1.876,280,277,,,0,,,
10teams_presolved,32,12,2.146,1.976,280,245,,,0,,,
10teams_presolved,64,0,2.146,0.0,280,0,,,0,,,
23588_presolved,0,75,0.539,0.479,882,893,0.479,0.539,882,4.0,4.0,4.0
23588_presolved,2,11,0.539,0.511,882,893,,,0,,,
23588_presolved,4,75,0.539,0.479,882,950,,,0,,,


In [99]:
# ### DEBUGGING that first ref+v time gets zeroed out for some reason?
# tmp_df = selected_time_df[[col_num_vpcs]+[gur1time_col,gur1vtime_col]].head(14).copy(deep=True)
# display(tmp_df)

# print(tmp_df.loc[('23588_presolved',0),gur1vtime_col])
# display(tmp_df.loc[[('23588_presolved',0)]][gur1vtime_col])

# tmp_df = selected_time_df
# print(tmp_df.loc[('23588_presolved',0),gur1vtime_col])
# display(tmp_df.loc[[('23588_presolved',0)]][gur1vtime_col])
# display(tmp_df.loc['23588_presolved'])

## Table 3: `avg_bb_df`: average time/nodes taken

### Prepare variables for row/col names

In [100]:
## Prepare variables for row/col names

bb_classes = ['All', '6 trees']
num_bb_classes = len(bb_classes)

bucket_min = [0, 10, 100, 1000]
bucket_max = [3600, 3600, 3600, 3600]
num_buckets = len(bucket_min)
assert(len(bucket_max) == num_buckets)
bb_buckets = ['[' + str(bucket_min[j]) + ',' + str(bucket_max[j]) + ')' for j in range(num_buckets)]
# bucket_names = [classes[i] + ' [' + str(bucket_min[j]) + ',' + str(bucket_max[j]) + ')' for i in range(num_classes) for j in range(num_buckets)]
# display(bucket_names)

bb_metrics = ['Gmean', 'Wins1', 'Wins7']

time_col_header = 'Time (s)'
node_col_header = 'Nodes (\\#)'

### Set up empty `avg_bb_df`

In [101]:
## Prepare avg_bb_df

avg_bb_cols = pd.MultiIndex.from_arrays(
    [[time_col_header]*len(time_cols_short) + [node_col_header]*len(node_cols_short), time_cols_short + node_cols_short],
    names = ['criterion', 'type'])

#bb_row_names = pd.MultiIndex.from_product([bb_buckets, bb_row_names], names=['bucket', 'metric'])
bb_row_names = pd.MultiIndex.from_product(
    [bb_classes, bb_buckets, bb_metrics],
    names=['class', 'bucket', 'metric'])

avg_bb_df = pd.DataFrame(
    columns = avg_bb_cols,
    index = bb_row_names,
    dtype = float
)

display(avg_bb_df.loc[:,avg_bb_cols.get_level_values(0)==node_col_header].head(6))
#display(avg_bb_df.loc[(bb_classes[0], bb_buckets[1], bb_metrics[0]),:])
display(avg_bb_df.loc[(bb_classes[0], bb_buckets, bb_metrics[0]),:])

Unnamed: 0_level_0,Unnamed: 1_level_0,criterion,Nodes (\#),Nodes (\#),Nodes (\#),Nodes (\#)
Unnamed: 0_level_1,Unnamed: 1_level_1,type,Gur1,Gur7,V,V7
class,bucket,metric,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
All,"[0,3600)",Gmean,,,,
All,"[0,3600)",Wins1,,,,
All,"[0,3600)",Wins7,,,,
All,"[10,3600)",Gmean,,,,
All,"[10,3600)",Wins1,,,,
All,"[10,3600)",Wins7,,,,


Unnamed: 0_level_0,Unnamed: 1_level_0,criterion,Time (s),Time (s),Time (s),Time (s),Time (s),Time (s),Nodes (\#),Nodes (\#),Nodes (\#),Nodes (\#)
Unnamed: 0_level_1,Unnamed: 1_level_1,type,Gur1,Gur7,V,Total,V7,Total7,Gur1,Gur7,V,V7
class,bucket,metric,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
All,"[0,3600)",Gmean,,,,,,,,,,
All,"[10,3600)",Gmean,,,,,,,,,,
All,"[100,3600)",Gmean,,,,,,,,,,
All,"[1000,3600)",Gmean,,,,,,,,,,


### `avg_bb_df`: shifted geometric mean of time taken across instances, in various buckets, and geomean of nodes too

In [102]:
## Create gmean_df
#   = shifted geometric mean of time taken across instances, in various buckets
#     and geomean of nodes too

# Custom functions for prior to python 3.8
# def geo_mean(iterable):
#     a = np.array(iterable)
#     return a.prod()**(1.0/len(a))
# def geo_mean_overflow(iterable):
#     return np.exp(np.log(iterable).mean())
from statistics import geometric_mean
SHIFT_TIME  = 60
SHIFT_NODES = 1000

num_inst = np.zeros(len(avg_bb_df),dtype = np.int64)
row_ind = 0

#avg_bb_df.loc[(bb_classes[0], bb_buckets, bb_metrics[0]),:] = \
shortcols_time = time_cols_short
cols_time = [map_short_to_cols_time[shortcol] for shortcol in shortcols_time]
shortcols_nodes = node_cols_short
cols_nodes = [map_short_to_cols_nodes[shortcol] for shortcol in shortcols_nodes]

cols = cols_time + cols_nodes
shortcols = shortcols_time + shortcols_nodes

# First calculate stats for "all" instances
curr_df = selected_time_df.loc[:,cols_time + cols_nodes]
curr_df = curr_df[curr_df.index.get_level_values(1) == 0] # take only "best" values

for i in range(num_buckets):
    curr_df = curr_df[curr_df[gur1time_col] > bucket_min[i]]
    
    avg_bb_df.loc[(bb_classes[0], bb_buckets[i], bb_metrics[0]),(time_col_header,shortcols_time)] = \
        [geometric_mean(curr_df[col] + SHIFT_TIME) - SHIFT_TIME for col in cols_time]
    avg_bb_df.loc[(bb_classes[0], bb_buckets[i], bb_metrics[0]),(node_col_header,shortcols_nodes)] = \
        [geometric_mean(curr_df[col] + SHIFT_NODES) - SHIFT_NODES for col in cols_nodes]
    
    print("row {:d}: {:d}".format(row_ind,len(curr_df)))
    
    num_inst[row_ind:row_ind+len(bb_metrics)] = len(bb_metrics)*[len(curr_df)]
    row_ind += len(bb_metrics)

# Now calculate stats for "6 trees" instances
curr_df = selected_time_df.loc[all6_instances_dict.keys(),cols_time + cols_nodes]
curr_df = curr_df[curr_df.index.get_level_values(1) == 0] # take only best values

for i in range(num_buckets):
    curr_df = curr_df[curr_df[gur1time_col] > bucket_min[i]]
    
    avg_bb_df.loc[(bb_classes[1], bb_buckets[i], bb_metrics[0]),(time_col_header,shortcols_time)] = \
        [geometric_mean(curr_df[col] + SHIFT_TIME) - SHIFT_TIME for col in cols_time]
    avg_bb_df.loc[(bb_classes[1], bb_buckets[i], bb_metrics[0]),(node_col_header,shortcols_nodes)] = \
        [geometric_mean(curr_df[col] + SHIFT_NODES) - SHIFT_NODES for col in cols_nodes]
    
    print("row {:d}: {:d}".format(row_ind,len(curr_df)))

    num_inst[row_ind:row_ind+len(bb_metrics)] = len(bb_metrics)*[len(curr_df)]
    row_ind += len(bb_metrics)

avg_bb_df[inst_col_name] = num_inst
# avg_bb_df['NUM INST'] = avg_bb_df['NUM INST'].astype(np.int64)

display(avg_bb_df.loc[(bb_classes, bb_buckets, bb_metrics[0]),:])

row 0: 306
row 3: 141
row 6: 86
row 9: 44
row 12: 221
row 15: 84
row 18: 47
row 21: 19


Unnamed: 0_level_0,Unnamed: 1_level_0,criterion,Time (s),Time (s),Time (s),Time (s),Time (s),Time (s),Nodes (\#),Nodes (\#),Nodes (\#),Nodes (\#),# inst
Unnamed: 0_level_1,Unnamed: 1_level_1,type,Gur1,Gur7,V,Total,V7,Total7,Gur1,Gur7,V,V7,Unnamed: 13_level_1
class,bucket,metric,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
All,"[0,3600)",Gmean,81.100666,59.310879,75.235738,83.010606,73.927539,75.106097,9738.528637,6414.949083,8739.13831,8495.004511,306
All,"[10,3600)",Gmean,312.325909,201.221989,280.887352,314.787786,274.066056,279.490525,85128.603638,44834.937789,72684.968402,69560.42737,141
All,"[100,3600)",Gmean,838.123599,492.651261,725.535096,756.800472,716.373294,727.036879,260970.422624,124888.145774,217626.667149,209241.270474,86
All,"[1000,3600)",Gmean,2483.499537,1413.163702,2228.163895,2249.735771,2191.64086,2202.430116,636501.165418,287496.000186,551238.101995,528198.390575,44
6 trees,"[0,3600)",Gmean,49.942506,35.695409,45.920914,46.893713,44.992314,45.508547,7525.647969,5173.748217,6700.102908,6567.549581,221
6 trees,"[10,3600)",Gmean,223.488425,139.026902,198.437856,202.234592,192.627156,195.108896,105076.272096,54585.181997,86917.833503,83787.38393,84
6 trees,"[100,3600)",Gmean,610.249941,337.679242,520.651057,525.80939,510.421965,514.089579,313244.867041,148437.917979,253508.676742,247237.729613,47
6 trees,"[1000,3600)",Gmean,2068.498708,854.842119,1861.871326,1869.444219,1791.576123,1797.092856,838148.539448,270146.119516,730271.497068,690451.279754,19


### Update wins1 rows

In [103]:
## Update wins1 rows
# A win in terms of time is counted when the ``Gur1'' baseline seconds taken 
# is at least 10\% slower, to account for some variability in runtimes.
# A win in terms of nodes is when the ``Gur1'' baseline number of nodes is higher.

# Make all columns "object" type to allow for integer values
avg_bb_df.loc[:,(time_col_header,shortcols_time)] = avg_bb_df.loc[:,(time_col_header,shortcols_time)].astype(object)
avg_bb_df.loc[:,(node_col_header,shortcols_nodes)] = avg_bb_df.loc[:,(node_col_header,shortcols_nodes)].astype(object)

# First calculate stats for "all" instances
curr_df = selected_time_df.loc[:,cols_time + cols_nodes]
curr_df = curr_df[curr_df.index.get_level_values(1) == 0] # take only best values

for i in range(num_buckets):
    curr_df = curr_df[curr_df[gur1time_col] > bucket_min[i]]
    
    refcol = gur1time_col
    avg_bb_df.loc[(bb_classes[0], bb_buckets[i], bb_metrics[1]),(time_col_header,shortcols_time)] = \
        [ int(sum(curr_df[refcol] > 1.1*curr_df[col])) for col in cols_time ]

    refcol = gur1nodes_col
    avg_bb_df.loc[(bb_classes[0], bb_buckets[i], bb_metrics[1]),(node_col_header,shortcols_nodes)] = \
        [ int(sum(curr_df[refcol] > curr_df[col])) for col in cols_nodes ]

# Now calculate stats for "6 trees" instances
curr_df = selected_time_df.loc[all6_instances_dict.keys(),cols_time + cols_nodes]
curr_df = curr_df[curr_df.index.get_level_values(1) == 0] # take only best values

for i in range(num_buckets):
    curr_df = curr_df[curr_df[gur1time_col] > bucket_min[i]]
    
    refcol = gur1time_col
    avg_bb_df.loc[(bb_classes[1], bb_buckets[i], bb_metrics[1]),(time_col_header,shortcols_time)] = \
        [ int(sum(curr_df[refcol] > 1.1*curr_df[col])) for col in cols_time ]

    refcol = gur1nodes_col
    avg_bb_df.loc[(bb_classes[1], bb_buckets[i], bb_metrics[1]),(node_col_header,shortcols_nodes)] = \
        [ int(sum(curr_df[refcol] > curr_df[col])) for col in cols_nodes ]

### Update wins7 rows

In [104]:
## Update wins7 rows
# A win in terms of time is counted when the ``Gur1'' baseline seconds taken 
# is at least 10\% slower, to account for some variability in runtimes.
# A win in terms of nodes is when the ``Gur1'' baseline number of nodes is higher.

# First calculate stats for "all" instances
curr_df = selected_time_df.loc[:,cols_time + cols_nodes]
curr_df = curr_df[curr_df.index.get_level_values(1) == 0] # take only best values
for i in range(num_buckets):
    curr_df = curr_df[curr_df[gur1time_col] > bucket_min[i]]
    
    refcol = gur7time_col
    avg_bb_df.loc[(bb_classes[0], bb_buckets[i], bb_metrics[2]),(time_col_header,shortcols_time)] = \
        [ int(sum(curr_df[refcol] > 1.1*curr_df[col])) for col in cols_time ]

    refcol = gur7nodes_col
    avg_bb_df.loc[(bb_classes[0], bb_buckets[i], bb_metrics[2]),(node_col_header,shortcols_nodes)] = \
        [ int(sum(curr_df[refcol] > curr_df[col])) for col in cols_nodes ]

# Now calculate stats for "6 trees" instances
curr_df = selected_time_df.loc[all6_instances_dict.keys(),cols_time + cols_nodes]
curr_df = curr_df[curr_df.index.get_level_values(1) == 0] # take only best values

for i in range(num_buckets):
    curr_df = curr_df[curr_df[gur1time_col] > bucket_min[i]]
    
    refcol = gur7time_col
    avg_bb_df.loc[(bb_classes[1], bb_buckets[i], bb_metrics[2]),(time_col_header,shortcols_time)] = \
        [ int(sum(curr_df[refcol] > 1.1*curr_df[col])) for col in cols_time ]

    refcol = gur7nodes_col
    avg_bb_df.loc[(bb_classes[1], bb_buckets[i], bb_metrics[2]),(node_col_header,shortcols_nodes)] = \
        [ int(sum(curr_df[refcol] > curr_df[col])) for col in cols_nodes ]

In [105]:
# display(avg_bb_df.loc[:,cols.get_level_values(0)=='Nodes'].head(6))
display(avg_bb_df.loc[(bb_classes[0:2], bb_buckets, bb_metrics[0:3]),:])

Unnamed: 0_level_0,Unnamed: 1_level_0,criterion,Time (s),Time (s),Time (s),Time (s),Time (s),Time (s),Nodes (\#),Nodes (\#),Nodes (\#),Nodes (\#),# inst
Unnamed: 0_level_1,Unnamed: 1_level_1,type,Gur1,Gur7,V,Total,V7,Total7,Gur1,Gur7,V,V7,Unnamed: 13_level_1
class,bucket,metric,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
All,"[0,3600)",Gmean,81.100666,59.310879,75.235738,83.010606,73.927539,75.106097,9738.528637,6414.949083,8739.13831,8495.004511,306
All,"[0,3600)",Wins1,0.0,254.0,144.0,74.0,144.0,74.0,0.0,278.0,215.0,215.0,306
All,"[0,3600)",Wins7,0.0,0.0,27.0,7.0,27.0,7.0,0.0,0.0,39.0,39.0,306
All,"[10,3600)",Gmean,312.325909,201.221989,280.887352,314.787786,274.066056,279.490525,85128.603638,44834.937789,72684.968402,69560.42737,141
All,"[10,3600)",Wins1,0.0,118.0,68.0,51.0,68.0,51.0,0.0,141.0,107.0,107.0,141
All,"[10,3600)",Wins7,0.0,0.0,8.0,6.0,8.0,6.0,0.0,0.0,23.0,23.0,141
All,"[100,3600)",Gmean,838.123599,492.651261,725.535096,756.800472,716.373294,727.036879,260970.422624,124888.145774,217626.667149,209241.270474,86
All,"[100,3600)",Wins1,0.0,65.0,38.0,34.0,38.0,34.0,0.0,86.0,66.0,66.0,86
All,"[100,3600)",Wins7,0.0,0.0,5.0,5.0,5.0,5.0,0.0,0.0,15.0,15.0,86
All,"[1000,3600)",Gmean,2483.499537,1413.163702,2228.163895,2249.735771,2191.64086,2202.430116,636501.165418,287496.000186,551238.101995,528198.390575,44


## Table 6: `all_bb_results_df`: all time/nodes results

In [106]:
inst_set = selected_time_df.index.levels[0]
inst_set.set_names("Instance",inplace=True)
numcuts_col_header = '# cuts'

col_idx = pd.MultiIndex.from_arrays(
    [
        ['', '', numcuts_col_header] + [time_col_header]*len(time_cols_short) + [node_col_header]*len(node_cols_short),
        ['Rows', 'Cols', map_cols_to_short_time[gur1vtime_col]] + time_cols_short + node_cols_short
    ],
)

all_bb_results_df = pd.DataFrame(
    columns = col_idx,
    index = inst_set,
    dtype = object,
)

# Enter number of rows and cols
tmp_df = df.xs(0, level='disj_terms').loc[inst_set,['ROWS','COLS']]
tmp_df.columns = pd.MultiIndex.from_product([[''],['Rows','Cols']])
all_bb_results_df.loc[:,tmp_df.columns] = tmp_df

# Enter number of cuts
# tmp_df = selected_time_df.loc[(inst_set,0), ['NUM VPC']]
tmp_df = selected_time_df.xs(0, level='disj_terms')['NUM VPC']
tmp_df.columns = pd.MultiIndex.from_product([[numcuts_col_header],[map_cols_to_short_time[gur1vtime_col]]])
all_bb_results_df.loc[:,tmp_df.columns] = tmp_df

# Enter time
tmp_df = selected_time_df.xs(0, level='disj_terms')[time_cols_long]
tmp_df.columns = pd.MultiIndex.from_product([[time_col_header],time_cols_short])
all_bb_results_df.loc[:,tmp_df.columns] = tmp_df

# Enter nodes
tmp_df = selected_time_df.xs(0, level='disj_terms')[node_cols_long]
tmp_df.columns = pd.MultiIndex.from_product([[node_col_header],node_cols_short])
all_bb_results_df.loc[:,tmp_df.columns] = tmp_df

all_bb_results_df = all_bb_results_df.sort_values(by=[(time_col_header, map_cols_to_short_time[mintime_col])])

# Add average + wins rows
# Replace missing entries with empty string
tmp_df = avg_bb_df.xs((bb_classes[0],bb_buckets[0])).copy(deep=True)
tmp_df.drop(inst_col_name, axis=1, level=0, inplace=True)
all_bb_results_df = pd.concat([all_bb_results_df, tmp_df]).fillna('',downcast=False)

# Remove unnecessary entries
all_bb_results_df.loc['Wins1',[
        (time_col_header,map_cols_to_short_time[gur1time_col]),
        (node_col_header,map_cols_to_short_nodes[gur1nodes_col])
    ]] = ""
# all_bb_results_df.loc['Wins1',([time_col_header,node_col_header],'Gur1')] = ""
all_bb_results_df.loc['Wins7',[
        (time_col_header,map_cols_to_short_time[gur1time_col]),
        (time_col_header,map_cols_to_short_time[gur7time_col]),
        (node_col_header,map_cols_to_short_nodes[gur1nodes_col]),
        (node_col_header,map_cols_to_short_nodes[gur7nodes_col]),
    ]] = ""
# all_bb_results_df.loc['Wins7',([time_col_header,node_col_header],['Gur1','Gur7'])] = ""
# all_bb_results_df = all_bb_results_df.fillna('',downcast=False)

# Convert rows, cols, # cuts to int values
tmp_cols = pd.MultiIndex.from_product([[''],['Rows','Cols']])
all_bb_results_df.loc[inst_set,tmp_cols] = all_bb_results_df.loc[inst_set,tmp_cols].astype(np.int64)
tmp_cols = pd.MultiIndex.from_product([[numcuts_col_header],[map_cols_to_short_time[gur1vtime_col]]])
all_bb_results_df.loc[inst_set,tmp_cols] = all_bb_results_df.loc[inst_set,tmp_cols].astype(np.int64)

# Rename inst col back to inst_row_name
all_bb_results_df.index.set_names("Instance",inplace=True)

display(all_bb_results_df.head(15))
display(all_bb_results_df.tail(10))

  tmp_df = avg_bb_df.xs((bb_classes[0],bb_buckets[0])).copy(deep=True)
  all_bb_results_df = pd.concat([all_bb_results_df, tmp_df]).fillna('',downcast=False)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,# cuts,Time (s),Time (s),Time (s),Time (s),Time (s),Time (s),Nodes (\#),Nodes (\#),Nodes (\#),Nodes (\#)
Unnamed: 0_level_1,Rows,Cols,V,Gur1,Gur7,V,Total,V7,Total7,Gur1,Gur7,V,V7
Instance,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
neos-501453_presolved,13,52,0,0.001,0.001,0.001,0.001,0.001,0.001,1,1,1,1
neos-796608_presolved,64,104,0,0.001,0.001,0.001,0.021,0.001,0.001,1,1,1,1
gt2_presolved,28,173,0,0.003,0.002,0.003,0.003,0.003,0.003,1,1,1,1
vpm1_presolved,128,188,10,0.005,0.004,0.004,0.015,0.004,0.005,1,1,1,1
set1cl_presolved,431,651,0,0.005,0.004,0.005,0.015,0.005,0.005,1,1,1,1
pipex_presolved,25,48,6,0.016,0.015,0.01,0.01,0.01,0.01,7,1,1,1
nexp-50-20-1-1_presolved,267,443,4,0.012,0.007,0.01,0.02,0.01,0.012,1,1,1,1
p0548_presolved,119,371,0,0.01,0.009,0.011,0.032,0.01,0.01,1,1,1,1
sp150x300d_presolved,269,419,6,0.012,0.008,0.011,0.024,0.011,0.012,1,1,1,1
haprp_presolved,694,756,0,0.013,0.012,0.013,0.073,0.013,0.013,1,1,1,1


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,# cuts,Time (s),Time (s),Time (s),Time (s),Time (s),Time (s),Nodes (\#),Nodes (\#),Nodes (\#),Nodes (\#)
Unnamed: 0_level_1,Rows,Cols,V,Gur1,Gur7,V,Total,V7,Total7,Gur1,Gur7,V,V7
Instance,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
dg012142_presolved,1987.0,1899.0,0.0,3600.0,3600.0,3600.0,3600.96,3600.0,3600.0,204245.0,174754.0,224130.0,204245.0
graph20-20-1rand_presolved,4810.0,1924.0,0.0,3600.0,3600.0,3600.0,3602.82,3600.0,3600.0,154558.0,121514.0,125191.0,125191.0
hgms-det_presolved,4599.0,950.0,0.0,3600.0,3600.0,3600.0,3601.66,3600.0,3600.0,609611.0,529420.0,635737.0,609611.0
protfold_presolved,2110.0,1835.0,0.0,3600.0,3600.0,3600.0,3894.07,3600.0,3600.0,52987.0,49382.0,62368.0,52987.0
cod105_presolved,1024.0,1024.0,0.0,3600.0,3600.0,3600.0,3770.68,3600.0,3600.0,292494.0,244178.0,205825.0,205825.0
cvs16r70-62_presolved,3278.0,2112.0,0.0,3600.0,3600.0,3600.0,3640.37,3600.0,3600.0,79321.0,72687.0,77699.0,77699.0
queens-30_presolved,900.0,900.0,0.0,3600.001,3600.0,3600.003,3630.473,3600.001,3600.001,408591.0,379759.0,388092.0,388092.0
Gmean,,,,81.100666,59.310879,75.235738,83.010606,73.927539,75.106097,9738.528637,6414.949083,8739.13831,8495.004511
Wins1,,,,,254.0,144.0,74.0,144.0,74.0,,278.0,215.0,215.0
Wins7,,,,,,27.0,7.0,27.0,7.0,,,39.0,39.0


## Table 7: ``all6_bb_results_df``: 6-trees time/nodes results

In [107]:
inst_set = all6_instances_dict.keys()
all6_bb_results_df = all_bb_results_df.loc[inst_set]

all6_bb_results_df = all6_bb_results_df.sort_values(by=[(time_col_header, map_cols_to_short_time[mintime_col])])

# Add average + wins rows
# Replace missing entries with empty string
tmp_df = avg_bb_df.xs((bb_classes[1],bb_buckets[0])).copy(deep=True)
tmp_df.drop(inst_col_name, axis=1, level=0, inplace=True)
all6_bb_results_df = pd.concat([all6_bb_results_df, tmp_df]).fillna('',downcast=False)

# Remove unnecessary entries
all6_bb_results_df.loc['Wins1',[
        (time_col_header,map_cols_to_short_time[gur1time_col]),
        (node_col_header,map_cols_to_short_nodes[gur1nodes_col])
    ]] = ""
all6_bb_results_df.loc['Wins7',[
        (time_col_header,map_cols_to_short_time[gur1time_col]),
        (time_col_header,map_cols_to_short_time[gur7time_col]),
        (node_col_header,map_cols_to_short_nodes[gur1nodes_col]),
        (node_col_header,map_cols_to_short_nodes[gur7nodes_col]),
    ]] = ""

# Convert rows, cols, # cuts to int values
tmp_cols = pd.MultiIndex.from_product([[''],['Rows','Cols']])
all6_bb_results_df.loc[inst_set,tmp_cols] = all6_bb_results_df.loc[inst_set,tmp_cols].astype(np.int64)
tmp_cols = pd.MultiIndex.from_product([[numcuts_col_header],[map_cols_to_short_time[gur1vtime_col]]])
all6_bb_results_df.loc[inst_set,tmp_cols] = all6_bb_results_df.loc[inst_set,tmp_cols].astype(np.int64)

# Rename inst col back to inst_row_name
all6_bb_results_df.index.set_names("Instance",inplace=True)

display(all6_bb_results_df.head(15))
display(all6_bb_results_df.tail(10))

  tmp_df = avg_bb_df.xs((bb_classes[1],bb_buckets[0])).copy(deep=True)
  all6_bb_results_df = pd.concat([all6_bb_results_df, tmp_df]).fillna('',downcast=False)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,# cuts,Time (s),Time (s),Time (s),Time (s),Time (s),Time (s),Nodes (\#),Nodes (\#),Nodes (\#),Nodes (\#)
Unnamed: 0_level_1,Rows,Cols,V,Gur1,Gur7,V,Total,V7,Total7,Gur1,Gur7,V,V7
Instance,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
neos-501453_presolved,13,52,0,0.001,0.001,0.001,0.001,0.001,0.001,1,1,1,1
gt2_presolved,28,173,0,0.003,0.002,0.003,0.003,0.003,0.003,1,1,1,1
vpm1_presolved,128,188,10,0.005,0.004,0.004,0.015,0.004,0.005,1,1,1,1
set1cl_presolved,431,651,0,0.005,0.004,0.005,0.015,0.005,0.005,1,1,1,1
pipex_presolved,25,48,6,0.016,0.015,0.01,0.01,0.01,0.01,7,1,1,1
nexp-50-20-1-1_presolved,267,443,4,0.012,0.007,0.01,0.02,0.01,0.012,1,1,1,1
p0548_presolved,119,371,0,0.01,0.009,0.011,0.032,0.01,0.01,1,1,1,1
sp150x300d_presolved,269,419,6,0.012,0.008,0.011,0.024,0.011,0.012,1,1,1,1
haprp_presolved,694,756,0,0.013,0.012,0.013,0.073,0.013,0.013,1,1,1,1
set1al_presolved,432,652,1,0.014,0.014,0.013,0.033,0.013,0.014,2,2,2,2


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,# cuts,Time (s),Time (s),Time (s),Time (s),Time (s),Time (s),Nodes (\#),Nodes (\#),Nodes (\#),Nodes (\#)
Unnamed: 0_level_1,Rows,Cols,V,Gur1,Gur7,V,Total,V7,Total7,Gur1,Gur7,V,V7
Instance,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
neos-3373491-avoca_presolved,1267.0,2152.0,0.0,2281.891,161.361,3273.875,3274.095,2281.891,2281.891,6757885.0,510074.0,11014133.0,6757885.0
neos-3762025-ognon_presolved,2636.0,4507.0,1.0,2889.302,1555.213,2362.714,2363.184,2362.714,2363.184,2216113.0,1544692.0,2197481.0,2197481.0
neos-3754480-nidda_presolved,402.0,253.0,0.0,2590.341,2028.847,2737.815,2737.855,2590.341,2590.341,4550434.0,3969042.0,4885952.0,4550434.0
rlp1_presolved,52.0,316.0,30.0,3086.651,6.557,2866.168,2866.198,2866.168,2866.198,14000404.0,10235.0,12632173.0,12632173.0
lotsize_presolved,1920.0,2985.0,4.0,3415.478,2744.972,2895.944,2899.094,2895.944,2899.094,107995.0,92227.0,109876.0,107995.0
neos-942830_presolved,589.0,831.0,1.0,3600.0,3600.0,2978.357,2978.577,2978.357,2978.577,1027231.0,915212.0,1007419.0,1007419.0
supportcase20_presolved,598.0,896.0,7.0,3600.0,3600.0,3554.368,3554.458,3554.368,3554.458,3990664.0,3401479.0,4253197.0,3990664.0
Gmean,,,,49.942506,35.695409,45.920914,46.893713,44.992314,45.508547,7525.647969,5173.748217,6700.102908,6567.549581
Wins1,,,,,192.0,116.0,62.0,116.0,62.0,,197.0,160.0,160.0
Wins7,,,,,,22.0,6.0,22.0,6.0,,,29.0,29.0


## Table 8: `avg_bb_by_depth_df`: average time/nodes by depth

In [108]:
## Prepare avg_bb_by_depth_df
## Prepare variables for row/col names

bb_classes_by_depth = [str(t) + ' leaves' for t in sizes]
num_bb_classes_by_depth = len(bb_classes_by_depth)

bb_buckets_by_depth = bb_buckets
bb_metrics_by_depth = bb_metrics[0:2]

cols_time_by_depth       = [gur1time_col, gur1vtime_col, gur1v_w_cut_time_col]
shortcols_time_by_depth  = [map_cols_to_short_time[col] for col in cols_time_by_depth]
cols_nodes_by_depth      = [gur1nodes_col, gur1vnodes_col]
shortcols_nodes_by_depth = [map_cols_to_short_nodes[col] for col in cols_nodes_by_depth]

avg_bb_cols_by_depth = pd.MultiIndex.from_arrays(
    [[time_col_header]*len(shortcols_time_by_depth) + 
     [node_col_header]*len(shortcols_nodes_by_depth), 
     shortcols_time_by_depth + shortcols_nodes_by_depth],
    names = ['criterion', 'type'])

# bucket_min = [0, 10, 100, 1000]
# bucket_max = [3600, 3600, 3600, 3600]
# num_buckets = len(bucket_min)
# assert(len(bucket_max) == num_buckets)
# bb_buckets = ['[' + str(bucket_min[j]) + ',' + str(bucket_max[j]) + ')' for j in range(num_buckets)]
# # bucket_names = [classes[i] + ' [' + str(bucket_min[j]) + ',' + str(bucket_max[j]) + ')' for i in range(num_classes) for j in range(num_buckets)]
# # display(bucket_names)

# bb_metrics = ['Gmean', 'Wins1', 'Wins7']

# time_col_header = 'Time (s)'
# node_col_header = 'Nodes (\\#)'

#bb_row_names = pd.MultiIndex.from_product([bb_buckets, bb_row_names], names=['bucket', 'metric'])
bb_row_names_by_depth = pd.MultiIndex.from_product(
    [bb_classes_by_depth, bb_buckets_by_depth, bb_metrics_by_depth],
    names=['class', 'bucket', 'metric'])

avg_bb_by_depth_df = pd.DataFrame(
    columns = avg_bb_cols_by_depth,
    index = bb_row_names_by_depth,
    dtype = float
)

# Fill in values for Gur1 from avg_bb_df
# display(
#     avg_bb_df.loc[
#         (bb_classes[1], bb_buckets, bb_metrics[0:2]),
#         [(time_col_header,map_cols_to_short_time[gur1time_col]),
#         (node_col_header,map_cols_to_short_nodes[gur1nodes_col])]
#     ]
# )

# Make all columns "object" type to allow for integer values
avg_bb_by_depth_df.loc[:,(time_col_header,shortcols_time_by_depth)] = avg_bb_by_depth_df.loc[:,(time_col_header,shortcols_time_by_depth)].astype(object)
avg_bb_by_depth_df.loc[:,(node_col_header,shortcols_nodes_by_depth)] = avg_bb_by_depth_df.loc[:,(node_col_header,shortcols_nodes_by_depth)].astype(object)

## Create gmean_df by depth
#   = shifted geometric mean of time taken across instances, in various buckets
#     and geomean of nodes too

num_inst_by_depth = np.zeros(len(avg_bb_by_depth_df),dtype = np.int64)
row_ind = 0

cols = cols_time_by_depth + cols_nodes_by_depth
shortcols = shortcols_time_by_depth + shortcols_nodes_by_depth

# Calculate stats for 6 trees instances by depth
curr_df = selected_time_df.loc[all6_instances_dict.keys(),cols]
for curr_size_ind in range(0,len(bb_classes_by_depth)):
    # print("{}".format(bb_classes_by_depth[curr_size_ind]))
    curr_by_depth_df = curr_df[curr_df.index.get_level_values(1) == sizes[curr_size_ind]] # take only best values

    for i in range(num_buckets):
        curr_by_depth_df = curr_by_depth_df[curr_by_depth_df[gur1time_col] > bucket_min[i]]
        avg_bb_by_depth_df.loc[
                (bb_classes_by_depth[curr_size_ind], bb_buckets_by_depth[i], bb_metrics_by_depth[0]),
                (time_col_header,shortcols_time_by_depth)] = \
            [geometric_mean(curr_by_depth_df[col] + SHIFT_TIME) - SHIFT_TIME for col in cols_time_by_depth]

        # display(avg_bb_by_depth_df.loc[
        #         (bb_classes_by_depth[curr_size_ind], bb_buckets_by_depth[i], bb_metrics_by_depth[0]),
        #         (time_col_header,shortcols_time_by_depth)].head())
        avg_bb_by_depth_df.loc[
                (bb_classes_by_depth[curr_size_ind], bb_buckets_by_depth[i], bb_metrics_by_depth[0]),
                (node_col_header,shortcols_nodes_by_depth)] = \
            [geometric_mean(curr_by_depth_df[col] + SHIFT_NODES) - SHIFT_NODES for col in cols_nodes_by_depth]
        
        # print("row {:d}: {:d}".format(row_ind,len(curr_by_depth_df)))

        num_inst_by_depth[row_ind:row_ind+len(bb_metrics_by_depth)] = len(bb_metrics_by_depth)*[len(curr_by_depth_df)]
        row_ind += len(bb_metrics_by_depth)

        ## Update wins1 rows
        # A win in terms of time is counted when the ``Gur1'' baseline seconds taken 
        # is at least 10\% slower, to account for some variability in runtimes.
        # A win in terms of nodes is when the ``Gur1'' baseline number of nodes is higher.
        refcol = gur1time_col
        avg_bb_by_depth_df.loc[
                (bb_classes_by_depth[curr_size_ind], bb_buckets_by_depth[i], bb_metrics_by_depth[1]),
                (time_col_header,shortcols_time_by_depth)] = \
            [ int(sum(curr_by_depth_df[refcol] > 1.1*curr_by_depth_df[col])) for col in cols_time_by_depth ]

        refcol = gur1nodes_col
        avg_bb_by_depth_df.loc[
                (bb_classes_by_depth[curr_size_ind], bb_buckets_by_depth[i], bb_metrics_by_depth[1]),
                (node_col_header,shortcols_nodes_by_depth)] = \
            [ int(sum(curr_by_depth_df[refcol] > curr_by_depth_df[col])) for col in cols_nodes_by_depth ]

avg_bb_by_depth_df[inst_col_name] = num_inst_by_depth

# for i in range(num_buckets):
#     curr_df = curr_df[curr_df[gur1time_col] > bucket_min[i]]
    
display(avg_bb_by_depth_df.loc[(bb_classes_by_depth, bb_buckets_by_depth, bb_metrics_by_depth),:])

Unnamed: 0_level_0,Unnamed: 1_level_0,criterion,Time (s),Time (s),Time (s),Nodes (\#),Nodes (\#),# inst
Unnamed: 0_level_1,Unnamed: 1_level_1,type,Gur1,V,Total,Gur1,V,Unnamed: 8_level_1
class,bucket,metric,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
2 leaves,"[0,3600)",Gmean,49.942506,50.854976,51.258406,7525.647969,7646.135673,221
2 leaves,"[0,3600)",Wins1,0.0,68.0,45.0,0.0,110.0,221
2 leaves,"[10,3600)",Gmean,223.488425,230.291344,231.18345,105076.272096,111246.166192,84
2 leaves,"[10,3600)",Wins1,0.0,28.0,27.0,0.0,46.0,84
2 leaves,"[100,3600)",Gmean,610.249941,611.977623,612.681596,313244.867041,329657.02287,47
2 leaves,"[100,3600)",Wins1,0.0,18.0,18.0,0.0,29.0,47
2 leaves,"[1000,3600)",Gmean,2068.498708,2027.350368,2027.867471,838148.539448,838728.783753,19
2 leaves,"[1000,3600)",Wins1,0.0,7.0,7.0,0.0,11.0,19
4 leaves,"[0,3600)",Gmean,49.942506,49.749354,50.975133,7525.647969,7554.416381,221
4 leaves,"[0,3600)",Wins1,0.0,72.0,39.0,0.0,101.0,221


# Section 4: Objective and time analysis

## `obj_and_time_df`: objectives, successes, fails, and time per obj or cut

In [109]:
inst_set = best_gap_df.index
# inst_set = ['10teams_presolved', 'bm23_presolved', 'vpm1_presolved']

# Define rows to add
inst_depth_set = [(inst, best_gap_df.loc[inst, 'BEST VPC DISJ']) for inst in inst_set]

# Define columns to add
fail_rate_col_name = 'Fail rate (%)'
time_col_name = 'Time (s)'
sec_per_obj_col_name = '(s) / obj'
sec_per_cut_col_name = '(s) / cut'
obj_and_time_new_cols = [
    fail_rate_col_name,
    time_col_name,
    sec_per_obj_col_name,
    sec_per_cut_col_name,
]

obj_and_time_df = df.loc[inst_depth_set,['NUM OBJ', 'NUM CUTS', 'NUM FAILS']].copy(deep=True)
obj_and_time_df[fail_rate_col_name] = 100. * obj_and_time_df['NUM FAILS'] / obj_and_time_df['NUM OBJ']
obj_and_time_df[time_col_name] = df['VPC_GEN_TIME']
obj_and_time_df[sec_per_obj_col_name] = obj_and_time_df[time_col_name] / obj_and_time_df['NUM OBJ']
obj_and_time_df[sec_per_cut_col_name] = obj_and_time_df[time_col_name] / obj_and_time_df['NUM CUTS']

# Replace Fail rate = NaN when all cuts are one-sided cuts
SKIP_CHAR = '-'
obj_and_time_df.fillna(SKIP_CHAR, inplace = True)
obj_and_time_df.replace(np.inf, SKIP_CHAR, inplace = True)

# Add average row
# obj_and_time_df.loc['Average'] = 0
obj_and_time_df.loc['Average', obj_and_time_new_cols] =\
    [obj_and_time_df[obj_and_time_df[col] != SKIP_CHAR][col].mean() for col in obj_and_time_new_cols]
# for col in obj_and_time_new_cols:
#     obj_and_time_df.at[('Average',0),col] =\
#         obj_and_time_df[obj_and_time_df[col] != SKIP_CHAR][col].mean()

obj_and_time_df.loc['Average',['NUM OBJ', 'NUM CUTS', 'NUM FAILS']] = ""

display(obj_and_time_df)
# obj_and_time_df[obj_and_time_df['NUM CUTS'] == 0]
# obj_and_time_df[obj_and_time_df['(s) / obj'] > 100000]
# obj_and_time_df.loc['neos18_presolved']
# obj_and_time_df[obj_and_time_df['(s) / obj'] != SKIP_CHAR]['(s) / obj'].max()

  obj_and_time_df.fillna(SKIP_CHAR, inplace = True)
  obj_and_time_df.loc['Average',['NUM OBJ', 'NUM CUTS', 'NUM FAILS']] = ""


Unnamed: 0_level_0,Unnamed: 1_level_0,NUM OBJ,NUM CUTS,NUM FAILS,Fail rate (%),Time (s),(s) / obj,(s) / cut
INSTANCE,disj_terms,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
10teams_presolved,8,322.0,34.0,288.0,89.440994,372.740000,1.157578,10.962941
23588_presolved,64,76.0,75.0,1.0,1.315789,29.050000,0.382237,0.387333
30n20b8_presolved,2,193.0,190.0,3.0,1.554404,48.520000,0.251399,0.255368
50v-10_presolved,64,30.0,29.0,1.0,3.333333,4.530000,0.151,0.156207
a1c1s1_presolved,8,3.0,3.0,0.0,0.0,0.550000,0.183333,0.183333
...,...,...,...,...,...,...,...,...
usAbbrv-8-25_70_presolved,8,4.0,4.0,0.0,0.0,0.690000,0.1725,0.172500
vpm1_presolved,32,34.0,10.0,24.0,70.588235,0.070000,0.002059,0.007000
vpm2_presolved,64,36.0,25.0,11.0,30.555556,0.240000,0.006667,0.009600
zib54-UUE_presolved,64,62.0,56.0,6.0,9.677419,62.550000,1.008871,1.116964


In [110]:
### DEBUG finding max difference in time between TOTAL_TIME and sum of individual times
# cuts_cols = [col for col in df.columns if col.startswith('NUM CUTS')]
# time_cols = [
#     'INIT_SOLVE_TIME',
#     'VPC_GEN_TIME',
#     'VPC_APPLY_TIME',
#     'BB_TIME',
#     'TOTAL_TIME'
# ]
# display(df.loc['bell3b_presolved',['NUM OBJ', 'NUM FAILS'] + cuts_cols])

# obj_and_time_df = df.loc[inst_depth_set].copy(deep = True)['NUM OBJ', 'NUM CUTS', 'NUM FAILS', 'VPC_GEN_TIME']
# display(obj_and_time_df)

# max_diff_time = 0.
# max_diff_inst = ''
# for inst in best_gap_df.index:
#     depth = best_gap_df.loc[inst, 'BEST VPC DISJ']
#     curr_num_obj   = df.loc[(inst,depth)]['NUM OBJ']
#     curr_num_vpc   = df.loc[(inst,depth)]['NUM VPC']
#     curr_num_1side = df.loc[(inst,depth)]['NUM CUTS ONE_SIDED']
#     curr_num_fails = df.loc[(inst,depth)]['NUM FAILS']
#     if curr_num_vpc + curr_num_fails != curr_num_obj + curr_num_1side:
#         raise ValueError("{}: curr_num_vpc ({:d}) + curr_num_fails ({:d}) != curr_num_obj ({:d}) + curr_num_1side ({:d})".format(inst, curr_num_vpc, curr_num_fails, curr_num_obj, curr_num_1side))
    
#     curr_fail_pct = 100. * curr_num_fails / curr_num_obj
#     curr_init_solve = df.loc[(inst,depth)]['INIT_SOLVE_TIME']
#     curr_vpc_gen = df.loc[(inst,depth)]['VPC_GEN_TIME']
#     curr_vpc_apply = df.loc[(inst,depth)]['VPC_APPLY_TIME']
#     curr_bb_time = df.loc[(inst,depth)]['BB_TIME']
#     curr_total_time = df.loc[(inst,depth)]['TOTAL_TIME']

#     curr_diff_time = curr_total_time - (curr_init_solve + curr_vpc_gen + curr_vpc_apply + curr_bb_time)
#     if curr_diff_time < -EPS:
#         display(df.loc[inst,time_cols])
#         raise ValueError("{} (depth {:d}): curr_diff_time {} < 0.".format(inst,depth,curr_diff_time))
    
#     if max_diff_time < curr_diff_time:
#         max_diff_inst = inst
#         max_diff_time = curr_diff_time

# print("Max diff time = {} for inst {}".format(max_diff_time,max_diff_inst))
# display(df.loc[(max_diff_inst,best_gap_df.loc[max_diff_inst, 'BEST VPC DISJ']),time_cols])


## `best_disj_gap_df`: Number of times a particular depth achieves best result and beats baseline by at least EPS

In [111]:
long_cols_to_compare = {
    col_vpc:         col_gmic,
    col_vpc_gmic:    col_gmic,
    col_first_ref_v: col_first_ref,
    col_last_ref_v:  col_last_ref,
}
short_cols_to_compare = [map_cols_to_short_gap[col] for col in long_cols_to_compare.keys()]
row_no_improvement = 'No improvement'

best_disj_gap_df = pd.DataFrame(
    columns = short_cols_to_compare,
    index = [row_no_improvement] + [0] + sizes,
    dtype = int,
)

for curr_depth in [0] + sizes:
    curr_depth_df = selected_gap_df.xs(curr_depth,level='disj_terms')
    for col in long_cols_to_compare.keys():
        shortcol = map_cols_to_short_gap[col]
        refcol = long_cols_to_compare[col]
        
        # Calculate num times this depth yielded the best result
        curr_num_wins = sum(
            (curr_depth_df[col] == best_gap_df[shortcol]) & 
            (best_gap_df[shortcol] > best_gap_df[map_cols_to_short_gap[refcol]] + EPS)
        )
        best_disj_gap_df.at[curr_depth,shortcol] = curr_num_wins
        
# Add no improvement row
curr_depth = row_no_improvement
for col in long_cols_to_compare.keys():
    shortcol = map_cols_to_short_gap[col]
    refcol = long_cols_to_compare[col]

    # Calculate num times no improvement over the baseline
    curr_num_wins = sum(
            (best_gap_df[shortcol] <= best_gap_df[map_cols_to_short_gap[refcol]] + EPS)
        )
    best_disj_gap_df.at[curr_depth,shortcol] = curr_num_wins


# Reindex to add "leaves" to index
idx = [row_no_improvement] + ['Best'] + [str(size) + " leaves" for size in sizes]
reidx = {old_id : new_id for old_id, new_id in zip(best_disj_gap_df.index,idx)}
best_disj_gap_df.rename(reidx, inplace=True)

# Remove best row (it is good to verify this is the same as the relevant entries in win_df or Table 1)
# best_disj_gap_df.drop('Best', axis=0, inplace=True)

# Make sure all cols are int
best_disj_gap_df = best_disj_gap_df.astype(int)

best_disj_gap_df

Unnamed: 0,V,V+G,V+GurF,V+GurL
No improvement,197,99,74,95
Best,135,233,258,237
2 leaves,0,11,59,41
4 leaves,3,13,71,41
8 leaves,5,21,63,35
16 leaves,4,21,83,48
32 leaves,16,37,77,48
64 leaves,107,146,128,93


## `best_disj_time_df`: Number of times depth is best and improvement is at least 10%

In [112]:
row_no_improvement = 'No improvement'

best_disj_time_df = pd.DataFrame(
    columns = bb_classes,
    index = [row_no_improvement] + [0] + sizes,
    dtype = int,
)

col = gur1vtime_col
# shortcol = (time_col_header,map_cols_to_short_time[col])
# destcol = map_cols_to_short_time[col]
refcol = gur1time_col

# Calculate num times this depth yielded the best result
best_time_df = selected_time_df.xs(0,level='disj_terms')
for curr_depth in [0] + sizes:
    # First test for all instances
    curr_depth_df = selected_time_df.xs(curr_depth,level='disj_terms')
    
    curr_num_wins = sum(
        (curr_depth_df[col] == best_time_df[col]) & 
        (best_time_df[refcol] >= 1.1 * best_time_df[col] + EPS)
    )
    best_disj_time_df.at[curr_depth,bb_classes[0]] = curr_num_wins

# Add no improvement row
# Calculate num times no improvement over the baseline
curr_depth = row_no_improvement
curr_num_wins = sum(
        (best_time_df[refcol] < 1.1 * best_time_df[col])
    )
best_disj_time_df.at[curr_depth,bb_classes[0]] = curr_num_wins

# Now check only 6-tree instances
best_time_df = best_time_df.loc[all6_instances_dict.keys()]
for curr_depth in [0] + sizes:
    curr_depth_df = selected_time_df.xs(curr_depth,level='disj_terms').loc[all6_instances_dict.keys()]

    # Calculate num times this depth yielded the best result
    curr_num_wins = sum(
        (curr_depth_df[col] == best_time_df[col]) & 
        (best_time_df[refcol] >= 1.1 * best_time_df[col] + EPS)
    )
    best_disj_time_df.at[curr_depth,bb_classes[1]] = curr_num_wins

# Add no improvement row
# Calculate num times no improvement over the baseline
curr_depth = row_no_improvement
curr_num_wins = sum(
        (best_time_df[refcol] < 1.1 * best_time_df[col])
    )
best_disj_time_df.at[curr_depth,bb_classes[1]] = curr_num_wins

# Reindex to add "leaves" to index
idx = [row_no_improvement] + ['Best'] + [str(size) + " leaves" for size in sizes]
reidx = {old_id : new_id for old_id, new_id in zip(best_disj_time_df.index,idx)}
best_disj_time_df.rename(reidx, inplace=True)

# Remove best row (it is good to verify this is the same as the relevant entries in win_df or Table 1)
# best_disj_time_df.drop('Best', axis=0, inplace=True)

# Make sure all cols are int
best_disj_time_df = best_disj_time_df.astype(int)

best_disj_time_df

Unnamed: 0,All,6 trees
No improvement,162,105
Best,144,116
2 leaves,29,24
4 leaves,32,25
8 leaves,19,16
16 leaves,24,19
32 leaves,32,25
64 leaves,20,19


## `density_df`

In [113]:
rows = [
    '\# inst w/VPCs and time < 3600s',
    '\# wins by time',
    'Avg min cut density',
    'Avg max cut density',
    'Avg avg cut density',
    'Avg avg cut density (win by time)',
    'Avg avg cut density (non-win)',
]

columns = ['V ({:d})'.format(size) for size in sizes]

density_df = pd.DataFrame(
    columns = columns,
    index = rows,
    dtype = float
)

# Calculate stats for 6 trees instances by depth
# inst_set = all6_instances_dict.keys()
inst_set = selected_time_instances_dict.keys()
support_cols = [col for col in df.columns if "SUPPORT VPC" in col]
selected_cols = [gur1time_col,gur1vtime_col]+support_cols+['COLS']+['NUM VPC']
curr_df = df.loc[inst_set,selected_cols]
curr_df0 = curr_df.xs(0,level='disj_terms')

for curr_size_ind in range(0,len(sizes)):
    # Select only this depth
    # curr_by_depth_df = curr_df[curr_df.index.get_level_values(1) == sizes[curr_size_ind]]
    curr_by_depth_df = curr_df.xs(sizes[curr_size_ind], level='disj_terms')
    
    # Remove instances that take more than an hour
    INSTANCES_TO_KEEP = curr_by_depth_df[gur1time_col] < MAX_TIME
    curr_by_depth_df = curr_by_depth_df[INSTANCES_TO_KEEP]

    # Count number of instances having cuts
    curr_row_ind = 0
    density_df.iloc[curr_row_ind,curr_size_ind] = sum(curr_by_depth_df['NUM VPC'] > 0)
    
    # Mean of min, max, avg density
    curr_row_ind = 2
    for col_ind in range(len(support_cols)):
        curr_series = curr_by_depth_df[support_cols[col_ind]] / curr_by_depth_df['COLS']
        density_df.iloc[curr_row_ind,curr_size_ind] = curr_series.mean()
        curr_row_ind += 1

    ## Count wins1 (should be same as in avg_bb_by_depth_df)
    # A win in terms of time is counted when the ``Gur1'' baseline seconds taken 
    # is at least 10\% slower, to account for some variability in runtimes.
    # A win in terms of nodes is when the ``Gur1'' baseline number of nodes is higher.
    curr_wins_df = curr_by_depth_df[curr_df0.loc[INSTANCES_TO_KEEP,gur1time_col] > 1.1*curr_by_depth_df[gur1vtime_col]]
    curr_row_ind = 1
    density_df.iloc[curr_row_ind,curr_size_ind] = len(curr_wins_df)
    curr_row_ind = len(rows)-2
    density_df.iloc[curr_row_ind,curr_size_ind] = (curr_wins_df[support_cols[2]] / curr_wins_df['COLS']).mean()

    # curr_lose_df = curr_by_depth_df[1.1*curr_df0[gur1time_col] < curr_by_depth_df[gur1vtime_col]]
    curr_lose_df = curr_by_depth_df[curr_df0.loc[INSTANCES_TO_KEEP,gur1time_col] <= 1.1*curr_by_depth_df[gur1vtime_col]]
    curr_row_ind = len(rows)-1
    density_df.iloc[curr_row_ind,curr_size_ind] = (curr_lose_df[support_cols[2]] / curr_lose_df['COLS']).mean()

density_df

Unnamed: 0,V (2),V (4),V (8),V (16),V (32),V (64)
\# inst w/VPCs and time < 3600s,265.0,275.0,263.0,264.0,251.0,235.0
\# wins by time,108.0,106.0,96.0,115.0,126.0,132.0
Avg min cut density,0.291067,0.299309,0.275892,0.27213,0.256187,0.218744
Avg max cut density,0.291067,0.299309,0.275892,0.27213,0.256187,0.218744
Avg avg cut density,0.251829,0.291619,0.302125,0.333862,0.353617,0.348781
Avg avg cut density (win by time),0.191211,0.261144,0.164847,0.224588,0.246151,0.186583
Avg avg cut density (non-win),0.2878,0.309271,0.36835,0.404067,0.431887,0.473987


## `obj_fails_df`

In [114]:
inst_set = best_gap_df.index
# inst_set = ['10teams_presolved', 'bm23_presolved', 'vpm1_presolved']

# Define rows to add
inst_depth_set = [(inst, best_gap_df.loc[inst, 'BEST VPC DISJ']) for inst in inst_set]

rows = [
    '\# inst w/obj',
    '\# inst w/succ obj',
    '\# inst no obj',
    '\# inst all obj fail',
    '\# inst all obj succ',
    '\% obj fails',
    '\% fails dup',
    '\% fails unbdd',
    '\% fails tilim',
    '\% fails dyn',
    '\% fails all ones',
    '\% fails post-GMIC obj',
    '\% fails DB',
    '\# obj / cut',
    '(s) / obj',
    '(s) / cut',
]

columns = ['V ({:d})'.format(size) for size in sizes]

obj_fails_df = pd.DataFrame(
    columns = columns,
    index = rows,
    dtype = float
)

# Choose columns to pull
selected_cols = [
    'NUM OBJ',
    'NUM VPC',
    'NUM FAILS DUMMY_OBJ',
    'NUM FAILS ALL_ONES',
    'NUM FAILS CUT_VERTICES',
    'NUM FAILS ITER_BILINEAR',
    'NUM FAILS UNIT_VECTORS',
    'NUM FAILS DISJ_LB',
    'NUM FAILS TIGHT_POINTS',
    'NUM FAILS TIGHT_RAYS',
    'NUM FAILS TIGHT_POINTS2',
    'NUM FAILS TIGHT_RAYS2',
    'NUM FAILS USER',
    'NUM FAILS OBJ_CUT',
    'NUM FAILS ONE_SIDED',
    'NUM FAILS',
    'ABANDONED',
    'BAD_DYNAMISM',
    'BAD_SUPPORT',
    'BAD_VIOLATION',
    'CUT_LIMIT',
    'DUAL_INFEASIBLE',
    'DUPLICATE_SIC',
    'DUPLICATE_VPC',
    'ITERATION_LIMIT',
    'ORTHOGONALITY_SIC',
    'ORTHOGONALITY_VPC',
    'PRIMAL_INFEASIBLE',
    'TIME_LIMIT',
    'NUMERICAL_ISSUES_WARNING',
    'DLB_EQUALS_DUB_NO_OBJ',
    'DLB_EQUALS_LPOPT_NO_OBJ',
    'PRIMAL_INFEASIBLE_NO_OBJ',
    'NUMERICAL_ISSUES_NO_OBJ',
    'UNKNOWN',
    'VPC_GEN_TIME',
]

curr_df = df.loc[inst_set,selected_cols]
curr_df0 = curr_df.xs(0,level='disj_terms')

for curr_size_ind in range(0,len(sizes)):
    # Select only this depth
    # curr_by_depth_df = curr_df[curr_df.index.get_level_values(1) == sizes[curr_size_ind]]
    curr_by_depth_df = curr_df.xs(sizes[curr_size_ind], level='disj_terms')

    # Num inst with objectives tried
    obj_fails_df.iloc[0,curr_size_ind] =\
        sum(curr_by_depth_df['NUM OBJ'] > 0)

    # Num inst with successful objectives
    obj_fails_df.iloc[1,curr_size_ind] =\
        sum((curr_by_depth_df['NUM OBJ'] > 0) & (curr_by_depth_df['NUM VPC'] > 0))

    # Num inst with no objectives tried
    obj_fails_df.iloc[2,curr_size_ind] =\
        sum(curr_by_depth_df['NUM OBJ'] == 0)

    # Num inst with objectives tried but all failed
    obj_fails_df.iloc[3,curr_size_ind] =\
        sum((curr_by_depth_df['NUM OBJ'] > 0) & (curr_by_depth_df['NUM VPC'] == 0))

    # Num inst with objectives tried and all succeeded
    obj_fails_df.iloc[4,curr_size_ind] =\
        sum((curr_by_depth_df['NUM OBJ'] > 0) & (curr_by_depth_df['NUM OBJ'] == curr_by_depth_df['NUM VPC']))

    # Percent objective failures
    inst_w_obj_df = curr_by_depth_df[curr_by_depth_df['NUM OBJ'] > 0]
    obj_fails_df.iloc[5,curr_size_ind] =\
        (100. * inst_w_obj_df['NUM FAILS'] / inst_w_obj_df['NUM OBJ']).mean()

    ## Percent of failures caused by:
    inst_w_fails_df = curr_by_depth_df[curr_by_depth_df['NUM FAILS'] > 0]

    # duplicates
    obj_fails_df.iloc[6,curr_size_ind] =\
        (100. * (inst_w_fails_df['DUPLICATE_VPC']+inst_w_fails_df['DUPLICATE_SIC']) / inst_w_fails_df['NUM FAILS']).mean()

    # unbdd
    obj_fails_df.iloc[7,curr_size_ind] =\
        (100. * (inst_w_fails_df['DUAL_INFEASIBLE']) / inst_w_fails_df['NUM FAILS']).mean()

    # tilim
    obj_fails_df.iloc[8,curr_size_ind] =\
        (100. * (inst_w_fails_df['TIME_LIMIT']) / inst_w_fails_df['NUM FAILS']).mean()
    
    # dynamism
    obj_fails_df.iloc[9,curr_size_ind] =\
        (100. * (inst_w_fails_df['BAD_DYNAMISM']) / inst_w_fails_df['NUM FAILS']).mean()

    # all-ones
    obj_fails_df.iloc[10,curr_size_ind] =\
        (100. * (inst_w_fails_df['NUM FAILS ALL_ONES']) / inst_w_fails_df['NUM FAILS']).mean()

    # post-GMIC
    obj_fails_df.iloc[11,curr_size_ind] =\
        (100. * (inst_w_fails_df['NUM FAILS ITER_BILINEAR']) / inst_w_fails_df['NUM FAILS']).mean()

    # disj_lb
    obj_fails_df.iloc[12,curr_size_ind] =\
        (100. * (inst_w_fails_df['NUM FAILS DISJ_LB']) / inst_w_fails_df['NUM FAILS']).mean()
    
    # num obj / cut
    inst_w_cuts_df = curr_by_depth_df[curr_by_depth_df['NUM VPC'] > 0]
    obj_fails_df.iloc[13,curr_size_ind] = (inst_w_cuts_df['NUM OBJ'] / inst_w_cuts_df['NUM VPC']).mean()

    # (s) / obj
    obj_fails_df.iloc[14,curr_size_ind] = (inst_w_obj_df['VPC_GEN_TIME'] / inst_w_obj_df['NUM OBJ']).mean()

    # (s) / cut
    obj_fails_df.iloc[15,curr_size_ind] = (inst_w_cuts_df['VPC_GEN_TIME'] / inst_w_cuts_df['NUM VPC']).mean()

obj_fails_df

Unnamed: 0,V (2),V (4),V (8),V (16),V (32),V (64)
\# inst w/obj,311.0,318.0,304.0,305.0,288.0,271.0
\# inst w/succ obj,306.0,317.0,299.0,300.0,283.0,263.0
\# inst no obj,21.0,14.0,28.0,27.0,44.0,61.0
\# inst all obj fail,5.0,1.0,5.0,5.0,5.0,8.0
\# inst all obj succ,31.0,42.0,31.0,33.0,28.0,24.0
\% obj fails,29.080722,25.413956,26.204895,30.856389,31.826954,33.980077
\% fails dup,50.558842,39.752316,41.530533,48.9721,51.126837,56.670788
\% fails unbdd,38.430322,47.253809,45.31377,36.017806,32.110224,27.555995
\% fails tilim,1.857593,3.572477,3.648944,5.031881,7.197781,6.349014
\% fails dyn,8.931204,8.720771,9.040952,9.531211,8.83526,9.087506


## `active_cuts_df`: when generated cuts are active, by objective

In [115]:
inst_set = best_gap_df.index
# inst_set = ['10teams_presolved', 'bm23_presolved', 'vpm1_presolved']

# Define rows to add
inst_depth_set = [(inst, best_gap_df.loc[inst, 'BEST VPC DISJ']) for inst in inst_set]

rows = [
    '\% active GMIC',
    '\% active VPC',
    '\% cuts one-sided',
    '\% active one-sided',
    '\% cuts all ones',
    '\% active all ones',
    '\% cuts post-GMIC opt',
    '\% active post-GMIC opt',
    '\% cuts DB',
    '\% active DB',
]

columns = ['V+G ({:d})'.format(size) for size in sizes]

active_cuts_df = pd.DataFrame(
    columns = columns,
    index = rows,
    dtype = float
)

# Choose columns to pull
active_gmic_col = 'ACTIVE GMIC (all cuts)'
active_vpc_col = 'ACTIVE VPC (all cuts)'
selected_cols = [
    'NUM GMIC',
    'NUM VPC',
    active_gmic_col,
    active_vpc_col,
] + [col for col in df.columns if "NUM CUTS " in col] + [col for col in df.columns if "NUM ACTIVE" in col]

curr_df = df.loc[inst_set,selected_cols]

num_inst_with_one_sided_cuts = [0 for size in sizes]
for curr_size_ind in range(0,len(sizes)):
    # Select only this depth
    # curr_by_depth_df = curr_df[curr_df.index.get_level_values(1) == sizes[curr_size_ind]]
    curr_by_depth_df = curr_df.xs(sizes[curr_size_ind], level='disj_terms')

    # active gmic
    active_cuts_df.iloc[0,curr_size_ind] =\
        (100. * curr_by_depth_df[active_gmic_col] / curr_by_depth_df['NUM GMIC']).mean()

    # active vpc
    active_cuts_df.iloc[1,curr_size_ind] =\
        (100. * curr_by_depth_df[active_vpc_col] / curr_by_depth_df['NUM VPC']).mean()

    # percent of active cuts among those generated by a specific objective type
    obj_types = ['ONE_SIDED', 'ALL_ONES', 'ITER_BILINEAR', 'DISJ_LB']
    curr_row_index = 2
    inst_w_vpc = curr_by_depth_df[curr_by_depth_df['NUM VPC'] > 0]
    for obj in obj_types:
        active_cuts_df.iloc[curr_row_index,curr_size_ind] =\
            (100. * inst_w_vpc['NUM CUTS '+obj] / inst_w_vpc['NUM VPC']).mean()
        curr_row_index += 1

        inst_w_cuts = inst_w_vpc[inst_w_vpc['NUM CUTS '+obj] > 0]
        
        active_cuts_df.iloc[curr_row_index,curr_size_ind] =\
            (100. * inst_w_cuts['NUM ACTIVE '+obj] / inst_w_cuts['NUM CUTS '+obj]).mean()
        curr_row_index += 1
    
    # num one-sided cuts
    num_inst_with_one_sided_cuts[curr_size_ind] = sum(curr_by_depth_df['NUM CUTS ONE_SIDED'] > 0)

display(active_cuts_df)

print("Num inst with one-sided cuts (should be same across partial trees) =",num_inst_with_one_sided_cuts)
print("Total num one-sided cuts =", sum(curr_by_depth_df['NUM CUTS ONE_SIDED']))

Unnamed: 0,V+G (2),V+G (4),V+G (8),V+G (16),V+G (32),V+G (64)
\% active GMIC,44.085314,43.477877,42.170665,41.65404,40.862307,40.772512
\% active VPC,30.251908,30.814606,31.339695,35.479053,34.289254,32.310973
\% cuts one-sided,0.772678,0.718051,0.744452,1.11479,0.821596,0.792411
\% active one-sided,100.0,100.0,100.0,100.0,100.0,100.0
\% cuts all ones,11.897763,6.653099,7.849304,9.123057,8.463443,9.045098
\% active all ones,91.447368,84.782609,81.818182,76.397516,79.62963,79.141104
\% cuts post-GMIC opt,2.696105,3.492087,2.206401,2.15033,1.715315,2.660776
\% active post-GMIC opt,86.419753,71.212121,63.461538,64.705882,63.829787,59.016393
\% cuts DB,84.633454,89.136764,89.199842,87.611823,88.999646,87.501714
\% active DB,63.553995,58.91337,55.600502,52.210616,49.085394,42.670693


Num inst with one-sided cuts (should be same across partial trees) = [5, 5, 5, 5, 5, 5]
Total num one-sided cuts = 6


# Section 5: Export tables to LaTeX

## Format Table 1: gap closed and num wins

In [116]:
# Format Table 1: gap closed and num wins

# Create copy of table then remove values we do not want (wins for 'G)
# TABLE1 = avg_df.copy(deep=True)[[inst_col_name, 'G', 'DB', 'V', 'V+G', 'GurF', 'V+GurF', 'GurL', 'V+GurL']]
TABLE1 = avg_gap_df.copy(deep=True)[[inst_col_name]+gap_cols_short]

TABLE1['G'].loc[:,wins_row_name] = ""

# Process the column with # inst to only report number of instances for each set
TABLE1[inst_col_name].loc[:,wins_row_name] = ""
val = TABLE1[inst_col_name].loc[all_set_name,avg_row_name]
TABLE1[inst_col_name].loc[all_set_name,avg_row_name] = \
    create_multirow_string(str(val), extra_format=r"\tablenum[table-format=3]")
val = TABLE1[inst_col_name].loc[good_vpc_set_name,avg_row_name]
TABLE1[inst_col_name].loc[good_vpc_set_name,avg_row_name] = \
    create_multirow_string(str(val), extra_format=r"\tablenum[table-format=3]")

# Reset index to appear as cols
TABLE1.reset_index(inplace=True)

# Place column with # inst as second column
inst_col = TABLE1[inst_col_name]
TABLE1.drop(columns=[inst_col_name], inplace=True)
TABLE1.insert(loc=1, column=inst_col_name, value=inst_col)

# Set column should have multirow
setseries = TABLE1['Set']
format_col_as_multirow(setseries)

# for i in TABLE1.index:
#     curr_name = tex_escape(str(i))
#     print("Changing {} to {}".format(i, curr_name))
#     TABLE1.rename({i: curr_name}, inplace=True)
# print("")

# If we are not using the automatic tex-escaper, we need to do it ourselves
for col in TABLE1.columns:
    # curr_col = '{' + tex_escape(col) + '}'
    curr_col = tex_escape(str(col))
    TABLE1.rename({col: curr_col}, inplace=True, axis=1)

# Finally, apply the desired style
# styler.format({
#     ("Numeric", "Integers"): '\${}',
#     ("Numeric", "Floats"): '{:.3f}',
#     ("Non-Numeric", "Strings"): str.upper
# })
# styler.format_index(escape="latex", axis=0).format_index(escape="latex", axis=1)
# styler.hide(level=0,axis=0)
table1_str = TABLE1.style.\
    hide(axis=0).\
    format(formatter = int_format).\
    to_latex(
        #@{}l@{\hskip 5pt}
        column_format="""
        @{}l@{}
        S[table-format=2.0,table-auto-round,table-number-alignment=center]
        l
        *{1}{S[table-auto-round]}
        *{8}{S[table-auto-round]}
        @{}""",
        hrules = True,
        #clines = "skip-last;data",
        sparse_index = True,
        multirow_align = "c",
        # float_format="%.2f", 
        # escape=False, 
        siunitx=True,
        # index_names=False,
        #columns=['\# inst', 'G', 'DB', 'V', 'V+G', 'GurF', 'V+GurF', 'GurL', 'V+GurL']
        convert_css = True,
        environment = "table",
        position_float = "centering",
        label = "tab:gap-closed-summary",
        caption = """
            Summary statistics for percent gap closed by VPCs.
            The wins row reports how many instances close at least $\epsilon$ more gap using DB, V, V+G compared to G on its own, V+GurF compared to GurF, and V+GurL compared to GurL.
        """,
        )

# Add a midrule between the two sets; the "3" is hand-coded but can be automated
table1_str = add_midrule(table1_str, -3)

# Adjustbox environment sets width to pagewidth
table1_str = add_adjustbox_environment(table1_str)

# Set default siunitx options for this table
table1_str = add_sisetup(table1_str)

print(table1_str)


{
\sisetup{
    table-alignment-mode = format,
    table-number-alignment = center,
    table-format = 2.2,
}
\begin{table}
\centering
\caption{
            Summary statistics for percent gap closed by VPCs.
            The wins row reports how many instances close at least $\epsilon$ more gap using DB, V, V+G compared to G on its own, V+GurF compared to GurF, and V+GurL compared to GurL.
        }
\label{tab:gap-closed-summary}
\begin{adjustbox}{width=1\textwidth}
\begin{tabular}{@{}l@{}
        S[table-format=2.0,table-auto-round,table-number-alignment=center]
        l
        *{1}{S[table-auto-round]}
        *{8}{S[table-auto-round]}
        @{}}
\toprule
{Set} & {\# inst} & {} & {G} & {R} & {DB} & {V} & {V+G} & {max(G,V)} & {GurF} & {V+GurF} & {GurL} & {V+GurL} \\
\midrule
{\multirow[c]{2}{*}{All}} & {\multirow[c]{2}{*}{\tablenum[table-format=3]{332}}} & Avg (\%) & 16.440441504119036 & 0.25674423491955295 & 18.369375816529555 & 12.046780637829137 & 23.871484673093104 & 22.962066

## Format Table 2: depth x gap

In [117]:
# Format Table 2: percent gap closed by depth
TABLE2 = gap_by_size_df.copy(deep=True)

# If we are not using the automatic tex-escaper, we need to do it ourselves
for col in TABLE2.columns:
    # curr_col = '{' + tex_escape(col) + '}'
    curr_col = tex_escape(str(col))
    TABLE2.rename({col: curr_col}, inplace=True, axis=1)

# Finally, apply the desired style
table2_str = TABLE2.style.\
    format(formatter = int_format).\
    to_latex(
        column_format="""
        @{}l
        *{5}{S[table-auto-round]}
        @{}""",
        hrules = True,
        sparse_index = True,
        multirow_align = "c",
        siunitx=True,
        convert_css = True,
        environment = "table",
        position_float = "centering",
        label = "tab:depth",
        caption = """
            Average percent gap closed broken down by the number of leaf nodes used to construct the partial branch-and-bound tree,
            for VPCs with and without GMICs, as well as at the root by \Gurobi{} after the first and last round of cuts. 
            ``Best'' refers to the maximum gap closed across all partial tree sizes.
        """,
        )

# Adjustbox environment sets width to pagewidth
# table2_str = add_adjustbox_environment(table2_str)

# Set default siunitx options for this table
table2_str = add_sisetup(table2_str)

print(table2_str)


{
\sisetup{
    table-alignment-mode = format,
    table-number-alignment = center,
    table-format = 2.2,
}
\begin{table}
\centering
\caption{
            Average percent gap closed broken down by the number of leaf nodes used to construct the partial branch-and-bound tree,
            for VPCs with and without GMICs, as well as at the root by \Gurobi{} after the first and last round of cuts. 
            ``Best'' refers to the maximum gap closed across all partial tree sizes.
        }
\label{tab:depth}
\begin{tabular}{@{}l
        *{5}{S[table-auto-round]}
        @{}}
\toprule
{} & {DB} & {V} & {V+G} & {max(G,V)} & {V+GurF} & {V+GurL} \\
\midrule
VPCs disabled & 0.0 & 0.0 & 16.440441504119036 & 16.440441504119036 & 28.861973203401572 & 48.23247668960753 \\
2 leaves & 3.0145060622014737 & 2.211831955134264 & 17.138744094741295 & 16.72182683081693 & 30.879921745988486 & 48.98570506062052 \\
4 leaves & 5.270240433554275 & 3.592591710679685 & 17.718584539768383 & 17.159825568179592 &

## Format Table 3: summary of b&b results

In [118]:
# Format Table 3: summary of b&b results
TABLE3 = avg_bb_df.copy(deep=True)

# Remove unnecessary entries
TABLE3.loc[(slice(None), slice(None), bb_metrics[1:3]),([time_col_header,node_col_header],map_cols_to_short_time[gur1time_col])] = ""
TABLE3.loc[(slice(None), slice(None), bb_metrics[2]),([time_col_header,node_col_header],map_cols_to_short_time[gur7time_col])] = ""

# Process the column with # inst to only report number of instances for each set
TABLE3.loc[(slice(None), slice(None), bb_metrics[1:3]), inst_col_name] = ""

for curr_class in bb_classes:
    for curr_bucket in bb_buckets:
        curr_name = (curr_class, curr_bucket, bb_metrics[0])
        val = TABLE3.loc[curr_name, inst_col_name]
        TABLE3.loc[curr_name, inst_col_name] = \
            create_multirow_string(str(val), num_rows = 3, extra_format=r"\tablenum[table-format=3]")

# Set num wins in int format or enclose in braces (center)
# tmp_df = TABLE3.loc[(slice(None), slice(None), bb_metrics[1:3]),time_col_header].applymap(int_format, num_digits=6)
tmp_df = TABLE3.loc[(slice(None), slice(None), bb_metrics[1:3]),time_col_header].applymap(int_format, num_digits=4, add_phantom=True)
# tmp_df = TABLE3.loc[(slice(None), slice(None), bb_metrics[1:3]),time_col_header].applymap(enclose_in_braces)
tmp_df.columns = pd.MultiIndex.from_product([[time_col_header],tmp_df.columns])
TABLE3.loc[(slice(None), slice(None), bb_metrics[1:3]),time_col_header] = tmp_df

# tmp_df = TABLE3.loc[(slice(None), slice(None), bb_metrics[1:3]),node_col_header].applymap(int_format, num_digits=6)
tmp_df = TABLE3.loc[(slice(None), slice(None), bb_metrics[1:3]),node_col_header].applymap(int_format, num_digits=6, add_phantom=False)
# tmp_df = TABLE3.loc[(slice(None), slice(None), bb_metrics[1:3]),node_col_header].applymap(enclose_in_braces)
tmp_df.columns = pd.MultiIndex.from_product([[node_col_header],tmp_df.columns])
TABLE3.loc[(slice(None), slice(None), bb_metrics[1:3]),node_col_header] = tmp_df

# Reset index to appear as cols
TABLE3.reset_index(inplace=True)

# Add new col combining class and bucket in one
class_bucket_col = "\multirow{3}{*}{\shortstack[l]{" + TABLE3['class'] + "\\\\\\relax " + TABLE3['bucket'] + "}}"
for i in range(len(class_bucket_col)):
    if i%3!=0:
        class_bucket_col[i] = ""
TABLE3.drop(columns = ['class', 'bucket'], inplace = True, level = 0)
TABLE3.insert(loc=0, column="Set", value=class_bucket_col)

# Place column with # inst as second column
inst_col = TABLE3[inst_col_name]
TABLE3.drop(columns=[inst_col_name], inplace=True, level=0)
TABLE3.insert(loc=1, column=inst_col_name, value=inst_col)

# If we are not using the automatic tex-escaper, we need to do it ourselves
for col in TABLE3.columns:
    if isinstance(col, tuple):
        for lvl_ind, lvl_col in enumerate(col):
            curr_col = tex_escape(str(lvl_col))
            TABLE3.rename({lvl_col: curr_col}, inplace=True, axis=1, level=lvl_ind)
    else:
        # curr_col = '{' + tex_escape(col) + '}'
        curr_col = tex_escape(str(col))
        TABLE3.rename({col: curr_col}, inplace=True, axis=1)

# Finally, apply the desired style
    # format(formatter = int_format).\
table3_str = TABLE3.style.\
    hide(axis=0).\
    to_latex(
        column_format="""
        @{}l    % set
        c       % inst
        l       % stat
        *{2}{S[table-auto-round,table-format=4.2]}
        *{2}{H}
        *{2}{S[table-auto-round,table-format=4.2]}        
        *{2}{S[table-auto-round,table-format=6.0]}
        *{1}{H}
        *{1}{S[table-auto-round,table-format=6.0]}
        @{}}""",
        hrules = True,
        sparse_index = True,
        multirow_align = "c",
        siunitx=True,
        convert_css = True,
        environment = "table",
        position_float = "centering",
        label = "tab:bb-summary",
        caption = """
            Summary statistics for time to solve instances with branch-and-bound.
        """,
        )

# Add a midrule between the two sets; the "9" is hand-coded but can be automated
table3_str = add_midrule(table3_str, -13)

# Adjustbox environment sets width to pagewidth
table3_str = add_adjustbox_environment(table3_str)

# Set default siunitx options for this table
table3_str = add_sisetup(table3_str, table_format="4.2")

print(table3_str)


{
\sisetup{
    table-alignment-mode = format,
    table-number-alignment = center,
    table-format = 4.2,
}
\begin{table}
\centering
\caption{
            Summary statistics for time to solve instances with branch-and-bound.
        }
\label{tab:bb-summary}
\begin{adjustbox}{width=1\textwidth}
\begin{tabular}{@{}l    % set
        c       % inst
        l       % stat
        *{2}{S[table-auto-round,table-format=4.2]}
        *{2}{H}
        *{2}{S[table-auto-round,table-format=4.2]}        
        *{2}{S[table-auto-round,table-format=6.0]}
        *{1}{H}
        *{1}{S[table-auto-round,table-format=6.0]}
        @{}}}
\toprule
{Set} & {\# inst} & {metric} & \multicolumn{6}{r}{Time (s)} & \multicolumn{4}{r}{Nodes (\textbackslash{}\#)} \\
{} & {} & {} & {Gur1} & {Gur7} & {V} & {Total} & {V7} & {Total7} & {Gur1} & {Gur7} & {V} & {V7} \\
\midrule
\multirow{3}{*}{\shortstack[l]{All\\\relax [0,3600)}} & {\multirow[c]{3}{*}{\tablenum[table-format=3]{306}}} & Gmean & 81.100666 & 59.31087

  TABLE3.loc[(slice(None), slice(None), bb_metrics[1:3]), inst_col_name] = ""
  TABLE3.loc[curr_name, inst_col_name] = \
  tmp_df = TABLE3.loc[(slice(None), slice(None), bb_metrics[1:3]),time_col_header].applymap(int_format, num_digits=4, add_phantom=True)
  tmp_df = TABLE3.loc[(slice(None), slice(None), bb_metrics[1:3]),node_col_header].applymap(int_format, num_digits=6, add_phantom=False)


## Prepare Table 4: rejected instances

In [119]:
df_rejection_reason.to_csv('rejection_reason.csv', index=True)

#### Verbose version

In [120]:
## *Verbose version*: For each instance that was not selected, print the reason
df_rejection_reason_rejected = df_rejection_reason[df_rejection_reason['SELECTED_GAP'] == False]
rejected_instance_list = df_rejection_reason_rejected.index
rejected_instance_list.name = 'Instance'
cols = ['Set', 'Reason']
df_rejected_instances = pd.DataFrame(columns=cols, index=rejected_instance_list)
df_rejected_instances['Set'] = df_ipopt.loc[rejected_instance_list,'SET']
df_rejected_instances.loc[df_rejection_reason_rejected['OPTIMAL_SOLUTION_FOUND'] > 0, 
                            'Reason'] = "Integer-optimal solution found constructing partial tree"
df_rejected_instances.loc[(df_rejection_reason_rejected['OPTIMAL_SOLUTION_FOUND'] == 0) 
                                & (df_rejection_reason_rejected['LP=DLB=DUB'] == 6), 
                            'Reason'] = "Max leaf value = LP value"
df_rejected_instances.loc[(df_rejection_reason_rejected['OPTIMAL_SOLUTION_FOUND'] == 0) 
                                & (df_rejection_reason_rejected['LP=DLB=DUB'] < 6) 
                                & (df_rejection_reason_rejected['LP=DLB=DUB'] + df_rejection_reason_rejected['PRLP_INFEASIBLE'] == 6), 
                            'Reason'] = "Max leaf value = LP value or PRLP primal infeasible"
df_rejected_instances.loc[(df_rejection_reason_rejected['OPTIMAL_SOLUTION_FOUND'] == 0)
                                & (df_rejection_reason_rejected['LP=DLB=DUB'] < 6) 
                                & (df_rejection_reason_rejected['LP=DLB=DUB'] + df_rejection_reason_rejected['PRLP_INFEASIBLE'] < 6)
                                & (df_rejection_reason_rejected['LP=DLB=DUB'] + df_rejection_reason_rejected['PRLP_INFEASIBLE'] + df_rejection_reason_rejected['PRLP_TIME_LIMIT'] == 6), 
                            'Reason'] = "Max leaf value = LP value or PRLP primal infeasible / hits time limit"
df_rejected_instances.loc[df_rejection_reason_rejected['<7_ATTEMPTS'] > 0, 
                            'Reason'] = "Numerical issues"
display(df_rejected_instances.head())
col_format = """@{}*{2}{l}X@{}"""

tmp_df_remaining_rejected_instances = df_rejection_reason.loc[df_rejected_instances[df_rejected_instances['Reason'].isna()].index]
if len(tmp_df_remaining_rejected_instances) > 0:
    display(tmp_df_remaining_rejected_instances)

Unnamed: 0_level_0,Set,Reason
Instance,Unnamed: 1_level_1,Unnamed: 2_level_1
22433_presolved,miplib2017,Integer-optimal solution found constructing pa...
air01_presolved,miplib2,Integer-optimal solution found constructing pa...
app1-1_presolved,miplib2017,Integer-optimal solution found constructing pa...
b-ball_presolved,miplib2017,Max leaf value = LP value or PRLP primal infea...
bppc4-08_presolved,miplib2017,Max leaf value = LP value or PRLP primal infea...


Unnamed: 0_level_0,SELECTED_GAP,SELECTED_TIME,SELECTED_6TREES,NUM_WITH_OBJS,NUM_WITH_CUTS,IP_OPT_UNKNOWN,TOO_MANY_ROWS_OR_COLS,OPTIMAL_SOLUTION_FOUND,LP_OPT_IS_NOT_CUT,DLB=DUB,LP=DLB=DUB,PRLP_INFEASIBLE,PRLP_TIME_LIMIT,NO_CUTS,NO_GAP,GUR_TIMEOUT,<7_ATTEMPTS
Instance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
neos-1112782_presolved,False,False,False,6,0,False,False,0,0,0,0,0,0,True,False,0,False
neos-1112787_presolved,False,False,False,6,0,False,False,0,0,0,0,0,0,True,False,0,False


#### Succinct version

In [121]:
## *Succinct version*: For each instance that was not selected, print the reason
df_rejected_instances = df_status_by_depth.loc[df_rejection_reason[df_rejection_reason['SELECTED_GAP'] == False].index]
df_rejected_instances.insert(loc = 0, column = 'Set', value = df_ipopt.loc[rejected_instance_list,'SET'])
col_format="""@{}*{2}{l}*{6}{c}@{}"""

### Print Table 4

In [122]:
# Format Table 4: rejected instances reasons
TABLE4 = df_rejected_instances.copy(deep=True)
TABLE4.reset_index(inplace=True)

TABLE4["Instance"] = TABLE4["Instance"].apply(remove_presolved_from_name)
TABLE4["Instance"] = TABLE4["Instance"].apply(tex_escape)

# If we are not using the automatic tex-escaper, we need to do it ourselves
for col in TABLE4.columns:
    # curr_col = '{' + tex_escape(col) + '}'
    curr_col = tex_escape(str(col))
    TABLE4.rename({col: curr_col}, inplace=True, axis=1)

# Finally, apply the desired style
    # format_index(escape="latex", axis=0).\
table4_str = TABLE4.style.\
    hide(axis=0).\
    to_latex(
        column_format=col_format,
        hrules = True,
        sparse_index = True,
        multirow_align = "c",
        siunitx=False,
        convert_css = True,
        environment = "table",
        position_float = "centering",
        label = "app:tab:discarded-instances",
        caption = """
            Instances that were not considered with reason for being discarded.
        """,
        )
        
print(table4_str)

\begin{table}
\centering
\caption{
            Instances that were not considered with reason for being discarded.
        }
\label{app:tab:discarded-instances}
\begin{tabular}{@{}*{2}{l}*{6}{c}@{}}
\toprule
Instance & Set & 2 & 4 & 8 & 16 & 32 & 64 \\
\midrule
22433 & miplib2017 &  &  & \ref{selection-criterion:partial-tree-does-not-find-opt} & \ref{selection-criterion:partial-tree-does-not-find-opt} & \ref{selection-criterion:partial-tree-does-not-find-opt} & \ref{selection-criterion:partial-tree-does-not-find-opt} \\
air01 & miplib2 & \ref{selection-criterion:partial-tree-does-not-find-opt} & \ref{selection-criterion:partial-tree-does-not-find-opt} & \ref{selection-criterion:partial-tree-does-not-find-opt} & \ref{selection-criterion:partial-tree-does-not-find-opt} & \ref{selection-criterion:partial-tree-does-not-find-opt} & \ref{selection-criterion:partial-tree-does-not-find-opt} \\
app1-1 & miplib2017 &  &  &  &  & \ref{selection-criterion:partial-tree-does-not-find-opt} & \ref{sel

### DEBUG: Test Table 4 code and make sure "set" is properly identified

In [123]:
#### DEBUG
# df_rejection_reason[df_rejection_reason['NUM_WITH_OBJS'] != df_rejection_reason['NUM_WITH_CUTS']]
# df_rejection_reason[(df_rejection_reason['NUM_WITH_CUTS'] > 0) & (df_rejection_reason['DLB=DUB'] > 0) & (df_rejection_reason['OPTIMAL_SOLUTION_FOUND'] == 0)]
# df_rejection_reason[(df_rejection_reason['LP=DLB=DUB'] == 6)]

# inst = 'chromaticindex32-8_presolved'
# # df_rejection_reason.loc[inst]
# tmp = df_bb.loc[(inst,64)]
# tmp[25:50]

# len(df_rejection_reason[df_rejection_reason['SELECTED'] == True])
# inst = 'berlin_5_8_0_presolved'
# gap_df.loc[inst]
#df_rejection_reason.loc['bnatt400_presolved']

In [124]:
##### DEBUG: Verify "Set" col is correct
for inst in rejected_instance_list:
    curr_set = df_ipopt.loc[inst,'SET']
    has_error = False
    if isinstance(curr_set, pd.Series):
        # check that all sets are same, then just take first
        first_set = curr_set[0]
        for tmp_set in curr_set:
            if tmp_set != first_set:
                print("*** ERROR: not all sets are equal ({} != {})".format(first_set, tmp_set))
                has_error = True
                break
        curr_set = first_set
    ref_set = df_rejected_instances.loc[inst, 'Set']
    if ref_set != curr_set:
        print("*** ERROR: for inst {}, df_rej_inst set {} != df_ipopt set {}".format(inst, ref_set, curr_set))
        has_error = True
    
    if has_error:
        break

## Format Table 5: full gap closed results

In [125]:
# Format Table 5: full gap closed results
TABLE5 = all_gap_results_df.copy(deep=True)

# Set wins row to be integer valued
TABLE5.loc['Wins'] = TABLE5.loc['Wins'].apply(int_format)
# TABLE5.iloc[len(TABLE5)-1] = TABLE5.iloc[len(TABLE5)-1].apply(int_format)

# Move instance names into a column
TABLE5.reset_index(inplace=True, col_level=1)

TABLE5[('',"Instance")] = TABLE5[('',"Instance")].apply(remove_presolved_from_name)
TABLE5[('',"Instance")] = TABLE5[('',"Instance")].apply(tex_escape)

# If we are not using the automatic tex-escaper, we need to do it ourselves
for col in TABLE5.columns:
    # curr_col = '{' + tex_escape(col) + '}'
    curr_col = tex_escape(str(col))
    TABLE5.rename({col: curr_col}, inplace=True, axis=1)

# Finally, apply the desired style
    # format(formatter = int_format).\
table5_str = TABLE5.style.\
    hide(axis=0).\
    to_latex(
        column_format="""
	@{}l*{2}{S[table-format=4.0,table-auto-round,table-number-alignment=center]}
	*{2}{S[table-format=4.0,table-auto-round,table-number-alignment=center]}
	*{8}{S[table-auto-round]}
	@{}
        """,
        hrules = True,
        sparse_index = True,
        multirow_align = "c",
        siunitx=True,
        convert_css = True,
        environment = "table",
        position_float = "centering",
        label = "app:tab:gap-closed",
        caption = """
            Percent gap closed by instance for GMICs (G), VPCs (V), both VPCs and GMICs used together, 
            and the bound implied by the partial branch-and-bound tree with 64 leaf nodes (DB).
            Also shown are the sizes of the instances, the number of cuts added, and the percent gap closed by 
            \Gurobi{} at the root (after one round (GurF) and after the last round (GurL)). 
            Entries in which DB appears to be 0.00 are actually small strictly positive numbers.
        """,
        )

# Adjustbox environment sets width to pagewidth
# table2_str = add_adjustbox_environment(table2_str)

# Set default siunitx options for this table
table5_str = add_sisetup(table5_str)

# Add a midrule between the instances and 3 summary rows; the "5" is hand-coded but can be automated
table5_str = add_midrule(table5_str, -5)

print(table5_str)


{
\sisetup{
    table-alignment-mode = format,
    table-number-alignment = center,
    table-format = 2.2,
}
\begin{table}
\centering
\caption{
            Percent gap closed by instance for GMICs (G), VPCs (V), both VPCs and GMICs used together, 
            and the bound implied by the partial branch-and-bound tree with 64 leaf nodes (DB).
            Also shown are the sizes of the instances, the number of cuts added, and the percent gap closed by 
            \Gurobi{} at the root (after one round (GurF) and after the last round (GurL)). 
            Entries in which DB appears to be 0.00 are actually small strictly positive numbers.
        }
\label{app:tab:gap-closed}
\begin{tabular}{@{}l*{2}{S[table-format=4.0,table-auto-round,table-number-alignment=center]}
	*{2}{S[table-format=4.0,table-auto-round,table-number-alignment=center]}
	*{8}{S[table-auto-round]}
	@{}}
\toprule
\multicolumn{3}{r}{} & \multicolumn{2}{r}{# cuts} & \multicolumn{10}{r}{% gap closed} \\
{Instance} & {Row

## Format Table 6: "all" time/nodes results

In [126]:
# Format Table 6: "all" time/nodes results
TABLE6 = all_bb_results_df.copy(deep=True)

# Rename summary rows to reflect the set
rename_metrics_all = {metric : metric + ' (All)' for metric in bb_metrics}
TABLE6.rename(rename_metrics_all, inplace=True)

# Add summary rows from 6 trees set
summary_metrics_6trees = all6_bb_results_df.tail(3).copy(deep=True)
rename_metrics_6trees = {metric : metric + ' (6 trees)' for metric in bb_metrics}
summary_metrics_6trees.rename(rename_metrics_6trees, inplace=True)

TABLE6 = pd.concat([TABLE6, summary_metrics_6trees])

# Drop rows, cols, (time,V7)
TABLE6.drop([('','Rows'),('','Cols'),(node_col_header,map_cols_to_short_time[mintime_col])], axis=1, inplace=True)

# Set wins row to be integer valued
TABLE6.loc['Wins1 (All)'] = TABLE6.loc['Wins1 (All)'].apply(int_format)
TABLE6.loc['Wins7 (All)'] = TABLE6.loc['Wins7 (All)'].apply(int_format)
TABLE6.loc['Wins1 (6 trees)'] = TABLE6.loc['Wins1 (6 trees)'].apply(int_format)
TABLE6.loc['Wins7 (6 trees)'] = TABLE6.loc['Wins7 (6 trees)'].apply(int_format)
# TABLE6.iloc[len(TABLE6)-1] = TABLE6.iloc[len(TABLE6)-1].apply(int_format)

# Move instance names into a column
TABLE6.reset_index(inplace=True, col_level=1)

# Store indices of rows of 6-tree instances
six_trees_instances = list(all6_instances_dict.keys())
# mask = TABLE6[('','Instance')].isin(six_trees_instances)
# six_trees_indices = TABLE6.loc[mask, :].index.tolist()

# Remove presolved from name and escape
TABLE6[('',"Instance")] = TABLE6[('',"Instance")].apply(remove_presolved_from_name)
TABLE6[('',"Instance")] = TABLE6[('',"Instance")].apply(tex_escape)

# If we are not using the automatic tex-escaper, we need to do it ourselves
for col in TABLE6.columns:
    # curr_col = '{' + tex_escape(col) + '}'
    curr_col = tex_escape(str(col))
    TABLE6.rename({col: curr_col}, inplace=True, axis=1)

# Finally, apply the desired style
    # format(formatter = int_format).\
table6_str = TABLE6.style.\
    hide(axis=0).\
    to_latex(
        column_format="""
	@{}l % instance
	*{1}{S[table-format=4.0,table-auto-round,table-number-alignment=center]} % # cuts
	*{2}{S[table-format=4.2,table-auto-round]} % Gur1, Gur7
	*{2}{H} % V, Total
	*{2}{S[table-format=4.2,table-auto-round]} % V7, Total7
	*{3}{S[table-format=8.0,table-auto-round,table-number-alignment=center]} % Nodes
	@{}
        """,
        hrules = True,
        sparse_index = True,
        multirow_align = "c",
        siunitx=True,
        convert_css = True,
        environment = "table",
        position_float = "centering",
        label = "app:tab:bb",
        caption = """
            Time (in seconds) and number nodes taken to solve each instance.
            The table is sorted by column 4 (``V'' under ``Time (s)'').
            ``Gur1'' indicates \Gurobi{} run with one random seed.
            ``Gur7'' indicates the minimum from seven runs of \Gurobi{} with different random seeds.
        """,
        )

# Adjustbox environment sets width to pagewidth
# table6_str = add_adjustbox_environment(table6_str)

# Set default siunitx options for this table
table6_str = add_sisetup(table6_str)

# Add a midrule between the instances and 3 summary rows; the "6" is hand-coded but can be automated
table6_str = add_midrule(table6_str, -6)
table6_str = add_midrule(table6_str, -10)

# Add color to six tree instances
splitlines = table6_str.splitlines()
for i in range(len(splitlines)):
    line = splitlines[i]
    curr_line = line.split('&')
    if len(curr_line) > 0 and curr_line[0].strip()+'_presolved' in six_trees_instances:
        splitlines[i] = '\\rowcolor{lightgray!30} ' + line
table6_str = '\n'.join(splitlines).replace('NaN', '')

print(table6_str)


{
\sisetup{
    table-alignment-mode = format,
    table-number-alignment = center,
    table-format = 2.2,
}
\begin{table}
\centering
\caption{
            Time (in seconds) and number nodes taken to solve each instance.
            The table is sorted by column 4 (``V'' under ``Time (s)'').
            ``Gur1'' indicates \Gurobi{} run with one random seed.
            ``Gur7'' indicates the minimum from seven runs of \Gurobi{} with different random seeds.
        }
\label{app:tab:bb}
\begin{tabular}{@{}l % instance
	*{1}{S[table-format=4.0,table-auto-round,table-number-alignment=center]} % # cuts
	*{2}{S[table-format=4.2,table-auto-round]} % Gur1, Gur7
	*{2}{H} % V, Total
	*{2}{S[table-format=4.2,table-auto-round]} % V7, Total7
	*{3}{S[table-format=8.0,table-auto-round,table-number-alignment=center]} % Nodes
	@{}}
\toprule
{} & {# cuts} & \multicolumn{6}{r}{Time (s)} & \multicolumn{3}{r}{Nodes (\#)} \\
{Instance} & {V} & {Gur1} & {Gur7} & {V} & {Total} & {V7} & {Total7} & {Gur1} & {

## XXX Format Table X: "6 trees" time/nodes results

In [127]:
# Format Table 7: "6 trees" time/nodes results
TABLEX = all6_bb_results_df.copy(deep=True)

# Set wins row to be integer valued
TABLEX.loc['Wins1'] = TABLEX.loc['Wins1'].apply(int_format)
TABLEX.loc['Wins7'] = TABLEX.loc['Wins7'].apply(int_format)
# TABLEX.iloc[len(TABLEX)-1] = TABLEX.iloc[len(TABLEX)-1].apply(int_format)

# Move instance names into a column
TABLEX.reset_index(inplace=True, col_level=1)

TABLEX[('',"Instance")] = TABLEX[('',"Instance")].apply(remove_presolved_from_name)
TABLEX[('',"Instance")] = TABLEX[('',"Instance")].apply(tex_escape)

# If we are not using the automatic tex-escaper, we need to do it ourselves
for col in TABLEX.columns:
    # curr_col = '{' + tex_escape(col) + '}'
    curr_col = tex_escape(str(col))
    TABLEX.rename({col: curr_col}, inplace=True, axis=1)

# Finally, apply the desired style
    # format(formatter = int_format).\
tableX_str = TABLEX.style.\
    hide(axis=0).\
    to_latex(
        column_format="""@{}l*{2}{c}*{2}{c}H*{8}{c}@{}""",
        hrules = True,
        sparse_index = True,
        multirow_align = "c",
        siunitx=True,
        convert_css = True,
        environment = "table",
        position_float = "centering",
        label = "app:tab:bb-7trees",
        caption = """
  Time (in seconds) and number nodes taken to solve each of the instances for which all six branch-and-bound trees successfully yielded VPCs.
  %The columns with V1x are those in which we do not terminate the VPC computation as soon as the time exceeds \Gurobi{}'s time.  
  The table is sorted by column 4 (``V7'' under ``Time (s)'').
  ``Gur1'' indicates Gurobi run with one random seed.
  ``Gur7'' indicates the minimum from seven runs of Gurobi with different random seeds.
        """,
        )

# Adjustbox environment sets width to pagewidth
# tableX_str = add_adjustbox_environment(tableX_str)

# Set default siunitx options for this table
tableX_str = add_sisetup(tableX_str)

# Add a midrule between the instances and 3 summary rows; the "6" is hand-coded but can be automated
tableX_str = add_midrule(tableX_str, -6)

print(tableX_str)


{
\sisetup{
    table-alignment-mode = format,
    table-number-alignment = center,
    table-format = 2.2,
}
\begin{table}
\centering
\caption{
  Time (in seconds) and number nodes taken to solve each of the instances for which all six branch-and-bound trees successfully yielded VPCs.
  %The columns with V1x are those in which we do not terminate the VPC computation as soon as the time exceeds \Gurobi{}'s time.  
  The table is sorted by column 4 (``V7'' under ``Time (s)'').
  ``Gur1'' indicates Gurobi run with one random seed.
  ``Gur7'' indicates the minimum from seven runs of Gurobi with different random seeds.
        }
\label{app:tab:bb-7trees}
\begin{tabular}{@{}l*{2}{c}*{2}{c}H*{8}{c}@{}}
\toprule
\multicolumn{3}{r}{} & {# cuts} & \multicolumn{6}{r}{Time (s)} & \multicolumn{4}{r}{Nodes (\#)} \\
{Instance} & {Rows} & {Cols} & {V} & {Gur1} & {Gur7} & {V} & {Total} & {V7} & {Total7} & {Gur1} & {Gur7} & {V} & {V7} \\
\midrule
neos-501453 & 13 & 52 & 0 & 0.001000 & 0.001000 & 0.001

## Format Table 7: b&b summary by depth

In [128]:
# Format Table 7: summary of b&b results
TABLE7 = avg_bb_by_depth_df.copy(deep=True)

# Remove unnecessary entries
TABLE7.loc[(slice(None), slice(None), bb_metrics_by_depth[1:]),([time_col_header,node_col_header],map_cols_to_short_time[gur1time_col])] = ""
# TABLE7.loc[(slice(None), slice(None), bb_metrics_by_depth[2]),([time_col_header,node_col_header],map_cols_to_short_time[gur7time_col])] = ""

# Process the column with # inst to only report number of instances for each set
TABLE7.loc[(slice(None), slice(None), bb_metrics_by_depth[1:]), inst_col_name] = ""

for curr_class in bb_classes_by_depth:
    for curr_bucket in bb_buckets_by_depth:
        curr_name = (curr_class, curr_bucket, bb_metrics_by_depth[0])
        val = TABLE7.loc[curr_name, inst_col_name]
        TABLE7.loc[curr_name, inst_col_name] = \
            create_multirow_string(str(val), num_rows = 2, extra_format=r"\tablenum[table-format=3]")

# Set num wins in int format or enclose in braces (center)
# tmp_df = TABLE7.loc[(slice(None), slice(None), bb_metrics_by_depth[1:3]),time_col_header].applymap(int_format, num_digits=6)
tmp_df = TABLE7.loc[(slice(None), slice(None), bb_metrics_by_depth[1:3]),time_col_header].applymap(int_format, num_digits=4, add_phantom=True)
# tmp_df = TABLE7.loc[(slice(None), slice(None), bb_metrics_by_depth[1:3]),time_col_header].applymap(enclose_in_braces)
tmp_df.columns = pd.MultiIndex.from_product([[time_col_header],tmp_df.columns])
TABLE7.loc[(slice(None), slice(None), bb_metrics_by_depth[1:3]),time_col_header] = tmp_df

# tmp_df = TABLE7.loc[(slice(None), slice(None), bb_metrics_by_depth[1:3]),node_col_header].applymap(int_format, num_digits=6)
tmp_df = TABLE7.loc[(slice(None), slice(None), bb_metrics_by_depth[1:3]),node_col_header].applymap(int_format, num_digits=6, add_phantom=False)
# tmp_df = TABLE7.loc[(slice(None), slice(None), bb_metrics_by_depth[1:3]),node_col_header ].applymap(enclose_in_braces)
tmp_df.columns = pd.MultiIndex.from_product([[node_col_header],tmp_df.columns])
TABLE7.loc[(slice(None), slice(None), bb_metrics_by_depth[1:3]),node_col_header] = tmp_df

# Reset index to appear as cols
TABLE7.reset_index(inplace=True)

# Add new col combining class and bucket in one
class_bucket_col = "\multirow{2}{*}{\shortstack[l]{" + TABLE7['class'] + "\\\\\\relax " + TABLE7['bucket'] + "}}"
for i in range(len(class_bucket_col)):
    if i%len(bb_metrics_by_depth)!=0:
        class_bucket_col[i] = ""
TABLE7.drop(columns = ['class', 'bucket'], inplace = True, level = 0)
TABLE7.insert(loc=0, column="Set", value=class_bucket_col)

# Place column with # inst as second column
inst_col = TABLE7[inst_col_name]
TABLE7.drop(columns=[inst_col_name], inplace=True, level=0)
TABLE7.insert(loc=1, column=inst_col_name, value=inst_col)

# If we are not using the automatic tex-escaper, we need to do it ourselves
for col in TABLE7.columns:
    if isinstance(col, tuple):
        for lvl_ind, lvl_col in enumerate(col):
            curr_col = tex_escape(str(lvl_col))
            TABLE7.rename({lvl_col: curr_col}, inplace=True, axis=1, level=lvl_ind)
    else:
        # curr_col = '{' + tex_escape(col) + '}'
        curr_col = tex_escape(str(col))
        TABLE7.rename({col: curr_col}, inplace=True, axis=1)

# Finally, apply the desired style
    # format(formatter = int_format).\
table7_str = TABLE7.style.\
    hide(axis=0).\
    to_latex(
        column_format="""
        @{}l    % set
        c       % inst
        l       % stat
        *{3}{S[table-auto-round,table-format=4.2]}
        *{2}{S[table-auto-round,table-format=6.0]}
        @{}""",
        hrules = True,
        sparse_index = True,
        multirow_align = "c",
        siunitx=True,
        convert_css = True,
        environment = "table",
        position_float = "centering",
        label = "tab:bb-summary",
        caption = """
            Summary statistics for time to solve instances with branch-and-bound.
        """,
        )

# Add a midrule between the two sets; the "9" is hand-coded but can be automated
table7_str = add_midrule(table7_str, -41)
table7_str = add_midrule(table7_str, -33)
table7_str = add_midrule(table7_str, -25)
table7_str = add_midrule(table7_str, -17)
table7_str = add_midrule(table7_str, -9)

# Adjustbox environment sets width to pagewidth
table7_str = add_adjustbox_environment(table7_str)

# Set default siunitx options for this table
table7_str = add_sisetup(table7_str, table_format="4.2")

print(table7_str)


{
\sisetup{
    table-alignment-mode = format,
    table-number-alignment = center,
    table-format = 4.2,
}
\begin{table}
\centering
\caption{
            Summary statistics for time to solve instances with branch-and-bound.
        }
\label{tab:bb-summary}
\begin{adjustbox}{width=1\textwidth}
\begin{tabular}{@{}l    % set
        c       % inst
        l       % stat
        *{3}{S[table-auto-round,table-format=4.2]}
        *{2}{S[table-auto-round,table-format=6.0]}
        @{}}
\toprule
{Set} & {\# inst} & {metric} & \multicolumn{3}{r}{Time (s)} & \multicolumn{2}{r}{Nodes (\textbackslash{}\#)} \\
{} & {} & {} & {Gur1} & {V} & {Total} & {Gur1} & {V} \\
\midrule
\multirow{2}{*}{\shortstack[l]{2 leaves\\\relax [0,3600)}} & {\multirow[c]{2}{*}{\tablenum[table-format=3]{221}}} & Gmean & 49.942506 & 50.854976 & 51.258406 & 7525.647969 & 7646.135673 \\
 &  & Wins1 &  & {\tablenum[table-format=4.0]{68}\phantom{.00}} & {\tablenum[table-format=4.0]{45}\phantom{.00}} &  & {\tablenum[table-f

  TABLE7.loc[(slice(None), slice(None), bb_metrics_by_depth[1:]), inst_col_name] = ""
  TABLE7.loc[curr_name, inst_col_name] = \
  tmp_df = TABLE7.loc[(slice(None), slice(None), bb_metrics_by_depth[1:3]),time_col_header].applymap(int_format, num_digits=4, add_phantom=True)
  tmp_df = TABLE7.loc[(slice(None), slice(None), bb_metrics_by_depth[1:3]),node_col_header].applymap(int_format, num_digits=6, add_phantom=False)


## Format Table 8: objective + time analysis per instance

In [129]:
# Format Table 8: obj and time analysis
TABLE8 = obj_and_time_df.copy(deep=True)

# Move instance names into a column
TABLE8.reset_index(inplace=True)
TABLE8.drop('disj_terms',axis=1,inplace=True)

# Create new column index
TABLE8.columns = pd.MultiIndex.from_tuples(
    [('','Instance'),
    ('Objectives','Obj'),
    ('Objectives','Succ'),
    ('Objectives','Fails'),
    ('Objectives','\% fails'),
    ('Time (s)','Total'),
    ('Time (s)','(s) / obj'),
    ('Time (s)','(s) / cut')]
)

# Format instance column correctly
TABLE8[('',"Instance")] = TABLE8[('',"Instance")].apply(remove_presolved_from_name)
TABLE8[('',"Instance")] = TABLE8[('',"Instance")].apply(tex_escape)

# Format SKIP_CHAR correctly
for col in TABLE8.columns:
    TABLE8[col] = TABLE8[col].apply(enclose_in_braces, val_to_match=SKIP_CHAR)

# If we are not using the automatic tex-escaper, we need to do it ourselves
for col in TABLE8.columns:
    # curr_col = '{' + tex_escape(col) + '}'
    curr_col = tex_escape(str(col))
    TABLE8.rename({col: curr_col}, inplace=True, axis=1)

# Finally, apply the desired style
table8_str = TABLE8.style.\
    hide(axis=0).\
    to_latex(
        column_format="""
        @{}
        l
        *{3}{S[table-format=3.0,table-auto-round,table-number-alignment=center]}
        *{1}{S[table-format=2.1,table-auto-round,table-number-alignment=center]}
        *{1}{S[table-format=4.1,table-auto-round,table-number-alignment=center]}
        *{2}{S[table-format=4.1,table-auto-round,table-number-alignment=center]}
        @{}""",
        hrules = True,
        sparse_index = True,
        multirow_align = "c",
        siunitx=True,
        convert_css = True,
        environment = "table",
        position_float = "centering",
        label = "app:tab:obj-and-time-best",
        caption = """
            Information about objectives and time to generate cuts corresponding to the results in Table~\ref{app:tab:gap-closed}.
        """,
        )

# Adjustbox environment sets width to pagewidth
# table8_str = add_adjustbox_environment(table8_str)

table8_str = add_midrule(table8_str, -2)

# Set default siunitx options for this table
table8_str = add_sisetup(table8_str)

print(table8_str)


{
\sisetup{
    table-alignment-mode = format,
    table-number-alignment = center,
    table-format = 2.2,
}
\begin{table}
\centering
\caption{
            Information about objectives and time to generate cuts corresponding to the results in Table~
ef{app:tab:gap-closed}.
        }
\label{app:tab:obj-and-time-best}
\begin{tabular}{@{}
        l
        *{3}{S[table-format=3.0,table-auto-round,table-number-alignment=center]}
        *{1}{S[table-format=2.1,table-auto-round,table-number-alignment=center]}
        *{1}{S[table-format=4.1,table-auto-round,table-number-alignment=center]}
        *{2}{S[table-format=4.1,table-auto-round,table-number-alignment=center]}
        @{}}
\toprule
{} & \multicolumn{4}{r}{Objectives} & \multicolumn{3}{r}{Time (s)} \\
{Instance} & {Obj} & {Succ} & {Fails} & {\% fails} & {Total} & {(s) / obj} & {(s) / cut} \\
\midrule
10teams & 322.0 & 34.0 & 288.0 & 89.44099378881988 & 372.74 & 1.157577639751553 & 10.962941176470588 \\
23588 & 76.0 & 75.0 & 1.0 & 1

## Format Table 9: number of leaf nodes yielding the best result for each experiment per instance

In [130]:
# Format Table 9: frequency of when each size is best
TABLE9 = pd.concat([best_disj_gap_df,best_disj_time_df],axis=1)
TABLE9.drop('Best', axis=0, inplace=True)

# Create new column index
TABLE9.columns = pd.MultiIndex.from_tuples(
    [('Gap',col) for col in best_disj_gap_df.columns]
    + [('Time',col) for col in best_disj_time_df.columns]
)

# If we are not using the automatic tex-escaper, we need to do it ourselves
for col in TABLE9.columns:
    # curr_col = '{' + tex_escape(col) + '}'
    curr_col = tex_escape(str(col))
    TABLE9.rename({col: curr_col}, inplace=True, axis=1)

# Finally, apply the desired style
    # hide(axis=0).\
table9_str = TABLE9.style.\
    to_latex(
        column_format="""@{}l*4{S}*3{S}@{}""",
        hrules = True,
        sparse_index = True,
        multirow_align = "c",
        siunitx=True,
        convert_css = True,
        environment = "table",
        position_float = "centering",
        label = "app:tab:size",
        caption = """
            Number of leaf nodes yielding the best result for each experiment per instance.
        """,
        )

# Adjustbox environment sets width to pagewidth
# table9_str = add_adjustbox_environment(table9_str)

# table9_str = add_midrule(table9_str, -2)

# Set default siunitx options for this table
table9_str = add_sisetup(table9_str, table_format="3.0")

print(table9_str)


{
\sisetup{
    table-alignment-mode = format,
    table-number-alignment = center,
    table-format = 3.0,
}
\begin{table}
\centering
\caption{
            Number of leaf nodes yielding the best result for each experiment per instance.
        }
\label{app:tab:size}
\begin{tabular}{@{}l*4{S}*3{S}@{}}
\toprule
{} & \multicolumn{4}{r}{Gap} & \multicolumn{2}{r}{Time} \\
{} & {V} & {V+G} & {V+GurF} & {V+GurL} & {All} & {6 trees} \\
\midrule
No improvement & 197 & 99 & 74 & 95 & 162 & 105 \\
2 leaves & 0 & 11 & 59 & 41 & 29 & 24 \\
4 leaves & 3 & 13 & 71 & 41 & 32 & 25 \\
8 leaves & 5 & 21 & 63 & 35 & 19 & 16 \\
16 leaves & 4 & 21 & 83 & 48 & 24 & 19 \\
32 leaves & 16 & 37 & 77 & 48 & 32 & 25 \\
64 leaves & 107 & 146 & 128 & 93 & 20 & 19 \\
\bottomrule
\end{tabular}
\end{table}

}


## Format Table 10: density statistics

In [131]:
# Format Table 10: density statistics
TABLE10 = density_df.copy(deep=True)

# Make # inst and wins row int
tmp_df = TABLE10.iloc[0:2].applymap(int_format, num_digits=3, add_phantom=False)
TABLE10.iloc[0:2] = tmp_df
# tmp_df = TABLE10.loc['\# wins by time',:].apply(int_format, num_digits=2, add_phantom=False)
# TABLE10.loc['\# wins by time'] = tmp_df

# Finally, apply the desired style
    # hide(axis=0).\
table10_str = TABLE10.style.\
    to_latex(
        column_format="""@{}l*{6}{S[table-format=0.3,table-auto-round,table-number-alignment=center]}@{}""",
        hrules = True,
        sparse_index = True,
        multirow_align = "c",
        siunitx=True,
        convert_css = True,
        environment = "table",
        position_float = "centering",
        label = "app:tab:density",
        caption = """
            Statistics about the density of generated cuts broken down by partial tree size.
        """,
        )

# Adjustbox environment sets width to pagewidth
# table10_str = add_adjustbox_environment(table10_str)

# table10_str = add_midrule(table10_str, -2)

# Set default siunitx options for this table
table10_str = add_sisetup(table10_str, table_format="0.3")

print(table10_str)


{
\sisetup{
    table-alignment-mode = format,
    table-number-alignment = center,
    table-format = 0.3,
}
\begin{table}
\centering
\caption{
            Statistics about the density of generated cuts broken down by partial tree size.
        }
\label{app:tab:density}
\begin{tabular}{@{}l*{6}{S[table-format=0.3,table-auto-round,table-number-alignment=center]}@{}}
\toprule
{} & {V (2)} & {V (4)} & {V (8)} & {V (16)} & {V (32)} & {V (64)} \\
\midrule
\# inst w/VPCs and time < 3600s & {\tablenum[table-format=3.0]{265.0}} & {\tablenum[table-format=3.0]{275.0}} & {\tablenum[table-format=3.0]{263.0}} & {\tablenum[table-format=3.0]{264.0}} & {\tablenum[table-format=3.0]{251.0}} & {\tablenum[table-format=3.0]{235.0}} \\
\# wins by time & {\tablenum[table-format=3.0]{108.0}} & {\tablenum[table-format=3.0]{106.0}} & {\tablenum[table-format=3.0]{96.0}} & {\tablenum[table-format=3.0]{115.0}} & {\tablenum[table-format=3.0]{126.0}} & {\tablenum[table-format=3.0]{132.0}} \\
Avg min cut density & 

  tmp_df = TABLE10.iloc[0:2].applymap(int_format, num_digits=3, add_phantom=False)
 '{\\tablenum[table-format=3.0]{108.0}}']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  TABLE10.iloc[0:2] = tmp_df
 '{\\tablenum[table-format=3.0]{106.0}}']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  TABLE10.iloc[0:2] = tmp_df
 '{\\tablenum[table-format=3.0]{96.0}}']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  TABLE10.iloc[0:2] = tmp_df
 '{\\tablenum[table-format=3.0]{115.0}}']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  TABLE10.iloc[0:2] = tmp_df
 '{\\tablenum[table-format=3.0]{126.0}}']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  TABLE10.iloc[0:2] = tmp_df
 '{\\tablenum[table-format=3.0]{132.0}}']' has dtype incompatible with float64, please explicitly cast to a compa

## Format Table 11: failures

In [132]:
# Format Table 11: failures
TABLE11 = obj_fails_df.copy(deep=True)

# Make num inst rows int
tmp_df = TABLE11.iloc[0:5].applymap(int_format, num_digits=3, add_phantom=False)
TABLE11.iloc[0:5] = tmp_df

# Finally, apply the desired style
    # hide(axis=0).\
table11_str = TABLE11.style.\
    to_latex(
        column_format="""@{}l*{6}{S[table-format=2.2,table-auto-round,table-number-alignment=center]}@{}""",
        hrules = True,
        sparse_index = True,
        multirow_align = "c",
        siunitx=True,
        convert_css = True,
        environment = "table",
        position_float = "centering",
        label = "app:tab:objectives",
        caption = """
            Statistics about the objectives leading to failures, broken down by partial tree size used for cut generation.
        """,
        )

# Adjustbox environment sets width to pagewidth
# table11_str = add_adjustbox_environment(table11_str)

table11_str = add_midrule(table11_str, -4)
table11_str = add_midrule(table11_str, -8)
table11_str = add_midrule(table11_str, -13)
table11_str = add_midrule(table11_str, -15)

# Set default siunitx options for this table
table11_str = add_sisetup(table11_str, table_format="2.2")

print(table11_str)


{
\sisetup{
    table-alignment-mode = format,
    table-number-alignment = center,
    table-format = 2.2,
}
\begin{table}
\centering
\caption{
            Statistics about the objectives leading to failures, broken down by partial tree size used for cut generation.
        }
\label{app:tab:objectives}
\begin{tabular}{@{}l*{6}{S[table-format=2.2,table-auto-round,table-number-alignment=center]}@{}}
\toprule
{} & {V (2)} & {V (4)} & {V (8)} & {V (16)} & {V (32)} & {V (64)} \\
\midrule
\# inst w/obj & {\tablenum[table-format=3.0]{311.0}} & {\tablenum[table-format=3.0]{318.0}} & {\tablenum[table-format=3.0]{304.0}} & {\tablenum[table-format=3.0]{305.0}} & {\tablenum[table-format=3.0]{288.0}} & {\tablenum[table-format=3.0]{271.0}} \\
\# inst w/succ obj & {\tablenum[table-format=3.0]{306.0}} & {\tablenum[table-format=3.0]{317.0}} & {\tablenum[table-format=3.0]{299.0}} & {\tablenum[table-format=3.0]{300.0}} & {\tablenum[table-format=3.0]{283.0}} & {\tablenum[table-format=3.0]{263.0}} \\
\# 

  tmp_df = TABLE11.iloc[0:5].applymap(int_format, num_digits=3, add_phantom=False)
 '{\\tablenum[table-format=3.0]{306.0}}'
 '{\\tablenum[table-format=3.0]{21.0}}'
 '{\\tablenum[table-format=3.0]{5.0}}'
 '{\\tablenum[table-format=3.0]{31.0}}']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  TABLE11.iloc[0:5] = tmp_df
 '{\\tablenum[table-format=3.0]{317.0}}'
 '{\\tablenum[table-format=3.0]{14.0}}'
 '{\\tablenum[table-format=3.0]{1.0}}'
 '{\\tablenum[table-format=3.0]{42.0}}']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  TABLE11.iloc[0:5] = tmp_df
 '{\\tablenum[table-format=3.0]{299.0}}'
 '{\\tablenum[table-format=3.0]{28.0}}'
 '{\\tablenum[table-format=3.0]{5.0}}'
 '{\\tablenum[table-format=3.0]{31.0}}']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  TABLE11.iloc[0:5] = tmp_df
 '{\\tablenum[table-format=3.0]{300.0}}'
 '{\\tablenum[table-format=3.0]{27.0

## Format Table 12: active cuts

In [133]:
# Format Table 12: active cuts
TABLE12 = active_cuts_df.copy(deep=True)

# Finally, apply the desired style
    # hide(axis=0).\
table12_str = TABLE12.style.\
    to_latex(
        column_format="""@{}l*{6}{S[table-format=3.2,table-auto-round,table-number-alignment=center]}@{}""",
        hrules = True,
        sparse_index = True,
        multirow_align = "c",
        siunitx=True,
        convert_css = True,
        environment = "table",
        position_float = "centering",
        label = "app:tab:activity",
        caption = """
            Statistics about when generated cuts are active, broken down by partial tree size.
        """,
        )

# Adjustbox environment sets width to pagewidth
# table12_str = add_adjustbox_environment(table12_str)

# Set default siunitx options for this table
table12_str = add_sisetup(table12_str, table_format="2.2")

print(table12_str)


{
\sisetup{
    table-alignment-mode = format,
    table-number-alignment = center,
    table-format = 2.2,
}
\begin{table}
\centering
\caption{
            Statistics about when generated cuts are active, broken down by partial tree size.
        }
\label{app:tab:activity}
\begin{tabular}{@{}l*{6}{S[table-format=3.2,table-auto-round,table-number-alignment=center]}@{}}
\toprule
{} & {V+G (2)} & {V+G (4)} & {V+G (8)} & {V+G (16)} & {V+G (32)} & {V+G (64)} \\
\midrule
\% active GMIC & 44.085314 & 43.477877 & 42.170665 & 41.654040 & 40.862307 & 40.772512 \\
\% active VPC & 30.251908 & 30.814606 & 31.339695 & 35.479053 & 34.289254 & 32.310973 \\
\% cuts one-sided & 0.772678 & 0.718051 & 0.744452 & 1.114790 & 0.821596 & 0.792411 \\
\% active one-sided & 100.000000 & 100.000000 & 100.000000 & 100.000000 & 100.000000 & 100.000000 \\
\% cuts all ones & 11.897763 & 6.653099 & 7.849304 & 9.123057 & 8.463443 & 9.045098 \\
\% active all ones & 91.447368 & 84.782609 & 81.818182 & 76.397516 & 79.62

## Format Table 13: instances with best bb improvement from VPCs

In [134]:
# Format Table 13: "best" time/nodes results
TABLE13 = all_bb_results_df.copy(deep=True)

# Drop all rows in which (time_col_header, 'Gur7') is ''
TABLE13 = TABLE13[TABLE13[(time_col_header, 'Gur7')] != '']

# TABLE13[('','V7-Gur7')]
# Change column (time_col_header, V7) to be float valued
#TABLE13[(time_col_header, 'V7')] = TABLE13[(time_col_header, 'V7')].apply(float_format, num_digits=4)
TABLE13 = TABLE13.astype({(time_col_header, 'V7'): float})
TABLE13 = TABLE13.astype({(time_col_header, 'Gur7'): float})

# Add new column for difference between V7 and Gur7
TABLE13[('Time (s)','V7 - Gur7')] = TABLE13[('Time (s)','V7')] - TABLE13[('Time (s)','Gur7')]

# Sort by V7 - Gur7
TABLE13.sort_values(by=[(time_col_header,'V7 - Gur7')], inplace=True)

TABLE13.head(15)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,# cuts,Time (s),Time (s),Time (s),Time (s),Time (s),Time (s),Nodes (\#),Nodes (\#),Nodes (\#),Nodes (\#),Time (s)
Unnamed: 0_level_1,Rows,Cols,V,Gur1,Gur7,V,Total,V7,Total7,Gur1,Gur7,V,V7,V7 - Gur7
Instance,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
neos-942830_presolved,589.0,831.0,1.0,3600.0,3600.0,2978.357,2978.577,2978.357,2978.577,1027231.0,915212,1007419,1007419,-621.643
ic97_potential_presolved,998.0,726.0,2.0,1794.571,1562.595,1417.115,1417.605,1417.115,1417.605,770494.0,724638,677844,677844,-145.48
ran14x18_presolved,284.0,504.0,18.0,652.573,538.52,427.015,427.055,427.015,427.055,502202.0,429530,364246,364246,-111.505
Wins1,,,,,254.0,144.0,74.0,144.0,74.0,,278,215,215,-110.0
supportcase20_presolved,598.0,896.0,7.0,3600.0,3600.0,3554.368,3554.458,3554.368,3554.458,3990664.0,3401479,4253197,3990664,-45.632
rococoC10-001000_presolved,617.0,2483.0,21.0,191.324,172.505,127.68,128.8,127.68,128.8,32448.0,27089,27578,27578,-44.825
neos-1616732_presolved,1026.0,200.0,200.0,1281.413,1032.443,994.816,995.48,994.816,995.48,1271371.0,1017810,959081,959081,-37.627
p80x400b_presolved,456.0,768.0,4.0,431.141,359.929,333.356,333.966,333.356,333.966,246582.0,217281,189296,189296,-26.573
prod2_presolved,92.0,182.0,44.0,97.446,86.103,66.207,69.437,66.207,69.437,133462.0,118808,98807,98807,-19.896
gen-ip021_presolved,28.0,35.0,15.0,106.222,98.409,86.674,86.684,86.674,86.684,1834786.0,1764969,1750552,1750552,-11.735


In [135]:
### DEBUG DEBUG DEBUG
# inst = 'cost266-UUE_presolved'
# hawea instance
inst = 'neos-3592146-hawea_presolved'
if inst in all_bb_results_df.index:
    display(all_bb_results_df.loc[inst])

            Rows          765
            Cols         4205
# cuts      V              11
Time (s)    Gur1      528.934
            Gur7      428.467
            V         418.935
            Total     428.775
            V7        418.935
            Total7    428.775
Nodes (\#)  Gur1        27991
            Gur7        24794
            V           23821
            V7          23821
Name: neos-3592146-hawea_presolved, dtype: object

In [136]:
# Format Table 13: "best" time/nodes results
TABLE13 = all_bb_results_df.copy(deep=True)

# Drop all rows in which (time_col_header, 'Gur7') is ''
TABLE13 = TABLE13[TABLE13[(time_col_header, 'Gur7')] != '']

# TABLE13[('','V7-Gur7')]
# Change column (time_col_header, V7) to be float valued
#TABLE13[(time_col_header, 'V7')] = TABLE13[(time_col_header, 'V7')].apply(float_format, num_digits=4)
TABLE13 = TABLE13.astype({(time_col_header, 'V7'): float})
TABLE13 = TABLE13.astype({(time_col_header, 'Gur7'): float})

# Add new column for difference between V7 and Gur7
TABLE13[('Time (s)','V7 - Gur7')] = TABLE13[('Time (s)','V7')] - TABLE13[('Time (s)','Gur7')]

# Sort by V7 - Gur7
TABLE13.sort_values(by=[(time_col_header,'V7 - Gur7')], inplace=True)

# Rename summary rows to reflect the set
rename_metrics_all = {metric : metric + ' (All)' for metric in bb_metrics}
TABLE13.rename(rename_metrics_all, inplace=True)

# Add summary rows from 6 trees set
summary_metrics_6trees = all6_bb_results_df.tail(3).copy(deep=True)
rename_metrics_6trees = {metric : metric + ' (6 trees)' for metric in bb_metrics}
summary_metrics_6trees.rename(rename_metrics_6trees, inplace=True)

TABLE13 = pd.concat([TABLE13, summary_metrics_6trees])

# Drop rows, cols, (time,V7)
TABLE13.drop([('','Rows'),('','Cols'),(node_col_header,map_cols_to_short_time[mintime_col])], axis=1, inplace=True)

# Set wins row to be integer valued
TABLE13.loc['Wins1 (All)'] = TABLE13.loc['Wins1 (All)'].apply(int_format)
# TABLE13.loc['Wins7 (All)'] = TABLE13.loc['Wins7 (All)'].apply(int_format)
# TABLE13.loc['Wins1 (6 trees)'] = TABLE13.loc['Wins1 (6 trees)'].apply(int_format)
# TABLE13.loc['Wins7 (6 trees)'] = TABLE13.loc['Wins7 (6 trees)'].apply(int_format)
# TABLE13.iloc[len(TABLE13)-1] = TABLE13.iloc[len(TABLE13)-1].apply(int_format)

# Move instance names into a column
TABLE13.reset_index(inplace=True, col_level=1)

# Store indices of rows of 6-tree instances
six_trees_instances = list(all6_instances_dict.keys())
# mask = TABLE13[('','Instance')].isin(six_trees_instances)
# six_trees_indices = TABLE13.loc[mask, :].index.tolist()

# Remove presolved from name and escape
TABLE13[('',"Instance")] = TABLE13[('',"Instance")].apply(remove_presolved_from_name)
TABLE13[('',"Instance")] = TABLE13[('',"Instance")].apply(tex_escape)

# If we are not using the automatic tex-escaper, we need to do it ourselves
for col in TABLE13.columns:
    # curr_col = '{' + tex_escape(col) + '}'
    curr_col = tex_escape(str(col))
    TABLE13.rename({col: curr_col}, inplace=True, axis=1)

# Finally, apply the desired style
    # format(formatter = int_format).\
table13_str = TABLE13.style.\
    hide(axis=0).\
    to_latex(
        column_format="""
	@{}l % instance
	*{1}{S[table-format=4.0,table-auto-round,table-number-alignment=center]} % # cuts
	*{2}{S[table-format=4.2,table-auto-round]} % Gur1, Gur7
	*{2}{H} % V, Total
	*{2}{S[table-format=4.2,table-auto-round]} % V7, Total7
	*{3}{S[table-format=8.0,table-auto-round,table-number-alignment=center]} % Nodes
	@{}
        """,
        hrules = True,
        sparse_index = True,
        multirow_align = "c",
        siunitx=True,
        convert_css = True,
        environment = "table",
        position_float = "centering",
        label = "app:tab:bb",
        caption = """
            Time (in seconds) and number nodes taken to solve each instance.
            The table is sorted by column 4 (``V'' under ``Time (s)'').
            ``Gur1'' indicates \Gurobi{} run with one random seed.
            ``Gur7'' indicates the minimum from seven runs of \Gurobi{} with different random seeds.
        """,
        )

# Adjustbox environment sets width to pagewidth
# table13_str = add_adjustbox_environment(table13_str)

# Set default siunitx options for this table
table13_str = add_sisetup(table13_str)

# Add a midrule between the instances and 3 summary rows; the "6" is hand-coded but can be automated
table13_str = add_midrule(table13_str, -6)
table13_str = add_midrule(table13_str, -10)

# Add color to six tree instances
splitlines = table13_str.splitlines()
for i in range(len(splitlines)):
    line = splitlines[i]
    curr_line = line.split('&')
    if len(curr_line) > 0 and curr_line[0].strip()+'_presolved' in six_trees_instances:
        splitlines[i] = '\\rowcolor{lightgray!30} ' + line
table13_str = '\n'.join(splitlines).replace('NaN', '')

print(table13_str)


{
\sisetup{
    table-alignment-mode = format,
    table-number-alignment = center,
    table-format = 2.2,
}
\begin{table}
\centering
\caption{
            Time (in seconds) and number nodes taken to solve each instance.
            The table is sorted by column 4 (``V'' under ``Time (s)'').
            ``Gur1'' indicates \Gurobi{} run with one random seed.
            ``Gur7'' indicates the minimum from seven runs of \Gurobi{} with different random seeds.
        }
\label{app:tab:bb}
\begin{tabular}{@{}l % instance
	*{1}{S[table-format=4.0,table-auto-round,table-number-alignment=center]} % # cuts
	*{2}{S[table-format=4.2,table-auto-round]} % Gur1, Gur7
	*{2}{H} % V, Total
	*{2}{S[table-format=4.2,table-auto-round]} % V7, Total7
	*{3}{S[table-format=8.0,table-auto-round,table-number-alignment=center]} % Nodes
	@{}}
\toprule
{} & {# cuts} & \multicolumn{6}{r}{Time (s)} & \multicolumn{3}{r}{Nodes (\#)} & {Time (s)} \\
{Instance} & {V} & {Gur1} & {Gur7} & {V} & {Total} & {V7} & {Total7}

  TABLE13.loc['Wins1 (All)'] = TABLE13.loc['Wins1 (All)'].apply(int_format)


## Results for selected instances

In [137]:
gap_df.loc['bell5_presolved']

Unnamed: 0_level_0,NUM DISJ TERMS,ROWS,COLS,LP OBJ,WORST DISJ OBJ,IP OBJ,GMIC OBJ,ROOT OBJ,BEST DISJ OBJ,VPC OBJ,...,FIRST REF LAST_CUT_PASS % GAP CLOSED,FIRST REF+V LAST_CUT_PASS % GAP CLOSED,AVG REF FIRST_CUT_PASS % GAP CLOSED,AVG REF+V FIRST_CUT_PASS % GAP CLOSED,AVG REF LAST_CUT_PASS % GAP CLOSED,AVG REF+V LAST_CUT_PASS % GAP CLOSED,BEST REF FIRST_CUT_PASS % GAP CLOSED,BEST REF+V FIRST_CUT_PASS % GAP CLOSED,BEST REF LAST_CUT_PASS % GAP CLOSED,BEST REF+V LAST_CUT_PASS % GAP CLOSED
disj_terms,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0,34,56,8951800.0,-inf,8966406.492,,,-inf,,...,33.809335,0.0,13.678491,0.0,36.657885,0.0,13.678491,0.0,43.055081,0.0
2,2,34,56,8951800.0,8960364.0,8966406.492,8953822.0,8951800.0,8951800.0,8951800.0,...,33.809335,40.745574,13.678491,13.803061,36.657885,68.091391,13.678491,13.803061,43.055081,79.029718
4,4,34,56,8951800.0,9002488.0,8966406.492,8953822.0,8951800.0,8952218.0,8951982.0,...,33.809335,34.626473,13.678491,13.803061,36.657885,35.449018,13.678491,13.803061,43.055081,43.055081
8,8,34,56,8951800.0,9051342.0,8966406.492,8953822.0,8951800.0,8954813.0,8952218.0,...,33.809335,43.662785,13.678491,14.953913,36.657885,29.629773,13.678491,14.953913,43.055081,43.662785
16,16,34,56,8951800.0,9057624.0,8966406.492,8953822.0,8951800.0,8960520.0,8952218.0,...,33.809335,43.554487,13.678491,15.073378,36.657885,32.426133,13.678491,15.073378,43.055081,43.554487
32,32,34,56,8951800.0,9057624.0,8966406.492,8953822.0,8951800.0,8963611.0,8952860.0,...,33.809335,46.402124,13.678491,15.625972,36.657885,43.385217,13.678491,15.625972,43.055081,47.031583
64,64,34,56,8951800.0,9057624.0,8966406.492,8953822.0,8951800.0,8964179.0,8962583.0,...,33.809335,76.766036,13.678491,25.223423,36.657885,76.807401,13.678491,25.223423,43.055081,76.862555


In [138]:
df_preprocess.loc[[inst for inst in df_preprocess.index if 'fast' in inst]]

Unnamed: 0_level_0,INSTANCE,STRATEGY,ORIG LP OBJ,CLEANED LP OBJ,ORIG FIRST GUR NODES,CLEANED FIRST GUR NODES,ORIG BEST GUR NODES,CLEANED BEST GUR NODES,ORIG FIRST GUR TIME,CLEANED FIRST GUR TIME,...,vpc_version,cbc_version,clp_version,gurobi_version,cplex_version,ExitReason,end_time_string,time elapsed,instname,Unnamed: 137
INSTANCE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
fastxgemm-n2r6s0t2,fastxgemm-n2r6s0t2,536,27.0,27.0,517855,119036,517855,119036,2546.312,178.69,...,#e5b66ee,#d4272be,#8294096,10.02,22.1.1,SUCCESS,Wed Nov 8 21:15:39 2023,2725,fastxgemm-n2r6s0t2,DONE
fastxgemm-n2r7s4t1,fastxgemm-n2r7s4t1,536,27.0,27.000000000000004,95000,134187,95000,134187,934.834,361.011,...,#e5b66ee,#d4272be,#8294096,10.02,22.1.1,SUCCESS,Thu Nov 9 08:36:17 2023,1297,fastxgemm-n2r7s4t1,DONE
