## Data Preparation

In [1]:
import pandas as pd

from tqdm.notebook import tqdm
import plotly.graph_objects as go

# Columns whose combination identifies one experiment group.
experiment_parameters = [
    'pois', 'sensors',
    'k', 'm',
    'heuristic_name', 'gurobi_model_name',
]
expected_instances = 10


def _heuristic_prefix(full_name):
    """Return the text before '_gurobi', or '' when the name starts with 'gurobi'."""
    prefix = full_name.split('_gurobi')[0]
    if prefix.startswith('gurobi'):
        return ''
    return prefix


def _bare_model_name(full_name):
    """Return 'gurobi' followed by whatever comes after the last 'gurobi' in the name."""
    return 'gurobi' + full_name.split('gurobi')[-1]


df = pd.read_parquet('results.pq')

# Both derived columns are computed from the raw combined name, so take a
# reference to it before `gurobi_model_name` is overwritten.
combined_names = df['gurobi_model_name']
df['heuristic_name'] = combined_names.map(_heuristic_prefix)
df['gurobi_model_name'] = combined_names.map(_bare_model_name)

len(df), sorted(df.columns)

(3542,
 ['area_side',
  'communication_radius',
  'coverage_radius',
  'gurobi_binary_variables_count',
  'gurobi_bound',
  'gurobi_bound_c',
  'gurobi_columns_count',
  'gurobi_continuous_variables_count',
  'gurobi_heuristic_objective_value',
  'gurobi_initial_binary_variables_count',
  'gurobi_initial_columns_count',
  'gurobi_initial_continuous_variables_count',
  'gurobi_initial_integer_variables_count',
  'gurobi_initial_non_zero_count',
  'gurobi_initial_rows_count',
  'gurobi_integer_variables_count',
  'gurobi_logs',
  'gurobi_mip_gap',
  'gurobi_model_name',
  'gurobi_node_count',
  'gurobi_non_zero_count',
  'gurobi_objective_value',
  'gurobi_optimal',
  'gurobi_presolve_removed_columns',
  'gurobi_presolve_removed_rows',
  'gurobi_presolve_time',
  'gurobi_rows_count',
  'gurobi_run_time',
  'gurobi_setup_time',
  'gurobi_simplex_iterations_count',
  'gurobi_solution',
  'gurobi_solutions_count',
  'gurobi_variable_x_size',
  'gurobi_variable_y_size',
  'heuristic_name',
 

In [2]:
# Collapse the per-instance Gurobi rows into one summary row per experiment,
# where an experiment is one combination of `experiment_parameters`.
results = []

for experiment, dfex in tqdm(df.groupby(experiment_parameters)):

    # NOTE: experiments with fewer than `expected_instances` rows are not
    # filtered out (for now); every group contributes a summary row.

    # Rows flagged as solved to optimality
    opt = dfex[dfex['gurobi_optimal']]

    # Create the new result, starting with the experiment's own parameters
    result = dict(zip(experiment_parameters, experiment))

    # QUANTIFIERS
    result['num_optimal'] = len(opt)
    result['num_integer_solution'] = int((dfex['gurobi_solutions_count'] > 0).sum())
    # Every instance has a possible solution, so it is always the same as the
    # number of instances
    result['num_nonzero_solution'] = len(dfex)

    # GLOBAL MEANS
    # Read straight from the group frame: looking the group's labels up again
    # in `df` is redundant and would duplicate rows if the index of `df` ever
    # contained repeated labels.
    result['mean_nodes'] = dfex['gurobi_node_count'].mean()
    result['mean_lower_bound'] = dfex['gurobi_bound'].mean()
    result['mean_upper_bound'] = dfex['gurobi_bound_c'].mean()

    # Only gaps smaller than the number of sensors enter the mean
    # (presumably this drops infinite / degenerate gaps -- TODO confirm).
    gaps = dfex.loc[dfex['gurobi_mip_gap'] < dfex['sensors'], 'gurobi_mip_gap']
    result['mean_gap'] = gaps.mean()

    # GUROBI OPTIMAL MEANS
    # NaN when the experiment has no optimal row.
    result['mean_time'] = opt['gurobi_run_time'].mean()

    results.append(result)

results = pd.DataFrame(results).sort_values(experiment_parameters).reset_index(drop=True)
# Display only: `results` itself keeps its NaN values.
results.fillna('-')

  0%|          | 0/359 [00:00<?, ?it/s]

Unnamed: 0,pois,sensors,k,m,heuristic_name,gurobi_model_name,num_optimal,num_integer_solution,num_nonzero_solution,mean_nodes,mean_lower_bound,mean_upper_bound,mean_gap,mean_time
0,100,100,1,1,,gurobi_multi_flow,10,10,10,0.5,5.600,5.600,0.0,4.695259
1,100,100,1,1,,gurobi_single_flow,10,10,10,0.5,5.600,5.600,0.0,4.779089
2,100,100,1,1,,gurobi_y_binary_multi_flow,10,10,10,0.2,5.600,5.600,0.0,11.692959
3,100,100,1,1,,gurobi_y_binary_single_flow,10,10,10,0.3,5.600,5.600,0.0,11.799263
4,100,100,1,1,dinic,gurobi_multi_flow,10,10,10,0.3,5.600,5.600,0.0,0.979186
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
354,200,500,3,3,dinic,gurobi_single_flow,9,9,9,0.0,15.000,15.000,0.0,1647.956253
355,200,500,3,3,max_flood,gurobi_multi_flow,0,6,7,0.0,0.000,0.000,1.0,-
356,200,500,3,3,max_flood,gurobi_single_flow,6,6,8,0.0,13.125,13.125,0.0,2326.64943
357,200,500,3,3,min_flood,gurobi_multi_flow,0,6,7,0.0,0.000,0.000,1.0,-


In [3]:
# Write one CSV per (model, heuristic) pair. An empty heuristic name yields a
# file called '<model>..csv'.
for (model, heuristic), df_res in results.groupby(['gurobi_model_name', 'heuristic_name']):
    target = f'results/{model}.{heuristic}.csv'
    df_res.to_csv(target, index=None)

In [4]:
def _parse_instance_key(key):
    """Split `key` on ';' and then on single spaces, returning every token as an int."""
    return [int(token) for part in key.split(';') for token in part.split(' ')]


def _iterations_of(name):
    """Integer after the last '_' when `name` ends with a digit, otherwise 0."""
    if name[-1].isdigit():
        return int(name.rsplit('_', 1)[-1])
    return 0


def _strip_iterations(name):
    """Drop the trailing '_<suffix>' when `name` ends with a digit; otherwise return it unchanged."""
    if name[-1].isdigit():
        return name.rsplit('_', 1)[0]
    return name


# The optimizer log is tab-separated and has no header row.
prep = pd.read_csv('results/optimizer.csv', header=None, sep='\t')
prep.columns = [
    'instance_key', 'k', 'm', 'heuristic_name',
    'time_us', 'valid', 'size', 'fraction', 'solution',
]

# Expand the instance key into one integer column per field.
keys = pd.DataFrame(
    [_parse_instance_key(key) for key in prep['instance_key']],
    index=prep.index,
)
keys.columns = [
    'pois', 'sensors', 'sinks',
    'area_side', 'coverage_radius', 'communication_radius',
    'random_seed',
]
prep = prep.merge(keys, left_index=True, right_index=True)

# These rows carry no Gurobi model; the empty column lets them be grouped by
# the same `experiment_parameters` as the Gurobi results.
prep['gurobi_model_name'] = ''

# Both columns are derived from the raw heuristic name, so compute them
# before the name column is overwritten.
raw_heuristic = prep['heuristic_name']
iterations = raw_heuristic.map(_iterations_of)
base_name = raw_heuristic.map(_strip_iterations)
prep['iterations'] = iterations
prep.loc[:, 'heuristic_name'] = base_name

In [5]:
# One summary row per experiment group of the heuristic runs.
prep_results = []
for experiment, dfex in tqdm(prep.groupby(experiment_parameters)):

    # Only runs marked 'OK' in the `valid` column enter the statistics.
    opt = dfex.loc[dfex['valid'] == 'OK']

    result = dict(zip(experiment_parameters, experiment))
    result.update(
        num_optimal=len(opt),
        mean_lower_bound=opt['size'].mean(),
        mean_iterations=opt['iterations'].mean(),
        # `time_us` is divided by one million (presumably microseconds -> seconds).
        mean_time=opt['time_us'].mean() / 1_000_000,
    )

    prep_results.append(result)

prep_result = (
    pd.DataFrame(prep_results)
    .sort_values(experiment_parameters)
    .reset_index(drop=True)
)

  0%|          | 0/216 [00:00<?, ?it/s]

In [6]:
# Write one CSV per heuristic name.
for heuristic, df_res in prep_result.groupby('heuristic_name'):
    target = f'results/{heuristic}.csv'
    df_res.to_csv(target, index=None)

In [21]:
import os

def get_column(col_name):
    """Collect one column from every summary CSV in ``results/`` into a wide table.

    Every ``*.csv`` file in the ``results`` directory is read, except
    ``optimizer.csv`` and hidden files. From each file the key columns
    ``pois``, ``sensors``, ``k``, ``m`` plus ``col_name`` are kept, and the
    ``col_name`` column is renamed after the file (``'..'`` collapsed to
    ``'.'`` and the ``.csv`` suffix removed).

    Files are processed in sorted name order. The first file is the base
    table and every later file is left-joined onto it by the key columns, so
    key combinations missing from the first file do not appear in the result.

    Parameters
    ----------
    col_name : str
        Name of the column to extract from each file.

    Returns
    -------
    pandas.DataFrame or None
        The key columns followed by one column per file, or ``None`` when no
        matching file exists.

    Raises
    ------
    KeyError
        If a file lacks one of the key columns or ``col_name``.
    """
    keys = ['pois', 'sensors', 'k', 'm']
    all_data = None
    # os.listdir() returns names in arbitrary order. Sorting makes the base
    # table of the left joins, and the resulting column order, reproducible.
    for file in sorted(os.listdir('results')):
        if not file.endswith('.csv'): continue
        if file == 'optimizer.csv': continue
        if file.startswith('.'): continue
        data = pd.read_csv('results/'+file)
        data = data[keys + [col_name]]
        data.columns = keys + [file.replace('..', '.').replace('.csv', '')]
        if all_data is None:
            all_data = data
        else:
            # Join explicitly on the keys so a value column can never be
            # picked up as a join key.
            all_data = all_data.merge(data, on=keys, how='left')
    return all_data

In [23]:
import plotly.graph_objects as go
import numpy as np

In [29]:
# Mean time per model/heuristic, one column per results file.
tempo = get_column('mean_time')

# Split the value columns into Gurobi models and stand-alone heuristics.
key_cols = ['pois', 'sensors', 'k', 'm']
value_cols = [c for c in tempo.columns if c not in key_cols]
cols_gurobi = sorted(c for c in value_cols if 'gurobi' in c)
cols_heur = sorted(c for c in value_cols if 'gurobi' not in c)

# One trace per Gurobi column, plotted against the number of sensors.
gurobi_traces = [
    go.Scatter(x=tempo['sensors'], y=tempo[col], name=col)
    for col in cols_gurobi
]
fig = go.Figure(data=gurobi_traces)
fig.show()

In [35]:
# Same plot for the heuristic columns, leaving out every column whose name
# contains 'flood'.
heur_traces = []
for col in cols_heur:
    if 'flood' in col:
        continue
    heur_traces.append(go.Scatter(x=tempo['sensors'], y=tempo[col], name=col))

fig = go.Figure(data=heur_traces)
fig.show()

In [33]:
# Mean lower bound per model/heuristic, one column per results file.
qualidade = get_column('mean_lower_bound')

# Plot every Gurobi and heuristic column against the number of sensors.
quality_traces = [
    go.Scatter(x=qualidade['sensors'], y=qualidade[col], name=col)
    for col in cols_gurobi + cols_heur
]
fig = go.Figure(data=quality_traces)
fig.show()

# GENERIC

In [None]:
def get_model_results(heuristic_name, model_name,
                      max_k=5, max_m=5,
                      file_name='results.pq', expected_instances=10,
                      verbose=True):
    """Load raw results and aggregate them per experiment group.

    Rows are filtered to one heuristic / Gurobi model pair and to ``k`` and
    ``m`` no larger than the given maxima. They are then grouped by
    ``pois``, ``sensors``, ``sinks``, ``k`` and ``m``.

    Every per-group statistic is joined onto the aggregate table by the
    grouping keys. A group with no optimal row, no integer solution or no
    usable gap therefore keeps its own row: counts become 0 and means become
    NaN, and the values of the other groups are not shifted.

    Parameters
    ----------
    heuristic_name : str
        Value to match in the ``heuristic_name`` column. The parquet file is
        assumed to already contain that column -- TODO confirm.
    model_name : str
        Value to match in the ``gurobi_model_name`` column.
    max_k, max_m : int
        Inclusive upper bounds for the ``k`` and ``m`` columns.
    file_name : str
        Parquet file to read.
    expected_instances : int
        Number of rows every group must have.
    verbose : bool
        Print row counts while processing.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        The filtered per-instance rows, and the aggregate table with columns
        ``num_instances``, ``num_optimal``, ``num_integer_solution``,
        ``num_nonzero_solution``, ``mean_nodes``, ``mean_lower_bound``,
        ``mean_upper_bound``, ``mean_gap`` and ``mean_gurobi_time`` after the
        grouping keys.

    Raises
    ------
    AssertionError
        If the number of filtered rows is not ``expected_instances`` times
        the number of groups.
    """

    # DATA PREP ---------------------------------------------
    df = pd.read_parquet(file_name)
    all_data = len(df)
    index = [
        'pois', 'sensors', 'sinks',
        # 'area_side', 'coverage_radius', 'communication_radius',
        'k', 'm'
    ]

    def _attach(table, per_group):
        """Left-join per-group statistics onto `table` by the grouping keys."""
        return table.merge(per_group.reset_index(), on=index, how='left')

    def _attach_seed_count(table, frame, name):
        """Attach the number of distinct seeds per group in `frame`; absent groups count 0."""
        counts = frame.groupby(index)['random_seed'].nunique().rename(name)
        table = _attach(table, counts)
        table[name] = table[name].fillna(0).astype(int)
        return table

    # FILTERS AND METADATA ---------------------------------

    # Get only the desired heuristic, model, and K and M no larger than the max specified
    df = df[  (df['heuristic_name'] == heuristic_name)
            & (df['gurobi_model_name'] == model_name)
            & (df['k'] <= max_k) & (df['m'] <= max_m)].copy()

    # Prepare the aggregate result: one row per group, sorted by the grouping
    # keys, with the number of distinct instances (seeds) in each group
    agg = (
        df.groupby(index)['random_seed'].nunique()
        .rename('num_instances').reset_index()
    )

    # Apply the filter (keeps the rows of the groups above, with a fresh index)
    df = df.merge(agg[index])
    if verbose: print(f'Filtered {len(df)} of {all_data} rows')
    assert len(df) == expected_instances * len(agg), (len(df), (expected_instances * len(agg)))


    # QUANTIFIERS ---------------------------------------

    # Get the Gurobi Optimal Solution Found column
    opt = df[df['gurobi_optimal']]
    agg = _attach_seed_count(agg, opt, 'num_optimal')

    if verbose: print(f'Have {len(opt)} optimal results of {len(df)}')

    # Get the Gurobi Integer Solution Found column
    agg = _attach_seed_count(agg, df[df['gurobi_solutions_count'] > 0],
                             'num_integer_solution')

    # Get the Gurobi Non-Zero Solution Found column
    # Every instance has a possible solution, so it is always the same as the
    # number of instances
    agg['num_nonzero_solution'] = agg['num_instances']


    # GLOBAL AVERAGES -------------------------------------
    means = (
        df.groupby(index)[['gurobi_node_count', 'gurobi_bound', 'gurobi_bound_c']].mean()
        .rename(columns={'gurobi_node_count': 'mean_nodes',
                         'gurobi_bound': 'mean_lower_bound',
                         'gurobi_bound_c': 'mean_upper_bound'})
    )
    agg = _attach(agg, means)

    # Only gaps smaller than the number of sensors enter the mean. The mask is
    # built from the same frame it filters, so it stays aligned after dropna().
    gaps = df[index+['gurobi_mip_gap']].dropna()
    gaps = gaps[gaps['gurobi_mip_gap'] < gaps['sensors']]
    agg = _attach(agg, gaps.groupby(index)['gurobi_mip_gap'].mean().rename('mean_gap'))


    # GUROBI OPTIMAL AVERAGES -------------------------------------
    agg = _attach(
        agg,
        opt.groupby(index)['gurobi_run_time'].mean().rename('mean_gurobi_time'),
    )

    return df, agg

# SBPO 2022

In [None]:
df, agg = get_model_results('None', 'gurobi_y_binary_multi_flow', max_k=2, max_m=2)

int_cols = ['pois', 'sensors', 'k', 'm', 'num_optimal', 'num_integer_solution', 'num_nonzero_solution']
float_cols = ['mean_nodes', 'mean_lower_bound', 'mean_upper_bound', 'mean_gap', 'mean_gurobi_time']

# Keep only the reported columns; this also leaves out `num_instances`.
agg = agg[int_cols+float_cols].copy()

# Format column by column and replace each column outright. Writing the
# formatted values back through `.loc[:, cols]` would push ints and '-'
# strings into the existing numeric columns, and `DataFrame.applymap` is
# deprecated in recent pandas.
for col in int_cols:
    agg[col] = agg[col].map(lambda v: '-' if pd.isna(v) else int(v))
for col in float_cols:
    agg[col] = agg[col].map(lambda v: '-' if pd.isna(v) else round(float(v), 2))

# Short headers used in the published table.
agg.columns = ['|P|', '|I|', 'K', 'M', '#opt', '#int', '#non-zero', '#nodes', 'LB', 'UB', '%GAP', 'time(s)']

#print(agg.to_latex(index=None))
agg