In [33]:
import os
import json
import base64
from math import sqrt, pi

import ijson
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from multiprocessing import Pool

from tqdm.notebook import tqdm

from kcmc_instance import KCMC_Instance

In [34]:
# PARSING THE INSTANCES
# The file has no header row; each line is "<serialized instance>|<kcmc spec>".
instances = pd.read_csv('/data/instances.csv', sep='|', header=None)
instances.columns = ['serial', 'kcmc']

# Split the serial string on its first four ';' and drop the trailing piece,
# keeping only the leading header fields. `n` is passed by keyword because
# positional use is deprecated in pandas 1.x and rejected by pandas 2.x.
key_parts = instances['serial'].str.split(';', n=4).str[:-1]
size_fields = key_parts.str[1].str.split(' ')    # "<pois> <sensors> <sinks>"
radius_fields = key_parts.str[2].str.split(' ')  # "<communication_radius> <coverage_radius> <area_side>"

# Plain column assignment is used instead of `.loc[:, col] = ...`, whose
# in-place semantics differ between pandas versions.
instances['instance_key'] = key_parts.str.join('_').str.replace(' ', '_', regex=False)
instances['pois'] = size_fields.str[0].astype(int)
instances['sensors'] = size_fields.str[1].astype(int)
# The remaining size/radius fields are left as strings (no numeric cast).
instances['sinks'] = size_fields.str[2]
instances['communication_radius'] = radius_fields.str[0]
instances['coverage_radius'] = radius_fields.str[1]
instances['area_side'] = radius_fields.str[2]
instances['seed'] = key_parts.str[3].astype(int)

# K and M are read from fixed character positions 2 and 4 of the stripped spec.
# NOTE(review): this only works while both are single digits - TODO confirm.
kcmc_spec = instances['kcmc'].str.strip()
instances['K'] = kcmc_spec.str[2].astype(int)
instances['M'] = kcmc_spec.str[4].astype(int)
len(instances)

360

In [25]:
# Paths of every JSON result file found directly under results/optimizer
files_list = [
    'results/optimizer/' + entry
    for entry in filter(lambda name: name.endswith('.json'),
                        os.listdir('results/optimizer'))
]

def readfile(f):
    """Load one JSON result file and discard its bulky 'variables' entry.

    Args:
        f: Path of the JSON file to read.

    Returns:
        The decoded JSON object without the 'variables' key.
    """
    with open(f, 'r') as fin:
        data = json.load(fin)

    # REMOVE LARGE KEYS
    # A default is supplied so that a file lacking the key does not raise
    # KeyError and abort the whole parallel load.
    data.pop('variables', None)

    return data

# Parallel file read
# The context manager releases the worker processes even when a file fails to
# parse; all results are consumed before the block exits.
with Pool() as pool:
    records = list(tqdm(pool.imap_unordered(readfile, files_list), total=len(files_list)))
df = pd.DataFrame(records)

# The 'sensors' value loaded from the JSON is preserved as 'active_sensors';
# 'sensors' is then re-derived from the instance key (second number of the
# second ';'-separated field).
df['active_sensors'] = df['sensors']
df['sensors'] = df['key'].str.split(';').str[1].str.split(' ').str[1].astype(int)
df['K'] = df['kcmc_k'].astype(int)
df['M'] = df['kcmc_m'].astype(int)

df['#removed'] = df['sensors'].astype(int) - df['active_sensors'].astype(int)
df['%removed'] = df['#removed'].astype(int) / df['sensors'].astype(int)

# Values at or above this cutoff are turned into NaN.
# NOTE(review): presumably these are the solver's "infinite" placeholders - TODO confirm.
INFINITE_CUTOFF = 10000


def _solution_field(solution_info, keys, default=0.0):
    """Extract one numeric SolutionInfo field for every row.

    Args:
        solution_info: Series holding one SolutionInfo mapping per row.
        keys: Candidate spellings of the field name, tried in order.
        default: Value used when the row has no mapping or none of the keys.

    Returns:
        A float Series aligned with `solution_info`.
    """
    def pick(info):
        if isinstance(info, dict):
            for key in keys:
                if key in info:
                    return info[key]
        return default

    return solution_info.apply(pick).astype(float)


def _drop_infinite(values):
    """Replace every value >= INFINITE_CUTOFF with NaN."""
    return values.mask(values >= INFINITE_CUTOFF)


solution_info = df['json_solution'].str['SolutionInfo']

# Output column -> accepted key spellings. Gurobi's documented JSON solution
# format spells the gap key 'MIPGap'; the 'MipGap' spelling is kept as a
# fallback so files using it are still read.
solution_keys = {
    'ObjVal': ('ObjVal',),
    'ObjBound': ('ObjBound',),
    'ObjBoundC': ('ObjBoundC',),
    'MipGap': ('MIPGap', 'MipGap'),
}
for column, keys in solution_keys.items():
    df[column] = _drop_infinite(_solution_field(solution_info, keys))

# NodeCount is a plain counter, so the cutoff is not applied to it: a run that
# explored 10000 or more nodes must still contribute to the averages.
df['NodeCount'] = _solution_field(solution_info, ('NodeCount',))

df = df.drop(columns=['kcmc_k', 'kcmc_m'])
len(df), df.columns

  0%|          | 0/468 [00:00<?, ?it/s]

(468,
 Index(['time', 'gurobi_runtime', 'status_code', 'status', 'mip_gap',
        'gurobi_model_fingerprint', 'binary_variables', 'solutions_count',
        'node_count', 'simplex_iterations_count', 'json_solution',
        'objective_value', 'gurobi_model', 'gurobi_logs', 'key', 'pois',
        'sensors', 'sinks', 'coverage_radius', 'communication_radius',
        'random_seed', 'model', 'prep_stage', 'main_stage', 'time_limit',
        'threads', 'coverage_density', 'communication_density', 'preprocessing',
        'active_sensors', 'K', 'M', '#removed', '%removed', 'ObjVal',
        'ObjBound', 'ObjBoundC', 'MipGap', 'NodeCount'],
       dtype='object'))

In [26]:
# Column currently under inspection.
# Other candidates tried here: '%removed', '#removed', 'ObjVal'.
target_col = 'gurobi_runtime'

# (df[['pois', 'sensors', 'K', 'M', 'model', 'random_seed', target_col]]
#    .sort_values(['pois', 'sensors', 'K', 'M', 'model'])
#    .pivot(index=['pois', 'sensors', 'K', 'M', 'random_seed'],
#           columns=['model'], values=[target_col])
#    .reset_index(drop=False))

In [27]:
index = ['pois', 'sensors', 'K', 'M', 'model']


def _per_group(rows, column, label, how):
    """Aggregate one column over every `index` group.

    Args:
        rows: DataFrame containing the `index` columns and `column`.
        column: Name of the column to aggregate.
        label: Name given to the aggregated column in the result.
        how: Aggregation name handed to `GroupBy.agg` ('nunique' or 'mean').

    Returns:
        A flat DataFrame with the `index` columns plus one `label` column.
    """
    return rows.groupby(index)[column].agg(how).rename(label).reset_index()


objective = df['ObjVal'].astype(float)
optimal = df[df['status'] == 'OPTIMAL']

# Counts of distinct random seeds per group
num_samples = _per_group(df, 'random_seed', 'sample_size', 'nunique')
num_optimal = _per_group(optimal, 'random_seed', '#opt', 'nunique')
# "Integer" means the objective rounds to a whole number at one decimal place;
# NaN objectives fail the comparison and are excluded.
num_integer = _per_group(df[objective.round(1) % 1 == 0], 'random_seed', '#int', 'nunique')
# NaN != 0.0 evaluates to True, so missing objectives must be excluded
# explicitly or they would be counted as non-zero.
num_nzero = _per_group(df[objective.notna() & (objective != 0.0)],
                       'random_seed', '#non-zero', 'nunique')

# Averages over every run of the group
avg_prep = _per_group(df, '#removed', '#removed', 'mean')
avg_prep_p = _per_group(df, '%removed', '%removed', 'mean')
avg_nodes = _per_group(df, 'NodeCount', '#nodes', 'mean')

# Averages restricted to runs whose status is OPTIMAL
avg_lb = _per_group(optimal, 'ObjBound', 'LB', 'mean')
# NOTE(review): 'UB' is fed from ObjBoundC, which looks like a bound rather
# than an incumbent objective value - confirm this is the intended column.
avg_ub = _per_group(optimal, 'ObjBoundC', 'UB', 'mean')
avg_gap = _per_group(optimal, 'MipGap', '%gap', 'mean')
avg_time = _per_group(optimal, 'gurobi_runtime', 'time(s)', 'mean')

In [30]:
# Number of seeds each (configuration, model) pair is compared against when
# counting missing runs.
# NOTE(review): presumably the planned number of runs per pair - TODO confirm.
EXPECTED_SAMPLES = 10

config_cols = ['pois', 'sensors', 'K', 'M']
group_cols = config_cols + ['model']

# Start from every configuration present in the instance list so that
# configurations without any result still show up (with empty statistics).
grp = (instances[config_cols].drop_duplicates()
       .merge(num_samples, how='left', on=config_cols))

# Attach the per-(configuration, model) statistics; the order fixes the
# column order of the final table.
for stats in (num_optimal, num_integer, num_nzero,
              avg_nodes, avg_lb, avg_ub, avg_gap, avg_time,
              avg_prep, avg_prep_p):
    grp = grp.merge(stats, how='left', on=group_cols)

# Whole-column assignment is used instead of `grp.loc[:, cols] = ...`: since
# pandas 2.0 the `.loc` form writes into the existing float columns, which
# silently undoes the integer cast and is deprecated for the '-' placeholder.
int_cols = ['sample_size', '#opt', '#int', '#non-zero']
grp[int_cols] = grp[int_cols].fillna(0).astype(int)


null_cols = ['LB', 'UB', '%gap', 'time(s)', '#removed', '%removed']
grp[null_cols] = grp[null_cols].astype(float).round(3).fillna('-')

grp = grp.sort_values(group_cols).reset_index(drop=True)

# Total number of runs still missing to reach EXPECTED_SAMPLES in every row
shortfall = EXPECTED_SAMPLES - grp.loc[grp['sample_size'] < EXPECTED_SAMPLES, 'sample_size']
print('missing:', shortfall.sum())
# grp[grp['sample_size'] < 10]

missing: 492


In [32]:
# Summary rows of the single-flow model, without the identifying columns
(grp
    .loc[lambda table: table['model'] == 'minimal_flood_dinic__gurobi_single_flow']
    .drop(columns=[
        'pois', 'sensors', 'K', 'M',
        'model', 'sample_size', '#removed',
    ]))

Unnamed: 0,#opt,#int,#non-zero,#nodes,LB,UB,%gap,time(s),%removed
3,10,10,10,0.3,5.6,5.6,0.0,3.626,0.534
7,10,10,10,0.1,11.6,11.6,0.0,3.251,0.507
11,10,10,10,0.2,11.3,11.3,0.0,5.188,0.487
15,10,10,10,0.3,16.9,16.9,0.0,3.125,0.517
19,10,10,10,0.0,16.7,16.7,0.0,5.065,0.48
23,10,10,10,0.2,17.2,17.2,0.0,7.807,0.459
28,10,10,10,0.0,10.0,10.0,0.0,228.036,0.547
32,10,10,10,0.1,10.0,10.0,0.0,370.738,0.524
36,10,10,10,0.0,15.1,15.1,0.0,178.882,0.548
40,5,5,5,0.0,15.0,15.0,0.0,281.396,0.519
