In [31]:
import os
import json

import pandas as pd

from tqdm.notebook import tqdm

# Run configuration: ROOT is the directory scanned for result JSON files and
# TARGET_COUNT is the number of rows every instance key should end up with.
ROOT = 'gurobi_results'
TARGET_COUNT = 200

# Reference table of instance configurations.
# NOTE(review): absolute path -- confirm it is valid on the machine running this.
reference = pd.read_csv('/data/missing.csv')

# Derive a colon-separated 'key' string per row from its column values.
KEY_TEMPLATE = 'KCMC:{num_pois}:{num_sensors}:{num_sinks}:{area_side}:{covg_radius}:{comm_radius}'
reference.loc[:, 'key'] = reference.apply(lambda row: KEY_TEMPLATE.format(**row), axis=1)

# Every distinct reference key maps to the same expected row count.
objective_count = dict.fromkeys(reference['key'].unique(), TARGET_COUNT)

# Load every '.json' file found directly under ROOT; each file contributes
# one row to df_raw. The progress bar spans all directory entries.
records = []
for filename in tqdm(os.listdir(ROOT)):
    if filename.endswith('.json'):
        with open(os.path.join(ROOT, filename), 'r') as handle:
            records.append(json.load(handle))

df_raw = pd.DataFrame(records)

# Flatten the nested 'raw' and 'optimization' dicts into a tidy frame.
raw_part = df_raw['raw']
optimization_part = df_raw['optimization']
df = pd.DataFrame({
    # Normalise the 'KCMC_0.1' prefix to plain 'KCMC'.
    'key': raw_part.map(lambda raw: raw['key']).str.replace('KCMC_0.1', 'KCMC', regex=False),
    # The seed is the 4th ';'-separated field of the instance string.
    'seed': raw_part.map(lambda raw: raw['instance'].split(';', 4)[3]).astype(int),
    'runtime': optimization_part.map(lambda opt: opt['gurobi_runtime']).astype(float),
    'status': optimization_part.map(lambda opt: opt['status'])
})

# Top up the results so that every reference key has at least TARGET_COUNT
# rows. Keys with fewer rows receive placeholder rows with seed -1,
# runtime 0.0 and status 'INFEASIBLE'.
existing_counts = df['key'].value_counts()  # counted once, not one filter per key
to_add = []
for key in reference['key'].unique():
    has_count = int(existing_counts.get(key, 0))
    if has_count < TARGET_COUNT:
        to_add += ([{'key': key, 'seed': -1, 'runtime': 0.0, 'status': 'INFEASIBLE'}]
                   * (TARGET_COUNT - has_count))
if len(to_add) > 0:
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
    # pd.concat is the supported replacement and works on older versions too.
    df = pd.concat([df, pd.DataFrame(to_add)])
df = df.sort_values(['key', 'seed']).reset_index(drop=True).copy()

len(df), df.columns

  0%|          | 0/7799 [00:00<?, ?it/s]

(4000, Index(['key', 'seed', 'runtime', 'status'], dtype='object'))

In [33]:
# Number of rows per status value; this includes the placeholder 'INFEASIBLE'
# rows added when padding each key up to TARGET_COUNT.
df['status'].value_counts()

INFEASIBLE    2270
OPTIMAL        923
LIMIT          807
Name: status, dtype: int64

In [48]:
# Summarise 'runtime' per (key, status) pair in a single vectorised
# aggregation instead of looping over the groups in Python.
#   count  -> number of rows in the group ('size' counts rows, like len())
#   std    -> pandas' default sample standard deviation (NaN for 1-row groups)
agg_df = (
    df.groupby(['key', 'status'])
    .agg(
        count=('runtime', 'size'),
        min=('runtime', 'min'),
        median=('runtime', 'median'),
        max=('runtime', 'max'),
        mean=('runtime', 'mean'),
        std=('runtime', 'std'),
    )
    .reset_index()
    .sort_values(['key', 'status'])
    .reset_index(drop=True)
)
len(agg_df), agg_df.columns

(46,
 Index(['key', 'status', 'count', 'min', 'median', 'max', 'mean', 'std'], dtype='object'))

In [52]:
# OPTIMAL groups only: row count and mean runtime per key, rounded to 3 decimals.
agg_df.loc[agg_df['status'] == 'OPTIMAL', ['key', 'count', 'mean']].round(3)

Unnamed: 0,key,count,mean
1,KCMC:10:100:1:447:100:150,116,3.919
3,KCMC:10:125:1:447:100:150,155,8.851
5,KCMC:10:150:1:447:100:150,174,19.788
8,KCMC:10:50:1:447:100:150,20,0.452
10,KCMC:10:75:1:447:100:150,73,1.403
12,KCMC:20:100:1:447:100:150,79,15.217
15,KCMC:20:125:1:447:100:150,97,30.315
18,KCMC:20:150:1:447:100:150,44,39.69
20,KCMC:20:50:1:447:100:150,3,2.34
22,KCMC:20:75:1:447:100:150,35,5.872


In [53]:
# LIMIT groups only: how many rows per key ended with status 'LIMIT'.
agg_df.loc[agg_df['status'] == 'LIMIT', ['key', 'count']]

Unnamed: 0,key,count
14,KCMC:20:125:1:447:100:150,22
17,KCMC:20:150:1:447:100:150,103
24,KCMC:30:100:1:447:100:150,11
27,KCMC:30:125:1:447:100:150,90
30,KCMC:30:150:1:447:100:150,135
35,KCMC:40:100:1:447:100:150,24
38,KCMC:40:125:1:447:100:150,87
41,KCMC:40:150:1:447:100:150,125
43,KCMC:50:125:1:447:100:150,86
45,KCMC:50:150:1:447:100:150,124
