In [1]:
import os
import json
import multiprocessing

import pandas as pd

from gurobi_models import KCMC_Result, GurobiModelWrapper

from tqdm.notebook import tqdm
from datetime import timedelta

In [2]:
def parse_file(file, results_dir='/data/results'):
    """Parse one JSON result file into a ``KCMC_Result`` object.

    Parameters
    ----------
    file : str
        Name of the JSON file, relative to ``results_dir``.
    results_dir : str, optional
        Directory holding the result files. Defaults to ``/data/results``,
        the location this notebook reads from.

    Returns
    -------
    KCMC_Result
        The result assembled from the instance key, the solver output and
        the presolve statistics extracted from the Gurobi logs.

    Raises
    ------
    AssertionError
        If the presolve row/column counts parsed from the logs are inconsistent
        (initial - removed != final).
    KeyError
        If a mandatory field is missing from the JSON document.
    """

    # Load the raw data from the file
    with open(os.path.join(results_dir, file), 'r') as fin:
        raw_data = json.load(fin)

    # Parse the PRESOLVE data
    (ini_rows,  ini_cols,  ini_n0,
     ini_cont,  ini_int,   ini_bin,
     end_rows,  end_cols,  end_n0,
     end_cont,  end_int,   end_bin,
     prem_rows, prem_cols, pre_t,
     gurobi_heuristic_objective_value) = KCMC_Result.parse_presolve(raw_data['gurobi_logs'])

    # Sanity check: whatever presolve removed must account for the size difference
    assert (ini_rows-prem_rows) == end_rows, 'ROWS'
    assert (ini_cols-prem_cols) == end_cols, 'COLUMNS'

    # Parse some other fields.
    # The key is ';'-separated; fields 1 and 2 each hold three space-separated values.
    key_fields = raw_data['key'].split(';')
    pois, sensors, sinks = key_fields[1].split(' ')
    area_s, cov_r, com_r = key_fields[2].split(' ')

    # Total setup time = model creation + every constraint group + every variable group
    setup_times = raw_data['time']['setup']
    gurobi_setup_time = sum([setup_times['model']]
                      + list(setup_times['constraints'].values())
                      + list(setup_times['vars'].values()))

    # Frequently accessed sub-documents
    preprocessing = raw_data['preprocessing']
    solution_info = raw_data['json_solution']['SolutionInfo']

    # Parse the Gurobi solution to the KCMC problem
    solution = KCMC_Result.get_solution(raw_data['variables']['x'], int(sensors))

    # Emit a KCMC_Result object
    return KCMC_Result(

        # Key Values ----------------------
        pois=int(pois),
        sensors=int(sensors),
        sinks=int(sinks),

        area_side=int(area_s),
        coverage_radius=int(cov_r),
        communication_radius=int(com_r),

        random_seed=int(raw_data['random_seed']),

        k=int(raw_data['kcmc_k']),
        m=int(raw_data['kcmc_m']),

        heuristic_name=str(preprocessing.get('method', None)),
        y_binary=bool(raw_data['gurobi_y_binary']),
        gurobi_model_name=str(raw_data['gurobi_model']),

        # Main results --------------------
        # When no preprocessing solution is recorded, fall back to a string of
        # '1' characters with one character per sensor.
        heuristic_solution=str(preprocessing.get('solution', '1'*int(sensors))),
        # heuristic_objective_value
        # heuristic_solution_size
        # heuristic_solution_quality
        gurobi_optimal=bool(raw_data['status'] == 'OPTIMAL'),
        gurobi_heuristic_objective_value=float(gurobi_heuristic_objective_value),
        gurobi_objective_value=float(raw_data['objective_value']),
        gurobi_solution=solution,
        solution=solution,
        # solution_size
        # solution_quality

        # Time ----------------------------
        # 'runtime_us' is divided by 1e6 (microseconds to seconds, going by the key name)
        heuristic_time=float(preprocessing.get('runtime_us', 0)) / 1_000_000,
        # NOTE(review): the 1e9 divisor suggests setup times are stored in nanoseconds - confirm
        gurobi_setup_time=float(gurobi_setup_time/1_000_000_000),
        gurobi_presolve_time=float(pre_t),
        gurobi_run_time=float(raw_data['gurobi_runtime']),
        # total_time: float

        # Other Solver results ------------
        gurobi_mip_gap=float(solution_info['MIPGap']),
        gurobi_bound=float(solution_info['ObjBound']),
        gurobi_bound_c=float(solution_info['ObjBoundC']),
        gurobi_node_count=int(solution_info['NodeCount']),
        gurobi_solutions_count=int(solution_info['SolCount']),
        gurobi_simplex_iterations_count=int(solution_info['IterCount']),
        gurobi_initial_rows_count=int(ini_rows),
        gurobi_initial_columns_count=int(ini_cols),
        gurobi_initial_non_zero_count=int(ini_n0),
        gurobi_initial_continuous_variables_count=int(ini_cont),
        gurobi_initial_integer_variables_count=int(ini_int),
        gurobi_initial_binary_variables_count=int(ini_bin),
        gurobi_presolve_removed_rows=int(prem_rows),
        gurobi_presolve_removed_columns=int(prem_cols),
        gurobi_rows_count=int(end_rows),
        gurobi_columns_count=int(end_cols),
        gurobi_non_zero_count=int(end_n0),
        gurobi_continuous_variables_count=int(end_cont),
        gurobi_integer_variables_count=int(end_int),
        gurobi_binary_variables_count=int(end_bin),

        # Other attributes ----------------
        time_limit=float(raw_data['time_limit']),
        gurobi_logs=str(raw_data['gurobi_logs'])
    )


# Discover every JSON result file in the results directory
dir_files = os.listdir('/data/results')
json_files = sorted([f for f in dir_files if f.endswith('.json')])

# Parse the files in parallel. The context manager tears the worker processes
# down even when one of the files fails to parse, so no workers are leaked.
# Results arrive in completion order (imap_unordered), not in file order.
with multiprocessing.Pool() as pool:
    parsed_results = list(
        tqdm(
            pool.imap_unordered(parse_file, json_files),
            total=len(json_files)
        )
    )

# One row per parsed result, with the columns in alphabetical order
df = pd.DataFrame([d.to_dict() for d in parsed_results])
df = df[sorted(df.columns)].copy()

# Quick sanity overview: files in directory, JSON files, parsed rows and available columns
len(dir_files), len(json_files), len(df), df.columns

  0%|          | 0/512 [00:00<?, ?it/s]

(518,
 512,
 512,
 Index(['area_side', 'communication_radius', 'coverage_radius',
        'gurobi_binary_variables_count', 'gurobi_bound', 'gurobi_bound_c',
        'gurobi_columns_count', 'gurobi_continuous_variables_count',
        'gurobi_heuristic_objective_value',
        'gurobi_initial_binary_variables_count', 'gurobi_initial_columns_count',
        'gurobi_initial_continuous_variables_count',
        'gurobi_initial_integer_variables_count',
        'gurobi_initial_non_zero_count', 'gurobi_initial_rows_count',
        'gurobi_integer_variables_count', 'gurobi_logs', 'gurobi_mip_gap',
        'gurobi_model_name', 'gurobi_node_count', 'gurobi_non_zero_count',
        'gurobi_objective_value', 'gurobi_optimal',
        'gurobi_presolve_removed_columns', 'gurobi_presolve_removed_rows',
        'gurobi_presolve_time', 'gurobi_rows_count', 'gurobi_run_time',
        'gurobi_setup_time', 'gurobi_simplex_iterations_count',
        'gurobi_solution', 'gurobi_solutions_count', 'heuristic

In [3]:
# Persist the parsed results table as a Parquet file (path is relative to the working directory)
df.to_parquet('results.pq')

## Check the Quantities of Processed Instances

In [4]:
# Count how many distinct random seeds were processed for each experiment group.
# 'random_seed' must stay last: every column before it is used as a grouping key.
sdf = df[[
    'pois', 'sensors', 'sinks',
    # 'area_side', 'coverage_radius', 'communication_radius',
    'k', 'm',
    'heuristic_name', 'gurobi_model_name', 'y_binary',
    'random_seed'
]].copy()

sdf = sdf.groupby(list(sdf.columns[:-1])).nunique().sort_index().reset_index()

# Dimensions of the experiment grid
N_INSTANCE_SIZES = 6     # |{Instance sizes}|
N_KM_PAIRS = 6           # |{(K,M) pairs}|
SEEDS_PER_GROUP = 10     # |{different seeds per group}|
N_GUROBI_MODELS = 2 * 2  # |{different gurobi models}|
N_PRE_HEURISTICS = 1     # |{different pre-heuristics}|  # TODO

expected_total = (N_INSTANCE_SIZES * N_KM_PAIRS
                  * SEEDS_PER_GROUP
                  * N_GUROBI_MODELS
                  * N_PRE_HEURISTICS)

total_processed = int(sdf['random_seed'].sum())
print('\n\nDONE:',
      f'{total_processed}/{expected_total}',
      f'({round(100*(total_processed/expected_total), 3)}%)')

# Vectorised sum; unlike the builtin sum() it skips missing runtimes instead of
# producing NaN (which int() cannot convert)
total_runtime = int(df['total_time'].sum())
# Linear extrapolation of the total processing time (not reported by this cell)
total_expected_time = int((expected_total/total_processed)*total_runtime)
print('PROCESS TIME:', timedelta(seconds=total_runtime),
      f'\t({round(len(df)/(total_runtime/3600), 2)} tests/hour)',
      f'\t({round((total_runtime/60)/len(df), 2)} minutes/test)')

PARALLELISM = 6
# NOTE(review): the factor 2 looks like a safety margin over the observed mean - confirm
ESTIMATED_AVERAGE_TEST_TIME = (total_runtime/len(df)) * 2
estimated_total_time = ESTIMATED_AVERAGE_TEST_TIME * expected_total
estimated_remaining_time = ESTIMATED_AVERAGE_TEST_TIME * (expected_total-total_processed)
print('ESTIMATED REMAINING WALL TIME:', timedelta(seconds=int(estimated_remaining_time/PARALLELISM)))

# A group is complete once all of its seeds have been processed
print('\n\nComplete:')
sdf[sdf['random_seed'] == SEEDS_PER_GROUP]



DONE: 512/1440 (35.556%)
PROCESS TIME: 6 days, 4:22:36 	(3.45 tests/hour) 	(17.39 minutes/test)
ESTIMATED REMAINING WALL TIME: 3 days, 17:38:39


Complete:


Unnamed: 0,pois,sensors,sinks,k,m,heuristic_name,gurobi_model_name,y_binary,random_seed
0,100,100,1,1,1,,gurobi_multi_flow,False,10
1,100,100,1,1,1,,gurobi_single_flow,False,10
2,100,100,1,1,1,,gurobi_y_binary_multi_flow,True,10
3,100,100,1,1,1,,gurobi_y_binary_single_flow,True,10
4,100,100,1,2,1,,gurobi_multi_flow,False,10
5,100,100,1,2,1,,gurobi_single_flow,False,10
6,100,100,1,2,1,,gurobi_y_binary_multi_flow,True,10
7,100,100,1,2,1,,gurobi_y_binary_single_flow,True,10
8,100,100,1,2,2,,gurobi_multi_flow,False,10
9,100,100,1,2,2,,gurobi_single_flow,False,10


In [5]:
# Groups whose number of distinct processed seeds differs from the 10 seeds expected per group
print('\n\nIncomplete:')
sdf.loc[sdf['random_seed'] != 10]



Incomplete:


Unnamed: 0,pois,sensors,sinks,k,m,heuristic_name,gurobi_model_name,y_binary,random_seed
48,100,500,1,3,2,,gurobi_multi_flow,False,8
49,100,500,1,3,2,,gurobi_single_flow,False,8
50,100,500,1,3,2,,gurobi_y_binary_multi_flow,True,8
51,100,500,1,3,2,,gurobi_y_binary_single_flow,True,8
