# RESULTS EDA

In [None]:
import os
import json
import base64
from math import sqrt, pi

import ijson
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

import multiprocessing

from tqdm.notebook import tqdm

from kcmc_instance import KCMC_Instance

In [None]:
# PARSING THE INSTANCES
# Each row of the pipe-separated file holds a serialized instance ('serial')
# and the KCMC setting it was run with ('kcmc').
instances = pd.read_csv('/data/instances.csv', sep='|', header=None)
instances.columns = ['serial', 'kcmc']

# The first four ';'-separated fields of the serial identify the instance; the
# fifth piece (everything after the 4th ';') is dropped.
# `n` is passed by keyword: pandas 2.x rejects it as a positional argument.
key_fields = instances['serial'].str.split(';', n=4).str[:-1]
instances['instance_key'] = key_fields.str.join('_').str.replace(' ', '_')

# Field 1 holds three space-separated values: pois, sensors, sinks.
# They are kept as strings here.
entity_counts = key_fields.str[1].str.split(' ')
instances['pois'] = entity_counts.str[0]
instances['sensors'] = entity_counts.str[1]
instances['sinks'] = entity_counts.str[2]

# Field 2 holds three space-separated values: communication radius, coverage
# radius, area side. They are kept as strings here.
geometry = key_fields.str[2].str.split(' ')
instances['communication_radius'] = geometry.str[0]
instances['coverage_radius'] = geometry.str[1]
instances['area_side'] = geometry.str[2]

# Field 3 is the random seed.
instances['seed'] = key_fields.str[3].astype(int)

# K and M are read as the single characters at positions 2 and 4 of the
# stripped 'kcmc' string.
# NOTE(review): this presumably assumes one-digit K and M - confirm against
# the format of the 'kcmc' column.
kcmc_setting = instances['kcmc'].str.strip()
instances['K'] = kcmc_setting.str[2]
instances['M'] = kcmc_setting.str[4]
len(instances)

In [None]:
# Load the preprocessing results table from its parquet file.
prep = pd.read_parquet('/data/preprocessing.parquet')

In [None]:
DATA_DIR = '/data/parsed_results/'

# Stack every '.pq' file found in DATA_DIR into a single frame, ordered by
# instance key.
df = pd.concat([
    pd.read_parquet(os.path.join(DATA_DIR, file), engine='fastparquet')
    for file in os.listdir(DATA_DIR) if file.endswith('.pq')
]).sort_values('instance_key').reset_index(drop=True).copy()

# The seed is the last '_'-separated token of the instance key.
df['seed'] = df['instance_key'].str.split('_').str[-1].str.strip().astype(int)

# Replace the results' own instance key with the columns parsed into
# `instances` (joined on the seed), then drop fully duplicated rows.
df = (df.drop(columns=['instance_key'])
        .merge(instances, on='seed')
        .drop_duplicates().reset_index(drop=True)).copy()

# Expand the 'SolutionInfo' strings into one column per key.
# NOTE(review): eval() executes arbitrary code stored in the parquet files.
# It is kept so parsing stays unchanged; if the strings are plain Python
# literals, ast.literal_eval is the safe replacement - confirm the format first.
solution_info = pd.DataFrame(df['SolutionInfo'].apply(lambda s: eval(s)).tolist(),
                             index=df.index)
df = df.merge(solution_info, left_index=True, right_index=True).copy()

# Cast with DataFrame.astype rather than `df.loc[:, cols] = ...`: on
# pandas >= 2.0 the .loc form writes values in place and keeps the existing
# (object) dtype, so the columns would never actually become numeric.
colunas_float = ['gurobi_runtime', 'communication_density', 'coverage_density']
colunas_int = ['pois', 'sensors', 'K', 'M']
df = df.astype({
    **dict.fromkeys(colunas_float, float),
    **dict.fromkeys(colunas_int, int),
})

len(df), sorted(df.columns)

In [None]:
# Attach the preprocessing results to the solver results. No `on=` is given,
# so pandas joins on every column name the two frames share; the left join
# keeps result rows that have no preprocessing match.
df_prep = df.merge(prep, how='left').copy()

len(df_prep)

# RUNTIME

In [None]:
def get_groupby(df, group_columns, target_columns):
    """Deduplicate `df` and return its target columns grouped by `group_columns`.

    Rows are first deduplicated on the grouping columns together with
    'instance_key', 'K', 'M' and 'gurobi_model_type' (the first occurrence is
    kept), so repeated rows for the same combination count only once.

    Args:
        df: frame containing the grouping, target and deduplication columns.
        group_columns: a column label or a list of labels to group by.
        target_columns: a column label or a list of labels to keep for
            aggregation.

    Returns:
        A pandas DataFrameGroupBy over the grouping and target columns only.
    """
    groups = group_columns if isinstance(group_columns, list) else [group_columns]
    targets = target_columns if isinstance(target_columns, list) else [target_columns]

    # Columns that identify one run, extended by any extra grouping columns.
    run_identity = {'instance_key', 'K', 'M', 'gurobi_model_type'}
    unique_rows = df.drop_duplicates(subset=list(run_identity | set(groups))).copy()

    return unique_rows[groups + targets].groupby(groups)

In [None]:
# Mean Gurobi runtime for every (K, M, model type) combination.
km_runtime_groups = get_groupby(df, ['K', 'M', 'gurobi_model_type'], 'gurobi_runtime')
mean_km_runtime = km_runtime_groups.mean().reset_index(drop=False).copy()

# Mean of each preprocessing metric per (pois, sensors, heuristic). Each metric
# is aggregated on its own and the results are joined on their shared columns.
prep_group_columns = ['pois', 'sensors', 'prep_heuristic']
prep_metric_means = [
    get_groupby(df_prep, prep_group_columns, metric)
    .mean()
    .reset_index(drop=False).copy()
    for metric in ('prep_runtime', 'prep_size', 'prep_compression_rate')
]
mean_pi_prep_size = prep_metric_means[0]
for metric_means in prep_metric_means[1:]:
    mean_pi_prep_size = mean_pi_prep_size.merge(metric_means)

In [None]:
# Show up to 50 (pois, sensors, heuristic) groups, highest mean compression rate first.
mean_pi_prep_size.sort_values('prep_compression_rate', ascending=False).iloc[:50]

In [None]:
# 3-D scatter of the mean Gurobi runtime (log-scaled z axis) over K and M,
# one colour per model type. Plots the aggregated frame, not the raw results.
fig = px.scatter_3d(mean_km_runtime, x='K', y='M', z='gurobi_runtime',
                    color='gurobi_model_type',
                    log_z=True,
                    title='Log_RUNTIME on K vs M')
fig.show()

In [None]:
# 3-D scatter of every run's Gurobi runtime (log-scaled z axis) against the
# communication and coverage densities, one colour per model type.
# The title now names the plotted axes; it previously read "pois vs sensors".
fig = px.scatter_3d(df, x='communication_density', y='coverage_density', z='gurobi_runtime',
                    color='gurobi_model_type',
                    # log_x=True, log_y=True,
                    log_z=True,
                    title='Log_RUNTIME on communication_density vs coverage_density')
fig.show()

# GUROBI LOGS

In [None]:
# Keep only the runs that have a Gurobi log, then report: total runs, runs
# with logs, and the number of distinct instance keys among the latter.
has_logs = df['gurobi_logs'].notnull()
df_logs = df[has_logs]
len(df), len(df_logs), len(df_logs['instance_key'].unique())

In [None]:
# Columns of the per-run summary table printed before each Gurobi log.
# key   = label printed in the table header
# value = column of the results frame the cell value is read from
# The insertion order is the printed column order. Commented-out entries are
# columns currently left out of the summary.
dict_colunas = {
    'pois': 'pois',
    'sensors': 'sensors',
    #'area': 'area',
    #'coverage': 'coverage_radius',
    #'communication': 'communication_radius',
    'seed': 'seed',
    'K': 'K',
    'M': 'M',
    'model': 'gurobi_model_type',
    #'binary_variables': 'binary_variables',
    'status': 'status',
    'solutions_count': 'solutions_count',
    'node_count': 'node_count',
    #'gurobi_runtime': 'gurobi_runtime',
    #'simplex_iterations': 'simplex_iterations_count',
    'MIPGap': 'mip_gap',
    'ObjVal': 'ObjVal',
    'ObjBound': 'ObjBound',
    'ObjBoundC': 'ObjBoundC'
}

In [None]:
COLSIZE = 9  # width, in characters, of every printed column except K and M
LOG_SEED_FILTER = '179561007'  # only instances whose key contains this text are printed


def _format_log_table_line(names, cells):
    """Render one line of the fixed-width, '|'-separated summary table.

    Args:
        names: column identifiers, used only to pick the padding rule.
        cells: values to print, one per name; each is converted with str()
            and truncated to COLSIZE characters.

    Returns:
        The line, starting with '|' and with '|' between (not after) cells.
    """
    parts = []
    for name, cell in zip(names, cells):
        text = str(cell)[:COLSIZE]
        # K and M get a single trailing space; all other columns are padded
        # to COLSIZE. Exact membership replaces the former substring test
        # against the string 'KM'.
        padding = 1 if name in ('K', 'M') else COLSIZE - len(text)
        parts.append(text + ' ' * padding)
    return '|' + '|'.join(parts)


# The header depends only on the column map, so it is built once.
header_line = _format_log_table_line(dict_colunas.keys(), dict_colunas.keys())
summary_columns = list(dict_colunas.values())

for instance_key, sdf in df_logs.groupby('instance_key'):
    if LOG_SEED_FILTER not in instance_key:
        continue
    print(instance_key)

    # For every run of the instance: header, one summary row, the raw log.
    for _, run in sdf.iterrows():
        row_line = _format_log_table_line(summary_columns,
                                          [run[col] for col in summary_columns])
        print(header_line)
        print(row_line, '\n')
        print(run['gurobi_logs'])
        print('-'*70)
        print()
        print()
    print('#'*80)

### Visual

In [None]:
SEED = 147946681
# Serialized form of the first parsed instance whose seed equals SEED.
selected_serial = instances.loc[instances['seed'] == SEED, 'serial'].iloc[0]
# NOTE(review): the meaning of the three positional True flags is defined by
# KCMC_Instance and is not visible in this notebook - confirm there.
inst = KCMC_Instance(selected_serial, True, True, True)
inst

In [None]:
# Draw the selected instance; `minimal=True` is passed straight to
# KCMC_Instance.plot (its effect is defined in kcmc_instance).
inst.plot(minimal=True)