In [None]:
import os, json
import pandas as pd
from tqdm.notebook import tqdm
from multiprocessing import Pool

# Names of every entry in the input directory; load_file ignores the ones
# that do not end in '.json'.
files_list = os.listdir('/data/dynamodb_objects')
# Directory that receives the parquet shards written by the loop below.
target_dir = '/data/brief'


def load_file(file):
    """Load one JSON record and merge its solver summary into it.

    Returns None for names that do not end in '.json'. Otherwise the file is
    read from /data/dynamodb_objects, the JSON string stored under
    'json_solution' is decoded, and the entries of its 'SolutionInfo' mapping
    are merged into the top-level record, which is then returned.
    """
    if file.endswith('.json'):
        path = '/data/dynamodb_objects/'+file
        with open(path, 'r') as handle:
            record = json.load(handle)
        solution_info = json.loads(record['json_solution'])['SolutionInfo']
        record.update(solution_info)
        return record
    return None

# Number of records buffered in memory before a parquet shard is written.
BATCH_SIZE = 10

# Make sure the output directory exists before the first shard is written.
os.makedirs(target_dir, exist_ok=True)

data = []
qtd_files = 0
# The context manager shuts the worker processes down once loading is done;
# a bare Pool() would keep them alive for the lifetime of the kernel.
with Pool() as pool:
    for file in tqdm(pool.imap_unordered(load_file, files_list), total=len(files_list)):
        # load_file returns None for directory entries that are not .json.
        if file is not None:
            data.append(file)
        if len(data) >= BATCH_SIZE:
            df = pd.DataFrame(data)
            df.to_parquet(target_dir+f'/{qtd_files}.pq')
            qtd_files += 1
            data = []
# Flush whatever is left after the last full batch.
if len(data) > 0:
    df = pd.DataFrame(data)
    df.to_parquet(target_dir+f'/{qtd_files}.pq')
    qtd_files += 1

# NOTE(review): `df` holds only the most recently written shard (at most
# BATCH_SIZE rows), not the whole dataset — confirm that later cells expect this.
qtd_files

In [None]:
# Columns that identify one run; they also define the sort order.
# ('instance_key' is intentionally not selected.)
show_key_cols = [
    'pois', 'sensors', 'area', 'coverage', 'communication', 'seed',
    'K', 'M', 'model',
]

# Result columns kept in the summary. Not selected: 'Status', 'SolCount',
# 'Runtime', 'IterCount', 'BarIterCount', 'ObjBoundC', 'IntVio', 'BoundVio',
# 'ConstrVio', 'PoolObjBound', 'PoolObjVal'.
show_metric_cols = [
    'status', 'solutions_count',
    'gurobi_runtime', 'simplex_iterations_count', 'binary_variables',
    'NodeCount',
    'MIPGap', 'ObjVal', 'ObjBound',
]

show = (
    df[show_key_cols + show_metric_cols]
    .sort_values(show_key_cols)
    .reset_index(drop=True)
)

len(show), show.columns

In [None]:
# Persist the summary table without the row index. `index` is documented as a
# bool, so pass False explicitly instead of relying on None being falsy.
show.to_csv('/data/brief.csv', index=False)

In [None]:
cols_grupo = ['pois', 'sensors', 'K', 'M', 'model']

# Build one summary row per (pois, sensors, K, M, model) group. A '-' marks a
# metric that is not reported because the group has no OPTIMAL run.
agg = {}
for grupo, sdf in df.groupby(cols_grupo):
    sdf_opt = sdf[sdf['status'] == 'OPTIMAL']
    has_opt = len(sdf_opt) > 0
    # Gaps of 999999999 or more are left out of the mean
    # (presumably a "no gap available" sentinel — TODO confirm).
    mip = [v for v in sdf['MIPGap'].astype(float) if v < 999999999]
    mip = round(sum(mip)/len(mip), 1) if len(mip) > 0 else '-'
    # Objective value rounded to one decimal, shared by the two counters below.
    obj_val = sdf['ObjVal'].astype(float).round(1)
    agg[grupo] = {
        '#opt': len(sdf_opt),
        # Counted as integral when the rounded value prints as "<n>.0".
        '#int': len(sdf[obj_val.astype(str).str.endswith('.0')]),
        '#non-zero': len(sdf[obj_val != 0.0]),
        '#nodes': sdf['NodeCount'].astype(float).mean().round(1),
        'LB': sdf['ObjBound'].astype(float).mean().round(1) if has_opt else '-',
        # NOTE(review): 'UB' is read from ObjBoundC, not ObjVal — confirm this is intended.
        'UB': sdf['ObjBoundC'].astype(float).mean().round(1) if has_opt else '-',
        '%gap': mip if has_opt else '-',
        'time(s)': sdf_opt['gurobi_runtime'].astype(float).mean().round(1) if has_opt else '-',
    }

# The dict keys are 5-tuples, so after the transpose they form an unnamed
# MultiIndex that reset_index exposes as level_0 .. level_4.
agg = (
    pd.DataFrame(agg).T
    .reset_index(drop=False)
    .rename(columns={
        'level_0': '|P|', 'level_1': '|I|', 'level_2': 'K', 'level_3': 'M',
        'level_4': 'model',
    })
    .sort_values(['|P|', '|I|', 'K', 'M'])
    .reset_index(drop=True)
)

# One table per model.
agg_sf = agg[agg['model'] == 'single_flow'].drop(columns=['model'])
agg_mf = agg[agg['model'] == 'multi_flow'].drop(columns=['model'])

In [None]:
# Display the aggregated table restricted to the 'multi_flow' model.
agg_mf

In [None]:
# List the distinct (K, M) combinations present in `show`.
show[['K', 'M']].drop_duplicates()

In [None]:
import json
import os, json
import pandas as pd
import multiprocessing
from tqdm import tqdm

def get_solution(item):
    """Encode the active 'x' variables of a result record as a bit string.

    Args:
        item: record with 'solutions_count', 'sensors' (length of the bit
            string) and 'variables' -> 'x' (mapping of variable key to value).

    Returns:
        (solution, size): a string of item['sensors'] characters with '1' at
        every active position, and the number of active entries counted.
        (None, None) when 'solutions_count' is 0.
    """
    if item['solutions_count'] == 0:
        return None, None
    solution = ['0'] * item['sensors']
    size = 0
    for key, active in item['variables']['x'].items():
        # Values may arrive as floats such as 0.9999999 or as strings such as
        # "1.0"; int() would truncate the former to 0 and raise on the latter,
        # so round to the nearest integer instead.
        if round(float(active)) != 1:
            continue
        # The position is the number that follows the last 'i' in the key,
        # cut at the first space or quote character.
        pos = int(key.split('i')[-1].split(' ')[0].split("'")[0].split('"')[0])
        solution[pos] = '1'
        size += 1
    return ''.join(solution), size


# Directory (relative to the working directory) that process() reads from.
DATA_DIR = 'brief'
# Every entry of that directory; process() skips names that are not .json.
files_list = os.listdir(DATA_DIR)


def process(arq):
    """Load one result file from DATA_DIR and add derived fields to it.

    Returns None for names that do not end in '.json'. Otherwise returns a
    shallow copy of the decoded record extended with:
      - 'solution' and 'size', as returned by get_solution;
      - 'gurobi_model_setup_time': summed durations of the model span and all
        constraint spans under item['time']['setup'], divided by 1e9 and
        rounded to 3 decimals;
      - 'gurobi_variables_load_time': the same computation over the 'vars' spans;
      - 'model': the last '_'-separated token before '_flow' in the file name,
        with '_flow' appended.
    """
    if not arq.endswith('.json'):
        return None

    def elapsed(spans):
        # Each span is indexed as (start, end); the total is divided by 1e9
        # (timestamps are presumably nanoseconds — TODO confirm).
        return round(sum(span[1] - span[0] for span in spans) / 1e9, 3)

    with open(os.path.join(DATA_DIR, arq), 'rb') as fin:
        item = json.loads(fin.read())

    solution, size = get_solution(item)
    item['solution'] = solution
    item['size'] = size

    setup = item['time']['setup']
    model_spans = [setup['model']] + list(setup['constraints'].values())
    item['gurobi_model_setup_time'] = elapsed(model_spans)
    item['gurobi_variables_load_time'] = elapsed(list(setup['vars'].values()))

    prefix = arq.split('_flow')[0]
    item['model'] = prefix.split('_')[-1]+'_flow'

    return item.copy()
    

data = []
# Run process() over every directory entry in parallel. The context manager
# releases the worker processes afterwards; a bare Pool() would leave them
# running for the lifetime of the kernel.
with multiprocessing.Pool() as pool:
    iterator = pool.imap_unordered(process, files_list)
    for item in tqdm(iterator, total=len(files_list)):
        # process() returns None for entries that are not .json; drop those so
        # `data` contains only records.
        if item is not None:
            data.append(item)

In [None]:
import json
import os, json
import pandas as pd
import multiprocessing
from tqdm import tqdm

def get_solution(item):
    """Encode the active 'x' variables of a result record as a bit string.

    Args:
        item: record with 'solutions_count', 'sensors' (length of the bit
            string) and 'variables' -> 'x' (mapping of variable key to value).

    Returns:
        (solution, size): a string of item['sensors'] characters with '1' at
        every active position, and the number of active entries counted.
        (None, None) when 'solutions_count' is 0.
    """
    if item['solutions_count'] == 0:
        return None, None
    solution = ['0'] * item['sensors']
    size = 0
    for key, active in item['variables']['x'].items():
        # Values may arrive as floats such as 0.9999999 or as strings such as
        # "1.0"; int() would truncate the former to 0 and raise on the latter,
        # so round to the nearest integer instead.
        if round(float(active)) != 1:
            continue
        # The position is the number that follows the last 'i' in the key,
        # cut at the first space or quote character.
        pos = int(key.split('i')[-1].split(' ')[0].split("'")[0].split('"')[0])
        solution[pos] = '1'
        size += 1
    return ''.join(solution), size


# Directory (relative to the working directory) holding the full result files.
DATA_DIR = 'results'
# Every entry of that directory; process() skips names that are not .json.
files_list = os.listdir(DATA_DIR)


def process(arq):
    """Write a slimmed-down copy of one result file into the 'brief' directory.

    Names that do not end in '.json' and files that already have a copy in
    'brief' are skipped. Otherwise the JSON is read from DATA_DIR, its
    'variables' and 'serial' entries are removed (if present), and the rest is
    written to brief/<arq>. Always returns None.
    """
    if not arq.endswith('.json'): return None
    target = os.path.join('brief', arq)
    if os.path.exists(target): return None

    with open(os.path.join(DATA_DIR, arq), 'rb') as fin:
        item = json.loads(fin.read())

    # Tolerate records that lack one of these fields instead of aborting the
    # whole run with a KeyError.
    item.pop('variables', None)
    item.pop('serial', None)

    os.makedirs('brief', exist_ok=True)
    # Write under a temporary name and rename afterwards: an interrupted run
    # must not leave a truncated brief/<arq> behind, because the exists() check
    # above would then skip that file on every later run.
    partial = target + '.tmp'
    with open(partial, 'w') as fout:
        json.dump(item, fout)
    os.replace(partial, target)


# NOTE: `data` is reset here but not used by the loop below.
data = []
# Convert the entries of DATA_DIR one by one; process() skips non-JSON names
# and files that already exist in 'brief'.
for item in tqdm(files_list):
    process(item)

In [None]:
# Columns that identify one run; they also define the sort order.
# ('instance_key' is intentionally not selected.)
show_key_cols = [
    'pois', 'sensors', 'area', 'coverage', 'communication', 'seed',
    'K', 'M', 'model',
]

# Result columns kept in the summary ('solution' is intentionally not selected).
show_metric_cols = [
    'status', 'solutions_count', 'node_count', 'mip_gap', 'size',
    'gurobi_runtime', 'gurobi_model_setup_time', 'gurobi_variables_load_time',
    'binary_variables', 'simplex_iterations_count',
]

show = (
    df[show_key_cols + show_metric_cols]
    .sort_values(show_key_cols)
    .reset_index(drop=True)
)

len(show), show.columns

In [None]:
# Display the sorted summary table.
show

In [None]:
# Distinct values of the 'pois' column in the summary table.
show['pois'].unique()