In [1]:
from kcmc_instance import KCMC_Instance

import os, multiprocessing, functools

import pandas as pd
from redis import StrictRedis

## Parse data from the REDIS NoSQL Database to Parquet files

In [2]:
# ! rm -rf instances.parquet
# ! mkdir  instances.parquet 

In [3]:
def parse_evaluation(evaluation):
    """Parse a raw evaluation string into a list of per-entry flag dicts.

    The string is a ';'-separated sequence of entries, each of the form
    ``"<k> <m> | <message for k> | <message for m>"``. For every entry a dict
    ``{'K=<k>': bool, 'M=<m>': bool}`` is produced, where each flag is True
    only when the corresponding message equals 'SUCCESS' (case-insensitive,
    surrounding whitespace ignored).

    Segments that are empty, whitespace-only, or a single character once
    stripped are skipped, so trailing separators and padding do not break
    the parser.

    Raises:
        ValueError: if a non-blank entry does not have exactly three
            ' | '-separated fields, or its first field does not hold
            exactly two whitespace-separated tokens.
    """
    result = []
    for entry in evaluation.strip().split(';'):
        # Filter on the *stripped* text: a whitespace-only segment has nothing to unpack
        entry = entry.strip()
        if len(entry) <= 1:
            continue
        k_m, msg_k, msg_m = entry.split(' | ')
        # Whitespace-agnostic split tolerates repeated spaces between k and m
        k, m = k_m.split()
        result.append({
            'K='+k: msg_k.strip().upper() == 'SUCCESS',
            'M='+m: msg_m.strip().upper() == 'SUCCESS',
        })
    return result


def parse_block(df):
    """Turn a frame of raw instances/evaluations into a flat, sorted table.

    `df` must provide the columns 'instance' and 'raw_evaluation'. The columns
    'evaluation', 'obj_instance' and the basic instance attributes are added to
    `df` itself; the returned frame is a new object holding one row per parsed
    evaluation entry, the instance attributes, and every 'K...'/'M...' flag
    column (missing values filled with False), de-duplicated and sorted.
    """

    # Each raw evaluation string becomes a list of {'K=..': bool, 'M=..': bool} dicts
    df.loc[:, 'evaluation'] = df['raw_evaluation'].apply(parse_evaluation)

    # Build the KCMC_Instance object for every serialized instance
    def build_instance(serialized):
        return KCMC_Instance(serialized,
                             accept_loose_pois=True,
                             accept_loose_sensors=True,
                             accept_loose_sinks=True)

    df.loc[:, 'obj_instance'] = df['instance'].apply(build_instance)

    # Output column -> attribute read from the instance object
    attribute_of_column = (
        ('key', 'key_str'),
        ('random_seed', 'random_seed'),
        ('pois', 'num_pois'),
        ('sensors', 'num_sensors'),
        ('sinks', 'num_sinks'),
        ('area_side', 'area_side'),
        ('coverage_r', 'sensor_coverage_radius'),
        ('communication_r', 'sensor_communication_radius'),
    )
    for column, attribute in attribute_of_column:
        df.loc[:, column] = df['obj_instance'].apply(
            lambda obj, name=attribute: getattr(obj, name)
        )

    # One row per evaluation entry, then spread each entry's dict into columns
    exploded = df.explode('evaluation').reset_index(drop=True).copy()
    flags = pd.DataFrame(exploded['evaluation'].tolist(), index=exploded.index)
    merged = exploded.merge(flags, left_index=True, right_index=True)

    # Keep the instance attributes plus every K*/M* flag column
    attribute_columns = [
        'key', 'random_seed',
        'pois', 'sensors', 'sinks', 'area_side', 'coverage_r', 'communication_r',
    ]
    flag_columns = [col for col in merged.columns if col.startswith(('K', 'M'))]
    selected = merged[attribute_columns + flag_columns].fillna(False).copy()

    sort_order = [
        'pois', 'sensors', 'sinks', 'area_side', 'coverage_r', 'communication_r', 'random_seed'
    ]
    return selected.drop_duplicates().sort_values(sort_order).reset_index(drop=True)


def parse_key(instance_key):
    """Export one Redis instance hash to 'instances.parquet/<key>.pq'.

    `instance_key` is the name of a Redis hash (matched by 'INSTANCE:*' in the
    caller) mapping random seeds to serialized instances. The matching
    evaluations are read from the hash whose name has 'INSTANCE' replaced by
    'EVALUATION'.

    If the parquet file already exists and its row count is a multiple of
    10000, it is considered complete and is not regenerated.

    Returns:
        (key, row_count): the file stem ('INSTANCE' -> 'KCMC', ':' -> '_')
        and the number of rows in the existing or newly written parquet file.
    """
    key = instance_key.replace('INSTANCE', 'KCMC').replace(':', '_')
    parquet_path = f'instances.parquet/{key}.pq'
    if os.path.exists(parquet_path):
        tam = len(pd.read_parquet(parquet_path))
        if (tam % 10000) == 0:
            return key, tam

    # If we have to reprocess, start our own redis connection and extract the data
    rows = []
    evaluation_key = instance_key.replace('INSTANCE', 'EVALUATION')
    connection = StrictRedis('host.docker.internal', decode_responses=True)
    try:
        for random_seed, instance in connection.hscan_iter(instance_key):
            rows.append({
                'instance': instance,
                'raw_evaluation': connection.hget(evaluation_key, random_seed)
            })
    finally:
        # Release the connection even when a Redis call fails mid-scan
        connection.close()

    # With the connection closed, parse and save the data
    df = parse_block(pd.DataFrame(rows))
    # The output directory is not guaranteed to exist; exist_ok makes this
    # safe when several workers reach this point at once
    os.makedirs('instances.parquet', exist_ok=True)
    df.to_parquet(parquet_path)
    return key, len(df)

In [4]:
# Collect the name of every instance hash, releasing the connection even if the scan fails
redis = StrictRedis('host.docker.internal', decode_responses=True)
try:
    list_keys = list(redis.scan_iter('INSTANCE:*'))
finally:
    redis.close()

# Parse the REDIS data as a DataFrame
# The context manager shuts the 8 workers down on success *and* on error,
# so an exception in a worker does not leave orphaned processes behind
with multiprocessing.Pool(8) as pool:
    for num, pair in enumerate(pool.imap_unordered(parse_key, list_keys)):
        key, qtd = pair
        # Progress fraction, number of rows for the key, key name
        print(round(num/len(list_keys), 3), '\t', qtd, '\t', key)

0.0 	 10000 	 KCMC_10_25_2_886_100_100
0.013 	 10000 	 KCMC_10_100_5_1253_100_100
0.025 	 10000 	 KCMC_10_100_1_1772_100_50
0.038 	 10000 	 KCMC_10_25_2_626_100_200
0.051 	 10000 	 KCMC_10_25_1_626_100_200
0.063 	 10000 	 KCMC_10_50_2_886_100_100
0.076 	 10000 	 KCMC_10_100_1_1253_100_150
0.089 	 10000 	 KCMC_10_50_1_886_100_50
0.101 	 10000 	 KCMC_10_100_1_1772_100_100
0.114 	 10000 	 KCMC_10_50_5_886_100_50
0.127 	 10000 	 KCMC_10_25_1_886_100_100
0.139 	 10000 	 KCMC_10_50_5_886_100_100
0.152 	 10000 	 KCMC_10_50_1_886_100_100
0.165 	 10000 	 KCMC_10_100_1_1772_100_200
0.177 	 10000 	 KCMC_10_25_2_626_100_150
0.19 	 10000 	 KCMC_10_50_2_886_100_50
0.203 	 10000 	 KCMC_10_100_1_1253_100_50
0.215 	 10000 	 KCMC_10_50_1_1253_100_150
0.228 	 10000 	 KCMC_10_50_2_1253_100_150
0.241 	 10005 	 KCMC_10_25_1_313_100_50
0.253 	 10009 	 KCMC_10_50_5_443_100_50
0.266 	 10000 	 KCMC_10_50_2_1253_100_100
0.278 	 10005 	 KCMC_10_25_1_313_100_100
0.291 	 10000 	 KCMC_10_100_1_1253_100_100
0.304 	 6

## Very limited exploratory data analysis (EDA)

In [10]:
# For every parquet file, compute the share of rows flagged True in each K*/M* column
solvency_records = []
for file in sorted(os.listdir('instances.parquet')):
    if not file.endswith('.pq'):
        # Anything else in the directory is not one of the parquet exports
        continue
    data = pd.read_parquet(os.path.join('instances.parquet', file))
    record = {
        col: len(data[data[col]])/len(data)
        for col in data.columns
        if col.startswith('K') or col.startswith('M')
    }
    record['key'] = file[:-len('.pq')]
    solvency_records.append(record)

# Columns absent from a file (flag never observed there) count as 0.0
solvency = pd.DataFrame(solvency_records).set_index('key').fillna(0.0)

In [11]:
# Rank the files by the share of rows whose 'K=1' flag is True, highest first
solvency.sort_values(by='K=1', ascending=False)

Unnamed: 0_level_0,K=1,M=1,K=2,M=2,K=3,M=3,K=4,M=4,K=5,M=5,K=6,K=7
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
KCMC_10_100_1_626_100_200,0.002243,0.008973,0.003589,0.006281,0.004038,0.004038,0.004486,0.000897,0.002692,0.0,0.0,0.0
KCMC_10_100_5_626_100_200,0.001736,0.008681,0.003472,0.006944,0.005208,0.003472,0.000000,,0.000000,,,
KCMC_10_100_5_626_100_150,0.000590,0.002948,0.001179,0.002358,0.001769,0.001179,0.000000,,0.000000,,,
KCMC_10_100_1_626_100_150,0.000552,0.002024,0.000736,0.001288,0.000552,0.000552,0.000000,0.000000,0.000000,0.0,0.0,0.0
KCMC_10_50_1_443_100_200,0.000200,0.000599,0.000400,0.000400,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
KCMC_10_25_2_626_100_50,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
KCMC_10_25_2_886_100_100,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
KCMC_10_25_2_886_100_150,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
KCMC_10_25_2_886_100_200,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0


## Reset the instance-generator parameters

In [6]:
from math import sqrt, pi

# Parameter grid: every derived quantity is a rate applied to the level above it
POIS         = [10,  20,   30,  40,   50]
RATE_SENSORS = [2.5, 5.0,  10.0]
RATE_SINKS   = [0.0, 0.05, 0.1]
RATE_AREA    = [8.0, 2.0, 1.0]  # HOW MUCH % of the total area we can COVER
COVERAGE_R   = 100  # BASE-CONSTANT
RATE_COMM_R  = [0.5, 1.0,  1.5, 2.0]

covg_r = COVERAGE_R

# sensors derive from pois; sinks (at least 1) and the area side derive from
# sensors; the communication radius derives from the coverage radius
combinations = pd.DataFrame(
    [
        (pois, sensors, sinks, area, covg_r, comm_r)
        for pois in POIS
        for sensors in (int(pois*rate) for rate in RATE_SENSORS)
        for sinks in (int(max(sensors*rate, 1)) for rate in RATE_SINKS)
        for area in (int(sqrt((1/rate)*sensors*pi*covg_r*covg_r)) for rate in RATE_AREA)
        for comm_r in (int(covg_r*rate) for rate in RATE_COMM_R)
    ],
    columns=['num_pois', 'num_sensors', 'num_sinks', 'area_side', 'covg_radius', 'comm_radius']
)
# Size of the raw grid, before duplicated rows are removed
print(len(combinations))

# Integer truncation makes some rows coincide: drop them and sort by every column
column_order = list(combinations.columns)
combinations = (
    combinations
    .drop_duplicates()
    .sort_values(column_order)
    .reset_index(drop=True)
    .copy()
)
len(combinations)

540


528

In [7]:
# combinations.to_csv('instance_generator_configurations.csv', sep=',', index=None)

In [8]:
# Display the grid of generator configurations held in `combinations`
combinations

Unnamed: 0,num_pois,num_sensors,num_sinks,area_side,covg_radius,comm_radius
0,10,25,1,313,100,50
1,10,25,1,313,100,100
2,10,25,1,313,100,150
3,10,25,1,313,100,200
4,10,25,1,626,100,50
...,...,...,...,...,...,...
523,50,500,50,2802,100,200
524,50,500,50,3963,100,50
525,50,500,50,3963,100,100
526,50,500,50,3963,100,150
