In [1]:
from kcmc_instance import KCMC_Instance

import os

import pandas as pd
from redis import StrictRedis

## Parse data from the REDIS NoSQL Database to Parquet files

In [2]:
! rm -rf instances.parquet
! mkdir  instances.parquet 

In [3]:
def parse_evaluation(evaluation):
    result = []
    for k_m, msg_k, msg_m in [i.strip().split(' | ')
                              for i in evaluation.strip().split(';') if len(i) > 1]:
        k, m = k_m.strip().split(' ')
        k_success = (msg_k.strip().upper() == 'SUCCESS')
        m_success = (msg_m.strip().upper() == 'SUCCESS')
        result.append({
            'K='+k: k_success,
            'M='+m: m_success,
        })
        
        raise Exception(msg_k + ' | ' + m_success)
        
    return result


def parse_block(df):

    # Parse each evaluation as a list of dicts
    df.loc[:, 'evaluation'] = df['raw_evaluation'].apply(parse_evaluation)

    # Parse the instance as a KCMC_Instance object
    df.loc[:, 'obj_instance'] = df['instance'].apply(
        lambda instance: KCMC_Instance(instance,
                                       accept_loose_pois=True,
                                       accept_loose_sensors=True,
                                       accept_loose_sinks=True)
    )

    # Extract basic attributes of the instance
    df.loc[:, 'key'] = df['obj_instance'].apply(lambda i: i.key_str)
    df.loc[:, 'random_seed'] = df['obj_instance'].apply(lambda i: i.random_seed)
    df.loc[:, 'pois'] = df['obj_instance'].apply(lambda i: i.num_pois)
    df.loc[:, 'sensors'] = df['obj_instance'].apply(lambda i: i.num_sensors)
    df.loc[:, 'sinks'] = df['obj_instance'].apply(lambda i: i.num_sinks)
    df.loc[:, 'area_side'] = df['obj_instance'].apply(lambda i: i.area_side)
    df.loc[:, 'coverage_r'] = df['obj_instance'].apply(lambda i: i.sensor_coverage_radius)
    df.loc[:, 'communication_r'] = df['obj_instance'].apply(lambda i: i.sensor_communication_radius)

    # Extract attributes of the instance that cannot be calculated from other attributes
    
    # Reformat the dataframe
    df = df.explode('evaluation').reset_index(drop=True).copy()
    df = df.merge(pd.DataFrame(df['evaluation'].tolist(), index=df.index),
                  left_index=True, right_index=True)
    df = df[[
            'key', 'random_seed',
            'pois', 'sensors', 'sinks', 'area_side', 'coverage_r', 'communication_r',
        ] + [col for col in df.columns if (col.startswith('K') or col.startswith('M'))]
    ].fillna(False).copy()
    
    return df

In [4]:
# Start the REDIS CONN
redis = StrictRedis('host.docker.internal', decode_responses=True)

# Parse the REDIS data as a DataFrame
for instance_key in redis.scan_iter('INSTANCE:*'):
    key = instance_key.replace('INSTANCE', 'KCMC').replace(':', '_')
    if os.path.exists('instances.parquet/'+key+'.pq'): continue
    print(key)
    
    df = []
    evaluation_key = instance_key.replace('INSTANCE', 'EVALUATION')
    for random_seed, instance in redis.hscan_iter(instance_key):
        df.append({
            'instance': instance,
            'raw_evaluation': redis.hget(evaluation_key, random_seed)
        })
    df = parse_block(pd.DataFrame(df))
    df.to_parquet(f'instances.parquet/{key}.pq')
    del df

# Close the REDIS CONN
redis.close()

KCMC_30_150_15_30000_100_50


Exception: POI 0 COVERAGE 0

## VERY Limited EDA

In [None]:
df = pd.read_parquet('instances.parquet')

for key, data in df.groupby('key'):
    print(key)
    columns = sorted([col for col in data.columns if col.startswith('K') or col.startswith('M')])
    for col in columns:
        print(col, '\t', len(data[data[col]])/len(data))
    print()

In [None]:
POIS         = [10,  20,   30,  40,   50]
RATE_SENSORS = [2.5, 5.0,  10.0]
RATE_SINKS   = [0.0, 0.05, 0.1]
RATE_AREA    = [1,   2.0,  4.0]
COVERAGE_R   = 100  # BASE-CONSTANT
RATE_COMM_R  = [0.5, 1.0,  1.5, 2.0]

from itertools import product
combinations = []
covg_r = COVERAGE_R
for pois in POIS:
    for sensors in [int(pois*r) for r in RATE_SENSORS]:
        for sinks in [int(max(sensors*r, 1)) for r in RATE_SINKS]:
            for area in [int(sensors*covg_r*r) for r in RATE_AREA]:
                for comm_r in [int(covg_r*r) for r in RATE_COMM_R]:
                    combinations.append((pois, sensors, sinks, area, covg_r, comm_r))
combinations = pd.DataFrame(
    combinations,
    columns=['num_pois', 'num_sensors', 'num_sinks', 'area_side', 'covg_radius', 'comm_radius']
)
print(len(combinations))
combinations = (combinations
    .drop_duplicates()
    .sort_values(list(combinations.columns))
    .reset_index(drop=True)
).copy()
len(combinations)

In [None]:
# combinations.to_csv('combinations.csv', sep=',', index=None)