# Instances EDA

In [None]:
! pip install plotly openpyxl

In [None]:
import os
from math import sqrt, pi
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

import multiprocessing

from tqdm.notebook import tqdm

from kcmc_instance import KCMC_Instance

In [None]:
df = []

def med_sensor_degree(item):
    """Return the median of the ``sensor_degree`` values of one instance.

    ``item`` is handed unchanged to ``KCMC_Instance`` together with three
    ``True`` positional flags; what those flags enable is defined by
    ``KCMC_Instance``, which is not visible here (TODO confirm).
    """
    instance = KCMC_Instance(item, True, True, True)
    degrees = instance.sensor_degree
    return np.median(degrees)

# Worker pool meant for the per-instance median sensor degree computation.
# That computation is currently disabled (see the commented-out lines below),
# but the pool is kept so it can be switched back on.
pool = multiprocessing.Pool(7)

INSTANCES_DIR = '/data/instances.parquet'

# Instance parameters; constant within a key, so they are read from its first row.
PARAMETER_COLUMNS = [
    'key', 'pois', 'sensors', 'sinks', 'area_side', 'coverage_r', 'communication_r'
]
# Per-sample flags, aggregated into the fraction of samples where they hold.
FLAG_COLUMNS = ['K=1', 'M=1', 'K=2', 'M=2', 'K=3', 'M=3', 'K=4', 'K=5']
# (K, M) pairs for which the fraction of samples satisfying BOTH flags is computed.
KM_PAIRS = [
    ('K=1', 'M=1'), ('K=2', 'M=1'), ('K=3', 'M=1'), ('K=4', 'M=1'), ('K=5', 'M=1'),
                    ('K=2', 'M=2'), ('K=3', 'M=2'), ('K=4', 'M=2'), ('K=5', 'M=2'),
                                    ('K=3', 'M=3'), ('K=4', 'M=3'), ('K=5', 'M=3')
]

try:
    for file in tqdm(os.listdir(INSTANCES_DIR)):
        if not file.endswith('.pq'):
            continue
        item = pd.read_parquet(os.path.join(INSTANCES_DIR, file))
        print(file, len(item))

        # msd = list(pool.map(med_sensor_degree, item['instance'].tolist()))

        item = item[PARAMETER_COLUMNS + FLAG_COLUMNS]
        # item.loc[:, 'med_sensor_degree'] = msd

        # One output row per key: its parameters plus the aggregated flag rates.
        grouped = []
        for key, sdf in item.groupby('key'):
            keyline = sdf.iloc[0].to_dict()
            row = {col: keyline[col] for col in PARAMETER_COLUMNS}
            row.update({
                col: sdf[col].astype(int).sum()/len(sdf)
                for col in FLAG_COLUMNS
            })
            row.update({
                '|'.join([k, m]): (sdf[k] & sdf[m]).astype(int).sum()/len(sdf)
                for k, m in KM_PAIRS
            })
            row.update({
                'samples': len(sdf),
                # 'med_sensor_degree': sdf['med_sensor_degree'].mean()
            })
            grouped.append(row)
        df.append(pd.DataFrame(grouped))
finally:
    # Always release the worker processes, even if reading a file fails;
    # join() waits until the workers have actually exited.
    pool.close()
    pool.join()

# Merge the per-file frames and order the rows by the instance parameters.
df = pd.concat(df).reset_index(drop=True)
df = df.sort_values([
    'pois', 'sensors', 'sinks',
    'area_side', 'coverage_r', 'communication_r'
]).reset_index(drop=True)

In [None]:
# Column used as the success metric (the "SUCESSO" axis) in the cells below.
# In the aggregation cell, a 'K=a|M=b' column is the fraction of a key's samples
# for which both the 'K=a' and 'M=b' flags are true.
REFERENCIA = 'K=3|M=2'
# REFERENCIA = 'K=4|M=2'

## COVERAGE DENSITY

$
\frac{\pi \cdot COVG^2 \cdot SENSORS}{SIDE^2 \cdot POIS}
$

(`COVG` = `coverage_r`, `SENSORS` = `sensors`, `SIDE` = `area_side`, `POIS` = `pois`)

## COMMUNICATION DENSITY

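$
\frac{\pi \cdot COMM^2 \cdot SENSORS \cdot SINKS}{SIDE^2}
$

(`COMM` = `communication_r`, `SENSORS` = `sensors`, `SINKS` = `sinks`, `SIDE` = `area_side`)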

In [None]:
# Derived density features; both are normalised by the squared side of the area.
squared_side = df['area_side']**2

# pi * coverage_r^2 summed over all sensors, per POI and per unit of area.
covered = pi*(df['coverage_r']**2)*df['sensors']
df.loc[:, 'coverage_density'] = covered / (df['pois']*squared_side)

# pi * communication_r^2 summed over all sensors, scaled by the number of sinks,
# per unit of area.
reachable = pi*(df['communication_r']**2)*df['sensors']*df['sinks']
df.loc[:, 'communication_density'] = reachable / squared_side

In [None]:
# Replace the frame built by the previous cells with a saved snapshot.
# NOTE(review): unpickling can execute arbitrary code -- only load 'mantem.pkl'
# when it comes from a trusted source.
df = pd.read_pickle('mantem.pkl')

# Keep only the rows whose coverage density rounds to 0.16 or 0.40.
# The explicit .copy() makes df an independent frame, so the column
# assignments done on it afterwards (df.loc[:, ...] = ...) neither act on a
# slice of the loaded frame nor trigger SettingWithCopyWarning.
df = df[df['coverage_density'].round(2).isin({0.16, 0.4})].copy()

In [None]:
# Number of rows currently in df.
len(df)

In [None]:
# Split df into the two coverage-density groups (rounded to two decimals).
rounded_covg = df['coverage_density'].round(2)
df_16 = df[rounded_covg == 0.16]
df_40 = df[rounded_covg == 0.40]

# Row count of each group.
len(df_16), len(df_40)

In [None]:
# Key and sample count of every row in the 0.40 coverage-density group.
df_40[['key', 'samples']]

In [None]:
# Display the column currently selected as the reference metric.
REFERENCIA

In [None]:
# Mean and standard deviation of the reference metric in the 0.16 group.
ref_16 = df_16[REFERENCIA]
round(ref_16.mean(), 5), round(ref_16.std(), 5)

In [None]:
# Mean and standard deviation of the reference metric in the 0.40 group.
ref_40 = df_40[REFERENCIA]
round(ref_40.mean(), 5), round(ref_40.std(), 5)

In [None]:
# Difference between two hard-coded values -- presumably the group means shown
# by the two cells above (TODO confirm). It has to be updated by hand whenever
# the data or REFERENCIA changes.
round(0.0467 - 0.05956, 5)

In [None]:
# Box plots of the reference metric, one box per coverage-density group.
fig = go.Figure()
fig.add_trace(go.Box(y=df_16[REFERENCIA], name='COVG_D = 16'))
fig.add_trace(go.Box(y=df_40[REFERENCIA], name='COVG_D = 40'))
fig.show()

## COVG_D, COMM_D

In [None]:
# Rows whose reference metric exceeds 0.05 (the threshold is hard-coded).
df_sucesso = df.loc[df[REFERENCIA] > 0.05]

In [None]:
# Coverage density against communication density: all rows ('TOTAL') with the
# rows in df_sucesso ('SUCESSO') drawn on top.
# NOTE: the y values are divided by 1000, while the axis title is the plain
# column name.
fig = go.Figure()
fig.add_trace(go.Scatter(
    name='TOTAL', mode='markers',
    x=df['coverage_density'],
    y=df['communication_density']/1000,
))
fig.add_trace(go.Scatter(
    name='SUCESSO', mode='markers',
    x=df_sucesso['coverage_density'],
    y=df_sucesso['communication_density']/1000,
))

fig.update_xaxes(title_text="coverage_density")
fig.update_yaxes(title_text="communication_density")
fig.show()

In [None]:
# Same selection as in the earlier cell; it is re-assigned here but not used
# by the plot below.
df_sucesso = df.loc[df[REFERENCIA] > 0.05]

# Reference metric as a function of the coverage density.
fig = go.Figure()
fig.add_trace(go.Scatter(
    name='COVG_D x SUCESSO', mode='markers',
    x=df['coverage_density'],
    y=df[REFERENCIA],
))

fig.update_xaxes(title_text="coverage_density")
fig.update_yaxes(title_text="SUCESSO")
fig.show()

In [None]:
# Reference metric as a function of the communication density.
fig = go.Figure()
fig.add_trace(go.Scatter(
    name='COMM_D x SUCESSO', mode='markers',
    x=df['communication_density'],
    y=df[REFERENCIA],
))

fig.update_xaxes(title_text="communication_density")
fig.update_yaxes(title_text="SUCESSO")
fig.show()

## FATORES

In [None]:
# Factor shown on the x axis.
XIS = 'pois'

# Reference metric against the chosen factor, one marker per row of df.
fig = go.Figure()
fig.add_trace(go.Scatter(x=df[XIS], y=df[REFERENCIA], mode='markers'))

fig.update_xaxes(title_text=XIS)
fig.update_yaxes(title_text="SUCESSO")
fig.show()

In [None]:
# Factor shown on the x axis.
XIS = 'sensors'

# Reference metric against the chosen factor, one marker per row of df.
fig = go.Figure()
fig.add_trace(go.Scatter(x=df[XIS], y=df[REFERENCIA], mode='markers'))

fig.update_xaxes(title_text=XIS)
fig.update_yaxes(title_text="SUCESSO")
fig.show()

In [None]:
# Factor whose distinct values define the boxes.
XIS = 'pois'

# One box of the reference metric per distinct factor value, in ascending order.
fig = go.Figure()
for level in sorted(df[XIS].unique()):
    fig.add_trace(go.Box(
        y=df.loc[df[XIS] == level, REFERENCIA],
        name=f'{XIS}={level!s}',
    ))

fig.update_xaxes(title_text=XIS)
fig.update_yaxes(title_text="SUCESSO")
fig.show()

In [None]:
# Factor whose distinct values define the boxes: sensors per POI.
XIS = 'sensors_rate'
sensors_per_poi = df['sensors'] / df['pois']
df.loc[:, 'sensors_rate'] = sensors_per_poi

# One box of the reference metric per distinct factor value, in ascending order.
fig = go.Figure()
for level in sorted(df[XIS].unique()):
    fig.add_trace(go.Box(
        y=df.loc[df[XIS] == level, REFERENCIA],
        name=f'{XIS}={level!s}',
    ))

fig.update_xaxes(title_text=XIS)
fig.update_yaxes(title_text="SUCESSO")
fig.show()

In [None]:
# Factor whose distinct values define the boxes: sinks / sensors / pois,
# rounded to six decimals so that near-equal values share a box.
XIS = 'sinks_rate'
sinks_ratio = df['sinks'] / df['sensors'] / df['pois']
df.loc[:, 'sinks_rate'] = sinks_ratio.round(6)

# One box of the reference metric per distinct factor value, in ascending order.
fig = go.Figure()
for level in sorted(df[XIS].unique()):
    fig.add_trace(go.Box(
        y=df.loc[df[XIS] == level, REFERENCIA],
        name=f'{XIS}={level!s}',
    ))

fig.update_xaxes(title_text=XIS)
fig.update_yaxes(title_text="SUCESSO")
fig.show()

In [None]:
# Factor whose distinct values define the boxes: sinks per unit of area side,
# rounded to three decimals.
# NOTE(review): this stores its result in 'sinks_rate', the same column an
# earlier cell fills with sinks / sensors / pois, so that definition is
# overwritten in df (and in anything exported from it). The neighbouring
# 'd_sensors' cell suggests a name such as 'd_sinks' may have been intended --
# confirm before renaming.
XIS = 'sinks_rate'
sinks_per_side = df['sinks'] / df['area_side']
df.loc[:, 'sinks_rate'] = sinks_per_side.round(3)

# One box of the reference metric per distinct factor value, in ascending order.
fig = go.Figure()
for level in sorted(df[XIS].unique()):
    fig.add_trace(go.Box(
        y=df.loc[df[XIS] == level, REFERENCIA],
        name=f'{XIS}={level!s}',
    ))

fig.update_xaxes(title_text=XIS)
fig.update_yaxes(title_text="SUCESSO")
fig.show()

In [None]:
# Factor whose distinct values define the boxes: sensors per unit of area side,
# rounded to three decimals.
XIS = 'd_sensors'
sensors_per_side = df['sensors'] / df['area_side']
df.loc[:, 'd_sensors'] = sensors_per_side.round(3)

# One box of the reference metric per distinct factor value, in ascending order.
fig = go.Figure()
for level in sorted(df[XIS].unique()):
    fig.add_trace(go.Box(
        y=df.loc[df[XIS] == level, REFERENCIA],
        name=f'{XIS}={level!s}',
    ))

fig.update_xaxes(title_text=XIS)
fig.update_yaxes(title_text="SUCESSO")
fig.show()

In [None]:
# Factor whose distinct values define the boxes:
# (sensors * coverage_r) / (area_side * pois), rounded to three decimals.
XIS = 'd_covg'
covg_numerator = df['sensors'] * df['coverage_r']
covg_denominator = df['area_side'] * df['pois']
df.loc[:, 'd_covg'] = (covg_numerator / covg_denominator).round(3)

# One box of the reference metric per distinct factor value, in ascending order.
fig = go.Figure()
for level in sorted(df[XIS].unique()):
    fig.add_trace(go.Box(
        y=df.loc[df[XIS] == level, REFERENCIA],
        name=f'{XIS}={level!s}',
    ))

fig.update_xaxes(title_text=XIS)
fig.update_yaxes(title_text="SUCESSO")
fig.show()

In [None]:
# Factor whose distinct values define the boxes:
# (sensors * communication_r) / area_side, rounded to one decimal.
XIS = 'd_comm'
comm_numerator = df['sensors'] * df['communication_r']
df.loc[:, 'd_comm'] = (comm_numerator / (df['area_side'])).round(1)

# One box of the reference metric per distinct factor value, in ascending order.
fig = go.Figure()
for level in sorted(df[XIS].unique()):
    fig.add_trace(go.Box(
        y=df.loc[df[XIS] == level, REFERENCIA],
        name=f'{XIS}={level!s}',
    ))

fig.update_xaxes(title_text=XIS)
fig.update_yaxes(title_text="SUCESSO")
fig.show()

In [None]:
# List the columns df has at this point, including the derived factors.
df.columns

In [None]:
# Factor whose values, rounded to the nearest integer, define the boxes.
# NOTE(review): the aggregation cell has 'med_sensor_degree' commented out, so
# this column must come from the loaded snapshot -- confirm it is present.
XIS = 'med_sensor_degree'
rounded_degree = df[XIS].round(0)

# One box of the reference metric per distinct rounded value, in ascending order.
fig = go.Figure()
for level in sorted(rounded_degree.unique()):
    fig.add_trace(go.Box(
        y=df.loc[rounded_degree == level, REFERENCIA],
        name=f'{XIS}={level!s}',
    ))

fig.update_xaxes(title_text=XIS)
fig.update_yaxes(title_text="SUCESSO")
fig.show()

In [None]:
# The 50 rows with the highest reference metric. The column is taken from
# REFERENCIA (instead of a hard-coded 'K=3|M=2') so this table follows the same
# metric as the rest of the notebook when REFERENCIA is switched.
df[['key', 'samples',
    'coverage_density', 'communication_density',
    REFERENCIA
]].sort_values(REFERENCIA, ascending=False).head(50)

In [None]:
# Export df without the row index. `index` is documented as a bool, so pass
# False explicitly instead of relying on None being falsy.
df.to_excel('data.xlsx', index=False)

In [None]:
# List the columns of df (the same columns that were written to 'data.xlsx').
df.columns

In [None]:
# NOTE(review): in the visible part of the notebook, df_group is first assigned
# in a later cell (df_group = df.copy()), so this cell raises NameError when the
# notebook is run top to bottom.
df_group['coverage_density']

In [None]:
# Rounding precisions for the optional density binning below, which is
# currently disabled (see the commented-out lines).
COVG_ROUND = 2
COMM_ROUND = -1

df_group = df.copy()
# df_group.loc[:, 'coverage_density'] = df_group['coverage_density'].round(COVG_ROUND)
# df_group.loc[:, 'communication_density'] = df_group['communication_density'].round(COMM_ROUND)

density_cols = ['coverage_density', 'communication_density']

# One summary row per distinct (coverage_density, communication_density) pair:
# the number of keys and samples, plus max/median/mean/std/min of every column
# except 'key'.
grouped = []
for group, sdf in tqdm(
    df_group.groupby(density_cols),
    total=len(df_group[density_cols].drop_duplicates())
):
    covg_density, comm_density = group
    row = {
        'coverage_density': covg_density,
        'communication_density': comm_density,
        'num_keys': len(sdf['key'].unique()),
        'num_samples': sdf['samples'].sum()
    }
    for col in sdf.columns:
        if col != 'key':
            values = sdf[col]
            row['max_'+col] = values.max()
            row['med_'+col] = values.median()
            row['avg_'+col] = values.mean()
            row['std_'+col] = values.std()
            row['min_'+col] = values.min()
    grouped.append(row)
grouped = pd.DataFrame(grouped)

In [None]:
# Display the per-density-pair summary table.
grouped

In [None]:
# Export the summary table without the row index. `index` is documented as a
# bool, so pass False explicitly instead of relying on None being falsy.
grouped.to_excel('aggregated_data.xlsx', index=False)

In [None]:
from collections import Counter

In [None]:
# Number of rows per distinct density value; the values are sorted first so the
# counters list them in ascending order. Coverage density is rounded to six
# decimals, communication density to the nearest ten.
covg = Counter(sorted(df_group['coverage_density'].round(6)))
comm = Counter(sorted(df_group['communication_density'].round(-1)))

# Both count series share one figure: x = density value, y = number of rows.
fig = go.Figure()
for label, counts in (('COVG', covg), ('COMM', comm)):
    fig.add_trace(go.Scatter(
        x=list(counts.keys()),
        y=list(counts.values()),
        mode='markers',
        name=label,
    ))
fig.show()

In [None]:
# Parameter sets of the rows that have fewer than 10000 samples
# (presumably the target sample count per configuration -- TODO confirm).
missing = df.loc[
    df['samples'] < 10000,
    ['pois', 'sensors', 'sinks', 'area_side', 'coverage_r', 'communication_r']
]
# Rename the columns positionally to the names used in the exported file.
missing.columns = 'num_pois,num_sensors,num_sinks,area_side,covg_radius,comm_radius'.split(',')

In [None]:
# Write the under-sampled configurations without the row index. `index` is
# documented as a bool, so pass False explicitly instead of relying on None
# being falsy.
missing.to_csv('missing.csv', index=False)

## RE-SET THE PARAMETERS

In [None]:
# combinations.to_csv('instance_generator_configurations.csv', sep=',', index=None)

In [None]:
# NOTE(review): 'combinations' is not assigned anywhere in the visible part of
# the notebook; unless it is defined elsewhere, this cell raises NameError.
combinations