In [2]:
import numpy as np
import pandas
import geopandas
import h3
from shapely import wkt
from shapely.geometry import Polygon
import matplotlib.pyplot as plt
import h3

In [31]:
import warnings

warnings.simplefilter(action='ignore', category=pandas.errors.PerformanceWarning)

In [4]:
pa_area = pandas.read_csv("/media/gegen07/Expansion/data/pa_interpolated_res_6_interest.csv")

pa_area.reset_index(drop=True, inplace=True)
pa_area['geometry'] = pa_area.geometry.apply(wkt.loads)
pa_area = geopandas.GeoDataFrame(pa_area, geometry="geometry")
pa_area["centroid"] = pa_area.centroid
pa_area["h3_boundary"] = pa_area.centroid.apply(lambda x: h3.geo_to_h3(x.y, x.x, 6))

dd_cells = pa_area.h3_boundary.to_dict()
map_dd_cells = {v: k for k, v in dd_cells.items()}

cells_points = [x.coords[0] for x in pa_area.centroid.values]

In [5]:
import requests
def calculate_distance_matrix_m(
    points
):
    if len(points) < 2:
        return 0

    coords_uri = ";".join(
        ["{},{}".format(point[0], point[1]) for point in points]
    )

    response = requests.get(
        f"http://localhost:5000/table/v1/driving/{coords_uri}?annotations=distance",
        timeout=6000,
    )
    print(coords_uri)
    response.raise_for_status()

    return np.array(response.json()["distances"])

In [7]:
distance_matrix = (calculate_distance_matrix_m(cells_points)/1000)

-48.50329641834121,-1.2764261850790388;-48.547820687447974,-1.3949244522358408;-48.48103683795319,-1.217179397299579;-48.52555770368143,-1.335674587885734;-48.485727664116695,-1.3843471621189751;-48.46346464832969,-1.3250901987055264;-48.54311652303757,-1.227773965800644;-48.441203349903475,-1.2658347713373956;-48.40135825140571,-1.3145018885969155;-48.3790969927124,-1.255239517648233;-48.35683747683136,-1.195978910858984;-48.31697748849417,-1.2446404603259298;-48.41894377535956,-1.206581033540739;-48.33923865460214,-1.3039096937884802;-48.36150157593946,-1.3631806124333699;-48.423621246394205,-1.3737658701375035;-48.428300806155,-1.541001942960954;-48.46815241917296,-1.4922980505871817;-48.34389968063843,-1.4711516392057404;-48.53025882167148,-1.5028650828458308;-48.50799239073983,-1.443605508024859;-48.49042058391353,-1.5515659422309045;-48.406032658236256,-1.4817268906955658;-47.926304743997235,-1.2889758180530502;-47.966251834094486,-1.2402769832546285;-47.9040438207799,-1.22965861

In [28]:
def generate_instance_df(instance, area, distance_matrix, map_dd_cells):
    map_instances = np.array(list(map(lambda x: map_dd_cells[x] if x in map_dd_cells.keys() else np.nan, list(instance.h3_boundary.unique()))))
    map_instances = map_instances[~np.isnan(map_instances)].astype(int)

    distances = []

    for i in map_instances:
        for j in map_instances:
            if i < j:
                distances.append(distance_matrix[i][j])

    distances = np.array(distances)
    distance_stat_list = [np.mean(distances), np.std(distances), np.median(distances), np.max(distances), np.min(distances), np.var(distances)]
    dict_distance = dict(zip(['distance_mean', 'distance_std', 'distance_median', 'distance_max', 'distance_min', 'distance_var'], distance_stat_list))
    df = pandas.DataFrame.from_dict(dict_distance, orient='index').T

    display(df)

    censo_columns=['media_moradores_por_domicilio',
       'media_rendimento_medio_por_morador_com_ou_sem_renda',
       'media_rendimento_medio_por_morador_com_renda',
       'media_rendimento_medio_por_morador_reponsavel_com_ou_sem_Renda',
       'media_rendimento_medio_por_morador_responsavel_com_renda',
       'n_alfabetizados_homens', 'n_alfabetizados_mulheres', 'n_amarelos',
       'n_brancos', 'n_domicilios', 'n_homens', 'n_idade_0_10',
       'n_idade_11_20', 'n_idade_21_30', 'n_idade_31_40', 'n_idade_41_50',
       'n_idade_51_60', 'n_idade_61_70', 'n_idade_71_100', 'n_indigenas',
       'n_moradores', 'n_mulheres', 'n_pardos', 'n_pretos',
       'variancia_moradores_por_domicilio']
    
    stat_censo = area.loc[map_instances, censo_columns].agg({"mean", "std", "median", "max", "min", "var"})
    stats_list = stat_censo.index.tolist()
    columns = stat_censo.columns.tolist()
    for i in range(len(columns)):
        for j in range(len(stats_list)):
            df[f"{columns[i]}_{stats_list[j]}"] = stat_censo.iloc[j, i]
    
    display(df)

In [32]:
pa_instances = pandas.read_csv("../../data/cvrp-instances-1.0/train/cvrp-0-pa-0.csv")
generate_instance_df(pa_instances, pa_area, distance_matrix, map_dd_cells)

Unnamed: 0,distance_mean,distance_std,distance_median,distance_max,distance_min,distance_var
0,35.885667,17.632682,34.72865,84.5981,7.2622,310.911482


Unnamed: 0,distance_mean,distance_std,distance_median,distance_max,distance_min,distance_var,media_moradores_por_domicilio_std,media_moradores_por_domicilio_var,media_moradores_por_domicilio_median,media_moradores_por_domicilio_max,...,n_pretos_median,n_pretos_max,n_pretos_min,n_pretos_mean,variancia_moradores_por_domicilio_std,variancia_moradores_por_domicilio_var,variancia_moradores_por_domicilio_median,variancia_moradores_por_domicilio_max,variancia_moradores_por_domicilio_min,variancia_moradores_por_domicilio_mean
0,35.885667,17.632682,34.72865,84.5981,7.2622,310.911482,0.065425,0.00428,3.814865,3.961011,...,48.545876,72.492331,27.583513,55.014222,0.162862,0.026524,3.299775,4.066958,3.280713,3.349295
