# JSON Partition

La idea es disminuir el tamaño del JSON que contiene el mapa por municipio, considerando que este archivo **pesa 19.4 MB**. \
Se me ocurren 3 estrategias:
 1. Partir el mapa en 5 partes, una por cada región de Colombia: Caribe, Pacífica, Amazónica, Orinoquía y Andina.
 2. Disminuir la cantidad de coordenadas en un 50%, 67%, 75%, 80%, 90%, o 95%.
 3. Combinación de 1 y 2.

Otras estrategias:
 4. No usar coordenadas cuya separación con la coordenada anterior sea inferior a una distancia euclidiana definida.
 
Como dato de referencia, el mapa usado en el caso 4.1 de la semana 5 (us.json) **pesa 88 KB**.

In [None]:
import json
import os
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine, text

In [None]:
# Maximum number of rows pandas shows when displaying a DataFrame
pd.options.display.max_rows = 20

# Connection settings. Each value can be overridden through an environment
# variable of the same name; the literals are only fallbacks.
# SECURITY NOTE(review): the fallbacks below look like live credentials
# committed to source control -- rotate the password and remove the literals
# once every environment provides the variables.
DB_USERNAME = os.environ.get('DB_USERNAME', 'alagos')
DB_PASSWORD = os.environ.get('DB_PASSWORD', 'Team67!')
DB_ENDPOINT = os.environ.get('DB_ENDPOINT', 'ds4a-demo-instance.cqjr4hyu9xaq.us-east-1.rds.amazonaws.com')
DB_NAME = os.environ.get('DB_NAME', 'desertion_pj_team67')

# Percent-encode user and password so characters such as '@', ':' or '/'
# cannot break the structure of the connection URL.
engine = create_engine(
    f'postgresql://{quote(DB_USERNAME, safe="")}:{quote(DB_PASSWORD, safe="")}'
    f'@{DB_ENDPOINT}/{DB_NAME}',
    max_overflow=20,
)

def runQuery(sql):
    """Execute a SQL statement and return its rows as a DataFrame.

    Args:
        sql: SQL text to execute; it is wrapped with ``text()`` before
            being sent to the database.

    Returns:
        pandas.DataFrame with one row per result row and the result's
        column names as columns.
    """
    # The context manager returns the connection to the pool even when the
    # statement fails; previously every call left a connection checked out.
    with engine.connect() as conn:
        autocommit_conn = conn.execution_options(isolation_level="AUTOCOMMIT")
        result = autocommit_conn.execute(text(sql))
        # Materialise the rows before the connection is released.
        return pd.DataFrame(result.fetchall(), columns=result.keys())

In [None]:
# Load the full municipality GeoJSON. The encoding is stated explicitly so
# the read does not depend on the platform default (the files written by this
# notebook are also opened as utf-8).
with open('/Users/alberto/Documents/GitHub/dS4A_team67/App67/data/MGN_MPIO_POLITICO.json', encoding='utf-8') as geo:
    munijson = json.load(geo)

In [None]:
# Municipality code and region for the 2019 cohort rows where dane_alu_01 and
# dane_alu_11 are both non-null and dane_alu_01 is positive.
REGION_SQL = """
select code_municip, region
from master_table_by_municipio mtbm 
where mtbm.year_cohort = 2019 
and mtbm.dane_alu_01 is not null 
and mtbm.dane_alu_11 is not null
and mtbm.dane_alu_01 > 0;"""
df = runQuery(REGION_SQL)

## Estrategia 1
Particiones del archivo JSON

In [None]:
# Lists of municipality codes, one per region
def _codes_in(region_name):
    """Return the code_municip values of the `df` rows in `region_name`."""
    in_region = df['region'] == region_name
    return df.loc[in_region, 'code_municip'].to_list()


reg_cari = _codes_in('Caribe')
reg_paci = _codes_in('Pacifica')
reg_amaz = _codes_in('Amazonica')
reg_orin = _codes_in('Orinoquia')
reg_andi = _codes_in('Andina')

In [None]:
# Skeleton of the output GeoJSON: it reuses the top-level type of the source
# file; the feature containers start empty and are filled by later cells.
features_array = []
features_dic = {}
new_file = {'type': munijson['type']}

In [None]:
# Collect the features whose MPIO_CCNCT code belongs to the Andina region.
# The codes are put in a set once so each membership test is O(1) instead of
# a scan of the whole list for every feature.
andina_codes = set(reg_andi)
features_array.extend(
    feature
    for feature in munijson['features']
    if feature['properties']['MPIO_CCNCT'] in andina_codes
)

In [None]:
# Attach the collected features and merge them into the output document.
features_dic['features'] = features_array
new_file.update(features_dic)

In [None]:
# Write the region file in compact form (no spaces after separators).
with open('andina.json', 'w', encoding='utf-8') as f:
    compact_json = json.dumps(new_file, separators=(',', ':'))
    f.write(compact_json)

|**Región**|**Tamaño**|
|:-----:|:-----:|
|Caribe|3.4 MB|
|Pacífica|3.2 MB|
|Amazónica|2.9 MB|
|Orinoquía|2.3 MB|
|Andina|7.5 MB|

## Estrategia 2
Disminuir la cantidad de coordenadas en un 50%, 67%, 75%, 80%, 90% o 95%.

In [None]:
# Keep one position out of every `step`.
# step -> approximate reduction: 2: 50%; 3: 67%; 4: 75%; 5: 80%; 10: 90%; 20: 95%.
step = 2


def _thin_ring(ring, step):
    """Return every `step`-th position of a linear ring, keeping it valid.

    GeoJSON linear rings must end on their starting position and contain at
    least four positions, so the original last position is re-appended when
    the stride skips it, and rings that would become too short are returned
    unthinned.
    """
    thinned = ring[::step]
    if ring and thinned[-1] != ring[-1]:
        thinned.append(ring[-1])
    return thinned if len(thinned) >= 4 else list(ring)


def _thin_geometry(geometry, step):
    """Thin every ring of a Polygon or MultiPolygon, preserving its type.

    Geometries of any other type (or a null geometry) are returned unchanged.
    """
    if not geometry:
        return geometry
    kind = geometry['type']
    if kind == 'Polygon':
        rings = [_thin_ring(ring, step) for ring in geometry['coordinates']]
        return {'type': kind, 'coordinates': rings}
    if kind == 'MultiPolygon':
        polygons = [
            [_thin_ring(ring, step) for ring in polygon]
            for polygon in geometry['coordinates']
        ]
        return {'type': kind, 'coordinates': polygons}
    return geometry


# Rebuild every feature with the same properties and a thinned geometry.
features_array = [
    {
        'type': 'Feature',
        'properties': feature['properties'],
        'geometry': _thin_geometry(feature['geometry'], step),
    }
    for feature in munijson['features']
]

In [None]:
# Wrap the thinned features in a document with the source's top-level type
# and write it in compact form.
new_file = {'type': munijson['type'], 'features': features_array}
with open('municipios50.json', 'w', encoding='utf-8') as f:
    f.write(json.dumps(new_file, separators=(',', ':')))

|**Archivo**|**Tamaño**|
|:-----:|:-----:|
|original|19.4 MB|
|municipios50|9.8 MB|
|municipios67|6.7 MB|
|municipios75|5.1 MB|
|municipios80|4.2 MB|
|municipios90|2.3 MB|
|municipios95|1.4 MB|

## Estrategia 3
Combinación de 1 y 2.

In [None]:
# Keep one position out of every `step`.
# step -> approximate reduction: 2: 50%; 3: 67%; 4: 75%; 5: 80%; 10: 90%; 20: 95%.
step = 10


def _thin_ring(ring, step):
    """Return every `step`-th position of a linear ring, keeping it valid.

    GeoJSON linear rings must end on their starting position and contain at
    least four positions, so the original last position is re-appended when
    the stride skips it, and rings that would become too short are returned
    unthinned.
    """
    thinned = ring[::step]
    if ring and thinned[-1] != ring[-1]:
        thinned.append(ring[-1])
    return thinned if len(thinned) >= 4 else list(ring)


def _thin_geometry(geometry, step):
    """Thin every ring of a Polygon or MultiPolygon, preserving its type.

    Geometries of any other type (or a null geometry) are returned unchanged.
    """
    if not geometry:
        return geometry
    kind = geometry['type']
    if kind == 'Polygon':
        rings = [_thin_ring(ring, step) for ring in geometry['coordinates']]
        return {'type': kind, 'coordinates': rings}
    if kind == 'MultiPolygon':
        polygons = [
            [_thin_ring(ring, step) for ring in polygon]
            for polygon in geometry['coordinates']
        ]
        return {'type': kind, 'coordinates': polygons}
    return geometry


# Only features whose MPIO_CCNCT code is in the Pacifica list are kept; the
# set gives O(1) membership tests instead of a list scan per feature.
pacifica_codes = set(reg_paci)
features_array = [
    {
        'type': 'Feature',
        'properties': feature['properties'],
        'geometry': _thin_geometry(feature['geometry'], step),
    }
    for feature in munijson['features']
    if feature['properties']['MPIO_CCNCT'] in pacifica_codes
]

In [None]:
# Wrap the filtered, thinned features in a document with the source's
# top-level type and write it in compact form.
new_file = {'type': munijson['type'], 'features': features_array}
with open('pacifico90.json', 'w', encoding='utf-8') as f:
    f.write(json.dumps(new_file, separators=(',', ':')))

|**Región**|**0%**|**50%**|**80%**|**90%**|
|:-----:|:-----:|:-----:|:-----:|:-----:|
|Caribe|3.4 MB|1.7 MB|727 KB|405 KB|
|Pacífica|3.2 MB|1.6 MB|722 KB|418 KB|
|Amazónica|2.9 MB|1.5 MB|594 KB|307 KB|
|Orinoquía|2.3 MB|1.1 MB|470 KB|244 KB|
|Andina|7.5 MB|3.9 MB|1.7 MB|947 KB|

## Pruebas

In [None]:
import plotly.express as px
import numpy as np

In [None]:
# Maximum number of rows pandas shows when displaying a DataFrame
pd.options.display.max_rows = 20

# Connection settings. Each value can be overridden through an environment
# variable of the same name; the literals are only fallbacks.
# SECURITY NOTE(review): the fallbacks below look like live credentials
# committed to source control -- rotate the password and remove the literals
# once every environment provides the variables.
DB_USERNAME = os.environ.get('DB_USERNAME', 'alagos')
DB_PASSWORD = os.environ.get('DB_PASSWORD', 'Team67!')
DB_ENDPOINT = os.environ.get('DB_ENDPOINT', 'ds4a-demo-instance.cqjr4hyu9xaq.us-east-1.rds.amazonaws.com')
DB_NAME = os.environ.get('DB_NAME', 'desertion_pj_team67')

# Percent-encode user and password so characters such as '@', ':' or '/'
# cannot break the structure of the connection URL.
engine = create_engine(
    f'postgresql://{quote(DB_USERNAME, safe="")}:{quote(DB_PASSWORD, safe="")}'
    f'@{DB_ENDPOINT}/{DB_NAME}',
    max_overflow=20,
)

def runQuery(sql):
    """Execute a SQL statement and return its rows as a DataFrame.

    Args:
        sql: SQL text to execute; it is wrapped with ``text()`` before
            being sent to the database.

    Returns:
        pandas.DataFrame with one row per result row and the result's
        column names as columns.
    """
    # The context manager returns the connection to the pool even when the
    # statement fails; previously every call left a connection checked out.
    with engine.connect() as conn:
        autocommit_conn = conn.execution_options(isolation_level="AUTOCOMMIT")
        result = autocommit_conn.execute(text(sql))
        # Materialise the rows before the connection is released.
        return pd.DataFrame(result.fetchall(), columns=result.keys())

In [None]:
# Municipality code plus dane_alu_11 (exposed as `nodropouts`) for the 2019
# cohort rows where dane_alu_01 and dane_alu_11 are both non-null and
# dane_alu_01 is positive.
NODROPOUTS_SQL = """
select code_municip, mtbm.dane_alu_11 as nodropouts
from master_table_by_municipio mtbm 
where mtbm.year_cohort = 2019 
and mtbm.dane_alu_01 is not null 
and mtbm.dane_alu_11 is not null
and mtbm.dane_alu_01 > 0;"""
df_drops = runQuery(NODROPOUTS_SQL)

In [None]:
# Convert the colour column to 64-bit floats before plotting.
df_drops = df_drops.astype({'nodropouts': np.float64})

1. Mapa de todos los municipios: original (19.4 MB) vs 95% (1.4 MB)

Mapa con archivo original (19.4 MB)

In [None]:
# Reference list of values accepted by the `mapbox_style` argument of
# px.choropleth_mapbox. The original cell held this list as free text, which
# is not valid Python (", or ...") and stopped the notebook with a SyntaxError.
# Grouping follows the Plotly documentation -- confirm against the installed
# Plotly version.

# Styles usable without a Mapbox access token
MAPBOX_STYLES_NO_TOKEN = (
    "white-bg",
    "open-street-map",
    "carto-positron",
    "carto-darkmatter",
    "stamen-terrain",
    "stamen-toner",
    "stamen-watercolor",
)

# Styles that need a Mapbox account and access token
MAPBOX_STYLES_TOKEN = (
    "basic",
    "streets",
    "outdoors",
    "light",
    "dark",
    "satellite",
    "satellite-streets",
)

MAPBOX_STYLES = MAPBOX_STYLES_NO_TOKEN + MAPBOX_STYLES_TOKEN

In [None]:
# Choropleth drawn with the original, full-resolution GeoJSON.
# Other base maps tried in this cell: "carto-positron" and "white-bg".
full_map_options = dict(
    locations='code_municip',                 # df_drops column with the ids matched against the GeoJSON
    featureidkey="properties.MPIO_CCNCT",     # where each GeoJSON feature stores its id
    color='nodropouts',                       # column that drives the colour intensity
    geojson=munijson,                         # geometry source
    zoom=4,                                   # initial zoom level
    mapbox_style="satellite-streets",         # some base maps need a Mapbox account and a token
    center={"lat": 4.0902, "lon": -75.7129},  # initial map centre
    color_continuous_scale="Viridis",         # colour scheme
    opacity=0.5,                              # opacity of the coloured layer
)
px.choropleth_mapbox(df_drops, **full_map_options)

Mapa con archivo reducido en un 95% (1.4 MB)

In [None]:
# Load the 95%-reduced GeoJSON. It is read as utf-8, the encoding this
# notebook uses when writing its output files, instead of the platform default.
with open('/Users/alberto/Documents/GitHub/dS4A_team67/notebooks/municipios95.json', encoding='utf-8') as geo:
    muni95 = json.load(geo)

In [None]:
# Choropleth drawn with the 95%-reduced GeoJSON.
# Another base map tried in this cell: "carto-positron".
reduced_map_options = dict(
    locations='code_municip',                 # df_drops column with the ids matched against the GeoJSON
    featureidkey="properties.MPIO_CCNCT",     # where each GeoJSON feature stores its id
    color='nodropouts',                       # column that drives the colour intensity
    geojson=muni95,                           # geometry source
    zoom=4,                                   # initial zoom level
    mapbox_style="white-bg",                  # some base maps need a Mapbox account and a token
    center={"lat": 4.0902, "lon": -75.7129},  # initial map centre
    color_continuous_scale="Viridis",         # colour scheme
    opacity=0.5,                              # opacity of the coloured layer
)
ef_map = px.choropleth_mapbox(df_drops, **reduced_map_options)
ef_map.show()


In [None]:
# Load the Andina region GeoJSON. It is read as utf-8, the encoding this
# notebook uses when writing its output files, instead of the platform default.
with open('/Users/alberto/Documents/GitHub/dS4A_team67/notebooks/andina.json', encoding='utf-8') as geo:
    andina = json.load(geo)

In [None]:
# Choropleth drawn with the Andina-only GeoJSON at full resolution.
andina_map_options = dict(
    locations='code_municip',                 # df_drops column with the ids matched against the GeoJSON
    featureidkey="properties.MPIO_CCNCT",     # where each GeoJSON feature stores its id
    color='nodropouts',                       # column that drives the colour intensity
    geojson=andina,                           # geometry source
    zoom=4,                                   # initial zoom level
    mapbox_style="carto-positron",            # some base maps need a Mapbox account and a token
    center={"lat": 4.0902, "lon": -75.7129},  # initial map centre
    color_continuous_scale="Viridis",         # colour scheme
    opacity=0.5,                              # opacity of the coloured layer
)
px.choropleth_mapbox(df_drops, **andina_map_options)


In [None]:
# Load the 90%-reduced Andina GeoJSON. It is read as utf-8, the encoding this
# notebook uses when writing its output files, instead of the platform default.
with open('/Users/alberto/Documents/GitHub/dS4A_team67/notebooks/andina90.json', encoding='utf-8') as geo:
    andi90 = json.load(geo)

In [None]:
# Choropleth drawn with the 90%-reduced Andina GeoJSON.
andina90_map_options = dict(
    locations='code_municip',                 # df_drops column with the ids matched against the GeoJSON
    featureidkey="properties.MPIO_CCNCT",     # where each GeoJSON feature stores its id
    color='nodropouts',                       # column that drives the colour intensity
    geojson=andi90,                           # geometry source
    zoom=4,                                   # initial zoom level
    mapbox_style="carto-positron",            # some base maps need a Mapbox account and a token
    center={"lat": 4.0902, "lon": -75.7129},  # initial map centre
    color_continuous_scale="Viridis",         # colour scheme
    opacity=0.5,                              # opacity of the coloured layer
)
px.choropleth_mapbox(df_drops, **andina90_map_options)
