Llamando mapa con datos

In [1]:
import pandas as pd
import geopandas as gpd

# Remote GeoPackage that holds the district-level table and its geometries.
link = "https://github.com/chorrillos/preprocesamiento/raw/main/datadismap.gpkg"

# Only the 'DISTRITO' layer of the GeoPackage is read.
datadismap = gpd.read_file(
    filename=link,
    layer='DISTRITO',
)

In [2]:
# List the column names available in the district layer.
datadismap.columns

Index(['DEPARTAMEN', 'PROVINCIA', 'DISTRITO', 'INSTITUCIO', 'Poblacion',
       'Superficie', 'Den_pob', 'RENIEC', 'POB-PEN', 'ES_PRIV', 'ES_PUB',
       'N_COMIS', 'IIEE_PROG_PUB020', 'IIEE_PROG_PRIV2020', 'Latitud',
       'Longitud', 'Ejec_reduc_delit', 'Increm_reduc_delit', 'IDH2019',
       'Educ_sec_comp2019_pct', 'NBI2017_pct',
       'niños_no_asisten_escuela2017_pct', 'Viv_sin_serv_hig2017_pct',
       'pob_inf2018', 'pob_supf2018', 'pct_local_no_cobertura_cell',
       'pct_local_cobertura_cell', 'ZG', 'Delitos', 'TID', 'Delitos_todos',
       'geometry'],
      dtype='object')

Proyección del mapa:

In [3]:
# Inspect the coordinate reference system attached to the layer.
datadismap.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [None]:
# Summary statistics of the loaded table, as produced by describe().
datadismap.describe()

Recordar la paleta de colores aquí: [colores](https://matplotlib.org/stable/tutorials/colors/colormaps.html)

# Explorando el IDH

In [None]:
# Histogram of the 2019 IDH values with a rug of the individual observations.

import seaborn

idh2019 = datadismap['IDH2019']

# Five bins, matching the number of classes used later in the notebook.
ax = seaborn.histplot(idh2019, bins=5)

# Short red ticks along the horizontal axis, drawn on the same axes.
seaborn.rugplot(idh2019, height=0.05, color='red', ax=ax)

Todos los intervalos posibles:

In [None]:
import mapclassify
import numpy

# Fixed global NumPy seed so repeated runs give the same classifications
# (presumably needed by the classifiers that use random starts - confirm).
numpy.random.seed(12345)

# Number of classes requested from every classifier that accepts `k`.
K = 5

# The variable being classified; bound once and passed to every scheme below.
idh_values = datadismap['IDH2019']

# Equal-width intervals: easy to interpret, but some classes may hold few cases.
ei5 = mapclassify.EqualInterval(idh_values, k=K)

# Equal-width intervals measured in standard deviations: fairly easy to
# interpret, unsuitable for heavily skewed values. No `k` is passed here.
msd = mapclassify.StdMean(idh_values)

# Varying widths with similar counts per class: harder to interpret, and
# repeated values complicate the cut points.
q5 = mapclassify.Quantiles(idh_values, k=K)

# Varying widths based on similarity: suited to multimodal data.
mb5 = mapclassify.MaximumBreaks(idh_values, k=K)

# Varying widths based on similarity: suited to heavily skewed data.
# No `k` is passed here.
ht = mapclassify.HeadTailBreaks(idh_values)

# Varying widths based on similarity. The original note labels this one
# "heuristic" and the next one "optimizer".
# NOTE(review): mapclassify describes FisherJenks as the optimal classifier and
# JenksCaspall as the heuristic one, so the two labels look swapped - confirm.
fj5 = mapclassify.FisherJenks(idh_values, k=K)
jc5 = mapclassify.JenksCaspall(idh_values, k=K)

# Varying widths based on similarity, heuristic.
mp5 = mapclassify.MaxP(idh_values, k=K)


In [None]:
# Display the result of the Head/Tail Breaks classification (ht).
ht

## comparando intervalos

In [None]:
# Every classifier built above, including the ones that were not given k=K.
class5 = (q5, ei5, msd, ht, mb5, fj5, jc5, mp5)

# One ADCM score per classifier, read from each object's `adcm` attribute.
fits = numpy.array([scheme.adcm for scheme in class5])

# Score and classifier name side by side, one row per classifier.
adcms = pd.DataFrame(
    {
        'ADCM': fits,
        'Classifier': [scheme.name for scheme in class5],
    }
)

# Horizontal bars: one bar per classifier, bar length is its ADCM.
ax = seaborn.barplot(
    data=adcms,
    x='ADCM',
    y='Classifier',
    palette='Pastel1',
)

## Seleccionando cortes y añadiéndolos al data frame

In [None]:
# Column name -> classifier whose class labels (`yb`) are stored in the frame.
# Insertion order is the order in which the columns are added.
selected_schemes = {
    'Quantiles': q5,
    'Equal Interval': ei5,
    # Left out in the original notebook (kept here for reference):
    # 'StdMean': msd,
    # 'Head-Tail Breaks': ht,
    # 'Maximum Breaks': mb5,
    'Fisher-Jenks': fj5,
    'Jenks Caspall': jc5,
    'MaxP': mp5,
}

for column_name, scheme in selected_schemes.items():
    datadismap[column_name] = scheme.yb

## Solo los intervalos del IDH, con ZNG y ZG

In [None]:
# Columns needed for the comparison: district name, the IDH itself (used only
# for ordering), the five class-label columns and the ZG flag.
columns_for_comparison = [
    'DISTRITO', 'IDH2019',
    'Quantiles', 'Equal Interval', 'Fisher-Jenks', 'Jenks Caspall', 'MaxP',
    'ZG',
]

# Order districts by IDH, then drop the IDH and index the table by district.
clasifIDH = (
    datadismap.loc[:, columns_for_comparison]
    .sort_values('IDH2019')
    .drop(columns='IDH2019')
    .set_index('DISTRITO')
)
clasifIDH

In [None]:
# Show only the class-label columns. The result is not assigned, so
# clasifIDH itself keeps its 'ZG' column for the cells below.
clasifIDH.drop('ZG',axis=1)

## Solo los intervalos del IDH para ZG

In [None]:
# Keep the rows flagged 'ZG'; the flag column is constant after the filter,
# so it is dropped.
is_zg = clasifIDH['ZG'] == 'ZG'
clasifIDH_ZG = clasifIDH.loc[is_zg].drop(columns='ZG')

In [None]:
# Count how many ZG districts fall in each class under every scheme.
#
# A bare value_counts() omits classes with no observations and its positional
# order carries no class label, so lists taken from different schemes would
# not line up row by row. Reindexing on the full range of class labels puts
# class i on row i for every scheme and reports empty classes as 0.
# Assumes the class labels are the integers 0..K-1 stored from each
# classifier's `yb` - confirm if a scheme with a different k is added.
TheCounts = {
    scheme: (
        clasifIDH_ZG[scheme]
        .value_counts()
        .reindex(range(K), fill_value=0)
        .to_list()
    )
    for scheme in clasifIDH_ZG.columns
}

# Rows are class labels, columns are classification schemes.
pd.DataFrame(TheCounts, index=range(K)).rename_axis('class')

## Identificando patrones ZG e IDH

In [None]:
# Names of the districts whose 'ZG' column equals 'ZG', as a plain list.
ZGlist = datadismap.loc[datadismap['ZG'] == 'ZG', 'DISTRITO'].to_list()

In [None]:
# One colour per row of clasifIDH, in row order: red for ZG districts, grey
# for the rest.
# The flag is read from clasifIDH's own 'ZG' column instead of looking the
# district name up in a list of ZG names: a name lookup would also paint red
# any non-ZG district that shares its name with a ZG one, and it scans the
# whole list for every row.
COLORtextY = ['red' if flag == 'ZG' else 'grey' for flag in clasifIDH['ZG']]

In [None]:
import matplotlib.pyplot as plt

# A single, very tall axes (figsize 9 x 300) so that every district can
# have its own row label.
f, ax = plt.subplots(nrows=1, figsize=(9, 300))
seaborn.set(font_scale=1)

# Heatmap of the class labels only; yticklabels=1 asks for a label on every row.
class_labels = clasifIDH.drop(columns='ZG')
ax = seaborn.heatmap(class_labels, yticklabels=1, ax=ax)

# Pair each row label with the colour prepared for it in COLORtextY.
for tick_label, tick_color in zip(ax.get_yticklabels(), COLORtextY):
    tick_label.set_color(tick_color)

In [None]:
# Number of observations in each class of the quantile classification.
q5.counts

In [None]:
# The five classifiers that were built with k=K classes.
class5new = q5, ei5, fj5, jc5, mp5

# Observations per class, one column per classifier.
# The row labels are generated from K rather than a literal 5, so the table
# still builds if the number of classes is changed where K is defined.
pd.DataFrame(
    {c.name: c.counts for c in class5new},
    index=['Class-{}'.format(i+1) for i in range(K)]
)