# Creación de indicadores a nivel de radio y fracción censal

In [529]:
import pyredatam
from pyredatam import cpv2010arg
import os
import pandas as pd
from scripts.path_finders import get_indicators_path, get_division_path
from scripts.create_indicators import get_data, replace_index, get_or_create_indicators_df
from scripts.geo_utils import join_df_with_shp

Cargamos todos los datos necesarios de la base REDATAM en data frames. La función `get_data` descarga los datos de la base del Censo 2010, a menos que ya estén guardados en la carpeta *data/censo*.

In [530]:
# Census variables to fetch. The order is load-bearing: calculate_indicators
# picks each frame out of the per-level lists by its position in this list.
variables = [
    "PERSONA.CONDACT",
    "VIVIENDA.V02",
    "PERSONA.EDADAGRU",
    "VIVIENDA.INCALCONS",
    "VIVIENDA.INCALSERV",
    "HOGAR.ALGUNBI",
    "HOGAR.INDHAC",
]

In [531]:
# One list of frames per aggregation level, each list ordered like `variables`.
# Levels are fetched in the order FRAC, RADIO, DPTO, PROV.
dfs_frac, dfs_radio, dfs_comuna, dfs_caba = [
    [get_data(level, variable) for variable in variables]
    for level in ("FRAC", "RADIO", "DPTO", "PROV")
]

In [532]:
# Preview the comuna-level frame for variables[0] ("PERSONA.CONDACT") as
# returned by get_data, before its index is replaced.
dfs_comuna[0].head()

Unnamed: 0.1,Unnamed: 0,Código,Ocupado,Desocupado,Inactivo,Total
0,0,2001,115106,5478,40224,160808
1,1,2002,94799,3703,38033,136535
2,2,2003,111137,4974,40042,156153
3,3,2004,117188,6758,49626,173572
4,4,2005,107804,4481,39007,151292


`replace_index` utiliza los códigos de la base como índice del data frame y crea una columna con el código reexpresado en el formato utilizado en los shapefiles del GCBA.

In [533]:
# Apply replace_index to every frame of every level, rebinding the same names.
# NOTE(review): the integer looks like the zero-padded length of the census
# code at each level (9 radio, 7 fraccion, 5 comuna, 2 province) - confirm
# against scripts.create_indicators.replace_index.
# NOTE(review): this cell feeds its own outputs back in if re-run, so it is
# only safe to execute once per kernel session unless replace_index is
# idempotent - confirm.
dfs_radio, dfs_frac, dfs_comuna, dfs_caba = [
    [replace_index(df, code_len) for df in level_dfs]
    for level_dfs, code_len in ((dfs_radio, 9), (dfs_frac, 7), (dfs_comuna, 5), (dfs_caba, 2))
]

In [534]:
# Same comuna-level frame after replace_index: the census code is now the
# index and a COMUNAS column has been added (see output below).
dfs_comuna[0].head()

Unnamed: 0_level_0,Ocupado,Desocupado,Inactivo,Total,COMUNAS
Código,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2001,115106,5478,40224,160808,1
2002,94799,3703,38033,136535,2
2003,111137,4974,40042,156153,3
2004,117188,6758,49626,173572,4
2005,107804,4481,39007,151292,5


## Chequeos de consistencia

La cantidad de radios, fracciones, comunas y Ciudades de Buenos Aires (1 sola) debe coincidir para todas las variables censales descargadas.

In [535]:
# Number of rows (geographic units) in each census frame, per aggregation
# level; every list should repeat a single value.
# Single-argument print(...) with %-formatting emits the same text on
# Python 2 and Python 3, unlike the Python 2-only print statement.
print("RADIO # de radios %s" % [len(df_radio) for df_radio in dfs_radio])
print("FRAC # de fracciones %s" % [len(df_frac) for df_frac in dfs_frac])
print("DPTO # de comunas %s" % [len(df_comuna) for df_comuna in dfs_comuna])
print("PROV # de CABAs %s" % [len(df_caba) for df_caba in dfs_caba])

RADIO # de radios [3553, 3553, 3553, 3553, 3553, 3553, 3553]
FRAC # de fracciones [351, 351, 351, 351, 351, 351, 351]
DPTO # de comunas [15, 15, 15, 15, 15, 15, 15]
PROV # de CABAs [1, 1, 1, 1, 1, 1, 1]


La suma del total de cada variable censal debe coincidir en distintos niveles de agregación.

In [536]:
# For each census variable, add up the "Total" column at every aggregation
# level and report whether the four grand totals agree.
for df_frac, df_radio, df_comuna, df_caba, variable in zip(dfs_frac, dfs_radio, dfs_comuna, dfs_caba, variables):

    total_frac = sum(df_frac["Total"])
    total_radio = sum(df_radio["Total"])
    total_comuna = sum(df_comuna["Total"])
    total_caba = sum(df_caba["Total"])

    totals_match = total_radio == total_frac == total_comuna == total_caba

    # One formatted line per variable. A single-argument print(...) works on
    # both Python 2 and 3 and reproduces the space-separated layout of the
    # former trailing-comma print statements.
    print("%s %s %s %s %s %s" % (variable.ljust(18), total_radio, total_frac,
                                 total_comuna, total_caba, totals_match))

PERSONA.CONDACT    2390725.0 2390725.0 2390725.0 2390725.0 True
VIVIENDA.V02       1423973.0 1423973.0 1423973.0 1423973.0 True
PERSONA.EDADAGRU   2890151.0 2890151.0 2890151.0 2890151.0 True
VIVIENDA.INCALCONS 1082998.0 1082998.0 1082998.0 1082998.0 True
VIVIENDA.INCALSERV 1082998.0 1082998.0 1082998.0 1082998.0 True
HOGAR.ALGUNBI      1150134.0 1150134.0 1150134.0 1150134.0 True
HOGAR.INDHAC       1150134.0 1150134.0 1150134.0 1150134.0 True


## Creación de indicadores censales

In [537]:
# One indicators frame per aggregation level, seeded from the first census
# frame (variables[0]) of that level.
# NOTE(review): judging by its name, get_or_create_indicators_df presumably
# loads a previously saved indicators file when one exists - confirm.
# NOTE(review): the trailing integer is the same one passed to replace_index
# for each level (9/7/5/2), presumably the census code length - confirm.
indicators_radio = get_or_create_indicators_df("RADIO", dfs_radio[0], 9)
indicators_frac = get_or_create_indicators_df("FRAC", dfs_frac[0], 7)
indicators_comuna = get_or_create_indicators_df("DPTO", dfs_comuna[0], 5)
indicators_caba = get_or_create_indicators_df("PROV", dfs_caba[0], 2)

In [538]:
# Inspect the radio-level indicators frame as returned by
# get_or_create_indicators_df.
# NOTE(review): the saved output already shows indicator columns although
# calculate_indicators has not run yet in this notebook - presumably they come
# from a previously saved file; confirm.
indicators_radio.head()

Unnamed: 0_level_0,CO_FRAC_RA,empleo,desocup,inact,ocup_viv,0_14,15_64,mas_65,hab,con_satisf,con_basica,con_insuf,serv_satisf,serv_basica,serv_insuf,nbi,hac-149,hac+150
Código,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
20010101,1_1_1,0.652778,0.035897,0.118056,0.703704,0.142857,0.824405,0.032738,336,0.447368,0.289474,0.263158,0.894737,0.026316,0.078947,0.292308,0.661538,0.338462
20010201,1_2_1,0.680851,0.050847,0.300912,0.905405,0.311715,0.661088,0.027197,478,0.074627,0.492537,0.432836,0.776119,0.007463,0.216418,0.267081,0.565217,0.434783
20010202,1_2_2,0.615094,0.08427,0.34717,0.95,0.339152,0.648379,0.012469,401,0.087719,0.482456,0.429825,0.842105,0.078947,0.078947,0.195312,0.421875,0.578125
20010203,1_2_3,0.639485,0.10241,0.334764,0.942308,0.308605,0.673591,0.017804,337,0.081633,0.438776,0.479592,0.857143,0.071429,0.071429,0.240385,0.442308,0.557692
20010204,1_2_4,0.7275,0.058252,0.28,0.92437,0.335548,0.649502,0.01495,602,0.118182,0.6,0.281818,0.927273,0.045455,0.027273,0.141104,0.478528,0.521472


In [539]:
# Inspect the fraccion-level indicators frame as returned by
# get_or_create_indicators_df.
# NOTE(review): the saved output already shows indicator columns although
# calculate_indicators has not run yet in this notebook - presumably they come
# from a previously saved file; confirm.
indicators_frac.head()

Unnamed: 0_level_0,CO_FRACC,empleo,desocup,inact,ocup_viv,0_14,15_64,mas_65,hab,con_satisf,con_basica,con_insuf,serv_satisf,serv_basica,serv_insuf,nbi,hac-149,hac+150
Código,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
200101,001_1,0.652778,0.035897,0.118056,0.703704,0.142857,0.824405,0.032738,336,0.447368,0.289474,0.263158,0.894737,0.026316,0.078947,0.292308,0.661538,0.338462
200102,001_2,0.694599,0.072142,0.272439,0.953185,0.298408,0.686116,0.015476,11243,0.118538,0.477843,0.403619,0.884417,0.027326,0.088257,0.290628,0.436078,0.563922
200103,001_3,0.728815,0.066722,0.239014,0.92954,0.321379,0.669561,0.00906,15894,0.066384,0.305085,0.628531,0.776836,0.050847,0.172316,0.348619,0.346892,0.653108
200104,001_4,0.667888,0.033735,0.293582,0.588288,0.131198,0.650685,0.218117,10366,0.935988,0.054824,0.009188,0.993874,0.001531,0.004594,0.024396,0.903126,0.096874
200105,001_5,0.632911,0.04,0.286498,0.592682,0.124492,0.657739,0.217769,5414,0.92601,0.067181,0.006809,0.998184,0.000454,0.001362,0.00617,0.919788,0.080212


In [540]:
def calculate_indicators_CONDACT(df_indicators, df_condact, df_edadgru):
    """Add labour-market ratios to ``df_indicators`` (modified in place).

    Columns written:
        empleo:  "Ocupado" divided by ("Total" - "0 - 14") of ``df_edadgru``.
        desocup: "Desocupado" divided by ("Ocupado" + "Desocupado").
        inact:   "Inactivo" divided by ("Total" - "0 - 14") of ``df_edadgru``.

    Args:
        df_indicators: DataFrame receiving the new columns.
        df_condact: DataFrame with "Ocupado", "Desocupado" and "Inactivo".
        df_edadgru: DataFrame with "Total" and "0 - 14".

    Returns:
        None.
    """
    employed = df_condact["Ocupado"]
    unemployed = df_condact["Desocupado"]

    # Everyone outside the "0 - 14" age bracket.
    population_over_14 = df_edadgru["Total"] - df_edadgru["0 - 14"]
    # Employed plus unemployed.
    active_population = employed + unemployed

    df_indicators["empleo"] = employed / population_over_14
    df_indicators["desocup"] = unemployed / active_population
    df_indicators["inact"] = df_condact["Inactivo"] / population_over_14

def calculate_indicators_V02(df_indicators, df):
    """Add ``ocup_viv`` to ``df_indicators`` (modified in place).

    ``ocup_viv`` is the "Con personas presentes" column of ``df`` divided by
    its "Total" column.

    Returns:
        None.
    """
    with_people_present = df["Con personas presentes"]
    all_dwellings = df["Total"]
    df_indicators["ocup_viv"] = with_people_present / all_dwellings

def calculate_indicators_EDADGRU(df_indicators, df):
    """Add age-group shares and total population to ``df_indicators``.

    Columns written (``df_indicators`` is modified in place):
        0_14:   "0 - 14" divided by "Total".
        15_64:  "15 - 64" divided by "Total".
        mas_65: u"65 y más" divided by "Total".
        hab:    the "Total" column itself.

    Args:
        df_indicators: DataFrame receiving the new columns.
        df: DataFrame with "0 - 14", "15 - 64", u"65 y más" and "Total".

    Returns:
        None.
    """
    df_indicators["0_14"] = df["0 - 14"] / df["Total"]
    df_indicators["15_64"] = df["15 - 64"] / df["Total"]
    # Unicode literal instead of "...".decode("utf-8"): str has no .decode()
    # on Python 3, while u"" yields the same key on Python 2 and Python 3.3+.
    df_indicators["mas_65"] = df[u"65 y más"] / df["Total"]
    df_indicators["hab"] = df["Total"]

def calculate_indicators_INCALCONS(df_indicators, df):
    """Add the INCALCONS category shares to ``df_indicators`` (in place).

    Columns written:
        con_satisf: "Satisfactoria" divided by "Total".
        con_basica: u"Básica" divided by "Total".
        con_insuf:  "Insuficiente" divided by "Total".

    Args:
        df_indicators: DataFrame receiving the new columns.
        df: DataFrame with "Satisfactoria", u"Básica", "Insuficiente", "Total".

    Returns:
        None.
    """
    df_indicators["con_satisf"] = df["Satisfactoria"] / df["Total"]
    # Unicode literal instead of "...".decode("utf-8"): str has no .decode()
    # on Python 3, while u"" yields the same key on Python 2 and Python 3.3+.
    df_indicators["con_basica"] = df[u"Básica"] / df["Total"]
    df_indicators["con_insuf"] = df["Insuficiente"] / df["Total"]

def calculate_indicators_INCALSERV(df_indicators, df):
    """Add the INCALSERV category shares to ``df_indicators`` (in place).

    Columns written:
        serv_satisf: "Satisfactoria" divided by "Total".
        serv_basica: u"Básica" divided by "Total".
        serv_insuf:  "Insuficiente" divided by "Total".

    Args:
        df_indicators: DataFrame receiving the new columns.
        df: DataFrame with "Satisfactoria", u"Básica", "Insuficiente", "Total".

    Returns:
        None.
    """
    df_indicators["serv_satisf"] = df["Satisfactoria"] / df["Total"]
    # Unicode literal instead of "...".decode("utf-8"): str has no .decode()
    # on Python 3, while u"" yields the same key on Python 2 and Python 3.3+.
    df_indicators["serv_basica"] = df[u"Básica"] / df["Total"]
    df_indicators["serv_insuf"] = df["Insuficiente"] / df["Total"]

def calculate_indicators_ALGUNBI(df_indicators, df):
    """Add ``nbi`` to ``df_indicators`` (modified in place).

    ``nbi`` is the "Hogares con NBI" column of ``df`` divided by its "Total"
    column.

    Returns:
        None.
    """
    households_with_nbi = df["Hogares con NBI"]
    all_households = df["Total"]
    df_indicators["nbi"] = households_with_nbi / all_households

def calculate_indicators_INDHAC(df_indicators, df):
    """Add the two persons-per-room shares to ``df_indicators`` (in place).

    Columns written:
        hac-149: sum of the three brackets up to 1.49 persons per room,
                 divided by "Total".
        hac+150: sum of the three brackets from 1.50 persons per room upward,
                 divided by "Total".

    Args:
        df_indicators: DataFrame receiving the new columns.
        df: DataFrame holding the six bracket columns referenced below
            (names must match exactly, including the repeated spaces) and
            "Total".

    Returns:
        None.
    """
    up_to_149 = (df["Hasta   0.50 personas por cuarto"]
                 + df["0.51  -  0.99  personas por cuarto"]
                 + df["1.00  -  1.49  personas por cuarto"])
    df_indicators["hac-149"] = up_to_149 / df["Total"]

    # Unicode literal instead of "...".decode("utf-8"): str has no .decode()
    # on Python 3, while u"" yields the same key on Python 2 and Python 3.3+.
    from_150 = (df["1.50  -  1.99  personas por cuarto"]
                + df["2.00  -  3.00  personas por cuarto"]
                + df[u"Más de  3.00 personas por cuarto"])
    df_indicators["hac+150"] = from_150 / df["Total"]

In [541]:
def calculate_indicators(dfs, df_indicators):
    """Run every per-variable indicator calculation for one aggregation level.

    Args:
        dfs: Sequence of census frames addressed by position:
            0 CONDACT, 1 V02, 2 EDADAGRU, 3 INCALCONS, 4 INCALSERV,
            5 ALGUNBI, 6 INDHAC (the order of the ``variables`` list).
        df_indicators: DataFrame that receives the indicator columns; it is
            modified in place by the helpers.

    Returns:
        None.
    """
    # (helper, positions in `dfs` of the frames it needs), in execution order.
    steps = (
        (calculate_indicators_CONDACT, (0, 2)),
        (calculate_indicators_V02, (1,)),
        (calculate_indicators_EDADGRU, (2,)),
        (calculate_indicators_INCALCONS, (3,)),
        (calculate_indicators_INCALSERV, (4,)),
        (calculate_indicators_ALGUNBI, (5,)),
        (calculate_indicators_INDHAC, (6,)),
    )
    for helper, positions in steps:
        helper(df_indicators, *[dfs[position] for position in positions])

# Fill every indicators frame from the census frames of its own level.
for census_dfs, level_indicators in (
    (dfs_radio, indicators_radio),
    (dfs_frac, indicators_frac),
    (dfs_comuna, indicators_comuna),
    (dfs_caba, indicators_caba),
):
    calculate_indicators(census_dfs, level_indicators)

In [542]:
# Fraccion-level indicators after calculate_indicators has run.
indicators_frac.head()

Unnamed: 0_level_0,CO_FRACC,empleo,desocup,inact,ocup_viv,0_14,15_64,mas_65,hab,con_satisf,con_basica,con_insuf,serv_satisf,serv_basica,serv_insuf,nbi,hac-149,hac+150
Código,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
200101,001_1,0.652778,0.035897,0.118056,0.703704,0.142857,0.824405,0.032738,336,0.447368,0.289474,0.263158,0.894737,0.026316,0.078947,0.292308,0.661538,0.338462
200102,001_2,0.694599,0.072142,0.272439,0.953185,0.298408,0.686116,0.015476,11243,0.118538,0.477843,0.403619,0.884417,0.027326,0.088257,0.290628,0.436078,0.563922
200103,001_3,0.728815,0.066722,0.239014,0.92954,0.321379,0.669561,0.00906,15894,0.066384,0.305085,0.628531,0.776836,0.050847,0.172316,0.348619,0.346892,0.653108
200104,001_4,0.667888,0.033735,0.293582,0.588288,0.131198,0.650685,0.218117,10366,0.935988,0.054824,0.009188,0.993874,0.001531,0.004594,0.024396,0.903126,0.096874
200105,001_5,0.632911,0.04,0.286498,0.592682,0.124492,0.657739,0.217769,5414,0.92601,0.067181,0.006809,0.998184,0.000454,0.001362,0.00617,0.919788,0.080212


In [543]:
# Radio-level indicators after calculate_indicators has run.
indicators_radio.head()

Unnamed: 0_level_0,CO_FRAC_RA,empleo,desocup,inact,ocup_viv,0_14,15_64,mas_65,hab,con_satisf,con_basica,con_insuf,serv_satisf,serv_basica,serv_insuf,nbi,hac-149,hac+150
Código,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
20010101,1_1_1,0.652778,0.035897,0.118056,0.703704,0.142857,0.824405,0.032738,336,0.447368,0.289474,0.263158,0.894737,0.026316,0.078947,0.292308,0.661538,0.338462
20010201,1_2_1,0.680851,0.050847,0.300912,0.905405,0.311715,0.661088,0.027197,478,0.074627,0.492537,0.432836,0.776119,0.007463,0.216418,0.267081,0.565217,0.434783
20010202,1_2_2,0.615094,0.08427,0.34717,0.95,0.339152,0.648379,0.012469,401,0.087719,0.482456,0.429825,0.842105,0.078947,0.078947,0.195312,0.421875,0.578125
20010203,1_2_3,0.639485,0.10241,0.334764,0.942308,0.308605,0.673591,0.017804,337,0.081633,0.438776,0.479592,0.857143,0.071429,0.071429,0.240385,0.442308,0.557692
20010204,1_2_4,0.7275,0.058252,0.28,0.92437,0.335548,0.649502,0.01495,602,0.118182,0.6,0.281818,0.927273,0.045455,0.027273,0.141104,0.478528,0.521472


In [544]:
# Comuna-level indicators after calculate_indicators has run.
indicators_comuna.head()

Unnamed: 0_level_0,COMUNAS,empleo,desocup,inact,ocup_viv,0_14,15_64,mas_65,hab,con_satisf,con_basica,con_insuf,serv_satisf,serv_basica,serv_insuf,nbi,hac-149,hac+150
Código,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2001,1,0.66094,0.045429,0.230967,0.599215,0.154119,0.704424,0.141457,205886,0.754696,0.13526,0.110043,0.949451,0.0073,0.043249,0.158983,0.762265,0.237735
2002,2,0.674582,0.037593,0.27064,0.656395,0.110187,0.691848,0.197965,157932,0.918893,0.070327,0.01078,0.991731,0.001467,0.006801,0.020463,0.913787,0.086213
2003,3,0.697068,0.042838,0.251149,0.747373,0.149848,0.690034,0.160118,187537,0.856028,0.100007,0.043965,0.981377,0.00291,0.015713,0.118774,0.795227,0.204773
2004,4,0.679359,0.054524,0.28769,0.840267,0.209613,0.661541,0.128846,218245,0.72866,0.183396,0.087945,0.948177,0.016518,0.035304,0.126584,0.727461,0.272539
2005,5,0.704394,0.039907,0.254873,0.789499,0.145024,0.682914,0.172062,179005,0.883648,0.092044,0.024308,0.986822,0.002813,0.010365,0.060537,0.855477,0.144523


In [545]:
# City-wide (PROV level) indicators after calculate_indicators has run; the
# frame has a single row.
indicators_caba.head()

Unnamed: 0_level_0,empleo,desocup,inact,ocup_viv,0_14,15_64,mas_65,hab,con_satisf,con_basica,con_insuf,serv_satisf,serv_basica,serv_insuf,nbi,hac-149,hac+150
Código,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2,0.68422,0.042921,0.273963,0.760547,0.16349,0.67248,0.16403,2890151,0.856398,0.109963,0.033639,0.980572,0.005062,0.014366,0.059798,0.844103,0.155897


In [546]:
# Write each level's indicators to the CSV path returned by
# get_indicators_path for that level, encoded as UTF-8.
for level, indicators_df in (
    ("RADIO", indicators_radio),
    ("FRAC", indicators_frac),
    ("DPTO", indicators_comuna),
    ("PROV", indicators_caba),
):
    indicators_df.to_csv(get_indicators_path(level), encoding="utf-8")

## Añadir indicadores a sus shapefiles

In [547]:
# Join each indicators frame to its shapefile, re-indexing the frame by the
# column that holds the shapefile-style code. No join is made for the
# city-wide frame.
for shp_name, indicators_df, id_column in (
    ("radios_censo_2010", indicators_radio, "CO_FRAC_RA"),
    ("fracciones_caba_censo_2010", indicators_frac, "CO_FRACC"),
    ("comunas_caba_censo_2010", indicators_comuna, "COMUNAS"),
):
    join_df_with_shp(get_division_path(shp_name), indicators_df.set_index(id_column), "indicadores")