In [4]:
import pyredatam
from pyredatam import cpv2010arg
import os
import pandas as pd
from scripts.path_finders import get_indicators_path
from scripts.create_indicators import get_data, replace_index

Cargamos todos los datos necesarios de la base REDATAM en data frames.

In [5]:
variables = ["PERSONA.CONDACT", "VIVIENDA.V02", "PERSONA.EDADAGRU",
             "VIVIENDA.INCALCONS", "VIVIENDA.INCALSERV", "HOGAR.ALGUNBI",
             "HOGAR.INDHAC"]

In [6]:
dfs_frac = [get_data("FRAC", variable) for variable in variables]
dfs_radio = [get_data("RADIO", variable) for variable in variables]

In [7]:
dfs_frac = [replace_index(df, 7) for df in dfs_frac]
dfs_radio = [replace_index(df, 9) for df in dfs_radio]

## Chequeos de consistencia

In [8]:
[len(df_frac) for df_frac in dfs_frac]

[351, 351, 351, 351, 351, 351, 351]

In [9]:
[len(df_radio) for df_radio in dfs_radio]

[3553, 3553, 3553, 3553, 3553, 3553, 3553]

Los totales de los datos deben coincidir

In [10]:
for df_frac, df_radio, variable in zip(dfs_frac, dfs_radio, variables):
    total_frac = sum(df_frac["Total"])
    total_radio = sum(df_radio["Total"])
    print variable.ljust(18), total_frac, total_radio, total_frac == total_radio

PERSONA.CONDACT    2390725.0 2390725.0 True
VIVIENDA.V02       1423973.0 1423973.0 True
PERSONA.EDADAGRU   2890151.0 2890151.0 True
VIVIENDA.INCALCONS 1082998.0 1082998.0 True
VIVIENDA.INCALSERV 1082998.0 1082998.0 True
HOGAR.ALGUNBI      1150134.0 1150134.0 True
HOGAR.INDHAC       1150134.0 1150134.0 True


## Creación de indicadores

In [11]:
def get_indicators_df(area_level, id_len=7):
    path = get_indicators_path(area_level)
    
    if not os.path.isfile(path):
        return None
    else:
        df = pd.read_csv(path, encoding="utf-8")
        df = replace_index(df, id_len)
        return df

def get_or_create_indicators_df(area_level, df_example, id_len=7):
    df = get_indicators_df(area_level, id_len)
    if df is not None:
        return df
    else:
        return pd.DataFrame(index=df_example.index)    

In [12]:
indicators_frac = get_or_create_indicators_df("FRAC", dfs_frac[0], 7)
indicators_radio = get_or_create_indicators_df("RADIO", dfs_radio[0], 9)

In [13]:
def calculate_indicators_CONDACT(df_indicators, df):
    df_indicators["tasa_empleo"] = df["Ocupado"] / df["Total"]
    df_indicators["tasa_desocupacion"] = df["Desocupado"] / (df["Ocupado"] + df["Desocupado"])
    df_indicators["tasa_inactividad"] = df["Inactivo"] / df["Total"]

def calculate_indicators_V02(df_indicators, df):
    df_indicators["tasa_ocupacion_viviendas"] = df["Con personas presentes"] / df["Total"]

def calculate_indicators_EDADGRU(df_indicators, df):
    df_indicators["0_14"] = df["0 - 14"] / df["Total"]
    df_indicators["15_64"] = df["15 - 64"] / df["Total"]
    df_indicators["mas_65"] = df["65 y más".decode("utf-8")] / df["Total"]
    df_indicators["habitantes"] = df["Total"]

def calculate_indicators_INCALCONS(df_indicators, df):
    df_indicators["calidad_constructiva_satisfactoria"] = df["Satisfactoria"] / df["Total"]
    df_indicators["calidad_constructiva_basica"] = df["Básica".decode("utf-8")] / df["Total"]
    df_indicators["calidad_constructiva_insuficiente"] = df["Insuficiente"] / df["Total"]

def calculate_indicators_INCALSERV(df_indicators, df):
    df_indicators["conexion_servicios_satisfactoria"] = df["Satisfactoria"] / df["Total"]
    df_indicators["conexion_servicios_basica"] = df["Básica".decode("utf-8")] / df["Total"]
    df_indicators["conexion_servicios_insuficiente"] = df["Insuficiente"] / df["Total"]

def calculate_indicators_ALGUNBI(df_indicators, df):
    df_indicators["nbi"] = df["Hogares con NBI"] / df["Total"]

def calculate_indicators_INDHAC(df_indicators, df):
    df_indicators["hacinamiento_hasta_1_49"] = sum((df["Hasta   0.50 personas por cuarto"],
                                                df["0.51  -  0.99  personas por cuarto"],
                                                df["1.00  -  1.49  personas por cuarto"])) / df["Total"]
    
    df_indicators["hacinamiento_hasta_1_49"] = sum((df["1.50  -  1.99  personas por cuarto"],
                                                df["2.00  -  3.00  personas por cuarto"],
                                                df["Más de  3.00 personas por cuarto".decode("utf-8")])) / df["Total"]

In [14]:
calc_methods = [calculate_indicators_CONDACT, calculate_indicators_V02, calculate_indicators_EDADGRU,
                calculate_indicators_INCALCONS, calculate_indicators_INCALSERV, calculate_indicators_ALGUNBI,
                calculate_indicators_INDHAC]

def calculate_indicators(calc_methods, dfs, df_indicators):
    for calc_method, df, variable in zip(calc_methods, dfs, variables):
        print "Calculating indicators of", variable, "with", calc_method.__name__
        calc_method(df_indicators, df)

calculate_indicators(calc_methods, dfs_frac, indicators_frac)
calculate_indicators(calc_methods, dfs_radio, indicators_radio)

Calculating indicators of PERSONA.CONDACT with calculate_indicators_CONDACT
Calculating indicators of VIVIENDA.V02 with calculate_indicators_V02
Calculating indicators of PERSONA.EDADAGRU with calculate_indicators_EDADGRU
Calculating indicators of VIVIENDA.INCALCONS with calculate_indicators_INCALCONS
Calculating indicators of VIVIENDA.INCALSERV with calculate_indicators_INCALSERV
Calculating indicators of HOGAR.ALGUNBI with calculate_indicators_ALGUNBI
Calculating indicators of HOGAR.INDHAC with calculate_indicators_INDHAC
Calculating indicators of PERSONA.CONDACT with calculate_indicators_CONDACT
Calculating indicators of VIVIENDA.V02 with calculate_indicators_V02
Calculating indicators of PERSONA.EDADAGRU with calculate_indicators_EDADGRU
Calculating indicators of VIVIENDA.INCALCONS with calculate_indicators_INCALCONS
Calculating indicators of VIVIENDA.INCALSERV with calculate_indicators_INCALSERV
Calculating indicators of HOGAR.ALGUNBI with calculate_indicators_ALGUNBI
Calculating 

In [15]:
indicators_frac.head()

Unnamed: 0_level_0,tasa_empleo,tasa_desocupacion,tasa_inactividad,tasa_ocupacion_viviendas,CO_FRACC,0_14,15_64,mas_65,calidad_constructiva_satisfactoria,calidad_constructiva_basica,calidad_constructiva_insuficiente,conexion_servicios_satisfactoria,conexion_servicios_basica,conexion_servicios_insuficiente,nbi,hacinamiento_hasta_1_49,habitantes
Código,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
200101,0.820961,0.035897,0.148472,0.703704,001_1,0.142857,0.824405,0.032738,0.447368,0.289474,0.263158,0.894737,0.026316,0.078947,0.292308,0.338462,336
200102,0.680283,0.072142,0.266824,0.953185,001_2,0.298408,0.686116,0.015476,0.118538,0.477843,0.403619,0.884417,0.027326,0.088257,0.290628,0.563922,11243
200103,0.714571,0.066722,0.234342,0.92954,001_3,0.321379,0.669561,0.00906,0.066384,0.305085,0.628531,0.776836,0.050847,0.172316,0.348619,0.653108,15894
200104,0.678205,0.033735,0.298117,0.588288,001_4,0.131198,0.650685,0.218117,0.935988,0.054824,0.009188,0.993874,0.001531,0.004594,0.024396,0.096874,10366
200105,0.669195,0.04,0.302922,0.592682,001_5,0.124492,0.657739,0.217769,0.92601,0.067181,0.006809,0.998184,0.000454,0.001362,0.00617,0.080212,5414


In [16]:
indicators_radio.head()

Unnamed: 0_level_0,tasa_empleo,tasa_desocupacion,tasa_inactividad,tasa_ocupacion_viviendas,CO_FRAC_RA,0_14,15_64,mas_65,calidad_constructiva_satisfactoria,calidad_constructiva_basica,calidad_constructiva_insuficiente,conexion_servicios_satisfactoria,conexion_servicios_basica,conexion_servicios_insuficiente,nbi,hacinamiento_hasta_1_49,habitantes
Código,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
20010101,0.820961,0.035897,0.148472,0.703704,1_1_1,0.142857,0.824405,0.032738,0.447368,0.289474,0.263158,0.894737,0.026316,0.078947,0.292308,0.338462,336
20010201,0.668657,0.050847,0.295522,0.905405,1_2_1,0.311715,0.661088,0.027197,0.074627,0.492537,0.432836,0.776119,0.007463,0.216418,0.267081,0.434783,478
20010202,0.603704,0.08427,0.340741,0.95,1_2_2,0.339152,0.648379,0.012469,0.087719,0.482456,0.429825,0.842105,0.078947,0.078947,0.195312,0.578125,401
20010203,0.610656,0.10241,0.319672,0.942308,1_2_3,0.308605,0.673591,0.017804,0.081633,0.438776,0.479592,0.857143,0.071429,0.071429,0.240385,0.557692,337
20010204,0.691211,0.058252,0.266033,0.92437,1_2_4,0.335548,0.649502,0.01495,0.118182,0.6,0.281818,0.927273,0.045455,0.027273,0.141104,0.521472,602


In [17]:
indicators_frac.to_csv(get_indicators_path("FRAC"), encoding="utf-8")
indicators_radio.to_csv(get_indicators_path("RADIO"), encoding="utf-8")