In [1]:
import pandas as pd
import numpy as np

def load_data(url):
    """Read a CSV into a DataFrame.

    Parameters
    ----------
    url : path, URL or file-like object
        Handed unchanged to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed table, using ``read_csv`` defaults.
    """
    return pd.read_csv(url)

# Load the three input CSVs (paths are relative to the working directory).
censo, deptos, locs = (
    load_data(ruta)
    for ruta in (
        'data/personas_censo_2011_piramides.csv',
        'data/deptos.csv',
        'data/locs.csv',
    )
)

In [2]:
# Build the locality code as a string: the department number followed by the
# locality number left-padded with zeros to three characters.
codigo_depto = censo['DPTO'].astype(str)
codigo_loc = censo['LOC'].astype(str).str.zfill(3)
censo['CODLOC'] = codigo_depto + codigo_loc

censo.tail()

Unnamed: 0,DPTO,LOC,PERPH02,PERNA01,CODLOC
3285819,19,971,1,34,19971
3285820,19,971,1,58,19971
3285821,19,971,1,58,19971
3285822,19,971,1,20,19971
3285823,19,971,1,22,19971


In [3]:
# Show the last five rows of the locality table.
locs.tail(n=5)

Unnamed: 0,DPTO,LOCALIDAD,CODLOC,NOMBDEPTO,NOMBLOC
610,19,968,19968,TREINTA Y TRES,ARROCERA PROCIPA
611,19,969,19969,TREINTA Y TRES,ARROCERA SAN FERNANDO
612,19,970,19970,TREINTA Y TRES,ARROCERA SANTA FE
613,19,971,19971,TREINTA Y TRES,ARROCERA ZAPATA
614,1,20,1020,MONTEVIDEO,MONTEVIDEO


In [4]:
# Number of people per locality: non-null DPTO entries counted per CODLOC.
# The resulting Series keeps the name 'DPTO'.
pob = censo.groupby('CODLOC')['DPTO'].count()
print(pob.shape)
pob.head()

(632,)


CODLOC
1020     1298645
10320      62590
10321      27471
10521       2465
10522       6597
Name: DPTO, dtype: int64

In [5]:
# Age dependency ratio per locality:
# 100 * (people under 15 or over 64) / (people aged 15 to 64).
edad = censo['PERNA01']
es_dependiente = (edad < 15) | (edad > 64)
es_activa = (edad >= 15) & (edad <= 64)

pob_dep = censo[es_dependiente].groupby('CODLOC')['DPTO'].count()
pob_no_dep = censo[es_activa].groupby('CODLOC')['DPTO'].count()
# The division aligns both Series on CODLOC; a locality that is absent from
# either one ends up as NaN.
dep_edad = pob_dep.div(pob_no_dep).mul(100)

dep_edad.head()

CODLOC
1020     53.084873
10320    48.995429
10321    56.726381
10521    73.225580
10522    58.963855
Name: DPTO, dtype: float64

In [6]:
# Masculinity index per locality: 100 * men / women
# (rows with PERPH02 == 1 are counted as men, PERPH02 == 2 as women).
sexo = censo['PERPH02']
varones = censo[sexo == 1].groupby('CODLOC')['DPTO'].count()
mujeres = censo[sexo == 2].groupby('CODLOC')['DPTO'].count()

ind_masc = varones.div(mujeres).mul(100)
ind_masc.head()

CODLOC
1020     86.355587
10320    95.502108
10321    91.970650
10521    91.381988
10522    92.500730
Name: DPTO, dtype: float64

In [7]:
# Left-join the three per-locality Series on CODLOC, keeping every locality
# present in `pob`.
pob_y_dep = pd.merge(pob, dep_edad, on='CODLOC', how='left')
indicadores = pob_y_dep.merge(ind_masc, on='CODLOC', how='left')

# All three inputs are named 'DPTO', so the merges leave the columns as
# DPTO_x / DPTO_y / DPTO; map them to meaningful names.
dict_rename = {'DPTO_x': 'poblacion', 'DPTO_y': 'dep_edad', 'DPTO': 'ind_masc'}

data = indicadores.reset_index().rename(columns=dict_rename)

print(data.shape)

data.head()


(632, 4)


Unnamed: 0,CODLOC,poblacion,dep_edad,ind_masc
0,1020,1298645,53.084873,86.355587
1,10320,62590,48.995429,95.502108
2,10321,27471,56.726381,91.97065
3,10521,2465,73.22558,91.381988
4,10522,6597,58.963855,92.50073


In [94]:
# Export the per-locality indicators without the row index.
# NOTE(review): missing ratios (NaN) are written as 0, so an undefined ratio is
# indistinguishable from a true zero in the CSV -- confirm this is intended.
# na_rep is given as a string, the type pandas documents for it.
data.to_csv('data/data_group.csv', index=False, na_rep='0')

In [8]:
# age bands
def tramos_edad(df, col_edad='PERNA01', col_tramo='tramo'):
    """Classify each row's age into five-year bands.

    The bands are '0-4', '5-9', ..., '90-94' and a final '+95' band that
    covers ages 95 through 120. The result is stored as an ordered
    categorical column.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the age column. It is modified in place.
    col_edad : str, default 'PERNA01'
        Name of the column with the age values.
    col_tramo : str, default 'tramo'
        Name of the column that receives the age band.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``col_tramo`` added (or replaced).
        Ages outside 0-120, and missing ages, get a missing band.
    """
    # Upper (inclusive) limit of every closed band: 4, 9, ..., 94.
    limites_superiores = list(range(4, 95, 5))
    # Bin edges 0, 4, 9, ..., 94, 120 -> 20 intervals; with include_lowest the
    # first one is [0, 4] and the rest are (previous, upper].
    bins = [0] + limites_superiores + [120]
    # One label per interval: 'lower-upper' for the closed bands, '+95' last.
    labels = ['{}-{}'.format(sup - 4, sup) for sup in limites_superiores]
    labels.append('+95')
    df[col_tramo] = pd.cut(df[col_edad],
                           bins=bins,
                           include_lowest=True,
                           ordered=True,
                           labels=labels)
    return df

# Compute the age bands
# tramos_edad adds the column to the frame it receives and returns that frame.
censo = tramos_edad(df=censo)
censo.head(n=5)

Unnamed: 0,DPTO,LOC,PERPH02,PERNA01,CODLOC,tramo
0,1,20,2,22,1020,20-24
1,1,20,2,84,1020,80-84
2,1,20,1,21,1020,20-24
3,1,20,2,0,1020,0-4
4,1,20,2,31,1020,30-34


In [14]:
def agrupar_df(df, codloc, col_tramo, col_sexo):
    """Count rows per locality, sex and age band.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per person.
    codloc : str
        Name of the locality-code column.
    col_tramo : str
        Name of the age-band column.
    col_sexo : str
        Name of the sex column; the value 1 is labelled 'varones' and every
        other value 'mujeres'.

    Returns
    -------
    pandas.DataFrame
        Columns: ``codloc``, 'sexo' (renamed from ``col_sexo``),
        ``col_tramo``, 'personas' (row count), 'sexo_label' and
        'tramo_label' (the age band as a string).
    """
    # observed=False is explicit so that, when the age band is categorical,
    # empty bands are kept as zero-count rows regardless of the pandas default.
    df_group = (
        df.groupby([codloc, col_sexo, col_tramo], observed=False)
        .size()
        .reset_index(name='personas')
        .rename(columns={col_sexo: 'sexo'})
    )
    df_group['sexo_label'] = np.where(df_group['sexo'] == 1, 'varones', 'mujeres')
    # Read the band through the column name that was passed in, not a fixed
    # 'tramo' attribute.
    df_group['tramo_label'] = df_group[col_tramo].astype(str)
    return df_group

# Person counts by locality, sex and age band.
censo_agrupado = agrupar_df(
    df=censo,
    codloc='CODLOC',
    col_tramo='tramo',
    col_sexo='PERPH02',
)
censo_agrupado.head(n=5)

Unnamed: 0,CODLOC,sexo,tramo,personas,sexo_label,tramo_label
0,1020,1,0-4,41841,varones,0-4
1,1020,1,5-9,42268,varones,5-9
2,1020,1,10-14,43231,varones,10-14
3,1020,1,15-19,47064,varones,15-19
4,1020,1,20-24,50764,varones,20-24


In [15]:
# Show the last five rows of the grouped counts.
censo_agrupado.tail(n=5)

Unnamed: 0,CODLOC,sexo,tramo,personas,sexo_label,tramo_label
25275,9991,2,75-79,0,mujeres,75-79
25276,9991,2,80-84,0,mujeres,80-84
25277,9991,2,85-89,1,mujeres,85-89
25278,9991,2,90-94,0,mujeres,90-94
25279,9991,2,+95,0,mujeres,+95


In [16]:
# Export the grouped counts without the row index; any missing value would be
# written as 0. na_rep is given as a string, the type pandas documents for it.
censo_agrupado.to_csv('data/data_tramos_edad.csv', index=False, na_rep='0')