# DEMOGRAPHIC PROFILING

## NOTEBOOK PREPARATION

In [12]:
from br_demography  import municipality_migration as mm
from br_demography  import municipality_births as mb
from br_demography  import municipality_pop_pyramid as mep
from typing import List
import basedosdados as bd
from dotenv import load_dotenv
import os
import pandas as pd
import numpy as np
# Load the environment variables declared in the package's .env file
load_dotenv(dotenv_path='./br_demography/.env')

# Google Cloud project ID passed to the query helpers below (None when the variable is unset)
project_id = os.environ.get('GOOGLE_CLOUD_PROJECT_ID')

## CENSUS MICRODATA COLLECTION

In [2]:
# 2022 census: population (Pop) by sex and age band for municipality 4104006
df_age_pyramid_2022 = mep.query_total_pop_by_sex_age_2022(
    mun_id=4104006,
    project_id=project_id,
)
# Preview the first two and the last two rows
df_age_pyramid_2022.iloc[[0, 1, -2, -1]]

Downloading: 100%|██████████| 42/42 [00:00<00:00, 137.35rows/s]


Unnamed: 0,Sexo,Idade,Pop
0,Masculino,0 a 4 anos,1644
1,Masculino,10 a 14 anos,1839
2,Masculino,100 anos ou mais,0
3,Masculino,15 a 19 anos,1768
4,Masculino,20 a 24 anos,1901
5,Masculino,25 a 29 anos,1909
6,Masculino,30 a 34 anos,1914
7,Masculino,35 a 39 anos,1822
8,Masculino,40 a 44 anos,1775
9,Masculino,45 a 49 anos,1566


In [3]:
# 2010 census: weighted totals (Peso) by sex and single year of age for municipality 4104006
df_age_pyramid_2010 = mep.query_total_pop_by_sex_age_2010(
    mun_id=4104006,
    project_id=project_id,
)
# Preview the first two and the last two rows
df_age_pyramid_2010.iloc[[0, 1, -2, -1]]

Downloading: 100%|██████████| 178/178 [00:00<00:00, 539.70rows/s]


Unnamed: 0,Sexo,Idade,Peso
0,Masculino,0,285.329432
1,Masculino,1,308.438674
2,Masculino,2,435.211624
3,Masculino,3,232.882572
4,Masculino,4,313.801776
...,...,...,...
173,Feminino,86,8.856400
174,Feminino,87,10.146322
175,Feminino,88,21.523678
176,Feminino,89,9.678991


In [4]:
# 2000 census: weighted totals (Peso) by sex and single year of age for municipality 4104006
df_age_pyramid_2000 = mep.query_total_pop_by_sex_age_2000(
    mun_id=4104006,
    project_id=project_id,
)
# Preview the first two and the last two rows
df_age_pyramid_2000.iloc[[0, 1, -2, -1]]

Downloading: 100%|██████████| 169/169 [00:00<00:00, 533.69rows/s]


Unnamed: 0,Sexo,Idade,Peso
0,Masculino,0,405.448882
1,Masculino,1,289.347423
2,Masculino,2,448.159537
3,Masculino,3,421.941872
4,Masculino,4,446.102287
...,...,...,...
164,Feminino,81,4.532966
165,Feminino,82,8.144345
166,Feminino,83,12.709070
167,Feminino,84,9.751749


## DATA TREATMENT

### DATA STANDARDIZATION

In [5]:
# Regroup the 2022 table with mep.standard_age_groups, using the 2022 age-band lookup CSV.
# NOTE: the result replaces df_age_pyramid_2022, so re-running this cell on its own feeds the
# already-treated frame back into standard_age_groups; re-run the 2022 query cell first.
df_age_pyramid_2022 = mep.standard_age_groups(
    df=df_age_pyramid_2022,
    age_group_csv_path='./br_demography/source/tab/faixas_etarias_censo_2022.csv',
    year=2022,
)
# Preview the first two and the last two rows
df_age_pyramid_2022.iloc[[0, 1, -2, -1]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Pop
Ano,Sexo,Faixa Etária,Unnamed: 3_level_1
2022,Feminino,0 a 9 anos,3369
2022,Feminino,10 a 19 anos,3461
2022,Feminino,20 a 29 anos,3798
2022,Feminino,30 a 39 anos,3962
2022,Feminino,40 a 49 anos,3596
2022,Feminino,50 a 59 anos,2979
2022,Feminino,60 a 69 anos,1998
2022,Feminino,70 a 79 anos,914
2022,Feminino,80 anos ou mais,362
2022,Masculino,0 a 9 anos,3415


In [6]:
# Regroup the 2010 table with mep.standard_age_groups, using the 2000/2010 age-band lookup CSV.
# NOTE: the result replaces df_age_pyramid_2010, so re-running this cell on its own feeds the
# already-treated frame back into standard_age_groups; re-run the 2010 query cell first.
df_age_pyramid_2010 = mep.standard_age_groups(
    df=df_age_pyramid_2010,
    age_group_csv_path='./br_demography/source/tab/faixas_etarias_censo_2000_2010.csv',
    year=2010,
)
# Preview the first two and the last two rows
df_age_pyramid_2010.iloc[[0, 1, -2, -1]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Pop
Ano,Sexo,Faixa Etária,Unnamed: 3_level_1
2010,Feminino,0 a 9 anos,3177
2010,Feminino,10 a 19 anos,3753
2010,Feminino,20 a 29 anos,3565
2010,Feminino,30 a 39 anos,3135
2010,Feminino,40 a 49 anos,2631
2010,Feminino,50 a 59 anos,1784
2010,Feminino,60 a 69 anos,886
2010,Feminino,70 a 79 anos,461
2010,Feminino,80 anos ou mais,149
2010,Masculino,0 a 9 anos,3192


In [7]:
# Regroup the 2000 table with mep.standard_age_groups, using the 2000/2010 age-band lookup CSV.
# NOTE: the result replaces df_age_pyramid_2000, so re-running this cell on its own feeds the
# already-treated frame back into standard_age_groups; re-run the 2000 query cell first.
df_age_pyramid_2000 = mep.standard_age_groups(
    df=df_age_pyramid_2000,
    age_group_csv_path='./br_demography/source/tab/faixas_etarias_censo_2000_2010.csv',
    year=2000,
)
# Preview the first two and the last two rows
df_age_pyramid_2000.iloc[[0, 1, -2, -1]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Pop
Ano,Sexo,Faixa Etária,Unnamed: 3_level_1
2000,Feminino,0 a 9 anos,3720
2000,Feminino,10 a 19 anos,3517
2000,Feminino,20 a 29 anos,3248
2000,Feminino,30 a 39 anos,2793
2000,Feminino,40 a 49 anos,1803
2000,Feminino,50 a 59 anos,993
2000,Feminino,60 a 69 anos,591
2000,Feminino,70 a 79 anos,268
2000,Feminino,80 anos ou mais,63
2000,Masculino,0 a 9 anos,3939


### INTEGRATION OF CENSUS DATA FROM 2000 TO 2022

In [51]:
def concatenate_treated_dfs(dfs: List[pd.DataFrame]) -> pd.DataFrame:
    """
    Combine census tables treated by ``standard_age_groups`` into one wide table.

    The frames are stacked, their index is moved into columns, and the result is
    pivoted so that every census year becomes its own column.

    Parameters
    ----------
    dfs : List[pd.DataFrame]
        Frames that expose ``Ano``, ``Sexo`` and ``Faixa Etária`` as index levels
        and carry a ``Pop`` column.

    Returns
    -------
    pd.DataFrame
        Indexed by (``Sexo``, ``Faixa Etária``) with one integer column per ``Ano``
        value; the columns axis has no name.

    Raises
    ------
    ValueError
        Raised by ``pd.concat`` when ``dfs`` is empty.
    """
    stacked = pd.concat(objs=dfs, ignore_index=False).reset_index()

    wide = stacked.pivot_table(
        index=['Sexo', 'Faixa Etária'],
        columns='Ano',
        values=['Pop'],
        # pandas' default aggregation, spelled out: duplicated
        # (Sexo, Faixa Etária, Ano) rows are averaged, not summed.
        aggfunc='mean',
        # A group absent from one census year would otherwise be NaN and make
        # the integer cast below raise; treat it as a population of zero.
        fill_value=0,
    )

    # astype(int) truncates any fractional part left by the aggregation.
    wide = wide.astype(int)

    # values=['Pop'] produces a two-level column index ('Pop', year); keep only the year.
    wide = wide.droplevel(level=0, axis=1)
    wide.columns.name = None

    return wide

In [52]:
# Merge the three standardized census tables into a single table
df_age_pyramid = concatenate_treated_dfs(
    dfs=[
        df_age_pyramid_2022,
        df_age_pyramid_2010,
        df_age_pyramid_2000,
    ],
)
# Preview the first two and the last two rows
df_age_pyramid.iloc[[0, 1, -2, -1]]

## INTERPOLATING POPULATION DATA ACROSS SEX AND AGE GROUPS

In [54]:
# Census years bounding each intercensal period, as (first year, last year) pairs.
# Kept explicit so the bounds never have to be parsed back out of a column label.
periodos_intercensitarios = [(2000, 2010), (2010, 2022)]

# Annual geometric growth rate per sex / age group for each intercensal period:
#     r = (P_end / P_start) ** (1 / (end - start)) - 1
# A group with zero population at the start of a period yields a non-finite rate;
# the NaN estimates it produces are replaced by 0 further down.
df_taxa_crecimento_pop = pd.DataFrame()
for ano_inicial, ano_final in periodos_intercensitarios:
    df_taxa_crecimento_pop[f'{ano_inicial} a {ano_final}'] = (
        (df_age_pyramid[ano_final] / df_age_pyramid[ano_inicial]) ** (1 / (ano_final - ano_inicial)) - 1
    )

# Estimate the population of every year in each period from the period's starting
# census and its growth rate. The estimates are written to a working copy, so
# df_age_pyramid is only replaced once the whole calculation has succeeded.
df_pop_estimada = df_age_pyramid.copy()
for taxa, (ano_inicial, ano_final) in zip(df_taxa_crecimento_pop.columns, periodos_intercensitarios):
    for ano in range(ano_inicial, ano_final):
        df_pop_estimada[ano] = (
            df_age_pyramid[ano_inicial] * (1 + df_taxa_crecimento_pop[taxa]) ** (ano - ano_inicial)
        )

# Order the columns chronologically, from the first to the last census year (2000 to 2022).
anos_ordenados = list(range(int(df_pop_estimada.columns.min()), int(df_pop_estimada.columns.max()) + 1))

# NOTE(review): astype(int) truncates the fractional part of every estimate rather than
# rounding it; kept as in the original so published figures do not change.
df_age_pyramid = df_pop_estimada[anos_ordenados].fillna(0).astype(int)

# Save the preliminary results, creating the output folder first if it does not exist.
caminho_resultado = './br_demography/results/tab/pop_municipios_rmc_2000_2022_estimativa_intercensitaria.csv'
os.makedirs(os.path.dirname(caminho_resultado), exist_ok=True)
df_age_pyramid.to_csv(caminho_resultado, sep=';', encoding='utf-8')

# Preview the first two and the last two rows
df_age_pyramid.iloc[np.r_[0:2, -2:0]]



Unnamed: 0_level_0,Unnamed: 1_level_0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
Sexo,Faixa Etária,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Feminino,0 a 9 anos,3720,3661,3604,3548,3492,3437,3383,3331,3278,3227,...,3223,3239,3255,3271,3287,3303,3319,3336,3352,3369
Feminino,10 a 19 anos,3517,3539,3562,3586,3609,3633,3656,3680,3704,3728,...,3677,3653,3628,3604,3579,3555,3531,3508,3484,3461
Masculino,70 a 79 anos,250,265,280,297,315,334,354,376,398,422,...,518,543,570,599,628,659,692,727,763,801
Masculino,80 anos ou mais,63,67,71,76,82,87,93,100,106,114,...,147,157,168,179,191,204,217,232,247,264
