<a href="https://colab.research.google.com/github/avellar1975/DataScience/blob/main/jupyter_notebooks/estimativa_populacional_municipio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<img src="https://github.com/avellar1975/DataScience/raw/main/img/bootcamp.jpg">

# Estudo de estimativas populacionais por municípo 2000-2020

## Importando bibliotecas

In [1]:
import pandas as pd

## Leitura de arquivo csv e transformando em DataFrame

<p>Os arquivos foram extraídos do site <a href="https://datasus.saude.gov.br/informacoes-de-saude-tabnet/" target = '_blank'>datasus</a>, opção Demográficas e Socioeconômicas >> População residente >>> Estudo de Estimativas populacionais por município, sexo e idade - 2000-2020
</p>
<p>Um arquivo com os municípios e coluna Unidade de Federação e o outro com o número de habitantes separados por sexo.
</p>

In [2]:
populacao = pd.read_csv('https://raw.githubusercontent.com/avellar1975/DataScience/main/dados/estimativa_populacional.csv', delimiter = ';')
sexo = pd.read_csv('https://raw.githubusercontent.com/avellar1975/DataScience/main/dados/sexo.csv', delimiter = ';')

## Criação da coluna 'UF' e preenchimento com as siglas
<p>A forma que o arquivo foi gerado não nos ajuda muito na análise, por este razão vamos criar uma coluna UF</p>

In [3]:
ufs = ['RO', 'AC', 'AM', 'RR', 'PA', 'AP', 'TO', 'MA', 'PI', 'CE',
       'RN', 'PB', 'PE', 'AL', 'SE', 'BA', 'MG', 'ES', 'RJ', 'SP', 'PR', 'SC',
       'RS', 'MS', 'MT', 'GO', 'DF']

for uf in ufs:
    for index, row in populacao.iterrows():
        if row[uf] != '-':
            populacao.loc[index,'UF'] = uf  

In [4]:
populacao

Unnamed: 0,Município,RO,AC,AM,RR,PA,AP,TO,MA,PI,CE,RN,PB,PE,AL,SE,BA,MG,ES,RJ,SP,PR,SC,RS,MS,MT,GO,DF,Total,UF
0,110001 Alta Floresta D'Oeste,22728,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,22728,RO
1,110037 Alto Alegre dos Parecis,13255,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,13255,RO
2,110040 Alto Paraíso,21847,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,21847,RO
3,110034 Alvorada D'Oeste,14106,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,14106,RO
4,110002 Ariquemes,109523,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,109523,RO
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5565,522200 Vianópolis,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,13977,-,13977,GO
5566,522205 Vicentinópolis,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,8873,-,8873,GO
5567,522220 Vila Boa,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,6312,-,6312,GO
5568,522230 Vila Propício,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,5882,-,5882,GO


## Data Frame população separada por sexo

In [5]:
sexo.head()

Unnamed: 0,Município,Masculino,Feminino,Total
0,110001 Alta Floresta D'Oeste,11631,11097,22728
1,110037 Alto Alegre dos Parecis,6764,6491,13255
2,110040 Alto Paraíso,11154,10693,21847
3,110034 Alvorada D'Oeste,7134,6972,14106
4,110002 Ariquemes,54747,54776,109523


> Os Códigos de Municípios do IBGE é um código composto de 7 dígitos, sendo os dois primeiros referentes ao código da Unidade da Federação. Os códigos que estão nas bases do datasus, no campo nome dos municípios, estão incompletos, faltando um dígito final.

## Reduzindo o número de colunas

In [6]:
populacao = populacao[['Município', 'UF', 'Total']]
populacao

Unnamed: 0,Município,UF,Total
0,110001 Alta Floresta D'Oeste,RO,22728
1,110037 Alto Alegre dos Parecis,RO,13255
2,110040 Alto Paraíso,RO,21847
3,110034 Alvorada D'Oeste,RO,14106
4,110002 Ariquemes,RO,109523
...,...,...,...
5565,522200 Vianópolis,GO,13977
5566,522205 Vicentinópolis,GO,8873
5567,522220 Vila Boa,GO,6312
5568,522230 Vila Propício,GO,5882


## Fazendo uma junção (join) entre os dois Data Frames

In [7]:
municipios = populacao.join(sexo.set_index(['Município', 'Total']), on=['Município', 'Total'])
municipios

Unnamed: 0,Município,UF,Total,Masculino,Feminino
0,110001 Alta Floresta D'Oeste,RO,22728,11631,11097
1,110037 Alto Alegre dos Parecis,RO,13255,6764,6491
2,110040 Alto Paraíso,RO,21847,11154,10693
3,110034 Alvorada D'Oeste,RO,14106,7134,6972
4,110002 Ariquemes,RO,109523,54747,54776
...,...,...,...,...,...
5565,522200 Vianópolis,GO,13977,6934,7043
5566,522205 Vicentinópolis,GO,8873,4704,4169
5567,522220 Vila Boa,GO,6312,3646,2666
5568,522230 Vila Propício,GO,5882,3027,2855


## Alterando a ordem das colunas

In [8]:
municipios = municipios[['Município', 'UF', 'Masculino', 'Feminino', 'Total']]
municipios

Unnamed: 0,Município,UF,Masculino,Feminino,Total
0,110001 Alta Floresta D'Oeste,RO,11631,11097,22728
1,110037 Alto Alegre dos Parecis,RO,6764,6491,13255
2,110040 Alto Paraíso,RO,11154,10693,21847
3,110034 Alvorada D'Oeste,RO,7134,6972,14106
4,110002 Ariquemes,RO,54747,54776,109523
...,...,...,...,...,...
5565,522200 Vianópolis,GO,6934,7043,13977
5566,522205 Vicentinópolis,GO,4704,4169,8873
5567,522220 Vila Boa,GO,3646,2666,6312
5568,522230 Vila Propício,GO,3027,2855,5882


## Trabalhando com coordenadas

In [22]:
import folium
from folium import plugins
from folium.plugins import MarkerCluster

In [10]:
# Criando o gráfico:
mapa = folium.Map(
    location=[-22.2445, -43.7129],
    zoom_start=14
)

In [11]:
folium.Marker(
    [-22.245120, -43.700489],
    popup='<i>Busto de Zamenhof</i>', 
    tooltip='Clique aqui!'
    ).add_to(mapa)

mapa

## Carregando base de coordenadas do IBGE

In [12]:
coordenadas = pd.read_csv('https://raw.githubusercontent.com/avellar1975/DataScience/main/dados/coordenadas.csv', delimiter = ',')
coordenadas.head()

Unnamed: 0,codigo_ibge,nome,latitude,longitude,capital,codigo_uf
0,5200050,Abadia de Goiás,-16.7573,-49.4412,0,52
1,3100104,Abadia dos Dourados,-18.4831,-47.3916,0,31
2,5200100,Abadiânia,-16.197,-48.7057,0,52
3,3100203,Abaeté,-19.1551,-45.4444,0,31
4,1500107,Abaetetuba,-1.72183,-48.8788,0,15


## Criando chave para realização de merge
<p>Na base municípios o código ibge que está junto com o nome dos municípios está incompleto (faltando o último dígito) e por isso foi preciso criar uma chave na base coordenadas com 6 (seis) dígito para fazer um merge entre os dois Data Frames.</p>

In [13]:
for index, row in coordenadas.iterrows():
    coordenadas.loc[index, 'chave'] = str(row['codigo_ibge'])[0:6]

coordenadas.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5570 entries, 0 to 5569
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   codigo_ibge  5570 non-null   int64  
 1   nome         5570 non-null   object 
 2   latitude     5570 non-null   float64
 3   longitude    5570 non-null   float64
 4   capital      5570 non-null   int64  
 5   codigo_uf    5570 non-null   int64  
 6   chave        5570 non-null   object 
dtypes: float64(2), int64(3), object(2)
memory usage: 304.7+ KB


## Separando o código no DF Municípios para fazer o merge

In [14]:
for index, row in municipios.iterrows():
    municipios.loc[index, 'chave'] = str(row['Município'].split()[0])

municipios.info()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5570 entries, 0 to 5569
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Município  5570 non-null   object
 1   UF         5570 non-null   object
 2   Masculino  5570 non-null   int64 
 3   Feminino   5570 non-null   int64 
 4   Total      5570 non-null   int64 
 5   chave      5570 non-null   object
dtypes: int64(3), object(3)
memory usage: 261.2+ KB


## Merge entre os dois Data Frames

In [15]:
df = municipios.merge(coordenadas, on='chave')
df

Unnamed: 0,Município,UF,Masculino,Feminino,Total,chave,codigo_ibge,nome,latitude,longitude,capital,codigo_uf
0,110001 Alta Floresta D'Oeste,RO,11631,11097,22728,110001,1100015,Alta Floresta D'Oeste,-11.92830,-61.9953,0,11
1,110037 Alto Alegre dos Parecis,RO,6764,6491,13255,110037,1100379,Alto Alegre dos Parecis,-12.13200,-61.8350,0,11
2,110040 Alto Paraíso,RO,11154,10693,21847,110040,1100403,Alto Paraíso,-9.71429,-63.3188,0,11
3,110034 Alvorada D'Oeste,RO,7134,6972,14106,110034,1100346,Alvorada D'Oeste,-11.34630,-62.2847,0,11
4,110002 Ariquemes,RO,54747,54776,109523,110002,1100023,Ariquemes,-9.90571,-63.0325,0,11
...,...,...,...,...,...,...,...,...,...,...,...,...
5565,522200 Vianópolis,GO,6934,7043,13977,522200,5222005,Vianópolis,-16.74050,-48.5159,0,52
5566,522205 Vicentinópolis,GO,4704,4169,8873,522205,5222054,Vicentinópolis,-17.73220,-49.8047,0,52
5567,522220 Vila Boa,GO,3646,2666,6312,522220,5222203,Vila Boa,-15.03870,-47.0520,0,52
5568,522230 Vila Propício,GO,3027,2855,5882,522230,5222302,Vila Propício,-15.45420,-48.8819,0,52


In [16]:
df.columns

Index(['Município', 'UF', 'Masculino', 'Feminino', 'Total', 'chave',
       'codigo_ibge', 'nome', 'latitude', 'longitude', 'capital', 'codigo_uf'],
      dtype='object')

In [17]:
df1 = df[['codigo_ibge', 'nome', 'UF', 'codigo_uf', 'Masculino', 'Feminino', 'Total', 'latitude', 'longitude']]
df1

Unnamed: 0,codigo_ibge,nome,UF,codigo_uf,Masculino,Feminino,Total,latitude,longitude
0,1100015,Alta Floresta D'Oeste,RO,11,11631,11097,22728,-11.92830,-61.9953
1,1100379,Alto Alegre dos Parecis,RO,11,6764,6491,13255,-12.13200,-61.8350
2,1100403,Alto Paraíso,RO,11,11154,10693,21847,-9.71429,-63.3188
3,1100346,Alvorada D'Oeste,RO,11,7134,6972,14106,-11.34630,-62.2847
4,1100023,Ariquemes,RO,11,54747,54776,109523,-9.90571,-63.0325
...,...,...,...,...,...,...,...,...,...
5565,5222005,Vianópolis,GO,52,6934,7043,13977,-16.74050,-48.5159
5566,5222054,Vicentinópolis,GO,52,4704,4169,8873,-17.73220,-49.8047
5567,5222203,Vila Boa,GO,52,3646,2666,6312,-15.03870,-47.0520
5568,5222302,Vila Propício,GO,52,3027,2855,5882,-15.45420,-48.8819


In [18]:
coord=[]
for lat,lng in zip(df1.latitude.values,df1.longitude.values):
  coord.append([lat,lng])

In [19]:
mapa2 = folium.Map(location=[-15.788497,-47.879873],zoom_start=5,tiles='Stamen Toner')

In [20]:
mapa2.add_child(plugins.HeatMap(coord))        
mapa2



---

## Plotando no gráfico as cidades com *Cluster*

In [64]:
df3 = df1.query('Total > 100000') # Limitar o tamanho da amostra, colocar Total > 0 para trazer toda a base

mapo = folium.Map(
    location=[-10.2400,	-48.3558],
    zoom_start=4,
    tiles='Stamen Terrain'
)


marker_cluster = MarkerCluster(name="Cidades do Brasil").add_to(mapo)

for index, row in df3.iterrows():
    folium.Marker(
        location=[row.latitude, row.longitude],
        popup = f'{row.nome!a}\n{row.Total}',
        icon=folium.Icon(color='red')
    ).add_to(marker_cluster)

In [65]:
df3

Unnamed: 0,codigo_ibge,nome,UF,codigo_uf,Masculino,Feminino,Total,latitude,longitude
4,1100023,Ariquemes,RO,11,54747,54776,109523,-9.90571,-63.0325
23,1100122,Ji-Paraná,RO,11,63909,66100,130009,-10.87770,-61.9322
36,1100205,Porto Velho,RO,11,278533,260821,539354,-8.76077,-63.8999
51,1100304,Vilhena,RO,11,50479,51732,102211,-12.75020,-60.1488
67,1200401,Rio Branco,AC,12,201499,211919,413418,-9.97499,-67.8243
...,...,...,...,...,...,...,...,...,...
5520,5218805,Rio Verde,GO,52,124673,116845,241518,-17.79230,-50.9192
5546,5220454,Senador Canedo,GO,52,59096,59355,118451,-16.70840,-49.0914
5555,5221403,Trindade,GO,52,63731,66092,129823,-16.65170,-49.4927
5563,5221858,Valparaíso de Goiás,GO,52,83693,88442,172135,-16.06510,-47.9757


In [66]:
mapo

#### Fonte de dados:
* http://tabnet.datasus.gov.br/cgi/deftohtm.exe?popsvs/cnv/popbr.def (dados populacionais estimados por cidade)
* https://datasus.saude.gov.br/informacoes-de-saude-tabnet/
* https://minerandodados.com.br/plotando-mapas-interativos-com-python-visualize-dados-de-vendas-por-regiao/