In [1]:
#download population data

import pandas as pd
from pysus.ftp.utils import zfill_year
import requests
from pysus.online_data import IBGE
from pprint import pprint

In [2]:
def get_available_years(source):
    return sorted(set([zfill_year(f.name[-2:]) for f in IBGE.ibge.get_files(source=source)]))


pprint(f"The years available for source `POP` are: {get_available_years('POP')}")

('The years available for source `POP` are: [1992, 1993, 1994, 1995, 1997, '
 '1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, '
 '2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]')


In [7]:
get_available_years('censo')

[1991, 2000, 2010]

In [3]:
help(IBGE.get_population)

Help on function get_population in module pysus.online_data.IBGE:

get_population(year: int, source: Literal['POP', 'censo', 'POPTCU', 'projpop'] = 'POPTCU', censo_data: Literal['ALF', 'ESCA', 'ESCB', 'IDOSO', 'RENDA'] = 'ALF') -> pandas.core.frame.DataFrame
    Get population data from IBGE as shared by DATASUS
    :param year: year of the data
    :param source: 
         "POP" - 1992-presente: Estimativas populacionais estratificadas por 
                 idade e sexo.
         "censo" - 1991, 2000 e 2010: Censos Demográficos
         "POPTCU" - 1992-presente: Estimativas populacionais enviadas para o TCU,
                    estratificadas por idade e sexo pelo MS/SGEP/Datasus.
         "projpop": Estimativas preliminares para os anos intercensitários dos 
                    totais populacionais, estratificadas por idade e sexo pelo 
                    MS/SGEP/Datasus.
    :param censo_data: 
        "ALF": Censo Demográfico
        "ESCA": Censo Escolar da Educação Básica
      

In [4]:
pop_list = [ IBGE.get_population(year, source="POP") for year in get_available_years('POP')]

In [5]:
len(pop_list)

29

In [8]:
df = pd.concat(pop_list)
df.head()

Unnamed: 0,MUNIC_RES,ANO,POPULACAO
0,110001,1992,34768
1,110002,1992,61737
2,110003,1992,8633
3,110004,1992,72462
4,110005,1992,23280


In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 158821 entries, 0 to 5569
Data columns (total 3 columns):
 #   Column     Non-Null Count   Dtype 
---  ------     --------------   ----- 
 0   MUNIC_RES  158821 non-null  string
 1   ANO        158821 non-null  string
 2   POPULACAO  158821 non-null  string
dtypes: string(3)
memory usage: 4.8 MB


In [10]:
df.describe()

Unnamed: 0,MUNIC_RES,ANO,POPULACAO
count,158821,158821,158821
unique,11141,29,68845
top,110001,2021,2630
freq,24,5570,21


In [12]:
df.to_csv("sample/population.csv",index=False)