### Import libraries

In [1]:
import requests
import ssl
import pandas as pd
import numpy as np
import os

### Fetch data from the API | Census 2022


In [3]:
class TLSAdapter(requests.adapters.HTTPAdapter):
    """HTTP adapter that connects with a relaxed SSL context.

    Mounted on a session, it makes every pooled connection use an SSL context
    with a lowered cipher security level and the legacy-server-connect option
    enabled.
    """

    def init_poolmanager(self, *args, **kwargs):
        """Build the pool manager, injecting the relaxed SSL context.

        All positional and keyword arguments are forwarded to the parent
        implementation; only ``ssl_context`` is overridden.
        """
        legacy_context = ssl.create_default_context()
        # Accept the default cipher list at OpenSSL security level 1.
        legacy_context.set_ciphers("DEFAULT@SECLEVEL=1")
        # 0x4 is OP_LEGACY_SERVER_CONNECT -- the option that actually makes
        # the handshake with this server succeed.
        legacy_context.options = legacy_context.options | 0x4
        kwargs.update(ssl_context=legacy_context)
        return super().init_poolmanager(*args, **kwargs)

# IBGE aggregate 4712, variable 382, period 2022, for all localities at level N6.
url = 'https://servicodados.ibge.gov.br/api/v3/agregados/4712/periodos/2022/variaveis/382?localidades=N6[all]'

# One GET is enough: the body is decoded later via response.json(), so an
# extra request whose parsed result is thrown away only doubles the download.
with requests.Session() as s:
    # Route every https:// request through the relaxed-TLS adapter.
    s.mount("https://", TLSAdapter())
    response = s.get(url)

### Ensure the request was successful

In [4]:
# Only HTTP 200 counts as success; for any other status the code is reported
# and `data` is not assigned by this cell.
status = response.status_code
if status != 200:
    print(f"Request failed with status code {status}")
else:
    data = response.json()
    print("Successful request!")

Successful request!


### Cleaning and transforming the data

In [10]:
data = response.json()

# Flatten the nested payload into one record per locality.
# Values are read straight into the record so the builtin `id` is not
# shadowed by a loop variable for the rest of the notebook.
flattened_data = []
for item in data[0]['resultados'][0]['series']:
    locality = item['localidade']
    flattened_data.append({
        'id': locality['id'],
        'name': locality['nome'],
        'population': item['serie']['2022'],
    })

# Create DataFrame
df = pd.DataFrame(flattened_data)

# Names look like "City - UF". Split on the LAST separator only, so a city
# name that itself contains " - " still yields exactly two columns.
df[['city', 'state']] = df['name'].str.rsplit(' - ', n=1, expand=True)

# Convert the population column to integers
df['population'] = df['population'].astype(int)

# Add the 'year' column
df['year'] = 2022

# Drop the 'name' column and reorder columns
df = df[['id', 'year', 'population', 'city', 'state']]

# Load existing data from CSV
# NOTE(review): machine-specific absolute path -- this cell only runs where
# this file already exists.
csv_path = 'C:\\Users\\jlfen\\OneDrive\\Documentos\\JoaoKasten\\005_applied_projects\\project.censo-dataviz\\src\\data\\census_data.csv'
df_existing = pd.read_csv(csv_path)

# Combine new and existing data
df_combined = pd.concat([df_existing, df], ignore_index=True)

# Give the dedup key a single dtype. Ids coming from the JSON payload may be
# strings while ids read back from the CSV are parsed as integers; "1100015"
# and 1100015 would otherwise never compare equal and every re-run would
# append a second copy of the same year.
df_combined['id'] = df_combined['id'].astype('int64')

# Remove duplicates based on 'id' and 'year' columns
# (the first occurrence, i.e. the row already present in the CSV, is kept).
df_combined = df_combined.drop_duplicates(subset=['id', 'year'])

# Save the combined DataFrame back to the CSV
df_combined.to_csv(csv_path, index=False, encoding='utf-8-sig')

In [11]:
# Plain-text dump of the 2022 frame built above; pandas abbreviates long
# frames to their first and last rows.
print(df)

           id  year  population                   city state
0     1100015  2022       21441  Alta Floresta D'Oeste    RO
1     1100023  2022       96152              Ariquemes    RO
2     1100031  2022        5363                 Cabixi    RO
3     1100049  2022       86502                 Cacoal    RO
4     1100056  2022       15798             Cerejeiras    RO
...       ...   ...         ...                    ...   ...
5565  5222005  2022       14889             Vianópolis    GO
5566  5222054  2022        8665         Vicentinópolis    GO
5567  5222203  2022        4181               Vila Boa    GO
5568  5222302  2022        5808          Vila Propício    GO
5569  5300108  2022     2792551               Brasília    DF

[5570 rows x 5 columns]
