### Import libraries

In [34]:
import requests
import pandas as pd
import numpy as np
import os

### Fetch data from API | Census 1991, 2000 e 2010

In [35]:
url = 'https://servicodados.ibge.gov.br/api/v3/agregados/156/periodos/1991|2000|2010/variaveis/134?localidades=N6[all]'
response = requests.get(url)

### Ensure the request was successful

In [36]:
if response.status_code == 200:
    data = response.json()
    print("Successful request!")
else:
    print(f"Request fail with status code {response.status_code}")

Successful request!


### Cleanning and transforming the data

In [37]:
data = response.json()

# Flatten data
flattened_data = []
for item in data[0]['resultados'][0]['series']:
    id = item['localidade']['id']
    name = item['localidade']['nome']
    
    # Iterate over years in the 'serie' dictionary
    for year, population in item['serie'].items():
        # Convert '...' to NaN
        if population == '...':
            population = np.nan
        flattened_data.append({'id': id, 'name': name, 'year': year, 'population': population})

# Create DataFrame
df = pd.DataFrame(flattened_data)

# Split 'name' into 'city' and 'state'
df[['city', 'state']] = df['name'].str.split(' - ', expand=True)

# Convert the 'year' and 'population' columns
df['year'] = df['year'].astype(int)
df['population'] = df['population'].fillna(-1).astype(int)
df['population'] = df['population'].replace(-1, np.nan)

# Drop the 'name' columns
df = df.drop(columns=['name'])

# Path to the CSV file
csv_path = 'C:\\Users\\jlfen\\OneDrive\\Documentos\\JoaoKasten\\005_applied_projects\\project.censo-dataviz\\data\\census_data.csv'

# Check if the CSV file already exists
if os.path.exists(csv_path):
    # Load existing data from CSV
    df_existing = pd.read_csv(csv_path, error_bad_lines=False, warn_bad_lines=True)
    
    # Combine new and existing data
    df_combined = pd.concat([df_existing, df], ignore_index=True)
    
    # Remove duplicates based on 'id' and 'year' columns
    df_combined.drop_duplicates(subset=['id', 'year'], inplace=True)
    
    # Save the combined DataFrame back to the CSV
    df_combined.to_csv(csv_path, index=False, encoding='utf-8-sig')
else:
    # If the file doesn't exist, just save the new data
    df.to_csv(csv_path, index=False)




  df_existing = pd.read_csv(csv_path, error_bad_lines=False, warn_bad_lines=True)


  df_existing = pd.read_csv(csv_path, error_bad_lines=False, warn_bad_lines=True)


### Saving _df_ to a csv file