# Geocoding

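This notebook looks up the latitude and longitude of each Brazilian municipality in the IBGE lookup table by geocoding its municipality and state names with the Mapbox Geocoding API.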

In [1]:
import os
import requests
import json
from dotenv import load_dotenv

# Load the environment file
load_dotenv("../.env")

# Read the access token
access_token = os.getenv("MAPBOX_ACCESS_TOKEN")

def get_coordinates(municipality, state, timeout=10):
    """Geocode a municipality with the Mapbox forward-geocoding endpoint.

    The search text sent to Mapbox is ``"<municipality>, <state>, Brazil"``.

    Parameters
    ----------
    municipality : str
        Municipality name.
    state : str
        Name of the state the municipality belongs to.
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up (default 10).

    Returns
    -------
    tuple or None
        ``(latitude, longitude)`` taken from the first feature of the
        response, or ``None`` when the response has no features, the status
        code is not 200, or the request itself fails.
    """
    from urllib.parse import quote

    base_url = "https://api.mapbox.com/geocoding/v5/mapbox.places/"

    # The search text is a URL *path segment*: percent-encode it so that
    # characters such as '/', '?', '#' or ';' inside a name cannot change
    # the structure of the URL.
    search_text = quote(f"{municipality}, {state}, Brazil", safe="")
    url = f"{base_url}{search_text}.json"

    try:
        # The token travels as a query parameter; the timeout keeps one
        # stalled connection from hanging a long batch of lookups.
        response = requests.get(
            url,
            params={"access_token": access_token},
            timeout=timeout,
        )
    except requests.RequestException as error:
        # Print only the exception type: the full message can embed the
        # request URL, which contains the access token.
        print(f"Error: Request failed ({type(error).__name__})")
        return None

    if response.status_code != 200:
        print(f"Error: Received status code {response.status_code}")
        return None

    features = response.json().get('features')
    if not features:
        return None

    # The coordinates are unpacked as (longitude, latitude); swap them for the caller.
    # NOTE(review): the first feature is taken as-is and may not be the
    # municipality itself — consider Mapbox's `country`/`types` filters; confirm.
    longitude, latitude = features[0]['geometry']['coordinates']
    return latitude, longitude

In [2]:
# Smoke-test the geocoder on three sample municipalities.
locations = [
    {"municipality": "São Paulo", "state": "São Paulo"},
    {"municipality": "Rio de Janeiro", "state": "Rio de Janeiro"},
    {"municipality": "Salvador", "state": "Bahia"},
]

for location in locations:
    municipality, state = location["municipality"], location["state"]
    coordinates = get_coordinates(municipality, state)

    # Report a failed lookup and move on to the next location.
    if not coordinates:
        print(f"Could not find coordinates for {municipality}, {state}")
        continue

    latitude, longitude = coordinates
    print(f"The coordinates for {municipality}, {state} are (Latitude: {latitude}, Longitude: {longitude})")


The coordinates for São Paulo, São Paulo are (Latitude: -23.510156, Longitude: -46.36696)
The coordinates for Rio de Janeiro, Rio de Janeiro are (Latitude: -22.921032, Longitude: -43.369465)
The coordinates for Salvador, Bahia are (Latitude: -12.98225, Longitude: -38.481277)


In [8]:
import pandas as pd
import numpy as np

In [5]:
# Load the municipality lookup table from the raw data folder.
df = pd.read_csv("../raw_data/municipality_lookup.csv")

In [9]:
# Split the lookup table into four consecutive, near-equal chunks.
# The row *positions* are split instead of the DataFrame itself: passing a
# DataFrame to np.array_split goes through DataFrame.swapaxes, which is
# deprecated in recent pandas and emits a FutureWarning. Chunk sizes, row
# order and index labels are the same as np.array_split(df, 4) would give.
dfs = [df.iloc[positions] for positions in np.array_split(np.arange(len(df)), 4)]

In [18]:
# Work on an independent copy of the first chunk: df1 later receives new
# columns, and without the copy those would also appear in dfs[0] (and can
# trigger pandas' SettingWithCopyWarning).
df1 = dfs[0].copy()

In [21]:
# Write each of the four chunks to its own CSV in the raw data folder,
# leaving the row index out of the files.
raw_chunk_paths = (
    "../raw_data/municipality_lookup_1.csv",
    "../raw_data/municipality_lookup_2.csv",
    "../raw_data/municipality_lookup_3.csv",
    "../raw_data/municipality_lookup_4.csv",
)
for chunk_position, chunk_path in enumerate(raw_chunk_paths):
    dfs[chunk_position].to_csv(chunk_path, index=False)

### Geocoding DF1

In [20]:
# Collect one latitude and one longitude per row of df1, in row order
latitudes, longitudes = [], []

# Geocode each (municipality, state) pair of the chunk
for municipality, state in zip(df1['Município'], df1['Estado']):
    coordinates = get_coordinates(municipality, state)

    # A failed lookup still contributes a placeholder so the lists stay
    # aligned with the DataFrame rows
    latitude, longitude = coordinates if coordinates else (None, None)
    latitudes.append(latitude)
    longitudes.append(longitude)

# Attach the results as two new columns
df1['Latitude'] = latitudes
df1['Longitude'] = longitudes

# Preview the first geocoded row
df1.head(1)

Unnamed: 0,Código_UF,Estado,UF,Código_IBGE,Município,Região,Latitude,Longitude
0,11,Rondônia,RO,1100015,Alta Floresta D'Oeste,Norte,-11.929692,-61.996128


In [22]:
# Save the first chunk to the transformed data folder, without the row index.
df1.to_csv("../transformed_data/municipality_lookup_1.csv", index=False)

In [23]:
# Read the first chunk back from the transformed data folder and display it.
df1 = pd.read_csv("../transformed_data/municipality_lookup_1.csv")
df1

Unnamed: 0,Código_UF,Estado,UF,Código_IBGE,Município,Região,Latitude,Longitude
0,11,Rondônia,RO,1100015,Alta Floresta D'Oeste,Norte,-11.929692,-61.996128
1,11,Rondônia,RO,1100023,Ariquemes,Norte,-9.907652,-63.033069
2,11,Rondônia,RO,1100031,Cabixi,Norte,-13.495606,-60.542444
3,11,Rondônia,RO,1100049,Cacoal,Norte,-11.433300,-61.442050
4,11,Rondônia,RO,1100056,Cerejeiras,Norte,-13.186987,-60.816836
...,...,...,...,...,...,...,...,...
1388,25,Paraíba,PB,2511905,Pitimbu,Nordeste,-7.472806,-34.811233
1389,25,Paraíba,PB,2512002,Pocinhos,Nordeste,-7.076073,-36.060447
1390,25,Paraíba,PB,2512036,Poço Dantas,Nordeste,-6.405016,-38.496479
1391,25,Paraíba,PB,2512077,Poço de José de Moura,Nordeste,-6.574410,-38.511457


In [24]:
# Load the second chunk from the transformed data folder and display it.
# NOTE(review): no cell visible in this notebook writes this file — presumably
# it was produced by a separate run of the geocoding loop; confirm.
df2 = pd.read_csv("../transformed_data/municipality_lookup_2.csv")
df2

Unnamed: 0,Código_UF,Estado,UF,Código_IBGE,Município,Região,Latitude,Longitude
0,25,Paraíba,PB,2512200,Prata,Nordeste,-7.219900,-35.894500
1,25,Paraíba,PB,2512309,Princesa Isabel,Nordeste,-7.736898,-37.992134
2,25,Paraíba,PB,2512408,Puxinanã,Nordeste,-7.147522,-35.961336
3,25,Paraíba,PB,2512507,Queimadas,Nordeste,-7.364040,-35.901664
4,25,Paraíba,PB,2512606,Quixaba,Nordeste,-7.031688,-37.147792
...,...,...,...,...,...,...,...,...
1388,31,Minas Gerais,MG,3146008,Ouro Fino,Sudeste,-22.283056,-46.369167
1389,31,Minas Gerais,MG,3146107,Ouro Preto,Sudeste,-20.385169,-43.503634
1390,31,Minas Gerais,MG,3146206,Ouro Verde de Minas,Sudeste,-18.068722,-41.271062
1391,31,Minas Gerais,MG,3146255,Padre Carvalho,Sudeste,-16.364700,-42.515000


In [25]:
# Load the third chunk from the transformed data folder and display it.
# NOTE(review): no cell visible in this notebook writes this file — presumably
# it was produced by a separate run of the geocoding loop; confirm.
df3 = pd.read_csv("../transformed_data/municipality_lookup_3.csv")
df3

Unnamed: 0,Código_UF,Estado,UF,Código_IBGE,Município,Região,Latitude,Longitude
0,31,Minas Gerais,MG,3146404,Paineiras,Sudeste,-18.904167,-45.534722
1,31,Minas Gerais,MG,3146503,Pains,Sudeste,-20.367265,-45.663335
2,31,Minas Gerais,MG,3146552,Pai Pedro,Sudeste,-15.531125,-43.066710
3,31,Minas Gerais,MG,3146602,Paiva,Sudeste,-21.289441,-43.417535
4,31,Minas Gerais,MG,3146701,Palma,Sudeste,-21.375317,-42.317541
...,...,...,...,...,...,...,...,...
1388,41,Paraná,PR,4119004,Pérola d'Oeste,Sul,-25.827794,-53.743333
1389,41,Paraná,PR,4119103,Piên,Sul,-26.098892,-49.429790
1390,41,Paraná,PR,4119152,Pinhais,Sul,-25.444349,-49.190031
1391,41,Paraná,PR,4119202,Pinhalão,Sul,-23.791378,-50.057040


In [26]:
# Load the fourth chunk from the transformed data folder and display it.
# NOTE(review): no cell visible in this notebook writes this file — presumably
# it was produced by a separate run of the geocoding loop; confirm.
df4 = pd.read_csv("../transformed_data/municipality_lookup_4.csv")
df4

Unnamed: 0,Código_UF,Estado,UF,Código_IBGE,Município,Região,Latitude,Longitude
0,41,Paraná,PR,4119301,Pinhão,Sul,-25.696089,-51.656637
1,41,Paraná,PR,4119400,Piraí do Sul,Sul,-24.534660,-49.939922
2,41,Paraná,PR,4119509,Piraquara,Sul,-25.442327,-49.060447
3,41,Paraná,PR,4119608,Pitanga,Sul,-24.759586,-51.763758
4,41,Paraná,PR,4119657,Pitangueiras,Sul,-23.233359,-51.585563
...,...,...,...,...,...,...,...,...
1388,52,Goiás,GO,5222005,Vianópolis,Centro Oeste,-16.747587,-48.513174
1389,52,Goiás,GO,5222054,Vicentinópolis,Centro Oeste,-17.735369,-49.804053
1390,52,Goiás,GO,5222203,Vila Boa,Centro Oeste,-15.035775,-47.056144
1391,52,Goiás,GO,5222302,Vila Propício,Centro Oeste,-15.455843,-48.885502


In [27]:
# Stack the four chunks back into a single table with a fresh 0..n-1 index,
# then display the result.
geocoded_chunks = [df1, df2, df3, df4]
result_df = pd.concat(geocoded_chunks, ignore_index=True)
result_df

Unnamed: 0,Código_UF,Estado,UF,Código_IBGE,Município,Região,Latitude,Longitude
0,11,Rondônia,RO,1100015,Alta Floresta D'Oeste,Norte,-11.929692,-61.996128
1,11,Rondônia,RO,1100023,Ariquemes,Norte,-9.907652,-63.033069
2,11,Rondônia,RO,1100031,Cabixi,Norte,-13.495606,-60.542444
3,11,Rondônia,RO,1100049,Cacoal,Norte,-11.433300,-61.442050
4,11,Rondônia,RO,1100056,Cerejeiras,Norte,-13.186987,-60.816836
...,...,...,...,...,...,...,...,...
5567,52,Goiás,GO,5222005,Vianópolis,Centro Oeste,-16.747587,-48.513174
5568,52,Goiás,GO,5222054,Vicentinópolis,Centro Oeste,-17.735369,-49.804053
5569,52,Goiás,GO,5222203,Vila Boa,Centro Oeste,-15.035775,-47.056144
5570,52,Goiás,GO,5222302,Vila Propício,Centro Oeste,-15.455843,-48.885502


In [28]:
# Write the combined table (all four chunks) to the ml_data folder,
# without the row index.
file_path = "../ml_data/municipality_lookup.csv"
result_df.to_csv(file_path, index=False)