In [0]:
import time
from urllib.parse import quote

import pandas as pd
import requests



In [0]:
def get_coordinates_nominatim(city, timeout=10, user_agent="city-coordinates-notebook/1.0"):
    """Attempt 1 - source: OpenStreetMap Nominatim search API.

    Args:
        city: Free-text place name to look up.
        timeout: Seconds to wait for the HTTP request before giving up.
        user_agent: Value sent in the ``User-Agent`` header. Nominatim's usage
            policy asks clients to identify their application; replace the
            default with something that identifies your own deployment.

    Returns:
        ``(lat, lon)`` of the first search hit, unconverted (exactly as found
        in the JSON payload), or ``(None, None)`` when the request fails, the
        status is not 200, the body is not a JSON list, or there are no hits.
    """
    try:
        response = requests.get(
            "https://nominatim.openstreetmap.org/search",
            # Let requests build the query string so reserved characters in
            # the name (&, #, ?, +) cannot corrupt or truncate the query.
            params={"q": city, "format": "json"},
            headers={"User-Agent": user_agent},
            timeout=timeout,
        )
    except requests.RequestException:
        # Connection error / timeout: report "not found" so that the caller
        # can move on to its fallback source instead of crashing.
        return None, None

    if response.status_code != 200:
        return None, None

    try:
        data = response.json()
    except ValueError:
        # Body was not valid JSON (e.g. an HTML error page).
        return None, None

    # Only a non-empty list of hits carries a usable first result.
    if not isinstance(data, list) or not data:
        return None, None
    return data[0]["lat"], data[0]["lon"]

def get_coordinates_geocode_xyz(city, timeout=10):
    """Attempt 2 - source: Geocode.xyz API.

    Args:
        city: Place name to look up; it is sent as a URL path segment.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        ``(latt, longt)`` from the JSON payload, unconverted, or
        ``(None, None)`` when the request fails, the status is not 200, the
        body is not a JSON object, the payload carries an ``"error"`` member,
        or either coordinate is missing / not numeric.
    """
    # Percent-encode the name: inside a path segment a raw "/", "?" or "#"
    # would change which resource is requested.
    encoded_city = quote(str(city), safe="")
    url = f"https://geocode.xyz/{encoded_city}?json=1"

    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Connection error / timeout is reported as "not found".
        return None, None

    if response.status_code != 200:
        return None, None

    try:
        data = response.json()
    except ValueError:
        return None, None

    # NOTE(review): the service appears to signal failures with an "error"
    # member on an HTTP 200 response - confirm against the API documentation.
    if not isinstance(data, dict) or "error" in data:
        return None, None

    lat, lon = data.get("latt"), data.get("longt")
    try:
        # Validation only: reject placeholders such as throttle messages or
        # empty objects. The original values are what gets returned.
        float(lat)
        float(lon)
    except (TypeError, ValueError):
        return None, None
    return lat, lon

def get_city_coordinates(cities, delay=1.0):
    """Look up coordinates for every name in ``cities``.

    Each name is sent to ``get_coordinates_nominatim`` first; if that yields
    no latitude or longitude, ``get_coordinates_geocode_xyz`` is tried.

    Args:
        cities: Iterable of place names (for example a pandas Series).
        delay: Seconds to pause between consecutive outbound requests, so the
            public geocoding services are not called more than about once per
            second. Pass ``0`` to disable the pause.

    Returns:
        A pandas DataFrame with the columns ``city``, ``lat`` and ``lon``, one
        row per input name in input order. ``lat``/``lon`` are ``None`` when
        neither source produced a result. The three columns are present even
        when ``cities`` is empty.
    """
    results = []

    for index, city in enumerate(cities):
        # Space out successive Nominatim calls (nothing to wait for on the first).
        if index and delay > 0:
            time.sleep(delay)
        lat, lon = get_coordinates_nominatim(city)

        if lat is None or lon is None:
            # Pause before hitting the fallback service as well.
            if delay > 0:
                time.sleep(delay)
            lat, lon = get_coordinates_geocode_xyz(city)

        results.append({"city": city, "lat": lat, "lon": lon})

    # Explicit columns keep the schema stable for empty input.
    return pd.DataFrame(results, columns=["city", "lat", "lon"])


In [0]:
# Empty frame that only fixes the column layout: city, lat, lon.
df = pd.DataFrame(columns=["city", "lat", "lon"])

In [0]:
# Example run: geocode a small, fixed sample of city names.
city_series = pd.Series(
    [
        "New York",
        "London",
        "Tokyo",
        "São Paulo",
        "Berlin",
        "Lisbon",
        "Madrid",
        "Paris",
        "Rio de Janeiro",
    ]
)
df_coordinates = get_city_coordinates(city_series)

In [0]:
# Print the geocoded table as plain text (one row per city).
print(df_coordinates)

             city        lat        lon
0        New York   40.67483  -73.97118
1          London   51.51411   -0.11451
2           Tokyo   35.70501  139.51086
3       São Paulo  -10.54156  -37.54603
4          Berlin   52.50234   13.40451
5          Lisbon   38.74422   -9.15188
6          Madrid   40.42959   -3.68876
7           Paris   48.86130    2.34051
8  Rio de Janeiro  -22.89550  -43.40681


In [0]:
# Sanity check: show the Python type of the result before it is handed to Spark.
print(type(df_coordinates))

<class 'pandas.core.frame.DataFrame'>


In [0]:
# Convert the pandas result into a Spark DataFrame.
# NOTE(review): `spark` is not defined in this notebook - presumably the
# session object injected by the runtime (e.g. Databricks); confirm.
df_coordinates_spark = spark.createDataFrame(df_coordinates)

In [0]:
# Persist the coordinates as a Delta table, replacing any previous data and schema.
# NOTE(review): repartition(20) spreads the rows over 20 partitions, while the
# example input above has only 9 cities - confirm this partition count is intended.
# NOTE(review): the target starts with "/dbfs/"; confirm how the runtime resolves
# this path for Spark writes and that it is the intended location.
(
    df_coordinates_spark
    .repartition(20)
    .write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .save("/dbfs/Coordenadas/ExtracaoOriginal")
)