In [0]:
import pandas as pd
import requests
import time
from datetime import datetime



In [0]:
def get_weather_data(lat, lon, city_name):
    """Busca dados climáticos horários com timestamps para uma latitude e longitude."""
    url = (
        f"https://api.open-meteo.com/v1/forecast?"
        f"latitude={lat}&longitude={lon}"
        f"&hourly=temperature_2m,et0_fao_evapotranspiration,precipitation,relative_humidity_2m"
        f"&start_date=2025-01-01&end_date=2025-02-28"
        f"&timezone=auto"
    )
    
    response = requests.get(url)
    
    if response.status_code != 200:
        print(f"Falha ao buscar dados para {city_name}: status {response.status_code}")
        return []
    
    data = response.json()
    hourly = data.get("hourly", {})
    
    times = hourly.get("time", [])
    temps = hourly.get("temperature_2m", [])
    et0s = hourly.get("et0_fao_evapotranspiration", [])
    precs = hourly.get("precipitation", [])
    humids = hourly.get("relative_humidity_2m", [])
    
    records = []
    for t, temp, et0, prec, hum in zip(times, temps, et0s, precs, humids):
        if any(v is None for v in [temp, et0, prec, hum]):
            continue
        
        # Filtrar apenas datas de janeiro de 2025 (opcional)
        dt = datetime.fromisoformat(t)
        if not (datetime(2025, 1, 1) <= dt <= datetime(2025, 1, 31)):
            continue

        records.append({
            "city": city_name,
            "latitude": lat,
            "longitude": lon,
            "datetime": t,
            "temperature": temp,
            "evapotranspiration": et0,
            "precipitation": prec,
            "humidity": hum
        })
    
    return records

In [0]:
df_coordinates_spark = spark.sql(f"""SELECT * FROM delta. `/dbfs/Coordenadas/ExtracaoOriginal` """)

In [0]:
df_coordinates = df_coordinates_spark.toPandas()

In [0]:
weather_results = []

# Loop para buscar dados de cada cidade
for index, row in df_coordinates.iterrows():
    city = row["city"]
    lat, lon = row["lat"], row["lon"]
    
    print(f"Buscando dados para {city} ({lat}, {lon})...")
    
    city_data = get_weather_data(lat, lon, city)
    weather_results.extend(city_data)

    time.sleep(1)  # evitar sobrecarregar a API

# Converter para DataFrame
df_weather = pd.DataFrame(weather_results)

# Salvar
# df_weather.to_csv("weather_data.csv", index=False)
# df_weather.to_excel("weather_data.xlsx", index=False)

print("\nDados climáticos salvos com sucesso!")
print(df_weather.head())


Buscando dados para Rio de Janeiro (-22.89550, -43.40681)...
Buscando dados para São Paulo (-10.54156, -37.54603)...
Buscando dados para Madrid (40.42959, -3.68876)...
Buscando dados para Berlin (52.50234, 13.40451)...
Buscando dados para London (51.51411, -0.11451)...
Buscando dados para Lisbon (38.74422, -9.15188)...
Buscando dados para New York (40.67483, -73.97118)...
Buscando dados para Tokyo (35.70501, 139.51086)...
Buscando dados para Paris (48.86130, 2.34051)...

Dados climáticos salvos com sucesso!
             city   latitude  longitude          datetime  temperature  \
0  Rio de Janeiro  -22.89550  -43.40681  2025-01-13T16:00         24.7   
1  Rio de Janeiro  -22.89550  -43.40681  2025-01-13T17:00         24.2   
2  Rio de Janeiro  -22.89550  -43.40681  2025-01-13T18:00         23.2   
3  Rio de Janeiro  -22.89550  -43.40681  2025-01-13T19:00         22.5   
4  Rio de Janeiro  -22.89550  -43.40681  2025-01-13T20:00         22.6   

   evapotranspiration  precipitation  humi

In [0]:
df_weather.display()

city,latitude,longitude,datetime,temperature,evapotranspiration,precipitation,humidity
Rio de Janeiro,-22.8955,-43.40681,2025-01-13T16:00,24.7,0.32,0.0,64
Rio de Janeiro,-22.8955,-43.40681,2025-01-13T17:00,24.2,0.24,0.0,65
Rio de Janeiro,-22.8955,-43.40681,2025-01-13T18:00,23.2,0.11,0.0,70
Rio de Janeiro,-22.8955,-43.40681,2025-01-13T19:00,22.5,0.04,0.0,72
Rio de Janeiro,-22.8955,-43.40681,2025-01-13T20:00,22.6,0.0,0.0,72
Rio de Janeiro,-22.8955,-43.40681,2025-01-13T21:00,22.5,0.0,0.0,73
Rio de Janeiro,-22.8955,-43.40681,2025-01-13T22:00,22.4,0.0,0.0,74
Rio de Janeiro,-22.8955,-43.40681,2025-01-13T23:00,22.3,0.0,0.0,75
Rio de Janeiro,-22.8955,-43.40681,2025-01-14T00:00,22.1,0.0,0.0,75
Rio de Janeiro,-22.8955,-43.40681,2025-01-14T01:00,22.0,0.0,0.0,77


In [0]:
df_weather.describe()

Unnamed: 0,temperature,evapotranspiration,precipitation,humidity
count,3719.0,3719.0,3719.0,3719.0
mean,10.573434,0.085026,0.11167,69.988438
std,10.176061,0.154738,0.618161,19.771057
min,-11.0,0.0,0.0,11.0
25%,3.5,0.0,0.0,55.0
50%,7.7,0.03,0.0,75.0
75%,15.0,0.08,0.0,86.0
max,38.6,0.88,16.5,99.0


In [0]:
df_weather.isnull().sum()

Out[8]: city                  0
latitude              0
longitude             0
datetime              0
temperature           0
evapotranspiration    0
precipitation         0
humidity              0
dtype: int64

In [0]:
df_weather_spark = spark.createDataFrame(df_weather)

In [0]:
df_weather_spark.repartition(20)\
              .write.format("delta")\
              .mode("overwrite")\
              .option("overwriteSchema", "true")\
              .save("/dbfs/Bronze/ExtraçãoTempo")