In [12]:
import ipynb_helpers as helpers
import aiohttp
import asyncio
from datetime import datetime, timedelta
import json
import pandas as pd
import calendar
import time

In [2]:
# AEMET OpenData credentials: the key is obtained through helpers.get_env_var
# instead of being hard-coded, and `headers` carries it on every API request.
api_key = helpers.get_env_var('AEMET_API_KEY')
headers = dict(api_key=api_key)

In [4]:
async def get_climatologia_data_rango(
    session: aiohttp.ClientSession,
    year_from: int,
    month_from: int,
    day_from: int,
    year_to: int,
    month_to: int,
    day_to: int
):
    """
    Fetch the daily climatological values of all stations for a date range.

    The endpoint accepts a range of at most 15 days per request.

    Two requests are made: the first one returns a payload whose first
    element may contain a 'datos' link; when it does, that link is requested
    and the first element of its response is decoded as JSON.

    Returns the decoded JSON, or None when the first response has no
    'datos' entry.
    """

    def _as_api_timestamp(year, month, day):
        # Date format expected by the API: AAAA-MM-DDTHH:MM:SSUTC
        return datetime(year, month, day).strftime("%Y-%m-%dT00:00:00UTC")

    url_template = (
        'https://opendata.aemet.es/opendata/api'
        '/valores/climatologicos/diarios/datos'
        '/fechaini/{fechaIniStr}/fechafin/{fechaFinStr}/todasestaciones'
    )

    request_url = helpers.replace_url_params(
        url_template,
        fechaIniStr=_as_api_timestamp(year_from, month_from, day_from),
        fechaFinStr=_as_api_timestamp(year_to, month_to, day_to),
    )

    link_response = await helpers.make_request_async('get', request_url, session, headers=headers)
    first_entry = link_response[0]

    # Without a 'datos' link there is nothing to download.
    if 'datos' not in first_entry:
        return None

    payload = await helpers.make_request_async('get', first_entry['datos'], session, headers=headers)
    return json.loads(payload[0])

In [None]:
# Module-level accumulator for the downloaded records. It is declared in its
# own cell, outside the download function, so rows fetched before an error
# are not lost.
all_data = list()

In [32]:
async def get_climatologia_since_date(
    start_date,
    delay: int = 5,
    max_retries: int = 3,
    pause: int = 5,
):
    """
    Download daily climatological values from ``start_date`` up to yesterday.

    The period is walked in consecutive 15-day windows (the maximum range of
    a single request). The rows of every successful window are appended to
    the module-level ``all_data`` list, so partial results survive a failure.

    Args:
        start_date: ``datetime`` of the first day to download.
        delay: Seconds to wait before retrying a failed window. It doubles
            after each failed attempt and is reset for every new window.
        max_retries: Maximum number of attempts per window.
        pause: Seconds to wait after each successfully downloaded window.

    Returns:
        None. Stops early (after printing "Data is None") when a window
        yields no data.

    Raises:
        Exception: When a window still fails after ``max_retries`` attempts.
    """

    window_days = 15  # maximum range accepted by a single request
    yesterday = datetime.now() - timedelta(days=1)
    current_date = start_date

    async with aiohttp.ClientSession() as session:

        # Requests are built from year/month/day only, so compare calendar
        # days: a window starting after yesterday would be an inverted range.
        while current_date.date() <= yesterday.date():

            end_date = min(current_date + timedelta(days=window_days - 1), yesterday)

            range_label = (
                f"{current_date.year}-{current_date.month}-{current_date.day}"
                f" to {end_date.year}-{end_date.month}-{end_date.day}"
            )
            print(f"Fetching data for date range {range_label}...")

            retry_delay = delay  # back-off restarts for every window
            last_error = None

            for attempt in range(max_retries):

                print(f"Attempt {attempt+1}/{max_retries}")

                try:
                    data = await get_climatologia_data_rango(
                        session,
                        current_date.year, current_date.month, current_date.day,
                        end_date.year, end_date.month, end_date.day,
                    )
                except Exception as e:
                    last_error = e
                    print(e, e.args)
                    # No point in waiting after the final attempt.
                    if attempt + 1 < max_retries:
                        print(f"Error. Retrying in {retry_delay} seconds... (Attempt {attempt+1}/{max_retries})")
                        await asyncio.sleep(retry_delay)
                        retry_delay *= 2
                    continue

                if data is None:
                    print("Data is None")
                    return

                all_data.extend(data)
                print(f"Retrieved data for date range {range_label}")
                break
            else:
                # Runs only when no attempt succeeded; without it the outer
                # loop would retry the same window forever.
                raise Exception("Max retries reached. API is blocking requests.") from last_error

            current_date += timedelta(days=window_days)
            await asyncio.sleep(pause)

            print("-"*10)
                

In [None]:
# First pass: start two years back (counted as 365-day years).
# The original run of this pass failed at 2024-05-24; a later cell resumes
# the download from that date.
YEARS_BACK = 2
start_date = datetime.now() - timedelta(days=365 * YEARS_BACK)

In [None]:
# Initial download starting at start_date; rows are appended to all_data.
await get_climatologia_since_date(start_date)

In [None]:
# Build a frame from everything accumulated in all_data so far.
df = pd.DataFrame(all_data)

In [33]:
# Show the last downloaded row; its `fecha` value marks where the download stopped.
df.tail(1)

Unnamed: 0,fecha,indicativo,nombre,provincia,altitud,tmed,prec,tmin,horatmin,tmax,...,hrMedia,dir,velmedia,racha,horaracha,presMax,horaPresMax,presMin,horaPresMin,sol
410038,2024-05-23,1021Y,ARTICUTZA (AUTOMATICA),NAVARRA,305,120,2,79,21:58,160,...,73,,,,,,,,,


In [34]:
# Resume point: the day after the last record stored by the first pass (2024-05-23).
start_date = datetime(year=2024, month=5, day=24)

In [None]:
# Second pass from the resume date; rows are appended to the same all_data list.
await get_climatologia_since_date(start_date)

In [37]:
# Rebuild the frame from the complete accumulator, persist it as CSV
# (without the index column) and preview the last rows.
df = pd.DataFrame(data=all_data)
df.to_csv(path_or_buf='datasets/climatologia_historica_2yrs.csv', index=False)
df.tail(n=5)

Unnamed: 0,fecha,indicativo,nombre,provincia,altitud,tmed,prec,tmin,horatmin,tmax,...,hrMedia,dir,velmedia,racha,horaracha,presMax,horaPresMax,presMin,horaPresMin,sol
640498,2025-02-09,C665T,VALLESECO,LAS PALMAS,900,123.0,0,72.0,04:18,174.0,...,70,34,11,42,14:10,,,,,
640499,2025-02-09,4061X,QUINTANAR DE LA ORDEN,TOLEDO,692,,0,,,,...,63,24,3,28,12:00,9451.0,10,9414.0,23.0,
640500,2025-02-09,2096B,LICERAS,SORIA,1150,72.0,2,39.0,02:43,105.0,...,88,29,39,106,01:40,,,,,
640501,2025-02-09,2140A,ALDEANUEVA DE SERREZUELA,SEGOVIA,1135,76.0,20,43.0,00:57,108.0,...,97,24,31,100,10:50,,,,,
640502,2025-02-09,3469A,CÁCERES,CACERES,394,100.0,0,53.0,05:10,147.0,...,93,26,14,53,15:20,9807.0,Varias,9769.0,24.0,48.0
