In [1]:
import sys
sys.path.append('../..')
from datetime import timedelta, datetime
from src.db.db_handler import DBHandler
from helpers.api.aemet_client import AEMETClient
from helpers import config
import pandas as pd
import uuid
import numpy as np

In [2]:
# Notebook-wide database handle; insert_batches() below uses this global to write rows.
db = DBHandler()

In [3]:
# Most recent date already stored in 'lluvia_historico'; the fetch window below starts the day after.
# NOTE(review): only this one table is consulted, although four tables are loaded further down.
latest = db.get_latest_historical_date('lluvia_historico')
latest

datetime.date(2025, 2, 9)

In [4]:
# Query window as YYYY-MM-DD strings: from the day after the last stored date up to today.
fechaIni = (latest + timedelta(days=1)).strftime(format="%Y-%m-%d")
fechaFin = datetime.now().date().strftime(format="%Y-%m-%d")
fechaIni, fechaFin

('2025-02-10', '2025-02-25')

In [5]:
# Extraction timestamp (local clock, truncated to the minute) stamped on every row of this run.
extracted = datetime.now().replace(second=0, microsecond=0)
extracted

datetime.datetime(2025, 2, 25, 3, 17)

In [6]:
# AEMET API client; the key is looked up through config.get_env_var rather than hardcoded.
client = AEMETClient(config.get_env_var('AEMET_API_KEY'))

In [7]:
# Fetch daily climatological values for all stations over [fechaIni, fechaFin].
# Top-level await: relies on the notebook kernel's support for awaiting inside a cell.
response = await client.get_valores_climatologicos_diarios_todas_estaciones(fechaIni, fechaFin)

In [20]:
# Raw response -> DataFrame, tagged with the run's extraction timestamp.
df = pd.DataFrame(response).assign(extracted=extracted)
# One random UUID per row, stored directly as its string form.
df['uuid'] = [str(uuid.uuid4()) for _ in range(len(df))]

In [21]:
def clean_historical(df: pd.DataFrame) -> pd.DataFrame:
    """Normalise a raw frame of AEMET daily climatological values.

    The input frame is left untouched; a new, cleaned frame is returned.

    Steps, in order:
      1. Drop the station-description, sunshine and pressure columns.
      2. Rename ``indicativo`` to ``idema`` and the camelCase humidity columns
         to snake_case.
      3. Replace the ``'Varias'`` sentinel with NaN.
      4. Convert decimal-comma strings in the measurement columns to floats
         (unparseable values become NaN).
      5. Parse ``HH:MM`` strings in the hour columns into ``datetime.time``
         (unparseable values become None).
      6. Fill missing wind direction / humidity values with 0.

    Args:
        df: Raw frame containing every column referenced below; a missing
            column raises ``KeyError``.

    Returns:
        A new DataFrame with the cleaned columns.
    """
    unused_cols = ['nombre', 'provincia', 'sol', 'altitud',
                   'presMax', 'horaPresMax', 'presMin', 'horaPresMin']
    # Non in-place drop: the caller's frame must not lose columns as a side
    # effect, otherwise re-running the calling cell on the same raw frame fails.
    df = df.drop(columns=unused_cols)
    df = df.rename(columns={'indicativo': 'idema',
                            'hrMax': 'hr_max', 'hrMedia': 'hr_media', 'hrMin': 'hr_min',
                            'horaHrMax': 'hora_hr_max', 'horaHrMin': 'hora_hr_min'})

    # 'Varias' appears in place of a value and cannot be parsed; treat it as missing.
    df = df.replace({'Varias': np.nan})

    # Measurements arrive as strings with a decimal comma ("8,8").
    float_cols = ['tmed', 'prec', 'tmin', 'tmax', 'velmedia', 'racha']
    df[float_cols] = df[float_cols].apply(lambda x: x.str.replace(',', '.', regex=False))
    df[float_cols] = df[float_cols].apply(pd.to_numeric, errors='coerce')

    # Hour-of-occurrence columns: "HH:MM" -> datetime.time, NaT when unparseable.
    time_cols = ['horatmin', 'horatmax', 'horaracha', 'hora_hr_max', 'hora_hr_min']
    df[time_cols] = df[time_cols].apply(lambda col: pd.to_datetime(col, format='%H:%M', errors='coerce').dt.time)

    # Swap NaT for None (presumably so the DB layer stores NULL - TODO confirm).
    df = df.replace({pd.NaT: None})

    # NOTE(review): a filled 0 is indistinguishable from a genuine 0 reading.
    zero_cols = ['dir', 'hr_max', 'hr_min', 'hr_media']
    df[zero_cols] = df[zero_cols].fillna(0)

    return df

In [22]:
# Rebinds df to the cleaned frame; downstream cells read the renamed columns (idema, hr_*).
df = clean_historical(df)

In [23]:
# Quick visual check of the cleaned columns and dtypes.
df.head(3)

Unnamed: 0,fecha,idema,tmed,prec,tmin,horatmin,tmax,horatmax,dir,velmedia,racha,horaracha,hr_media,hr_max,hora_hr_max,hr_min,hora_hr_min,extracted,uuid
0,2025-02-10,2885K,8.8,0.6,3.7,23:37:00,13.9,14:46:00,20,1.9,7.5,14:50:00,88,100,,67,14:50:00,2025-02-25 03:17:00,1f2188e3-176d-44e8-ac23-3f2aab58595e
1,2025-02-10,8293X,12.8,0.0,8.1,06:20:00,17.4,13:10:00,22,1.9,5.3,01:50:00,73,94,23:59:00,56,13:10:00,2025-02-25 03:17:00,c5da22d5-01e5-49ad-8e32-270706dc6625
2,2025-02-10,2918Y,9.8,0.0,6.8,01:28:00,12.7,15:39:00,16,3.3,15.0,22:00:00,79,97,03:10:00,62,14:50:00,2025-02-25 03:17:00,7db78a5e-dbfb-455f-b6b6-428af6888ccd


In [24]:
# Key/metadata columns included in every per-table slice.
all_cols = ['uuid', 'idema', 'fecha', 'extracted']
# Measurement columns for each destination table (rain, humidity, temperature, wind).
lluvia_cols = ['prec']
humedad_cols = ['hr_media', 'hr_max', 'hora_hr_max', 'hr_min', 'hora_hr_min']
temperatura_cols = ['tmed', 'tmin', 'horatmin', 'tmax', 'horatmax',]
viento_cols = ['velmedia', 'racha', 'horaracha', 'dir']

In [25]:
# One slice per destination table: shared key columns plus that table's measurements.
lluvia_df, humedad_df, temperatura_df, viento_df = (
    df[all_cols + measurement_cols]
    for measurement_cols in (lluvia_cols, humedad_cols, temperatura_cols, viento_cols)
)

In [26]:
# First and last 'fecha' actually present in the data, parsed to datetimes.
start_date, end_date = (
    datetime.strptime(bound, "%Y-%m-%d")
    for bound in (df['fecha'].min(), df['fecha'].max())
)
start_date, end_date

(datetime.datetime(2025, 2, 10, 0, 0), datetime.datetime(2025, 2, 21, 0, 0))

In [27]:
def insert_batches(table, df, start_date, end_date=None):
    """Insert ``df`` into ``table`` one calendar day at a time.

    Walks day by day from ``start_date`` to ``end_date`` (both inclusive),
    selects the rows whose ``fecha`` string equals that day and passes them to
    ``db.bulk_insert_data`` as a column -> list-of-values dict. Days with no
    rows are reported and skipped.

    Args:
        table: Name of the destination table.
        df: Frame with a ``fecha`` column holding ``YYYY-MM-DD`` strings.
        start_date: First day to insert (``datetime``).
        end_date: Last day to insert (``datetime``). When omitted, the latest
            ``fecha`` present in ``df`` is used, so the function no longer
            depends on a notebook-level ``end_date`` variable.

    Note:
        Writes go through the notebook-level ``db`` handler.
    """
    if end_date is None:
        if df.empty:
            # Nothing to derive a range from and nothing to insert.
            print(f"No records to insert into {table}")
            return
        end_date = datetime.strptime(df["fecha"].max(), "%Y-%m-%d")

    current_date = start_date
    while current_date <= end_date:
        date_str = current_date.strftime("%Y-%m-%d")
        # Advance first so the `continue` below cannot stall the loop.
        current_date += timedelta(days=1)
        batch = df[df["fecha"] == date_str]
        if batch.empty:
            print(f"No records for date {date_str}")
            continue
        print(f"Inserting records from {date_str}...")
        db.bulk_insert_data(table, batch.to_dict(orient="list"))
        print(f"Inserted records from {date_str}")

In [30]:
# Load the temperature slice into 'temperatura_historico', one batch per day.
insert_batches('temperatura_historico', temperatura_df, start_date)

Inserting records from 2025-02-10...
Inserted records from 2025-02-10
Inserting records from 2025-02-11...
Inserted records from 2025-02-11
Inserting records from 2025-02-12...
Inserted records from 2025-02-12
Inserting records from 2025-02-13...
Inserted records from 2025-02-13
Inserting records from 2025-02-14...
Inserted records from 2025-02-14
Inserting records from 2025-02-15...
Inserted records from 2025-02-15
Inserting records from 2025-02-16...
Inserted records from 2025-02-16
Inserting records from 2025-02-17...
Inserted records from 2025-02-17
Inserting records from 2025-02-18...
Inserted records from 2025-02-18
Inserting records from 2025-02-19...
Inserted records from 2025-02-19
Inserting records from 2025-02-20...
Inserted records from 2025-02-20
Inserting records from 2025-02-21...
Inserted records from 2025-02-21


In [31]:
# Load the wind slice into 'viento_historico', one batch per day.
insert_batches('viento_historico', viento_df, start_date)

Inserting records from 2025-02-10...
Inserted records from 2025-02-10
Inserting records from 2025-02-11...
Inserted records from 2025-02-11
Inserting records from 2025-02-12...
Inserted records from 2025-02-12
Inserting records from 2025-02-13...
Inserted records from 2025-02-13
Inserting records from 2025-02-14...
Inserted records from 2025-02-14
Inserting records from 2025-02-15...
Inserted records from 2025-02-15
Inserting records from 2025-02-16...
Inserted records from 2025-02-16
Inserting records from 2025-02-17...
Inserted records from 2025-02-17
Inserting records from 2025-02-18...
Inserted records from 2025-02-18
Inserting records from 2025-02-19...
Inserted records from 2025-02-19
Inserting records from 2025-02-20...
Inserted records from 2025-02-20
Inserting records from 2025-02-21...
Inserted records from 2025-02-21


In [32]:
# Load the humidity slice into 'humedad_historico', one batch per day.
insert_batches('humedad_historico', humedad_df, start_date)

Inserting records from 2025-02-10...
Inserted records from 2025-02-10
Inserting records from 2025-02-11...
Inserted records from 2025-02-11
Inserting records from 2025-02-12...
Inserted records from 2025-02-12
Inserting records from 2025-02-13...
Inserted records from 2025-02-13
Inserting records from 2025-02-14...
Inserted records from 2025-02-14
Inserting records from 2025-02-15...
Inserted records from 2025-02-15
Inserting records from 2025-02-16...
Inserted records from 2025-02-16
Inserting records from 2025-02-17...
Inserted records from 2025-02-17
Inserting records from 2025-02-18...
Inserted records from 2025-02-18
Inserting records from 2025-02-19...
Inserted records from 2025-02-19
Inserting records from 2025-02-20...
Inserted records from 2025-02-20
Inserting records from 2025-02-21...
Inserted records from 2025-02-21


In [33]:
# Load the rain slice into 'lluvia_historico', one batch per day.
# This is the table whose latest date seeds the next run's fetch window, and it is loaded last.
insert_batches('lluvia_historico', lluvia_df, start_date)

Inserting records from 2025-02-10...
Inserted records from 2025-02-10
Inserting records from 2025-02-11...
Inserted records from 2025-02-11
Inserting records from 2025-02-12...
Inserted records from 2025-02-12
Inserting records from 2025-02-13...
Inserted records from 2025-02-13
Inserting records from 2025-02-14...
Inserted records from 2025-02-14
Inserting records from 2025-02-15...
Inserted records from 2025-02-15
Inserting records from 2025-02-16...
Inserted records from 2025-02-16
Inserting records from 2025-02-17...
Inserted records from 2025-02-17
Inserting records from 2025-02-18...
Inserted records from 2025-02-18
Inserting records from 2025-02-19...
Inserted records from 2025-02-19
Inserting records from 2025-02-20...
Inserted records from 2025-02-20
Inserting records from 2025-02-21...
Inserted records from 2025-02-21
