In [None]:
import numpy as np
import os
import pandas as pd
from graph_traffic.custom_funcs import  make_stable_values_null, rows_no_change
from graph_traffic.config import data_path

# Input folders: one CSV per traffic location and one CSV per weather station.
traffic_path = os.path.join(data_path, "03-by-location", "traffic")
meteo_path = os.path.join(data_path, "03-by-location", "meteo")
# Output folder for the merged per-location files. Created up front so that
# the to_csv call below does not fail when the folder does not exist yet.
merged_path = os.path.join(data_path, "04-traffic-meteo-merged")
os.makedirs(merged_path, exist_ok=True)

# Column names of the traffic magnitudes and of the weather magnitudes.
tmagns = ['intensidad', 'ocupacion', 'vmed']
mmagns = ['temperatura', 'humedad_relativa', 'presion_barometrica', 'radiacion_solar',
          'precipitacion', 'dir_viento', 'velocidad_viento']
# Weather magnitudes that are never blanked by the stuck-value cleaning.
# NOTE(review): presumably long constant runs are plausible for these — confirm.
unchecked_mmagns = ['precipitacion', 'radiacion_solar', 'presion_barometrica']

# Runs of unchanged values longer than this many rows are treated as missing.
MAX_STABLE_ROWS = 4
# Maximum number of consecutive missing weather rows filled by interpolation.
MAX_INTERP_GAP = 4

# Maps each traffic location (id_t) to the weather station used for each
# magnitude (columns id_<magnitude>).
mapping = pd.read_csv(os.path.join(data_path, '03-by-location', 'id_mapping.csv'))
station_cols = [f'id_{magn}' for magn in mmagns]

# Only CSV files are traffic locations; stray entries (e.g. checkpoint folders)
# would otherwise break the int() conversion of the file stem. Sorting makes
# the processing order reproducible across platforms.
traffic_files = sorted(f for f in os.listdir(traffic_path) if f.endswith(".csv"))
n_traffic_files = len(traffic_files)


def _load_clean_meteo(estacion):
    """Read one weather-station CSV and null out values that look stuck.

    Parameters
    ----------
    estacion : int
        Station id; the file read is ``estacion-<id>.csv`` inside ``meteo_path``.

    Returns
    -------
    pandas.DataFrame
        Station data indexed by ``fecha``. For every magnitude not listed in
        ``unchecked_mmagns``, non-zero values whose ``rows_no_change`` result
        exceeds ``MAX_STABLE_ROWS`` are replaced by NaN.
    """
    dfmi = pd.read_csv(os.path.join(meteo_path, f'estacion-{estacion:.0f}.csv'),
                       parse_dates=['fecha'], index_col='fecha')
    # NOTE(review): rows_no_change is assumed to give, per row, the length of
    # the run of unchanged values it belongs to — confirm against custom_funcs.
    nm = dfmi[mmagns].apply(rows_no_change)
    for m in mmagns:
        if m in unchecked_mmagns:
            continue
        # Zeros are kept even inside long constant runs.
        dfmi[m] = np.where((nm[m] > MAX_STABLE_ROWS) & (dfmi[m] != 0), np.nan, dfmi[m])
    return dfmi


# Cleaned station frames, keyed by station id. The same stations are shared by
# many traffic locations, so each file is read and cleaned only once instead of
# once per traffic file. The merge below never mutates these frames.
meteo_cache = {}

# Defined before the loop so the final preview works even if nothing is processed.
df = pd.DataFrame()

for i, t_file in enumerate(traffic_files):
    print(f"{i+1}/{n_traffic_files}", end="\r")
    t_location = t_file.split(".")[0]
    # Id of the weather station that provides each magnitude for this location.
    ids_m = mapping.loc[mapping.id_t == int(t_location), station_cols].iloc[0].astype(int)

    # Read the traffic data.
    dft = pd.read_csv(os.path.join(traffic_path, f'{t_location}.csv'),
                      parse_dates=['fecha'], index_col='fecha')
    if dft.empty:
        continue

    # Load (or reuse) the cleaned data of every station this location needs.
    for estacion in ids_m.unique():
        if estacion not in meteo_cache:
            meteo_cache[estacion] = _load_clean_meteo(estacion)

    # Traffic values that stay unchanged for more than MAX_STABLE_ROWS rows are nulled.
    dft[tmagns] = dft[tmagns].apply(make_stable_values_null, nrows=MAX_STABLE_ROWS)

    # Left-join every weather magnitude onto the traffic data by timestamp.
    df = dft
    for m in mmagns:
        df = df.merge(meteo_cache[ids_m[f"id_{m}"]][[m]],
                      left_index=True, right_index=True,
                      how='left')

    del dft
    df = df.sort_index()
    # Fill short gaps in the weather columns.
    # NOTE(review): dir_viento looks like an angle; linear interpolation across
    # the 0/360 wrap-around may give misleading values — confirm this is acceptable.
    df[mmagns] = df[mmagns].interpolate(method="linear", limit=MAX_INTERP_GAP)

    df.to_csv(os.path.join(merged_path, f"{t_location}.csv"))

# Preview of the last merged location (empty frame if nothing was processed).
df.head()

3634/4609

In [1]:
from graph_traffic.merge_data import merge_data

In [2]:
# Run the packaged merge for id 1001; the displayed result is indexed by
# `fecha` and holds the id, the traffic columns and the weather columns.
# NOTE(review): merge_data presumably wraps the per-file pipeline of the first
# cell for a single traffic location — confirm against graph_traffic.merge_data.
merge_data(1001)

Unnamed: 0_level_0,id,intensidad,ocupacion,vmed,temperatura,humedad_relativa,presion_barometrica,radiacion_solar,precipitacion,dir_viento,velocidad_viento
fecha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2019-01-01 00:00:00,1001,,,,,,,,,,
2019-01-01 00:15:00,1001,,,,,,,,,,
2019-01-01 00:30:00,1001,,,,,,,,,,
2019-01-01 00:45:00,1001,,,,,,,,,,
2019-01-01 01:00:00,1001,,,,-0.400,85.00,957.00,1.0,0.0,22.00,0.5900
...,...,...,...,...,...,...,...,...,...,...,...
2020-12-31 22:45:00,1001,300.0,,56.0,4.475,95.25,945.25,1.0,0.6,216.25,2.7275
2020-12-31 23:00:00,1001,300.0,,64.0,4.400,95.00,945.00,1.0,0.8,221.00,2.6100
2020-12-31 23:15:00,1001,144.0,,60.0,4.400,95.00,945.00,1.0,0.8,221.00,2.6100
2020-12-31 23:30:00,1001,288.0,,63.0,4.400,95.00,945.00,1.0,0.8,221.00,2.6100
