In [11]:
import pandas as pd
from tqdm import tqdm
import os

In [12]:
df_mapping_no2_to_traffico = pd.read_csv("../../data/processed/mapping/no2_to_traffic_sensor_mapping.csv")

In [13]:
import pandas as pd
from tqdm import tqdm

def process_traffic_data(df_traffic):
    """
    Función para procesar datos de tráfico a nivel horario a partir de datos cada 15 minutos.
    
    Se agrupan los datos por sensor y hora, calculando:
    - **Intensidad**: Promedio de la intensidad (vehículos/hora).
    - **Carga**: Promedio ponderado de la carga, usando la intensidad como peso.
    - **Ocupación**: Promedio ponderado de la ocupación, usando la intensidad como peso.
    - **Velocidad media (vmed)**: Promedio ponderado de la velocidad media, usando la intensidad como peso.

    Parámetros:
        df_traffic (pd.DataFrame): DataFrame con datos de tráfico a nivel de 15 minutos.

    Retorna:
        pd.DataFrame: DataFrame con los datos agregados a nivel horario.
    """

    # Convertir la columna 'fecha' a tipo datetime para manipulación de fechas
    df_traffic['fecha'] = pd.to_datetime(df_traffic['fecha'])

    # Redondear la fecha a la hora más cercana (para agrupar los datos por hora)
    df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')

    # Rellenar valores NaN con -1 para evitar problemas en cálculos posteriores
    df_traffic = df_traffic.fillna(-1)

    # Función para calcular un promedio ponderado
    def weighted_average(group, value_column, weight_column):
        """
        Calcula la media ponderada de una columna usando otra como peso.

        Parámetros:
            group (pd.DataFrame): Grupo de datos a procesar.
            value_column (str): Nombre de la columna cuyo promedio ponderado se calcula.
            weight_column (str): Nombre de la columna que se usa como peso.

        Retorna:
            float: Media ponderada.
        """
        values = group[value_column]
        weights = group[weight_column]
        
        # Si la suma de los pesos es mayor a 0, se calcula la media ponderada, si no, se devuelve 0
        weighted_avg = (values * weights).sum() / weights.sum() if weights.sum() > 0 else 0
        return weighted_avg

    # Agrupar los datos por sensor y hora
    grouped = df_traffic.groupby(['id_trafico', 'hora'])

    # Lista para almacenar los resultados procesados
    result = []

    # Procesar cada grupo de sensor y hora
    for name, group in tqdm(grouped, desc="Processing groups"):
        """
        Para cada grupo (es decir, cada sensor en cada hora):
        - Se calcula la **intensidad** como promedio de la intensidad en los 4 intervalos de 15 minutos.
        - Se calcula la **carga** como un promedio ponderado de los valores de carga usando intensidad como peso.
        - Se calcula la **ocupación** como un promedio ponderado usando la intensidad como peso.
        - Se calcula la **velocidad media (vmed)** como un promedio ponderado usando la intensidad como peso.
        """

        # Intensidad: Como ya está en vehículos/hora, tomamos el promedio, no la suma
        intensidad_mean = group['intensidad'].mean()

        # Carga: Media ponderada con intensidad como peso
        carga_mean = weighted_average(group, 'carga', 'intensidad')

        # Ocupación: Media ponderada con intensidad como peso (no se puede sumar porque es un porcentaje)
        ocupacion_mean = weighted_average(group, 'ocupacion', 'intensidad')

        # Velocidad media: Media ponderada con intensidad como peso
        vmed_weighted = weighted_average(group, 'vmed', 'intensidad')

        # Agregar los resultados procesados a la lista
        result.append({
            'id_trafico': name[0],   # ID del sensor de tráfico
            'hora': name[1],         # Hora agregada
            'intensidad': intensidad_mean,  # Promedio de intensidad (veh/h)
            'carga': carga_mean,           # Promedio ponderado de carga
            'ocupacion': ocupacion_mean,   # Promedio ponderado de ocupación
            'vmed': vmed_weighted          # Promedio ponderado de velocidad media
        })

    # Convertir la lista de resultados en un DataFrame
    df_traffic = pd.DataFrame(result)

    return df_traffic  # Retornar el DataFrame procesado


In [14]:
def corregir_errores(df):
    # Asegurarse de que 'fecha' sea de tipo datetime
    
    # Iterar por las filas donde 'error' es 'S'
    for index, row in df[df['error'] == 'S'].iterrows():
        # Inicializar el contador de minutos
        minutos_atras = 15
        
        try:
            while True:
                # Buscar la fila anterior con el mismo 'id_trafico' y 'error' diferente de 'S'
                fila_anterior = df[
                    (df['id_trafico'] == row['id_trafico']) & 
                    (df['fecha'] == row['fecha'] - pd.Timedelta(minutes=minutos_atras))
                ]
                
                if not fila_anterior.empty and fila_anterior.iloc[0]['error'] != 'S':
                    # Copiar los valores de la fila anterior en la fila con error
                    for col in ['intensidad', 'ocupacion', 'carga', 'vmed', 'periodo_integracion']:
                        df.at[index, col] = fila_anterior.iloc[0][col]
                    
                    # Ajustar el error a 'N' después de la corrección
                    df.at[index, 'error'] = 'N'
                    break  # Salir del bucle si se ha encontrado una fila válida
                
                # Incrementar el contador de minutos para buscar más atrás
                minutos_atras += 15
                
                # Si se han buscado más de un número razonable de filas, salir para evitar bucles infinitos
                if minutos_atras > 180:  # Por ejemplo, buscar hasta 3 horas atrás
                    break
        except:
            df.at[index, col] = -1
        
    return df

In [15]:
df_mapping_no2_to_traffico['id_trafico'] = df_mapping_no2_to_traffico['id_trafico'].astype(int).astype(str)
filtered_traffic_sensors = list(df_mapping_no2_to_traffico.id_trafico.unique())

In [16]:
len(filtered_traffic_sensors)

71

In [17]:
# Define the root directory
root_dir = '../../data/raw/traffic'

# Iterate through all year folders
for year in range(2018, 2025):
    year_folder = os.path.join(root_dir, str(year))
    
    # Check if the year folder exists
    if os.path.isdir(year_folder):
        print(f"Processing folder: {year_folder}")
        
        # Iterate through files in the year folder
        for file_name in os.listdir(year_folder):
            if file_name.endswith('.csv'):  # Ensure it's a CSV file
                
                
                file_path = os.path.join(year_folder, file_name)
                
                try:
                    df = pd.read_csv(file_path, delimiter= ";")
                    
                    df = df.rename(columns = {'hora': 'fecha'})
                    df['fecha'] = pd.to_datetime(df['fecha'])
                        
                    # algunos se llaman id y otros identif
                    # Rename columns if they exist
                    if 'identif' in df.columns:
                        df = df.rename(columns={'identif': 'id_trafico'})

                    if 'id' in df.columns:
                        df = df.rename(columns={'id': 'id_trafico'})
                        
                    df['id_trafico'] = df['id_trafico'].apply(lambda x: str(int(x)) if str(x).isdigit() else str(x))
                    df['id_trafico'] = df['id_trafico'].astype(str)

                    print("len df before filtering ", len(df))
                    print("Unique traffic sensors before filtering:" , df.id_trafico.nunique())

                    df = df[df['id_trafico'].isin(filtered_traffic_sensors)]
                    
                    print("len df after filtering ", len(df))
                    print("Unique traffic sensors after filtering:" , df.id_trafico.nunique())
                    
                    if 'S' in df.error.unique():
                        print(f"There are errors in file: {file_name}")
                        print("There are a total of ", len(df[df['error'] == 'S']) , " errors")
                        
                        df = corregir_errores(df)
                        
                    df = process_traffic_data(df)
                        
                    # Create the new file name
                    new_file_name = f"{file_name[:-4]}_processed.parquet"
                    new_file_path = os.path.join(year_folder, new_file_name)
                    new_file_path = new_file_path.replace("raw", "processed")
                    
                    # Create the processed directory if it does not exist
                    processed_dir = os.path.dirname(new_file_path)
                    os.makedirs(processed_dir, exist_ok=True)

                    # Save the processed DataFrame
                    df.to_parquet(new_file_path, index=False)
                    
                    print(f"Processed and saved: {new_file_path}")
                except Exception as e:
                    print(f"Error processing file {file_name}: {e}")


Processing folder: ../../data/raw/traffic/2018
len df before filtering  11551114
Unique traffic sensors before filtering: 3996
len df after filtering  158742
Unique traffic sensors after filtering: 55


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 39973/39973 [00:05<00:00, 7013.47it/s]


Processed and saved: ../../data/processed/traffic/2018/01-2018_processed.parquet
len df before filtering  11202244
Unique traffic sensors before filtering: 4022
len df after filtering  155102
Unique traffic sensors after filtering: 56


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 38895/38895 [00:05<00:00, 7064.74it/s]


Processed and saved: ../../data/processed/traffic/2018/06-2018_processed.parquet
len df before filtering  11392027
Unique traffic sensors before filtering: 4021
len df after filtering  159522
Unique traffic sensors after filtering: 56


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 40112/40112 [00:05<00:00, 7068.05it/s]


Processed and saved: ../../data/processed/traffic/2018/07-2018_processed.parquet
len df before filtering  10703589
Unique traffic sensors before filtering: 3928
len df after filtering  138327
Unique traffic sensors after filtering: 54


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 34805/34805 [00:05<00:00, 6571.79it/s]


Processed and saved: ../../data/processed/traffic/2018/11-2018_processed.parquet
len df before filtering  11020967
Unique traffic sensors before filtering: 3916
len df after filtering  144829
Unique traffic sensors after filtering: 52


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 36567/36567 [00:05<00:00, 6525.80it/s]


Processed and saved: ../../data/processed/traffic/2018/10-2018_processed.parquet
len df before filtering  10339553
Unique traffic sensors before filtering: 4001
len df after filtering  145154
Unique traffic sensors after filtering: 55


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 36471/36471 [00:05<00:00, 7035.29it/s]


Processed and saved: ../../data/processed/traffic/2018/02-2018_processed.parquet
len df before filtering  11432654
Unique traffic sensors before filtering: 3997
len df after filtering  159406
Unique traffic sensors after filtering: 55


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 40240/40240 [00:05<00:00, 6967.72it/s]


Processed and saved: ../../data/processed/traffic/2018/03-2018_processed.parquet
len df before filtering  11298892
Unique traffic sensors before filtering: 4014
len df after filtering  159766
Unique traffic sensors after filtering: 55


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 40184/40184 [00:05<00:00, 7047.11it/s]


Processed and saved: ../../data/processed/traffic/2018/08-2018_processed.parquet
len df before filtering  10668743
Unique traffic sensors before filtering: 3910
len df after filtering  141902
Unique traffic sensors after filtering: 52


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 35822/35822 [00:05<00:00, 6600.69it/s]


Processed and saved: ../../data/processed/traffic/2018/09-2018_processed.parquet
len df before filtering  11207552
Unique traffic sensors before filtering: 3927
len df after filtering  152303
Unique traffic sensors after filtering: 54


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 38299/38299 [00:05<00:00, 6752.01it/s]


Processed and saved: ../../data/processed/traffic/2018/12-2018_processed.parquet
len df before filtering  11501592
Unique traffic sensors before filtering: 4023
len df after filtering  156850
Unique traffic sensors after filtering: 55


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 39368/39368 [00:05<00:00, 6983.16it/s]


Processed and saved: ../../data/processed/traffic/2018/05-2018_processed.parquet
len df before filtering  11140226
Unique traffic sensors before filtering: 4015
len df after filtering  152190
Unique traffic sensors after filtering: 55


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 38213/38213 [00:05<00:00, 7074.37it/s]


Processed and saved: ../../data/processed/traffic/2018/04-2018_processed.parquet
Processing folder: ../../data/raw/traffic/2019
len df before filtering  11203627
Unique traffic sensors before filtering: 3937
len df after filtering  157526
Unique traffic sensors after filtering: 54


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 39596/39596 [00:05<00:00, 6769.08it/s]


Processed and saved: ../../data/processed/traffic/2019/01-2019_processed.parquet
len df before filtering  11252731
Unique traffic sensors before filtering: 3998
len df after filtering  152075
Unique traffic sensors after filtering: 54


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 38182/38182 [00:05<00:00, 6758.80it/s]


Processed and saved: ../../data/processed/traffic/2019/07-2019_processed.parquet
len df before filtering  10794005
Unique traffic sensors before filtering: 3982
len df after filtering  145330
Unique traffic sensors after filtering: 54


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 36510/36510 [00:05<00:00, 6758.01it/s]


Processed and saved: ../../data/processed/traffic/2019/06-2019_processed.parquet
len df before filtering  11478841
Unique traffic sensors before filtering: 4033
len df after filtering  155975
Unique traffic sensors after filtering: 54


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 39175/39175 [00:05<00:00, 6688.28it/s]


Processed and saved: ../../data/processed/traffic/2019/10-2019_processed.parquet
len df before filtering  11186837
Unique traffic sensors before filtering: 4039
len df after filtering  151855
Unique traffic sensors after filtering: 54


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 38137/38137 [00:05<00:00, 6662.59it/s]


Processed and saved: ../../data/processed/traffic/2019/11-2019_processed.parquet
len df before filtering  11096650
Unique traffic sensors before filtering: 3963
len df after filtering  146306
Unique traffic sensors after filtering: 54


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 36931/36931 [00:05<00:00, 6767.20it/s]


Processed and saved: ../../data/processed/traffic/2019/03-2019_processed.parquet
len df before filtering  10060856
Unique traffic sensors before filtering: 3941
len df after filtering  138170
Unique traffic sensors after filtering: 54


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 34803/34803 [00:05<00:00, 6752.61it/s]


Processed and saved: ../../data/processed/traffic/2019/02-2019_processed.parquet
len df before filtering  11023586
Unique traffic sensors before filtering: 4019
len df after filtering  146194
Unique traffic sensors after filtering: 54


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 36701/36701 [00:05<00:00, 6727.85it/s]


Processed and saved: ../../data/processed/traffic/2019/09-2019_processed.parquet
len df before filtering  11283290
Unique traffic sensors before filtering: 3998
len df after filtering  150403
Unique traffic sensors after filtering: 54


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 37917/37917 [00:05<00:00, 6783.83it/s]


Processed and saved: ../../data/processed/traffic/2019/08-2019_processed.parquet
len df before filtering  11518048
Unique traffic sensors before filtering: 4056
len df after filtering  153648
Unique traffic sensors after filtering: 54


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 38639/38639 [00:05<00:00, 6781.62it/s]


Processed and saved: ../../data/processed/traffic/2019/12-2019_processed.parquet
len df before filtering  10542041
Unique traffic sensors before filtering: 3950
len df after filtering  136575
Unique traffic sensors after filtering: 54


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 34407/34407 [00:05<00:00, 6179.09it/s]


Processed and saved: ../../data/processed/traffic/2019/04-2019_processed.parquet
len df before filtering  11060255
Unique traffic sensors before filtering: 3968
len df after filtering  144279
Unique traffic sensors after filtering: 54


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 36285/36285 [00:05<00:00, 6591.27it/s]


Processed and saved: ../../data/processed/traffic/2019/05-2019_processed.parquet
Processing folder: ../../data/raw/traffic/2020
len df before filtering  11577408
Unique traffic sensors before filtering: 4058
len df after filtering  149825
Unique traffic sensors after filtering: 54


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 37937/37937 [00:05<00:00, 6624.16it/s]


Processed and saved: ../../data/processed/traffic/2020/01-2020_processed.parquet
len df before filtering  11401995
Unique traffic sensors before filtering: 4168
len df after filtering  144868
Unique traffic sensors after filtering: 55


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 37146/37146 [00:05<00:00, 6696.37it/s]


Processed and saved: ../../data/processed/traffic/2020/10-2020_processed.parquet
len df before filtering  11304137
Unique traffic sensors before filtering: 4160
len df after filtering  144334
Unique traffic sensors after filtering: 53


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 36843/36843 [00:05<00:00, 6747.73it/s]


Processed and saved: ../../data/processed/traffic/2020/11-2020_processed.parquet
len df before filtering  10915562
Unique traffic sensors before filtering: 3966
len df after filtering  147082
Unique traffic sensors after filtering: 54


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 37325/37325 [00:05<00:00, 6991.47it/s]


Processed and saved: ../../data/processed/traffic/2020/07-2020_processed.parquet
len df before filtering  10552512
Unique traffic sensors before filtering: 3970
len df after filtering  140570
Unique traffic sensors after filtering: 54


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 35622/35622 [00:05<00:00, 7010.66it/s]


Processed and saved: ../../data/processed/traffic/2020/06-2020_processed.parquet
len df before filtering  11050575
Unique traffic sensors before filtering: 4150
len df after filtering  138894
Unique traffic sensors after filtering: 54


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 35380/35380 [00:05<00:00, 6892.03it/s]


Processed and saved: ../../data/processed/traffic/2020/09-2020_processed.parquet
len df before filtering  10776844
Unique traffic sensors before filtering: 3965
len df after filtering  143775
Unique traffic sensors after filtering: 54


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 36788/36788 [00:05<00:00, 7221.56it/s]


Processed and saved: ../../data/processed/traffic/2020/08-2020_processed.parquet
len df before filtering  11240571
Unique traffic sensors before filtering: 4068
len df after filtering  146203
Unique traffic sensors after filtering: 52


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 37283/37283 [00:05<00:00, 6843.84it/s]


Processed and saved: ../../data/processed/traffic/2020/03-2020_processed.parquet
len df before filtering  10926382
Unique traffic sensors before filtering: 4071
len df after filtering  140956
Unique traffic sensors after filtering: 54


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 35706/35706 [00:05<00:00, 6894.88it/s]


Processed and saved: ../../data/processed/traffic/2020/02-2020_processed.parquet
len df before filtering  10603087
Unique traffic sensors before filtering: 4064
len df after filtering  136816
Unique traffic sensors after filtering: 53


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 35347/35347 [00:05<00:00, 6804.09it/s]


Processed and saved: ../../data/processed/traffic/2020/04-2020_processed.parquet
len df before filtering  10905113
Unique traffic sensors before filtering: 4065


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')


len df after filtering  139720
Unique traffic sensors after filtering: 53


Processing groups: 100%|██████████| 35734/35734 [00:05<00:00, 6806.39it/s]


Processed and saved: ../../data/processed/traffic/2020/05-2020_processed.parquet
len df before filtering  11101234
Unique traffic sensors before filtering: 4173
len df after filtering  141860
Unique traffic sensors after filtering: 54


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 36801/36801 [00:05<00:00, 6803.47it/s]


Processed and saved: ../../data/processed/traffic/2020/12-2020_processed.parquet
Processing folder: ../../data/raw/traffic/2021
len df before filtering  10422264
Unique traffic sensors before filtering: 4173


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')


len df after filtering  129733
Unique traffic sensors after filtering: 53


Processing groups: 100%|██████████| 34287/34287 [00:04<00:00, 7041.43it/s]


Processed and saved: ../../data/processed/traffic/2021/01-2021_processed.parquet
len df before filtering  11753247
Unique traffic sensors before filtering: 4351
len df after filtering  160776
Unique traffic sensors after filtering: 61


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 40748/40748 [00:06<00:00, 6493.15it/s]


Processed and saved: ../../data/processed/traffic/2021/11-2021_processed.parquet
len df before filtering  12036913
Unique traffic sensors before filtering: 4314
len df after filtering  163628
Unique traffic sensors after filtering: 57


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 41419/41419 [00:05<00:00, 6915.15it/s]


Processed and saved: ../../data/processed/traffic/2021/10-2021_processed.parquet
len df before filtering  11577338
Unique traffic sensors before filtering: 4273
len df after filtering  139670
Unique traffic sensors after filtering: 56


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 35442/35442 [00:05<00:00, 6792.57it/s]


Processed and saved: ../../data/processed/traffic/2021/06-2021_processed.parquet
len df before filtering  12132019
Unique traffic sensors before filtering: 4304
len df after filtering  153882
Unique traffic sensors after filtering: 54


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 39032/39032 [00:05<00:00, 7015.61it/s]


Processed and saved: ../../data/processed/traffic/2021/07-2021_processed.parquet
len df before filtering  11992900
Unique traffic sensors before filtering: 4301
len df after filtering  154380
Unique traffic sensors after filtering: 54


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 39345/39345 [00:05<00:00, 6914.20it/s]


Processed and saved: ../../data/processed/traffic/2021/08-2021_processed.parquet
len df before filtering  11697249
Unique traffic sensors before filtering: 4318
len df after filtering  151082
Unique traffic sensors after filtering: 58


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 38297/38297 [00:05<00:00, 6932.60it/s]


Processed and saved: ../../data/processed/traffic/2021/09-2021_processed.parquet
len df before filtering  10477478
Unique traffic sensors before filtering: 4181
len df after filtering  131654
Unique traffic sensors after filtering: 54


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 33489/33489 [00:04<00:00, 6904.61it/s]


Processed and saved: ../../data/processed/traffic/2021/02-2021_processed.parquet
len df before filtering  11891658
Unique traffic sensors before filtering: 4255


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')


len df after filtering  144049
Unique traffic sensors after filtering: 52


Processing groups: 100%|██████████| 36675/36675 [00:05<00:00, 6921.30it/s]


Processed and saved: ../../data/processed/traffic/2021/03-2021_processed.parquet
len df before filtering  11937669
Unique traffic sensors before filtering: 4259
len df after filtering  148967
Unique traffic sensors after filtering: 55


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 37858/37858 [00:05<00:00, 6802.85it/s]


Processed and saved: ../../data/processed/traffic/2021/05-2021_processed.parquet
len df before filtering  11495366
Unique traffic sensors before filtering: 4238
len df after filtering  138802
Unique traffic sensors after filtering: 51


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 35328/35328 [00:05<00:00, 6852.46it/s]


Processed and saved: ../../data/processed/traffic/2021/04-2021_processed.parquet
len df before filtering  12309478
Unique traffic sensors before filtering: 4372
len df after filtering  178047
Unique traffic sensors after filtering: 61


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 45004/45004 [00:06<00:00, 6907.23it/s]


Processed and saved: ../../data/processed/traffic/2021/12-2021_processed.parquet
Processing folder: ../../data/raw/traffic/2022
len df before filtering  12429813
Unique traffic sensors before filtering: 4374
len df after filtering  177951
Unique traffic sensors after filtering: 62


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 45047/45047 [00:06<00:00, 6967.56it/s]


Processed and saved: ../../data/processed/traffic/2022/01-2022_processed.parquet
len df before filtering  12511377
Unique traffic sensors before filtering: 4471
len df after filtering  189002
Unique traffic sensors after filtering: 65


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 47576/47576 [00:06<00:00, 6971.41it/s]


Processed and saved: ../../data/processed/traffic/2022/10-2022_processed.parquet
len df before filtering  12096282
Unique traffic sensors before filtering: 4488
len df after filtering  185931
Unique traffic sensors after filtering: 65


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 46722/46722 [00:06<00:00, 6902.67it/s]


Processed and saved: ../../data/processed/traffic/2022/11-2022_processed.parquet
len df before filtering  11796277
Unique traffic sensors before filtering: 4132
len df after filtering  173852
Unique traffic sensors after filtering: 61


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 44018/44018 [00:06<00:00, 7003.48it/s]


Processed and saved: ../../data/processed/traffic/2022/07-2022_processed.parquet
len df before filtering  11399709
Unique traffic sensors before filtering: 4129
len df after filtering  167465
Unique traffic sensors after filtering: 60


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 42196/42196 [00:06<00:00, 6959.33it/s]


Processed and saved: ../../data/processed/traffic/2022/06-2022_processed.parquet
len df before filtering  11796900
Unique traffic sensors before filtering: 4114
len df after filtering  175358
Unique traffic sensors after filtering: 60


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 44372/44372 [00:06<00:00, 6979.18it/s]


Processed and saved: ../../data/processed/traffic/2022/03-2022_processed.parquet
len df before filtering  10794781
Unique traffic sensors before filtering: 4394
len df after filtering  160010
Unique traffic sensors after filtering: 62


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 40460/40460 [00:05<00:00, 6883.47it/s]


Processed and saved: ../../data/processed/traffic/2022/02-2022_processed.parquet
len df before filtering  12168291
Unique traffic sensors before filtering: 4480
len df after filtering  183509
Unique traffic sensors after filtering: 65


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 46207/46207 [00:06<00:00, 6919.34it/s]


Processed and saved: ../../data/processed/traffic/2022/09-2022_processed.parquet
len df before filtering  11693047
Unique traffic sensors before filtering: 4148
len df after filtering  178181
Unique traffic sensors after filtering: 63


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 45087/45087 [00:06<00:00, 6981.80it/s]


Processed and saved: ../../data/processed/traffic/2022/08-2022_processed.parquet
len df before filtering  11323906
Unique traffic sensors before filtering: 4119
len df after filtering  168451
Unique traffic sensors after filtering: 60


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 42616/42616 [00:06<00:00, 7018.92it/s]


Processed and saved: ../../data/processed/traffic/2022/04-2022_processed.parquet
len df before filtering  11732565
Unique traffic sensors before filtering: 4127
len df after filtering  169400
Unique traffic sensors after filtering: 60


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 42735/42735 [00:06<00:00, 6938.44it/s]


Processed and saved: ../../data/processed/traffic/2022/05-2022_processed.parquet
len df before filtering  12512133
Unique traffic sensors before filtering: 4487
len df after filtering  192487
Unique traffic sensors after filtering: 65


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 48331/48331 [00:06<00:00, 6956.99it/s]


Processed and saved: ../../data/processed/traffic/2022/12-2022_processed.parquet
Processing folder: ../../data/raw/traffic/2023
len df before filtering  12644350
Unique traffic sensors before filtering: 4506
len df after filtering  192113
Unique traffic sensors after filtering: 65


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 48273/48273 [00:06<00:00, 6931.10it/s]


Processed and saved: ../../data/processed/traffic/2023/01-2023_processed.parquet
len df before filtering  12774666
Unique traffic sensors before filtering: 4663
len df after filtering  180574
Unique traffic sensors after filtering: 65


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 45523/45523 [00:06<00:00, 6886.85it/s]


Processed and saved: ../../data/processed/traffic/2023/11-2023_processed.parquet
len df before filtering  12946685
Unique traffic sensors before filtering: 4603
len df after filtering  179546
Unique traffic sensors after filtering: 65


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 45348/45348 [00:06<00:00, 6918.86it/s]


Processed and saved: ../../data/processed/traffic/2023/10-2023_processed.parquet
len df before filtering  12485567
Unique traffic sensors before filtering: 4551
len df after filtering  178532
Unique traffic sensors after filtering: 65


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 45088/45088 [00:06<00:00, 6881.53it/s]


Processed and saved: ../../data/processed/traffic/2023/06-2023_processed.parquet
len df before filtering  12945236
Unique traffic sensors before filtering: 4582
len df after filtering  179991
Unique traffic sensors after filtering: 65


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 45591/45591 [00:06<00:00, 6916.12it/s]


Processed and saved: ../../data/processed/traffic/2023/07-2023_processed.parquet
len df before filtering  11388974
Unique traffic sensors before filtering: 4503
len df after filtering  171499
Unique traffic sensors after filtering: 65


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 43412/43412 [00:06<00:00, 6934.20it/s]


Processed and saved: ../../data/processed/traffic/2023/02-2023_processed.parquet
len df before filtering  12732652
Unique traffic sensors before filtering: 4512
len df after filtering  190900
Unique traffic sensors after filtering: 65


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 48112/48112 [00:06<00:00, 6939.91it/s]


Processed and saved: ../../data/processed/traffic/2023/03-2023_processed.parquet
len df before filtering  12744711
Unique traffic sensors before filtering: 4588
len df after filtering  178430
Unique traffic sensors after filtering: 65


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 45418/45418 [00:06<00:00, 6873.14it/s]


Processed and saved: ../../data/processed/traffic/2023/08-2023_processed.parquet
len df before filtering  12518185
Unique traffic sensors before filtering: 4563
len df after filtering  176811
Unique traffic sensors after filtering: 65


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 44461/44461 [00:06<00:00, 6939.18it/s]


Processed and saved: ../../data/processed/traffic/2023/09-2023_processed.parquet
len df before filtering  12926315
Unique traffic sensors before filtering: 4547
len df after filtering  189990
Unique traffic sensors after filtering: 65


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 47733/47733 [00:06<00:00, 6962.55it/s]


Processed and saved: ../../data/processed/traffic/2023/05-2023_processed.parquet
len df before filtering  12438772
Unique traffic sensors before filtering: 4541
len df after filtering  183742
Unique traffic sensors after filtering: 65


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 46202/46202 [00:06<00:00, 6824.02it/s]


Processed and saved: ../../data/processed/traffic/2023/04-2023_processed.parquet
len df before filtering  13224732
Unique traffic sensors before filtering: 4659
len df after filtering  190599
Unique traffic sensors after filtering: 65


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 47876/47876 [00:06<00:00, 6926.83it/s]


Processed and saved: ../../data/processed/traffic/2023/12-2023_processed.parquet
Processing folder: ../../data/raw/traffic/2024
len df before filtering  12779019
Unique traffic sensors before filtering: 4696
len df after filtering  175396
Unique traffic sensors after filtering: 64


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 44131/44131 [00:06<00:00, 6993.80it/s]


Processed and saved: ../../data/processed/traffic/2024/09_2024_processed.parquet
len df before filtering  13343452
Unique traffic sensors before filtering: 4699
len df after filtering  191199
Unique traffic sensors after filtering: 65


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 48040/48040 [00:06<00:00, 6937.04it/s]


Processed and saved: ../../data/processed/traffic/2024/01-2024_processed.parquet
len df before filtering  12918338
Unique traffic sensors before filtering: 4703
len df after filtering  177865
Unique traffic sensors after filtering: 64


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 45093/45093 [00:06<00:00, 6952.30it/s]


Processed and saved: ../../data/processed/traffic/2024/06-2024_processed.parquet
len df before filtering  13351339
Unique traffic sensors before filtering: 4682
len df after filtering  181897
Unique traffic sensors after filtering: 64


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 46142/46142 [00:07<00:00, 6573.90it/s]


Processed and saved: ../../data/processed/traffic/2024/07-2024_processed.parquet
len df before filtering  12513169
Unique traffic sensors before filtering: 4677
len df after filtering  173261
Unique traffic sensors after filtering: 64


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 43539/43539 [00:06<00:00, 6826.75it/s]


Processed and saved: ../../data/processed/traffic/2024/11-2024_processed.parquet
len df before filtering  12918900
Unique traffic sensors before filtering: 4690
len df after filtering  180889
Unique traffic sensors after filtering: 64


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 45475/45475 [00:06<00:00, 6783.54it/s]


Processed and saved: ../../data/processed/traffic/2024/10-2024_processed.parquet
len df before filtering  12532935
Unique traffic sensors before filtering: 4689
len df after filtering  177302
Unique traffic sensors after filtering: 65


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 44676/44676 [00:06<00:00, 6709.42it/s]


Processed and saved: ../../data/processed/traffic/2024/02-2024_processed.parquet
len df before filtering  13451928
Unique traffic sensors before filtering: 4695
len df after filtering  189933
Unique traffic sensors after filtering: 65


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 47787/47787 [00:07<00:00, 6709.97it/s]


Processed and saved: ../../data/processed/traffic/2024/03-2024_processed.parquet
len df before filtering  13214710
Unique traffic sensors before filtering: 4678
len df after filtering  184951
Unique traffic sensors after filtering: 64


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 46974/46974 [00:06<00:00, 6747.81it/s]


Processed and saved: ../../data/processed/traffic/2024/08-2024_processed.parquet
len df before filtering  13296461
Unique traffic sensors before filtering: 4679
len df after filtering  178790
Unique traffic sensors after filtering: 62


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 44869/44869 [00:06<00:00, 6812.83it/s]


Processed and saved: ../../data/processed/traffic/2024/12-2024_processed.parquet
len df before filtering  13357035
Unique traffic sensors before filtering: 4706
len df after filtering  184864
Unique traffic sensors after filtering: 64


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 46887/46887 [00:06<00:00, 6722.65it/s]


Processed and saved: ../../data/processed/traffic/2024/05-2024_processed.parquet
len df before filtering  13010780
Unique traffic sensors before filtering: 4695
len df after filtering  183015
Unique traffic sensors after filtering: 65


  df_traffic['hora'] = df_traffic['fecha'].dt.floor('H')
Processing groups: 100%|██████████| 46185/46185 [00:06<00:00, 6825.62it/s]


Processed and saved: ../../data/processed/traffic/2024/04-2024_processed.parquet
