In [1]:
import os

import pandas as pd
import xarray as xr


In [23]:
# Station code -> (longitude, latitude) in decimal degrees.
# The tuple order matters: index 0 is read as the longitude and index 1 as the
# latitude when the grid cells around each station are selected.
code_to_coords = {
    'ES0694A': (2.009802, 41.39216),
    'ES1120A': (1.191975, 41.11588),
    'ES1892A': (2.237875, 41.44398),
    'ES1983A': (2.082141, 41.32177),
    'ES1148A': (2.222245, 41.425621),
    'ES1992A': (2.115661, 41.387273),
    'ES1438A': (2.15403, 41.385366),
    'ES1679A': (2.187417, 41.386414),
    'ES1124A': (1.239709, 41.159532),
    'ES1312A': (1.200765, 41.103678),
    'ES1666A': (1.24165, 41.117388),
}


In [148]:
# Collect every NetCDF file found in the CALIOPE NO2 folder.
# Paths are sorted lexicographically; this equals chronological order only if
# the file names embed a zero-padded date — TODO confirm the naming scheme.
folder_path = './raw_Data/DADES_CALIOPE_buenos/NO2/'
nc_files = sorted(
    os.path.join(folder_path, file_name)
    for file_name in os.listdir(folder_path)
    if file_name.endswith('.nc')
)


In [149]:
# Factor applied to the raw `sconcno2` values before they are averaged.
# NOTE(review): presumably a ppmV -> ug/m3 conversion for NO2 — confirm the
# units against the metadata of the input files.
NO2_SCALE_FACTOR = 1886.6

# Half-widths (in degrees) of the lon/lat window averaged around each station.
# NOTE(review): the latitude margin is about 30 times the longitude margin;
# confirm that this asymmetry is intended.
MARGIN_LON = 0.0018
MARGIN_LAT = 0.055

# One small frame per (file, station). They are concatenated once at the end
# so the accumulated result is not re-copied on every iteration.
station_frames = []

for nc_file in nc_files:
    # The context manager releases the NetCDF file handle as soon as the
    # values have been materialised into a pandas frame.
    with xr.open_dataset(nc_file) as ds:
        grid_df = ds.sconcno2.to_dataframe().reset_index()

    # The frame produced by `to_dataframe()` already carries the per-cell
    # `lat`/`lon` columns, so the grid does not need to be joined back in.
    grid_df = grid_df[['lat', 'lon', 'time', 'sconcno2']].rename(
        columns={'time': 'fecha', 'sconcno2': 'concentracion_NO2'}
    )
    grid_df['concentracion_NO2'] = grid_df['concentracion_NO2'] * NO2_SCALE_FACTOR

    for code, (station_lon, station_lat) in code_to_coords.items():
        # Grid cells inside the (inclusive) lon/lat window centred on the station.
        in_window = (
            grid_df['lon'].between(station_lon - MARGIN_LON, station_lon + MARGIN_LON)
            & grid_df['lat'].between(station_lat - MARGIN_LAT, station_lat + MARGIN_LAT)
        )

        # Spatial mean over the selected cells for every timestamp.
        # If no cell falls inside the window this frame is empty and the
        # station contributes no rows for this file.
        station_df = (
            grid_df.loc[in_window]
            .groupby('fecha')['concentracion_NO2']
            .mean()
            .reset_index()
        )
        station_df['code'] = code
        station_frames.append(station_df)

# NOTE(review): timestamps are not de-duplicated across files; if consecutive
# files cover overlapping periods, `final_df` holds repeated (fecha, code)
# rows — confirm whether that is wanted.
# The per-file row index is kept as-is (it restarts for every frame).
final_df = pd.concat(station_frames) if station_frames else pd.DataFrame()


In [151]:
# Display the accumulated per-station NO2 means (columns: fecha, concentracion_NO2, code).
final_df

Unnamed: 0,fecha,concentracion_NO2,code
0,2023-01-01 00:00:00,6.490515,ES0694A
1,2023-01-01 01:00:00,5.888712,ES0694A
2,2023-01-01 02:00:00,6.300090,ES0694A
3,2023-01-01 03:00:00,5.855033,ES0694A
4,2023-01-01 04:00:00,6.524580,ES0694A
...,...,...,...
43,2024-01-01 19:00:00,46.334751,ES1666A
44,2024-01-01 20:00:00,41.004780,ES1666A
45,2024-01-01 21:00:00,42.708248,ES1666A
46,2024-01-01 22:00:00,30.172792,ES1666A


In [152]:

# Persist the per-station series as CSV. `index=False` leaves out the row
# index, which carries no information for this frame.
output_path = './processed_data/caliope/caliope_data.csv'
# `to_csv` does not create missing folders, so make sure the target exists.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
final_df.to_csv(output_path, index=False)