In [1]:
import xarray as xr
import sqlite3
import pandas as pd
import numpy as np
import dask
import dask.dataframe as dd
import logging

# Open one chunked xarray dataset per GRIB shortName from the same file.
print('opening GRIB file')
filename = "src/data/past_climate.grib"
variables = ['tp', 'tcc', 'rh', '2t', '2d', '10v', '10u']

# Run Dask on its single-threaded scheduler.
dask.config.set(scheduler='single-threaded')

# Maps shortName -> dataset. A variable that fails to open is logged and
# simply left out of the mapping, so later lookups must tolerate gaps.
datasets = {}
for var in variables:
    try:
        # Restrict cfgrib to the messages of this variable and chunk along `time`.
        ds = xr.open_dataset(
            filename,
            engine='cfgrib',
            backend_kwargs={'filter_by_keys': {'shortName': var}},
            chunks={'time': 10},
        )
    except Exception as e:
        logging.error('Error opening dataset for variable %s: %s', var, e)
    else:
        datasets[var] = ds

opening GRIB file


In [49]:
# Overwrite the `valid_time` entry of the `tp` dataset with a flattened (1-D)
# copy of its current values. This mutates `datasets['tp']` in place.
# NOTE(review): a bare 1-D array assigned like this presumably becomes its own
# `valid_time` dimension instead of staying attached to the dimensions of `tp`
# - confirm that a later `.sel(valid_time=...)` really subsets `tp`.
datasets['tp']['valid_time'] = datasets['tp']['valid_time'].values.flatten()


In [54]:
# Nearest-neighbour lookup of `tp` by valid_time / latitude / longitude, then
# display how many values the selection returned.
# NOTE(review): the recorded result (13392) looks larger than a single time
# slice - confirm that the `valid_time` indexer actually narrows `tp`.
tp_values = (
    datasets['tp']
    .sel(
        valid_time='1991-12-31T19:00:00.000000000',
        latitude=38.96,
        longitude=-3.435,
        method='nearest',
    )['tp']
    .values
)
tp_values.size

13392

In [41]:
# Select the `tp` dataset at the given time and grid point (nearest-neighbour
# matching on every indexer) and display the raw `tp` values that remain.
tp_selection = datasets['tp'].sel(
    time='1994-09-30T18:00:00.000000000',
    latitude=38.96,
    longitude=-3.435,
    method='nearest',
)
tp_values = tp_selection['tp'].values
tp_values

array([          nan,           nan,           nan,           nan,
                 nan,           nan, 2.1133339e-05, 4.4025786e-05,
       5.4072851e-05, 4.1033709e-05, 4.2383210e-05, 2.2032807e-05],
      dtype=float32)

In [40]:
# Same point lookup as a keyword mapping: one time stamp, one latitude and one
# longitude, each resolved to the nearest available coordinate value.
tp_indexers = {
    'time': '1994-10-31T18:00:00.000000000',
    'latitude': 38.96,
    'longitude': -3.435,
}
tp_values = datasets['tp'].sel(**tp_indexers, method='nearest')['tp'].values
tp_values


array([           nan,            nan,            nan,            nan,
                  nan,            nan, 7.25808422e-05, 1.07834305e-04,
       1.20537181e-04, 1.26841565e-04, 9.24911583e-05, 8.04631272e-05],
      dtype=float32)

In [35]:

# Step 1: pick `tp` at the requested time and grid point (nearest match).
tp_at_point = datasets['tp'].sel(
    time='1994-10-01T18:00:00.000000000',
    latitude=38.96,
    longitude=-3.435,
    method='nearest',
)['tp']

# Step 2: average everything left after the selection, ignoring NaN entries.
avg = tp_at_point.mean(skipna=True).values
avg

array(5.5693952e-05, dtype=float32)

In [29]:
# Display entries 100 through 149 of the `time` coordinate of the `tp` dataset.
tp_time_values = datasets['tp']['time'].values
tp_time_values[100:150]

array(['1994-10-01T06:00:00.000000000', '1994-10-01T18:00:00.000000000',
       '1994-10-31T18:00:00.000000000', '1994-11-01T06:00:00.000000000',
       '1994-11-01T18:00:00.000000000', '1994-11-30T18:00:00.000000000',
       '1994-12-01T06:00:00.000000000', '1994-12-01T18:00:00.000000000',
       '1994-12-31T18:00:00.000000000', '1995-01-01T06:00:00.000000000',
       '1995-01-01T18:00:00.000000000', '1995-01-31T18:00:00.000000000',
       '1995-02-01T06:00:00.000000000', '1995-02-01T18:00:00.000000000',
       '1995-02-28T18:00:00.000000000', '1995-03-01T06:00:00.000000000',
       '1995-03-01T18:00:00.000000000', '1995-03-31T18:00:00.000000000',
       '1995-04-01T06:00:00.000000000', '1995-04-01T18:00:00.000000000',
       '1995-04-30T18:00:00.000000000', '1995-05-01T06:00:00.000000000',
       '1995-05-01T18:00:00.000000000', '1995-05-31T18:00:00.000000000',
       '1995-06-01T06:00:00.000000000', '1995-06-01T18:00:00.000000000',
       '1995-06-30T18:00:00.000000000', '1995-07-01