In [16]:
import cdsapi

dataset = "reanalysis-era5-single-levels"
request = {
    "product_type": ["reanalysis"],
    "variable": [
        "2m_temperature",
        "total_precipitation"
    ],
    "year": [str(year) for year in range(1980, 2018)],
    "month": [
        "05", "06", "07",
        "08"
    ],
    "day": [
        "01", "02", "03",
        "04", "05", "06",
        "07", "08", "09",
        "10", "11", "12",
        "13", "14", "15",
        "16", "17", "18",
        "19", "20", "21",
        "22", "23", "24",
        "25", "26", "27",
        "28", "29", "30",
        "31"
    ],
    "time": [
        "10:00", "11:00", "12:00",
        "13:00", "14:00", "15:00",
        "16:00"
    ],
    "data_format": "grib",
    "download_format": "unarchived",
    "area": [38.5, 59.5, 22.5, 78.5]
}

client = cdsapi.Client()
client.retrieve(dataset, request).download()


2025-02-05 22:37:09,749 INFO Request ID is 7dcea2c1-565e-4cc7-8d84-7e305fbd6f14
2025-02-05 22:37:10,077 INFO status has been updated to accepted
2025-02-05 22:37:16,342 INFO status has been updated to running
2025-02-05 22:37:20,015 INFO status has been updated to successful
                                                                                          

'8aef3fa9d9341d0bab5cc2c303e5068b.grib'

In [2]:
import xarray as xr
import cfgrib

precip_data= xr.open_dataset(
    r'D:\FYP\active\8aef3fa9d9341d0bab5cc2c303e5068b.grib',
    engine='cfgrib',
    backend_kwargs={'filter_by_keys': {'shortName': 'tp'}}
)

In [4]:
precip_data.variables

Frozen({'number': <xarray.Variable ()> Size: 8B
[1 values with dtype=int64]
Attributes:
    long_name:      ensemble member numerical id
    units:          1
    standard_name:  realization, 'time': <xarray.IndexVariable 'time' (time: 4674)> Size: 37kB
array(['1980-05-01T06:00:00.000000000', '1980-05-02T06:00:00.000000000',
       '1980-05-03T06:00:00.000000000', ..., '2017-08-29T06:00:00.000000000',
       '2017-08-30T06:00:00.000000000', '2017-08-31T06:00:00.000000000'],
      shape=(4674,), dtype='datetime64[ns]')
Attributes:
    long_name:      initial time of forecast
    standard_name:  forecast_reference_time, 'step': <xarray.IndexVariable 'step' (step: 7)> Size: 56B
array([14400000000000, 18000000000000, 21600000000000, 25200000000000,
       28800000000000, 32400000000000, 36000000000000], dtype='timedelta64[ns]')
Attributes:
    long_name:      time since forecast_reference_time
    standard_name:  forecast_period, 'surface': <xarray.Variable ()> Size: 8B
[1 values with dtyp

In [5]:
import xarray as xr
import cfgrib

# Attempt 1: Load specific parameters
temp_data = xr.open_dataset(
    r'D:\FYP\active\8aef3fa9d9341d0bab5cc2c303e5068b.grib',
    engine='cfgrib',
    backend_kwargs={'filter_by_keys': {'shortName': '2t'}}
)

In [6]:
temp_data.variables

Frozen({'number': <xarray.Variable ()> Size: 8B
[1 values with dtype=int64]
Attributes:
    long_name:      ensemble member numerical id
    units:          1
    standard_name:  realization, 'time': <xarray.IndexVariable 'time' (time: 26691)> Size: 214kB
array(['1980-05-01T10:00:00.000000000', '1980-05-01T11:00:00.000000000',
       '1980-05-01T12:00:00.000000000', ..., '2017-08-31T14:00:00.000000000',
       '2017-08-31T15:00:00.000000000', '2017-08-31T16:00:00.000000000'],
      shape=(26691,), dtype='datetime64[ns]')
Attributes:
    long_name:      initial time of forecast
    standard_name:  forecast_reference_time, 'step': <xarray.Variable ()> Size: 8B
[1 values with dtype=timedelta64[ns]]
Attributes:
    long_name:      time since forecast_reference_time
    standard_name:  forecast_period, 'surface': <xarray.Variable ()> Size: 8B
[1 values with dtype=float64]
Attributes:
    long_name:  original GRIB coordinate for key: level(surface)
    units:      1, 'latitude': <xarray.Inde

In [6]:
precip_data

In [8]:
temp_data

In [46]:
daily_temp_data = temp_data.resample(time="1D").mean()


In [47]:
daily_temp_data

In [48]:
import xarray as xr

# Assuming `temp_data` is your dataset
# Create a new grouping variable based on 7-hour intervals
# Use the modulo operator to group 7-hour blocks as days
temp_data = temp_data.assign_coords(
    custom_day=(temp_data["time"].dt.floor("7H").dt.date)
)

# Perform aggregation using the custom grouping
daily_temp_data = temp_data.groupby("custom_day").mean(dim="time")

# Rename `custom_day` back to `time` for clarity
daily_temp_data = daily_temp_data.rename({"custom_day": "time"})

# Check the result
print(daily_temp_data)


  field_values = method(freq=freq).values


<xarray.Dataset> Size: 76MB
Dimensions:    (time: 3813, latitude: 65, longitude: 77)
Coordinates:
    number     int64 8B 0
    step       timedelta64[ns] 8B 00:00:00
    surface    float64 8B 0.0
  * latitude   (latitude) float64 520B 38.5 38.25 38.0 37.75 ... 23.0 22.75 22.5
  * longitude  (longitude) float64 616B 59.5 59.75 60.0 ... 78.0 78.25 78.5
  * time       (time) object 31kB 1980-05-01 1980-05-02 ... 2017-08-31
Data variables:
    t2m        (time, latitude, longitude) float32 76MB 297.3 297.3 ... 301.7
Attributes:
    GRIB_edition:            1
    GRIB_centre:             ecmf
    GRIB_centreDescription:  European Centre for Medium-Range Weather Forecasts
    GRIB_subCentre:          0
    Conventions:             CF-1.7
    institution:             European Centre for Medium-Range Weather Forecasts
    history:                 2025-01-25T14:03 GRIB to CDM+CF via cfgrib-0.9.1...


In [51]:
len(daily_temp_data.time)

3813

In [49]:
daily_temp_data

In [53]:
# Check for missing dates
expected_dates = pd.date_range(start="1980-05-01", end="2017-08-31", freq="D")
actual_dates = pd.to_datetime(temp_data["time"].values)
missing_dates = expected_dates.difference(actual_dates)
print("Missing dates:", missing_dates)


Missing dates: DatetimeIndex(['1980-05-01', '1980-05-02', '1980-05-03', '1980-05-04',
               '1980-05-05', '1980-05-06', '1980-05-07', '1980-05-08',
               '1980-05-09', '1980-05-10',
               ...
               '2017-08-22', '2017-08-23', '2017-08-24', '2017-08-25',
               '2017-08-26', '2017-08-27', '2017-08-28', '2017-08-29',
               '2017-08-30', '2017-08-31'],
              dtype='datetime64[ns]', length=13637, freq='D')


In [54]:
print(temp_data["time"])


<xarray.DataArray 'time' (time: 26691)> Size: 214kB
array(['1980-05-01T10:00:00.000000000', '1980-05-01T11:00:00.000000000',
       '1980-05-01T12:00:00.000000000', ..., '2017-08-31T14:00:00.000000000',
       '2017-08-31T15:00:00.000000000', '2017-08-31T16:00:00.000000000'],
      shape=(26691,), dtype='datetime64[ns]')
Coordinates:
    number      int64 8B ...
  * time        (time) datetime64[ns] 214kB 1980-05-01T10:00:00 ... 2017-08-3...
    step        timedelta64[ns] 8B ...
    surface     float64 8B ...
    valid_time  (time) datetime64[ns] 214kB ...
    custom_day  (time) object 214kB 1980-05-01 1980-05-01 ... 2017-08-31
Attributes:
    long_name:      initial time of forecast
    standard_name:  forecast_reference_time


In [55]:
daily_dates = pd.to_datetime(temp_data["time"].values).floor('D')
expected_dates = pd.date_range(start="1980-05-01", end="2017-08-31", freq="D")
actual_dates = pd.to_datetime(daily_dates).unique()  # Get unique daily dates
missing_dates = expected_dates.difference(actual_dates)
print("Missing dates:", missing_dates)


Missing dates: DatetimeIndex(['1980-09-01', '1980-09-02', '1980-09-03', '1980-09-04',
               '1980-09-05', '1980-09-06', '1980-09-07', '1980-09-08',
               '1980-09-09', '1980-09-10',
               ...
               '2017-04-21', '2017-04-22', '2017-04-23', '2017-04-24',
               '2017-04-25', '2017-04-26', '2017-04-27', '2017-04-28',
               '2017-04-29', '2017-04-30'],
              dtype='datetime64[ns]', length=9824, freq=None)


In [None]:
import pandas as pd

# Assuming your dataframe is named df and has a datetime column 'timestamp'
df['year'] = df['timestamp'].dt.year

# Check unique years in the dataset
print(sorted(df['year'].unique()))

# Compare with expected range
expected_years = set(range(1980, 2018))
present_years = set(df['year'].unique())
missing_years = expected_years - present_years

print("Missing years:", missing_years)


In [11]:
print(temp_data.time)


<xarray.DataArray 'time' (time: 26691)> Size: 214kB
array(['1980-05-01T10:00:00.000000000', '1980-05-01T11:00:00.000000000',
       '1980-05-01T12:00:00.000000000', ..., '2017-08-31T14:00:00.000000000',
       '2017-08-31T15:00:00.000000000', '2017-08-31T16:00:00.000000000'],
      shape=(26691,), dtype='datetime64[ns]')
Coordinates:
    number      int64 8B ...
  * time        (time) datetime64[ns] 214kB 1980-05-01T10:00:00 ... 2017-08-3...
    step        timedelta64[ns] 8B ...
    surface     float64 8B ...
    valid_time  (time) datetime64[ns] 214kB ...
Attributes:
    long_name:      initial time of forecast
    standard_name:  forecast_reference_time


In [12]:
print(temp_data.time.dt.year.unique())

AttributeError: 'DataArray' object has no attribute 'unique'