In [1]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell runs, so edits to the project package are picked up live.
%load_ext autoreload
%autoreload 2

# Filter model

In [None]:
from weather_weaver.models.geo import BoundingBox, GeoFilterModel
from weather_weaver.constants import EUROPE_BOUNDING_BOX_STR, ENTSO_E_ISO3_LIST
import pandas as pd
# Let pandas render up to 500 columns so wide frames are not truncated.
pd.set_option("display.max_columns", 500)

In [None]:
# Build the BoundingBox from the project-level Europe bounding-box string constant.
bb = BoundingBox.from_str(EUROPE_BOUNDING_BOX_STR)

In [None]:
# Geo filter built from the bounding box, then narrowed to the ISO3 codes
# listed in ENTSO_E_ISO3_LIST.
geofilter = GeoFilterModel.from_bounding_box(bb).filter_iso3s(
    list_iso3s=ENTSO_E_ISO3_LIST,
)

In [None]:
# Keep a handle on the filter's underlying frame for inspection in the next cells.
gdf = geofilter.filter_df

In [None]:
# Spot-check: show only the rows whose country_name is France or Portugal.
gdf.query("country_name in ['France', 'Portugal']")

In [None]:
# Visual check of the filtered geometries.
# NOTE(review): presumably gdf is a GeoDataFrame and this renders an interactive map - confirm.
gdf.explore()

# ERA5 download

In [43]:
# Display the raw bounding-box string that the ERA5 request area is derived from.
EUROPE_BOUNDING_BOX_STR

'N: 73.5 W: -27 S: 33 E: 45'

In [44]:
import cdsapi

from weather_weaver.models.geo import BoundingBox, GeoFilterModel
from weather_weaver.constants import EUROPE_BOUNDING_BOX_STR, ENTSO_E_ISO3_LIST

In [45]:
# CDS API client used by the retrieve call in the next cell.
c = cdsapi.Client()

# Same filter construction as in the "Filter model" section: bounding box
# first, then restriction to the ISO3 codes in ENTSO_E_ISO3_LIST.
bb = BoundingBox.from_str(EUROPE_BOUNDING_BOX_STR)
geofilter = GeoFilterModel.from_bounding_box(bb).filter_iso3s(
    list_iso3s=ENTSO_E_ISO3_LIST,
)

# Bounds of the filtered geometries (min/max lon/lat keys, see the output below).
latlon_bounds = geofilter.bounds
latlon_bounds

{'min_lon': -24.326184047939336,
 'min_lat': 34.57186941175544,
 'max_lon': 34.00488081232004,
 'max_lat': 71.18547435168055}

In [46]:
# Request hourly ERA5 single-level reanalysis (10 m wind components, 2 m
# temperature, total precipitation) for every day of 2020-2022 over the
# filtered area, written to a single GRIB file.
c.retrieve(
    "reanalysis-era5-single-levels",
    {
        "product_type": "reanalysis",
        "format": "grib",
        "variable": [
            "10m_u_component_of_wind",
            "10m_v_component_of_wind",
            "2m_temperature",
            "total_precipitation",
        ],
        # Zero-padded strings, identical to the hand-written lists they replace.
        "year": [str(year) for year in range(2020, 2023)],
        "month": [f"{month:02d}" for month in range(1, 13)],
        "day": [f"{day:02d}" for day in range(1, 32)],
        "time": [f"{hour:02d}:00" for hour in range(24)],
        # Area is passed as max_lat, min_lon, min_lat, max_lon (north, west, south, east).
        "area": [
            latlon_bounds[key]
            for key in ("max_lat", "min_lon", "min_lat", "max_lon")
        ],
    },
    "historical_reanalysis_era5.grib",
)

2024-01-10 13:52:45,650 INFO Welcome to the CDS
2024-01-10 13:52:45,651 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2024-01-10 13:52:45,733 INFO Request is queued


KeyboardInterrupt: 

In [48]:
import os

# One-off migration: tag processed forecast files with the variable set by
# turning "<name>_fc.parquet" into "<name>_2t-tp-10u-10v_fc.parquet".
folder_path = "../data/ecmwf/processed/oper"
old_suffix = "_fc.parquet"
new_suffix = "_2t-tp-10u-10v_fc.parquet"

for filename in os.listdir(folder_path):
    # Only touch files that really end with the legacy suffix and are not
    # tagged yet; this keeps the cell safe to re-run.
    if not filename.endswith(old_suffix) or "2t-tp-10u-10v" in filename:
        continue

    # Swap the suffix only (str.replace would rewrite every occurrence).
    new_filename = filename[: -len(old_suffix)] + new_suffix
    source = os.path.join(folder_path, filename)
    target = os.path.join(folder_path, new_filename)

    # os.rename can silently overwrite an existing file; never clobber data.
    if os.path.exists(target):
        print(f"Skipping {filename}: {new_filename} already exists")
        continue

    os.rename(source, target)

In [19]:
from weather_weaver.inputs.ecmwf.processor import EMCWFProcessor

In [30]:
# Run the ECMWF processor on a local GRIB file, restricted by the geo filter
# built earlier in the notebook.
path = "test.grib"

processor = EMCWFProcessor()

# NOTE(review): `dd` is also the alias used for `dask.dataframe` in later
# import cells; whichever cell runs last wins, so consider a distinct name.
dd = processor.transform(path, geo_filter=geofilter)

In [35]:
# Mean t2m per (run_time, timestamp, country_name), materialised with compute().
ddf = (
    dd.groupby(["run_time", "timestamp", "country_name"])
    .agg({"t2m": "mean"})
    .compute()
)

In [39]:
# Flatten the group keys back into columns and keep the two countries to plot.
ddf_plot = ddf.reset_index().query("country_name in ['Norway', 'France']")

In [40]:
import plotly.express as px

In [41]:

# One line per run_time, one facet row per country.
fig = px.line(
    ddf_plot,
    x="timestamp",
    y="t2m",
    color="run_time",
    facet_row="country_name",
)
fig.show()

In [13]:
import cfgrib

In [17]:
# GRIB sample stored in the repository checkout. It is addressed relative to
# the notebook directory, like the "../data/..." paths used elsewhere in this
# notebook, instead of through a user-specific absolute path.
path = (
    "../ec.oper.fc.sfc.128_165_10u.regn1280sc.20190101-20190527.grb.spasub.benmbarek722612/"
    "ec.oper.fc.sfc.128_165_10u.regn1280sc.20190101.grb.spasub.benmbarek722612"
)

# Open every dataset found in the file with explicit dask chunking.
ddd = cfgrib.open_datasets(
    path=path,
    chunks={
        "time": 1,
        "step": -1,
        "longitude": "auto",
        "latitude": "auto",
    },
    # Empty indexpath, kept from the original call.
    # NOTE(review): presumably this stops cfgrib writing a sidecar index file - confirm.
    backend_kwargs={"indexpath": ""},
)

In [18]:
# Inspect the first dataset of the list returned by cfgrib.open_datasets.
ddd[0]

Unnamed: 0,Array,Chunk
Bytes,32 B,32 B
Shape,"(4,)","(4,)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray
"Array Chunk Bytes 32 B 32 B Shape (4,) (4,) Dask graph 1 chunks in 3 graph layers Data type datetime64[ns] numpy.ndarray",4  1,

Unnamed: 0,Array,Chunk
Bytes,32 B,32 B
Shape,"(4,)","(4,)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.02 MiB,9.02 MiB
Shape,"(4, 640, 924)","(4, 640, 924)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 9.02 MiB 9.02 MiB Shape (4, 640, 924) (4, 640, 924) Dask graph 1 chunks in 3 graph layers Data type float32 numpy.ndarray",924  640  4,

Unnamed: 0,Array,Chunk
Bytes,9.02 MiB,9.02 MiB
Shape,"(4, 640, 924)","(4, 640, 924)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [None]:
const  # NOTE(review): no definition of `const` is visible in this notebook; looks like a leftover that would raise NameError - confirm and delete

In [None]:
from weather_weaver.models.geo import GeoFilterModel, BoundingBox
from weather_weaver.constants import ENTSO_E_ISO3_LIST

In [None]:
# NOTE(review): elsewhere in this notebook filter_iso3s is called on an
# instance (GeoFilterModel.from_bounding_box(bb).filter_iso3s(list_iso3s=...));
# confirm that calling it directly on the class is still supported.
filter_model = GeoFilterModel.filter_iso3s(ENTSO_E_ISO3_LIST)

In [None]:
# Display the bounds exposed by the filter model.
filter_model.bounds

In [None]:
import dask_geopandas as dask_gpd
import geopandas as gpd
import pandas as pd

In [None]:
# Three sample coordinates as plain longitude/latitude columns, used to try
# out the dask-geopandas conversion below.
df = pd.DataFrame(
    {
        "longitude": [-140, 0, 123],
        "latitude": [-65, 1, 48],
    }
)

In [None]:
# Wrap the pandas frame as a single-partition dask collection.
ddf = dask_gpd.from_geopandas(df, npartitions=1)

# dask_geopandas.points_from_xy takes the dask frame plus the *names* of the
# x/y columns (unlike geopandas.points_from_xy, which takes the coordinate
# arrays themselves), and builds the point geometries partition by partition.
geometry = dask_gpd.points_from_xy(ddf, "longitude", "latitude", crs="EPSG:4326")

# Attach the geometry column to obtain a dask GeoDataFrame.
ddf = dask_gpd.from_dask_dataframe(ddf, geometry=geometry)

In [None]:
# Explicitly set the CRS attribute on the dask frame to EPSG:4326.
ddf.crs = 'EPSG:4326'

In [None]:
# Display the CRS currently attached to the frame.
ddf.crs

# Kill dask clusters

In [None]:
from distributed.client import _global_clients

In [None]:
# List every entry of distributed's client registry (key and client object).
for client_key, client in _global_clients.items():
    print(client_key, client)

In [None]:
# _global_clients[0].cluster.close()

# Explore processed results

In [None]:
import pandas as pd
import dask.dataframe as dd
from pathlib import Path
import plotly.express as px

In [None]:
# Collect the processed parquet files one directory level below the output
# folder. The glob generator is materialised and sorted so that `files` can be
# reused after the read (a generator is empty after one pass) and so the read
# order is deterministic.
files = sorted(Path("../data/ecmwf/processed/oper/").glob("*/*.parquet"))
ddf = dd.read_parquet(files)

In [None]:
# Mean t2m per (run_time, timestamp, country_name); the group keys are turned
# back into ordinary columns for plotting.
df_tmp = (
    ddf.groupby(["run_time", "timestamp", "country_name"])
    .agg({"t2m": "mean"})
    .compute()
    .reset_index()
)

In [None]:
# Compare the forecast runs for two countries: one facet row per country,
# one coloured line per run_time.
df_plot = df_tmp.query("country_name in ['Norway', 'France']")
fig = px.line(
    df_plot,
    x="timestamp",
    y="t2m",
    color="run_time",
    facet_row="country_name",
)
fig.show()

# Using service

In [None]:
import datetime as dt
from weather_weaver.services.service import WeatherConsumerService

from weather_weaver.outputs.localfs.client import LocalClient
from weather_weaver.inputs.ecmwf import constants as ecmwf_constants
from weather_weaver.inputs.ecmwf.open_data.fetcher import ECMWFOpenDataFetcher
from weather_weaver.inputs.ecmwf.open_data.request import ECMWFOpenDataRequestBuilder
from weather_weaver.inputs.ecmwf.processor import EMCWFProcessor

In [None]:
# Instantiate the four collaborators handed to WeatherConsumerService below,
# all with their default constructor arguments.
fetcher = ECMWFOpenDataFetcher()
request_builder = ECMWFOpenDataRequestBuilder()
processor = EMCWFProcessor()
storer = LocalClient()

In [None]:
# Wire the service: raw and processed directories come from the ECMWF
# constants module, the collaborators from the previous cell.
service = WeatherConsumerService(
    request_builder=request_builder,
    raw_dir=ecmwf_constants.RAW_DIR,
    processed_dir=ecmwf_constants.PROCESSED_DIR,
    fetcher=fetcher,
    processor=processor,
    storer=storer,
)

In [None]:
# Start date and date offset handed to the service calls below.
start = dt.date(year=2024, month=1, day=4)
date_offset = 1

In [None]:
# Default request list for the chosen window.
all_requests = service._build_default_requests(start=start, date_offset=date_offset)

# Drop the requests whose processed output already exists in storage.
all_new_requests = [
    pending
    for pending in all_requests
    if not service.storer.exists(path=service.processed_dir / pending.file_name)
]

# Only build a dask pipeline when there is something left to process.
if len(all_new_requests) > 0:
    pipeline = service._build_dask_pipeline(all_new_requests)
else:
    pipeline = None

In [None]:
# `pipeline` is None when every request was already processed (see the cell
# that builds it); only render the task graph when there is one, instead of
# failing with AttributeError on None.
pipeline.visualize() if pipeline is not None else None

In [None]:
# Run the download for the same window as the pipeline preview above: reuse
# `date_offset` rather than a hard-coded 1, so the two cannot drift apart.
processed_files = service.download_datasets(start=start, date_offset=date_offset)

In [None]:
from pathlib import Path

In [None]:
# Raw GRIB2 file of the 2024-01-04 12z run, addressed relative to the notebook
# directory (like the "../data/..." paths used elsewhere in this notebook)
# instead of through a user-specific absolute path.
path = Path("../data/ecmwf/raw/oper/20240104_12z_0-90_fc.grib2")

In [None]:
import datetime as dt

import dask.dataframe as dd
import pandas as pd
import plotly.express as px
import xarray as xr
import structlog

from weather_weaver.constants import load_world_countries
from weather_weaver.inputs.ecmwf.constants import ENTSO_E_ISO3_LIST, PROCESSED_DIR
from weather_weaver.outputs.localfs.client import LocalClient as LocalfsClient

from weather_weaver.inputs.ecmwf.open_data import (
    ECMWFOpenDataFetcher,
    ECMWFOpenDataRequest,
    EMCWFOpenDataProcessor,
    GeoFilterModel,
    StreamType,
    RunTime,
    RequestType,
)

# Let pandas render up to 500 columns so wide frames are not truncated.
pd.set_option("display.max_columns", 500)

# structlog logger used for the status message further down.
logger = structlog.getLogger()

In [None]:
# Parameters of a single open-data request: the 00 run of 2024-01-01 on the
# OPER stream, forecast type.
run_date = dt.date(2024, 1, 1)
run_time = RunTime.H00
stream =  StreamType.OPER
request_type = RequestType.FORECAST

# Bundle them into the request object consumed by the fetcher and processor.
data_request = ECMWFOpenDataRequest(
    run_date=run_date,
    run_time=run_time,
    stream=stream,
    request_type=request_type,
)

In [None]:
# Local-filesystem storage client and open-data fetcher, default configuration.
storage_client = LocalfsClient()
fetcher = ECMWFOpenDataFetcher()

In [None]:
# Destination of the processed output for this request.
output_path = PROCESSED_DIR / f"{data_request.file_name}.parquet"

# This cell only reports an existing output; it does not stop the following
# cells from downloading and processing again.
already_stored = storage_client.exists(path=output_path)
if already_stored:
    logger.info(
        event="NWP data already downloaded, skipping!",
        request=data_request,
    )

In [None]:
# Fetch the raw files for the request; the return value is used as raw_path below.
out_path = fetcher.download_raw_files(data_request)

In [None]:
# Processor for the open-data files (rebinds `processor` from the service section).
processor = EMCWFOpenDataProcessor()

In [None]:
# NOTE(review): `geo_filter` is not defined in any visible cell (earlier cells
# bind `geofilter` and `filter_model`) - confirm which object is meant here.
ddf = processor.transform(raw_path=out_path, request=data_request, filter_model=geo_filter)

In [None]:
# Persist the processed frame at the parquet destination computed above.
storage_client.store(ddf=ddf, destination_path=output_path)

In [None]:
# read back saved parquet file

In [None]:
# Lazily re-open the parquet output that was just written.
test_df = dd.read_parquet(output_path)

In [None]:
# Display the dask frame read back from parquet.
test_df

In [None]:
# Mean of each weather variable per (timestamp, country_name); still lazy.
tt = test_df.groupby(["timestamp", "country_name"]).agg(
    dict.fromkeys(("u10", "v10", "t2m", "tp"), "mean")
)

In [None]:
# Materialise the lazy aggregation.
ttt = tt.compute()

In [None]:
# `ttt` is indexed by (timestamp, country_name) with one column per variable
# (u10, v10, t2m, tp). Reshape it to long form so there is a "variable" and a
# "value" column to plot; the columns referenced before ("value_datetime",
# "NAME", ...) are not produced by the aggregation above.
ttt_long = ttt.reset_index().melt(
    id_vars=["timestamp", "country_name"],
    var_name="variable",
    value_name="value",
)

# One facet per variable, one coloured line per country.
fig = px.line(
    ttt_long,
    x="timestamp",
    y="value",
    color="country_name",
    facet_col="variable",
)
fig.show()

In [None]:
# NetCDF sample in the current user's Downloads folder; resolved from the home
# directory so the cell is not tied to one machine's user name.
path = str(Path.home() / "Downloads" / "ECMWF_HRES_06Z_20190101.nc")

In [None]:
# Open the NetCDF file and display the dataset summary (the dangling
# `dds.` attribute access was a syntax error).
dds = xr.open_dataset(path)
dds

In [None]:
# Inputs of the MARS request built further down.
area: str = "uk"
hours: int = 48
param_group: str = "basic"

# Two-day window; note that `start` rebinds the name used in the service section.
start = dt.date(year=2023, month=12, day=26)
end = start + dt.timedelta(days=1)

In [None]:
# Choose the MARS parameter codes for the selected group.
if param_group == "basic":
    parameters = ["167.128/169.128"]  # 2 Metre Temperature, Dswrf
else:
    # NOTE(review): PARAMETER_ECMWFCODE_MAP is not defined or imported in any
    # visible cell - confirm where it comes from.
    parameters = list(PARAMETER_ECMWFCODE_MAP.keys())

In [None]:
# Hourly UTC timestamps from `start` through the end of the `end` day
# (the day after `end` is excluded by inclusive="left"), keeping only the
# 00/06/12/18 hours.
all_init_times: list[dt.datetime] = [
    pdt.to_pydatetime()
    for pdt in pd.date_range(
        start=start,
        end=end + dt.timedelta(days=1),
        inclusive="left",
        freq="h",  # lowercase alias: "H" is deprecated since pandas 2.2
        tz=dt.UTC,
    )
    if pdt.hour in (0, 6, 12, 18)
]

In [None]:
# Build the request for the first init time only (list-only mode, target
# "test.txt"); nothing is bound when the list is empty.
for init_time in all_init_times[:1]:
    mars_request = buildMarsRequest(
        parameters=parameters,
        hours=hours,
        area=area,
        it=init_time,
        list_only=True,
        target="test.txt",
    )

In [None]:
# Show the request built for the first init time.
print(mars_request)

In [None]:
# Submit the request, writing the result to "test.txt".
# NOTE(review): `server` is not defined or imported in any visible cell - confirm where it comes from.
server.execute(mars_request, target="test.txt")