From 2b21eb87d1ea5583738a218784839c2e7d97a7be Mon Sep 17 00:00:00 2001 From: Benjamin Gutzmann Date: Sat, 2 Mar 2024 17:50:35 +0100 Subject: [PATCH] Add NOAA GHCN Hourly API (also known as ISD) --- CHANGELOG.rst | 2 + docs/data/coverage/noaa/ghcn.rst | 5 +- docs/data/coverage/noaa/ghcn/hourly.rst | 494 ++++++++++++++++++ docs/data/introduction.rst | 2 +- tests/provider/noaa/ghcn/test_api_data.py | 1 + tests/provider/noaa/ghcn/test_api_stations.py | 2 +- tests/test_api.py | 3 +- wetterdienst/metadata/parameter.py | 5 + wetterdienst/provider/noaa/ghcn/api.py | 343 +++++++++++- wetterdienst/provider/noaa/ghcn/parameter.py | 93 +++- wetterdienst/provider/noaa/ghcn/unit.py | 25 + 11 files changed, 955 insertions(+), 20 deletions(-) create mode 100644 docs/data/coverage/noaa/ghcn/hourly.rst diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 03583643c..f9912fb4d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,6 +4,8 @@ Changelog Development *********** +- Add NOAA GHCN Hourly API (also known as ISD) + 0.75.0 (25.02.2024) ******************* diff --git a/docs/data/coverage/noaa/ghcn.rst b/docs/data/coverage/noaa/ghcn.rst index 289c84252..239a44c6a 100644 --- a/docs/data/coverage/noaa/ghcn.rst +++ b/docs/data/coverage/noaa/ghcn.rst @@ -6,10 +6,12 @@ Global Historical Climatology Network Overview ******** -NOAA Global Historical Climatology Network is a collection of **daily** weather data put together from multiple weather +NOAA Global Historical Climatology Network is a collection of **hourly** and **daily** weather data put together from multiple weather services around the world, even those where at this moment no data is publicly offered. Resolution is fixed on daily, because this is the most common and maintainable resolution with most observations practiced all over the world. +The GHCN-Hourly dataset represents the previously existing ISD (Integrated Surface Database). + License ******* @@ -24,4 +26,5 @@ Structure .. 
toctree:: :maxdepth: 1 + ghcn/hourly ghcn/daily diff --git a/docs/data/coverage/noaa/ghcn/hourly.rst b/docs/data/coverage/noaa/ghcn/hourly.rst new file mode 100644 index 000000000..b0b45ecf9 --- /dev/null +++ b/docs/data/coverage/noaa/ghcn/hourly.rst @@ -0,0 +1,494 @@ +Hourly +###### + +Metadata +******** + +.. list-table:: + :widths: 20 80 + :stub-columns: 1 + + * - original name + - hourly + * - url + - `NOAA GHCN Hourly`_ + +.. _NOAA GHCN Hourly: https://www.ncei.noaa.gov/oa/global-historical-climatology-network/index.html + +Datasets +******** + +Hourly +====== + +Metadata +-------- + +.. list-table:: + :widths: 20 80 + :stub-columns: 1 + + * - original name + - hourly + * - url + - `NOAA GHCN Hourly`_ + * - description file + - `NOAA GHCN Hourly dataset description`_ + * - description + - Historical hourly station observations (temperature, pressure, precipitation, wind, etc.) from stations around the globe + +.. _NOAA GHCN Hourly dataset description: https://www.ncei.noaa.gov/oa/global-historical-climatology-network/hourly/doc/ghcnh_DOCUMENTATION.pdf + +Parameters +---------- + +humidity +^^^^^^^^ + +.. list-table:: + :widths: 20 80 + :stub-columns: 1 + + * - original name + - relative_humidity + * - description file + - `NOAA GHCN Hourly dataset description`_ + * - description + - Relative humidity is calculated from air (dry bulb) temperature and dewpoint temperature (whole percent) + * - origin unit + - :math:`\%` + * - SI unit + - :math:`\%` + * - constraints + - :math:`\geq{0}, \leq{100}` + +precipitation_height +^^^^^^^^^^^^^^^^^^^^ + +.. 
list-table:: + :widths: 20 80 + :stub-columns: 1 + + * - original name + - precipitation + * - description file + - `NOAA GHCN Hourly dataset description`_ + * - description + - total liquid precipitation (rain or melted snow) for past hour; a “T” in the measurement code column indicates a trace amount of precipitation (millimeters) + * - origin unit + - :math:`mm` + * - SI unit + - :math:`kg / m^2` + * - constraints + - :math:`\geq{0}` + +precipitation_height_last_3h +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 20 80 + :stub-columns: 1 + + * - original name + - precipitation_3_hour + * - description file + - `NOAA GHCN Hourly dataset description`_ + * - description + - 3-hour total liquid precipitation (rain or melted snow) accumulation + from FM12/SYNOP reports; a “T” in the measurement code column indicates a trace amount + of precipitation (millimeters) + * - origin unit + - :math:`mm` + * - SI unit + - :math:`kg / m^2` + * - constraints + - :math:`\geq{0}` + +precipitation_height_last_6h +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 20 80 + :stub-columns: 1 + + * - original name + - precipitation_6_hour + * - description file + - `NOAA GHCN Hourly dataset description`_ + * - description + - 6-hour total liquid precipitation (rain or melted snow) accumulation + from FM12/SYNOP reports; a “T” in the measurement code column indicates a trace amount + of precipitation (millimeters) + * - origin unit + - :math:`mm` + * - SI unit + - :math:`kg / m^2` + * - constraints + - :math:`\geq{0}` + +precipitation_height_last_9h +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. 
list-table:: + :widths: 20 80 + :stub-columns: 1 + + * - original name + - precipitation_9_hour + * - description file + - `NOAA GHCN Hourly dataset description`_ + * - description + - 9-hour total liquid precipitation (rain or melted snow) accumulation + from FM12/SYNOP reports; a “T” in the measurement code column indicates a trace amount + of precipitation (millimeters) + * - origin unit + - :math:`mm` + * - SI unit + - :math:`kg / m^2` + * - constraints + - :math:`\geq{0}` + +precipitation_height_last_12h +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 20 80 + :stub-columns: 1 + + * - original name + - precipitation_12_hour + * - description file + - `NOAA GHCN Hourly dataset description`_ + * - description + - 12-hour total liquid precipitation (rain or melted snow) accumulation + from FM12/SYNOP reports; a “T” in the measurement code column indicates a trace amount + of precipitation (millimeters) + * - origin unit + - :math:`mm` + * - SI unit + - :math:`kg / m^2` + * - constraints + - :math:`\geq{0}` + +precipitation_height_last_15h +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 20 80 + :stub-columns: 1 + + * - original name + - precipitation_15_hour + * - description file + - `NOAA GHCN Hourly dataset description`_ + * - description + - 15-hour total liquid precipitation (rain or melted snow) accumulation + from FM12/SYNOP reports; a “T” in the measurement code column indicates a trace amount + of precipitation (millimeters) + * - origin unit + - :math:`mm` + * - SI unit + - :math:`kg / m^2` + * - constraints + - :math:`\geq{0}` + +precipitation_height_last_18h +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. 
list-table:: + :widths: 20 80 + :stub-columns: 1 + + * - original name + - precipitation_18_hour + * - description file + - `NOAA GHCN Hourly dataset description`_ + * - description + - 18-hour total liquid precipitation (rain or melted snow) accumulation + from FM12/SYNOP reports; a “T” in the measurement code column indicates a trace amount + of precipitation (millimeters) + * - origin unit + - :math:`mm` + * - SI unit + - :math:`kg / m^2` + * - constraints + - :math:`\geq{0}` + +precipitation_height_last_21h +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 20 80 + :stub-columns: 1 + + * - original name + - precipitation_21_hour + * - description file + - `NOAA GHCN Hourly dataset description`_ + * - description + - 21-hour total liquid precipitation (rain or melted snow) accumulation + from FM12/SYNOP reports; a “T” in the measurement code column indicates a trace amount + of precipitation (millimeters) + * - origin unit + - :math:`mm` + * - SI unit + - :math:`kg / m^2` + * - constraints + - :math:`\geq{0}` + +precipitation_height_last_24h +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 20 80 + :stub-columns: 1 + + * - original name + - precipitation_24_hour + * - description file + - `NOAA GHCN Hourly dataset description`_ + * - description + - 24-hour total liquid precipitation (rain or melted snow) accumulation + from FM12/SYNOP reports; a “T” in the measurement code column indicates a trace amount + of precipitation (millimeters) + * - origin unit + - :math:`mm` + * - SI unit + - :math:`kg / m^2` + * - constraints + - :math:`\geq{0}` + +pressure_air_sea_level +^^^^^^^^^^^^^^^^^^^^^^ + +.. 
list-table:: + :widths: 20 80 + :stub-columns: 1 + + * - original name + - sea_level_pressure + * - description file + - `NOAA GHCN Hourly dataset description`_ + * - description + - Sea level pressure (hectopascals) + * - origin unit + - :math:`hPa` + * - SI unit + - :math:`Pa` + * - constraints + - :math:`\geq{0}` + +pressure_air_site +^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 20 80 + :stub-columns: 1 + + * - original name + - station_level_pressure + * - description file + - `NOAA GHCN Hourly dataset description`_ + * - description + - Station pressure (hectopascals) + * - origin unit + - :math:`hPa` + * - SI unit + - :math:`Pa` + * - constraints + - :math:`\geq{0}` + +pressure_air_site_delta_last_3h +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 20 80 + :stub-columns: 1 + + * - original name + - pressure_3hr_change + * - description file + - `NOAA GHCN Hourly dataset description`_ + * - description + - 3-hour pressure change (hectopascals) + * - origin unit + - :math:`hPa` + * - SI unit + - :math:`Pa` + * - constraints + - :math:`\geq{0}` + +pressure_air_site_reduced +^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 20 80 + :stub-columns: 1 + + * - original name + - altimeter + * - description file + - `NOAA GHCN Hourly dataset description`_ + * - description + - Reduced pressure (hectopascals) + * - origin unit + - :math:`hPa` + * - SI unit + - :math:`Pa` + * - constraints + - :math:`\geq{0}` + +snow_depth +^^^^^^^^^^ + +.. list-table:: + :widths: 20 80 + :stub-columns: 1 + + * - original name + - snow_depth + * - description file + - `NOAA GHCN Hourly dataset description`_ + * - description + - depth of snowpack on the ground (centimeters/m) + * - origin unit + - :math:`cm` + * - SI unit + - :math:`m` + * - constraints + - :math:`\geq{0}` + +temperature_air_mean_200 +^^^^^^^^^^^^^^^^^^^^^^^^ + +.. 
list-table:: + :widths: 20 80 + :stub-columns: 1 + + * - original name + - temperature + * - description file + - `NOAA GHCN Hourly dataset description`_ + * - description + - 2 meter (circa) Above Ground Level Air (dry bulb) Temperature (°C to tenths) + * - origin unit + - :math:`°C` + * - SI unit + - :math:`K` + +temperature_dew_point_mean_200 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 20 80 + :stub-columns: 1 + + * - original name + - dew_point_temperature + * - description file + - `NOAA GHCN Hourly dataset description`_ + * - description + - Dew Point Temperature (°C to tenths) + * - origin unit + - :math:`°C` + * - SI unit + - :math:`K` + +temperature_wet_mean_200 +^^^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 20 80 + :stub-columns: 1 + + * - original name + - wet_bulb_temperature + * - description file + - `NOAA GHCN Hourly dataset description`_ + * - description + - Wet bulb temperature (°C to tenths) + * - origin unit + - :math:`°C` + * - SI unit + - :math:`K` + +visibility_range +^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 20 80 + :stub-columns: 1 + + * - original name + - visibility + * - description file + - `NOAA GHCN Hourly dataset description`_ + * - description + - horizontal distance at which an object can be seen and identified (kilometers) + * - origin unit + - :math:`km` + * - SI unit + - :math:`m` + * - constraints + - :math:`\geq{0}` + +wind_direction +^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 20 80 + :stub-columns: 1 + + * - original name + - wind_direction + * - description file + - `NOAA GHCN Hourly dataset description`_ + * - description + - Wind Direction from true north using compass directions (e.g. 360=true north, 180=south, 270=west, etc.). + Note: A direction of “000” is given for calm winds. (whole degrees) + * - origin unit + - :math:`°` + * - SI unit + - :math:`°` + * - constraints + - :math:`\geq{0}, \leq{360}` + +wind_gust_max +^^^^^^^^^^^^^ + +.. 
list-table:: + :widths: 20 80 + :stub-columns: 1 + + * - original name + - wind_gust + * - description file + - `NOAA GHCN Hourly dataset description`_ + * - description + - Peak short duration (usually < 20 seconds) wind speed (meters per second) that exceeds the wind_speed average + * - origin unit + - :math:`m/s` + * - SI unit + - :math:`m/s` + * - constraints + - :math:`\geq{0}` + +wind_speed +^^^^^^^^^^ + +.. list-table:: + :widths: 20 80 + :stub-columns: 1 + + * - original name + - wind_speed + * - description file + - `NOAA GHCN Hourly dataset description`_ + * - description + - Wind speed (meters per second) + * - origin unit + - :math:`m/s` + * - SI unit + - :math:`m/s` + * - constraints + - :math:`\geq{0}` diff --git a/docs/data/introduction.rst b/docs/data/introduction.rst index 2097c73cf..3e62984c0 100644 --- a/docs/data/introduction.rst +++ b/docs/data/introduction.rst @@ -40,7 +40,7 @@ ECCC (Environnement et Changement Climatique Canada / Environment and Climate Ch NOAA (National Oceanic And Atmospheric Administration / National Oceanic And Atmospheric Administration / United States Of America) - Global Historical Climatology Network - - Historical, daily weather observations from around the globe + - Historical, hourly (ISD) and daily weather observations from around the globe - more then 100k stations - data for weather services which don't publish data themselves diff --git a/tests/provider/noaa/ghcn/test_api_data.py b/tests/provider/noaa/ghcn/test_api_data.py index cdbd83d58..6b6690e66 100644 --- a/tests/provider/noaa/ghcn/test_api_data.py +++ b/tests/provider/noaa/ghcn/test_api_data.py @@ -21,6 +21,7 @@ def test_api_amsterdam(start_date, end_date, default_settings): request = NoaaGhcnRequest( parameter=[NoaaGhcnParameter.DAILY.TEMPERATURE_AIR_MEAN_200], + resolution="daily", start_date=start_date, end_date=end_date, settings=default_settings, diff --git a/tests/provider/noaa/ghcn/test_api_stations.py 
b/tests/provider/noaa/ghcn/test_api_stations.py index 885766c97..60aba76d9 100644 --- a/tests/provider/noaa/ghcn/test_api_stations.py +++ b/tests/provider/noaa/ghcn/test_api_stations.py @@ -13,7 +13,7 @@ @pytest.mark.remote def test_noaa_ghcn_stations(default_settings): - df = NoaaGhcnRequest(parameter="daily", settings=default_settings).all().df.head(5) + df = NoaaGhcnRequest(parameter="daily", resolution="daily", settings=default_settings).all().df.head(5) df_expected = pl.DataFrame( [ { diff --git a/tests/test_api.py b/tests/test_api.py index 1100eb0b4..987d9984d 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -40,7 +40,8 @@ # IMGW Meteorology ("imgw", "meteorology", {"parameter": "climate", "resolution": "daily"}, "249200180"), # NOAA Ghcn - ("noaa", "ghcn", {"parameter": "precipitation_height"}, None), + ("noaa", "ghcn", {"parameter": "precipitation_height", "resolution": "hourly"}, "AQC00914594"), + ("noaa", "ghcn", {"parameter": "precipitation_height", "resolution": "daily"}, "AQC00914594"), # WSV Pegelonline ("wsv", "pegel", {"parameter": "stage"}, None), # EA Hydrology diff --git a/wetterdienst/metadata/parameter.py b/wetterdienst/metadata/parameter.py index a7120722f..f8eff76a0 100644 --- a/wetterdienst/metadata/parameter.py +++ b/wetterdienst/metadata/parameter.py @@ -121,7 +121,11 @@ class Parameter(Enum): PRECIPITATION_HEIGHT_LAST_1H = "PRECIPITATION_HEIGHT_LAST_1H" PRECIPITATION_HEIGHT_LAST_3H = "PRECIPITATION_HEIGHT_LAST_3H" PRECIPITATION_HEIGHT_LAST_6H = "PRECIPITATION_HEIGHT_LAST_6H" + PRECIPITATION_HEIGHT_LAST_9H = "PRECIPITATION_HEIGHT_LAST_9H" PRECIPITATION_HEIGHT_LAST_12H = "PRECIPITATION_HEIGHT_LAST_12H" + PRECIPITATION_HEIGHT_LAST_15H = "PRECIPITATION_HEIGHT_LAST_15H" + PRECIPITATION_HEIGHT_LAST_18H = "PRECIPITATION_HEIGHT_LAST_18H" + PRECIPITATION_HEIGHT_LAST_21H = "PRECIPITATION_HEIGHT_LAST_21H" PRECIPITATION_HEIGHT_LAST_24H = "PRECIPITATION_HEIGHT_LAST_24H" PRECIPITATION_HEIGHT_MULTIDAY = "PRECIPITATION_HEIGHT_MULTIDAY" # 
precipitation height consistent with significant weather @@ -217,6 +221,7 @@ class Parameter(Enum): # ---- averaged ---- # TODO: should we add _MEAN here? PRESSURE_AIR_SITE = "PRESSURE_AIR_SITE" # air pressure at station height [SH] + PRESSURE_AIR_SITE_DELTA_LAST_3H = "PRESSURE_AIR_SITE_DELTA_LAST_3H" PRESSURE_AIR_SITE_MAX = "PRESSURE_AIR_SITE_MAX" PRESSURE_AIR_SITE_MIN = "PRESSURE_AIR_SITE_MIN" PRESSURE_AIR_SITE_REDUCED = "PRESSURE_AIR_SITE_REDUCED" diff --git a/wetterdienst/provider/noaa/ghcn/api.py b/wetterdienst/provider/noaa/ghcn/api.py index 527454e3a..c47516721 100644 --- a/wetterdienst/provider/noaa/ghcn/api.py +++ b/wetterdienst/provider/noaa/ghcn/api.py @@ -19,7 +19,7 @@ from wetterdienst.metadata.resolution import Resolution, ResolutionType from wetterdienst.metadata.timezone import Timezone from wetterdienst.provider.noaa.ghcn.parameter import ( - PARAMETER_MULTIPLICATION_FACTORS, + DAILY_PARAMETER_MULTIPLICATION_FACTORS, NoaaGhcnParameter, ) from wetterdienst.provider.noaa.ghcn.unit import NoaaGhcnUnit @@ -30,10 +30,12 @@ class NoaaGhcnDataset(Enum): + HOURLY = "hourly" DAILY = "daily" class NoaaGhcnResolution(Enum): + HOURLY = Resolution.HOURLY.value DAILY = Resolution.DAILY.value @@ -43,12 +45,284 @@ class NoaaGhcnPeriod(Enum): class NoaaGhcnValues(TimeseriesValues): _data_tz = Timezone.DYNAMIC - _base_url = "http://noaa-ghcn-pds.s3.amazonaws.com/csv.gz/by_station/{station_id}.csv.gz" - # use to get timezones from stations_result - # multiplication factors - _mp_factors = PARAMETER_MULTIPLICATION_FACTORS def _collect_station_parameter(self, station_id: str, parameter, dataset) -> pl.DataFrame: + if self.sr.resolution == Resolution.HOURLY: + return self._collect_station_parameter_for_hourly(station_id, parameter, dataset) + else: + return self._collect_station_parameter_for_daily(station_id, parameter, dataset) + + def _collect_station_parameter_for_hourly(self, station_id: str, parameter, dataset) -> pl.DataFrame: + url = 
f"https://www.ncei.noaa.gov/oa/global-historical-climatology-network/hourly/access/by-station/GHCNh_{station_id}_por.psv" + file = url.format(station_id=station_id) + try: + payload = download_file(file, settings=self.sr.stations.settings, ttl=CacheExpiry.FIVE_MINUTES) + except FileNotFoundError: + return pl.DataFrame() + time_zone = self._get_timezone_from_station(station_id) + df = pl.read_csv(payload, skip_rows=1, separator="|", has_header=False) + df.columns = [ + "station_id", + "name", + "year", + "month", + "day", + "hour", + "minute", + "latitude", + "longitude", + "elevation", + "temperature", + "temperature_measurement_code", + "temperature_quality_code", + "temperature_report_type", + "temperature_source_code", + "temperature_source_station_id", + "dew_point_temperature", + "dew_point_temperature_measurement_code", + "dew_point_temperature_quality_code", + "dew_point_temperature_report_type", + "dew_point_temperature_source_code", + "dew_point_temperature_source_station_id", + "station_level_pressure", + "station_level_pressure_measurement_code", + "station_level_pressure_quality_code", + "station_level_pressure_report_type", + "station_level_pressure_source_code", + "station_level_pressure_source_station_id", + "sea_level_pressure", + "sea_level_pressure_measurement_code", + "sea_level_pressure_quality_code", + "sea_level_pressure_report_type", + "sea_level_pressure_source_code", + "sea_level_pressure_source_station_id", + "wind_direction", + "wind_direction_measurement_code", + "wind_direction_quality_code", + "wind_direction_report_type", + "wind_direction_source_code", + "wind_direction_source_station_id", + "wind_speed", + "wind_speed_measurement_code", + "wind_speed_quality_code", + "wind_speed_report_type", + "wind_speed_source_code", + "wind_speed_source_station_id", + "wind_gust", + "wind_gust_measurement_code", + "wind_gust_quality_code", + "wind_gust_report_type", + "wind_gust_source_code", + "wind_gust_source_station_id", + "precipitation", + 
"precipitation_measurement_code", + "precipitation_quality_code", + "precipitation_report_type", + "precipitation_source_code", + "precipitation_source_station_id", + "relative_humidity", + "relative_humidity_measurement_code", + "relative_humidity_quality_code", + "relative_humidity_report_type", + "relative_humidity_source_code", + "relative_humidity_source_station_id", + "wet_bulb_temperature", + "wet_bulb_temperature_measurement_code", + "wet_bulb_temperature_quality_code", + "wet_bulb_temperature_report_type", + "wet_bulb_temperature_source_code", + "wet_bulb_temperature_source_station_id", + "pres_wx_mw1", + "pres_wx_mw1_measurement_code", + "pres_wx_mw1_quality_code", + "pres_wx_mw1_report_type", + "pres_wx_mw1_source_code", + "pres_wx_mw1_source_station_id", + "pres_wx_mw2", + "pres_wx_mw2_measurement_code", + "pres_wx_mw2_quality_code", + "pres_wx_mw2_report_type", + "pres_wx_mw2_source_code", + "pres_wx_mw2_source_station_id", + "pres_wx_mw3", + "pres_wx_mw3_measurement_code", + "pres_wx_mw3_quality_code", + "pres_wx_mw3_report_type", + "pres_wx_mw3_source_code", + "pres_wx_mw3_source_station_id", + "pres_wx_au1", + "pres_wx_au1_measurement_code", + "pres_wx_au1_quality_code", + "pres_wx_au1_report_type", + "pres_wx_au1_source_code", + "pres_wx_au1_source_station_id", + "pres_wx_au2", + "pres_wx_au2_measurement_code", + "pres_wx_au2_quality_code", + "pres_wx_au2_report_type", + "pres_wx_au2_source_code", + "pres_wx_au2_source_station_id", + "pres_wx_au3", + "pres_wx_au3_measurement_code", + "pres_wx_au3_quality_code", + "pres_wx_au3_report_type", + "pres_wx_au3_source_code", + "pres_wx_au3_source_station_id", + "pres_wx_aw1", + "pres_wx_aw1_measurement_code", + "pres_wx_aw1_quality_code", + "pres_wx_aw1_report_type", + "pres_wx_aw1_source_code", + "pres_wx_aw1_source_station_id", + "pres_wx_aw2", + "pres_wx_aw2_measurement_code", + "pres_wx_aw2_quality_code", + "pres_wx_aw2_report_type", + "pres_wx_aw2_source_code", + "pres_wx_aw2_source_station_id", + 
"pres_wx_aw3", + "pres_wx_aw3_measurement_code", + "pres_wx_aw3_quality_code", + "pres_wx_aw3_report_type", + "pres_wx_aw3_source_code", + "pres_wx_aw3_source_station_id", + "snow_depth", + "snow_depth_measurement_code", + "snow_depth_quality_code", + "snow_depth_report_type", + "snow_depth_source_code", + "snow_depth_source_station_id", + "visibility", + "visibility_measurement_code", + "visibility_quality_code", + "visibility_report_type", + "visibility_source_code", + "visibility_source_station_id", + "altimeter", + "altimeter_measurement_code", + "altimeter_quality_code", + "altimeter_report_type", + "altimeter_source_code", + "altimeter_source_station_id", + "pressure_3hr_change", + "pressure_3hr_change_measurement_code", + "pressure_3hr_change_quality_code", + "pressure_3hr_change_report_type", + "pressure_3hr_change_source_code", + "pressure_3hr_change_source_station_id", + "sky_cover_1", + "sky_cover_1_measurement_code", + "sky_cover_1_quality_code", + "sky_cover_1_report_type", + "sky_cover_1_source_code", + "sky_cover_1_source_station_id", + "sky_cover_2", + "sky_cover_2_measurement_code", + "sky_cover_2_quality_code", + "sky_cover_2_report_type", + "sky_cover_2_source_code", + "sky_cover_2_source_station_id", + "sky_cover_3", + "sky_cover_3_measurement_code", + "sky_cover_3_quality_code", + "sky_cover_3_report_type", + "sky_cover_3_source_code", + "sky_cover_3_source_station_id", + "sky_cover_baseht_1", + "sky_cover_baseht_1_measurement_code", + "sky_cover_baseht_1_quality_code", + "sky_cover_baseht_1_report_type", + "sky_cover_baseht_1_source_code", + "sky_cover_baseht_1_source_station_id", + "sky_cover_baseht_2", + "sky_cover_baseht_2_measurement_code", + "sky_cover_baseht_2_quality_code", + "sky_cover_baseht_2_report_type", + "sky_cover_baseht_2_source_code", + "sky_cover_baseht_2_source_station_id", + "sky_cover_baseht_3", + "sky_cover_baseht_3_measurement_code", + "sky_cover_baseht_3_quality_code", + "sky_cover_baseht_3_report_type", + 
"sky_cover_baseht_3_source_code", + "sky_cover_baseht_3_source_station_id", + "precipitation_3_hour", + "precipitation_3_hour_measurement_code", + "precipitation_3_hour_quality_code", + "precipitation_3_hour_report_type", + "precipitation_3_hour_source_code", + "precipitation_3_hour_source_station_id", + "precipitation_6_hour", + "precipitation_6_hour_measurement_code", + "precipitation_6_hour_quality_code", + "precipitation_6_hour_report_type", + "precipitation_6_hour_source_code", + "precipitation_6_hour_source_station_id", + "precipitation_9_hour", + "precipitation_9_hour_measurement_code", + "precipitation_9_hour_quality_code", + "precipitation_9_hour_report_type", + "precipitation_9_hour_source_code", + "precipitation_9_hour_source_station_id", + "precipitation_12_hour", + "precipitation_12_hour_measurement_code", + "precipitation_12_hour_quality_code", + "precipitation_12_hour_report_type", + "precipitation_12_hour_source_code", + "precipitation_12_hour_source_station_id", + "precipitation_15_hour", + "precipitation_15_hour_measurement_code", + "precipitation_15_hour_quality_code", + "precipitation_15_hour_report_type", + "precipitation_15_hour_source_code", + "precipitation_15_hour_source_station_id", + "precipitation_18_hour", + "precipitation_18_hour_measurement_code", + "precipitation_18_hour_quality_code", + "precipitation_18_hour_report_type", + "precipitation_18_hour_source_code", + "precipitation_18_hour_source_station_id", + "precipitation_21_hour", + "precipitation_21_hour_measurement_code", + "precipitation_21_hour_quality_code", + "precipitation_21_hour_report_type", + "precipitation_21_hour_source_code", + "precipitation_21_hour_source_station_id", + "precipitation_24_hour", + "precipitation_24_hour_measurement_code", + "precipitation_24_hour_quality_code", + "precipitation_24_hour_report_type", + "precipitation_24_hour_source_code", + "precipitation_24_hour_source_station_id", + "remarks", + "remarks_measurement_code", + "remarks_quality_code", 
+ "remarks_report_type", + "remarks_source_code", + "remarks_source_station_id", + ] + if parameter == dataset: + parameter = [par.value for par in NoaaGhcnParameter.HOURLY.HOURLY] + else: + parameter = [parameter.value] + df = df.select( + "station_id", + pl.concat_str(["year", "month", "day", "hour", "minute"], separator="-") + .str.to_datetime("%Y-%m-%d-%H-%M") + .alias("date"), + *parameter, + ) + df = df.melt( + id_vars=["station_id", "date"], value_vars=parameter, variable_name="parameter", value_name="value" + ) + return df.with_columns( + pl.col("date").dt.replace_time_zone(time_zone).dt.replace_time_zone("UTC"), + pl.col("parameter").str.to_lowercase(), + pl.col("value").cast(pl.Float64), + pl.lit(value=None, dtype=pl.Float64).alias("quality"), + ) + + def _collect_station_parameter_for_daily(self, station_id: str, parameter, dataset) -> pl.DataFrame: """ Collection method for NOAA GHCN data. Parameter and dataset can be ignored as data is provided as a whole. @@ -58,10 +332,15 @@ def _collect_station_parameter(self, station_id: str, parameter, dataset) -> pl. :param dataset: dataset being queried :return: dataframe with read data """ - url = self._base_url.format(station_id=station_id) - file = download_file(url, settings=self.sr.stations.settings, ttl=CacheExpiry.FIVE_MINUTES) + url = "http://noaa-ghcn-pds.s3.amazonaws.com/csv.gz/by_station/{station_id}.csv.gz" + file = url.format(station_id=station_id) + payload = download_file(file, settings=self.sr.stations.settings, ttl=CacheExpiry.FIVE_MINUTES) df = pl.read_csv( - source=file, separator=",", has_header=False, infer_schema_length=0, storage_options={"compression": "gzip"} + source=payload, + separator=",", + has_header=False, + infer_schema_length=0, + storage_options={"compression": "gzip"}, ) df = df.rename( mapping={ @@ -91,7 +370,7 @@ def _collect_station_parameter(self, station_id: str, parameter, dataset) -> pl. 
) ) ) - df = self._apply_factors(df) + df = self._apply_daily_factors(df) return df.select( pl.col(Columns.STATION_ID.value), pl.col(Columns.DATE.value), @@ -100,7 +379,8 @@ def _collect_station_parameter(self, station_id: str, parameter, dataset) -> pl. pl.col(Columns.QUALITY.value), ) - def _apply_factors(self, df: pl.DataFrame) -> pl.DataFrame: + @staticmethod + def _apply_daily_factors(df: pl.DataFrame) -> pl.DataFrame: """ Method to apply given factors on parameters that have been converted to integers by making their unit one tenth e.g. @@ -110,7 +390,7 @@ def _apply_factors(self, df: pl.DataFrame) -> pl.DataFrame: """ data = [] for (parameter,), group in df.group_by([Columns.PARAMETER.value]): - factor = self._mp_factors.get(parameter) + factor = DAILY_PARAMETER_MULTIPLICATION_FACTORS.get(parameter) if factor: group = group.with_columns(pl.col(Columns.VALUE.value).cast(float).mul(factor)) data.append(group) @@ -136,6 +416,7 @@ class NoaaGhcnRequest(TimeseriesRequest): def __init__( self, parameter: List[Union[str, NoaaGhcnParameter, Parameter]], + resolution: Union[str, NoaaGhcnResolution, Resolution], start_date: Optional[Union[str, dt.datetime]] = None, end_date: Optional[Union[str, dt.datetime]] = None, settings: Optional[Settings] = None, @@ -148,7 +429,7 @@ def __init__( """ super().__init__( parameter=parameter, - resolution=Resolution.DAILY, + resolution=resolution, period=Period.HISTORICAL, start_date=start_date, end_date=end_date, @@ -156,10 +437,42 @@ def __init__( ) def _all(self) -> pl.LazyFrame: + if self.resolution == Resolution.HOURLY: + return self._create_metaindex_for_ghcn_hourly() + else: + return self._create_metaindex_for_ghcn_daily() + + def _create_metaindex_for_ghcn_hourly(self) -> pl.LazyFrame: + file = "https://www.ncei.noaa.gov/oa/global-historical-climatology-network/hourly/doc/ghcnh-station-list.csv" + payload = download_file(file, settings=self.settings, ttl=CacheExpiry.METAINDEX) + df = pl.read_csv( + payload, + 
has_header=False, + columns=[ + "column_1", + "column_2", + "column_3", + "column_4", + "column_5", + "column_6", + ], + ) + df.columns = [ + "station_id", + "latitude", + "longitude", + "elevation", + "state", + "name", + ] + df = df.with_columns( + pl.all().str.strip_chars(), + ) + return df.lazy() + + def _create_metaindex_for_ghcn_daily(self) -> pl.LazyFrame: """ - Method to acquire station listing - # https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-ghcn - station listing + Method to acquire station listing for ghcn daily | Variable | Columns | Type | Example | |--------------|---------|-----------|-------------| | ID | 1-11 | Character | EI000003980 | diff --git a/wetterdienst/provider/noaa/ghcn/parameter.py b/wetterdienst/provider/noaa/ghcn/parameter.py index ebd663344..f764fb5aa 100644 --- a/wetterdienst/provider/noaa/ghcn/parameter.py +++ b/wetterdienst/provider/noaa/ghcn/parameter.py @@ -9,6 +9,97 @@ class NoaaGhcnParameter(DatasetTreeCore): """NOAA Global Historical Climatology Network Parameters""" + class HOURLY(DatasetTreeCore): + class HOURLY(Enum): + # Relative humidity is calculated from air (dry bulb) temperature and dewpoint temperature (whole percent) + HUMIDITY = "relative_humidity" + # total liquid precipitation (rain or melted snow) for past hour; a “T” in the measurement code column + # indicates a trace amount of precipitation (millimeters) + PRECIPITATION_HEIGHT = "precipitation" + # 3-hour total liquid precipitation (rain or melted snow) accumulation + # from FM12/SYNOP reports; a “T” in the measurement code column indicates a trace amount + # of precipitation (millimeters); accumulations can be reported over 3,6,9,12,15,18,21 and 24 hours. 
+ PRECIPITATION_HEIGHT_LAST_3H = "precipitation_3_hour" + PRECIPITATION_HEIGHT_LAST_6H = "precipitation_6_hour" + PRECIPITATION_HEIGHT_LAST_9H = "precipitation_9_hour" + PRECIPITATION_HEIGHT_LAST_12H = "precipitation_12_hour" + PRECIPITATION_HEIGHT_LAST_15H = "precipitation_15_hour" + PRECIPITATION_HEIGHT_LAST_18H = "precipitation_18_hour" + PRECIPITATION_HEIGHT_LAST_21H = "precipitation_21_hour" + PRECIPITATION_HEIGHT_LAST_24H = "precipitation_24_hour" + # reduction estimates the pressure that would exist at sea level at a point directly below the station + # using a temperature profile based on temperatures that actually exist at the station (hPa) + PRESSURE_AIR_SEA_LEVEL = "sea_level_pressure" + # the pressure that is observed at a specific elevation and is the true barometric pressure of a location. + # It is the pressure exerted by the atmosphere at a point as a result of gravity acting upon the "column" of + # air that lies directly above the point. (hPa) + PRESSURE_AIR_SITE = "station_level_pressure" + # change in atmospheric pressure measured at the beginning and end of a three-hour period; + # accompanied by tendency code in measurement code field (millibars/hPa) + PRESSURE_AIR_SITE_DELTA_LAST_3H = "pressure_3hr_change" + # the pressure "reduced" to mean sea level using the temperature profile of the "standard" atmosphere, + # which is representative of average conditions over the United States at 40 degrees north + # latitude (millibars/hPa) # noqa + PRESSURE_AIR_SITE_REDUCED = "altimeter" + # depth of snowpack on the ground (centimeters/m) + SNOW_DEPTH = "snow_depth" + # 2 meter (circa) Above Ground Level Air (dry bulb) Temperature (⁰C to tenths) + TEMPERATURE_AIR_MEAN_200 = "temperature" + # Dew Point Temperature (⁰C to tenths) + TEMPERATURE_DEW_POINT_MEAN_200 = "dew_point_temperature" + # Wet bulb temperature (⁰C to tenths) + TEMPERATURE_WET_MEAN_200 = "wet_bulb_temperature" + # horizontal distance at which an object can be seen and identified 
(kilometers) + VISIBILITY_RANGE = "visibility" + # Wind Direction from true north using compass directions (e.g. 360=true north, 180=south, 270=west, etc.). + # Note: A direction of “000” is given for calm winds. (whole degrees) + WIND_DIRECTION = "wind_direction" + # Peak short duration (usually < 20 seconds) wind speed (meters per second) that exceeds the wind_speed + # average + WIND_GUST_MAX = "wind_gust" + # Wind Speed (meters per second) + WIND_SPEED = "wind_speed" + + # the following are left out for now + # pres_wx_mw1 + # pres_wx_mw2 + # pres_wx_mw3 + # pres_wx_au1 + # pres_wx_au2 + # pres_wx_au3 + # pres_wx_aw1 + # pres_wx_aw2 + # pres_wx_aw3 + # sky_cover_1 + # sky_cover_2 + # sky_cover_3 + # sky_cover_baseht_1 + # sky_cover_baseht_2 + # sky_cover_baseht_3 + + HUMIDITY = HOURLY.HUMIDITY + PRECIPITATION_HEIGHT = HOURLY.PRECIPITATION_HEIGHT + PRECIPITATION_HEIGHT_LAST_3H = HOURLY.PRECIPITATION_HEIGHT_LAST_3H + PRECIPITATION_HEIGHT_LAST_6H = HOURLY.PRECIPITATION_HEIGHT_LAST_6H + PRECIPITATION_HEIGHT_LAST_9H = HOURLY.PRECIPITATION_HEIGHT_LAST_9H + PRECIPITATION_HEIGHT_LAST_12H = HOURLY.PRECIPITATION_HEIGHT_LAST_12H + PRECIPITATION_HEIGHT_LAST_15H = HOURLY.PRECIPITATION_HEIGHT_LAST_15H + PRECIPITATION_HEIGHT_LAST_18H = HOURLY.PRECIPITATION_HEIGHT_LAST_18H + PRECIPITATION_HEIGHT_LAST_21H = HOURLY.PRECIPITATION_HEIGHT_LAST_21H + PRECIPITATION_HEIGHT_LAST_24H = HOURLY.PRECIPITATION_HEIGHT_LAST_24H + PRESSURE_AIR_SEA_LEVEL = HOURLY.PRESSURE_AIR_SEA_LEVEL + PRESSURE_AIR_SITE = HOURLY.PRESSURE_AIR_SITE + PRESSURE_AIR_SITE_DELTA_LAST_3H = HOURLY.PRESSURE_AIR_SITE_DELTA_LAST_3H + PRESSURE_AIR_SITE_REDUCED = HOURLY.PRESSURE_AIR_SITE_REDUCED + SNOW_DEPTH = HOURLY.SNOW_DEPTH + TEMPERATURE_AIR_MEAN_200 = HOURLY.TEMPERATURE_AIR_MEAN_200 + TEMPERATURE_DEW_POINT_MEAN_200 = HOURLY.TEMPERATURE_DEW_POINT_MEAN_200 + TEMPERATURE_WET_MEAN_200 = HOURLY.TEMPERATURE_WET_MEAN_200 + VISIBILITY_RANGE = HOURLY.VISIBILITY_RANGE + WIND_DIRECTION = HOURLY.WIND_DIRECTION + WIND_GUST_MAX = 
HOURLY.WIND_GUST_MAX + WIND_SPEED = HOURLY.WIND_SPEED + class DAILY(DatasetTreeCore): class DAILY(Enum): # The five core values are: @@ -596,7 +687,7 @@ class DAILY(Enum): WEATHER_TYPE_VICINITY_RAIN_SNOW_SHOWER = DAILY.WEATHER_TYPE_VICINITY_RAIN_SNOW_SHOWER -PARAMETER_MULTIPLICATION_FACTORS = { +DAILY_PARAMETER_MULTIPLICATION_FACTORS = { NoaaGhcnParameter.DAILY.PRECIPITATION_HEIGHT.value: 1 / 10, NoaaGhcnParameter.DAILY.PRECIPITATION_HEIGHT_MULTIDAY.value: 1 / 10, NoaaGhcnParameter.DAILY.TEMPERATURE_AIR_MAX_200.value: 1 / 10, diff --git a/wetterdienst/provider/noaa/ghcn/unit.py b/wetterdienst/provider/noaa/ghcn/unit.py index 9f9307132..29ce893e9 100644 --- a/wetterdienst/provider/noaa/ghcn/unit.py +++ b/wetterdienst/provider/noaa/ghcn/unit.py @@ -9,6 +9,31 @@ class NoaaGhcnUnit(DatasetTreeCore): """NOAA Global Historical Climatology Network Parameters""" + class HOURLY(DatasetTreeCore): + class HOURLY(UnitEnum): + HUMIDITY = OriginUnit.PERCENT.value, SIUnit.PERCENT.value + PRECIPITATION_HEIGHT = OriginUnit.MILLIMETER.value, SIUnit.KILOGRAM_PER_SQUARE_METER.value + PRECIPITATION_HEIGHT_LAST_3H = OriginUnit.MILLIMETER.value, SIUnit.KILOGRAM_PER_SQUARE_METER.value + PRECIPITATION_HEIGHT_LAST_6H = OriginUnit.MILLIMETER.value, SIUnit.KILOGRAM_PER_SQUARE_METER.value + PRECIPITATION_HEIGHT_LAST_9H = OriginUnit.MILLIMETER.value, SIUnit.KILOGRAM_PER_SQUARE_METER.value + PRECIPITATION_HEIGHT_LAST_12H = OriginUnit.MILLIMETER.value, SIUnit.KILOGRAM_PER_SQUARE_METER.value + PRECIPITATION_HEIGHT_LAST_15H = OriginUnit.MILLIMETER.value, SIUnit.KILOGRAM_PER_SQUARE_METER.value + PRECIPITATION_HEIGHT_LAST_18H = OriginUnit.MILLIMETER.value, SIUnit.KILOGRAM_PER_SQUARE_METER.value + PRECIPITATION_HEIGHT_LAST_21H = OriginUnit.MILLIMETER.value, SIUnit.KILOGRAM_PER_SQUARE_METER.value + PRECIPITATION_HEIGHT_LAST_24H = OriginUnit.MILLIMETER.value, SIUnit.KILOGRAM_PER_SQUARE_METER.value + PRESSURE_AIR_SEA_LEVEL = OriginUnit.HECTOPASCAL.value, SIUnit.PASCAL.value + PRESSURE_AIR_SITE = 
OriginUnit.HECTOPASCAL.value, SIUnit.PASCAL.value + PRESSURE_AIR_SITE_DELTA_LAST_3H = OriginUnit.HECTOPASCAL.value, SIUnit.PASCAL.value + PRESSURE_AIR_SITE_REDUCED = OriginUnit.HECTOPASCAL.value, SIUnit.PASCAL.value + SNOW_DEPTH = OriginUnit.CENTIMETER.value, SIUnit.METER.value + TEMPERATURE_AIR_MEAN_200 = OriginUnit.DEGREE_CELSIUS.value, SIUnit.DEGREE_KELVIN.value + TEMPERATURE_DEW_POINT_MEAN_200 = OriginUnit.DEGREE_CELSIUS.value, SIUnit.DEGREE_KELVIN.value + TEMPERATURE_WET_MEAN_200 = OriginUnit.DEGREE_CELSIUS.value, SIUnit.DEGREE_KELVIN.value + VISIBILITY_RANGE = OriginUnit.KILOMETER.value, SIUnit.METER.value + WIND_DIRECTION = OriginUnit.DEGREE.value, SIUnit.DEGREE.value + WIND_GUST_MAX = OriginUnit.METER_PER_SECOND.value, SIUnit.METER_PER_SECOND.value + WIND_SPEED = OriginUnit.METER_PER_SECOND.value, SIUnit.METER_PER_SECOND.value + class DAILY(DatasetTreeCore): class DAILY(UnitEnum): # The five core values are: