In [1]:
import pandas as pd
import numpy as np
import intake

In [2]:
from intake.catalog import Catalog
from intake.catalog.local import LocalCatalogEntry, UserParameter

# Catalog entry for the GHCN-Daily "by_year" CSV archives. The `year` user
# parameter is templated into the URL, so a single entry serves any year.

# Column names are supplied explicitly through the `names` CSV option.
ghcn_columns = ["id", "date", "element", "data_value", "m_flag", "q_flag", "s_flag", "obs_time"]

# The two integer columns use the nullable "Int64" dtype so missing values can
# be represented; every other column is kept as "object".
ghcn_dtypes = {
    column: ("Int64" if column in ("data_value", "obs_time") else "object")
    for column in ghcn_columns
}

# The year is kept as a string because it is substituted verbatim into the URL.
year_parameter = UserParameter(
    name="year",
    description="data collection year",
    type="str",
    default="2023",
)

daily_summaries_by_year = LocalCatalogEntry(
    name='Daily summaries by year',
    description="",
    driver='csv',
    args=dict(
        # `filecache::` is the fsspec chained-protocol prefix for caching the
        # remote file locally.
        urlpath='filecache::https://www.ncei.noaa.gov/pub/data/ghcn/daily/by_year/{{ year }}.csv.gz',
        csv_kwargs=dict(
            names=ghcn_columns,
            dtype=ghcn_dtypes,
            # NOTE(review): presumably None because the gzip-compressed file
            # cannot be split into blocks by the reader -- confirm.
            blocksize=None,
        ),
    ),
    parameters=[year_parameter],
)

# Wrap the single `by_year` entry in its own catalog and persist it to YAML in
# the current working directory.
daily_summaries_cat = Catalog.from_dict(
    {"by_year": daily_summaries_by_year},
    name='NOAA Climate Data Daily Summaries',
    # Adjacent string literals are concatenated at compile time, so this is one
    # description string; each piece carries its own trailing space.
    description=(
        "Global Historical Climate Network includes daily land surface "
        "observations from around the world. "
        "The GHCN-Daily was developed to meet the needs of climate analysis and "
        "monitoring studies that require data at a sub-monthly time resolution "
        "(e.g., assessments of the frequency of heavy rainfall, heat wave "
        "duration, etc.). "
        "The dataset includes observations from World Meteorological "
        "Organization, Cooperative, and CoCoRaHS networks. "
        "If observed, the station dataset includes max and minimum temperatures, "
        "total precipitation, snowfall, and depth of snow on ground. "
        "Some U.S. station data are typically delayed only 24 hours."
    ),
)
daily_summaries_cat.save("noaa_daily_summaries.yaml")

# Top-level catalog. Its only entry points at the daily-summaries YAML file
# through the `{{ CATALOG_DIR }}` template, and reuses that catalog's name and
# description.
daily_summaries_entry = LocalCatalogEntry(
    name=daily_summaries_cat.name,
    description=daily_summaries_cat.description,
    driver='intake.catalog.local.YAMLFileCatalog',
    args=dict(path='{{ CATALOG_DIR }}/noaa_daily_summaries.yaml'),
)

noaa_cat = Catalog.from_dict(
    {"daily_summaries": daily_summaries_entry},
    name='NOAA Climate Data',
    description='NOAA Climate Data',
    metadata=dict(
        source='https://www.ncei.noaa.gov/metadata/geoportal/rest/metadata/item/gov.noaa.ncdc:C00861/html',
    ),
)

In [3]:
# Write the top-level catalog to YAML, then re-open it from disk so that
# `noaa_cat` is rebound to the catalog loaded from the saved file.
noaa_cat.save('noaa_cat.yaml')
noaa_cat = intake.open_catalog('noaa_cat.yaml')
# IPython shell escape: print the generated YAML for inspection.
!cat noaa_cat.yaml

description: NOAA Climate Data
metadata:
  source: https://www.ncei.noaa.gov/metadata/geoportal/rest/metadata/item/gov.noaa.ncdc:C00861/html
name: NOAA Climate Data
sources:
  daily_summaries:
    args:
      path: '{{ CATALOG_DIR }}/noaa_daily_summaries.yaml'
    description: Global Historical Climate Network includes daily land surface observations
      from around the world. The GHCN-Daily was developed to meet the needs of climate
      analysis and monitoring studies that require data at a sub-monthly time resolution
      (e.g., assessments of the frequency of heavy rainfall, heat wave duration, etc.).
      The dataset includes observations from World Meteorological Organization, Cooperative,
      and CoCoRaHS networks. If observed, the station dataset includes max and minimum
      temperatures, total precipitation, snowfall, and depth of snow on ground. Some
      U.S. station data are typically delayed only 24 hours.
    driver: intake.catalog.local.YAMLFile

In [4]:
# Walk the nested catalog down to the `by_year` source and read it using the
# default `year` parameter ("2023").
# NOTE(review): the captured output below has about 5.58M rows, so this loads
# the whole year; consider calling `.head()` on the result if only a preview
# is needed.
noaa_cat.daily_summaries.by_year.read()

Unnamed: 0,id,date,element,data_value,m_flag,q_flag,s_flag,obs_time
0,AE000041196,20230101,TMAX,252,,,S,
1,AE000041196,20230101,TMIN,149,,,S,
2,AE000041196,20230101,PRCP,0,D,,S,
3,AE000041196,20230101,TAVG,207,H,,S,
4,AEM00041194,20230101,TMAX,255,,,S,
...,...,...,...,...,...,...,...,...
5584987,USC00300364,20230308,SNOW,0,,,H,
5584988,USC00300364,20230308,SNWD,0,,,H,700
5584989,USC00411441,20230308,TOBS,39,,,H,
5584990,USC00461290,20230308,TOBS,-28,,,H,700
