In [19]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#for reading netCDF4 files
import netCDF4 as nc
import pprint

In [20]:
pp = pprint.PrettyPrinter(indent=4)

In [11]:
#file path
filename = "../data/hysets/HYSETS_2020_QC_stations.nc"
#loading file into a nc dataset
ds = nc.Dataset(filename)

'2020-05-26'

In [24]:
print(ds)

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4_CLASSIC data model, file format HDF5):
    title: Hydrometeorological Sandbox of the École de technologie supérieure (HYSETS) for 14425 catchments in North America
    summary: Hydrometeorological Sandbox of the École de technologie supérieure (HYSETS), including precipitation, temperature, discharge and catchment area of 14425 watersheds in North America. This file's meteorological data was collected and processed from Quality-controlled gauges. Provided by the HC3 Laboratory at École de technologie supérieure, Montréal, Canada.
    institution: Hydrology, Climate and Climate Change Laboratory (HC3) at École de technologie supérieure (ETS)
    institute_id: HC3-ETS
    contact: Richard Arsenault: richard.arsenault@etsmtl.ca
    date_created: 2020-05-26
    source: Hydrometric data from USGS National Water Information Service, ECCC Water Survey Canada and CONAGUA in Mexico. Meteorological data from ECCC stations in Canada and GHCND

In [97]:
vdict = {"variables(dimensions)": "float64 time(time), float64 watershedID(watershed), float64 drainage_area(watershed), float64 drainage_area_GSIM(watershed), float64 flag_GSIM_boundaries(watershed), float64 flag_artificial_boundaries(watershed), float64 centroid_lat(watershed), float64 centroid_lon(watershed), float64 elevation(watershed), float64 slope(watershed), float32 discharge(watershed, time), float32 pr(watershed, time), float32 tasmax(watershed, time), float32 tasmin(watershed, time)"}

In [103]:
vdict["variables(dimensions)"] = [x.strip() for x in vdict["variables(dimensions)"].split(",")]

In [107]:
# !pip install xarray

In [109]:
vdict['variables(dimensions)']

['float64 time(time)',
 'float64 watershedID(watershed)',
 'float64 drainage_area(watershed)',
 'float64 drainage_area_GSIM(watershed)',
 'float64 flag_GSIM_boundaries(watershed)',
 'float64 flag_artificial_boundaries(watershed)',
 'float64 centroid_lat(watershed)',
 'float64 centroid_lon(watershed)',
 'float64 elevation(watershed)',
 'float64 slope(watershed)',
 'float32 discharge(watershed',
 'time)',
 'float32 pr(watershed',
 'time)',
 'float32 tasmax(watershed',
 'time)',
 'float32 tasmin(watershed',
 'time)']

In [25]:
pp.pprint(ds.__dict__)

{   'activity': 'HYSETS',
    'cdm_data_type': 'station',
    'contact': 'Richard Arsenault: richard.arsenault@etsmtl.ca',
    'conventions': 'CF-1.6, ACDD-1.3',
    'date_created': '2020-05-26',
    'featureType': 'timeSeries',
    'institute_id': 'HC3-ETS',
    'institution': 'Hydrology, Climate and Climate Change Laboratory (HC3) at '
                   'École de technologie supérieure (ETS)',
    'keywords': 'HYSETS, GHCND, GSIM, hydrology, North America, streamflow, '
                'hydrometeorology, PAVICS, PAVICS-Hydro, modelling',
    'license': 'ODC-BY',
    'source': 'Hydrometric data from USGS National Water Information Service, '
              'ECCC Water Survey Canada and CONAGUA in Mexico. Meteorological '
              'data from ECCC stations in Canada and GHCND in USA and Mexico. '
              'Catchment areas from ECCC HYDAT and USGS.',
    'summary': 'Hydrometeorological Sandbox of the École de technologie '
               'supérieure (HYSETS), including precipit

In [77]:
print(list(ds.variables.keys()))

['time', 'watershedID', 'drainage_area', 'drainage_area_GSIM', 'flag_GSIM_boundaries', 'flag_artificial_boundaries', 'centroid_lat', 'centroid_lon', 'elevation', 'slope', 'discharge', 'pr', 'tasmax', 'tasmin']


In [84]:
def variable_details(ds):
    print("Select a number to see the features")
    for k1, k2 in list(enumerate(ds.variables.keys(),1)):
        print(f"{k1}. {k2}")
    k_input = int(input("Enter any number from above. "))
    print()
    for k1, k2 in list(enumerate(ds.variables.keys(),1)):
        if k1 == k_input:
            print(ds.variables[k2])
    

In [89]:
variable_details(ds)

Select a number to see the features
1. time
2. watershedID
3. drainage_area
4. drainage_area_GSIM
5. flag_GSIM_boundaries
6. flag_artificial_boundaries
7. centroid_lat
8. centroid_lon
9. elevation
10. slope
11. discharge
12. pr
13. tasmax
14. tasmin


Enter any number from above.  5



<class 'netCDF4._netCDF4.Variable'>
float64 flag_GSIM_boundaries(watershed)
    description: Flag to indicate that the boundaries are those from GSIM and the watershed area of the polygon is the one in "drainage_area_GSIM". The "drainage_area" value is the official area at the hydrometric gauging station.
    long_name: flag_contours_from_GSIM
    standard_name: flag_contours_from_GSIM
unlimited dimensions: 
current shape = (14425,)
filling on, default _FillValue of 9.969209968386869e+36 used


In [96]:
ds.variables['time']

'days since 1950-01-01 00:00:00'

In [91]:
ds.variables['centroid_lat']

<class 'netCDF4._netCDF4.Variable'>
float64 centroid_lat(watershed)
    description: Latitude (degrees North) coordinates of the centroid of the watershed
    long_name: centroid_latitude
    standard_name: centroid_latitude
    units: degrees_North (South is negative)
unlimited dimensions: 
current shape = (14425,)
filling on, default _FillValue of 9.969209968386869e+36 used

In [92]:
ds.variables['centroid_lon']

<class 'netCDF4._netCDF4.Variable'>
float64 centroid_lon(watershed)
    description: Longitude (degrees Eest) coordinates of the centroid of the watershed
    long_name: centroid_longitude
    standard_name: centroid_longitude
    units: degrees_East (West is negative)
unlimited dimensions: 
current shape = (14425,)
filling on, default _FillValue of 9.969209968386869e+36 used

In [None]:
ds.variables['time']

## Main Info from Dataset
- Hydrometeorologicl Sandbox data for `14425` catchments in North America
- Source:
    -  **Hydrometric data** from _USGS National Water Information Service_, _ECCC Water Survey_ Canada and _CONAGUA_ in Mexico. 
    - **Meteorological data** from _ECCC stations_ in Canada and _GHCND_ in USA and Mexico. 
    - **Catchment areas** from _ECCC HYDAT_ and _USGS_
- Dimenstions:
    - `time` - **25202**
    - `watershed` - **14425**