In [2]:
""" Created Apr 2021 by GO                                                     """
""" Write ECCC CSV gauge data to NetCDF                                        """
""" Code derived from functions in:                                            """
"""     https://gitlab.com/FA12/datatools/-/blob/master/datatools/datatools.py """
""" Converter helpers. Used to convert various data formats to nc files.       """
""" To do (Apr 12): replace hard-coding of rows extracted                      """
""" """

import csv
import datetime as dt
import glob
import os
import netCDF4 as nc
import numpy as np
import pandas as pd
import pytz
import yaml

def read_hist_hydro(file, param=None, header_rows=2):
    """ Load Fraser discharge data received from the ECCC open, historical data portal
    https://wateroffice.ec.gc.ca/search/historical_e.html

    Columns:
    station, param, date, value, SYM
    daily discharge param = 1

    Parameters
    ----------
    file : str
        Path of the CSV file to read.
    param : int or str, optional
        When given, only rows whose second column equals this code are kept
        (e.g. ``param=1`` for daily discharge). The default ``None`` keeps
        every data row.
    header_rows : int, optional
        Number of leading rows to skip before the data starts (default 2).

    Returns
    -------
    tt : np array of datetime objects
        One entry per data row, parsed from the third column.
    ff : np array of float
        Values from the fourth column; an empty value field becomes NaN.

    Raises
    ------
    IndexError
        If a non-blank data row has fewer than four columns.
    ValueError
        If a non-empty value field cannot be converted to float.
    """
    # newline='' lets the csv module do its own newline handling
    with open(file, newline='') as f:
        rows = list(csv.reader(f))[header_rows:]

    # csv.reader yields [] for blank lines; drop those (and rows holding only
    # whitespace) instead of failing on the column lookups below
    rows = [row for row in rows if any(field.strip() for field in row)]

    if param is not None:
        wanted = str(param).strip()
        rows = [row for row in rows if row[1].strip() == wanted]

    # nothing to convert: return empty arrays rather than indexing an empty list
    if not rows:
        return np.array([], dtype=object), np.array([], dtype=float)

    # convert date
    tt = parse_time([row[2] for row in rows])

    # convert data; missing observations (empty field) are kept as NaN so that
    # dates and values stay aligned
    ff = np.array([float(row[3]) if row[3].strip() else np.nan for row in rows])
    return tt, ff

def parse_time(t):
    """ Convert time strings to datetime

    Fields are read from fixed character positions, so any single-character
    delimiter between the numbers works. The layout of the FIRST string
    decides which optional fields are read from every string.

    Can process strings in the following formats:
    2012-01-04                        # date only; time of day is set to 12:00
    2012-01-04 19:10
    2012-01-04 19:10:00
    2012-01-04 19:10:00+00:00
    2019-05-24 23:13:41.500000+00:00  # 6 decimal places for seconds
    with any delimiters between numbers and + or - before the offset.

    Parameters
    ----------
    t : list/iterable of strings

    Returns
    -------
    np array of datetime objects (naive; a UTC offset, when present, is
    subtracted so the result is in UTC). Empty input gives an empty array.
    """
    t = list(t)  # allow any iterable; indexing and len() are needed below
    if not t:
        return np.array([], dtype=object)

    first = t[0]
    count = len(t)
    offset_index = 19
    #   strptime is slow, faster version follows
    #   tt = [dt.datetime.strptime(x[:19], '%Y-%m-%d %H:%M:%S') for x in t]
    y = [int(x[0:4]) for x in t]
    m = [int(x[5:7]) for x in t]
    d = [int(x[8:10]) for x in t]

    if len(first) >= 16:  # hours and minutes are present
        H = [int(x[11:13]) for x in t]
        M = [int(x[14:16]) for x in t]
    else:  # date only: stamp the value at midday
        H = [12] * count
        M = [0] * count

    if len(first) > 16:  # seconds are present
        S = [int(x[17:19]) for x in t]
    else:  # no seconds
        S = [0] * count
    if len(first) > 19 and first[19] == '.':  # microseconds are present; must be 6 digits
        offset_index += 7
        MS = [int(x[20:26]) for x in t]
    else:
        MS = [0] * count

    tt = np.array([dt.datetime(*x) for x in zip(y, m, d, H, M, S, MS)], dtype=object)

    if len(first) > offset_index and first[offset_index] in '+-':  # offset is present
        osgn = [int(x[offset_index] + '1') for x in t]  # offset sign as +1 / -1
        ioh0 = offset_index + 1
        ioh1 = offset_index + 3
        iom0 = offset_index + 4
        iom1 = offset_index + 6
        oh = [int(x[ioh0:ioh1]) for x in t]  # offset hour
        om = [int(x[iom0:iom1]) for x in t]  # offset minute
        # time offset
        to = np.array([dt.timedelta(hours=sign * hhh, minutes=sign * mmm)
                       for sign, hhh, mmm in zip(osgn, oh, om)], dtype=object)
        tt = tt - to  # https://en.wikipedia.org/wiki/ISO_8601#Time_offsets_from_UTC

    return tt

def write_netcdf(fname, date, waterlevel=None, discharge=None, temperature=None, pressure=None):
    """ Write/append specified variables to nc file.

    If `fname` already exists it is opened in append mode and the new records
    are added after the existing ones along the unlimited `time_counter`
    dimension; otherwise a new file is created. Calling this twice with the
    same data therefore stores the records twice.

    Parameters
    ----------
    fname : str
        Path of the NetCDF file to create or extend.
    date : sequence of datetime objects
        Time stamps of the new records; stored as
        "seconds since 1950-01-01 00:00:00" (gregorian calendar).
    waterlevel, discharge, temperature, pressure : array-like, optional
        Values to store for the new records. Variables left as None are
        not written.
    """

    var_names = ['waterlevel', 'discharge', 'temperature', 'pressure']
    var_units = ["m", "m^3 s-1", "Celsius", "Pa"]
    var_values = [waterlevel, discharge, temperature, pressure]

    # append if file exists, otherwise write a new file
    with nc.Dataset(fname, 'a' if os.path.isfile(fname) else 'w') as ncf:
        # dimensions
        if 'time_counter' not in ncf.dimensions:
            ncf.createDimension("time_counter", None)  # None -> unlimited length

        # time vector
        # NOTE(review): fill_value=0.0 (here and for the data variables below)
        # makes a genuine 0.0 indistinguishable from "missing" for readers that
        # mask on _FillValue - confirm this is intended.
        if 'time_counter' not in ncf.variables:
            time_counter = ncf.createVariable("time_counter", "double", "time_counter", zlib=True, fill_value=0.0)
            time_counter.standard_name = "time"
            time_counter.units = "seconds since 1950-01-01 00:00:00"
            time_counter.calendar = "gregorian"
        else:
            time_counter = ncf.variables['time_counter']
        sz = time_counter.size
        time_index = slice(sz, sz + len(date))  # at which indices to write the new data
        time_counter[time_index] = nc.date2num(date, time_counter.units, time_counter.calendar)

        # variables
        for vname, units, vals in zip(var_names, var_units, var_values):
            if vals is None:
                continue
            if vname not in ncf.variables:
                # units are only set when the variable is first created
                ncf.createVariable(vname, "float32", "time_counter", zlib=True, fill_value=0.0).units = units
            ncf.variables[vname][time_index] = vals

SyntaxError: EOL while scanning string literal (<ipython-input-2-bccf2ee1c521>, line 33)

In [6]:
# Convert the station 08MF005 daily gauge CSV into a NetCDF discharge series.
# NOTE(review): write_netcdf appends when the target file already exists, so
# re-running this cell adds the same records a second time - remove the .nc
# file before a re-run.
# NOTE(review): the input file name says 1979_to_2021 while the output is
# named 1979_to_2019, and all data rows are read - confirm the intended range.
dates, values = read_hist_hydro(
    file='..//data//runoff//gauge//BC_08MF005_Daily_1979_to_2021.csv',
)
write_netcdf(
    fname="BC_08MF005_1979_to_2019.nc",
    date=dates,
    discharge=values,
)

In [5]:
# Echo the discharge array (`values` is assigned from read_hist_hydro in the preceding cell).
values

array([ 861.,  847.,  826., ..., 1130., 1120., 1130.])