In [21]:
%matplotlib inline
import os
from glob import glob
import numpy as np
import pandas as pd

In [18]:
# helper functions
# - wrapper around reader functions to return a single combined dataframe
from mmctools.dataloaders import read_dir, read_date_dirs, read_files
# - more sophisticated data readers for when default pd.read_csv() doesn't cut it
from mmctools.measurements.radar import profiler # see mmctools.measurements.*

# Example: Data processing
The result of this notebook should be a dataframe with standard output variables and a standard format.

## Setup

In [16]:
# --- What to fetch ---------------------------------------------------------
# DAP dataset identifier, written as project/class.instance.level
dataset = 'wfip2/radar.z04.b0'

# Time window used to select files. Both bounds are midnight timestamps.
# NOTE(review): enddate is 00:00:00 on Nov 30, so files time-stamped later on
# Nov 30 may fall outside the search window -- confirm that this is intended.
startdate = pd.Timestamp('2016-11-01')
enddate = pd.Timestamp('2016-11-30')

# Optional file specs used to narrow the search; set to '' or None to skip one.
dataext = 'txt'     # file type, i.e. the file extension
dataext1 = 'winds'  # extra extension field, e.g. *.winds.txt

# --- Where to put it -------------------------------------------------------
download_path = 'data'   # top-level local download directory
overwrite_files = False  # True forces a download even if the files already exist

## Download data from the DAP

In [17]:
# Local directory for this dataset: the dataset name with '/' turned into '.',
# placed under the download directory.
datapath = os.path.join(download_path, '.'.join(dataset.split('/')))
print('Data path:', datapath)

Data path: data/wfip2.radar.z04.b0


In [12]:
# Search the DAP for the requested files and download them with dap-py (the
# `A2e` module), if that module is installed.
#
# `filelist` is defined on every code path so that later cells can refer to it
# without a NameError: it stays None when dap-py is not available, and it is
# also None when download_files() reports that nothing was downloaded.
filelist = None

try:
    import A2e
except ImportError:
    print('dap-py module not available; need to manually download files to '+datapath)
else:
    a2e = A2e.A2e()
    a2e.setup_cert_auth()

    # Search criteria: the dataset name plus a date/time window, with both
    # bounds formatted as YYYYMMDDhhmmss strings.
    filter_arg = {
        'Dataset': dataset,
        'date_time': {
            'between': [startdate.strftime('%Y%m%d%H%M%S'), enddate.strftime('%Y%m%d%H%M%S')]
        }
    }
    # Optional criteria are only added when they are set (non-empty).
    if dataext:
        filter_arg['file_type'] = dataext
    if dataext1:
        filter_arg['ext1'] = dataext1

    datafiles = a2e.search(filter_arg)
    print(len(datafiles),'data files selected')

    filelist = a2e.download_files(datafiles, path=download_path, force=overwrite_files)
    if filelist is None:
        print('No files were downloaded; need to manually download files to '+datapath)

Certificate is setup
Valid certificate already created
697 data files selected
Could not place order
Server Returned Bad Status Code
Status Code: 400
Reason: files must share a dataset
No files were downloaded; need to manually download files to data


## Process the downloaded files

Read a single directory (here, the `201611` subdirectory of the data path):

In [None]:
%time df = read_dir(os.path.join(datapath,'201611'), reader=profiler, file_filter='*.txt',
                    # additional reader arguments:
                    parse_dates=['date_time'])

Read all subdirectories whose names match the specified date format (here, `%Y%m`):

In [None]:
%time df = read_date_dirs(datapath, reader=profiler, expected_date_format='%Y%m', ext='txt',
                          # additional reader arguments:
                          parse_dates=['date_time'])

Read an explicit list of file paths (here, the `filelist` returned by the download step):

In [5]:
%time df = read_files(filelist, reader=profiler,
                      # additional reader arguments:
                      parse_dates=['date_time']
                     )

Processing /Users/equon/WFIP2/Wasco/radar.z04.b0/201602
  168 dataframes added
Processing /Users/equon/WFIP2/Wasco/radar.z04.b0/201603
  744 dataframes added
Processing /Users/equon/WFIP2/Wasco/radar.z04.b0/201604
  720 dataframes added
Processing /Users/equon/WFIP2/Wasco/radar.z04.b0/201605
  744 dataframes added
Processing /Users/equon/WFIP2/Wasco/radar.z04.b0/201606
  719 dataframes added
Processing /Users/equon/WFIP2/Wasco/radar.z04.b0/201607
  744 dataframes added
Processing /Users/equon/WFIP2/Wasco/radar.z04.b0/201608
  744 dataframes added
Processing /Users/equon/WFIP2/Wasco/radar.z04.b0/201609
  0 dataframes added
Processing /Users/equon/WFIP2/Wasco/radar.z04.b0/201610
  0 dataframes added
Processing /Users/equon/WFIP2/Wasco/radar.z04.b0/201611
  0 dataframes added
Processing /Users/equon/WFIP2/Wasco/radar.z04.b0/201612
  0 dataframes added
CPU times: user 33.2 s, sys: 1.08 s, total: 34.3 s
Wall time: 39.7 s
