### BOM daily
***
#### Download and calculate extreme statistics using the BOM daily climate data

The data are located in three sources:  
    * http://rs-data1-mel.csiro.au/thredds/catalog/bawap/catalog.html as netCDF  
    * \\osm-06-cdc.it.csiro.au\OSM_CBR_LW_Bawapsilo_home\bawap | /OSM/CBR/LW_Bawapsilo/home/bawap as both netCDF and flt
    * \\wron\TimeSeries\Climate\bawap
    
I can't access the wron share, and the Linux mount of bawap doesn't seem to mirror the Windows directory. It is probably easier to work with netCDF, so I'll download the files from the thredds server. 

In [39]:
# Run the shared start-up script before anything else.
# NOTE(review): `os`, `np` and `io` are used further down without being imported
# in this notebook, so they are presumably defined by startup.py — confirm.
%run startup.py

In [898]:
from maclab.maclab import spatial as sp
from maclab.maclab import utils as ut
from maclab.maclab import slurm as sl
import thredds_crawler
from thredds_crawler.crawl import Crawl
import importlib
import pandas as pd

In [3]:
# working locations
# RECA is the root of the project work area; every other path hangs off it
RECA = '/OSM/CBR/LW_BACKCAST/work'
# climate source data directory underneath the work area
src = '/'.join((RECA, 'SOURCE', 'clim'))
#os.mkdir(src)

**Download netCDF files from thredds**

In [638]:
# available daily climate fields
# Note: 'rad' is also available, but the time series only extends back to 1990
# so it is not downloaded
fields = ['rain', 'tmax', 'tmin']

for var in fields:
    
    # config: one directory per variable underneath the source tree.
    # makedirs(exist_ok=True) also creates a missing parent (e.g. awap_daily)
    # and is a no-op when the directory is already there.
    var_dir = '{0}/SOURCE/clim/awap_daily/{1}'.format(RECA, var)
    os.makedirs(var_dir, exist_ok=True)
    print('Destination directory: %s' %var_dir)
    
    # generate urls: crawl the thredds catalogue for this variable and turn
    # every dataset id into a fileServer download url
    c = Crawl('http://rs-data1-mel.csiro.au/thredds/catalog/bawap/%s/day/catalog.html' %var, debug = False)
    datasets = [i.id for i in c.datasets]
    server = 'http://rs-data1-mel.csiro.au/thredds/fileServer/'
    urls = [server + i for i in datasets]
    # keep a record of what was requested alongside the data
    pd.DataFrame(urls).to_csv('{0}/{1}_urls.csv'.format(var_dir, var), index= False)
    print('Found {0} urls to download for {1}'.format(len(urls), var))
    # the netCDF files themselves go into an 'nc' sub-directory
    var_dir = '{0}/nc'.format(var_dir)
    os.makedirs(var_dir, exist_ok=True)
    print('Destination directory: %s' %var_dir)
    
    # download
    # NOTE(review): `io` is not imported in this notebook's import cell; it is
    # presumably a project module provided by startup.py — confirm.
    for nc in urls:
        io.downloadfile(nc, dst=var_dir, auth=False, overwrite=False, verbose=False)
        
    print('{0} completed'.format(var))

Destination directory: /OSM/CBR/LW_BACKCAST/work/SOURCE/clim/awap_daily/rad


**Check what's what in the downloaded files and set up for calculating stats**

In [932]:
# climate variable processed by the cells below (one of the downloaded fields:
# 'rain', 'tmax', 'tmin')
var = 'rain'

In [933]:
def _year_from_filename(path):
    """Return the year (4-character string) from the last date token of a file name.

    Example: 'bom-tmax_day-19460101-19460131.nc' -> '1946'
    """
    return os.path.basename(path).split('-')[-1].split('.')[0][:4]


def var_summary(var):
    """Summarise the downloaded netCDF files for one variable and group them by year.

    Lists the files in the variable's ``nc`` directory, reports the first and
    last year found, checks that every year from 1913 to 2017 is present and
    that each retained year has exactly 12 files.

    Parameters
    ----------
    var : str
        Name of the climate variable sub-directory (e.g. 'rain').

    Returns
    -------
    dict
        Maps year (str) to the list of file paths for that year, restricted to
        1913-2017 and in chronological key order. Files keep the order
        produced by sorting with ``ut.tokenize``.

    Raises
    ------
    ValueError
        If no netCDF files are found in the directory.
    """
    var_dir = '/OSM/CBR/LW_BACKCAST/work/SOURCE/clim/awap_daily/{0}/nc'.format(var)
    nc = ut.listfiles(var_dir, pattern = '.nc')
    nc.sort(key = ut.tokenize)
    print('Found %s netCDF files in:\n\t%s' %(len(nc), var_dir))
    if not nc:
        # fail with a clear message instead of an IndexError further down
        raise ValueError('No netCDF files found in {0}'.format(var_dir))

    # group every file under its year in a single pass
    files_by_year = {}
    for path in nc:
        files_by_year.setdefault(_year_from_filename(path), []).append(path)
    years = sorted(files_by_year)
    print('First and last years downloaded are:\n\t%s, %s' %(years[0], years[-1]))

    # check time series is continuous from 1913
    required = [str(i) for i in range(1913, 2018)]
    missing = [yr for yr in required if yr not in files_by_year]
    if not missing:
        print('All years from 1913 are available')
    else:
        print('Missing years:\n\t%s' % ', '.join(missing))

    # keep 1913-2017 only: junk earlier years and anything after 2017,
    # however many later years have been downloaded
    by_year = {yr: files_by_year[yr] for yr in required if yr in files_by_year}

    # a complete year is exactly 12 monthly files, not merely "at least one"
    incomplete = [yr for yr in by_year if len(by_year[yr]) != 12]
    if not incomplete:
        print('Have downloaded files for %s years, each year represented by 12 months' %len(by_year))
    else:
        for yr in incomplete:
            print('Year %s has %s files' %(yr, len(by_year[yr])))
    
    return(by_year)

In [934]:
# group the downloaded files for `var` by year; later cells look files up per year
by_year = var_summary(var)

Found 1424 netCDF files in:
	/OSM/CBR/LW_BACKCAST/work/SOURCE/clim/awap_daily/rain/nc
First and last years downloaded are:
	1900, 2018
All years from 1913 are available
Have downloaded files for 105 years, each year represented by 12 months


**Set up centred climatology**
***
Base climatology: 1946-1975

In [935]:
def config_base_climatology(by_year, start, stop, var, 
                            dst = None):
    """Write the list of files that make up the base climatology period.

    The file paths for every year in ``[start, stop)`` are written to a text
    file, one path per line, so they can be consumed by a later batch job.

    Parameters
    ----------
    by_year : dict
        Maps year (str) to a list of file paths for that year.
    start : int
        First year of the base period (inclusive).
    stop : int
        End of the base period (exclusive), e.g. 1976 for a period ending 1975.
    var : str
        Variable name; only used to build the default output file name.
    dst : str, optional
        Output text file. Defaults to ``<var>_nc_files.txt`` in the
        base-period-1960 directory.

    Raises
    ------
    KeyError
        If a year in the requested period is not a key of ``by_year``.
    ValueError
        If the requested period selects no files.
    """
    # collect the files of every year in the period, in chronological order
    base_files = []
    for year in range(start, stop):
        base_files.extend(by_year[str(year)])

    if not base_files:
        # an empty list would silently produce a job with nothing to process
        raise ValueError('No files selected for base period {0}-{1}'.format(start, stop))
    
    # dump to file to then run as slurm job
    if dst is None:
        dst = '/OSM/CBR/LW_BACKCAST/work/DEV/awap_indicies/base-period-1960' + \
               '/{0}_nc_files.txt'.format(var)
        
    # the with-block closes the file; no explicit close() is needed
    with open(dst, 'w') as f:
        f.writelines(path + '\n' for path in base_files)
    
    print('txt file written here:\n\t{0}'.format(dst))

In [936]:
# base period 1946-1975 (stop is exclusive); by_year must be passed as the
# first argument to match the function signature
config_base_climatology(by_year, 1946, 1976, var)

txt file written here:
	/OSM/CBR/LW_BACKCAST/work/DEV/awap_indicies/base-period-1960/rain_nc_files.txt


In [937]:
def readfile(src):
    """Read a text file and return its lines with surrounding whitespace removed.

    Parameters
    ----------
    src : str
        Path of the text file to read.

    Returns
    -------
    list of str
        One entry per line of the file, each stripped of leading and trailing
        whitespace (including the newline).
    """
    with open(src) as handle:
        # iterating the handle yields the same lines as readlines()
        return [line.strip() for line in handle]

In [938]:
# check: show the first three paths in the tmax base-period file list.
# NOTE(review): only the 'rain' run of config_base_climatology is shown in this
# notebook; this assumes the same step has already been run for 'tmax' — confirm.
readfile('/OSM/CBR/LW_BACKCAST/work/DEV/awap_indicies/base-period-1960/tmax_nc_files.txt')[0:3]

['/OSM/CBR/LW_BACKCAST/work/SOURCE/clim/awap_daily/tmax/nc/bom-tmax_day-19460101-19460131.nc',
 '/OSM/CBR/LW_BACKCAST/work/SOURCE/clim/awap_daily/tmax/nc/bom-tmax_day-19460201-19460228.nc',
 '/OSM/CBR/LW_BACKCAST/work/SOURCE/clim/awap_daily/tmax/nc/bom-tmax_day-19460301-19460331.nc']