From ad8e0c7f94e6c6cc270e880f92d1bd395cc3dd49 Mon Sep 17 00:00:00 2001 From: Kim Whitehall Date: Tue, 28 Oct 2014 08:29:07 -0700 Subject: [PATCH] removing files.py and process.py from te code repo --- mccsearch/code/files.py | 783 --------------------------- mccsearch/code/mccSearch.py | 6 +- mccsearch/code/process.py | 1007 ----------------------------------- 3 files changed, 2 insertions(+), 1794 deletions(-) delete mode 100644 mccsearch/code/files.py delete mode 100644 mccsearch/code/process.py diff --git a/mccsearch/code/files.py b/mccsearch/code/files.py deleted file mode 100644 index b2387547..00000000 --- a/mccsearch/code/files.py +++ /dev/null @@ -1,783 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -""" -Module for handling data input files. Requires netCDF and Numpy be -installed. - -This module can easily open NetCDF, HDF and Grib files. Search the netCDF4 -documentation for a complete list of supported formats. -""" - -from os import path -import netCDF4 -import numpy as np -import numpy.ma as ma -import sys - -from toolkit import process -from utils import fortranfile -from utils import misc - - -VARIABLE_NAMES = {'time': ['time', 'times', 'date', 'dates', 'julian'], - 'latitude': ['latitude', 'lat', 'lats', 'latitudes'], - 'longitude': ['longitude', 'lon', 'lons', 'longitudes'] - } - - -def findunique(seq): - keys = {} - for e in seq: - keys[e] = 1 - return keys.keys() - -def getVariableByType(filename, variableType): - """ - Function that will try to return the variable from a file based on a provided - parameter type. - - Input:: - filename - the file to inspect - variableType - time | latitude | longitude - - Output:: - variable name OR list of all variables in the file if a single variable - name match cannot be found. - """ - try: - f = netCDF4.Dataset(filename, mode='r') - except: - print "netCDF4Error:", sys.exc_info()[0] - raise - - variableKeys = f.variables.keys() - f.close() - variableKeys = [variable.encode().lower() for variable in variableKeys] - variableMatch = VARIABLE_NAMES[variableType] - - commonVariables = list(set(variableKeys).intersection(variableMatch)) - - if len(commonVariables) == 1: - return str(commonVariables[0]) - - else: - return variableKeys - -def getVariableRange(filename, variableName): - """ - Function to return the min and max values of the given variable in - the supplied filename. - - Input:: - filename - absolute path to a file - variableName - variable whose min and max values should be returned - - Output:: - variableRange - tuple of order (variableMin, variableMax) - """ - try: - f = netCDF4.Dataset(filename, mode='r') - except: - print "netCDF4Error:", sys.exc_info()[0] - raise - - varArray = f.variables[variableName][:] - return (varArray.min(), varArray.max()) - - -def read_data_from_file_list(filelist, myvar, timeVarName, latVarName, lonVarName): - ''' - Read in data from a list of model files into a single data structure - - Input: - filelist - list of filenames (including path) - myvar - string containing name of variable to load in (as it appears in file) - Output: - lat, lon - 2D array of latitude and longitude values - timestore - list of times - t2store - numpy array containing data from all files - - NB. originally written specific for WRF netCDF output files - modified to make more general (Feb 2011) - - Peter Lean July 2010 - ''' - - filelist.sort() - filename = filelist[0] - # Crash nicely if 'filelist' is zero length - """TODO: Throw Error instead via try Except""" - if len(filelist) == 0: - print 'Error: no files have been passed to read_data_from_file_list()' - sys.exit() - - # Open the first file in the list to: - # i) read in lats, lons - # ii) find out how many timesteps in the file - # (assume same ntimes in each file in list) - # -allows you to create an empty array to store variable data for all times - tmp = netCDF4.Dataset(filename, mode='r') - latsraw = tmp.variables[latVarName][:] - lonsraw = tmp.variables[lonVarName][:] - if(latsraw.ndim == 1): - lon, lat = np.meshgrid(lonsraw, latsraw) - if(latsraw.ndim == 2): - lon = lonsraw - lat = latsraw - - timesraw = tmp.variables[timeVarName] - ntimes = len(timesraw) - - print 'Lats and lons read in for first file in filelist' - - # Create a single empty masked array to store model data from all files - t2store = ma.zeros((ntimes * len(filelist), len(lat[:, 0]), len(lon[0, :]))) - timestore = ma.zeros((ntimes * len(filelist))) - - # Now load in the data for real - # NB. no need to reload in the latitudes and longitudes -assume invariant - i = 0 - timesaccu = 0 # a counter for number of times stored so far in t2store - # NB. this method allows for missing times in data files - # as no assumption made that same number of times in each file... - - - for i, ifile in enumerate(filelist): - - #print 'Loading data from file: ',filelist[i] - f = netCDF4.Dataset(ifile, mode='r') - t2raw = ma.array(f.variables[myvar][:]) - timesraw = f.variables[timeVarName] - time = timesraw[:] - ntimes = len(time) - print 'file= ', i, 'ntimes= ', ntimes, filelist[i] - print 't2raw shape: ', t2raw.shape - - # Flatten dimensions which needn't exist, i.e. level - # e.g. if for single level then often data have 4 dimensions, when 3 dimensions will do. - # Code requires data to have dimensions, (time,lat,lon) - # i.e. remove level dimensions - # Remove 1d axis from the t2raw array - # Example: t2raw.shape == (365, 180, 360 1) - # After the squeeze you will be left with (365, 180, 360) instead - t2tmp = t2raw.squeeze() - # Nb. if this happens to be data for a single time only, then we just flattened it by accident - # lets put it back... - if t2tmp.ndim == 2: - t2tmp = np.expand_dims(t2tmp, 0) - - t2store[timesaccu + np.arange(ntimes), :, :] = t2tmp[:, :, :] - timestore[timesaccu + np.arange(ntimes)] = time - timesaccu += ntimes - f.close() - - print 'Data read in successfully with dimensions: ', t2store.shape - - # TODO: search for duplicated entries (same time) and remove duplicates. - # Check to see if number of unique times == number of times, if so then no problem - - if(len(np.unique(timestore)) != len(np.where(timestore != 0)[0].view())): - print 'WARNING: Possible duplicated times' - - # Decode model times into python datetime objects. Note: timestore becomes a list (no more an array) here - timestore, _ = process.getModelTimes(filename, timeVarName) - - # Make sure latlon grid is monotonically increasing and that the domains - # are correct - lat, lon, t2store = checkLatLon(lat, lon, t2store) - data_dict = {'lats': lat, 'lons': lon, 'times': timestore, 'data': t2store} - return data_dict - -def select_var_from_file(myfile, fmt='not set'): - ''' - Routine to act as user interface to allow users to select variable of interest from a file. - - Input: - myfile - filename - fmt - (optional) specify fileformat for netCDF4 if filename suffix is non-standard - - Output: - myvar - variable name in file - - Peter Lean September 2010 - ''' - - print fmt - - if fmt == 'not set': - f = netCDF4.Dataset(myfile, mode='r') - - if fmt != 'not set': - f = netCDF4.Dataset(myfile, mode='r', format=fmt) - - keylist = [key.encode().lower() for key in f.variables.keys()] - - i = 0 - for v in keylist: - print '[', i, '] ', f.variables[v].long_name, ' (', v, ')' - i += 1 - - user_selection = raw_input('Please select variable : [0 -' + str(i - 1) + '] ') - - myvar = keylist[int(user_selection)] - - return myvar - -def select_var_from_wrf_file(myfile): - ''' - Routine to act as user interface to allow users to select variable of interest from a wrf netCDF file. - - Input: - myfile - filename - - Output: - mywrfvar - variable name in wrf file - - Peter Lean September 2010 - ''' - - f = netCDF4.Dataset(myfile, mode='r', format='NETCDF4') - keylist = [key.encode().lower() for key in f.variables.keys()] - - i = 0 - for v in keylist: - try: - print '[', i, '] ', f.variables[v].description, ' (', v, ')' - except: - print '' - - i += 1 - - user_selection = raw_input('Please select WRF variable : [0 -' + str(i - 1) + '] ') - - mywrfvar = keylist[int(user_selection)] - - return mywrfvar - -def read_data_from_one_file(ifile, myvar, latVarName, lonVarName, timeVarName, file_type): - """ - Purpose:: - Read in data from one file at a time - - Input:: - filelist - list of filenames (including path) - myvar - string containing name of variable to load in (as it appears in file)s - lonVarName - name of the Longitude Variable - timeVarName - name of the Time Variable - fileType - type of file we are trying to parse - - Output:: - lat, lon - 2D arrays of latitude and longitude values - times - list of times - t2store - numpy array containing data from the file for the requested variable - varUnit - units for the variable given by t2store - """ - f = netCDF4.Dataset(ifile, mode='r') - try: - varUnit = f.variables[myvar].units.encode().upper() - except: - varUnit = raw_input('Enter the model variable unit: \n> ').upper() - t2raw = ma.array(f.variables[myvar][:]) - t2tmp = t2raw.squeeze() - if t2tmp.ndim == 2: - t2tmp = np.expand_dims(t2tmp, 0) - - lonsraw = f.variables[lonVarName][:] - latsraw = f.variables[latVarName][:] - if(latsraw.ndim == 1): - lon, lat = np.meshgrid(lonsraw, latsraw) - if(latsraw.ndim == 2): - lon = lonsraw - lat = latsraw - - f.close() - print ' success read_data_from_one_file: VarName=', myvar, ' Shape(Full)= ', t2tmp.shape, ' Unit= ', varUnit - timestore = process.decode_model_timesK(ifile, timeVarName, file_type) - - # Make sure latlon grid is monotonically increasing and that the domains - # are correct - lat, lon, t2store = checkLatLon(lat, lon, t2tmp) - return lat, lon, timestore, t2store, varUnit - -def findTimeVariable(filename): - """ - Function to find what the time variable is called in a model file. - Input:: - filename - file to crack open and check for a time variable - Output:: - timeName - name of the input file's time variable - variableNameList - list of variable names from the input filename - """ - try: - f = netCDF4.Dataset(filename, mode='r') - except: - print("Unable to open '%s' to try and read the Time variable" % filename) - raise - - variableNameList = [variable.encode() for variable in f.variables.keys()] - # convert all variable names into lower case - varNameListLowerCase = [x.lower() for x in variableNameList] - - # Use "set" types for finding common variable name from in the file and from the list of possibilities - possibleTimeNames = set(['time', 'times', 'date', 'dates', 'julian']) - - # Use the sets to find the intersection where variable names are in possibleNames - timeNameSet = possibleTimeNames.intersection(varNameListLowerCase) - - if len(timeNameSet) == 0: - print("Unable to autodetect the Time Variable Name in the '%s'" % filename) - timeName = misc.askUserForVariableName(variableNameList, targetName ="Time") - - else: - timeName = timeNameSet.pop() - - return timeName, variableNameList - - -def findLatLonVarFromFile(filename): - """ - Function to find what the latitude and longitude variables are called in a model file. - - Input:: - -filename - Output:: - -latVarName - -lonVarName - -latMin - -latMax - -lonMin - -lonMax - """ - try: - f = netCDF4.Dataset(filename, mode='r') - except: - print("Unable to open '%s' to try and read the Latitude and Longitude variables" % filename) - raise - - variableNameList = [variable.encode() for variable in f.variables.keys()] - # convert all variable names into lower case - varNameListLowerCase = [x.lower() for x in variableNameList] - - # Use "set" types for finding common variable name from in the file and from the list of possibilities - possibleLatNames = set(['latitude', 'lat', 'lats', 'latitudes']) - possibleLonNames = set(['longitude', 'lon', 'lons', 'longitudes']) - - # Use the sets to find the intersection where variable names are in possibleNames - latNameSet = possibleLatNames.intersection(varNameListLowerCase) - lonNameSet = possibleLonNames.intersection(varNameListLowerCase) - - if len(latNameSet) == 0 or len(lonNameSet) == 0: - print("Unable to autodetect Latitude and/or Longitude values in the file") - latName = misc.askUserForVariableName(variableNameList, targetName ="Latitude") - lonName = misc.askUserForVariableName(variableNameList, targetName ="Longitude") - - else: - latName = latNameSet.pop() - lonName = lonNameSet.pop() - - lats = np.array(f.variables[latName][:]) - lons = np.array(f.variables[lonName][:]) - - latMin = lats.min() - latMax = lats.max() - - # Convert the lons from 0:360 into -180:180 - lons[lons > 180] = lons[lons > 180] - 360. - lonMin = lons.min() - lonMax = lons.max() - - return latName, lonName, latMin, latMax, lonMin, lonMax - - -def read_data_from_file_list_K(filelist, myvar, timeVarName, latVarName, lonVarName, file_type): - ################################################################################## - # Read in data from a list of model files into a single data structure - # Input: filelist - list of filenames (including path) - # myvar - string containing name of variable to load in (as it appears in file) - # Output: lat, lon - 2D array of latitude and longitude values - # times - list of times - # t2store - numpy array containing data from all files - # Modified from read_data_from_file_list to read data from multiple models into a 4-D array - # 1. The code now processes model data that completely covers the 20-yr period. Thus, - # all model data must have the same time levels (ntimes). Unlike in the oroginal, ntimes - # is fixed here. - # 2. Because one of the model data exceeds 240 mos (243 mos), the model data must be - # truncated to the 240 mons using the ntimes determined from the first file. - ################################################################################## - filelist.sort() - nfiles = len(filelist) - # Crash nicely if 'filelist' is zero length - if nfiles == 0: - print 'Error: no files have been passed to read_data_from_file_list(): Exit' - sys.exit() - - # Open the first file in the list to: - # i) read in lats, lons - # ii) find out how many timesteps in the file (assume same ntimes in each file in list) - # -allows you to create an empty array to store variable data for all times - tmp = netCDF4.Dataset(filelist[0], mode='r', format=file_type) - latsraw = tmp.variables[latVarName][:] - lonsraw = tmp.variables[lonVarName][:] - timesraw = tmp.variables[timeVarName] - - if(latsraw.ndim == 1): - lon, lat = np.meshgrid(lonsraw, latsraw) - - elif(latsraw.ndim == 2): - lon = lonsraw - lat = latsraw - ntimes = len(timesraw); nygrd = len(lat[:, 0]); nxgrd = len(lon[0, :]) - - print 'Lats and lons read in for first file in filelist' - - # Create a single empty masked array to store model data from all files - #t2store = ma.zeros((ntimes*nfiles,nygrd,nxgrd)) - t2store = ma.zeros((nfiles, ntimes, nygrd, nxgrd)) - #timestore=ma.zeros((ntimes*nfiles)) - - ## Now load in the data for real - ## NB. no need to reload in the latitudes and longitudes -assume invariant - #timesaccu=0 # a counter for number of times stored so far in t2store - # NB. this method allows for missing times in data files - # as no assumption made that same number of times in each file... - - for i, ifile in enumerate(filelist): - #print 'Loading data from file: ',filelist[i] - f = netCDF4.Dataset(ifile, mode='r') - t2raw = ma.array(f.variables[myvar][:]) - timesraw = f.variables[timeVarName] - #ntimes=len(time) - #print 'file= ',i,'ntimes= ',ntimes,filelist[i] - ## Flatten dimensions which needn't exist, i.e. level - ## e.g. if for single level then often data have 4 dimensions, when 3 dimensions will do. - ## Code requires data to have dimensions, (time,lat,lon) - ## i.e. remove level dimensions - t2tmp = t2raw.squeeze() - ## Nb. if data happen to be for a single time, we flattened it by accident; lets put it back... - if t2tmp.ndim == 2: - t2tmp = np.expand_dims(t2tmp, 0) - #t2store[timesaccu+np.arange(ntimes),:,:]=t2tmp[0:ntimes,:,:] - t2store[i, 0:ntimes, :, :] = t2tmp[0:ntimes, :, :] - #timestore[timesaccu+np.arange(ntimes)]=time - #timesaccu=timesaccu+ntimes - f.close() - - print 'Data read in successfully with dimensions: ', t2store.shape - - # Decode model times into python datetime objects. Note: timestore becomes a list (no more an array) here - ifile = filelist[0] - timestore, _ = process.getModelTimes(ifile, timeVarName) - - # Make sure latlon grid is monotonically increasing and that the domains - # are correct - lat, lon, t2store = checkLatLon(lat, lon, t2store) - return lat, lon, timestore, t2store - -def find_latlon_ranges(filelist, lat_var_name, lon_var_name): - # Function to return the latitude and longitude ranges of the data in a file, - # given the identifying variable names. - # - # Input: - # filelist - list of filenames (data is read in from first file only) - # lat_var_name - variable name of the 'latitude' variable - # lon_var_name - variable name of the 'longitude' variable - # - # Output: - # latMin, latMax, lonMin, lonMax - self explanatory - # - # Peter Lean March 2011 - - filename = filelist[0] - - try: - f = netCDF4.Dataset(filename, mode='r') - - lats = f.variables[lat_var_name][:] - latMin = lats.min() - latMax = lats.max() - - lons = f.variables[lon_var_name][:] - lons[lons > 180] = lons[lons > 180] - 360. - lonMin = lons.min() - lonMax = lons.max() - - return latMin, latMax, lonMin, lonMax - - except: - print 'Error: there was a problem with finding the latitude and longitude ranges in the file' - print ' Please check that you specified the filename, and variable names correctly.' - - sys.exit() - -def writeBN_lola(fileName, lons, lats): - # write a binary data file that include longitude (1-d) and latitude (1-d) values - - F = fortranfile.FortranFile(fileName, mode='w') - ngrdY = lons.shape[0]; ngrdX = lons.shape[1] - tmpDat = ma.zeros(ngrdX); tmpDat[:] = lons[0, :]; F.writeReals(tmpDat) - tmpDat = ma.zeros(ngrdY); tmpDat[:] = lats[:, 0]; F.writeReals(tmpDat) - # release temporary arrays - tmpDat = 0 - F.close() - -def writeBNdata(fileName, numOBSs, numMDLs, nT, ngrdX, ngrdY, numSubRgn, obsData, mdlData, obsRgnAvg, mdlRgnAvg): - #(fileName,maskOption,numOBSs,numMDLs,nT,ngrdX,ngrdY,numSubRgn,obsData,mdlData,obsRgnAvg,mdlRgnAvg): - # write spatially- and regionally regridded data into a binary data file - missing = -1.e26 - F = fortranfile.FortranFile(fileName, mode='w') - # construct a data array to replace mask flag with a missing value (missing=-1.e12) for printing - data = ma.zeros((nT, ngrdY, ngrdX)) - for m in np.arange(numOBSs): - data[:, :, :] = obsData[m, :, :, :]; msk = data.mask - for n in np.arange(nT): - for j in np.arange(ngrdY): - for i in np.arange(ngrdX): - if msk[n, j, i]: data[n, j, i] = missing - - # write observed data. allowed to write only one row at a time - tmpDat = ma.zeros(ngrdX) - for n in np.arange(nT): - for j in np.arange(ngrdY): - tmpDat[:] = data[n, j, :] - F.writeReals(tmpDat) - - # write model data (dep. on the number of models). - for m in np.arange(numMDLs): - data[:, :, :] = mdlData[m, :, :, :] - msk = data.mask - for n in np.arange(nT): - for j in np.arange(ngrdY): - for i in np.arange(ngrdX): - if msk[n, j, i]: - data[n, j, i] = missing - - for n in np.arange(nT): - for j in np.arange(ngrdY): - tmpDat[:] = data[n, j, :] - F.writeReals(tmpDat) - - data = 0 # release the array allocated for data - # write data in subregions - if numSubRgn > 0: - print 'Also included are the time series of the means over ', numSubRgn, ' areas from obs and model data' - tmpDat = ma.zeros(nT); print numSubRgn - for m in np.arange(numOBSs): - for n in np.arange(numSubRgn): - tmpDat[:] = obsRgnAvg[m, n, :] - F.writeReals(tmpDat) - for m in np.arange(numMDLs): - for n in np.arange(numSubRgn): - tmpDat[:] = mdlRgnAvg[m, n, :] - F.writeReals(tmpDat) - tmpDat = 0 # release the array allocated for tmpDat - F.close() - -def writeNCfile(fileName, numSubRgn, lons, lats, obsData, mdlData, obsRgnAvg, mdlRgnAvg, obsList, mdlList, subRegions): - # write an output file of variables up to 3 dimensions - # fileName: the name of the output data file - # numSubRgn : the number of subregions - # lons[ngrdX]: longitude - # lats[ngrdY]: latitudes - # obsData[nT,ngrdY,ngrdX]: the obs time series of the entire model domain - # mdlData[numMDLs,nT,ngrdY,ngrdX]: the mdltime series of the entire model domain - # obsRgnAvg[numSubRgn,nT]: the obs time series for the all subregions - # mdlRgnAvg[numMDLs,numSubRgn,nT]: the mdl time series for the all subregions - dimO = obsData.shape[0] # the number of obs data - dimM = mdlData.shape[0] # the number of mdl data - dimT = mdlData.shape[1] # the number of time levels - dimY = mdlData.shape[2] # y-dimension - dimX = mdlData.shape[3] # x-dimension - dimR = obsRgnAvg.shape[1] # the number of subregions - f = netCDF4.Dataset(fileName, mode='w', format='NETCDF4') - print mdlRgnAvg.shape, dimM, dimR, dimT - #create global attributes - f.description = '' - # create dimensions - print 'Creating Dimensions within the NetCDF Object...' - f.createDimension('unity', 1) - f.createDimension('time', dimT) - f.createDimension('west_east', dimX) - f.createDimension('south_north', dimY) - f.createDimension('obs', dimO) - f.createDimension('models', dimM) - - # create the variable (real*4) to be written in the file - print 'Creating Variables...' - f.createVariable('lon', 'd', ('south_north', 'west_east')) - f.createVariable('lat', 'd', ('south_north', 'west_east')) - f.createVariable('oDat', 'd', ('obs', 'time', 'south_north', 'west_east')) - f.createVariable('mDat', 'd', ('models', 'time', 'south_north', 'west_east')) - - if subRegions: - f.createDimension('regions', dimR) - f.createVariable('oRgn', 'd', ('obs', 'regions', 'time')) - f.createVariable('mRgn', 'd', ('models', 'regions', 'time')) - f.variables['oRgn'].varAttName = 'Observation time series: Subregions' - f.variables['mRgn'].varAttName = 'Model time series: Subregions' - - loadDataIntoNetCDF(f, obsList, obsData, 'Observation') - print 'Loaded the Observations into the NetCDF' - - loadDataIntoNetCDF(f, mdlList, mdlData, 'Model') - - # create attributes and units for the variable - print 'Creating Attributes and Units...' - f.variables['lon'].varAttName = 'Longitudes' - f.variables['lon'].varUnit = 'degrees East' - f.variables['lat'].varAttName = 'Latitudes' - f.variables['lat'].varUnit = 'degrees North' - f.variables['oDat'].varAttName = 'Observation time series: entire domain' - f.variables['mDat'].varAttName = 'Model time series: entire domain' - - # assign the values to the variable and write it - f.variables['lon'][:] = lons[:] - f.variables['lat'][:] = lats[:] - if subRegions: - f.variables['oRgn'][:] = obsRgnAvg[:] - f.variables['mRgn'][:] = mdlRgnAvg[:] - - f.close() - -def loadDataIntoNetCDF(fileObject, datasets, dataArray, dataType): - """ - Input:: - fileObject - netCDF4 file object data will be loaded into - datasets - List of dataset names - dataArray - Multi-dimensional array of data to be loaded into the NetCDF file - dataType - String with value of either 'Model' or 'Observation' - Output:: - No return value. netCDF4 file object is updated in place - """ - datasetCount = 0 - for datasetCount, dataset in enumerate(datasets): - if dataType.lower() == 'observation': - datasetName = dataset.replace(' ','') - elif dataType.lower() == 'model': - datasetName = path.splitext(path.basename(dataset))[0] - print "Creating variable %s" % datasetName - fileObject.createVariable(datasetName, 'd', ('time', 'south_north', 'west_east')) - fileObject.variables[datasetName].varAttName = 'Obseration time series: entire domain' - print 'Loading values into %s' % datasetName - fileObject.variables[datasetName][:] = dataArray[datasetCount,:,:,:] - -def checkLatLon(latsin, lonsin, datain): - """ - Purpose:: - Checks whether latitudes and longitudes are monotonically increasing - within the domains [-90, 90) and [-180, 180) respectively, and rearranges the input data - accordingly if they are not. - - Input:: - latsin - Array of latitudes read from a raw netcdf file - lonsin - Array of longitudes read from a raw netcdf file - datain - Array of data values read from a raw netcdf file. - The shape is assumed to be (..., nLat, nLon). - - Output:: - latsout - 2D array of (rearranged) latitudes - lonsout - 2D array of (rearranged) longitudes - dataout - Array of (rearranged) data - """ - # Avoid unnecessary shifting if all lons are higher than 180 - if lonsin.min() > 180: - lonsin -= 360 - - # Make sure lats and lons are monotonically increasing - latsDecreasing = np.diff(latsin[:, 0]) < 0 - lonsDecreasing = np.diff(lonsin[0]) < 0 - - # If all values are decreasing then they just need to be reversed - latsReversed, lonsReversed = latsDecreasing.all(), lonsDecreasing.all() - - # If the lat values are unsorted then raise an exception - if not latsReversed and latsDecreasing.any(): - raise ValueError('Latitudes must be monotonically increasing.') - - # Perform same checks now for lons - if not lonsReversed and lonsDecreasing.any(): - raise ValueError('Longitudes must be monotonically increasing.') - - # Also check if lons go from [0, 360), and convert to [-180, 180) - # if necessary - lonsShifted = lonsin.max() > 180 - latsout, lonsout, dataout = latsin[:], lonsin[:], datain[:] - # Now correct data if latlon grid needs to be shifted - if latsReversed: - latsout = latsout[::-1] - dataout = dataout[..., ::-1, :] - - if lonsReversed: - lonsout = lonsout[..., ::-1] - dataout = dataout[..., ::-1] - - if lonsShifted: - lat1d = latsout[:, 0] - dataout, lon1d = shiftgrid(180, dataout, lonsout[0], start=False) - lonsout, latsout = np.meshgrid(lon1d, lat1d) - - return latsout, lonsout, dataout - -def shiftgrid(lon0, datain, lonsin, start= True, cyclic=360.0): - """ - Purpose:: - Shift global lat/lon grid east or west. This function is taken directly - from the (unreleased) basemap 1.0.7 source code as version 1.0.6 does not - currently support arrays with more than two dimensions. - https://github.com/matplotlib/basemap - - Input:: - lon0 - starting longitude for shifted grid (ending longitude if start=False). - lon0 must be on input grid (within the range of lonsin). - datain - original data with longitude the right-most dimension. - lonsin - original longitudes. - start - if True, lon0 represents the starting longitude of the new grid. - if False, lon0 is the ending longitude. Default True. - cyclic - width of periodic domain (default 360) - - Output:: - dataout - data on shifted grid - lonsout - lons on shifted grid - """ - if np.fabs(lonsin[-1]-lonsin[0]-cyclic) > 1.e-4: - # Use all data instead of raise ValueError, 'cyclic point not included' - start_idx = 0 - else: - # If cyclic, remove the duplicate point - start_idx = 1 - if lon0 < lonsin[0] or lon0 > lonsin[-1]: - raise ValueError('lon0 outside of range of lonsin') - i0 = np.argmin(np.fabs(lonsin-lon0)) - i0_shift = len(lonsin)-i0 - if ma.isMA(datain): - dataout = ma.zeros(datain.shape,datain.dtype) - else: - dataout = np.zeros(datain.shape,datain.dtype) - if ma.isMA(lonsin): - lonsout = ma.zeros(lonsin.shape,lonsin.dtype) - else: - lonsout = np.zeros(lonsin.shape,lonsin.dtype) - if start: - lonsout[0:i0_shift] = lonsin[i0:] - else: - lonsout[0:i0_shift] = lonsin[i0:]-cyclic - dataout[...,0:i0_shift] = datain[...,i0:] - if start: - lonsout[i0_shift:] = lonsin[start_idx:i0+start_idx]+cyclic - else: - lonsout[i0_shift:] = lonsin[start_idx:i0+start_idx] - dataout[...,i0_shift:] = datain[...,start_idx:i0+start_idx] - return dataout,lonsout \ No newline at end of file diff --git a/mccsearch/code/mccSearch.py b/mccsearch/code/mccSearch.py index 7bfaed6b..5484df1e 100644 --- a/mccsearch/code/mccSearch.py +++ b/mccsearch/code/mccSearch.py @@ -43,10 +43,6 @@ import matplotlib.colors as mcolors from matplotlib.ticker import FuncFormatter, FormatStrFormatter -#existing modules in services -import Nio -#import files -#import process #----------------------- GLOBAL VARIABLES -------------------------- # --------------------- User defined variables --------------------- #FYI the lat lon values are not necessarily inclusive of the points given. These are the limits @@ -2482,6 +2478,8 @@ def getModelTimes(xtimes, timeVarName): modelTimeStep - 'hourly','daily','monthly','annual' ''' + from ocw import utils + timeFormat = xtimes.units # search to check if 'since' appears in units try: diff --git a/mccsearch/code/process.py b/mccsearch/code/process.py deleted file mode 100644 index 7367ee74..00000000 --- a/mccsearch/code/process.py +++ /dev/null @@ -1,1007 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Collection of functions that process numpy arrays of varying shapes. -Functions can range from subsetting to regridding""" - -# Python Standard Libary Imports -import math -import datetime -import re -import string -import sys -import time - -# 3rd Party Imports -import netCDF4 -import numpy as np -import numpy.ma as ma - - - -def extract_subregion_from_data_array(data, lats, lons, latmin, latmax, lonmin, lonmax): - ''' - Extract a sub-region from a data array. - - Example:: - **e.g.** the user may load a global model file, but only want to examine data over North America - This function extracts a sub-domain from the original data. - The defined sub-region must be a regular lat/lon bounding box, - but the model data may be on a non-regular grid (e.g. rotated, or Guassian grid layout). - Data are kept on the original model grid and a rectangular (in model-space) region - is extracted which contains the rectangular (in lat/lon space) user supplied region. - - INPUT:: - data - 3d masked data array - lats - 2d array of latitudes corresponding to data array - lons - 2d array of longitudes corresponding to data array - latmin, latmax, lonmin, lonmax - bounding box of required region to extract - - OUTPUT:: - data2 - subset of original data array containing only data from required subregion - lats2 - subset of original latitude data array - lons2 - subset of original longitude data array - - ''' - - - - # Mask model data array to find grid points inside the supplied bounding box - whlat = (lats > latmin) & (lats < latmax) - whlon = (lons > lonmin) & (lons < lonmax) - wh = whlat & whlon - - # Find required matrix indices describing the limits of the regular lat/lon bounding box - jmax = np.where(wh == True)[0].max() - jmin = np.where(wh == True)[0].min() - imax = np.where(wh == True)[1].max() - imin = np.where(wh == True)[1].min() - - # Cut out the sub-region from the data array - data2 = ma.zeros((data.shape[0], jmax - jmin, imax - imin)) - data2 = data[:, jmin:jmax, imin:imax] - - # Cut out sub-region from lats,lons arrays - lats2 = lats[jmin:jmax, imin:imax] - lons2 = lons[jmin:jmax, imin:imax] - - return data2, lats2, lons2 - -def calc_area_mean(data, lats, lons, mymask='not set'): - ''' - Calculate Area Average of data in a masked array - - INPUT:: - data: a masked array of data (NB. only data from one time expected to be passed at once) - lats: 2d array of regularly gridded latitudes - lons: 2d array of regularly gridded longitudes - mymask: (optional) defines spatial region to do averaging over - - OUTPUT:: - area_mean: a value for the mean inside the area - - ''' - - - - # If mask not passed in, then set maks to cover whole data domain - if(mymask == 'not set'): - mymask = np.empty(data.shape) - mymask[:] = False # NB. mask means (don't show), so False everywhere means use everything. - - # Dimension check on lats, lons - # Sometimes these arrays are 3d, sometimes 2d, sometimes 1d - # This bit of code just converts to the required 2d array shape - if(len(lats.shape) == 3): - lats = lats[0, :, :] - - if(len(lons.shape) == 3): - lons = lons[0, :, :] - - if(np.logical_and(len(lats.shape) == 1, len(lons.shape) == 1)): - lons, lats = np.meshgrid(lons, lats) - - # Calculate grid length (assuming regular lat/lon grid) - dlat = lats[1, 0] - lats[0, 0] - dlon = lons[0, 1] - lons[0, 0] - - # Calculates weights for each grid box - myweights = calc_area_in_grid_box(lats, dlon, dlat) - - # Create a new masked array covering just user selected area (defined in mymask) - # NB. this preserves missing data points in the observations data - subdata = ma.masked_array(data, mask=mymask) - - if(myweights.shape != subdata.shape): - myweights.resize(subdata.shape) - myweights[1:, :] = myweights[0, :] - - # Calculate weighted mean using ma.average (which takes weights) - area_mean = ma.average(subdata, weights=myweights) - - return area_mean - -def calc_area_in_grid_box(latitude, dlat, dlon): - ''' - Calculate area of regular lat-lon grid box - - INPUT:: - latitude: latitude of grid box (degrees) - dlat: grid length in latitude direction (degrees) - dlon: grid length in longitude direction (degrees) - - OUTPUT:: - A: area of the grid box - - ''' - - R = 6371000 # radius of Earth in metres - C = 2 * math.pi * R - - latitude = np.radians(latitude) - - A = (dlon * (C / 360.)) * (dlat * (C / 360.) * np.cos(latitude)) - - return A - -def do_regrid(q, lat, lon, lat2, lon2, order=1, mdi=-999999999): - """ - This function has been moved to the ocw/dataset_processor module - """ - from ocw import dataset_processor - q2 = dataset_processor._rcmes_spatial_regrid(q, lat, lon, lat2, lon2, order=1) - - return q2 - -def create_mask_using_threshold(masked_array, threshold=0.5): - """ - .. deprecated:: 0.3-incubating - Use :func:`ocw.dataset_processor._rcmes_create_mask_using_threshold` instead. - """ - from ocw import dataset_processor - mask = dataset_processor._rcmes_create_mask_using_threshold(masked_array, threshold) - - return mask - -def calc_average_on_new_time_unit(data, dateList, unit='monthly'): - ''' - Routine to calculate averages on longer time units than the data exists on. - - Example:: - If the data is 6-hourly, calculate daily, or monthly means. - - Input:: - data - data values - dateList - list of python datetime structures corresponding to data values - unit - string describing time unit to average onto. - *Valid values are: 'monthly', 'daily', 'pentad','annual','decadal'* - - Output: - meanstorem - numpy masked array of data values meaned over required time period - newTimesList - a list of python datetime objects representing the data in the new averagin period - *NB.* currently set to beginning of averaging period, - **i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.** - ''' - - # First catch unknown values of time unit passed in by user - acceptable = (unit == 'full') | (unit == 'annual') | (unit == 'monthly') | (unit == 'daily') | (unit == 'pentad') - - if not acceptable: - print 'Error: unknown unit type selected for time averaging' - print ' Please check your code.' - return - - # Calculate arrays of years (2007,2007), - # yearsmonths (200701,200702), - # or yearmonthdays (20070101,20070102) - # -depending on user selected averaging period. - - # Year list - if unit == 'annual': - print 'Calculating annual mean data' - timeunits = [] - - for i in np.arange(len(dateList)): - timeunits.append(str(dateList[i].year)) - - timeunits = np.array(timeunits, dtype=int) - - # YearMonth format list - if unit == 'monthly': - print 'Calculating monthly mean data' - timeunits = [] - - for i in np.arange(len(dateList)): - timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month)) - - timeunits = np.array(timeunits, dtype=int) - - # YearMonthDay format list - if unit == 'daily': - print 'Calculating daily mean data' - timeunits = [] - - for i in np.arange(len(dateList)): - timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month) + \ - str("%02d" % dateList[i].day)) - - timeunits = np.array(timeunits, dtype=int) - - - # TODO: add pentad setting using Julian days? - - - # Full list: a special case - if unit == 'full': - print 'Calculating means data over full time range' - timeunits = [] - - for i in np.arange(len(dateList)): - timeunits.append(999) # i.e. we just want the same value for all times. - - timeunits = np.array(timeunits, dtype=int) - - - - # empty list to store new times - newTimesList = [] - - # Decide whether or not you need to do any time averaging. - # i.e. if data are already on required time unit then just pass data through and - # calculate and return representative datetimes. - processing_required = True - if len(timeunits) == (len(np.unique(timeunits))): - processing_required = False - - # 1D data arrays, i.e. time series - if data.ndim == 1: - # Create array to store the resulting data - meanstore = np.zeros(len(np.unique(timeunits))) - - # Calculate the means across each unique time unit - i = 0 - for myunit in np.unique(timeunits): - if processing_required: - datam = ma.masked_array(data, timeunits != myunit) - meanstore[i] = ma.average(datam) - - # construct new times list - smyunit = str(myunit) - if len(smyunit) == 4: # YYYY - yyyy = int(myunit[0:4]) - mm = 1 - dd = 1 - if len(smyunit) == 6: # YYYYMM - yyyy = int(smyunit[0:4]) - mm = int(smyunit[4:6]) - dd = 1 - if len(smyunit) == 8: # YYYYMMDD - yyyy = int(smyunit[0:4]) - mm = int(smyunit[4:6]) - dd = int(smyunit[6:8]) - if len(smyunit) == 3: # Full time range - # Need to set an appropriate time representing the mid-point of the entire time span - dt = dateList[-1] - dateList[0] - halfway = dateList[0] + (dt / 2) - yyyy = int(halfway.year) - mm = int(halfway.month) - dd = int(halfway.day) - - newTimesList.append(datetime.datetime(yyyy, mm, dd, 0, 0, 0, 0)) - i = i + 1 - - # 3D data arrays - if data.ndim == 3: - - #datamask = create_mask_using_threshold(data,threshold=0.75) - - # Create array to store the resulting data - meanstore = np.zeros([len(np.unique(timeunits)), data.shape[1], data.shape[2]]) - - # Calculate the means across each unique time unit - i = 0 - datamask_store = [] - for myunit in np.unique(timeunits): - if processing_required: - - mask = np.zeros_like(data) - mask[timeunits != myunit, :, :] = 1.0 - - # Calculate missing data mask within each time unit... - datamask_at_this_timeunit = np.zeros_like(data) - datamask_at_this_timeunit[:] = create_mask_using_threshold(data[timeunits == myunit, :, :], threshold=0.75) - # Store results for masking later - datamask_store.append(datamask_at_this_timeunit[0]) - - # Calculate means for each pixel in this time unit, ignoring missing data (using masked array). - datam = ma.masked_array(data, np.logical_or(mask, datamask_at_this_timeunit)) - meanstore[i, :, :] = ma.average(datam, axis=0) - - # construct new times list - smyunit = str(myunit) - if len(smyunit) == 4: # YYYY - yyyy = int(smyunit[0:4]) - mm = 1 - dd = 1 - if len(smyunit) == 6: # YYYYMM - yyyy = int(smyunit[0:4]) - mm = int(smyunit[4:6]) - dd = 1 - if len(smyunit) == 8: # YYYYMMDD - yyyy = int(smyunit[0:4]) - mm = int(smyunit[4:6]) - dd = int(smyunit[6:8]) - if len(smyunit) == 3: # Full time range - # Need to set an appropriate time representing the mid-point of the entire time span - dt = dateList[-1] - dateList[0] - halfway = dateList[0] + (dt / 2) - yyyy = int(halfway.year) - mm = int(halfway.month) - dd = int(halfway.day) - - newTimesList.append(datetime.datetime(yyyy, mm, dd, 0, 0, 0, 0)) - - i += 1 - - if not processing_required: - meanstorem = data - - if processing_required: - # Create masked array (using missing data mask defined above) - datamask_store = np.array(datamask_store) - meanstorem = ma.masked_array(meanstore, datamask_store) - - return meanstorem, newTimesList - -def calc_running_accum_from_period_accum(data): - ''' - Routine to calculate running total accumulations from individual period accumulations. - :: - - e.g. 0,0,1,0,0,2,2,1,0,0 - -> 0,0,1,1,1,3,5,6,6,6 - - Input:: - data: numpy array with time in the first axis - - Output:: - running_acc: running accumulations - - ''' - - - running_acc = np.zeros_like(data) - - if(len(data.shape) == 1): - running_acc[0] = data[0] - - if(len(data.shape) > 1): - running_acc[0, :] = data[0, :] - - for i in np.arange(data.shape[0] - 1): - if(len(data.shape) == 1): - running_acc[i + 1] = running_acc[i] + data[i + 1] - - if(len(data.shape) > 1): - running_acc[i + 1, :] = running_acc[i, :] + data[i + 1, :] - - return running_acc - -def ignore_boundaries(data, rim=10): - ''' - Routine to mask the lateral boundary regions of model data to ignore them from calculations. - - Input:: - data - a masked array of model data - rim - (optional) number of grid points to ignore - - Output:: - data - data array with boundary region masked - - ''' - - nx = data.shape[1] - ny = data.shape[0] - - rimmask = np.zeros_like(data) - for j in np.arange(rim): - rimmask[j, 0:nx - 1] = 1.0 - - for j in ny - 1 - np.arange(rim): - rimmask[j, 0:nx - 1] = 1.0 - - for i in np.arange(rim): - rimmask[0:ny - 1, i] = 1.0 - - for i in nx - 1 - np.arange(rim): - rimmask[0:ny - 1, i] = 1.0 - - data = ma.masked_array(data, mask=rimmask) - - return data - -def normalizeDatetimes(datetimes, timestep): - """ This function has been moved to the ocw/dataset_processor module """ - from ocw import dataset_processor as dsp - return dsp._rcmes_normalize_datetimes(datetimes, timestep) - -def getModelTimes(modelFile, timeVarName): - ''' - TODO: Do a better job handling dates here - Routine to convert from model times ('hours since 1900...', 'days since ...') - into a python datetime structure - - Input:: - modelFile - path to the model tile you want to extract the times list and modelTimeStep from - timeVarName - name of the time variable in the model file - - Output:: - times - list of python datetime objects describing model data times - modelTimeStep - 'hourly','daily','monthly','annual' - ''' - - f = netCDF4.Dataset(modelFile, mode='r') - xtimes = f.variables[timeVarName] - timeFormat = xtimes.units.encode() - - # search to check if 'since' appears in units - try: - sinceLoc = re.search('since', timeFormat).end() - - except AttributeError: - print 'Error decoding model times: time variable attributes do not contain "since"' - raise - - units = None - TIME_UNITS = ('minutes', 'hours', 'days', 'months', 'years') - # search for 'seconds','minutes','hours', 'days', 'months', 'years' so know units - for unit in TIME_UNITS: - if re.search(unit, timeFormat): - units = unit - break - - # cut out base time (the bit following 'since') - base_time_string = string.lstrip(timeFormat[sinceLoc:]) - # decode base time - base_time = decodeTimeFromString(base_time_string) - times = [] - - for xtime in xtimes[:]: - - # Cast time as an int - #TODO: KDW this may cause problems for data that is hourly with more than one timestep in it - xtime = int(xtime) - - if units == 'minutes': - dt = datetime.timedelta(minutes=xtime) - new_time = base_time + dt - elif units == 'hours': - dt = datetime.timedelta(hours=xtime) - new_time = base_time + dt - elif units == 'days': - dt = datetime.timedelta(days=xtime) - new_time = base_time + dt - elif units == 'months': - # NB. adding months in python is complicated as month length varies and hence ambiguous. - # Perform date arithmatic manually - # Assumption: the base_date will usually be the first of the month - # NB. this method will fail if the base time is on the 29th or higher day of month - # -as can't have, e.g. Feb 31st. - new_month = int(base_time.month + xtime % 12) - new_year = int(math.floor(base_time.year + xtime / 12.)) - new_time = datetime.datetime(new_year, new_month, base_time.day, base_time.hour, base_time.second, 0) - elif units == 'years': - dt = datetime.timedelta(years=xtime) - new_time = base_time + dt - - times.append(new_time) - - try: - if len(xtimes) == 1: - timeStepLength = 0 - else: - timeStepLength = int(xtimes[1] - xtimes[0] + 1.e-12) - - modelTimeStep = getModelTimeStep(units, timeStepLength) - - #if timeStepLength is zero do not normalize times as this would create an empty list for MERG (hourly) data - if timeStepLength != 0: - times = normalizeDatetimes(times, modelTimeStep) - except: - raise - - - return times, modelTimeStep - -def getModelTimeStep(units, stepSize): - # Time units are now determined. Determine the time intervals of input data (mdlTimeStep) - - if units == 'minutes': - if stepSize == 60: - modelTimeStep = 'hourly' - elif stepSize == 1440: - modelTimeStep = 'daily' - # 28 days through 31 days - elif 40320 <= stepSize <= 44640: - modelTimeStep = 'monthly' - # 365 days through 366 days - elif 525600 <= stepSize <= 527040: - modelTimeStep = 'annual' - else: - raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize) - - elif units == 'hours': - #need a check for fractional hrs and only one hr i.e. stepSize=0 e.g. with MERG data - if stepSize == 0 or stepSize == 1: - modelTimeStep = 'hourly' - elif stepSize == 24: - modelTimeStep = 'daily' - elif 672 <= stepSize <= 744: - modelTimeStep = 'monthly' - elif 8760 <= stepSize <= 8784: - modelTimeStep = 'annual' - else: - raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize) - - elif units == 'days': - if stepSize == 1: - modelTimeStep = 'daily' - elif 28 <= stepSize <= 31: - modelTimeStep = 'monthly' - elif 365 <= stepSize <= 366: - modelTimeStep = 'annual' - else: - raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize) - - elif units == 'months': - if stepSize == 1: - modelTimeStep = 'monthly' - elif stepSize == 12: - modelTimeStep = 'annual' - else: - raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize) - - elif units == 'years': - if stepSize == 1: - modelTimeStep = 'annual' - else: - raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize) - - else: - errorMessage = 'the time unit ', units, ' is not currently handled in this version.' - raise Exception(errorMessage) - - return modelTimeStep - -def decodeTimeFromString(time_string): - ''' - Decodes string into a python datetime object - *Method:* tries a bunch of different time format possibilities and hopefully one of them will hit. - :: - - **Input:** time_string - a string that represents a date/time - - **Output:** mytime - a python datetime object - ''' - - # This will deal with times that use decimal seconds - if '.' in time_string: - time_string = time_string.split('.')[0] + '0' - else: - pass - - try: - mytime = time.strptime(time_string, '%Y-%m-%d %H:%M:%S') - mytime = datetime.datetime(*mytime[0:6]) - return mytime - - except ValueError: - pass - - try: - mytime = time.strptime(time_string, '%Y/%m/%d %H:%M:%S') - mytime = datetime.datetime(*mytime[0:6]) - return mytime - - except ValueError: - pass - - try: - mytime = time.strptime(time_string, '%Y%m%d %H:%M:%S') - mytime = datetime.datetime(*mytime[0:6]) - return mytime - - except ValueError: - pass - - try: - mytime = time.strptime(time_string, '%Y:%m:%d %H:%M:%S') - mytime = datetime.datetime(*mytime[0:6]) - return mytime - - except ValueError: - pass - - try: - mytime = time.strptime(time_string, '%Y%m%d%H%M%S') - mytime = datetime.datetime(*mytime[0:6]) - return mytime - - except ValueError: - pass - - try: - mytime = time.strptime(time_string, '%Y-%m-%d %H:%M') - mytime = datetime.datetime(*mytime[0:6]) - return mytime - - except ValueError: - pass - - try: - mytime = time.strptime(time_string, '%Y-%m-%d %H') - mytime = datetime.datetime(*mytime[0:6]) - return mytime - - except ValueError: - pass - - try: - mytime = time.strptime(time_string, '%Y-%m-%d') - mytime = datetime.datetime(*mytime[0:6]) - return mytime - - except ValueError: - pass - - print 'Error decoding time string: string does not match a predefined time format' - return 0 - -def regrid_wrapper(regrid_choice, obsdata, obslats, obslons, wrfdata, wrflats, wrflons): - ''' - Wrapper routine for regridding. - - Either regrids model to obs grid, or obs to model grid, depending on user choice - - Inputs:: - regrid_choice - [0] = Regrid obs data onto model grid or - [1] = Regrid model data onto obs grid - - obsdata,wrfdata - data arrays - obslats,obslons - observation lat,lon arrays - wrflats,wrflons - model lat,lon arrays - - Output:: - rdata,rdata2 - regridded data - lats,lons - latitudes and longitudes of regridded data - - ''' - - # Regrid obs data onto model grid - if(regrid_choice == '0'): - - ndims = len(obsdata.shape) - if(ndims == 3): - newshape = [obsdata.shape[0], wrfdata.shape[1], wrfdata.shape[2]] - nT = obsdata.shape[0] - - if(ndims == 2): - newshape = [wrfdata.shape[0], wrfdata.shape[1]] - nT = 0 - - rdata = wrfdata - lats, lons = wrflats, wrflons - - # Create a new masked array with the required dimensions - tmp = np.zeros(newshape) - rdata2 = ma.MaskedArray(tmp, mask=(tmp == 0)) - tmp = 0 # free up some memory - - rdata2[:] = 0.0 - - if(nT > 0): - for t in np.arange(nT): - rdata2[t, :, :] = do_regrid(obsdata[t, :, :], obslats[:, :], obslons[:, :], wrflats[:, :], wrflons[:, :]) - - if(nT == 0): - rdata2[:, :] = do_regrid(obsdata[:, :], obslats[:, :], obslons[:, :], wrflats[:, :], wrflons[:, :]) - - - # Regrid model data onto obs grid - if(regrid_choice == '1'): - ndims = len(wrfdata.shape) - if(ndims == 3): - newshape = [wrfdata.shape[0], obsdata.shape[1], obsdata.shape[2]] - nT = obsdata.shape[0] - - if(ndims == 2): - newshape = [obsdata.shape[0], obsdata.shape[1]] - nT = 0 - - rdata2 = obsdata - lats, lons = obslats, obslons - - tmp = np.zeros(newshape) - rdata = ma.MaskedArray(tmp, mask=(tmp == 0)) - tmp = 0 # free up some memory - - rdata[:] = 0.0 - - if(nT > 0): - for t in np.arange(nT): - rdata[t, :, :] = do_regrid(wrfdata[t, :, :], wrflats[:, :], wrflons[:, :], obslats[:, :], obslons[:, :]) - - if(nT == 0): - rdata[:, :] = do_regrid(wrfdata[:, :], wrflats[:, :], wrflons[:, :], obslats[:, :], obslons[:, :]) - - - return rdata, rdata2, lats, lons - -def extract_sub_time_selection(allTimes, subTimes, data): - ''' - Routine to extract a sub-selection of times from a data array. - - Example:: - Suppose a data array has 30 time values for daily data for a whole month, - but you only want the data from the 5th-15th of the month. - - Input:: - allTimes - list of datetimes describing what times the data in the data array correspond to - subTimes - the times you want to extract data from - data - the data array - - Output:: - subdata - subselection of data array - - ''' - # Create new array to store the subselection - subdata = np.zeros([len(subTimes), data.shape[1], data.shape[2]]) - - # Loop over all Times and when it is a member of the required subselection, copy the data across - i = 0 # counter for allTimes - subi = 0 # counter for subTimes - for t in allTimes: - if(np.in1d(allTimes, subTimes)): - subdata[subi, :, :] = data[i, :, :] - subi += 1 - i += 1 - - return subdata - -def calc_average_on_new_time_unit_K(data, dateList, unit): - """ - This function has been moved to the ocw/dataset_processor module - """ - from ocw import dataset_processor - temporally_regridded_data = dataset_processor._rcmes_calc_average_on_new_time_unit_K(data, dateList, unit) - return temporally_regridded_data - - -def decode_model_timesK(ifile,timeVarName,file_type): - ################################################################################################# - # Convert model times ('hours since 1900...', 'days since ...') into a python datetime structure - # Input: - # filelist - list of model files - # timeVarName - name of the time variable in the model files - # Output: - # times - list of python datetime objects describing model data times - # Peter Lean February 2011 - ################################################################################################# - f = netCDF4.Dataset(ifile,mode='r',format=file_type) - xtimes = f.variables[timeVarName] - timeFormat = xtimes.units.encode() - #timeFormat = "days since 1979-01-01 00:00:00" - # search to check if 'since' appears in units - try: - sinceLoc = re.search('since',timeFormat).end() - except: - print 'Error decoding model times: time var attributes do not contain "since": Exit' - sys.exit() - # search for 'seconds','minutes','hours', 'days', 'months', 'years' so know units - # TODO: Swap out this section for a set of if...elseif statements - units = '' - try: - _ = re.search('minutes',timeFormat).end() - units = 'minutes' - except: - pass - try: - _ = re.search('hours',timeFormat).end() - units = 'hours' - except: - pass - try: - _ = re.search('days',timeFormat).end() - units = 'days' - except: - pass - try: - _ = re.search('months',timeFormat).end() - units = 'months' - except: - pass - try: - _ = re.search('years',timeFormat).end() - units = 'years' - except: - pass - # cut out base time (the bit following 'since') - base_time_string = string.lstrip(timeFormat[sinceLoc:]) - # decode base time - # 9/25/2012: datetime.timedelta has problem with the argument because xtimes is NioVariable. - # Soln (J. Kim): use a temp variable ttmp in the for loop, then convert it into an integer xtime. - base_time = decodeTimeFromString(base_time_string) - times=[] - for ttmp in xtimes[:]: - xtime = int(ttmp) - if(units=='minutes'): - dt = datetime.timedelta(minutes=xtime); new_time = base_time + dt - if(units=='hours'): - dt = datetime.timedelta(hours=xtime); new_time = base_time + dt - if(units=='days'): - dt = datetime.timedelta(days=xtime); new_time = base_time + dt - if(units=='months'): # NB. adding months in python is complicated as month length varies and hence ambigous. - # Perform date arithmatic manually - # Assumption: the base_date will usually be the first of the month - # NB. this method will fail if the base time is on the 29th or higher day of month - # -as can't have, e.g. Feb 31st. - new_month = int(base_time.month + xtime % 12) - new_year = int(math.floor(base_time.year + xtime / 12.)) - #print type(new_year),type(new_month),type(base_time.day),type(base_time.hour),type(base_time.second) - new_time = datetime.datetime(new_year,new_month,base_time.day,base_time.hour,base_time.second,0) - if(units=='years'): - dt = datetime.timedelta(years=xtime); new_time = base_time + dt - times.append(new_time) - return times - - -def regrid_in_time(data,dateList,unit): - ################################################################################################# - # Routine to calculate averages on longer time units than the data exists on. - # e.g. if the data is 6-hourly, calculate daily, or monthly means. - # Input: - # data - data values - # dateList - list of python datetime structures corresponding to data values - # unit - string describing time unit to average onto - # e.g. 'monthly', 'daily', 'pentad','annual','decadal' - # Output: - # meanstorem - numpy masked array of data values meaned over required time period - # newTimesList - a list of python datetime objects representing the data in the new averagin period - # NB. currently set to beginning of averaging period, - # i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z. - # .............................. - # Jinwon Kim, Sept 30, 2012 - # Created from calc_average_on_new_time_unit_K, Peter's original, with the modification below: - # Instead of masked array used by Peter, use wh to defined data within the averaging range. - ################################################################################################# - - print '*** Begin calc_average_on_new_time_unit_KK ***' - # Check if the user-selected temporal grid is valid. If not, EXIT - acceptable = (unit=='full')|(unit=='annual')|(unit=='monthly')|(unit=='daily')|(unit=='pentad') - if not acceptable: - print 'Error: unknown unit type selected for time averaging: EXIT'; return -1,-1,-1,-1 - - # Calculate time arrays of: annual (yyyy, [2007]), monthly (yyyymm, [200701,200702]), daily (yyyymmdd, [20070101,20070102]) - # "%02d" is similar to i2.2 in Fortran - if unit=='annual': # YYYY - timeunits = [] - for i in np.arange(len(dateList)): - timeunits.append(str(dateList[i].year)) - elif unit=='monthly': # YYYYMM - timeunits = [] - for i in np.arange(len(dateList)): - timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month)) - elif unit=='daily': # YYYYMMDD - timeunits = [] - for i in np.arange(len(dateList)): - timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month) + str("%02d" % dateList[i].day)) - elif unit=='full': # Full list: a special case - comment='Calculating means data over the entire time range: i.e., annual-mean climatology' - timeunits = [] - for i in np.arange(len(dateList)): - timeunits.append(999) # i.e. we just want the same value for all times. - timeunits = np.array(timeunits,dtype=int) - print 'timeRegridOption= ',unit#'; output timeunits= ',timeunits - #print 'timeRegridOption= ',unit,'; output timeunits= ',timeunits - - # empty list to store time levels after temporal regridding - newTimesList = [] - - # Decide whether or not you need to do any time averaging. - # i.e. if data are already on required time unit then just pass data through and calculate and return representative datetimes. - processing_required = True - if len(timeunits)==(len(np.unique(timeunits))): - processing_required = False - print 'processing_required= ',processing_required,': input time steps= ',len(timeunits),': regridded output time steps = ',len(np.unique(timeunits)) - #print 'np.unique(timeunits): ',np.unique(timeunits) - print 'data.ndim= ',data.ndim - - if data.ndim==1: # 1D data arrays, i.e. 1D time series - # Create array to store the temporally regridded data - meanstore = np.zeros(len(np.unique(timeunits))) - # Calculate the means across each unique time unit - i=0 - for myunit in np.unique(timeunits): - if processing_required: - wh = timeunits==myunit - datam = data[wh] - meanstore[i] = ma.average(datam) - # construct new times list - smyunit = str(myunit) - if len(smyunit)==4: # YYYY - yyyy = int(myunit[0:4]) - mm = 1 - dd = 1 - if len(smyunit)==6: # YYYYMM - yyyy = int(smyunit[0:4]) - mm = int(smyunit[4:6]) - dd = 1 - if len(smyunit)==8: # YYYYMMDD - yyyy = int(smyunit[0:4]) - mm = int(smyunit[4:6]) - dd = int(smyunit[6:8]) - if len(smyunit)==3: # Full time range - # Need to set an appropriate time representing the mid-point of the entire time span - dt = dateList[-1]-dateList[0] - halfway = dateList[0]+(dt/2) - yyyy = int(halfway.year) - mm = int(halfway.month) - dd = int(halfway.day) - newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0)) - i = i+1 - - elif data.ndim==3: # 3D data arrays, i.e. 2D time series - # Create array to store the resulting data - meanstore = np.zeros([len(np.unique(timeunits)),data.shape[1],data.shape[2]]) - # Calculate the means across each unique time unit - i=0 - datamask_store = [] - for myunit in np.unique(timeunits): - if processing_required: - wh = timeunits==myunit - datam = data[wh,:,:] - meanstore[i,:,:] = ma.average(datam,axis=0) - # construct new times list - smyunit = str(myunit) - if len(smyunit)==4: # YYYY - yyyy = int(smyunit[0:4]) - mm = 1 - dd = 1 - if len(smyunit)==6: # YYYYMM - yyyy = int(smyunit[0:4]) - mm = int(smyunit[4:6]) - dd = 1 - if len(smyunit)==8: # YYYYMMDD - yyyy = int(smyunit[0:4]) - mm = int(smyunit[4:6]) - dd = int(smyunit[6:8]) - if len(smyunit)==3: # Full time range - # Need to set an appropriate time representing the mid-point of the entire time span - dt = dateList[-1]-dateList[0] - halfway = dateList[0]+(dt/2) - yyyy = int(halfway.year) - mm = int(halfway.month) - dd = int(halfway.day) - newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0)) - i += 1 - - if not processing_required: - meanstorem = data - elif processing_required: - meanstorem = meanstore - - print '*** End calc_average_on_new_time_unit_KK ***' - return meanstorem,newTimesList