In [7]:
# Shell escape: run s3fs to mount `mogreps-g` at the local notebook data directory,
# passing the `jade-secrets` IAM role as a mount option. The `!!` form returns the
# command's output lines as a list.
!!s3fs mogreps-g /usr/local/share/notebooks/data/mogreps-g -o iam_role=jade-secrets

[]

In [14]:
# Shell escape: export this notebook (monthly_climate.ipynb) as a plain Python script
# so it can be run from the command line with real arguments.
!!jupyter nbconvert --to script monthly_climate.ipynb

['[NbConvertApp] Converting notebook monthly_climate.ipynb to script',
 '[NbConvertApp] Writing 4051 bytes to monthly_climate.py']

In [12]:
import iris
import os
import datetime
import numpy as np
import sys
import os
import datetime

In [2]:
# Notebook-only stand-in for the command line. The list mimics
# `monthly_climate.py <month 1-12> <region A-D> <input directory>`.
# Remove this line if not running in notebook.
sys.argv = ['monthly_climate.py', '12', 'B', '/usr/local/share/notebooks/data/mogreps-g/201612/']

In [3]:
# Parse and validate the three positional arguments:
#   1. month       - integer 1-12
#   2. region_name - one of 'A', 'B', 'C', 'D'
#   3. dir_path    - existing directory holding the input files
#
# Each failure stops execution with a message naming the offending argument.
# Printing and carrying on would leave `month`, `region_name` or `dir_path`
# undefined and surface later as an unrelated NameError. Explicit checks are
# used instead of `assert` so validation still runs under `python -O`.
try:
    month = int(sys.argv[1])
    if month not in range(1, 13):
        raise ValueError(month)
except (IndexError, ValueError):
    sys.exit('Arg 1 must be a month in the range 1-12.')

try:
    region_name = sys.argv[2]
    if region_name not in ('A', 'B', 'C', 'D'):
        raise ValueError(region_name)
except (IndexError, ValueError):
    sys.exit('Arg 2 must be a region A, B, C or D.')

try:
    dir_path = sys.argv[3]
    # The path is later handed to os.listdir, so it has to be a directory,
    # not merely something that exists.
    if not os.path.isdir(dir_path):
        raise ValueError(dir_path)
except (IndexError, ValueError):
    sys.exit('Arg 3 must be the path to an existing directory of input files.')


In [None]:
# Coordinate selectors splitting the grid in two along each axis:
# latitude at 0 and longitude at 180.
lat_con = iris.Constraint(latitude=lambda lat: lat >= 0)
not_lat_con = iris.Constraint(latitude=lambda lat: lat < 0)
lon_con = iris.Constraint(longitude=lambda lon: lon >= 180)
not_lon_con = iris.Constraint(longitude=lambda lon: lon < 180)

# Region letter -> combined latitude/longitude constraint (one quadrant each).
regions = dict(
    A=lat_con & lon_con,
    B=lat_con & not_lon_con,
    C=not_lat_con & lon_con,
    D=not_lat_con & not_lon_con,
)

def file_valid_time(file):
    """Work out the valid time encoded in a data file's name.

    The base name (up to its first '.') must consist of exactly seven
    '_'-separated fields, laid out as
    ``prods_op_<model>_<date>_<run>_<member>_<lead_time>``, where ``date`` is
    ``YYYYMMDD`` and ``run`` and ``lead_time`` are whole hours.

    Args:
        file: Path or bare file name following the layout above.

    Returns:
        A naive ``datetime.datetime``: midnight of ``date`` plus ``lead_time``
        hours plus ``run`` hours.

    Raises:
        ValueError: If the name does not split into seven fields, or a numeric
            field cannot be converted.
    """
    stem = os.path.basename(file).split('.')[0]
    _prefix, _kind, _model, date, run, _member, lead_time = stem.split("_")

    year, month_number, day = int(date[:4]), int(date[4:6]), int(date[6:8])
    midnight = datetime.datetime(year, month_number, day)

    lead_offset = datetime.timedelta(hours=int(lead_time))
    run_offset = datetime.timedelta(hours=int(run))
    return midnight + lead_offset + run_offset

def month_in_file(month, file):
    """Tell whether the valid time encoded in `file`'s name falls in `month`.

    Args:
        month: Calendar month number to compare against.
        file: Path or file name understood by ``file_valid_time``.

    Returns:
        True when the month of the file's valid time equals ``month``.
    """
    valid_time = file_valid_time(file)
    return valid_time.month == month
        
def load_stash_for_time_and_region(file, stash, region):
    """Load the cubes in `file` for one STASH code, one region and the file's valid time.

    Args:
        file: Path of the data file; its name must be understood by
            ``file_valid_time``.
        stash: Value matched against each cube's ``STASH`` attribute.
        region: An iris constraint restricting the horizontal extent
            (for example one of the entries in ``regions``).

    Returns:
        Whatever ``iris.load`` returns for the combined constraint.
    """
    # Pin the valid time to UTC before converting to epoch hours. Calling
    # .timestamp() on a naive datetime interprets it in the machine's local
    # time zone, which would shift the target by the UTC offset on any
    # non-UTC host and make the time constraint match nothing.
    # NOTE(review): assumes the date/run in the file name are UTC and that the
    # time coordinate is expressed in hours since 1970-01-01 - confirm.
    valid_time_utc = file_valid_time(file).replace(tzinfo=datetime.timezone.utc)
    valid_time_in_hours = valid_time_utc.timestamp() / (60 * 60)

    return iris.load(file,
                     iris.AttributeConstraint(STASH=stash) &
                     region &
                     # +/- 0.1 h window so floating-point noise cannot miss the time point.
                     iris.Constraint(time=lambda t: valid_time_in_hours - 0.1 < t < valid_time_in_hours + 0.1))


def average_for_month_and_region(month, region, stash, files):
    """Average horizontal slices per pressure level over the files for one month.

    Files whose name-derived valid time is not in ``month`` are skipped. Each
    remaining file is loaded for ``stash`` and ``region``, every loaded cube is
    cut into latitude/longitude slices, and the slices are summed per pressure
    level while counting how many were added.

    Args:
        month: Calendar month number used to select files.
        region: Constraint passed through to ``load_stash_for_time_and_region``.
        stash: STASH code passed through to ``load_stash_for_time_and_region``.
        files: Iterable of file paths to consider.

    Returns:
        A list with one entry per pressure level seen: the summed slice
        divided by the number of slices that went into it. Empty when no file
        contributed a slice.
    """
    print('start month:%s, region:%s, stash:%s' % (month, region, stash))

    # pressure level -> {'cube': running sum, 'count': slices summed so far}
    running = {}
    for path in files:
        if not month_in_file(month, path):
            continue
        loaded = load_stash_for_time_and_region(path, stash, region)
        print('Start processing a cube')
        for cube in loaded:
            for plane in cube.slices(['latitude', 'longitude']):
                pressure = plane.coord('pressure').points[0]
                previous = running.get(pressure)
                if previous:
                    entry = {
                        'cube': previous['cube'] + plane,
                        'count': previous['count'] + 1,
                    }
                else:
                    # First slice at this level seeds the running sum.
                    entry = {'cube': plane, 'count': 1}
                running[pressure] = entry
            print('Done a cube')

    print('Create averages')
    means = []
    for pressure, entry in running.items():
        print('for level %s we have %s slices' % (pressure, entry['count']))
        means.append(entry['cube'] / entry['count'])

    return means

In [13]:
# Run the averaging for the configured month and region, then save one NetCDF
# file per pressure level into the current working directory.
start_time = datetime.datetime.now()

# Use %-formatting here: passing start_time as a second print() argument
# would emit the literal "%s" followed by the time.
print("Starting at %s" % start_time)

files = [os.path.join(dir_path, f) for f in os.listdir(dir_path)]
print("%d files to process in dir %s" % (len(files), dir_path))

# NOTE(review): only the first listed file is passed on (files[:1]), so the
# count printed above overstates the work and each "average" covers a single
# file. os.listdir order is arbitrary, so which file is used is not fixed.
# This looks like a quick smoke-test limit - confirm before a full run.
avgs = average_for_month_and_region(month,
                                    regions[region_name],
                                    'm01s16i203',
                                    files[:1])

for c in avgs:
    # File name carries the pressure level (as an integer) and the region letter.
    name = "ava_tem_plevel_%d_%s.nc" % (c.coord('pressure').points[0], region_name)
    iris.save(c, name)
    print("Saved %s" % name)

time_taken = datetime.datetime.now() - start_time
print("Finished in %s" % (time_taken))

Starting at %s 2017-03-16 12:02:09.482634
596 files to process in dir /usr/local/share/notebooks/data/mogreps-g/201612/
start month:12, region:ConstraintCombination(Constraint(coord_values={'latitude': <function <lambda> at 0x7f66e985fea0>}), Constraint(coord_values={'longitude': <function <lambda> at 0x7f66e985fae8>}), <built-in function and_>), stash:m01s16i203
Start processing a cube
Done a cube
Create averages
for level 100.0 we have 1 slices
for level 230.0 we have 1 slices
for level 200.0 we have 1 slices
for level 300.0 we have 1 slices
for level 270.0 we have 1 slices
for level 400.0 we have 1 slices
for level 1000.0 we have 1 slices
for level 850.0 we have 1 slices
for level 180.0 we have 1 slices
for level 150.0 we have 1 slices
for level 600.0 we have 1 slices
for level 500.0 we have 1 slices
for level 250.0 we have 1 slices
for level 700.0 we have 1 slices
for level 925.0 we have 1 slices
for level 350.0 we have 1 slices


/opt/conda/lib/python3.5/site-packages/iris/fileformats/netcdf.py:2026: IrisDeprecation: NetCDF default saving behaviour currently assigns the outermost dimensions to unlimited. This behaviour is to be deprecated, in favour of no automatic assignment. To switch to the new behaviour, set iris.FUTURE.netcdf_no_unlimited to True.
  warn_deprecated(msg)


Saved ava_tem_plevel_100_B.nc
Saved ava_tem_plevel_230_B.nc
Saved ava_tem_plevel_200_B.nc
Saved ava_tem_plevel_300_B.nc
Saved ava_tem_plevel_270_B.nc
Saved ava_tem_plevel_400_B.nc
Saved ava_tem_plevel_1000_B.nc
Saved ava_tem_plevel_850_B.nc
Saved ava_tem_plevel_180_B.nc
Saved ava_tem_plevel_150_B.nc
Saved ava_tem_plevel_600_B.nc
Saved ava_tem_plevel_500_B.nc
Saved ava_tem_plevel_250_B.nc
Saved ava_tem_plevel_700_B.nc
Saved ava_tem_plevel_925_B.nc
Saved ava_tem_plevel_350_B.nc
Finished in 0:00:21.173828
