In [73]:
from __future__ import print_function

import os
import tempfile

import boto3 as boto
import numpy as np
from datetime import datetime
import iris

# Opt in to Iris's `netcdf_no_unlimited` FUTURE flag before any cubes are saved.
# NOTE(review): presumably this stops iris.save from writing unlimited
# dimensions into the NetCDF output — confirm against the Iris FUTURE docs.
iris.FUTURE.netcdf_no_unlimited = True


def human_bytes(num, suffix='B'):
    """Format a byte count using binary (1024-based) unit prefixes.

    Args:
        num: The quantity to format (may be negative).
        suffix: Text appended after the unit prefix; defaults to ``'B'``.

    Returns:
        A string such as ``"1.5KiB"``: the value scaled down by powers of
        1024 until its magnitude is below 1024, printed with one decimal.
        Values too large for ``Zi`` are reported in ``Yi``.
    """
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    value = num
    for prefix in prefixes:
        if abs(value) < 1024.0:
            return "%3.1f%s%s" % (value, prefix, suffix)
        value /= 1024.0
    # Ran out of prefixes: whatever is left is expressed in yobi-units.
    return "%.1f%s%s" % (value, 'Yi', suffix)


def parse_s3_uri(s3_uri):
    """Split an ``s3://bucket/key`` style URI into its components.

    The URI is split on ``/``; the third piece is taken as the bucket and
    everything after it is re-joined as the key.

    Args:
        s3_uri: URI string, e.g. ``"s3://my-bucket/path/to/file.pp"``.

    Returns:
        A ``(bucket, key, extension)`` tuple, where ``extension`` is the
        file extension of the URI including the leading dot (``""`` if none).

    Raises:
        IndexError: If the URI has fewer than three ``/``-separated pieces.
    """
    pieces = s3_uri.split("/")
    extension = os.path.splitext(s3_uri)[1]
    # pieces[0] is the scheme ("s3:") and pieces[1] the empty string between
    # the two slashes, so the bucket is the third element.
    bucket = pieces[2]
    key = "/".join(pieces[3:])
    return bucket, key, extension


def download_object(s3_uri):
    """Download the S3 object at ``s3_uri`` into a named temporary file.

    The temporary file keeps the object's extension as its suffix. The S3
    resource is created for the ``eu-west-2`` region.

    Args:
        s3_uri: URI of the object, in the form accepted by ``parse_s3_uri``.

    Returns:
        The open ``tempfile.NamedTemporaryFile`` holding the download. The
        caller must keep a reference to it: with the default settings the
        file is removed from disk as soon as it is closed.
    """
    bucket, key, extension = parse_s3_uri(s3_uri)
    local_copy = tempfile.NamedTemporaryFile(mode='w+b', suffix=extension)
    s3_object = boto.resource('s3', 'eu-west-2').Object(bucket, key)
    s3_object.download_file(local_copy.name)
    return local_copy


def sanitize_cubes(cube_in, cube_out):
    """Strip metadata from a cube pair so that they can be compared directly.

    Both cubes are modified in place:

    * every coordinate of ``cube_out`` has its ``var_name`` cleared;
    * ``attributes`` is reset on both cubes;
    * ``var_name`` is cleared on ``cube_out``;
    * if ``cube_in`` has units equal to ``'unknown'`` it adopts the units of
      ``cube_out``.

    Args:
        cube_in: The original cube.
        cube_out: The cube to compare it against.

    Returns:
        The same two objects, as a ``(cube_in, cube_out)`` tuple.
    """
    for out_coord in cube_out.coords():
        out_coord.var_name = None

    for cube in (cube_in, cube_out):
        cube.attributes = None
    cube_out.var_name = None

    units_unset = cube_in.units == 'unknown'
    if units_unset:
        cube_in.units = cube_out.units

    return cube_in, cube_out


def assert_cube_lists_equivalent(cubes_in, cubes_out):
    """Assert that each named cube in ``cubes_in`` has an equal match in ``cubes_out``.

    Cubes whose ``name()`` is ``'unknown'`` are skipped. For every other input
    cube, exactly one cube is extracted from ``cubes_out`` with the same name,
    the same cell methods and pairwise-equal coordinate names; the pair is
    then passed through ``sanitize_cubes`` and compared on data and on cube
    equality.

    Args:
        cubes_in: Iterable of the original cubes.
        cubes_out: Cube list supporting ``.extract(constraint)``.

    Raises:
        ValueError: If the extraction does not yield exactly one cube (from
            the single-element unpacking).
        AssertionError: If the data or the sanitised cubes differ.
    """
    for cube_in in cubes_in:
        # Compare by value. The previous `is not 'unknown'` tested object
        # identity, which only works when the two strings happen to be the
        # same interned object.
        if cube_in.name() != 'unknown':
            name_con = iris.Constraint(name=cube_in.name())
            cell_method_con = iris.Constraint(
                cube_func=lambda c: c.cell_methods == cube_in.cell_methods)
            # zip() stops at the shorter coordinate list, so only the leading
            # coordinates that both cubes have are compared by name.
            coord_name_con = iris.Constraint(
                cube_func=lambda c: all(
                    [a.name() == b.name()
                     for a, b in zip(c.coords(), cube_in.coords())]))

            # The constraints' lambdas read `cube_in` from the loop, which is
            # safe because extraction happens within the same iteration.
            [cube_out] = cubes_out.extract(
                name_con & cell_method_con & coord_name_con)
            sanitized_cube_in, sanitized_cube_out = sanitize_cubes(cube_in, cube_out)

            np.testing.assert_equal(sanitized_cube_in.data, sanitized_cube_out.data)
            assert sanitized_cube_in == sanitized_cube_out
    
    
    
def replace_s3_obj(s3_uri, new_file_name):
    """Replace the S3 object at ``s3_uri`` with the local file ``new_file_name``.

    The new key keeps the "directory" part of the old key and swaps the final
    component for the base name of ``new_file_name``. Only the base name is
    used, so a full local path (for example a temp-file path) does not leak
    its directories into the key.

    NOTE(review): the upload and delete calls are still commented out, so at
    present this only creates the S3 resource and prints ``UPLOADING...``.

    Args:
        s3_uri: URI of the object to be replaced.
        new_file_name: Path of the local file that should take its place.
    """
    bucket, old_s3_key, _ = parse_s3_uri(s3_uri)
    # Previously the last key component (a str) was concatenated with the file
    # name and "/".join() then ran over the characters of that string.
    key_prefix = old_s3_key.split("/")[:-1]
    new_s3_key = "/".join(key_prefix + [os.path.basename(new_file_name)])

    s3 = boto.resource('s3')
    # s3.Object(bucket, new_s3_key).upload_file(new_file_name)
    print("UPLOADING...")

    # s3.Object(bucket, old_s3_key).delete()
    
    
def add_to_dead_letter(s3_uri, dlq_name="dlq"):
    """Send ``s3_uri`` as a message to the SQS queue named ``dlq_name``.

    The SQS resource is created for the ``eu-west-2`` region.

    Args:
        s3_uri: URI string used as the message body.
        dlq_name: Name of the target queue; defaults to ``"dlq"``.
    """
    sqs_resource = boto.resource('sqs', 'eu-west-2')
    dead_letter_queue = sqs_resource.get_queue_by_name(QueueName=dlq_name)
    dead_letter_queue.send_message(MessageBody=s3_uri)
    
    
def save_to_netcdf(s3_uri, complevel=5):
    """Download an S3 object and re-save its cubes as compressed NetCDF4.

    Args:
        s3_uri: URI of the source object.
        complevel: zlib compression level passed to ``iris.save``.

    Returns:
        A ``(data_file_in, data_file_out)`` tuple of open named temporary
        files: the downloaded original and the newly written ``.nc`` file.
        Both must be kept referenced for as long as their contents are needed.
    """
    source_file = download_object(s3_uri)
    source_cubes = iris.load(source_file.name)

    netcdf_file = tempfile.NamedTemporaryFile(mode='w+b', suffix=".nc")
    save_options = dict(netcdf_format="NETCDF4", zlib=True, complevel=complevel)
    iris.save(source_cubes, netcdf_file.name, **save_options)

    return source_file, netcdf_file
    
    
def convert_object_from_s3(s3_uri):
    """Convert the S3 object at ``s3_uri`` to NetCDF and verify the result.

    The object is downloaded and re-saved via ``save_to_netcdf``; both files
    are then loaded again and compared with ``assert_cube_lists_equivalent``.
    If the comparison passes, ``replace_s3_obj`` is called with the new file.
    Any verification error is re-raised to the caller.

    Both temporary files are closed (and therefore removed) before returning,
    whether or not verification succeeded.

    Args:
        s3_uri: URI of the object to convert.
    """
    data_file_in, data_file_out = save_to_netcdf(s3_uri)
    try:
        cubes_in = iris.load(data_file_in.name)
        cubes_out = iris.load(data_file_out.name)

        try:
            assert_cube_lists_equivalent(cubes_in, cubes_out)
        except Exception:
            # add_to_dead_letter(s3_uri)
            raise
        else:
            replace_s3_obj(s3_uri, data_file_out.name)
    finally:
        # Close explicitly so the temp files do not linger when an exception
        # (and its traceback) keeps this frame alive.
        data_file_in.close()
        data_file_out.close()
        
        
def test_file_size(s3_uri, complevel=5):
    """Measure size and timing of converting an S3 object to NetCDF.

    Args:
        s3_uri: URI of the source object.
        complevel: Compression level forwarded to ``save_to_netcdf``.

    Returns:
        A tuple ``(size_in, size_out, comp_time, load_time)``:
        the byte sizes of the downloaded and converted files, the wall-clock
        seconds spent in ``save_to_netcdf`` (which includes the download), and
        the seconds spent loading the downloaded file and reading every
        cube's data.
    """
    started = datetime.now()
    data_file_in, data_file_out = save_to_netcdf(s3_uri, complevel)
    comp_time = (datetime.now() - started).total_seconds()

    started = datetime.now()
    for cube in iris.load(data_file_in.name):
        # Access .data on every cube so the read is part of the timing.
        _ = cube.data
    load_time = (datetime.now() - started).total_seconds()

    size_in = os.stat(data_file_in.name).st_size
    size_out = os.stat(data_file_out.name).st_size
    return (size_in, size_out, comp_time, load_time)

In [75]:
# Run the convert-and-verify pipeline on a single .pp object from the
# mogreps-g bucket. The upload in replace_s3_obj is commented out, so a
# successful run only prints "UPLOADING...".
convert_object_from_s3("s3://mogreps-g/201612/prods_op_mogreps-g_20161203_00_00_048.pp")

/opt/conda/lib/python3.5/site-packages/iris/fileformats/cf.py:1140: IrisDeprecation: NetCDF default loading behaviour currently does not expose variables which define reference surfaces for dimensionless vertical coordinates as independent Cubes. This behaviour is deprecated in favour of automatic promotion to Cubes. To switch to the new behaviour, set iris.FUTURE.netcdf_promote to True.
  warn_deprecated(msg)


UPLOADING...


In [None]:
import iris
import boto3 as boto

def pickup_job(queue_name="new_files"):
    """Look up the SQS queue called ``queue_name``.

    NOTE(review): the queue handle is fetched but no message is read from it
    and there is no return statement, so this always returns ``None``.

    Args:
        queue_name: Name of the SQS queue; defaults to ``"new_files"``.
    """
    sqs_resource = boto.resource('sqs')
    job_queue = sqs_resource.get_queue_by_name(QueueName=queue_name)

# Entry point: fetch a job and, if one was returned, ingest it.
# NOTE(review): `ingest_job` is not defined anywhere in the visible notebook,
# and pickup_job() has no return statement, so `job` is always None and the
# ingest branch is never taken.
if __name__ == "__main__":
    job = pickup_job()
    if job:
        ingest_job(job)

In [39]:
import iris
# Load all cubes from the local PP test file; `d` is the reference cube list
# for the NetCDF round-trip cells that follow.
d = iris.load("../test.pp")



In [41]:
# Write the cubes to 'test.nc' as NetCDF4 with zlib compression level 5
# (the same options save_to_netcdf passes to iris.save by default).
iris.save(d, 'test.nc', netcdf_format="NETCDF4", zlib=True, complevel=5)

In [42]:
# Reload the NetCDF file just written; `d2` is the round-tripped cube list.
d2 = iris.load('test.nc')

/opt/conda/lib/python3.5/site-packages/iris/fileformats/cf.py:1140: IrisDeprecation: NetCDF default loading behaviour currently does not expose variables which define reference surfaces for dimensionless vertical coordinates as independent Cubes. This behaviour is deprecated in favour of automatic promotion to Cubes. To switch to the new behaviour, set iris.FUTURE.netcdf_promote to True.
  warn_deprecated(msg)


In [45]:
# Print the summary listing of the cubes loaded from the PP file.
print(d)

0: atmosphere_optical_thickness_due_all_ambient_aerosol / (1) (pseudo_level: 6; latitude: 600; longitude: 800)
1: stratiform_snowfall_rate / (kg m-2 s-1) (latitude: 600; longitude: 800)
2: stratiform_snowfall_rate / (kg m-2 s-1) (latitude: 600; longitude: 800)
3: unknown / (unknown)                 (latitude: 600; longitude: 800)
4: cloud_base_altitude_assuming_only_consider_cloud_area_fraction_greater_than_2p5_oktas / (kft) (latitude: 600; longitude: 800)
5: cloud_base_altitude_assuming_only_consider_cloud_area_fraction_greater_than_4p5_oktas / (kft) (latitude: 600; longitude: 800)
6: cloud_base_altitude_assuming_only_consider_cloud_area_fraction_greater_than_6p5_oktas / (kft) (latitude: 600; longitude: 800)
7: cloud_area_fraction_assuming_maximum_random_overlap / (1) (latitude: 600; longitude: 800)
8: cloud_area_fraction_assuming_only_consider_surface_to_1000_feet_asl / (1) (latitude: 600; longitude: 800)
9: wet_bulb_freezing_level_altitude / (m) (latitude: 600; longitude: 800)
10: u

In [48]:
# Build constraints matching d[3] (listed as "unknown / (unknown)" in the
# printout of `d`): same name ...
name_con = iris.Constraint(name=d[3].name())
# ... and the same cell methods.
cell_method_con = iris.Constraint(cube_func=lambda c: c.cell_methods==d[3].cell_methods)

In [50]:
# Find the round-tripped counterpart of d[3] in `d2`. The match is printed
# with units "(1)", whereas d[3] was printed with units "(unknown)".
print(d2.extract(name_con&cell_method_con))

0: unknown / (1)                       (latitude: 600; longitude: 800)


In [56]:
# Collect every cube in `d` whose name() is 'unknown'.
uk = [_ for _ in d if _.name()=='unknown']

()