In [41]:
from __future__ import print_function

import os
import tempfile

import boto3 as boto
import numpy as np
from datetime import datetime
import iris

# Switch on the iris FUTURE flag `netcdf_no_unlimited` for the whole session,
# ahead of the iris.save call made in save_to_netcdf.
# NOTE(review): presumably this stops iris.save from writing an unlimited
# dimension by default - confirm against the installed iris version.
iris.FUTURE.netcdf_no_unlimited = True


def human_bytes(num, suffix='B'):
    """Format a byte count with a binary (1024-based) unit prefix.

    Args:
        num: The quantity to format; its sign is preserved.
        suffix: Text appended after the unit prefix (default ``'B'``).

    Returns:
        A string such as ``"1.5KiB"``: the value scaled down by powers of
        1024 until its magnitude is below 1024, with one decimal place.
        Values too large for ``Zi`` are reported in ``Yi``.
    """
    binary_prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    position = 0
    # Step up one prefix per division until the value fits or we run out.
    while position < len(binary_prefixes) and not abs(num) < 1024.0:
        num /= 1024.0
        position += 1

    if position == len(binary_prefixes):
        return "%.1f%s%s" % (num, 'Yi', suffix)
    return "%3.1f%s%s" % (num, binary_prefixes[position], suffix)


def parse_s3_uri(s3_uri):
    """Break an S3 URI into its bucket, key and file extension.

    The URI is cut on "/" characters: the third piece is taken as the bucket
    and everything after it as the key. The extension is whatever
    ``os.path.splitext`` reports for the full URI (including the leading dot,
    or ``""`` when there is none).

    Args:
        s3_uri: A string of the form ``"s3://bucket/path/to/key.ext"``.

    Returns:
        A ``(bucket, key, extension)`` tuple of strings.

    Raises:
        IndexError: If the URI has fewer than three "/"-separated pieces.
    """
    # "s3://bucket/some/key" -> ["s3:", "", "bucket", "some/key"]
    pieces = s3_uri.split("/", 3)
    bucket = pieces[2]
    key = pieces[3] if len(pieces) > 3 else ""
    extension = os.path.splitext(s3_uri)[1]
    return bucket, key, extension


def download_object(s3_uri):
    """Download the S3 object at ``s3_uri`` into a named temporary file.

    The parsed bucket, key and extension are printed before the download.

    Args:
        s3_uri: URI of the object, as understood by ``parse_s3_uri``.

    Returns:
        The open ``tempfile.NamedTemporaryFile`` holding the downloaded bytes.
        Its suffix is the extension parsed from the URI. With the default
        ``delete=True`` the file is removed once this object is closed, so the
        caller has to keep a reference for as long as the file is needed.
    """
    bucket, key, extension = parse_s3_uri(s3_uri)
    print(bucket, key, extension)

    data_file = tempfile.NamedTemporaryFile(mode='w+b', suffix=extension)
    remote_object = boto.resource('s3', 'eu-west-2').Object(bucket, key)
    remote_object.download_file(data_file.name)
    return data_file


def assert_cube_lists_equivalent(cubes_in, cubes_out):
    """Assert that every cube in ``cubes_in`` has one equal match in ``cubes_out``.

    For each input cube the match is looked up in ``cubes_out`` by cube name
    and cell methods. Before comparing, ``var_name`` is cleared on every
    coordinate of the matched output cube and ``attributes`` is reset on both
    cubes, so the cubes passed in are modified in place. Both cubes are
    printed before the final equality assertion.

    Args:
        cubes_in: Iterable of reference cubes.
        cubes_out: Cube list offering ``extract``; searched for the matches.

    Raises:
        AssertionError: If the data arrays or the cubes themselves differ.
        ValueError: If the lookup does not yield exactly one cube (raised by
            the single-element unpacking).
    """
    for expected in cubes_in:
        def has_same_cell_methods(candidate):
            return candidate.cell_methods == expected.cell_methods

        by_name = iris.Constraint(name=expected.name())
        by_cell_methods = iris.Constraint(cube_func=has_same_cell_methods)

        # Single-element unpacking: exactly one cube must match.
        actual, = cubes_out.extract(by_name & by_cell_methods)

        # Strip the metadata that is deliberately left out of the comparison.
        for coord in actual.coords():
            coord.var_name = None
        expected.attributes = None
        actual.attributes = None

        np.testing.assert_almost_equal(expected.data, actual.data)
        np.testing.assert_equal(expected.data, actual.data)

        print(expected)
        print(actual)
        assert expected == actual
    
    
def replace_s3_obj(s3_uri, new_file_name):
    """Work out the S3 key under which ``new_file_name`` replaces ``s3_uri``.

    The upload of the new file and the deletion of the old object are still
    commented out, so for now this only derives the destination key, creates
    the S3 resource and prints a progress message.

    Args:
        s3_uri: URI of the object to be replaced, as understood by
            ``parse_s3_uri``.
        new_file_name: Local path of the replacement file. Only its base name
            is used in the destination key.

    Returns:
        The destination key: the old key's prefix (everything before its last
        "/") joined with the base name of ``new_file_name``.
    """
    bucket, old_s3_key, _ = parse_s3_uri(s3_uri)

    # Keep the old key's "directory" and swap the final component for the new
    # file's name. The previous code concatenated two strings and then
    # "/".join-ed the result, which interleaved "/" between every character.
    # NOTE(review): convert_object_from_s3 passes a NamedTemporaryFile path, so
    # the base name is a random temp name - confirm the desired object naming
    # before enabling the upload.
    key_prefix = old_s3_key.split("/")[:-1]
    new_s3_key = "/".join(key_prefix + [os.path.basename(new_file_name)])

    s3 = boto.resource('s3')
    # s3.Object(bucket, new_s3_key).upload_file(new_file_name)
    print("UPLOADING...")

    # s3.Object(bucket, old_s3_key).delete()
    return new_s3_key
    
    
def add_to_dead_letter(s3_uri, dlq_name="dlq"):
    """Send ``s3_uri`` as a message body to the SQS queue named ``dlq_name``.

    Args:
        s3_uri: The URI string to record; used verbatim as the message body.
        dlq_name: Name of the target queue (default ``"dlq"``).
    """
    sqs_resource = boto.resource('sqs', 'eu-west-2')
    dead_letter_queue = sqs_resource.get_queue_by_name(QueueName=dlq_name)
    dead_letter_queue.send_message(MessageBody=s3_uri)
    
def save_to_netcdf(s3_uri):
    """Download an S3 object and re-save its cubes as compressed NetCDF.

    The object is fetched with ``download_object``, loaded with ``iris.load``
    and written with ``iris.save`` as NETCDF4 using zlib compression at
    level 5 into a fresh ``.nc`` temporary file.

    Args:
        s3_uri: URI of the source object.

    Returns:
        A ``(data_file_in, data_file_out)`` pair of open
        ``NamedTemporaryFile`` objects: the downloaded original and the
        NetCDF copy.
    """
    source_file = download_object(s3_uri)
    loaded_cubes = iris.load(source_file.name)

    netcdf_file = tempfile.NamedTemporaryFile(mode='w+b', suffix=".nc")
    save_options = dict(netcdf_format="NETCDF4", zlib=True, complevel=5)
    iris.save(loaded_cubes, netcdf_file.name, **save_options)
    return source_file, netcdf_file
    
def convert_object_from_s3(s3_uri):
    """Convert the object at ``s3_uri`` to NetCDF and verify the conversion.

    The original and the converted file are both loaded back with iris and
    compared with ``assert_cube_lists_equivalent``. If the comparison passes,
    ``replace_s3_obj`` is called with the converted file's path. If it fails,
    the exception propagates to the caller.

    Args:
        s3_uri: URI of the object to convert.
    """
    source_file, netcdf_file = save_to_netcdf(s3_uri)
    original_cubes = iris.load(source_file.name)
    converted_cubes = iris.load(netcdf_file.name)

    try:
        assert_cube_lists_equivalent(original_cubes, converted_cubes)
    except:
        # Dead-letter reporting is currently disabled; the error just propagates.
        # add_to_dead_letter(s3_uri)
        raise

    # Reached only when the comparison passed: the handler above always re-raises.
    replace_s3_obj(s3_uri, netcdf_file.name)
        
def test_file_size(s3_uri):
    """Time the NetCDF conversion of ``s3_uri`` and report both file sizes.

    Prints how long ``save_to_netcdf`` took (that call downloads, loads and
    saves, so the "Compression" figure covers all three) and how long a
    subsequent ``iris.load`` of the original file took.

    Args:
        s3_uri: URI of the object to measure.

    Returns:
        A ``(original_size, netcdf_size)`` pair of human-readable size strings
        produced by ``human_bytes``.
    """
    started = datetime.now()
    data_file_in, data_file_out = save_to_netcdf(s3_uri)
    elapsed = (datetime.now() - started).total_seconds()
    print("Compression took {} seconds".format(elapsed))

    started = datetime.now()
    # Result is only bound to keep it alive; it is the load time that matters.
    reloaded_cubes = iris.load(data_file_in.name)
    elapsed = (datetime.now() - started).total_seconds()
    print("Loading took {} seconds".format(elapsed))

    original_size = human_bytes(os.stat(data_file_in.name).st_size)
    netcdf_size = human_bytes(os.stat(data_file_out.name).st_size)
    return original_size, netcdf_size

In [None]:
# Full conversion run, currently disabled:
# convert_object_from_s3("s3://mogreps-g/201612/prods_op_mogreps-g_20161203_00_00_048.pp")
# Print the (original size, NetCDF size) pair that test_file_size returns for one sample object.
print(test_file_size("s3://mogreps/2016/prods_op_mogreps-g_20160101_00_00_003.pp"))

mogreps 2016/prods_op_mogreps-g_20160101_00_00_003.pp .pp


In [None]:
import iris
import boto3 as boto

def pickup_job(queue_name="new_files"):
    """Fetch at most one pending message from the SQS queue ``queue_name``.

    Previously the queue was looked up but nothing was received or returned,
    so callers always got ``None``.

    Args:
        queue_name: Name of the SQS queue to poll (default ``"new_files"``).

    Returns:
        The first message returned by the queue, or ``None`` when the queue
        returned no messages. The message is not deleted here; removing it
        after successful processing is left to the caller.
    """
    sqs = boto.resource('sqs')
    queue = sqs.get_queue_by_name(QueueName=queue_name)
    messages = queue.receive_messages(MaxNumberOfMessages=1)
    return messages[0] if messages else None

# Entry point: pick up one job and, when there is one, hand it to ingest_job.
# NOTE(review): ingest_job is not defined in the cells shown here - confirm it
# exists before this branch can run.
if __name__ == "__main__":
    job = pickup_job()
    if job:
        ingest_job(job)

In [13]:
import boto3 as boto
# Scratch cell: fetch one sample object from the mogreps-g bucket to the local
# path "test.pp". `s3` is reused by later cells.
s3 = boto.resource('s3')
s3.Object("mogreps-g", "201612/prods_op_mogreps-g_20161203_00_00_048.pp").download_file("test.pp")

In [35]:
# Handle to the sample object for interactive inspection.
# NOTE(review): `_` is also IPython's "last output" variable, so this
# assignment shadows it; depends on `s3` from an earlier cell.
_ = s3.Object("mogreps-g", "201612/prods_op_mogreps-g_20161203_00_00_048.pp")

In [38]:
# IPython help lookup for upload_file on the object handle bound to `_`; interactive leftover.
_.upload_file?

In [10]:
import iris
# Load the cubes from the sample file downloaded to "test.pp" in an earlier cell.
d = iris.load("test.pp")



In [23]:
# Build the same name and cell-method constraints that
# assert_cube_lists_equivalent uses, here keyed on the first loaded cube.
name_con = iris.Constraint(name=d[0].name())
cell_method_con = iris.Constraint(cube_func=lambda c: c.cell_methods==d[0].cell_methods)

In [66]:
# Apply the combined constraint to the loaded cubes to see which ones it selects.
d.extract(name_con&cell_method_con)

[<iris 'Cube' of atmosphere_optical_thickness_due_all_ambient_aerosol / (1) (pseudo_level: 6; latitude: 600; longitude: 800)>]