In [24]:
!apt-get install -y graphviz
!pip install graphviz

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following extra packages will be installed:
  fonts-liberation libcdt5 libcgraph6 libgd3 libgvc6 libgvpr2 libpathplan4
  libvpx1 libxaw7 libxdot4 libxmu6 libxpm4 libxt6
Suggested packages:
  gsfonts graphviz-doc libgd-tools
The following NEW packages will be installed:
  fonts-liberation graphviz libcdt5 libcgraph6 libgd3 libgvc6 libgvpr2
  libpathplan4 libvpx1 libxaw7 libxdot4 libxmu6 libxpm4 libxt6
0 upgraded, 14 newly installed, 0 to remove and 23 not upgraded.
Need to get 3914 kB of archives.
After this operation, 18.2 MB of additional disk space will be used.
Get:1 http://security.debian.org/ jessie/updates/main libxpm4 amd64 1:3.5.12-0+deb8u1 [49.2 kB]
Get:2 http://security.debian.org/ jessie/updates/main libgd3 amd64 2.1.0-5+deb8u9 [148 kB]
Get:3 http://httpredir.debian.org/debian/ jessie/main libvpx1 amd64 1.3.0-3 [599 kB]
Get:4 http://httpredir.debian.org/debian/ jessie/main lib

In [34]:
import os
import tempfile
import numpy as np

import boto3 as boto
import iris

import json
import itertools

import dask

from env import *


SQS_ENDPOINT_URL = "https://sqs.eu-west-2.amazonaws.com/"
JOB_QUEUE_NAME = "mogreps-conversion"
USER_ID = "021908831235"
FAILED_QUEUE_NAME = "mogreps-conversion-failed"
PASSED_QUEUE_NAME = "mogreps-conversion-completed"
JOB_QUEUE_URL = SQS_ENDPOINT_URL + USER_ID + "/" + JOB_QUEUE_NAME


iris.FUTURE.netcdf_no_unlimited = True
iris.FUTURE.netcdf_promote = True


def init_aws():
    session = boto.session.Session(region_name="eu-west-2",
                                   aws_access_key_id=AWS_KEY_ID,
                                   aws_secret_access_key=AWS_SECRET_KEY_ID)
    global sqs
    sqs = session.resource('sqs')
    global s3
    s3 = session.resource('s3')
    

def tear_down_aws():
    sqs.meta.client._endpoint.http_session.close()
    s3.meta.client._endpoint.http_session.close()


def parse_s3_uri(s3_uri):
    nasty_stuff = s3_uri.split("/")
    _, extension = os.path.splitext(s3_uri)
    bucket = nasty_stuff[2]
    key = "/".join(nasty_stuff[3:])

    return bucket, key, extension


def download_object(s3_uri):
    bucket, key, extension = parse_s3_uri(s3_uri)
    data_file = tempfile.NamedTemporaryFile(mode='w+b', suffix=extension)
    s3.Object(bucket, key).download_file(data_file.name)

    return data_file


def assert_cube_lists_equivalent(cubes_in, cubes_out):
     for cube_in in cubes_in:
        if cube_in.name() is not 'Unknown':
            name_con = iris.Constraint(name=cube_in.name())
            cell_method_con = iris.Constraint(cube_func=lambda c: c.cell_methods == cube_in.cell_methods)
            dim_coords_con = iris.Constraint(cube_func=lambda c: all([p.name() == q.name() for (p,q) in zip(c.coords(), cube_in.coords())]))

            [cube_out] = cubes_out.extract(name_con&cell_method_con&dim_coords_con)

            for coord in cube_out.coords():
                coord.var_name = None
            cube_in.attributes = None
            cube_out.attributes = None
            cube_in.var_name = None
            cube_out.var_name = None

            np.testing.assert_equal(cube_in.data, cube_out.data)

            assert cube_in == cube_out
    
    
def replace_s3_obj(s3_uri, new_file_name):
    old_bucket, old_s3_key, old_extension = parse_s3_uri(s3_uri)
    
    if 'mogreps-g' in s3_uri:
        new_bucket = 'mogreps-g'
    elif 'mogreps-uk' in s3_uri:
        new_bucket = 'mogreps-uk'
    else:
        raise ValueError
        
    new_s3_key = old_s3_key.split("/")[-1].replace('pp', 'nc')
    
    s3.Object(new_bucket, new_s3_key).upload_file(new_file_name)
    s3.Object(old_bucket, old_s3_key).delete()
    
    
def add_to_queue(s3_uri, queue):
    
    failed_queue = sqs.get_queue_by_name(QueueName=queue)
    failed_queue.send_message(MessageBody=s3_uri)
    
    
def save_to_netcdf(s3_uri, complevel=1):
    data_file_in = download_object(s3_uri)
    try:
        cubes_in = iris.load(data_file_in.name)
    except:
        raise AttributeError
    
    data_file_out = tempfile.NamedTemporaryFile(mode='w+b', suffix=".nc")
    iris.save(cubes_in, data_file_out.name, netcdf_format="NETCDF4", zlib=True, complevel=complevel)
    return data_file_in, data_file_out
    
    
# def get_jobs():
#     setup_aws_session()
#     sqs = boto.resource('sqs')
#     queue = sqs.Queue(JOB_QUEUE_URL)
    
#     some_messages = queue.receive_messages(MaxNumberOfMessages=1)
#     while len(some_messages) > 0:
#         [this_message] = some_messages
#         s3_uri = "s3://mogreps/"+json.loads(this_message.body)["Message"]
#         yield this_message.receipt_handle, s3_uri
#         some_messages = queue.receive_messages(MaxNumberOfMessages=1)


def get_next_job():
    queue = sqs.Queue(JOB_QUEUE_URL)
    
    some_messages = queue.receive_messages(MaxNumberOfMessages=1)
    [this_message] = some_messages
    s3_uri = "s3://mogreps/"+json.loads(this_message.body)["Message"]
    return this_message.receipt_handle, s3_uri
        

def convert_next_object_from_s3():
    """ Converts file to NetCDF """
    init_aws()
    
    job = get_next_job()
    message_handle, s3_uri = job
    
    try:
        data_file_in, data_file_out = save_to_netcdf(s3_uri)
    except Exception as e:
        add_to_queue(s3_uri, FAILED_QUEUE_NAME)
        return str(e) 
        
    try:
        cubes_in = iris.load(data_file_in.name)
        cubes_out = iris.load(data_file_out.name)
        assert_cube_lists_equivalent(cubes_in, cubes_out)
    except Exception as e:
        add_to_queue(s3_uri, FAILED_QUEUE_NAME)
        return str(e)
    else:
        replace_s3_obj(s3_uri, data_file_out.name)
        msg = sqs.Message(JOB_QUEUE_URL, message_handle)
        msg.delete()
        add_to_queue(s3_uri, PASSED_QUEUE_NAME)
    finally:
        tear_down_aws()

In [48]:
import dask
import distributed
e = distributed.Executor("ec2-35-176-25-244.eu-west-2.compute.amazonaws.com:8786")
e

<Client: scheduler="ec2-35-176-25-244.eu-west-2.compute.amazonaws.com:8786" processes=2 cores=6>

In [44]:
f = e.submit(convert_next_object_from_s3)

In [47]:
e.restart()

<Client: scheduler="ec2-35-176-25-244.eu-west-2.compute.amazonaws.com:8786" processes=1 cores=4>

In [50]:
id(convert_next_object_from_s3)

140433458705136

In [57]:
import copy

thisfn = copy.deepcopy(convert_next_object_from_s3)

In [58]:
id(thisfn)

140433458705136

In [49]:
futures = []
for _ in range(2):
    thisfn = convert_next_object_from_s3.copy()
    future = e.submit(convert_next_object_from_s3)
    futures.append(future)
    
# ac = as_completed(futures)

# for _ in ac:
#     future = e.submit(convert_next_object_from_s3)
#     ac.add(future)

In [35]:
f = e.submit(convert_next_object_from_s3)

In [3]:
# futures = []
# for _ in range(3):
#     future = e.submit(convert_next_object_from_s3)
#     futures.append(future)
    
# processed = dask.delayed(futures)

In [4]:
# from datetime import datetime
# then = datetime.now()
_ = processed.compute()
# now = datetime.now()

distributed.utils - ERROR - list-7ebdf1d2-10ce-414a-b000-dd032e078c2a
Traceback (most recent call last):
  File "/opt/conda/lib/python3.5/site-packages/distributed/client.py", line 893, in _gather
    st = self.futures[key]
KeyError: 'list-7ebdf1d2-10ce-414a-b000-dd032e078c2a'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/conda/lib/python3.5/site-packages/distributed/utils.py", line 149, in f
    result[0] = yield gen.maybe_future(func(*args, **kwargs))
  File "/opt/conda/lib/python3.5/site-packages/tornado/gen.py", line 1015, in run
    value = future.result()
  File "/opt/conda/lib/python3.5/site-packages/tornado/concurrent.py", line 237, in result
    raise_exc_info(self._exc_info)
  File "<string>", line 3, in raise_exc_info
  File "/opt/conda/lib/python3.5/site-packages/tornado/gen.py", line 1021, in run
    yielded = self.gen.throw(*exc_info)
  File "/opt/conda/lib/python3.5/site-packages/distributed/client.p

KeyboardInterrupt: 