- Given a lat lon
- Assume maximum temperature
- convert lat lon to correct projection
- join all times together
- from megacube extract that geospatial point
    - investgate interopolation vs. find nearest
- collapse time by mean
- return float

In [105]:
REQUEST = '''{
    "args": {
        "lat": ["51.473"],
        "lon": ["-0.4487"]
    },
    "path": {
        "parameter": "temperature",
        "operation": "mean"
    }
}'''


In [106]:
# GET /helloworld

print('hello world')


hello world


In [107]:
import datetime
import iris

iris.FUTURE.netcdf_promote = True

def load_data(data_path, dates):
    filenames = get_filenames(data_path, dates)
    cubes = iris.load(filenames)
    return iris.cube.CubeList(cubes).concatenate_cube()

In [108]:
import cartopy

def transform_coords(cube, lon, lat):
    expected_proj = cube.coords('projection_y_coordinate')[0].coord_system.as_cartopy_crs()
    given_proj = cartopy.crs.PlateCarree()
    return expected_proj.transform_point(lon, lat, given_proj)

In [109]:

def get_coords(cube, lat, lon):
    x, y = transform_coords(cube, lon, lat)
    first_year = next(cube.slices_over('time'))
    samples = [('projection_y_coordinate', y), ('projection_x_coordinate', x)]
    first_point = first_year.interpolate(samples, iris.analysis.Nearest())
    exact_x = first_point.coord('projection_x_coordinate')[0].points[0]
    exact_y = first_point.coord('projection_y_coordinate')[0].points[0]
    
    return(exact_x, exact_y)
    


In [110]:

def collapse_latlon(cube, x, y):
    x_const = iris.Constraint(projection_x_coordinate=x)
    y_const = iris.Constraint(projection_y_coordinate=y)
    return cube.extract(x_const).extract(y_const)

In [21]:
from datetime import timedelta

def expand_years(start, end, past_years=10):
    years = [
        datetime.datetime(
            d.year - i,
            d.month,
            d.day)
        for i in range(past_years)
        for d in select_all_days(start, end)]
    return years

def select_all_days(start, end):
    delta = end - start
    if delta.days == 0:
        days = [start]
    else:
        days = [end - timedelta(days=i) for i in range(delta.days + 1)] #include start and end
    return days

def extract_dates(cube, dates):
    time_units = cube.coord('time').units
    vals = [time_units.date2num(date + timedelta(hours=12)) for date in dates] #12pm, not midnight
    time_constraint = iris.Constraint(time=vals)
    return cube.extract(time_constraint)

def extract_range(cube, start, end):
    time_units = cube.coord('time').units
    start_val = time_units.date2num(start + timedelta(hours=12)) #12pm, not midnight
    end_val = time_units.date2num(end + timedelta(hours=12))
    time_constraint = iris.Constraint(time=lambda t: start_val <= t.point <= end_val)
    return cube.extract(time_constraint)

In [112]:
def mean(cube):
    return cube.collapsed('time', iris.analysis.MEAN)

def cmax(cube):
    return cube.collapsed('time', iris.analysis.MAX)

def cmin(cube):
    return cube.collapsed('time', iris.analysis.MIN)

In [113]:
patterns = {
    'temperature': 'maximum-temperature',
    'rainfall': 'rainfall'
}

operations = {
    'mean': mean,
    'max': cmax,
    'min': cmin
}

In [None]:
import glob
def get_filenames(data_path, dates):
    years = set([date.year for date in dates])
    filenames = [f for y in years for f in glob.glob('/opt/data/{}/*_{}*.nc'.format(data_path, y))]
    return filenames

In [81]:
from collections import defaultdict
CACHED_POINTS = defaultdict(lambda: None)

In [115]:
import io
import boto3
import uuid
import matplotlib.pyplot as plt
import numpy

def upload_image(byte_data):
    bucket_name = 'microsoft-hack'
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucket_name)
    uid = str(uuid.uuid4())
    bucket.put_object(
        Body=byte_data,
        ContentType='image/png',
        Key='{}.png'.format(uid),
        ACL='public-read')
    
    return 'https://s3-eu-west-1.amazonaws.com/{}/{}.png'.format(bucket_name, uid)

def graph(cube, times):
    
    title = cube.standard_name.title().replace('_', ' ')


    time_units = cube.coord('time').units
    times = time_units.num2date(cube.coords('time')[0].points)
    plt.plot(times, cube.data, 'b:o')
    plt.xticks(rotation=45)
    plt.ylabel(title+'('+str(cube.units)+')')
    img_data = io.BytesIO()
    plt.savefig(img_data, format='png')
    plt.clf()
    img_data.seek(0)
    s3_url = upload_image(img_data)
    return s3_url

def get_response(cube, dates, x, y, operation, mode):
    time_cube = collapse_latlon(cube, x, y)
    if mode == "climatology":
        collapsed_time_cube = extract_dates(time_cube, dates)
    else:
        collapsed_time_cube = extract_range(time_cube, dates[-1], dates[0])
    
    s3_url = graph(collapsed_time_cube, dates)

    collapsed_cube = operation(collapsed_time_cube)
    
    val = float(collapsed_cube.data)
    
    response = {
        'value': val,
        'start_date': dates[-1].strftime('%Y-%m-%d'),
        'end_date': dates[0].strftime('%Y-%m-%d'),
        'graph': s3_url
    }
    
    return response

In [116]:
def parse_query(req, mode):
    query = req['args']
    lat = float(query['lat'][0])
    lon = float(query['lon'][0])
    start, end = parse_date_range(query, mode)
    return lat, lon, start, end

def parse_path(req):
    path = req['path']
    param = path['parameter']
    op = path['operation']
    return param, op

def get_start_date(end_date, past_years=10):
    return datetime.datetime(end_date.year - past_years, end_date.month, end_date.day)

def parse_date_range(query, mode):
    # if it's a climatology, default start date to end date
    # if it's a range, default start date to 'records began'
    if 'start_date' and 'end_date' in query.keys():
        start_date = datetime.datetime.strptime(query['start_date'][0], '%Y-%m-%d')
        end_date = datetime.datetime.strptime(query['end_date'][0], '%Y-%m-%d')
    elif 'date' in query.keys():
        start_date = datetime.datetime.strptime(query['date'][0], '%Y-%m-%d')
        end_date = start_date
    else:
        end_date = datetime.datetime.now()
        if mode=='climatology':
            start_date = end_date
        else:
            start_date = get_start_date(end_date)
    return start_date, end_date

In [22]:
def handle_request(req, mode, dates_func):
    param, op = parse_path(req)
    lat, lon, start, end = parse_query(req, mode)

    data_path = patterns[param]
    operation = operations[op]

    dates = dates_func(start, end)
    cube = load_data(data_path, dates)
    x, y = get_coords(cube, lat, lon)
    
    cache_key = (mode, param, x, y, start, end, operation)
    
    if CACHED_POINTS[cache_key] != None:
        response = CACHED_POINTS[cache_key]
    else:
        response = get_response(cube, dates, x, y, operation, mode)
        CACHED_POINTS[cache_key] = response
    return response

In [118]:
# GET /:parameter/:operation/climatology
import json

req = json.loads(REQUEST)
response = handle_request(req, 'climatology', expand_years)
print(json.dumps(response))

{"start_date": "1978-03-02", "value": 9.301027009361668, "end_date": "2017-03-02", "graph": "https://s3-eu-west-1.amazonaws.com/microsoft-hack/8898ff33-d7d0-4a8e-a8a2-e59bdbc2af50.png"}




In [63]:
# GET /:parameter/:operation/range
import json

req = json.loads(REQUEST)
response = handle_request(req, 'range', select_all_days)
print(json.dumps(response))

maximum-temperature/*.nc


CoordinateCollapseError: Cannot collapse a dimension which does not describe any data.

In [197]:
# ResponseInfo GET /:parameter/mean/range
print(json.dumps({
    "headers" : {
        "Content-Type" : "application/json"
    }
}))

{"headers": {"Content-Type": "application/json"}}


In [198]:
# ResponseInfo GET /:parameter/mean/climatology
print(json.dumps({
    "headers" : {
        "Content-Type" : "application/json"
    }
}))

{"headers": {"Content-Type": "application/json"}}


In [50]:
!ls /opt/data

Untitled.ipynb      [35mmaximum-temperature[m[m
