In [1]:
import requests
import pymongo
import pandas as pd
import numpy
import datetime
import os
import json

def batched(iterable, batch_size=10):
    """
    Sequentially yield segments of the iterable in lists of the given size.
    """
    batch = []
    for idx, item in enumerate(iterable):
        batch.append(item)
        batch_idx = idx % batch_size
        if batch_idx == batch_size - 1:
            yield batch
            batch = []
    yield batch

db = pymongo.MongoClient(os.environ['MONGO_HOST'])['flirt']

"Notebook evaluated on: " + str(datetime.datetime.now())

'Notebook evaluated on: 2018-01-23 19:59:23.367399'

In [2]:
events = requests.get('https://eidr-connect.eha.io/api/auto-events', params={
    'limit': 20000
}).json()
len(events)

222

In [3]:
passenger_flows = list(db.passengerFlows.find({
    'simGroup': 'ibis14day'
}))
airport_set = set()
for flow in passenger_flows:
    airport_set.add(flow['arrivalAirport'])
    airport_set.add(flow['departureAirport'])
airport_to_idx = {}
airports = []
for idx, airport in enumerate(airport_set):
    airport_to_idx[airport] = idx
    airports.append(airport)
len(airports)

3623

In [4]:
flow_matrix = numpy.zeros(shape=(len(airport_set), len(airport_set)))
for flow in passenger_flows:
    # Remove US airports from departures?
    flow_matrix[
        airport_to_idx[flow['arrivalAirport']],
        airport_to_idx[flow['departureAirport']]] = flow['estimatedPassengers']
flow_matrix

array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

In [5]:
resolved_events = []
end_date = datetime.datetime.now()
start_date = end_date - datetime.timedelta(days=14)

def resolved_event_iter(events):
    for event_batch in batched(events, 5):
        results = requests.get('https://eidr-connect.eha.io/api/events-with-resolved-data', params={
            'ids': [event['_id'] for event in event_batch],
            'startDate': start_date.isoformat(),
            'endDate': end_date.isoformat(),
            'eventType': 'auto'
        }).json()['events']
        for result in results:
            yield result

events_with_resolved_data = list(zip(events, resolved_event_iter(events)))


In [6]:
# Initialize with names used by airport data set that are not present in the
# world geojson file.
nameToISOs = {
  'North Korea': 'KP',
  'South Korea': 'KR',
  'United States Minor Outlying Islands': 'US',
  'Macau': 'MO',
  'Reunion': 'RE',
  'Christmas Island': 'CX',
  'Guadeloupe': 'GP',
  'Ivory Coast (Cote d\'Ivoire)': 'CI',
  'French Guiana': 'GF',
  'Western Samoa': 'WS',
  'Saint Vincent and Grenadines': 'VC',
  'Guinea Bissau': 'GW',
  'Cocos (Keeling) Islands': 'CC',
  'Grenada and South Grenadines': 'GD',
  'Mayotte': 'YT',
  'Martinique': 'MQ',
  'Tuvalu': 'TV',
  'Gibraltar': 'GI',
  'Bonaire, Saint Eustatius & Saba': None,
  'Palestinian Territory': None,
  'Curacao': None,
  'Unknown Country': None
}
name_props = [
  'name',
  'name_long',
  'formal_en',
  'name_alt',
  'name_sort',
  'formal_en',
  'brk_name'
];


with open("../imports/geoJSON/world.geo.json") as f:
    world_geo_json = json.load(f)
    for feature in world_geo_json['features']:
        properties = feature['properties']
        for prop in name_props:
            value = properties.get(prop)
            if value:
                nameToISOs[value] = properties['iso_a2']

    airport_to_country_code = {
        airport['_id']: nameToISOs[airport['countryName']]
        for airport in db.airports.find({})
    }

    countries_by_code = {
        feature['properties']['iso_a2']: feature['properties']
        for feature in world_geo_json['features']
        if feature['properties']['iso_a2']
    }


In [7]:
probabilty_passenger_infected_matrix = numpy.zeros(shape=(len(events), len(airport_set)))       
for idx, (event, resolved_event) in enumerate(events_with_resolved_data):
    for airport in airports:
        cc = airport_to_country_code.get(airport)
        country_data = countries_by_code.get(cc)
        if country_data:
            prob = float(resolved_event['locations'].get(cc, 0)) / country_data['pop_est']
        else:
            prob = 0
        probabilty_passenger_infected_matrix[idx, airport_to_idx[airport]] = prob
probabilty_passenger_infected_matrix

array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

In [8]:
estimated_infected_flow = probabilty_passenger_infected_matrix.dot(flow_matrix)
ranks = []
for idx, (event, resolved_event) in enumerate(events_with_resolved_data):
    for airport, country_code in airport_to_country_code.items():
        if country_code != "US" or airport not in airport_to_idx:
            continue
        ranks.append({
            'event': {
                '_id': event['_id'],
                'name': resolved_event['eventName'],
                # TODO: Weight by infected passengers
                'locations': resolved_event['locations'],
                'timeseries': resolved_event['timeseries']
            },
            'airportId': airport,
            'rank': estimated_infected_flow[idx, airport_to_idx[airport]]
        })
db.eventAirportRanks.delete_many({})
result = db.eventAirportRanks.insert_many(ranks)
print(len(result.inserted_ids), len(ranks))

(134088, 134088)


In [9]:
db.eventAirportRanks.find_one({
    'rank': {'$gt': 0}
})

{u'_id': ObjectId('5a67949133919a01452d372a'),
 u'airportId': u'SPS',
 u'event': {u'_id': u'RT2Q59owF2Hawxc4A',
  u'locations': {u'AS': 21.75,
   u'IL': 21.75,
   u'IN': 21.75,
   u'PK': 11.08301853587963,
   u'YE': 1076.5544553492646,
   u'ZW': 21.75},
  u'name': u'Human Cholera',
  u'timeseries': [{u'date': u'2018-01-11T00:00:00.000Z',
    u'value': 1413.2352941176468},
   {u'date': u'2018-01-18T00:00:00.000Z', u'value': 922.7647058823529},
   {u'date': u'2018-01-19T00:00:00.000Z', u'value': 87}]},
 u'rank': 6.9722106873665066e-09}