In [24]:
import pandas as pd
import matplotlib.pyplot as plt
import dateutil.parser as dp
import requests
import json
import os.path

from collections import namedtuple

import warnings
warnings.filterwarnings("ignore") 

In [25]:
# Path of the HDF5 store that holds the anchor table.
STORE_PATH = "atlas_20171002.hdf5"

# Read the anchor table; the context manager closes the store afterwards.
with pd.HDFStore(STORE_PATH) as hdf:
    anchor_df = hdf.get('anchor_df')

In [3]:
# Anchor names used to select rows of anchor_df by its 'name' column.
# The trailing comment on each entry gives the anchor's city and country.
ANCHOR_NAMES_WE_LIKE = [
    "ar-bue-as4270",   # Buenos Aires, Argentina
    "at-vie-as1120",   # Vienna, Austria
    "au-mel-as38796",  # Melbourne, Australia
    "au-bne-as4608-2", # Brisbane, Australia
    "bd-dac-as24122",  # Dhaka, Bangladesh
    "bf-oua-as37577",  # Ouagadougou, Burkina Faso
    "bg-sof-as8866",   # Sofia, Bulgaria
    "br-sao-as22548",  # Sao Paulo, Brazil
    "ca-mtr-as852",    # Montreal, Canada
    "ca-wnp-as18451",  # Winnipeg, Canada
    "ch-gtg-as20612",  # Glattbrugg, Switzerland
    "ch-zrh-as559",    # Zurich, Switzerland
    "cz-brq-as197451", # Brno, Czechia
    "de-fra-as8763",   # Frankfurt, Germany
    "de-ham-as201709", # Hamburg, Germany
    "de-muc-as5539",   # Munich, Germany
    "ee-tll-as51349",  # Tallinn, Estonia
    "es-bcn-as13041",  # Barcelona, Spain
    "fr-par-as1307",   # Paris, France
    "gr-ath-as5408",   # Athens, Greece
    "hk-hkg-as43996",  # Hong Kong SAR, China
    "hu-bud-as12303",  # Budapest, Hungary
    "id-jkt-as10208",  # Jakarta, Indonesia
    "ie-dub-as1213",   # Dublin, Ireland
    "in-bom-as33480",  # Mumbai, India
    "it-trn-as12779",  # Turin, Italy
    "jp-tyo-as2500",   # Tokyo, Japan
    "ke-nbo-as37578",  # Nairobi, Kenya
    "kz-ala-as21299",  # Almaty, Kazakhstan
    "nl-ams-as3333",   # Amsterdam, Netherlands
    "nl-haa-as201682", # Haarlem, Netherlands
    "nz-wlg-as9834",   # Wellington, New Zealand
    "qa-doh-as8781",   # Doha, Qatar
    "ru-mow-as15835",  # Moscow, Russia
    "se-sto-as8674",   # Stockholm, Sweden
    "uk-lon-as5607",   # London, England
    "us-dal-as2914",   # Dallas, USA
    "us-den-as7922",   # Denver, USA
    "us-mia-as33280",  # Miami, USA
    "us-sjc-as22300",  # San Jose, USA
]

In [7]:
# IPv4 addresses of the anchors whose name is in ANCHOR_NAMES_WE_LIKE.
anchor_df.loc[anchor_df['name'].isin(ANCHOR_NAMES_WE_LIKE), 'ip4'].values

array(['170.210.5.200', '193.171.255.2', '203.119.0.195', '120.29.255.85',
       '210.4.72.46', '197.239.73.59', '213.91.165.185', '200.160.6.35',
       '206.162.189.131', '208.81.1.60', '194.242.34.190', '130.59.80.2',
       '147.229.242.8', '81.91.160.92', '185.41.104.166', '195.30.70.33',
       '195.43.87.140', '84.88.18.254', '132.227.123.3', '83.212.7.42',
       '185.28.220.65', '5.28.0.17', '202.153.128.82', '193.1.201.140',
       '180.149.244.132', '213.212.129.68', '203.178.137.196',
       '196.6.220.130', '85.29.165.14', '193.0.19.107', '185.67.44.230',
       '210.4.208.19', '82.148.114.58', '193.232.226.58', '185.42.136.158',
       '90.223.193.1', '129.250.50.30', '76.26.120.98', '65.22.12.230',
       '74.120.190.250'], dtype=object)

In [9]:
# Result record for one lookup: the queried address plus its coordinates.
AddrLoc = namedtuple("AddrLoc", ("addr", "lon", "lat"))

def maxmind_location_for_address(addr, timeout=30.0):
    """Look up the geolocation of an address via the RIPEstat ``geoloc`` data call.

    The lookup is best-effort: any failure (network error, non-OK HTTP
    status, unparseable or unexpected response body) is reported with a
    ``print`` and yields the placeholder coordinates ``(0.0, 0.0)`` instead
    of raising, so one bad address does not abort a whole batch.

    NOTE(review): the function name suggests the data is MaxMind-derived;
    nothing in this code confirms the provider -- verify against RIPEstat docs.

    Parameters
    ----------
    addr : str
        Address passed as the ``resource`` query parameter.
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up. Without a
        timeout a single stalled request would hang the batch indefinitely.

    Returns
    -------
    AddrLoc
        ``AddrLoc(addr, lon, lat)`` built from the first entry of
        ``data.locations`` in the response, or ``AddrLoc(addr, 0.0, 0.0)``
        when the lookup fails.
    """
    url = "https://stat.ripe.net/data/geoloc/data.json"
    fallback = AddrLoc(addr, 0.0, 0.0)

    try:
        res = requests.get(url, params={"resource": addr}, timeout=timeout)
    except requests.RequestException as e:
        # Connection errors and timeouts are treated like any other failed lookup.
        print("RIPEstat probe API request failed: "+repr(e))
        return fallback

    if not res.ok:
        # Error responses are not guaranteed to carry a JSON body; fall back
        # to the raw bytes so the failure report itself cannot raise.
        try:
            detail = res.json()
        except ValueError:
            detail = res.content
        print("RIPEstat probe API request failed: "+repr(detail))
        return fallback

    # Bound before the try block so the error handler can tell "body was not
    # JSON" (still None) apart from "JSON had an unexpected shape".
    api_content = None
    try:
        api_content = json.loads(res.content.decode("utf-8"))
        api_loc = api_content['data']['locations'][0]
        loc = AddrLoc(addr, float(api_loc['longitude']), float(api_loc['latitude']))
    except Exception:
        print("couldn't process content:")
        if api_content is None:
            print(repr(res.content))
        else:
            print(json.dumps(api_content, indent=2))
        return fallback

    print("got location for "+str(addr))
    return loc

In [10]:
# Lazily geolocate the IPv4 address of every selected anchor; no request is
# sent until the iterator is consumed.
addrloc_v4 = map(
    maxmind_location_for_address,
    anchor_df.loc[anchor_df['name'].isin(ANCHOR_NAMES_WE_LIKE), 'ip4'].values,
)

In [11]:
# Consume the lazy iterator, which performs the lookups one by one.
addrloc_v4l = list(addrloc_v4)

got location for 170.210.5.200
got location for 193.171.255.2
got location for 203.119.0.195
got location for 120.29.255.85
got location for 210.4.72.46
got location for 197.239.73.59
got location for 213.91.165.185
got location for 200.160.6.35
got location for 206.162.189.131
got location for 208.81.1.60
got location for 194.242.34.190
got location for 130.59.80.2
got location for 147.229.242.8
got location for 81.91.160.92
got location for 185.41.104.166
got location for 195.30.70.33
got location for 195.43.87.140
got location for 84.88.18.254
got location for 132.227.123.3
got location for 83.212.7.42
got location for 185.28.220.65
got location for 5.28.0.17
got location for 202.153.128.82
got location for 193.1.201.140
got location for 180.149.244.132
got location for 213.212.129.68
got location for 203.178.137.196
got location for 196.6.220.130
got location for 85.29.165.14
got location for 193.0.19.107
got location for 185.67.44.230
got location for 210.4.208.19
got location for

In [26]:
# Tabulate the lookup results keyed by address, with the coordinate columns
# renamed to mm4lon / mm4lat.
mmdf = (
    pd.DataFrame(addrloc_v4l)
    .set_index('addr')
    .rename(columns={'lon': 'mm4lon', 'lat': 'mm4lat'})
)

In [28]:
# Attach the looked-up coordinates to the anchor table, matching mmdf's index
# against the 'ip4' column. Coordinate columns left over from an earlier run
# are dropped first: the joined frame is written back to the HDF5 store, so on
# a re-run anchor_df already carries these columns and a plain join would
# raise ValueError because the column names overlap.
anchor_df = (
    anchor_df
    .drop(list(mmdf.columns), axis=1, errors="ignore")
    .join(mmdf, on="ip4")
)

In [29]:
# Path of the HDF5 store that holds the anchor table.
STORE_PATH = "atlas_20171002.hdf5"

# Write the anchor table back under the same key it was loaded from.
with pd.HDFStore(STORE_PATH) as hdf:
    hdf.put('anchor_df', anchor_df)