In [1]:
# install Python packages used in this notebook
!pip install pandas numpy python-geohash geohash2 folium



In [2]:
import os
import time
import shutil
import zipfile
import geohash
import geohash2
import numpy as np
import pandas as pd
import folium
import multiprocessing

from io import BytesIO
from urllib.request import urlopen
from folium.features import DivIcon

# show at most 15 rows when a DataFrame is displayed; the fully qualified option
# name is used because the bare 'max_rows' pattern matches more than one option
# in newer pandas releases and is rejected as ambiguous
pd.set_option('display.max_rows', 15)

# download the current MaxMind package and unpack it into the working directory
# NOTE(review): confirm this URL is still served and does not require credentials
maxmindURL = 'http://geolite.maxmind.com/download/geoip/database/GeoLite2-City-CSV.zip'
with urlopen(maxmindURL) as response:
    # the whole archive is read into memory before it is unpacked
    with zipfile.ZipFile(BytesIO(response.read())) as archive:
        archive.extractall()

# find the newest directory, in case there are old directories left over from previous runs
# (the names sort lexicographically, so the last one is taken as the newest)
maxmindDirectories = sorted(
    name for name in os.listdir()
    if os.path.isdir(name) and name.startswith('GeoLite2-City-CSV')
)
if not maxmindDirectories:
    raise FileNotFoundError('no GeoLite2-City-CSV directory found after extracting ' + maxmindURL)
maxmindDirectory = maxmindDirectories[-1]

# load the MaxMind network data; every column is read as text and only empty
# fields count as missing, so rows lacking any of the four columns are dropped
# before the numeric conversions
maxmindNetworks = pd.read_csv(
    os.path.join(maxmindDirectory, 'GeoLite2-City-Blocks-IPv4.csv'),
    header=0,
    usecols=['network','geoname_id','latitude','longitude'],
    dtype=str,
    na_values='',
    keep_default_na=False,
)
maxmindNetworks = maxmindNetworks.dropna().astype({'geoname_id': int, 'latitude': float, 'longitude': float})
maxmindNetworks.to_csv('maxmindNetworks.csv', index=False)

# load the MaxMind location data; only rows without a country code are dropped,
# so subdivision and city may still be missing (NaN)
maxmindLocations = pd.read_csv(
    os.path.join(maxmindDirectory, 'GeoLite2-City-Locations-en.csv'),
    header=0,
    usecols=['geoname_id','country_iso_code','country_name','subdivision_1_iso_code','subdivision_1_name','city_name'],
    dtype=str,
    na_values='',
    keep_default_na=False,
)
maxmindLocations = maxmindLocations.dropna(subset=['country_iso_code']).astype({'geoname_id': int})
# strip commas from the names (presumably to keep them safe in comma-separated
# output - confirm); missing names are not strings and are passed through unchanged
for column in ['country_name','subdivision_1_name','city_name']:
    maxmindLocations[column] = maxmindLocations[column].apply(lambda name: name.replace(',', '') if isinstance(name, str) else name)
maxmindLocations.to_csv('maxmindLocations.csv', index=False)

In [3]:
# display MaxMind network data
maxmindNetworks

Unnamed: 0,network,geoname_id,latitude,longitude
0,1.0.0.0/24,2151718,-37.7000,145.1833
1,1.0.1.0/24,1810821,26.0614,119.3061
2,1.0.2.0/23,1810821,26.0614,119.3061
3,1.0.4.0/22,2077456,-33.4940,143.2104
4,1.0.8.0/21,1809858,23.1167,113.2500
5,1.0.16.0/20,1850147,35.6850,139.7514
6,1.0.32.0/19,1809858,23.1167,113.2500
...,...,...,...,...
2726910,223.255.236.0/22,1796236,31.0456,121.3997
2726911,223.255.240.0/22,1819730,22.2500,114.1667


In [4]:
# display MaxMind location data
maxmindLocations.sort_values('geoname_id')

Unnamed: 0,geoname_id,country_iso_code,country_name,subdivision_1_iso_code,subdivision_1_name,city_name
0,18918,CY,Cyprus,04,Ammochostos,Protaras
1,32909,IR,Iran,07,Ostan-e Tehran,Shahre Jadide Andisheh
2,49518,RW,Rwanda,,,
3,49747,SO,Somalia,BK,Bakool,Oddur
4,51537,SO,Somalia,,,
5,53654,SO,Somalia,BN,Banaadir,Mogadishu
6,54225,SO,Somalia,SH,Lower Shabeelle,Merca
...,...,...,...,...,...,...
103717,11789329,IT,Italy,52,Tuscany,Ospedaletto
103718,11789352,CH,Switzerland,TI,Ticino,Savosa


In [52]:
# add geohash of latitude/longitude and display network data again

def parallelApply(frame, function, processes=None):
    """Apply ``function`` to contiguous row chunks of ``frame`` in worker processes.

    Parameters:
        frame: pandas object that supports ``len()`` and positional ``.iloc`` slicing.
        function: callable that takes one chunk and returns a pandas object; it is
            sent to the workers, so it must be picklable (e.g. a module-level function).
        processes: number of worker processes; defaults to ``os.cpu_count()``.

    Returns:
        The per-chunk results concatenated with ``pd.concat`` in the original row
        order. An empty ``frame`` is passed to ``function`` directly.
    """
    workerCount = processes or os.cpu_count() or 1
    rowCount = len(frame)
    if rowCount == 0:
        # nothing to distribute, and pd.concat rejects an empty list of results
        return function(frame)

    # never create more chunks than rows, so no worker receives an empty chunk
    chunkCount = min(workerCount, rowCount)
    bounds = [rowCount * i // chunkCount for i in range(chunkCount + 1)]
    chunks = [frame.iloc[start:stop] for start, stop in zip(bounds[:-1], bounds[1:])]

    # one worker per chunk keeps the pool size and the chunk count in agreement
    with multiprocessing.Pool(chunkCount) as pool:
        result = pd.concat(pool.map(function, chunks))
    return result

def calculateGeohash6(frame):
    """Return the precision-6 geohash of every row of ``frame``.

    Parameters:
        frame: DataFrame with 'latitude' and 'longitude' columns.

    Returns:
        A Series of geohash strings that shares the index of ``frame``; an empty
        frame yields an empty Series.
    """
    # iterate over the two columns directly rather than building a row object per row
    codes = [
        geohash2.encode(latitude, longitude, precision=6)
        for latitude, longitude in zip(frame['latitude'], frame['longitude'])
    ]
    return pd.Series(codes, index=frame.index, dtype=object)

# time the parallel geohash calculation, store the result as a new column
# and display the network data
before = time.time()
maxmindNetworks['geohash6'] = parallelApply(maxmindNetworks, calculateGeohash6)
elapsed = time.time() - before
print('elapsed time: ', elapsed, ' seconds', sep='')
maxmindNetworks

elapsed time: 22.366556644439697 seconds


Unnamed: 0,network,geoname_id,latitude,longitude,geohash6
0,1.0.0.0/24,2151718,-37.7000,145.1833,r1r1x8
1,1.0.1.0/24,1810821,26.0614,119.3061,wssu6b
2,1.0.2.0/23,1810821,26.0614,119.3061,wssu6b
3,1.0.4.0/22,2077456,-33.4940,143.2104,r4jc6y
4,1.0.8.0/21,1809858,23.1167,113.2500,ws0e90
5,1.0.16.0/20,1850147,35.6850,139.7514,xn77h0
6,1.0.32.0/19,1809858,23.1167,113.2500,ws0e90
...,...,...,...,...,...
2726910,223.255.236.0/22,1796236,31.0456,121.3997,wtw2de
2726911,223.255.240.0/22,1819730,22.2500,114.1667,wecntf


In [6]:
# attach the location columns to every network via its geoname_id and display
# the result; networks without a matching location keep NaN in those columns
maxmindNetworkLocations = maxmindNetworks.join(
    maxmindLocations.set_index('geoname_id'),
    on='geoname_id',
)
maxmindNetworkLocations

Unnamed: 0,network,geoname_id,latitude,longitude,geohash6,country_iso_code,country_name,subdivision_1_iso_code,subdivision_1_name,city_name
0,1.0.0.0/24,2151718,-37.7000,145.1833,r1r1x8,AU,Australia,VIC,Victoria,Research
1,1.0.1.0/24,1810821,26.0614,119.3061,wssu6b,CN,China,FJ,Fujian,Fuzhou
2,1.0.2.0/23,1810821,26.0614,119.3061,wssu6b,CN,China,FJ,Fujian,Fuzhou
3,1.0.4.0/22,2077456,-33.4940,143.2104,r4jc6y,AU,Australia,,,
4,1.0.8.0/21,1809858,23.1167,113.2500,ws0e90,CN,China,GD,Guangdong,Guangzhou
5,1.0.16.0/20,1850147,35.6850,139.7514,xn77h0,JP,Japan,13,Tokyo,Tokyo
6,1.0.32.0/19,1809858,23.1167,113.2500,ws0e90,CN,China,GD,Guangdong,Guangzhou
...,...,...,...,...,...,...,...,...,...,...
2726910,223.255.236.0/22,1796236,31.0456,121.3997,wtw2de,CN,China,SH,Shanghai,Shanghai
2726911,223.255.240.0/22,1819730,22.2500,114.1667,wecntf,HK,Hong Kong,,,


In [7]:
# group networks by location
maxmindNetworksGroupedByLocation = maxmindNetworkLocations.groupby('geoname_id')

In [56]:
# calculate average coordinates and geohash of networks in each location

def parallelApplyGrouped(groupedData, function, processes=None):
    """Call ``function`` once per group of ``groupedData`` in worker processes.

    Parameters:
        groupedData: iterable of ``(name, group)`` pairs, e.g. a pandas GroupBy.
        function: callable that receives one ``[name, group]`` list; it is sent to
            the workers, so it must be picklable (e.g. a module-level function).
        processes: number of worker processes; ``None`` lets ``multiprocessing``
            use the number of available CPUs.

    Returns:
        A list with one result per group, in iteration order of ``groupedData``.
    """
    tasks = [[name, group] for name, group in groupedData]
    if not tasks:
        # no groups: skip starting worker processes altogether
        return []
    with multiprocessing.Pool(processes) as pool:
        result = pool.map(function, tasks)
    return result

def networkGroupSummary(args):
    """Summarise the networks of one location.

    Parameters:
        args: sequence whose first item is the geoname_id and whose second item is
            a DataFrame with 'latitude', 'longitude' and 'geohash6' columns.

    Returns:
        A list: geoname_id, number of rows, number of distinct 'geohash6' values,
        mean latitude, mean longitude, and the precision-6 geohash of the mean
        coordinates.
    """
    geoname_id, data = args[0], args[1]
    networkCount = len(data)
    # distinct geohash6 cells, not distinct latitude/longitude pairs
    coordinateCount = data['geohash6'].nunique()
    meanLatitude = data['latitude'].mean()
    meanLongitude = data['longitude'].mean()
    return [
        geoname_id,
        networkCount,
        coordinateCount,
        meanLatitude,
        meanLongitude,
        geohash2.encode(meanLatitude, meanLongitude, precision=6),
    ]
    
# time the parallel per-location summary and display the resulting table
before = time.time()
columns = ['geoname_id','networkCount','coordinateCount','averageLatitude','averageLongitude','geohash6']
maxmindNetworkGroupSummary = pd.DataFrame(
    parallelApplyGrouped(maxmindNetworksGroupedByLocation, networkGroupSummary),
    columns=columns,
)
elapsed = time.time() - before
print('elapsed time: ', elapsed, ' seconds', sep='')
maxmindNetworkGroupSummary

elapsed time: 65.42401838302612 seconds


Unnamed: 0,geoname_id,networkCount,coordinateCount,averageLatitude,averageLongitude,geohash6
0,18918,2,1,35.0125,34.0583,sy0ppj
1,32909,5,1,35.7004,50.9998,tnk70e
2,49518,65,1,-2.0000,30.0000,kxtkde
3,49747,1,1,4.1213,43.8895,sbxpqw
4,51537,38,1,10.0000,49.0000,t1fbgh
5,53654,4,1,2.0462,45.3341,t025xs
6,54225,1,1,1.7159,44.7717,sbrcfb
...,...,...,...,...,...,...
98434,11789329,1,1,43.7000,10.4167,spz2t5
98435,11789352,3,1,46.0198,8.9414,u0nmg3


In [9]:
# add the location columns to the network group summary and keep only the
# groups whose location has a country code
maxmindLocationSummary = (
    maxmindNetworkGroupSummary
    .join(maxmindLocations.set_index('geoname_id'), on='geoname_id')
    .dropna(subset=['country_iso_code'])
)
maxmindLocationSummary

Unnamed: 0,geoname_id,networkCount,coordinateCount,averageLatitude,averageLongitude,geohash6,country_iso_code,country_name,subdivision_1_iso_code,subdivision_1_name,city_name
0,18918,2,1,35.0125,34.0583,sy0ppj,CY,Cyprus,04,Ammochostos,Protaras
1,32909,5,1,35.7004,50.9998,tnk70e,IR,Iran,07,Ostan-e Tehran,Shahre Jadide Andisheh
2,49518,65,1,-2.0000,30.0000,kxtkde,RW,Rwanda,,,
3,49747,1,1,4.1213,43.8895,sbxpqw,SO,Somalia,BK,Bakool,Oddur
4,51537,38,1,10.0000,49.0000,t1fbgh,SO,Somalia,,,
5,53654,4,1,2.0462,45.3341,t025xs,SO,Somalia,BN,Banaadir,Mogadishu
6,54225,1,1,1.7159,44.7717,sbrcfb,SO,Somalia,SH,Lower Shabeelle,Merca
...,...,...,...,...,...,...,...,...,...,...,...
98434,11789329,1,1,43.7000,10.4167,spz2t5,IT,Italy,52,Tuscany,Ospedaletto
98435,11789352,3,1,46.0198,8.9414,u0nmg3,CH,Switzerland,TI,Ticino,Savosa


In [10]:
# display summary for locations with lots of networks and coordinates
maxmindLocationSummary.sort_values(['coordinateCount','networkCount'],ascending=False)

Unnamed: 0,geoname_id,networkCount,coordinateCount,averageLatitude,averageLongitude,geohash6,country_iso_code,country_name,subdivision_1_iso_code,subdivision_1_name,city_name
5291,756135,2149,199,52.244754,21.003443,u3qcjy,PL,Poland,MZ,Mazovia,Warsaw
36073,2950159,5479,193,52.513743,13.403263,u33d8z,DE,Germany,BE,Land Berlin,Berlin
67179,3530597,4371,163,19.431441,-99.138599,9g3w81,MX,Mexico,CMX,Mexico City,Mexico City
5078,745044,3613,123,41.022007,28.978656,sxk977,TR,Turkey,34,Istanbul,Istanbul
92060,6167865,10114,101,43.666551,-79.431134,dpz82t,CA,Canada,ON,Ontario,Toronto
76870,4699066,7078,99,29.778965,-95.445854,9vk1kj,US,United States,TX,Texas,Houston
15246,2193733,4101,99,-36.869048,174.767408,rckq2b,NZ,New Zealand,AUK,Auckland,Auckland
...,...,...,...,...,...,...,...,...,...,...,...
98424,11748289,1,1,-26.925500,152.109100,r7hnv6,AU,Australia,QLD,Queensland,Cherry Creek
98428,11778479,1,1,35.616700,139.633300,xn7697,JP,Japan,13,Tokyo,Tamagawa


In [11]:
# this function plots the networks in a location on a map

def drawmap(geoname_id,zoomLevel):
    """Plot the distinct network coordinates of one location on a folium map.

    Reads the module-level ``maxmindLocations`` and
    ``maxmindNetworksGroupedByLocation`` and prints a one-line summary.

    Parameters:
        geoname_id: location id to plot.
        zoomLevel: initial zoom level of the map.

    Returns:
        The folium map, centred on the mean latitude/longitude of the networks.

    Raises:
        ValueError: if ``maxmindLocations`` does not hold exactly one row for
            ``geoname_id``.
    """
    # look the location row up once instead of once per name column
    location = maxmindLocations.loc[maxmindLocations['geoname_id']==geoname_id]
    countryName = location['country_name'].item()
    subdivisionName = location['subdivision_1_name'].item()
    cityName = location['city_name'].item()
    # city and subdivision are NaN for country-level locations; leaving them out
    # avoids a TypeError when the summary text is assembled
    placeName = ', '.join(name for name in (cityName, subdivisionName, countryName) if isinstance(name, str))

    networks = maxmindNetworksGroupedByLocation.get_group(geoname_id)
    networkCount = len(networks)
    averageLatitude, averageLongitude = networks[['latitude','longitude']].mean()

    locationMap = folium.Map(location=[averageLatitude, averageLongitude], zoom_start=zoomLevel)

    # one circle per distinct latitude/longitude pair
    points = networks.groupby(['latitude','longitude'])
    coordinateCount = points.ngroups

    for (latitude, longitude), _ in points:
        folium.features.Circle(location=[latitude, longitude], radius=200, color='blue').add_to(locationMap)

    print('location ' + str(geoname_id) + ': ' + str(networkCount) + ' networks at ' + str(coordinateCount) + ' coordinates in ' + placeName)
    return locationMap

In [12]:
drawmap(3114472,11) # Pamplona

location 3114472: 233 networks at 6 coordinates in Pamplona, Navarre, Spain


In [13]:
drawmap(4335045,12) # New Orleans

location 4335045: 828 networks at 23 coordinates in New Orleans, Louisiana, United States


In [14]:
drawmap(2867714,11) # Munich

location 2867714: 2538 networks at 67 coordinates in Munich, Bavaria, Germany


In [15]:
drawmap(2950159,10) # Berlin

location 2950159: 5479 networks at 197 coordinates in Berlin, Land Berlin, Germany


In [16]:
# this function finds a geohash that encloses all of the networks in a location and plots them on a map

def findGeohashBBox(minLatitude, minLongitude, maxLatitude, maxLongitude):
    """Find the most precise geohash around the box centre that encloses the box.

    The centre of the given latitude/longitude range is encoded at precision 12
    down to 1; the first cell whose bounding box contains the whole range wins.

    Returns:
        ``(geohashCode, geohashBBox)`` where the bounding box is a mapping with
        's', 'n', 'w' and 'e' entries, or ``(None, None)`` after printing a message
        when no precision yields an enclosing cell.
    """
    centerLatitude = (maxLatitude+minLatitude)/2
    centerLongitude = (maxLongitude+minLongitude)/2

    precision = 12
    while precision > 0:
        code = geohash2.encode(centerLatitude, centerLongitude, precision=precision)
        cell = geohash.bbox(code)
        # the range sticks out of the cell if it crosses any of the four edges
        sticksOut = (
            minLatitude < cell['s']
            or maxLatitude > cell['n']
            or minLongitude < cell['w']
            or maxLongitude > cell['e']
        )
        if not sticksOut:
            return (code, cell)
        precision -= 1

    print('no geohash found that encloses latitude ' + str(minLatitude) + ' to ' + str(maxLatitude) + ', longitude ' + str(minLongitude) + ' to ' + str(maxLongitude))
    return (None, None)

def drawmapWithGeohashBBox(geoname_id,zoomLevel):
    """Plot the networks of one location together with an enclosing geohash cell.

    Reads the module-level ``maxmindLocations`` and
    ``maxmindNetworksGroupedByLocation``, asks ``findGeohashBBox`` for a geohash
    that encloses all network coordinates, and prints a one-line summary.

    Parameters:
        geoname_id: location id to plot.
        zoomLevel: initial zoom level of the map.

    Returns:
        The folium map, or ``None`` when ``findGeohashBBox`` finds no enclosing
        geohash.

    Raises:
        ValueError: if ``maxmindLocations`` does not hold exactly one row for
            ``geoname_id``.
    """
    # look the location row up once instead of once per name column
    location = maxmindLocations.loc[maxmindLocations['geoname_id']==geoname_id]
    countryName = location['country_name'].item()
    subdivisionName = location['subdivision_1_name'].item()
    cityName = location['city_name'].item()
    # city and subdivision are NaN for country-level locations; leaving them out
    # avoids a TypeError when the summary text is assembled
    placeName = ', '.join(name for name in (cityName, subdivisionName, countryName) if isinstance(name, str))

    networks = maxmindNetworksGroupedByLocation.get_group(geoname_id)
    networkCount = len(networks)
    coordinates = networks[['latitude','longitude']]
    minLatitude, minLongitude = coordinates.min()
    avgLatitude, avgLongitude = coordinates.mean()
    maxLatitude, maxLongitude = coordinates.max()

    # search for the enclosing geohash first, so no map is built when there is none
    (geohashCode, geohashBBox) = findGeohashBBox(minLatitude, minLongitude, maxLatitude, maxLongitude)
    if geohashCode is None:
        return None

    locationMap = folium.Map(location=[avgLatitude, avgLongitude], zoom_start=zoomLevel)

    # one circle per distinct latitude/longitude pair
    points = networks.groupby(['latitude','longitude'])
    coordinateCount = points.ngroups

    for (latitude, longitude), _ in points:
        folium.features.Circle(location=[latitude, longitude], radius=200, color='blue').add_to(locationMap)

    # shade the geohash cell from its south-west to its north-east corner
    folium.features.RectangleMarker([(geohashBBox['s'], geohashBBox['w']),(geohashBBox['n'], geohashBBox['e'])],weight=0, fill_color='blue', fill_opacity=0.2).add_to(locationMap)

    print('geohash "' + geohashCode + '" for location ' + str(geoname_id) + ': ' + str(networkCount) + ' networks at ' + str(coordinateCount) + ' coordinates in ' + placeName)
    return locationMap

In [17]:
drawmapWithGeohashBBox(3114472,11) # Pamplona

geohash "ezwg" for location 3114472: 233 networks at 6 coordinates in Pamplona, Navarre, Spain


In [18]:
drawmapWithGeohashBBox(4335045,12) # New Orleans

no geohash found that encloses latitude 29.9049 to 30.0801, longitude -90.2057 to -89.879


In [19]:
drawmapWithGeohashBBox(2867714,8) # Munich

geohash "u28" for location 2867714: 2538 networks at 67 coordinates in Munich, Bavaria, Germany


In [20]:
drawmapWithGeohashBBox(2950159,8) # Berlin

geohash "u33" for location 2950159: 5479 networks at 197 coordinates in Berlin, Land Berlin, Germany


In [21]:
# this function finds a geohash that contains the centroid of the networks in a location and plots them on a map

def drawmapWithGeohashCentroid(geoname_id,zoomLevel,precision=5):
    """Plot the networks of one location together with the geohash cell of their centroid.

    Reads the module-level ``maxmindLocations`` and
    ``maxmindNetworksGroupedByLocation`` and prints a one-line summary.

    Parameters:
        geoname_id: location id to plot.
        zoomLevel: initial zoom level of the map.
        precision: geohash precision used for the centroid cell (default 5).

    Returns:
        The folium map, centred on the mean latitude/longitude of the networks.

    Raises:
        ValueError: if ``maxmindLocations`` does not hold exactly one row for
            ``geoname_id``.
    """
    # look the location row up once instead of once per name column
    location = maxmindLocations.loc[maxmindLocations['geoname_id']==geoname_id]
    countryName = location['country_name'].item()
    subdivisionName = location['subdivision_1_name'].item()
    cityName = location['city_name'].item()
    # city and subdivision are NaN for country-level locations; leaving them out
    # avoids a TypeError when the summary text is assembled
    placeName = ', '.join(name for name in (cityName, subdivisionName, countryName) if isinstance(name, str))

    networks = maxmindNetworksGroupedByLocation.get_group(geoname_id)
    networkCount = len(networks)

    avgLatitude, avgLongitude = networks[['latitude','longitude']].mean()
    locationMap = folium.Map(location=[avgLatitude, avgLongitude], zoom_start=zoomLevel)

    # one circle per distinct latitude/longitude pair
    points = networks.groupby(['latitude','longitude'])
    coordinateCount = points.ngroups

    for (latitude, longitude), _ in points:
        folium.features.Circle(location=[latitude, longitude], radius=200, color='blue').add_to(locationMap)

    # shade the geohash cell that contains the mean coordinates
    geohashCode = geohash2.encode(avgLatitude, avgLongitude, precision=precision)
    geohashBBox = geohash.bbox(geohashCode)
    folium.features.RectangleMarker([(geohashBBox['s'], geohashBBox['w']),(geohashBBox['n'], geohashBBox['e'])],weight=0, fill_color='blue', fill_opacity=.33).add_to(locationMap)

    print('geohash "' + geohashCode + '" for location ' + str(geoname_id) + ': ' + str(networkCount) + ' networks at ' + str(coordinateCount) + ' coordinates in ' + placeName)
    return locationMap

In [22]:
drawmapWithGeohashCentroid(3114472,11) # Pamplona

geohash "ezwgd" for location 3114472: 233 networks at 6 coordinates in Pamplona, Navarre, Spain


In [23]:
drawmapWithGeohashCentroid(4335045,12) # New Orleans

geohash "9vrfq" for location 4335045: 828 networks at 23 coordinates in New Orleans, Louisiana, United States


In [24]:
drawmapWithGeohashCentroid(2867714,11) # Munich

geohash "u281z" for location 2867714: 2538 networks at 67 coordinates in Munich, Bavaria, Germany


In [25]:
drawmapWithGeohashCentroid(2950159,10) # Berlin

geohash "u33d8" for location 2950159: 5479 networks at 197 coordinates in Berlin, Land Berlin, Germany


In [26]:
# this function finds the precision-5 and precision-6 geohashes that contain the centroid of the networks in a location and plots them on a map with a title label

def drawmapWithGeohash5and6Centroid(geoname_id,zoomLevel):
    """Plot the networks of one location with the precision-5 and precision-6 centroid geohashes.

    Reads the module-level ``maxmindLocations`` and
    ``maxmindNetworksGroupedByLocation``. Both geohash cells of the mean
    coordinates are shaded and a text label with the place name, location id,
    geohashes and counts is placed on the map.

    Parameters:
        geoname_id: location id to plot.
        zoomLevel: initial zoom level of the map.

    Returns:
        The folium map, centred on the mean latitude/longitude of the networks.

    Raises:
        ValueError: if ``maxmindLocations`` does not hold exactly one row for
            ``geoname_id``.
    """
    # look the location row up once instead of once per name column
    location = maxmindLocations.loc[maxmindLocations['geoname_id']==geoname_id]
    countryName = location['country_name'].item()
    subdivisionName = location['subdivision_1_name'].item()
    cityName = location['city_name'].item()
    # city and subdivision are NaN for country-level locations; leaving them out
    # avoids a TypeError when the title text is assembled
    placeName = ', '.join(name for name in (cityName, subdivisionName, countryName) if isinstance(name, str))

    networks = maxmindNetworksGroupedByLocation.get_group(geoname_id)
    networkCount = len(networks)

    avgLatitude, avgLongitude = networks[['latitude','longitude']].mean()
    locationMap = folium.Map(location=[avgLatitude, avgLongitude], zoom_start=zoomLevel)

    # one circle per distinct latitude/longitude pair
    points = networks.groupby(['latitude','longitude'])
    coordinateCount = points.ngroups

    for (latitude, longitude), _ in points:
        folium.features.Circle(location=[latitude, longitude], radius=200, color='blue').add_to(locationMap)

    # the precision-5 cell is drawn lighter than the precision-6 cell
    geohashCode5 = geohash2.encode(avgLatitude, avgLongitude, precision=5)
    geohashBBox5 = geohash.bbox(geohashCode5)
    folium.features.RectangleMarker([(geohashBBox5['s'], geohashBBox5['w']),(geohashBBox5['n'], geohashBBox5['e'])],weight=0, fill_color='blue', fill_opacity=.33).add_to(locationMap)

    geohashCode6 = geohash2.encode(avgLatitude, avgLongitude, precision=6)
    geohashBBox6 = geohash.bbox(geohashCode6)
    folium.features.RectangleMarker([(geohashBBox6['s'], geohashBBox6['w']),(geohashBBox6['n'], geohashBBox6['e'])],weight=0, fill_color='blue', fill_opacity=.5).add_to(locationMap)

    # anchor the title at the middle of the southern edge of the precision-5 cell
    titleCoordinates = [geohashBBox5['s'], (geohashBBox5['w']+geohashBBox5['e'])/2]

    titleHTML = (placeName + '<br>' +
                 'location: ' + str(geoname_id) + '<br>' +
                 'geohashes: ' + geohashCode5 + ' and ' + geohashCode6 + '<br>' +
                 str(networkCount) + ' networks at ' + str(coordinateCount) + ' coordinates')

    titleCSS = ('color: blue;' +
                'font-size: 9pt;' +
                'font-weight: bold;' +
                'font-style: italic;' +
                'text-align: center; ' +
                'padding: 5px;' +
                'background-color: rgba(255,255,255,0.6);' +
                'position: fixed;' +
                'transform: translateX(-50%);')

    folium.map.Marker(
        titleCoordinates,
        icon=DivIcon(icon_size=(300,100),
                     icon_anchor=(0,0),
                     html='<div style="' + titleCSS + '">' + titleHTML + '</div>')).add_to(locationMap)

    return locationMap

#drawmapWithGeohash5and6Centroid(3114472,11) # Pamplona

In [27]:
drawmapWithGeohash5and6Centroid(3114472,11) # Pamplona

In [28]:
drawmapWithGeohash5and6Centroid(4335045,12) # New Orleans

In [29]:
drawmapWithGeohash5and6Centroid(2867714,11) # Munich

In [30]:
drawmapWithGeohash5and6Centroid(2950159,10) # Berlin

In [31]:
# render each example city and write its map to a standalone HTML file
for geonameId, zoomLevel, fileName in [
    (3114472, 11, 'map.Pamplona.html'),
    (4335045, 12, 'map.NewOrleans.html'),
    (2867714, 11, 'map.Munich.html'),
    (2950159, 10, 'map.Berlin.html'),
]:
    drawmapWithGeohash5and6Centroid(geonameId, zoomLevel).save(fileName)

In [32]:
# store the distinct country code/name pairs, sorted by country code, and display them
countryCodes = (
    maxmindLocations[['country_iso_code','country_name']]
    .drop_duplicates()
    .sort_values('country_iso_code')
)
countryCodes.to_csv('countryCodes.all.csv', index=False)
countryCodes

Unnamed: 0,country_iso_code,country_name
56610,AD,Andorra
672,AE,United Arab Emirates
6881,AF,Afghanistan
72236,AG,Antigua and Barbuda
72154,AI,Anguilla
1333,AL,Albania
240,AM,Armenia
...,...,...
74684,WS,Samoa
6124,XK,Kosovo


In [33]:
# store a sorted list of all subdivision codes and names
# (one row per distinct country code / subdivision code pair)
subdivisionCodesAll = (
    maxmindLocations[['country_iso_code','subdivision_1_iso_code','country_name','subdivision_1_name']]
    .drop_duplicates(['country_iso_code','subdivision_1_iso_code'])
    .sort_values(['country_iso_code','subdivision_1_iso_code'])
)
subdivisionCodesAll.to_csv('subdivisionCodes.all.csv', index=False)
subdivisionCodesAll

Unnamed: 0,country_iso_code,subdivision_1_iso_code,country_name,subdivision_1_name
56618,AD,02,Andorra,Canillo
56617,AD,03,Andorra,Encamp
56615,AD,04,Andorra,La Massana
56610,AD,05,Andorra,Ordino
56611,AD,06,Andorra,Sant Julià de Loria
56612,AD,07,Andorra,Andorra la Vella
56614,AD,08,Andorra,Escaldes-Engordany
...,...,...,...,...
6357,ZW,BU,Zimbabwe,Bulawayo
6354,ZW,HA,Zimbabwe,Harare


In [34]:
# this function returns a list of subdivision codes and names for the specified country

def subdivisionCodes(countryCode):
    """Return the distinct subdivision code/name pairs of one country.

    Parameters:
        countryCode: value matched against the 'country_iso_code' column of the
            module-level ``maxmindLocations``.

    Returns:
        DataFrame with the columns 'subdivision_1_iso_code' and
        'subdivision_1_name', duplicates removed, in the original row order.
    """
    inCountry = maxmindLocations['country_iso_code'] == countryCode
    wantedColumns = ['subdivision_1_iso_code', 'subdivision_1_name']
    return maxmindLocations.loc[inCountry, wantedColumns].drop_duplicates()

In [35]:
# get the subdivision codes and names for the U.S., sorted by subdivision code,
# store them as CSV and display them
subdivisionCodesUS = subdivisionCodes('US')
subdivisionCodesUS = subdivisionCodesUS.sort_values(by='subdivision_1_iso_code')
subdivisionCodesUS.to_csv('subdivisionCodes.US.csv', index=False)
subdivisionCodesUS

Unnamed: 0,subdivision_1_iso_code,subdivision_1_name
93259,AK,Alaska
74716,AL,Alabama
74731,AR,Arkansas
91517,AZ,Arizona
91521,CA,California
92552,CO,Colorado
83164,CT,Connecticut
...,...,...
74724,VA,Virginia
90705,VT,Vermont


In [36]:
# get the subdivision codes and names for Russia, sorted by subdivision code,
# store them as CSV and display them
subdivisionCodesRU = subdivisionCodes('RU')
subdivisionCodesRU = subdivisionCodesRU.sort_values(by='subdivision_1_iso_code')
subdivisionCodesRU.to_csv('subdivisionCodes.RU.csv', index=False)
subdivisionCodesRU

Unnamed: 0,subdivision_1_iso_code,subdivision_1_name
1567,AD,Respublika Adygeya
8598,AL,Altai
8572,ALT,Altai Krai
12647,AMU,Amurskaya Oblast'
1616,ARK,Arkhangelskaya
2061,AST,Astrakhanskaya Oblast'
1563,BA,Bashkortostan
...,...,...
1568,VLG,Vologodskaya Oblast'
1582,VOR,Voronezhskaya Oblast'


In [37]:
# get the subdivision codes and names for China, sorted by subdivision code,
# store them as CSV and display them
subdivisionCodesCN = subdivisionCodes('CN')
subdivisionCodesCN = subdivisionCodesCN.sort_values(by='subdivision_1_iso_code')
subdivisionCodesCN.to_csv('subdivisionCodes.CN.csv', index=False)
subdivisionCodesCN

Unnamed: 0,subdivision_1_iso_code,subdivision_1_name
8962,AH,Anhui
10700,BJ,Beijing
10751,CQ,Chongqing
10590,FJ,Fujian
10587,GD,Guangdong
10591,GS,Gansu
10608,GX,Guangxi
...,...,...
10585,SX,Shanxi
10638,TJ,Tianjin


In [38]:
# store a sorted list of the subdivision codes and names actually used by networks
# (taken from the per-location summary rather than from the full location table)
subdivisionCodesUsed = (
    maxmindLocationSummary[['country_iso_code','subdivision_1_iso_code','country_name','subdivision_1_name']]
    .drop_duplicates(['country_iso_code','subdivision_1_iso_code'])
    .sort_values(['country_iso_code','subdivision_1_iso_code'])
)
subdivisionCodesUsed.to_csv('subdivisionCodes.used.csv', index=False)
subdivisionCodesUsed

Unnamed: 0,country_iso_code,subdivision_1_iso_code,country_name,subdivision_1_name
52217,AD,02,Andorra,Canillo
52216,AD,03,Andorra,Encamp
52214,AD,04,Andorra,La Massana
52209,AD,05,Andorra,Ordino
52210,AD,06,Andorra,Sant Julià de Loria
52211,AD,07,Andorra,Andorra la Vella
52213,AD,08,Andorra,Escaldes-Engordany
...,...,...,...,...
6194,ZW,BU,Zimbabwe,Bulawayo
6191,ZW,HA,Zimbabwe,Harare
