In [2]:
import os
import zipfile
from urllib.request import urlopen
from io import BytesIO
import pandas as pd
pd.set_option('max_rows', 15)

maxmindURL = 'http://geolite.maxmind.com/download/geoip/database/GeoLite2-City-CSV.zip'

print('running ...')

with urlopen(maxmindURL) as response:
    with zipfile.ZipFile(BytesIO(response.read())) as file:
        file.extractall()

# find the newest directory, in case there are old directories left over from previous runs
maxmindDirectory = sorted( [ f for f in os.listdir() if os.path.isdir(f) and f.startswith('GeoLite2-City-CSV') ] )[-1]

# load the MaxMind network and location data 
maxmindNetworks = pd.read_csv(maxmindDirectory + '/GeoLite2-City-Blocks-IPv4.csv', header=0)
maxmindLocations = pd.read_csv(maxmindDirectory + '/GeoLite2-City-Locations-en.csv', header=0)

print('... done')

running ...
... done


In [3]:
# display the MaxMind network table
maxmindNetworks

Unnamed: 0,network,geoname_id,registered_country_geoname_id,represented_country_geoname_id,is_anonymous_proxy,is_satellite_provider,postal_code,latitude,longitude,accuracy_radius
0,1.0.0.0/24,2151718.0,2077456.0,,0,0,3095,-37.7000,145.1833,1000.0
1,1.0.1.0/24,1810821.0,1814991.0,,0,0,,26.0614,119.3061,50.0
2,1.0.2.0/23,1810821.0,1814991.0,,0,0,,26.0614,119.3061,50.0
3,1.0.4.0/22,2077456.0,2077456.0,,0,0,,-33.4940,143.2104,1000.0
4,1.0.8.0/21,1809858.0,1814991.0,,0,0,,23.1167,113.2500,50.0
5,1.0.16.0/20,1850147.0,1861060.0,,0,0,190-0031,35.6850,139.7514,500.0
6,1.0.32.0/19,1809858.0,1814991.0,,0,0,,23.1167,113.2500,50.0
...,...,...,...,...,...,...,...,...,...,...
2726910,223.255.236.0/22,1796236.0,1814991.0,,0,0,,31.0456,121.3997,50.0
2726911,223.255.240.0/22,1819730.0,1819730.0,,0,0,,22.2500,114.1667,50.0


In [39]:
# create a group of frames, one for each value of 'geoname_id'
grouped = maxmindNetworks[['geoname_id','network','latitude','longitude']].groupby(['geoname_id'])

In [40]:
# display the first row of each frame in the group
grouped.first()

Unnamed: 0_level_0,network,latitude,longitude
geoname_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
18918.0,82.114.56.0/22,35.0125,34.0583
32909.0,185.128.136.0/22,35.7004,50.9998
49518.0,5.62.63.84/30,-2.0000,30.0000
49747.0,154.72.48.0/24,4.1213,43.8895
51537.0,5.62.63.128/30,10.0000,49.0000
53654.0,41.78.74.0/25,2.0462,45.3341
54225.0,41.223.110.0/24,1.7159,44.7717
...,...,...,...
11789329.0,176.206.32.0/19,43.7000,10.4167
11789352.0,84.227.196.0/23,46.0198,8.9414


In [41]:
# display the group for 'geoname_id' value '32900'
grouped.get_group(32909.0)

Unnamed: 0,latitude,longitude,network
2124523,35.7004,50.9998,185.128.136.0/22
2124527,35.7004,50.9998,185.128.152.0/22
2124530,35.7004,50.9998,185.128.164.0/22
2125130,35.7004,50.9998,185.136.192.0/22
2579058,35.7004,50.9998,212.33.200.0/21


In [42]:
# display a list of 'geoname_id' values
sorted(list(grouped.groups))

[18918.0,
 32909.0,
 49518.0,
 49747.0,
 51537.0,
 53654.0,
 54225.0,
 55671.0,
 57289.0,
 58933.0,
 58994.0,
 59611.0,
 60928.0,
 63795.0,
 64013.0,
 69543.0,
 71136.0,
 71137.0,
 74477.0,
 75427.0,
 77726.0,
 77843.0,
 78751.0,
 78754.0,
 79415.0,
 80384.0,
 81302.0,
 87205.0,
 88319.0,
 88562.0,
 88903.0,
 89113.0,
 90150.0,
 90552.0,
 94787.0,
 94824.0,
 95445.0,
 95446.0,
 96991.0,
 97848.0,
 98182.0,
 98229.0,
 98410.0,
 98530.0,
 98822.0,
 98860.0,
 99072.0,
 99131.0,
 99237.0,
 99347.0,
 99434.0,
 99532.0,
 99608.0,
 100425.0,
 101484.0,
 101554.0,
 101628.0,
 102318.0,
 102358.0,
 102527.0,
 102585.0,
 102651.0,
 102891.0,
 102912.0,
 103012.0,
 103035.0,
 103174.0,
 103369.0,
 103630.0,
 104514.0,
 104515.0,
 104923.0,
 105072.0,
 105298.0,
 105299.0,
 105343.0,
 106281.0,
 107304.0,
 107312.0,
 107744.0,
 107797.0,
 107968.0,
 108179.0,
 108410.0,
 108411.0,
 108435.0,
 108512.0,
 108648.0,
 108927.0,
 109118.0,
 109223.0,
 109224.0,
 109323.0,
 109353.0,
 109435.0,
 109436.

In [43]:
# display each group
for geoname, group in grouped:
    print('~~~~~~~~~~~~~~~~~~~',geoname,'~~~~~~~~~~~~~~~~~~~~~~')
    print(group)

~~~~~~~~~~~~~~~~~~~ 18918.0 ~~~~~~~~~~~~~~~~~~~~~~
        geoname_id         network  latitude  longitude
998732     18918.0  82.114.56.0/22   35.0125    34.0583
998733     18918.0  82.114.60.0/23   35.0125    34.0583
~~~~~~~~~~~~~~~~~~~ 32909.0 ~~~~~~~~~~~~~~~~~~~~~~
         geoname_id           network  latitude  longitude
2124523     32909.0  185.128.136.0/22   35.7004    50.9998
2124527     32909.0  185.128.152.0/22   35.7004    50.9998
2124530     32909.0  185.128.164.0/22   35.7004    50.9998
2125130     32909.0  185.136.192.0/22   35.7004    50.9998
2579058     32909.0   212.33.200.0/21   35.7004    50.9998
~~~~~~~~~~~~~~~~~~~ 49518.0 ~~~~~~~~~~~~~~~~~~~~~~
         geoname_id          network  latitude  longitude
40923       49518.0    5.62.63.84/30      -2.0       30.0
242554      49518.0   41.74.160.0/20      -2.0       30.0
245426      49518.0   41.138.82.0/23      -2.0       30.0
245428      49518.0   41.138.85.0/24      -2.0       30.0
245429      49518.0   41.138.86.0/2

KeyboardInterrupt: 

In [44]:
# display latitude/longitude values in each group
for geoname, group in grouped:
    print('~~~~~~~~~~~~~~~~~~~',geoname,'~~~~~~~~~~~~~~~~~~~~~~')
    print(group[['latitude','longitude']])

~~~~~~~~~~~~~~~~~~~ 18918.0 ~~~~~~~~~~~~~~~~~~~~~~
        latitude  longitude
998732   35.0125    34.0583
998733   35.0125    34.0583
~~~~~~~~~~~~~~~~~~~ 32909.0 ~~~~~~~~~~~~~~~~~~~~~~
         latitude  longitude
2124523   35.7004    50.9998
2124527   35.7004    50.9998
2124530   35.7004    50.9998
2125130   35.7004    50.9998
2579058   35.7004    50.9998
~~~~~~~~~~~~~~~~~~~ 49518.0 ~~~~~~~~~~~~~~~~~~~~~~
         latitude  longitude
40923        -2.0       30.0
242554       -2.0       30.0
245426       -2.0       30.0
245428       -2.0       30.0
245429       -2.0       30.0
248010       -2.0       30.0
248546       -2.0       30.0
...           ...        ...
2339185      -2.0       30.0
2339188      -2.0       30.0
2339189      -2.0       30.0
2339191      -2.0       30.0
2339192      -2.0       30.0
2339194      -2.0       30.0
2339195      -2.0       30.0

[65 rows x 2 columns]
~~~~~~~~~~~~~~~~~~~ 49747.0 ~~~~~~~~~~~~~~~~~~~~~~
         latitude  longitude
1857077    4.1213    4

KeyboardInterrupt: 

In [59]:
# display groups with more than one latitude/longitude value
groupCount = len(list(grouped.groups))
multiCoordinateCount = 0
for geoname_id, group in grouped:
    coordinateCount = len(group.groupby(['latitude', 'longitude']).count())
    if coordinateCount!=1:
        multiCoordinateCount = multiCoordinateCount + 1 
        #networkCount = len(group)
        #print('geoname_id',geoname_id,'has',networkCount,'networks with',coordinateCount,'unique latitude/longitude values')
print(multiCoordinateCount,'of',groupCount,'groups have more than one latitude/longitude value')

7603 of 98441 groups have more than one latitude/longitude value
