# Top .UK Domains Mapped

 ## Install Needed Libraries and  Grab Data 

We are going to install an NSLookup and GeoIP Python Library and grab a MaxMind lookup database. 

In [13]:
! curl -L https://git.io/GeoLite2-City.mmdb -o GeoLite2-City.mmdb 
! pip3 install geoip2
! pip3 install nslookup

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100   152  100   152    0     0    266      0 --:--:-- --:--:-- --:--:--   266
100   626  100   626    0     0    857      0 --:--:-- --:--:-- --:--:--   857
100 70.3M  100 70.3M    0     0  9866k      0  0:00:07  0:00:07 --:--:-- 11.0M
[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621[0m
[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issue

## Code

### Import Neeeded Libraries & Configurations 

In [14]:
import folium
from folium.plugins import MarkerCluster
import geoip2.database
import io
from nslookup import Nslookup
import os
import pandas as pd
import requests
import sys
import warnings
import zipfile

f = open(os.devnull, 'w')
sys.stderr = f
dns_query = Nslookup(dns_servers=["8.8.8.8"])

### Grab Latest DNS File

In [15]:
r = requests.get("http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip")
z = zipfile.ZipFile(io.BytesIO(r.content))
z.extractall()

### Read The CSV into a dataframe

In [16]:
df = pd.read_csv ('top-1m.csv', names=['Rank', 'DNS'], header=None)
df['DNS'] = df['DNS'].astype('str')

### Remove Rows That Are Not .co.uk

In [17]:
topuk = df[df["DNS"].str.contains(".co.uk")]
topuk = topuk.dropna().head(500)

### Perform DNS Lookups On .co.uk Domains

In [18]:
topuk['ip_address'] = topuk['DNS'].map(lambda host:dns_query.dns_lookup(host).answer)
topuk["ip_address"] = topuk["ip_address"].str[0]
topuk = topuk.dropna()
topuk

Unnamed: 0,Rank,DNS,ip_address
2650,2651,google.co.uk,142.250.180.3
3306,3307,www.google.co.uk,216.58.212.227
3872,3873,amazon.co.uk,54.239.34.171
4613,4614,bbc.co.uk,151.101.0.81
4860,4861,metadsp.co.uk,34.237.229.167
...,...,...,...
90237,90238,www.national-lottery.co.uk,194.246.78.103
90421,90422,static-guim-co-uk.cdn.ampproject.org,142.250.178.1
90423,90424,visualsoft.co.uk,109.108.148.102
90521,90522,iplayer-web.files.bbci.co.uk,104.72.154.141


### Preform GeoIP Lookup Of IP Addresses

In [19]:
def get_latitude(ip):
    try:
        reader = geoip2.database.Reader('GeoLite2-City.mmdb')
        response = reader.city(ip)
        output = response.location.latitude
        return output
    except:
        return pd.np.nan


def get_longitude(ip):
    try:
        reader = geoip2.database.Reader('GeoLite2-City.mmdb')
        response = reader.city(ip)
        output = response.location.longitude
        return output
    except:
        return pd.np.nan

def get_country(ip):
    try:
        reader = geoip2.database.Reader('GeoLite2-City.mmdb')
        response = reader.city(ip)
        output = str(response.country.iso_code)
        return output
    except:
        return pd.np.nan

unique_ips = topuk['ip_address'].unique()
unique_ips = pd.Series(unique_ips, index = unique_ips)
topuk['Latitude'] = topuk['ip_address'].map(unique_ips.apply(get_latitude))
topuk['Longitude'] = topuk['ip_address'].map(unique_ips.apply(get_longitude))
topuk['Country'] = topuk['ip_address'].map(unique_ips.apply(get_country))
topuk = topuk.dropna()
topuk

Unnamed: 0,Rank,DNS,ip_address,Latitude,Longitude,Country
2650,2651,google.co.uk,142.250.180.3,37.7510,-97.8220,US
3306,3307,www.google.co.uk,216.58.212.227,37.7510,-97.8220,US
3872,3873,amazon.co.uk,54.239.34.171,53.3382,-6.2591,IE
4613,4614,bbc.co.uk,151.101.0.81,37.7510,-97.8220,US
4860,4861,metadsp.co.uk,34.237.229.167,39.0469,-77.4903,US
...,...,...,...,...,...,...
90237,90238,www.national-lottery.co.uk,194.246.78.103,51.4964,-0.1224,GB
90421,90422,static-guim-co-uk.cdn.ampproject.org,142.250.178.1,37.7510,-97.8220,US
90423,90424,visualsoft.co.uk,109.108.148.102,55.7626,-4.1831,GB
90521,90522,iplayer-web.files.bbci.co.uk,104.72.154.141,51.5095,-0.0955,GB


### Map Sites

In [20]:
map = folium.Map(location=[50, 0], tiles='cartodb positron', zoom_start=4)

mcluster = MarkerCluster().add_to(map)

for index, row in topuk.iterrows():
    folium.Marker(location=[row['Latitude'],row['Longitude']], popup=(row.DNS+"\n"+row.ip_address)).add_to(mcluster)

map