**BD Domains Enrichment**

In [3]:
%%capture
! curl -L https://git.io/GeoLite2-City.mmdb -o GeoLite2-City.mmdb
! pip3 install geoip2
! pip3 install nslookup

In [4]:
%%capture
!wget -q https://bangladesh-domains.domaintools.com/bangladesh-domains-2022-03-{01..04}.txt
!wget -q https://bangladesh-domains.domaintools.com/bangladesh-domains-2022-02-{25..28}.txt
!grep -h -v ^#  bangladesh-domains-2022-03-{01..03}.txt >> domains.txt
!grep -h -v ^#  bangladesh-domains-2022-02-{25..28}.txt >> domains.txt
!wc -l domains.txt
!sort -u -o domains.txt domains.txt
!wc -l domains.txt

In [5]:
import folium
from folium.plugins import MarkerCluster
import geoip2.database
import io
from nslookup import Nslookup
import os
import pandas as pd
import requests
import sys
import warnings
import zipfile

f = open(os.devnull, 'w')
sys.stderr = f
dns_query = Nslookup(dns_servers=["8.8.8.8"])

In [6]:
df = pd.read_csv ('domains.txt', names=['DNS'], header=None)
df['DNS'] = df['DNS'].astype('str')

In [7]:
df['ip_address'] = df['DNS'].map(lambda host:dns_query.dns_lookup(host).answer)
df["ip_address"] = df["ip_address"].str[0]
df = df.dropna()
df

Unnamed: 0,DNS,ip_address
0,01soft.com.bd,103.125.255.10
1,1000cars.com.bd,74.208.42.202
2,1000fix.com.bd,85.187.128.60
3,1001tk.com.bd,66.29.153.222
4,10fix.com.bd,103.161.46.131
...,...,...
1939,csra.teletalk.com.bd,103.230.107.248
1940,csra.teletalk.com.bd,103.230.107.248
1941,csra.teletalk.com.bd,103.230.107.248
1942,alljobs.teletalk.com.bd,103.230.106.215


In [8]:
def get_latitude(ip):
    try:
        reader = geoip2.database.Reader('GeoLite2-City.mmdb')
        response = reader.city(ip)
        output = response.location.latitude
        return output
    except:
        return pd.np.nan


def get_longitude(ip):
    try:
        reader = geoip2.database.Reader('GeoLite2-City.mmdb')
        response = reader.city(ip)
        output = response.location.longitude
        return output
    except:
        return pd.np.nan

def get_country(ip):
    try:
        reader = geoip2.database.Reader('GeoLite2-City.mmdb')
        response = reader.city(ip)
        output = str(response.country.iso_code)
        return output
    except:
        return pd.np.nan

unique_ips = df['ip_address'].unique()
unique_ips = pd.Series(unique_ips, index = unique_ips)
df['Latitude'] = df['ip_address'].map(unique_ips.apply(get_latitude))
df['Longitude'] = df['ip_address'].map(unique_ips.apply(get_longitude))
df['Country'] = df['ip_address'].map(unique_ips.apply(get_country))
df = df.dropna()
df

Unnamed: 0,DNS,ip_address,Latitude,Longitude,Country
0,01soft.com.bd,103.125.255.10,23.7018,90.3742,BD
1,1000cars.com.bd,74.208.42.202,37.7510,-97.8220,US
2,1000fix.com.bd,85.187.128.60,1.2868,103.8503,SG
3,1001tk.com.bd,66.29.153.222,37.7510,-97.8220,US
4,10fix.com.bd,103.161.46.131,23.7018,90.3742,BD
...,...,...,...,...,...
1939,csra.teletalk.com.bd,103.230.107.248,22.8159,89.5660,BD
1940,csra.teletalk.com.bd,103.230.107.248,22.8159,89.5660,BD
1941,csra.teletalk.com.bd,103.230.107.248,22.8159,89.5660,BD
1942,alljobs.teletalk.com.bd,103.230.106.215,22.8159,89.5660,BD


In [9]:
df['Country'].value_counts().head(10)

Unnamed: 0_level_0,count
Country,Unnamed: 1_level_1
BD,515
US,273
SG,88
DE,45
IN,27
FI,25
CA,14
GB,10
NL,7
PL,5


In [10]:
df.to_csv('BD-Domains.csv', index=False)


In [11]:
map = folium.Map(location=[0, 0], tiles='cartodb positron', zoom_start=3)

mcluster = MarkerCluster().add_to(map)

for index, row in df.iterrows():
    folium.Marker(location=[row['Latitude'],row['Longitude']], popup=(row.DNS+"\n"+row.ip_address)).add_to(mcluster)

map.save("index.html")
map
