In [1]:
from scapy.all import traceroute
import pandas as pd
import requests
import re
import json
import time

In [2]:
def get_re_url(data, param=None, timeout=10):
    """Fetch a URL and pull named fields out of the response body with regexes.

    Args:
        data: Lookup description, a dict with the keys
            "url"    -- address to fetch; every "{param}" occurrence is
                        replaced by ``param``;
            "expr"   -- mapping of field name -> regular expression whose
                        first capture group holds the field value;
            "lambda" -- optional callable applied to the dict of extracted
                        fields before it is returned.
        param: Value substituted for "{param}". It is converted with ``str``;
            ``None`` substitutes an empty string instead of raising.
        timeout: Seconds to wait for the HTTP response (forwarded to
            ``requests.get``) so a stalled server cannot hang the notebook.

    Returns:
        The dict of extracted fields (``""`` for every pattern that did not
        match), or whatever the "lambda" callable returns for that dict.
    """
    url, expr = data["url"], data["expr"]
    post_process = data.get("lambda")

    # str.replace only accepts strings, so normalise the parameter first.
    substitution = "" if param is None else str(param)
    response = requests.get(url.replace("{param}", substitution), timeout=timeout)
    body = response.text

    result = {}
    for field, pattern in expr.items():
        match = re.search(pattern, body)
        value = match.group(1) if match is not None else None
        # Unmatched patterns (and optional groups that did not take part in
        # the match) are reported as empty strings.
        result[field] = "" if value is None else value

    return result if post_process is None else post_process(result)

In [3]:
def convert_extreme_ip_lookup(data):
    """Convert the raw extreme-ip-lookup capture into a flat geo record.

    Args:
        data: Dict whose "data" entry holds the *inside* of a JSON object
            (the text between the braces); the braces are added back here
            before parsing.

    Returns:
        Dict with the keys "hostname", "country", "state", "city", "isp",
        "latitude" and "longitude". A key that is absent from the payload
        (including the case of an empty capture) maps to ``""``.

    Raises:
        json.JSONDecodeError: If the re-wrapped payload is not valid JSON.
    """
    obj = json.loads("{" + data["data"] + "}")

    # Output field -> key used by the service's JSON payload.
    field_map = {
        "hostname": "ipName",
        "country": "country",
        "state": "region",
        "city": "city",
        "isp": "isp",
        "latitude": "lat",
        "longitude": "lon",
    }

    # .get() keeps a failed or partial lookup from aborting a whole traceroute
    # run with a KeyError; the row simply shows blanks for what is missing.
    return {target: obj.get(source, "") for target, source in field_map.items()}

In [4]:
def _whatismyip_row(label, end="</td>"):
    """Build the regex for one labelled row of the whatismyipaddress.com page.

    The pattern targets markup of the form
    ``<tr><th>Label:</th><td>value</td>`` with optional whitespace around the
    pieces, and captures ``value`` in group 1.
    NOTE(review): the row layout is taken from the patterns this cell used
    before; confirm it against the live page.

    Args:
        label: Row caption without the trailing colon (e.g. "Hostname").
        end: Markup that terminates the value; "</td>" unless the cell
            carries extra content after the value.
    """
    # Raw string: "\s" reaches the regex engine untouched and no escaping of
    # "/" is needed. The lazy "(.*?)" stops at the first terminator instead of
    # the last one on the line and leaves surrounding whitespace out.
    return r"<tr>\s*<th>\s*" + label + r":\s*</th><td>\s*(.*?)\s*" + end


# Lookup descriptions consumed by get_re_url: "url" is the address to fetch
# ("{param}" is replaced by the IP), "expr" maps field name -> regex with one
# capture group, and the optional "lambda" post-processes the extracted dict.
re_tags = [
    {
        "url": "https://extreme-ip-lookup.com/{param}",
        "expr": {
            # Captures the text between the braces of the embedded object;
            # convert_extreme_ip_lookup parses it.
            "data": "URLIP={([^}]+)}",
        },
        "lambda": convert_extreme_ip_lookup,
    },
    {
        "url": "https://whatismyipaddress.com/ip/{param}",
        "expr": {
            "hostname": _whatismyip_row("Hostname"),
            # The country cell is followed by an <img> tag, so the value
            # ends there rather than at </td>.
            "country": _whatismyip_row("Country", end="<img"),
            "state": _whatismyip_row("State/Region"),
            "city": _whatismyip_row("City"),
            "isp": _whatismyip_row("ISP"),
            "latitude": _whatismyip_row("Latitude"),
            "longitude": _whatismyip_row("Longitude"),
        },
    },
]

In [27]:
from scapy.all import *

In [41]:
# Trace the route to the target host and geolocate every distinct hop.
result, _ = traceroute("www.cmjornal.pt", maxttl=32, verbose=False)

table = []
seen_addresses = set()

# Walk the answers in TTL order and look each address up only once. The same
# address can answer several probes, and every lookup costs an HTTP request
# plus a one-second pause, so repeated addresses are skipped up front; the
# lowest-TTL answer of each address is the one that is kept.
for snd, rcv in sorted(result, key=lambda pair: pair[0].ttl):
    if rcv.src in seen_addresses:
        continue
    seen_addresses.add(rcv.src)

    # A separate name keeps `result` bound to the traceroute answers.
    geo = get_re_url(re_tags[0], param=rcv.src)
    time.sleep(1)  # presumably a courtesy pause between lookup requests

    table.append((
        snd.ttl,
        rcv.src,
        geo["hostname"],
        geo["country"],
        geo["state"],
        geo["city"],
        geo["isp"],
        geo["latitude"],
        geo["longitude"],
        # Subtract first, then scale to milliseconds: multiplying the two
        # epoch timestamps by 1000 before subtracting throws away float
        # precision in the small difference.
        (rcv.time - snd.sent_time) * 1000,
    ))

# Rows are already unique per address and ordered by TTL.
table_df = pd.DataFrame(
    table,
    columns=["ttl", "address", "hostname", "country", "state", "city", "isp", "latitude", "longitude", "ms"],
)

table_df

Unnamed: 0,ttl,address,hostname,country,state,city,isp,latitude,longitude,ms
0,1,192.168.15.1,,,,,,,,1.477539
1,2,179.185.128.209,179.185.128.209.dynamic.adsl.gvt.net.br,Brazil,Sao Paulo,Indaiatuba,Telefonica Brasil S.a,-23.08842,-47.2119,3.078369
3,3,177.16.40.9,177.16.40.9.static.host.gvt.net.br,Brazil,Parana,Curitiba,Telefonica Brasil S.a,-25.42778,-49.27306,5.979492
2,4,152.255.133.70,152-255-133-70.user.vivozap.com.br,Brazil,Sao Paulo,Sao Paulo,Telefonica Brasil S.a,-23.5475,-46.63611,4.640625
4,6,216.184.112.148,grtfortw1-0-1-0-24-0-4.net.telefonicaglobalsol...,United States,Florida,Miami,Telxius,25.77427,-80.19366,12.706055
6,7,94.142.98.175,,Spain,Madrid,Madrid,Telxius,40.4165,-3.70256,83.216064
5,8,213.140.36.89,,Spain,Madrid,Madrid,Telxius,40.4165,-3.70256,82.218262
8,9,154.54.11.157,be3017.ccr21.mia03.atlas.cogentco.com,United States,Florida,Miami,African Network Information Center,25.77427,-80.19366,83.963867
7,10,154.54.47.29,be3401.ccr22.mia01.atlas.cogentco.com,United States,Florida,Miami,African Network Information Center,25.77427,-80.19366,81.962646
9,11,154.54.47.17,be3400.ccr21.mia01.atlas.cogentco.com,United States,Florida,Miami,African Network Information Center,25.77427,-80.19366,82.176514


In [44]:
# Trace the route to the target host and geolocate every distinct hop.
result, _ = traceroute("www.polytechnique.edu", maxttl=32, verbose=False)

table = []
seen_addresses = set()

# Walk the answers in TTL order and look each address up only once. The same
# address can answer several probes, and every lookup costs an HTTP request
# plus a one-second pause, so repeated addresses are skipped up front; the
# lowest-TTL answer of each address is the one that is kept.
for snd, rcv in sorted(result, key=lambda pair: pair[0].ttl):
    if rcv.src in seen_addresses:
        continue
    seen_addresses.add(rcv.src)

    # A separate name keeps `result` bound to the traceroute answers.
    geo = get_re_url(re_tags[0], param=rcv.src)
    time.sleep(1)  # presumably a courtesy pause between lookup requests

    table.append((
        snd.ttl,
        rcv.src,
        geo["hostname"],
        geo["country"],
        geo["city"],
        geo["isp"],
        geo["latitude"],
        geo["longitude"],
        # Round-trip time of the probe in milliseconds.
        (rcv.time - snd.sent_time) * 1000,
    ))

# Rows are already unique per address and ordered by TTL.
table_df = pd.DataFrame(
    table,
    columns=["ttl", "address", "hostname", "country", "city", "isp", "latitude", "longitude", "ms"],
)

table_df

Unnamed: 0,ttl,address,hostname,country,city,isp,latitude,longitude,ms
0,1,192.168.15.1,,,,,,,1.072168
1,2,179.185.128.209,179.185.128.209.dynamic.adsl.gvt.net.br,Brazil,Indaiatuba,Telefonica Brasil S.a,-23.08842,-47.2119,4.041433
3,3,177.16.40.11,177.16.40.11.static.host.gvt.net.br,Brazil,Curitiba,Telefonica Brasil S.a,-25.42778,-49.27306,5.130053
2,4,152.255.133.70,152-255-133-70.user.vivozap.com.br,Brazil,Sao Paulo,Telefonica Brasil S.a,-23.5475,-46.63611,3.709793
4,6,5.53.1.202,,Spain,Madrid,Telxius,40.4165,-3.70256,14.693499
5,7,176.52.250.246,,Spain,Madrid,Telxius,40.4165,-3.70256,80.699921
6,8,84.16.15.165,,United States,New York,Telxius,40.71427,-74.00597,81.404448
7,9,208.116.240.149,ae5.cr6-mia1.ip4.gtt.net,United States,Atlanta,GTT,33.749,-84.38798,82.136393
8,10,77.67.123.206,renater-gw-ix1.gtt.net,Germany,Dusseldorf,GTT,51.22172,6.77616,170.569658
12,12,194.199.156.25,,France,Paris,Renater,48.85341,2.3488,176.885605
