# Traceroute Analysis

In [1]:
import pandas as pd
from sqlalchemy import create_engine
import pycountry
from countryinfo import CountryInfo
import json
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import geocoder as gc
import geoip2.database
import geopandas
import time
import datetime

# Shared SQLAlchemy engine used by the queries in this notebook.
# URL format: postgresql://user:password@host:port/databasename
# NOTE(review): the credentials are hard-coded in the URL below; consider
# loading the connection string from an environment variable or config file.
engine = create_engine("postgresql://postgres:postgres@vm-robert-richter.cloud.dhclab.i.hpi.de:5432/postgres")

## Visualizing Country Traceroute

In [None]:
def _load_traceroutes(countries, start, end):
    """Query the traceroutes of probes in `countries` using bound parameters."""
    # Imported locally: the notebook's import cell only pulls in create_engine.
    from sqlalchemy import text

    params = {"country_" + str(i): code for i, code in enumerate(countries)}
    where = "(" + " or ".join("country = :" + name for name in params) + ")"
    # As before, the time window is only applied when both bounds are given.
    if start != -1 and end != -1:
        where += " and timestamp >= :start and timestamp < :end"
        params["start"] = start
        params["end"] = end

    query = text(
        "SELECT * FROM (traceroute_data td JOIN ripe_atlas_probe_data pbd ON td.prb_id = pbd.id) "
        "WHERE " + where + " and destination_ip_responded = True"
    )
    return pd.read_sql_query(query, con=engine, params=params).drop(
        columns=['msm_id', 'prb_id', 'af', 'paris_id', 'destination_ip_responded', 'source_platform', 'id', 'ipv4', 'asn', 'longitude', 'latitude', 'country'])


def _first_reply_ips(route):
    """Return the `from` address of the first reply of every hop that has one."""
    ips = []
    for hop in route:
        # A hop may carry no 'result' list at all; treat that as "no reply".
        replies = hop.get('result') or []
        ip = replies[0].get('from') if replies else None
        if isinstance(ip, str):
            ips.append(ip)
    return ips


def _locate_routes(ip_routes):
    """Map every route to a list of (country, city, latitude, longitude) tuples."""
    from geoip2.errors import AddressNotFoundError

    located_routes = []
    # The context manager closes the database file once all lookups are done.
    with geoip2.database.Reader('./data/GeoLite2-City.mmdb') as reader:
        for route in ip_routes:
            located = []
            for ip in route:
                try:
                    res = reader.city(ip)
                except (AddressNotFoundError, ValueError):
                    # Address missing from the database or not a valid IP:
                    # skip this hop, keep the rest of the route.
                    continue
                lat, lon = res.location.latitude, res.location.longitude
                if lat is None or lon is None:
                    # Without coordinates the hop cannot be drawn.
                    continue
                located.append((res.country.name, res.city.name, lat, lon))
            located_routes.append(located)
    return located_routes


def visualise_traceroute(countries, start=-1, end=-1, df=None):
    """Draw the geolocated hops of traceroutes on a world map and save the figure.

    The traceroutes are taken from `df` when it is given and non-empty;
    otherwise they are loaded from the database for the probes located in
    `countries`. Every hop's first replying address is geolocated with the
    GeoLite2 city database, consecutive located hops are connected by arrows,
    and every distinct location is marked with a green dot. The figure is
    written to ``data/traceroute-origin-<start>-<end>-<countries>-`` as both
    ``.png`` and ``.svg`` and then shown.

    Parameters:
        countries: list of country codes as stored in the `country` column
            (the notebook passes values such as 'PH'); also used to build the
            output file name.
        start, end: bounds compared against the `timestamp` column
            (`start` inclusive, `end` exclusive). The filter is only applied
            when both differ from -1.
        df: optional DataFrame with a `result` column holding one JSON-encoded
            traceroute result per row. When given, no database query is made.

    Returns:
        None. Prints a message and returns early when there is nothing to plot.
    """
    if df is None or len(df) == 0:
        if not countries:
            print("No countries given and no traceroute data supplied, nothing to plot")
            return

        df = _load_traceroutes(countries, start, end)
        if len(df) == 0:
            print("No data available for countries " + str(countries) + " in the timeframe " + str(start) + " - " + str(end))
            return

    routes = [json.loads(raw_result) for raw_result in df['result']]
    located_routes = _locate_routes([_first_reply_ips(route) for route in routes])

    # Dicts are used as insertion-ordered sets: O(1) de-duplication while the
    # first-seen order is preserved.
    cities = {}
    points_to_plot = {}
    edges = {}
    for located in located_routes:
        coords = []
        for (_, city, lat, lon) in located:
            cities.setdefault(city)
            # Markers are keyed by position rather than by city name, so hops
            # that only resolve to country level (city is None) are all drawn.
            points_to_plot.setdefault((lon, lat))
            coords.append((lon, lat))
        for (x1, y1), (x2, y2) in zip(coords, coords[1:]):
            # Consecutive hops at the same location would give a zero-length arrow.
            if x1 != x2 or y1 != y2:
                edges.setdefault((x1, y1, x2, y2))

    print(list(cities))

    # Low resolution world outline used as the background of the plot.
    world = geopandas.read_file("./data/ne_110m_admin_0_countries.shp")
    world = world.set_index("SOV_A3")

    fig, gax = plt.subplots(figsize=(18, 10))
    world.plot(ax=gax, edgecolor='black', color='white')

    for x1, y1, x2, y2 in edges:
        gax.arrow(x1, y1, x2 - x1, y2 - y1, width=0.8, length_includes_head=True, head_length=5)

    gax.set_xlabel('longitude')
    gax.set_ylabel('latitude')

    gax.spines['top'].set_visible(False)
    gax.spines['right'].set_visible(False)

    for (x, y) in points_to_plot:
        gax.plot(x, y, 'go')

    filename = "data/traceroute-origin-" + str(start) + "-" + str(end) + "-" + "".join(c + "-" for c in countries)
    fig.savefig(filename + ".png")
    fig.savefig(filename + ".svg")
    plt.show()

def timestamp_from_date(date):
    """Convert a ``DD.MM.YYYY`` date string (e.g. ``13.01.2022``) to a timestamp.

    The string is parsed as midnight of that day and converted with
    ``time.mktime``, i.e. it is interpreted in the local timezone of the
    machine running the code. The result is a float.
    """
    parsed = datetime.datetime.strptime(date, "%d.%m.%Y")
    local_struct = parsed.timetuple()
    return time.mktime(local_struct)

# Boundaries of the observation periods (midnight of the given day).
start_2022 = timestamp_from_date("01.01.2022")
start_2023 = timestamp_from_date("01.01.2023")
start_2024 = timestamp_from_date("01.01.2024")
now = timestamp_from_date("01.07.2024")

# One map per country and period: all periods of PH first, then ES, then KI.
periods = [(start_2022, start_2023), (start_2023, start_2024), (start_2024, now)]
for country_code in ('PH', 'ES', 'KI'):
    for period_start, period_end in periods:
        visualise_traceroute([country_code], period_start, period_end)

# Load a locally stored traceroute measurement and plot it directly, bypassing
# the database. Only the first line of the file is parsed as JSON.
with open("./data/kroot_traceroute_terrestrial_connection.json", "r") as f:
    d = json.loads(f.readlines()[0])

# visualise_traceroute expects each row's `result` to be a JSON string, so the
# already-parsed hop lists are serialised again.
data = [json.dumps(measurement['result']) for measurement in d]
df = pd.DataFrame(data, columns=['result'])

visualise_traceroute([], df=df)