In [47]:
import parse_output
import pandas as pd
import time
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import csv
import pyasn

In [14]:
asndb = pyasn.pyasn('data/ip-to-asn.dat')
def was_traceroute_successful(traceroute):
    traceroute_ips = traceroute.filter(regex=".*ip.*")
    """Returns true if we found the resolved IP in the traceroute (success)."""
    ips = list(traceroute_ips)
    resolved_ips = [ip for ip in ips if ip is not None]
    dest_ip = list(resolved_ips)[0]
    if dest_ip in resolved_ips[1:]:
        return True
    dest_asn = asndb.lookup(dest_ip)
    if dest_asn in [asndb.lookup(ip) for ip in resolved_ips[1:]]:
        return True
    return False

In [15]:
def get_final_rtt(traceroute_rtts):
    traceroute_rtts = traceroute_rtts.filter(regex='.*rtt.*')
    final_rtt = None
    for rtt in traceroute_rtts:
        try:
            final_rtt = float(rtt)
        except:
            pass
    assert final_rtt != None
    return final_rtt 

In [16]:
def cleanup_trees_for_rtt_analysis(trees):
    # Only consider successful rtts
    print(f'{len(trees)} total traceroutes.')
    successful_rtt_trees = trees[trees.apply(lambda x: was_traceroute_successful(x), axis=1)]

    # We only care about the rtts
    print(f'{len(successful_rtt_trees)} fully successful traceroutes.')
    rtt_trees = successful_rtt_trees.filter(regex=".*rtt.*|dest name|time")
    # We only want the final rtt we observed
    rtt_trees["final_rtt"] = rtt_trees.apply(lambda row: get_final_rtt(row), axis=1)

    rtt_trees['time'] = pd.to_datetime(rtt_trees['time'].apply(lambda x: time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(x))))

    # Simplified into 3 columns
    simplified_rtts = rtt_trees[["dest name", "final_rtt", "time"]]

    return simplified_rtts

In [17]:
aws_ohio_data_filepath = "data/aws-ohio-results-1637264695.csv"
aws_ohio_trees = parse_output.parse_output(aws_ohio_data_filepath)
aws_ohio_simplified_rtts = cleanup_trees_for_rtt_analysis(aws_ohio_trees)

home_victor_data_filepath = "data/home-cambridge-victor-results-1637268720.csv"
home_victor_trees = parse_output.parse_output(home_victor_data_filepath)
home_victor_simplified_rtts = cleanup_trees_for_rtt_analysis(home_victor_trees)

36860 total traceroutes.
9013 fully successful traceroutes.




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



29421 total traceroutes.
11338 fully successful traceroutes.


In [None]:
 # inner join the dest names to filter on
ohio_dests = pd.DataFrame(aws_ohio_simplified_rtts["dest name"].unique())
home_dests = pd.DataFrame(home_victor_simplified_rtts["dest name"].unique())
merged_dests = ohio_dests.merge(home_dests, how="inner")
aws_ohio_comparable = aws_ohio_simplified_rtts[aws_ohio_simplified_rtts.apply(lambda row: row['dest name'] in list(merged_dests[0]), axis=1)]
home_comparable = home_victor_simplified_rtts[home_victor_simplified_rtts.apply(lambda row: row['dest name'] in list(merged_dests[0]), axis=1)]
home_comparable.to_csv("output/home-comparable.csv")
aws_ohio_comparable.to_csv("output/aws-ohio-comparable.csv")

In [5]:
home_victor_data_filepath = "data/home-cambridge-victor-results-1637268720.csv"
home_victor_trees = parse_output.parse_output(home_victor_data_filepath)

In [10]:
weekend_start = datetime(year=2021, month=11, day=19, hour=12).timestamp()
weekend_end = datetime(year=2021, month=11, day=22, hour=0).timestamp()
weekday_end = datetime(year=2021, month=11, day=25, hour=0).timestamp()

home_victor_weekend_trees = home_victor_trees[(home_victor_trees.time > weekend_start) & (home_victor_trees.time < weekend_end)]
home_victor_weekday_trees = home_victor_trees[(home_victor_trees.time > weekend_end) & (home_victor_trees.time < weekday_end)]

weekend_trees = cleanup_trees_for_rtt_analysis(home_victor_weekend_trees)
weekend_trees.to_csv("output/home-victor-weekend.csv")
weekday_trees = cleanup_trees_for_rtt_analysis(home_victor_weekday_trees)
weekday_trees.to_csv("output/home-victor-weekday.csv")

6156 total traceroutes.
2384 fully successful traceroutes.
5675 total traceroutes.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rtt_trees["final_rtt"] = rtt_trees.apply(lambda row: get_final_rtt(row), axis=1)


2173 fully successful traceroutes.


In [None]:
### Plot time series
for name, df in home_victor_simplified_rtts.groupby('dest name'):
    df['smoothed_rtt'] = df['final_rtt'].rolling(10).mean()
    #fig = px.line(df, x='time', y='smoothed_rtt', title=name)
    ymin, ymax = df['smoothed_rtt'].min(), df['smoothed_rtt'].max()
    df['weekend'] = np.where(df['time'].dt.day_name().isin(['Saturday', 'Sunday']), ymax, ymin)
    data = []
    data.append(
        go.Scatter(x=df['time'], y=df['weekend'], fill='tonext', fillcolor='#d9d9d9', mode='lines', line=dict(width=0, shape='hvh'), showlegend=False, hoverinfo=None,)
    )
    data.append(
        go.Scatter(
            x=df['time'],
            y=df['smoothed_rtt'],
            mode='lines+markers',
        )
    )
    fig = go.Figure(data=data)
    fig.update_layout(title=name)
    fig.show()