In [1]:
import datetime
import math
import os
import re
import subprocess

import pandas as pd
from bokeh.models import LinearAxis, Range1d
from bokeh.plotting import figure, output_file, output_notebook, show, save, reset_output
from scipy.stats import describe

# Output to notebook & file
# reset_output() runs first, presumably so that re-running this cell does not
# stack up output targets from a previous execution.
reset_output()
output_notebook()
# The HTML file name starts with today's date, e.g. '2020-01-31_plot.html'.
output_file(f'{datetime.datetime.now().date()}_plot.html')

# Collect data

In [None]:
# Collection settings: which host to ping, how many echo requests per batch,
# and for how long to keep launching batches.
host = "www.google.com"
n_tests = 50
run_hours = 3
results_dir = 'results'
stop_time = datetime.datetime.now() + datetime.timedelta(hours=run_hours)

# Raw ping stdout for every batch, keyed by the batch start time ('HH:MM:SS').
result_string_dict = {}

# Each batch is also written to a text file in results_dir; create the
# directory up front so the first write cannot fail on a fresh checkout.
os.makedirs(results_dir, exist_ok=True)

# NOTE(review): if ping exits immediately (e.g. no network), this loop spins
# without delay and batches started within the same second overwrite each other.
while datetime.datetime.now() < stop_time:
    now = datetime.datetime.now()
    test_time = now.time().isoformat('seconds')

    # Blocks until ping has finished its n_tests echo requests.
    ping = subprocess.run(
        ["ping", "-c", str(n_tests), host],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    # Surface anything ping reported on stderr instead of dropping it silently.
    if ping.stderr:
        error_text = ping.stderr.decode('utf-8', errors='replace').strip()
        print(f"{test_time} ping stderr: {error_text}")

    result = ping.stdout.decode('utf-8')
    result_string_dict[test_time] = result

    result_path = os.path.join(
        results_dir, f'{now.date()}_{test_time}_ping-test_x{n_tests}.txt'
    )
    with open(result_path, 'w') as result_file:
        result_file.write(result)

print("COMPLETE")

# Data munging

In [None]:
def extract_ping_data(time, raw_string):
    """
    Summarise the raw stdout of one ping run as a dictionary.

    Parameters
    ----------
    time : str
        Start time of the run, formatted as 'HH:MM:SS'.
    raw_string : str
        Captured stdout of ping. The first line is treated as the header and
        everything from the '--- ... ---' statistics banner onwards is ignored.

    Returns
    -------
    dict
        'time'          : datetime.time parsed from `time`
        'timeout_count' : number of non-empty per-packet lines that are not a
                          successful reply
        'min', 'mean', 'max', 'sd' : statistics of the reply times in ms, as
                          computed by scipy.stats.describe; all four are 0
                          when no reply line was parsed

    Raises
    ------
    ValueError
        If `time` does not match '%H:%M:%S'.
    """
    # Accepts both "from 1.2.3.4:" and "from host.name (1.2.3.4):", and both
    # decimal ("12.3 ms") and integer ("120 ms") round-trip times.
    ping_time_regex = (
        r'\d+ bytes from (?:\S+ \()?\d+\.\d+\.\d+\.\d+\)?: '
        r'icmp_seq=\d+ ttl=\d+ time=(\d+(?:\.\d+)?) ms'
    )

    lines = raw_string.split('\n')
    # Locate the statistics banner rather than assuming a fixed-length footer:
    # the summary is shorter when no replies were received, and is missing
    # entirely when the output is empty or truncated.
    summary_start = next(
        (index for index, line in enumerate(lines) if line.startswith('--- ')),
        len(lines),
    )
    r_ping_list = [line for line in lines[1:summary_start] if line.strip()]

    matched = [re.fullmatch(ping_time_regex, line) for line in r_ping_list]
    ping_times = [float(match.group(1)) for match in matched if match is not None]
    return_dict = {
        'time': datetime.datetime.strptime(time, '%H:%M:%S').time(),
        'timeout_count': sum(1 for match in matched if match is None),
    }
    if ping_times:
        statistics = describe(ping_times)
        return_dict['min'] = statistics.minmax[0]
        return_dict['mean'] = statistics.mean
        return_dict['max'] = statistics.minmax[1]
        # Square root of the variance reported by describe().
        return_dict['sd'] = statistics.variance ** 0.5
    else:
        # No successful replies: report zeros so the row can still be plotted.
        return_dict['min'] = 0
        return_dict['mean'] = 0
        return_dict['max'] = 0
        return_dict['sd'] = 0
    return return_dict

In [None]:
# Turn every (start time, raw ping output) pair into one summary record,
# then build a frame of those records indexed by the start time.
extracted = list(
    map(lambda entry: extract_ping_data(*entry), result_string_dict.items())
)
data = pd.DataFrame(extracted)
data = data.set_index('time')
data.head()

# Visualise

In [None]:
# Upper bounds for the two y axes. Fall back to 1 when the observed maximum is
# 0 (no timeouts recorded / no replies parsed) so that neither range collapses
# to the degenerate interval (0, 0).
ping_axis_max = data['max'].max() or 1
timeout_axis_max = data['timeout_count'].max() or 1

# x positions shared by every glyph: one entry per ping batch
test_times = data.index.tolist()

# Width of the timeout bars; presumably milliseconds (30 s) on the datetime
# axis - TODO confirm against the batch spacing.
bar_width = 30000

p = figure(plot_width=900, plot_height=600,
           x_axis_type='datetime',
           y_range=(0, ping_axis_max))

p.yaxis.axis_label = 'Ping Time (ms)'

# Setting the second y axis range name and range
p.extra_y_ranges = {"timeout": Range1d(start=0, end=timeout_axis_max)}

# Add timeout bars, drawn against the secondary (right-hand) axis
p.vbar(x=test_times, top=data['timeout_count'],
       width=bar_width, bottom=0, color="khaki", alpha=0.4, legend='timeouts',
       y_range_name="timeout")

# Add the max / mean / min ping time lines; the mean is drawn fully opaque,
# the extremes are faded.
for column, label, line_colour, opacity in (
    ('max', 'max ping', 'red', 0.5),
    ('mean', 'mean ping', 'black', 1.0),
    ('min', 'min ping', 'green', 0.5),
):
    p.line(test_times, data[column], legend=label,
           line_width=1, color=line_colour, alpha=opacity)

# Adding the second axis to the plot.
p.add_layout(LinearAxis(y_range_name="timeout", axis_label="Time out count"), 'right')

p.title.text = f'Ping to {host} between {data.index.min()} and {data.index.max()}'

save(p)
# show the results
show(p)