In [1]:
import datetime
import math
import os
import re
import subprocess

import pandas as pd
from bokeh.models import LinearAxis, Range1d
from bokeh.plotting import figure, output_file, output_notebook, show, save, reset_output
from scipy.stats import describe

# Clear any previous Bokeh output configuration, then register two targets:
# inline notebook display and an HTML file named after today's date.
reset_output()
output_notebook()
output_file(datetime.date.today().isoformat() + '_plot.html')

# Collect data

In [2]:
host = "www.google.com"
n_tests = 50
run_hours = 3
stop_time = datetime.datetime.now() + datetime.timedelta(hours=run_hours)

print(f'Test will stop at {stop_time}')

# Raw ping output for each batch, keyed by the batch start time ('HH:MM:SS').
result_string_dict = {}

# Make sure the output directory exists up front; otherwise the first write
# would fail only after a whole batch of pings has already been run.
os.makedirs('results', exist_ok=True)

# Keep testing until stop time has been passed. Will usually run slightly over,
# especially if n_tests is large, because a batch is never interrupted.
while datetime.datetime.now() < stop_time:
    now = datetime.datetime.now()
    test_time = now.time().isoformat('seconds')

    # Use OSX ping to perform tests in "batches" of n_tests
    ping = subprocess.run(
        ['ping', '-c', f'{n_tests}', host],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    result = ping.stdout.decode('utf-8')

    # If ping produced no output at all, report what it wrote to stderr
    # instead of silently storing an empty result.
    if not result:
        error = ping.stderr.decode('utf-8', errors='replace').strip()
        print(f'ping produced no output at {test_time}: {error}')

    # Store with time as a key for later use
    result_string_dict[test_time] = result

    # Save each result to a file. These files aren't used in the rest of the script, but
    # storing the raw data is a good idea. We can always write a quick script to collect
    # the data again if this crashes / gets interrupted or whatever.
    with open(f'results/{now.date()}_{test_time}_ping-test_x{n_tests}.txt', 'w') as result_file:
        result_file.write(result)

print("COMPLETE")

COMPLETE


# Data munging

In [3]:
def extract_ping_data(time, raw_string):
    """
    Summarise the raw output of one ping batch as a dictionary of key statistics.

    The parsing is based on the output layout of the osx ping command: a header
    line, one line per probe, then a trailer that starts with a line beginning
    '---'. If you're running this on a different version or os then you may
    have to update the pattern.

    Parameters
    ----------
    time : str
        Start time of the batch, formatted as 'HH:MM:SS'.
    raw_string : str
        Complete stdout captured from the ping command.

    Returns
    -------
    dict
        Keys, in order: 'time' (datetime.time), 'timeout_count' (number of
        non-blank probe lines that are not a successful reply), then 'min',
        'mean', 'max' and 'sd' of the reply times in ms. All four statistics
        are 0 when there were no replies; 'sd' is 0 when there was exactly
        one reply.

    Raises
    ------
    ValueError
        If `time` does not match the format '%H:%M:%S'.
    """
    # Collect the per-probe lines: everything after the header line up to the
    # '--- ... ping statistics ---' trailer. The trailer length varies (there
    # is no round-trip line when every probe is lost), so a fixed-size slice
    # from the end would drop a real probe line in that case.
    r_ping_list = []
    for line in raw_string.splitlines()[1:]:
        if line.startswith('---'):
            break
        if line.strip():
            r_ping_list.append(line)

    # Extract ping times. The fractional part is optional so that replies
    # reported as whole milliseconds are not miscounted as timeouts.
    ping_time_regex = (
        r'\d+ bytes from \d+\.\d+\.\d+\.\d+: icmp_seq=\d+ ttl=\d+ '
        r'time=(\d+(?:\.\d+)?) ms'
    )
    matched = [re.fullmatch(ping_time_regex, line) for line in r_ping_list]
    ping_times = [float(match.group(1)) for match in matched if match is not None]

    # Populate fields of interest, dealing with the special case of ALL timeouts.
    return_dict = {
        'time': datetime.datetime.strptime(time, '%H:%M:%S').time(),
        'timeout_count': sum(1 for match in matched if match is None)
    }
    if len(ping_times) > 1:
        statistics = describe(ping_times)
        return_dict['min'] = statistics.minmax[0]
        return_dict['mean'] = statistics.mean
        return_dict['max'] = statistics.minmax[1]
        return_dict['sd'] = statistics.variance**(1/2)
    elif ping_times:
        # A single reply has no spread; report 0 rather than the NaN that a
        # one-sample variance would produce.
        only_time = ping_times[0]
        return_dict['min'] = only_time
        return_dict['mean'] = only_time
        return_dict['max'] = only_time
        return_dict['sd'] = 0.0
    else:
        return_dict['min'] = 0
        return_dict['mean'] = 0
        return_dict['max'] = 0
        return_dict['sd'] = 0

    return return_dict

In [4]:
# Parse every (start time, raw ping output) pair into a row of statistics,
# then assemble the rows into a DataFrame indexed by batch start time.
extracted = list(
    map(lambda entry: extract_ping_data(*entry), result_string_dict.items())
)
data = pd.DataFrame(extracted).set_index('time')
data.head()

Unnamed: 0_level_0,max,mean,min,sd,timeout_count
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
09:15:33,356.276,48.73668,35.166,49.514176,0
09:16:24,4035.124,461.70064,34.711,1026.963041,3
09:17:16,2357.391,190.49754,34.463,509.212515,2
09:18:05,601.346,55.55106,35.496,80.968208,0
09:18:54,60.45,37.72206,34.408,3.774526,0


# Visualise

In [5]:
# Figure whose primary y range runs from 0 to the largest ping time recorded.
p = figure(
    plot_width=900,
    plot_height=600,
    x_axis_type='datetime',
    y_range=(0, data['max'].max()),
)

# Label the primary y axis (the timeout axis is only added further down).
p.yaxis.axis_label = 'Ping Time (ms)'

# Secondary y range for the timeout bars, scaled to the highest timeout count.
p.extra_y_ranges = {'timeout': Range1d(start=0, end=data['timeout_count'].max())}

# Batch start times, shared as the x values of every glyph.
batch_times = data.index.tolist()

# Timeout bars, drawn first and plotted against the secondary range.
# NOTE(review): width is presumably in milliseconds on a datetime axis (30 s) - confirm.
p.vbar(
    x=batch_times,
    top=data['timeout_count'],
    width=30000,
    bottom=0,
    color='khaki',
    alpha=0.4,
    legend='timeouts',
    y_range_name='timeout',
)

# Max, mean and min ping lines, added in that order. The mean line passes no
# alpha, matching the original call.
for column, label, line_style in (
    ('max', 'max ping', {'color': 'red', 'alpha': 0.5}),
    ('mean', 'mean ping', {'color': 'black'}),
    ('min', 'min ping', {'color': 'green', 'alpha': 0.5}),
):
    p.line(batch_times, data[column], legend=label, line_width=1, **line_style)

# Attach the axis for the timeout range on the right-hand side of the plot.
timeout_axis = LinearAxis(y_range_name='timeout', axis_label='Time out count')
p.add_layout(timeout_axis, 'right')

p.title.text = f'Ping to {host} between {data.index.min()} and {data.index.max()}'

save(p)
show(p)