In [1]:
from IPython.display import display, Math, Latex, HTML
# Render a "Show Code" / "Hide Code" button as this cell's output.
# The embedded script hides every `div.input` element once the document is ready
# and toggles their visibility (and the button caption) on each form submit.
# It uses `$`, so jQuery must already be loaded on the page that renders this output.
HTML('''<script>
  function code_toggle() {
    if (code_shown){
      $('div.input').hide('500');
      $('#toggleButton').val('Show Code')
    } else {
      $('div.input').show('500');
      $('#toggleButton').val('Hide Code')
    }
    code_shown = !code_shown
  }
  
  $( document ).ready(function(){
    code_shown=false;
    $('div.input').hide()
  });
</script>
<form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Show Code"></form>''') 

## Ping latency, Upload/Download speeds by day of the week

In [2]:
#Load libraries:
from data_exploration import *

In [3]:
# Look-back window for the analysis; it is spliced into the `now()-...` clause
# of the InfluxDB queries built further down (an alternative value used before: 5d).
time_interval = "2w"
print("Time interval: ", time_interval)

Time interval:  2w


In [4]:
#Set up influxdb connection:
# connect_to_influxdb() returns two handles. In this notebook `client` is passed as
# `client_influx=` to get_tag_values_influxdb / get_3_stats_influxdb, and `client_df`
# is passed as `client_df=` to get_dataframe_from_influxdb.
# NOTE(review): no connection parameters are passed here, so host/credentials must be
# resolved inside the helper (presumably in data_exploration) — confirm where.
client, client_df = connect_to_influxdb()

### We will use 2 approaches:
#### By day of the week
We will find mean, max and median ping latency/upload/download speed for all devices grouped by day of the week.
#### By time group
We will find mean, max and median ping latency/upload/download speed for all devices grouped by:
 - **group1** Weekday: Mon, Tue, Wed, Thu, Fri
 - **group2** Weekend: Sat, Sun

## Ping Latency

### Data coming from collectd

In [5]:
# Fetch the values of the SK_PI tag on the PING measurement, convert each one
# with int() and keep them as an ascending list.
device_numbers = sorted(
    int(tag_value)
    for tag_value in get_tag_values_influxdb(
        client_influx=client, table_name="PING", tag_name="SK_PI"
    )
)
# print(device_numbers)  # uncomment to inspect the resulting ids

In [6]:
# InfluxQL text: MAX / MEAN / MEDIAN of non-zero PING values over the look-back
# window, bucketed per hour (`time(1h)`) and per SK_PI tag.
ping_mean_query = (
    "SELECT MAX(PING),MEAN(PING), MEDIAN(PING) FROM PING WHERE PING!=0 AND time >= now()-"
    + time_interval
    + " GROUP BY time(1h), SK_PI;"
)

In [7]:
# Execute the hourly aggregate query through get_3_stats_influxdb.
# The three stat names follow the SELECT order of the query (MAX, MEAN, MEDIAN);
# later cells read the result through its "time", "SK_PI", "max", "mean" and
# "median" columns.
ping_hourly_dataframe = get_3_stats_influxdb(
    client_influx=client,
    query_influx=ping_mean_query,
    device_numbers=device_numbers,
    stat_name1="max",
    stat_name2="mean",
    stat_name3="median",
)

In [8]:
# Manual cross-check, intentionally left disabled: compare the rows of one device
# in ping_hourly_dataframe with the same aggregate queried directly for SK_PI='3'.
# NOTE(review): the query below looks back 4w while time_interval is set to 2w in
# this notebook — align the two before relying on the comparison.
#ping_hourly_dataframe[ping_hourly_dataframe["SK_PI"]==3]
#q="SELECT MAX(PING), MEAN(PING), MEDIAN(PING) FROM PING WHERE PING!=0 AND time >= now()-4w AND SK_PI='3' GROUP BY time(1h);"
#ping_df = get_dataframe_from_influxdb(client_df=client_df,query_influx=q,table_name='PING')
#ping_df

In [9]:
# Label each hourly row with the English name of its weekday, then store the
# column as a categorical whose categories are listed Monday-first.
# `Series.dt.weekday_name` was deprecated in pandas 0.23 and removed in 1.0;
# `Series.dt.day_name()` yields the same English day names.
ping_hourly_dataframe["weekday"] = ping_hourly_dataframe["time"].dt.day_name()
ping_hourly_dataframe["weekday"] = pd.Categorical(
    ping_hourly_dataframe["weekday"],
    categories=["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"],
)

In [10]:
# Hand the hourly rows of a single device to simple_boxplot, plotting the "mean"
# column against "weekday".
# NOTE(review): the effect of `weekdays=True` is defined inside simple_boxplot
# and is not visible from this notebook.
device_number = 7
subset = ping_hourly_dataframe.loc[ping_hourly_dataframe["SK_PI"] == device_number]
simple_boxplot(
    dataframe=subset,
    plot_value="mean",
    sort_value="weekday",
    weekdays=True,
    ytitle="Miliseconds",
    title=(
        "Ping latency(collectd) by day of the week for the device "
        + str(device_number)
        + " over the last "
        + time_interval
    ),
)

In [11]:
# Summarise the hourly collectd ping frame with mean_max_median_by2, grouping on
# "weekday". The following cells filter the result on "SK_PI" / "weekday" and
# read its "max", "mean" and "median" columns.
by_weekday_by_device_p1 = mean_max_median_by2(
    input_dataframe=ping_hourly_dataframe,
    group_by_value="weekday",
    value1="mean",
    value2="max",
    value3="median",
)

In [12]:
# Max / mean / median collectd ping per weekday for one device, drawn as three
# bar traces that are not stacked (stack=False).
device_number = 7
subset = by_weekday_by_device_p1.loc[by_weekday_by_device_p1["SK_PI"] == device_number]
combined_bar_plot_3traces(
    title=(
        "Ping latency(collectd) by day of the week for the device "
        + str(device_number)
        + " over the last "
        + time_interval
    ),
    xtitle="weekday",
    xvalues=subset["weekday"],
    yvalues1=subset["max"],
    name1="Max",
    yvalues2=subset["mean"],
    name2="Mean",
    yvalues3=subset["median"],
    name3="Median",
    stack=False,
)

In [13]:
# Max / mean / median collectd ping of every device for one chosen weekday;
# the x axis carries the SK_PI values.
weekday = "Sunday"
subset = by_weekday_by_device_p1.loc[by_weekday_by_device_p1["weekday"] == weekday]
combined_bar_plot_3traces(
    title=(
        "Ping latency(collectd) by day of the week for the day "
        + str(weekday)
        + " over the last "
        + time_interval
    ),
    xvalues=subset["SK_PI"],
    yvalues1=subset["max"],
    name1="Max",
    yvalues2=subset["mean"],
    name2="Mean",
    yvalues3=subset["median"],
    name3="Median",
    stack=False,
)

In [14]:
# Turn off pandas' chained-assignment warning; this is a process-wide option,
# so it also applies to every later cell.
pd.options.mode.chained_assignment = None

# Every row starts as "Weekday"; rows whose weekday is Saturday or Sunday are
# then relabelled "Weekend".
ping_hourly_dataframe["day_group"] = "Weekday"
ping_hourly_dataframe.loc[
    ping_hourly_dataframe["weekday"].isin(["Saturday", "Sunday"]),
    "day_group",
] = "Weekend"

In [15]:
# Hand the hourly rows of a single device to simple_boxplot, plotting the "mean"
# column against the Weekday/Weekend "day_group" label.
device_number = 7
subset = ping_hourly_dataframe.loc[ping_hourly_dataframe["SK_PI"] == device_number]
simple_boxplot(
    dataframe=subset,
    plot_value="mean",
    sort_value="day_group",
    ytitle="Miliseconds",
    title=(
        "Ping latency(collectd) by day group for the device "
        + str(device_number)
        + " over the last "
        + time_interval
    ),
)

In [16]:
# Summarise the hourly collectd ping frame with mean_max_median_by2, grouping on
# the Weekday/Weekend "day_group" label.
by_group_by_device_p1 = mean_max_median_by2(
    input_dataframe=ping_hourly_dataframe,
    group_by_value="day_group",
    value1="mean",
    value2="max",
    value3="median",
)

In [17]:
# Max / mean / median collectd ping per day group for one device, drawn as
# three bar traces that are not stacked.
device_number = 7
subset = by_group_by_device_p1.loc[by_group_by_device_p1["SK_PI"] == device_number]
combined_bar_plot_3traces(
    title=(
        "Ping latency(collectd) by day group for the device "
        + str(device_number)
        + " over the last "
        + time_interval
    ),
    xtitle="day_group",
    xvalues=subset["day_group"],
    yvalues1=subset["max"],
    name1="Max",
    yvalues2=subset["mean"],
    name2="Mean",
    yvalues3=subset["median"],
    name3="Median",
    stack=False,
)

In [18]:
# Max / mean / median collectd ping of every device for one chosen day group;
# the x axis carries the SK_PI values.
day_group = "Weekend"
subset = by_group_by_device_p1.loc[by_group_by_device_p1["day_group"] == day_group]
combined_bar_plot_3traces(
    title=(
        "Ping latency(collectd) by device for "
        + day_group
        + "s over the last "
        + time_interval
    ),
    xvalues=subset["SK_PI"],
    yvalues1=subset["max"],
    name1="Max",
    yvalues2=subset["mean"],
    name2="Mean",
    yvalues3=subset["median"],
    name3="Median",
    stack=False,
)

### Data coming from speedtest

In [19]:
# All SPEEDTEST_PING points inside the look-back window, excluding rows whose
# PROVIDER is 'iperf' and rows where PING is 0, loaded via the dataframe client.
query_ping = (
    "SELECT * FROM SPEEDTEST_PING WHERE PROVIDER!='iperf' AND time >= now()-"
    + time_interval
    + " AND PING!=0;"
)
ping_speedtest_dataframe = get_dataframe_from_influxdb(
    client_df=client_df,
    query_influx=query_ping,
    table_name="SPEEDTEST_PING",
)

In [20]:
# Label each speedtest ping row with the English name of its weekday, then
# store the column as a categorical whose categories are listed Monday-first.
# `Series.dt.weekday_name` was deprecated in pandas 0.23 and removed in 1.0;
# `Series.dt.day_name()` yields the same English day names.
ping_speedtest_dataframe["weekday"] = ping_speedtest_dataframe["time"].dt.day_name()
ping_speedtest_dataframe["weekday"] = pd.Categorical(
    ping_speedtest_dataframe["weekday"],
    categories=["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"],
)

In [21]:
# Hand the speedtest ping rows of a single device to simple_boxplot, plotting
# the raw "PING" column against "weekday".
device_number = 7
subset = ping_speedtest_dataframe.loc[ping_speedtest_dataframe["SK_PI"] == device_number]
simple_boxplot(
    dataframe=subset,
    plot_value="PING",
    sort_value="weekday",
    weekdays=True,
    ytitle="Miliseconds",
    title=(
        "Ping latency(speedtest) by day of the week for the device "
        + str(device_number)
        + " over the last "
        + time_interval
    ),
)

In [22]:
# Summarise speedtest ping per weekday. All three value slots point at the same
# "PING" column and rename_columns=True is passed; the following cells read the
# result through "max", "mean" and "median" columns.
by_weekday_by_device_p2 = mean_max_median_by2(
    input_dataframe=ping_speedtest_dataframe,
    group_by_value="weekday",
    value1="PING",
    value2="PING",
    value3="PING",
    rename_columns=True,
)

In [23]:
# Max / mean / median speedtest ping per weekday for one device, drawn as
# three bar traces that are not stacked.
device_number = 7
subset = by_weekday_by_device_p2.loc[by_weekday_by_device_p2["SK_PI"] == device_number]
combined_bar_plot_3traces(
    title=(
        "Ping latency(speedtest) by day of the week for the device "
        + str(device_number)
        + " over the last "
        + time_interval
    ),
    xtitle="day of the week",
    xvalues=subset["weekday"],
    yvalues1=subset["max"],
    name1="Max",
    yvalues2=subset["mean"],
    name2="Mean",
    yvalues3=subset["median"],
    name3="Median",
    stack=False,
)

In [24]:
# Max / mean / median speedtest ping of every device for one chosen weekday;
# the x axis carries the SK_PI values.
weekday = "Tuesday"
subset = by_weekday_by_device_p2.loc[by_weekday_by_device_p2["weekday"] == weekday]
combined_bar_plot_3traces(
    title=(
        "Ping latency(speedtest) by day of the week for the day "
        + str(weekday)
        + " over the last "
        + time_interval
    ),
    xvalues=subset["SK_PI"],
    yvalues1=subset["max"],
    name1="Max",
    yvalues2=subset["mean"],
    name2="Mean",
    yvalues3=subset["median"],
    name3="Median",
    stack=False,
)

In [25]:

# Every row starts as "Weekday"; rows whose weekday is Saturday or Sunday are
# then relabelled "Weekend".
ping_speedtest_dataframe["day_group"] = "Weekday"
ping_speedtest_dataframe.loc[
    ping_speedtest_dataframe["weekday"].isin(["Saturday", "Sunday"]),
    "day_group",
] = "Weekend"

In [26]:
# Hand the speedtest ping rows of a single device to simple_boxplot, plotting
# the raw "PING" column against the Weekday/Weekend "day_group" label.
device_number = 7
subset = ping_speedtest_dataframe.loc[ping_speedtest_dataframe["SK_PI"] == device_number]
simple_boxplot(
    dataframe=subset,
    plot_value="PING",
    sort_value="day_group",
    ytitle="Miliseconds",
    title=(
        "Ping latency(speedtest) by day group for the device "
        + str(device_number)
        + " over the last "
        + time_interval
    ),
)

In [27]:
# Summarise speedtest ping per day group. All three value slots point at the
# same "PING" column and rename_columns=True is passed.
by_group_by_device_p2 = mean_max_median_by2(
    input_dataframe=ping_speedtest_dataframe,
    group_by_value="day_group",
    value1="PING",
    value2="PING",
    value3="PING",
    rename_columns=True,
)

In [28]:
# Max / mean / median speedtest ping per day group for one device, drawn as
# three bar traces that are not stacked.
device_number = 7
subset = by_group_by_device_p2.loc[by_group_by_device_p2["SK_PI"] == device_number]
combined_bar_plot_3traces(
    title=(
        "Ping latency(speedtest) by day group for the device "
        + str(device_number)
        + " over the last "
        + time_interval
    ),
    xtitle="day_group",
    xvalues=subset["day_group"],
    yvalues1=subset["max"],
    name1="Max",
    yvalues2=subset["mean"],
    name2="Mean",
    yvalues3=subset["median"],
    name3="Median",
    stack=False,
)

In [29]:
# Max / mean / median speedtest ping of every device for one chosen day group;
# the x axis carries the SK_PI values.
day_group = "Weekday"
subset = by_group_by_device_p2.loc[by_group_by_device_p2["day_group"] == day_group]
combined_bar_plot_3traces(
    title=(
        "Ping latency(speedtest) by device for "
        + day_group
        + "s over the last "
        + time_interval
    ),
    xvalues=subset["SK_PI"],
    yvalues1=subset["max"],
    name1="Max",
    yvalues2=subset["mean"],
    name2="Mean",
    yvalues3=subset["median"],
    name3="Median",
    stack=False,
)

## Upload speed

In [30]:
# All SPEEDTEST_UPLOAD points inside the look-back window, excluding rows whose
# PROVIDER is 'iperf' and rows where UPLOAD is 0, loaded via the dataframe client.
query_upload = (
    "SELECT * FROM SPEEDTEST_UPLOAD WHERE PROVIDER!='iperf' AND time >= now()-"
    + time_interval
    + " AND UPLOAD!=0;"
)
upload_speedtest_dataframe = get_dataframe_from_influxdb(
    client_df=client_df,
    query_influx=query_upload,
    table_name="SPEEDTEST_UPLOAD",
)

In [31]:
# Label each upload row with the English name of its weekday, then store the
# column as a categorical whose categories are listed Monday-first.
# `Series.dt.weekday_name` was deprecated in pandas 0.23 and removed in 1.0;
# `Series.dt.day_name()` yields the same English day names.
upload_speedtest_dataframe["weekday"] = upload_speedtest_dataframe["time"].dt.day_name()
upload_speedtest_dataframe["weekday"] = pd.Categorical(
    upload_speedtest_dataframe["weekday"],
    categories=["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"],
)

In [32]:
# Hand the upload rows of a single device to simple_boxplot, plotting the
# "UPLOAD" column against "weekday".
# NOTE(review): `uploadline=True` and `weekdays=True` are interpreted inside
# simple_boxplot; their exact effect is not visible from this notebook.
device_number = 7
subset = upload_speedtest_dataframe.loc[upload_speedtest_dataframe["SK_PI"] == device_number]
simple_boxplot(
    dataframe=subset,
    plot_value="UPLOAD",
    sort_value="weekday",
    weekdays=True,
    uploadline=True,
    ytitle="Mbps",
    title=(
        "Upload speed by day of the week for the device "
        + str(device_number)
        + " over the last "
        + time_interval
    ),
)

In [33]:
# Summarise upload speed per weekday. All three value slots point at the same
# "UPLOAD" column and rename_columns=True is passed.
by_weekday_by_device_u = mean_max_median_by2(
    input_dataframe=upload_speedtest_dataframe,
    group_by_value="weekday",
    value1="UPLOAD",
    value2="UPLOAD",
    value3="UPLOAD",
    rename_columns=True,
)

In [34]:
# Max / mean / median upload speed per weekday for one device, plus a red
# marker trace at y=10 for every x position as a reference level.
device_number = 7
subset = by_weekday_by_device_u[by_weekday_by_device_u["SK_PI"] == device_number]
# Legend label corrected from "10Mps" to "10Mbps" so it matches the y-axis unit.
uploadline = go.Scatter(
    x=subset["weekday"],
    y=[10] * len(subset),
    mode="markers",
    marker=dict(color="red"),
    name="10Mbps",
)
combined_bar_plot_3traces(
    xvalues=subset["weekday"],
    yvalues1=subset["max"],
    yvalues2=subset["mean"],
    yvalues3=subset["median"],
    name1="Max",
    name2="Mean",
    name3="Median",
    title=(
        "Upload speed by day of the week for the device "
        + str(device_number)
        + " over the last "
        + time_interval
    ),
    xtitle="weekday",
    ytitle="Mbps",
    line=uploadline,
    stack=False,
)

In [35]:
# Max / mean / median upload speed of every device for one chosen weekday, plus
# a red marker trace at y=10 for every device as a reference level.
weekday = "Friday"
subset = by_weekday_by_device_u[by_weekday_by_device_u["weekday"] == weekday]
# Legend label corrected from "10Mps" to "10Mbps" so it matches the y-axis unit.
uploadline = go.Scatter(
    x=subset["SK_PI"],
    y=[10] * len(subset),
    mode="markers",
    marker=dict(color="red"),
    name="10Mbps",
)
combined_bar_plot_3traces(
    xvalues=subset["SK_PI"],
    yvalues1=subset["max"],
    yvalues2=subset["mean"],
    yvalues3=subset["median"],
    name1="Max",
    name2="Mean",
    name3="Median",
    title=(
        "Upload speed by day of the week for the day "
        + str(weekday)
        + " over the last "
        + time_interval
    ),
    ytitle="Mbps",
    line=uploadline,
    stack=False,
)

In [36]:
# Every row starts as "Weekday"; rows whose weekday is Saturday or Sunday are
# then relabelled "Weekend".
upload_speedtest_dataframe["day_group"] = "Weekday"
upload_speedtest_dataframe.loc[
    upload_speedtest_dataframe["weekday"].isin(["Saturday", "Sunday"]),
    "day_group",
] = "Weekend"

In [37]:
# Hand the upload rows of a single device to simple_boxplot, plotting the
# "UPLOAD" column against the Weekday/Weekend "day_group" label.
device_number = 7
subset = upload_speedtest_dataframe.loc[upload_speedtest_dataframe["SK_PI"] == device_number]
simple_boxplot(
    dataframe=subset,
    plot_value="UPLOAD",
    sort_value="day_group",
    uploadline=True,
    ytitle="Mbps",
    title=(
        "Upload speed by day group for device "
        + str(device_number)
        + " over the last "
        + time_interval
    ),
)

In [38]:
# Summarise upload speed per day group. All three value slots point at the same
# "UPLOAD" column and rename_columns=True is passed.
by_group_by_device_u = mean_max_median_by2(
    input_dataframe=upload_speedtest_dataframe,
    group_by_value="day_group",
    value1="UPLOAD",
    value2="UPLOAD",
    value3="UPLOAD",
    rename_columns=True,
)

In [39]:
# Max / mean / median upload speed per day group for one device, plus a red
# marker trace at y=10 for every x position as a reference level.
device_number = 7
subset = by_group_by_device_u[by_group_by_device_u["SK_PI"] == device_number]
# Legend label corrected from "10Mps" to "10Mbps" so it matches the y-axis unit.
uploadline = go.Scatter(
    x=subset["day_group"],
    y=[10] * len(subset),
    mode="markers",
    marker=dict(color="red"),
    name="10Mbps",
)
combined_bar_plot_3traces(
    xvalues=subset["day_group"],
    yvalues1=subset["max"],
    yvalues2=subset["mean"],
    yvalues3=subset["median"],
    name1="Max",
    name2="Mean",
    name3="Median",
    title=(
        "Upload speed by day group for the device "
        + str(device_number)
        + " over the last "
        + time_interval
    ),
    ytitle="Mbps",
    xtitle="day_group",
    line=uploadline,
    stack=False,
)

In [40]:
# Max / mean / median upload speed of every device for one chosen day group,
# plus a red marker trace at y=10 for every device as a reference level.
day_group = "Weekend"
subset = by_group_by_device_u[by_group_by_device_u["day_group"] == day_group]
# Legend label corrected from "10Mps" to "10Mbps" so it matches the y-axis unit.
uploadline = go.Scatter(
    x=subset["SK_PI"],
    y=[10] * len(subset),
    mode="markers",
    marker=dict(color="red"),
    name="10Mbps",
)
combined_bar_plot_3traces(
    xvalues=subset["SK_PI"],
    yvalues1=subset["max"],
    yvalues2=subset["mean"],
    yvalues3=subset["median"],
    name1="Max",
    name2="Mean",
    name3="Median",
    title=(
        "Upload speed by device for "
        + day_group
        + "s over the last "
        + time_interval
    ),
    ytitle="Mbps",
    line=uploadline,
    stack=False,
)

## Download speed 

In [41]:
# All SPEEDTEST_DOWNLOAD points inside the look-back window, excluding rows whose
# PROVIDER is 'iperf' and rows where DOWNLOAD is 0, loaded via the dataframe client.
query_download = (
    "SELECT * FROM SPEEDTEST_DOWNLOAD WHERE PROVIDER!='iperf' AND time >= now()-"
    + time_interval
    + " AND DOWNLOAD!=0;"
)
download_speedtest_dataframe = get_dataframe_from_influxdb(
    client_df=client_df,
    query_influx=query_download,
    table_name="SPEEDTEST_DOWNLOAD",
)

In [42]:
# Label each download row with the English name of its weekday, then store the
# column as a categorical whose categories are listed Monday-first.
# Fix: the categorical conversion previously targeted upload_speedtest_dataframe
# (copy-paste slip), leaving the download "weekday" column as plain strings
# without the Monday-first category list used by the other sections.
# `Series.dt.weekday_name` was deprecated in pandas 0.23 and removed in 1.0;
# `Series.dt.day_name()` yields the same English day names.
download_speedtest_dataframe["weekday"] = download_speedtest_dataframe["time"].dt.day_name()
download_speedtest_dataframe["weekday"] = pd.Categorical(
    download_speedtest_dataframe["weekday"],
    categories=["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"],
)

In [43]:
# Hand the download rows of a single device to simple_boxplot, plotting the
# "DOWNLOAD" column against "weekday".
# NOTE(review): `downloadline=True` and `weekdays=True` are interpreted inside
# simple_boxplot; their exact effect is not visible from this notebook.
device_number = 7
subset = download_speedtest_dataframe.loc[download_speedtest_dataframe["SK_PI"] == device_number]
simple_boxplot(
    dataframe=subset,
    plot_value="DOWNLOAD",
    sort_value="weekday",
    weekdays=True,
    downloadline=True,
    ytitle="Mbps",
    title=(
        "Download speed by day of the week for the device "
        + str(device_number)
        + " over the last "
        + time_interval
    ),
)

In [44]:
# Summarise download speed per weekday. All three value slots point at the same
# "DOWNLOAD" column and rename_columns=True is passed.
by_weekday_by_device_d = mean_max_median_by2(
    input_dataframe=download_speedtest_dataframe,
    group_by_value="weekday",
    value1="DOWNLOAD",
    value2="DOWNLOAD",
    value3="DOWNLOAD",
    rename_columns=True,
)

In [45]:
# Max / mean / median download speed per weekday for one device, plus a red
# marker trace at y=50 for every x position as a reference level.
device_number = 7
subset = by_weekday_by_device_d[by_weekday_by_device_d["SK_PI"] == device_number]
# Legend label corrected from "50Mps" to "50Mbps" so it matches the y-axis unit.
downloadline = go.Scatter(
    x=subset["weekday"],
    y=[50] * len(subset),
    mode="markers",
    marker=dict(color="red"),
    name="50Mbps",
)
combined_bar_plot_3traces(
    xvalues=subset["weekday"],
    yvalues1=subset["max"],
    yvalues2=subset["mean"],
    yvalues3=subset["median"],
    name1="Max",
    name2="Mean",
    name3="Median",
    title=(
        "Download speed by day of the week for the device "
        + str(device_number)
        + " over the last "
        + time_interval
    ),
    # Fix: the x axis holds weekdays; it was mislabelled "hour".
    xtitle="weekday",
    ytitle="Mbps",
    line=downloadline,
    stack=False,
)

In [46]:
# Max / mean / median download speed of every device for one chosen weekday,
# plus a red marker trace at y=50 for every device as a reference level.
weekday = "Sunday"
subset = by_weekday_by_device_d[by_weekday_by_device_d["weekday"] == weekday]
# Legend label corrected from "50Mps" to "50Mbps" so it matches the y-axis unit.
downloadline = go.Scatter(
    x=subset["SK_PI"],
    y=[50] * len(subset),
    mode="markers",
    marker=dict(color="red"),
    name="50Mbps",
)
combined_bar_plot_3traces(
    xvalues=subset["SK_PI"],
    yvalues1=subset["max"],
    yvalues2=subset["mean"],
    yvalues3=subset["median"],
    name1="Max",
    name2="Mean",
    name3="Median",
    title=(
        "Download speed by day of the week for the day "
        + weekday
        + " over the last "
        + time_interval
    ),
    ytitle="Mbps",
    line=downloadline,
    stack=False,
)

In [47]:
# Every row starts as "Weekday"; rows whose weekday is Saturday or Sunday are
# then relabelled "Weekend".
download_speedtest_dataframe["day_group"] = "Weekday"
download_speedtest_dataframe.loc[
    download_speedtest_dataframe["weekday"].isin(["Saturday", "Sunday"]),
    "day_group",
] = "Weekend"

In [48]:
# Hand the download rows of a single device to simple_boxplot, plotting the
# "DOWNLOAD" column against the Weekday/Weekend "day_group" label.
device_number = 7
subset = download_speedtest_dataframe.loc[download_speedtest_dataframe["SK_PI"] == device_number]
simple_boxplot(
    dataframe=subset,
    plot_value="DOWNLOAD",
    sort_value="day_group",
    downloadline=True,
    ytitle="Mbps",
    title=(
        "Download speed by day group for device "
        + str(device_number)
        + " over the last "
        + time_interval
    ),
)

In [49]:
# Summarise download speed per day group. All three value slots point at the
# same "DOWNLOAD" column and rename_columns=True is passed.
by_group_by_device_d = mean_max_median_by2(
    input_dataframe=download_speedtest_dataframe,
    group_by_value="day_group",
    value1="DOWNLOAD",
    value2="DOWNLOAD",
    value3="DOWNLOAD",
    rename_columns=True,
)

In [50]:
# Max / mean / median download speed per day group for one device, plus a red
# marker trace at y=50 for every x position as a reference level.
device_number = 7
subset = by_group_by_device_d[by_group_by_device_d["SK_PI"] == device_number]
# Legend label corrected from "50Mps" to "50Mbps" so it matches the y-axis unit.
downloadline = go.Scatter(
    x=subset["day_group"],
    y=[50] * len(subset),
    mode="markers",
    marker=dict(color="red"),
    name="50Mbps",
)
combined_bar_plot_3traces(
    xvalues=subset["day_group"],
    yvalues1=subset["max"],
    yvalues2=subset["mean"],
    yvalues3=subset["median"],
    name1="Max",
    name2="Mean",
    name3="Median",
    title=(
        "Download speed by day group for device "
        + str(device_number)
        + " over the last "
        + time_interval
    ),
    ytitle="Mbps",
    xtitle="day_group",
    line=downloadline,
    stack=False,
)

In [51]:
# Max / mean / median download speed of every device for one chosen day group,
# plus a red marker trace at y=50 for every device as a reference level.
day_group = "Weekend"
subset = by_group_by_device_d[by_group_by_device_d["day_group"] == day_group]
# Legend label corrected from "50Mps" to "50Mbps" so it matches the y-axis unit.
downloadline = go.Scatter(
    x=subset["SK_PI"],
    y=[50] * len(subset),
    mode="markers",
    marker=dict(color="red"),
    name="50Mbps",
)
combined_bar_plot_3traces(
    xvalues=subset["SK_PI"],
    yvalues1=subset["max"],
    yvalues2=subset["mean"],
    yvalues3=subset["median"],
    name1="Max",
    name2="Mean",
    name3="Median",
    title=(
        "Download speed by device for "
        + day_group
        + "s over the last "
        + time_interval
    ),
    ytitle="Mbps",
    line=downloadline,
    stack=False,
)