In [1]:
from IPython.display import display, Math, Latex, HTML
# Render a "Show Code"/"Hide Code" button that toggles the notebook's input cells.
# Input cells are hidden as soon as the document is ready. Animation durations are
# passed to jQuery as numbers: a string such as '500' is not a recognised duration
# keyword, so jQuery would silently fall back to its default speed.
HTML('''<script>
  function code_toggle() {
    if (code_shown){
      $('div.input').hide(500);
      $('#toggleButton').val('Show Code')
    } else {
      $('div.input').show(500);
      $('#toggleButton').val('Hide Code')
    }
    code_shown = !code_shown
  }
  
  $( document ).ready(function(){
    code_shown=false;
    $('div.input').hide()
  });
</script>
<form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Show Code"></form>''')

## Ping latency and upload/download speeds by hour of the day

In [2]:
#Load libraries:
from data_exploration import *

In [3]:
# Length of the analysis window. The value is spliced into the InfluxDB queries
# as a duration, e.g. '2w' or '4w'.
time_interval = '2w'
print("Time interval:  {}".format(time_interval))

Time interval:  2w


In [4]:
# Reference timestamp used by every query; by default it is the current time.
# NOTE(review): datetime.now() is naive local time - confirm it matches the
# timezone InfluxDB assumes for this timestamp literal.
starting_point = "{:%Y-%m-%d %H:%M:%S}".format(datetime.now())
# To set up an alternative starting point, uncomment and edit:
#starting_point="2019-01-10 14:00:00"
print("Starting point: " + starting_point)

Starting point: 2019-01-21 20:39:37


In [5]:
#Set up influxdb connection:
# The helper's result is unpacked into two handles: `client` is passed to the
# tag/statistics helpers (client_influx=...) and `client_df` to the
# DataFrame-returning helper (client_df=...) in the cells below.
client, client_df = connect_to_influxdb()

### We will use 2 approaches:
#### By hour
We will find the mean, max and median ping latency and upload/download speed for each device, grouped by hour of the day.
#### By time group
We will find the mean, max and median ping latency and upload/download speed for each device, grouped by time of day:
 - **group1** night: 23:00 - 07:00
 - **group2** day: 07:00 - 17:00
 - **group3** evening: 17:00 - 23:00

## Ping Latency

### Data coming from collectd

In [6]:
# Every SK_PI tag value found in the PING table, converted to int and sorted ascending.
device_numbers = sorted(
    int(tag_value)
    for tag_value in get_tag_values_influxdb(client_influx=client, table_name='PING', tag_name='SK_PI')
)

In [7]:
# Hourly MAX/MEAN/MEDIAN of non-zero PING values per SK_PI, from
# (starting_point - time_interval) onwards.
# The duration is separated from the timestamp literal by whitespace
# ("'<ts>' - 2w"), which is the form the InfluxQL time syntax documents as required.
# NOTE(review): there is no upper time bound, so a starting_point in the past
# would also include data recorded after it - confirm this is intended.
ping_mean_query = (
    "SELECT MAX(PING),MEAN(PING), MEDIAN(PING) FROM PING WHERE PING!=0 AND time >= '"
    + starting_point + "' - " + time_interval
    + " GROUP BY time(1h), SK_PI;"
)

In [8]:
# Run the hourly ping query and collect the three statistics for the known devices.
ping_hourly_dataframe = get_3_stats_influxdb(
    client_influx=client,
    query_influx=ping_mean_query,
    device_numbers=device_numbers,
    stat_name1='max',
    stat_name2='mean',
    stat_name3='median',
)

In [9]:
#To check:
# Manual cross-check, intentionally left commented out: compare the rows for
# device 3 in ping_hourly_dataframe with a direct per-device query.
# Note that the query below uses a fixed 4w window rather than time_interval.
#ping_hourly_dataframe[ping_hourly_dataframe["SK_PI"]==3]
#q="SELECT MAX(PING), MEAN(PING), MEDIAN(PING) FROM PING WHERE PING!=0 AND time >= '"+starting_point+"'-4w AND SK_PI='3' GROUP BY time(1h);"
#ping_df = get_dataframe_from_influxdb(client_df=client_df,query_influx=q,table_name='PING')
#ping_df

In [10]:
# Hour of day (taken from the "time" column) used for the by-hour grouping.
ping_hourly_dataframe["hour"] = pd.to_numeric(
    ping_hourly_dataframe["time"].dt.hour
)

In [11]:
# Box plot of the hourly mean ping latency for one device, one box per hour of day.
device_number = 7
subset = ping_hourly_dataframe.loc[ping_hourly_dataframe["SK_PI"] == device_number]
simple_boxplot(
    dataframe=subset,
    plot_value='mean',
    sort_value='hour',
    title="Ping latency(collectd) by hour for the device {} over the {} starting from {}".format(
        device_number, time_interval, starting_point),
    ytitle="Miliseconds",
)

In [12]:
# Aggregate the hourly statistics per hour of day.
# Presumably value1/value2/value3 name the columns fed to the mean, max and
# median aggregations respectively - verify against mean_max_median_by2.
by_hour_by_device_p1 = mean_max_median_by2(
    input_dataframe=ping_hourly_dataframe,
    group_by_value="hour",
    value1="mean",
    value2="max",
    value3="median",
)

In [13]:
## Dropdown for switching the displayed device (https://plot.ly/python/dropdowns/).
## The 'restyle' method expects args = [update_dict], where every attribute in
## update_dict holds one entry per trace. The earlier version passed three
## separate dicts, which is not a valid restyle call.
## NOTE(review): the entry order assumes the figure's traces are Max, Mean,
## Median (the yvalues1..3 order used when plotting) - confirm against
## combined_bar_plot_3traces before wiring updatemenus1 into a plot.
devices_list = by_hour_by_device_p1["SK_PI"].unique()
device_numbers_buttons = []
for i in devices_list:
    subset = by_hour_by_device_p1[by_hour_by_device_p1["SK_PI"] == i]
    # Plain Python lists keep the button arguments JSON-serialisable.
    hours_for_device = subset["hour"].tolist()
    device_numbers_buttons.append(
        dict(
            args=[{
                'x': [hours_for_device, hours_for_device, hours_for_device],
                'y': [subset["max"].tolist(),
                      subset["mean"].tolist(),
                      subset["median"].tolist()],
            }],
            label=str(i),  # button labels are strings
            method='restyle',
        )
    )
updatemenus1 = [
    dict(
        buttons=device_numbers_buttons,
        pad={'r': 0, 't': 10},
        x=0.1,
        xanchor='left',
        y=1.0,
        yanchor='top',
        # NOTE(review): 99 is presumably meant as "no button pre-selected";
        # plotly documents -1 for that - confirm before changing.
        active=99,
    )
]
annotations1 = [
    dict(text='Device<br>Number', x=0.01, y=0.99, yref='paper', align='left', showarrow=False, font=dict(size=14))
]

In [14]:
# Grouped bars of max/mean/median ping latency per hour of day for one device.
# The dropdown definitions (updatemenus1 / annotations1) are deliberately not
# passed to the plot here.
device_number = 3
subset = by_hour_by_device_p1.loc[by_hour_by_device_p1["SK_PI"] == device_number]
combined_bar_plot_3traces(
    xvalues=subset["hour"],
    yvalues1=subset["max"], name1="Max",
    yvalues2=subset["mean"], name2="Mean",
    yvalues3=subset["median"], name3="Median",
    title="Ping latency(collectd) by hour for the device {} over the {} starting from {}".format(
        device_number, time_interval, starting_point),
    xtitle="hour",
    stack=False,
)

In [15]:
# Grouped bars of max/mean/median ping latency per device for one hour of day.
hour = 7
subset = by_hour_by_device_p1.loc[by_hour_by_device_p1["hour"] == hour]
combined_bar_plot_3traces(
    xvalues=subset["SK_PI"],
    yvalues1=subset["max"], name1="Max",
    yvalues2=subset["mean"], name2="Mean",
    yvalues3=subset["median"], name3="Median",
    title="Ping latency(collectd) by hour for the hour {} over the {} starting from {}".format(
        hour, time_interval, starting_point),
    stack=False,
)

In [16]:
# Disable pandas' chained-assignment warning for the rest of the session.
pd.options.mode.chained_assignment = None
# Label every row with its time-of-day group using half-open hour ranges that
# match the groups defined for this notebook:
#   night 23:00-07:00, day 07:00-17:00, evening 17:00-23:00.
# The earlier bounds were shifted by one hour: `hour > 23` never matched,
# hour 23 fell into "evening", hour 7 into "night" and hour 17 into "day".
ping_hourly_dataframe["time_group"] = ""
ping_hourly_dataframe.loc[(ping_hourly_dataframe["hour"] >= 23) | (ping_hourly_dataframe["hour"] < 7), "time_group"] = "night"
ping_hourly_dataframe.loc[(ping_hourly_dataframe["hour"] >= 7) & (ping_hourly_dataframe["hour"] < 17), "time_group"] = "day"
ping_hourly_dataframe.loc[(ping_hourly_dataframe["hour"] >= 17) & (ping_hourly_dataframe["hour"] < 23), "time_group"] = "evening"

In [17]:
# Box plot of the hourly mean ping latency for one device, one box per time group.
device_number = 7
subset = ping_hourly_dataframe.loc[ping_hourly_dataframe["SK_PI"] == device_number]
simple_boxplot(
    dataframe=subset,
    plot_value='mean',
    sort_value='time_group',
    title="Ping latency(collectd) by time group for the device {} over the {} starting from {}".format(
        device_number, time_interval, starting_point),
    ytitle="Miliseconds",
    jitter=True,
)

In [18]:
# Aggregate the hourly statistics per time-of-day group.
by_group_by_device_p1 = mean_max_median_by2(
    input_dataframe=ping_hourly_dataframe,
    group_by_value="time_group",
    value1="mean",
    value2="max",
    value3="median",
)

In [19]:
# Grouped bars of max/mean/median ping latency per time group for one device.
device_number = 7
subset = by_group_by_device_p1.loc[by_group_by_device_p1["SK_PI"] == device_number]
combined_bar_plot_3traces(
    xvalues=subset["time_group"],
    yvalues1=subset["max"], name1="Max",
    yvalues2=subset["mean"], name2="Mean",
    yvalues3=subset["median"], name3="Median",
    title="Ping latency(collectd) by time group for the device {} over the {} starting from {}".format(
        device_number, time_interval, starting_point),
    xtitle="time_group",
    stack=False,
)

In [20]:
# Grouped bars of max/mean/median ping latency per device for one time group.
time_group = "day"
subset = by_group_by_device_p1.loc[by_group_by_device_p1["time_group"] == time_group]
combined_bar_plot_3traces(
    xvalues=subset["SK_PI"],
    yvalues1=subset["max"], name1="Max",
    yvalues2=subset["mean"], name2="Mean",
    yvalues3=subset["median"], name3="Median",
    title="Ping latency(collectd) by device for {} time over the {} starting from {}".format(
        time_group, time_interval, starting_point),
    stack=False,
)

### Data coming from speedtest

In [21]:
# All non-iperf, non-zero SPEEDTEST_PING rows from
# (starting_point - time_interval) onwards, loaded into a DataFrame.
# The duration is separated from the timestamp literal by whitespace, which is
# the form the InfluxQL time syntax documents as required.
query_ping = (
    "SELECT * FROM SPEEDTEST_PING WHERE PROVIDER!='iperf' AND time >='"
    + starting_point + "' - " + time_interval
    + " AND PING!=0;"
)
ping_speedtest_dataframe = get_dataframe_from_influxdb(
    client_df=client_df,
    query_influx=query_ping,
    table_name='SPEEDTEST_PING',
)

In [22]:
# Hour of day (taken from the "time" column) used for the by-hour grouping.
ping_speedtest_dataframe["hour"] = pd.to_numeric(
    ping_speedtest_dataframe["time"].dt.hour
)

In [23]:
# Box plot of speedtest ping values for one device, one box per hour of day.
device_number = 7
subset = ping_speedtest_dataframe.loc[ping_speedtest_dataframe["SK_PI"] == device_number]
simple_boxplot(
    dataframe=subset,
    plot_value='PING',
    sort_value='hour',
    title="Ping latency(speedtest) by hour for the device {} over the {} starting from {}".format(
        device_number, time_interval, starting_point),
    ytitle="Miliseconds",
)

In [24]:
# Mean/max/median of the raw PING column per hour of day; rename_columns=True is
# requested so the result exposes "mean"/"max"/"median" columns used by the plots.
by_hour_by_device_p2 = mean_max_median_by2(
    input_dataframe=ping_speedtest_dataframe,
    group_by_value="hour",
    value1="PING",
    value2="PING",
    value3="PING",
    rename_columns=True,
)

In [25]:
# Grouped bars of max/mean/median speedtest ping per hour of day for one device.
device_number = 7
subset = by_hour_by_device_p2.loc[by_hour_by_device_p2["SK_PI"] == device_number]
combined_bar_plot_3traces(
    xvalues=subset["hour"],
    yvalues1=subset["max"], name1="Max",
    yvalues2=subset["mean"], name2="Mean",
    yvalues3=subset["median"], name3="Median",
    title="Ping latency(speedtest) by hour for the device {} over the {} starting from {}".format(
        device_number, time_interval, starting_point),
    xtitle="hour",
    stack=False,
)

In [26]:
# Grouped bars of max/mean/median speedtest ping per device for one hour of day.
hour = 7
subset = by_hour_by_device_p2.loc[by_hour_by_device_p2["hour"] == hour]
combined_bar_plot_3traces(
    xvalues=subset["SK_PI"],
    yvalues1=subset["max"], name1="Max",
    yvalues2=subset["mean"], name2="Mean",
    yvalues3=subset["median"], name3="Median",
    title="Ping latency(speedtest) by hour for the hour {} over the {} starting from {}".format(
        hour, time_interval, starting_point),
    stack=False,
)

In [27]:
# Label every row with its time-of-day group using half-open hour ranges that
# match the groups defined for this notebook:
#   night 23:00-07:00, day 07:00-17:00, evening 17:00-23:00.
# The earlier bounds were shifted by one hour: `hour > 23` never matched,
# hour 23 fell into "evening", hour 7 into "night" and hour 17 into "day".
ping_speedtest_dataframe["time_group"] = ""
ping_speedtest_dataframe.loc[(ping_speedtest_dataframe["hour"] >= 23) | (ping_speedtest_dataframe["hour"] < 7), "time_group"] = "night"
ping_speedtest_dataframe.loc[(ping_speedtest_dataframe["hour"] >= 7) & (ping_speedtest_dataframe["hour"] < 17), "time_group"] = "day"
ping_speedtest_dataframe.loc[(ping_speedtest_dataframe["hour"] >= 17) & (ping_speedtest_dataframe["hour"] < 23), "time_group"] = "evening"

In [28]:
# Box plot of speedtest ping values for one device, one box per time group.
device_number = 7
subset = ping_speedtest_dataframe.loc[ping_speedtest_dataframe["SK_PI"] == device_number]
simple_boxplot(
    dataframe=subset,
    plot_value='PING',
    sort_value='time_group',
    title="Ping latency(speedtest) by time group for the device {} over the {} starting from {}".format(
        device_number, time_interval, starting_point),
    ytitle="Miliseconds",
    jitter=True,
)

In [29]:
# Mean/max/median of the raw PING column per time-of-day group.
by_group_by_device_p2 = mean_max_median_by2(
    input_dataframe=ping_speedtest_dataframe,
    group_by_value="time_group",
    value1="PING",
    value2="PING",
    value3="PING",
    rename_columns=True,
)

In [30]:
# Grouped bars of max/mean/median speedtest ping per time group for one device.
device_number = 7
subset = by_group_by_device_p2.loc[by_group_by_device_p2["SK_PI"] == device_number]
combined_bar_plot_3traces(
    xvalues=subset["time_group"],
    yvalues1=subset["max"], name1="Max",
    yvalues2=subset["mean"], name2="Mean",
    yvalues3=subset["median"], name3="Median",
    title="Ping latency(speedtest) by time group for the device {} over the {} starting from {}".format(
        device_number, time_interval, starting_point),
    xtitle="time_group",
    stack=False,
)

In [31]:
# Grouped bars of max/mean/median speedtest ping per device for one time group.
time_group = "night"
subset = by_group_by_device_p2.loc[by_group_by_device_p2["time_group"] == time_group]
combined_bar_plot_3traces(
    xvalues=subset["SK_PI"],
    yvalues1=subset["max"], name1="Max",
    yvalues2=subset["mean"], name2="Mean",
    yvalues3=subset["median"], name3="Median",
    title="Ping latency(speedtest) by device for {} time over the {} starting from {}".format(
        time_group, time_interval, starting_point),
    stack=False,
)

## Ping droprate

In [32]:
# Hourly count, per SK_PI, of PING rows whose PING_DROPRATE is above zero, from
# (starting_point - time_interval) onwards.
# The duration is separated from the timestamp literal by whitespace, which is
# the form the InfluxQL time syntax documents as required.
pingdroprate_query = (
    "SELECT COUNT(PING_DROPRATE) FROM PING WHERE PING_DROPRATE>0 AND time >= '"
    + starting_point + "' - " + time_interval
    + " GROUP BY time(1h), SK_PI;"
)

In [33]:
# Run the drop-count query; the statistic is requested under the name "count".
pingdroprate_dataframe = get_1_stats_influxdb(
    client_influx=client,
    query_influx=pingdroprate_query,
    device_numbers=device_numbers,
    stat_name='count',
)

In [34]:
# Hourly count, per SK_PI, of all PING_DROPRATE samples (the denominator for the
# drop percentage), from (starting_point - time_interval) onwards.
# The duration is separated from the timestamp literal by whitespace, which is
# the form the InfluxQL time syntax documents as required.
pingdroprate_query_total = (
    "SELECT COUNT(PING_DROPRATE) FROM PING WHERE time >= '"
    + starting_point + "' - " + time_interval
    + " GROUP BY time(1h), SK_PI;"
)

In [35]:
# Run the total-sample query, then expose its statistic as "count_total" so it
# does not collide with the "count" column of the drop-count frame.
pingdroprate_dataframe_total = get_1_stats_influxdb(
    client_influx=client,
    query_influx=pingdroprate_query_total,
    device_numbers=device_numbers,
    stat_name='count',
)
pingdroprate_dataframe_total = pingdroprate_dataframe_total.rename(columns={'count': 'count_total'})

In [36]:
# Outer-join drop counts with total counts on (device, hour bucket), keeping
# rows that appear on only one side.
pingdroprate_dataframe = pingdroprate_dataframe.merge(
    pingdroprate_dataframe_total,
    how='outer',
    left_on=['SK_PI', 'time'],
    right_on=['SK_PI', 'time'],
)

In [37]:
# Hour of day for grouping, plus the share of samples with a non-zero drop rate
# expressed in percent.
# NOTE(review): rows present on only one side of the outer merge, or with a zero
# count_total, yield NaN/inf here - confirm the downstream plots tolerate that.
pingdroprate_dataframe["hour"] = pd.to_numeric(
    pingdroprate_dataframe["time"].dt.hour
)
pingdroprate_dataframe["count_percent"] = (
    pingdroprate_dataframe["count"].div(pingdroprate_dataframe["count_total"]).mul(100)
)

In [38]:
# Box plot of the hourly drop percentage for one device, one box per hour of day.
device_number = 7
subset = pingdroprate_dataframe.loc[pingdroprate_dataframe["SK_PI"] == device_number]
simple_boxplot(
    dataframe=subset,
    plot_value='count_percent',
    sort_value='hour',
    title="Ping droprate by hour for the device {} over the {} starting from {}".format(
        device_number, time_interval, starting_point),
    ytitle="Percent of packets dropped",
)

In [39]:
# Mean/max/median of the drop percentage per hour of day.
# All three statistics are taken from "count_percent": the plots built from this
# frame label the axis "Percent of packets dropped", and the by-time-group
# aggregation uses the same column for all three values. Previously the first
# two values were taken from the raw "count" column, mixing counts with percentages.
by_hour_by_device_d = mean_max_median_by2(
    input_dataframe=pingdroprate_dataframe,
    value1="count_percent",
    value2="count_percent",
    value3="count_percent",
    group_by_value="hour",
    rename_columns=True,
)

In [40]:
# Grouped bars of max/mean/median drop rate per hour of day for one device.
device_number = 7
subset = by_hour_by_device_d.loc[by_hour_by_device_d["SK_PI"] == device_number]
combined_bar_plot_3traces(
    xvalues=subset["hour"],
    yvalues1=subset["max"], name1="Max",
    yvalues2=subset["mean"], name2="Mean",
    yvalues3=subset["median"], name3="Median",
    title="Ping droprate by hour for the device {} over the {} starting from {}".format(
        device_number, time_interval, starting_point),
    xtitle="hour",
    ytitle="Percent of packets dropped",
    stack=False,
)

In [41]:
# Grouped bars of max/mean/median drop rate per device for one hour of day.
hour = 7
subset = by_hour_by_device_d.loc[by_hour_by_device_d["hour"] == hour]
combined_bar_plot_3traces(
    xvalues=subset["SK_PI"],
    yvalues1=subset["max"], name1="Max",
    yvalues2=subset["mean"], name2="Mean",
    yvalues3=subset["median"], name3="Median",
    title="Ping droprate by hour for the hour {} over the {} starting from {}".format(
        hour, time_interval, starting_point),
    ytitle="Percent of packets dropped",
    stack=False,
)

In [42]:
# Label every row with its time-of-day group using half-open hour ranges that
# match the groups defined for this notebook:
#   night 23:00-07:00, day 07:00-17:00, evening 17:00-23:00.
# The earlier bounds were shifted by one hour: `hour > 23` never matched,
# hour 23 fell into "evening", hour 7 into "night" and hour 17 into "day".
pingdroprate_dataframe["time_group"] = ""
pingdroprate_dataframe.loc[(pingdroprate_dataframe["hour"] >= 23) | (pingdroprate_dataframe["hour"] < 7), "time_group"] = "night"
pingdroprate_dataframe.loc[(pingdroprate_dataframe["hour"] >= 7) & (pingdroprate_dataframe["hour"] < 17), "time_group"] = "day"
pingdroprate_dataframe.loc[(pingdroprate_dataframe["hour"] >= 17) & (pingdroprate_dataframe["hour"] < 23), "time_group"] = "evening"

In [43]:
# Box plot of the hourly drop percentage for one device, one box per time group.
device_number = 7
subset = pingdroprate_dataframe.loc[pingdroprate_dataframe["SK_PI"] == device_number]
simple_boxplot(
    dataframe=subset,
    plot_value='count_percent',
    sort_value='time_group',
    title="Ping droprate by time group for the device {} over the {} starting from {}".format(
        device_number, time_interval, starting_point),
    ytitle="Percent of packets dropped",
    jitter=True,
)

In [44]:
# Mean/max/median of the drop percentage per time-of-day group.
by_group_by_device_d = mean_max_median_by2(
    input_dataframe=pingdroprate_dataframe,
    group_by_value="time_group",
    value1="count_percent",
    value2="count_percent",
    value3="count_percent",
    rename_columns=True,
)

In [45]:
# Grouped bars of max/mean/median drop percentage per time group for one device.
device_number = 7
subset = by_group_by_device_d.loc[by_group_by_device_d["SK_PI"] == device_number]
combined_bar_plot_3traces(
    xvalues=subset["time_group"],
    yvalues1=subset["max"], name1="Max",
    yvalues2=subset["mean"], name2="Mean",
    yvalues3=subset["median"], name3="Median",
    title="Ping droprate by time group for the device {} over the {} starting from {}".format(
        device_number, time_interval, starting_point),
    xtitle="time_group",
    ytitle="Percentage of packets dropped",
    stack=False,
)

In [46]:
# Grouped bars of max/mean/median drop percentage per device for one time group.
time_group = "night"
subset = by_group_by_device_d.loc[by_group_by_device_d["time_group"] == time_group]
combined_bar_plot_3traces(
    xvalues=subset["SK_PI"],
    yvalues1=subset["max"], name1="Max",
    yvalues2=subset["mean"], name2="Mean",
    yvalues3=subset["median"], name3="Median",
    title="Ping droprate by device for {} time over the {} starting from {}".format(
        time_group, time_interval, starting_point),
    ytitle="Percentage of packets dropped",
    stack=False,
)

## Upload speed

In [47]:
# All non-iperf, non-zero SPEEDTEST_UPLOAD rows from
# (starting_point - time_interval) onwards, loaded into a DataFrame.
# The duration is separated from the timestamp literal by whitespace, which is
# the form the InfluxQL time syntax documents as required.
query_upload = (
    "SELECT * FROM SPEEDTEST_UPLOAD WHERE PROVIDER!='iperf' AND time >= '"
    + starting_point + "' - " + time_interval
    + " AND UPLOAD!=0;"
)
upload_speedtest_dataframe = get_dataframe_from_influxdb(
    client_df=client_df,
    query_influx=query_upload,
    table_name='SPEEDTEST_UPLOAD',
)

In [48]:
# Hour of day (taken from the "time" column) used for the by-hour grouping.
upload_speedtest_dataframe["hour"] = pd.to_numeric(
    upload_speedtest_dataframe["time"].dt.hour
)

In [49]:
# Box plot of upload speed for one device, one box per hour of day, with the
# helper's upload reference line enabled.
device_number = 7
subset = upload_speedtest_dataframe.loc[upload_speedtest_dataframe["SK_PI"] == device_number]
simple_boxplot(
    dataframe=subset,
    plot_value='UPLOAD',
    sort_value='hour',
    title="Upload speed by hour for the device {} over the {} starting from {}".format(
        device_number, time_interval, starting_point),
    ytitle="Mbps",
    uploadline=True,
)

In [50]:
# Mean/max/median/min of the UPLOAD column per hour of day (the four-statistic
# helper is used so the plots can also show the minimum).
by_hour_by_device_u = mean_max_median_min_by2(
    input_dataframe=upload_speedtest_dataframe,
    group_by_value="hour",
    value1="UPLOAD",
    value2="UPLOAD",
    value3="UPLOAD",
    value4="UPLOAD",
    rename_columns=True,
)

In [51]:
# Grouped bars of max/mean/median/min upload speed per hour of day for one device.
device_number = 7
subset = by_hour_by_device_u[by_hour_by_device_u["SK_PI"] == device_number]
# Red reference markers at y=10, one per x position. The legend label now reads
# "10Mbps" so its unit matches the y axis ("Mbps"); it previously read "10Mps".
uploadline = go.Scatter(
    x=subset["hour"],
    y=[10] * len(subset["hour"]),
    mode='markers',
    marker=dict(color='red'),
    name='10Mbps',
)
combined_bar_plot_4traces(
    xvalues=subset["hour"],
    yvalues1=subset["max"], name1="Max",
    yvalues2=subset["mean"], name2="Mean",
    yvalues3=subset["median"], name3="Median",
    yvalues4=subset["min"], name4="Min",
    title="Upload speed by hour for the device "+str(device_number)+" over the "+time_interval+" starting from "+starting_point,
    xtitle="hour",
    ytitle="Mbps",
    line=uploadline,
    stack=False,
)

In [52]:
# Grouped bars of max/mean/median/min upload speed per device for one hour of day.
hour = 7
subset = by_hour_by_device_u[by_hour_by_device_u["hour"] == hour]
# Red reference markers at y=10, one per x position. The legend label now reads
# "10Mbps" so its unit matches the y axis ("Mbps"); it previously read "10Mps".
uploadline = go.Scatter(
    x=subset["SK_PI"],
    y=[10] * len(subset["SK_PI"]),
    mode='markers',
    marker=dict(color='red'),
    name='10Mbps',
)
combined_bar_plot_4traces(
    xvalues=subset["SK_PI"],
    yvalues1=subset["max"], name1="Max",
    yvalues2=subset["mean"], name2="Mean",
    yvalues3=subset["median"], name3="Median",
    yvalues4=subset["min"], name4="Min",
    title="Upload speed by hour for the hour "+str(hour)+" over the "+time_interval+" starting from "+starting_point,
    ytitle="Mbps",
    line=uploadline,
    stack=False,
)

In [53]:
# Label every row with its time-of-day group using half-open hour ranges that
# match the groups defined for this notebook:
#   night 23:00-07:00, day 07:00-17:00, evening 17:00-23:00.
# The earlier bounds were shifted by one hour: `hour > 23` never matched,
# hour 23 fell into "evening", hour 7 into "night" and hour 17 into "day".
upload_speedtest_dataframe["time_group"] = ""
upload_speedtest_dataframe.loc[(upload_speedtest_dataframe["hour"] >= 23) | (upload_speedtest_dataframe["hour"] < 7), "time_group"] = "night"
upload_speedtest_dataframe.loc[(upload_speedtest_dataframe["hour"] >= 7) & (upload_speedtest_dataframe["hour"] < 17), "time_group"] = "day"
upload_speedtest_dataframe.loc[(upload_speedtest_dataframe["hour"] >= 17) & (upload_speedtest_dataframe["hour"] < 23), "time_group"] = "evening"

In [54]:
# Box plot of upload speed for one device, one box per time group, with the
# helper's upload reference line enabled.
device_number = 7
subset = upload_speedtest_dataframe.loc[upload_speedtest_dataframe["SK_PI"] == device_number]
simple_boxplot(
    dataframe=subset,
    plot_value='UPLOAD',
    sort_value='time_group',
    title="Upload speed by timegroup for device {} over the {} starting from {}".format(
        device_number, time_interval, starting_point),
    ytitle="Mbps",
    uploadline=True,
    jitter=True,
)

In [55]:
# Mean/max/median/min of the UPLOAD column per time-of-day group (the
# four-statistic helper is used so the plots can also show the minimum).
by_group_by_device_u = mean_max_median_min_by2(
    input_dataframe=upload_speedtest_dataframe,
    group_by_value="time_group",
    value1="UPLOAD",
    value2="UPLOAD",
    value3="UPLOAD",
    value4="UPLOAD",
    rename_columns=True,
)

In [56]:
# Grouped bars of max/mean/median/min upload speed per time group for one device.
device_number = 7
subset = by_group_by_device_u[by_group_by_device_u["SK_PI"] == device_number]
# Red reference markers at y=10, one per x position. The legend label now reads
# "10Mbps" so its unit matches the y axis ("Mbps"); it previously read "10Mps".
uploadline = go.Scatter(
    x=subset["time_group"],
    y=[10] * len(subset["time_group"]),
    mode='markers',
    marker=dict(color='red'),
    name='10Mbps',
)
combined_bar_plot_4traces(
    xvalues=subset["time_group"],
    yvalues1=subset["max"], name1="Max",
    yvalues2=subset["mean"], name2="Mean",
    yvalues3=subset["median"], name3="Median",
    yvalues4=subset["min"], name4="Min",
    title="Upload speed by time group for the device "+str(device_number)+" over the "+time_interval+" starting from "+starting_point,
    ytitle="Mbps",
    xtitle="time_group",
    line=uploadline,
    stack=False,
)

In [57]:
# Grouped bars of max/mean/median/min upload speed per device for one time group.
time_group = "night"
subset = by_group_by_device_u[by_group_by_device_u["time_group"] == time_group]
# Red reference markers at y=10, one per x position. The legend label now reads
# "10Mbps" so its unit matches the y axis ("Mbps"); it previously read "10Mps".
uploadline = go.Scatter(
    x=subset["SK_PI"],
    y=[10] * len(subset["SK_PI"]),
    mode='markers',
    marker=dict(color='red'),
    name='10Mbps',
)
combined_bar_plot_4traces(
    xvalues=subset["SK_PI"],
    yvalues1=subset["max"], name1="Max",
    yvalues2=subset["mean"], name2="Mean",
    yvalues3=subset["median"], name3="Median",
    yvalues4=subset["min"], name4="Min",
    title="Upload speed by device for "+time_group+" time over the "+time_interval+" starting from "+starting_point,
    ytitle="Mbps",
    line=uploadline,
    stack=False,
)

## Download speed 

In [58]:
# All non-iperf, non-zero SPEEDTEST_DOWNLOAD rows from
# (starting_point - time_interval) onwards, loaded into a DataFrame.
# The duration is separated from the timestamp literal by whitespace, which is
# the form the InfluxQL time syntax documents as required.
query_download = (
    "SELECT * FROM SPEEDTEST_DOWNLOAD WHERE PROVIDER!='iperf' AND time >= '"
    + starting_point + "' - " + time_interval
    + " AND DOWNLOAD!=0;"
)
download_speedtest_dataframe = get_dataframe_from_influxdb(
    client_df=client_df,
    query_influx=query_download,
    table_name='SPEEDTEST_DOWNLOAD',
)

In [59]:
# Hour of day (taken from the "time" column) used for the by-hour grouping.
download_speedtest_dataframe["hour"] = pd.to_numeric(
    download_speedtest_dataframe["time"].dt.hour
)

In [60]:
# Box plot of download speed for one device, one box per hour of day, with the
# helper's download reference line enabled.
device_number = 7
subset = download_speedtest_dataframe.loc[download_speedtest_dataframe["SK_PI"] == device_number]
simple_boxplot(
    dataframe=subset,
    plot_value='DOWNLOAD',
    sort_value='hour',
    title="Download speed by hour for the device {} over the {} starting from {}".format(
        device_number, time_interval, starting_point),
    ytitle="Mbps",
    downloadline=True,
)

In [61]:
# Mean/max/median/min of the DOWNLOAD column per hour of day.
# NOTE: this rebinds by_hour_by_device_d, which earlier in the notebook held the
# ping-droprate aggregates; the droprate plot cells must not be re-run after this one.
by_hour_by_device_d = mean_max_median_min_by2(
    input_dataframe=download_speedtest_dataframe,
    group_by_value="hour",
    value1="DOWNLOAD",
    value2="DOWNLOAD",
    value3="DOWNLOAD",
    value4="DOWNLOAD",
    rename_columns=True,
)

In [62]:
# Grouped bars of max/mean/median/min download speed per hour of day for one device.
device_number = 7
subset = by_hour_by_device_d[by_hour_by_device_d["SK_PI"] == device_number]
# Red reference markers at y=50, one per x position. The legend label now reads
# "50Mbps" so its unit matches the y axis ("Mbps"); it previously read "50Mps".
downloadline = go.Scatter(
    x=subset["hour"],
    y=[50] * len(subset["hour"]),
    mode='markers',
    marker=dict(color='red'),
    name='50Mbps',
)
combined_bar_plot_4traces(
    xvalues=subset["hour"],
    yvalues1=subset["max"], name1="Max",
    yvalues2=subset["mean"], name2="Mean",
    yvalues3=subset["median"], name3="Median",
    yvalues4=subset["min"], name4="Min",
    title="Download speed by hour for the device "+str(device_number)+" over the "+time_interval+" starting from "+starting_point,
    xtitle="hour",
    ytitle="Mbps",
    line=downloadline,
    stack=False,
)

In [63]:
# Grouped bars of max/mean/median/min download speed per device for one hour of day.
hour = 7
subset = by_hour_by_device_d[by_hour_by_device_d["hour"] == hour]
# Red reference markers at y=50, one per x position. The legend label now reads
# "50Mbps" so its unit matches the y axis ("Mbps"); it previously read "50Mps".
downloadline = go.Scatter(
    x=subset["SK_PI"],
    y=[50] * len(subset["SK_PI"]),
    mode='markers',
    marker=dict(color='red'),
    name='50Mbps',
)
combined_bar_plot_4traces(
    xvalues=subset["SK_PI"],
    yvalues1=subset["max"], name1="Max",
    yvalues2=subset["mean"], name2="Mean",
    yvalues3=subset["median"], name3="Median",
    yvalues4=subset["min"], name4="Min",
    title="Download speed by hour for the hour "+str(hour)+" over the "+time_interval+" starting from "+starting_point,
    ytitle="Mbps",
    line=downloadline,
    stack=False,
)

In [64]:
# Label every row with its time-of-day group using half-open hour ranges that
# match the groups defined for this notebook:
#   night 23:00-07:00, day 07:00-17:00, evening 17:00-23:00.
# The earlier bounds were shifted by one hour: `hour > 23` never matched,
# hour 23 fell into "evening", hour 7 into "night" and hour 17 into "day".
download_speedtest_dataframe["time_group"] = ""
download_speedtest_dataframe.loc[(download_speedtest_dataframe["hour"] >= 23) | (download_speedtest_dataframe["hour"] < 7), "time_group"] = "night"
download_speedtest_dataframe.loc[(download_speedtest_dataframe["hour"] >= 7) & (download_speedtest_dataframe["hour"] < 17), "time_group"] = "day"
download_speedtest_dataframe.loc[(download_speedtest_dataframe["hour"] >= 17) & (download_speedtest_dataframe["hour"] < 23), "time_group"] = "evening"

In [65]:
# Box plot of download speed for one device, one box per time group, with the
# helper's download reference line enabled.
device_number = 7
subset = download_speedtest_dataframe.loc[download_speedtest_dataframe["SK_PI"] == device_number]
simple_boxplot(
    dataframe=subset,
    plot_value='DOWNLOAD',
    sort_value='time_group',
    title="Download speed by time group for the device {} over the {} starting from {}".format(
        device_number, time_interval, starting_point),
    ytitle="Mbps",
    downloadline=True,
    jitter=True,
)

In [66]:
# Mean/max/median/min of the DOWNLOAD column per time-of-day group.
# NOTE: this rebinds by_group_by_device_d, which earlier in the notebook held the
# ping-droprate aggregates; the droprate plot cells must not be re-run after this one.
by_group_by_device_d = mean_max_median_min_by2(
    input_dataframe=download_speedtest_dataframe,
    group_by_value="time_group",
    value1="DOWNLOAD",
    value2="DOWNLOAD",
    value3="DOWNLOAD",
    value4="DOWNLOAD",
    rename_columns=True,
)

In [67]:
# Grouped bars of max/mean/median/min download speed per time group for one device.
device_number = 7
subset = by_group_by_device_d[by_group_by_device_d["SK_PI"] == device_number]
# Red reference markers at y=50, one per x position. The legend label now reads
# "50Mbps" so its unit matches the y axis ("Mbps"); it previously read "50Mps".
downloadline = go.Scatter(
    x=subset["time_group"],
    y=[50] * len(subset["time_group"]),
    mode='markers',
    marker=dict(color='red'),
    name='50Mbps',
)
combined_bar_plot_4traces(
    xvalues=subset["time_group"],
    yvalues1=subset["max"], name1="Max",
    yvalues2=subset["mean"], name2="Mean",
    yvalues3=subset["median"], name3="Median",
    yvalues4=subset["min"], name4="Min",
    title="Download speed by time group for the device "+str(device_number)+" over the "+time_interval+" starting from "+starting_point,
    ytitle="Mbps",
    xtitle="time_group",
    line=downloadline,
    stack=False,
)

In [68]:
# Grouped bars of max/mean/median/min download speed per device for one time group.
time_group = "night"
subset = by_group_by_device_d[by_group_by_device_d["time_group"] == time_group]
# Red reference markers at y=50, one per x position. The legend label now reads
# "50Mbps" so its unit matches the y axis ("Mbps"); it previously read "50Mps".
downloadline = go.Scatter(
    x=subset["SK_PI"],
    y=[50] * len(subset["SK_PI"]),
    mode='markers',
    marker=dict(color='red'),
    name='50Mbps',
)
combined_bar_plot_4traces(
    xvalues=subset["SK_PI"],
    yvalues1=subset["max"], name1="Max",
    yvalues2=subset["mean"], name2="Mean",
    yvalues3=subset["median"], name3="Median",
    yvalues4=subset["min"], name4="Min",
    title="Download speed by device for "+time_group+" time over the "+time_interval+" starting from "+starting_point,
    ytitle="Mbps",
    line=downloadline,
    stack=False,
)