In [1]:
# Check 'Websites
import requests
import pandas as pd
import sys
import os
import subprocess
from time import sleep
import re


# Define City and others

In [27]:
# Stage switches: the guarded cells below only run when these are set to True.
BuildServerList = False   # build the list of ping-reachable servers for `city`
RunSpeedTests = False     # run a speedtest against every active server

# Input list of candidate servers and the city whose servers are measured.
raw_list_of_servers = 'speedtest_servers.csv'
city = 'Bangalore'

# Per-city output files: raw CLI output first, parsed attributes second.
raw_result_file = f'{city}_speedtest_results_raw.csv'
final_result_file = f'{city}_speedtest_results_final.csv'


# 1 Prepare Data and build list of active servers

In [3]:
def run_measurement(server, tail_lines=10):
    """Run the `speedtest` CLI against one server and return the end of its output.

    Parameters
    ----------
    server : str
        Host name handed to `speedtest -o`.
    tail_lines : int, optional
        Number of trailing output lines to keep. The default of 10 matches what
        the previously used `tail` command kept.

    Returns
    -------
    str
        The last `tail_lines` lines of the command's standard output, decoded
        as UTF-8.
    """
    if tail_lines <= 0:
        return ""
    # subprocess.run waits for the child and closes the pipe, so no process is
    # left un-reaped. Trimming the output in Python removes the dependency on
    # an external `tail` binary.
    completed = subprocess.run(["speedtest", "-o", server], stdout=subprocess.PIPE)
    kept = completed.stdout.splitlines(keepends=True)[-tail_lines:]
    return b"".join(kept).decode("utf-8")

In [4]:
def prepare_data(raw_list_of_servers, city):
    """Return the host names of all servers located in `city`.

    Parameters
    ----------
    raw_list_of_servers : str
        Path of a CSV file that has at least the columns 'City' and 'Host'.
    city : str
        Value of the 'City' column to keep.

    Returns
    -------
    pandas.Series
        The 'Host' values of the matching rows with everything from the last
        ":" onwards removed. A host without ":" is returned unchanged.
    """
    servers_all = pd.read_csv(raw_list_of_servers)
    servers_in_city = servers_all.loc[servers_all['City'] == city]
    # rsplit leaves a host without ":" intact; slicing with rfind(":") == -1
    # used to cut off its last character.
    return servers_in_city['Host'].apply(lambda host: host.rsplit(":", 1)[0])

def check_servers_with_ping(list_of_filtered_servers):
    """Return the hosts that answer a single ping.

    Parameters
    ----------
    list_of_filtered_servers : iterable
        Host names (or addresses) to probe.

    Returns
    -------
    list
        The hosts, in input order, for which `ping` exited with status 0.
    """
    # The packet-count option is -n on Windows and -c everywhere else.
    count_flag = '-n' if sys.platform.lower() == 'win32' else '-c'
    active_servers = []
    for hostname in list_of_filtered_servers:
        try:
            # Argument list without a shell: a host name read from the CSV
            # cannot smuggle additional shell commands into the call.
            status = subprocess.run(["ping", count_flag, "1", str(hostname)]).returncode
        except OSError:
            # ping could not be started at all; the shell-based call reported
            # that as a non-zero status, so the host counts as not reachable.
            status = 1
        if status == 0:
            active_servers.append(hostname)
    return active_servers
#print(list_of_active_servers)

In [5]:
if BuildServerList:
    # Restrict the raw list to `city`, then keep only the hosts that answer a ping.
    city_servers = prepare_data(raw_list_of_servers, city)
    list_of_active_servers = check_servers_with_ping(city_servers)

    # Persist the reachable hosts as a bare single-column CSV (no index, no header).
    series_of_active_servers = pd.Series(list_of_active_servers)
    series_of_active_servers.to_csv(f'{city}_active_servers.csv', index=None, header=None)
    # Bare expression nested in the `if`: evaluated, but not rendered as cell output.
    series_of_active_servers

# 2 Perform Speedtest Measurements

In [6]:
# Load the per-city list of active servers; the file is read without a header row,
# so the host names are available as column 0.
servers = pd.read_csv(f'{city}_active_servers.csv', header=None)
#display(servers)
#for server in servers[0]:
#    print(server)

In [7]:
if RunSpeedTests:
    # Measure every active server once; results are keyed by host name.
    dict_of_results = {}
    for server in servers[0]:
        # Measure the server of the current iteration. The earlier version always
        # passed servers[0][3], so every stored result came from the same host.
        dict_of_results[server] = run_measurement(server)
        sleep(30)  # pause between consecutive measurements

    # The loop has no `break`, so a for/else adds nothing; continue unconditionally.
    for result in dict_of_results.values():
        print(result)

    # One row per server: host name plus the raw CLI output.
    df_result = pd.DataFrame(
        list(dict_of_results.items()),
        columns=['speedtest_server', 'speedtest_result'],
    )
    df_result.to_csv(raw_result_file, index=False)

# Parse results to atomic attributes

Split each speedtest result into lines and evaluate each line.  
Bring each line into a form that can easily be split and unpacked,  
e.g.:  
idle_latency, idle_jitter, idle_latency_low, idle_latency_high,*rest =idle_latency.replace('(',',').replace(')',',').split(',')  
replaces "(" and ")" with a comma and then splits the whole string, which results in a list of items.  
A line-specific parser is applied to each line to extract its parameters, which are then parsed again to extract values and units. 



In [29]:
def extract_value_unit(temp_dict):
    '''Split every "value + unit" string of a dictionary into value and unit.

    temp_dict: dictionary of parameter_name -> parameter_result, where the result
    combines a number and a unit, e.g. "10.3ms".

    Returns a new dictionary that holds, for every key of temp_dict, the first run
    of digits (including "." and ",") under the same key and the first run of
    letters under key + "_unit". If a result contains no number or no unit, the
    corresponding entry is an empty string.
    '''
    # Letters only: the former class "[a-z,A-Z,µ]" also matched ",", so a value
    # such as "1,234ms" produced "," as its unit.
    char_pattern = r'[a-zA-Zµ]+'
    # "," stays part of the number so that thousands separators are kept.
    num_pattern = r'[0-9,.]+'
    local_dict = {}
    for key, raw in temp_dict.items():
        ls_value = re.findall(num_pattern, raw)
        ls_unit = re.findall(char_pattern, raw)
        # An entry without a match yields '' instead of aborting the whole parse.
        local_dict[key] = ls_value[0] if ls_value else ''
        local_dict[key + '_unit'] = ls_unit[0] if ls_unit else ''
    return local_dict

def parse_idle_latency(idle_latency):
    '''Parse the text that follows "Idle Latency:" into values and units.

    idle_latency: text made of the latency followed by a parenthesised,
    comma-separated list with jitter, low and high.

    Parentheses are turned into commas so that one split yields all fields; the
    first four are used and the "jitter:", "low:" and "high:" labels are removed.
    Returns the dictionary produced by extract_value_unit for the keys
    idle_latency, idle_jitter, idle_latency_low and idle_latency_high.
    '''
    fields = idle_latency.replace('(', ',').replace(')', ',').split(',')
    latency, jitter, lowest, highest, *_ = fields
    return extract_value_unit({
        'idle_latency': latency,
        'idle_jitter': jitter.replace('jitter:', ''),
        'idle_latency_low': lowest.replace('low:', ''),
        'idle_latency_high': highest.replace('high:', ''),
    })

def parse_upload(upload):
    '''Parse the combined upload text into values and units.

    upload: text whose comma/parenthesis separated fields are, in order: speed,
    one ignored field, latency, jitter, low and high.

    Parentheses are turned into commas so that one split yields all fields; the
    "latency:", "jitter:", "low:" and "high:" labels are removed.
    Returns the dictionary produced by extract_value_unit for the keys
    upload_speed, upload_latency, upload_jitter, upload_latency_low and
    upload_latency_high.
    '''
    fields = upload.replace('(', ',').replace(')', ',').split(',')
    speed, _ignored, latency, jitter, lowest, highest, *_ = fields
    return extract_value_unit({
        'upload_speed': speed,
        'upload_latency': latency.replace('latency:', ''),
        'upload_jitter': jitter.replace('jitter:', ''),
        'upload_latency_low': lowest.replace('low:', ''),
        'upload_latency_high': highest.replace('high:', ''),
    })

def parse_download(download):
    '''Parse the combined download text into values and units.

    download: text whose comma/parenthesis separated fields are, in order: speed,
    one ignored field, latency, jitter, low and high.

    Parentheses are turned into commas so that one split yields all fields; the
    "latency:", "jitter:", "low:" and "high:" labels are removed.
    Returns the dictionary produced by extract_value_unit for the keys
    download_speed, download_latency, download_jitter, download_latency_low and
    download_latency_high.
    '''
    fields = download.replace('(', ',').replace(')', ',').split(',')
    speed, _ignored, latency, jitter, lowest, highest, *_ = fields
    return extract_value_unit({
        'download_speed': speed,
        'download_latency': latency.replace('latency:', ''),
        'download_jitter': jitter.replace('jitter:', ''),
        'download_latency_low': lowest.replace('low:', ''),
        'download_latency_high': highest.replace('high:', ''),
    })

def parse_results(series_result, series_server):
    '''Turn raw speedtest outputs into a DataFrame with one row per measurement.

    series_result: iterable of raw multi-line speedtest outputs (strings).
    series_server: iterable of the server each output belongs to, in the same order.

    Every output is scanned line by line. Lines containing "Server:",
    "Idle Latency:", "Download:", "Upload:" or "Packet Loss:" contribute columns;
    for "Download:" and "Upload:" the following line is appended after the text
    "latency:" before parsing. Finally all blanks are removed from string values.

    Returns a pandas.DataFrame built from one dictionary per measurement; the
    server is stored in the column "speedtest_url".
    Raises IndexError if a "Download:" or "Upload:" line is the last line of an output.
    '''
    list_of_results = []
    # zip pairs both inputs by position. Looking the server up with
    # series_server[idx] was a label lookup and failed for any Series whose
    # index is not 0..n-1 (e.g. after filtering rows).
    for result_item, server_url in zip(series_result, series_server):
        dict_line_result = {'speedtest_url': server_url}
        lines = result_item.splitlines()
        for index, line in enumerate(lines):
            if 'Server:' in line:
                dict_line_result['server'] = line.partition('Server:')[2]
            elif 'Idle Latency:' in line:
                dict_line_result = dict_line_result | parse_idle_latency(line.partition('Idle Latency:')[2])
            elif 'Download:' in line:
                # The latency details are printed on the line after the speed.
                combined = line.partition('Download:')[2] + 'latency:' + lines[index + 1]
                dict_line_result = dict_line_result | parse_download(combined)
            elif 'Upload:' in line:
                combined = line.partition('Upload:')[2] + 'latency:' + lines[index + 1]
                dict_line_result = dict_line_result | parse_upload(combined)
            elif 'Packet Loss:' in line:
                # Test for the same marker (with colon) that is used to cut the text.
                dict_line_result['packet_loss'] = line.partition('Packet Loss:')[2]

        # Remove all blanks from textual values; non-string values are kept as they are.
        dict_line_result = {
            key: value.replace(' ', '').strip() if isinstance(value, str) else value
            for key, value in dict_line_result.items()
        }
        list_of_results.append(dict_line_result)
    return pd.DataFrame(list_of_results)
    
#dict_line_result=parse_results(results['speedtest_result'])

# Read the raw per-server CLI output, parse it into one row of atomic attributes
# per server, show the table and store it as the final result file.
results = pd.read_csv(raw_result_file)
df_line_result = parse_results(results['speedtest_result'], results['speedtest_server'])
print(df_line_result)
df_line_result.to_csv(final_result_file, index=False)

                       speedtest_url                               server  \
0           speedtest.fibronet.co.in  NetronBroadband-Bangalore(id:34768)   
1    speedtestblr.airtelbroadband.in  NetronBroadband-Bangalore(id:34768)   
2                      103.42.84.254  NetronBroadband-Bangalore(id:34768)   
3                speedtest.netron.in  NetronBroadband-Bangalore(id:34768)   
4              bangspeed.hathway.com  NetronBroadband-Bangalore(id:34768)   
5             speedtest.in2cable.com  NetronBroadband-Bangalore(id:34768)   
6              speedtest.belltele.in  NetronBroadband-Bangalore(id:34768)   
7                  speedtest.bbnl.in  NetronBroadband-Bangalore(id:34768)   
8                speedtest.arcnet.in  NetronBroadband-Bangalore(id:34768)   
9              speedtest.fibronet.in  NetronBroadband-Bangalore(id:34768)   
10      blrspeedtest.cityonlines.com  NetronBroadband-Bangalore(id:34768)   
11              speedtest.airwir.com  NetronBroadband-Bangalore(id:34768)   

In [None]:
# Scratch cell (never executed): splits one upload entry into its fields by turning
# parentheses into commas and splitting on ",".
# NOTE(review): parse_results stores upload data under keys such as 'upload_speed'
# and 'upload_latency'; no 'upload' column is written, so this lookup presumably
# raises KeyError on a fresh run — confirm, then fix or delete this cell.
upload_speed, _, upload_latency, upload_jitter, upload_latency_low,upload_latency_high, *rest = df_line_result['upload'][1].replace('(',',').replace(')',',').split(',')
# The trailing comma makes this a tuple expression, shown as the cell output.
upload_speed, upload_latency, upload_jitter, upload_latency_low,upload_latency_high,

In [None]:
# Scratch cell (never executed): splits the idle-latency entry of row 1 into its fields.
# NOTE(review): the 'idle_latency' column of df_line_result is filled from
# extract_value_unit, i.e. it presumably holds only the number and no longer the
# "(jitter: ..., low: ..., high: ...)" part; unpacking four fields would then raise
# ValueError — confirm, then fix or delete this cell.
dict_latency={}
idle_latency, idle_jitter, idle_latency_low, idle_latency_high,*rest =df_line_result['idle_latency'][1].replace('(',',').replace(')',',').split(',')
# Tuple expression, shown as the cell output.
idle_latency, idle_jitter, idle_latency_low, idle_latency_high

In [None]:
# Scratch cell (never executed): stores the idle-latency fields in dict_latency after
# removing the "jitter:", "low:" and "high:" labels.
# NOTE(review): dict_latency and the idle_* names are not defined in this cell; it only
# works if the cell that defines them ran before in the same kernel.
dict_latency['idle_latency'] = idle_latency
dict_latency['idle_jitter'] = idle_jitter.replace('jitter:','')
dict_latency['idle_latency_low'] = idle_latency_low.replace('low:','')
dict_latency['idle_latency_high'] = idle_latency_high.replace('high:','')
# Bare expression: shown as the cell output.
dict_latency
