In [1]:
import pandas as pd
import numpy as np
import csv
from tqdm import tqdm

In [9]:
def find_non_null_timestamps(stats_dict):
    '''Find the row-index range over which every stat has data.

    A sample counts as missing when it equals -1 or the string "-1". For each
    column the function locates the first non-missing sample and the end of the
    contiguous run of non-missing samples that starts there (later runs, after a
    gap, are ignored). The shared range is the intersection of those runs.

    Parameters:
        stats_dict: mapping of column name -> list of samples, as produced by
            DataFrame.to_dict(orient='list').

    Returns:
        (leftmost, rightmost): the latest run start across all columns plus one,
        and the earliest run end across all columns. The range may be empty
        (rightmost < leftmost); callers are expected to check for that.
        An empty stats_dict yields the sentinel pair (0, 99999999999).

    Raises:
        ValueError: if any column contains no non-missing sample at all.
    '''
    # Sentinels: any real index is larger than -1 and smaller than 99999999999.
    global_leftmost = -1
    global_rightmost = 99999999999

    for key, val in stats_dict.items():
        leftmost = None
        rightmost = None
        for i, sample in enumerate(val):
            is_missing = sample == -1 or sample == "-1"
            if not is_missing:
                if leftmost is None:
                    leftmost = i
                rightmost = i
            elif leftmost is not None:
                # First gap after the run started: the run ends here.
                break

        if leftmost is None:
            # Previously this fell through to `None > int` and raised an
            # opaque TypeError; report the offending column instead.
            raise ValueError(f"Error: Statistic '{key}' has no data recorded by Web RTC.")

        global_leftmost = max(global_leftmost, leftmost)
        global_rightmost = min(global_rightmost, rightmost)

    # NOTE(review): the +1 drops the first shared sample; the reason is not
    # visible in this file -- confirm it is intentional.
    return global_leftmost + 1, global_rightmost


def aggregate_across_call_single_stat(stat_list, percentile, leftmost, rightmost):
    '''Collapse one stat's samples over a call into a single percentile value.

    Parameters:
        stat_list: sequence of samples for one stat (sliced positionally).
        percentile: percentile to compute, passed straight to np.percentile.
        leftmost, rightmost: inclusive index bounds of the window to aggregate.

    Returns:
        The requested percentile of stat_list[leftmost : rightmost + 1].

    Raises:
        ValueError: if the window does not span more than one index
            (rightmost - leftmost <= 0).
    '''
    # Guard first: a window of width zero or less is treated as "no data".
    if rightmost <= leftmost:
        raise ValueError("Error: Statistic found for which Web RTC recorded no data for.")

    window = np.array(stat_list[leftmost : rightmost + 1])
    return np.percentile(window, percentile)


def find_agg_stats_single_call(parent_file_path, treatment):
    '''Aggregate every recognised Web RTC stat of one call into a single value.

    Reads the CSV at parent_file_path + str(treatment) + ".csv", determines the
    row range shared by all columns via find_non_null_timestamps, and reduces
    each recognised column over that range with
    aggregate_across_call_single_stat:
      * "small is bad" stats use the 10th percentile,
      * "big is bad" stats use the 90th percentile.
    Columns that are in neither group are skipped.

    Parameters:
        parent_file_path: path prefix of the CSV (everything before the number).
        treatment: call/test number appended to the prefix.

    Returns:
        list of aggregated values, one per recognised column, in the order the
        columns appear in the CSV.
        NOTE(review): callers pair this list positionally with a hard-coded
        header, so the CSV column order must match that header -- confirm.

    Raises:
        Whatever pd.read_csv, find_non_null_timestamps or
        aggregate_across_call_single_stat raise (e.g. FileNotFoundError,
        ValueError when no usable data range exists).
    '''
    file_path = parent_file_path + str(treatment) + ".csv"
    df = pd.read_csv(file_path)  # read in as pandas data frame
    leftmost, rightmost = find_non_null_timestamps(df.to_dict(orient='list'))

    # Every stat is recorded once per participant, as "<stat>_<participant>".
    participants = ("ellen", "aadya")

    # Stats where a low value indicates a poor call -> aggregate with a low percentile.
    small_is_bad = (
        "IT01V_packetsRecieved",
        "IT01V_frameWidth",
        "IT01V_frameHeight",
        "IT01V_framesPerSecond",
        "IT01V_bytesReceived_in_bits/s",
        "IT01A_bytesReceived_in_bits/s",
        "OT01V_packetsSent/s",
        "OT01V_bytesSent_in_bits/s",
        "OT01V_frameWidth",
        "OT01V_framesPerSecond",
    )
    # Stats where a high value indicates a poor call -> aggregate with a high percentile.
    big_is_bad = (
        "IT01V_packetsLost",
        "IT01V_totalFreezesDuration",
        "IT01V_totalProcessingDelay",
        "IT01V_jitter",
        "IT01V_jitterBufferDelay/emissions",
        "IT01A_jitterBufferDelay/emissions",
        "OT01V_totalPacketSendDelay",
        "OT01V_totalPacketSendDelay/packetsSent_in_ms",
        "RIV_roundTripTime",
        "RIV_fractionLost",
        "RIA_fractionLost",
        "RIA_roundTripTime",
        "ROA_roundTripTime",
        "AP_totalPlayoutDelay",
    )

    # Column name -> percentile, so each column needs a single dict lookup.
    percentile_by_column = {}
    for stat_names, percentile in ((small_is_bad, 10), (big_is_bad, 90)):
        for stat in stat_names:
            for person in participants:
                percentile_by_column[f"{stat}_{person}"] = percentile

    agg_values = []
    # DataFrame.items() replaces DataFrame.iteritems(), which was removed in pandas 2.0.
    for col_name, col_data in df.items():
        percentile = percentile_by_column.get(col_name)
        if percentile is None:
            continue  # not a stat we aggregate (e.g. a timestamp column)
        agg_values.append(aggregate_across_call_single_stat(col_data, percentile, leftmost, rightmost))

    return agg_values
		

def create_regression_table(readin_parent_file_path, writeout_file_path, lowest_treatment_number, highest_treatment_number):
    '''Write one row of aggregated Web RTC stats per treatment to a CSV file.

    For every treatment number in [lowest_treatment_number,
    highest_treatment_number] the aggregated stats are obtained from
    find_agg_stats_single_call and written as a row prefixed with the treatment
    number. Treatments that raise any exception are skipped and a warning is
    printed, so one bad or missing input file does not abort the whole table.

    Parameters:
        readin_parent_file_path: path prefix handed to find_agg_stats_single_call.
        writeout_file_path: destination CSV (overwritten).
        lowest_treatment_number: first treatment number, inclusive.
        highest_treatment_number: last treatment number, inclusive.

    Returns:
        None. The result is the file written to writeout_file_path.
    '''
    # Output column order: one pair of columns (ellen, aadya) per stat. These
    # are the input column names with "/" replaced by "_".
    stat_names = [
        "IT01V_packetsRecieved",
        "IT01V_packetsLost",
        "IT01V_frameWidth",
        "IT01V_frameHeight",
        "IT01V_totalFreezesDuration",
        "IT01V_framesPerSecond",
        "IT01V_bytesReceived_in_bits_s",
        "IT01V_totalProcessingDelay",
        "IT01V_jitter",
        "IT01V_jitterBufferDelay_emissions",
        "IT01A_bytesReceived_in_bits_s",
        "IT01A_jitterBufferDelay_emissions",
        "OT01V_packetsSent_s",
        "OT01V_bytesSent_in_bits_s",
        "OT01V_frameWidth",
        "OT01V_framesPerSecond",
        "OT01V_totalPacketSendDelay",
        "OT01V_totalPacketSendDelay_packetsSent_in_ms",
        "RIV_roundTripTime",
        "RIV_fractionLost",
        "RIA_fractionLost",
        "RIA_roundTripTime",
        "ROA_roundTripTime",
        "AP_totalPlayoutDelay",
    ]
    header = ["call_number"] + [
        f"{stat}_{person}" for stat in stat_names for person in ("ellen", "aadya")
    ]

    # Write out to a CSV
    with open(writeout_file_path, mode='w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(header)
        for treatment in tqdm(range(lowest_treatment_number, highest_treatment_number + 1)):
            try:
                row = [treatment] + find_agg_stats_single_call(readin_parent_file_path, treatment)
                if len(row) != len(header):
                    # Values are matched to the header purely by position, so a
                    # CSV lacking (or duplicating) a stat column shifts every
                    # later value. Surface that instead of failing silently;
                    # the row is still written so no data is dropped.
                    print(f"WARNING: treatment {treatment} produced {len(row) - 1} aggregated values "
                          f"but the header has {len(header) - 1} stat columns; row may be misaligned")
                writer.writerow(row)
            except Exception as E:
                # Deliberate best-effort: report and carry on with the next treatment.
                print(f"WARNING: unable to generate aggregate statistics for treatment {treatment} due to ...")
                print("      ", E)

### Execution cells:

In [10]:
# STAGE 1
# Build the independent-variables table for treatments 1..300 of stage 1.

lowest_treatment_number = 1
highest_treatment_number = 300
readin_parent_file_path = "../parsed_CSVs/stage_1/treatment"
writeout_file_path = "independent_vars_tables/stage_1_independent_vars_table.csv"
create_regression_table(
    readin_parent_file_path=readin_parent_file_path,
    writeout_file_path=writeout_file_path,
    lowest_treatment_number=lowest_treatment_number,
    highest_treatment_number=highest_treatment_number,
)

 88%|████████▊ | 265/300 [00:03<00:00, 69.47it/s]

       Error: Statistic found for which Web RTC recorded no data for.


100%|██████████| 300/300 [00:04<00:00, 68.30it/s]


In [11]:
# STAGE 2
# Build the independent-variables table for treatments 1..300 of stage 2.

lowest_treatment_number = 1
highest_treatment_number = 300
readin_parent_file_path = "../parsed_CSVs/stage_2/treatment"
writeout_file_path = "independent_vars_tables/stage_2_independent_vars_table.csv"
create_regression_table(
    readin_parent_file_path=readin_parent_file_path,
    writeout_file_path=writeout_file_path,
    lowest_treatment_number=lowest_treatment_number,
    highest_treatment_number=highest_treatment_number,
)

 15%|█▌        | 45/300 [00:00<00:04, 59.85it/s]

       Error: Statistic found for which Web RTC recorded no data for.


 62%|██████▏   | 185/300 [00:03<00:01, 67.24it/s]

       Error: Statistic found for which Web RTC recorded no data for.
       Error: Statistic found for which Web RTC recorded no data for.
       Error: Statistic found for which Web RTC recorded no data for.


 79%|███████▉  | 237/300 [00:03<00:00, 69.93it/s]

       Error: Statistic found for which Web RTC recorded no data for.
       Error: Statistic found for which Web RTC recorded no data for.


 89%|████████▉ | 268/300 [00:04<00:00, 70.97it/s]

       [Errno 2] No such file or directory: '../parsed_CSVs/stage_2/treatment256.csv'


100%|██████████| 300/300 [00:04<00:00, 63.21it/s]

       Error: Statistic found for which Web RTC recorded no data for.





In [12]:
# STAGE 3
# Build the independent-variables table for treatments 1..300 of stage 3.

lowest_treatment_number = 1
highest_treatment_number = 300
readin_parent_file_path = "../parsed_CSVs/stage_3/treatment"
writeout_file_path = "independent_vars_tables/stage_3_independent_vars_table.csv"
create_regression_table(
    readin_parent_file_path=readin_parent_file_path,
    writeout_file_path=writeout_file_path,
    lowest_treatment_number=lowest_treatment_number,
    highest_treatment_number=highest_treatment_number,
)

  3%|▎         | 9/300 [00:00<00:03, 81.93it/s]

       [Errno 2] No such file or directory: '../parsed_CSVs/stage_3/treatment1.csv'
       [Errno 2] No such file or directory: '../parsed_CSVs/stage_3/treatment5.csv'
       [Errno 2] No such file or directory: '../parsed_CSVs/stage_3/treatment11.csv'


 65%|██████▍   | 194/300 [00:02<00:01, 72.33it/s]

       [Errno 2] No such file or directory: '../parsed_CSVs/stage_3/treatment180.csv'


 86%|████████▌ | 257/300 [00:03<00:00, 72.80it/s]

       Error: Statistic found for which Web RTC recorded no data for.
       [Errno 2] No such file or directory: '../parsed_CSVs/stage_3/treatment252.csv'


 94%|█████████▎| 281/300 [00:03<00:00, 72.45it/s]

       [Errno 2] No such file or directory: '../parsed_CSVs/stage_3/treatment272.csv'


100%|██████████| 300/300 [00:04<00:00, 70.33it/s]


### For fieldwork stats:

In [13]:
# Fieldwork
# Build the independent-variables table for the fieldwork calls. Rural towns
# have three locations with up to four tests each; urban suburbs have three
# tests each. Calls that cannot be aggregated are skipped with a warning.

rural_town_names = ["colac", "dunkeld", "ararat", "bendigo", "elmore", "shep", "wang", "myrtleford", "euroa", "seymore"]
urban_suburb_names = ["dandenong", "mordialloc", "brighton", "toorak", "cbd", "brunswickwest", "northcote"]

writeout_file_path = "independent_vars_tables/fieldwork_independent_vars_table.csv"

# Output column order: an ID column followed by one pair of columns
# (ellen, aadya) per stat.
header = ["ID"] + [
    f"{stat}_{person}"
    for stat in (
        "IT01V_packetsRecieved",
        "IT01V_packetsLost",
        "IT01V_frameWidth",
        "IT01V_frameHeight",
        "IT01V_totalFreezesDuration",
        "IT01V_framesPerSecond",
        "IT01V_bytesReceived_in_bits_s",
        "IT01V_totalProcessingDelay",
        "IT01V_jitter",
        "IT01V_jitterBufferDelay_emissions",
        "IT01A_bytesReceived_in_bits_s",
        "IT01A_jitterBufferDelay_emissions",
        "OT01V_packetsSent_s",
        "OT01V_bytesSent_in_bits_s",
        "OT01V_frameWidth",
        "OT01V_framesPerSecond",
        "OT01V_totalPacketSendDelay",
        "OT01V_totalPacketSendDelay_packetsSent_in_ms",
        "RIV_roundTripTime",
        "RIV_fractionLost",
        "RIA_fractionLost",
        "RIA_roundTripTime",
        "ROA_roundTripTime",
        "AP_totalPlayoutDelay",
    )
    for person in ("ellen", "aadya")
]

with open(writeout_file_path, mode='w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(header)

    for town in rural_town_names:
        for location in range(1, 4):
            readin_parent_file_path = f"../parsed_CSVs/fieldwork/{town}/{town}{location}/test"
            for test in range(1, 5):
                try:
                    row = find_agg_stats_single_call(readin_parent_file_path, test)
                    writer.writerow([f"{town}{location}{test}"] + row)
                except Exception as E:
                    # A missing file for test 4 is not reported (presumably the
                    # fourth test is optional -- confirm). Any other failure on
                    # test 4 is a real problem and is now reported rather than
                    # silently swallowed.
                    optional_test_missing = test == 4 and isinstance(E, FileNotFoundError)
                    if not optional_test_missing:
                        print(f"WARNING: unable to generate aggregate statistics for {town}{location}{test} due to...")
                        print("       ", E)

    for suburb in urban_suburb_names:
        readin_parent_file_path = f"../parsed_CSVs/fieldwork/{suburb}/test"
        for test in range(1, 4):
            try:
                row = find_agg_stats_single_call(readin_parent_file_path, test)
                writer.writerow([f"{suburb}{test}"] + row)
            except Exception as E:
                print(f"WARNING: unable to generate aggregate statistics for {suburb}{test}")
                print("       ", E)

        [Errno 2] No such file or directory: '../parsed_CSVs/fieldwork/colac/colac1/test1.csv'
        [Errno 2] No such file or directory: '../parsed_CSVs/fieldwork/colac/colac1/test3.csv'
        Error: Statistic found for which Web RTC recorded no data for.
        Error: Statistic found for which Web RTC recorded no data for.
        [Errno 2] No such file or directory: '../parsed_CSVs/fieldwork/colac/colac3/test2.csv'
        [Errno 2] No such file or directory: '../parsed_CSVs/fieldwork/colac/colac3/test3.csv'
        [Errno 2] No such file or directory: '../parsed_CSVs/fieldwork/dunkeld/dunkeld1/test1.csv'
        [Errno 2] No such file or directory: '../parsed_CSVs/fieldwork/dunkeld/dunkeld1/test3.csv'
        [Errno 2] No such file or directory: '../parsed_CSVs/fieldwork/dunkeld/dunkeld2/test1.csv'
        Error: Statistic found for which Web RTC recorded no data for.
        [Errno 2] No such file or directory: '../parsed_CSVs/fieldwork/dunkeld/dunkeld3/test2.csv'
        [Er

In [14]:
def combine_subjective_and_independent_vars_table(csv1_path, csv2_path, output_csv_path):
    '''Inner-join two CSV tables on their first column and save the result.

    The first column of each input is renamed to 'identifier' (whatever it was
    called) and used as the join key. Only identifiers present in both tables
    are kept. The merged table is written to output_csv_path without the
    DataFrame index.

    Parameters:
        csv1_path: path of the left-hand CSV.
        csv2_path: path of the right-hand CSV.
        output_csv_path: path the merged CSV is written to.
    '''
    # Load both inputs before touching either of them.
    loaded = [pd.read_csv(path) for path in (csv1_path, csv2_path)]

    # Normalise the key column name; rename returns a new frame each time.
    left, right = (
        table.rename(columns={table.columns[0]: 'identifier'}) for table in loaded
    )

    combined = left.merge(right, on='identifier', how='inner')
    combined.to_csv(output_csv_path, index=False)

# Join the fieldwork independent-variables table with the subjective table.
writeout_file_path = "regression_tables/fieldwork_regression_table.csv"
subjective_file_path = "../test_combos/fieldwork/fieldwork_subjective.csv"
independent_vars_file_path = "independent_vars_tables/fieldwork_independent_vars_table.csv"
combine_subjective_and_independent_vars_table(
    csv1_path=independent_vars_file_path,
    csv2_path=subjective_file_path,
    output_csv_path=writeout_file_path,
)