In [1]:
import pandas as pd
import numpy as np
import csv
from tqdm import tqdm

In [16]:
def find_non_null_timestamps(stats_dict):
	'''Function to find the roughly 30 second range of timestamps which Web RTC consistently provides data for.'''

	global_leftmost = -1
	global_rightmost = 99999999999
	for key, val in stats_dict.items():
		leftmost = None
		rightmost = None
		for i in range(len(val)):
			if val[i] != -1 and val[i] != "-1":
				if leftmost == None:
					leftmost = i
					rightmost = i
				if rightmost != None:
					rightmost = i
			elif (val[i] == -1 or val[i] == "-1") and rightmost != None:
				break
		if leftmost > global_leftmost:
			global_leftmost = leftmost
		if rightmost < global_rightmost:
			global_rightmost = rightmost

	return global_leftmost + 1, global_rightmost


def aggregate_across_call_single_stat(stat_list, percentile, leftmost, rightmost):
	'''function to aggregate Web RTC stat for a whole call into one'''
	
	if rightmost - leftmost <= 0:
		raise ValueError("Error: Statistic found for which Web RTC recorded no data for.") 
	else:
		stat_list_reduced = np.array(stat_list[leftmost : rightmost + 1])
		agg_value = np.percentile(stat_list_reduced, percentile)
		return agg_value


def find_agg_stats_single_call(parent_file_path, treatment):
	'''function which outputs a list of aggregated statistics (one for every
    Web RTC stat parsed), across the call'''
	
	file_path = parent_file_path + str(treatment) + ".csv"
	df = pd.read_csv(file_path) #read in as pandas data frame
	leftmost, rightmost = find_non_null_timestamps(df.to_dict(orient='list'))
	
	small_is_bad = [
		          "IT01V_packetsRecieved_ellen",
			      "IT01V_packetsRecieved_aadya",
				  "IT01V_frameWidth_ellen",
				  "IT01V_frameWidth_aadya",
				  "IT01V_frameHeight_ellen",
				  "IT01V_frameHeight_aadya",
				  "IT01V_framesPerSecond_ellen",
				  "IT01V_framesPerSecond_aadya",
				  "IT01V_bytesReceived_in_bits/s_ellen",
				  "IT01V_bytesReceived_in_bits/s_aadya",
				  "IT01A_bytesReceived_in_bits/s_ellen",
				  "IT01A_bytesReceived_in_bits/s_aadya",
				  "OT01V_packetsSent/s_ellen",
				  "OT01V_packetsSent/s_aadya",
				  "OT01V_bytesSent_in_bits/s_ellen",
				  "OT01V_bytesSent_in_bits/s_aadya",
				  "OT01V_frameWidth_ellen",
				  "OT01V_frameWidth_aadya",
				  "OT01V_framesPerSecond_ellen",
				  "OT01V_framesPerSecond_aadya",
				  ]
	big_is_bad = [
				  "IT01V_packetsLost_ellen", 
				  "IT01V_packetsLost_aadya",
				  "IT01V_totalFreezesDuration_ellen",
				  "IT01V_totalFreezesDuration_aadya",
				  "IT01V_totalProcessingDelay_ellen",
				  "IT01V_totalProcessingDelay_aadya",
				  "IT01V_jitter_ellen",
				  "IT01V_jitter_aadya",
				  "IT01V_jitterBufferDelay/emissions_ellen",
				  "IT01V_jitterBufferDelay/emissions_aadya",
				  "IT01A_jitterBufferDelay/emissions_ellen",
				  "IT01A_jitterBufferDelay/emissions_aadya",
				  "OT01V_totalPacketSendDelay_ellen",
				  "OT01V_totalPacketSendDelay_aadya",
				  "OT01V_totalPacketSendDelay/packetsSent_in_ms_ellen",
				  "OT01V_totalPacketSendDelay/packetsSent_in_ms_aadya",
				  "RIV_roundTripTime_ellen",
				  "RIV_roundTripTime_aadya",
				  "RIV_fractionLost_ellen",
				  "RIV_fractionLost_aadya",
				  "RIA_fractionLost_ellen",
				  "RIA_fractionLost_aadya",
				  "RIA_roundTripTime_ellen",
				  "RIA_roundTripTime_aadya",
				  "ROA_roundTripTime_ellen",
				  "ROA_roundTripTime_aadya",
				  "AP_totalPlayoutDelay_ellen",
				  "AP_totalPlayoutDelay_aadya"]
                  
	agg_values = []
	for col_name, col_data in df.iteritems():
		if col_name in small_is_bad:
			percentile = 10 
			agg_values.append(aggregate_across_call_single_stat(col_data, percentile, leftmost, rightmost))
		elif col_name in big_is_bad:
			percentile = 90
			agg_values.append(aggregate_across_call_single_stat(col_data, percentile, leftmost, rightmost))
			
	return agg_values
		

def create_regression_table(readin_parent_file_path, writeout_file_path, lowest_treatment_number, highest_treatment_number):
	
    header = [
		"call_number",
		"IT01V_packetsRecieved_ellen",
        "IT01V_packetsRecieved_aadya",
		"IT01V_packetsLost_ellen", 
        "IT01V_packetsLost_aadya",
        "IT01V_frameWidth_ellen",
        "IT01V_frameWidth_aadya",
        "IT01V_frameHeight_ellen",
        "IT01V_frameHeight_aadya",
		"IT01V_totalFreezesDuration_ellen",
        "IT01V_totalFreezesDuration_aadya",
        "IT01V_framesPerSecond_ellen",
        "IT01V_framesPerSecond_aadya",
        "IT01V_bytesReceived_in_bits_s_ellen",
        "IT01V_bytesReceived_in_bits_s_aadya",
		"IT01V_totalProcessingDelay_ellen",
        "IT01V_totalProcessingDelay_aadya",
        "IT01V_jitter_ellen",
        "IT01V_jitter_aadya",
        "IT01V_jitterBufferDelay_emissions_ellen",
        "IT01V_jitterBufferDelay_emissions_aadya",
        "IT01A_bytesReceived_in_bits_s_ellen",
        "IT01A_bytesReceived_in_bits_s_aadya",
		"IT01A_jitterBufferDelay_emissions_ellen",
        "IT01A_jitterBufferDelay_emissions_aadya",
        "OT01V_packetsSent_s_ellen",
        "OT01V_packetsSent_s_aadya",
        "OT01V_bytesSent_in_bits_s_ellen",
        "OT01V_bytesSent_in_bits_s_aadya",
        "OT01V_frameWidth_ellen",
        "OT01V_frameWidth_aadya",
        "OT01V_framesPerSecond_ellen",
        "OT01V_framesPerSecond_aadya",
		"OT01V_totalPacketSendDelay_ellen",
        "OT01V_totalPacketSendDelay_aadya",
        "OT01V_totalPacketSendDelay_packetsSent_in_ms_ellen",
        "OT01V_totalPacketSendDelay_packetsSent_in_ms_aadya",
        "RIV_roundTripTime_ellen",
        "RIV_roundTripTime_aadya",
        "RIV_fractionLost_ellen",
        "RIV_fractionLost_aadya",
        "RIA_fractionLost_ellen",
        "RIA_fractionLost_aadya",
        "RIA_roundTripTime_ellen",
        "RIA_roundTripTime_aadya",
        "ROA_roundTripTime_ellen",
        "ROA_roundTripTime_aadya",
        "AP_totalPlayoutDelay_ellen",
        "AP_totalPlayoutDelay_aadya"
    ]

	# Write out to a CSV
    with open(writeout_file_path, mode='w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(header)
        for treatment in tqdm(range(lowest_treatment_number, highest_treatment_number + 1)):
            try:
                row = [treatment] + find_agg_stats_single_call(readin_parent_file_path, treatment)
                writer.writerow(row)
            except Exception as E:
                print(f"WARNING: unable to generate aggregate statistics for treatment {treatment} due to ...")
                print("      ", E)


def combine_tables(csv1_path, csv2_path, output_csv_path):
    '''function to simply merge two tables into one baed on the name in the first column being the same'''
	# Read both CSV files into pandas dataframes
    df1 = pd.read_csv(csv1_path)
    df2 = pd.read_csv(csv2_path)

    # Rename the first column to 'identifier' in both dataframes if not already named
    df1.rename(columns={df1.columns[0]: 'identifier'}, inplace=True)
    df2.rename(columns={df2.columns[0]: 'identifier'}, inplace=True)

    # Merge the two dataframes on the 'identifier' column using an inner join
    merged_df = pd.merge(df1, df2, on='identifier', how='inner')

    # Write the merged dataframe to a new CSV file
    merged_df.to_csv(output_csv_path, index=False)

### Execution cells:

In [20]:
# STAGE 1

readin_parent_file_path = "../parsed_CSVs/stage_1/treatment"
writeout_file_path = "independent_vars_tables/stage_1_independent_vars_table.csv"
lowest_treatment_number = 1
highest_treatment_number = 300
create_regression_table(readin_parent_file_path, writeout_file_path, lowest_treatment_number, highest_treatment_number)

#merging with treatment conditions:
treatment_csv_filepath = "../test_combos/stage_1/test_combos_shuffled_aadya.csv"
output_file_path = "regression_tables/stage_1_regression_table.csv"
combine_tables(writeout_file_path, treatment_csv_filepath, output_file_path)

 87%|████████▋ | 262/300 [00:03<00:00, 59.37it/s]

       Error: Statistic found for which Web RTC recorded no data for.


100%|██████████| 300/300 [00:04<00:00, 65.61it/s]


In [21]:
# STAGE 2

readin_parent_file_path = "../parsed_CSVs/stage_2/treatment"
writeout_file_path = "independent_vars_tables/stage_2_independent_vars_table.csv"
lowest_treatment_number = 1
highest_treatment_number = 300
create_regression_table(readin_parent_file_path, writeout_file_path, lowest_treatment_number, highest_treatment_number)

#merging with treatment conditions:
treatment_csv_filepath = "../test_combos/stage_2/test_combos_shuffled_aadya.csv"
output_file_path = "regression_tables/stage_2_regression_table.csv"
combine_tables(writeout_file_path, treatment_csv_filepath, output_file_path)

 13%|█▎        | 40/300 [00:00<00:03, 67.54it/s]

       Error: Statistic found for which Web RTC recorded no data for.


 63%|██████▎   | 189/300 [00:02<00:01, 71.93it/s]

       Error: Statistic found for which Web RTC recorded no data for.
       Error: Statistic found for which Web RTC recorded no data for.
       Error: Statistic found for which Web RTC recorded no data for.


 78%|███████▊  | 235/300 [00:03<00:00, 70.33it/s]

       Error: Statistic found for which Web RTC recorded no data for.
       Error: Statistic found for which Web RTC recorded no data for.


 88%|████████▊ | 265/300 [00:03<00:00, 68.29it/s]

       [Errno 2] No such file or directory: '../parsed_CSVs/stage_2/treatment256.csv'


100%|██████████| 300/300 [00:04<00:00, 66.20it/s]

       Error: Statistic found for which Web RTC recorded no data for.





In [23]:
# STAGE 3

readin_parent_file_path = "../parsed_CSVs/stage_3/treatment"
writeout_file_path = "independent_vars_tables/stage_3_independent_vars_table.csv"
lowest_treatment_number = 1
highest_treatment_number = 300
create_regression_table(readin_parent_file_path, writeout_file_path, lowest_treatment_number, highest_treatment_number)

#merging with treatment conditions:
treatment_csv_filepath_1 = "../test_combos/stage_3/test_combos_shuffled_aadya.csv"
treatment_csv_filepath_2 = "../test_combos/stage_3/test_combos_shuffled_ellen.csv"
output_file_path = "regression_tables/stage_3_regression_table.csv"
combine_tables(writeout_file_path, treatment_csv_filepath_1, output_file_path)
combine_tables(output_file_path, treatment_csv_filepath_2, output_file_path)

  5%|▍         | 14/300 [00:00<00:04, 69.72it/s]

       [Errno 2] No such file or directory: '../parsed_CSVs/stage_3/treatment1.csv'
       [Errno 2] No such file or directory: '../parsed_CSVs/stage_3/treatment5.csv'
       [Errno 2] No such file or directory: '../parsed_CSVs/stage_3/treatment11.csv'


 63%|██████▎   | 188/300 [00:02<00:01, 73.36it/s]

       [Errno 2] No such file or directory: '../parsed_CSVs/stage_3/treatment180.csv'


 87%|████████▋ | 261/300 [00:03<00:00, 73.36it/s]

       Error: Statistic found for which Web RTC recorded no data for.
       [Errno 2] No such file or directory: '../parsed_CSVs/stage_3/treatment252.csv'


 95%|█████████▌| 285/300 [00:04<00:00, 72.75it/s]

       [Errno 2] No such file or directory: '../parsed_CSVs/stage_3/treatment272.csv'


100%|██████████| 300/300 [00:04<00:00, 69.33it/s]


### For fieldwork stats:

In [8]:
# Fieldwork

rural_town_names = ["colac", "dunkeld", "ararat", "bendigo", "elmore", "shep", "wang", "myrtleford", "euroa", "seymore"]
urban_suburb_names = ["dandenong", "mordialloc", "brighton", "toorak", "cbd", "brunswickwest", "northcote"]

writeout_file_path = "independent_vars_tables/fieldwork_independent_vars_table.csv"

header = [
		"ID",
		"IT01V_packetsRecieved_ellen",
        "IT01V_packetsRecieved_aadya",
		"IT01V_packetsLost_ellen", 
        "IT01V_packetsLost_aadya",
        "IT01V_frameWidth_ellen",
        "IT01V_frameWidth_aadya",
        "IT01V_frameHeight_ellen",
        "IT01V_frameHeight_aadya",
		"IT01V_totalFreezesDuration_ellen",
        "IT01V_totalFreezesDuration_aadya",
        "IT01V_framesPerSecond_ellen",
        "IT01V_framesPerSecond_aadya",
        "IT01V_bytesReceived_in_bits_s_ellen",
        "IT01V_bytesReceived_in_bits_s_aadya",
		"IT01V_totalProcessingDelay_ellen",
        "IT01V_totalProcessingDelay_aadya",
        "IT01V_jitter_ellen",
        "IT01V_jitter_aadya",
        "IT01V_jitterBufferDelay_emissions_ellen",
        "IT01V_jitterBufferDelay_emissions_aadya",
        "IT01A_bytesReceived_in_bits_s_ellen",
        "IT01A_bytesReceived_in_bits_s_aadya",
		"IT01A_jitterBufferDelay_emissions_ellen",
        "IT01A_jitterBufferDelay_emissions_aadya",
        "OT01V_packetsSent_s_ellen",
        "OT01V_packetsSent_s_aadya",
        "OT01V_bytesSent_in_bits_s_ellen",
        "OT01V_bytesSent_in_bits_s_aadya",
        "OT01V_frameWidth_ellen",
        "OT01V_frameWidth_aadya",
        "OT01V_framesPerSecond_ellen",
        "OT01V_framesPerSecond_aadya",
		"OT01V_totalPacketSendDelay_ellen",
        "OT01V_totalPacketSendDelay_aadya",
        "OT01V_totalPacketSendDelay_packetsSent_in_ms_ellen",
        "OT01V_totalPacketSendDelay_packetsSent_in_ms_aadya",
        "RIV_roundTripTime_ellen",
        "RIV_roundTripTime_aadya",
        "RIV_fractionLost_ellen",
        "RIV_fractionLost_aadya",
        "RIA_fractionLost_ellen",
        "RIA_fractionLost_aadya",
        "RIA_roundTripTime_ellen",
        "RIA_roundTripTime_aadya",
        "ROA_roundTripTime_ellen",
        "ROA_roundTripTime_aadya",
        "AP_totalPlayoutDelay_ellen",
        "AP_totalPlayoutDelay_aadya"
    ]

with open(writeout_file_path, mode='w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(header)

    for town in rural_town_names:
        for location in range(1, 4):
            readin_parent_file_path = f"../parsed_CSVs/fieldwork/{town}/{town}{location}/test"
            for test in range(1, 5):
                try:
                    row = find_agg_stats_single_call(readin_parent_file_path, test)
                    writer.writerow([f"{town}{location}{test}"] + row)
                except Exception as E:
                    if test != 4:
                        print(f"WARNING: unable to generate aggregate statistics for {town}{location}{test} due to...")
                        print("       ", E)
    
    for suburb in urban_suburb_names:
        readin_parent_file_path = f"../parsed_CSVs/fieldwork/{suburb}/test"
        for test in range(1, 4):
            try:
                row = find_agg_stats_single_call(readin_parent_file_path, test)
                writer.writerow([f"{suburb}{test}"] + row)
            except Exception as E:
                print(f"WARNING: unable to generate aggregate statistics for {suburb}{test}")
                print("       ", E)

        [Errno 2] No such file or directory: '../parsed_CSVs/fieldwork/colac/colac1/test1.csv'
        [Errno 2] No such file or directory: '../parsed_CSVs/fieldwork/colac/colac1/test3.csv'
        Error: Statistic found for which Web RTC recorded no data for.
        Error: Statistic found for which Web RTC recorded no data for.
        [Errno 2] No such file or directory: '../parsed_CSVs/fieldwork/colac/colac3/test2.csv'
        [Errno 2] No such file or directory: '../parsed_CSVs/fieldwork/colac/colac3/test3.csv'
        [Errno 2] No such file or directory: '../parsed_CSVs/fieldwork/dunkeld/dunkeld1/test1.csv'
        [Errno 2] No such file or directory: '../parsed_CSVs/fieldwork/dunkeld/dunkeld1/test3.csv'
        [Errno 2] No such file or directory: '../parsed_CSVs/fieldwork/dunkeld/dunkeld2/test1.csv'
        Error: Statistic found for which Web RTC recorded no data for.
        [Errno 2] No such file or directory: '../parsed_CSVs/fieldwork/dunkeld/dunkeld3/test2.csv'
        [Er

In [5]:
independent_vars_file_path = "independent_vars_tables/fieldwork_independent_vars_table.csv"
subjective_file_path = "../test_combos/fieldwork/fieldwork_subjective.csv"
writeout_file_path = "regression_tables/fieldwork_regression_table.csv"
combine_tables(independent_vars_file_path, subjective_file_path, writeout_file_path)

## Random testing/checking functions:

In [15]:
parent_file_path_of_parsed_csvs = "../parsed_CSVs/stage_2/treatment"

for i in range(1, 301):

    df = pd.read_csv(parent_file_path_of_parsed_csvs + str(i) + ".csv")
    riv_fraction_lost_col_e = df['RIV_fractionLost_ellen']
    riv_fraction_lost_col_a = df['RIV_fractionLost_aadya']
    tally = 0
    for element in riv_fraction_lost_col_e:
        if element > 0:
            tally +=1 
    for element in riv_fraction_lost_col_a:
        if element > 0:
            tally +=1
    
    print(i, tally)


1 0
2 0
3 6
4 3
5 0
6 6
7 1
8 5
9 0
10 0
11 4
12 0
13 1
14 1
15 0
16 2
17 8
18 6
19 0
20 0
21 0
22 2
23 5
24 0
25 0
26 1
27 0
28 2
29 1
30 8
31 0
32 2
33 4
34 0
35 0
36 0
37 0
38 0
39 0
40 1
41 0
42 0
43 0
44 0
45 2
46 8
47 0
48 2
49 2
50 1
51 9
52 0
53 0
54 0
55 0
56 2
57 2
58 0
59 0
60 1
61 0
62 0
63 1
64 0
65 0
66 1
67 0
68 0
69 1
70 0
71 2
72 0
73 0
74 2
75 2
76 0
77 0
78 0
79 4
80 0
81 2
82 2
83 0
84 5
85 0
86 0
87 0
88 2
89 0
90 0
91 1
92 0
93 2
94 2
95 0
96 2
97 1
98 0
99 3
100 0
101 1
102 0
103 2
104 3
105 0
106 1
107 0
108 0
109 2
110 4
111 2
112 0
113 1
114 5
115 0
116 1
117 0
118 0
119 5
120 0
121 1
122 0
123 3
124 0
125 0
126 0
127 0
128 0
129 2
130 1
131 7
132 0
133 2
134 1
135 5
136 0
137 0
138 2
139 0
140 3
141 0
142 3
143 0
144 0
145 4
146 0
147 1
148 0
149 0
150 8
151 3
152 2
153 1
154 9
155 0
156 0
157 1
158 0
159 7
160 0
161 0
162 4
163 0
164 4
165 0
166 0
167 7
168 0
169 0
170 3
171 1
172 0
173 0
174 4
175 2
176 0
177 1
178 2
179 2
180 2
181 0
182 0
183 2
184 2
185 

FileNotFoundError: [Errno 2] No such file or directory: '../parsed_CSVs/stage_2/treatment256.csv'

In [13]:
df1 = pd.read_csv("independent_vars_tables/stage_1_independent_vars_table.csv")
df2 = pd.read_csv("independent_vars_tables/stage_1_independent_vars_table222.csv")

are_equal = df1.equals(df2)
print(are_equal)

True
