In [10]:
import json
import csv
from datetime import datetime
import math
import ast
import numbers

## Simple data conversion functions:


In [2]:
#### SIMPLE CONVERSION/CLEANING FUNCTIONS

def iso_to_unix_time(iso_string):
    '''Convert an ISO timestamp string (as found in the WebRTC dump) to whole unix seconds.

    iso_string must match "%Y-%m-%dT%H:%M:%S.%fZ" exactly, i.e. it needs
    fractional seconds and a literal trailing "Z"; otherwise strptime raises
    ValueError. The fractional part is discarded by int().

    NOTE(review): strptime yields a naive datetime here, and .timestamp() on a
    naive datetime is interpreted in the machine's local timezone, so the "Z"
    is matched as a plain character and not treated as UTC.
    '''
    parsed = datetime.strptime(iso_string, "%Y-%m-%dT%H:%M:%S.%fZ")
    return int(parsed.timestamp())

def convert_to_sec_minus_10_hrs(timestamp):
    '''Turn a millisecond unix timestamp into whole seconds shifted back by 10 hours.

    timestamp may be anything float() accepts (number or numeric string). The
    value is divided by 1000, floored to an integer number of seconds, and then
    36000 seconds (10 hours) are subtracted, because the WebRTC sample
    timestamps were observed to be 10 hours later than the call times.

    NOTE(review): the 10-hour gap may be a timezone effect of how
    iso_to_unix_time parses the dump's start/end times - confirm before
    running this on a machine in a different timezone.
    '''
    milliseconds = float(timestamp)
    whole_seconds = math.floor(milliseconds / 1000)
    return whole_seconds - 36000

def separate_by_comma(text_list):
    '''Parse the textual form of a list (e.g. "[1, 2, 'a']") into a real Python list.

    The text is evaluated with ast.literal_eval, which only accepts Python
    literals and never executes code.

    Raises:
        ValueError: if the text cannot be parsed as a literal, or if it parses
            to something that is not a list.
    '''
    try:
        candidate = ast.literal_eval(text_list)
        # A valid literal of another type (tuple, number, ...) is rejected too;
        # the handler below turns it into the same message as a parse failure.
        if not isinstance(candidate, list):
            raise ValueError("Input is not a list")
    except (ValueError, SyntaxError):
        raise ValueError("Input is not properly formatted or is not a list")
    return candidate
    
def parse_audio_codec(codec_info):
    '''Reduce each audio codec description to a (dtx, fec) pair of booleans.

    Each entry of codec_info is a string whose configuration part starts at
    the first " (" (a space followed by an opening parenthesis). dtx is True
    when that part contains "usedtx" and fec is True when it contains
    "useinbandfec".

    An entry without " (" has no configuration part and yields (False, False).
    Previously such an entry raised UnboundLocalError when it came first, or
    silently reused the previous entry's configuration otherwise.

    Returns:
        A list of (dtx, fec) tuples, one per entry, in the same order.
    '''
    new_codec_info = []
    for description in codec_info:
        marker = description.find(" (")
        # Keep the "(" itself, exactly as the original slice did.
        codec_config = description[marker + 1:] if marker != -1 else ""
        new_codec_info.append(("usedtx" in codec_config, "useinbandfec" in codec_config))
    return new_codec_info

def parse_video_codec(codec_info):
    '''Reduce each video codec description to just the codec name.

    The name is everything before the first " (" (a space followed by an
    opening parenthesis), e.g. "VP8 (96)" -> "VP8".

    An entry without " (" is returned unchanged. Previously such an entry
    raised UnboundLocalError when it came first, or silently repeated the
    previous entry's name otherwise.

    Returns:
        A list of codec names, one per entry, in the same order.
    '''
    # partition() returns the whole string as the head when " (" is absent.
    return [description.partition(" (")[0] for description in codec_info]

## Main parsing function:

In [3]:
# Every column get_stats produces, grouped by the prefix of the dump's stat key.
# Each entry is (output column name, substring looked for in the stat key).
# The order of groups and entries is the order of the returned dictionary.
# Outbound audio (OT01A) was commented out in the original code and is not collected.
_STAT_COLUMNS = {
    'IT01V': (  # inbound video
        ('IT01V_packetsRecieved', '-[packetsReceived/s]'),
        ('IT01V_packetsLost', '-packetsLost'),
        ('IT01V_frameWidth', '-frameWidth'),
        ('IT01V_frameHeight', '-frameHeight'),
        ('IT01V_totalFreezesDuration', '-totalFreezesDuration'),
        ('IT01V_framesPerSecond', '-framesPerSecond'),
        ('IT01V_bytesReceived_in_bits/s', '-[bytesReceived_in_bits/s]'),
        ('IT01V_totalProcessingDelay', '-totalProcessingDelay'),
        ('IT01V_jitter', '-jitter'),
        ('IT01V_jitterBufferDelay', '-jitterBufferDelay'),
        ('IT01V_codec', '-[codec]'),
        ('IT01V_timestamps', '-timestamp'),
    ),
    'IT01A': (  # inbound audio
        ('IT01A_bytesReceived_in_bits/s', '-[bytesReceived_in_bits/s]'),
        ('IT01A_jitterBufferDelay', '-jitterBufferDelay'),
        ('IT01A_(dtx, fec):', '-[codec]'),
        ('IT01A_timestamps', '-timestamp'),
    ),
    'OT01V': (  # outbound video
        ('OT01V_packetsSent/s', '-[packetsSent/s]'),
        ('OT01V_bytesSent_in_bits/s', '-[bytesSent_in_bits/s]'),
        ('OT01V_frameWidth', '-frameWidth'),
        ('OT01V_framesPerSecond', '-framesPerSecond'),
        ('OT01V_totalPacketSendDelay', '-totalPacketSendDelay'),
        ('OT01V_totalPacketSendDelay/packetsSent_in_ms', '-[totalPacketSendDelay/packetsSent_in_ms]'),
        ('OT01V_qualityLimitationReason', '-qualityLimitationReason'),
        ('OT01V_qualityLimitationResolutionChanges', '-qualityLimitationResolutionChanges'),
        ('OT01V_timestamps', '-timestamp'),
    ),
    'RIV': (  # remote inbound video
        ('RIV_roundTripTime', '-roundTripTime'),
        ('RIV_fractionLost', '-fractionLost'),
        ('RIV_timestamps', '-timestamp'),
    ),
    'RIA': (  # remote inbound audio
        ('RIA_fractionLost', '-fractionLost'),
        ('RIA_roundTripTime', '-roundTripTime'),
        ('RIA_timestamps', '-timestamp'),
    ),
    'ROA': (  # remote outbound audio
        ('ROA_roundTripTime', '-roundTripTime'),
        ('ROA_timestamps', '-timestamp'),
    ),
    'SV2': (  # video source
        ('SV2_width', '-width'),
        ('SV2_height', '-height'),
        ('SV2_framesPerSecond', '-framesPerSecond'),
        ('SV2_timestamps', '-timestamp'),
    ),
    'AP': (  # audio playout
        ('AP_totalPlayoutDelay', '-totalPlayoutDelay'),
        ('AP_timestamps', '-timestamp'),
    ),
}


def _stat_key_matches(key_string, target):
    '''Tell whether a dump stat key refers to the wanted target statistic.'''
    if target == '-jitter':
        # "-jitter" is also a substring of other keys (e.g. "-jitterBufferDelay"),
        # so it is only accepted at the very end of the key.
        return key_string.endswith(target)
    return target in key_string


def _parse_stat_entry(info, prefix, target):
    '''Convert one raw dump entry into a (values, start time, end time) triple.'''
    values = separate_by_comma(info['values'])
    if target == '-[codec]':
        # Only the inbound video and inbound audio groups list a codec target.
        values = parse_video_codec(values) if prefix == 'IT01V' else parse_audio_codec(values)
    return (values, iso_to_unix_time(info['startTime']), iso_to_unix_time(info['endTime']))


def get_stats(file_path, verbose=False):
    '''
    Read one JSON .txt dump and collect the statistics of interest.

    This is the nested dictionary structure in the dump:
    dump -> 'PeerConnections' -> the LAST peer connection entry -> 'stats'

    Every key in 'stats' is matched by its prefix (IT01V, IT01A, OT01V, RIV,
    RIA, ROA, SV2, AP) and then against that group's target substrings. The
    first matching key found for a target wins; later matches are ignored.

    Args:
        file_path: path of the JSON dump file.
        verbose: when True, print every collected column with its values and
            start/end times.

    Returns:
        A dictionary mapping each output column name (e.g. 'IT01V_packetsLost')
        to a (values, start time, end time) triple. Values is a list, and the
        times are unix seconds as produced by iso_to_unix_time. Columns that
        were not found in the dump hold (None, None, None). The column order
        is fixed and does not depend on the dump.

    Raises:
        KeyError: if a matched entry lacks 'values', 'startTime' or 'endTime'.
        ValueError: if a matched entry's values or times cannot be parsed.
    '''
    # opening the dump .txt JSON file
    with open(file_path, 'r') as file:
        dump = json.load(file)

    # Navigate to where all the stats are stored in the dump. A dump without
    # any peer connection used to crash with IndexError; it now simply yields
    # no stats, in line with the other .get(..., {}) fallbacks.
    peer_connections = dump.get('PeerConnections', {})
    if peer_connections:
        last_connection = peer_connections[list(peer_connections.keys())[-1]]
        stats = last_connection.get('stats', {})
    else:
        stats = {}

    # begin searching for the target statistics
    found = {}
    for key, info in stats.items():
        key_string = str(key)
        for prefix, columns in _STAT_COLUMNS.items():
            if key_string.startswith(prefix):
                break
        else:
            continue  # key belongs to a group that is not collected
        for column, target in columns:
            if column not in found and _stat_key_matches(key_string, target):
                found[column] = _parse_stat_entry(info, prefix, target)

    # Single dictionary with unique key names, in the fixed table order.
    single_person_dict = {
        column: found.get(column, (None, None, None))
        for columns in _STAT_COLUMNS.values()
        for column, _target in columns
    }

    if verbose:
        for column, triple in single_person_dict.items():
            print(column, ": ", triple[0])
            print("Start Time: ", triple[1], " |  End Time: ", triple[2])
            print("\n")

    return single_person_dict


## Cleaning and formatting functions:

In [4]:
def combine_dictionaries(dict_ellen, dict_aadya):
    '''Merge the two participants' stat dictionaries into one.

    For every key of dict_ellen (in its iteration order) the result receives
    "<key>_ellen" holding ellen's entry, immediately followed by "<key>_aadya"
    holding aadya's entry for the same key.

    Raises:
        KeyError: if dict_aadya lacks one of dict_ellen's keys.
    '''
    merged = {}
    for stat_name in dict_ellen:
        label = str(stat_name)
        merged[label + "_ellen"] = dict_ellen[stat_name]
        merged[label + "_aadya"] = dict_aadya[label]
    return merged


# Stat-group prefixes that own a "<prefix>_timestamps_<person>" entry.
_TIMESTAMP_GROUP_PREFIXES = ('IT01V', 'IT01A', 'OT01V', 'RIV', 'RIA', 'ROA', 'SV2', 'AP')


def _timestamps_for_stat(global_dict, key_string):
    '''Return the timestamp list of key_string's group and person, or None if there is none.'''
    person = key_string.rsplit('_', 1)[-1]  # keys end in "_<person>"
    for prefix in _TIMESTAMP_GROUP_PREFIXES:
        if key_string.startswith(prefix):
            entry = global_dict.get(prefix + '_timestamps_' + person)
            return None if entry is None else entry[0]
    return None


def populate_global_table(global_dict, mistake_tally):
    '''Lay every stat out on one shared per-second time axis.

    Args:
        global_dict: maps "<group>_<stat>_<person>" to a
            (values, start time, end time) triple. Times are unix seconds.
            The "..._timestamps_..." entries hold millisecond timestamps,
            which are converted IN PLACE with convert_to_sec_minus_10_hrs, so
            the caller's dictionary is modified and this function must not be
            run twice on the same dictionary.
        mistake_tally: dictionary of counters/lists updated in place with the
            timing inconsistencies met ('start time errors', 'end time errors',
            'missed val errors ...', 'extra vals errors ...').

    Returns:
        A list with one row per global_dict entry, in the same order. Every row
        has (latest end time - earliest start time + 1) cells; cell i belongs to
        second (earliest start time + i) and holds the stat's value for that
        second, or -1 where there is no data.

    Behaviour that differs from the previous version (all were defects):
      * An end time later than the last timestamp now selects up to the last
        timestamp; it used to select only the first one.
      * An end time that matches no timestamp exactly now selects up to the
        last timestamp not later than it; it used to crash or reuse the index
        left over from the previous stat.
      * Surplus values are trimmed only when there really is a surplus; the
        trim used to run with an undefined or left-over count.
      * With more values than timestamps, values are paired with all available
        timestamps; the timestamps used to be shortened even further.
      * Stats that are absent, i.e. (None, None, None), or that have no
        timestamps keep a row of -1 instead of raising TypeError/KeyError.

    Remaining limitation: when counts differ, values are assumed to line up
    with the timestamps from the start of the window.
    NOTE(review): a timestamp outside the global window is still written at
    index (timestamp - earliest start) without a bounds check.
    '''
    # 1. The shared time axis spans the earliest start to the latest end.
    start_times = [val[1] for val in global_dict.values() if val[1] is not None]
    end_times = [val[2] for val in global_dict.values() if val[2] is not None]
    if not start_times or not end_times:
        return [[] for _ in global_dict]
    global_start = min(start_times)
    total_time = max(end_times) - global_start + 1

    # populate a rectangular table with -1 for every timestamp
    global_table = [[-1] * total_time for _ in global_dict]

    # 2. Truncate timestamps to basic unix timecodes (whole seconds).
    for key, val in global_dict.items():
        if "timestamps" in str(key) and val[0] is not None:
            converted = [convert_to_sec_minus_10_hrs(stamp) for stamp in val[0]]
            global_dict[key] = (converted, val[1], val[2])

    # 3. Replace the -1s wherever a stat has data.
    for row_number, (key, val) in enumerate(global_dict.items()):
        values, start_time, end_time = val
        if values is None or start_time is None or end_time is None:
            continue  # stat missing from the dump: row stays -1
        timestamps = _timestamps_for_stat(global_dict, str(key))
        if not timestamps:
            continue  # nothing to align against: row stays -1

        # Timing error handling:
        starts_early = start_time < timestamps[0]
        ends_late = end_time > timestamps[-1]
        if starts_early:
            mistake_tally['start time errors'] += 1
        if ends_late:
            mistake_tally['end time errors'] += 1

        # Exact matches win (last occurrence, as before). Without an exact
        # match the window starts at the first timestamp and ends at the last
        # timestamp that is not later than end_time.
        start_index = 0
        exact_end_index = None
        last_not_after_end = -1
        for index, stamp in enumerate(timestamps):
            if stamp == start_time:
                start_index = index
            if stamp == end_time:
                exact_end_index = index
            if stamp <= end_time:
                last_not_after_end = index
        end_index = last_not_after_end if exact_end_index is None else exact_end_index
        window = timestamps[start_index:end_index + 1]

        # Cleaning for the cases where Web RTC reports values outside the
        # timestamp range: drop the surplus from the side that sticks out.
        if starts_early and len(values) > len(window):
            values = values[len(values) - len(window):]
        if ends_late and len(values) > len(window):
            values = values[:len(window)]

        difference = len(window) - len(values)
        if difference == 1:
            mistake_tally['missed val errors (off by 1 only)'] += 1
        elif difference > 1:
            mistake_tally['missed val errors (off by > 1)'].append(difference)
        elif difference == -1:
            mistake_tally['extra vals errors (off by 1 only)'] += 1
        elif difference < -1:
            mistake_tally['extra vals errors (off by > 1)'].append(-1 * difference)

        # zip() stops at the shorter sequence, i.e. it pairs from the start.
        row = global_table[row_number]
        for stamp, value in zip(window, values):
            row[stamp - global_start] = value

    return global_table


def fix_unwanted_nulls(global_table):
    '''Fill isolated one-second gaps in a populated global table, in place.

    A gap is a cell holding -1 whose left and right neighbours both hold
    something other than -1 (this happens when a stat is only recorded every
    second second). The gap is replaced by:
      * the mean of both neighbours when either neighbour is a float,
      * the rounded mean when the left neighbour is an int,
      * a copy of the left neighbour otherwise (e.g. strings or tuples).

    The first and last cell of a row are never changed. Returns None.
    '''
    for row in global_table:
        for position in range(1, len(row) - 1):
            before, after = row[position - 1], row[position + 1]
            if before == -1 or row[position] != -1 or after == -1:
                continue  # not an isolated gap

            if isinstance(before, float) or isinstance(after, float):
                filler = (before + after) / 2
            elif isinstance(before, int):
                filler = round((before + after) / 2)
            else:
                filler = before
            row[position] = filler


def writeout(global_table, global_dict, treatment_number):
    '''Write the global table to "CSVs/stage_one/treatment<treatment_number>.csv".

    global_table holds one row per stat; the CSV gets one COLUMN per stat, so
    the table is transposed first. The header row consists of global_dict's
    keys in iteration order, which is expected to match the row order of
    global_table. The number of CSV data rows equals the length of the first
    table row.

    The target directory must already exist. A confirmation line is printed
    after the file has been written.
    '''
    # Flip the table (rows -> columns and columns -> rows).
    seconds = len(global_table[0])
    csv_rows = [[stat_row[second] for stat_row in global_table] for second in range(seconds)]

    # Write out to a CSV
    output_file = f"CSVs/stage_one/treatment{treatment_number}.csv"
    with open(output_file, mode='w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(list(global_dict.keys()))
        writer.writerows(csv_rows)

    print(f"Data has been written to {output_file}")
    

## Controller functions:

In [39]:
def parse_clean_writeout(parent_file_path, treatment_number, mistake_tally, read_only=False, verbose=False):
    '''Run the whole pipeline for one treatment: parse, combine, tabulate, clean, write.

    Args:
        parent_file_path: path prefix of the dump files; the two dumps read are
            "<parent_file_path><treatment_number>_ellen.txt" and
            "<parent_file_path><treatment_number>_aadya.txt".
        treatment_number: number of the treatment, also used in the CSV name.
        mistake_tally: dictionary of timing-error counters, updated in place by
            populate_global_table.
        read_only: when True, everything is computed but no CSV is written.
        verbose: forwarded to get_stats so the parsed stats are printed.
            (It was previously accepted but never passed on, so it had no effect.)
    '''
    file_path_ellen = parent_file_path + str(treatment_number) + "_ellen.txt"
    file_path_aadya = parent_file_path + str(treatment_number) + "_aadya.txt"
    dict_ellen = get_stats(file_path_ellen, verbose)
    dict_aadya = get_stats(file_path_aadya, verbose)
    global_dict = combine_dictionaries(dict_ellen, dict_aadya)
    global_table = populate_global_table(global_dict, mistake_tally)
    fix_unwanted_nulls(global_table)  # works in place
    if not read_only:
        writeout(global_table, global_dict, treatment_number)

def parse_range(parent_file_path, lowest_treatment, highest_treatment, read_only=False, verbose=False):
    '''Process every treatment from lowest_treatment to highest_treatment (both inclusive).

    Each treatment is handed to parse_clean_writeout together with one shared
    tally of timing errors; the tally is printed once all treatments are done.
    An exception raised for any treatment stops the run before the summary.

    Args:
        parent_file_path: path prefix of the dump files (see parse_clean_writeout).
        lowest_treatment: first treatment number to process.
        highest_treatment: last treatment number to process.
        read_only: passed on; when True no CSV files are written.
        verbose: passed on to parse_clean_writeout.
    '''
    # Counters for single occurrences, lists for the size of larger mismatches.
    mistake_tally = {}
    mistake_tally['start time errors'] = 0
    mistake_tally['end time errors'] = 0
    mistake_tally['missed val errors (off by 1 only)'] = 0
    mistake_tally['missed val errors (off by > 1)'] = []
    mistake_tally['extra vals errors (off by 1 only)'] = 0
    mistake_tally['extra vals errors (off by > 1)'] = []

    for treatment_number in range(lowest_treatment, highest_treatment + 1):
        print("Parsing Treatment", treatment_number)
        parse_clean_writeout(parent_file_path, treatment_number, mistake_tally, read_only, verbose)

    rule = "___________________________________________________________________________"
    print("\nParsing complete! Here is the number Web RTC timing errors encountered...")
    print(rule)
    for label in mistake_tally:
        print(label + ":", mistake_tally[label])
    print(rule)

In [42]:
# Run configuration. Each file_path_parent_* is a path PREFIX: the pipeline
# appends "<treatment number>_ellen.txt" / "<treatment number>_aadya.txt" to it.
# Only file_path_parent_05 is used by the call at the bottom of this cell.

#file path parent for testing_9_Aug
file_path_parent_01 = "testing_stats/testing_9_Aug/treatment"
#file path parent for testing_13_Aug
file_path_parent_02 = "testing_stats/testing_13_Aug/treatment"
#file path parent for testing_27_Aug
file_path_parent_03 = "testing_stats/testing_27_Aug/treatment"
#file path parent for testing_30_Aug
file_path_parent_04 = "testing_stats/testing_30_Aug/treatment"
#file path parent for stage one treatments
file_path_parent_05 = "testing_stats/stage_1/treatment"

# Treatments lowest..highest are processed, both ends included.
lowest_treatment_number = 1
highest_treatment_number = 300
# False means a CSV is written for every treatment; True only parses.
read_only_status = False
# Forwarded to parse_range as its verbose argument.
verbose_status = False
parse_range(file_path_parent_05, lowest_treatment_number, highest_treatment_number, read_only_status, verbose_status)

Parsing Treatment 1
Data has been written to CSVs/stage_one/treatment1.csv
Parsing Treatment 2
Data has been written to CSVs/stage_one/treatment2.csv
Parsing Treatment 3
Data has been written to CSVs/stage_one/treatment3.csv
Parsing Treatment 4
Data has been written to CSVs/stage_one/treatment4.csv
Parsing Treatment 5
Data has been written to CSVs/stage_one/treatment5.csv
Parsing Treatment 6
Data has been written to CSVs/stage_one/treatment6.csv
Parsing Treatment 7
Data has been written to CSVs/stage_one/treatment7.csv
Parsing Treatment 8
Data has been written to CSVs/stage_one/treatment8.csv
Parsing Treatment 9
Data has been written to CSVs/stage_one/treatment9.csv
Parsing Treatment 10
Data has been written to CSVs/stage_one/treatment10.csv
Parsing Treatment 11
Data has been written to CSVs/stage_one/treatment11.csv
Parsing Treatment 12
Data has been written to CSVs/stage_one/treatment12.csv
Parsing Treatment 13
Data has been written to CSVs/stage_one/treatment13.csv
Parsing Treatment