In [None]:
import os
# Move into the dataset directory so the relative "./5s"-style paths used below resolve.
# Guarded on the current directory name so re-running this cell does not try to descend twice.
if os.path.basename(os.getcwd()) != "2023-02-26_SegLenTestsOnMlcNetC":
    os.chdir("./2023-02-26_SegLenTestsOnMlcNetC")

In [None]:
# Names of the per-setting sub-directories ("./5s", "./10s", "./20s") that the loading cells iterate over.
settings = ["5s", "10s", "20s"]

In [None]:
def set_std_plot_params(fig):
    """Apply the notebook's shared look to a figure, in place.

    Sets a 24-pt font, makes the plot and paper backgrounds fully transparent
    and draws light-grey, half-opaque gridlines on both axes. Returns nothing.
    """
    transparent = "rgba(0,0,0,0)"
    grid_style = dict(showgrid = True, gridwidth = 1, gridcolor = "rgba(169,169,169,0.5)")

    # Font size and background removal go through a single layout update.
    fig.update_layout(font = dict(size = 24),
                      plot_bgcolor = transparent,
                      paper_bgcolor = transparent)

    # Keep the gridlines visible against the transparent background.
    for update_axes in (fig.update_xaxes, fig.update_yaxes):
        update_axes(**grid_style)

### Get length of video file

In [None]:
# Find the first JS log file in the directory and find out how long the video is. I assume that all log data in this entire directory has the same length of video.
import os
import re
SEC_IN_MIN = 60
seg_lens = ["5s", "10s", "20s"]
# Raw string so "\d" and "\." reach the regex engine instead of being invalid string escapes.
# Captures the second comma-separated number of a LOG line (the value before the colon).
vid_length_pattern = re.compile(r"^LOG  [\d\.]+,([\d\.]+):.+$", re.M)

video_length_sec = None

for seg_len in seg_lens:
    dir_path = "./{seg_len}".format(seg_len = seg_len)

    for file_name in os.listdir(dir_path):
        file_path = os.path.join(dir_path, file_name)
        if not os.path.isdir(file_path): # Only looking to read directories (that are symlinks).
            continue
        jsLog_names = [filename for filename in os.listdir(file_path) if filename.startswith("Tester_") and filename.endswith(".log")]
        if len(jsLog_names) != 1:
            continue
        jsLog_name = jsLog_names[0]

        # Walk to the final line of the log; it stays None for an empty file
        # instead of silently reusing a line left over from a previous log.
        last_line = None
        with open(os.path.join(file_path, jsLog_name), "r") as jsLog:
            for line in jsLog:
                last_line = line
        if last_line is None:
            continue

        match = vid_length_pattern.match(last_line)
        if not match:
            # The last line is not a LOG entry; try the next run directory.
            continue
        video_length_sec = float(match.group(1))
        break

    if video_length_sec is not None:
        break

if video_length_sec is None:
    # Fail with an explicit message rather than a TypeError from float(None).
    raise RuntimeError("Could not determine the video length: no Tester_*.log file ends with a LOG line.")

print("Video length (sec): {vidlen}".format(vidlen = video_length_sec))
video_length_min = video_length_sec / SEC_IN_MIN

## Stalls

In [None]:
import os
import re

# Raw strings keep "\d" / "\." as regex escapes rather than invalid string escapes.
stall_start_pattern = re.compile(r"^STALL  started at ([\d\.]+) sec", re.M)
stall_end_pattern = re.compile(r"^STALL  ([\d]+) ms and stopped at [\d\.]+ sec", re.M)

def ComputeStallData(dir_path):
    """Extract the stall events recorded in the single Tester_*.log of a run directory.

    Parameters:
        dir_path: directory expected to contain exactly one "Tester_*.log" file.

    Returns:
        (stall_starts, stall_lengths_ms): the start time in seconds (float) of
        every "STALL  started at ..." line and the duration in milliseconds
        (int) of every "STALL  <n> ms and stopped at ..." line, in file order.
        Returns None when the directory does not hold exactly one such log.
    """
    jsLog_names = [filename for filename in os.listdir(dir_path) if filename.startswith("Tester_") and filename.endswith(".log")]
    if len(jsLog_names) != 1:
        return None

    stall_starts = list()
    stall_lengths_ms = list()

    # The context manager closes the log even if parsing a line raises.
    with open(os.path.join(dir_path, jsLog_names[0]), "r") as jsLog:
        for line in jsLog:
            start_match = stall_start_pattern.match(line)
            if start_match:
                stall_starts.append(float(start_match.group(1)))

            end_match = stall_end_pattern.match(line)
            if end_match:
                stall_lengths_ms.append(int(end_match.group(1)))

    return stall_starts, stall_lengths_ms

In [None]:
import numpy as np
import os
import scipy.stats as st

seg_lens = ["5s", "10s", "20s"]
conf_interval = 0.95

# One entry per segment length, in the order of seg_lens.
avg_stall_cts = list()
avg_stall_cts_conf = list()
stdev_stall_cts = list()

avg_stall_lengths = list()
avg_stall_lengths_conf = list()
stdev_stall_lengths = list()

stall_start_times = list()

for seg_len in seg_lens:
    dir_path = "./{seg_len}".format(seg_len = seg_len)

    seglen_aggregate_stall_lengths = list()  # every stall length (ms) across all runs
    seglen_aggregate_stall_counts = list()   # number of stalls per run
    seglen_aggregate_stall_starts = list()   # list of stall start times per run

    for file_name in os.listdir(dir_path):
        file_path = os.path.join(dir_path, file_name)
        if not os.path.isdir(file_path): # Only looking to read directories (that are symlinks).
            continue
        stall_data = ComputeStallData(file_path)
        if stall_data is None:
            # No unique Tester_*.log in this directory; there is nothing to aggregate.
            continue
        cur_stall_starts_sec, cur_stall_lengths_ms = stall_data
        seglen_aggregate_stall_lengths += cur_stall_lengths_ms
        seglen_aggregate_stall_counts.append(len(cur_stall_lengths_ms))
        seglen_aggregate_stall_starts.append(cur_stall_starts_sec)

    # Keep the statistics defined when nothing was collected.
    if len(seglen_aggregate_stall_lengths) == 0:
        seglen_aggregate_stall_lengths.append(0)
    if len(seglen_aggregate_stall_counts) == 0:
        seglen_aggregate_stall_counts.append(0)

    avg_cts = np.mean(seglen_aggregate_stall_counts)
    avg_stall_cts.append(avg_cts)
    # An all-zero sample has a standard error of 0 and the interval comes back as [nan, nan],
    # so [0, 0] is reported manually in that case.
    if all([ct == 0 for ct in seglen_aggregate_stall_counts]):
        avg_stall_cts_conf.append([0, 0])
    else:
        # The confidence level is passed positionally: its keyword was renamed from
        # `alpha` to `confidence` in SciPy, and the positional form works with both.
        avg_stall_cts_conf.append(st.norm.interval(conf_interval, loc = avg_cts, scale = st.sem(seglen_aggregate_stall_counts)))
    stdev_stall_cts.append(np.std(seglen_aggregate_stall_counts))

    avg_lengths = np.mean(seglen_aggregate_stall_lengths)
    avg_stall_lengths.append(avg_lengths)
    # Same all-zero special case as for the counts above.
    if all([length == 0 for length in seglen_aggregate_stall_lengths]):
        avg_stall_lengths_conf.append([0, 0])
    else:
        avg_stall_lengths_conf.append(st.norm.interval(conf_interval, loc = avg_lengths, scale = st.sem(seglen_aggregate_stall_lengths)))
    stdev_stall_lengths.append(np.std(seglen_aggregate_stall_lengths))

    stall_start_times.append(seglen_aggregate_stall_starts)

### Stall lengths

In [None]:
avg_stall_lengths

In [None]:
avg_stall_lengths_conf

In [None]:
stdev_stall_lengths

### Stall counts

In [None]:
avg_stall_cts

In [None]:
avg_stall_cts_conf

In [None]:
stdev_stall_cts

In [None]:
import numpy as np
# Normalise the mean stall count of each segment length by the video duration in minutes.
stalls_permin = np.asarray(avg_stall_cts) / video_length_min
print("Average stalls per minute of video (5s, 10s, 20s): {stallavg}".format(stallavg = stalls_permin))

### Stall totals

In [None]:
import numpy as np
# Element-wise product of mean stall count and mean stall length (ms) per segment length.
stall_totals = np.asarray(avg_stall_cts) * np.asarray(avg_stall_lengths)
# Divided by 1000 for display so the printed values are in seconds.
print("Stall totals (# of stalls * length of stalls) (sec):\n{vols}".format(vols = stall_totals / 1000))

In [None]:
import numpy as np
# Stall total (ms) per minute of video, per segment length; printed in seconds.
stall_totals_permin = np.asarray(stall_totals) / video_length_min
print("Average stall totals per minute of video (seconds of stall / minute of video):\n{avgvol}".format(avgvol = stall_totals_permin / 1000))

## Quality Changes

In [None]:
# Raw strings keep "\d" / "\." as regex escapes rather than invalid string escapes.
# Third comma-separated field of a LOG line: the resolution, e.g. "1280x720".
log_res_pattern = re.compile(r"^LOG  [^,]+,[^,]+,([\dx]+).+$")
# Resolution that a QUAL line switches to.
qual_change_pattern = re.compile(r"^QUAL  at [\d\.]+ to ([\dx]+)$")

def ComputeQualChangeData(dir_path):
    """Count how often each resolution was selected during one run.

    Parameters:
        dir_path: directory expected to contain exactly one "Tester_*.log" file.

    Returns:
        dict mapping a resolution string to its number of selections: one for
        the resolution of the first LOG line (the starting quality) plus one
        for every QUAL line switching to that resolution.
        Returns None when the directory does not hold exactly one such log.
    """
    jsLog_names = [filename for filename in os.listdir(dir_path) if filename.startswith("Tester_") and filename.endswith(".log")]
    if len(jsLog_names) != 1:
        return None

    startRes = None
    resAmnts = dict()

    # The context manager closes the log even if parsing a line raises.
    with open(os.path.join(dir_path, jsLog_names[0]), "r") as jsLog:
        for line in jsLog:
            log_res_match = log_res_pattern.match(line)
            if log_res_match and startRes is None:
                # This is the first LOG line. Record its quality level in case it
                # never changes and the log therefore has no QUAL entries.
                startRes = log_res_match.group(1)
                resAmnts[startRes] = resAmnts.get(startRes, 0) + 1

            match = qual_change_pattern.match(line)
            if match:
                resolution = match.group(1)
                resAmnts[resolution] = resAmnts.get(resolution, 0) + 1

    return resAmnts

In [None]:
seg_lens = ["5s", "10s", "20s"]
# One entry per segment length, in the order of seg_lens.
avg_qual_changes = list()
stdev_qual_changes = list()
for seg_len in seg_lens:
    dir_path = "./{seg_len}".format(seg_len = seg_len)

    cur_qual_changes = list()  # number of quality changes per run

    for file_name in os.listdir(dir_path):
        file_path = os.path.join(dir_path, file_name)
        if not os.path.isdir(file_path): # Only looking to read directories (that are symlinks).
            continue
        run_res_counts = ComputeQualChangeData(file_path)
        if run_res_counts is None:
            # No unique Tester_*.log in this directory; skip it.
            continue

        # Subtract one because the video always starts at a certain quality, that's not a quality change.
        # Clamped at zero so a log without any LOG/QUAL line does not count as -1 changes.
        qual_changes = max(sum(run_res_counts.values()) - 1, 0)
        cur_qual_changes.append(qual_changes)

    avg_qual_changes.append(np.mean(cur_qual_changes))
    stdev_qual_changes.append(np.std(cur_qual_changes))

In [None]:
avg_qual_changes

In [None]:
stdev_qual_changes

## Quality Data

In [None]:
# Lookup from a resolution string, as captured from the LOG lines, to a bitrate in kbps.
# NOTE(review): presumably these are the bitrates of the encoding ladder used in the tests - confirm.
reso_to_bitrate_kbps = {
    "480x270": 2000,
    "640x360": 3000,
    "960x540": 5000,
    "1280x720": 10000,
    "1920x1080": 17200,
    "3840x2160": 40000,
}

In [None]:
# Raw string keeps "\d" as a regex escape rather than an invalid string escape.
# Third comma-separated field of a LOG line: the resolution, e.g. "1280x720".
log_res_pattern = re.compile(r"^LOG  [^,]+,[^,]+,([\dx]+).+$")

def ComputeQualData(dir_path):
    """Collect the resolution reported by every LOG line of one run.

    Parameters:
        dir_path: directory expected to contain exactly one "Tester_*.log" file.

    Returns:
        list of resolution strings, one per matching LOG line, in file order.
        Returns None when the directory does not hold exactly one such log.
    """
    jsLog_names = [filename for filename in os.listdir(dir_path) if filename.startswith("Tester_") and filename.endswith(".log")]
    if len(jsLog_names) != 1:
        return None

    resOccurrences = list()

    # The context manager closes the log even if parsing a line raises.
    with open(os.path.join(dir_path, jsLog_names[0]), "r") as jsLog:
        for line in jsLog:
            log_res_match = log_res_pattern.match(line)
            if log_res_match:
                resOccurrences.append(log_res_match.group(1))

    return resOccurrences

In [None]:
seg_lens = ["5s", "10s", "20s"]
# One list per segment length holding the resolution of every LOG line across all runs.
# Note: this cell deliberately does not touch avg_stall_cts / avg_stall_lengths, so the
# results of the Stalls section stay available to the cells that consume them.
resOccurrences = list()
for seg_len in seg_lens:
    print("Segment length: " + seg_len)
    dir_path = "./{seg_len}".format(seg_len = seg_len)

    resQtys = list()

    for file_name in os.listdir(dir_path):
        file_path = os.path.join(dir_path, file_name)
        if not os.path.isdir(file_path): # Only looking to read directories (that are symlinks).
            continue
        run_resolutions = ComputeQualData(file_path)
        if run_resolutions is None:
            # No unique Tester_*.log in this directory; skip it.
            continue
        resQtys += run_resolutions

    resOccurrences.append(resQtys)

In [None]:
len(resOccurrences[2])

In [None]:
from collections import Counter
import pandas as pd
# Build one table per segment length: resolution, number of LOG lines at that
# resolution, and that number as a share of all LOG lines of the segment length.
resAmnts = []
for seglen_occurrences in resOccurrences:
    tally = Counter(seglen_occurrences)
    occurrenceFrame = pd.DataFrame({
        "Resolution": list(tally.keys()),
        "Occurrences": list(tally.values()),
    })
    occurrenceFrame["Proportion"] = occurrenceFrame["Occurrences"] / occurrenceFrame["Occurrences"].sum()
    resAmnts.append(occurrenceFrame)

In [None]:
resAmnts[0]

In [None]:
resAmnts[0].loc[resAmnts[0]["Occurrences"].idxmax()]
#resAmnts[0].loc[Occurrences == resAmnts[0]["Occurrences"]

In [None]:
resAmnts[1]

In [None]:
resAmnts[2]

In [None]:
# Translate every logged resolution to its bitrate (kbps) and sort ascending, per segment length.
# A resolution that is missing from reso_to_bitrate_kbps raises KeyError here.
resOccurrencesSorted = [sorted([reso_to_bitrate_kbps[res] for res in runData]) for runData in resOccurrences]

In [None]:
import plotly.express as px
import pandas as pd

# One column of sorted bitrates per segment length, assembled in a single concat
# instead of growing the frame inside a loop. Using named Series means shorter
# columns are padded with NaN and a segment length without samples still gets a column.
resOccurrencesSortedDf = pd.concat(
    [pd.Series(bitrates, name = seglen) for seglen, bitrates in zip(seg_lens, resOccurrencesSorted)],
    axis = 1,
)

# ECDF of the bitrates observed with 5 s segments.
fig = px.ecdf(resOccurrencesSortedDf.loc[:, "5s"], ecdfnorm = "percent")
fig.update_xaxes(title = "Bitrate", range = [0, 42000])
fig.show()

In [None]:
resOccurrencesSortedDf

In [None]:
import plotly.graph_objects as plgo
import plotly.express as px

# Overlay one bitrate ECDF per segment length in a single figure.
combinedFig = plgo.Figure()
palette = px.colors.qualitative.Plotly
for idx, seg_len in enumerate(seg_lens):
    seg_len_fig = px.ecdf(resOccurrencesSortedDf.iloc[:, idx], ecdfnorm = "percent")
    # Iterating a plotly Figure yields its property names, not its traces,
    # so the traces have to be taken from `.data`.
    for seg_len_trace in seg_len_fig.data:
        seg_len_trace.name = seg_len
        seg_len_trace.showlegend = True
        # Each figure is built on its own, so presumably every trace would otherwise
        # get the same default colour; assign distinct ones explicitly.
        seg_len_trace.line.color = palette[idx % len(palette)]
        combinedFig.add_trace(seg_len_trace)
combinedFig.update_xaxes(range = [0, 42000])
combinedFig.show()

## Round-Trip Time (RTT) and Packet Loss

In [None]:
import os
import csv
import numpy as np

def find_missing_and_out_of_order_values(data):
    """Report gaps and order violations in a list of sequence numbers.

    Parameters:
        data: list of integer sequence numbers in arrival order. Numbering is
            taken to start at 0.

    Returns:
        (missing_values, out_of_order_values):
        missing_values - every integer from 0 up to the largest value in
            `data` that does not occur in `data`, ascending;
        out_of_order_values - every element that is smaller than the element
            directly before it, in arrival order.
        Both lists are empty for empty input.
    """
    if not data:
        return [], []

    # Set membership keeps the scan linear, and scanning up to the largest value
    # (rather than only len(data) candidates) also finds gaps near the end.
    seen = set(data)
    missing_values = [seq for seq in range(max(seen) + 1) if seq not in seen]

    out_of_order_values = [data[i] for i in range(1, len(data)) if data[i] < data[i - 1]]

    return missing_values, out_of_order_values

def count_elements_layered_list(nested_list):
    """Return the number of non-list elements in an arbitrarily nested list.

    Nested lists are descended into recursively; every other element counts as one.
    """
    return sum(
        count_elements_layered_list(item) if isinstance(item, list) else 1
        for item in nested_list
    )

def _read_udping_log(file_path):
    """Parse one UDPing_log.csv and return (rtts, seqs, sent, received)."""
    rtts = []
    seqs = []
    with open(file_path, "r") as csv_file:
        csv_reader = csv.reader(csv_file)
        next(csv_reader)  # skip "Pinging..." row
        next(csv_reader)  # skip "empty" row
        next(csv_reader)  # skip header row
        for row in csv_reader:
            if "Ping statistics" in row[0]:
                break
            rtts.append(float(row[2]))  # third column: RTT
            seqs.append(int(row[3]))    # fourth column: sequence number
        # The two rows after the "Ping statistics" row carry the packet counts in their second column.
        sent = int(next(csv_reader)[1])
        received = int(next(csv_reader)[1])
    return rtts, seqs, sent, received


def calculate_rtt(folder_path):
    """Aggregate the UDPing results of every run directory below `folder_path`.

    Each sub-directory of `folder_path` must contain a "UDPing_log.csv"; every
    file is read in a single pass.

    Returns:
        (mean_rtt, std_rtt, mean_receive_rate, std_receive_rate,
         all_packet_loss_seqs, all_missing_seqs, all_out_of_order_seqs)
        - mean/std of all RTT values pooled across runs;
        - mean/std of the per-run receive rate (received / sent, 0 if nothing was sent);
        - per run: sequence numbers that do not directly follow their predecessor;
        - per run: missing and out-of-order sequence numbers as reported by
          find_missing_and_out_of_order_values.
    """
    all_packet_loss_seqs = []
    all_missing_seqs = []
    all_out_of_order_seqs = []
    rtt_list = []
    receive_rates = []

    for run_folder in os.listdir(folder_path):
        run_folder_path = os.path.join(folder_path, run_folder)
        if not os.path.isdir(run_folder_path):
            continue
        file_path = os.path.join(run_folder_path, "UDPing_log.csv")

        rtts, seq_column, sent, received = _read_udping_log(file_path)
        rtt_list.extend(rtts)

        # Receive rate is the fraction of sent packets that came back. It is 0 when
        # every packet was lost, and guarded against a run in which nothing was sent.
        receive_rates.append(received / sent if sent else 0)

        # A sequence number that is not exactly one above its predecessor marks a gap or reordering.
        packet_loss_seqs = [
            seq_column[i]
            for i in range(1, len(seq_column))
            if seq_column[i] != seq_column[i - 1] + 1
        ]
        all_packet_loss_seqs.append(packet_loss_seqs)

        missing_values, out_of_order_values = find_missing_and_out_of_order_values(seq_column)
        all_missing_seqs.append(missing_values)
        all_out_of_order_seqs.append(out_of_order_values)

    mean_rtt = np.mean(rtt_list)
    std_rtt = np.std(rtt_list)
    mean_receive_rate = np.mean(receive_rates)
    std_receive_rate = np.std(receive_rates)

    return (mean_rtt, std_rtt, mean_receive_rate, std_receive_rate, all_packet_loss_seqs, all_missing_seqs, all_out_of_order_seqs)

# Same "./<segment length>" path form as the other cells; a backslash path only resolves on Windows.
mean_rtt, std_rtt, mean_receive_rate, std_receive_rate, packet_loss_seqs, missing_seqs, out_of_order_seqs = calculate_rtt("./5s")
print("Mean RTT:", mean_rtt)
print("Standard deviation of RTT:", std_rtt)
print("Mean Receive rate:", mean_receive_rate)
print("Standard deviation of Receive rate:", std_receive_rate)
print("Packet loss sequences:", packet_loss_seqs)
print("Packet loss sequences count:", count_elements_layered_list(packet_loss_seqs))
print("Missing sequences:",missing_seqs)
print("Missing sequences count:", count_elements_layered_list(missing_seqs))
print("Out of order sequences:",out_of_order_seqs)
print("Out of order sequences count:",count_elements_layered_list(out_of_order_seqs))

## Estimated Throughput

In [None]:
import re
import os

# Raw string keeps "\d" / "\." as regex escapes rather than invalid string escapes.
# Group 1: first field of a LOG line; group 2: the numeric field after four further fields (last on the line).
estimated_throughput_pattern = re.compile(r"^LOG  ([\d\.]+),(?:[^,]+,){4}([\d\.]+)$")

def ComputeEstTputData(dir_path):
    """Read the estimated-throughput samples of one run.

    Parameters:
        dir_path: directory expected to contain exactly one "Tester_*.log" file.

    Returns:
        (times, esttputs): two equally long lists of floats taken from every
        LOG line that matches estimated_throughput_pattern - the first field
        (time) and the last field (estimated throughput), in file order.
        Returns None when the directory does not hold exactly one such log.
    """
    jsLog_names = [filename for filename in os.listdir(dir_path) if filename.startswith("Tester_") and filename.endswith(".log")]
    if len(jsLog_names) != 1:
        return None

    times = list()
    esttputs = list()

    # The context manager closes the log even if parsing a line raises.
    with open(os.path.join(dir_path, jsLog_names[0]), "r") as jsLog:
        for line in jsLog:
            esttput_match = estimated_throughput_pattern.match(line)
            if not esttput_match:
                continue
            times.append(float(esttput_match.group(1)))
            esttputs.append(float(esttput_match.group(2)))

    return times, esttputs

In [None]:
import os

settings = ["5s", "10s", "20s"]
# Indexed as [setting][run] -> list of samples.
estimated_times = list()
estimated_throughputs = list()
for setting in settings:
    dir_path = "./{setting}".format(setting = setting)

    setting_times = list()
    setting_esttputs = list()

    for file_name in os.listdir(dir_path):
        file_path = os.path.join(dir_path, file_name)
        if not os.path.isdir(file_path): # Only looking to read directories (that are symlinks).
            continue

        run_data = ComputeEstTputData(file_path)
        if run_data is None:
            # No unique Tester_*.log in this directory; skip it.
            continue
        run_times, run_esttputs = run_data
        setting_times.append(run_times)
        setting_esttputs.append(run_esttputs)

    estimated_times.append(setting_times)
    estimated_throughputs.append(setting_esttputs)

In [None]:
import numpy as np
import plotly.express as plx
import plotly.graph_objects as plgo

# One figure per setting with one line per run, plus mean/stdev over all samples of the setting.
for setting_idx, setting_runs in enumerate(estimated_throughputs):
    fig = plgo.Figure()
    set_std_plot_params(fig)

    setting_tputs = list()
    for run_idx, run_tputs in enumerate(setting_runs):
        run_times = estimated_times[setting_idx][run_idx]
        setting_tputs.extend(run_tputs)
        fig.add_scatter(x = run_times, y = run_tputs,
                        name = "Run {idx}".format(idx = run_idx))

    print("Mean:", np.mean(setting_tputs))
    print("Stdev:", np.std(setting_tputs))
    fig.show()

## Measured Throughput

In [None]:
import os
import pandas as pd
import math

def ComputeMeasuredTputData(dir_path):
    """Compute the per-second throughput from the packet capture of one run.

    Parameters:
        dir_path: directory expected to contain "packets_client.csv". Only its
            first two columns are read, and they must be named "_ws.col.Time"
            and "frame.len".

    Returns:
        (seconds, tputs): the floored capture times that have at least one
        packet, ascending, and for each of them the sum of "frame.len"
        multiplied by 8e-3 (Kbit/s, given frame lengths in bytes).
        Returns None when the directory has no "packets_client.csv".
    """
    if "packets_client.csv" not in os.listdir(dir_path):
        return None

    # Passing the path lets pandas open and close the file itself.
    packetDf = pd.read_csv(os.path.join(dir_path, "packets_client.csv"), usecols=[0, 1])

    # Bucket packets by the whole second in which they were captured.
    capture_second = packetDf['_ws.col.Time'].map(math.floor)
    bytes_per_second = packetDf['frame.len'].groupby(capture_second).sum()
    kbit_per_second = bytes_per_second.multiply(8e-3)  # multiply to get Kbit/s

    return list(kbit_per_second.index), list(kbit_per_second)

In [None]:
import os

settings = ["5s", "10s", "20s"]
# Indexed as [setting][run] -> list of samples.
measured_times = list()
measured_throughputs = list()
for setting in settings:
    dir_path = "./{setting}".format(setting = setting)

    setting_times = list()
    setting_measuredTputs = list()

    for file_name in os.listdir(dir_path):
        file_path = os.path.join(dir_path, file_name)
        if not os.path.isdir(file_path): # Only looking to read directories (that are symlinks).
            continue

        run_data = ComputeMeasuredTputData(file_path)
        if run_data is None:
            # No packets_client.csv in this directory; skip it.
            continue
        run_times, run_measuredTputs = run_data
        setting_times.append(run_times)
        setting_measuredTputs.append(run_measuredTputs)

    measured_times.append(setting_times)
    measured_throughputs.append(setting_measuredTputs)

In [None]:
import numpy as np
import plotly.express as plx
import plotly.graph_objects as plgo

# One figure per setting with one line per run, plus mean/stdev over all samples of the setting.
for setting_idx in range(len(measured_throughputs)):
    setting_tputs = list()

    fig = plgo.Figure()
    set_std_plot_params(fig)

    for run_idx in range(len(measured_throughputs[setting_idx])):
        run_times = measured_times[setting_idx][run_idx]
        run_tputs = measured_throughputs[setting_idx][run_idx]
        setting_tputs += run_tputs
        # Plot against the capture second, as the estimated-throughput plot does with its
        # times. Seconds without packets have no sample, so the sample index is not a time axis.
        fig.add_scatter(x = run_times, y = run_tputs,
                        name = "Run {idx}".format(idx = run_idx))

    print("Mean:", np.mean(setting_tputs))
    print("Stdev:", np.std(setting_tputs))
    fig.show()

## Estimated vs Measured Throughput

In [None]:
# Compare estimated and measured throughput run by run for the first setting.
setting_idx = 0
for run_idx in range(len(estimated_throughputs[setting_idx])):
    run_estimated_times = estimated_times[setting_idx][run_idx]
    run_estimated_tputs = estimated_throughputs[setting_idx][run_idx]

    run_measured_times = measured_times[setting_idx][run_idx]
    run_measured_tputs = measured_throughputs[setting_idx][run_idx]

    fig = plgo.Figure()

    set_std_plot_params(fig)
    # The run index goes into the title so the otherwise identical-looking figures can be told apart.
    fig.update_layout(title = "Run {idx}".format(idx = run_idx))

    fig.add_scatter(x = run_estimated_times, y = run_estimated_tputs,
                    name = "Estimated")
    fig.add_scatter(x = run_measured_times, y = run_measured_tputs,
                    name = "Measured")
    fig.show()

## Time to Start Playing

In [None]:
import os
import re

# Raw string keeps "\d" / "\." as regex escapes rather than invalid string escapes.
# Group 1: first number of a LOG line; group 2: second number (the one before the colon).
log_video_time_pattern = re.compile(r"^LOG  ([\d\.]+),([\d\.]+):.+$")

def ComputeTimeToStart(dir_path):
    """Find the time at which playback started in one run.

    Parameters:
        dir_path: directory expected to contain exactly one "Tester_*.log" file.

    Returns:
        The first number (absolute time) of the first LOG line whose second
        number (video time) is greater than zero, as a float;
        -1 if no LOG line has a positive video time;
        None when the directory does not hold exactly one such log.
    """
    jsLog_names = [filename for filename in os.listdir(dir_path) if filename.startswith("Tester_") and filename.endswith(".log")]
    if len(jsLog_names) != 1:
        return None

    # The context manager closes the log on the early return as well.
    with open(os.path.join(dir_path, jsLog_names[0]), "r") as jsLog:
        for line in jsLog:
            match = log_video_time_pattern.match(line)
            if match:
                absolute_time = float(match.group(1))
                video_time = float(match.group(2))
                if video_time > 0:
                    # We've found when the video started playing.
                    return absolute_time

    return -1

In [None]:
import numpy as np
import scipy.stats as st
import os

conf_interval = 0.95
seg_lens = ["5s", "10s", "20s"]

# One entry per segment length, in the order of seg_lens.
avg_start_times_sec = list()
avg_start_times_sec_conf = list()
stdev_start_times_sec = list()

for seg_len in seg_lens:
    dir_path = "./{seg_len}".format(seg_len = seg_len)

    start_times_sec = list()

    for file_name in os.listdir(dir_path):
        file_path = os.path.join(dir_path, file_name)
        if not os.path.isdir(file_path): # Only looking to read directories (that are symlinks).
            continue
        start_time_sec = ComputeTimeToStart(file_path)
        # None: no unique log in this run. Negative: the -1 sentinel for "playback never
        # started". Neither is a measurement, so neither may enter the statistics.
        if start_time_sec is None or start_time_sec < 0:
            continue
        start_times_sec.append(start_time_sec)

    avg_start_time_sec = np.mean(start_times_sec)
    avg_start_times_sec.append(avg_start_time_sec)
    # Student-t interval. The confidence level is passed positionally: its keyword was
    # renamed from `alpha` to `confidence` in SciPy, and the positional form works with both.
    avg_start_times_sec_conf.append(st.t.interval(conf_interval,
                                                  df = len(start_times_sec) - 1,
                                                  loc = avg_start_time_sec,
                                                  scale = st.sem(start_times_sec)))
    stdev_start_times_sec.append(np.std(start_times_sec))

In [None]:
avg_start_times_sec

In [None]:
avg_start_times_sec_conf

In [None]:
stdev_start_times_sec