# Log File Processing Utilities for 499 Visualizations

Various utilities that allow us to extract data from the DASH log files.

In [23]:
%run DataPaths.ipynb

In [24]:
import numpy as np

In [25]:
def readFieldsFromDataFile(filepath):
    """Return a list containing all fields from the given log file.

    Every line of the file is split on runs of whitespace. The result has one
    entry per line, in file order, and each entry is the list of string fields
    found on that line. A blank line produces an empty list.

    Args:
        filepath: Path of the text file to read.

    Returns:
        A list of lists of strings.
    """
    with open(filepath) as inFile:
        # str.split() with no separator already discards the trailing newline
        # along with any other leading/trailing whitespace.
        return [line.split() for line in inFile]


def readAdaptationLog(parentDir):
    """Load "cl0_adaptationLog.txt" and parse its rows.

    The file name is appended to parentDir by plain string concatenation, so
    no path separator is inserted here.

    Returns a (headers, rows) tuple. headers is the first three fields of the
    file's first line. rows is a list of (segment, qualityLevel, timestamp)
    tuples parsed as (int, int, float) from the first three fields of every
    following line.
    """
    parsed = readFieldsFromDataFile(parentDir + "cl0_adaptationLog.txt")

    columnNames = parsed[0][:3]
    entries = [
        (int(fields[0]), int(fields[1]), float(fields[2]))
        for fields in parsed[1:]
    ]
    return (columnNames, entries)


def readBufferLog(parentDir):
    """Load "cl0_bufferLog.txt" and parse its rows.

    The file name is appended to parentDir by plain string concatenation, so
    no path separator is inserted here.

    Returns a (headers, rows) tuple. headers is every field of the file's
    first line. rows is a list of (timestamp, bufferLevel) tuples, both parsed
    as floats from the first two fields of every following line.
    """
    parsed = readFieldsFromDataFile(parentDir + "cl0_bufferLog.txt")

    columnNames = parsed[0]
    samples = [(float(fields[0]), float(fields[1])) for fields in parsed[1:]]
    return (columnNames, samples)


def readUnderflowLog(parentDir):
    """Load "cl0_bufferUnderrunLog.txt" and parse its rows.

    The file name is appended to parentDir by plain string concatenation, so
    no path separator is inserted here.

    Parsing stops at the first line with fewer than two fields: a message is
    printed and the rows collected so far are returned.

    Returns a (headers, rows) tuple. headers is the first (up to) three fields
    of the file's first line. rows is a list of (startTime, endTime) tuples,
    both parsed as floats.
    """
    filepath = parentDir + "cl0_bufferUnderrunLog.txt"
    stringFields = readFieldsFromDataFile(filepath)

    headers = stringFields[0][:3]

    rows = []
    for row in stringFields[1:]:
        if len(row) < 2:
            # A blank line yields an empty row; indexing row[0] there would
            # raise IndexError inside the very branch meant to report the
            # problem, so fall back to a placeholder.
            nearTime = row[0] if row else "<unknown>"
            print("Malformed buffer underflow log. Likely that QUIC failed near time {}".format(nearTime))
            break

        rows.append((float(row[0]), float(row[1])))

    return (headers, rows)


def readPlaybackLog(parentDir):
    """Load "cl0_playbackLog.txt" and parse its rows.

    The file name is appended to parentDir by plain string concatenation, so
    no path separator is inserted here.

    Returns a (headers, rows) tuple. headers is the first three fields of the
    file's first line. rows is a list of (segment, startTime, qualityLevel)
    tuples parsed as (int, float, int) from the first three fields of every
    following line.
    """
    parsed = readFieldsFromDataFile(parentDir + "cl0_playbackLog.txt")

    columnNames = parsed[0][:3]
    entries = [
        (int(fields[0]), float(fields[1]), int(fields[2]))
        for fields in parsed[1:]
    ]
    return (columnNames, entries)


def readDownloadLog(parentDir):
    """Load "cl0_downloadLog.txt" and parse its rows.

    The file name is appended to parentDir by plain string concatenation, so
    no path separator is inserted here.

    Returns a (headers, rows) tuple. headers is the first three fields of the
    file's first line. rows is a list of
    (segment, startTime, endTime, segmentSize) tuples parsed as
    (int, float, float, int) from fields 0, 2, 3 and 4 of every following
    line; field 1 is not used.
    """
    parsed = readFieldsFromDataFile(parentDir + "cl0_downloadLog.txt")

    # NOTE(review): headers come from columns 0-2 while the row values come
    # from columns 0, 2, 3 and 4, so the two do not line up one-to-one.
    # Confirm whether callers rely on the headers before changing this.
    columnNames = parsed[0][:3]
    entries = [
        (int(fields[0]), float(fields[2]), float(fields[3]), int(fields[4]))
        for fields in parsed[1:]
    ]
    return (columnNames, entries)


def readThroughputLog(parentDir):
    """Load "cl0_throughputLog.txt" as two parallel NumPy arrays.

    The file name is appended to parentDir by plain string concatenation, so
    no path separator is inserted here.

    Because these files are large, this reader departs from the
    (headers, rows) pattern used by the other log readers: the header line is
    skipped and the two values of interest are returned as separate arrays.

    Returns a (times, sizes) tuple of arrays with one element per data line.
    times holds field 0 parsed as float; sizes holds field 1 parsed as int
    and stored in an array of NumPy's default dtype.
    """
    parsed = readFieldsFromDataFile(parentDir + "cl0_throughputLog.txt")

    # Everything after the first (header) line is data.
    count = len(parsed) - 1
    times = np.empty(count)
    sizes = np.empty(count)
    for idx, fields in enumerate(parsed[1:]):
        times[idx] = float(fields[0])
        sizes[idx] = int(fields[1])

    return (times, sizes)

In [26]:
# Reads the segment sizes and returns them as a matrix
def readSegmentSizesFile(filepath):
    """Parse a whitespace-separated file of integers into an integer array.

    Each line of the file becomes one row of the returned array and each
    whitespace-separated field on that line becomes one integer element.
    """
    sizeStrings = np.array(readFieldsFromDataFile(filepath))
    return sizeStrings.astype(int)

In [33]:
# Returns the size of each segment chosen
def getSegmentSizeChoices(filepath):
    """Return the segment sizes recorded in the download log as an int array.

    Args:
        filepath: Passed unchanged to readDownloadLog, which appends the
            download log's file name to it.

    Returns:
        A 1-D integer NumPy array with one element per download log row,
        holding the fourth value (segmentSize) of each row. The array is
        empty when the log has no data rows.
    """
    _, rows = readDownloadLog(filepath)
    # Build the column directly: slicing np.array(rows)[:, 3] fails with an
    # IndexError when rows is empty (the array is then 1-D) and would also
    # push the integer sizes through a float64 array first.
    return np.array([row[3] for row in rows], dtype=int)