In [4]:


import xml.etree.ElementTree as ET
import numpy as np

!pwd

# Prefix that the driver loop concatenates in front of each entry of
# xml_file_list before handing it to analyze().
path = "../statistics/"

# Statistics files to process. The three numeric fields in each name are
# presumably simulation parameters -- TODO confirm what each one controls.
xml_file_list = [
    # All three fields at 1.
    "statistics/Portland-1-1-1.xml",
    # First field varied.
    "statistics/Portland-0.5-1-1.xml",
    "statistics/Portland-0.25-1-1.xml",
    "statistics/Portland-0.125-1-1.xml",
    # Second field varied.
    "statistics/Portland-1-2-1.xml",
    "statistics/Portland-1-4-1.xml",
    "statistics/Portland-1-8-1.xml",
    # Third field varied.
    "statistics/Portland-1-1-2.xml",
    "statistics/Portland-1-1-4.xml",
    "statistics/Portland-1-1-8.xml",
]

def _parse_time(text):
    """Convert a timestamp attribute such as ``"+123.0ns"`` to a float.

    Only trailing alphabetic characters (the unit suffix) are removed before
    the conversion, so a value without a suffix keeps all of its digits.
    The unit is not interpreted: the number is returned as written.
    """
    end = len(text)
    while end > 0 and text[end - 1].isalpha():
        end -= 1
    return float(text[:end])


def analyze(file_path):
    """Read per-flow statistics from an XML statistics file.

    Args:
        file_path: File name or file object accepted by ``ET.parse``.

    Returns:
        A list with one dict per child of the root's first child element.
        Each dict holds:
            "flow_id": the ``flowId`` attribute, as a string.
            "tx_byte": the ``txBytes`` attribute, as a float.
            "lost_packet": the ``lostPackets`` attribute, as an int.
            "time_tx_packet": ``timeLastTxPacket - timeFirstTxPacket``.
            "time_rx_packet": ``timeLastRxPacket - timeFirstRxPacket``.
        Durations are in the unit used by the file's timestamps.

    Raises:
        KeyError: a flow element lacks one of the attributes above.
        ValueError: an attribute value is not a valid number.
    """
    tree = ET.parse(file_path)
    # The flow entries are the children of the root's first child element.
    flow_stats = tree.getroot()[0]

    data_list = []
    for flow in flow_stats:
        attrib = flow.attrib
        data_list.append({
            "flow_id": attrib["flowId"],
            "tx_byte": float(attrib["txBytes"]),
            "lost_packet": int(attrib["lostPackets"]),
            "time_tx_packet": (_parse_time(attrib["timeLastTxPacket"])
                               - _parse_time(attrib["timeFirstTxPacket"])),
            "time_rx_packet": (_parse_time(attrib["timeLastRxPacket"])
                               - _parse_time(attrib["timeFirstRxPacket"])),
        })

    return data_list
    
    
def summarize(flow_list):
    """Aggregate per-flow statistics into a single summary dict.

    Args:
        flow_list: Sequence of dicts with the keys "tx_byte",
            "time_tx_packet", "time_rx_packet" and "lost_packet".

    Returns:
        A dict with:
            "flow_num": number of flows.
            "avg_byte": mean of "tx_byte".
            "avg_tx_time" / "tx_time_std": mean and population standard
                deviation (``np.std``) of "time_tx_packet".
            "avg_rx_time" / "rx_time_std": the same for "time_rx_packet".
            "total_lost": sum of "lost_packet".
        For an empty ``flow_list`` every statistic is zero, so a file
        without flows does not abort a batch run with ZeroDivisionError.
    """
    flow_num = len(flow_list)
    if not flow_num:
        return {
            "flow_num": 0,
            "avg_byte": 0.0,
            "avg_tx_time": 0.0,
            "tx_time_std": 0.0,
            "avg_rx_time": 0.0,
            "rx_time_std": 0.0,
            "total_lost": 0,
        }

    # Build each duration list once; it feeds both the mean and the std.
    tx_times = [flow["time_tx_packet"] for flow in flow_list]
    rx_times = [flow["time_rx_packet"] for flow in flow_list]

    return {
        "flow_num": flow_num,
        "avg_byte": sum(flow["tx_byte"] for flow in flow_list) / flow_num,
        "avg_tx_time": sum(tx_times) / flow_num,
        "tx_time_std": np.std(tx_times),
        "avg_rx_time": sum(rx_times) / flow_num,
        "rx_time_std": np.std(rx_times),
        "total_lost": sum(flow["lost_packet"] for flow in flow_list),
    }
    

if __name__ == "__main__":
    # Analyze every listed statistics file and print its summary.
    for xml_file in xml_file_list:
        # Single-argument print(...) calls produce the same text on
        # Python 2 and Python 3; the old print statements are a
        # SyntaxError on Python 3.
        print("File name: %s" % xml_file)
        flow_list = analyze(path + xml_file)
        result = summarize(flow_list)
        print(result)
        
    

/Users/Kuhn/Dropbox/Study/UCSD/CSE 291 Cloud Computing/projects/project1/workspace/ntu-dsi-dcn-portland/stats
File name: Fat-tree.xml
{'avg_tx_time': 98420519459.4375, 'tx_time_std': 1525135317.5563493, 'flow_num': 16, 'total_lost': 0, 'avg_rx_time': 98420516670.9375, 'rx_time_std': 1525133633.7767444, 'avg_byte': 6320021.5}
File name: Fat-tree-Bilal.xml
{'avg_tx_time': 98420519459.4375, 'tx_time_std': 1525135317.5563493, 'flow_num': 16, 'total_lost': 0, 'avg_rx_time': 98420516670.9375, 'rx_time_std': 1525133633.7767444, 'avg_byte': 6320021.5}
File name: Fat-tree-AlFares.xml
{'avg_tx_time': 5802644.0, 'tx_time_std': 0.0, 'flow_num': 16, 'total_lost': 0, 'avg_rx_time': 5799728.5, 'rx_time_std': 3088.8466698753437, 'avg_byte': 72588.0}
File name: BCube.xml
{'avg_tx_time': 98541666699.64062, 'tx_time_std': 1542753729.0118563, 'flow_num': 64, 'total_lost': 0, 'avg_rx_time': 98541660287.14062, 'rx_time_std': 1542753220.9924941, 'avg_byte': 6324130.875}
File name: Three-Tier.xml
{'avg_tx_tim