In [22]:
import numpy as np
import xml.etree.ElementTree as ET

!pwd

# Directory prefix that is prepended (by plain string concatenation) to each
# file name below, so it must keep its trailing slash.
path = "../statistics/"

# Statistics files to summarize, in the order they are reported.
# The names differ only in their last three numeric fields; what each field
# controls is not visible here -- presumably simulation parameters (confirm).
xml_file_list = [
    # First numeric field varied: 1, 0.5, 0.25, 0.125.
    "Three-tier2-10s-1-1-1.xml",
    "Three-tier2-10s-0.5-1-1.xml",
    "Three-tier2-10s-0.25-1-1.xml",
    "Three-tier2-10s-0.125-1-1.xml",
    # Second numeric field varied: 2, 4, 8.
    "Three-tier2-10s-1-2-1.xml",
    "Three-tier2-10s-1-4-1.xml",
    "Three-tier2-10s-1-8-1.xml",
    # Third numeric field varied: 2, 4, 8.
    "Three-tier2-10s-1-1-2.xml",
    "Three-tier2-10s-1-1-4.xml",
    "Three-tier2-10s-1-1-8.xml",
]

def analyze(file_path):
    """Parse one flow-statistics XML file into a list of per-flow records.

    Args:
        file_path: Anything accepted by ``ET.parse`` (it is passed straight
            through).

    Returns:
        A list with one dict per child element of the root's first child.
        Each dict has the keys ``flow_id`` (str), ``tx_byte`` (float),
        ``lost_packet`` (int), ``time_tx_packet`` (float) and
        ``time_rx_packet`` (float); the two time values are "last minus
        first" timestamp differences.

    Raises:
        KeyError: If an element lacks one of the attributes read below.
        ValueError: If an attribute cannot be converted to a number.
        IndexError: If the root element has no children.
    """

    def _span(attrs, last_key, first_key):
        # Each timestamp string ends in a two-character suffix (presumably
        # the "ns" unit -- confirm against the producer) that must be
        # dropped before the value can be converted to float.
        return float(attrs[last_key][:-2]) - float(attrs[first_key][:-2])

    # The per-flow elements are taken positionally: they are the children of
    # the first child of the document root.
    flow_stats = ET.parse(file_path).getroot()[0]

    return [
        {
            "flow_id": attrs["flowId"],
            "tx_byte": float(attrs["txBytes"]),
            "lost_packet": int(attrs["lostPackets"]),
            "time_tx_packet": _span(
                attrs, "timeLastTxPacket", "timeFirstTxPacket"),
            "time_rx_packet": _span(
                attrs, "timeLastRxPacket", "timeFirstRxPacket"),
        }
        for attrs in (flow.attrib for flow in flow_stats)
    ]
    
    
def summarize(flow_list):
    """Aggregate per-flow records into a single summary dict.

    Args:
        flow_list: Sequence of dicts, each providing the keys ``tx_byte``,
            ``time_tx_packet``, ``time_rx_packet`` and ``lost_packet``.

    Returns:
        A dict with the keys ``flow_num`` (number of records), ``avg_byte``,
        ``avg_tx_time``, ``tx_time_std``, ``avg_rx_time``, ``rx_time_std``
        and ``total_lost``. The standard deviations come from ``np.std``
        with its default arguments. For an empty ``flow_list`` every
        average and deviation is ``0.0`` and ``flow_num`` is ``0``, so a
        file without flows yields a report instead of a crash.

    Raises:
        KeyError: If a record lacks one of the keys listed above.
    """
    flow_num = len(flow_list)

    # Collect each column once instead of rebuilding it per statistic.
    tx_bytes = [flow["tx_byte"] for flow in flow_list]
    tx_times = [flow["time_tx_packet"] for flow in flow_list]
    rx_times = [flow["time_rx_packet"] for flow in flow_list]

    def _mean(values):
        # Guard the division: with no flows there is nothing to average.
        return sum(values) / flow_num if flow_num else 0.0

    def _std(values):
        # np.std of an empty sequence yields NaN plus a RuntimeWarning.
        return np.std(values) if flow_num else 0.0

    result = {}
    result["flow_num"] = flow_num
    result["avg_byte"] = _mean(tx_bytes)
    result["avg_tx_time"] = _mean(tx_times)
    result["tx_time_std"] = _std(tx_times)
    result["avg_rx_time"] = _mean(rx_times)
    result["rx_time_std"] = _std(rx_times)
    result["total_lost"] = sum(flow["lost_packet"] for flow in flow_list)

    return result
    

if __name__ == "__main__":
    # Summarize every listed statistics file and print one report per file.
    for xml_file in xml_file_list:
        # Single-argument print(...) calls produce the same text under the
        # Python 2 print statement and the Python 3 print function, so the
        # script runs unchanged on both.
        print("File name: %s" % xml_file)
        flow_list = analyze(path + xml_file)
        result = summarize(flow_list)
        print(result)
        
    

/Users/Kuhn/Dropbox/Study/UCSD/CSE 291 Cloud Computing/projects/project1/workspace/ntu-dsi-dcn-portland/stats
File name: Three-tier2-10s-1-1-1.xml
{'avg_tx_time': 8903407217.9375, 'tx_time_std': 1120232058.4849615, 'flow_num': 16, 'total_lost': 0, 'avg_rx_time': 8903403679.4375, 'rx_time_std': 1120230120.4338911, 'avg_byte': 595103.25}
File name: Three-tier2-10s-0.5-1-1.xml
{'avg_tx_time': 8910318810.125, 'tx_time_std': 1119849001.0638173, 'flow_num': 16, 'total_lost': 0, 'avg_rx_time': 8910315271.625, 'rx_time_std': 1119847062.0028217, 'avg_byte': 611313.75}
File name: Three-tier2-10s-0.25-1-1.xml
{'avg_tx_time': 8913389682.125, 'tx_time_std': 1119691350.2917693, 'flow_num': 16, 'total_lost': 0, 'avg_rx_time': 8913386143.625, 'rx_time_std': 1119689411.4202442, 'avg_byte': 643153.5}
File name: Three-tier2-10s-0.125-1-1.xml
{'avg_tx_time': 8891461640.5625, 'tx_time_std': 1113685079.2826912, 'flow_num': 16, 'total_lost': 0, 'avg_rx_time': 8891458102.0625, 'rx_time_std': 1113683054.014369