In [116]:
# Clear lingering variables left over from earlier Jupyter runs.
# `run_line_magic` is the supported replacement for the deprecated `magic()`
# helper, and the None check keeps the file runnable as a plain script, where
# get_ipython() returns None instead of a shell object.
from IPython import get_ipython
_shell = get_ipython()
if _shell is not None:
    _shell.run_line_magic('reset', '-sf')

# Imports
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


# Load the capture file; header=None means the first line is read as data,
# so the column labels are assigned explicitly afterwards
data = pd.read_csv('order.csv', header=None)
data.columns = ['packet_id', 'max_packets', 'ip', 'port']

# One group per client, where a client is a unique (port, ip) pair;
# sort=False leaves the group keys unsorted
groupedBy = data.groupby(by=['port', 'ip'], sort=False)

# Group keys, used by the processing loop to fetch each client's rows
keys = groupedBy.groups.keys()

# Aggregate counters, all starting at zero
total_missing = min_lost = max_lost = min_unordered = max_unordered = 0


# Process each client's packets.
# For every client this prints the received rows, the received / intended /
# lost counts, the packet ids that never arrived, and the ids that arrived
# right after a higher id. The lost count is accumulated so the average per
# client can be reported once all groups are done.
for i in keys:
    print('client: ' + str(i))
    # reset_index returns a fresh frame, so nothing is modified in place on
    # the object handed back by get_group
    group = groupedBy.get_group(i).reset_index(drop=True)
    print(group)
    print('\n')
    packetsReceived = len(group.index)
    # Read the intended count from the first row: every group has at least
    # one row, whereas position 1 does not exist for a single-packet client.
    packetsIntended = int(group['max_packets'].iloc[0])
    packetsLost = packetsIntended - packetsReceived
    total_missing += packetsLost
    print('num packets received: ' + str(packetsReceived))
    print('num packets intended: ' + str(packetsIntended))
    print('packets lost: ' + str(packetsLost))

    # Series aligned on positions 0..packetsIntended-1, used only by the
    # debug comparison below (positions with no received packet become NaN)
    receivedSeries = group['packet_id'].reindex(range(packetsIntended))
    expectedSeries = pd.Series(range(1, packetsIntended + 1), name='packet_id')

    # find missing and out of order packets
    # Work on the ids that actually arrived, in arrival order, so the NaN
    # padding of receivedSeries never enters the analysis.
    received_ids = group['packet_id'].tolist()
    seen_ids = set(received_ids)

    # missing: every expected id (1..packetsIntended) that never arrived
    missing = [
        packet_id
        for packet_id in range(1, packetsIntended + 1)
        if packet_id not in seen_ids
    ]

    # out of order: an id that arrived directly after a higher id
    out_of_order = [
        current
        for previous, current in zip(received_ids, received_ids[1:])
        if current < previous
    ]

    # print results
    print('missing: ' + str(missing))
    print('out of order: ' + str(out_of_order))

    # debug only: compare function for verification
    comparison = receivedSeries.compare(expectedSeries)
    if len(comparison) != 0:
        print('\nself = received, other = expected')
        print(comparison)

    print('\n\n')

# Average number of lost packets per client, rounded to two decimals
average_missing = round(total_missing / groupedBy.ngroups, 2)
print('average missing: ' + str(average_missing))

client: (9000, '1.0.0.0')
    packet_id  max_packets       ip  port
0           1          100  1.0.0.0  9000
1           2          100  1.0.0.0  9000
2           3          100  1.0.0.0  9000
3           4          100  1.0.0.0  9000
4           5          100  1.0.0.0  9000
..        ...          ...      ...   ...
95         96          100  1.0.0.0  9000
96         97          100  1.0.0.0  9000
97         98          100  1.0.0.0  9000
98         99          100  1.0.0.0  9000
99        100          100  1.0.0.0  9000

[100 rows x 4 columns]


num packets received: 100
num packets intended: 100
packets lost: 0
missing: []
out of order: []



client: (9000, '2.0.0.0')
    packet_id  max_packets       ip  port
0           1          100  2.0.0.0  9000
1           2          100  2.0.0.0  9000
2           3          100  2.0.0.0  9000
3           4          100  2.0.0.0  9000
4           5          100  2.0.0.0  9000
..        ...          ...      ...   ...
95         96          1