In [None]:
# Local file holding the "Forwarding request" lines extracted from the receiver
# logs; it is created on first use and parsed by the cells below.
filename = "r_intra_1_500p_udp2.log"

In [None]:
from pathlib import Path

# Build the working file only once: when it is not already present, collect
# every "Forwarding request " line from the receiver logs into it.
# The leading `!` is IPython shell-escape syntax, so this cell only runs inside
# IPython/Jupyter, and `{filename}` is expanded from the Python variable.
# NOTE(review): the shell redirection presumably creates the file even when
# grep matches nothing, in which case an empty file would be reused on later
# runs — confirm and delete the file manually if that happens.
if not Path(filename).exists():
    !grep "Forwarding request " ../logs/receiver*.log >{filename}

In [None]:
import re
def parse_int_tag(line, name):
    """Return the integer value of the first ``name=<digits>`` tag in *line*.

    Args:
        line: Log line to search.
        name: Tag name that precedes the ``=`` sign. It is interpolated into
            the regular expression as-is.

    Returns:
        The tag's value as an int, or 0 when *line* contains no
        ``name=<digits>`` occurrence.
    """
    # `+` rather than `*`: a tag with no digits after "=" must count as
    # missing instead of matching an empty string and crashing in int("").
    match = re.search(f"{name}=([0-9]+)", line)
    return int(match.group(1)) if match else 0

def parse_line(line):
    """Extract the request identity from one log line.

    Only the ``r_id``, ``c_id`` and ``c_seq`` tags are read; the ``deadline``
    and ``now`` tags are not part of the returned value, so they are not
    parsed.

    Args:
        line: A single log line.

    Returns:
        A ``(r_id, c_id, c_seq)`` tuple of ints. A tag that parse_int_tag
        cannot find in the line is reported as 0.
    """
    # Named receiver_id rather than `id` to avoid shadowing the builtin.
    receiver_id = parse_int_tag(line, "r_id")
    proxy_id = parse_int_tag(line, "c_id")
    proxy_seq = parse_int_tag(line, "c_seq")

    return (receiver_id, proxy_id, proxy_seq)

In [None]:
# Group the parsed (c_id, c_seq) pairs by the r_id of the log line they came
# from, keeping them in the order in which they appear in the file.
requests = {}

# `with` closes the file handle as soon as parsing is done; the loop variable
# is not called `id`, so the builtin stays usable in later cells.
with open(filename) as log_file:
    for line in log_file:
        receiver_id, *req = parse_line(line)
        requests.setdefault(receiver_id, []).append(tuple(req))

# Keep at most the first 15000 requests per receiver.
requests = {k: v[:15000] for k, v in requests.items()}

# One count per receiver, in first-seen order.
print(*(len(v) for v in requests.values()))

In [None]:
from collections import defaultdict

# Helpers for comparing the request sequences from receivers (pairwise equality, edit distance, LIS, positional matches)
def req_eq(r1, r2):
    """Tell whether two requests agree on their first two fields.

    Only positions 0 and 1 are compared; any further fields are ignored.
    The second field is looked at only when the first fields are equal.
    """
    if not (r1[0] == r2[0]):
        return False
    return r1[1] == r2[1]

def edit_dist(s1, s2):
    """Return the edit (Levenshtein) distance between two request sequences.

    The distance is the minimum number of single-request insertions,
    deletions and substitutions that turn *s1* into *s2*. Two requests count
    as equal when req_eq() says so.

    Only two rows of the dynamic-programming table are kept at any time, so
    memory grows with len(s2) instead of len(s1) * len(s2).

    Args:
        s1: First sequence of requests.
        s2: Second sequence of requests.

    Returns:
        The distance as a non-negative int. If one sequence is empty, this is
        the length of the other.
    """
    # Row 0: turning an empty prefix of s1 into s2[:j] takes j insertions.
    prev_row = list(range(len(s2) + 1))

    for i, left in enumerate(s1, start=1):
        # Column 0: turning s1[:i] into an empty prefix takes i deletions.
        cur_row = [i]
        for j, right in enumerate(s2, start=1):
            # Keep (free) or change (cost 1) the request at position i/j.
            substitute = prev_row[j - 1] + (0 if req_eq(left, right) else 1)

            # TODO swaps?
            cur_row.append(min(substitute, 1 + prev_row[j], 1 + cur_row[j - 1]))
        prev_row = cur_row

    return prev_row[-1]


# Longest Increasing Subsequence
def lis(s1, s2):
    """Return how many entries of *s2* can be kept in the order *s1* has them.

    Each entry of *s2* is replaced by the position of the same request
    (compared on its first two fields) in *s1*. The result is the length of
    the longest strictly increasing subsequence of those positions.

    Args:
        s1: Reference sequence of requests, used as the ground-truth order.
            If a request occurs more than once, its last position is used.
        s2: Sequence of requests to score against *s1*.

    Returns:
        The subsequence length as an int; 0 when *s2* is empty or shares no
        request with *s1*.
    """
    from bisect import bisect_left

    # use s1 as truth
    entry_to_idx = {(x[0], x[1]): i for i, x in enumerate(s1)}

    # Requests that s1 never saw have no reference position. They are skipped
    # rather than mapped to index 0, which would alias them with s1's first
    # request and could inflate the result.
    seq = [
        entry_to_idx[(x[0], x[1])]
        for x in s2
        if (x[0], x[1]) in entry_to_idx
    ]

    # tails[k] is the smallest value that ends an increasing subsequence of
    # length k + 1; bisect_left keeps the subsequence strictly increasing.
    tails = []
    for idx in seq:
        pos = bisect_left(tails, idx)
        if pos == len(tails):
            tails.append(idx)
        else:
            tails[pos] = idx

    return len(tails)

def lis_multiple(*seqs):
    """Score every sequence after the first against the first one with lis().

    The first positional argument is the reference. For each remaining
    sequence the lis() result is divided by the reference's length.

    Returns:
        A list with one ratio per non-reference sequence, in argument order.
    """
    reference = seqs[0]
    reference_len = len(reference)

    return [lis(reference, other) / reference_len for other in seqs[1:]]

# Simpler metric: what percentage of requests are the same?
def matching_reqs(*seqs):
    """Flag, position by position, whether all sequences hold the same request.

    The sequences are walked in lockstep, so the result is as long as the
    shortest one. A position matches when every pair of neighbouring
    sequences agrees there according to req_eq().

    Args:
        *seqs: Request sequences to compare.

    Returns:
        A list of bools, one per compared position. A single sequence yields
        all True; calling with no sequences yields an empty list.
    """
    return [
        all(req_eq(x, y) for x, y in zip(entries, entries[1:]))
        for entries in zip(*seqs)
    ]

In [None]:
import itertools

# Per-position agreement across all receivers' request sequences.
reqs = matching_reqs(*requests.values())

print(f"Total number of requests: {len(reqs)}")
print(f"Total proportion of requests the same: {sum(reqs) / len(reqs):.2f}")

# Consecutive runs of matching / non-matching positions as (flag, run length).
# Every run is printed; no minimum length is applied.
print([(flag, len(list(run))) for flag, run in itertools.groupby(reqs)])

In [None]:
# Ordering similarity of every receiver against the first one in `requests`:
# lis_multiple treats its first argument as the reference sequence and returns,
# for each other sequence, the lis() length divided by the reference length.
print(lis_multiple(*requests.values()))

In [None]:
# Edit distance from the sequence stored under key 0 to those under 1, 2 and 3.
for other_key in (1, 2, 3):
    print(edit_dist(requests[0], requests[other_key]))



In [None]:
# Second fields of the entries under key 0 whose first field equals 2,
# in stored order.
seq = [c_seq for (c_id, c_seq) in requests[0] if c_id == 2]

# Report every neighbouring pair whose values jump by more than one.
for earlier, later in zip(seq, seq[1:]):
    if later - earlier > 1:
        print("Drop!", earlier, later)