# Notebook to Reproduce Originality Detection Findings

## Start Testground

```
$ testground daemon
```

## Run Testplans in Docker:
``` 
$ testground run single --plan=coopbc --testcase=bc --runner=local:docker --builder=docker:go --instances=10 --collect
```

### Run Testplans in K8S:
```
$ testground run single --plan=coopbc --testcase=bc --runner=cluster:k8s --builder=docker:go --instances=10 --collect --build-cfg go_proxy_mode=direct
```

# Parsing Outputs

### Find Output Dirs

In [146]:
import os

outputs_folder = "c6vhkft8gb1ak76ffd00"

docker_data_dir = "../outputs/local_docker/coopbc/"
directory_contents = os.listdir(docker_data_dir)
print(directory_contents)

['c6vns7d8gb19og3qh6gg', 'c6vnsmd8gb19og3qh6h0', 'c6vnrod8gb19og3qh6g0']


### Find all .out files and map them to their RUNID

In [147]:
import os
import pathlib
from pathlib import Path
import glob

def listdir_r(dirpath):
    paths=[]
    paths.append(dirpath)
    for path in os.listdir(dirpath):
        rpath = os.path.join(dirpath, path)
        if os.path.isdir(rpath):
            subdirs = listdir_r(rpath)
            if not subdirs == []:
                paths.extend(subdirs)
    return paths

all_paths_dict = {}
for d in directory_contents:
    print(docker_data_dir + d)
    all_paths = listdir_r(os.path.abspath(docker_data_dir + d))
    all_paths_containing_target = [s for s in all_paths if d in s]
    all_paths_dict[d]=all_paths_containing_target

print(all_paths_dict)

../outputs/local_docker/coopbc/c6vns7d8gb19og3qh6gg
../outputs/local_docker/coopbc/c6vnsmd8gb19og3qh6h0
../outputs/local_docker/coopbc/c6vnrod8gb19og3qh6g0
{'c6vns7d8gb19og3qh6gg': ['/home/manjaro/GIT/bc_p2p/src/go/bc_dht/data/outputs/local_docker/coopbc/c6vns7d8gb19og3qh6gg', '/home/manjaro/GIT/bc_p2p/src/go/bc_dht/data/outputs/local_docker/coopbc/c6vns7d8gb19og3qh6gg/single', '/home/manjaro/GIT/bc_p2p/src/go/bc_dht/data/outputs/local_docker/coopbc/c6vns7d8gb19og3qh6gg/single/3', '/home/manjaro/GIT/bc_p2p/src/go/bc_dht/data/outputs/local_docker/coopbc/c6vns7d8gb19og3qh6gg/single/1', '/home/manjaro/GIT/bc_p2p/src/go/bc_dht/data/outputs/local_docker/coopbc/c6vns7d8gb19og3qh6gg/single/15', '/home/manjaro/GIT/bc_p2p/src/go/bc_dht/data/outputs/local_docker/coopbc/c6vns7d8gb19og3qh6gg/single/8', '/home/manjaro/GIT/bc_p2p/src/go/bc_dht/data/outputs/local_docker/coopbc/c6vns7d8gb19og3qh6gg/single/16', '/home/manjaro/GIT/bc_p2p/src/go/bc_dht/data/outputs/local_docker/coopbc/c6vns7d8gb19og3qh6g

### Find target JSON Outputs in .out files 

In [148]:
def check_dir_for_target_lines(search_path):
    search_str = "___UploaderData___"
    file_type = ".out"
    target_line = []
    result = []
    targets = 13 # MAGIC NUMBER for the number of outputs - i should have parsed the whole file as json and just check the exact message instead of all strings
    success = False
    for fname in os.listdir(path=search_path):
        if fname.endswith(file_type):
                fo = open(search_path + "/" + fname)
                line = fo.readline()
                line_no = 1
                while line != '' :
                        index = line.find(search_str)
                        if ( index != -1) :
                            # print(fname, "[", line_no, ",", index, "] ", line, sep="")
                            success = True
                        line = fo.readline()
                        if success == True and targets > 0:
                            result.append(line)
                            targets -= 1
                        line_no += 1
                fo.close()
    return result

output_lines_dict_by_runid = {}
for d in directory_contents:
    output = []
    output_lines = []
    paths_containing_target = all_paths_dict[d]
    for pct in paths_containing_target:
        output = check_dir_for_target_lines(pct)
        if output:
            for l in output:
                # print(l)
                output_lines.append(l)
    output_lines_dict_by_runid[d]=output_lines

print(output_lines_dict_by_runid)

{'c6vns7d8gb19og3qh6gg': ['{"ts":1639939643381479549,"msg":"","group_id":"single","run_id":"c6vns7d8gb19og3qh6gg","event":{"message_event":{"message":"Instances: 20"}}}\n', '{"ts":1639939643381495309,"msg":"","group_id":"single","run_id":"c6vns7d8gb19og3qh6gg","event":{"message_event":{"message":"SimLatency_ms: 80"}}}\n', '{"ts":1639939643381506229,"msg":"","group_id":"single","run_id":"c6vns7d8gb19og3qh6gg","event":{"message_event":{"message":"SimPeerBandwidth_bps: 1048576"}}}\n', '{"ts":1639939643381515326,"msg":"","group_id":"single","run_id":"c6vns7d8gb19og3qh6gg","event":{"message_event":{"message":"SimJitter_ms: 5"}}}\n', '{"ts":1639939643381526317,"msg":"","group_id":"single","run_id":"c6vns7d8gb19og3qh6gg","event":{"message_event":{"message":"SimLoss_pct: 1"}}}\n', '{"ts":1639939643381537628,"msg":"","group_id":"single","run_id":"c6vns7d8gb19og3qh6gg","event":{"message_event":{"message":"SimCorrupt_pct: 1"}}}\n', '{"ts":1639939643381548428,"msg":"","group_id":"single","run_id":

### Parse Json Results to Data Frame

In [149]:
import json
import pandas as pd

#d = {'Instances': [], 'Latency': [], 'Bandwidth': [], 'Jitter': [], 'Loss': [], 'Corrupt': [], 'S2CheckDuration': [], 'OriginalityRatio': [], 'OriginalCombinations': [], 'DhtUploadDuration': [], 'DhtQueryDuration': []}


uber_data = [{}]
for runid in output_lines_dict_by_runid:
    kvs_dict = {}
    kvs_dict["RunID"] = runid
    for o in output_lines_dict_by_runid[runid]:
        m = json.loads(o)["event"]["message_event"]["message"]
        kv = m.split(": ", 1)
        if len(kv) > 1:
            kvs_dict[kv[0]] = kv[1]
    uber_data.append(kvs_dict)
uber_data.pop(0)
print(uber_data)

my_data_frame = pd.DataFrame.from_dict(uber_data)
my_data_frame

[{'RunID': 'c6vns7d8gb19og3qh6gg', 'Instances': '20', 'SimLatency_ms': '80', 'SimPeerBandwidth_bps': '1048576', 'SimJitter_ms': '5', 'SimLoss_pct': '1', 'SimCorrupt_pct': '1', 'S2CheckDuration_s': '5.815842761', 'OriginalityRatio_RO': '0.7', 'UploadedOriginalCombinations': '14', 'DhtUploadDuration_s': '6.015219912', 'DhtQueryDuration_ms': '19.700734'}, {'RunID': 'c6vnsmd8gb19og3qh6h0', 'Instances': '30', 'SimLatency_ms': '80', 'SimPeerBandwidth_bps': '1048576', 'SimJitter_ms': '5', 'SimLoss_pct': '1', 'SimCorrupt_pct': '1', 'S2CheckDuration_s': '6.059036505', 'OriginalityRatio_RO': '0.7', 'UploadedOriginalCombinations': '14', 'DhtUploadDuration_s': '6.602333999', 'DhtQueryDuration_ms': '20.254672'}, {'RunID': 'c6vnrod8gb19og3qh6g0', 'Instances': '10', 'SimLatency_ms': '80', 'SimPeerBandwidth_bps': '1048576', 'SimJitter_ms': '5', 'SimLoss_pct': '1', 'SimCorrupt_pct': '1', 'S2CheckDuration_s': '6.834352239', 'OriginalityRatio_RO': '0.7', 'UploadedOriginalCombinations': '14', 'DhtUploadDu

Unnamed: 0,RunID,Instances,SimLatency_ms,SimPeerBandwidth_bps,SimJitter_ms,SimLoss_pct,SimCorrupt_pct,S2CheckDuration_s,OriginalityRatio_RO,UploadedOriginalCombinations,DhtUploadDuration_s,DhtQueryDuration_ms
0,c6vns7d8gb19og3qh6gg,20,80,1048576,5,1,1,5.815842761,0.7,14,6.015219912,19.700734
1,c6vnsmd8gb19og3qh6h0,30,80,1048576,5,1,1,6.059036505,0.7,14,6.602333999,20.254672
2,c6vnrod8gb19og3qh6g0,10,80,1048576,5,1,1,6.834352239,0.7,14,7.151007538,17.658881


In [152]:
import matplotlib.pyplot as plt
import numpy as np

my_data_frame.Instances=pd.to_numeric(my_data_frame.Instances)
my_data_frame.S2CheckDuration=pd.to_numeric(my_data_frame.S2CheckDuration_s)
my_data_frame.dtypes
#my_data_frame.plot(x='Instances', y='S2CheckDuration_s', style='o')

TypeError: no numeric data to plot

### Figures from DataFrame

In [95]:
df.plot()

NameError: name 'plt' is not defined