# Notebook to Reproduce Originality Detection Findings

## Start Testground

```
$ testground daemon
```

## Run Testplans in Docker:
``` 
$ testground run single --plan=coopbc --testcase=bc --runner=local:docker --builder=docker:go --instances=10 --collect
```

### Run Testplans in K8S:
```
$ testground run single --plan=coopbc --testcase=bc --runner=cluster:k8s --builder=docker:go --instances=10 --collect --build-cfg go_proxy_mode=direct
```

# Parsing Outputs

### Find Output Dirs

In [15]:
import os

outputs_folder = "c6vhkft8gb1ak76ffd00"

docker_data_dir = "../outputs/local_docker/coopbc/"
directory_contents = os.listdir(docker_data_dir)
print(directory_contents)

['c6vhn9t8gb1ak76ffd30', 'c6vhlst8gb1ak76ffd1g', 'c6vhmbt8gb1ak76ffd20', 'c6vhkft8gb1ak76ffd00', 'c6vhnp58gb1ak76ffd3g', 'c6vhkut8gb1ak76ffd0g', 'c6vhmqt8gb1ak76ffd2g', 'c6vhond8gb1ak76ffd4g', 'c6vhldt8gb1ak76ffd10', 'c6vho8d8gb1ak76ffd40']


### Find all .out files and map them to their RUNID

In [73]:
import os
import pathlib
from pathlib import Path
import glob

def listdir_r(dirpath):
    paths=[]
    paths.append(dirpath)
    for path in os.listdir(dirpath):
        rpath = os.path.join(dirpath, path)
        if os.path.isdir(rpath):
            subdirs = listdir_r(rpath)
            if not subdirs == []:
                paths.extend(subdirs)
    return paths

all_paths_dict = {}
for d in directory_contents:
    print(docker_data_dir + d)
    all_paths = listdir_r(os.path.abspath(docker_data_dir + d))
    all_paths_containing_target = [s for s in all_paths if d in s]
    all_paths_dict[d]=all_paths_containing_target

print(all_paths_dict)

../outputs/local_docker/coopbc/c6vhn9t8gb1ak76ffd30
../outputs/local_docker/coopbc/c6vhlst8gb1ak76ffd1g
../outputs/local_docker/coopbc/c6vhmbt8gb1ak76ffd20
../outputs/local_docker/coopbc/c6vhkft8gb1ak76ffd00
../outputs/local_docker/coopbc/c6vhnp58gb1ak76ffd3g
../outputs/local_docker/coopbc/c6vhkut8gb1ak76ffd0g
../outputs/local_docker/coopbc/c6vhmqt8gb1ak76ffd2g
../outputs/local_docker/coopbc/c6vhond8gb1ak76ffd4g
../outputs/local_docker/coopbc/c6vhldt8gb1ak76ffd10
../outputs/local_docker/coopbc/c6vho8d8gb1ak76ffd40
{'c6vhn9t8gb1ak76ffd30': ['/home/manjaro/GIT/bc_p2p/src/go/bc_dht/data/outputs/local_docker/coopbc/c6vhn9t8gb1ak76ffd30', '/home/manjaro/GIT/bc_p2p/src/go/bc_dht/data/outputs/local_docker/coopbc/c6vhn9t8gb1ak76ffd30/single', '/home/manjaro/GIT/bc_p2p/src/go/bc_dht/data/outputs/local_docker/coopbc/c6vhn9t8gb1ak76ffd30/single/44', '/home/manjaro/GIT/bc_p2p/src/go/bc_dht/data/outputs/local_docker/coopbc/c6vhn9t8gb1ak76ffd30/single/60', '/home/manjaro/GIT/bc_p2p/src/go/bc_dht/dat

### Find target JSON Outputs in .out files 

In [78]:
def check_dir_for_target_lines(search_path):
    search_str = "___UploaderData___"
    file_type = ".out"
    target_line = []
    result = []
    targets = 13 # MAGIC NUMBER for the number of outputs - i should have parsed the whole file as json and just check the exact message instead of all strings
    success = False
    for fname in os.listdir(path=search_path):
        if fname.endswith(file_type):
                fo = open(search_path + "/" + fname)
                line = fo.readline()
                line_no = 1
                while line != '' :
                        index = line.find(search_str)
                        if ( index != -1) :
                            # print(fname, "[", line_no, ",", index, "] ", line, sep="")
                            success = True
                        line = fo.readline()
                        if success == True and targets > 0:
                            result.append(line)
                            targets -= 1
                        line_no += 1
                fo.close()
    return result

output_lines_dict_by_runid = {}
for d in directory_contents:
    output = []
    output_lines = []
    paths_containing_target = all_paths_dict[d]
    for pct in paths_containing_target:
        output = check_dir_for_target_lines(pct)
        if output:
            for l in output:
                # print(l)
                output_lines.append(l)
    output_lines_dict_by_runid[d]=output_lines

print(output_lines_dict_by_runid)

{'c6vhn9t8gb1ak76ffd30': ['{"ts":1639914461588817294,"msg":"","group_id":"single","run_id":"c6vhn9t8gb1ak76ffd30","event":{"message_event":{"message":"Instances: 70"}}}\n', '{"ts":1639914461588836821,"msg":"","group_id":"single","run_id":"c6vhn9t8gb1ak76ffd30","event":{"message_event":{"message":"SimLatency: 80ms"}}}\n', '{"ts":1639914461588850897,"msg":"","group_id":"single","run_id":"c6vhn9t8gb1ak76ffd30","event":{"message_event":{"message":"SimPeerBandwidth (bytes per second): 1048576"}}}\n', '{"ts":1639914461588861316,"msg":"","group_id":"single","run_id":"c6vhn9t8gb1ak76ffd30","event":{"message_event":{"message":"SimJitter (ms): 5ms"}}}\n', '{"ts":1639914461588873760,"msg":"","group_id":"single","run_id":"c6vhn9t8gb1ak76ffd30","event":{"message_event":{"message":"SimLoss (%): 1"}}}\n', '{"ts":1639914461588888878,"msg":"","group_id":"single","run_id":"c6vhn9t8gb1ak76ffd30","event":{"message_event":{"message":"SimCorrupt (%): 1"}}}\n', '{"ts":1639914461588898546,"msg":"","group_id":

### Parse Json Results to Data Frame

In [117]:
import json
import pandas as pd

#d = {'Instances': [], 'Latency': [], 'Bandwidth': [], 'Jitter': [], 'Loss': [], 'Corrupt': [], 'S2CheckDuration': [], 'OriginalityRatio': [], 'OriginalCombinations': [], 'DhtUploadDuration': [], 'DhtQueryDuration': []}


uber_data = [{}]
for runid in output_lines_dict_by_runid:
    kvs_dict = {}
    kvs_dict["RunID"] = runid
    for o in output_lines_dict_by_runid[runid]:
        m = json.loads(o)["event"]["message_event"]["message"]
        kv = m.split(": ", 1)
        if len(kv) > 1:
            kvs_dict[kv[0]] = kv[1]
    uber_data.append(kvs_dict)
uber_data.pop(0)
print(uber_data)

pd.DataFrame.from_dict(uber_data)



TypeError: 'builtin_function_or_method' object is not subscriptable

### Figures from DataFrame

In [95]:
df.plot()

NameError: name 'plt' is not defined