In [1]:
import json
import random
# Parsing only
import pandas as pd
import re

## Functions
### Script Generation Functions

In [2]:
def check_test_case_validity(test_case_dataset):
    """Validate every test-case description before a script is generated.

    Parameters
    ----------
    test_case_dataset : iterable of dict
        Test cases as loaded from the JSON description file.

    Raises
    ------
    AssertionError
        On the first test case that violates a rule; the message names the
        offending test-case index and field.

    Notes
    -----
    The checks are ``assert`` statements, so they are skipped when Python runs
    with ``-O``.
    """
    for i, test_case in enumerate(test_case_dataset):
        assert ("NAME" in test_case), f"Test case #{i} Invalid NAME"

        assert ("N_STATES" in test_case and isinstance(test_case["N_STATES"], int) and
                0 < test_case["N_STATES"] <= 64), f"Test case #{i} Invalid N_STATES"

        assert ("N_SYMBOLS" in test_case and isinstance(test_case["N_SYMBOLS"], int) and
                0 < test_case["N_SYMBOLS"] <= test_case["N_STATES"]), f"Test case #{i} Invalid N_SYMBOLS"

        # At least two players, each contributing a positive number of inputs.
        assert ("PLAYER_INPUT_SIZES" in test_case and isinstance(test_case["PLAYER_INPUT_SIZES"], list) and
                len(test_case["PLAYER_INPUT_SIZES"]) > 1 and
                all((isinstance(x, int) and x > 0) for x in test_case["PLAYER_INPUT_SIZES"])), f"Test case #{i} Invalid PLAYER_INPUT_SIZES"

        # Optional fields are only checked when present.
        assert ("REPETITIONS" not in test_case or (isinstance(test_case["REPETITIONS"], int) and
               0 < test_case["REPETITIONS"])), f"Test case #{i} Invalid REPETITIONS"

        assert ("DEBUG" not in test_case or isinstance(test_case["DEBUG"], bool)), f"Test case #{i} Invalid DEBUG"

        assert ("VIRTUAL_MACHINE" not in test_case or (isinstance(test_case["VIRTUAL_MACHINE"], str) and
                test_case["VIRTUAL_MACHINE"] in ["./spdz2k-party.x", "./semi2k-party.x"])), f"Test case #{i} Invalid VIRTUAL_MACHINE"

        if "PLAYER_DATA" in test_case:
            all_player_data = test_case["PLAYER_DATA"]
            input_sizes = test_case["PLAYER_INPUT_SIZES"]
            assert (isinstance(all_player_data, list)), f"Test case #{i} Invalid PLAYER_DATA - Not a list"
            # Without this check a short PLAYER_DATA list would escape as an
            # IndexError from the loop below instead of a validation error.
            # Extra trailing entries are still tolerated (and ignored).
            assert (len(all_player_data) >= len(input_sizes)), (
                f"Test case #{i} Invalid PLAYER_DATA - Expected inputs for {len(input_sizes)} users")
            # NOTE(review): values equal to N_SYMBOLS are accepted here, while the
            # random generator in write_test_case only draws values below N_SYMBOLS
            # - confirm which upper bound is intended.
            max_value = test_case["N_SYMBOLS"]
            for j, size in enumerate(input_sizes):
                player_data = all_player_data[j]
                assert (isinstance(player_data, list) and len(player_data) == size and
                        all((isinstance(x, int) and 0 <= x <= max_value) for x in player_data)), f"Test case #{i} Invalid PLAYER_DATA - User {j} inputs are invalid"

In [3]:
def write_opening(test_case_dataset, fd):
    """Write the script prologue: network creation and container start-up.

    One container is started per player of the largest test case, so every
    test case in the dataset can reuse the same set of containers.

    Parameters
    ----------
    test_case_dataset : iterable of dict
        Test cases; only ``PLAYER_INPUT_SIZES`` is read here.
    fd : writable text stream
        Destination of the generated shell commands.

    Returns
    -------
    int
        Number of containers the script will start (0 for an empty dataset).
    """
    player_counts = [len(case["PLAYER_INPUT_SIZES"]) for case in test_case_dataset]
    max_containers = max(player_counts, default=0)

    # Create the internal network only when `docker network inspect` fails.
    create_network = (
        f"docker network inspect {NETWORK_NAME} >/dev/null 2>&1 || "
        f"docker network create --internal {NETWORK_NAME}\n"
    )
    fd.write(create_network)

    # Start one detached container per player slot.
    for index in range(max_containers):
        container = IMAGE_NAME_FORMAT.format(index)
        run_container = " ".join([
            "docker run -d -t",
            f"--name {container}",
            f"--network {NETWORK_NAME}",
            f"{IMAGE_NAME}",
            ">/dev/null",
        ])
        fd.write(run_container + "\n")

    return max_containers

In [4]:
def write_closing(test_case_dataset, fd):
    """Write the script epilogue that stops and removes every container.

    Parameters
    ----------
    test_case_dataset : iterable of dict
        Test cases; only ``PLAYER_INPUT_SIZES`` is read here.
    fd : writable text stream
        Destination of the generated shell commands.

    Returns
    -------
    int
        Number of containers the script tears down (0 for an empty dataset).
    """
    # Same container count as the prologue: the widest test case decides.
    max_containers = max(
        (len(case["PLAYER_INPUT_SIZES"]) for case in test_case_dataset),
        default=0,
    )

    for index in range(max_containers):
        container = IMAGE_NAME_FORMAT.format(index)
        # `rm` only runs when `stop` succeeded.
        fd.write(
            f"docker stop {container} >/dev/null && docker rm {container} >/dev/null\n"
        )

    return max_containers

In [43]:
def write_test_case(test_case, fd):
    """Write the shell commands that run one test case.

    The generated section echoes the test parameters, compiles the program in
    every player's container, stores each player's private input, and then
    runs the protocol ``REPETITIONS`` times. Only player 0 runs in the
    foreground, so its output is what ends up in the script's stdout.

    Parameters
    ----------
    test_case : dict
        One validated test-case description. ``NAME``, ``N_STATES``,
        ``N_SYMBOLS`` and ``PLAYER_INPUT_SIZES`` are required; ``DEBUG``,
        ``REPETITIONS``, ``VIRTUAL_MACHINE`` and ``PLAYER_DATA`` are optional.
    fd : writable text stream
        Destination of the generated shell commands.

    Notes
    -----
    When ``PLAYER_DATA`` is absent the inputs are drawn with the ``random``
    module, so the generated script differs between runs unless the caller
    seeds it.
    """
    player_input_sizes = test_case["PLAYER_INPUT_SIZES"]
    n_containers       = len(player_input_sizes)
    test_name          = test_case["NAME"]
    n_states           = test_case["N_STATES"]
    n_symbols          = test_case["N_SYMBOLS"]
    string_length      = sum(player_input_sizes)

    # Optional arguments
    debug_enabled   = test_case.get("DEBUG") is True
    debug           = "_debug" if debug_enabled else ""   # program-name suffix
    repetitions     = test_case.get("REPETITIONS", 1)
    virtual_machine = test_case.get("VIRTUAL_MACHINE", r"./semi2k-party.x")
    player_data     = test_case.get("PLAYER_DATA")

    sizes_as_text = [str(size) for size in player_input_sizes]

    fd.write(
        f"echo Starting {test_name}\n"
        f"echo N_STATES={n_states}\n"
        f"echo N_SYMBOLS={n_symbols}\n"
        f"echo N_PLAYERS={n_containers}\n"
        f"echo STRING_LENGTH={string_length}\n" +
        # The original compared the suffix string with `True`, so this marker
        # was never written even for debug test cases.
        ("echo DEBUG MODE\n" if debug_enabled else "") +
        f"echo VIRTUAL_MACHINE={virtual_machine}\n"
        f"echo REPETITIONS={repetitions}\n"
    )

    # compile and generate data
    sum_of_data = 0
    for i in range(n_containers):
        container = IMAGE_NAME_FORMAT.format(i)
        fd.write(
            f"docker exec {container} "
            f"./compile.py -M fsm_eval{debug} "
            f"{n_states} "
            f"{n_symbols} "
            f"{' '.join(sizes_as_text)} "
            ">/dev/null""\n"
        )

        if player_data is None:
            # generate uniformly over [0, n_symbols[
            values = [random.randrange(n_symbols) for _ in range(player_input_sizes[i])]
        else:
            values = player_data[i]
        # Inputs are written to the input file as hexadecimal literals.
        secret_data = " ".join(f"{value:x}" for value in values)

        fd.write(
            f"docker exec {container} bash -c '"
            f"echo {player_input_sizes[i]} {secret_data} > gf2n_vals.in && "
            "./gen_input_f2n.x && "
            f"mv gf2n_vals.out Player-Data/Private-Input-{i}' "
            ">/dev/null 2>&1""\n"
        )

        sum_of_data += sum(values)

    # adds sum of elements (Useful for checking final result with current FSM)
    fd.write(
        f"echo SUM_OF_DATA={sum_of_data}\n"
    )

    executable_name = f"fsm_eval{debug}-{n_states}-{n_symbols}-" + "-".join(sizes_as_text)

    # Every party connects to player 0's container; derive its name from the
    # shared format instead of hard-coding it so the two cannot drift apart.
    host = IMAGE_NAME_FORMAT.format(0)
    run_arguments = f"{executable_name} -N {n_containers} -h {host} -pn 5000"

    for k in range(repetitions):
        fd.write(
            f"echo Repetition {k+1}\n"
        )
        # start all players but 0 (detached)
        for i in range(n_containers - 1, 0, -1):
            fd.write(
                f"docker exec -d {IMAGE_NAME_FORMAT.format(i)} {virtual_machine} {i} "
                f"{run_arguments}\n"
            )

        # start player 0 (stdout on)
        fd.write(
            f"docker exec {host} {virtual_machine} 0 "
            f"{run_arguments}\n"
        )

    fd.write(
        f"echo Finished {test_name}\n"
    )

### Parsing Functions

In [6]:
class ParsingError(Exception):
    """Raised when the test-suite output does not match the expected layout."""

In [7]:
def parse_output_file(fd):
    """Parse the captured output of a generated test-suite script.

    Each test section is expected to look like::

        Starting <name>
        N_STATES=<int>
        N_SYMBOLS=<int>
        N_PLAYERS=<int>
        STRING_LENGTH=<int>
        VIRTUAL_MACHINE=<path>
        REPETITIONS=<int>
        SUM_OF_DATA=<int>
        Repetition <k>                      (block repeated REPETITIONS times)
        [Trying to run ...]                 (optional, skipped)
        String has been accepted|refused
        Time = <number> ...
        Data sent = <number> ...
        Global data sent = <number> ...
        Finished <name>

    Blank lines between sections are ignored.

    Parameters
    ----------
    fd : readable text stream
        Stream positioned at the start of the captured output.

    Returns
    -------
    pandas.DataFrame
        One row per repetition, carrying the test parameters and the measured
        values. Empty when the stream contains no test section.

    Raises
    ------
    ParsingError
        If a line does not match the expected layout, or if the output comes
        from a debug run (not supported).
    ValueError
        If an integer field holds a non-integer value.
    """
    # Decimal number with optional sign, fraction and exponent (e.g. ``1e+06``);
    # without the exponent part such values would be silently truncated.
    number_re = re.compile(r"[+-]?(?:[0-9]*[.])?[0-9]+(?:[eE][+-]?[0-9]+)?")
    line_count = 0

    def next_line():
        """Read one line and keep the 1-based line counter in sync."""
        nonlocal line_count
        line_count += 1
        return fd.readline()

    def invalid(line):
        """Build the error reported for a line that breaks the layout."""
        return ParsingError(f"Invalid Line ({line_count}): {line}")

    def field(line, key):
        """Return the stripped text following ``key=`` at the start of ``line``."""
        prefix = f"{key}="
        if not line.startswith(prefix):
            raise invalid(line)
        return line[len(prefix):].strip()

    def metric(prefix):
        """Read the next line and return the first number on it as a float."""
        line = next_line()
        match = number_re.search(line) if line.startswith(prefix) else None
        if match is None:
            raise invalid(line)
        return float(match.group(0))

    records = []
    while True:
        line = next_line()
        if not line:
            break  # end of stream
        if not line.strip():
            # Skip the blank line only; the next iteration reads the following
            # line, so nothing after a blank line is lost.
            continue
        if not line.startswith("Starting "):
            raise invalid(line)

        header = {"TestName": line[len("Starting "):].strip()}
        header["# States"] = int(field(next_line(), "N_STATES"))
        header["# Symbols"] = int(field(next_line(), "N_SYMBOLS"))
        header["# Players"] = int(field(next_line(), "N_PLAYERS"))
        header["String Length"] = int(field(next_line(), "STRING_LENGTH"))

        line = next_line()
        if line.startswith("DEBUG"):
            raise ParsingError(f"DEBUG flag not supported by parser ({line_count})")
        header["Virtual Machine"] = field(line, "VIRTUAL_MACHINE")

        repetitions = int(field(next_line(), "REPETITIONS"))
        header["Sum of Inputs"] = int(field(next_line(), "SUM_OF_DATA"))

        samples = []
        for _ in range(repetitions):
            line = next_line()
            if not line.startswith("Repetition "):
                raise invalid(line)

            line = next_line()
            if line.startswith("Trying to run"):
                # ignore message if present (semi2k only)
                line = next_line()

            if line.startswith("String has been refused"):
                accepted = False
            elif line.startswith("String has been accepted"):
                accepted = True
            else:
                raise invalid(line)

            # The three metric lines must be read in this order.
            elapsed = metric("Time = ")
            player_sent = metric("Data sent = ")
            global_sent = metric("Global data sent = ")
            samples.append({
                "Result": accepted,
                "Time(s)": elapsed,
                "Player-0 Data Sent(MB)": player_sent,
                "Global Data Sent(MB)": global_sent,
            })

        line = next_line()
        if not line.startswith("Finished "):
            raise invalid(line)

        # One independent row per repetition; rows are collected in a list and
        # turned into a frame once (DataFrame.append no longer exists in
        # pandas 2.x and was quadratic).
        for repetition, sample in enumerate(samples):
            records.append({**header, "Repetition": repetition, **sample})

    return pd.DataFrame(records)

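### Example JSON

The test description file is a JSON list of test cases. `NAME`, `N_STATES`, `N_SYMBOLS` and `PLAYER_INPUT_SIZES` are required; `REPETITIONS`, `DEBUG`, `VIRTUAL_MACHINE` and `PLAYER_DATA` are optional (see `check_test_case_validity` for the exact constraints).

```json
[
    {
        "NAME": "Test semi2k",
        "N_STATES": 8,
        "N_SYMBOLS": 8,
        "PLAYER_INPUT_SIZES": [5, 5, 5],
        "REPETITIONS": 3,
        "VIRTUAL_MACHINE": "./semi2k-party.x"
    }
]
```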

# Script Generation

Generates a test-suite shell script (`SCRIPT_NAME`) from a JSON test description file (`TEST_DESCRIPTION`, format described above).

The generated script is a shell script and only works on Linux. PowerShell support may be added later.

In [52]:
# Where the generated shell script is written and which test description it is built from.
SCRIPT_NAME = "./TestSuite.sh"
TEST_DESCRIPTION = "./Tests/spdz-semi-comparison.json"

# Docker image the containers are started from, and the network they are attached to.
IMAGE_NAME = "mp-fsm-eval"
NETWORK_NAME = f"{IMAGE_NAME}-internal-lan"

# Container names: mp-fsm-eval-0 ... mp-fsm-eval-n (the placeholder is the player index).
IMAGE_NAME_FORMAT = "mp-fsm-eval-{}"

In [53]:
# Load the test cases. JSON text is UTF-8, so the encoding is given explicitly
# instead of relying on the platform's default.
with open(TEST_DESCRIPTION, encoding="utf-8") as description_file:
    test_case_dataset = json.load(description_file)

In [54]:
# Fail early (AssertionError) if any loaded test case is malformed.
check_test_case_validity(test_case_dataset)

In [55]:
# newline='\n' keeps Unix line endings in the script whatever the host platform.
with open(SCRIPT_NAME, "w+", newline='\n') as script_file:
    # Prologue: create the network and start the containers
    write_opening(test_case_dataset, script_file)

    # Body: one section per test case, in dataset order
    for current_case in test_case_dataset:
        write_test_case(current_case, script_file)

    # Epilogue: stop and remove the containers
    write_closing(test_case_dataset, script_file)

To run the script, Docker must be running and `IMAGE_NAME` must be set to the correct image name.

It is recommended to redirect both stdout and stderr to a file:
`SCRIPT_NAME > OUTPUT_FILE 2>&1`

A new Docker network named `NETWORK_NAME` will be created if it does not already exist.

# Data Aggregation

Parses the output file produced by running a test suite generated in the section above.

Generate this data first by running `SCRIPT_NAME > OUTPUT_FILE 2>&1`

In [64]:
# Parser input: the captured output of the test-suite script.
OUTPUT_FILE = "./Results/Raw/spdz-semi-comparison.txt"
# Parsed results are saved to this CSV file.
# NOTE: the constant is spelled CVS_FILE (sic); the export cell refers to it by this name.
CVS_FILE = "./Results/Parsed/spdz-semi-comparison.csv"

In [65]:
# Read the captured script output and turn it into one row per repetition.
with open(OUTPUT_FILE, newline='\n') as results_file:
    dataframe = parse_output_file(results_file)

In [66]:
# Give the count columns a compact integer dtype and make the result a boolean.
dataframe = dataframe.astype({
    "# States": "int32",
    "# Symbols": "int32",
    "# Players": "int32",
    "Repetition": "int32",
    "String Length": "int32",
    "Sum of Inputs": "int32",
    "Result": "bool",
})

In [67]:
# Put the columns in presentation order: identification, parameters, measurements.
dataframe = dataframe.loc[:, [
    "TestName",
    "Repetition",
    "# Players",
    "# States",
    "# Symbols",
    "String Length",
    "Virtual Machine",
    "Time(s)",
    "Player-0 Data Sent(MB)",
    "Global Data Sent(MB)",
    "Result",
    "Sum of Inputs",
]]

In [68]:
# Display the parsed results (one row per repetition).
dataframe

Unnamed: 0,TestName,Repetition,# Players,# States,# Symbols,String Length,Virtual Machine,Time(s),Player-0 Data Sent(MB),Global Data Sent(MB),Result,Sum of Inputs
0,Test semi2k,0,3,8,8,15,./semi2k-party.x,0.321248,16.6005,49.7077,False,61
1,Test semi2k,1,3,8,8,15,./semi2k-party.x,0.320494,16.6005,49.7077,False,61
2,Test semi2k,2,3,8,8,15,./semi2k-party.x,0.328809,16.6005,49.7077,False,61
3,Test spdz2k,0,3,8,8,15,./spdz2k-party.x,1.36443,131.71,394.972,False,53
4,Test spdz2k,1,3,8,8,15,./spdz2k-party.x,1.37157,131.71,394.972,False,53
5,Test spdz2k,2,3,8,8,15,./spdz2k-party.x,1.32906,131.71,394.972,False,53


In [69]:
# Save the parsed results; the row index is written as the first, unnamed column.
dataframe.to_csv(CVS_FILE)