In [1]:
import pandas as pd
import os

# --- Path Settings ---
# Root directory of the dataset; the file/directory names below are joined onto it.
DIR_PREFIX = "./data/SF/"
# CSV of labeled centers; read for its "cluster-id" and "traffic-light" columns.
CENTERS_LABELED_FILENAME = "centers_100_labeled.csv"
# NOTE(review): not referenced in the cells visible here — presumably used further down; confirm.
WAITING_POSITIONS_FILENAME = "waiting_positions.csv"
# Name of the per-client error-rate CSV that is written under DIR_PREFIX.
ERROR_RATE_STATISTICS_FILENAME = "error_rates.csv"
# Sub-directory of DIR_PREFIX holding the event CSVs; each file's stem is used as a client name.
EVENTS_DIR_NAME = "event_SF"

# --- Test Settings ---
# NOTE(review): the three settings below are not used in the cells visible here;
# their exact meaning should be confirmed against the cells that consume them.
GROUP_NUM = 3
CLIENTS_NUM_MAX_LIMIT = 20
DROP_DUPLICATE_REPORTS = False
# Sanity check: twice the group count must stay strictly below the client limit.
assert GROUP_NUM * 2 < CLIENTS_NUM_MAX_LIMIT

In [2]:
# Load the labeled centers and reduce them to a lookup Series named "groundtruth":
# the index ("event-identifier") is the cluster id, read as object dtype, and the
# values are the "traffic-light" labels.
centers = (
    pd.read_csv(
        os.path.join(DIR_PREFIX, CENTERS_LABELED_FILENAME),
        dtype={"cluster-id": object},
    )
    .assign(
        **{
            "event-identifier": lambda frame: frame["cluster-id"],
            "groundtruth": lambda frame: frame["traffic-light"],
        }
    )
    .set_index("event-identifier")["groundtruth"]
)
centers.head()

event-identifier
3     1
6     0
7     0
17    0
20    0
Name: groundtruth, dtype: int64

In [3]:
# Build one DataFrame per client, keyed by client name (the event file's stem).
# Each frame is indexed by event identifier and holds a single "groundtruth"
# column looked up from `centers` for the events that client owns.
event_sets = dict()
for root, _, files in os.walk(os.path.join(DIR_PREFIX, EVENTS_DIR_NAME)):
    # Sort so the insertion order (and anything derived from it) is reproducible.
    for file in sorted(files):
        name = os.path.splitext(file)[0]
        # os.walk recurses into sub-directories, so the path must be built from
        # the directory currently being walked, not from the top-level directory.
        path = os.path.join(root, file)
        events = pd.read_csv(path, usecols=["identifier", "owned"], dtype={"identifier": object})
        events = events.set_index("identifier")

        # Keep only owned events; drop() returns a new frame, so the column added
        # below is not written onto a slice (avoids SettingWithCopyWarning).
        owned_events = events.loc[events["owned"]].drop(columns="owned")

        # Strict lookup: an identifier missing from `centers` raises KeyError.
        event_sets[name] = owned_events.assign(
            groundtruth=owned_events.index.map(lambda event_id: centers[event_id])
        )

print("clients_num:", len(event_sets))

clients_num: 20


In [4]:
# Per-client error rate: one minus the mean of the "groundtruth" column of that
# client's owned events (presumably a 0/1 label, so this is the share of events
# labeled 0 — confirm against the labeling scheme).
false_rates = pd.DataFrame(
    [[name, 1 - events["groundtruth"].mean()] for name, events in event_sets.items()],
    columns=["id", "error_rate"],
).set_index("id")

# Write under the configured file name instead of repeating the literal.
false_rates.to_csv(os.path.join(DIR_PREFIX, ERROR_RATE_STATISTICS_FILENAME))