# **Collecting Vehicles' Reference Dataset**

This notebook collects the per-vehicle reference datasets used for vanilla federated learning.

In [4]:
import numpy as np
import pandas as pd

import json

from multiprocessing.pool import Pool, ThreadPool
from multiprocessing import Lock

from tqdm.notebook import trange

import copy

In [5]:
# Seeds identifying the simulation runs; the cells below read one
# `vehicle_positions_<seed>.csv` and write one `ref/<seed>/` folder per entry.
SEEDS = [
    42,
    1234,
    1867,
    613,
    1001,
]
# NOTE(review): not referenced by any of the visible cells — purpose and unit
# to be confirmed.
TIME_LIMIT = 300

In [None]:
# Input files and folders (relative paths, resolved against the current
# working directory).
VEH_LIST_PATH = "../../02_data/veh_list.json"
EDGE_MAP_PATH = "../../02_data/edge_maps.json"
RESULTS_ROOT = "../../02_data/01_simulation_results/"
COMBINED_PATH = "../../02_data/03_meeting_data/combined_dataset.csv"

# Folder below which the per-seed `ref/<seed>/` output directories are created.
MEETING_PATH = "../../02_data/03_meeting_data/"

In [12]:
import os


# Create the output tree <MEETING_PATH>/ref/<seed>/ for every seed.
# exist_ok=True keeps the cell re-runnable and avoids the check-then-create
# race of testing os.path.exists() before calling os.makedirs().
os.makedirs(f"{MEETING_PATH}/ref", exist_ok=True)
for s in SEEDS:
    os.makedirs(f"{MEETING_PATH}/ref/{s}", exist_ok=True)

In [7]:
#reading *test* vehicles:

# JSON text is UTF-8 by specification; passing the encoding explicitly keeps
# the read independent of the platform's default locale encoding.
with open(VEH_LIST_PATH, encoding="utf-8") as f:
    veh_list = json.load(f)

# Only the "test_vehs" entry is used by the visible cells of this notebook.
test_vehicles = veh_list["test_vehs"]

In [8]:
def combine_commuters(veh_id):
    """Collapse a ``carIn`` vehicle id to the part before its first ``:``.

    Ids that start with ``"carIn"`` are cut at the first colon (and returned
    unchanged when they contain no colon); every other id is returned as-is.
    """
    is_car_in = veh_id.startswith("carIn")
    return veh_id.partition(":")[0] if is_car_in else veh_id


## Collecting meeting vehicles

In [2]:
# Load the combined dataset from COMBINED_PATH.
# NOTE(review): the same file is read again into `whole_df` further down, and
# this global is not read by any of the visible cells — confirm whether it is
# still needed before removing it.
p_data = pd.read_csv(COMBINED_PATH)

In [3]:
# Explicit UTF-8 keeps the JSON read independent of the platform's default
# locale encoding.
with open(EDGE_MAP_PATH, encoding="utf-8") as f:
    edge_maps = json.load(f)

# NOTE(review): presumably maps edge ids to integer indices; it is not used in
# the visible cells — confirm.
edge_to_idx = edge_maps["edge_to_idx"]

In [9]:
#reading moving simulation data:

# Load one position log per seed and tag its rows with that seed. The frames
# are collected in a list and concatenated once: growing a DataFrame with
# pd.concat inside the loop re-copies all earlier rows on every iteration, and
# concatenating onto an empty DataFrame can emit a FutureWarning in recent
# pandas versions.
seed_frames = []
for s in SEEDS:
    filename = f"{RESULTS_ROOT}/vehicle_positions_{s}.csv"
    mf = pd.read_csv(filename)
    mf["seed"] = s  # a scalar is broadcast to every row
    seed_frames.append(mf)
m_data = pd.concat(seed_frames)

# Collapse carIn ids via combine_commuters, then keep only the test vehicles.
m_data["veh_id"] = m_data["veh_id"].apply(combine_commuters)
m_data = m_data[m_data["veh_id"].isin(test_vehicles)]

In [10]:
# Load the combined dataset and force its "time" and "seed" columns to int.
whole_df = pd.read_csv(COMBINED_PATH)
for int_col in ("time", "seed"):
    whole_df[int_col] = whole_df[int_col].astype(int)

In [11]:
# Keep only the test vehicles' rows. The explicit .copy() makes the result an
# independent frame, so adding a column below cannot trigger pandas'
# SettingWithCopyWarning about writing to a slice of the unfiltered data.
whole_df = whole_df[whole_df["veh_id"].isin(test_vehicles)].copy()

# Every row starts with receive_time = -1.
# NOTE(review): -1 looks like a "not received yet" placeholder — confirm.
receive_time = [-1]*len(whole_df)
whole_df["receive_time"] = receive_time

In [13]:
def per_seed_script(seed):
    """Write the reference dataset files for one simulation seed.

    For every id in ``test_vehicles`` the rows of ``whole_df`` that belong to
    ``seed`` and to that vehicle are written to
    ``{MEETING_PATH}/ref/{seed}/{veh}.csv`` (header line only when the vehicle
    has no rows). A ``shared_data.json`` holding an empty ``"shared_data"``
    list is written into the same directory.

    Reads the module-level ``whole_df``, ``test_vehicles`` and
    ``MEETING_PATH``; the output directory must already exist.

    Args:
        seed: Value to select in the ``"seed"`` column of ``whole_df``.

    Returns:
        None.
    """
    out_dir = f"{MEETING_PATH}/ref/{seed}"
    seed_df = whole_df[whole_df["seed"] == seed]

    # Boolean-mask selection already yields a new frame, so each vehicle's
    # rows are written straight away; no deep copy or per-vehicle dict of
    # frames has to be kept in memory.
    for veh in test_vehicles:
        veh_rows = seed_df[seed_df["veh_id"] == veh]
        veh_rows.to_csv(f"{out_dir}/{veh}.csv", index=False)

    # Nothing is ever appended to the shared-data list in this function, so it
    # is written out empty.
    store_dict = {
        "shared_data": []
    }
    with open(f"{out_dir}/shared_data.json", "w", encoding="utf-8") as f:
        json.dump(store_dict, f)

# Run the per-seed export in a pool of five worker processes, one task per
# entry of SEEDS; map() blocks until every task has finished.
with Pool(processes=5) as ps:
    ps.map(func=per_seed_script, iterable=SEEDS)