# **Collecting Vehicles' Dataset Shared Upon Meetings -- All Data**

Computing the shared datasets at simulation time is computationally very expensive. Therefore, we collect each vehicle's dataset offline, after the simulations have finished.

In [1]:
import numpy as np
import pandas as pd

import json

from multiprocessing.pool import Pool, ThreadPool
from multiprocessing import Lock

from tqdm.notebook import trange

import copy

In [2]:
# Seeds to process: each one gets its own output folder and its own worker
# task, and is matched against the "seed" column of the combined dataset.
SEEDS = [42, 1234, 1867, 613, 1001]
# Default look-back window of collect_data_upon_meeting(): at a meeting only
# rows with meeting_time - TIME_LIMIT <= time <= meeting_time are shared.
# NOTE(review): expressed in the unit of the "time" column -- presumably
# simulation seconds; confirm.
TIME_LIMIT = 300

In [3]:
# NOTE(review): RESULTS_ROOT is not referenced by the code visible in this
# notebook -- presumably the folder of the raw simulation outputs; confirm.
RESULTS_ROOT = "../../02_data/01_simulation_results/"
# JSON file holding the vehicle lists; its "test_vehs" entry is read below.
VEH_LIST_PATH = "../../02_data/veh_list.json"
# Output root: results are written to <MEETING_PATH>/all_data/<seed>/.
MEETING_PATH = "../../02_data/03_meeting_data/"
# CSV that is loaded into `whole_df`.
COMBINED_PATH = "../../02_data/03_meeting_data/combined_dataset.csv"

# Path *prefix* of the meeting files; the full name is built as
# f"{MEETING_VEHICLES}_{meeting_collection_name}_{seed}.json".
MEETING_VEHICLES = "../../02_data/meeting_vehicles"

In [4]:
import os


# Create the output tree <MEETING_PATH>/all_data/<seed>/ up front.
# exist_ok=True makes the calls idempotent and avoids the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(f"{MEETING_PATH}/all_data", exist_ok=True)
for s in SEEDS:
    os.makedirs(f"{MEETING_PATH}/all_data/{s}", exist_ok=True)

In [5]:
# Read the vehicle list file and pick out the *test* vehicles:

with open(VEH_LIST_PATH) as f:
    veh_list  = json.load(f)

# Ids under the "test_vehs" key; used below to filter the combined dataset
# (via isin) and to build one dataset per vehicle.
test_vehicles = veh_list["test_vehs"]

In [6]:
def combine_commuters(veh_id):
    """Return the base id of a ``carIn...`` vehicle id.

    Ids that start with ``"carIn"`` are cut at the first ``":"`` (the part
    before it is returned); every other id is returned unchanged.
    """
    base_id, _, _ = veh_id.partition(":")
    return base_id if veh_id.startswith("carIn") else veh_id


In [7]:
# Load the combined dataset of all vehicles and seeds.
whole_df = pd.read_csv(COMBINED_PATH)
# Cast "time" and "seed" to int; they are later compared against the integer
# time steps and the integer seeds from SEEDS.
whole_df["time"] = whole_df["time"].astype(int)
whole_df["seed"] = whole_df["seed"].astype(int)

In [8]:
# Keep only the rows that belong to the test vehicles. The explicit .copy()
# detaches the result from the original frame so that adding a column below
# does not trigger pandas' SettingWithCopyWarning.
whole_df = whole_df[whole_df["veh_id"].isin(test_vehicles)].copy()
# Initial placeholder: every row starts with receive_time == -1; rows that are
# handed over at a meeting get the meeting time instead (see receive_data).
receive_time = [-1]*len(whole_df)
whole_df["receive_time"] = receive_time

In [9]:
def collect_data_upon_meeting(senders_data, meeting_time, seed, time_limit=TIME_LIMIT):
    """Select the rows a sender hands over at a meeting.

    Parameters
    ----------
    senders_data : pandas.DataFrame
        The sender's current dataset; must have "seed", "time" and
        "receive_time" columns.
    meeting_time : int
        Time of the meeting.
    seed : int
        Only rows of this seed are selected.
    time_limit : int
        Length of the look-back window; rows older than
        ``meeting_time - time_limit`` are not shared.

    Returns
    -------
    pandas.DataFrame
        Rows with the given seed and
        ``meeting_time - time_limit <= time <= meeting_time``, without the
        "receive_time" column.
    """
    same_seed = senders_data["seed"] == seed
    not_in_future = senders_data["time"] <= meeting_time
    recent_enough = senders_data["time"] >= meeting_time - time_limit
    shareable = senders_data[same_seed & not_in_future & recent_enough]

    # Debug aid: dump the frame when the column about to be dropped is missing
    # (the drop below then raises KeyError).
    if "receive_time" not in shareable.columns:
        print(shareable)
    return shareable.drop(columns=["receive_time"])

In [10]:
def receive_data(args):
    """Hand the sender's recent data over to the receiver at one meeting.

    Parameters
    ----------
    args : sequence
        ``(sender, receiver, time, seed, vehicles_kb)`` where ``vehicles_kb``
        maps a vehicle id to that vehicle's current dataset (DataFrame).

    Returns
    -------
    tuple
        ``(record, updated_kb)``: ``record`` is a dict with the keys
        "sender", "receiver", "time" and "data" (the shared rows as a JSON
        string); ``updated_kb`` is the receiver's dataset extended with the
        shared rows and de-duplicated on the "hash" column. ``vehicles_kb``
        itself is not modified.
    """
    sender, receiver, time, seed, vehicles_kb = args

    shared_rows = collect_data_upon_meeting(vehicles_kb[sender], time, seed)

    # Log of the exchange; the rows are serialised without a receive_time.
    exchange_record = dict(
        sender=sender,
        receiver=receiver,
        time=time,
        data=shared_rows.to_json(),
    )

    # Stamp the shared rows with the meeting time, then merge them into the
    # receiver's dataset. On duplicate hashes the first occurrence is kept,
    # i.e. the row the receiver already had.
    stamped_rows = shared_rows.assign(receive_time=[time] * len(shared_rows))
    merged_kb = pd.concat([vehicles_kb[receiver], stamped_rows], ignore_index=True)

    return exchange_record, merged_kb.drop_duplicates(subset="hash", ignore_index=True)

In [11]:
def per_seed_script(args):
    """Replay all meetings of one seed and write the resulting datasets.

    Parameters
    ----------
    args : tuple
        ``(seed, meeting_collection_name)``. The meetings are read from
        ``f"{MEETING_VEHICLES}_{meeting_collection_name}_{seed}.json"``, which
        maps a time step (as a string) to a list of ``[sender, receiver]``
        pairs.

    Side effects
    ------------
    Writes ``<MEETING_PATH>/all_data/<seed>/<veh>.csv`` for every test vehicle
    and ``<MEETING_PATH>/all_data/<seed>/shared_data.json`` with the log of all
    exchanges.

    Raises
    ------
    ValueError
        If the combined dataset holds no rows for ``seed``.
    """
    seed, meeting_collection_name = args

    with open(f"{MEETING_VEHICLES}_{meeting_collection_name}_{seed}.json") as f:
        meetings = json.load(f)

    p_data = whole_df[whole_df["seed"] == seed]
    if p_data.empty:
        raise ValueError(f"no rows for seed {seed} in the combined dataset")

    # Every vehicle starts with its own rows only; .copy() keeps each dataset
    # independent of p_data.
    vehicles_kb = {
        veh: p_data[p_data["veh_id"] == veh].copy() for veh in test_vehicles
    }
    store_sharing = []

    first_t = int(p_data["time"].min())
    last_t = int(p_data["time"].max())
    # NOTE(review): the upper bound is exclusive, so meetings at the very last
    # time step are not processed -- confirm this is intended.
    for t in trange(first_t, last_t):
        # JSON object keys are strings, hence str(t).
        for sender, receiver in meetings.get(str(t), ()):
            new_store, updated_kb = receive_data(
                [sender, receiver, t, seed, vehicles_kb]
            )
            # receive_data returns a freshly built frame, so it can be stored
            # directly; copying the whole dataset per meeting is unnecessary.
            vehicles_kb[receiver] = updated_kb
            store_sharing.append(new_store)

    for veh, kb in vehicles_kb.items():
        kb.to_csv(f"{MEETING_PATH}/all_data/{seed}/{veh}.csv", index=False)
    with open(f"{MEETING_PATH}/all_data/{seed}/shared_data.json", "w") as f:
        json.dump({"shared_data": store_sharing}, f)

# One task (and one worker process) per seed, all using the "broad" meeting
# collection. Both sizes are derived from SEEDS so that changing the seed list
# cannot silently drop seeds or leave workers idle.
with Pool(max(1, len(SEEDS))) as ps:
    ps.map(per_seed_script, [(seed, "broad") for seed in SEEDS])