# **Collecting Vehicles' Dataset Shared Upon Meetings -- All Data**

Computing the shared datasets during the simulation itself is computationally very expensive. Therefore, we collect the vehicles' datasets offline, after the simulations have finished.

In [1]:
import numpy as np
import pandas as pd

import json

from multiprocessing.pool import Pool, ThreadPool
from multiprocessing import Lock

from tqdm.notebook import trange

import copy

In [2]:
# Seeds of the simulation runs to process. Each seed selects one
# `vehicle_positions_{seed}.csv` input file and one output sub-directory.
SEEDS = [42, 1234, 1867, 613, 1001]
# Default value for both the minimum gap between two meetings of the same
# vehicle pair and the look-back window of the data shared upon a meeting.
# Expressed in the units of the "time" column (presumably seconds -- TODO confirm).
TIME_LIMIT = 300

In [3]:
# Directory holding the per-seed `vehicle_positions_{seed}.csv` files.
RESULTS_ROOT = "../../02_data/01_simulation_results/"
# JSON file whose "test_vehs" entry lists the vehicles to process.
VEH_LIST_PATH = "../../02_data/veh_list.json"
# Root of the meeting data; results are written below `<MEETING_PATH>/all_data/`.
MEETING_PATH = "../../02_data/03_meeting_data/"
# CSV that is loaded as the starting dataset of every vehicle.
COMBINED_PATH = "../../02_data/03_meeting_data/combined_dataset.csv"
# JSON file whose "edge_to_idx" entry enumerates the road edges.
EDGE_MAP_PATH = "../../02_data/edge_maps.json"
# JSON file mapping each edge to its neighboring edges.
NEIGHBORING_EDGES_FILE = "../../02_data/neighboring_edges.json"

In [4]:
# Mapping edge -> iterable of neighboring edges; used as the default
# neighborhood when "broad" meetings are collected.
# JSON is UTF-8 by specification, so do not depend on the locale's encoding.
with open(NEIGHBORING_EDGES_FILE, encoding="utf-8") as f:
    neighbors = json.load(f)

In [5]:
import os


# Create the output tree: `<MEETING_PATH>/all_data/<seed>/` for every seed.
# `exist_ok=True` replaces the exists()-then-create check, which can fail when
# the directory appears between the check and the creation.
os.makedirs(f"{MEETING_PATH}/all_data", exist_ok=True)
for s in SEEDS:
    os.makedirs(f"{MEETING_PATH}/all_data/{s}", exist_ok=True)

In [6]:
# Reading the *test* vehicles:

# JSON is UTF-8 by specification, so do not depend on the locale's encoding.
with open(VEH_LIST_PATH, encoding="utf-8") as f:
    veh_list = json.load(f)

# Only the vehicles listed under "test_vehs" are processed below.
test_vehicles = veh_list["test_vehs"]

In [7]:
def combine_commuters(veh_id):
    """Return the id under which a vehicle's records are merged.

    Ids starting with "carIn" are cut at their first ":"; every other id is
    returned unchanged. (Presumably the suffix distinguishes the individual
    trips of one commuter -- TODO confirm.)
    """
    is_commuter = veh_id.startswith("carIn")
    # partition() yields the text before the first ":" (the whole id if none).
    return veh_id.partition(":")[0] if is_commuter else veh_id


## Collecting meeting vehicles

In [8]:
# JSON is UTF-8 by specification, so do not depend on the locale's encoding.
with open(EDGE_MAP_PATH, encoding="utf-8") as f:
    edge_maps = json.load(f)

# Its keys are the edge ids that are scanned when looking for meetings.
edge_to_idx = edge_maps["edge_to_idx"]

In [9]:
# Reading the moving-simulation data (vehicle positions) of every seed:

frames = []
for s in SEEDS:
    filename = f"{RESULTS_ROOT}/vehicle_positions_{s}.csv"
    mf = pd.read_csv(filename)
    mf["seed"] = s  # the scalar is broadcast to every row
    frames.append(mf)
# A single concat at the end avoids re-copying the accumulated frame for
# every seed and avoids concatenating onto an empty DataFrame.
m_data = pd.concat(frames)

# Merge the commuter ids, then keep only the records of the test vehicles.
m_data["veh_id"] = m_data["veh_id"].apply(combine_commuters)
m_data = m_data[m_data["veh_id"].isin(test_vehicles)]

In [10]:
# Load the combined dataset and force integer "time" and "seed" columns.
whole_df = pd.read_csv(COMBINED_PATH)
whole_df = whole_df.astype({"time": int, "seed": int})

In [11]:
# Keep only the records of the test vehicles. The explicit copy makes the
# result an independent frame, so that adding a column below does not assign
# onto a slice of the loaded data (SettingWithCopyWarning).
whole_df = whole_df[whole_df["veh_id"].isin(test_vehicles)].copy()
# Initial value of every record; records obtained from another vehicle get the
# meeting time here instead (see receive_data).
whole_df["receive_time"] = -1

In [12]:
def collect_meeting_vehicles_narrow(m_data, meeting_time, seed, meeting_times, meeting_time_gap=TIME_LIMIT):
    """Collect the vehicle pairs that newly meet at ``meeting_time``.

    Two vehicles meet when, in the given time step of the given seed, they are
    on the same edge, or one is on a "-"-prefixed (reversed) edge and the other
    on the edge of the same name without the prefix. A pair is reported only if
    it has no entry in ``meeting_times`` or if its recorded meeting is more
    than ``meeting_time_gap`` older than ``meeting_time``.

    Args:
        m_data: DataFrame with "time", "seed", "edge" and "veh_id" columns.
        meeting_time: Time step to inspect.
        seed: Simulation seed to inspect.
        meeting_times: Dict mapping ``(sender, receiver)`` to the time of the
            pair's last meeting. It is only read here.
        meeting_time_gap: Elapsed time after which a pair may meet again.

    Returns:
        List of ``(sender, receiver)`` tuples. Every meeting is listed in both
        directions; a vehicle is never paired with itself.
    """
    snapshot = m_data[(m_data["time"] == meeting_time) & (m_data["seed"] == seed)]

    # Group once instead of re-scanning the snapshot for every edge. The ids
    # keep their order of first appearance within each edge.
    vehs_on_edge = snapshot.groupby("edge", sort=False)["veh_id"].unique().to_dict()

    mets = []

    def record_meeting(sender, receiver):
        # Not the 'ego' vehicle itself:
        if sender == receiver:
            return
        # If not met yet or met a long time ago:
        if ((sender, receiver) not in meeting_times
                or meeting_time - meeting_times[(sender, receiver)] > meeting_time_gap):
            mets.append((sender, receiver))
            mets.append((receiver, sender))  # they meet vice-versa

    # Same edges:
    for edge in edge_to_idx:
        vehs = vehs_on_edge.get(edge, ())
        for i, sender in enumerate(vehs):
            for receiver in vehs[i + 1:]:
                record_meeting(sender, receiver)

    # Opposed edges:
    for edge in edge_to_idx:
        # Only "reversed" edges are processed, to avoid duplicated meetings:
        if not edge.startswith("-"):
            continue
        # Strip only the leading "-": splitting on "-" would truncate edge ids
        # that contain another "-" later on.
        contra_edge = edge[1:]
        veh_contra = vehs_on_edge.get(contra_edge, ())
        for sender in vehs_on_edge.get(edge, ()):
            for receiver in veh_contra:
                record_meeting(sender, receiver)

    return mets

In [13]:
def collect_meeting_vehicles_broad(m_data, meeting_time, seed, meeting_times, meeting_time_gap=TIME_LIMIT,
                                   neighboring_edges = neighbors):
    """Collect the vehicle pairs that newly meet at ``meeting_time``.

    Two vehicles meet when, in the given time step of the given seed, they are
    on the same edge or on two edges that are neighbors according to
    ``neighboring_edges``. A pair is reported only if it has no entry in
    ``meeting_times`` or if its recorded meeting is more than
    ``meeting_time_gap`` older than ``meeting_time``.

    Args:
        m_data: DataFrame with "time", "seed", "edge" and "veh_id" columns.
        meeting_time: Time step to inspect.
        seed: Simulation seed to inspect.
        meeting_times: Dict mapping ``(sender, receiver)`` to the time of the
            pair's last meeting. It is only read here.
        meeting_time_gap: Elapsed time after which a pair may meet again.
        neighboring_edges: Mapping from an edge to an iterable of its
            neighboring edges. It must have an entry for every key of
            ``edge_to_idx``.

    Returns:
        List of ``(sender, receiver)`` tuples. Every meeting is listed in both
        directions exactly once per call; a vehicle is never paired with
        itself.

    Raises:
        KeyError: If an edge of ``edge_to_idx`` is missing from
            ``neighboring_edges``.
    """
    snapshot = m_data[(m_data["time"] == meeting_time) & (m_data["seed"] == seed)]

    # Group once instead of re-scanning the snapshot for every edge. The ids
    # keep their order of first appearance within each edge.
    vehs_on_edge = snapshot.groupby("edge", sort=False)["veh_id"].unique().to_dict()

    mets = []
    # Pairs already reported in this call. When A lists B as a neighbor and B
    # lists A, the same pair is reached from both edges; without this set it
    # would be reported (and later shared) several times per time step.
    reported = set()

    def record_meeting(sender, receiver):
        # Not the 'ego' vehicle itself, and not a pair that is already listed:
        if sender == receiver or (sender, receiver) in reported:
            return
        # If not met yet or met a long time ago:
        if ((sender, receiver) not in meeting_times
                or meeting_time - meeting_times[(sender, receiver)] > meeting_time_gap):
            mets.append((sender, receiver))
            mets.append((receiver, sender))  # they meet vice-versa
            reported.add((sender, receiver))
            reported.add((receiver, sender))

    # Same edges:
    for edge in edge_to_idx:
        vehs = vehs_on_edge.get(edge, ())
        for i, sender in enumerate(vehs):
            for receiver in vehs[i + 1:]:
                record_meeting(sender, receiver)

    # Other (neighboring) edges:
    for edge in edge_to_idx:
        veh_edge = vehs_on_edge.get(edge, ())  # invariant over the neighbors
        for neigh in neighboring_edges[edge]:
            veh_contra = vehs_on_edge.get(neigh, ())
            for sender in veh_edge:
                for receiver in veh_contra:
                    record_meeting(sender, receiver)

    return mets

In [14]:
def collect_data_upon_meeting(senders_data, meeting_time, seed, time_limit=TIME_LIMIT):
    """Select the records a sender hands over at a meeting.

    Keeps the rows of ``senders_data`` that belong to ``seed`` and whose "time"
    lies in ``[meeting_time - time_limit, meeting_time]`` (both ends included),
    and returns them without the "receive_time" column.
    """
    same_seed = senders_data["seed"] == seed
    timestamps = senders_data["time"]
    oldest_allowed = meeting_time - time_limit
    in_window = (timestamps <= meeting_time) & (timestamps >= oldest_allowed)
    send_data = senders_data[same_seed & in_window]

    # Debug aid: show the offending frame before drop() fails on the missing column.
    if "receive_time" not in send_data.columns:
        print(send_data)
    return send_data.drop(columns=["receive_time"])

In [15]:
def receive_data(args):
    """Hand the sender's recent data over to the receiver.

    ``args`` is the sequence ``(sender, receiver, time, seed, vehicles_kb)``,
    where ``vehicles_kb`` maps a vehicle id to that vehicle's DataFrame.

    Returns a tuple ``(record, kb)``: ``record`` is a dict describing the
    transfer (with the shared rows serialised as JSON), ``kb`` is the
    receiver's dataset extended with the shared rows. ``vehicles_kb`` itself is
    not modified.
    """
    sender, receiver, time, seed, vehicles_kb = args

    payload = collect_data_upon_meeting(vehicles_kb[sender], time, seed)

    # The log entry is serialised before the receive time is attached.
    shared_record = dict(
        sender=sender,
        receiver=receiver,
        time=time,
        data=payload.to_json(),
    )

    # Stamp the incoming rows with the time of the meeting.
    payload["receive_time"] = [time] * len(payload)

    # The receiver's own rows come first, so they win when a "hash" is duplicated.
    merged = pd.concat([vehicles_kb[receiver], payload], ignore_index=True)
    merged = merged.drop_duplicates(subset="hash", ignore_index=True)

    return shared_record, merged

In [None]:
def per_seed_script(seed):
    """Replay all meetings of one simulation seed and save the results.

    Every test vehicle starts with its own rows of ``whole_df``. For each time
    step the newly meeting pairs are collected and the sender's recent data is
    merged into the receiver's dataset. At the end, one CSV per vehicle and a
    ``shared_data.json`` log of all transfers are written to
    ``{MEETING_PATH}/all_data/{seed}/``.

    Args:
        seed: Simulation seed to process.

    Raises:
        ValueError: If ``whole_df`` holds no records for ``seed``.
    """
    p_data = whole_df[whole_df["seed"] == seed]
    if p_data.empty:
        raise ValueError(f"no records found for seed {seed}")

    meeting_times = {}
    store_sharing = []

    # DataFrame.copy() is deep by default, so every vehicle owns its data.
    vehicles_kb = {veh: p_data[p_data["veh_id"] == veh].copy() for veh in test_vehicles}

    first_time = p_data["time"].min()
    last_time = p_data["time"].max()
    # NOTE(review): the end of the range is exclusive, so the meetings of the
    # very last time step are not processed -- confirm this is intended.
    for t in trange(first_time, last_time):
        meetings = collect_meeting_vehicles_broad(m_data, t, seed, meeting_times)
        for sender, receiver in meetings:
            meeting_times[(sender, receiver)] = t

            new_store, updated_kb = receive_data([sender, receiver, t, seed, vehicles_kb])
            # receive_data returns a freshly built frame; no extra copy is needed.
            vehicles_kb[receiver] = updated_kb
            store_sharing.append(new_store)

    for veh, kb in vehicles_kb.items():
        kb.to_csv(f"{MEETING_PATH}/all_data/{seed}/{veh}.csv", index=False)
    store_dict = {
        "shared_data": store_sharing
    }
    with open(f"{MEETING_PATH}/all_data/{seed}/shared_data.json", "w") as f:
        json.dump(store_dict, f)

# The guard keeps worker processes from starting the fan-out again when the
# main module is re-imported under the "spawn" start method.
if __name__ == "__main__":
    # One worker per seed (at least one, since Pool rejects a size of 0).
    with Pool(max(1, len(SEEDS))) as ps:
        ps.map(per_seed_script, SEEDS)