# **Converting raw results into an NN input**

In [20]:
# Root directory that holds one "day_<n>/" sub-folder per simulated day.
MEASUREMENT_PATH = "../dockeroutput/"
# Destination directory for the per-vehicle CSV files written by this notebook.
OUTPUT_PATH = "../02_data/vehicle_data/"
# Number of "day_<n>" folders processed by the per-day loops below.
NUM_DAYS = 60

In [21]:
import numpy as np
import pandas as pd
import json

from tqdm.notebook import trange

import utils
from importlib import reload
reload(utils)

<module 'utils' from '/mnt/data/phd/kutatas/sumo_sec/03_src/utils.py'>

In [3]:
# Load the day-0 lookup tables that translate between vehicle names and
# integer indices. The file handle is only needed for parsing, so the
# lookups happen after it has been closed.
with open("../dockeroutput/day_0/vehicle_maps.json", "r") as f:
    maps = json.load(f)

# vehicle name -> index
nominal_veh_to_id_encoding = maps["vehicle_to_idx_map"]
# index (as a string key) -> vehicle name
nominal_id_to_veh_encoding = maps["idx_to_vehicle_map"]

## **Filtering**

>Due to the size of the data, we shall filter to a limited number of vehicles.

Hence, we select only 10% of the vehicles, chosen uniformly at random from the list of vehicles.

In [4]:
# Draw a tenth of the vehicle indices without replacement and translate them
# back to vehicle names (the index map is keyed by strings).
# NOTE(review): no seed is set in this cell, so the sample changes per run.
n_vehicles = len(nominal_id_to_veh_encoding)
filter_idx = np.random.choice(n_vehicles, size=int(n_vehicles / 10), replace=False)
filtered_vehicles = [nominal_id_to_veh_encoding[str(fx)] for fx in filter_idx]

In [5]:
# Load the day-0 measurements and keep only the sampled vehicles.
df = pd.read_csv("../dockeroutput/day_0/vehicle_measurement.csv")
# .copy() makes the filtered frame independent of the raw one, so the column
# assignment below does not write into a slice (SettingWithCopyWarning).
df = df[df["veh_id"].isin(filtered_vehicles)].copy()
print("filtered", len(df))

# Replace vehicle names by their integer codes. Iterating the column directly
# avoids building a Series per row, as iterrows() does; an unknown name still
# raises KeyError, as before.
id_nums = [nominal_veh_to_id_encoding[veh] for veh in df["veh_id"]]

df["veh_id"]=id_nums

filtered 2610375


In [6]:
# Preview the first rows of the filtered day-0 frame after veh_id was replaced.
df.head()

Unnamed: 0,veh_id,day,timestamp,parking_id,occupancy
75,36,0,18220,1091,0.0
86,36,0,18230,1091,0.0
95,45,0,18240,1151,0.0
206,34,0,18310,1152,0.0
207,45,0,18310,1133,0.0


In [7]:
# Category list handed to utils.one_hot_encoder for the parking_id column:
# every integer from 1059 to 1186 inclusive.
id_list = list(range(1059, 1186 + 1))

In [8]:
# Statistics of the day-0 sample, reused later to scale every day's data:
# occupancy is standardized with mean/std, timestamp is normalized with min/max.
occupancy = df["occupancy"]
timestamp = df["timestamp"]

mean = occupancy.mean()
std = occupancy.std(ddof=0)  # population std, the default of np.std
min_ = timestamp.min()
max_ = timestamp.max()

for template, value in (
    ("Mean occup.:\t%.5f", mean),
    ("Std occup.:\t%.5f", std),
    ("Min_t:\t%d", min_),
    ("Max_t:\t%d", max_),
):
    print(template % value)

Mean occup.:	0.58785
Std occup.:	0.35377
Min_t:	18220
Max_t:	46800


## **Encoding**

In [13]:
def encode_vehicle(veh_data, nominal_veh_to_id_encoding):
    """Append one encoded measurement to the CSV file of its vehicle.

    The written line consists of the one-hot encoded parking id followed by
    the normalized timestamp and the standardized occupancy of ``veh_data``.

    Parameters
    ----------
    veh_data
        A single measurement exposing ``veh_id``, ``parking_id``,
        ``timestamp`` and ``occupancy`` (e.g. one row of the measurement
        frame).
    nominal_veh_to_id_encoding
        Mapping from vehicle name to the integer code used in the file name.

    Notes
    -----
    Reads the module-level ``OUTPUT_PATH``, ``id_list``, ``min_``, ``max_``,
    ``mean`` and ``std``. The target file is opened in append mode, so
    repeated calls add lines to it.
    """
    veh_id = nominal_veh_to_id_encoding[veh_data.veh_id]

    ohe = utils.one_hot_encoder([veh_data["parking_id"]], id_list)

    # Scale the values of *this* measurement. Previously they were taken from
    # a global frame `df_`, so every call wrote the values of that frame's
    # first row. The scalars are wrapped into (1, 1) arrays so the utils
    # helpers receive a 2-D column, as they do in `encode`.
    timestamps = np.array([[veh_data["timestamp"]]])
    timestamps = utils.normalize(timestamps, min_, max_)
    counts = np.array([[veh_data["occupancy"]]])
    counts = utils.standardize(counts, mean, std)

    with open(OUTPUT_PATH+"%s.csv"%veh_id, "a") as f:
        for x in ohe[0]:
            f.write("%d,"%x)
        f.write("%f,%f\n"%(timestamps[0,0], counts[0,0]))

In [9]:
def encode(day):
    """Encode the measurements of one day and store them as ``nn.csv``.

    Reads ``day_<day>/vehicle_measurement.csv`` below ``MEASUREMENT_PATH``,
    keeps the rows of the sampled vehicles and writes a header-less,
    index-less CSV next to it. The columns are, in order: the one-hot encoded
    parking id, the normalized timestamp, the integer vehicle code and the
    standardized occupancy.

    Parameters
    ----------
    day : int
        Index of the day folder to process.

    Notes
    -----
    Relies on the module-level ``filtered_vehicles``,
    ``nominal_veh_to_id_encoding``, ``id_list``, ``min_``, ``max_``, ``mean``
    and ``std``. Returns ``None``.
    """
    print("\n\n=======Day%d=========="%day)
    meas_path = MEASUREMENT_PATH+"day_%d/"%day
    df_ = pd.read_csv(meas_path+"vehicle_measurement.csv")

    #vehicle filtering and encoding:
    df_ = df_[df_["veh_id"].isin(filtered_vehicles)]
    print("\tFiltered.")
    # Iterating the column directly avoids the per-row Series construction of
    # iterrows(); an unknown vehicle name still raises KeyError.
    id_nums = [nominal_veh_to_id_encoding[veh] for veh in df_["veh_id"]]
    veh_ids = np.array(id_nums).reshape(len(df_), 1)
    print("\tVehicles encoded.")

    #one-hot-encoding:
    ohe = utils.one_hot_encoder(df_["parking_id"].tolist(), id_list)
    print("\t1-hot-encoded.")

    #standardize and normalize
    timestamps = np.array(df_["timestamp"]).reshape(len(df_), 1)
    timestamps = utils.normalize(timestamps, min_, max_)
    counts = np.array(df_["occupancy"]).reshape(len(df_), 1)
    counts = utils.standardize(counts, mean, std)

    #saving results
    print("Saving results.")
    combined = np.hstack([ohe, timestamps, veh_ids, counts])
    pd.DataFrame(combined).to_csv(meas_path+"nn.csv", header=None, index=None)

In [10]:
from multiprocessing.pool import Pool

# One task per day folder; NUM_DAYS keeps this in sync with the other
# per-day loops instead of repeating the literal 60.
days = list(range(NUM_DAYS))

# The context manager shuts the worker processes down once map() has
# returned; previously the pool was never closed. encode() returns None,
# so the result list is not kept.
with Pool(processes=4) as pool:
    pool.map(encode, days)









	Filtered.
	Vehicles encoded.
	1-hot-encoded.
Saving results.
	Filtered.	Filtered.

	Filtered.
	Vehicles encoded.
	Vehicles encoded.
	1-hot-encoded.
Saving results.
	1-hot-encoded.
Saving results.
	Vehicles encoded.
	1-hot-encoded.
Saving results.


	Filtered.






	Vehicles encoded.
	1-hot-encoded.
Saving results.
	Filtered.
	Filtered.
	Filtered.
	Vehicles encoded.
	1-hot-encoded.
Saving results.
	Vehicles encoded.
	1-hot-encoded.
Saving results.


	Vehicles encoded.
	1-hot-encoded.
Saving results.
	Filtered.


	Vehicles encoded.
	1-hot-encoded.
Saving results.






	Filtered.
	Filtered.
	Filtered.
	Filtered.
	Vehicles encoded.
	1-hot-encoded.
Saving results.
	Vehicles encoded.
	1-hot-encoded.
Saving results.
	Vehicles encoded.
	1-hot-encoded.
Saving results.
	Vehicles encoded.
	1-hot-encoded.
Saving results.








	Filtered.
	Filtered.
	Filtered.
	Filtered.
	Vehicles encoded.
	1-hot-encoded.
Saving results.
	Vehicles encoded.
	1-hot-encoded.
Saving results.
	Vehicles enco

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [None]:
# Split every day's nn.csv into one file per vehicle and day.
# Column 129 is used as the vehicle code; that matches the column order
# written by `encode` if the one-hot block is 128 columns wide
# (one per id_list entry) — TODO confirm against utils.one_hot_encoder.
for day in trange(NUM_DAYS):
    meas_path = MEASUREMENT_PATH + "day_%d/" % day
    df_ = pd.read_csv(meas_path + "nn.csv", header=None)
    vehicle_codes = df_[129]
    for vehicle in vehicle_codes.unique():
        out_file = OUTPUT_PATH + "_%s_%d.csv" % (vehicle, day)
        df_save = df_[vehicle_codes == vehicle]
        df_save.to_csv(out_file, index=False, header=False)

  0%|          | 0/60 [00:00<?, ?it/s]

In [11]:
import json

# Persist the sampled vehicle names so the same subset can be reloaded later.
filt_save = dict(vehicles=filtered_vehicles)
with open("../02_data/filtered_vehicles.json", "w") as f:
    f.write(json.dumps(filt_save))

In [4]:
# nn.csv is written without a header row, so it must be read with
# header=None; otherwise the first measurement is consumed as column names.
nominals = pd.read_csv("../dockeroutput/day_0/nn.csv", header=None)

In [None]:
# Convert the loaded table to a plain array and take its second-to-last
# column, which is where `encode` places the vehicle codes.
nominals = np.array(nominals)
ids = nominals[:, -2]

# Start from an empty list; the next cell refills it from `ids`.
filtered_vehicles = list()

In [12]:
# Rebuild the list of sampled vehicle names from the encoded ids, keeping
# first-occurrence order. A companion set makes the membership test constant
# time; checking against the list itself was quadratic in the worst case.
seen_vehicles = set(filtered_vehicles)
for x in ids:
    vid = nominal_id_to_veh_encoding[str(int(x))]
    if vid not in seen_vehicles:
        seen_vehicles.add(vid)
        filtered_vehicles.append(vid)

## **Saving known edges**

In [22]:
# All day-0 measurements (unfiltered); they define which parking lots each
# vehicle is known to have visited.
known = pd.read_csv(MEASUREMENT_PATH+"day_0/vehicle_measurement.csv")
# The first positional argument of GroupBy.mean is `numeric_only`, not a
# column name, and the frame has no "counts" column. Spell the intent out;
# only the (veh_id, parking_id) index of the result is used afterwards.
known_grouped = known.groupby(["veh_id", "parking_id"]).mean(numeric_only=True)

In [23]:
# Mean measurement timestamp per vehicle.
known_times = known.groupby("veh_id")["timestamp"].mean()

In [24]:
# vehicle -> list of parking ids it appears with in the grouped index
known_parkings = {}
# vehicle -> mean timestamp of its measurements
known_moving_times = {}

for veh, parking in known_grouped.index:
    known_parkings.setdefault(veh, []).append(parking)

# Iterate (label, value) pairs instead of indexing the Series with a running
# integer: integer keys on a non-integer index only work through a deprecated
# positional fallback.
for veh, mean_time in known_times.items():
    known_moving_times[veh] = mean_time

In [25]:
# Write both lookup dictionaries as JSON, in the same order as before.
for target, payload in (
    ("../02_data/known_parkings.json", known_parkings),
    ("../02_data/known_moving_times.json", known_moving_times),
):
    with open(target, "w") as f:
        json.dump(payload, f)