# **Converting raw results into a NN-input**

In [2]:
MEASUREMENT_PATH = "../dockeroutput/"
NUM_DAYS = 60
TRAIN_DAYS = 55
TEST_DAYS = NUM_DAYS-TRAIN_DAYS

In [11]:
import numpy as np
import pandas as pd
import json

from tqdm.notebook import trange

import utils
from importlib import reload
reload(utils)

<module 'utils' from '/mnt/data/phd/kutatas/sumo_sec/03_src/utils.py'>

In [4]:
#reading nominal encodings:
with open("../dockeroutput/day_0/vehicle_maps.json", "r") as f:
    maps = json.load(f)
    nominal_veh_to_id_encoding = maps["vehicle_to_idx_map"]
    nominal_id_to_veh_encoding = maps["idx_to_vehicle_map"]

## **Filtering**

>Due to the size of the data, we shall filter to a limited number of vehicles.

Hence, we will select only the 10% of the vehicle data, choosen uniformly randomly from the list of vehicles.

In [5]:
filter_idx = np.random.choice(len(nominal_id_to_veh_encoding), size=int(len(nominal_id_to_veh_encoding)/10), replace=False)
filtered_vehicles = []
for fx in filter_idx:
    filtered_vehicles.append(nominal_id_to_veh_encoding[str(fx)])

In [6]:
df = pd.read_csv("../dockeroutput/day_0/vehicle_measurement.csv")
df = df[df["veh_id"].isin(filtered_vehicles)]
print("filtered", len(df))
id_nums = []
for i,r in df.iterrows():
    id_nums.append(nominal_veh_to_id_encoding[r.veh_id])

df["veh_id"]=id_nums

filtered 4894167


In [7]:
df.head()

Unnamed: 0,veh_id,day,timestamp,parking_id,occupancy
143,27,0,18190,1087,0.0
237,28,0,18220,1130,0.0
267,43,0,18230,1131,0.0
268,43,0,18230,1156,0.0
303,43,0,18240,1131,0.0


In [13]:
id_list = [i for i in range(1059, 1186+1)]

In [14]:
mean = np.mean(df["occupancy"])
std = np.std(df["occupancy"])
min_ = np.min(df["timestamp"])
max_ = np.max(df["timestamp"])


print("Mean occup.:\t%.5f"%mean)
print("Std occup.:\t%.5f"%std)
print("Min_t:\t%d"%min_)
print("Max_t:\t%d"%max_)

Mean occup.:	0.56820
Std occup.:	0.39741
Min_t:	18190
Max_t:	46800


## **Encoding**

In [16]:
for day in trange(NUM_DAYS):
    print("\n\n=======Day%d=========="%day)
    meas_path = MEASUREMENT_PATH+"day_%d/"%day
    df_ = pd.read_csv(meas_path+"vehicle_measurement.csv")
    
    #vehicle filtering and encoding:
    df_ = df_[df_["veh_id"].isin(filtered_vehicles)]
    print("\tFiltered.")
    id_nums = []
    for i,r in df_.iterrows():
        id_nums.append(nominal_veh_to_id_encoding[r.veh_id])
    veh_ids = np.array(id_nums).reshape(len(df_), 1)
    print("\tVehicles encoded.")
    
    #one-hot-encoding:
    ohe = utils.one_hot_encoder(df_["parking_id"].tolist(), id_list)
    print("\t1-hot-encoded.")
    
    #standardize and normalize
    timestamps = np.array(df_["timestamp"]).reshape(len(df_), 1)
    timestamps = utils.normalize(timestamps, min_, max_)
    counts = np.array(df_["occupancy"]).reshape(len(df_), 1)
    counts = utils.standardize(counts, mean, std)
    
    p_lots = np.array(df_["parking_id"]).reshape(len(df_), 1)
    
    #saving results
    print("Saving results.")
    combined = np.hstack([ohe, timestamps, veh_ids, counts])
    pd.DataFrame(combined).to_csv(meas_path+"nn.csv", header=None, index=None)

  0%|          | 0/60 [00:00<?, ?it/s]



	Filtered.
	Vehicles encoded.
	1-hot-encoded.
Saving results.


	Filtered.
	Vehicles encoded.
	1-hot-encoded.
Saving results.


	Filtered.
	Vehicles encoded.
	1-hot-encoded.
Saving results.


	Filtered.
	Vehicles encoded.
	1-hot-encoded.
Saving results.


	Filtered.
	Vehicles encoded.
	1-hot-encoded.
Saving results.


	Filtered.
	Vehicles encoded.
	1-hot-encoded.
Saving results.


	Filtered.
	Vehicles encoded.
	1-hot-encoded.
Saving results.


	Filtered.
	Vehicles encoded.
	1-hot-encoded.
Saving results.


	Filtered.
	Vehicles encoded.
	1-hot-encoded.
Saving results.


	Filtered.
	Vehicles encoded.
	1-hot-encoded.
Saving results.


	Filtered.
	Vehicles encoded.
	1-hot-encoded.
Saving results.


	Filtered.
	Vehicles encoded.
	1-hot-encoded.
Saving results.


	Filtered.
	Vehicles encoded.
	1-hot-encoded.
Saving results.


	Filtered.
	Vehicles encoded.
	1-hot-encoded.
Saving results.


	Filtered.
	Vehicles encoded.
	1-hot-encoded.
Saving results.


	Filtered.
	Vehicles encoded.
	1-hot-e

## ID 1-hot encoding

In [17]:
import json
filt_save = {"vehicles": filtered_vehicles}
with open("../02_data/filtered_vehicles.json", "w") as f:
    json.dump(f, filt_save)

TypeError: Object of type TextIOWrapper is not JSON serializable

In [None]:
mport json
filt_save = {"vehicles": filtered_vehicles}
with open("../02_data/filtered_vehicles.json", "w") as f:
    json.dump(filt_save, f)

## Adding timestamps and labels

In [33]:
timestamps = np.array(raw_df["timestamp"]).reshape(len(raw_df), 1)
timestamps = timestamps - min(timestamps)
timestamps = timestamps / max(timestamps)

In [34]:
counts = np.array(raw_df["counts"]).reshape(len(raw_df), 1)
counts = counts-np.mean(counts)
counts = counts / np.std(counts)

In [35]:
collected_df = np.hstack([id_encoding, timestamps, counts])

In [36]:
collected_df.shape

(1981200, 129)

## Saving result

In [38]:
pd.DataFrame(collected_df).to_csv("../02_data/nn.csv", header=None, index=None)