In [1]:
import os
import json
import pandas as pd

In [2]:
# Folder scanned for client request files (one sub-folder per entry, see get_client_reqs).
data_folder = "./execution/"

In [3]:
def get_client_reqs(execution_folder: str):
    """Collect the first line of every request file under ``execution_folder``.

    Every sub-directory of ``execution_folder`` is scanned; each regular file
    in it, except the one named ``settings``, contributes its first line (as
    returned by ``readline``, i.e. including any trailing newline).

    Args:
        execution_folder: Path of the folder to scan, with or without a
            trailing path separator.

    Returns:
        list[str]: One raw line per request file, ordered by sub-directory
        name and then by file name so repeated runs yield the same order.
    """
    client_reqs = []
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for run_name in sorted(os.listdir(execution_folder)):
        run_path = os.path.join(execution_folder, run_name)
        # Stray files next to the run folders cannot be listed as directories.
        if not os.path.isdir(run_path):
            continue
        for file_name in sorted(os.listdir(run_path)):
            if file_name == "settings":
                continue
            file_path = os.path.join(run_path, file_name)
            # Nested directories cannot be opened as request files.
            if not os.path.isfile(file_path):
                continue
            with open(file_path) as f:
                client_reqs.append(f.readline())
    return client_reqs

def parse_request_dispatch(request_dispatch: list):
    """Turn the positional fields of one dispatch into a keyed dict.

    Args:
        request_dispatch: Sequence of at least 14 raw values, read by
            position in the order listed in ``layout`` below.

    Returns:
        dict: Keys in ``layout`` order. Fields paired with ``int`` are
        converted with ``int()``; the others are passed through unchanged.

    Raises:
        IndexError: If ``request_dispatch`` has fewer than 14 items.
        ValueError: If a field paired with ``int`` cannot be converted.
    """
    # (key, converter) per position; None means "keep the raw value".
    layout = (
        ('start', None),
        ('end', None),
        ('companyName', None),
        ('vehicleName', None),
        ('vehicleCapacity', int),
        ('vehicleFilledUpCargo', None),
        ('cargoOccupancy', int),
        ('loadedOn', int),
        ('holdingTime', int),
        ('dispatchedOn', int),
        ('idleTime', int),
        ('arrivedOn', int),
        ('travelTime', int),
        ('requestStage', int),
    )
    parsed = {}
    for position, (key, convert) in enumerate(layout):
        raw = request_dispatch[position]
        parsed[key] = raw if convert is None else convert(raw)
    return parsed

def parse_client_req(client_req: str):
    """Parse one ``;``-separated client request line into a dict.

    Field positions (after splitting on ``;``):
        0 clientName, 1 productName, 2 productVolume (int), 3 quantity (int),
        4 route (``,``-separated list), 5 requestDispatches (``:``-separated
        dispatches, each a ``,``-separated record handed to
        ``parse_request_dispatch``), 6 price (float), 7 startedOn (int),
        8 finishedOn (int), 9 requestTime (int), 10 requestStage (int).

    Args:
        client_req: The raw request line.

    Returns:
        dict: The parsed request, keys in the order listed above.
    """
    fields = client_req.split(";")

    record = {}
    record['clientName'] = fields[0]
    record['productName'] = fields[1]
    record['productVolume'] = int(fields[2])
    record['quantity'] = int(fields[3])
    record['route'] = fields[4].split(",")
    record['requestDispatches'] = [
        parse_request_dispatch(dispatch.split(","))
        for dispatch in fields[5].split(":")
    ]
    record['price'] = float(fields[6])
    record['startedOn'] = int(fields[7])
    record['finishedOn'] = int(fields[8])
    record['requestTime'] = int(fields[9])
    record['requestStage'] = int(fields[10])
    return record

def get_all_json(execution_folder: str):
    """Load and parse every client request found under ``execution_folder``.

    Args:
        execution_folder: Folder handed to ``get_client_reqs`` to locate the
            raw request lines.

    Returns:
        list[dict]: One ``parse_client_req`` result per raw request line.
    """
    # Read from the folder the caller passed in rather than the notebook-level
    # ``data_folder`` global, so the argument is actually honoured.
    raw_lines = get_client_reqs(execution_folder)
    return [parse_client_req(raw_line) for raw_line in raw_lines]

In [4]:
def process_to_df(entry: dict):
    """Flatten one parsed request into a row of scalar values.

    The nested ``route`` and ``requestDispatches`` values are replaced by
    aggregates: ``route_len`` (number of route items), the sums of
    ``holdingTime``, ``idleTime`` and ``travelTime`` over all dispatches, and
    ``unique_companies`` (number of distinct ``companyName`` values).

    The input dict is left untouched and a new dict is returned, so the
    function can be applied to the same data more than once (e.g. when a
    notebook cell is re-run).

    Args:
        entry: Dict containing at least ``route`` and ``requestDispatches``;
            each dispatch must provide ``holdingTime``, ``idleTime``,
            ``travelTime`` and ``companyName``.

    Returns:
        dict: All other keys of ``entry`` in their original order, followed
        by the aggregate keys.

    Raises:
        KeyError: If ``route`` or ``requestDispatches`` is missing.
    """
    route = entry['route']
    dispatches = entry['requestDispatches']

    # Copy everything except the nested values, preserving key order.
    row = {
        key: value
        for key, value in entry.items()
        if key not in ('route', 'requestDispatches')
    }
    row['route_len'] = len(route)
    row['holdingTime'] = sum(rd['holdingTime'] for rd in dispatches)
    row['idleTime'] = sum(rd['idleTime'] for rd in dispatches)
    row['travelTime'] = sum(rd['travelTime'] for rd in dispatches)
    row['unique_companies'] = len({rd['companyName'] for rd in dispatches})
    return row

def to_df(js: list):
    """Build a DataFrame from a list of parsed request dicts.

    Each item is passed through ``process_to_df`` and the resulting records
    are handed to ``pd.json_normalize``.

    Args:
        js: List of request dicts.

    Returns:
        pandas.DataFrame: One row per item of ``js``.
    """
    records = list(map(process_to_df, js))
    return pd.json_normalize(records)


## Baseline

In [5]:
# Parse every client request found by get_all_json into a list of dicts.
json_data = get_all_json(data_folder)
# Flatten the parsed requests into one DataFrame row per request.
df = to_df(json_data)

In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

Unnamed: 0,productVolume,quantity,price,startedOn,finishedOn,requestTime,requestStage,route_len,holdingTime,idleTime,travelTime,unique_companies
count,69.0,69.0,69.0,69.0,69.0,69.0,69.0,69.0,69.0,69.0,69.0,69.0
mean,19.507246,1.0,9118.382663,118.869565,209.405797,90.536232,4.507246,5.507246,257.73913,0.0,10.0,3.333333
std,5.603631,0.0,2280.831432,106.025789,127.951199,105.509071,0.778822,0.778822,318.049693,0.0,5.754793,1.080123
min,8.0,1.0,3273.157895,11.0,21.0,2.0,2.0,3.0,3.0,0.0,1.0,1.0
25%,16.0,1.0,7414.846154,71.0,88.0,17.0,4.0,5.0,39.0,0.0,4.0,3.0
50%,20.0,1.0,9738.437229,71.0,183.0,42.0,5.0,6.0,98.0,0.0,12.0,3.0
75%,24.0,1.0,11083.512821,154.0,340.0,117.0,5.0,6.0,340.0,0.0,16.0,4.0
max,32.0,1.0,12308.484848,389.0,443.0,368.0,5.0,6.0,1128.0,0.0,18.0,5.0
