In [1]:
import pandas as pd
import pickle
import numpy as np
import networkx as nx
import re
import time

In [2]:
df_ivs = pickle.load(open('data/ivs_exploded_100.p', 'rb'))
agent_data = pickle.load(open('data/agent_data.p', 'rb'))
model_data = pickle.load(open('data/model_data.p', 'rb'))
batch_data = pickle.load(open('data/batch_run_result.p', 'rb'))

In [22]:
df_ivs

Unnamed: 0,origin,destination,trip_count,hour,M12,M8,BII-6b,M10,BIIa-1,M9,...,B04,M0,C2l,BII-2L,B02,C1b,C2b,B01,C1l,route_v
0,NLRTM,NLAMS,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,NLRTM,NLAMS,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,NLRTM,NLAMS,0,2,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,NLRTM,NLAMS,1,3,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,NLRTM,NLAMS,6,4,0,0,0,0,0,0,...,2,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3907,NLAMS,NLZWI,1,19,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4
3908,NLAMS,NLZWI,0,20,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4
3909,NLAMS,NLZWI,0,21,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4
3910,NLAMS,NLZWI,1,22,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4


## Functions to process data single run

In [3]:
def process_cs_data(df_cs=agent_data):
    df_cs = df_cs.reset_index()
    df_cs = df_cs.loc[df_cs.Step == (8*24*60)]
    df_cs = df_cs.dropna(subset=['charging_stations'])
    return df_cs

def process_vessel_data(df_vessel=model_data):
    df_vessel = pd.DataFrame(df_vessel.iloc[(60*24*8)]['data_completed_trips'])
    df_vessel = df_vessel.loc[df_vessel.time_departed > (60*24)]
    return df_vessel

In [4]:
df_charging_stations_single_run = process_cs_data()

In [5]:
df_charging_stations_single_run

Unnamed: 0,Step,AgentID,occupation,max_occupation,avg_line,max_line,charging_stations
2632219,11520,8866889,0.083929,1.0,0.013095,1.0,1.0
2632220,11520,30984595,0.397718,2.0,0.007242,1.0,2.0
2632223,11520,8863818,0.308433,2.0,0.000397,1.0,2.0
2632225,11520,8864005,0.313095,2.0,0.000198,1.0,2.0
2632232,11520,8862663,0.344147,2.0,0.001488,1.0,2.0
2632235,11520,8866775,0.0625,1.0,0.0,0.0,1.0
2632244,11520,8864566,0.055952,1.0,0.0,0.0,1.0
2632246,11520,8865003,0.342857,2.0,0.003869,1.0,2.0
2632247,11520,8867240,0.093155,1.0,0.0,0.0,1.0
2632251,11520,8863709,0.025298,1.0,0.0,0.0,1.0


In [6]:
df_vessels_single_run = process_vessel_data()

In [7]:
df_vessels_single_run

Unnamed: 0,id,route,combi,time_departed,travel_time,time_in_line,time_charging,battery_size
53,68,"(NLAMS, NLZAA, 0)",[8866889],1472,30,0,48,3220
62,71,"(NLUTC, NLTIE, 0)","[8867240, 22638146]",1527,195,0,35,2031
63,83,"(NLKGZ, NLZAA, 0)",[8866889],1740,22,0,0,6650
64,80,"(NLAMS, NLWMO, 0)",[8866889],1722,30,0,21,1400
66,67,"(NLDOR, NLRTM, 1)",[8862663],1457,130,0,219,9404
...,...,...,...,...,...,...,...,...
506,510,"(NLRTM, NLMOE, 0)",[8866686],11189,188,0,57,3220
507,508,"(NLOOS, NLTLB, 0)",[8863818],11164,127,0,153,6650
508,501,"(NLHAR, NLWTE, 0)",[113],11026,208,0,214,9404
509,506,"(NLRTM, NLMOE, 0)",[8865003],11119,188,0,148,6650


## Process batch data

### Vessel data

In [8]:
def get_vessel_data_batch(df_batch):
    df_batch = pd.DataFrame(df_batch)
    df_batch = df_batch.loc[df_batch.Step>0]
    df_vessels = df_batch.groupby('RunId').first()
    vessel_df = pd.DataFrame(df_vessels['data_completed_trips'][0])
    for i in range(2, len(df_vessels['data_completed_trips'])):
        df_temp = pd.DataFrame(df_vessels['data_completed_trips'][0])
        vessel_df = pd.concat([vessel_df, df_temp])
    return vessel_df

In [9]:
vessel_df = get_vessel_data_batch(batch_data)

In [10]:
vessel_df = vessel_df.groupby(['route']).mean()

In [11]:
vessel_df.sort_values('time_in_line', ascending=False)

Unnamed: 0_level_0,id,time_departed,travel_time,time_in_line,time_charging,battery_size
route,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"(NLTNZ, NLBRS, 0)",337.571429,7458.0,121.0,20.285714,65.142857,4743.714286
"(NLAMS, NLMOE, 3)",185.7,4116.5,654.0,17.5,390.5,6650.0
"(NLBZM, NLRTM, 0)",304.0,6816.0,414.0,17.25,255.5,5792.5
"(NLAMS, NLZWI, 1)",379.5,8371.0,501.0,15.5,104.5,2031.0
"(NLRTM, NLWLK, 1)",281.142857,6201.285714,361.0,11.571429,87.571429,2031.0
"(NLRTM, NLMOE, 1)",301.3,6671.9,188.0,11.5,196.5,9404.0
"(NLMOE, NLAPN, 1)",233.083333,5138.75,317.0,9.916667,173.083333,6078.333333
"(NLAER, NLNIE, 0)",265.727273,5871.545455,474.454545,9.363636,194.272727,4467.272727
"(NLHTB, NLBZM, 0)",222.75,4945.916667,553.0,8.583333,315.666667,6212.5
"(NLNIJ, NLDRU, 0)",270.833333,5990.333333,143.0,7.416667,71.916667,5220.833333


## Charging station data

In [14]:
def get_cs_data_batch(df_batch):
    df_batch = pd.DataFrame(df_batch)
    df_batch = df_batch.loc[df_batch.Step>0]
    df_charging_stations = df_batch.groupby(['AgentID']).mean()
    df_charging_stations = df_charging_stations.loc[df_charging_stations.charging_stations>0].sort_values('occupation')
    df_charging_stations = df_charging_stations.drop(columns=['RunId', 'iteration', 'Step', 'seed'])
    return df_charging_stations

In [15]:
get_cs_data_batch(batch_data)

Unnamed: 0_level_0,occupation,max_occupation,avg_line,max_line,charging_stations
AgentID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
8865412,0.0263,1.0,0.000159,0.1,1.0
8863709,0.030129,1.0,0.000645,0.2,1.0
22638146,0.036161,1.0,0.000635,0.1,1.0
8866775,0.048512,1.0,0.0,0.0,1.0
122,0.071052,1.0,0.001786,0.3,1.0
8867240,0.08748,1.0,0.005149,0.6,1.0
8864566,0.088373,1.0,0.010456,0.8,1.0
8866889,0.092232,1.0,0.00504,1.1,1.0
8863019,0.096379,1.0,0.012669,0.6,1.0
22638327,0.101349,1.0,0.005149,0.8,1.0


# OLD

In [None]:
# df_ivs_old = pickle.load(open('data/ivs_exploded_100_old.p', 'rb'))
# routes_dests = pickle.load(open('data/users_ship_specific_routes.p', 'rb'))
# df_ships = pd.read_excel('data/ship_types.xlsx')

# G = pickle.load(open("data/network.p", "rb"))
# paths = pickle.load(open("data/paths.p", "rb"))
# df_random = pickle.load(open("data/df_random.p", "rb"))
# df_abm = pickle.load(open("data/df_abm.p", "rb"))
# non_zero_flows = pickle.load(open("data/non_zero_flows.p", "rb"))
# feasible_combinations = pickle.load(open('data/feasible_comb.p', 'rb'))
# optimal_flows = pickle.load(open('data/optimal_flows.p', 'rb'))
# path_lengths = pickle.load(open('data/path_lengths_ship_specific_routes.p', 'rb'))

## Process agent data

### Vessel data

In [43]:
agent_data = agent_data.reset_index()

In [44]:
agent_data = agent_data.loc[agent_data.Step>(24*60)]

In [45]:
vessel_data = agent_data.loc[agent_data.vessel_status.notnull()]
vessel_data = vessel_data.drop(columns=['removed_at'])
vessel_data['status'] = 0
vessel_data['at_station'] = 0
vessel_data.status = vessel_data.vessel_status.apply(lambda x: x[0] if type(x) == list else x)
vessel_data.at_station = vessel_data.vessel_status.apply(lambda x: x[1] if type(x) == list else x)

In [46]:
vessel_data_dict = {}
def process_vessel_data(x):
    if not x.AgentID in vessel_data_dict.keys():
        vessel_data_dict[x.AgentID] = {"charging":{}, "inline":{}, "driving":0}
    if x.status == 'driving':
        vessel_data_dict[x.AgentID]['driving'] += 1
    else:
        if not x.at_station in vessel_data_dict[x.AgentID][x.status].keys():
            vessel_data_dict[x.AgentID][x.status][x.at_station] = 1
        else:
            vessel_data_dict[x.AgentID][x.status][x.at_station] += 1

In [48]:
vessel_data.apply(lambda x: process_vessel_data(x), axis=1)

328196     None
328197     None
328198     None
328199     None
328200     None
           ... 
2622303    None
2622304    None
2622305    None
2622306    None
2622307    None
Length: 157364, dtype: object

In [49]:
df = pd.DataFrame(vessel_data_dict)
df = df.transpose()

In [50]:
df['total_charging'] = 0
df.total_charging = df.charging.apply(lambda x: sum(x.values()))
df['total_inline'] = 0
df.total_inline = df.inline.apply(lambda x: sum(x.values()))

In [51]:
df1 = vessel_data.loc[vessel_data.status != 0].loc[:,['AgentID','Step','vessel_route','departed_from', 'combi','battery_size', 'generated_at']].groupby('AgentID').first()
vessel_data = pd.merge(df1, df, left_index=True, right_index=True)

In [52]:
60*24

1440

In [53]:
vessel_data = vessel_data.loc[vessel_data.generated_at > (24*60)]
vessel_data
# looks okay! Final check with pycharm later

Unnamed: 0_level_0,Step,vessel_route,departed_from,combi,battery_size,generated_at,charging,inline,driving,total_charging,total_inline
AgentID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
74,1450,"(NLRTM, NLGTB, 1)",8866893,"[8866893, 8863306]",6650.0,1449.0,{'8863306': 150},{},262,150,0
75,1509,"(NLRTM, NLAPN, 0)",8863306,"[8863306, 8863360]",3220.0,1508.0,{'8863360': 51},{},184,51,0
76,1512,"(NLOOS, NLTLB, 0)",8867740,[8863818],6650.0,1511.0,{'8863818': 153},{},127,153,0
77,1522,"(NLRTM, NLMOE, 0)",8863306,[8862663],3220.0,1521.0,{'8862663': 75},{},186,75,0
78,1543,"(NLOOS, NLTLB, 0)",8863818,[8863818],6650.0,1542.0,{},{},127,0,0
...,...,...,...,...,...,...,...,...,...,...,...
523,11394,"(NLNIJ, NLWAS, 0)",8864566,"[8864566, 22638327]",6650.0,11393.0,{},{},127,0,0
524,11396,"(NLNIJ, NLDRU, 0)",22638327,"[22638327, 8865412]",6650.0,11395.0,{},{},125,0,0
525,11416,"(NLWSP, NLAER, 0)",8864005,"[8864837, 8864005]",3220.0,11415.0,{},{},105,0,0
526,11443,"(NLRTM, NLMOE, 0)",8863306,"[8863306, 30984595]",3220.0,11442.0,{},{},78,0,0


In [28]:
vessel_data.groupby("vessel_route").mean()

Unnamed: 0_level_0,Step,battery_size,generated_at,total_charging,total_inline
vessel_route,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"(NLABL, NLRTM, 0)",5266.375,6994.25,5265.375,73.75,0.0
"(NLAER, NLLEY, 0)",5933.4,2142.6,5932.4,29.4,0.0
"(NLAER, NLNIE, 0)",4346.0,5363.75,4345.0,231.25,0.0
"(NLAER, NLRTM, 0)",6919.714286,5376.714286,6918.714286,273.0,0.0
"(NLALK, NLIJM, 0)",6180.538462,1933.923077,6179.538462,44.769231,1.615385
"(NLAMS, NLAER, 0)",6780.807692,3324.230769,6779.807692,63.576923,0.807692
"(NLAMS, NLMOE, 3)",6692.583333,6650.0,6691.583333,344.333333,0.0
"(NLAMS, NLMOE, 4)",6076.5,9404.0,6075.5,528.0,0.0
"(NLAMS, NLUTC, 1)",5871.4,5278.0,5870.4,127.2,0.0
"(NLAMS, NLWMO, 0)",6317.6,1463.1,6316.6,6.2,0.6


### Charging station data

In [68]:
df1 = pd.DataFrame(agent_data.loc[(agent_data.charging_stations>0)].groupby('AgentID').first().loc[:,'charging_stations'])

In [69]:
df2 = pd.DataFrame(agent_data.loc[(agent_data.charging_stations>0)].groupby('AgentID').station_status.mean())

In [70]:
cs_data = pd.merge(df1, df2, left_index=True, right_index=True)

In [71]:
cs_data
# TODO check why some are not visited at all within 7 days, seems off

Unnamed: 0_level_0,charging_stations,station_status
AgentID,Unnamed: 1_level_1,Unnamed: 2_level_1
104,2.0,0.172173
107,2.0,0.159573
113,2.0,0.069097
122,1.0,0.073611
22638146,1.0,0.057837
22638327,1.0,0.113889
30984595,2.0,0.118353
8862663,2.0,0.170536
8862801,1.0,0.266369
8863019,1.0,0.120337


### Remark
Something seems off, a lot of stations are not used at all
Way forward:
1. Check whether this may be clarified based on input data
2. Recheck logic behind generation
3. Check whether the generation is going well and as expected