In [1]:
import pickle
import torch
import logging
import utils.data_loader as data_loader
import utils.display as display
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from scipy.spatial.distance import cdist
from tqdm.notebook import tqdm
from sklearn.metrics.pairwise import haversine_distances
import charging_behavior.where_to_charge.NN_utility_model as NN_utility_model

# Project-local helpers from utils.display; presumably they set the pandas
# display options and the logging format used by this notebook -- TODO confirm
# against utils/display.py.
display.configure_pandas()
display.configure_logging()

# Register tqdm with pandas so `progress_apply` becomes available on
# DataFrame / GroupBy objects (it is used further down when the OD trips are
# annotated).
tqdm.pandas()

  from pandas import Panel


In [2]:
# Rest events
rest_events = data_loader.load_rest()

# OD data: the hotspot-labelled trips, reduced to the columns used below and
# enriched with the two distance columns from the distance-annotated OD table.
od_distance = data_loader.load_od(scale='full', with_distance=True)
od_with_hs = (
    data_loader.load_od(with_hotpots=True, version='v4')
    [['Licence', 'begin_time', 'end_time', 'in_bbox', 'load_label', 'drop_label']]
    # 'Licence' and 'begin_time' are the only columns shared by both sides,
    # so naming them is equivalent to the implicit merge key.
    .merge(
        od_distance[['Licence', 'begin_time', 'distance_before_od', 'od_distance']],
        on=['Licence', 'begin_time'],
    )
)

# Charging events, restricted to the licences returned by
# load_trajectory_od_intersection().
ce = data_loader.load_ce(version='v5_30min')
common = data_loader.load_trajectory_od_intersection()
ce = ce[ce['licence'].isin(common)].reset_index(drop=True)

22-Jul-20 16:41:43 - Loadingdata/rest/rest_events.csv
22-Jul-20 16:41:43 - Loadingdata/od/od_with_distance_between_before.csv
22-Jul-20 16:41:44 - Loading data/od/full_od_with_hotpots_v4.csv
22-Jul-20 16:41:45 - Loading data/ce/v5_30min.csv


In [3]:
# Mean `od_distance` of the trips for every (load_label, drop_label) pair.
p2d_distance = (
    od_with_hs
    .groupby(['load_label', 'drop_label'])
    ['od_distance']
    .mean()
)

In [4]:
# Annotate the OD data with whether a trip comes right after a charging or rest event
# First mark whether it is after a charging event
def add_whether_after_ce(license_ce, od_all=None):
    """Flag the first OD trip starting at or after each charging event of one licence.

    Written to be used as the function of ``ce.groupby('licence').apply(...)``.

    Parameters
    ----------
    license_ce : pandas.DataFrame
        Charging events of a single licence. Needs a ``start_charging`` column
        and a ``.name`` attribute holding the licence (groupby-apply sets it).
    od_all : pandas.DataFrame, optional
        OD trips of all licences with ``Licence``, ``begin_time`` and
        ``after_ce`` columns. It must be sorted by ``Licence`` and, within a
        licence, by ``begin_time``: rows are located with ``searchsorted``,
        which is only correct on sorted data. When omitted, the module-level
        ``od_with_hs`` is looked up at call time.

    Returns
    -------
    pandas.DataFrame
        A copy of this licence's OD rows in which ``after_ce`` is True for the
        first trip whose ``begin_time`` is >= a charging event's
        ``start_charging``. ``od_all`` itself is not modified.
    """
    if od_all is None:
        # Resolve the default lazily; a default bound at definition time would
        # keep pointing at a stale frame once `od_with_hs` is reassigned.
        od_all = od_with_hs

    licence = license_ce.name
    licence_column = od_all['Licence']
    begin_index = licence_column.searchsorted(licence, side='left')
    end_index = licence_column.searchsorted(licence, side='right')

    # Copy so the flags are written to our own frame rather than to a slice
    # of the caller's data.
    license_od = od_all.iloc[begin_index:end_index].copy()
    od_count = len(license_od.index)
    begin_times = license_od['begin_time']

    for start_charging in license_ce['start_charging']:
        od_index = begin_times.searchsorted(start_charging)
        if od_index == od_count:
            # No trip starts after this event. Keep going instead of stopping:
            # the events are not guaranteed to be in chronological order.
            continue
        license_od.at[license_od.index[od_index], 'after_ce'] = True
    return license_od

# Start with every trip unflagged, then let each licence's charging events
# flag the trips that follow them. Only OD rows of licences present in `ce`
# are returned by the apply, and the index is rebuilt as a plain RangeIndex.
od_with_hs['after_ce'] = False
ce_by_licence = ce.groupby('licence')
od_with_hs = (
    ce_by_licence
    .progress_apply(add_whether_after_ce, od_all=od_with_hs)
    .reset_index(drop=True)
)

# Second mark whether it is after a rest event
def add_whether_after_rest(license_od, all_rests=None):
    """Flag the first OD trip starting at or after each rest event of one licence.

    Written to be used as the function of ``od.groupby('Licence').apply(...)``.

    Parameters
    ----------
    license_od : pandas.DataFrame
        OD trips of a single licence, sorted by ``begin_time`` (rows are
        located with ``searchsorted``). Needs ``begin_time`` and ``after_rest``
        columns and a ``.name`` attribute holding the licence (groupby-apply
        sets it).
    all_rests : pandas.DataFrame
        Rest events of all licences with ``license`` and ``start_time``
        columns, sorted by ``license``. Required despite the ``None`` default.

    Returns
    -------
    pandas.DataFrame
        A copy of ``license_od`` in which ``after_rest`` is True for the first
        trip whose ``begin_time`` is >= a rest event's ``start_time``.

    Raises
    ------
    TypeError
        If ``all_rests`` is not provided.
    """
    if all_rests is None:
        raise TypeError("add_whether_after_rest() requires the 'all_rests' DataFrame")

    # Read the group key before copying: `.copy()` does not carry `.name` over.
    licence = license_od.name
    rest_licences = all_rests['license']
    begin_index = rest_licences.searchsorted(licence, side='left')
    end_index = rest_licences.searchsorted(licence, side='right')
    license_rests = all_rests.iloc[begin_index:end_index]

    # Work on a copy; mutating the group object handed in by groupby-apply is
    # not supported by pandas and would also alter the caller's data.
    license_od = license_od.copy()
    od_count = len(license_od.index)
    begin_times = license_od['begin_time']

    for rest_start in license_rests['start_time']:
        od_index = begin_times.searchsorted(rest_start)
        if od_index == od_count:
            # No trip starts after this rest. Keep going instead of stopping:
            # the rests are not guaranteed to be in chronological order.
            continue
        license_od.at[license_od.index[od_index], 'after_rest'] = True
    return license_od


# Start with every trip unflagged, then flag, per licence, the trips that
# follow a rest event.
od_with_hs['after_rest'] = False
od_with_hs = (
    od_with_hs
    .groupby('Licence')
    .progress_apply(add_whether_after_rest, all_rests=rest_events)
    # Same clean-up as for the charging marker: depending on the pandas
    # version, apply may prepend the group key as an index level, which would
    # leave 'Licence' as both an index level and a column and break the
    # merge / comparisons in the following cells.
    .reset_index(drop=True)
)

HBox(children=(FloatProgress(value=0.0, max=643.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=643.0), HTML(value='')))

In [5]:
# Drop label of the previous row; cleared on the first row of each run of
# identical 'Licence' values so a label never leaks across licences.
od_with_hs['last_drop_label'] = od_with_hs['drop_label'].shift()
od_with_hs.loc[od_with_hs['Licence'] != od_with_hs['Licence'].shift(), 'last_drop_label'] = None

# The merge key is the set of columns the two frames share.
df_to_count_distance = pd.merge(od_distance[['Licence', 'begin_time', 'distance_before_od', 'od_distance']],
                                od_with_hs, how='inner')

# Build the mask from the merged frame itself. A mask taken from `od_with_hs`
# is aligned on index labels and therefore only selects the right rows if the
# merge happens to keep exactly the same rows in exactly the same order.
follows_ce_or_rest = df_to_count_distance['after_ce'] | df_to_count_distance['after_rest']

# Mean `distance_before_od` per (last_drop_label, load_label) pair, using only
# trips that do not directly follow a charging or rest event.
d2p_distance = df_to_count_distance.loc[~follows_ce_or_rest].groupby(
    ['last_drop_label', 'load_label'])['distance_before_od'].mean()

In [6]:
# Compute the departure distributions: for each hour of the day, the share of
# each load_label among trips that follow a rest event (label -1 excluded).
is_rest_departure = od_with_hs['after_rest'] & (od_with_hs['load_label'] != -1)
begin_hours = od_with_hs['begin_time'].dt.hour

departure_distributions = []
for hour in tqdm(range(24)):
    label_counts = od_with_hs.loc[(begin_hours == hour) & is_rest_departure,
                                  'load_label'].value_counts()
    # Normalise the counts so that each hour's entries sum to 1.
    departure_distributions.append(label_counts / label_counts.sum())



HBox(children=(FloatProgress(value=0.0, max=24.0), HTML(value='')))




In [9]:
# Persist the results: the two distance tables as CSV and the list of
# per-hour departure distributions as a pickle.
for distance_table, csv_path in (
    (p2d_distance, 'generated_data/generation_input/p2d_distance.csv'),
    (d2p_distance, 'generated_data/generation_input/d2p_distance.csv'),
):
    distance_table.to_csv(csv_path)

with open('generated_data/generation_input/departure_distributions.pickle', 'wb') as pickle_file:
    pickle.dump(departure_distributions, pickle_file)

In [11]:
# Read the saved table back, restoring the first two CSV columns as the
# two-level index, to check that the file round-trips.
pd.read_csv(
    filepath_or_buffer='generated_data/generation_input/p2d_distance.csv',
    index_col=[0, 1],
)


Unnamed: 0_level_0,Unnamed: 1_level_0,od_distance
load_label,drop_label,Unnamed: 2_level_1
-1,-1,8.998971
-1,0,19.615518
-1,1,36.195396
-1,2,24.861204
-1,3,35.812621
...,...,...
540,168,20.941709
540,172,37.263293
540,174,10.131132
540,194,24.765681
