# Emissions Plot Generation

In [1]:
import sys
# Put the parent directory on the import path. The project-local modules imported
# in the next cell (plots, definitions, functions) presumably live there -- TODO confirm.
sys.path.append("../")

In [2]:
from plots import trace_visualisation, pdf, line_plots
import pandas as pd
import definitions
import os
import datetime
from functions import mp_funcs
from functions import func
import plotly.io as pio
import pickle

## Get the Data Name and Locations

In [3]:
# File names expected inside the analysis folder: the raw data and its summary.
RAW_DATA_FILE = 'data.csv'
RAW_DATA_SUMMARY = 'data_summary.csv'

# Analysis run to load, resolved against the project's data directory.
DATA_FOLDER = '2020-06-24_09_03_03-Full-Analysis'
DATA_DIR_FULL_PATH = os.path.join(definitions.DATA_DIR, DATA_FOLDER)

## Read in the Data

In [4]:
# Load the raw records and the summary table. The summary file has two header
# rows, so its columns become a two-level MultiIndex.
raw_data_df = pd.read_csv(os.path.join(DATA_DIR_FULL_PATH, RAW_DATA_FILE), low_memory=False, index_col=0)
summary_df = pd.read_csv(os.path.join(DATA_DIR_FULL_PATH, RAW_DATA_SUMMARY), header=[0, 1], index_col=0)

# Parse the timestamps only when they are not raw floats: pd.to_datetime would read
# floats as nanoseconds since the epoch, and the float64 check in the following
# conversion cell could then never be true.
# The column is replaced outright (not via `.loc[:, ...]`) so that its dtype
# really becomes datetime64 instead of being written into the old object column.
if raw_data_df['timestep_time'].dtype.name != 'float64':
    raw_data_df['timestep_time'] = pd.to_datetime(raw_data_df['timestep_time'])


elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison



## Convert `timestep_time` to a datetime if it is still stored as a float

In [None]:
if raw_data_df['timestep_time'].dtype.name == 'float64':
    # The multiprocessing variant (mp_funcs.apply_by_multiprocessing) does not work
    # on Windows, so `func` is applied serially to every value instead.
    # The column is replaced outright (not via `.loc[:, ...]`) so that its dtype
    # changes to datetime64 rather than the values being written into the old column.
    raw_data_df['timestep_time'] = pd.to_datetime(raw_data_df['timestep_time'].apply(func=func))

    # Persist the converted timestamps so the conversion is not needed on the next load.
    print("Saving to .csv")
    raw_data_df.to_csv(os.path.join(DATA_DIR_FULL_PATH, RAW_DATA_FILE))

In [13]:
# Two-level column keys of summary_df that take part in the comparison against the
# 'Total_average' row below. Uncomment an entry to include it in the comparison.
columns = [('distance', 'total'),
           #('norm_time', 'total'),
           #('vehicle_fuel', 'total'),
           # ('vehicle_fuel', 'average_per_step'),
           ('vehicle_fuel', 'per_100km'),
           # ('vehicle_fuel', 'mpg'),
           #('vehicle_CO2', 'total'),
           # ('vehicle_CO2', 'average_per_step'),
           #('vehicle_CO2', 'per_100km'),
           #('vehicle_CO', 'total'),
           # ('vehicle_CO', 'average_per_step'),
           #('vehicle_CO', 'per_100km'),
           #('vehicle_HC', 'total'),
           # ('vehicle_HC', 'average_per_step'),
           #('vehicle_HC', 'per_100km'),
           #('vehicle_NOx', 'total'),
           # ('vehicle_NOx', 'average_per_step'),
           #('vehicle_NOx', 'per_100km'),
           #('vehicle_PMx', 'total'),
           # ('vehicle_PMx', 'average_per_step'),
           #('vehicle_PMx', 'per_100km'),
           # ('vehicle_electricity', 'total'),
           # ('vehicle_electricity', 'average_per_step'),
           # ('vehicle_electricity', 'per_100km')
           ]

# Rows before this position are left out of the comparison -- presumably they are
# aggregate rows such as 'Total_average' rather than single vehicles; TODO confirm.
FIRST_VEHICLE_ROW = 2

# Float dtype so the row sums and idxmin below work on numbers instead of objects.
diff_df = pd.DataFrame(index=summary_df.index, columns=summary_df.columns, dtype=float)

# For each selected column, score every row by its normalised absolute difference
# to the 'Total_average' row: |x - avg| / (x + avg).
for col in columns:
    average = summary_df.loc['Total_average', col]
    values = summary_df[col].iloc[FIRST_VEHICLE_ROW:]
    normalised_diff = (values - average).abs() / (values + average)
    # Single positional assignment: the chained form `diff_df[col].iloc[2:] = ...`
    # may write to a temporary copy and leave diff_df unchanged.
    diff_df.iloc[FIRST_VEHICLE_ROW:, diff_df.columns.get_loc(col)] = normalised_diff.to_numpy()

# The row with the smallest summed score is the closest match across all selected columns.
diff_df['diff_sum'] = diff_df.sum(axis=1)
best_fit_vehicle = diff_df['diff_sum'].iloc[FIRST_VEHICLE_ROW:].idxmin()

# Next step (not implemented here): pick a vehicle from the 5th and 95th percentile
# and look at actual low and high.

print('The best fit vehicle is: {0}'.format(best_fit_vehicle))

The best fit vehicle is: 45480_20


In [5]:
# Vehicle to plot; this is the id printed as the best fit by the summary comparison.
plot_vehicle = '45480_20'

# Hand the raw data to the plotting module before asking it for figures
# (presumably the trace functions read this module attribute -- TODO confirm).
trace_visualisation.sampled_emissions_df = raw_data_df
trace_visualisation.trace_visual(plot_vehicle).show()

trace_no_map = trace_visualisation.trace_no_map(
    plot_vehicle,
    plot_columns=['vehicle_fuel', 'vehicle_CO2', 'vehicle_NOx'],
    axis_names=["Vehicle Speed [mph]", 'Vehicle Fuel [gal/s]', 'Vehicle CO_2 [g/s]',
                'Vehicle NOx [g/s]'],
)
trace_no_map.show()

# Create the output folder if needed so the export does not fail on a fresh checkout.
raw_plots_dir = os.path.join(definitions.ROOT, 'raw_plots')
os.makedirs(raw_plots_dir, exist_ok=True)
pio.write_json(trace_no_map, os.path.join(raw_plots_dir, 'trace_no_map.plotly'))

In [8]:
# Load the trace figure back from its JSON export and display it.
trace_json_path = os.path.join(definitions.ROOT, 'raw_plots', 'trace_no_map.plotly')
fig = pio.read_json(trace_json_path)
fig.show()

In [12]:
# Distribution of the fuel-per-100km values, skipping the first two summary rows.
fuel_per_100km = summary_df[('vehicle_fuel', 'per_100km')].iloc[2:]
fuel_pdf_fig = pdf.simple_pdf(fuel_per_100km, labels="l/100km", xaxis_label="l/100km")
fuel_pdf_fig.show()

In [30]:
# Each raw record counts as one vehicle, so summing this column per timestamp
# gives the number of records at that timestamp.
raw_data_df['vehicle_count'] = 1

groupyby_obj = raw_data_df.groupby(['timestep_time'])
# Only the group keys are needed here; size() yields them without aggregating
# every column (sum() would also try to add up any non-numeric columns).
index = groupyby_obj.size().index

# Empty per-timestamp frame, filled by the aggregation cell that follows.
sum_data_df = pd.DataFrame(index=index)
sum_data_df['timestep_time'] = index

In [31]:
# Columns that are added up per timestamp.
sum_columns = ['vehicle_CO', 'vehicle_CO2', 'vehicle_HC', 'vehicle_NOx', 'vehicle_PMx',
               'vehicle_electricity', 'vehicle_fuel', 'vehicle_pos', 'vehicle_count', 'vehicle_waiting']

# Columns that are averaged per timestamp.
mean_columns = ['vehicle_speed']

# Select the columns before aggregating: this avoids summing/averaging every column
# of the raw data, and mean() no longer fails on non-numeric columns
# (pandas >= 2.0 does not silently drop them).
sum_data_df[sum_columns] = groupyby_obj[sum_columns].sum()
sum_data_df[mean_columns] = groupyby_obj[mean_columns].mean()



In [34]:
# Point the plotting module at the per-timestamp aggregates, then draw one
# line plot per column over the full time range.
line_plots.DF = sum_data_df
for plotted_column in ('vehicle_count', 'vehicle_speed'):
    line_plots.line_plot(time_column='timestep_time', plot_columns=plotted_column, time_range=None).show()

In [18]:
# Load the pre-binned emissions. The context manager closes the file handle,
# which the bare open() call leaked.
# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
with open(os.path.join(definitions.DATA_DIR, 'emissions_dict.pkl'), 'rb') as pickle_file:
    binned_emissions_dict = pickle.load(pickle_file)

In [19]:
from plots import emissions_heatmap
from functions import emissions

# Timestamp passed to the heatmap as its time_range.
time_range = pd.to_datetime('2020-02-13T10:00:00')

# Build the single-interval heatmap from the binned emissions and display it.
fig = emissions_heatmap.single_time_interval(binned_emissions_dict, time_range=time_range)
fig.show()

In [11]:
from functions.emissions import get_time_based_emissions_distribution

# Three three-hour windows on 2020-02-13 (morning, midday, evening) to compare.
time_interval = [
    ['2020-02-13 06:00:00', '2020-02-13 09:00:00'],
    ['2020-02-13 11:00:00', '2020-02-13 14:00:00'],
    ['2020-02-13 16:00:00', '2020-02-13 19:00:00'],
]

# Summary column handed to the distribution helper as its bin column.
plot_column = ('vehicle_fuel', 'mpg')

interval_distribution = get_time_based_emissions_distribution(
    emissions_df=raw_data_df,
    summary_df=summary_df,
    interval=time_interval,
    bin_column=plot_column,
    return_data=True,
)

# interval_distribution = pickle.load(open(os.path.join(definitions.DATA_DIR, 'interval_distribution.pkl'), 'rb'))
# %%
# Each entry is indexed as (interval bounds, values): entry[1] holds the values to
# plot and entry[0][0] / entry[0][1] the two bounds used for the legend label.
dist_list = []
for entry in interval_distribution:
    dist_list.append(list(entry[1]))

label_list = []
for entry in interval_distribution:
    lower_bound, upper_bound = entry[0][0], entry[0][1]
    label_list.append(' - '.join((str(lower_bound), str(upper_bound))))

# NOTE(review): the x-axis label mentions total time while plot_column is
# ('vehicle_fuel', 'mpg') -- confirm the label matches the plotted quantity.
pdf_fig = pdf.simple_pdf(pd_series=dist_list, labels=label_list, xaxis_label="Vehicle Total Time [s]")
pdf_fig.show()

# with open(os.path.join("images", "-".join(plot_column)), 'wb') as img:
#     img.write(pdf_fig.to_image(format='png', height=1080, width=1920))