### Feature Analysis

#### Sliding window

##### According to state overall

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotting_util as putil

plt.rcParams['figure.facecolor'] = 'white'

# Signal names, one list per feature file.  Each name is combined with an entry
# of STATS below into a '<signal>_<stat>' column name.
DRIVER_BEHAVIOR = [
    'steer', 'gas', 'brake', 'SteerSpeed',
    'indicator_left', 'indicator_right',
    'gas_vel', 'brake_vel',
    'gas_acc', 'brake_acc', 'SteerSpeed_acc',
    'gas_jerk', 'brake_jerk', 'SteerSpeed_jerk',
    'SteerError',
]
VEHICLE_BEHAVIOR = [
    'velocity', 'acc', 'acc_jerk',
    'latvel', 'YawRate',
    'latvel_acc', 'latvel_jerk',
    'YawRate_acc', 'YawRate_jerk',
]
RADAR = [
    'lane_position', 'lane_distance_left_edge', 'lane_distance_right_edge',
    'lane_crossing', 'is_crossing_lane', 'is_crossing_lane_left', 'is_crossing_lane_right',
    'lane_crossing_left', 'lane_crossing_right',
    'lane_switching', 'opp_lane_switching',
    'Ttc', 'TtcOpp', 'Thw', 'Dhw',
]
NAVI = ['dtoint', 'SpeedDif', 'speed_limit_exceeded']

# Signal-group name (also the placeholder value in the feature file name) -> its signals.
SIGNALS = dict(
    driver_behavior=DRIVER_BEHAVIOR,
    vehicle_behavior=VEHICLE_BEHAVIOR,
    radar=RADAR,
    navi=NAVI,
)

# Statistic suffixes; one KDE panel is drawn per entry.
STATS = [
    'mean', 'std', 'min', 'max', 'q5', 'q95',
    'range', 'iqrange', 'iqrange_5_95',
    'sum', 'energy', 'skewness', 'kurtosis', 'peaks', 'rms', 'lineintegral',
    'n_above_mean', 'n_below_mean', 'n_sign_changes', 'ptp',
]

# For every signal group, load its 60s-window feature table and save one figure
# per signal: a KDE panel per statistic, coloured by the 'subject_state' column.
for signal_type in SIGNALS:
    can_data = pd.read_parquet('out/can_data_features_{}_windowsize_60s.parquet'.format(signal_type))
    for signal in SIGNALS[signal_type]:
        fig, axes = putil.create_plot(len(STATS), constrained_layout=True)
        putil.set_figure_size(fig, 20, 60)
        putil.set_fig_title(fig, signal)
        # Pair each statistic with its own panel, in STATS order.
        for panel, stat in zip(axes, STATS):
            feature_column = signal + '_' + stat
            sns.kdeplot(x=feature_column, data=can_data, hue='subject_state', ax=panel)
        output_path = 'out/kdeplots/general/sliding_window/{}_{}.png'.format(signal_type, signal)
        plt.savefig(output_path)
        # Close the figure once it is on disk so open figures do not pile up.
        plt.close(fig)

##### According to state per scenario

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotting_util as putil

plt.rcParams['figure.facecolor'] = 'white'

# Signal names, one list per feature file.  Each name is combined with an entry
# of STATS below into a '<signal>_<stat>' column name.
DRIVER_BEHAVIOR = [
    'steer', 'gas', 'brake', 'SteerSpeed',
    'indicator_left', 'indicator_right',
    'gas_vel', 'brake_vel',
    'gas_acc', 'brake_acc', 'SteerSpeed_acc',
    'gas_jerk', 'brake_jerk', 'SteerSpeed_jerk',
    'SteerError',
]
VEHICLE_BEHAVIOR = [
    'velocity', 'acc', 'acc_jerk',
    'latvel', 'YawRate',
    'latvel_acc', 'latvel_jerk',
    'YawRate_acc', 'YawRate_jerk',
]
RADAR = [
    'lane_position', 'lane_distance_left_edge', 'lane_distance_right_edge',
    'lane_crossing', 'is_crossing_lane', 'is_crossing_lane_left', 'is_crossing_lane_right',
    'lane_crossing_left', 'lane_crossing_right',
    'lane_switching', 'opp_lane_switching',
    'Ttc', 'TtcOpp', 'Thw', 'Dhw',
]
NAVI = ['dtoint', 'SpeedDif', 'speed_limit_exceeded']

# Signal-group name (also the placeholder value in the feature file name) -> its signals.
SIGNALS = dict(
    driver_behavior=DRIVER_BEHAVIOR,
    vehicle_behavior=VEHICLE_BEHAVIOR,
    radar=RADAR,
    navi=NAVI,
)

# Statistic suffixes; one KDE panel is drawn per entry.
STATS = [
    'mean', 'std', 'min', 'max', 'q5', 'q95',
    'range', 'iqrange', 'iqrange_5_95',
    'sum', 'energy', 'skewness', 'kurtosis', 'peaks', 'rms', 'lineintegral',
    'n_above_mean', 'n_below_mean', 'n_sign_changes', 'ptp',
]

# Scenario labels used to slice the feature table and to name the output files.
SCENARIOS = ['highway', 'rural', 'town']

# Same KDE grid as the overall analysis, but drawn separately for each scenario:
# one figure per (scenario, signal) with a panel per statistic.
for signal_type in SIGNALS:
    can_data = pd.read_parquet('out/can_data_features_{}_windowsize_60s.parquet'.format(signal_type))
    for scenario in SCENARIOS:
        # Four-part row selector with the scenario in the third position
        # (presumably a 4-level row MultiIndex whose third level is the scenario — confirm).
        can_data_scenario = can_data.loc[:, :, scenario, :]
        for signal in SIGNALS[signal_type]:
            fig, axes = putil.create_plot(len(STATS), constrained_layout=True)
            putil.set_figure_size(fig, 20, 60)
            figure_title = scenario + ': ' + signal
            putil.set_fig_title(fig, figure_title)
            # Pair each statistic with its own panel, in STATS order.
            for panel, stat in zip(axes, STATS):
                feature_column = signal + '_' + stat
                sns.kdeplot(x=feature_column, data=can_data_scenario, hue='subject_state', ax=panel)
            output_path = 'out/kdeplots/scenarios/sliding_window/{}_{}_{}.png'.format(scenario, signal_type, signal)
            plt.savefig(output_path)
            # Close the figure once it is on disk so open figures do not pile up.
            plt.close(fig)

#### Events

##### Per scenario

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotting_util as putil

plt.rcParams['figure.facecolor'] = 'white'

# Signal names, one list per signal group.  Each name is combined with an entry
# of STATS below into a '<signal>_<stat>' column name.
DRIVER_BEHAVIOR = [
    'steer', 'gas', 'brake', 'SteerSpeed',
    'indicator_left', 'indicator_right',
    'gas_vel', 'brake_vel',
    'gas_acc', 'brake_acc', 'SteerSpeed_acc',
    'gas_jerk', 'brake_jerk', 'SteerSpeed_jerk',
    'SteerError',
]
VEHICLE_BEHAVIOR = [
    'velocity', 'acc', 'acc_jerk',
    'latvel', 'YawRate',
    'latvel_acc', 'latvel_jerk',
    'YawRate_acc', 'YawRate_jerk',
]
RADAR = [
    'lane_position', 'lane_distance_left_edge', 'lane_distance_right_edge',
    'lane_crossing', 'is_crossing_lane', 'is_crossing_lane_left', 'is_crossing_lane_right',
    'lane_crossing_left', 'lane_crossing_right',
    'lane_switching', 'opp_lane_switching',
    'Ttc', 'TtcOpp', 'Thw', 'Dhw',
]
NAVI = ['dtoint', 'SpeedDif', 'speed_limit_exceeded']

# Signal-group name (used in the output file names) -> its signals.
SIGNALS = dict(
    driver_behavior=DRIVER_BEHAVIOR,
    vehicle_behavior=VEHICLE_BEHAVIOR,
    radar=RADAR,
    navi=NAVI,
)

# Statistic suffixes; one KDE panel is drawn per entry.
STATS = [
    'mean', 'std', 'min', 'max', 'q5', 'q95',
    'range', 'iqrange', 'iqrange_5_95',
    'sum', 'energy', 'skewness', 'kurtosis', 'peaks', 'rms', 'lineintegral',
    'n_above_mean', 'n_below_mean', 'n_sign_changes', 'ptp',
]

# Event types; each one has its own event-feature parquet file.
EVENTS = ['brake', 'brake_to_gas', 'gas', 'gas_to_brake', 'overtaking', 'road_sign', 'turning']

# Scenario labels used to slice the event table and to name the output files.
SCENARIOS = ['highway', 'rural', 'town']

# For every event type and scenario, save one figure per signal with a KDE panel
# per statistic, coloured by the 'subject_state' column.
for event in EVENTS:
    event_data = pd.read_parquet('out/can_data_{}_events_features.parquet'.format(event))
    for scenario in SCENARIOS:
        # Optional filter, currently disabled — drops rows with selected sign types:
        # event_data = event_data[~event_data['sign_type'].isin([1, 2, 140, 141, 4, 5])]

        # The turning/highway combination is skipped
        # (presumably there is no such data — confirm).
        if (event, scenario) == ('turning', 'highway'):
            continue
        # Four-part row selector with the scenario in the third position
        # (presumably a 4-level row MultiIndex whose third level is the scenario — confirm).
        event_data_scenario = event_data.loc[:, :, scenario, :]
        for signal_type in SIGNALS:
            for signal in SIGNALS[signal_type]:
                fig, axes = putil.create_plot(len(STATS), constrained_layout=True)
                putil.set_figure_size(fig, 20, 60)
                putil.set_fig_title(fig, signal)
                # Pair each statistic with its own panel, in STATS order.
                for panel, stat in zip(axes, STATS):
                    feature_column = signal + '_' + stat
                    sns.kdeplot(x=feature_column, data=event_data_scenario, hue='subject_state', ax=panel)
                output_path = 'out/kdeplots/scenarios/events/{}_{}_event_{}_{}.png'.format(scenario, event, signal_type, signal)
                plt.savefig(output_path)
                # Close the figure once it is on disk so open figures do not pile up.
                plt.close(fig)

##### Events in sliding window per scenario

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotting_util as putil

plt.rcParams['figure.facecolor'] = 'white'

# Event types; each one has its own events-per-window parquet file.
EVENTS = ['brake', 'brake_to_gas', 'gas', 'gas_to_brake', 'overtaking', 'road_sign', 'turning']

# Signals whose per-event features are plotted (one figure per signal).
SIGNALS = [
    'brake', 'brake_acc', 'brake_jerk', 'brake_vel',
    'gas', 'gas_acc', 'gas_jerk', 'gas_vel',
    'steer', 'SteerSpeed', 'SteerSpeed_acc', 'SteerSpeed_jerk',
    'speed_limit_exceeded', 'SpeedDif',
    'Dhw', 'is_crossing_lane_left', 'is_crossing_lane_right', 'lane_crossing',
    'lane_distance_left_edge', 'lane_distance_right_edge',
    'Ttc', 'TtcOpp',
    'acc', 'acc_jerk', 'velocity',
    'latvel_acc', 'latvel_jerk',
    'YawRate_acc', 'YawRate_jerk', 'YawRate',
]

# Statistic suffixes that form the '<signal>_<stat>' feature names.
SELECTED_STATS = [
    'mean', 'std', 'min', 'max', 'q5', 'q95',
    'iqrange', 'iqrange_5_95', 'skewness', 'kurtosis', 'peaks', 'rms',
]
# Suffixes appended after '-' to every per-event feature column.
STATS = ['mean', 'std']
# Suffixes of the '<event>_event_<stat>' columns plotted in their own figure.
ADDITIONAL_STATS = ['duration-mean', 'duration-std', 'ratio', 'count']

# One inner list per signal (same order as SIGNALS) holding its '<signal>_<stat>' names.
SELECTED_FEATURES = [
    ['_'.join((signal, stat)) for stat in SELECTED_STATS]
    for signal in SIGNALS
]

# Scenario labels used to slice the table and to name the output files.
SCENARIOS = ['highway', 'rural', 'town']

# For every event type and scenario, save (a) one figure with the additional
# per-window event statistics and (b) one figure per signal with a KDE panel for
# every '<feature>-<stat>' column, all coloured by the 'subject_state' column.
for event in EVENTS:
    can_data_events_per_window = pd.read_parquet('out/can_data_{}_events_per_window_windowsize_60s.parquet'.format(event))
    for scenario in SCENARIOS:
        # The turning/highway combination is skipped
        # (presumably there is no such data — confirm).
        if (event, scenario) == ('turning', 'highway'):
            continue
        # Four-part row selector with the scenario in the third position
        # (presumably a 4-level row MultiIndex whose third level is the scenario — confirm).
        can_data_events_per_window_scenario = can_data_events_per_window.loc[:, :, scenario, :]

        # (a) additional statistics of the event itself
        fig, axes = putil.create_plot(len(ADDITIONAL_STATS), constrained_layout=True)
        putil.set_figure_size(fig, 20, 15)
        putil.set_fig_title(fig, event)
        for panel, stat in zip(axes, ADDITIONAL_STATS):
            sns.kdeplot(x=event + '_event_' + stat, data=can_data_events_per_window_scenario, hue='subject_state', ax=panel)
        plt.savefig('out/kdeplots/scenarios/events_sliding_window/{}_{}_event_per_window_additional_stats.png'.format(scenario, event))
        plt.close(fig)

        # (b) per-signal figures; SIGNALS and SELECTED_FEATURES share the same order
        for signal_name, signal_features in zip(SIGNALS, SELECTED_FEATURES):
            # Feature-major, stat-minor order, matching panel index j*len(STATS)+k.
            panel_columns = [
                event + '_event_' + feature + '-' + stat
                for feature in signal_features
                for stat in STATS
            ]
            fig, axes = putil.create_plot(len(signal_features) * len(STATS), constrained_layout=True)
            putil.set_figure_size(fig, 20, 60)
            putil.set_fig_title(fig, event + ' event: ' + signal_name)
            for panel, column in zip(axes, panel_columns):
                sns.kdeplot(x=column, data=can_data_events_per_window_scenario, hue='subject_state', ax=panel)
            plt.savefig('out/kdeplots/scenarios/events_sliding_window/{}_{}_event_per_window_{}.png'.format(scenario, event, signal_name))
            plt.close(fig)

### Load Config

In [None]:
from yaml import load, Loader
from bunch import Bunch
# Make str(bunch) produce the same text as repr(bunch).
Bunch.__str__ = Bunch.__repr__

# Parse the run configuration.  The context manager closes the file handle as
# soon as parsing is done instead of leaving it open for the rest of the session.
# NOTE(review): yaml's full `Loader` can construct arbitrary Python objects; that is
# acceptable for a trusted local config.yaml — prefer SafeLoader if the file could
# ever come from an untrusted source.
with open("config.yaml", 'r') as stream:
    config = Bunch(load(stream, Loader=Loader))

import pandas as pd
import numpy as np

# Only the subject_id column is read; it provides the list of distinct subjects
# that the later cells iterate over.
can_data = pd.read_parquet('out/can_data.parquet', columns=['subject_id'])
subject_ids = np.unique(can_data['subject_id'])
# Number of top-ranked features shown in the coefficient and SHAP plots.
top_n = 10

#### Logistic Regression coefficients

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotting_util as putil
from joblib import load

plt.rcParams['figure.facecolor'] = 'white'

SIGNAL_COMBOS = ['driver_behavior', 'vehicle_behavior', 'navi', 'radar']

SCENARIOS = ['highway', 'rural', 'town']

# Suffix naming the signal combination in the selected-features file names.
# It does not depend on the window size, so it is built once.
signal_string = ''.join('_' + signal for signal in SIGNAL_COMBOS)

# For each scenario, plot the per-subject logistic-regression coefficients of the
# top_n features (ranked by the absolute value of the mean coefficient).
# NOTE(review): neither the estimator path nor the output PDF name contains
# window_size, so with several window sizes each pass overwrites the previous
# PDF — confirm this is intended.
for window_size in config.window_sizes:
    for scenario in SCENARIOS:
        # Coefficients exist only for logistic regression; skip before creating
        # a figure so no empty figure is left behind for other classifiers.
        if config.classifier_type != 'log_regression':
            continue

        # NOTE(review): one panel per window size is requested but only axes[0]
        # is drawn on below — confirm whether the other panels are needed.
        fig, axes = putil.create_plot(len(config.window_sizes), 1)
        putil.set_figure_size(fig, 10, 10)
        putil.set_fig_title(fig, scenario)

        # Names of the features selected for this window size / scenario; they
        # become the column labels of the coefficient table.
        cols = pd.read_csv('out/results/{}_{}_selected_features_windowsize_{}{}_{}.csv'.format(
                                     config.classifier_type, config.clf_mode, window_size, signal_string, scenario
                                     ), usecols=['selected_features']).squeeze('columns').to_list()

        # One row per subject: the first coefficient row of that subject's saved estimator.
        coefficients = pd.DataFrame(
            [
                load('out/estimators/{}_{}_{}.joblib'.format(subject_id, config.classifier_type, scenario)).coef_[0]
                for subject_id in subject_ids
            ],
            columns=cols,
        )
        top_features = coefficients.mean(axis=0).abs().nlargest(top_n).index.to_list()

        ax = axes[0]
        sns.swarmplot(ax=ax, data=coefficients.loc[:, top_features], orient='h', size=3, color='royalblue')
        putil.set_ax_grid_lines(ax, flag=True, style=':', axis='y', color='grey')
        # Vertical reference line at a coefficient of zero.
        ax.axvline(0, linestyle=':', color='grey', linewidth=1)
        putil.set_ax_visible_spines(ax, False, False, True, False)
        putil.set_ax_xticks(ax, list(range(-4, 3)), list(range(-4, 3)))
        putil.set_ax_ticks_size(ax, size=14)
        putil.set_ax_axis_labels(ax, 'Coefficient', 'Top {} features'.format(top_n))

        plt.tight_layout()
        plt.savefig('out/results/{}_coef_top_{}_{}.pdf'.format(config.classifier_type, top_n, scenario))


#### SHAP values

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotting_util as putil
from joblib import load
from sklearn.preprocessing import StandardScaler
import shap

SIGNAL_COMBOS = ['driver_behavior', 'vehicle_behavior', 'navi', 'radar']

# Defined here as well so this cell does not depend on another cell having run first.
SCENARIOS = ['highway', 'rural', 'town']

# Suffix naming the signal combination in the selected-features file names.
# It does not depend on the window size, so it is built once.
signal_string = ''.join('_' + signal for signal in SIGNAL_COMBOS)

# For each window size and scenario: compute SHAP values with every subject's
# saved estimator, average them per row, and save a summary plot of the top_n features.
# NOTE(review): the output PDF name does not contain window_size, so with several
# window sizes each pass overwrites the previous file — confirm this is intended.
for window_size in config.window_sizes:
    # Column-wise concatenation of the feature tables of all signal groups.
    can_data_features = pd.concat(
        [
            pd.read_parquet('out/can_data_features_{}_windowsize_{}s.parquet'.format(signal, window_size))
            for signal in SIGNAL_COMBOS
        ],
        axis=1,
    )
    # drop below BAC level for binary classification
    can_data_features.drop('below', level=1, inplace=True)

    for scenario in SCENARIOS:
        cols = pd.read_csv('out/results/{}_{}_selected_features_windowsize_{}{}_{}.csv'.format(
                                    config.classifier_type, config.clf_mode, window_size, signal_string, scenario
                                    ), usecols=['selected_features']).squeeze('columns').to_list()

        can_data_features_cols = can_data_features[cols]

        all_shap_values = []
        for subject_id in subject_ids:
            # Rows whose first index level differs from subject_id.
            # NOTE(review): rows are not filtered by scenario although the estimator
            # and the feature list are scenario-specific — confirm this is intended.
            X_train = can_data_features_cols.loc[can_data_features.index.get_level_values(0) != subject_id]

            # Keep the row labels: scaling returns a bare array.
            ind = X_train.index
            X_train = StandardScaler().fit_transform(X_train)

            est = load('out/estimators/{}_{}_{}.joblib'.format(subject_id, config.classifier_type, scenario))
            explainer = shap.Explainer(est, X_train, feature_names=cols, seed=42)
            # Computed once per subject and re-attached to the original row labels.
            subject_shap_values = explainer.shap_values(X_train)
            all_shap_values.append(pd.DataFrame(subject_shap_values, index=ind))

        shap_values = pd.concat(all_shap_values, axis=0)

        plt.figure(dpi=150)
        # Average the SHAP values of each row over all estimators that saw it.
        # NOTE(review): groupby sorts its keys, while the feature matrix passed next
        # to it keeps the frame's own row order; the two only line up if that index
        # is already sorted and unique — confirm.
        shap.summary_plot(shap_values.groupby(shap_values.index).mean().to_numpy(), can_data_features_cols.to_numpy(), feature_names=cols, max_display=top_n, show=False)
        # Adjust the aspect of the last axes of the figure (presumably the colour bar — confirm).
        plt.gcf().axes[-1].set_aspect(20)
        plt.gcf().axes[-1].set_box_aspect(20)
        plt.tight_layout()
        plt.savefig('out/results/{}_shap_values_{}.pdf'.format(config.classifier_type, scenario))