### Load Config

In [1]:
from yaml import load, Loader
from bunch import Bunch

# Read the run configuration once. The context manager guarantees the file
# handle is closed even if YAML parsing fails.
# NOTE(review): yaml's `Loader` is the unrestricted loader and can construct
# arbitrary Python objects; only use it with a trusted config.yaml
# (consider SafeLoader if the config contains plain data only).
with open("config.yaml", 'r') as stream:
    # Bunch exposes the parsed mapping with attribute access (config.window_sizes).
    config = Bunch(load(stream, Loader=Loader))

### Read Data and Run Cross-Validation

In [2]:
import pandas as pd
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import get_scorer
from sklearn.model_selection import cross_validate
from sklearn.model_selection import LeaveOneGroupOut

# Scorer names evaluated for every cross-validation fold.
SCORING = [
    'roc_auc',
    'accuracy',
    'balanced_accuracy',
    'f1_micro',
    'average_precision',
    'recall',
    'precision',
]

# Candidate hyper-parameter values (n_estimators / max_features).
PARAMETERS = {
    'n_estimators': [50, 100, 150],
    'max_features': ['sqrt', 'log2'],
}

# Signal groups are added cumulatively: each combination extends the previous
# one by the next group in this order.
_SIGNAL_ORDER = ['driver_behavior', 'vehicle_behavior', 'navi', 'radar']
SIGNAL_COMBOS = [_SIGNAL_ORDER[:count] for count in range(1, len(_SIGNAL_ORDER) + 1)]

def collect_scores(scoring, y_true, y_pred):
    """Evaluate several named scikit-learn metrics on one set of predictions.

    Parameters
    ----------
    scoring : iterable of str
        Scorer names understood by ``sklearn.metrics.get_scorer``.
    y_true : array-like
        Ground-truth targets, passed as the first argument of each metric.
    y_pred : array-like
        Predictions, passed as the second argument of each metric.
        NOTE(review): threshold-based metrics such as 'roc_auc' or
        'average_precision' are normally fed scores/probabilities rather than
        hard labels - confirm what callers pass here.

    Returns
    -------
    dict
        Maps each scorer name to the raw value of its underlying metric
        function (the scorer's sign convention is not applied).
    """
    scores_dict = {}
    for scorer_name in scoring:
        scorer = get_scorer(scorer_name)
        # A scorer bundles its metric with fixed keyword arguments (for
        # example average='micro' for 'f1_micro'). Forward them so the metric
        # computed here matches the one the scorer name stands for instead of
        # falling back to the metric function's defaults.
        metric_kwargs = getattr(scorer, '_kwargs', {})
        scores_dict[scorer_name] = scorer._score_func(y_true, y_pred, **metric_kwargs)
    return scores_dict

# Model under evaluation: standardize features, rotate them with PCA (all
# components kept), then fit a class-balanced, L1-penalized logistic regression.
# NOTE(review): no random_state is set on the classifier; confirm whether
# run-to-run reproducibility requires fixing a seed.
_classifier = LogisticRegression(
    penalty='l1',
    solver='liblinear',
    max_iter=1000,
    class_weight='balanced',
)
pipeline = make_pipeline(StandardScaler(), PCA(), _classifier)

# Leave-one-subject-out evaluation of `pipeline` for every window size,
# cumulative signal combination and driving scenario. One CSV of per-fold
# train/test scores is written per (window size, signal combo, scenario).
for window_size in config.window_sizes:
    for combo in SIGNAL_COMBOS:
        # File-name suffix, e.g. '_driver_behavior_vehicle_behavior'.
        signal_string = ''.join('_' + signal for signal in combo)

        # One feature table per signal group, joined column-wise on their index.
        per_signal_features = [
            pd.read_parquet('out/can_data_features_{}_windowsize_{}s.parquet'.format(signal, window_size))
            for signal in combo
        ]
        can_data_features = pd.concat(per_signal_features, axis=1)

        # Binary target: rows whose second index level is 'above' are the
        # positive class, every other row is 0.
        can_data_features.loc[:, 'label'] = 0
        can_data_features.loc[(slice(None), 'above', slice(None)), 'label'] = 1

        # NOTE(review): missing values are replaced by the sentinel 10e6 (= 1e7)
        # before scaling; confirm this magnitude is intended.
        can_data_features = can_data_features.replace(np.nan, 10e6)

        # drop below BAC level for binary classification
        # (independent of the scenario, so done once per signal combination)
        can_data_features_no_below = can_data_features.drop('below', level=1)

        for scenario in ['highway', 'rural', 'town']:
            print('signals: {}, window size: {}s, scenario: {}'.format(signal_string, window_size, scenario))

            # Keep only the rows of the current scenario (third index level).
            can_data_features_bin = can_data_features_no_below.loc[:, :, scenario, :]

            # Every subject forms one cross-validation group.
            groups = list(can_data_features_bin.index.get_level_values('subject_id'))
            # Used as the row index of the result table; this relies on
            # LeaveOneGroupOut producing its folds in sorted unique-group order.
            subject_ids = np.unique(groups)

            X = can_data_features_bin.drop(columns='label').to_numpy()
            y = can_data_features_bin['label'].to_numpy()

            cv = cross_validate(
                estimator=pipeline,
                X=X,
                y=y,
                scoring=SCORING,
                verbose=3,
                return_estimator=True,
                return_train_score=True,
                cv=LeaveOneGroupOut(),
                groups=groups,
                n_jobs=None,
            )

            # Fitted estimators are not tabular data; keep them out of the CSV.
            exclude = ['estimator']
            fold_scores = pd.DataFrame({k: v for k, v in cv.items() if k not in exclude})
            fold_scores.set_index(subject_ids).to_csv(
                'out/pred_results_windowsize_{}{}_{}.csv'.format(window_size, signal_string, scenario),
                index=True,
                header=True,
            )

signals: _driver_behavior, window size: 5s, scenario: highway
                                                                            steer_mean  \
subject_id subject_state subject_scenario datetime                                       
001        above         highway          2021-08-27 13:09:43.546000+02:00   20.832133   
                                          2021-08-27 13:09:44.546000+02:00   32.299733   
                                          2021-08-27 13:09:45.546000+02:00   41.973600   
                                          2021-08-27 13:09:46.546000+02:00   49.191400   
                                          2021-08-27 13:09:47.546000+02:00   49.813600   
...                                                                                ...   
035        sober         highway          2021-11-10 10:09:10.876000+01:00   13.869370   
                                          2021-11-10 10:09:11.876000+01:00    9.008144   
                                      

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] END  accuracy: (train=0.640, test=0.524) average_precision: (train=0.690, test=0.500) balanced_accuracy: (train=0.640, test=0.524) f1_micro: (train=0.640, test=0.524) precision: (train=0.645, test=0.529) recall: (train=0.621, test=0.535) roc_auc: (train=0.696, test=0.503) total time=   3.4s


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    3.8s remaining:    0.0s


[CV] END  accuracy: (train=0.636, test=0.648) average_precision: (train=0.683, test=0.693) balanced_accuracy: (train=0.636, test=0.651) f1_micro: (train=0.636, test=0.648) precision: (train=0.640, test=0.614) recall: (train=0.619, test=0.744) roc_auc: (train=0.690, test=0.713) total time=  17.9s


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   22.1s remaining:    0.0s


[CV] END  accuracy: (train=0.638, test=0.560) average_precision: (train=0.687, test=0.594) balanced_accuracy: (train=0.638, test=0.560) f1_micro: (train=0.638, test=0.560) precision: (train=0.643, test=0.562) recall: (train=0.620, test=0.518) roc_auc: (train=0.692, test=0.596) total time=  16.9s
[CV] END  accuracy: (train=0.639, test=0.610) average_precision: (train=0.686, test=0.618) balanced_accuracy: (train=0.639, test=0.609) f1_micro: (train=0.639, test=0.610) precision: (train=0.644, test=0.626) recall: (train=0.618, test=0.537) roc_auc: (train=0.692, test=0.643) total time=  39.5s
[CV] END  accuracy: (train=0.638, test=0.609) average_precision: (train=0.686, test=0.642) balanced_accuracy: (train=0.638, test=0.613) f1_micro: (train=0.638, test=0.609) precision: (train=0.642, test=0.687) recall: (train=0.619, test=0.431) roc_auc: (train=0.693, test=0.641) total time=  18.4s
[CV] END  accuracy: (train=0.636, test=0.679) average_precision: (train=0.683, test=0.718) balanced_accuracy:

[Parallel(n_jobs=1)]: Done  31 out of  31 | elapsed: 10.6min finished


                                                                            steer_mean  \
subject_id subject_state subject_scenario datetime                                       
001        above         rural            2021-08-27 12:45:56.794000+02:00   -3.846133   
                                          2021-08-27 12:45:57.794000+02:00   -3.206133   
                                          2021-08-27 12:45:58.794000+02:00   -2.566133   
                                          2021-08-27 12:45:59.794000+02:00   -2.153600   
                                          2021-08-27 12:46:00.794000+02:00   -2.449400   
...                                                                                ...   
035        sober         rural            2021-11-10 10:20:44.444000+01:00  100.913067   
                                          2021-11-10 10:20:45.444000+01:00   68.225702   
                                          2021-11-10 10:20:46.444000+01:00    8.034725   
          

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] END  accuracy: (train=0.630, test=0.581) average_precision: (train=0.681, test=0.594) balanced_accuracy: (train=0.630, test=0.577) f1_micro: (train=0.630, test=0.581) precision: (train=0.636, test=0.598) recall: (train=0.596, test=0.424) roc_auc: (train=0.681, test=0.603) total time=  16.6s


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   17.0s remaining:    0.0s


[CV] END  accuracy: (train=0.630, test=0.607) average_precision: (train=0.680, test=0.633) balanced_accuracy: (train=0.629, test=0.610) f1_micro: (train=0.630, test=0.607) precision: (train=0.635, test=0.579) recall: (train=0.595, test=0.718) roc_auc: (train=0.680, test=0.659) total time=  23.4s


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   40.8s remaining:    0.0s


[CV] END  accuracy: (train=0.632, test=0.540) average_precision: (train=0.684, test=0.559) balanced_accuracy: (train=0.632, test=0.540) f1_micro: (train=0.632, test=0.540) precision: (train=0.638, test=0.542) recall: (train=0.597, test=0.536) roc_auc: (train=0.683, test=0.560) total time=  22.9s
[CV] END  accuracy: (train=0.632, test=0.553) average_precision: (train=0.682, test=0.563) balanced_accuracy: (train=0.632, test=0.554) f1_micro: (train=0.632, test=0.553) precision: (train=0.637, test=0.582) recall: (train=0.596, test=0.395) roc_auc: (train=0.682, test=0.565) total time=  21.1s
[CV] END  accuracy: (train=0.629, test=0.639) average_precision: (train=0.679, test=0.680) balanced_accuracy: (train=0.629, test=0.636) f1_micro: (train=0.629, test=0.639) precision: (train=0.634, test=0.700) recall: (train=0.593, test=0.464) roc_auc: (train=0.679, test=0.700) total time=  20.9s
