In [None]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import wildboar
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.pipeline import make_pipeline, make_union
from sklearn.preprocessing import StandardScaler  # <-- Only from sklearn
from wildboar.transform import RocketTransform, HydraTransform, DiffTransform


# Silence every FutureWarning for the rest of the kernel session.
# The original note said this was "for random shapelet forest"; no such model is
# used in the visible cells, so the filter may no longer be needed -- TODO confirm.
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
## Path of the input data file; it is handed to pd.read_csv when the data is loaded.
file = "source/binded_file"

In [None]:
def check_and_trim(df, desired_size=900, group_col='id1'):
    """Cap every group of rows at ``desired_size`` rows.

    Rows are grouped by ``group_col``; for each group only the first
    ``desired_size`` rows (in their existing order) are kept. Groups that are
    already at or below that size are passed through unchanged -- they are
    NOT padded, so callers that need a fixed length must check for short
    groups themselves.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format frame containing the column ``group_col``.
    desired_size : int, default 900
        Maximum number of rows to keep per group.
    group_col : str, default 'id1'
        Column whose values define the groups.

    Returns
    -------
    pandas.DataFrame
        The kept rows, concatenated group by group in sorted key order with a
        fresh 0..n-1 index. Rows whose key is NaN are dropped (pandas
        ``groupby`` default). When there are no groups, an empty frame that
        still has the columns of ``df`` is returned.
    """
    # Collect the trimmed groups first and concatenate once: calling pd.concat
    # inside the loop re-copies everything accumulated so far on each iteration.
    trimmed_groups = [
        group.iloc[:desired_size]
        for _, group in df.groupby(group_col)
    ]

    if not trimmed_groups:
        # pd.concat([]) raises; return an empty frame that keeps the schema so
        # downstream column lookups still work.
        return df.iloc[0:0].reset_index(drop=True)

    return pd.concat(trimmed_groups, ignore_index=True)

# Feature columns; their order here is the order of the variable axis when the
# model input array is built.
x_vars = [
    'Brake', 'Accel', 'Lat_Pos', 'Speed', 'Wheel_Rate',
    'gaze_unif_x', 'gaze_unif_y', 'gaze_unif_z',
    'yaw', 'pitch', 'gaze_valid',
    'gaze_orig_x', 'gaze_orig_y', 'gaze_orig_z', 'gaze_orig_valid',
    'blinks', 'last_blink_dur',
    'left_eye_open_mm', 'right_eye_open_mm',
    'left_open_valid', 'right_open_valid',
    'head_pos_x', 'head_pos_y', 'head_pos_z', 'head_pos_valid',
    'inter_pup_dist',
    'left_pupil_dia', 'left_pup_vis',
    'right_pupil_dia', 'right_pup_vis',
]

# The same feature columns preceded by the sample identifier.
pred_vars = ['id1'] + x_vars

# Label column followed by the sample identifier.
y_vars = ['Condition', 'id1']

# One column per split; each is compared against 't' / 'v' to pick the
# training and test ids for that split.
status_names = [
    'status1', 'status2', 'status3', 'status4', 'status5',
    'status6', 'status7', 'status8', 'status9', 'status10',
]


# Map the verbose column headers of the input file to the short names used in
# the rest of the notebook. Keys must match the CSV headers exactly; a few of
# them end abruptly (e.g. '...pitch.d', '...opening.vali'), presumably because
# the export truncated long names -- verify against the file before "fixing" them.
column_aliases = {
    # CFS / VDS / lane columns
    'CFS.Brake.Pedal.Force': 'Brake',
    'CFS.Accelerator.Pedal.Position': 'Accel',
    'LaneDev2': 'Lat_Pos',
    'VDS.Veh.Speed': 'Speed',
    'CFS.Steering.Wheel.Angle': 'Wheel_Angle',
    'CFS.Steering.Wheel.Angle.Rate': 'Wheel_Rate',
    # Output.FovioDMEResults.dgaze.* columns
    'Output.FovioDMEResults.dgaze.unified.gaze.direction.x': 'gaze_unif_x',
    'Output.FovioDMEResults.dgaze.unified.gaze.direction.y': 'gaze_unif_y',
    'Output.FovioDMEResults.dgaze.unified.gaze.direction.z': 'gaze_unif_z',
    'Output.FovioDMEResults.dgaze.unified.gaze.direction.deg.yaw.deg': 'yaw',
    'Output.FovioDMEResults.dgaze.unified.gaze.direction.deg.pitch.d': 'pitch',
    'Output.FovioDMEResults.dgaze.unified.gaze.direction.valid': 'gaze_valid',
    'Output.FovioDMEResults.dgaze.unified.gaze.origin.m.x': 'gaze_orig_x',
    'Output.FovioDMEResults.dgaze.unified.gaze.origin.m.y': 'gaze_orig_y',
    'Output.FovioDMEResults.dgaze.unified.gaze.origin.m.z': 'gaze_orig_z',
    'Output.FovioDMEResults.dgaze.unified.gaze.origin.valid': 'gaze_orig_valid',
    # Output.FovioDMEResults.dme.core.eyelid.* columns
    'Output.FovioDMEResults.dme.core.eyelid.blink.counter': 'blinks',
    'Output.FovioDMEResults.dme.core.eyelid.last.blink.duration.us': 'last_blink_dur',
    'Output.FovioDMEResults.dme.core.eyelid.left.eyelid.opening.mm': 'left_eye_open_mm',
    'Output.FovioDMEResults.dme.core.eyelid.right.eyelid.opening.mm': 'right_eye_open_mm',
    'Output.FovioDMEResults.dme.core.eyelid.left.eyelid.opening.vali': 'left_open_valid',
    'Output.FovioDMEResults.dme.core.eyelid.right.eyelid.opening.val': 'right_open_valid',
    # Output.FovioDMEResults.dme.core.head.* columns
    'Output.FovioDMEResults.dme.core.head.head.pose.rotation.x.deg': 'head_pos_x',
    'Output.FovioDMEResults.dme.core.head.head.pose.rotation.y.deg': 'head_pos_y',
    'Output.FovioDMEResults.dme.core.head.head.pose.rotation.z.deg': 'head_pos_z',
    'Output.FovioDMEResults.dme.core.head.head.pose.valid': 'head_pos_valid',
    # Output.FovioDMEResults.dme.core.pupil.* columns
    'Output.FovioDMEResults.dme.core.pupil.inter.pupil.distance.mm': 'inter_pup_dist',
    'Output.FovioDMEResults.dme.core.pupil.left.pupil.diameter.mm': 'left_pupil_dia',
    'Output.FovioDMEResults.dme.core.pupil.left.pupil.visible': 'left_pup_vis',
    'Output.FovioDMEResults.dme.core.pupil.right.pupil.diameter.mm': 'right_pupil_dia',
    'Output.FovioDMEResults.dme.core.pupil.right.pupil.visible': 'right_pup_vis',
}

# Read the input file and apply the aliases. DataFrame.rename skips keys that
# are not present, so a missing header does not raise here.
cur_file = file
data = pd.read_csv(cur_file).rename(columns=column_aliases)

In [None]:
# Cap every id at the group size used by check_and_trim (shorter ids are left as-is).
data = check_and_trim(data)

# ---------------------------------------------------------------------------
# Build the 3D model input for ALL samples: (sample, variable, timestep).
# ---------------------------------------------------------------------------
ids = data['id1'].unique()
n_samples = len(ids)
n_vars = len(x_vars)
timesteps = 900  # rows required per id; change together with the trim size

X_all = np.zeros((n_samples, n_vars, timesteps))
y_all = np.zeros(n_samples)

# Row of X_all / y_all that belongs to each id.
id_idx_match = {id_: idx for idx, id_ in enumerate(ids)}

# Single pass over the frame instead of re-filtering it once per (id, variable).
# sort=False keeps groups in first-appearance order, matching `ids`.
for cur_id, group in data.groupby('id1', sort=False):
    if len(group) != timesteps:
        # Without this check a short group fails with an opaque NumPy
        # broadcasting error that does not say which id is at fault.
        raise ValueError(
            f"id1={cur_id!r} has {len(group)} rows but exactly {timesteps} are "
            "required; check_and_trim only shortens groups, it does not pad them."
        )
    row = id_idx_match[cur_id]
    # group[x_vars] is (timesteps, n_vars); transpose to (n_vars, timesteps).
    X_all[row] = group[x_vars].to_numpy(dtype=float).T
    # The label is taken from the first row of the id.
    y_all[row] = group['Condition'].iloc[0]

# Make sure the output folders exist so a fresh checkout can run end to end.
os.makedirs('destination/results', exist_ok=True)
os.makedirs('destination/ROCs', exist_ok=True)

# ---------------------------------------------------------------------------
# One model per split: status<a> == 't' marks training ids, 'v' marks test ids.
# ---------------------------------------------------------------------------
for a, sp in enumerate(status_names, start=1):
    train_ids = data.loc[data[sp] == 't', 'id1'].unique()
    test_ids = data.loc[data[sp] == 'v', 'id1'].unique()
    train_idx = [id_idx_match[id_] for id_ in train_ids]
    test_idx = [id_idx_match[id_] for id_ in test_ids]
    train_X = X_all[train_idx]
    train_y = y_all[train_idx]
    test_X = X_all[test_idx]
    test_y = y_all[test_idx]

    # Rocket features on the raw series, unioned with Hydra features on the
    # differenced series, then scaled and fed to a logistic regression.
    # A fresh pipeline is built for every split.
    hydra_diff = make_pipeline(
        make_union(
            RocketTransform(n_kernels=5000, random_state=1234, n_jobs=-1),
            make_pipeline(
                DiffTransform(),
                HydraTransform(n_groups=32, random_state=1234, n_jobs=-1),
            ),
        ),
        StandardScaler(),
        LogisticRegression(random_state=1234, max_iter=300),
    )

    hydra_diff.fit(train_X, train_y)
    y_pred = hydra_diff.predict(test_X)
    # Column 1 of predict_proba is used as the score for the ROC metrics.
    y_score = hydra_diff.predict_proba(test_X)[:, 1]
    roc_auc = roc_auc_score(test_y, y_score)
    print(f"Split {a} Test ROC AUC:", roc_auc)

    # Save per-sample predictions; test_ids, y_score, test_y and y_pred all
    # follow the order of test_idx.
    results_df = pd.DataFrame({
        'id1': test_ids,
        'predicted_prob': y_score,
        'target': test_y,
        'pred': y_pred
    })
    results_df.to_csv(f'destination/results/rocketresults_combined{a}.csv', index=False)

    # ROC curve for this split, drawn on an explicit figure/axes pair.
    fpr, tpr, thresholds = roc_curve(test_y, y_score)
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(fpr, tpr, label=f'Rocket+Logistic (AUC = {roc_auc:.2f})')
    ax.plot([0, 1], [0, 1], linestyle='--', color='gray')  # chance diagonal
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title(f'ROC Curve Vehicle 60s Split {a}')
    ax.legend()
    fig.savefig(f'destination/ROCs/Split{a}.png')
    plt.show()
    # Release the figure so ten splits do not leave ten open figures behind.
    plt.close(fig)

