In [None]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('../src/')


import pandas as pd

In [4]:
from src import vehicular_data_columns, subject_data_columns, neurophysiological_data_columns, video_data_analysis_columns, event_flags_columns
from src.preprocess import remove_invalid_data
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from torch.optim.lr_scheduler import StepLR
import pandas as pd

In [5]:

# Paths to the files
feature_track_path = '../data/Feature_Track.xlsx'
feature_simulation_path = '../data/Feature_Simulation.xlsx'

# Reading the datasets
feature_track_df = pd.read_excel(feature_track_path)
feature_simulation_df = pd.read_excel(feature_simulation_path)

# cleaned dataset
cleaned_feature_track = remove_invalid_data(
    feature_track_df,
    columns_with_defined_valid_values={'straight': [0, 1]},
)

cleaned_feature_simulation = remove_invalid_data(
    feature_simulation_df,
    columns_with_defined_valid_values={'straight': [0, 1]},
)


In [34]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class LabelModel(nn.Module):
    def __init__(self, num_features, num_event_classes):
        super(LabelModel, self).__init__()
        self.layer_1 = nn.Linear(num_features, 128)
        self.layer_2 = nn.Linear(128, 64)
        self.output = nn.Linear(64, num_event_classes)

    def forward(self, x):
        x = F.relu(self.layer_1(x))
        x = F.relu(self.layer_2(x))
        return torch.sigmoid(self.output(x))  # Sigmoid for binary classification

class RiskPredictionModel(nn.Module):
    def __init__(self, num_features, num_labels):
        super(RiskPredictionModel, self).__init__()
        self.label_model = LabelModel(num_features, num_labels)
        self.risk_layer_1 = nn.Linear(num_features + num_labels, 256)
        self.risk_layer_2 = nn.Linear(256, 128)
        self.risk_output = nn.Linear(128, 1)

    def forward(self, x):
        labels = self.label_model(x)

        # Concatenate the outputs with the original features for risk prediction
        combined = torch.cat((x, labels), dim=1)

        # Final layers for risk prediction
        risk = F.relu(self.risk_layer_1(combined))
        risk = F.relu(self.risk_layer_2(risk))
        return torch.sigmoid(self.risk_output(risk))



In [42]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import precision_recall_fscore_support


# Function to preprocess the data
def preprocess_data(simulation_df, track_df, vehicle_columns, neuro_columns, label_columns, risk_column):
    simulation_df = simulation_df.copy()
    track_df = track_df.copy()

    # Standardize Neuro and Vehicle Features
    scaler = StandardScaler()
    simulation_df[vehicle_columns + neuro_columns] = scaler.fit_transform(simulation_df[vehicle_columns + neuro_columns])
    track_df[vehicle_columns + neuro_columns] = scaler.transform(track_df[vehicle_columns + neuro_columns])

    # One-Hot Encode Flags
    encoder = OneHotEncoder(sparse=False)
    simulation_flags = encoder.fit_transform(simulation_df[label_columns])
    track_flags = encoder.transform(track_df[label_columns])

    # Replace flag columns with one-hot encoded versions
    label_encoded_columns = ['label_' + str(i) for i in range(simulation_flags.shape[1])]
    simulation_df[label_encoded_columns] = simulation_flags
    track_df[label_encoded_columns] = track_flags

    # Drop original flag columns
    simulation_df.drop(label_columns, axis=1, inplace=True)
    track_df.drop(label_columns, axis=1, inplace=True)

    # Create Simplified Risk Evaluation Column
    simulation_df['simplified_risk'] = simulation_df[risk_column].apply(lambda x: 1 if x > 0 else 0)
    track_df['simplified_risk'] = track_df[risk_column].apply(lambda x: 1 if x > 0 else 0)

    return simulation_df, track_df, label_encoded_columns, simulation_flags, track_flags

def one_hot_column(simulation_df, track_df, column):
    encoder = OneHotEncoder(sparse=False)
    simu_data  = encoder.fit_transform(simulation_df[column])
    track_data  = encoder.transform(track_df[column])

    encoded_columns = [column + "_" + str(i) for i in range(simu_data.shape[1])]

    simulation_df[encoded_columns] = simu_data
    track_df[encoded_columns] = track_data
    return simulation_df, track_df, encoded_columns

# Define your feature columns
vehicle_columns = vehicular_data_columns  # Replace with your vehicle feature column names
neuro_columns = [
    'hr',
    'gsr_tonic',
    'UpperAlphaParietal',
    'gaze_complete_expl',
    'ThetaParietal',
    'AlphaFrontal'
]  # Replace with your neuro feature column names
label_columns = [
    'habituation',
    'bypass_dummy',
    'frustration',
    'surprise'
]
risk_column = 'risk_evaluation'

processed_simulation_df, processed_track_df, label_encoded_columns, simulation_flags, track_flags  = preprocess_data(cleaned_feature_simulation, cleaned_feature_track, vehicle_columns, neuro_columns, label_columns, risk_column)

X_train, X_test, y_train_labels, y_test_labels, y_train_risk, y_test_risk = train_test_split(
    processed_simulation_df[vehicle_columns + neuro_columns],  # Features for training
    processed_simulation_df[label_encoded_columns],   # Labels for flags model
    processed_simulation_df['simplified_risk'],
    stratify=processed_simulation_df['simplified_risk'],  # Stratify based on the risk evaluation
    test_size=0.2,
    random_state=42
)


# Function to create DataLoaders
def create_dataloader(X, y, batch_size=32):
    tensor_x = torch.Tensor(X.values)
    tensor_y = torch.Tensor(y.values)
    dataset = TensorDataset(tensor_x, tensor_y)
    return DataLoader(dataset, batch_size=batch_size)





In [43]:
# Create DataLoaders for the events and flags models
train_loader_labels = create_dataloader(X_train, y_train_labels)
test_loader_labels = create_dataloader(X_test, y_test_labels)



In [44]:
def pretrain_model(model, train_loader, test_loader, criterion, optimizer, scheduler, num_epochs=5):
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        total_loss = 0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # Evaluation phase
        model.eval()
        total_pred_correct = 0
        total_pred_count = 0
        with torch.no_grad():
            for inputs, labels in test_loader:
                outputs = model(inputs)
                predicted = outputs > 0.5  # Thresholding predictions for binary classification
                total_pred_correct += (predicted == labels).float().sum()
                total_pred_count += labels.numel()

        accuracy = total_pred_correct / total_pred_count
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {total_loss / len(train_loader)}, Accuracy: {accuracy}')

        scheduler.step()


def train_risk_model(model, train_loader, test_loader, criterion, optimizer, scheduler, num_epochs=5):
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        total_loss = 0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            risk_output = model(inputs)  # Risk output
            loss = criterion(risk_output.squeeze(), labels.float())  # Ensure labels are float
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # Evaluation phase
        model.eval()
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for inputs, labels in test_loader:
                risk_output = model(inputs)
                predicted = risk_output.squeeze() > 0.5
                all_preds.extend(predicted.numpy())
                all_labels.extend(labels.numpy())

        # Calculate evaluation metrics
        accuracy = accuracy_score(all_labels, all_preds)
        precision = precision_score(all_labels, all_preds)
        recall = recall_score(all_labels, all_preds)
        f1 = f1_score(all_labels, all_preds)

        print(f'Epoch [{epoch + 1}/{num_epochs}]: Loss: {total_loss / len(train_loader)}, Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1-Score: {f1}')

        scheduler.step()



In [45]:

def evaluate_model(model, loader):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in loader:
            outputs = model(inputs)
            predicted = outputs.squeeze() > 0.5
            all_preds.extend(predicted.numpy())
            all_labels.extend(labels.numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds)
    recall = recall_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)

    return accuracy, precision, recall, f1


In [46]:
# Initialize models
risk_model = RiskPredictionModel(num_features=len(vehicle_columns + neuro_columns),
                                 num_labels=len(label_encoded_columns))
events_model = risk_model.label_model

# Optimizers
optimizer_events = torch.optim.Adam(events_model.parameters(), lr=0.01)

# Loss functions for binary classification
criterion_events = nn.BCELoss()
criterion_flags = nn.BCELoss()

scheduler_events = StepLR(optimizer_events, step_size=10, gamma=0.1)  # Decay LR by a factor of 0.1 every 10 epochs

# Pretrain Events Model
print("Pretraining Events Model")
pretrain_model(events_model, train_loader_labels, test_loader_labels, criterion_events, optimizer_events, scheduler_events, num_epochs=20)


Pretraining Events Model
Epoch [1/20], Loss: 0.38957194328308103, Accuracy: 0.8609715104103088
Epoch [2/20], Loss: 0.3175352728366852, Accuracy: 0.8615298867225647
Epoch [3/20], Loss: 0.2951357614994049, Accuracy: 0.8525963425636292
Epoch [4/20], Loss: 0.2789265418052673, Accuracy: 0.8514796495437622
Epoch [5/20], Loss: 0.26415720164775847, Accuracy: 0.8475711941719055
Epoch [6/20], Loss: 0.2525937432050705, Accuracy: 0.8498045802116394
Epoch [7/20], Loss: 0.2415873032808304, Accuracy: 0.8447794318199158
Epoch [8/20], Loss: 0.22789883196353913, Accuracy: 0.8341708779335022
Epoch [9/20], Loss: 0.21090896487236022, Accuracy: 0.8436627388000488
Epoch [10/20], Loss: 0.19883177042007447, Accuracy: 0.8330541849136353
Epoch [11/20], Loss: 0.17230107963085176, Accuracy: 0.8419877290725708
Epoch [12/20], Loss: 0.15864933162927627, Accuracy: 0.8397543430328369
Epoch [13/20], Loss: 0.15338744044303895, Accuracy: 0.8414293527603149
Epoch [14/20], Loss: 0.14930244475603105, Accuracy: 0.842546045780

In [47]:
risk_train_loader = create_dataloader(X_train, y_train_risk)
risk_test_loader = create_dataloader(X_test, y_test_risk)


In [48]:
# Define optimizer and loss function for the risk model
optimizer_risk = torch.optim.Adam(risk_model.parameters(), lr=0.01)
criterion_risk = nn.BCELoss()
scheduler_risk = StepLR(optimizer_risk, step_size=10, gamma=0.1)


# Train the Risk Model
print("Training Risk Model")
train_risk_model(risk_model, risk_train_loader, risk_test_loader, criterion_risk, optimizer_risk, scheduler_risk, num_epochs=100)


Training Risk Model
Epoch [1/100]: Loss: 0.5290473020076751, Accuracy: 0.8291457286432161, Precision: 1.0, Recall: 0.08108108108108109, F1-Score: 0.15
Epoch [2/100]: Loss: 0.43919423937797547, Accuracy: 0.8391959798994975, Precision: 1.0, Recall: 0.13513513513513514, F1-Score: 0.2380952380952381
Epoch [3/100]: Loss: 0.40900116086006166, Accuracy: 0.8241206030150754, Precision: 1.0, Recall: 0.05405405405405406, F1-Score: 0.10256410256410257
Epoch [4/100]: Loss: 0.37850277721881864, Accuracy: 0.8291457286432161, Precision: 1.0, Recall: 0.08108108108108109, F1-Score: 0.15
Epoch [5/100]: Loss: 0.38058062314987184, Accuracy: 0.8291457286432161, Precision: 1.0, Recall: 0.08108108108108109, F1-Score: 0.15
Epoch [6/100]: Loss: 0.3399139314889908, Accuracy: 0.8291457286432161, Precision: 0.6, Recall: 0.24324324324324326, F1-Score: 0.34615384615384615
Epoch [7/100]: Loss: 0.31096435844898224, Accuracy: 0.8391959798994975, Precision: 0.6190476190476191, Recall: 0.35135135135135137, F1-Score: 0.44

In [58]:
# Create a DataLoader for the track dataset
track_loader = create_dataloader(processed_track_df[vehicle_columns + neuro_columns], processed_track_df['simplified_risk'])

# Evaluate the trained Risk Model on the track dataset
accuracy, precision, recall, f1 = evaluate_model(risk_model, track_loader)
print(f'Evaluation on Track Dataset:\nAccuracy: {accuracy}\nPrecision: {precision}\nRecall: {recall}\nF1-Score: {f1}')


Evaluation on Track Dataset:
Accuracy: 0.9554730983302412
Precision: 0.0
Recall: 0.0
F1-Score: 0.0


  _warn_prf(average, modifier, msg_start, len(result))
