In [1]:
# Standard Library Imports
import os
import pickle
import sys

# Third-Party Imports
import matplotlib.pyplot as plt
import pandas as pd


# Local Module Imports
# The local modules are imported from a directory that is added to sys.path.
# The path is relative, so it is resolved against the kernel's working
# directory. The membership check keeps re-running this cell from adding the
# same entry to sys.path again.
LOCAL_SRC_PATH = '../src/complete_collision_recorder/'
if LOCAL_SRC_PATH not in sys.path:
    sys.path.append(LOCAL_SRC_PATH)
import complete_collision as cc
import utils as u


# Map project directories
# u.map_project_directories returns four values, unpacked here as the
# root, source, data and models directories used by the cells below.
root_dir, src_dir, data_dir, models_dir = u.map_project_directories(notebook=True)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the processed training CSV from the data directory
training_csv_path = os.path.join(data_dir, "processed", "training_df.csv")
training_df = pd.read_csv(training_csv_path)

# axis=1 passes each row to u.concatenate_texts; the result becomes the
# 'concatenated_text' column used by the later cells.
training_df['concatenated_text'] = training_df.apply(
    u.concatenate_texts,
    axis=1,
)

In [3]:
# Restore the pickled severity model from the models directory.
# NOTE(review): unpickling can execute arbitrary code, so severity.pkl should
# only ever come from a trusted source - confirm where this file is produced.
severity_model_path = os.path.join(models_dir, 'severity.pkl')
with open(severity_model_path, 'rb') as model_file:
    severity_best = pickle.load(model_file)

In [4]:
# Predict severity from the concatenated text and store it on the frame
severity_predictions = severity_best.predict(training_df['concatenated_text'])
training_df['SEVERITY_PRED'] = severity_predictions

# Show how many rows fall into each predicted class
training_df['SEVERITY_PRED'].value_counts()

SEVERITY_PRED
0    1923
1     252
Name: count, dtype: int64

In [5]:
# Work on a copy so the columns added below do not appear on training_df
GCAT_pred_df = training_df.copy()

# Translate the numeric severity prediction into its indicator text:
# 0 becomes an empty string, 1 becomes 'SEVERE INJURY'.
severity_text_by_code = {0: '', 1: 'SEVERE INJURY'}
GCAT_pred_df['SEVERITY_PRED_TEXT'] = GCAT_pred_df['SEVERITY_PRED'].replace(severity_text_by_code)

# The GCAT indicator text is the severity text, with blank (whitespace-only)
# entries reported as 'NONE'.
GCAT_pred_df['GCAT_PRED_TEXT'] = GCAT_pred_df['SEVERITY_PRED_TEXT'].apply(
    lambda indicator: indicator if indicator.strip() != '' else 'NONE'
)

# Append the indicator text to the end of each row's concatenated text
gcat_indicator_suffix = '\n\n GCAT INDICATORS: ' + GCAT_pred_df['GCAT_PRED_TEXT']
GCAT_pred_df['concatenated_text'] = GCAT_pred_df['concatenated_text'] + gcat_indicator_suffix

In [42]:
# Write the GCAT prediction dataframe to the processed-data folder,
# leaving the index out of the CSV
gcat_csv_path = os.path.join(data_dir, 'processed', 'GCAT_pred_df.csv')
GCAT_pred_df.to_csv(gcat_csv_path, index=False)

# Tell the reader where the file went and which notebook can use it next
print('GCAT Prediction Dataframe Exported to ../data/processed/GCAT_pred_df.csv')
print("This data may be used directly for scoring with the BikeCLE Narrative model or processed with the notebook 'train_and_score_bikeCLE_Gemini.ipynb' to train and evaluate the BikeCLE Narrative model.")

GCAT Prediction Dataframe Exported to ../data/processed/GCAT_pred_df.csv
This data may be used directly for scoring with the BikeCLE Narrative model or processed with the notebook 'train_and_score_bikeCLE_Gemini.ipynb' to train and evaluate the BikeCLE Narrative model.
