In [1]:
# Standard Library Imports
import os
import sys

# Third Party Imports
from IPython.display import Image, display
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

# Local Module Imports
# The project modules are imported by adding their source directory to
# sys.path. The path is relative, so it is resolved against the kernel's
# current working directory. The membership check keeps the cell idempotent:
# re-running it does not pile up duplicate sys.path entries.
local_src_path = '../src/complete_collision_recorder/'
if local_src_path not in sys.path:
    sys.path.append(local_src_path)
import complete_collision as cc
import utils as u

# Map project directories
# map_project_directories returns the four directory locations unpacked here;
# data_dir is the one used below to read and write the processed CSV files.
root_dir, src_dir, data_dir, models_dir = u.map_project_directories(notebook=True)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the processed prediction dataset, then work on a copy so the frame
# that was read from disk (GCAT_pred_df) stays untouched.
gcat_pred_path = os.path.join(data_dir, "processed", "GCAT_pred_df.csv")
GCAT_pred_df = pd.read_csv(gcat_pred_path)
narrative_scoring_df = GCAT_pred_df.copy()

# Report the record count and a rough runtime estimate
# (one second per record, hence records / 60 minutes).
print(
    f"Number of records with input text: {len(narrative_scoring_df)}",
    "As of this writing, the narrative scoring takes approximately 1 second per record.",
    f"Using this assumption, this dataset will probably take: {len(narrative_scoring_df) / 60} minutes to score.",
    sep="\n",
)

Number of records with input text: 2175
As of this writing, the narrative scoring takes approximately 1 second per record.
Using this assumption, this dataset will probably take: 36.25 minutes to score.


In [3]:
# Score the data with the fine-tuned model through the Google API.
# NOTE(review): "CCR_API" looks like the name of a stored secret / environment
# variable rather than the key itself -- confirm against GenBikeCleNarrative.
GeminiBikeCleModel = cc.GenBikeCleNarrative(google_api_key="CCR_API")

# Identifier of the tuned model that every summarize() call is sent to.
tuned_model_name = "tunedModels/narrativetrainingdf-oo49g0etpnrj"


def _summarize_record(concat_text):
    """Return the model's summary for one record's concatenated text."""
    return GeminiBikeCleModel.summarize(
        concat_text=concat_text,
        model_name=tuned_model_name,
    )


# summarize() is invoked once per row, so the runtime grows with the number
# of records in narrative_scoring_df.
source_text = narrative_scoring_df["concatenated_text"]
narrative_scoring_df["GenAISummary"] = source_text.apply(_summarize_record)

In [6]:
# Define the severe injury indicator
# SevereAiReviewInd is 1 when the generated summary contains any of the
# keywords below (case-insensitive substring match) and 0 otherwise.
# NOTE(review): 'severely' and 'fatally' are already matched by the substrings
# 'severe' and 'fatal'. Substring matching also ignores negation
# (e.g. "no serious injury" still scores 1) -- confirm this is acceptable.
keywords = ['severe', 'severely', 'serious', 'killed', 'fatal', 'fatally']


def _has_severe_keyword(summary):
    """Return 1 if ``summary`` mentions a severity keyword, else 0.

    Non-string values (for example a missing summary stored as None or NaN)
    score 0 instead of raising AttributeError on ``.lower()``.
    """
    if not isinstance(summary, str):
        return 0
    lowered = summary.lower()  # lower-case once, not once per keyword
    return int(any(keyword in lowered for keyword in keywords))


narrative_scoring_df['SevereAiReviewInd'] = (
    narrative_scoring_df['GenAISummary'].apply(_has_severe_keyword)
)

In [42]:
# Persist the scored frame (summaries plus severity indicator) as a CSV in the
# processed-data directory. to_csv is called with its defaults, so the frame's
# index is written out as well.
genai_output_path = os.path.join(data_dir, "processed", "GenAI_df.csv")
narrative_scoring_df.to_csv(genai_output_path)

# Confirmation message for the reader of the notebook.
print("The scoring is complete and the results are saved to /data/processed/GenAI_df.csv.")

The scoring is complete and the results are saved to /data/processed/GenAI_df.csv.
