In [1]:
import re
import torch
import numpy as np
import pandas as pd
import nltk
from nltk.stem import WordNetLemmatizer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, classification_report
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
import torch.nn as nn
import torch.nn.functional as F
import nlpaug.augmenter.word as naw
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.figure_factory as ff

  "class": algorithms.Blowfish,


In [4]:
# Data Loading and Preprocessing
# ---------------------------------------------
# NOTE: clean_text() is defined in a later cell of this file (the one marked
# In [3]); that cell has to be executed before this one.

# Read the incident workbook; all edits go to a copy so `data` keeps the raw rows.
data = pd.read_excel('\\\\vi240c060002.woc.prod\\e$\\datasets\\WCMLDataset12_23.xlsx')
example_data = data.copy()

# Free-text columns that are cleaned and then merged into a single string.
text_fields = [
    'Incident Description',
    'Activity Engaged in During Accident',
    'General HS Comments',
    'Injury Description',
]

# Per column: missing values become empty strings, then clean_text() is applied.
for field in text_fields:
    example_data[field] = example_data[field].fillna('').apply(clean_text)

# Join the cleaned columns left to right with single spaces, then trim the
# leading/trailing blanks left behind by empty columns.
example_data['Combined_Text'] = example_data[text_fields[0]]
for field in text_fields[1:]:
    example_data['Combined_Text'] = example_data['Combined_Text'] + ' ' + example_data[field]
example_data['Combined_Text'] = example_data['Combined_Text'].str.strip()

In [3]:
# ---------------------------------------------
# Text Cleaning and Lemmatization
# ---------------------------------------------
# Single shared lemmatizer instance; clean_text() below calls its lemmatize()
# method on every whitespace-separated token.
lemmatizer = WordNetLemmatizer()

# Patterns used by clean_text(), compiled once instead of on every call.
_DISALLOWED_CHARS_RE = re.compile(r"[^a-z0-9.,!?'\s-]")
_WHITESPACE_RUN_RE = re.compile(r"\s+")
_EXCLAMATION_RUN_RE = re.compile(r"!+")
_QUESTION_RUN_RE = re.compile(r"\?+")


def clean_text(text):
    """Normalise one free-text value and lemmatize its tokens.

    Steps, in order: lowercase; drop every character that is not a-z, 0-9,
    one of ``. , ! ? ' -`` or whitespace; collapse whitespace runs to a single
    space and trim; collapse runs of ``!`` / ``?`` to one; lemmatize each
    whitespace-separated token with the module-level ``lemmatizer``.

    Args:
        text: The value to clean. ``None`` and float NaN are treated as
            missing; any other non-string value (e.g. a numeric spreadsheet
            cell) is converted with ``str()`` first.

    Returns:
        The cleaned string; ``""`` for missing or empty input.
    """
    # Missing cell: None, or NaN (the only float that is not equal to itself).
    if text is None or (isinstance(text, float) and text != text):
        return ""
    # Non-string scalars would otherwise fail on .lower().
    if not isinstance(text, str):
        text = str(text)

    text = text.lower()
    text = _DISALLOWED_CHARS_RE.sub('', text)
    text = _WHITESPACE_RUN_RE.sub(' ', text).strip()
    text = _EXCLAMATION_RUN_RE.sub('!', text)
    text = _QUESTION_RUN_RE.sub('?', text)
    # Tokens keep any attached punctuation, exactly as in the original pipeline.
    return " ".join(lemmatizer.lemmatize(token) for token in text.split())

In [5]:
# ---------------------------------------------
# Label Encoding
# ---------------------------------------------
# Categorical columns that each get an integer-coded companion column.
targets = [
    'Event of Injury Desc',
    'Source of Injury Desc',
    'Event of Incident Desc',
    'Source of Incident Desc',
    'EDI Cause Desc',
]

# One encoder per target, kept by column name so predictions can be decoded
# later with inverse_transform().
# NOTE(review): the encoders are refitted on this workbook; decoded predictions
# are only meaningful if the resulting ids match the ones the model was
# fine-tuned with — confirm.
label_encoders = {target: LabelEncoder() for target in targets}
for target, le in label_encoders.items():
    example_data[f"{target}_Encoded"] = le.fit_transform(example_data[target])

In [6]:
# ---------------------------------------------
# Tokenizer Setup
# ---------------------------------------------
# Loads the stock 'bert-base-uncased' tokenizer. No train/test split is
# performed in this cell.
# NOTE(review): the classifier is loaded from a separate fine-tuned directory
# in a later cell — presumably it was trained with this same tokenizer; confirm.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [7]:
# Load the sequence-classification model from a checkpoint directory on the
# network share.
# NOTE(review): judging by the directory name this is the "Event of Incident"
# classifier, and its predictions are decoded with that label encoder — confirm.
model = BertForSequenceClassification.from_pretrained('\\\\vi240c060002.woc.prod\\e$\\Machine Learning\\fine_tuned_event_of_incident')

In [12]:
# Replace the narrative below with the incident text to classify.
new_text = [
    "The Guest did walk in from her train after being evacuated and was given a wheelchair to sit down in. She was crying and breathing very quickly. She said her fingers were tingling. BI set up per Guest call 01.03.19 OR. SPANISH SPEAKING ONLY. Guest states she had to be transported to the hospital after she suffered a panic attack when the ride got stuck. Guest states she was with her 2 kids and she thought she was going to die while on the ride. Guest is worried now saying she's going to receive a bill from the hospital and she can't afford to pay for it. The Guest was in train 3 that had stopped in the Brake zone 1 area during an estop at Expedition Everest. We had to evacuate the trains on the ride. The Cast Member who was evacuating her train called 911 because she seemed to be having a severe panic attack - crying and having trouble breathing normally. LILO: 483631043387."
]
# Run each entry through the same clean_text() normalisation that is applied
# to the dataset's text columns.
new_text = list(map(clean_text, new_text))
# Tokenize to PyTorch tensors, padding the batch and truncating at 512 tokens.
new_encodings = tokenizer(
    new_text,
    padding=True,
    truncation=True,
    max_length=512,
    return_tensors='pt',
)

In [13]:
# Put the model in evaluation mode before predicting.
model.eval()
# Prediction needs no gradients; skipping autograd bookkeeping saves memory
# and time on the forward pass.
with torch.no_grad():
    outputs = model(**new_encodings)

# One class id per input text: argmax over the class dimension of the logits.
# Using tolist() instead of item() keeps this working when new_text holds more
# than one narrative.
predicted_ids = torch.argmax(outputs.logits, dim=1).tolist()
# `predicted_class` keeps its original meaning: the id for the first text.
predicted_class = predicted_ids[0]
# Map the integer ids back to the category names of the fitted encoder.
decoded_class = label_encoders['Event of Incident Desc'].inverse_transform(predicted_ids)
for label in decoded_class:
    print(f"Predicted Event of Incident: {label}")

Predicted Event of Incident: On Ride - Overt Act - Unplanned Exiting


In [None]:
# Example incident narratives to run through the model (paste one into new_text in the prediction cell above)

#WDWG2021650082 - Shows the complexity of model understanding
Child was sitting on planter/bench and fell hitting the back of the head. Child was crying in pain with mother holding the back of the head.
Furnishings/Fixtures/Decorations - Bench

#WDWG2021650648 - difference in coding example
Guest fell down on the exit ramp of the Epcot monorail station, scrapping the skin below her right knee. Guest denied Reedy Creek emergency services. Guest fell down on the exit ramp of the Epcot monorail station, scrapping the skin below her right knee. Guest denied Reedy Creek emergency services.
Motor Vehicle - Bus - Wheelchair/ECV Ramp

#WDWG2021654828 - Multiple niche words - Shows understanding of context
Guest had a gash on the lower right forearm.  He had tripped over a planter the previous evening when exiting for fireworks.  He had a family member clean the wound and did not notify a CM or request paramedics at that time. Guest was exiting after Fireworks that night near Mission Space (along the construction wall) when he tripped over the planter due to poor visibility and gashed his arm.  Guest had a family member check the wound but requested paramedics on 10/1 due to tingling and numbness.  They were concerned with getting to transportation the night before and did not seek medical attention on the night of the incident Guest had a gash on the right forearm.  Video of the fall did show poor lighting in the area.  Facilities was informed.  Guest was alert and conscious but mentioned numbness and tingling in the arm.  Planter location was along the construction wall across from the play fountain area and near the Joffrey's cart.  Alpha was called on 10/1 in Guest Relations lobby 
Furnishings/Fixtures/Decorations - Planter/Retaining Wall/Concrete Barrier

#WDWG2019204537 - Coding without descriptions. No context limitation
Collapsed lung In house BI set up per Linda C. 1.3.19 SD. Kristin, Guest was upset on the phone this morning. She thought yesterday she was transferred to a Disney Medical Clinic who told them her husband is fine, as soon as he got back to the resort RCES transported the guest. Guest was immediately admitted to the hospital for a collapsed lung. She is a nurse and cannot believe the clinic she went to originally she thought Centra Care now possibly East Coast Medical on Sand Lake missed this diagnosis.
Toxic/Flammable/Chemicals - Pyrotechnics (fireworks)

#WDWG2019204633 - Difference in coding. Tolerance on recodes
The Guest did walk in from her train after being evacuated and was given a wheelchair to sit down in. She was crying and breathing very quickly. She said her fingers were tingling. BI set up per Guest call 01.03.19 OR. SPANISH SPEAKING ONLY. Guest states she had to be transported to the hospital after she suffered a panic attack when the ride got stuck. Guest states she was with her 2 kids and she thought she was going to die while on the ride. Guest is worried now saying she's going to receive a bill from the hospital and she can't afford to pay for it. The Guest was in train 3 that had stopped in the Brake zone 1 area during an estop at Expedition Everest. We had to evacuate the trains on the ride. The Cast Member who was evacuating her train called 911 because she seemed to be having a severe panic attack - crying and having trouble breathing normally. LILO: 483631043387.
Person - Self

#WDWG2019205113 - Niche Category
Subject had an iron fall on her head. Subject had what appeared to have a small cut on the top of her head. Subjects parents stated that she had bled a little prior to my arrival. Upon arrival, I met with Amelia Waldren, and her parents Julia and Aaron Waldren. Mr. and Mrs. Waldren stated that Amelia had pulled an iron onto her head prior to my arrival. Amelia had what appeared to be a small cut. Mr. and Mrs. Waldren stated that the injury had bled a small amount prior to my arrival. Reedy Creek arrived and advised Mr. and Mrs. Waldren, but did not transport.At approximately 21:25, on the evening of January 3, 2019, I was dispatched to room 4164 in reference to a guest injury.
Appliance - Iron

#WDWG2019205009 - Question on coding. 
Guests had a cut on her knee, and a bruise and cut on her hand. Guests was running with their stroller through the park and tripped, and fell on their own stroller. Guest had the injury at 1145 am but did not report it to a CM until 1300.
Walking Surface - Outdoor

#WDWG2019205693 - Tolerance question
The Guest stated that their was pain but no knot. She stated she took some motrin. Guest was walking through the queue in the Zulu hut portion when a piece of wood from the roof fell and landed on the neck of the Guest.
Objects