In [4]:
import os
import pickle

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from gpt_few_shot_clf import MultiLabelFewShotGPTClassifier

In [5]:
# Metrics Functions
def accuracy(list1, list2):
    """Per-sample multi-label accuracy (Jaccard index) of two label collections.

    Both arguments are treated as sets of labels, so a label repeated inside
    one list is counted once instead of inflating the union.

    Args:
        list1: iterable of hashable labels (e.g. the predicted labels).
        list2: iterable of hashable labels (e.g. the true labels).

    Returns:
        float: |list1 ∩ list2| / |list1 ∪ list2|, or 0.0 when both
        collections are empty (the ratio is undefined there).
    """
    labels_a, labels_b = set(list1), set(list2)
    union = labels_a | labels_b
    if not union:
        # Nothing predicted and nothing expected: avoid a ZeroDivisionError.
        return 0.0
    return len(labels_a & labels_b) / len(union)

def precision(list1, list2):
    """Fraction of the labels in ``list1`` that also appear in ``list2``.

    ``list1`` is deduplicated first, so a repeated label is counted once.

    Args:
        list1: iterable of hashable labels being scored (the predictions).
        list2: iterable of hashable reference labels (the ground truth).

    Returns:
        float: |list1 ∩ list2| / |list1|, or 0.0 when ``list1`` is empty
        (no prediction was made, so the ratio is undefined).
    """
    predicted = set(list1)
    if not predicted:
        # An empty prediction would otherwise raise ZeroDivisionError.
        return 0.0
    return len(predicted.intersection(list2)) / len(predicted)

def recall(list1, list2):
    """Fraction of the labels in ``list2`` that were recovered in ``list1``.

    ``list2`` is deduplicated first, so a repeated label is counted once.

    Args:
        list1: iterable of hashable labels being scored (the predictions).
        list2: iterable of hashable reference labels (the ground truth).

    Returns:
        float: |list1 ∩ list2| / |list2|, or 0.0 when ``list2`` is empty
        (there is nothing to recover, so the ratio is undefined).
    """
    relevant = set(list2)
    if not relevant:
        # An empty reference list would otherwise raise ZeroDivisionError.
        return 0.0
    return len(relevant.intersection(list1)) / len(relevant)

def f1(list1, list2):
    """Harmonic mean of precision and recall for two label collections.

    Both arguments are treated as sets of labels. Precision and recall are
    computed here directly from the set overlap so that every zero
    denominator is handled explicitly rather than through a bare ``except``.

    Args:
        list1: iterable of hashable labels being scored (the predictions).
        list2: iterable of hashable reference labels (the ground truth).

    Returns:
        float: 2 * P * R / (P + R), or 0.0 when the two collections share
        no label (which includes either of them being empty).
    """
    predicted, relevant = set(list1), set(list2)
    overlap = len(predicted & relevant)
    if overlap == 0:
        # Covers empty inputs and disjoint label sets: P + R would be 0.
        return 0.0
    # overlap > 0 guarantees both sets are non-empty, so these are safe.
    precision_score = overlap / len(predicted)
    recall_score = overlap / len(relevant)
    return 2 * precision_score * recall_score / (precision_score + recall_score)

### Using old test set

In [6]:
# Load the 2023 test set and replace missing values (NaN) with ''.
df = pd.read_csv("./test_set_2023.csv").replace(np.nan, '')

# Fuse the four label columns into one comma-separated string per row;
# unused label slots stay as empty fields (e.g. "a,b,,").
label_columns = ["labels", "labels.1", "labels.2", "labels.3"]
labels_as_text = df[label_columns].astype(str)
df["All Labels"] = labels_as_text[label_columns[0]].str.cat(
    labels_as_text[label_columns[1:]], sep=","
)

# The individual label columns are no longer needed once merged.
df = df.drop(columns=label_columns)
df

Unnamed: 0,sentence,All Labels
0,Three Station Level Human Performance Event Cl...,"personal accountability,continuous learning,wo..."
1,Equipment problems due to aging have led to an...,"work processes,problem identification and reso..."
2,"The DCPP knowledge transfer program, “Passport...","continuous learning,decision making,,"
3,RC1: The process for evaluating both the risk ...,"work processes,questioning attitude,,"
4,RC2: Maintenance leadership has not been proac...,"work processes,continuous learning,,"
...,...,...
60,DCPP experienced significant Feedwater Heater ...,"problem identification and resolution,environm..."
61,The health of transmission systems at DCPP was...,"work processes,continuous learning,,"
62,The health of DCPP's Emergency Diesel Generato...,"work processes,,,"
63,A July 2021 failure of the same Emergency Dies...,"personal accountability,continuous learning,,"


In [7]:
# Inputs: the raw sentences, in row order.
X = df["sentence"].tolist()
# Targets: one list of labels per sentence, with the empty fields left by
# unused label slots filtered out.
y = [
    list(filter(None, joined_labels.split(",")))
    for joined_labels in df["All Labels"].tolist()
]

In [8]:
# Keep 45% of the samples for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.45,
    random_state=7,
)

In [9]:
# SECURITY: the OpenAI key is read from the OPENAI_API_KEY environment variable
# so it never appears in the notebook source or its saved outputs. A key was
# previously hardcoded in this cell; treat it as leaked and rotate it.
clf = MultiLabelFewShotGPTClassifier(
    max_labels=4,
    openai_model="gpt-3.5-turbo",
    openai_key=os.environ["OPENAI_API_KEY"],  # KeyError if the variable is unset
)
# Supply the training sentences and their label lists to the classifier.
clf.fit(X_train, y_train)

In [10]:
# Predict labels for every held-out sentence with the fitted classifier.
# NOTE: slow step — the recorded run took ~12 minutes for 30 sentences.
labels_1 = clf.predict(X_test)

100%|██████████| 30/30 [11:49<00:00, 23.66s/it]  


In [11]:
# Print each held-out sentence followed by its true and predicted labels.
for idx, test in enumerate(X_test):
    print(test)
    pred_labels, true_labels = labels_1[idx], y_test[idx]
    print("true", true_labels)
    print("pred:", pred_labels)
    print("\n")

The DCISC has identified a number of potential nuclear safety issues with the use of closed cooling at DCPP. Insufficient information was available to resolve these questions during this review period. The DCISC intends to follow this issue over the next year or more and to review the operational safety implications of any proposal that would modify the cooling water systems at DCPP.
true ['effective safety communication', 'work processes', 'problem identification and resolution']
pred: ['environment for raising concerns']


The DCPP knowledge transfer program, “Passport to Knowledge” appears well designed but full implementation has taken a back seat to higher priority items such as outage planning and outages. The DCISC encourages DCPP to move forward with this program to not lose valuable job knowledge as employees retire.
true ['continuous learning', 'decision making']
pred: ['continuous learning', 'work processes']


Motors are aging/degrading and needing rewinding or replacement 

In [12]:
# One score per test sample for each metric.
all_accuracies, all_precision, all_recall, all_f1 = [], [], [], []

# Pair every metric function with the list that collects its scores.
metric_sinks = (
    (accuracy, all_accuracies),
    (precision, all_precision),
    (recall, all_recall),
    (f1, all_f1),
)

for idx, pred_labels in enumerate(labels_1):
    true_labels = y_test[idx]
    # Predictions are always passed first, the ground truth second.
    for metric, sink in metric_sinks:
        sink.append(metric(pred_labels, true_labels))

In [13]:
# Arithmetic mean of the per-sample scores for each metric.
avg_acc, avg_prec, avg_rec, avg_f1 = (
    sum(scores) / len(scores)
    for scores in (all_accuracies, all_precision, all_recall, all_f1)
)

# Report each mean as a percentage rounded to two decimals.
for metric_name, mean_score in (
    ("Accuracy", avg_acc),
    ("Precision", avg_prec),
    ("Recall", avg_rec),
    ("F1-Score", avg_f1),
):
    print(f"Average {metric_name} on Test Data:", round(mean_score*100,2), "%")

Average Accuracy on Test Data: 50.56 %
Average Precision on Test Data: 66.39 %
Average Recall on Test Data: 62.22 %
Average F1-Score on Test Data: 62.44 %


### Using new test set

In [14]:
# Read the newer labelled set; missing values (NaN) are replaced with ''.
df_test = pd.read_csv("./Labels - data.csv").replace(np.nan, '')
df_test

Unnamed: 0,File Name,Power Plant,Sentence/Paragraph,Safety Trait #1,Safety Trait #2,Safety Trait #3,Safety Trait #4,Labelled by,Reviewed by,Review Notes,Corrected?
0,3462002008.pdf,Davis-Besse,"Following unit shutdown on February 16, 2002, ...",problem_identification,work_processes,questioning_attitude,,Anood,Ruturaj,,
1,3462002009.pdf,Davis-Besse,The cause of the cracks appears to be high cyc...,problem_identification,questioning_attitude,,,Anood,Ruturaj,,
2,3462003001.pdf,Davis-Besse,"These conditions, apparently caused by design ...",questioning_attitude,work_processes,,,Anood,Ruturaj,work_process ?,Yes
3,3462003002.pdf,Davis-Besse,The apparent cause of the HPI pump debris tole...,work_processes,continuous_learning,questioning_attitude,,Anood,Ruturaj,,
4,3462003004.pdf,Davis-Besse,The previous procedures used to calibrate the ...,work_processes,,,,Anood,Ruturaj,,
...,...,...,...,...,...,...,...,...,...,...,...
95,4992001002.pdf,South Texas,The linkage mechanism that operates the breake...,personal_accountability,,,,Nikki,Vijay,LGTM,
96,4992001003.pdf,South Texas,The majority of the defective tubes detected i...,problem_identification,respectful_environment,decision_making,,Nikki,Vijay,Decision making: Seems like some of the design...,
97,4992001004.pdf,South Texas,1. Personnel did not recognize the laptop comp...,personal_accountability,work_processes,,,Nikki,Vijay,Work processes: Would add this as sufficient t...,
98,4992002001.pdf,South Texas,The root cause of this incident was a lack of ...,problem_identification,continuous_learning,,,Nikki,Vijay,"Work processes: Don't think it fits into this,...",


In [15]:
# Map the snake_case trait codes used in this file to the spelled-out label
# names (the same wording that appears in the older test set's labels).
replacements = dict(
    problem_identification='problem identification and resolution',
    work_processes='work processes',
    questioning_attitude='questioning attitude',
    continuous_learning='continuous learning',
    personal_accountability='personal accountability',
    respectful_environment='respectful work environment',
    decision_making='decision making',
    leadership_values='leadership safety values and actions',
    safety_communication='effective safety communication',
    environment_raising_concerns='environment for raising concerns',
)
# Whole-cell replacement: only cells whose entire value equals a code change.
df_test = df_test.replace(replacements)
df_test

Unnamed: 0,File Name,Power Plant,Sentence/Paragraph,Safety Trait #1,Safety Trait #2,Safety Trait #3,Safety Trait #4,Labelled by,Reviewed by,Review Notes,Corrected?
0,3462002008.pdf,Davis-Besse,"Following unit shutdown on February 16, 2002, ...",problem identification and resolution,work processes,questioning attitude,,Anood,Ruturaj,,
1,3462002009.pdf,Davis-Besse,The cause of the cracks appears to be high cyc...,problem identification and resolution,questioning attitude,,,Anood,Ruturaj,,
2,3462003001.pdf,Davis-Besse,"These conditions, apparently caused by design ...",questioning attitude,work processes,,,Anood,Ruturaj,work_process ?,Yes
3,3462003002.pdf,Davis-Besse,The apparent cause of the HPI pump debris tole...,work processes,continuous learning,questioning attitude,,Anood,Ruturaj,,
4,3462003004.pdf,Davis-Besse,The previous procedures used to calibrate the ...,work processes,,,,Anood,Ruturaj,,
...,...,...,...,...,...,...,...,...,...,...,...
95,4992001002.pdf,South Texas,The linkage mechanism that operates the breake...,personal accountability,,,,Nikki,Vijay,LGTM,
96,4992001003.pdf,South Texas,The majority of the defective tubes detected i...,problem identification and resolution,respectful work environment,decision making,,Nikki,Vijay,Decision making: Seems like some of the design...,
97,4992001004.pdf,South Texas,1. Personnel did not recognize the laptop comp...,personal accountability,work processes,,,Nikki,Vijay,Work processes: Would add this as sufficient t...,
98,4992002001.pdf,South Texas,The root cause of this incident was a lack of ...,problem identification and resolution,continuous learning,,,Nikki,Vijay,"Work processes: Don't think it fits into this,...",


In [16]:
# Sentences/paragraphs of the new set, in row order.
X_test_2 = df_test["Sentence/Paragraph"].tolist()
len(X_test_2)

100

In [17]:
# Merge the four trait columns into one comma-separated string per row.
trait_columns = ["Safety Trait #1", "Safety Trait #2", "Safety Trait #3", "Safety Trait #4"]
traits_as_text = df_test[trait_columns].astype(str)
df_test["All Labels"] = traits_as_text[trait_columns[0]].str.cat(
    traits_as_text[trait_columns[1:]], sep=","
)
# Split back into per-row label lists, discarding the empty fields left by
# unused trait slots.
y_test_2 = [
    list(filter(None, joined_labels.split(",")))
    for joined_labels in df_test["All Labels"].tolist()
]
len(y_test_2)

100

In [18]:
# Keep 70% of the new set for evaluation; the rest is used to fit the classifier.
# NOTE: this rebinds X_train / X_test / y_train / y_test from the earlier section.
X_train, X_test, y_train, y_test = train_test_split(
    X_test_2,
    y_test_2,
    test_size=0.70,
    random_state=7,
)

In [19]:
# SECURITY: the OpenAI key is read from the OPENAI_API_KEY environment variable
# so it never appears in the notebook source or its saved outputs. A key was
# previously hardcoded in this cell; treat it as leaked and rotate it.
clf2 = MultiLabelFewShotGPTClassifier(
    max_labels=4,
    openai_model="gpt-3.5-turbo",
    openai_key=os.environ["OPENAI_API_KEY"],  # KeyError if the variable is unset
)
# Supply the training portion of the new data set to the classifier.
clf2.fit(X_train, y_train)

In [20]:
# Predict labels for the held-out part of the new data set.
# NOTE: slow step — the recorded run took ~24 minutes for 70 sentences.
predictions = clf2.predict(X_test)

100%|██████████| 70/70 [24:00<00:00, 20.59s/it]   


In [41]:
# Reset the metric accumulators (this cell itself does not fill them).
all_accuracies, all_precision, all_recall, all_f1 = [], [], [], []

# Column order of the exported table, and one list per column.
columnNamesForResult = ["Power Plant", "Sentence/Paragraph", "Predicted Labels", "True Labels"]
powerplants, sentenceOrParagraphs, predictedLabels, trueLabels = [], [], [], []

for idx, pred_labels in enumerate(predictions):
    # Sort both label lists so the two columns are easy to compare by eye.
    true_labels = sorted(y_test[idx])
    pred_labels = sorted(pred_labels)

    causeOfEventDescription = X_test[idx]
    # Recover the plant by matching the sentence text back to df_test.
    # NOTE(review): .item() raises unless exactly one row matches, so a
    # sentence that occurs twice in df_test will fail here.
    matching_rows = df_test[df_test['Sentence/Paragraph'] == causeOfEventDescription]
    powerplants.append(matching_rows['Power Plant'].item())
    sentenceOrParagraphs.append(causeOfEventDescription)
    predictedLabels.append(pred_labels)
    trueLabels.append(true_labels)

column_values = (powerplants, sentenceOrParagraphs, predictedLabels, trueLabels)
resultingDF = pd.DataFrame(
    data=dict(zip(columnNamesForResult, column_values)),
    columns=columnNamesForResult,
)

true: ['decision making', 'personal accountability', 'questioning attitude']
pred: ['personal accountability', 'work processes']
<class 'list'>


true: ['leadership safety values and actions', 'personal accountability', 'work processes']
pred: ['continuous learning', 'personal accountability', 'work processes']
<class 'list'>


true: ['personal accountability', 'problem identification and resolution', 'work processes']
pred: ['problem identification and resolution', 'work processes']
<class 'list'>


true: ['effective safety communication', 'leadership safety values and actions', 'personal accountability', 'problem identification and resolution']
pred: ['leadership safety values and actions', 'problem identification and resolution', 'work processes']
<class 'list'>


true: ['work processes']
pred: ['problem identification and resolution']
<class 'list'>


true: ['work processes']
pred: ['problem identification and resolution', 'work processes']
<class 'list'>


true: ['effective safety

In [39]:
# Write the predictions to disk first, then end the cell with the frame:
# only a cell's last expression is rendered, and to_csv() with a path
# returns None, so the frame has to come last to be displayed.
resultingDF.to_csv("predictedLabelsFewShotNoTuning.csv", index=False)
resultingDF