In [8]:
import os 
import time 
import json,math,sys,re
import pandas as pd 
from sklearn.metrics import accuracy_score, precision_score, f1_score, cohen_kappa_score
import seaborn as sns
import matplotlib.pyplot as plt

import sys 
sys.path.append('../')


# Folder containing the CSV files that read_data_and_labels loads.
qp_folder = "/Users/amycweng/Downloads/QP"

# Lookup table: first CSV column -> (version, part, book) taken from columns 1-3.
# The Bible CSVs are read without a header row, so columns are addressed by position.
bible_info = {}
for bible_name in ('Geneva', 'Vulgate', 'Douay-Rheims', 'Tyndale', 'Wycliffe', 'KJV'):
    rows = pd.read_csv(f"../assets/Bibles/{bible_name}.csv", header=None).to_dict(orient="records")
    # Later files overwrite earlier ones if the same key appears twice.
    bible_info.update({row[0]: (row[1], row[2], row[3]) for row in rows})

def get_bible_info(id_string): 
    """Return the (version, part, book) triple for a verse identifier.

    ``id_string`` is either a bare identifier or text containing one or more
    single-quoted identifiers; when quoted identifiers are present only the
    first one is used. Everything from the first " -" onward is dropped,
    surrounding whitespace is stripped and every " Bible" substring is removed
    before the lookup in the module-level ``bible_info`` table.

    Raises:
        KeyError: if the cleaned identifier is not a key of ``bible_info``.
    """
    quoted = re.findall(r"\'(.*?)\'", id_string)
    candidates = quoted if len(quoted) > 0 else [id_string]
    verse_key = candidates[0].split(" -")[0].strip()
    verse_key = verse_key.replace(" Bible", "")
    version, part, book = bible_info[verse_key]
    return version, part, book


In [2]:
def read_data_and_labels(inputfname, data_folder=None,
                         labels_folder="/Users/amycweng/DH/Early-Modern-Sermons/assets/QP_labels"): 
    """Load one evaluation CSV together with its two JSON label files.

    Args:
        inputfname: File stem shared by ``<stem>.csv``, ``<stem>_DS.json`` and
            ``<stem>_GPT.json``.
        data_folder: Folder holding the CSV. Defaults to the module-level
            ``qp_folder`` (looked up at call time).
        labels_folder: Folder holding the two JSON label files. The default is
            the location the notebook has always read from.

    Returns:
        ``(data, labels_DS, labels_GPT)``: the CSV as a DataFrame and the two
        parsed JSON documents.

    Raises:
        FileNotFoundError: if the CSV or either JSON file is missing.
    """
    if data_folder is None:
        data_folder = qp_folder

    def _load_labels(model_suffix):
        # Both label files follow the same "<stem>_<model>.json" naming scheme.
        with open(f"{labels_folder}/{inputfname}_{model_suffix}.json",'r') as f:
            return json.load(f)

    data = pd.read_csv(f"{data_folder}/{inputfname}.csv")
    if 'preE_qp' == inputfname: 
        # This export has positional column names; keep only the four columns
        # used downstream and give them the names the other dataset uses.
        data = data.drop(columns=['0','1','2','4','5','Unnamed: 7'])
        data = data.rename(columns = {'3': 'verse_id','6':'text','8':'verse_text','9':'label'})
    return data, _load_labels("DS"), _load_labels("GPT")

# Load each evaluation set: the data frame plus its DS and GPT label files.
(VnotPinC, VnotPinC_DS, VnotPinC_GPT) = read_data_and_labels("Vul_notPriorInCited")
(preE, preE_DS, preE_GPT) = read_data_and_labels("preE_qp")

# Row counts of the two data frames.
(len(VnotPinC), len(preE))

(615, 4842)

In [3]:
def _as_binary(values):
    """Map each value to 1 when its lower-cased string form is "true", else 0."""
    return [1 if str(value).lower() == "true" else 0 for value in values]


# Ground truth comes from the `label` column of each data frame.
ground_VnotPinC = _as_binary(VnotPinC['label'])
ground_preE = _as_binary(preE['label'])

# For preE, keep only as many rows as both label files provide.
preE_len = min(len(preE_DS), len(preE_GPT))
ground_preE = ground_preE[:preE_len]
labels_preE_DS = _as_binary(preE_DS.values())[:preE_len]
labels_preE_GPT = _as_binary(preE_GPT.values())[:preE_len]

# The label values are taken in the order the JSON objects list them.
labels_VnotPinC_DS = _as_binary(VnotPinC_DS.values())
labels_VnotPinC_GPT = _as_binary(VnotPinC_GPT.values())

# Combined lists: VnotPinC entries first, then the truncated preE entries.
labels_all_ground = ground_VnotPinC + ground_preE
labels_all_DS = labels_VnotPinC_DS + labels_preE_DS
labels_all_GPT = labels_VnotPinC_GPT + labels_preE_GPT

verses_VnotPinC = list(VnotPinC['verse_id'])
verses_preE = list(preE['verse_id'])[:preE_len]
verses_all = verses_VnotPinC + verses_preE

# Ground-truth label lists, keyed by the dataset name used in the reports.
mylabels = {
    'Vulgate_PriorNotInCited': ground_VnotPinC,
    'pre-Elizabethan_qp': ground_preE,
    'All': labels_all_ground,
}
# Predicted label lists, keyed as "labels_<dataset>_<model>".
labels = {
    'labels_preE_DS': labels_preE_DS,
    'labels_VnotPinC_DS': labels_VnotPinC_DS,
    'labels_all_DS': labels_all_DS,
    'labels_preE_GPT': labels_preE_GPT,
    'labels_VnotPinC_GPT': labels_VnotPinC_GPT,
    'labels_all_GPT': labels_all_GPT,
}

# Compute evaluation stats 

In [None]:
# One empty results table per ground-truth set: column name -> list of values.
metric_columns = ("Model", "Accuracy", "Precision", "F1_Score", "Cohen's_Kappa")
tables = {
    labeled: {column: [] for column in metric_columns}
    for labeled in mylabels
}

In [None]:
# Ground-truth set name -> dataset tag used inside the keys of `labels`
# (those keys look like "labels_<tag>_<model>"). Pairing by tag instead of by
# list length keeps two datasets that happen to have the same size apart.
dataset_tags = {'Vulgate_PriorNotInCited': 'VnotPinC',
                'pre-Elizabethan_qp': 'preE',
                'All': 'all'}

for true_name, true_labels in mylabels.items():
    # Empty the columns first so re-running this cell does not duplicate rows.
    for column in tables[true_name].values():
        column.clear()
    for pred_name, pred_labels in labels.items():
        name_parts = pred_name.split("_")
        if name_parts[1] != dataset_tags[true_name]:
            continue
        if len(true_labels) != len(pred_labels):
            # Report the problem instead of silently dropping the model.
            print(f"Skipping {pred_name}: {len(pred_labels)} predictions vs "
                  f"{len(true_labels)} ground-truth labels for {true_name}")
            continue
        print(true_name, pred_name)
        model = name_parts[-1]
        Accuracy = round(accuracy_score(true_labels, pred_labels),4)
        Precision = round(precision_score(true_labels, pred_labels),4)
        F1_Score = round(f1_score(true_labels, pred_labels),4)
        kappa = round(cohen_kappa_score(true_labels,pred_labels),4)
        tables[true_name]["Model"].append(model)
        tables[true_name]["Accuracy"].append(Accuracy)
        tables[true_name]["Precision"].append(Precision)
        tables[true_name]["F1_Score"].append(F1_Score)
        tables[true_name]["Cohen's_Kappa"].append(kappa)


In [281]:
# Print each results table as plain text, separated by a rule, with the number
# of ground-truth labels next to the dataset name.
separator = "------------------------------------------------------"
print(separator)
for table_name, columns in tables.items():
    table_df = pd.DataFrame(columns)
    print(table_name, len(mylabels[table_name]), '\n')
    print(table_df.to_string(index=False))
    print(separator)


------------------------------------------------------
Vulgate_PriorNotInCited 615 

Model  Accuracy  Precision  F1_Score  Cohen's_Kappa
   DS    0.8423     0.9554    0.8152         0.6827
  GPT    0.7610     0.9529    0.6879         0.5174
------------------------------------------------------
pre-Elizabethan_qp 3000 

Model  Accuracy  Precision  F1_Score  Cohen's_Kappa
   DS    0.8777     0.9502    0.9235         0.6196
  GPT    0.7903     0.9674    0.8580         0.4761
------------------------------------------------------
All 3615 

Model  Accuracy  Precision  F1_Score  Cohen's_Kappa
   DS    0.8716     0.9507    0.9128         0.6711
  GPT    0.7853     0.9663    0.8417         0.5250
------------------------------------------------------


In [None]:
pairwise_kappa = {}

# Mean Cohen's kappa over the annotator pairs on the combined ("All") dataset:
# ground truth vs. each model, plus DS vs. GPT. Each kappa is rounded to four
# decimals before averaging.
kappas = []
for true_name, true_labels in mylabels.items():
    if true_name == "All":
        # Only prediction lists as long as the ground truth take part.
        comparable = {pred_name: pred_labels
                      for pred_name, pred_labels in labels.items()
                      if len(pred_labels) == len(true_labels)}
        for pred_name, pred_labels in comparable.items():
            kappas.append(round(cohen_kappa_score(true_labels, pred_labels), 4))
            if "DS" in pred_name:
                # Model-vs-model agreement is added once, alongside the DS entry.
                gpt_labels = labels['labels_all_GPT']
                kappas.append(round(cohen_kappa_score(pred_labels, gpt_labels), 4))
float(round(sum(kappas)/len(kappas),4))

In [9]:
# Translate every verse id into its (version, part, book) triple.
# New lists are built instead of overwriting verses_VnotPinC / verses_preE /
# verses_all in place, so those keep the raw ids and this cell can be re-run
# without feeding already-converted tuples back into get_bible_info.
verse_ids = {'Vulgate_PriorNotInCited':verses_VnotPinC,
             'pre-Elizabethan_qp':verses_preE,
             'All':verses_all}

verses = {name: [get_bible_info(id_string) for id_string in id_list]
          for name, id_list in verse_ids.items()}

In [None]:
# Per-feature breakdown of both models on the combined ("All") dataset.
# 0 - Version, 1 - Part, 2 - Book  (positions inside the tuples stored in `verses`)
fidx_f = {0:'Version',1:'Part',2:'Book'}
result_columns = ['Feature_Type','Feature','Model','Accuracy','Precision','F1_Score',"Cohen's Kappa"]

true_name = "All"
true_labels = mylabels[true_name]

# Model name -> predictions for the combined dataset, selected by the
# "labels_all_<model>" key rather than by list length.
all_predictions = {}
for pred_name, pred_labels in labels.items():
    name_parts = pred_name.split("_")
    if name_parts[1] != "all":
        continue
    if len(pred_labels) != len(true_labels):
        # Report the problem instead of silently dropping the model.
        print(f"Skipping {pred_name}: {len(pred_labels)} predictions vs "
              f"{len(true_labels)} ground-truth labels")
        continue
    all_predictions[name_parts[-1]] = pred_labels

# Rows are collected in a list and turned into one DataFrame at the end,
# instead of concatenating onto an empty frame inside the loop.
records = []
for fidx in [1,0,2]:
    print(fidx_f[fidx])
    # Row positions grouped by feature value. A dict keeps first-seen order, so
    # the CSV row order is the same on every run (a set would not guarantee that).
    positions = {}
    for i, feature_tuple in enumerate(verses[true_name]):
        positions.setdefault(feature_tuple[fidx], []).append(i)
    for feature, indices in positions.items():
        y_t = [true_labels[i] for i in indices]
        for model, pred_labels in all_predictions.items():
            y_p = [pred_labels[i] for i in indices]
            records.append({
                'Feature_Type': fidx_f[fidx],
                'Feature': feature,
                'Model': model,
                'Accuracy': round(accuracy_score(y_t, y_p),4),
                # zero_division=0 yields the same 0.0 as the default but without
                # a warning when a subset has no positive predictions or labels.
                'Precision': round(precision_score(y_t, y_p, zero_division=0),4),
                'F1_Score': round(f1_score(y_t, y_p, zero_division=0),4),
                # NOTE(review): kappa can be NaN for some subsets (the saved
                # output has a blank Joshua/DS cell) - confirm this is acceptable.
                "Cohen's Kappa": round(cohen_kappa_score(y_t, y_p),4),
            })

all_df = pd.DataFrame(records, columns=result_columns)
all_df.to_csv(f"../assets/QP_labels/model_comparison.csv")

In [280]:
# Show the per-feature metrics table.
all_df

Unnamed: 0,Feature_Type,Feature,Model,Accuracy,Precision,F1_Score,Cohen's Kappa
0,Part,Old Testament,DS,0.8438,0.9333,0.8710,0.6753
1,Part,Old Testament,GPT,0.7838,0.9425,0.8089,0.5713
2,Part,New Testament,DS,0.8822,0.9538,0.9240,0.6639
3,Part,New Testament,GPT,0.7896,0.9706,0.8522,0.5052
4,Part,Apocrypha,DS,0.7286,0.9643,0.7397,0.4865
...,...,...,...,...,...,...,...
145,Book,Ecclesiastes,GPT,0.7778,1.0000,0.8333,0.5263
146,Book,Joshua,DS,1.0000,0.0000,0.0000,
147,Book,Joshua,GPT,0.9000,0.0000,0.0000,0.0000
148,Book,1 Maccabees,DS,0.4286,0.6667,0.3333,0.0175


# Identify mismatches between my labels and V3's labels for fine-tuning

In [None]:
# Rows, ground-truth labels and V3 (`_DS`) predictions must all come from the
# SAME dataset. They are therefore selected together: comparing one dataset's
# labels and then indexing another dataset's rows would flag the wrong rows.
mismatch_targets = {
    "preE": (preE, ground_preE, labels_preE_DS),
    "VnotPinC": (VnotPinC, ground_VnotPinC, labels_VnotPinC_DS),
}
target_name = "VnotPinC"  # switch to "preE" to inspect the other dataset
target_ground, target_true, target_pred = mismatch_targets[target_name]

# Row position -> (my label, V3 label) for every row where the two disagree.
mismatches = {
    idx: (label, V3_label)
    for idx, (label, V3_label) in enumerate(zip(target_true, target_pred))
    if label != V3_label
}

targets = target_ground.to_dict(orient='records')
mismatch_rows = []
for idx, (label, V3_label) in mismatches.items():
    entry = targets[idx]
    # Look up Bible metadata only for the rows that are actually kept.
    version, part, book = get_bible_info(entry['verse_id'])
    entry['label'] = label          # replaces the raw label with its 0/1 form
    entry['V3_Label'] = V3_label
    entry['index'] = idx            # position of the row in the source data frame
    entry['Version'] = version
    entry['Part'] = part
    mismatch_rows.append(entry)
mismatch_set = pd.DataFrame(mismatch_rows)

In [18]:
# Write only when the mismatch cell was last run on the preE data frame;
# otherwise this file would be filled with another dataset's rows.
if target_ground is preE:
    mismatch_set.to_csv("../assets/QP_labels/mismatches_preE.csv")
else:
    print("Not saved: mismatch_set was not built from preE; "
          "re-run the mismatch cell with the preE target first.")

In [20]:
# Write only when the mismatch cell was last run on the VnotPinC data frame;
# otherwise this file would be filled with another dataset's rows.
if target_ground is VnotPinC:
    mismatch_set.to_csv("../assets/QP_labels/mismatches_VnotPinC.csv")
else:
    print("Not saved: mismatch_set was not built from VnotPinC; "
          "re-run the mismatch cell with the VnotPinC target first.")