## Error Analysis for CAD Predictions (BERT Augmented)

In [1]:
import os
import string
import random
import pandas as pd
from pandas import DataFrame
import matplotlib.pyplot as plt

In [2]:
# SK-learn libraries for evaluation.
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score


In [3]:
import numpy as np

### Test LABELS for TOKENS in TEST Dataset against BERT Outputs

BERT Classifier has returned results for the tokens passed in 'test.tsv' file.  The returned values are probabilities, that need to be converted into equivalent class labels based on majority class.  Then, the class label should be compared against the actual label from the code above to extract the IO-Coding from the xml files.  This is a brute-force approach or a manual way of verifying the validity of the predictions


Read in results from BERT Predicitons to the above dataset
The above dataset is derived from IO-Coding applied as done on the training set. This is what should be based on the annotation process. Nowe, we have to read in the predictions from bert, which is a set of class probabilities across all 3 classes and we have to merget that with the above dataset for comparison and error analysis.

### Data File Names

* Test files with Labels and Filenames : /data_for_bert_sent/test_files_with_labels/*_testfile.csv
* Bert label mapping /data_for_bert_sent/test_files_with_labels/*_labelmapping.csv
* BERT evaluation /data_for_bert_sent/BERT_run_results/*_eval_results.txt


In [4]:
print(os.path.dirname(os.path.abspath('__file__')))

C:\Users\Kalyan\Documents\Anu\W266 - NLP\Final Project\lheart-disease-risk-prediction\Code


### CAD Indicator

In [5]:
# read in the test files with labels

CI_test = pd.read_csv("data_for_bert_augmented/test_files_with_labels/cad_ind_testfile.csv")

In [6]:
CI_test.rename( columns={'Unnamed: 0' :'sentenceId'}, inplace=True )

In [7]:
CI_test.head(10)

Unnamed: 0,sentenceId,sentence,label,file
0,0,Record date: 2080-02-18,Other,110-03.xml
1,1,SDU JAR Admission Note,Other,110-03.xml
2,2,Name: \t Yosef Villegas,Other,110-03.xml
3,3,MR:\t8249813,Other,110-03.xml
4,4,DOA: \t2/17/80,Other,110-03.xml
5,5,PCP: Gilbert Perez,Other,110-03.xml
6,6,Attending: YBARRA,Other,110-03.xml
7,7,CODE: FULL,Other,110-03.xml
8,8,HPI: 70 yo M with NIDDM admitted for cath aft...,test,110-03.xml
9,9,Pt has had increasing CP and SOB on exertion f...,Other,110-03.xml


In [8]:
# read in the test results captured for BERT Augmented CAD Indicator model and specify columns as the actual file has no header
bert_aug_CI_results = pd.read_csv("data_for_bert_augmented/bert_augmented_run_results/bert_aug_data_output_data_cad_ind_output_results_test_results.tsv", sep='\t')
                               
bert_aug_CI_results.columns=["Class0", "Class1", "Class2", "Class3", "Class4"]

In [9]:
bert_aug_CI_results.head()

Unnamed: 0,Class0,Class1,Class2,Class3,Class4
0,0.999926,2e-05,2.7e-05,1.6e-05,1e-05
1,0.999272,0.000397,0.000152,2.3e-05,0.000156
2,0.999398,0.000318,0.000139,2e-05,0.000124
3,0.999571,0.00022,0.000102,2e-05,8.7e-05
4,0.999926,2.1e-05,2.9e-05,1.5e-05,1e-05


In [11]:
bert_aug_CI_results['predClass'] = bert_aug_CI_results.idxmax(axis=1)

In [12]:
bert_aug_CI_results.head()

Unnamed: 0,Class0,Class1,Class2,Class3,Class4,predClass
0,0.999926,2e-05,2.7e-05,1.6e-05,1e-05,Class0
1,0.999272,0.000397,0.000152,2.3e-05,0.000156,Class0
2,0.999398,0.000318,0.000139,2e-05,0.000124,Class0
3,0.999571,0.00022,0.000102,2e-05,8.7e-05,Class0
4,0.999926,2.1e-05,2.9e-05,1.5e-05,1e-05,Class0


In [13]:
bert_aug_CI_results['predClass'].value_counts()

Class0    20640
Class2      329
Class1      270
Class3      126
Class4       47
Name: predClass, dtype: int64

In [14]:
def CI_set_labels(classlabel):
    if (classlabel=='Class1'):
        return 'event'
    elif (classlabel=='Class2'):
        return 'mention'
    elif (classlabel=='Class3'):
        return 'symptom'
    elif (classlabel=='Class4'):
        return 'test'
    else:
        return 'Other'

In [15]:
bert_aug_CI_results['predLabel'] = bert_aug_CI_results['predClass'].apply(CI_set_labels)

bert_aug_CI_results.head(10)


Unnamed: 0,Class0,Class1,Class2,Class3,Class4,predClass,predLabel
0,0.999926,2e-05,2.7e-05,1.6e-05,1e-05,Class0,Other
1,0.999272,0.000397,0.000152,2.3e-05,0.000156,Class0,Other
2,0.999398,0.000318,0.000139,2e-05,0.000124,Class0,Other
3,0.999571,0.00022,0.000102,2e-05,8.7e-05,Class0,Other
4,0.999926,2.1e-05,2.9e-05,1.5e-05,1e-05,Class0,Other
5,0.999921,2.5e-05,2.9e-05,1.3e-05,1.2e-05,Class0,Other
6,0.999928,2.1e-05,2.6e-05,1.4e-05,1e-05,Class0,Other
7,0.999854,6.1e-05,3.9e-05,1.4e-05,3.2e-05,Class0,Other
8,0.99992,2e-05,2.7e-05,2.3e-05,1e-05,Class0,Other
9,0.999883,5e-05,3.9e-05,1.6e-05,1.3e-05,Class0,Other


In [17]:
# validating the counts by label
bert_aug_CI_results['predLabel'].value_counts()

Other      20640
mention      329
event        270
symptom      126
test          47
Name: predLabel, dtype: int64

In [18]:
CI_combined = pd.concat([CI_test, bert_aug_CI_results], axis=1)

In [19]:
CI_combined.head()

Unnamed: 0,sentenceId,sentence,label,file,Class0,Class1,Class2,Class3,Class4,predClass,predLabel
0,0,Record date: 2080-02-18,Other,110-03.xml,0.999926,2e-05,2.7e-05,1.6e-05,1e-05,Class0,Other
1,1,SDU JAR Admission Note,Other,110-03.xml,0.999272,0.000397,0.000152,2.3e-05,0.000156,Class0,Other
2,2,Name: \t Yosef Villegas,Other,110-03.xml,0.999398,0.000318,0.000139,2e-05,0.000124,Class0,Other
3,3,MR:\t8249813,Other,110-03.xml,0.999571,0.00022,0.000102,2e-05,8.7e-05,Class0,Other
4,4,DOA: \t2/17/80,Other,110-03.xml,0.999926,2.1e-05,2.9e-05,1.5e-05,1e-05,Class0,Other


In [20]:
CI_combined[CI_combined['predLabel']!='Other']

Unnamed: 0,sentenceId,sentence,label,file,Class0,Class1,Class2,Class3,Class4,predClass,predLabel
65,65,ASSESSMENT AND PLAN:,Other,110-03.xml,0.000793,0.000240,0.000303,0.998302,0.000362,Class3,symptom
85,85,History of Present Illness,Other,110-04.xml,0.000804,0.993561,0.004396,0.000401,0.000837,Class1,event
97,97,Problems,Other,110-04.xml,0.001814,0.003055,0.994300,0.000270,0.000561,Class2,mention
156,156,Impression,Other,110-04.xml,0.001314,0.996738,0.000688,0.000601,0.000660,Class1,event
160,160,"Doubt pneumonia (no cough or fever, lungs clea...",Other,110-04.xml,0.001491,0.996681,0.000750,0.000558,0.000520,Class1,event
179,179,"HISTORY OF PRESENT ILLNESS: In essence, this ...",Other,111-04.xml,0.002081,0.000973,0.996085,0.000404,0.000457,Class2,mention
191,191,"vomiting, and some shortness of breath associa...",Other,111-04.xml,0.002119,0.000964,0.996061,0.000422,0.000434,Class2,mention
250,250,His past medical history is significant for hy...,Other,112-03.xml,0.397554,0.004942,0.000649,0.587603,0.009252,Class3,symptom
252,252,He was able to exercise for 8 minutes and 4 se...,Other,112-03.xml,0.000901,0.000225,0.000318,0.998172,0.000384,Class3,symptom
288,288,"He is a 54-year-old man with obesity, dyslipid...",Other,112-04.xml,0.000908,0.000529,0.000285,0.000495,0.997784,Class4,test


In [27]:
CI_test_labels = CI_combined['label']
CI_pred_labels = CI_combined['predLabel']

#print(type(CI_pred_labels))

In [28]:
accuracy_score(CI_test_labels, CI_pred_labels)

TypeError: '<' not supported between instances of 'float' and 'str'

In [26]:
print(classification_report(CI_pred_labels, CI_test_labels))

TypeError: '<' not supported between instances of 'float' and 'str'

In [29]:
unique_label = np.unique(CI_test_labels)
print(pd.DataFrame(confusion_matrix(CI_test_labels, CI_pred_labels, labels=unique_label), 
                   index=['true:{:}'.format(x) for x in unique_label], 
                   columns=['pred:{:}'.format(x) for x in unique_label]))

TypeError: '<' not supported between instances of 'float' and 'str'

### CAD Time

In [None]:
# read in the test files with labels

CT_test = pd.read_csv("data_for_bert_augmented/test_files_with_labels/cad_tim_testfile.csv")

In [None]:
CI_test.rename( columns={'Unnamed: 0' :'sentenceId'}, inplace=True )

In [None]:
CI_test.head(10)

In [None]:
# read in the test results captured for BERT Augmented CAD model and specify columns as the actual file has no header
bert_CI_results = pd.read_csv("data_for_bert_augmented/bert_augmented_run_results/bert_aug_data_output_data_cad_time_output_results_test_results.tsv", sep='\t')
                               
bert_CI_results.columns=["Class0", "Class1", "Class2", "Class3"]

In [None]:
bert_CI_results.head()

In [None]:
bert_CI_results['predClass'] = bert_CI_results.idxmax(axis=1)

In [None]:
bert_CI_results.head()

In [None]:
bert_CI_results['predClass'].value_counts()

In [None]:
def CI_set_labels(classlabel):
    if (classlabel=='Class1'):
        return 'after dct'
    elif (classlabel=='Class2'):
        return 'before dct'
    elif (classlabel=='Class3'):
        return 'during dct'
    else:
        return 'Other'

In [None]:
bert_CI_results['predLabel'] = bert_CI_results['predClass'].apply(CI_set_labels)

bert_CI_results.head(10)


In [None]:
# validating the counts by label
bert_CI_results['predLabel'].value_counts()

In [None]:
CI_combined = pd.concat([CI_test, bert_CI_results], axis=1)

In [None]:
CI_combined.head()

In [None]:
CI_combined[CI_combined['predLabel']!='Other']

In [None]:
CI_test_labels = CI_combined['label']
CI_pred_labels = CI_combined['predLabel']

#print(type(CI_test_labels))

In [None]:
accuracy_score(CI_test_labels, CI_pred_labels)

In [None]:
print(classification_report(CI_pred_labels, CI_test_labels))

In [None]:
unique_label = np.unique(CI_test_labels)
print(pd.DataFrame(confusion_matrix(CI_test_labels, CI_pred_labels, labels=unique_label), 
                   index=['true:{:}'.format(x) for x in unique_label], 
                   columns=['pred:{:}'.format(x) for x in unique_label]))