In [45]:
# Elizabeth Shelton ejs6ar
"""
Processes text files of concepts truth/output into lists and calculates the recall, precision, and f1 score.
Format for txt files:
1) ground truth file: copy an Excel column of concepts in the following format for each cell, each cell being for one recording:
-four values per concept, comma-separated: concept, True/False (negation), value, text wording
-each concept separated by a semicolon ; and if possible (I don't think required) a newline
2) pipeline results file: copy and paste each test result from the Pipeline to an Excel cell; copy and paste the column into a text file

Note: When processing the output of get_truth_concepts or get_results_concepts, you MUST process both *truth* and *results* with the same one of the 4 available process_* functions to get accurate results
Note 2: Works as a .py file if converted
"""
from interventions import get_tp, get_fp, get_fn, calc_recall, calc_precision, calc_f1

In [46]:
def get_truth_concepts(path):
    """
    Converts the ground truth, in a text file, into a master list of concepts per case.

    The whole file is lower-cased, then split on double quotes (one quoted chunk per case),
    each case is split on ";" into concepts, and each concept is split on "," into
    whitespace-stripped fields. Empty concepts (e.g. from a trailing or doubled ";") and
    cases that end up with no concepts are dropped.

    :param path: the location of the .txt file with the ground truth concepts
    :return: truth: a list of lists of lists. Master list: contains all data. Each element of master list: a list of concepts for each case. Each case element: a concept (a list of field strings)
    """
    # The context manager closes the file even if reading fails.
    with open(path) as file:
        all_text = file.read().lower()

    truth = []
    for item in all_text.strip().split('"'):
        if item == '':
            continue
        case_list = []
        for raw_concept in item.strip().split(";"):
            fields = [field.strip() for field in raw_concept.strip().split(",")]
            # Build a filtered list instead of removing from the list being iterated:
            # removing in place skips the element after each removal, which used to
            # leave consecutive empty concepts behind.
            if fields != ['']:
                case_list.append(fields)
        if case_list:
            truth.append(case_list)
    return truth

def get_results_concepts(path):
    """
    Converts the pipeline output into a master list of concepts per case.

    The whole file is lower-cased, then split on double quotes (one quoted chunk per case).
    Each case is split on ")\\n(" into parenthesised tuples, the outer parentheses are
    stripped, and each tuple is split on "," into fields with surrounding quotes and spaces
    removed. Empty concepts and cases that end up with no concepts are dropped.

    :param path: the location of the .txt file with the pipeline output
    :return: results: a list of lists of lists. Master list: contains all data. Each element of master list: a list of concepts for each case. Each case element: a concept (a list of field strings)
    """
    # The context manager closes the file even if reading fails.
    with open(path) as file:
        all_text = file.read().lower()

    results = []
    for item in all_text.strip().split('"'):
        if item == '':
            continue
        case_list = []
        for raw_concept in item.strip().split(")\n("):
            body = raw_concept.strip().strip("()")
            fields = [field.strip("' \"") for field in body.split(",")]
            # Build a filtered list instead of removing from the list being iterated:
            # removing in place skips the element after each removal, which used to
            # leave consecutive empty concepts behind.
            if fields != ['']:
                case_list.append(fields)
        if case_list:
            results.append(case_list)
    return results



def process_with_text(concepts):
    """
    Strictest comparison mode: keeps concept, negation, value, and text signal.

    The master list is modified in place (and also returned): every concept is replaced
    by the string form of its first four fields.

    :param concepts: the truth or results master list
    :return: concepts: the same master list, each concept pared down to [concept, T/F, value, text] and converted to string
    """
    for case in concepts:
        for idx, concept in enumerate(case):
            case[idx] = str(concept[:4])
    return concepts



def process_no_text(concepts):
    """
    Intermediate-strictness comparison mode: keeps concept, negation, and value, but drops the surrounding text.

    The master list is modified in place (and also returned): every concept is replaced
    by the string form of its first three fields.

    :param concepts: the truth or results master list
    :return: concepts: the same master list, each concept pared down to [concept, T/F, value] and converted to string
    """
    for case in concepts:
        for idx, concept in enumerate(case):
            case[idx] = str(concept[:3])
    return concepts

def process_tf(concepts):
    """
    Least strict comparison mode: keeps only the concept and its negation, disregarding value and context.

    The master list is modified in place (and also returned): every concept is replaced
    by the string form of its first two fields.

    :param concepts: the truth or results master list
    :return: concepts: the same master list, each concept pared down to [concept, T/F] and converted to string
    """
    for case in concepts:
        for idx, concept in enumerate(case):
            case[idx] = str(concept[:2])
    return concepts

def process_custom(concepts):
    """
    A hybrid of processing methods: keeps the value for numerical concepts but only the negation for non-numerical concepts.

    The master list is modified in place (and also returned). For concepts whose name is
    one of the numerical concepts listed below, the first three fields are kept and any
    leading/trailing "." is stripped from the value; for all other concepts only the
    first two fields are kept. Concepts with fewer fields than expected are kept as-is
    (truncated to what is available) instead of raising IndexError.

    :param concepts: the truth or results master list
    :return concepts: the same master list, each concept pared down to [concept, T/F] or [concept, T/F, # value] and converted to string
    """
    numerical = {"age", "blood pressure", "heart rate", "glucose", "gcs", "resp", "pulse oximetry"}
    for case in concepts:
        for idx, concept in enumerate(case):
            # `concept and ...` guards against an empty concept, which has no name field.
            if concept and concept[0] in numerical:
                trimmed = concept[:3]
                # Only normalise the value when the concept actually has one.
                if len(trimmed) > 2:
                    trimmed[2] = trimmed[2].strip(".")
                case[idx] = str(trimmed)
            else:
                case[idx] = str(concept[:2])
    return concepts









In [47]:
def main2(path_results="/home/harry/Downloads/EMS-Pipeline/ETC/Performance Evaluation/extracted.txt",
          path_truth="/home/harry/Downloads/EMS-Pipeline/ETC/Performance Evaluation/concepts.txt",
          num_cases=12):
    """
    Loads the pipeline results and the ground truth, processes both with process_tf, and prints
    precision, recall, and F1 per case.

    :param path_results: the location of the .txt file with the pipeline output (pass your own path)
    :param path_truth: the location of the .txt file with the ground truth concepts (pass your own path)
    :param num_cases: the maximum number of cases to report with labels; fewer are reported if the files contain fewer cases
    :return: None; the metrics are printed
    """
    # Truth and results MUST be processed with the same function. Swap both calls to
    # process_with_text, process_no_text, or process_custom for a different strictness.
    results = process_tf(get_results_concepts(path_results))
    truth = process_tf(get_truth_concepts(path_truth))

    # Only cases present in BOTH lists can be compared; indexing past either list
    # used to raise IndexError when a file held fewer cases than the hard-coded count.
    available = min(len(truth), len(results))
    if len(truth) != len(results):
        print("Warning: " + str(len(truth)) + " truth cases but " + str(len(results)) + " result cases; comparing the first " + str(available))

    # Calculating the metrics for each case
    # WITH LABELS
    for i in range(min(num_cases, available)):
        print("Case " + str(i) + ":\t")
        print("Precision: " + str(calc_precision(truth[i], results[i])))
        print("Recall: " + str(calc_recall(truth[i], results[i])))
        print("F1: " + str(calc_f1(truth[i], results[i])))
        print()

    # JUST NUMBERS (F1 only, first two cases)
    for i in range(min(2, available)):
        print(calc_f1(truth[i], results[i]))

    # Precision and recall for case 1 on their own, when that case exists.
    if available > 1:
        print("Precision: " + str(calc_precision(truth[1], results[1])))
        print("Recall: " + str(calc_recall(truth[1], results[1])))

In [48]:
main2()

Case 0:	
Precision: 0.0
Recall: 0.0
F1: 0

Case 1:	
Precision: 0.2
Recall: 0.18181818181818182
F1: 0.1904761904761905

Case 2:	
Precision: 0.25
Recall: 0.15789473684210525
F1: 0.1935483870967742

Case 3:	
Precision: 0.14285714285714285
Recall: 0.16666666666666666
F1: 0.15384615384615383

Case 4:	
Precision: 0.2857142857142857
Recall: 0.16666666666666666
F1: 0.2105263157894737

Case 5:	
Precision: 0.3333333333333333
Recall: 0.26666666666666666
F1: 0.2962962962962963

Case 6:	
Precision: 0.3333333333333333
Recall: 0.17647058823529413
F1: 0.23076923076923078

Case 7:	
Precision: 0.5
Recall: 0.35294117647058826
F1: 0.41379310344827586

Case 8:	
Precision: 0.35294117647058826
Recall: 0.35294117647058826
F1: 0.35294117647058826

Case 9:	
Precision: 0.5
Recall: 0.2222222222222222
F1: 0.30769230769230765

Case 10:	
Precision: 0.26666666666666666
Recall: 0.23529411764705882
F1: 0.25

Case 11:	
Precision: 0.3333333333333333
Recall: 0.25
F1: 0.28571428571428575

0
0.1904761904761905
Precision: 0.