# Initialization

In [1]:
!pip install prettytable
from collections import defaultdict

import boto3
import numpy as np
import prettytable
import sagemaker
import scipy.cluster.hierarchy as hcluster
from sagemaker import get_execution_role
from sagemaker.mxnet import MXNetPredictor
from sagemaker.predictor import RealTimePredictor, json_serializer, json_deserializer

role = get_execution_role()
session = boto3.Session(region_name='us-west-2')
sagemaker_session = sagemaker.Session(boto_session=session)


def l2_distance(field, value):
    return np.linalg.norm((np.array(field["center"]) - np.array(value["center"])))


def get_center(bbox):  # {'top': 911, 'height': 31, 'width': 328, 'left': 961}
    return bbox['top'] + bbox['height'] / 2, bbox["left"] + bbox["width"] / 2

def get_top_left(bbox):
    return bbox['top'], bbox["left"]

def get_top_right(bbox):
    return bbox['top'], bbox["left"]+bbox["width"] 

def get_bottom_left(bbox):
    return bbox['top']+ bbox['height'], bbox["left"] 

def get_bottom_right(bbox):
    return bbox['top']+ bbox['height'], bbox["left"]+bbox["width"] 

class JSONPredictor(RealTimePredictor):
    def __init__(self, endpoint_name, sagemaker_session):
        super(JSONPredictor, self).__init__(endpoint_name, sagemaker_session, json_serializer, json_deserializer)


loc_predictor = MXNetPredictor('localization-model-2019-01-29', sagemaker_session)
#field_matching = JSONPredictor('field-match-2019-01-24-12-39-05-522', sagemaker_session)
field_matching = JSONPredictor('field-match-2019-01-31-03-33-24-384', sagemaker_session)
ml_field_matching = MXNetPredictor("field-match-ml-2019-01-20")
print("predictors reference created") 

[33mYou are using pip version 10.0.1, however version 19.0.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m
predictors reference created


# calling the localizer

In [2]:
bucket = "unum-files"
#file_name = "field-match-demo/Accident Claim small- 2_3.tiff"
file_name = "preprocessed/b8f774a9-ec77-4d39-9265-b2dc6d362f9d$3.tiff"
loc_out = loc_predictor.predict({"url": "s3://{}/{}".format(bucket, file_name)})
print("localized")
print(loc_out)


localized
{'status': 'SUCCESS', 'result': {'num_hw_crops': 4, 'hw_key': 'preprocessed/b8f774a9-ec77-4d39-9265-b2dc6d362f9d$3/hand_written.pkl', 'hp_key': 'preprocessed/b8f774a9-ec77-4d39-9265-b2dc6d362f9d$3/hand_printed.pkl', 'bucket_name': 'unum-files', 'num_hp_crops': 5}}


# calling the field matching

In [None]:
# My data
data = {
    "hw_endpoint": 'pytorch-handwriting-ocr-2018-11-21-20-10-49-542',  
    "hp_endpoint": 'hand-printed-model-2018-12-10',#''  #  
    "sp_endpoint": "spell-corrector-2018-11-26-17-44-10-615",
    
    #"hw_endpoint":"pytorch-handwriting-ocr-2018-11-21-20-10-49-542",
    #"hp_endpoint":"sagemaker-mxnet-2018-11-03-23-32-01-918",
    #"sp_endpoint":"spell-corrector-2018-11-26-17-44-10-615",
    
    "field_names": [{"bucket": "ahmedb-test", "filename": "field_name_list.txt"},
                    {"bucket": "unum-files", "filename": "unum_field_names.txt"}],
    "field_names_ignore": [
        {"bucket": "ahmedb-test", "filename": "must_ignore.txt"},
        {"bucket": "unum-files", "filename": "unum_must_ignore_field_names.txt"}
    ],

    #"hw_pickle": {"bucket": "unum-files", "filename": "preprocessed/0a654812-21c5-4482-b054-9f0d2425df42$1/hand_written.pkl"},
    #"hp_pickle": {"bucket": "unum-files", "filename": "preprocessed/0a654812-21c5-4482-b054-9f0d2425df42$1/hand_printed.pkl"},
    #"page_image": {"bucket": "unum-files", "filename": "preprocessed/0a654812-21c5-4482-b054-9f0d2425df42$1.tiff"},

    "hw_pickle": {"bucket": loc_out['bucket_name'], "filename": loc_out['hw_key']},
    "hp_pickle": {"bucket": loc_out['bucket_name'], "filename": loc_out['hp_key']},
    "page_image": {"bucket": bucket, "filename": file_name},

}




In [3]:
data = {
    "hw_pickle": {
        "bucket": "unum-files",
        "filename": "preprocessed/a793d971-37ff-4d9a-98b2-c9e32dd4e94a$4/hand_written.pkl"
    },
    "hp_pickle": {
        "bucket": "unum-files",
        "filename": "preprocessed/a793d971-37ff-4d9a-98b2-c9e32dd4e94a$4/hand_printed.pkl"
    },
   "hw_endpoint": "pytorch-handwriting-ocr-2019-02-01-03-31-58-759",
   "hp_endpoint": "hand-printed-model-2019-01-29-1",
   "sp_endpoint": "spell-corrector-2019-01-29-02-43-03-805",
    "page_image": {
        "bucket": "unum-files",
        "filename": "preprocessed/a793d971-37ff-4d9a-98b2-c9e32dd4e94a$4.tiff"
    },
    "field_names": [{
            "bucket": "ahmedb-test",
            "filename": "field_name_list.txt"
        }, {
            "bucket": "unum-files",
            "filename": "MissingField/unum_field_names.txt"
        }
    ],
    "field_names_ignore": []
}

In [None]:
data = {
    "hw_endpoint": 'pytorch-handwriting-ocr-2018-11-21-20-10-49-542',  #'pytorch-handwriting-ocr-2019-02-01-03-31-58-759',
    "hp_endpoint": 'hand-printed-model-2018-12-10',#''  #  
    "sp_endpoint": "spell-corrector-2018-11-26-17-44-10-615",
    
    #"hw_endpoint":"pytorch-handwriting-ocr-2018-11-21-20-10-49-542",
    #"hp_endpoint":"sagemaker-mxnet-2018-11-03-23-32-01-918",
    #"sp_endpoint":"spell-corrector-2018-11-26-17-44-10-615",
    
    "field_names": [{"bucket": "ahmedb-test", "filename": "field_name_list.txt"},
                    {"bucket": "unum-files", "filename": "unum_field_names.txt"}],
    "field_names_ignore": [
        {"bucket": "ahmedb-test", "filename": "must_ignore.txt"},
        {"bucket": "unum-files", "filename": "unum_must_ignore_field_names.txt"}
    ],

    #"hw_pickle": {"bucket": "unum-files", "filename": "preprocessed/0a654812-21c5-4482-b054-9f0d2425df42$1/hand_written.pkl"},
    #"hp_pickle": {"bucket": "unum-files", "filename": "preprocessed/0a654812-21c5-4482-b054-9f0d2425df42$1/hand_printed.pkl"},
    #"page_image": {"bucket": "unum-files", "filename": "preprocessed/0a654812-21c5-4482-b054-9f0d2425df42$1.tiff"},

    "hw_pickle": {"bucket": loc_out['bucket_name'], "filename": loc_out['hw_key']},
    "hp_pickle": {"bucket": loc_out['bucket_name'], "filename": loc_out['hp_key']},
    "page_image": {"bucket": bucket, "filename": file_name},

}


In [4]:
try:
    initial_matching = field_matching.predict(data)
except Exception as ex:
    print(ex)
    #tb = traceback.format_exc()

In [5]:
initial_matching

{'field_match_output': [{'bbox': {'top': 414,
    'height': 32,
    'width': 708,
    'left': 153},
   'confidence': 0.958,
   'field_name': 'ATTENDING PHYSICIAN STATEMENT ',
   'field_name_before_correction': 'ATTENDING PHYSICIAN STATEMENT',
   'value': {'bbox': {'top': -1, 'height': -1, 'width': -1, 'left': -1},
    'confidence': 0,
    'field_value': '',
    'field_value_before_correction': ''}},
  {'bbox': {'top': 474, 'height': 35, 'width': 890, 'left': 125},
   'confidence': 0.9359999999999999,
   'field_name': 'Insured/Policyholder Name (Last Name, First Name, MI, Suffix) ',
   'field_name_before_correction': 'Insured/Policyholder Name (Last Name, First Name, MI, Suffix)',
   'value': {'bbox': {'top': 511, 'height': 78, 'width': 1222, 'left': 76},
    'confidence': 0.9590040834881876,
    'field_value': 'Rawacatoose Vivi ',
    'field_value_before_correction': 'Rawacatoose Svivi'}},
  {'bbox': {'top': 474, 'height': 31, 'width': 349, 'left': 1938},
   'confidence': 0.76857142857

In [6]:
original_match = prettytable.PrettyTable(["field", "values", "field score", "value score"])
fields = []
values = []
bbox_of_all = {}
text_to_score={}
for pair in initial_matching['field_match_output']:
    fields.append({"string": pair['field_name'], "bbox": pair['bbox'], "center": get_center(pair['bbox'])})
    bbox_of_all[pair['field_name']] = pair['bbox']
    text_to_score[pair['field_name']]= pair["confidence"]
    if pair["value"]['bbox'] != {'top': -1, 'height': -1, 'width': -1, 'left': -1}:
        values.append({"string": pair["value"]['field_value'], "bbox": pair["value"]['bbox'], "center": get_center(pair["value"]['bbox'])})
        text_to_score[pair["value"]['field_value']] = pair["value"]['confidence']
        bbox_of_all[pair["value"]['field_value']] = pair["value"]['bbox']

    # print({"strings": {"field": , "value": pair["value"]['field_value']},
    #        "bboxs": {"field": pair['bbox'], "value": pair["value"]['bbox']}})
    original_match.add_row([pair['field_name'], pair["confidence"],
                            pair["value"]['field_value'], pair["value"]['confidence']
                            ])
print(original_match)
    

+-------------------------------------------------------------------------------------------------------------+--------------------+-------------------+--------------------+
|                                                    field                                                    |       values       |    field score    |    value score     |
+-------------------------------------------------------------------------------------------------------------+--------------------+-------------------+--------------------+
|                                        ATTENDING PHYSICIAN STATEMENT                                        |       0.958        |                   |         0          |
|                        Insured/Policyholder Name (Last Name, First Name, MI, Suffix)                        | 0.9359999999999999 | Rawacatoose Vivi  | 0.9590040834881876 |
|                                          Date of Birth (mm/dd/yy)                                           | 0.7685714285714286

# rematching by nearest

In [10]:
predictions_act = prettytable.PrettyTable(["field", "field score", "values", "value score", "score"])
dist_thresh = 100
for field in fields:
    print(field["string"])
    candidates = []
    for value in values:
        print(value["string"])
        l2_dist = l2_distance(field, value)
        print(str(l2_dist))
        if(l2_dist < dist_thresh):
            candidates.append((value, l2_dist))
            print('added')

    nearest = list(map(lambda item: item[0]["string"], sorted(candidates, key=lambda item: item[1])[:5]))
    input_to_matching = {"field_names": [field["string"]], "field_values": nearest}
    if(len(nearest) != 0):
        results = ml_field_matching.predict(input_to_matching)  # siamese string field match
    else:
        results = [{"field": field["string"], "value": '', "score": 0}]
        text_to_score[''] = ''
    for result in sorted(results, key=lambda item: -item["score"]):
        predictions_act.add_row([result["field"],
                                 text_to_score[result["field"]],
                                 result["value"],
                                 text_to_score[result["value"]],
                                 result["score"],
                                 ])

print(predictions_act)


ATTENDING PHYSICIAN STATEMENT 
Rawacatoose Vivi 
216.33307652783935
11-00-71 
1663.5492929276247
03-18-18 
1396.7614327436163
Insured/Policyholder Name (Last Name, First Name, MI, Suffix) 
Rawacatoose Vivi 
130.8099766837377
11-00-71 
1597.42996716601
03-18-18 
1320.1254675219323
Date of Birth (mm/dd/yy) 
Rawacatoose Vivi 
1426.783270157034
11-00-71 
78.15529412650176
added
03-18-18 
412.16744170300495


ValidationError: An error occurred (ValidationError) when calling the InvokeEndpoint operation: Endpoint field-match-ml-2019-01-20 of account 620580205565 not found.

# rematching using hugarian alg

In [None]:
fields_strings = list(map(lambda item: item["string"], fields))
values_strings = list(map(lambda item: item["string"], values))

print(len(fields_strings))
print(len(values_strings))
data = {'field_names': fields_strings, 'field_values':values_strings}
results = ml_field_matching.predict(data)
for result in results:
    print(result)

# rematching using hugarian alg + clustering

In [None]:
points_2d = []
for field in fields:
    points_2d.append(field["center"])
for value in values:
    points_2d.append(value["center"])

points_2d = np.array(points_2d)

# clustering
thresh = 250
clusters = hcluster.fclusterdata(points_2d, thresh, criterion="distance")

groupings = defaultdict(lambda: {'field_names': [], 'field_values': []})
for index, class_ in enumerate(clusters):
    if index >= len(fields):
        groupings[class_]["field_values"].append(values[index - len(fields)]["string"])
    else:
        groupings[class_]["field_names"].append(fields[index]["string"])

for cluster in [grouping for grouping in groupings.values()]:
    cluster = {"field_names": list(set(cluster["field_names"])), "field_values": list(set(cluster["field_values"]))}
    predictions_act= prettytable.PrettyTable(["field", "field score", "values", "value score", "score"])
        
    if cluster["field_names"] and cluster["field_values"]:
        import pprint
        pprint.pprint(cluster)
        results = ml_field_matching.predict(cluster)
        for result in sorted(results, key=lambda item: -item["score"]):
            predictions_act.add_row([result["field"],
                                     text_to_score[result["field"]],
                                     result["value"],
                                     text_to_score[result["value"]],
                                     result["score"],
                                     ])
        print(predictions_act)
        print("=" * 50)

In [54]:
import sagemaker
from sagemaker import get_execution_role

sagemaker_session = sagemaker.Session()

role = get_execution_role()

from sagemaker.mxnet.model import MXNetModel,MXNetPredictor

sagemaker_model = MXNetModel(model_data='s3://sagemaker-field-matching/elmo.h5.tar.gz',py_version="py3",
                             role=role, entry_point="global_endpoint_improvements.py",)


#predictor = sagemaker_model.deploy(initial_instance_count=1, instance_type='ml.t2.medium',endpoint_name="field-match-ml-2019-01-20") # , instance_type='local'

predictor = sagemaker_model.deploy(initial_instance_count=1, instance_type='ml.t2.medium',endpoint_name="ml-field-match-2019-02-03-2") # , instance_type='local'

INFO:sagemaker:Created S3 bucket: sagemaker-us-east-1-620580205565
INFO:sagemaker:Creating model with name: sagemaker-mxnet-2019-02-03-16-44-41-182
INFO:sagemaker:Creating endpoint with name ml-field-match-2019-02-03-2


--------------------------------------------------------------------------------------!

In [7]:
import sagemaker
from sagemaker import get_execution_role
import json
import pandas as pd
sagemaker_session = sagemaker.Session()

role = get_execution_role()

from sagemaker.mxnet.model import MXNetModel, MXNetPredictor

predictor = MXNetPredictor("ml-field-match-2019-02-03-2") 


results = predictor.predict(initial_matching)
for result in results:
    print(result)

{'field': 'ATTENDING PHYSICIAN STATEMENT ', 'score': 0.958, 'value_bbox': {'left': -1, 'top': -1, 'width': -1, 'height': -1}, 'field_bbox': {'left': 153, 'top': 414, 'width': 708, 'height': 32}, 'value': '', 'value_detection_score': 1, 'field_detection_score': 0.958}
{'field': 'Insured/Policyholder Name (Last Name, First Name, MI, Suffix) ', 'score': 0.9359999999999999, 'value_bbox': {'left': 76, 'top': 511, 'width': 1222, 'height': 78}, 'field_bbox': {'left': 125, 'top': 474, 'width': 890, 'height': 35}, 'value': 'Rawacatoose Vivi ', 'value_detection_score': 0.9590040834881876, 'field_detection_score': 0.9359999999999999}
{'field': 'Date of Birth (mm/dd/yy) ', 'score': 0.9557475447654724, 'value_bbox': {'left': 1895, 'top': 510, 'width': 543, 'height': 72}, 'field_bbox': {'left': 1938, 'top': 589, 'width': 349, 'height': 30}, 'value': '11-00-71 ', 'value_detection_score': 0.9844390292856002, 'field_detection_score': 0.9399999999999998}
{'field': 'Patient Name (Last Name, First Name. M

In [8]:
predictions_act = prettytable.PrettyTable(["field", "value", "score", "field_detection_score", "value_detection_score"])
for result in results:
    predictions_act.add_row([result["field"],                             
                             result["value"],
                             result["score"],
                             result["field_detection_score"],
                             result["value_detection_score"],
                             ])
print(predictions_act)    

+-------------------------------------------------------------------------------------------------------------+-------------------+--------------------+-----------------------+-----------------------+
|                                                    field                                                    |       value       |       score        | field_detection_score | value_detection_score |
+-------------------------------------------------------------------------------------------------------------+-------------------+--------------------+-----------------------+-----------------------+
|                                        ATTENDING PHYSICIAN STATEMENT                                        |                   |       0.958        |         0.958         |           1           |
|                        Insured/Policyholder Name (Last Name, First Name, MI, Suffix)                        | Rawacatoose Vivi  | 0.9359999999999999 |   0.9359999999999999  |   0.959004083488187

# Test code from endpoint

In [None]:
'''
Inputs:
- doc_img: tiff
- filed_bbox: {'field': ,'field_bbox':{'top': , 'height': , 'width': , 'left': }}
- candidates_bbox: [{'value': ,'value_bbox':{'top': , 'height': , 'width': , 'left': }},
                    {'value': ,'value_bbox':{'top': , 'height': , 'width': , 'left': }},
                    {'value': ,'value_bbox':{'top': , 'height': , 'width': , 'left': }},
                    ....]
Outputs:
- Draw rect on "doc_img" in "color" the field_bbox in continous, and all candidates_bbox in dashed.
'''
def visualize_candidates(doc_img, field, candidates, color):
    return

'''
Inputs:
- doc_img: tiff
- mached_pairs_bbox: [{'field': ,'field_bbox': {'top': , 'height': , 'width': , 'left': }, 'value':, 'value_bbox': {'top': , 'height': , 'width': , 'left': }},
{'field': ,'field_bbox': {'top': , 'height': , 'width': , 'left': }, 'value':, 'value_bbox': {'top': , 'height': , 'width': , 'left': }}
{'field': ,'field_bbox': {'top': , 'height': , 'width': , 'left': }, 'value':, 'value_bbox': {'top': , 'height': , 'width': , 'left': }}
...
]
                
Outputs:
Draw a line between bboxes between the pairs bboxes. on the doc_img
'''
def visualize_matches(doc_img, mached_pairs_bbox):
    return

In [47]:
print('Global EP')


original_match = prettytable.PrettyTable(["field", "field score", "values", "value score"])
fields = []
values = []
bbox_of_all = {}
text_to_score={}
matched_results_dict = {}
field_id = 0
for pair in initial_matching['field_match_output']:
    fields.append({"id": field_id, "string": pair['field_name'], "bbox": pair['bbox'], "center": get_center(pair['bbox'])})
    bbox_of_all[pair['field_name']] = pair['bbox']
    text_to_score[pair['field_name']]= pair["confidence"]
    if pair["value"]['bbox'] != {'top': -1, 'height': -1, 'width': -1, 'left': -1}:
        values.append({"string": pair["value"]['field_value'], "bbox": pair["value"]['bbox'], "center": get_center(pair["value"]['bbox'])})
        text_to_score[pair["value"]['field_value']] = pair["value"]['confidence']
        bbox_of_all[pair["value"]['field_value']] = pair["value"]['bbox']

    # print({"strings": {"field": , "value": pair["value"]['field_value']},
    #        "bboxs": {"field": pair['bbox'], "value": pair["value"]['bbox']}})
    original_match.add_row([pair['field_name'], pair["confidence"],
                            pair["value"]['field_value'], pair["value"]['confidence']
                            ])
    matched_results_dict[field_id] = {'field': pair['field_name'],
                             "value": pair["value"]['field_value'], 
                             "score": pair["confidence"], 
                             "field_detection_score": pair["confidence"], 
                             "value_detection_score": pair["value"]['confidence'], 
                             "value_bbox": pair["value"]['bbox'], 
                             "field_bbox": pair['bbox'] }
    field_id += 1

   

print('Calling ML fields_match')
ml_field_matching = MXNetPredictor("field-match-ml-2019-01-20-1")
'''
fields_strings = list(map(lambda item: item["string"], fields))
values_strings = list(map(lambda item: item["string"], values))

print(len(fields_strings))
print(len(values_strings))
data = {'field_names': fields_strings, 'field_values':values_strings}


results = ml_field_matching.predict(data)
for result in results:
    print(result)
'''

predictions_act = prettytable.PrettyTable(["field", "field score", "values", "value score", "score"])
dist_thresh = 100
ml_matched_results = []
ml_matched_results_dict = {}
#n_colors = 10
#colors = {0: 'black', 1: 'red', 2: 'blue',..., n_colors:'magenta'}
for field in fields:
    #print(field["string"])
    candidates = []
    for value in values:
        #print(value["string"])
        l2_dist = l2_distance(field, value)
        if(l2_dist < dist_thresh):
            candidates.append((value, l2_dist))
            #print(str(l2_dist))

    nearest = list(map(lambda item: item[0]["string"], sorted(candidates, key=lambda item: item[1])[:5]))
    input_to_matching = {"field_names": [field["string"]], "field_values": nearest}
    #print(input_to_matching)
    #visualize_candidates(doc_img=doc_img, field=field, candidates=input_to_matching, color=colors[np.randint(0,n_colors)])
    if(len(nearest) != 0):
        results = ml_field_matching.predict(input_to_matching)  # siamese string field match
    else:
        results = [{"field": field["string"], "value": '', "score": 0}]
        text_to_score[''] = 0
        bbox_of_all[''] = {'width': -1, 'top': -1, 'height': -1, 'left': -1}
    for result in sorted(results, key=lambda item: -item["score"]):
        predictions_act.add_row([result["field"],
                                 text_to_score[result["field"]],
                                 result["value"],
                                 text_to_score[result["value"]],
                                 result["score"],
                                 ])
        ml_matched_results.append({"field": result["field"], 
                                "value": result["value"], 
                                "score": result["score"], 
                                "field_detection_score": text_to_score[result["field"]], 
                                "value_detection_score": text_to_score[result["value"]], 
                                "value_bbox": bbox_of_all[result["value"]], 
                                "field_bbox": bbox_of_all[result["field"]] })
        ml_matched_results_dict[field["id"]] = {"field": result["field"],
                                                "value": result["value"], 
                                                 "score": result["score"], 
                                                 "field_detection_score": text_to_score[result["field"]], 
                                                 "value_detection_score": text_to_score[result["value"]], 
                                                 "value_bbox": bbox_of_all[result["value"]], 
                                                 "field_bbox": bbox_of_all[result["field"]] }        

#visualize_matches(doc_img, mached_pairs_bbox)
print(predictions_act)


Global EP
Calling ML fields_match
+-------------------------------------------------------------------------------------------------------------+--------------------+-----------+--------------------+----------------------+
|                                                    field                                                    |    field score     |   values  |    value score     |        score         |
+-------------------------------------------------------------------------------------------------------------+--------------------+-----------+--------------------+----------------------+
|                                        ATTENDING PHYSICIAN STATEMENT                                        |       0.958        |           |         0          |          0           |
|                        Insured/Policyholder Name (Last Name, First Name, MI, Suffix)                        | 0.9359999999999999 |           |         0          |          0           |
|                    

In [8]:
print(original_match)

+-------------------------------------------------------------------------------------------------------------+--------------------+-------------------------------------------------------------------------------------------------------------------------+--------------------+
|                                                    field                                                    |    field score     |                                                          values                                                         |    value score     |
+-------------------------------------------------------------------------------------------------------------+--------------------+-------------------------------------------------------------------------------------------------------------------------+--------------------+
|                                        ATTENDING PHYSICIAN STATEMENT                                        |       0.958        |                                        

In [35]:
for field in ml_matched_results_dict:    
    print(ml_matched_results_dict[field]['score'])

ATTENDING PHYSICIAN STATEMENT 
0
Insured/Policyholder Name (Last Name, First Name, MI, Suffix) 
0
Date of Birth (mm/dd/yy) 
0.9557475447654724
Patient Name (Last Name, First Name. MI. Suffix) 
0
ACCIDENT DETAILS 
0
If yes, date of accident (mm/dd/yy) 
0
O Unknown 
0
Diagnosis 
0
Procedure 
0
Has the patient been treated for the same or a similar condition by another physician in the past O Yes No 
0
lfyes, please provide the following: 
0.020851850509643555
Diagnosis: 
0
Treatment Dates: 
0.019500792026519775
Did you advise the patient to stop working 
0
If yes, as of what date (mmiédd/yy) 
0
if this claim is related to normal pregnancy, please provide the following: 
0
Actual Delivery Date (mm/dd/yy 
0
C. Signature of Attending Physician 
0
The above statements are true and complete to the best of my knowledge and belief. 
0
Physician (Lapt First Napte, MI_Syffix) Please Print 
0
First Napte, 
0
Medical Specialty 
0
Degree 
0
Zip 
0
Telephone Number 
0
Fax Number 
0
physicians Tax ID 

In [34]:
for field in matched_results_dict:
    #print(matched_results_dict[field]['score'])
    print(matched_results_dict[field]['value_detection_score'])
    

0
0.9590040834881876
0
0
0
0.6971021235622193
0
0.5742436705538013
0
0
0.83
0
0
0
0
0
0
0
0
0
0.7902991390312286
0
0
0.8284757537016366
0
0
0
0
0.856901691436868
0
0


In [48]:
final_matched_results_dict = {}
for field_id in ml_matched_results_dict:
    if(matched_results_dict[field_id]['value_detection_score'] == 0): matched_results_dict[field_id]['value_detection_score'] = 1
    if(matched_results_dict[field_id]['value_detection_score'] > ml_matched_results_dict[field_id]['value_detection_score']):
        final_matched_results_dict[field_id] = matched_results_dict[field_id]
    else:
        final_matched_results_dict[field_id] = ml_matched_results_dict[field_id]

In [49]:
final_matched_results_dict

{0: {'field': 'ATTENDING PHYSICIAN STATEMENT ',
  'value': '',
  'score': 0.958,
  'field_detection_score': 0.958,
  'value_detection_score': 1,
  'value_bbox': {'top': -1, 'height': -1, 'width': -1, 'left': -1},
  'field_bbox': {'top': 414, 'height': 32, 'width': 708, 'left': 153}},
 1: {'field': 'Insured/Policyholder Name (Last Name, First Name, MI, Suffix) ',
  'value': 'Rawacatoose Vivi ',
  'score': 0.9359999999999999,
  'field_detection_score': 0.9359999999999999,
  'value_detection_score': 0.9590040834881876,
  'value_bbox': {'top': 511, 'height': 78, 'width': 1222, 'left': 76},
  'field_bbox': {'top': 474, 'height': 35, 'width': 890, 'left': 125}},
 2: {'field': 'Date of Birth (mm/dd/yy) ',
  'value': '11-00-71 ',
  'score': 0.9557475447654724,
  'field_detection_score': 0.9399999999999998,
  'value_detection_score': 0.9844390292856002,
  'value_bbox': {'top': 510, 'height': 72, 'width': 543, 'left': 1895},
  'field_bbox': {'top': 589, 'height': 30, 'width': 349, 'left': 1938}}

In [50]:
final_predictions = prettytable.PrettyTable(["field", "field score", "values", "value score", "score"])
for field_id in final_matched_results_dict:
    final_predictions.add_row([final_matched_results_dict[field_id]['field'],
                              final_matched_results_dict[field_id]['field_detection_score'],
                              final_matched_results_dict[field_id]['value'],
                              final_matched_results_dict[field_id]['value_detection_score'],
                              final_matched_results_dict[field_id]['score']])
    
print(final_predictions)

+-------------------------------------------------------------------------------------------------------------+--------------------+-------------------------------------------------------------------------------------------------------------------------+--------------------+----------------------+
|                                                    field                                                    |    field score     |                                                          values                                                         |    value score     |        score         |
+-------------------------------------------------------------------------------------------------------------+--------------------+-------------------------------------------------------------------------------------------------------------------------+--------------------+----------------------+
|                                        ATTENDING PHYSICIAN STATEMENT                                 