# Evaluate the Alignment Results against References

In [2]:
import pandas as pd
import numpy as np

from xml.dom import minidom

In [40]:
# Load alignments as a DataFrame
def load_alignments(rdf_path, name):
    """
        Parse an alignment RDF file into a DataFrame, one row per <map> element.

        input: rdf_path: path to a rdf file with alignments
            <map>
                <Cell>
                    <entity1 rdf:resource="http://mouse.owl#MA_0002401"/>
                    <entity2 rdf:resource="http://human.owl#NCI_C52561"/>
                    <measure rdf:datatype="xsd:float">1.0</measure>
                    <relation>=</relation>
                </Cell>
            </map>
               name: label used only in the printed summary line
        output: DataFrame with columns 'source', 'target', 'relation', 'confidence'
                'source'/'target' are the rdf:resource URIs of entity1/entity2,
                'relation' is the whitespace-stripped relation text,
                'confidence' is the measure as a float (NaN when the measure
                is empty or not a number)
        raises: IndexError if a <map> has no entity1/entity2/relation/measure element
                KeyError if an entity element has no rdf:resource attribute
    """

    xml_data = minidom.parse(rdf_path)
    maps = xml_data.getElementsByTagName('map')

    print("Total number of {} is {}".format(name, len(maps)))

    # holds one (source uri, target uri, relation, confidence) tuple per <map>
    uri_maps = []
    for ele in maps:
        e1 = ele.getElementsByTagName('entity1')[0].attributes['rdf:resource'].value
        e2 = ele.getElementsByTagName('entity2')[0].attributes['rdf:resource'].value
        rel = _element_text(ele, 'relation')
        try:
            confd = float(_element_text(ele, 'measure'))
        except ValueError:
            # Keep the row: the (source, target) pair is still usable even
            # when its measure cannot be read as a number.
            confd = float('nan')
        uri_maps.append((e1, e2, rel, confd))

    alignment = pd.DataFrame(uri_maps, columns=['source', 'target', 'relation', 'confidence'])
    # A frame built from zero rows has object columns; pin the dtype so
    # 'confidence' is float whether or not any alignment was found.
    alignment['confidence'] = alignment['confidence'].astype(float)

    return alignment


def _element_text(parent, tag):
    """Return the stripped text of the first `tag` element found under `parent`."""
    node = parent.getElementsByTagName(tag)[0]
    # Join every text/CDATA child instead of reading childNodes[0], so an
    # empty element yields '' rather than raising IndexError, and the
    # indentation/newlines of pretty-printed XML are dropped.
    pieces = [child.data for child in node.childNodes
              if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)]
    return ''.join(pieces).strip()

In [23]:
# Reference alignment RDF file, given relative to the notebook's working directory.
ref_path = "../data/reference.rdf"

In [41]:
# Parse the reference alignments; the trailing expression displays (rows, columns).
refs = load_alignments(ref_path, "references")
refs.shape

Total number of references is 1516


(1516, 4)

In [28]:
# NOTE(review): absolute, machine-specific temp-directory path; this cell only
# reruns on a machine where that file still exists.
align_path = "/var/folders/2v/l9vs169j3ndcvjlgxn30bk_r0000gp/T/alignment___3397p8.rdf"

In [42]:
# Parse the predicted alignments stored at align_path into a DataFrame.
alignments = load_alignments(align_path, "predicted alignments")

Total number of predicted alignments is 983


In [50]:
# Evaluate the alignment against references by precision, recall, f-measure
def evaluate(align_rdf_path, refs_rdf_path):
    """
        Score predicted alignments against reference alignments.

        A prediction counts as correct when its (source, target) pair also
        appears in the references; 'relation' and 'confidence' are not compared.
        Repeated pairs are counted once on each side, so precision and recall
        always stay within [0, 1].

        input: align_rdf_path: path to alignment rdf file
               refs_rdf_path: path to references rdf file
        output: print precision, recall, f1-measure
                (a metric whose denominator is zero is reported as 0.0)
    """

    align_df = load_alignments(align_rdf_path, 'predicted alignments')
    refs_df = load_alignments(refs_rdf_path, 'references')

    pair_cols = ['source', 'target']
    # Compare sets of pairs: an inner merge on frames that still contain
    # repeated pairs would emit one row per combination and inflate the count.
    predicted_pairs = align_df[pair_cols].drop_duplicates()
    reference_pairs = refs_df[pair_cols].drop_duplicates()

    matched_df = predicted_pairs.merge(reference_pairs, how='inner', on=pair_cols)
    n_matched = matched_df.shape[0]
    n_predicted = predicted_pairs.shape[0]
    n_reference = reference_pairs.shape[0]

    print("Total correctly predicted alignments is {}".format(n_matched))

    # Guard every division: an empty file or zero matches must not raise.
    p = n_matched / n_predicted if n_predicted else 0.0
    r = n_matched / n_reference if n_reference else 0.0
    # p and r are both non-zero exactly when something matched.
    f = 2 / (1/p + 1/r) if n_matched else 0.0
    print("Precision is {}".format(p))
    print("Recall is {}".format(r))
    print("F1-Measure is {}".format(f))

In [59]:
# Score one predicted-alignment file against the references in ref_path.
# NOTE(review): hardcoded machine-specific temp path; reruns only where that file exists.
evaluate('/var/folders/2v/l9vs169j3ndcvjlgxn30bk_r0000gp/T/alignment_zq7k1uxn.rdf', ref_path)

Total number of predicted alignments is 1781
Total number of references is 1516
Total correctly predicted alignments is 270
Precision is 0.15160022459292533
Recall is 0.17810026385224276
F1-Measure is 0.16378525932666058


In [64]:
# Score a second predicted-alignment file against the same references.
# NOTE(review): hardcoded machine-specific temp path; reruns only where that file exists.
evaluate('/var/folders/2v/l9vs169j3ndcvjlgxn30bk_r0000gp/T/alignment_sm7qr0eu.rdf', ref_path)

Total number of predicted alignments is 983
Total number of references is 1516
Total correctly predicted alignments is 974
Precision is 0.9908443540183113
Recall is 0.6424802110817942
F1-Measure is 0.7795118047218887


In [63]:
# Score a third predicted-alignment file against the same references.
# NOTE(review): hardcoded machine-specific temp path; the execution count of this
# cell (63) precedes the cell above it (64), so the cells were run out of order.
evaluate('/var/folders/2v/l9vs169j3ndcvjlgxn30bk_r0000gp/T/alignment_bmqlgxbf.rdf', ref_path)

Total number of predicted alignments is 2764
Total number of references is 1516
Total correctly predicted alignments is 1244
Precision is 0.45007235890014474
Recall is 0.820580474934037
F1-Measure is 0.5813084112149532
