### Dependencies

In [6]:
# Lexical similarity libraries
import Levenshtein as lev 
from isub import isub
# Look-ups
from lookup import DBpediaLookup, WikidataAPI, GoogleKGLookup
from endpoints import DBpediaEndpoint, WikidataEndpoint
# Ontology processing
from owlready2 import *
#Should be imported after owlready2
from rdflib import Graph

### Lexical similarity

In [7]:
# Compare a short name against two longer variants that contain it,
# once as a suffix ("Republic of Congo") and once as a prefix ("Congo Republic").
short_name = 'Congo'
name_as_suffix = 'Republic of Congo'
name_as_prefix = 'Congo Republic'

# Levenshtein edit distance between the two strings.
print(lev.distance(short_name, name_as_suffix))

# Jaro-Winkler score for both variants.
print(lev.jaro_winkler(short_name, name_as_suffix))
print(lev.jaro_winkler(short_name, name_as_prefix))

# isub score for both variants.
print(isub(short_name, name_as_suffix))
print(isub(short_name, name_as_prefix))

12
0.4196078431372549
0.8714285714285714
0.7272727272727273
0.8578947368421053


### Look-up services

In [8]:
# Keyword to search for; `query` and `limit` are reused by the other look-up cells.
query = "United Kingdom"
# Maximum number of entities to be returned by each look-up
limit = 5

print("Entities from Google KG for the keyword search: " + query)

# Ask the Google KG look-up service for candidate entities and show each
# one as "<id> <entity representation>".
kg = GoogleKGLookup()
entities = kg.getKGEntities(query, limit)
for ent in entities:
    print(ent.getId(), ent)

Entities from Google KG for the keyword search: United Kingdom
kg:/m/07ssc <id: kg:/m/07ssc, label: United Kingdom, description: Country in Europe, types: {'http://schema.org/Place', 'http://schema.org/Country', 'http://schema.org/AdministrativeArea'}, source: GoogleKG>
kg:/m/0xnc3 <id: kg:/m/0xnc3, label: Charles III, description: King of the United Kingdom, types: {'http://schema.org/Person'}, source: GoogleKG>
kg:/m/0d1_f <id: kg:/m/0d1_f, label: Elizabeth II, description: Former Queen of the United Kingdom, types: {'http://schema.org/Person'}, source: GoogleKG>
kg:/m/01vbry <id: kg:/m/01vbry, label: Met Office, description: Government agency, types: {'http://schema.org/Organization', 'http://schema.org/GovernmentOrganization', 'http://schema.org/Corporation', 'http://schema.org/EducationalOrganization'}, source: GoogleKG>
kg:/m/08pj55 <id: kg:/m/08pj55, label: HSBC Bank, description: Financial services corporation, types: {'http://schema.org/Organization', 'http://schema.org/Corpor

In [9]:
# Same keyword search as above, this time against the DBpedia look-up service.
# Relies on `query` and `limit` defined in the Google KG cell.
dbpedia = DBpediaLookup()
entities = dbpedia.getKGEntities(query, limit)

print("Entities from DBPedia for the keyword search: " + query)
# One line per candidate: "<id> <entity representation>".
for ent in entities:
    print(ent.getId(), ent)

Entities from DBPedia for the keyword search: United Kingdom
http://dbpedia.org/resource/United_Kingdom <id: http://dbpedia.org/resource/United_Kingdom, label: <B>United</B> <B>Kingdom</B>, description: ['The <B>United</B> <B>Kingdom</B> of Great Britain and Northern Ireland, commonly known as the <B>United</B> <B>Kingdom</B> (UK'], types: {'http://dbpedia.org/ontology/Place', 'http://dbpedia.org/ontology/PopulatedPlace', 'http://dbpedia.org/ontology/Country', 'http://dbpedia.org/ontology/Location'}, source: DBpedia>
http://dbpedia.org/resource/London <id: http://dbpedia.org/resource/London, label: London, description: ['London is the capital and largest city of England and of the <B>United</B> <B>Kingdom</B>. Standing on the River'], types: {'http://dbpedia.org/ontology/Place', 'http://dbpedia.org/ontology/Settlement', 'http://dbpedia.org/ontology/City', 'http://dbpedia.org/ontology/Location', 'http://dbpedia.org/ontology/PopulatedPlace'}, source: DBpedia>
http://dbpedia.org/resource/

In [10]:
# Same keyword search against the Wikidata API.
# Relies on `query` and `limit` defined in the Google KG cell.
wikidata = WikidataAPI()
# NOTE(review): the third argument presumably restricts the search to
# Wikidata items (as opposed to properties) - confirm against WikidataAPI.
entities = wikidata.getKGEntities(query, limit, "item")

print("Entities from Wikidata for the keyword search: " + query)
# One line per candidate: "<id> <entity representation>".
for ent in entities:
    print(ent.getId(), ent)

Entities from Wikidata for the keyword search: United Kingdom
http://www.wikidata.org/entity/Q145 <id: http://www.wikidata.org/entity/Q145, label: United Kingdom, description: country in north-west Europe, types: set(), source: Wikidata>
http://www.wikidata.org/entity/Q174193 <id: http://www.wikidata.org/entity/Q174193, label: United Kingdom of Great Britain and Ireland, description: historical sovereign state (1801–1922), types: set(), source: Wikidata>
http://www.wikidata.org/entity/Q21528200 <id: http://www.wikidata.org/entity/Q21528200, label: A United Kingdom, description: 2016 film by Amma Asante, types: set(), source: Wikidata>
http://www.wikidata.org/entity/Q974636 <id: http://www.wikidata.org/entity/Q974636, label: United Kingdom in the Eurovision Song Contest 2010, description: , types: set(), source: Wikidata>
http://www.wikidata.org/entity/Q161885 <id: http://www.wikidata.org/entity/Q161885, label: Kingdom of Great Britain, description: constitutional monarchy in Western Eu

### SPARQL Endpoints

In [11]:
print("DBpedia")
# Entity whose types are retrieved from the DBpedia SPARQL endpoint.
ent = "http://dbpedia.org/resource/London"
ep = DBpediaEndpoint()
types = ep.getTypesForEntity(ent)
print("Found " + str(len(types)) + " types.")
# The loop variable is deliberately NOT called `type`: cell variables are
# globals, so that name would shadow the builtin `type()` for every cell
# executed afterwards in the same kernel.
for type_uri in types:
    # Print each type URI together with its English labels.
    print("\t", type_uri, ep.getEnglishLabelsForEntity(type_uri))

DBpedia
Found 9 types.
	 http://dbpedia.org/ontology/Place {'place'}
	 http://www.wikidata.org/entity/Q486972 set()
	 http://dbpedia.org/ontology/Settlement {'settlement'}
	 http://dbpedia.org/ontology/City {'city'}
	 http://schema.org/City set()
	 http://schema.org/Place set()
	 http://www.wikidata.org/entity/Q515 set()
	 http://dbpedia.org/ontology/Location set()
	 http://dbpedia.org/ontology/PopulatedPlace {'populated place'}


In [12]:
print("Wikidata")
# Wikidata entity for London
ent = "http://www.wikidata.org/entity/Q84"
ep = WikidataEndpoint()
types = ep.getTypesForEntity(ent)
print("Found " + str(len(types)) + " types.")
# The loop variable is deliberately NOT called `type`: cell variables are
# globals, so that name would shadow the builtin `type()` for every cell
# executed afterwards in the same kernel.
for type_uri in types:
    # Print each type URI together with its English labels.
    print("\t", type_uri, ep.getEnglishLabelsForEntity(type_uri))

Wikidata
Found 7 types.
	 http://www.wikidata.org/entity/Q174844 {'megacity'}
	 http://www.wikidata.org/entity/Q200250 {'Metropolis', 'metropolis'}
	 http://www.wikidata.org/entity/Q5119 {'Capital city', 'capital city'}
	 http://www.wikidata.org/entity/Q1066984 {'Financial centre', 'financial centre'}
	 http://www.wikidata.org/entity/Q515 {'city'}
	 http://www.wikidata.org/entity/Q208511 {'global city'}
	 http://www.wikidata.org/entity/Q51929311 {'largest city'}


### Methods to get the entity labels

In [2]:
def getClasses(onto):
    """Return the classes of the ontology ``onto``.

    Thin wrapper: delegates to ``onto.classes()`` and hands back whatever
    that call returns, unchanged.
    """
    ontology_classes = onto.classes()
    return ontology_classes
    
def getDataProperties(onto):
    """Return the data properties of the ontology ``onto``.

    Thin wrapper: delegates to ``onto.data_properties()`` and hands back
    whatever that call returns, unchanged.
    """
    data_props = onto.data_properties()
    return data_props
    
def getObjectProperties(onto):
    """Return the object properties of the ontology ``onto``.

    Thin wrapper: delegates to ``onto.object_properties()`` and hands back
    whatever that call returns, unchanged.
    """
    object_props = onto.object_properties()
    return object_props
    
def getIndividuals(onto):
    """Return the individuals of the ontology ``onto``.

    Thin wrapper: delegates to ``onto.individuals()`` and hands back
    whatever that call returns, unchanged.
    """
    named_individuals = onto.individuals()
    return named_individuals


def getRDFSLabelsForEntity(entity):
    """Return the labels stored in the ``label`` attribute of ``entity``.

    Args:
        entity: Any object; for ontology entities ``label`` is expected to
            hold the rdfs:label values.

    Returns:
        The value of ``entity.label`` unchanged, or an empty list when the
        object has no ``label`` attribute at all.
    """
    # Fall back to "no labels" instead of raising AttributeError, which is
    # what the previously commented-out hasattr() guard was aiming for.
    return getattr(entity, "label", [])

### Load ontology and print labels

In [3]:
# Load an ontology and print its first 5 classes together with their labels.

# The conference ontologies do not contain rdfs:label; the readable name is
# part of the URI instead.
urionto = "data/cmt.owl"
#urionto = "data/ekaw.owl"
#urionto = "data/confOf.owl"

# The human and mouse ontologies do contain rdfs:label, but their URIs carry
# a concept ID rather than a readable name.
#urionto = "data/human.owl"
#urionto = "data/mouse.owl"


# get_ontology() is provided by owlready2
onto = get_ontology(urionto).load()

print("Classes in Ontology: " + str(len(list(getClasses(onto)))))

for position, cls in enumerate(getClasses(onto), start=1):
    # Full IRI of the class
    print(cls.iri)
    # Name of the entity taken from the URI. In some cases it may be a
    # code, as in the mouse and human anatomy ontologies.
    print("\t" + cls.name)
    # Labels taken from rdfs:label
    print("\t" + str(getRDFSLabelsForEntity(cls)))

    # Stop once five classes have been shown
    if position == 5:
        break

Classes in Ontology: 29
http://cmt#Meta-Reviewer
	Meta-Reviewer
	[]
http://cmt#Reviewer
	Reviewer
	[]
http://cmt#Decision
	Decision
	[]
http://cmt#Person
	Person
	[]
http://cmt#Document
	Document
	[]


### Method to compute Precision and Recall

In [4]:
def compareWithReference(reference_mappings_file, system_mappings_file):
    """Print precision, recall and F-score of system mappings against a reference.

    Both files are parsed as Turtle into rdflib graphs. A system triple that
    also occurs in the reference graph is a true positive, any other system
    triple is a false positive, and a reference triple missing from the
    system graph is a false negative.

    Args:
        reference_mappings_file: Path (string) of the Turtle file holding the
            reference mappings.
        system_mappings_file: Path (string) of the Turtle file holding the
            mappings produced by the system under evaluation.

    Returns:
        None. The three scores are written to standard output. A score whose
        denominator is zero (e.g. an empty system or reference file) is
        reported as 0.0 instead of raising ZeroDivisionError.
    """
    ref_mappings = Graph()
    ref_mappings.parse(reference_mappings_file, format="ttl")

    system_mappings = Graph()
    system_mappings.parse(system_mappings_file, format="ttl")

    # Precision and recall are derived from true positives, false positives
    # and false negatives.
    # https://en.wikipedia.org/wiki/Precision_and_recall
    tp = 0
    fp = 0
    for t in system_mappings:
        if t in ref_mappings:
            tp += 1
        else:
            fp += 1

    fn = sum(1 for t in ref_mappings if t not in system_mappings)

    # Guard every division: with no system mappings, no reference mappings or
    # no overlap at all, the corresponding denominator is zero.
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    if precision + recall > 0:
        f_score = (2 * precision * recall) / (precision + recall)
    else:
        f_score = 0.0

    # The closing quote after the reference file name was missing originally.
    print("Comparing '" + system_mappings_file + "' with '" + reference_mappings_file + "'")
    print("\tPrecision: " + str(precision))
    print("\tRecall: " + str(recall))
    print("\tF-Score: " + str(f_score))

### Check results

In [5]:
# Reference mappings and the example system mappings to evaluate against them.
reference_mappings = "data/anatomy-reference-mappings.ttl"
system_mappings = "data/anatomy-example-system-mappings.ttl"

# Precision, recall and F-score can only be obtained if a reference alignment exists.
compareWithReference(
    reference_mappings_file=reference_mappings,
    system_mappings_file=system_mappings,
)

# For the lab you should compare, for example,
# cmt-confOf-reference.ttl with the cmt-confOf-your-system.ttl you generate.
# compareWithReference("cmt-confOf-reference.ttl", "cmt-confOf-your-system.ttl")

Comparing 'data/anatomy-example-system-mappings.ttl' with 'data/anatomy-reference-mappings.ttl
	Precision: 0.8537434706906558
	Recall: 0.9703166226912929
	F-Score: 0.9083050324174129
