<h1>OD2WD API PLAYGROUND</h1>
<br>(Use this notebook to play with our API 😃)

❧  import package

In [86]:
import json
import numpy as np
import re
import pandas as pd

from itertools import islice
from collections import OrderedDict

from src.utils.mapper import preprocessing, phrase_vector, cosine_sim, lDistance, transform_sigmoid
from src.utils.indexer import search
# from src.utils.wikimedia import searchEntity, searchObjWProperty, searchProperty, is_class, is_instance_of

from elasticsearch import Elasticsearch
from gensim.models import Word2Vec

❧  load model & index

In [14]:
def connect_elasticsearch():
    """Create an Elasticsearch client for localhost:9200 and report its reachability.

    The outcome of the ping is printed; the client is returned in both cases,
    so callers receive a client object even when the service did not answer.
    """
    client = Elasticsearch([{'host': 'localhost', 'port': 9200}])
    message = 'Connected to ES Service' if client.ping() else 'Could not connect to ES Service'
    print(message)
    return client

class Server(object):
    """Container for the resources shared by the notebook.

    Attributes set by the constructor:
        es: the client returned by ``connect_elasticsearch()``.
        w2v_model: the Word2Vec model loaded from ``data/model/``.
    """

    def __init__(self):
        # The index connection is set up before the model is loaded.
        self.load_index()
        self.load_model()

    def load_index(self):
        """Connect to Elasticsearch and keep the client on ``self.es``."""
        self.es = connect_elasticsearch()

    def load_model(self):
        """Load the Word2Vec model from disk and keep it on ``self.w2v_model``."""
        model_path = 'data/model/' + 'w2vec_wiki_id_case'
        self.w2v_model = Word2Vec.load(model_path)

# Build the shared resources up front: Server() connects to Elasticsearch and loads the Word2Vec model.
server = Server()

Connected to ES Service


❧  Playground

In [76]:
import requests
import json
import urllib
from SPARQLWrapper import SPARQLWrapper, JSON

def getEntityData(entity):
    """Placeholder: builds a Wikidata SPARQL client but never queries it.

    ``entity`` is not used and the function returns None.

    NOTE(review): a later cell defines ``getEntityData`` again with a real
    query; once that cell has run, this definition is shadowed.
    """
    SPARQLWrapper("https://query.wikidata.org/sparql", agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11")

def searchEntity(keyword, limit):
    """Look up entities by keyword through the Wikidata ``wbsearchentities`` API.

    Args:
        keyword: search text; it is percent-encoded before being placed in the URL.
        limit: value sent as the ``limit`` query parameter.

    Returns:
        The decoded JSON response body.

    NOTE(review): a later cell redefines ``searchEntity`` with a different
    signature (keyword, context, limit); that later definition shadows this one.
    """
    encoded = urllib.parse.quote(keyword)
    endpoint = "https://www.wikidata.org/w/api.php?action=wbsearchentities&search={}&limit={}&language=id&format=json".format(encoded, limit)
    return json.loads(requests.get(endpoint).text)

def searchObjWProperty(subject_id, property_id):
    """Fetch the values of ``property_id`` on ``subject_id`` together with their P279 parents.

    Each result row carries ``item``/``itemLabel`` (the property value) and
    ``grandItem``/``grandItemLabel`` (what that value is linked to via P279).
    The P279 pattern is not optional, so values without a P279 statement
    produce no row. The MINUS clause removes rows when the subject has
    ``P31 wd:Q4167410`` (presumably "Wikimedia disambiguation page" — TODO confirm).

    Args:
        subject_id: Wikidata entity id, e.g. ``'Q42'``.
        property_id: Wikidata property id, e.g. ``'P31'``.

    Returns:
        The converted SPARQL response (labels requested in language "id").
    """
    query_text = """
    SELECT ?item ?itemLabel ?grandItem ?grandItemLabel
    WHERE
    {
      bind (wd:%s as ?entity)
      wd:%s wdt:%s ?item .
      ?item wdt:P279 ?grandItem
      MINUS { ?entity wdt:P31 wd:Q4167410 }
      SERVICE wikibase:label { bd:serviceParam wikibase:language "id" }
    }
    """ % (subject_id, subject_id, property_id)

    client = SPARQLWrapper("https://query.wikidata.org/sparql", agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11")
    client.setQuery(query_text)
    client.setReturnFormat(JSON)
    return client.query().convert()

def searchSbjWProperty(object_id, property_id):
    """Find the subjects that point at ``object_id`` through ``property_id``.

    Args:
        object_id: Wikidata entity id used as the object of the triple.
        property_id: Wikidata property id used as the predicate.

    Returns:
        The converted SPARQL response with ``item`` / ``itemLabel`` rows
        (labels requested in language "id").
    """
    query_text = """
    SELECT ?item ?itemLabel
    WHERE
    {
        ?item wdt:%s wd:%s .
        SERVICE wikibase:label { bd:serviceParam wikibase:language "id" }
    }
    """ % (property_id, object_id)

    client = SPARQLWrapper("https://query.wikidata.org/sparql", agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11")
    client.setQuery(query_text)
    client.setReturnFormat(JSON)
    return client.query().convert()

def searchProperty(subject_id, object_id):
    """Find the predicates that directly connect ``subject_id`` to ``object_id``.

    Args:
        subject_id: Wikidata entity id used as the subject of the triple.
        object_id: Wikidata entity id used as the object of the triple.

    Returns:
        The converted SPARQL response; each row's ``item`` is a predicate
        linking the two entities (labels requested in language "id").
    """
    query_text = """
    SELECT ?item ?itemLabel
    WHERE
    {
        wd:%s ?item wd:%s .
        SERVICE wikibase:label { bd:serviceParam wikibase:language "id" }
    }
    """ % (subject_id, object_id)

    client = SPARQLWrapper("https://query.wikidata.org/sparql", agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11")
    client.setQuery(query_text)
    client.setReturnFormat(JSON)
    return client.query().convert()

def searchPropertyRange(property_id):
    """Ask Wikidata for the ``wikibase:propertyType`` of a property.

    Args:
        property_id: Wikidata property id, e.g. ``'P31'``.

    Returns:
        The converted SPARQL response; rows expose the type as ``wbtype``.
    """
    # NOTE(review): the "#Subproperties of location (P276)" line inside the
    # query is a SPARQL comment that does not describe what this query selects;
    # it is kept verbatim so the text sent to the endpoint is unchanged.
    query_text = """
    #Subproperties of location (P276)
    SELECT ?wbtype 
    WHERE {
    wd:%s wikibase:propertyType  ?wbtype.
    SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    }
    """ % (property_id)

    client = SPARQLWrapper("https://query.wikidata.org/sparql", agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11")
    client.setQuery(query_text)
    client.setReturnFormat(JSON)
    return client.query().convert()

def is_class(entity_id):
    """Tell whether at least one item is declared an instance (P31) of ``entity_id``.

    Args:
        entity_id: Wikidata entity id to test.

    Returns:
        True when the query yields at least one binding, False otherwise.
    """
    query_text = """
    SELECT distinct ?protag  WHERE { 
        bind (wd:%s as ?protag)
        ?x wdt:P31 ?protag
    }
    """% (entity_id)

    client = SPARQLWrapper("https://query.wikidata.org/sparql", agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11")
    client.setQuery(query_text)
    client.setReturnFormat(JSON)
    bindings = client.query().convert()['results']['bindings']
    return len(bindings) > 0

def get_labels(entity_id):
    """Collect the distinct Indonesian and English ``rdfs:label`` values of an entity.

    Args:
        entity_id: Wikidata entity id.

    Returns:
        A list of label strings in first-seen order, without duplicates.
    """
    query_text = """
    SELECT ?label
    WHERE {
      BIND(wd:%s AS ?entityId)
      ?entityId rdfs:label ?label .
      filter(lang(?label) = 'id' || lang(?label) = 'en')
    }
    """% (entity_id)

    client = SPARQLWrapper("https://query.wikidata.org/sparql", agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11")
    client.setQuery(query_text)
    client.setReturnFormat(JSON)
    rows = client.query().convert()['results']['bindings']
    # dict.fromkeys keeps the first occurrence of each label and preserves order.
    return list(dict.fromkeys(row['label']['value'] for row in rows))

def is_instance_of(entity_id, class_id):
    """Tell whether ``entity_id`` has a direct P31 statement pointing at ``class_id``.

    Args:
        entity_id: Wikidata id of the entity being tested.
        class_id: Wikidata id of the candidate class.

    Returns:
        True when the query yields at least one binding, False otherwise.
    """
    # Substitution order matters: the class fills the bind(), the entity the triple.
    query_text = """
    SELECT ?item ?itemLabel
    WHERE
    {
        bind (wd:%s as ?item)
        wd:%s wdt:P31 ?item .
        SERVICE wikibase:label { bd:serviceParam wikibase:language "id" }
    }
    """% (class_id, entity_id)

    client = SPARQLWrapper("https://query.wikidata.org/sparql", agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11")
    client.setQuery(query_text)
    client.setReturnFormat(JSON)
    bindings = client.query().convert()['results']['bindings']
    return len(bindings) > 0


In [16]:
def ranking(candidateList, property_data, model, is_protagonist=False):
    """Score each candidate entity against the context terms in ``property_data``.

    Every candidate is scored independently and appears at most once in the
    result (earlier versions let one candidate's score leak into the next and
    could append the same candidate several times).

    Scoring:
        * ``is_protagonist=True``: score is 1 when the candidate is an instance
          of any entry of ``property_data`` (entries are passed to
          ``is_instance_of`` as class ids), otherwise 0.
        * otherwise: the candidate's P31 parents (falling back to P279 when it
          has none) are fetched once; the score is the best cosine similarity
          between any context term and any parent / grand-parent label.
          Similarities of 0.2 or less count as 0.

    Candidates without any parent get score 0, except those whose description
    is 'Wikimedia disambiguation page', which are dropped from the result.

    Args:
        candidateList: iterable of candidate dicts; each needs an ``'id'`` key
            and may carry a ``'description'``.
        property_data: list of context terms (or class ids when
            ``is_protagonist`` is set).
        model: embedding model handed to ``phrase_vector``.
        is_protagonist: switches to the instance-of scoring described above.

    Returns:
        A list of ``{'candidate': <candidate>, 'score': <number>}`` dicts in
        candidate order.
    """
    min_similarity = 0.2
    ranked = []

    for candidate in candidateList:
        entity_id = candidate['id']

        if is_protagonist:
            matches_class = any(is_instance_of(entity_id, data) for data in property_data)
            ranked.append({'candidate': candidate, 'score': 1 if matches_class else 0})
            continue

        if not property_data:
            # Nothing to compare against: no lookup needed, score stays 0.
            ranked.append({'candidate': candidate, 'score': 0})
            continue

        # The parent lookup does not depend on the context term, so do it once.
        results = searchObjWProperty(entity_id, 'P31')
        if len(results['results']['bindings']) <= 0:
            results = searchObjWProperty(entity_id, 'P279')
        parents = results['results']['bindings']

        if len(parents) <= 0:
            if candidate.get('description') == 'Wikimedia disambiguation page':
                continue
            ranked.append({'candidate': candidate, 'score': 0})
            continue

        # Embed every parent and grand-parent label once per candidate.
        parent_vectors = []
        for parent in parents:
            for label_key in ('itemLabel', 'grandItemLabel'):
                vector = phrase_vector(model, preprocessing(parent[label_key]['value']))
                if vector is not None:
                    parent_vectors.append(vector)

        score = 0
        for data in property_data:
            candVector = phrase_vector(model, data)
            if candVector is None:
                continue
            sim = max((cosine_sim(candVector, vector) for vector in parent_vectors), default=0)
            if sim <= min_similarity:
                sim = 0
            score = max(score, sim)

        ranked.append({'candidate': candidate, 'score': score})

    return ranked

def map_entity(data, context, model, limit=10, is_protagonist=False):
    """Map a cell value to Wikidata candidates, grouped by descending score.

    The value and every context term are preprocessed, candidates are fetched
    with ``searchEntity`` and scored with ``ranking`` (the ranked list is
    printed). Each candidate's final score is then the average of its ranking
    score and a label-match score:

        * when the value has an embedding: cosine similarity between the value
          and the candidate label (candidates whose label has no embedding are
          skipped); the average is capped at 1.0;
        * otherwise: ``transform_sigmoid`` of the edit distance between label
          and value, keeping only candidates with distance <= 3.

    Args:
        data: raw cell value to resolve.
        context: iterable of raw context strings.
        model: embedding model handed to ``phrase_vector``.
        limit: passed to ``searchEntity`` and also used to cap the number of
            distinct scores returned.
        is_protagonist: forwarded to ``ranking``.

    Returns:
        An ``OrderedDict`` mapping score -> list of
        ``{'id', 'label', 'description'}`` dicts, highest score first; or a
        plain dict ``{0: [NOT FOUND entry]}`` when nothing qualified.
    """
    def summarize(candidate):
        # Compact view of a candidate; a missing description becomes ''.
        return {
            'id': candidate['id'],
            'label': candidate['label'],
            'description': candidate['description'] if 'description' in candidate else '',
        }

    data = preprocessing(data)
    context = [preprocessing(term) for term in context]

    ranked = searchEntity(data, context, limit)
    ranked = ranking(ranked, context, model, is_protagonist)
    print(ranked)

    result = {}
    if len(ranked) > 0:
        qword_vector = phrase_vector(model, data)
        if qword_vector is not None:
            # Embedding available: compare value and label in vector space.
            for entry in ranked:
                candidate = entry['candidate']
                eword_vector = phrase_vector(model, preprocessing(candidate['label']))
                if eword_vector is None:
                    continue
                sim = cosine_sim(qword_vector, eword_vector)
                ent_map = summarize(candidate)
                score = min(np.average([sim, entry['score']]), 1.0)
                result.setdefault(score, []).append(ent_map)
        else:
            # No embedding for the value: fall back to edit distance on the label.
            for entry in ranked:
                candidate = entry['candidate']
                dist = lDistance(candidate['label'], data)
                if dist <= 3:
                    ent_map = summarize(candidate)
                    score = np.average([entry['score'], transform_sigmoid(dist)])
                    result.setdefault(score, []).append(ent_map)

    if len(result) == 0:
        result[0] = [{'id': 'NOT FOUND', 'label':'NOT FOUND', 'description':'NOT FOUND'}]
        return result

    by_score = OrderedDict(sorted(result.items(), reverse=True))
    return OrderedDict(islice(by_score.items(), 0, limit))

In [176]:
import requests
import json
import urllib
from SPARQLWrapper import SPARQLWrapper, JSON

def searchEntityWikidata(keyword, limit):
    """Look up entities by keyword through the Wikidata ``wbsearchentities`` API.

    The ``limit`` argument used to be accepted but never sent, so the API
    default applied regardless of what the caller asked for. It is now
    forwarded. The query string is built by ``requests`` from a parameter
    dict, which also takes care of URL-encoding the keyword.

    Args:
        keyword: search text.
        limit: maximum number of hits to request; ``None`` leaves the API
            default in place.

    Returns:
        The decoded JSON response body.
    """
    params = {
        "action": "wbsearchentities",
        "search": keyword,
        "language": "id",
        "format": "json",
    }
    if limit is not None:
        params["limit"] = limit

    res = requests.get("https://www.wikidata.org/w/api.php", params=params)
    return res.json()

def getEntityData(entity):
    """Fetch the Indonesian label, description and aliases of a Wikidata entity.

    Each attribute sits in its own OPTIONAL group. Previously all three shared
    a single OPTIONAL, which only matches when every pattern inside it matches,
    so an entity lacking an Indonesian alias (or description) came back with
    its label unbound as well.

    Args:
        entity: Wikidata entity id, e.g. ``'Q42'``.

    Returns:
        The list of result bindings. Because the entity itself is bound with
        BIND, at least one row is expected even when no attribute is present;
        ``label``, ``entityDescription`` and ``altLabel`` may each be missing
        from a row. Several aliases yield several rows.
    """
    sparql = SPARQLWrapper("https://query.wikidata.org/sparql", agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11")
    sparql.setQuery("""
    SELECT ?entity ?label ?entityDescription ?altLabel
    WHERE {
      BIND(wd:%s as ?entity) .
      OPTIONAL { ?entity rdfs:label ?label . FILTER (lang(?label) = "id") }
      OPTIONAL { ?entity schema:description ?entityDescription . FILTER (lang(?entityDescription) = "id") }
      OPTIONAL { ?entity skos:altLabel ?altLabel . FILTER (lang(?altLabel) = "id") }
      SERVICE wikibase:label { bd:serviceParam wikibase:language "id" }
    }
    """ % (entity))
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    return results['results']['bindings']

def searchEntityWikimedia(keyword, context, limit=5):
    """Full-text search on wikidata.org for a keyword, widened by context terms.

    The search string is the keyword as a quoted phrase, OR-ed with each
    context term. Empty context entries are skipped, so an empty context no
    longer produces a dangling ``OR`` at the end of the search string.

    Args:
        keyword: text searched as an exact phrase.
        context: iterable of extra terms OR-ed into the search.
        limit: sent as ``srlimit`` (maximum number of hits).

    Returns:
        The ``query.search`` list from the API response.
    """
    url = "https://www.wikidata.org/w/api.php"
    terms = ["\"{}\"".format(keyword)]
    terms.extend(term for term in context if term)
    params = {
        "action": "query",
        "format": "json",
        "list": "search",
        "srsearch": " OR ".join(terms),
        "srlimit": limit
    }

    # The context manager closes the session (and its pooled connections).
    with requests.Session() as req:
        payload = req.get(url=url, params=params).json()
    return payload['query']['search']

def searchEntity(keyword, context, limit=5):
    """Search Wikidata for a keyword and return a compact record for each hit.

    Hits come from ``searchEntityWikimedia``; each hit's title is passed to
    ``getEntityData`` and only the first returned row is used. Hits for which
    no row comes back are skipped.

    Args:
        keyword: search text.
        context: extra search terms forwarded to the search call.
        limit: maximum number of hits forwarded to the search call.

    Returns:
        A list of ``{'id', 'label', 'description'}`` dicts; label and
        description are '' when the row does not carry them.
    """
    found = []
    for hit in searchEntityWikimedia(keyword, context, limit):
        rows = getEntityData(hit['title'])
        if len(rows) == 0:
            continue
        first = rows[0]
        found.append({
            # The entity value is a URI; its fifth '/'-separated part is the id.
            'id': first['entity']['value'].split('/')[4],
            'label': first['label']['value'] if 'label' in first else '',
            'description': first['entityDescription']['value'] if 'entityDescription' in first else '',
        })
    return found

def checkProperty(model, entityId, columnValue):
    """Score how well an entity's parents match a column description.

    The entity's P31 parents are fetched (falling back to P279 when there are
    none). The score is the highest cosine similarity between the embedding of
    ``columnValue`` and the embedding of any parent or grand-parent label.

    Args:
        model: embedding model handed to ``phrase_vector``.
        entityId: Wikidata entity id.
        columnValue: text describing the column; passed to ``phrase_vector`` as is.

    Returns:
        The best similarity found, or 0 when the entity has no parents, the
        column value has no embedding, or no similarity exceeds 0.
    """
    parents = searchObjWProperty(entityId, 'P31')['results']['bindings']
    if len(parents) <= 0:
        parents = searchObjWProperty(entityId, 'P279')['results']['bindings']
        if len(parents) <= 0:
            return 0

    column_vector = phrase_vector(model, columnValue)
    if column_vector is None:
        return 0

    best = 0
    for parent in parents:
        parent_vector = phrase_vector(model, preprocessing(parent['itemLabel']['value']))
        grand_vector = phrase_vector(model, preprocessing(parent['grandItemLabel']['value']))
        local_best = 0 if parent_vector is None else cosine_sim(column_vector, parent_vector)
        if grand_vector is not None:
            local_best = max(cosine_sim(column_vector, grand_vector), local_best)
        best = max(local_best, best)
    return best

def checkEntity(model, keyword, entityId):
    """Score how closely a keyword matches any label of an entity.

    Args:
        model: embedding model handed to ``phrase_vector``.
        keyword: text to compare; passed to ``phrase_vector`` as is.
        entityId: Wikidata entity id whose labels come from ``get_labels``.

    Returns:
        The highest cosine similarity between the keyword embedding and any
        preprocessed label embedding; 0 when the keyword has no embedding
        (labels are then not fetched) or nothing scores above 0.
    """
    keyword_vector = phrase_vector(model, keyword)
    if keyword_vector is None:
        return 0

    best = 0
    for label in get_labels(entityId):
        label_vector = phrase_vector(model, preprocessing(label))
        current = 0 if label_vector is None else cosine_sim(keyword_vector, label_vector)
        best = max(current, best)
    return best
    

def mapEntity(model, keyword, columnValue, context, limit=5, threshold=0.3):
    """Resolve a keyword to the top Wikidata search hit, if it scores well enough.

    Only the first hit from ``searchEntity`` is considered. Its score is the
    mean of ``checkProperty`` (parents vs. column value) and ``checkEntity``
    (labels vs. keyword); the score is printed.

    Args:
        model: embedding model forwarded to the scoring helpers.
        keyword: raw cell value; preprocessed before searching and scoring.
        columnValue: column description forwarded to ``checkProperty``.
        context: extra search terms forwarded to ``searchEntity``.
        limit: maximum number of search hits requested.
        threshold: the hit is accepted only when its score is strictly greater
            than this value (defaults to the previously hard-coded 0.3).

    Returns:
        * the entity dict when the score exceeds ``threshold``;
        * ``{'label': 'NOT FOUND', 'id': 'NOT FOUND'}`` when the score is too
          low or the search returned nothing (previously an empty search
          raised IndexError);
        * ``None`` when the top hit's description is
          'Wikimedia disambiguation page'.
    """
    not_found = {'label': 'NOT FOUND', 'id': 'NOT FOUND'}

    clean_keyword = preprocessing(keyword)
    candidates = searchEntity(clean_keyword, context, limit)
    if not candidates:
        return not_found

    entity = candidates[0]
    if entity.get('description') == 'Wikimedia disambiguation page':
        return None

    initialScore = checkProperty(model, entity['id'], columnValue)
    entityScore = checkEntity(model, clean_keyword, entity['id'])
    finalScore = (initialScore + entityScore) / 2
    print('{} score: {}'.format(keyword, finalScore))

    return entity if finalScore > threshold else not_found
    
def applySearch(model, keyword, columnValue, context, limit=5):
    """Return the Wikidata id chosen by ``mapEntity`` for a keyword, or 'NOT FOUND'.

    ``mapEntity`` returns ``None`` when the top hit is a disambiguation page;
    that case is reported as 'NOT FOUND' here instead of raising TypeError on
    ``None['id']``.

    Args:
        model: embedding model forwarded to ``mapEntity``.
        keyword: raw cell value to resolve.
        columnValue: column description forwarded to ``mapEntity``.
        context: extra search terms forwarded to ``mapEntity``.
        limit: accepted for signature compatibility but not forwarded;
            ``mapEntity`` only looks at the first hit, so a single hit is
            requested.

    Returns:
        The matched entity id, or the string 'NOT FOUND'.
    """
    entity = mapEntity(model, keyword, columnValue, context, 1)
    if entity is None:
        return 'NOT FOUND'
    return entity['id']

In [177]:
# Smoke test on a single value: 'sekolah' is the column value and ['sekolah', 'jakarta'] the search context.
applySearch(server.w2v_model, 'sma negeri 1', 'sekolah', ['sekolah', 'jakarta'], 1)

sma negeri 1 score: 0.9509240388870239


'Q19745500'

In [178]:
df = pd.read_csv('data/test/entity_dataset_sampling.csv')
# Rows without a Wikidata match have an empty ID cell, which read_csv loads as NaN.
# Normalise those to '' so evaluateResult can treat them as "NOT FOUND".
df['ID (wikidata)'] = df['ID (wikidata)'].fillna('')
df.head()

Unnamed: 0,cell value (csv),ID (wikidata),Label (wikidata),context,source,context2
0,SD NEGERI PANGGUNGSARI 1,Q61737969,SD NEGERI PANGGUNGSARI 1,Nama Sekolah,data-sekolah-tahun-2018.csv,bandung
1,RA Barokatul Laili,,,nama,Data-Sekolah-TK-PAUD-DKI-Jakarta-2018-edited.csv,jakarta
2,TK Al Hazmi,Q12710394,,nama,Data-Sekolah-TK-PAUD-DKI-Jakarta-2018-edited.csv,jakarta
3,Jakarta Pusat Kampung Rawa,Q6359511,Kampung Rawa,nama_kantor,Daftar-Kantor-Pelayanan-Pajak-Di-DKI-Jakarta-e...,jakarta
4,SmTK Morning Star Academy,,,nama,Data-Sekolah-TK-PAUD-DKI-Jakarta-2018-edited.csv,jakarta


In [179]:
# Resolve every row: the cell value is the keyword, the 'context' column doubles as the
# column value, and 'context' plus 'context2' form the search context.
result = df.apply(lambda x: applySearch(server.w2v_model, x['cell value (csv)'], x['context'], [x['context'], x['context2']]), axis=1)
df['result'] = result

SD NEGERI PANGGUNGSARI 1 score: 0.3691582977771759
RA Barokatul Laili score: 0.0910305306315422
TK Al Hazmi score: 0.0910305306315422
Jakarta Pusat Kampung Rawa score: 0.3990340530872345
SmTK Morning Star Academy score: 0.0910305306315422
RPTRA Madusela score: 0.0
Jakarta Barat Jati Pulo score: 0.0
BKB PAUD Teratai 08 score: 0.0910305306315422
PONDOK KELAPA score: 0.8785452842712402
TK Islam Nurul Zahrah score: 0.0910305306315422
PAUD Melati 011 score: 0.23056572675704956
Dra. Hj. ETY PURWANTY M.Pd score: 0.0
TK Budi Darma I score: 0.0910305306315422
TK Aisyiyah 46 score: 0.0910305306315422
TK Islam Ar-Roudhoh score: 0.0910305306315422
KB Kinderland score: 0.0910305306315422
RA Al Maghfiroh score: 0.0910305306315422
TK. Tunas Cempaka score: 0.0910305306315422
Kelurahan Kemayoran score: 0.0
BKB PAUD Nusa Indah Rw 01 score: 0.0910305306315422
Asy-Syifa score: 0.10509075969457626
PAUD Ansera score: 0.0910305306315422
RA El Hikam score: 0.0910305306315422
Tisno Drs. MM score: 0.0
RA An-Nis

TK Satu Atap Bina Athfal Sdsn Semper Barat 15 Pagi score: 0.0910305306315422
PAUD Pra Sd Lintas 32 score: 0.27395227551460266
Pos PAUD Cabe Rawit score: 0.24805453419685364
Drs. ASROFI MM score: 0.0
Penggilingan score: 1.0
Jakarta Selatan Kebon Baru score: 0.38756126165390015
TK Yusufiyah score: 0.0910305306315422
BKB PAUD Cempaka Rw.O1 score: 0.0910305306315422
SMP Negeri 180 score: 0.45335671305656433
Antapani score: 0.5
SMP Negeri 7 score: 0.45335671305656433
Wisma Atlet Bahtera Jaya score: 0.26320868730545044
RA/BA/TA Al Ishlah Islamiyah score: 0.175287663936615
BKB PAUD Dorang I Rw 003 score: 0.0910305306315422
TK Islam Al Ikhlash score: 0.0910305306315422
TK Bakti Ibu score: 0.2392936646938324
TK Islam Rambutan score: 0.32702234387397766
PAUD Cempaka Gedong score: 0.23381447792053223
SMP Negeri 9 score: 0.45335671305656433
Taman Braga score: 0.0
TK Santa Maria Fatima score: 0.0910305306315422
Muara Baru score: 0.5
Tulip Novita score: 0.0910305306315422
Puskesmas Kecamatan score: 

SK Gub No 338/2002 tanggal 18 Feb 2002 score: 0.0
BKB PAUD Kasih Ibu Teratai score: 0.0910305306315422
RPTRA Kebon Pala Berseri score: 0.0
Selong score: 1.0
TK Khaudul Ulum score: 0.0910305306315422
Kedaung Kali angke score: 1.0
RSIA Humana Prima score: 0.0
Cilincing score: 0.5
SMP Negeri 61 Terbuka score: 0.1919780671596527
TK K Bhayangkari 18 score: 0.0910305306315422
TK Patra VII score: 0.2495914101600647
RA Mawaddah Wa Rahmah score: 0.0910305306315422
TK Islam Budaya II score: 0.2776024043560028
S Jaktim score: 0.16101758182048798
RSU Sartika Asih score: 0.0
TK Teratai III score: 0.23905998468399048
TK Al-Muslim score: 0.0910305306315422
SMA Negeri 35 score: 0.31123247742652893
BKB PAUD Mekar Harapan score: 0.0910305306315422
TK Zsahara score: 0.0910305306315422
T Jaktim score: 0.0
Dra. Siti Hairunisa M.Pd score: 0.0
RA Dahlia score: 0.15858423709869385
KB Bunda Lestari score: 0.17157013714313507
RA Ar Rasyidiyah score: 0.0910305306315422
SMP Negeri 174 score: 0.21333900094032288
W

PAUD Ceria 03 score: 0.18165111541748047
PAUD Ceria Bunga Pala score: 0.19229990243911743
PAUD Regina Pacis score: 0.18617913126945496
Kelapa Gading score: 1.0
Papango score: 0.0
Rumah Sakit/Tempat  Bersalin  score: 0.2488003969192505
TK Ananda Smart Art Fun score: 0.8691582679748535
TK Islam Meranti score: 0.325396865606308
KB. Melati 04 score: 0.0910305306315422
Jakarta Utara Koja Utara score: 0.4802260398864746
Jakarta Barat Cengkareng Barat score: 0.0
TK Permata Hati Ibu score: 0.20067329704761505
LE Taman Surya score: 0.07595697045326233
Sulaksono S.Pd score: 0.0
PAUD Mangga Ubi score: 0.2228703796863556
Cipete Utara score: 0.0
SD NEGERI CIPOREAT 3 score: 0.21057343482971191
TRI PADMA PUJIANTARAMM. score: 0.0
PAUD Anggrek score: 0.19577813148498535
PAUD Surya Kasih Komarudin score: 0.0910305306315422
PAUD Amari score: 0.0910305306315422
KEBAGUSAN score: 0.8785452842712402
Drs.JOHAN SUPRIYADI M.Pd score: 0.0
Melawai score: 0.0
PAUD Trinity Little School House score: 0.2226270139217

RA/BA/TA Nur Rahmah score: 0.16252601146697998
Cikapundung score: 0.0
TK Mawar II score: 0.2502223253250122
TK Aisyiyah 24 score: 0.0910305306315422
TK Kalam Kudus score: 0.22291779518127441
PAUD Ar Ridho score: 0.194877490401268
TK Lembaga Putra Kita score: 0.21569959819316864
Taman Fitness score: 0.5
TK K Harapan Zaman score: 0.0910305306315422
Daerah Khusus Ibukota Jakarta score: 0.5807209014892578
TK Kristen 7 Penabur score: 0.24516430497169495
Jakarta Timur Halim Airport score: 0.32753121852874756
KB Cemara score: 0.16947513818740845
RA Al Bahri score: 0.1669439822435379
BKB - PAUD Anggrek score: 0.0910305306315422
Kalideres score: 1.0
RA Amal Khair score: 0.0910305306315422
TK Aisiyah 95 score: 0.0910305306315422
TK Santo Andreas score: 0.0910305306315422
BKB PAUD Cempaka score: 0.0910305306315422
Pulomas score: 0.0
Skate Park score: 0.5000000596046448
TK Perwara score: 0.28624147176742554
PAUD Seroja score: 0.20398136973381042
RA Nurul Badar score: 0.17756874859333038
SPS PAUD A

TK Al Mamur score: 0.0910305306315422
TK Negeri Pembina Dki Jakarta score: 0.4588143229484558
LKP WANODYA score: 0.21057343482971191
Drs. ASEP SUPRIATNA HADIRI score: 0.0
TK Mandiri Sentosa score: 0.2731015682220459
SMP Negeri 28 Terbuka score: 0.1919780671596527
TK Indah Dahlia score: 0.2638956904411316
Jakarta Pusat Balaikota score: 0.35458335280418396


In [180]:
result

0      Q61737969
1      NOT FOUND
2      NOT FOUND
3       Q6359511
4      NOT FOUND
5      NOT FOUND
6      NOT FOUND
7      NOT FOUND
8      Q12506111
9      NOT FOUND
10     NOT FOUND
11     NOT FOUND
12     NOT FOUND
13     NOT FOUND
14     NOT FOUND
15     NOT FOUND
16     NOT FOUND
17     NOT FOUND
18     NOT FOUND
19     NOT FOUND
20     NOT FOUND
21     NOT FOUND
22     NOT FOUND
23     NOT FOUND
24     NOT FOUND
25     NOT FOUND
26     Q25467861
27     NOT FOUND
28      Q9075817
29     NOT FOUND
         ...    
969    NOT FOUND
970    Q12707341
971    NOT FOUND
972    NOT FOUND
973    NOT FOUND
974    NOT FOUND
975    NOT FOUND
976    NOT FOUND
977    NOT FOUND
978    NOT FOUND
979      Q192850
980        Q3630
981    NOT FOUND
982    NOT FOUND
983    NOT FOUND
984    Q12510702
985    NOT FOUND
986    NOT FOUND
987    NOT FOUND
988    NOT FOUND
989    NOT FOUND
990     Q1927687
991    NOT FOUND
992        Q3630
993    NOT FOUND
994    NOT FOUND
995    NOT FOUND
996    NOT FOU

In [181]:
def evaluateResult(target, result):
    """Compare a predicted id with the expected one.

    An empty expected id means "no entity", so it is compared against the
    string 'NOT FOUND'.

    Args:
        target: expected id, or '' when no entity is expected.
        result: predicted id, or 'NOT FOUND'.

    Returns:
        1 when prediction and expectation agree, otherwise 0.
    """
    expected = target if target != '' else 'NOT FOUND'
    return 1 if expected == result else 0

In [182]:
# Mark each row 1/0 depending on whether the predicted id equals the expected id.
score = df.apply(lambda x: evaluateResult(x['ID (wikidata)'], x['result']), axis=1)
df['score'] = score

In [183]:
# Persist the dataset together with the 'result' and 'score' columns (row index omitted).
df.to_csv('entity_result.csv', index=False)

In [185]:
# Accuracy: fraction of rows whose predicted id matched the expected one.
sum(score)/len(score)

0.7757757757757757