<h1>OD2WD API PLAYGROUND</h1>
<br>(Use this notebook to play with our API 😃)

❧  Import packages

In [13]:
import json
import numpy as np
import re
import pandas as pd

from itertools import islice
from collections import OrderedDict

from src.utils.mapper import preprocessing, phrase_vector, cosine_sim, lDistance, transform_sigmoid
from src.utils.indexer import search
from src.utils.openTapioca import startNER
# from src.utils.wikimedia import searchEntity, searchObjWProperty, searchProperty, is_class, is_instance_of

from elasticsearch import Elasticsearch
from gensim.models import Word2Vec

❧  load model & index

In [8]:
def connect_elasticsearch():
    """Create a client for the Elasticsearch node at localhost:9200.

    Prints one status line depending on whether ``ping()`` succeeds. The
    client object is returned in both cases, so a failed ping does not
    prevent the caller from receiving it.
    """
    client = Elasticsearch([{'host': 'localhost', 'port': 9200}])
    status = 'Connected to ES Service' if client.ping() else 'Could not connect to ES Service'
    print(status)
    return client

class Server(object):
    """Bundle of the shared resources used by the notebook.

    Attributes set during construction:
        es: client returned by ``connect_elasticsearch()``.
        w2v_model: model loaded with ``Word2Vec.load``.
    """

    def __init__(self):
        # Order kept from the original: index connection first, then the model.
        self.load_index()
        self.load_model()

    def load_index(self):
        """Connect to the search index and store the client on ``self.es``."""
        self.es = connect_elasticsearch()

    def load_model(self):
        """Load the saved Word2Vec model and store it on ``self.w2v_model``."""
        model_path = 'data/model/' + 'w2vec_wiki_id_case'
        self.w2v_model = Word2Vec.load(model_path)

# Build the shared resources once; later cells read `server.w2v_model`.
server = Server()

Connected to ES Service


❧  Playground

In [9]:
import requests
import json
import urllib
from SPARQLWrapper import SPARQLWrapper, JSON

def getEntityData(entity):
    """Placeholder that only builds a SPARQL client for the Wikidata endpoint.

    ``entity`` is not used, no query is sent, and the function implicitly
    returns ``None``.
    NOTE(review): a complete ``getEntityData`` is defined in a later cell of
    this notebook and replaces this one as soon as that cell has run.
    """
    endpoint = "https://query.wikidata.org/sparql"
    user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11"
    sparql = SPARQLWrapper(endpoint, agent=user_agent)

def searchEntity(keyword, limit):
    """Search Wikidata entities by label via the ``wbsearchentities`` API action.

    The query string is now assembled by ``requests`` from a parameter dict
    instead of being formatted by hand, so escaping no longer depends on
    ``urllib.parse`` having been loaded as a side effect of ``import urllib``.

    Args:
        keyword: Raw search text (passed unescaped; requests encodes it).
        limit: Maximum number of matches to request.

    Returns:
        The decoded JSON response of the API.

    NOTE(review): a later cell defines ``searchEntity(keyword, context, limit=5)``
    which replaces this function once that cell has run.
    """
    params = {
        "action": "wbsearchentities",
        "search": keyword,
        "limit": limit,
        "language": "id",
        "format": "json",
    }
    res = requests.get("https://www.wikidata.org/w/api.php", params=params)
    return res.json()

def searchObjWProperty(subject_id, property_id):
    """Look up the values of ``property_id`` on ``subject_id`` plus their P279 targets.

    Every returned binding carries ``item``/``itemLabel`` (the property value)
    and ``grandItem``/``grandItemLabel`` (what that value points to through
    P279). Values without a P279 statement produce no row, because that
    triple is not optional. Subjects that are a P31 of Q4167410 are excluded
    by the MINUS clause (presumably the disambiguation-page class; confirm).

    Returns:
        The converted JSON result of the SPARQL query (labels requested in "id").
    """
    endpoint = "https://query.wikidata.org/sparql"
    user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11"
    query = """
    SELECT ?item ?itemLabel ?grandItem ?grandItemLabel
    WHERE
    {
      bind (wd:%s as ?entity)
      wd:%s wdt:%s ?item .
      ?item wdt:P279 ?grandItem
      MINUS { ?entity wdt:P31 wd:Q4167410 }
      SERVICE wikibase:label { bd:serviceParam wikibase:language "id" }
    }
    """ % (subject_id, subject_id, property_id)

    client = SPARQLWrapper(endpoint, agent=user_agent)
    client.setQuery(query)
    client.setReturnFormat(JSON)
    return client.query().convert()

def searchSbjWProperty(object_id, property_id):
    """Find every item that has ``object_id`` as a value of ``property_id``.

    Returns:
        The converted JSON result with ``item`` and ``itemLabel`` bindings
        (labels requested in "id").
    """
    endpoint = "https://query.wikidata.org/sparql"
    user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11"
    query = """
    SELECT ?item ?itemLabel
    WHERE
    {
        ?item wdt:%s wd:%s .
        SERVICE wikibase:label { bd:serviceParam wikibase:language "id" }
    }
    """ % (property_id, object_id)

    client = SPARQLWrapper(endpoint, agent=user_agent)
    client.setQuery(query)
    client.setReturnFormat(JSON)
    return client.query().convert()

def searchProperty(subject_id, object_id):
    """Find the predicates that directly connect ``subject_id`` to ``object_id``.

    In the result, ``item`` is the connecting predicate and ``itemLabel`` is
    whatever the label service yields for it (language "id").

    Returns:
        The converted JSON result of the SPARQL query.
    """
    endpoint = "https://query.wikidata.org/sparql"
    user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11"
    query = """
    SELECT ?item ?itemLabel
    WHERE
    {
        wd:%s ?item wd:%s .
        SERVICE wikibase:label { bd:serviceParam wikibase:language "id" }
    }
    """ % (subject_id, object_id)

    client = SPARQLWrapper(endpoint, agent=user_agent)
    client.setQuery(query)
    client.setReturnFormat(JSON)
    return client.query().convert()

def searchPropertyRange(property_id):
    """Fetch the ``wikibase:propertyType`` of a Wikidata property.

    The query previously carried a copy-pasted comment describing an
    unrelated query ("Subproperties of location"); it now states what is
    actually selected. The id is also interpolated through a real 1-tuple so
    the formatting cannot misbehave if a tuple is ever passed in.

    Args:
        property_id: Wikidata property id, e.g. ``"P31"``.

    Returns:
        The converted JSON result; each binding holds ``wbtype``.
    """
    sparql = SPARQLWrapper("https://query.wikidata.org/sparql", agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11")

    sparql.setQuery("""
    # Datatype (wikibase:propertyType) of the given property
    SELECT ?wbtype
    WHERE {
    wd:%s wikibase:propertyType  ?wbtype.
    SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    }
    """ % (property_id,))
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    return results

def is_class(entity_id):
    """Tell whether any item is declared an instance (P31) of ``entity_id``.

    Returns:
        ``True`` when the query yields at least one binding, else ``False``.
    """
    endpoint = "https://query.wikidata.org/sparql"
    user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11"
    query = """
    SELECT distinct ?protag  WHERE { 
        bind (wd:%s as ?protag)
        ?x wdt:P31 ?protag
    }
    """ % entity_id

    client = SPARQLWrapper(endpoint, agent=user_agent)
    client.setQuery(query)
    client.setReturnFormat(JSON)
    rows = client.query().convert()['results']['bindings']
    return len(rows) != 0

def get_labels(entity_id):
    """Collect the distinct ``rdfs:label`` values of an entity in "id" or "en".

    Returns:
        A list of label strings in the order the endpoint returned them,
        with repeated values kept only once.
    """
    endpoint = "https://query.wikidata.org/sparql"
    user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11"
    query = """
    SELECT ?label
    WHERE {
      BIND(wd:%s AS ?entityId)
      ?entityId rdfs:label ?label .
      filter(lang(?label) = 'id' || lang(?label) = 'en')
    }
    """ % entity_id

    client = SPARQLWrapper(endpoint, agent=user_agent)
    client.setQuery(query)
    client.setReturnFormat(JSON)
    rows = client.query().convert()['results']['bindings']

    unique_labels = []
    already_seen = set()
    for row in rows:
        text = row['label']['value']
        if text in already_seen:
            continue
        already_seen.add(text)
        unique_labels.append(text)
    return unique_labels

def is_instance_of(entity_id, class_id):
    """Tell whether ``entity_id`` has a direct P31 statement pointing at ``class_id``.

    Returns:
        ``True`` when the query yields at least one binding, else ``False``.
    """
    endpoint = "https://query.wikidata.org/sparql"
    user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11"
    query = """
    SELECT ?item ?itemLabel
    WHERE
    {
        bind (wd:%s as ?item)
        wd:%s wdt:P31 ?item .
        SERVICE wikibase:label { bd:serviceParam wikibase:language "id" }
    }
    """ % (class_id, entity_id)

    client = SPARQLWrapper(endpoint, agent=user_agent)
    client.setQuery(query)
    client.setReturnFormat(JSON)
    rows = client.query().convert()['results']['bindings']
    return len(rows) != 0


In [10]:
def ranking(candidateList, property_data, model, is_protagonist=False):
    """Score each candidate entity against the context terms in ``property_data``.

    Fixes over the previous version:
      * the score is computed per candidate (it used to be initialised once
        and leaked from one candidate to every following one);
      * every surviving candidate appears exactly once in the output (those
        without parents used to be appended once per context term and then
        once more at the end);
      * candidates described as 'Wikimedia disambiguation page' that have no
        parents are really skipped;
      * the P31/P279 lookup runs once per candidate instead of once per
        context term.

    Args:
        candidateList: Iterable of dicts with at least an ``'id'`` key and an
            optional ``'description'``.
        property_data: Context terms. With ``is_protagonist`` they are passed
            to ``is_instance_of`` as class ids; otherwise they are embedded
            with ``phrase_vector``.
        model: Embedding model handed through to ``phrase_vector``.
        is_protagonist: When true, the score is 1 if the candidate is an
            instance of any context class and 0 otherwise.

    Returns:
        A list of ``{'candidate': candidate, 'score': score}`` dicts in input
        order.
    """
    res = []
    for candidate in candidateList:
        if is_protagonist:
            is_member = any(is_instance_of(candidate['id'], data) for data in property_data)
            score = 1 if is_member else 0
        else:
            # Prefer "instance of" parents; fall back to "subclass of".
            parents = searchObjWProperty(candidate['id'], 'P31')['results']['bindings']
            if not parents:
                parents = searchObjWProperty(candidate['id'], 'P279')['results']['bindings']

            if not parents:
                if candidate.get('description') == 'Wikimedia disambiguation page':
                    continue
                score = 0
            else:
                score = max(
                    (_best_parent_similarity(model, data, parents) for data in property_data),
                    default=0,
                )
        res.append({'candidate': candidate, 'score': score})
    return res


def _best_parent_similarity(model, data, parents):
    """Return the best cosine similarity between ``data`` and any parent/grandparent label.

    Similarities of 0.2 or less are reported as 0, as is the case where
    ``data`` has no vector.
    """
    data_vector = phrase_vector(model, data)
    if data_vector is None:
        return 0
    best = 0
    for parent in parents:
        for key in ('itemLabel', 'grandItemLabel'):
            label_vector = phrase_vector(model, preprocessing(parent[key]['value']))
            if label_vector is not None:
                best = max(best, cosine_sim(data_vector, label_vector))
    return best if best > 0.2 else 0

def map_entity(data, context, model, limit=10, is_protagonist=False):
    """Map a cell value to Wikidata candidates grouped by score.

    Changes over the previous version: the leftover debug ``print`` of the
    ranked candidates is gone, the loop variable no longer shadows the
    ``json`` module, and the two duplicated result-building branches are
    merged.

    Scoring per ranked candidate:
      * if the cleaned cell value has a vector, the score is the mean of the
        label/value cosine similarity and the ranking score, capped at 1.0;
        candidates whose label has no vector are dropped;
      * otherwise candidates within an edit distance of 3 are kept and scored
        with the mean of the ranking score and ``transform_sigmoid(dist)``.
    (A language-based penalty used to be sketched here in commented-out code;
    it was never active.)

    NOTE(review): ``searchEntity`` is called with three arguments, i.e. this
    relies on the ``searchEntity(keyword, context, limit)`` variant defined in
    a later cell having been executed.

    Args:
        data: Raw cell value to link.
        context: Iterable of raw context strings.
        model: Embedding model passed to ``phrase_vector`` / ``ranking``.
        limit: Passed to ``searchEntity`` and used as the maximum number of
            distinct scores kept.
        is_protagonist: Forwarded to ``ranking``.

    Returns:
        An ``OrderedDict`` mapping score -> list of
        ``{'id', 'label', 'description'}`` dicts, best score first; or a plain
        dict ``{0: [NOT FOUND entry]}`` when nothing qualified.
    """
    data = preprocessing(data)
    context = [preprocessing(c) for c in context]
    candidates = searchEntity(data, context, limit)
    ranked = ranking(candidates, context, model, is_protagonist)

    result = {}
    # The vector is only needed (and only computed) when there is something to score.
    query_vector = phrase_vector(model, data) if len(ranked) > 0 else None

    for entry in ranked:
        candidate = entry['candidate']
        if query_vector is not None:
            label_vector = phrase_vector(model, preprocessing(candidate['label']))
            if label_vector is None:
                continue
            sim = cosine_sim(query_vector, label_vector)
            score = min(np.average([sim, entry['score']]), 1.0)
        else:
            # No embedding for the value: fall back to edit distance on the raw label.
            dist = lDistance(candidate['label'], data)
            if not dist <= 3:
                continue
            score = np.average([entry['score'], transform_sigmoid(dist)])

        result.setdefault(score, []).append({
            'id': candidate['id'],
            'label': candidate['label'],
            'description': candidate['description'] if 'description' in candidate else '',
        })

    if len(result) == 0:
        result[0] = [{'id': 'NOT FOUND', 'label':'NOT FOUND', 'description':'NOT FOUND'}]
    else:
        result = OrderedDict(sorted(result.items(), reverse=True))
        result = OrderedDict(islice(result.items(), 0, limit))

    return result

In [11]:
def orAdder(text):
    """Turn every single space in ``text`` into ``" OR "``.

    Example: ``"nama sekolah"`` becomes ``"nama OR sekolah"``. Consecutive
    spaces are not collapsed, so they produce empty terms between the ORs.
    """
    return text.replace(' ', ' OR ')

In [28]:
import requests
import json
import urllib
from SPARQLWrapper import SPARQLWrapper, JSON

def searchEntityWikidata(keyword, limit):
    """Search Wikidata entities by label via the ``wbsearchentities`` API action.

    ``limit`` is now actually sent to the API (it used to be accepted but
    ignored), and the query string is built by ``requests`` from a parameter
    dict rather than formatted by hand.

    Args:
        keyword: Raw search text (passed unescaped; requests encodes it).
        limit: Maximum number of matches to request.

    Returns:
        The decoded JSON response of the API.
    """
    params = {
        "action": "wbsearchentities",
        "search": keyword,
        "limit": limit,
        "language": "id",
        "format": "json",
    }
    res = requests.get("https://www.wikidata.org/w/api.php", params=params)
    return res.json()

def getEntityData(entity):
    """Fetch the Indonesian label, description and aliases of one entity.

    Label, description and alias each sit in their own OPTIONAL block. They
    used to share a single OPTIONAL group, which only matches when all three
    exist, so an entity lacking e.g. an Indonesian alias came back without
    its label as well.

    Args:
        entity: Wikidata entity id such as ``"Q6386863"``.

    Returns:
        The list of result bindings. Each binding has ``entity`` and, when
        available, ``label``, ``entityDescription`` and ``altLabel``; there
        may be one row per alias.
    """
    sparql = SPARQLWrapper("https://query.wikidata.org/sparql", agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11")
    sparql.setQuery("""
    SELECT ?entity ?label ?entityDescription ?altLabel
    WHERE {
      BIND(wd:%s as ?entity) .
      OPTIONAL { ?entity rdfs:label ?label . FILTER (lang(?label) = "id") }
      OPTIONAL { ?entity schema:description ?entityDescription . FILTER (lang(?entityDescription) = "id") }
      OPTIONAL { ?entity skos:altLabel ?altLabel . FILTER (lang(?altLabel) = "id") }
      SERVICE wikibase:label { bd:serviceParam wikibase:language "id" }
    }
    """ % (entity,))
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    return results['results']['bindings']

def searchEntityWikimedia(keyword, context, limit=5):
    """Full-text search on wikidata.org for ``keyword`` with optional context terms.

    The search string is the quoted keyword followed by all words of all
    context entries joined with ``OR``. The request now goes through
    ``requests.get`` instead of a ``Session`` that was created per call and
    never closed, and the pointless immediately-invoked lambda is gone.

    Args:
        keyword: Phrase that is searched as an exact (quoted) match.
        context: Iterable of context strings; each is expanded by ``orAdder``.
        limit: Maximum number of hits (``srlimit``).

    Returns:
        The ``query.search`` list of the API response.
    """
    context_terms = " OR ".join(orAdder(term) for term in context)
    query = "\"{}\" {}".format(keyword, context_terms)
    params = {
        "action": "query",
        "format": "json",
        "list": "search",
        "srsearch": query,
        "srlimit": limit
    }

    response = requests.get("https://www.wikidata.org/w/api.php", params=params)
    return response.json()['query']['search']

def searchEntity(keyword, context, limit=5):
    """Search candidates and enrich each hit with its label and description.

    For every hit of ``searchEntityWikimedia`` the first row returned by
    ``getEntityData`` is used; hits without any row are left out. A missing
    label or description becomes an empty string.

    Returns:
        A list of ``{'id', 'label', 'description'}`` dicts in hit order.
    """
    found = []
    for hit in searchEntityWikimedia(keyword, context, limit):
        rows = getEntityData(hit['title'])
        if len(rows) == 0:
            continue
        first = rows[0]
        label_text = first['label']['value'] if 'label' in first else ''
        description_text = first['entityDescription']['value'] if 'entityDescription' in first else ''
        # The entity URI has the id as its fifth '/'-separated piece.
        entity_id = first['entity']['value'].split('/')[4]
        found.append({'id': entity_id, 'label': label_text, 'description': description_text})
    return found

def checkProperty(model, entityId, columnValue):
    """Measure how well an entity's parents match the column name.

    The entity's P31 values are looked up first, falling back to P279 when
    there are none. The result is the highest cosine similarity between the
    vector of ``columnValue`` and the vector of any parent or grandparent
    label, never below 0.

    Returns:
        0 when the entity has no parents or ``columnValue`` has no vector,
        otherwise the best similarity found.
    """
    parents = searchObjWProperty(entityId, 'P31')['results']['bindings']
    if len(parents) == 0:
        parents = searchObjWProperty(entityId, 'P279')['results']['bindings']
        if len(parents) == 0:
            return 0

    column_vector = phrase_vector(model, columnValue)
    if column_vector is None:
        return 0

    best = 0
    for parent in parents:
        parent_vector = phrase_vector(model, preprocessing(parent['itemLabel']['value']))
        grand_vector = phrase_vector(model, preprocessing(parent['grandItemLabel']['value']))
        for vector in (parent_vector, grand_vector):
            if vector is None:
                continue
            similarity = cosine_sim(column_vector, vector)
            if similarity > best:
                best = similarity
    return best

def checkEntity(model, keyword, entityId):
    """Measure how close ``keyword`` is to the labels of an entity.

    Labels are only fetched when ``keyword`` itself has a vector. The result
    is the highest cosine similarity between the keyword vector and the
    vector of any preprocessed label, never below 0.

    Returns:
        0 when the keyword has no vector or no label could be embedded,
        otherwise the best similarity found.
    """
    keyword_vector = phrase_vector(model, keyword)
    if keyword_vector is None:
        return 0

    best = 0
    for raw_label in get_labels(entityId):
        label_vector = phrase_vector(model, preprocessing(raw_label))
        if label_vector is None:
            continue
        similarity = cosine_sim(keyword_vector, label_vector)
        if similarity > best:
            best = similarity
    return best
    

def mapEntity(model, keyword, columnValue, context, limit=5):
    """Link ``keyword`` to the top search hit if that hit scores well enough.

    Only the first entity returned by ``searchEntity`` is considered. Its
    final score is the mean of ``checkProperty`` (parents vs. column name)
    and ``checkEntity`` (labels vs. keyword); the entity is accepted when the
    score exceeds 0.3.

    A disambiguation page now yields the same NOT FOUND record as every other
    failure. It used to return ``None``, which made callers that index the
    result (``applySearch``) crash.

    NOTE(review): the disambiguation check compares against the English
    description text; confirm that the descriptions supplied by
    ``searchEntity`` can actually carry that text.

    Args:
        model: Embedding model passed to the scoring helpers.
        keyword: Raw cell value; printed in the log line.
        columnValue: Column name used by ``checkProperty``.
        context: Context terms for the search.
        limit: Maximum number of search hits requested.

    Returns:
        The accepted entity dict, or ``{'label': 'NOT FOUND', 'id': 'NOT FOUND'}``.
    """
    clean_keyword = preprocessing(keyword)
    entities = searchEntity(clean_keyword, context, limit)
    if len(entities) < 1:
        print('{} score: 0'.format(keyword))
        return {'label': 'NOT FOUND', 'id': 'NOT FOUND'}

    entity = entities[0]
    if entity.get('description') == 'Wikimedia disambiguation page':
        return {'label': 'NOT FOUND', 'id': 'NOT FOUND'}

    initialScore = checkProperty(model, entity['id'], columnValue)
    entityScore = checkEntity(model, clean_keyword, entity['id'])
    finalScore = (initialScore + entityScore) / 2
    print('{} score: {}'.format(keyword, finalScore))
    if finalScore > 0.3:
        return entity
    return {'label': 'NOT FOUND', 'id': 'NOT FOUND'}
    
def applySearch(model, keyword, columnValue, context, title, limit=5):
    """Link one cell value and return the id of the chosen entity.

    The named entities that ``startNER`` extracts from ``title`` are added to
    ``context``. Duplicates are removed while keeping first-seen order; the
    earlier ``list(set(...))`` produced an order that depends on string
    hashing and can therefore differ between kernel sessions, which changes
    the search string. A missing result from ``mapEntity`` is reported as
    ``'NOT FOUND'`` instead of raising.

    Args:
        model: Embedding model passed to ``mapEntity``.
        keyword: Raw cell value to link.
        columnValue: Column name of the cell.
        context: List of context strings.
        title: Text from which extra context entities are extracted.
        limit: Currently unused: only the top hit is evaluated, so the
            search is always issued with a limit of 1.

    Returns:
        The ``'id'`` of the linked entity, or ``'NOT FOUND'``.
    """
    print()  # empty line ahead of this lookup's log output
    title_entities = startNER(title)

    terms = []
    for term in context + title_entities:
        if term not in terms:
            terms.append(term)

    entity = mapEntity(model, keyword, columnValue, terms, 1)
    return entity['id'] if entity else 'NOT FOUND'

In [101]:
# Example: raw full-text search hits for a school name with two context strings (live API call).
searchEntityWikimedia('SD NEGERI PANGGUNGSARI 1', ['nama sekolah', 'jakarta'], 5)

[{'ns': 0,
  'pageid': 61575847,
  'size': 1929,
  'snippet': '<span class="searchmatch">SD</span> <span class="searchmatch">NEGERI</span> <span class="searchmatch">PANGGUNGSARI</span> <span class="searchmatch">1</span>',
  'timestamp': '2019-02-15T11:28:34Z',
  'title': 'Q61737969',
  'wordcount': 4}]

In [120]:
# Example: enriched candidates (id, label, description) for a sub-district name (live API + SPARQL calls).
searchEntity('Kelurahan Kemayoran', ['desa_kelurahan', 'jakarta'], 5)

[{'description': 'kelurahan di Kota Administrasi Jakarta Pusat',
  'id': 'Q6386863',
  'label': 'Kemayoran'},
 {'description': '', 'id': 'Q25252183', 'label': ''},
 {'description': '', 'id': 'Q25249745', 'label': ''},
 {'description': 'kelurahan di Kota Surabaya',
  'id': 'Q12490670',
  'label': 'Kemayoran'},
 {'description': 'kelurahan di Kabupaten Bangkalan',
  'id': 'Q11276328',
  'label': 'Kemayoran'}]

In [27]:
# Example: end-to-end linking of one cell value; the last positional argument is the `limit` parameter.
applySearch(server.w2v_model, 'Kemayoran', 'desa_kelurahan', ['desa_kelurahan', 'jakarta'], 'Data Sekolah TK PAUD DKI Jakarta 2018 edited', 1)


['TK', 'DKI Jakarta']
Kemayoran score: 0.5


'Q6386863'

In [31]:
# Load the gold-standard annotations. A missing gold ID means "no matching
# entity", so it is stored as an empty string (evaluateResult relies on that).
df = pd.read_csv('entity_gold_1.csv')
# fillna is the direct way to replace missing values; the earlier
# replace(np.nan, '', regex=True) asked for regex matching that NaN cannot use.
df['ID (wikidata)'] = df['ID (wikidata)'].fillna('')
df.head()

Unnamed: 0,cell value (csv),ID (wikidata),Label (wikidata),context,source,context2,result,score,Unnamed: 8,Unnamed: 9
0,SD NEGERI PANGGUNGSARI 1,Q61737969,SD NEGERI PANGGUNGSARI 1,Nama Sekolah,data-sekolah-tahun-2018.csv,bandung,Q61737969,1,,0.804
1,RA Barokatul Laili,,,nama,Data-Sekolah-TK-PAUD-DKI-Jakarta-2018-edited.csv,jakarta,NOT FOUND,1,,
2,TK Al Hazmi,Q12710394,,nama,Data-Sekolah-TK-PAUD-DKI-Jakarta-2018-edited.csv,jakarta,NOT FOUND,0,,
3,Jakarta Pusat Kampung Rawa,Q6359511,Kampung Rawa,nama_kantor,Daftar-Kantor-Pelayanan-Pajak-Di-DKI-Jakarta-e...,jakarta,Q6359511,1,,
4,SmTK Morning Star Academy,,,nama,Data-Sekolah-TK-PAUD-DKI-Jakarta-2018-edited.csv,jakarta,NOT FOUND,1,,


In [32]:
# Turn the source file name into plain words for entity extraction:
# drop the '.csv' suffix text and replace dashes with spaces.
# Vectorised string methods replace the row-wise apply; regex=False keeps
# '.csv' a literal match.
df['parsed_source'] = (
    df['source']
    .str.replace('.csv', '', regex=False)
    .str.replace('-', ' ', regex=False)
)
# Show a preview only instead of rendering the whole frame.
df.head(10)

Unnamed: 0,cell value (csv),ID (wikidata),Label (wikidata),context,source,context2,result,score,Unnamed: 8,Unnamed: 9,parsed_source
0,SD NEGERI PANGGUNGSARI 1,Q61737969,SD NEGERI PANGGUNGSARI 1,Nama Sekolah,data-sekolah-tahun-2018.csv,bandung,Q61737969,1,,0.804,data sekolah tahun 2018
1,RA Barokatul Laili,,,nama,Data-Sekolah-TK-PAUD-DKI-Jakarta-2018-edited.csv,jakarta,NOT FOUND,1,,,Data Sekolah TK PAUD DKI Jakarta 2018 edited
2,TK Al Hazmi,Q12710394,,nama,Data-Sekolah-TK-PAUD-DKI-Jakarta-2018-edited.csv,jakarta,NOT FOUND,0,,,Data Sekolah TK PAUD DKI Jakarta 2018 edited
3,Jakarta Pusat Kampung Rawa,Q6359511,Kampung Rawa,nama_kantor,Daftar-Kantor-Pelayanan-Pajak-Di-DKI-Jakarta-e...,jakarta,Q6359511,1,,,Daftar Kantor Pelayanan Pajak Di DKI Jakarta e...
4,SmTK Morning Star Academy,,,nama,Data-Sekolah-TK-PAUD-DKI-Jakarta-2018-edited.csv,jakarta,NOT FOUND,1,,,Data Sekolah TK PAUD DKI Jakarta 2018 edited
5,RPTRA Madusela,,,nama_rptra,ruang-laktasi.csv,jakarta,NOT FOUND,1,,,ruang laktasi
6,Jakarta Barat Jati Pulo,Q12487509,Jatipulo,nama_kantor,Daftar-Kantor-Pelayanan-Pajak-Di-DKI-Jakarta-e...,jakarta,NOT FOUND,0,,,Daftar Kantor Pelayanan Pajak Di DKI Jakarta e...
7,BKB PAUD Teratai 08,,,nama,Data-Sekolah-TK-PAUD-DKI-Jakarta-2018-edited.csv,jakarta,NOT FOUND,1,,,Data Sekolah TK PAUD DKI Jakarta 2018 edited
8,PONDOK KELAPA,Q12506111,Pondok Kelapa,NAMA KELURAHAN,dkikepadatankelurahan2013.csv,jakarta,Q12506111,1,,,dkikepadatankelurahan2013
9,TK Islam Nurul Zahrah,Q25472496,,nama,Data-Sekolah-TK-PAUD-DKI-Jakarta-2018-edited.csv,jakarta,NOT FOUND,0,,,Data Sekolah TK PAUD DKI Jakarta 2018 edited


In [34]:
# Link every row of the gold set. Each row triggers live Wikidata API and
# SPARQL requests through applySearch, so this cell is slow and needs network access.
result = df.apply(lambda x: applySearch(server.w2v_model, x['cell value (csv)'], x['context'], [x['context'], x['context2']], x['parsed_source'], 1), axis=1)


SD NEGERI PANGGUNGSARI 1 score: 0.3691582977771759

RA Barokatul Laili score: 0

TK Al Hazmi score: 0

Jakarta Pusat Kampung Rawa score: 0.3990340530872345

SmTK Morning Star Academy score: 0

RPTRA Madusela score: 0

Jakarta Barat Jati Pulo score: 0.0

BKB PAUD Teratai 08 score: 0

PONDOK KELAPA score: 0.8785452842712402

TK Islam Nurul Zahrah score: 0

PAUD Melati 011 score: 0

Dra. Hj. ETY PURWANTY M.Pd score: 0

TK Budi Darma I score: 0

TK Aisyiyah 46 score: 0

TK Islam Ar-Roudhoh score: 0

KB Kinderland score: 0

RA Al Maghfiroh score: 0

TK. Tunas Cempaka score: 0

Kelurahan Kemayoran score: 0.0

BKB PAUD Nusa Indah Rw 01 score: 0

Asy-Syifa score: 0.10509075969457626

PAUD Ansera score: 0

RA El Hikam score: 0

Tisno Drs. MM score: 0

RA An-Nissa score: 0

TK Global Mandiri score: 0

SMP NEGERI 50 score: 0.8153343796730042

BKB PAUD Gemilang score: 0

SELONG score: 0.8785452842712402

RA Nurul Ishlah score: 0

TK Islam An Ilmiyah score: 0

BKB PAUD Sayang Ibu 014 score: 0

KB 

TK Islam Al-Azhar 5 Kemandoran score: 0

SdTK Harvest Jakarta score: 0

Drs. H. Sukandi, M Si score: 0

TK Suci Musyaadah score: 0

DURI SELATAN score: 0.8785452842712402

SUMARDIJANTO, Drs, M.Pd score: 0

TK Raharja score: 0

PAUD Azkia score: 0

CILANDAK score: 0.5

TK Global Sevilla Puri Indah score: 0

SMP Negeri 7 Terbuka score: 0

TK Ruwati III score: 0

Sunter Agung score: 0.5

HALIM PERDANA KUSUMAH score: 0.771426796913147

Menteng Dalam  score: 1.0

BKB PAUD Bunga Sakura score: 0

RPTRA Ciganjur Berseri score: 0.0

BKB PAUD Kemuning score: 0

TK Tirtasari score: 0

PAUD An-Nisa score: 0

SMP Negeri 170 score: 0

GUNUNG SAHARI UTARA score: 0.37854525446891785

TK Aisyiyah 23 score: 0

PAUD Domba Kecil score: 0

TK Sint Elizabeth score: 0

Kebon Pala score: 1.0

TK Islam Bunga Amalia score: 0

BKB PAUD Taman Batu Intan score: 0

Johanes Abraham Dimara score: 0

Untung Jawa score: 0.9343827962875366

KB Kasih Ananda III score: 0

SPS BKB PAUD Bawal Pulau Harapan score: 0

BKB PAU

Eton House Indonesia score: 0

RA Ar Rafah score: 0

TK Risanti III score: 0

TK Aisyiyah 90 score: 0

SMP Negeri 200 score: 0.45335671305656433

PASAR REBO score: 1.0

RA Mutiara score: 0

RA Miftahul Khoir score: 0

RA/BA/TA Luqmanul Hakim score: 0

Jakarta Selatan score: 0.7792694568634033

Kecamatan Sawah Besar score: 0.44204413890838623

TK Galatia III score: 0

RA Al Wafa score: 0

TK Satu Atap Sdn Pekayon 15 Pagi score: 0

Mawar Jingga score: 0.5434505343437195

Marunda  score: 0.5

TK Tunas Peni score: 0

Pegangsaan Dua score: 0.5

LE Depnaker score: 0

Lapangan Tenis Pulau Pramuka score: 0

Saraswati score: 0.07350625842809677

PAUD Anggrek Bulan score: 0

SMA Negeri 15 score: 0.4509240686893463

RA Khodijah score: 0

TK Bani Sholeh Basmol score: 0

CIPETE SELATAN score: 0.37854525446891785

SD Muhammadiyah 4 score: 0.8691582679748535

RA Al Alifiyah score: 0

BKB PAUD Permata Bunda Rw.11 score: 0

Drs. SURYANA, M.Si. score: 0

TK Westin score: 0

TK Islam Fatahillah score: 0


Pulomas score: 0.0

Skate Park score: 0.5000000596046448

TK Perwara score: 0

PAUD Seroja score: 0

RA Nurul Badar score: 0

SPS PAUD Al-Amanah Pulau Panggang score: 0

TK Jayakarya score: 0

SMA Negeri 85 score: 0.0

PAUD Agung 02 score: 0

JAKARTA SELATAN score: 0.8386868834495544

TK Kartika Xi-1 score: 0

BKB PAUD Melati Indah score: 0

SMA Negeri 94 score: 0.0

TK Perguruan Advent Xv Ciracas score: 0

Pusat Kegiatan Belajar Masyarakat score: 0.4999999701976776

Jakarta Pusat Mid Plaza score: 0

SMP Negeri 76 score: 0

Apotik/Pharmacy score: 0

KEBON BAWANG score: 0.7547762393951416

TK Fantasia score: 0

TK Islam Nur Rahmah score: 0

TK Attaqwa score: 0

RA Nurul Iman Pinangsia score: 0

Taman Alun Alun Ujung Berung score: 0.0

TK Bintang score: 0

SMA Negeri 66 score: 0.4509240686893463

TK Ekadiayasa score: 0

BKB PAUD Musdalifah score: 0

RA Nurul Aini score: 0

Aliyah Al Istiqomah score: 0

Taman Bangunan Umum score: 0

BKB PAUD Teratai 04 score: 0

Sukardjo Wiryopranoto scor

In [35]:
# Replaces the 'result' column that came with the CSV by the fresh predictions.
df['result'] = result

In [36]:
# Preview of the predicted ids, one per row of df.
result

0      Q61737969
1      NOT FOUND
2      NOT FOUND
3       Q6359511
4      NOT FOUND
5      NOT FOUND
6      NOT FOUND
7      NOT FOUND
8      Q12506111
9      NOT FOUND
10     NOT FOUND
11     NOT FOUND
12     NOT FOUND
13     NOT FOUND
14     NOT FOUND
15     NOT FOUND
16     NOT FOUND
17     NOT FOUND
18     NOT FOUND
19     NOT FOUND
20     NOT FOUND
21     NOT FOUND
22     NOT FOUND
23     NOT FOUND
24     NOT FOUND
25     NOT FOUND
26     Q25467861
27     NOT FOUND
28      Q9075817
29     NOT FOUND
         ...    
969    NOT FOUND
970    NOT FOUND
971    NOT FOUND
972    NOT FOUND
973    NOT FOUND
974    NOT FOUND
975    NOT FOUND
976    NOT FOUND
977    NOT FOUND
978    NOT FOUND
979      Q192850
980    NOT FOUND
981    NOT FOUND
982    NOT FOUND
983    NOT FOUND
984    Q12510702
985    NOT FOUND
986    NOT FOUND
987    NOT FOUND
988    NOT FOUND
989    NOT FOUND
990     Q1927687
991    NOT FOUND
992    NOT FOUND
993    NOT FOUND
994    NOT FOUND
995    NOT FOUND
996    NOT FOU

In [37]:
def evaluateResult(target, result):
    """Score one prediction against its gold id.

    An empty gold id stands for 'NOT FOUND'.

    Returns:
        1 when the prediction equals the (normalised) gold id, else 0.
    """
    expected = target if target != '' else 'NOT FOUND'
    return 1 if expected == result else 0

In [38]:
# Per-row correctness (1/0) of the prediction against the gold id.
score = df.apply(lambda x: evaluateResult(x['ID (wikidata)'], x['result']), axis=1)
# Replaces the 'score' column that came with the CSV.
df['score'] = score

In [40]:
# Persist the gold set together with the new predictions and scores.
df.to_csv('entity_result_new2.csv', index=False)

In [39]:
# Accuracy: share of rows whose prediction equals the gold id.
sum(score)/len(score)

0.8068068068068068