# Dependencies

In [48]:
###See these links! 
###https://public.paws.wmcloud.org/47948676/03%20-%20Wikidata.ipynb
###https://www.wikidata.org/wiki/Wikidata:Pywikibot_-_Python_3_Tutorial/Data_Harvest

import pywikibot

import requests
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd
import string
import numpy as np

# Module-level SPARQL client bound to the Wikidata SPARQL endpoint.
# submit_query_wrapped sets the query text and return format on this shared
# object before every request.
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")

# Functions

In [49]:
###Find candidate entity IDs for a name using Wikidata's full-text search.
###Returns at most the first srlimit results.
###Generic search on Wikidata: use for entities mentioned inside news.
def find_Qid_search(s, srlimit = 5):
    """Run a full-text Wikidata search for ``s`` and return candidate entities.

    Uses the MediaWiki ``action=query&list=search`` API, sorted by relevance.
    See https://www.mediawiki.org/wiki/API:Search and
    https://stackoverflow.com/questions/55039375/why-i-only-get-one-result-of-keyword-by-wikidata-python-package
    for why this is preferred over the simple ``find_Qid`` lookup for free text.

    Parameters
    ----------
    s : str
        Text to search for.
    srlimit : int, default 5
        Maximum number of hits requested from the API.

    Returns
    -------
    pandas.DataFrame
        Columns ``title`` (the page title of the hit, e.g. ``Q349375``) and
        ``snippet`` (the hit's snippet with the search-match ``<span>``
        markup removed and prefixed with ``"<s> is a "``). Hits whose snippet
        is empty are dropped; the remaining rows keep their original
        positional index, so the index may contain gaps. When the search
        returns no hits the frame is empty but still has both columns.

    Raises
    ------
    KeyError
        If the API response has no ``query``/``search`` section (for example
        an API error payload).
    """
    API_ENDPOINT = "https://www.wikidata.org/w/api.php?"

    params = {
        'action': 'query',
        'list': 'search',
        'format': 'json',
        'srsearch': s,
        'srprop': 'titlesnippet|snippet',
        'srlimit': srlimit,
        'srsort': 'relevance',
    }

    r = requests.get(API_ENDPOINT, params=params).json()
    hits = r['query']['search']

    # Naming the columns up front keeps the frame well-formed even when the
    # search returns zero hits (an empty frame would otherwise lack them).
    results = pd.DataFrame(hits, columns=["title", "snippet"])

    # Strip the highlighting markup the API wraps around matched terms.
    raw_snippets = results['snippet'].fillna('').astype(str)
    cleaned = [
        text.replace('<span class="searchmatch">', '').replace('</span>', '')
        for text in raw_snippets
    ]
    results = results.assign(snippet=cleaned)

    ##Remove entries without description
    # Filtering into a new frame avoids chained in-place mutation of a column
    # slice, which is ineffective under pandas Copy-on-Write.
    results = results[results['snippet'] != '']

    results = results.assign(
        snippet=[s + " is a " + text for text in results['snippet']]
    )
    return results[["title", "snippet"]]



In [50]:
###Find the entity ID of the first match for a name.
###Exact search on Wikidata: use for company names.
def find_Qid(s):
    """Look up the first Wikidata entity matching ``s`` via ``wbsearchentities``.

    Parameters
    ----------
    s : str
        Name to search for. ASCII punctuation is removed before the request
        is sent (``"Apple Inc."`` is searched as ``"Apple Inc"``).

    Returns
    -------
    dict
        ``{'id': 'wd:<entity id>', 'description': <text>}`` for the first
        search result. The description has the search-match ``<span>`` markup
        removed and is ``''`` when the hit carries no description.

    Raises
    ------
    IndexError
        If the search returns no results.
    """
    ###Uses different API
    API_ENDPOINT = "https://www.wikidata.org/w/api.php?"

    params = dict(
        action='wbsearchentities',
        format='json',
        language='en',
        search=s.translate(str.maketrans('', '', string.punctuation)),
    )

    r = requests.get(API_ENDPOINT, params=params).json()

    matches = r.get('search') or []
    if not matches:
        # Same exception type the original `[0]` raised, but with context.
        raise IndexError("No Wikidata entity found for %r" % (s,))
    first_result = matches[0]

    # A hit may come without a description; that must not discard the ID.
    description = first_result.get('description', '')
    description = description.replace('</span>', '').replace('<span class="searchmatch">', '')

    return {'id': 'wd:' + first_result['id'], 'description': description}

In [51]:
###Find relation between 2 entities
def submit_query_wrapped(subj, obj, flag = 0):
    """Find the direct Wikidata properties linking two named entities.

    Both names are resolved with ``find_Qid``. The query first looks for
    statements of the form ``<obj> ?property <subj>``; when that yields
    nothing and ``flag == 0`` the reversed direction ``<subj> ?property <obj>``
    is tried as well.

    Parameters
    ----------
    subj : str
        Name of the entity used as the statement value in the first attempt.
    obj : str
        Name of the entity used as the statement subject in the first attempt.
    flag : int, default 0
        ``0`` allows the fallback with the two entities exchanged; any other
        value restricts the search to the given direction.

    Returns
    -------
    pandas.DataFrame or list
        A frame with columns ``a.value`` (property URI) and
        ``propLabel.value`` (English property label) for the first direction
        that produced results, or an empty list ``[]`` when neither name
        could be resolved, the query failed, or no relation was found.
    """
    # Resolve each name once; the fallback below reuses the same IDs instead
    # of repeating both HTTP lookups.
    try:
        subj_id = find_Qid(subj)['id']
        obj_id = find_Qid(obj)['id']
    except Exception:
        return []

    attempts = [(obj_id, subj_id)]
    if flag == 0:
        attempts.append((subj_id, obj_id))

    for head_id, tail_id in attempts:
        try:
            sparql.setQuery( """
            SELECT ?a ?propLabel
            WHERE {
              """+head_id+""" ?a """+tail_id+""".
              SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
            ?prop wikibase:directClaim ?a .
            }
            """)
            sparql.setReturnFormat(JSON)
            results = sparql.query().convert()

            bindings = results['results']['bindings']
            if not bindings:
                # No statement in this direction; try the next one.
                continue

            results_df = pd.json_normalize(bindings)
            return results_df[['a.value', 'propLabel.value']]
        except Exception:
            # Best effort: a failed query counts as "no result" for this
            # direction. KeyboardInterrupt/SystemExit are not intercepted.
            continue

    return []

In [52]:
###Make dictionary with all entities connected to a given entity, and the corresponding property
def company_dict(company_name, verbose = 0):
    """List the entities linked from a Wikidata item, with the linking property.

    ``company_name`` is first treated as an English Wikipedia page title; if
    the item cannot be obtained that way it is treated as a Wikidata entity
    ID instead. The item's English description is printed, followed by a
    blank line.

    Parameters
    ----------
    company_name : str
        English Wikipedia page title, or a Wikidata entity ID used as fallback.
    verbose : int, default 0
        When ``1``, every collected record is also printed.

    Returns
    -------
    list of dict
        One record per claim whose target exposes an English label, in claim
        order, with the keys ``'Property ID'``, ``'Property Label'``,
        ``'Property Description'``, ``'Entity Label'``, ``'Entity ID'``,
        ``'Entity Description'`` and ``'Entity Aliases'`` (a list, inserted
        last). A missing English description becomes ``''`` and missing
        English aliases become ``[]``; a property without an English label
        falls back to its property ID. Claims whose target cannot be read as
        a labelled entity are skipped.
    """
    my_dicts = []
    wikidata = pywikibot.Site('wikidata', 'wikidata')

    try:
        ###Search as name
        site = pywikibot.Site("en", "wikipedia")
        page = pywikibot.Page(site, company_name)
        item = pywikibot.ItemPage.fromPage(page)
    except Exception:
        ###Search as Q entity
        item = pywikibot.ItemPage(wikidata, company_name)

    item_dict = item.get() #Get the item dictionary
    page_des = item_dict["descriptions"].get("en", "") # Get the English description
    print(page_des)
    print('\n')

    clm_dict = item_dict["claims"] # Get the claim dictionary

    for prop in clm_dict.keys():

        # Fetch the property page once per property rather than per field.
        prop_data = pywikibot.PropertyPage(wikidata, str("Property:"+prop)).get()
        prop_lab = prop_data['labels'].get('en', prop)
        prop_des = prop_data['descriptions'].get('en', '')

        for ent in clm_dict[prop]:
            try:
                target = ent.getTarget()
                # 'Entity Aliases' is deliberately inserted last so the key
                # order of each record stays as it was.
                my_dict = {
                    'Property ID': prop,
                    'Property Label': prop_lab,
                    'Property Description': prop_des,
                    'Entity Label': target.labels['en'],
                    'Entity ID': str(target).replace('[[','').replace(']]','').replace('wikidata:',''),
                    'Entity Description': target.descriptions.get('en', ''),
                    'Entity Aliases': list(target.aliases.get('en', [])),
                }
            except Exception:
                # Best effort: targets that lack labels/descriptions/aliases
                # attributes or an English label are skipped.
                continue

            if verbose == 1:
                print(my_dict)
                print('\n')

                print(prop_lab, ' : ', my_dict['Entity Label'])
                print('Prop description: ', prop_des)
                print('Entity description: ', my_dict['Entity Description'])
                print('Aliases: ', my_dict['Entity Aliases'])
                print('\n')

            # Append inside the loop so every linked entity is kept, not only
            # the last one per property, and no empty record can slip in.
            my_dicts.append(my_dict)

    return my_dicts




In [53]:
###Find property within given dictionary
def find_property(value, dictionaries):
    """Return the dictionaries in which ``value`` appears, ignoring case.

    A dictionary matches when ``value`` equals one of its string values, or
    one of the strings inside a list/tuple/set value (such as the
    ``'Entity Aliases'`` list built by ``company_dict``). Matching is decided
    by the type of each value rather than by its position in the dictionary,
    so key order does not matter. Values of any other type are ignored.

    Parameters
    ----------
    value : str
        Text to look for.
    dictionaries : iterable of dict
        Records to search.

    Returns
    -------
    list of dict
        The matching dictionaries (the same objects, not copies), in input
        order, each at most once.
    """
    needle = value.lower()

    def _matches(field):
        # Plain string field: case-insensitive equality.
        if isinstance(field, str):
            return field.lower() == needle
        # Collection field (e.g. aliases): any member equal to the needle.
        if isinstance(field, (list, tuple, set, frozenset)):
            return any(isinstance(entry, str) and entry.lower() == needle for entry in field)
        return False

    return [
        dictionary
        for dictionary in dictionaries
        if any(_matches(field) for field in dictionary.values())
    ]



## Test

In [61]:
if __name__ == "__main__":
    # Show the ranked search candidates for two sample queries.
    for search_term in ('Fortnite', 'MacBook Pros'):
        display(find_Qid_search(search_term))

Unnamed: 0,title,snippet
0,Q349375,Fortnite is a 2011 video game developed by Epi...
1,Q66067819,Fortnite is a 2018 single by Dani Faiv
2,Q50822580,Fortnite is a 2017 free-to-play battle royale ...
3,Q55456335,Fortnite is a Wikimedia disambiguation page
4,Q66686075,Fortnite is a annual esports competition based...


Unnamed: 0,title,snippet
0,Q214276,MacBook Pros is a Intel-based line of Macintos...
2,Q20350192,MacBook Pros is a Wikimedia template
3,Q90950884,MacBook Pros is a scientific article published...
4,Q20350273,MacBook Pros is a Wikimedia template


In [62]:
if __name__ == "__main__":
    # Exact lookup: prints the ID/description dict of the first match.
    print(find_Qid(s='Fortnite'))

In [80]:
if __name__ == "__main__":
    # Query the same pair of entities in both argument orders.
    for subject_name, object_name in (('Tim Cook', 'Apple Inc.'), ('Apple Inc.', 'Tim Cook')):
        print(submit_query_wrapped(subject_name, object_name))

                                    a.value          propLabel.value
0  http://www.wikidata.org/prop/direct/P169  chief executive officer
                                    a.value propLabel.value
0  http://www.wikidata.org/prop/direct/P108        employer


In [34]:
if __name__ == "__main__":
    # Collect the records for Apple Inc. (kept in `apple_dict` for the next
    # cell) and print the first one.
    apple_dict = company_dict(company_name='Apple Inc.', verbose=0)
    print(apple_dict[0])

American technology company based in Cupertino, California






{'Property ID': 'P112',
 'Property Label': 'founded by',
 'Property Description': 'founder or co-founder of this organization, religion or place',
 'Entity Label': 'Steve Jobs',
 'Entity ID': 'Q19837',
 'Entity Description': 'American entrepreneur and co-founder of Apple Inc.',
 'Entity Aliases': ['Steven Paul Jobs', 'Steven Jobs']}

In [73]:
if __name__ == "__main__":
    # Look up the records in `apple_dict` that mention this name or alias.
    print(find_property(value='Steven Jobs', dictionaries=apple_dict))

[{'Property ID': 'P112',
  'Property Label': 'founded by',
  'Property Description': 'founder or co-founder of this organization, religion or place',
  'Entity Label': 'Steve Jobs',
  'Entity ID': 'Q19837',
  'Entity Description': 'American entrepreneur and co-founder of Apple Inc.',
  'Entity Aliases': ['Steven Paul Jobs', 'Steven Jobs']}]