In [1]:
import pywikibot
import requests
import sys
import os 
from pathlib import Path
import json
import re 
import time
from datetime import date
from os.path import exists


# Notebook-safe way to locate the project (https://stackoverflow.com/a/66303932):
# "__file__" is just a string here, so realpath() resolves it against the
# current working directory; two .parent hops yield the directory above it.
project_path = Path(os.path.realpath("__file__")).parent.parent
# Make both that directory and its scripts/ folder importable.
sys.path.extend(str(entry) for entry in (project_path, project_path / 'scripts'))

import scripts.utils.wikidata_utils as wd
import scripts.utils.wiki_queries as wq


In [2]:
# Endpoints of the live Wikidata instance.
WIKI_BASE_URL = "https://www.wikidata.org"
WIKI_QUERY_URL = "https://query.wikidata.org"

# Handy QID -> label lookup for the example records used while exploring.
records = {
    'Q5': 'human',
    'Q30': 'United States',
    'Q487604': 'Martha Graham',
    'Q16973731': 'Dianne McIntyre',
    'Q753828': 'Essex',
    'Q76': 'Barack Obama',
}

# pywikibot handles: the site object and the data repository obtained from it.
site = pywikibot.Site("wikidata", "wikidata")
repo = site.data_repository()



## get item

In [3]:
# Build the item page for Q753828 (labelled 'Essex' in the `records` lookup)
# against the repository configured in the setup cell.
new_item = pywikibot.ItemPage(repo, title="Q753828")
# Last expression of the cell, so the notebook displays whether the item exists.
new_item.exists()

True

In [4]:
# Keep the JSON form of the item; later cells pass it alongside `new_item`.
new_item_json = new_item.toJSON()

# Uncomment to pretty-print the JSON for inspection:
# print(json.dumps(new_item.toJSON(), indent=2, ensure_ascii=False))

## format item; this is what will be returned by the api

In [5]:

# Time a single format_display_item call. perf_counter() is a monotonic,
# high-resolution clock intended for measuring short intervals; time.time()
# is wall-clock time and can jump if the system clock is adjusted.
start = time.perf_counter()
item_data = wd.format_display_item(new_item)
stop = time.perf_counter()
print(stop-start)



0.5437748432159424


In [6]:
# print(json.dumps(item_data, indent=2, ensure_ascii=False))

## test how long it takes to get labels for all properties and items in an item

In [7]:
# One call per flag combination, in the order all / Q-only / P-only
# (going by the keyword names: include_pids / include_qids).
all_ids, q_ids, p_ids = (
    wd.get_ids_for_item(
        new_item,
        new_item_json,
        include_pids=with_pids,
        include_qids=with_qids,
    )
    for with_pids, with_qids in ((True, True), (False, True), (True, False))
)


Connect to the Wikimedia API

In [8]:
# Time the label lookup for each id set. perf_counter() is used instead of
# time.time() because it is monotonic and high-resolution, so the measured
# interval cannot be distorted by system clock adjustments.
start = time.perf_counter()
all_ids_dict = wq.fetch_labels_for_ids(all_ids, lang="en")
stop = time.perf_counter()
print('all', stop-start, len(all_ids))


start = time.perf_counter()
q_ids_dict = wq.fetch_labels_for_ids(q_ids, lang="en")
stop = time.perf_counter()
print('qids', stop-start, len(q_ids))


start = time.perf_counter()
p_ids_dict = wq.fetch_labels_for_ids(p_ids, lang="en")
stop = time.perf_counter()
print('pids', stop-start, len(p_ids))

all 1.2711541652679443 70
qids 0.8163399696350098 28
pids 0.8183858394622803 42


Use a SPARQL query

In [9]:
# Time the SPARQL-based label lookup. perf_counter() is the monotonic,
# high-resolution clock meant for interval timing; time.time() can jump when
# the system clock is adjusted.
start = time.perf_counter()
results = wq.fetch_labels_for_ids_sqarql(all_ids)
stop = time.perf_counter()
print('all_ids', stop-start, len(all_ids))


# NOTE: `results` is rebound here, so after this cell it holds the response
# for q_ids only.
start = time.perf_counter()
results = wq.fetch_labels_for_ids_sqarql(q_ids)
stop = time.perf_counter()
print('q_ids', stop-start, len(q_ids))



all_ids 0.4917480945587158 70
q_ids 0.21212530136108398 28


In [10]:
# Time create_id_label_dictionary for the item. perf_counter() replaces
# time.time() because it is monotonic and high-resolution, which is what an
# elapsed-time measurement needs.
start = time.perf_counter()

ids = wd.create_id_label_dictionary(new_item, new_item_json)

stop = time.perf_counter()
print('ids', stop-start, len(ids))

ids 0.23102211952209473 70


## how to call dir methods

In [11]:
# Looking an attribute up by its string name returns the attribute itself:
# here the `upper` method of 'ab', not the result of calling it.
getattr('ab', 'upper')

<function str.upper>

In [12]:
# callable() distinguishes methods from plain data attributes before invoking them.
callable(getattr('ab', 'upper'))

True

In [13]:
# Scan every name dir() reports for the string and invoke only `upper`,
# showing that a name obtained from dir() can be called through getattr().
foo = 'ab'
for attr in dir(foo):
    if attr != 'upper':
        continue
    bar = getattr(foo, attr)()

bar
        

'AB'

## call every method and attribute in dir() for the claims in an item

In [14]:
def test_attribute(attr, claim):
    if not attr.startswith('_'):
        print('-------')

        try:
            if callable(getattr(claim.target, attr)):
                print(attr, 'method')
                try:
                    print(getattr(claim.target, attr)())
                except:
                    pass
            else:
                print(attr, 'attr')
                try:
                    print(getattr(claim.target, attr))
                except:
                    pass
        except:
            pass



In [15]:
# NOTE(review): this rebinds the module-level `site` and `repo` (previously
# created with ("wikidata", "wikidata")) to the ("test", "wikidata") site.
# Any cell that reads `repo` after this one uses the test repository.
site = pywikibot.Site("test", "wikidata")
repo = site.data_repository()

# Q154287 record with multimedia test.wikidata 
test_item = pywikibot.ItemPage(repo, title="Q154287")

# `count` doubles as a "first claim only" guard: both loops check it on entry
# and break as soon as one claim has been handled.
count = 0
for prop, claims in test_item.claims.items():
    if count > 0:
        break
    for claim in claims:
        if count > 0:
            break
        count += 1
        
        print(claim.target)
        # Uncomment to list every name available on the claim target:
        # print(dir(claim.target))

        # Placeholder loop: the body is disabled. Re-enable the call below to
        # dump each attribute of the target via test_attribute.
        for attr in dir(claim.target):
            pass
            # test_attribute(attr, claim)

# Last expression of the cell: displays how many claims were inspected.
count

[[commons:File:Hum Hum Waterfall.jpg]]


1

## inspect each claim with a given property

In [16]:
# Visit only the claims stored under property P214; the per-claim body is
# still a placeholder for whatever inspection is needed.
for prop, claims in new_item.claims.items():
    if prop != 'P214':
        continue
    for claim in claims:
        pass
        
