# Required packages
- [qwikidata](https://qwikidata.readthedocs.io/en/stable/)
- [json](https://docs.python.org/3/library/json.html) (standard library)
- [bz2](https://docs.python.org/3/library/bz2.html) (standard library)
# Required data
[wikidata dump](https://dumps.wikimedia.org/wikidatawiki/entities/)

In [1]:
import time
import json
import bz2
import sys
from qwikidata.entity import WikidataItem
from qwikidata.json_dump import WikidataJsonDump
from qwikidata.utils import dump_entities_to_json

In [2]:
# File locations and the Wikidata identifiers used by the politician filter.
WIKI_DATA_FULL = '../latest-all.json.bz2' # path to the full Wikidata dump (.json.bz2)
WIKI_DATA_FILTERED = '../filtered_politician.json.bz2' # path the filtered politician records are written to (.json.bz2)
P_OCCUPATION = "P106" # Wikidata property ID for "occupation"
Q_POLITICIAN = "Q82955" # Wikidata item ID for "politician"
P_COUNTRY = "P27" # Wikidata property ID for "country of citizenship"

In [3]:
def has_occupation_politician(item: WikidataItem, truthy: bool = True) -> bool:
    """Tell whether *item* lists "politician" among its occupation claims.

    Args:
        item: The Wikidata item to inspect.
        truthy: When True, only the item's truthy occupation claims are
            considered; when False, the full occupation claim group is used.

    Returns:
        True if one of the considered occupation claims points at
        ``Q_POLITICIAN``, otherwise False.
    """
    occupation_claims = (
        item.get_truthy_claim_group(P_OCCUPATION)
        if truthy
        else item.get_claim_group(P_OCCUPATION)
    )

    # Gather the target QID of every occupation claim that carries a value;
    # claims with any other snak type have no datavalue to read.
    found_qids = []
    for occupation_claim in occupation_claims:
        if occupation_claim.mainsnak.snaktype != "value":
            continue
        found_qids.append(occupation_claim.mainsnak.datavalue.value["id"])

    return Q_POLITICIAN in found_qids

In [4]:
# Create a WikidataJsonDump reader over the full dump file; the filtering
# loop later iterates over it to obtain one entity dict at a time.
wjd_dump_path = WIKI_DATA_FULL
wjd = WikidataJsonDump(wjd_dump_path)

In [5]:
# Re-create the dump reader so this cell can be run on its own.
wjd_dump_path = WIKI_DATA_FULL
wjd = WikidataJsonDump(wjd_dump_path)

P_DATE_OF_DEATH = "P570"  # items carrying this claim (date of death) are skipped
REPORT_EVERY = 10000  # number of entities between progress reports / writes
MAX_ENTITIES = None  # set to an int to stop early while debugging


def _write_json_lines(out_file, records):
    """Write each record as one JSON document per line (UTF-8) to *out_file*.

    Every record is terminated by a newline so that consecutive batches never
    run together on the same line.
    """
    if records:
        payload = ''.join(json.dumps(record) + '\n' for record in records)
        out_file.write(payload.encode('utf-8'))


# Stream the dump and keep, for every item that is a politician and has no
# date-of-death claim, only its qid, English label and country of citizenship.
politicians = []  # records collected since the last write
t1 = time.time()
with bz2.open(WIKI_DATA_FILTERED, 'wb') as d_file:
    for ii, entity_dict in enumerate(wjd):
        if entity_dict["type"] == "item":
            entity = WikidataItem(entity_dict)
            try:
                if (P_DATE_OF_DEATH not in entity_dict['claims']
                        and has_occupation_politician(entity)):
                    politicians.append({
                        'qid': entity_dict['id'],
                        'name': entity_dict['labels']['en']['value'],
                        # Store the QID of the country (the claim's value),
                        # not the property ID, which would always be "P27".
                        'nationality': (
                            entity_dict['claims'][P_COUNTRY][-1]
                            ['mainsnak']['datavalue']['value']['id']
                        ),
                    })
            except (KeyError, IndexError) as e:
                # No English label, no citizenship claim, or a citizenship
                # claim without a value: report the entity and skip it.
                print(e, entity_dict['id'])
            except Exception as e:
                # Best effort: one malformed entity must not abort a run that
                # takes hours, but say what went wrong and where.
                print(repr(e), entity_dict['id'])
        if ii % REPORT_EVERY == 0:
            dt = time.time() - t1
            print(
                "Dumped {} politicians among {} entities [entities/s: {:.2f}]".format(
                    len(politicians), ii, ii / dt if dt > 0 else 0.0
                )
            )
            _write_json_lines(d_file, politicians)
            politicians = []
        if MAX_ENTITIES is not None and ii >= MAX_ENTITIES:
            break
    # Write whatever was collected after the last periodic write.
    _write_json_lines(d_file, politicians)
    politicians = []

Dumped 0 politicians among 0 entities [entities/s: 0.00]
Dumped 464 politicians among 10000 entities [entities/s: 1073.34]
Dumped 653 politicians among 20000 entities [entities/s: 1059.41]
Dumped 335 politicians among 30000 entities [entities/s: 1097.18]
Dumped 649 politicians among 40000 entities [entities/s: 1167.62]
'en'
Q394762
Dumped 438 politicians among 50000 entities [entities/s: 1251.93]
Dumped 499 politicians among 60000 entities [entities/s: 1300.72]
'en'
Q595519
Dumped 417 politicians among 70000 entities [entities/s: 1362.02]
Dumped 271 politicians among 80000 entities [entities/s: 1418.51]
Dumped 520 politicians among 90000 entities [entities/s: 1464.31]
'en'
Q855006
Dumped 213 politicians among 100000 entities [entities/s: 1504.03]
Dumped 385 politicians among 110000 entities [entities/s: 1532.99]
Dumped 274 politicians among 120000 entities [entities/s: 1558.75]
Dumped 340 politicians among 130000 entities [entities/s: 1599.94]
Dumped 211 politicians among 140000 entiti

'en'
Q6620082
Dumped 70 politicians among 740000 entities [entities/s: 2968.82]
'en'
Q6673632
'en'
Q6684933
'en'
Q6715238
Dumped 152 politicians among 750000 entities [entities/s: 2978.75]
'en'
Q6751686
'en'
Q6752360
'en'
Q6761332
'en'
Q6765279
'en'
Q6766177
'en'
Q6801061
'en'
Q6804626
Dumped 323 politicians among 760000 entities [entities/s: 2989.93]
'en'
Q6811357
'en'
Q6819754
Dumped 238 politicians among 770000 entities [entities/s: 2998.61]
'en'
Q6943194
'en'
Q7002699
Dumped 139 politicians among 780000 entities [entities/s: 3007.87]
'en'
Q7043943
Dumped 133 politicians among 790000 entities [entities/s: 3016.68]
'en'
Q7186837
Dumped 362 politicians among 800000 entities [entities/s: 3025.69]
'en'
Q7247616
'en'
Q7252533
Dumped 127 politicians among 810000 entities [entities/s: 3038.83]
Dumped 595 politicians among 820000 entities [entities/s: 3048.75]
Dumped 281 politicians among 830000 entities [entities/s: 3062.32]
Dumped 301 politicians among 840000 entities [entities/s: 3071.95

KeyboardInterrupt: 