In [4]:
import requests, re

### Data Gathering

In [186]:
def get_revisions(Property,rvcontinue=None):
    """
    Input:
    Property,rvcontinue
    
    Output:
    revisions, rvcontinue
    """
    URL = "https://www.wikidata.org/w/api.php"
    # defining a params dict for the parameters to be sent to the API
    PARAMS = {"action":"query",
              "prop":"revisions",
              "format":"json",
              "rvlimit":"500",
              "titles":"Property:{}".format(Property),
              "rvdir":"newer",
              "rvcontinue": rvcontinue}
    
    # sending get request and saving the response as response object
    r = requests.get(url = URL,params = PARAMS)

    # extracting data in json format
    data = r.json()
    
    # Get revisions
    pages = data['query']['pages']
    keys = pages.keys()
    revisions = [pages[k]['revisions'] for k in keys]
    rvcontinue = data['continue']['rvcontinue'] if 'continue' in data else None
    
    return revisions, rvcontinue

In [6]:
def get_all_revisions(Property,rvcontinue=None):
    
    all_revisions = []
        
    while True:
        revisions, rvcontinue = get_revisions(Property,rvcontinue)
        all_revisions += revisions[0]
        
        if rvcontinue is None:
            break

    return all_revisions

In [324]:
all_revisions = get_all_revisions('P856')

### Data parsing


|Attribute |Add| Update |Remove|
|---|---|---|---|
|label |wbsetlabel-add| wbsetlabel-set| wbsetlabel-remove|
|description |wbsetdescription-add| wbsetdescription-set| wbsetdescription-remove|
|alias |wbsetaliases-add |wbsetaliases-add-remove| wbsetaliases-remove|

wbsetlabeldescriptionaliases
wbeditentity-create

In [192]:
tag_pattern = "(?<=\/\*)(.*)(?=\*\/)"
description_pattern = "(?<=\*\/)[^\]]+"
language_pattern = "(?<=\|)[^\]]+"

In [293]:
attributes_dict = {"wbsetlabel-":["label"],
                   "wbeditentity-create":["label"],
                   "wbsetdescription-":["description"],
                   "wbsetaliases-":["alias"],
                   "wbsetlabeldescriptionaliases":["label","description","alias"]}

actions_dict = {"-add":"add",
                "-update":"update",
                "-set":"update",
                "-add-remove":"update",
                "-remove":"remove"}

In [337]:
def parse_revisions(all_revisions):
    for revision in all_revisions.copy():
        comment = revision['comment']
        tag = re.findall(tag_pattern, comment)
        if(not tag):
            all_revisions.remove(revision)
            continue

        tag =tag[0]
        attributes = [v for k,v in attributes_dict.items() if k in tag]

        # If there is no attribute, we delete the element from the list
        if(not attributes):
            all_revisions.remove(revision)
            continue
        attributes = attributes[0]

        action = [v for k,v in actions_dict.items() if k in tag]
        if not action:
            if 'wbeditentity-create' in tag:
                action = ['add']
            elif 'wbsetlabeldescriptionaliases' in tag:
                action = ['uptdate']

        description =re.findall(description_pattern, comment)[0].strip()
        language = re.findall(language_pattern,tag)[0].strip()

        revision['label'] = language if 'label' in attributes else None
        revision['description'] = language if 'description' in attributes else None
        revision['alias'] = language if 'alias' in attributes else None

        revision['action'] = action[0]
        #revision['description'] = description
        revision['language'] = language

        #revision.pop('comment', None)
        if('anon' in revision):
            revision.pop('anon',None)
    
    return all_revisions

In [None]:
SELECT DISTINCT ?property
    WHERE
    {
      ?property rdf:type wikibase:Property.
    }
    ORDER by ?property

In [338]:
from SPARQLWrapper import SPARQLWrapper, JSON
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
sparql.setQuery("""SELECT DISTINCT ?property
    WHERE
    {
      ?property rdf:type wikibase:Property.
    }
    ORDER by ?property""")
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

property_list = []
for result in results["results"]["bindings"]:
    property_list.append(result['property']['value'])
    print(result)

{'property': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/P10'}}
{'property': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/P101'}}
{'property': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/P102'}}
{'property': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/P103'}}
{'property': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/P105'}}
{'property': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/P106'}}
{'property': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/P108'}}
{'property': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/P109'}}
{'property': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/P110'}}
{'property': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/P111'}}
{'property': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/P112'}}
{'property': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/P113'}}
{'property': {'type': 'uri', 'value': 'http://www.wikidata.org/en

In [296]:
import csv

keys = all_revisions[0].keys()

with open('output.csv', 'w') as output_file: # wb if python <3.x 
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(all_revisions)

In [176]:
len(all_revisions)

15

In [184]:
len(all_revisions)

15

In [218]:
test = {"test":[]}

In [220]:
list(test.keys())

['test']

In [217]:
if not []:
    print("bite")