In [1]:
import pandas as pd
import requests
import json
from tqdm import tqdm

In [2]:
%load_ext jupyter_black

## Determine join property & fetch all eligible external ids

In [3]:
# Ask which Wikidata property links the external table to Wikidata items.
# Surrounding whitespace is stripped because the value is later substituted
# verbatim into the SPARQL template.
joinProp = input("What property are you going to join the tables on?").strip()

# Fail early on anything that is not a plain property ID ("P" followed by
# digits) instead of sending a broken query to the endpoint later on.
if not (
    joinProp.startswith("P") and joinProp[1:].isascii() and joinProp[1:].isdigit()
):
    raise ValueError(
        f"{joinProp!r} is not a valid Wikidata property ID (expected e.g. 'P10660')"
    )

# Reuse the existing mapping/config file if there is one; start fresh otherwise.
try:
    with open("colNamePid.json", "r") as file:
        config = json.load(file)
except FileNotFoundError:
    config = {}

# Update the config in memory first so the file is only opened for writing
# (and thereby truncated) once the new content is ready.
config["__joinProp"] = joinProp
with open("colNamePid.json", "w") as file:
    # indent=2 matches how the other cells write this file.
    json.dump(config, file, indent=2)

What property are you going to join the tables on? P10660


In [21]:
# Load the SPARQL query template once. downloadTemplate() fills in its
# "#< selection >#", "PJOIN", "PDATA" and "#< Extra constraints >#" placeholders.
with open("template.rq", "r") as file:
    template = file.read()


def downloadTemplate(
    joinProp: str,
    filename: str,
    pData=None,
    *,
    constraints="",
    inclName=False,
    inclJoin=False,
    timeout=300,
):
    """Fill in the SPARQL template, run it on QLever and save the result as TSV.

    Exactly one extra column is fetched per call: the values of property
    ``pData``, the English label (``inclName``), or the join property's own
    values (``inclJoin``).

    Args:
        joinProp: Property ID substituted for ``PJOIN`` in the template.
        filename: Output path without extension; ``.tsv`` is appended.
        pData: Property ID substituted for ``PDATA``. When omitted,
            ``joinProp`` is substituted instead.
        constraints: Extra SPARQL text substituted for the
            ``#< Extra constraints >#`` placeholder. It is inserted verbatim
            and is NOT validated, so only pass trusted text.
        inclName: Select the English ``rdfs:label`` of each item.
        inclJoin: Select the values of the join property itself.
        timeout: Seconds to wait for the endpoint before giving up.

    Raises:
        ValueError: If not exactly one of ``pData``, ``inclName`` and
            ``inclJoin`` is given, or if a property ID is malformed.
        requests.exceptions.RequestException: On connection errors/timeouts.
    """
    # Only allow retrieving one extra column at a time. The previous XOR-based
    # assert also accepted two of the three options together; counting the
    # requested options enforces "exactly one".
    requested = (pData is not None) + bool(inclName) + bool(inclJoin)
    if requested != 1:
        raise ValueError("Specify exactly one of pData, inclName or inclJoin")

    if pData is None:
        pData = joinProp

    # Property IDs are pasted into the query text, so accept nothing but
    # "P<digits>" to keep stray input from rewriting the query.
    for label, pid in (("joinProp", joinProp), ("pData", pData)):
        if not (
            isinstance(pid, str)
            and pid.startswith("P")
            and pid[1:].isascii()
            and pid[1:].isdigit()
        ):
            raise ValueError(f"{label}={pid!r} is not a valid Wikidata property ID")

    if inclName:
        selection = "(?_name AS ?name)"
        constraints += '\n ?_qid rdfs:label ?_name . FILTER (LANG(?_name) = "en")'
    else:
        # Either pData or inclJoin was requested: return the value (or its
        # English label when one was matched) plus the statement GUID suffix.
        selection = "(str(COALESCE(?_target_col_label, ?_target_col)) AS ?target_col) (SUBSTR(STR(?_target_col_guid), 42) AS ?target_col_guid)"

    # Plain textual substitution; `constraints` is still inserted verbatim.
    query = (
        template.replace("#< selection >#", selection)
        .replace("PJOIN", joinProp)
        .replace("PDATA", pData)
        .replace("#< Extra constraints >#", constraints)
    )
    print(query)

    url = "https://qlever.cs.uni-freiburg.de/api/wikidata"
    params = {"query": query, "action": "tsv_export"}

    # Without a timeout a stalled connection would hang the notebook forever.
    response = requests.get(url, params=params, timeout=timeout)

    if response.status_code == 200:  # If the request was successful
        with open(filename + ".tsv", "wb") as f:
            f.write(response.content)
        print("File downloaded successfully!")
    else:
        print("Failed to download file. Status code:", response.status_code)

In [19]:
# Fetch the English label of every item that has the join property -> labels.tsv
downloadTemplate(joinProp, "labels", inclName=True)

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
SELECT DISTINCT (SUBSTR(STR(?_qid), 32) AS ?qid)  (?_name AS ?name)
WHERE {
  
 ?_qid rdfs:label ?_name . FILTER (LANG(?_name) = "en")
  ?_join_col_guid ps:P10660 ?_join_col.
  ?_qid p:P10660 ?_join_col_guid.
  ?_target_col_guid ps:P10660 ?_target_col.
  ?_qid p:P10660 ?_target_col_guid.
  OPTIONAL {
    ?_target_col rdfs:label ?_target_col_label. 
    FILTER (LANG(?_target_col_label) = "en") 
  }
}
File downloaded successfully!


In [22]:
# Fetch each item's join-property value together with its GUID column -> joinProp.tsv
downloadTemplate(joinProp, "joinProp", inclJoin=True)

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
SELECT DISTINCT (SUBSTR(STR(?_qid), 32) AS ?qid)  (str(COALESCE(?_target_col_label, ?_target_col)) AS ?target_col) (SUBSTR(STR(?_target_col_guid), 42) AS ?target_col_guid)
WHERE {
  
  ?_join_col_guid ps:P10660 ?_join_col.
  ?_qid p:P10660 ?_join_col_guid.
  ?_target_col_guid ps:P10660 ?_target_col.
  ?_qid p:P10660 ?_target_col_guid.
  OPTIONAL {
    ?_target_col rdfs:label ?_target_col_label. 
    FILTER (LANG(?_target_col_label) = "en") 
  }
}
File downloaded successfully!


## Business logic for retrieving the external db

In [14]:
# Lift pandas' column display limit so wide frames are shown with all their columns.
pd.set_option("display.max_columns", None)

In [15]:
# Download the table of current legislators ...
current = pd.read_csv(
    "https://theunitedstates.io/congress-legislators/legislators-current.csv"
)
# ... and the table of historical ones.
historic = pd.read_csv(
    "https://theunitedstates.io/congress-legislators/legislators-historical.csv"
)
# Stack both tables and renumber the rows, since each source frame starts
# counting at 0 and the concatenation would otherwise carry duplicate labels.
politicians = pd.concat([current, historic]).reset_index(drop=True)
politicians.head()

Unnamed: 0,last_name,first_name,middle_name,suffix,nickname,full_name,birthday,gender,type,state,district,senate_class,party,url,address,phone,contact_form,rss_url,twitter,twitter_id,facebook,youtube,youtube_id,mastodon,bioguide_id,thomas_id,opensecrets_id,lis_id,fec_ids,cspan_id,govtrack_id,votesmart_id,ballotpedia_id,washington_post_id,icpsr_id,wikipedia_id
0,Brown,Sherrod,,,,Sherrod Brown,1952-11-09,M,sen,OH,,1.0,Democrat,https://www.brown.senate.gov,503 Hart Senate Office Building Washington DC ...,202-224-2315,https://www.brown.senate.gov/contact/,http://www.brown.senate.gov/rss/feeds/?type=al...,SenSherrodBrown,43910797.0,SenatorSherrodBrown,SherrodBrownOhio,UCgy8jfERh-t_ixkKKoCmglQ,,B000944,136.0,N00003535,S307,"H2OH13033,S6OH00163",5051.0,400050,27018.0,Sherrod Brown,,29389.0,Sherrod Brown
1,Cantwell,Maria,,,,Maria Cantwell,1958-10-13,F,sen,WA,,1.0,Democrat,https://www.cantwell.senate.gov,511 Hart Senate Office Building Washington DC ...,202-224-3441,https://www.cantwell.senate.gov/public/index.c...,http://www.cantwell.senate.gov/public/index.cf...,SenatorCantwell,117501995.0,senatorcantwell,SenatorCantwell,UCN52UDqKgvHRk39ncySrIMw,,C000127,172.0,N00007836,S275,"S8WA00194,H2WA01054",26137.0,300018,27122.0,Maria Cantwell,,39310.0,Maria Cantwell
2,Cardin,Benjamin,L.,,,Benjamin L. Cardin,1943-10-05,M,sen,MD,,1.0,Democrat,https://www.cardin.senate.gov,509 Hart Senate Office Building Washington DC ...,202-224-4524,https://www.cardin.senate.gov/contact/,http://www.cardin.senate.gov/rss/feeds/?type=all,SenatorCardin,109071031.0,senatorbencardin,senatorcardin,UCiQaJnMzlfzzG3VESgyZChA,,C000141,174.0,N00001955,S308,"H6MD03177,S6MD03177",4004.0,400064,26888.0,Ben Cardin,,15408.0,Ben Cardin
3,Carper,Thomas,Richard,,,Thomas R. Carper,1947-01-23,M,sen,DE,,1.0,Democrat,https://www.carper.senate.gov,513 Hart Senate Office Building Washington DC ...,202-224-2441,https://www.carper.senate.gov/contact/,http://www.carper.senate.gov/public/index.cfm/...,SenatorCarper,249787913.0,tomcarper,senatorcarper,UCgLnvbKwu4B3navofj6Qvvw,,C000174,179.0,N00012508,S277,S8DE00079,663.0,300019,22421.0,Tom Carper,,15015.0,Tom Carper
4,Casey,Robert,P.,Jr.,Bob,"Robert P. Casey, Jr.",1960-04-13,M,sen,PA,,1.0,Democrat,https://www.casey.senate.gov,393 Russell Senate Office Building Washington ...,202-224-6324,https://www.casey.senate.gov/contact,http://www.casey.senate.gov/rss/feeds/?all,SenBobCasey,171598736.0,SenatorBobCasey,SenatorBobCasey,UCtVssXhx-KuZa-hSvnsnJ0A,,C001070,1828.0,N00027503,S309,S6PA00217,47036.0,412246,2541.0,"Bob Casey, Jr.",,40703.0,Bob Casey Jr.


## Match external columns to Wikidata PIDs
At this point, you should have a dataframe of your external source

In [None]:
# TODO: it would be very cool if there was a gui using something like ipywidgets or a proper web server
def createNamePidMapping(df):
    """Create or extend ``colNamePid.json`` with one entry per column of ``df``.

    Existing entries in the file are kept untouched. Every column that is not
    yet present gets ``{"example": <first non-null value as str>, "p": ""}``;
    the example is the string ``"None"`` when the column has no values at all.

    Args:
        df: DataFrame of the external source whose columns are to be mapped.
    """
    try:
        with open("colNamePid.json", "r") as file:
            colNamePid = json.load(file)
    except FileNotFoundError:
        colNamePid = {}

    colNamePid["__instructions"] = (
        "# Mapping columns to PIDs and downloading Wikidata data. To skip column, leave 'p' property blank. PIDs should start with capital 'P'. 'pname' is automatically generated"
    )
    for col in df.columns:
        if col in colNamePid:
            print(col, "already in mapping, skipping")
            continue

        # Pick the first non-null value by position. Looking it up by index
        # label returns a whole Series when index labels are duplicated,
        # which would end up stringified as the "example".
        non_null = df[col].dropna()
        first_non_null_value = non_null.iloc[0] if len(non_null) else None

        colNamePid[col] = {
            "example": str(first_non_null_value),
            "p": "",
        }

    with open("colNamePid.json", "w") as file:
        json.dump(colNamePid, file, indent=2)


# Write/extend colNamePid.json with an entry for every column of the external table.
createNamePidMapping(politicians)

In [16]:
# Reload the column -> PID mapping from disk (presumably after its "p" fields
# were filled in by hand -- see the "__instructions" entry in the file).
with open("colNamePid.json", "r") as file:
    colNamePid = json.load(file)

In [15]:
def get_property_name(pid):
    """Return the English label of a Wikidata property, or None on failure.

    Args:
        pid: Property ID such as ``"P569"``. Anything that is not "P" followed
            by digits (e.g. ``"P39.P768"``) is rejected without a request.

    Returns:
        The English label, or None when the ID is malformed, the request
        fails, or the response carries no English label for ``pid``. A short
        message is printed in each failure case.
    """
    # Malformed IDs can never resolve; skip the network round-trip and say
    # what is wrong instead of surfacing an opaque KeyError from the response.
    if not (
        isinstance(pid, str)
        and pid.startswith("P")
        and pid[1:].isascii()
        and pid[1:].isdigit()
    ):
        print(f"An error occurred: {pid!r} is not a well-formed property ID")
        return None

    try:
        # Let requests build and encode the query string; only the English
        # labels are requested to keep the response small.
        response = requests.get(
            "https://www.wikidata.org/w/api.php",
            params={
                "action": "wbgetentities",
                "ids": pid,
                "props": "labels",
                "languages": "en",
                "format": "json",
            },
            timeout=30,
        )
        response.raise_for_status()
        data = response.json()

        # Extracting the English label of the property
        return data["entities"][pid]["labels"]["en"]["value"]
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        # Network/HTTP failure, non-JSON body, or a response without the
        # expected entity/label structure.
        print(f"An error occurred: {e}")
        return None


# Look up the English label of every mapped PID and store it as "pname".
for key, item in tqdm(colNamePid.items()):
    # Skip the "__"-prefixed helper entries and columns whose "p" was left blank
    if key.startswith("__") or len(item["p"]) == 0:
        continue
    pname = get_property_name(item["p"])
    if pname is None:
        # No label could be retrieved; the entry is left without a new "pname".
        print(item["p"] + " is an invalid pid")
        continue
        # TODO: handle this
    item["pname"] = pname

# Persist the enriched mapping back to the same file.
with open("colNamePid.json", "w") as file:
    json.dump(colNamePid, file, indent=2)

 37%|███▋      | 14/38 [00:01<00:02,  9.35it/s]

An error occurred: 'entities'
P39.P768 is an invalid pid


 58%|█████▊    | 22/38 [00:03<00:02,  5.43it/s]

An error occurred: 'entities'
P2002.P6552 is an invalid pid


 63%|██████▎   | 24/38 [00:03<00:02,  5.56it/s]

An error occurred: 'entities'
P2397.P11245 is an invalid pid


100%|██████████| 38/38 [00:04<00:00,  8.08it/s]


## Duplicate Check
Check that the SPARQL query is written correctly: it should not drop duplicates, but it also should not produce a new row for every combination of values. Search for duplicates below; there should be a few.

In [7]:
# Load the tab-separated join-property download (joinProp.tsv).
df = pd.read_csv("joinProp.tsv", delimiter="\t")

In [8]:
# Rows whose ?target_col value occurs more than once
# (keep=False marks every member of a duplicate group, not just the repeats).
df[df["?target_col"].duplicated(keep=False)].sort_values("?target_col")

Unnamed: 0,?qid,?target_col,?target_col_guid
6558,Q4798696,3246,Q4798696-65FC2E31-C6AD-4E3C-ADC2-F46C6CECCA8A
11438,Q7349097,3246,Q7349097-A0824D63-76F4-43AA-8670-74834E214E5F
13009,Q967900,7281,Q967900-51C88E12-5650-4EE3-87F4-E9300061D0E2
12181,Q7817480,7281,Q7817480-5C6F5297-516F-4A0E-9C81-7AD62F433512
3413,Q2204556,20485,Q2204556-6EC5424D-F093-4D01-AA15-41D597A4941E
8234,Q5498791,20485,Q5498791-2D842C85-F43A-47F4-8B69-C750F1A5B766
11124,Q72762723,38958,Q72762723-B86016D0-31CD-4806-B003-56FDD697FF0E
10134,Q6660224,38958,Q6660224-D21D806B-2649-4C6A-BFC9-8B91ED971AD3
12088,Q7789874,53797,Q7789874-85AA94F6-8148-498A-9F37-4EA0CBAEDD76
8908,Q6069522,53797,Q6069522-4E5D58AA-D39A-4A9B-8C18-1C518D0CF343


In [9]:
# Rows whose ?qid occurs more than once, i.e. items listed with several ?target_col values.
df[df["?qid"].duplicated(keep=False)].sort_values("?qid")

Unnamed: 0,?qid,?target_col,?target_col_guid
2082,Q163957,2139,Q163957-F6535CB8-F699-4562-9333-B6D42B044195
2083,Q163957,9263109,Q163957-745E7491-2755-491D-BAAA-A0BD7EF0FB9F
4762,Q313776,53850,Q313776-98D7858F-0A1A-4C4A-9F6C-FDAED3EBE79E
4763,Q313776,56987,Q313776-1A688262-4D2B-4F92-813F-23DB834E0F9E
6588,Q482827,1984,Q482827-50F24354-84F8-4820-934A-954596D807C6
6589,Q482827,27782,Q482827-38EE0CF7-171C-4EB2-AE15-0B39134311DB
11725,Q7482898,1000452,Q7482898-338C876E-6EF7-41E0-A33E-21E086591324
11726,Q7482898,54082,Q7482898-EDDFC84B-5A25-4FDB-B7B3-150B09A69610


In [10]:
# Spot check one of the repeated items: expect to see 2 rows, one per value
df[df["?qid"] == "Q482827"]

Unnamed: 0,?qid,?target_col,?target_col_guid
6588,Q482827,1984,Q482827-50F24354-84F8-4820-934A-954596D807C6
6589,Q482827,27782,Q482827-38EE0CF7-171C-4EB2-AE15-0B39134311DB


In [12]:
# Every row should carry its own ?target_col_guid, so there should be no duplicates
df[df["?target_col_guid"].duplicated(keep=False)].sort_values("?target_col_guid")

Unnamed: 0,?qid,?target_col,?target_col_guid


In [11]:
# Each item should appear at most once in labels.tsv, so there should be no duplicates here
labels = pd.read_csv("labels.tsv", delimiter="\t")
labels[labels["?qid"].duplicated(keep=False)].sort_values("?qid")

Unnamed: 0,?qid,?name


## Download wikidata values for all columns

In [23]:
def dlWd(pids):
    """Download the Wikidata values for every usable property ID in ``pids``.

    Blank entries and entries containing a "." are passed over; each
    remaining PID is handed to downloadTemplate() with the PID itself as the
    output file name.
    """
    for pid in tqdm(pids):
        usable = len(pid) != 0 and "." not in pid
        if usable:
            downloadTemplate(joinProp, pid, pid)


# Collect the PID mapped to each column of the external table and download them all.
dlWd([colNamePid[col]["p"] for col in politicians.columns])

  0%|          | 0/36 [00:00<?, ?it/s]

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
SELECT DISTINCT (SUBSTR(STR(?_qid), 32) AS ?qid)  (str(COALESCE(?_target_col_label, ?_target_col)) AS ?target_col) (SUBSTR(STR(?_target_col_guid), 42) AS ?target_col_guid)
WHERE {
  
  ?_join_col_guid ps:P10660 ?_join_col.
  ?_qid p:P10660 ?_join_col_guid.
  ?_target_col_guid ps:P734 ?_target_col.
  ?_qid p:P734 ?_target_col_guid.
  OPTIONAL {
    ?_target_col rdfs:label ?_target_col_label. 
    FILTER (LANG(?_target_col_label) = "en") 
  }
}


  3%|▎         | 1/36 [00:01<00:49,  1.41s/it]

File downloaded successfully!
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
SELECT DISTINCT (SUBSTR(STR(?_qid), 32) AS ?qid)  (str(COALESCE(?_target_col_label, ?_target_col)) AS ?target_col) (SUBSTR(STR(?_target_col_guid), 42) AS ?target_col_guid)
WHERE {
  
  ?_join_col_guid ps:P10660 ?_join_col.
  ?_qid p:P10660 ?_join_col_guid.
  ?_target_col_guid ps:P735 ?_target_col.
  ?_qid p:P735 ?_target_col_guid.
  OPTIONAL {
    ?_target_col rdfs:label ?_target_col_label. 
    FILTER (LANG(?_target_col_label) = "en") 
  }
}


  6%|▌         | 2/36 [00:02<00:42,  1.24s/it]

File downloaded successfully!
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
SELECT DISTINCT (SUBSTR(STR(?_qid), 32) AS ?qid)  (str(COALESCE(?_target_col_label, ?_target_col)) AS ?target_col) (SUBSTR(STR(?_target_col_guid), 42) AS ?target_col_guid)
WHERE {
  
  ?_join_col_guid ps:P10660 ?_join_col.
  ?_qid p:P10660 ?_join_col_guid.
  ?_target_col_guid ps:P8017 ?_target_col.
  ?_qid p:P8017 ?_target_col_guid.
  OPTIONAL {
    ?_target_col rdfs:label ?_target_col_label. 
    FILTER (LANG(?_target_col_label) = "en") 
  }
}


 11%|█         | 4/36 [00:03<00:20,  1.57it/s]

File downloaded successfully!
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
SELECT DISTINCT (SUBSTR(STR(?_qid), 32) AS ?qid)  (str(COALESCE(?_target_col_label, ?_target_col)) AS ?target_col) (SUBSTR(STR(?_target_col_guid), 42) AS ?target_col_guid)
WHERE {
  
  ?_join_col_guid ps:P10660 ?_join_col.
  ?_qid p:P10660 ?_join_col_guid.
  ?_target_col_guid ps:P1449 ?_target_col.
  ?_qid p:P1449 ?_target_col_guid.
  OPTIONAL {
    ?_target_col rdfs:label ?_target_col_label. 
    FILTER (LANG(?_target_col_label) = "en") 
  }
}


 14%|█▍        | 5/36 [00:03<00:18,  1.68it/s]

File downloaded successfully!
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
SELECT DISTINCT (SUBSTR(STR(?_qid), 32) AS ?qid)  (str(COALESCE(?_target_col_label, ?_target_col)) AS ?target_col) (SUBSTR(STR(?_target_col_guid), 42) AS ?target_col_guid)
WHERE {
  
  ?_join_col_guid ps:P10660 ?_join_col.
  ?_qid p:P10660 ?_join_col_guid.
  ?_target_col_guid ps:P569 ?_target_col.
  ?_qid p:P569 ?_target_col_guid.
  OPTIONAL {
    ?_target_col rdfs:label ?_target_col_label. 
    FILTER (LANG(?_target_col_label) = "en") 
  }
}


 19%|█▉        | 7/36 [00:04<00:16,  1.74it/s]

File downloaded successfully!
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
SELECT DISTINCT (SUBSTR(STR(?_qid), 32) AS ?qid)  (str(COALESCE(?_target_col_label, ?_target_col)) AS ?target_col) (SUBSTR(STR(?_target_col_guid), 42) AS ?target_col_guid)
WHERE {
  
  ?_join_col_guid ps:P10660 ?_join_col.
  ?_qid p:P10660 ?_join_col_guid.
  ?_target_col_guid ps:P21 ?_target_col.
  ?_qid p:P21 ?_target_col_guid.
  OPTIONAL {
    ?_target_col rdfs:label ?_target_col_label. 
    FILTER (LANG(?_target_col_label) = "en") 
  }
}


 22%|██▏       | 8/36 [00:05<00:19,  1.46it/s]

File downloaded successfully!
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
SELECT DISTINCT (SUBSTR(STR(?_qid), 32) AS ?qid)  (str(COALESCE(?_target_col_label, ?_target_col)) AS ?target_col) (SUBSTR(STR(?_target_col_guid), 42) AS ?target_col_guid)
WHERE {
  
  ?_join_col_guid ps:P10660 ?_join_col.
  ?_qid p:P10660 ?_join_col_guid.
  ?_target_col_guid ps:P39 ?_target_col.
  ?_qid p:P39 ?_target_col_guid.
  OPTIONAL {
    ?_target_col rdfs:label ?_target_col_label. 
    FILTER (LANG(?_target_col_label) = "en") 
  }
}


 25%|██▌       | 9/36 [00:06<00:22,  1.20it/s]

File downloaded successfully!
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
SELECT DISTINCT (SUBSTR(STR(?_qid), 32) AS ?qid)  (str(COALESCE(?_target_col_label, ?_target_col)) AS ?target_col) (SUBSTR(STR(?_target_col_guid), 42) AS ?target_col_guid)
WHERE {
  
  ?_join_col_guid ps:P10660 ?_join_col.
  ?_qid p:P10660 ?_join_col_guid.
  ?_target_col_guid ps:P102 ?_target_col.
  ?_qid p:P102 ?_target_col_guid.
  OPTIONAL {
    ?_target_col rdfs:label ?_target_col_label. 
    FILTER (LANG(?_target_col_label) = "en") 
  }
}


 36%|███▌      | 13/36 [00:07<00:10,  2.17it/s]

File downloaded successfully!
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
SELECT DISTINCT (SUBSTR(STR(?_qid), 32) AS ?qid)  (str(COALESCE(?_target_col_label, ?_target_col)) AS ?target_col) (SUBSTR(STR(?_target_col_guid), 42) AS ?target_col_guid)
WHERE {
  
  ?_join_col_guid ps:P10660 ?_join_col.
  ?_qid p:P10660 ?_join_col_guid.
  ?_target_col_guid ps:P856 ?_target_col.
  ?_qid p:P856 ?_target_col_guid.
  OPTIONAL {
    ?_target_col rdfs:label ?_target_col_label. 
    FILTER (LANG(?_target_col_label) = "en") 
  }
}


 39%|███▉      | 14/36 [00:08<00:11,  1.93it/s]

File downloaded successfully!
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
SELECT DISTINCT (SUBSTR(STR(?_qid), 32) AS ?qid)  (str(COALESCE(?_target_col_label, ?_target_col)) AS ?target_col) (SUBSTR(STR(?_target_col_guid), 42) AS ?target_col_guid)
WHERE {
  
  ?_join_col_guid ps:P10660 ?_join_col.
  ?_qid p:P10660 ?_join_col_guid.
  ?_target_col_guid ps:P937 ?_target_col.
  ?_qid p:P937 ?_target_col_guid.
  OPTIONAL {
    ?_target_col rdfs:label ?_target_col_label. 
    FILTER (LANG(?_target_col_label) = "en") 
  }
}


 42%|████▏     | 15/36 [00:09<00:12,  1.71it/s]

File downloaded successfully!
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
SELECT DISTINCT (SUBSTR(STR(?_qid), 32) AS ?qid)  (str(COALESCE(?_target_col_label, ?_target_col)) AS ?target_col) (SUBSTR(STR(?_target_col_guid), 42) AS ?target_col_guid)
WHERE {
  
  ?_join_col_guid ps:P10660 ?_join_col.
  ?_qid p:P10660 ?_join_col_guid.
  ?_target_col_guid ps:P1329 ?_target_col.
  ?_qid p:P1329 ?_target_col_guid.
  OPTIONAL {
    ?_target_col rdfs:label ?_target_col_label. 
    FILTER (LANG(?_target_col_label) = "en") 
  }
}


 44%|████▍     | 16/36 [00:09<00:10,  1.84it/s]

File downloaded successfully!
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
SELECT DISTINCT (SUBSTR(STR(?_qid), 32) AS ?qid)  (str(COALESCE(?_target_col_label, ?_target_col)) AS ?target_col) (SUBSTR(STR(?_target_col_guid), 42) AS ?target_col_guid)
WHERE {
  
  ?_join_col_guid ps:P10660 ?_join_col.
  ?_qid p:P10660 ?_join_col_guid.
  ?_target_col_guid ps:P11266 ?_target_col.
  ?_qid p:P11266 ?_target_col_guid.
  OPTIONAL {
    ?_target_col rdfs:label ?_target_col_label. 
    FILTER (LANG(?_target_col_label) = "en") 
  }
}


 47%|████▋     | 17/36 [00:10<00:09,  1.97it/s]

File downloaded successfully!
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
SELECT DISTINCT (SUBSTR(STR(?_qid), 32) AS ?qid)  (str(COALESCE(?_target_col_label, ?_target_col)) AS ?target_col) (SUBSTR(STR(?_target_col_guid), 42) AS ?target_col_guid)
WHERE {
  
  ?_join_col_guid ps:P10660 ?_join_col.
  ?_qid p:P10660 ?_join_col_guid.
  ?_target_col_guid ps:P1019 ?_target_col.
  ?_qid p:P1019 ?_target_col_guid.
  OPTIONAL {
    ?_target_col rdfs:label ?_target_col_label. 
    FILTER (LANG(?_target_col_label) = "en") 
  }
}


 50%|█████     | 18/36 [00:10<00:08,  2.08it/s]

File downloaded successfully!
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
SELECT DISTINCT (SUBSTR(STR(?_qid), 32) AS ?qid)  (str(COALESCE(?_target_col_label, ?_target_col)) AS ?target_col) (SUBSTR(STR(?_target_col_guid), 42) AS ?target_col_guid)
WHERE {
  
  ?_join_col_guid ps:P10660 ?_join_col.
  ?_qid p:P10660 ?_join_col_guid.
  ?_target_col_guid ps:P2002 ?_target_col.
  ?_qid p:P2002 ?_target_col_guid.
  OPTIONAL {
    ?_target_col rdfs:label ?_target_col_label. 
    FILTER (LANG(?_target_col_label) = "en") 
  }
}


 53%|█████▎    | 19/36 [00:11<00:09,  1.77it/s]

File downloaded successfully!
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
SELECT DISTINCT (SUBSTR(STR(?_qid), 32) AS ?qid)  (str(COALESCE(?_target_col_label, ?_target_col)) AS ?target_col) (SUBSTR(STR(?_target_col_guid), 42) AS ?target_col_guid)
WHERE {
  
  ?_join_col_guid ps:P10660 ?_join_col.
  ?_qid p:P10660 ?_join_col_guid.
  ?_target_col_guid ps:P2013 ?_target_col.
  ?_qid p:P2013 ?_target_col_guid.
  OPTIONAL {
    ?_target_col rdfs:label ?_target_col_label. 
    FILTER (LANG(?_target_col_label) = "en") 
  }
}


 58%|█████▊    | 21/36 [00:12<00:07,  2.09it/s]

File downloaded successfully!
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
SELECT DISTINCT (SUBSTR(STR(?_qid), 32) AS ?qid)  (str(COALESCE(?_target_col_label, ?_target_col)) AS ?target_col) (SUBSTR(STR(?_target_col_guid), 42) AS ?target_col_guid)
WHERE {
  
  ?_join_col_guid ps:P10660 ?_join_col.
  ?_qid p:P10660 ?_join_col_guid.
  ?_target_col_guid ps:P2397 ?_target_col.
  ?_qid p:P2397 ?_target_col_guid.
  OPTIONAL {
    ?_target_col rdfs:label ?_target_col_label. 
    FILTER (LANG(?_target_col_label) = "en") 
  }
}


 64%|██████▍   | 23/36 [00:12<00:05,  2.38it/s]

File downloaded successfully!
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
SELECT DISTINCT (SUBSTR(STR(?_qid), 32) AS ?qid)  (str(COALESCE(?_target_col_label, ?_target_col)) AS ?target_col) (SUBSTR(STR(?_target_col_guid), 42) AS ?target_col_guid)
WHERE {
  
  ?_join_col_guid ps:P10660 ?_join_col.
  ?_qid p:P10660 ?_join_col_guid.
  ?_target_col_guid ps:P1855 ?_target_col.
  ?_qid p:P1855 ?_target_col_guid.
  OPTIONAL {
    ?_target_col rdfs:label ?_target_col_label. 
    FILTER (LANG(?_target_col_label) = "en") 
  }
}


 67%|██████▋   | 24/36 [00:13<00:04,  2.40it/s]

File downloaded successfully!
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
SELECT DISTINCT (SUBSTR(STR(?_qid), 32) AS ?qid)  (str(COALESCE(?_target_col_label, ?_target_col)) AS ?target_col) (SUBSTR(STR(?_target_col_guid), 42) AS ?target_col_guid)
WHERE {
  
  ?_join_col_guid ps:P10660 ?_join_col.
  ?_qid p:P10660 ?_join_col_guid.
  ?_target_col_guid ps:P1157 ?_target_col.
  ?_qid p:P1157 ?_target_col_guid.
  OPTIONAL {
    ?_target_col rdfs:label ?_target_col_label. 
    FILTER (LANG(?_target_col_label) = "en") 
  }
}


 69%|██████▉   | 25/36 [00:13<00:05,  2.12it/s]

File downloaded successfully!
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
SELECT DISTINCT (SUBSTR(STR(?_qid), 32) AS ?qid)  (str(COALESCE(?_target_col_label, ?_target_col)) AS ?target_col) (SUBSTR(STR(?_target_col_guid), 42) AS ?target_col_guid)
WHERE {
  
  ?_join_col_guid ps:P10660 ?_join_col.
  ?_qid p:P10660 ?_join_col_guid.
  ?_target_col_guid ps:P2686 ?_target_col.
  ?_qid p:P2686 ?_target_col_guid.
  OPTIONAL {
    ?_target_col rdfs:label ?_target_col_label. 
    FILTER (LANG(?_target_col_label) = "en") 
  }
}


 75%|███████▌  | 27/36 [00:14<00:03,  2.58it/s]

File downloaded successfully!
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
SELECT DISTINCT (SUBSTR(STR(?_qid), 32) AS ?qid)  (str(COALESCE(?_target_col_label, ?_target_col)) AS ?target_col) (SUBSTR(STR(?_target_col_guid), 42) AS ?target_col_guid)
WHERE {
  
  ?_join_col_guid ps:P10660 ?_join_col.
  ?_qid p:P10660 ?_join_col_guid.
  ?_target_col_guid ps:P10660 ?_target_col.
  ?_qid p:P10660 ?_target_col_guid.
  OPTIONAL {
    ?_target_col rdfs:label ?_target_col_label. 
    FILTER (LANG(?_target_col_label) = "en") 
  }
}


 83%|████████▎ | 30/36 [00:15<00:02,  2.65it/s]

File downloaded successfully!
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
SELECT DISTINCT (SUBSTR(STR(?_qid), 32) AS ?qid)  (str(COALESCE(?_target_col_label, ?_target_col)) AS ?target_col) (SUBSTR(STR(?_target_col_guid), 42) AS ?target_col_guid)
WHERE {
  
  ?_join_col_guid ps:P10660 ?_join_col.
  ?_qid p:P10660 ?_join_col_guid.
  ?_target_col_guid ps:P3344 ?_target_col.
  ?_qid p:P3344 ?_target_col_guid.
  OPTIONAL {
    ?_target_col rdfs:label ?_target_col_label. 
    FILTER (LANG(?_target_col_label) = "en") 
  }
}


 89%|████████▉ | 32/36 [00:16<00:01,  2.77it/s]

File downloaded successfully!
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
SELECT DISTINCT (SUBSTR(STR(?_qid), 32) AS ?qid)  (str(COALESCE(?_target_col_label, ?_target_col)) AS ?target_col) (SUBSTR(STR(?_target_col_guid), 42) AS ?target_col_guid)
WHERE {
  
  ?_join_col_guid ps:P10660 ?_join_col.
  ?_qid p:P10660 ?_join_col_guid.
  ?_target_col_guid ps:P2390 ?_target_col.
  ?_qid p:P2390 ?_target_col_guid.
  OPTIONAL {
    ?_target_col rdfs:label ?_target_col_label. 
    FILTER (LANG(?_target_col_label) = "en") 
  }
}


100%|██████████| 36/36 [00:16<00:00,  2.14it/s]

File downloaded successfully!



