In [153]:
import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON
from tqdm import tqdm

tqdm.pandas()

sparql = SPARQLWrapper("https://dbpedia.org/sparql")

In [154]:
ask_query = """
@prefix dbr:    <http://dbpedia.org/resource/> .

ASK  {{dbr:{} a owl:Thing. }}
"""

select_query1 = """
@prefix dbr:    <http://dbpedia.org/resource/> .


SELECT DISTINCT ?label WHERE {{
    {{ 
          dbr:{} <http://purl.org/linguistics/gold/hypernym> ?label. 
          ?label a owl:Thing .
    }}

}}
"""

select_query2 = """
@prefix dbr:    <http://dbpedia.org/resource/> .


SELECT DISTINCT ?label WHERE {{
    {{ 
    
          dbr:{} rdf:type ?class. 
          ?class rdfs:subClassOf* ?label .
          ?label a owl:Class.
          FILTER(?class != ?label) . 
    }}
}}
"""

In [151]:
def query_dbpedia(query):
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    return results
    
def query_item(item):
    try:
        result = query_dbpedia(ask_query.format(item))
        if result['boolean'] == 'true':
            return [f'http://dbpedia.org/resource/{item}']
        else:
            results = query_dbpedia(select_query1.format(item))
            df = pd.json_normalize(results['results']['bindings'])
            if df.shape[0] > 0:
                return df['label.value'].tolist()
            else:
                results = query_dbpedia(select_query2.format(item))
                df = pd.json_normalize(results['results']['bindings'])
                if df.shape[0] > 0:
                    return df['label.value'].tolist()
    except Exception as e:
        print(item, e)
        return []

In [62]:
def replace_characters(string):
    for ch in ["(", ")", "'", "_"]:
        string = string.replace(ch, f'\\{ch}')
    return string

In [157]:
resources = pd.read_csv('../data/dbpedia_resources.csv', index_col=False)
resources['name'] = resources['uri'].apply(lambda x: x.split('/')[-1])
resources['name'] = resources['name'].apply(lambda x: replace_characters(x))

In [158]:
resources_to_map = resources[resources['count'] >= 10]
resources_to_map

Unnamed: 0,uri,count,name
0,http://dbpedia.org/resource/White_supremacy,8774,White\_supremacy
1,http://dbpedia.org/resource/VAX,6709,VAX
2,http://dbpedia.org/resource/Hurricane_Floyd,5324,Hurricane\_Floyd
3,http://dbpedia.org/resource/Self-defense,3600,Self-defense
4,http://dbpedia.org/resource/Kenosha_Comets,3399,Kenosha\_Comets
...,...,...,...
1162,http://dbpedia.org/resource/Dam,10,Dam
1163,http://dbpedia.org/resource/Chicago_Police_Dep...,10,Chicago\_Police\_Department
1164,http://dbpedia.org/resource/God_in_Christianity,10,God\_in\_Christianity
1165,http://dbpedia.org/resource/Shorthand,10,Shorthand


In [159]:
resources_to_map['uri_list'] = resources_to_map['name'].progress_apply(query_item)

  0%|▏                                                                                | 2/1167 [00:00<04:39,  4.17it/s]

White\_supremacy HTTP Error 502: Bad Gateway


  0%|▏                                                                                | 3/1167 [00:00<04:42,  4.12it/s]

VAX HTTP Error 502: Bad Gateway


  0%|▎                                                                                | 4/1167 [00:00<04:47,  4.05it/s]

Hurricane\_Floyd HTTP Error 502: Bad Gateway


  0%|▎                                                                                | 5/1167 [00:01<04:50,  4.00it/s]

Self-defense HTTP Error 502: Bad Gateway


  1%|▍                                                                                | 6/1167 [00:01<04:51,  3.98it/s]

Kenosha\_Comets HTTP Error 502: Bad Gateway


  1%|▍                                                                                | 7/1167 [00:01<04:54,  3.94it/s]

Court HTTP Error 502: Bad Gateway


  1%|▌                                                                                | 8/1167 [00:01<04:50,  3.99it/s]

Defamation HTTP Error 502: Bad Gateway


  1%|▌                                                                                | 9/1167 [00:02<04:49,  3.99it/s]

Doxing HTTP Error 502: Bad Gateway


  1%|▋                                                                               | 10/1167 [00:02<04:49,  4.00it/s]

Arizona\_State\_University HTTP Error 502: Bad Gateway


  1%|▊                                                                               | 11/1167 [00:02<04:49,  3.99it/s]

Black\_people HTTP Error 502: Bad Gateway


  1%|▊                                                                               | 12/1167 [00:02<04:48,  4.00it/s]

Waukesha\_Engine HTTP Error 502: Bad Gateway


  1%|▉                                                                               | 13/1167 [00:03<04:47,  4.01it/s]

Rifle HTTP Error 502: Bad Gateway


  1%|▉                                                                               | 14/1167 [00:03<04:48,  3.99it/s]

AR-15\_style\_rifle HTTP Error 502: Bad Gateway


  1%|█                                                                               | 15/1167 [00:03<04:46,  4.01it/s]

William\_Charles\_Schroeder HTTP Error 502: Bad Gateway


  1%|█                                                                               | 16/1167 [00:03<04:43,  4.05it/s]

Planet HTTP Error 502: Bad Gateway


  1%|█▏                                                                              | 17/1167 [00:04<04:43,  4.06it/s]

Beer HTTP Error 502: Bad Gateway


  2%|█▏                                                                              | 18/1167 [00:04<04:43,  4.05it/s]

Patent HTTP Error 502: Bad Gateway


  2%|█▎                                                                              | 19/1167 [00:04<04:42,  4.07it/s]

History\_of\_the\_United\_States\_Republican\_Party HTTP Error 502: Bad Gateway


  2%|█▎                                                                              | 20/1167 [00:04<04:40,  4.09it/s]

Homicide HTTP Error 502: Bad Gateway


  2%|█▍                                                                              | 21/1167 [00:05<04:36,  4.14it/s]

President\_of\_the\_United\_States HTTP Error 502: Bad Gateway


  2%|█▌                                                                              | 22/1167 [00:05<04:34,  4.17it/s]

Fuck HTTP Error 502: Bad Gateway


  2%|█▌                                                                              | 23/1167 [00:05<04:37,  4.13it/s]

Coronavirus\_disease HTTP Error 502: Bad Gateway


  2%|█▋                                                                              | 24/1167 [00:05<04:36,  4.13it/s]

Right-wing\_politics HTTP Error 502: Bad Gateway


  2%|█▋                                                                              | 25/1167 [00:06<04:38,  4.10it/s]

Make\_America\_Great\_Again HTTP Error 502: Bad Gateway


  2%|█▊                                                                              | 26/1167 [00:06<04:38,  4.09it/s]

Podcast HTTP Error 502: Bad Gateway


  2%|█▊                                                                              | 27/1167 [00:06<04:41,  4.05it/s]

Vigilantism HTTP Error 502: Bad Gateway


  2%|█▉                                                                              | 28/1167 [00:06<04:45,  3.99it/s]

Federal\_Bureau\_of\_Investigation HTTP Error 502: Bad Gateway


  2%|█▉                                                                              | 29/1167 [00:07<04:42,  4.03it/s]

University\_of\_Michigan HTTP Error 502: Bad Gateway


  3%|██                                                                              | 30/1167 [00:07<04:39,  4.07it/s]

Social\_media HTTP Error 502: Bad Gateway


  3%|██▏                                                                             | 31/1167 [00:07<04:35,  4.12it/s]

Firearm HTTP Error 502: Bad Gateway


  3%|██▏                                                                             | 32/1167 [00:07<04:36,  4.11it/s]

Bureau\_of\_Land\_Management HTTP Error 502: Bad Gateway


  3%|██▎                                                                             | 33/1167 [00:08<04:37,  4.09it/s]

Rallying HTTP Error 502: Bad Gateway


  3%|██▎                                                                             | 34/1167 [00:08<04:33,  4.14it/s]

Radio\_broadcasting HTTP Error 502: Bad Gateway


  3%|██▍                                                                             | 35/1167 [00:08<04:38,  4.06it/s]

Parade HTTP Error 502: Bad Gateway


  3%|██▍                                                                             | 36/1167 [00:08<04:40,  4.03it/s]

Race\_and\_ethnicity\_in\_the\_United\_States\_Census HTTP Error 502: Bad Gateway


  3%|██▌                                                                             | 37/1167 [00:09<04:41,  4.02it/s]

Domestic\_terrorism HTTP Error 502: Bad Gateway


  3%|██▌                                                                             | 38/1167 [00:09<04:44,  3.97it/s]

Skateboard HTTP Error 502: Bad Gateway


  3%|██▋                                                                             | 39/1167 [00:09<04:45,  3.95it/s]

City HTTP Error 502: Bad Gateway


  3%|██▋                                                                             | 40/1167 [00:09<04:42,  3.99it/s]

Far-right\_politics HTTP Error 502: Bad Gateway


  4%|██▊                                                                             | 41/1167 [00:10<04:40,  4.01it/s]

Hectare HTTP Error 502: Bad Gateway


  4%|██▉                                                                             | 42/1167 [00:10<04:37,  4.05it/s]

Art\_movement HTTP Error 502: Bad Gateway


  4%|██▉                                                                             | 43/1167 [00:10<04:37,  4.06it/s]

Racism HTTP Error 502: Bad Gateway


  4%|███                                                                             | 44/1167 [00:10<04:34,  4.08it/s]

University\_of\_Oxford HTTP Error 502: Bad Gateway


  4%|███                                                                             | 45/1167 [00:11<04:35,  4.07it/s]

Active\_shooter HTTP Error 502: Bad Gateway


  4%|███▏                                                                            | 46/1167 [00:11<04:35,  4.06it/s]

Right\_of\_self-defense HTTP Error 502: Bad Gateway


  4%|███▏                                                                            | 47/1167 [00:11<04:36,  4.05it/s]

Vaccine HTTP Error 502: Bad Gateway


  4%|███▎                                                                            | 48/1167 [00:11<04:38,  4.02it/s]

Trial HTTP Error 502: Bad Gateway


  4%|███▎                                                                            | 49/1167 [00:12<04:35,  4.05it/s]

Fat HTTP Error 502: Bad Gateway


  4%|███▍                                                                            | 50/1167 [00:12<04:36,  4.04it/s]

Chicken\_McNuggets HTTP Error 502: Bad Gateway


  4%|███▍                                                                            | 51/1167 [00:12<04:34,  4.07it/s]

Areca\_nut HTTP Error 502: Bad Gateway


  4%|███▌                                                                            | 52/1167 [00:12<04:33,  4.07it/s]

Sex\_trafficking HTTP Error 502: Bad Gateway


  5%|███▋                                                                            | 53/1167 [00:13<04:30,  4.11it/s]

Trafficking\_of\_children HTTP Error 502: Bad Gateway


  5%|███▋                                                                            | 54/1167 [00:13<04:33,  4.07it/s]

LOL HTTP Error 502: Bad Gateway


  5%|███▊                                                                            | 55/1167 [00:13<04:30,  4.11it/s]

White\_nationalism HTTP Error 502: Bad Gateway


  5%|███▊                                                                            | 56/1167 [00:13<04:35,  4.03it/s]

Fascism HTTP Error 502: Bad Gateway


  5%|███▉                                                                            | 57/1167 [00:14<04:36,  4.02it/s]

Courthouse HTTP Error 502: Bad Gateway


  5%|███▉                                                                            | 58/1167 [00:14<04:36,  4.01it/s]

Abortion HTTP Error 502: Bad Gateway


  5%|████                                                                            | 59/1167 [00:14<04:37,  4.00it/s]

QAnon HTTP Error 502: Bad Gateway





KeyboardInterrupt: 

In [None]:
resources_to_map

In [152]:
query_item('Arizona_State_University')

['http://dbpedia.org/resource/University']

In [131]:
resources_to_map.set_index('uri')[['uri_list']].explode('uri_list').to_csv('../data/dbpedia_resource_mapping.csv')