In [1]:
import requests

In [3]:
with open("cleanversion3") as f:
    text = f.read()

In [4]:
text

'Wikidata:WikiProject_Civil_Defense/List_of_emergency_medical_services_organizations\nWikidata:WikiProject_Civil_Defense/List_of_rescue_organizations\nWikidata:WikiProject_Civil_Defense/List_of_law_enforcement_agencies\nWikidata:WikiProject_Civil_Defense/List_of_law_enforcement_agencies/Brazil\nWikidata:WikiProject_Civil_Defense/List_of_nuclear_power_plants\nWikidata:WikiProject_Civil_Defense/List_of_dykes\nWikidata:WikiProject_Civil_Defense/List_of_dams\nWikidata:WikiProject_Civil_Defense/List_of_dams/Brazil\nWikidata:WikiProject_Civil_Defense/List_of_shelters\nWikidata:WikiProject_Civil_Defense/List_of_firefighting_organizations/Brazil\nWikidata:WikiProject_Civil_Defense/List_of_aerodromes\nWikidata:WikiProject_IFRC/List/Tree\nWikidata:WikiProject_20th_Century_Press_Archives/Tools_%26_Tasks/Subject_Category_checks\nWikidata:WikiProject_Climate_Change/Queries\nWikidata:WikiProject_Climate_Change/Models\nWikidata:WikiProject_Civil_Defense/List_of_emergency_management_governmental_agenc

In [6]:
lines = text.split("\n")

In [7]:
lines

['Wikidata:WikiProject_Civil_Defense/List_of_emergency_medical_services_organizations',
 'Wikidata:WikiProject_Civil_Defense/List_of_rescue_organizations',
 'Wikidata:WikiProject_Civil_Defense/List_of_law_enforcement_agencies',
 'Wikidata:WikiProject_Civil_Defense/List_of_law_enforcement_agencies/Brazil',
 'Wikidata:WikiProject_Civil_Defense/List_of_nuclear_power_plants',
 'Wikidata:WikiProject_Civil_Defense/List_of_dykes',
 'Wikidata:WikiProject_Civil_Defense/List_of_dams',
 'Wikidata:WikiProject_Civil_Defense/List_of_dams/Brazil',
 'Wikidata:WikiProject_Civil_Defense/List_of_shelters',
 'Wikidata:WikiProject_Civil_Defense/List_of_firefighting_organizations/Brazil',
 'Wikidata:WikiProject_Civil_Defense/List_of_aerodromes',
 'Wikidata:WikiProject_IFRC/List/Tree',
 'Wikidata:WikiProject_20th_Century_Press_Archives/Tools_%26_Tasks/Subject_Category_checks',
 'Wikidata:WikiProject_Climate_Change/Queries',
 'Wikidata:WikiProject_Climate_Change/Models',
 'Wikidata:WikiProject_Civil_Defense/L

In [28]:
import re
import os
import requests

template_prefix = """@prefix ex: <https://sparql.uniprot.org/.well-known/sparql-examples/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix schema: <https://schema.org/> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix wiki: <https://www.wikidata.org/wiki/> .

"""

template_body = """ex:{title} a sh:SPARQLExecutable,
        sh:SPARQLSelectExecutable ;
    rdfs:comment "{comment}"^^rdf:HTML ;
    dcterms:license <https://creativecommons.org/licenses/by-sa/4.0/> ;
    dcterms:isVersionOf {wikiurl} ;
    sh:select """ + '"""' + """
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wds: <http://www.wikidata.org/entity/statement/>
PREFIX wdv: <http://www.wikidata.org/value/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX pq: <http://www.wikidata.org/prop/qualifier/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX bd: <http://www.bigdata.com/rdf#>

PREFIX wdref: <http://www.wikidata.org/reference/>
PREFIX psv: <http://www.wikidata.org/prop/statement/value/>
PREFIX psn: <http://www.wikidata.org/prop/statement/value-normalized/>
PREFIX pqv: <http://www.wikidata.org/prop/qualifier/value/>
PREFIX pqn: <http://www.wikidata.org/prop/qualifier/value-normalized/>
PREFIX pr: <http://www.wikidata.org/prop/reference/>
PREFIX prv: <http://www.wikidata.org/prop/reference/value/>
PREFIX prn: <http://www.wikidata.org/prop/reference/value-normalized/>
PREFIX wdno: <http://www.wikidata.org/prop/novalue/>
PREFIX wdata: <http://www.wikidata.org/wiki/Special:EntityData/>

PREFIX schema: <http://schema.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX prov: <http://www.w3.org/ns/prov#>
PREFIX bds: <http://www.bigdata.com/rdf/search#>
PREFIX gas: <http://www.bigdata.com/rdf/gas#>
PREFIX hint: <http://www.bigdata.com/queryHints#>

{query}
""" + '"""' + """ ;
    schema:target <https://query.wikidata.org/sparql/> .
"""

# Function to generate SPARQL template
def generate_sparql_template(file_name, sparql_query, wikiurl):
    title = file_name.replace('.rq', '')
    comment = f"Sparql query from file {file_name}"
    return template_prefix + template_body.format(title=title, comment=comment, query=sparql_query, wikiurl=wikiurl)


for line in lines:
    # Create a directory named after the line
    wikiURL = f"<https://www.wikidata.org/wiki/{line}>"
    directory_name = "sparqlrdf/" + line.replace(" ", "_")  # Replace spaces with underscores for the directory name
    os.makedirs(directory_name, exist_ok=True)
    
    # Fetch the content of the page
    uri = f"https://www.wikidata.org/w/api.php?action=query&prop=revisions&rvprop=content&format=json&formatversion=2&titles={line}"
    response = requests.get(uri)
    data = response.json()
    
    # Extract the wikitext content
    try:
        wikitext = data["query"]["pages"][0]["revisions"][0]["content"]
    except (IndexError, KeyError):
        print(f"Error fetching content for {line}")
        continue

    # Regular expression pattern to match {{sparql ...}} and {{sparql2 ...}}, case insensitive
    pattern = r'{{sparql2?.*?}}'
    snippets = re.findall(pattern, wikitext, re.DOTALL | re.IGNORECASE)
    
    # Write each snippet to a separate .ttl file
    for i, snippet in enumerate(snippets):
        # Clean up snippet by removing the template markup
        clean_snippet = re.sub(r"^\{\{sparql2?\|query=", "", snippet, flags=re.IGNORECASE)
        clean_snippet = re.sub(r"\}\}$", "", clean_snippet)
        
        # Generate the SPARQL template
        file_name = f"snippet_{i + 1}.ttl"
        full_file_path = os.path.join(directory_name, file_name)
        sparql_template = generate_sparql_template(full_file_path, clean_snippet, wikiURL)
        
        # Save the SPARQL template to a file
        
        with open(full_file_path, "w", encoding="utf-8") as file:
            file.write(sparql_template)
            
        print(f"Snippet saved to {full_file_path}")

Snippet saved to sparqlrdf/Wikidata:WikiProject_Civil_Defense/List_of_emergency_medical_services_organizations/snippet_1.ttl
Snippet saved to sparqlrdf/Wikidata:WikiProject_Civil_Defense/List_of_rescue_organizations/snippet_1.ttl
Snippet saved to sparqlrdf/Wikidata:WikiProject_Civil_Defense/List_of_law_enforcement_agencies/snippet_1.ttl
Snippet saved to sparqlrdf/Wikidata:WikiProject_Civil_Defense/List_of_law_enforcement_agencies/snippet_2.ttl
Snippet saved to sparqlrdf/Wikidata:WikiProject_Civil_Defense/List_of_law_enforcement_agencies/snippet_3.ttl
Snippet saved to sparqlrdf/Wikidata:WikiProject_Civil_Defense/List_of_law_enforcement_agencies/Brazil/snippet_1.ttl
Snippet saved to sparqlrdf/Wikidata:WikiProject_Civil_Defense/List_of_nuclear_power_plants/snippet_1.ttl
Snippet saved to sparqlrdf/Wikidata:WikiProject_Civil_Defense/List_of_dykes/snippet_1.ttl
Snippet saved to sparqlrdf/Wikidata:WikiProject_Civil_Defense/List_of_dams/snippet_1.ttl
Snippet saved to sparqlrdf/Wikidata:WikiPro

In [17]:
print(len(lines))

609
