In [12]:
def queryDBPedia(originalQuery, limit=1000, filename="results", header=True):
    """Page through a SPARQL SELECT on the DBpedia endpoint and append the rows to a CSV file.

    The query is executed repeatedly with ``LIMIT 1000`` and an increasing
    ``OFFSET`` until a page comes back empty or ``limit`` pages have been
    requested. Every field is written double-quoted; embedded double quotes
    are escaped by the ``csv`` module so the file stays parseable.

    Parameters
    ----------
    originalQuery : str
        SPARQL SELECT text without LIMIT/OFFSET clauses; they are appended here.
    limit : int
        Maximum number of 1000-row pages to request (not a row count).
    filename : str
        Output path without extension; ``".csv"`` is appended. The file is
        opened in append mode, so repeated calls add to existing content.
    header : bool
        When true, a row of column names is written before any data.

    Notes
    -----
    A variable that is unbound in a result row is written as the string ``na``.
    NOTE(review): LIMIT/OFFSET paging is only reliable when the endpoint returns
    rows in a stable order -- consider adding ORDER BY to the query; this
    function does not enforce it.
    """
    import csv

    from SPARQLWrapper import SPARQLWrapper, JSON

    page_size = 1000  # rows requested per round trip
    csv_path = filename + ".csv"

    def append_rows(rows):
        # newline="" plus an explicit "\n" terminator keeps quoted fields that
        # contain line breaks intact and avoids platform newline translation.
        with open(csv_path, "a", encoding="utf-8", newline="") as f:
            csv.writer(f, quoting=csv.QUOTE_ALL, lineterminator="\n").writerows(rows)

    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql.setReturnFormat(JSON)

    # A one-row probe is enough to learn the projected variable names.
    sparql.setQuery(originalQuery + " LIMIT 1")
    column_names = sparql.query().convert()["head"]["vars"]

    if header:
        append_rows([column_names])

    for i in range(limit):
        offset = page_size * i
        print("Rows Processed: " + str(offset))
        sparql.setQuery(
            originalQuery + "\nLIMIT " + str(page_size) + "\nOFFSET " + str(offset)
        )
        bindings = sparql.query().convert()["results"]["bindings"]

        # An empty page means the previous page was the last one.
        if not bindings:
            break

        # Unbound (e.g. OPTIONAL) variables are absent from the binding dict.
        page_rows = [
            [binding[name]["value"] if name in binding else "na" for name in column_names]
            for binding in bindings
        ]
        # One open/close per page: progress is visible on disk after every
        # page without paying for a file open on every single row.
        append_rows(page_rows)

In [92]:
# SPARQL SELECT passed to queryDBPedia, which appends its own LIMIT/OFFSET clauses.
# Required patterns: the film links to dbr:Romantic_comedy through dbo:wikiPageWikiLink
# and has a page ID, at least one dbp:starring value, an rdfs:comment and a dbp:name.
# Subject, cinematography, director, gross, producer and language are OPTIONAL, so they
# may be unbound in a result row. The FILTER keeps only English rdfs:comment values.
# Multi-valued starring/subject/producer are collapsed with GROUP_CONCAT using "-" as separator.
# NOTE(review): no PREFIX declarations and no GROUP BY alongside the aggregates -- presumably
# this relies on the DBpedia endpoint's predefined prefixes and lenient parsing; confirm
# before running it against another SPARQL endpoint.
originalQuery = """
SELECT DISTINCT ?film, ?number, ?abstract, (GROUP_CONCAT(DISTINCT ?starring; SEPARATOR="-") AS ?starring), ?name, (GROUP_CONCAT(DISTINCT ?subject; SEPARATOR="-") AS ?subjects)
                , ?cinematography, ?director, ?gross, (GROUP_CONCAT(DISTINCT ?producer; SEPARATOR="-") AS ?producer), ?language
WHERE
     {
        ?film dbo:wikiPageWikiLink dbr:Romantic_comedy .
        ?film dbo:wikiPageID ?number .
        ?film dbp:starring ?starring .
        ?film rdfs:comment ?abstract .
        ?film dbp:name ?name .
        OPTIONAL { ?film dct:subject ?subject } .
        OPTIONAL { ?film dbo:cinematography ?cinematography } .
        OPTIONAL { ?film dbo:director ?director } .
        OPTIONAL { ?film dbo:gross ?gross } .
        OPTIONAL { ?film dbo:producer ?producer } .
        OPTIONAL { ?film dbp:language ?language } .


        FILTER ( LANG ( ?abstract ) = 'en' )
      }
"""

In [94]:
# Fetch the romantic-comedy query (at most 100 pages) and append it to RAWRomCom.csv.
queryDBPedia(
    originalQuery,
    filename="RAWRomCom",
    limit=100,
)

Rows Processed: 0
Rows Processed: 1000
Rows Processed: 2000
Rows Processed: 3000
Rows Processed: 4000
Rows Processed: 5000
Rows Processed: 6000


In [15]:
import os

# Output-file label -> DBpedia resource each film must link to via dbo:wikiPageWikiLink.
genres = {
    "RomCom": "dbr:Romantic_comedy",
    "Action": "dbr:Action_film",
    "2020Horror": "dbc:2020_horror_films",
    "2010Horror": "dbc:2010_horror_films",
    "2000Horror": "dbc:2000_horror_films",
    "1990Horror": "dbc:1990_horror_films",
    "1980Horror": "dbc:1980_horror_films",
    "1970Horror": "dbc:1970_horror_films",
}

# queryDBPedia opens its output in append mode and does not create missing
# directories, so make sure the target folder exists before the first write.
raw_dir = "Movie Data/RAW"
os.makedirs(raw_dir, exist_ok=True)

# The query text is the same for every genre except for the linked resource,
# which is spliced between these two fragments.
query_head = '''
        SELECT DISTINCT ?film, ?number, ?abstract, (GROUP_CONCAT(DISTINCT ?starring; SEPARATOR="--") AS ?starring), ?name, (GROUP_CONCAT(DISTINCT ?subject; SEPARATOR="-") AS ?subjects)
                , ?cinematography, ?director, ?gross, (GROUP_CONCAT(DISTINCT ?producer; SEPARATOR="--") AS ?producer), ?language
        WHERE
     {
        ?film dbo:wikiPageWikiLink 
        '''
query_tail = '''.
        ?film dbo:wikiPageID ?number .
        ?film dbp:starring ?starring .
        ?film rdfs:comment ?abstract .
        ?film dbp:name ?name .
        OPTIONAL { ?film dct:subject ?subject } .
        OPTIONAL { ?film dbo:cinematography ?cinematography } .
        OPTIONAL { ?film dbo:director ?director } .
        OPTIONAL { ?film dbo:gross ?gross } .
        OPTIONAL { ?film dbo:producer ?producer } .
        OPTIONAL { ?film dbp:language ?language } .


        FILTER ( LANG ( ?abstract ) = 'en' )
      }'''

for genre, resource in genres.items():
    print("Processing " + genre)

    query = query_head + resource + query_tail

    # Up to 100 pages per genre, appended to "Movie Data/RAW/RAW<label>.csv".
    queryDBPedia(query, limit=100, filename=raw_dir + "/RAW" + genre)

Processing RomCom
Rows Processed: 0
Rows Processed: 1000
Rows Processed: 2000
Rows Processed: 3000
Rows Processed: 4000
Rows Processed: 5000
Rows Processed: 6000
Processing Action
Rows Processed: 0
Rows Processed: 1000
Rows Processed: 2000
Rows Processed: 3000
Rows Processed: 4000
Rows Processed: 5000
Rows Processed: 6000
Rows Processed: 7000
Rows Processed: 8000
Rows Processed: 9000
Processing 2020Horror
Rows Processed: 0
Rows Processed: 1000
Processing 2010Horror
Rows Processed: 0
Rows Processed: 1000
Processing 2000Horror
Rows Processed: 0
Rows Processed: 1000
Processing 1990Horror
Rows Processed: 0
Rows Processed: 1000
Processing 1980Horror
Rows Processed: 0
Rows Processed: 1000
Processing 1970Horror
Rows Processed: 0
Rows Processed: 1000
