In [3]:
import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON
from rdflib import Graph, Namespace, URIRef, Literal
from rdflib.namespace import RDF, RDFS, XSD, FOAF
import re

class NYCMayorKnowledgeGraph:
    """
    Extracts NYC Mayor data from DBpedia and builds a readable RDF Knowledge Graph
    """
    def __init__(self):
        self.endpoint = SPARQLWrapper("http://dbpedia.org/sparql")
        self.endpoint.setReturnFormat(JSON)
        self.endpoint.setTimeout(30)
        
        self.graph = Graph()
        
        # Define namespaces
        self.NYC = Namespace("http://example.org/nyc/")
        self.MAYOR = Namespace("http://example.org/mayor/")
        self.SCHEMA = Namespace("http://schema.org/")
        
        # Bind namespaces
        self.graph.bind("nyc", self.NYC)
        self.graph.bind("mayor", self.MAYOR)
        self.graph.bind("schema", self.SCHEMA)
        self.graph.bind("foaf", FOAF)
        
    def generate_sparql_query(self):
        """SPARQL query to fetch NYC mayors with optional data"""
        query = """
        PREFIX dbo: <http://dbpedia.org/ontology/>
        PREFIX dbp: <http://dbpedia.org/property/>
        PREFIX dct: <http://purl.org/dc/terms/>
        PREFIX dbc: <http://dbpedia.org/resource/Category:>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

        SELECT DISTINCT ?mayor ?name ?birth ?death ?party ?start ?end
        WHERE {
            ?mayor dct:subject dbc:Mayors_of_New_York_City .
            ?mayor rdfs:label ?name .
            FILTER(lang(?name)='en')
            OPTIONAL { ?mayor dbo:birthDate ?birth }
            OPTIONAL { ?mayor dbo:deathDate ?death }
            OPTIONAL { ?mayor dbo:party ?party }
            OPTIONAL { ?mayor dbp:termStart ?start }
            OPTIONAL { ?mayor dbp:termEnd ?end }
        }
        ORDER BY ?start
        """
        return query
    
    def fetch_mayors_from_dbpedia(self):
        """Fetch mayor data from DBpedia and return as list of dicts"""
        self.endpoint.setQuery(self.generate_sparql_query())
        try:
            results = self.endpoint.query().convert()
            bindings = results["results"]["bindings"]
            mayors_data = []
            for b in bindings:
                mayors_data.append({
                    "uri": b.get("mayor", {}).get("value", ""),
                    "name": b.get("name", {}).get("value", "N/A"),
                    "birth_date": self._clean_date(b.get("birth", {}).get("value")),
                    "death_date": self._clean_date(b.get("death", {}).get("value")),
                    "party_uri": b.get("party", {}).get("value"),
                    "party_name": self._extract_name_from_uri(b.get("party", {}).get("value")),
                    "term_start": self._clean_date(b.get("start", {}).get("value")),
                    "term_end": self._clean_date(b.get("end", {}).get("value"))
                })
            return mayors_data
        except Exception as e:
            print(f"Error querying DBpedia: {e}")
            return []
    
    def _clean_date(self, date_str):
        if date_str: 
            return date_str.split('T')[0]
        return None
    
    def _extract_name_from_uri(self, uri):
        if uri: 
            return uri.split('/')[-1].replace('_', ' ')
        return None
    
    def _create_uri(self, name):
        clean_name = re.sub(r'[^\w\s-]', '', name)
        clean_name = re.sub(r'[-\s]+', '_', clean_name)
        return self.MAYOR[clean_name]
    
    def filter_by_date_range(self, mayors_data, start_year=2000, end_year=2025):
        """Filter mayors who served within a specific year range"""
        filtered = []
        for m in mayors_data:
            try:
                start = int(m.get("term_start", "0")[:4])
                end = int(m.get("term_end", str(end_year))[:4])
                if start <= end_year and end >= start_year:
                    filtered.append(m)
            except:
                continue
        return filtered
    
    def build_knowledge_graph(self, mayors_data):
        """Create RDF graph from mayor data"""
        nyc_city = self.NYC["New_York_City"]
        self.graph.add((nyc_city, RDF.type, self.SCHEMA.City))
        self.graph.add((nyc_city, RDFS.label, Literal("New York City")))
        
        mayor_office = self.NYC["Office_of_Mayor"]
        self.graph.add((mayor_office, RDF.type, self.SCHEMA.GovernmentOrganization))
        self.graph.add((mayor_office, RDFS.label, Literal("Office of the Mayor")))
        self.graph.add((mayor_office, self.SCHEMA.parentOrganization, nyc_city))
        
        for m in mayors_data:
            mayor_uri = self._create_uri(m["name"])
            self.graph.add((mayor_uri, RDF.type, self.SCHEMA.Person))
            self.graph.add((mayor_uri, RDFS.label, Literal(m["name"])))
            
            if m.get("birth_date"):
                self.graph.add((mayor_uri, self.SCHEMA.birthDate, Literal(m["birth_date"], datatype=XSD.date)))
            if m.get("death_date"):
                self.graph.add((mayor_uri, self.SCHEMA.deathDate, Literal(m["death_date"], datatype=XSD.date)))
            
            if m.get("party_name"):
                party_uri = self.NYC[f"Party_{m['party_name'].replace(' ', '_')}"]
                self.graph.add((party_uri, RDF.type, self.SCHEMA.Organization))
                self.graph.add((party_uri, RDFS.label, Literal(m['party_name'])))
                self.graph.add((mayor_uri, self.SCHEMA.affiliation, party_uri))
            
            if m.get("term_start"):
                term_uri = self.NYC[f"Term_{m['name'].replace(' ', '_')}_{m['term_start'][:4]}"]
                self.graph.add((term_uri, RDF.type, self.SCHEMA.Role))
                self.graph.add((term_uri, RDFS.label, Literal(f"Mayoral Term of {m['name']}")))
                self.graph.add((mayor_uri, self.SCHEMA.hasOccupation, term_uri))
                self.graph.add((term_uri, self.SCHEMA.isPartOf, mayor_office))
                self.graph.add((term_uri, self.SCHEMA.startDate, Literal(m['term_start'], datatype=XSD.date)))
                self.graph.add((term_uri, self.SCHEMA.endDate, Literal(m.get('term_end', 'Present'))))
        
        # Add succession relationships
        for i in range(len(mayors_data)-1):
            current = self._create_uri(mayors_data[i]["name"])
            next_mayor = self._create_uri(mayors_data[i+1]["name"])
            self.graph.add((current, self.MAYOR.succeededBy, next_mayor))
            self.graph.add((next_mayor, self.MAYOR.precededBy, current))
    
    def save_to_turtle(self, filename="nyc_mayors_kg.ttl"):
        self.graph.serialize(destination=filename, format="turtle")
    
    def print_triples(self):
        """Print triples with readable names instead of full URIs"""
        def readable(node):
            if isinstance(node, URIRef):
                return node.split('/')[-1].replace('_', ' ')
            return str(node)
        
        for s, p, o in self.graph:
            print(f"{readable(s)} → {readable(p)} → {readable(o)}")

# -------------------- MAIN EXECUTION --------------------

def main():
    kg_builder = NYCMayorKnowledgeGraph()
    
    all_mayors = kg_builder.fetch_mayors_from_dbpedia()
    filtered_mayors = kg_builder.filter_by_date_range(all_mayors)
    
    kg_builder.build_knowledge_graph(filtered_mayors)
    kg_builder.save_to_turtle("nyc_mayors_kg.ttl")
    kg_builder.print_triples()

if __name__ == "__main__":
    main()


Rudy Giuliani → 22-rdf-syntax-ns#type → Person
Michael Bloomberg → succeededBy → Bill de Blasio
Term Michael Bloomberg 2002 → 22-rdf-syntax-ns#type → Role
Party Republican Party (United States) → rdf-schema#label → Republican Party (United States)
Term Rudy Giuliani 1981 → 22-rdf-syntax-ns#type → Role
Michael Bloomberg → affiliation → Party Democratic Party (United States)
Term Rudy Giuliani 1994 → endDate → 2001-12-31
Term Rudy Giuliani 1983 → endDate → 2001-12-31
Eric Adams → succeededBy → Bill de Blasio
Bill de Blasio → precededBy → Eric Adams
Party Republican Party (United States) → 22-rdf-syntax-ns#type → Organization
Rudy Giuliani → rdf-schema#label → Rudy Giuliani
Rudy Giuliani → succeededBy → Michael Bloomberg
Michael Bloomberg → birthDate → 1942-02-14
Eric Adams → affiliation → Party Democratic Party (United States)
Term Rudy Giuliani 1983 → 22-rdf-syntax-ns#type → Role
New York City → rdf-schema#label → New York City
Term Rudy Giuliani 1983 → startDate → 1983-06-03
Term Bill 