In [None]:
from rdflib import Graph
from rdflib import RDF
from rdflib import URIRef
from rdflib import Literal
import networkx as nx
import matplotlib.pyplot as plt
from rdflib.plugins.stores.sparqlstore import SPARQLStore
from queries import QUERY_AI, QUERY_FILM, QUERY_PERSON
from paths_to_examples import REPO_BOOKS, REPO_PEOPLE, WIKIDATA, GPT_EX, GPT_SUBCLASSES
from SPARQLWrapper import SPARQLWrapper, RDF, JSON
import random
from concurrent.futures import ThreadPoolExecutor
import sys


# Notebook-wide switches read by the helper functions defined in this cell.
DEBUG = True  # when True, debug_print() and print_set() write to stdout
PRINTGRAPH = False  # when True, show_graph() draws the networkx graph with matplotlib
LOCAL = False  # meant to choose between a local ttl file and a SPARQL endpoint; not read in the visible code

def debug_print(*args, **kwargs):
    """Forward all arguments to ``print`` while the module-level ``DEBUG`` flag is truthy.

    Does nothing when ``DEBUG`` is falsy. Always returns None.
    """
    if not DEBUG:
        return
    print(*args, **kwargs)


def print_set(set):
    """Print every element of ``set`` on its own line while ``DEBUG`` is truthy.

    Args:
        set: Any iterable; the elements are printed in iteration order.
    """
    if not DEBUG:
        return
    for element in set:
        print(element)

def show_graph(G):
    """Draw ``G`` with matplotlib when the module-level ``PRINTGRAPH`` flag is truthy.

    Nodes are placed with a spring layout and edges are captioned with their
    ``label`` attribute. Does nothing when ``PRINTGRAPH`` is falsy.

    Args:
        G: Graph object accepted by the networkx drawing functions.
    """
    if not PRINTGRAPH:
        return

    # node positions
    layout = nx.spring_layout(G, k=0.5, iterations=50)

    # nodes and edges
    plt.figure(figsize=(12, 8))
    draw_options = dict(
        with_labels=True,
        node_color="lightblue",
        node_size=1500,
        font_size=10,
        font_weight="bold",
        arrows=True,
    )
    nx.draw(G, layout, **draw_options)

    # edge captions
    nx.draw_networkx_edge_labels(
        G,
        layout,
        edge_labels=nx.get_edge_attributes(G, 'label'),
        font_color='red',
    )

    plt.title("RDF Graph")
    plt.axis("off")
    plt.show()

    print("FINISHED PRINTING GRAPH\n\n")

def send_query(query, sparql, format):
    """Run ``query`` on a SPARQL client and return the converted result.

    Args:
        query: SPARQL query text.
        sparql: Client object offering ``setQuery``, ``setReturnFormat`` and
            ``query`` (the notebook passes a ``SPARQLWrapper`` instance).
        format: Return format handed to ``sparql.setReturnFormat``.

    Returns:
        Whatever ``sparql.query().convert()`` yields for the chosen format.

    Raises:
        Exception: any error raised while configuring, sending or converting
            the query is printed and then re-raised unchanged.
    """
    sparql.setQuery(query)
    try:
        sparql.setReturnFormat(format)
        return sparql.query().convert()
    except Exception as e:
        print("Sending query failed:", e)
        # Re-raise the real cause; falling through to a `return results` would
        # fail with UnboundLocalError and hide what actually went wrong.
        raise

# Calculating all paths from one root node (start node) for a sparql endpoint
def find_all_paths_endpoint(endpoint_url, default_graph, literals, start_node):
    """Enumerate every cycle-free path that starts at ``start_node``.

    Outgoing neighbours are fetched one node at a time through the module-level
    ``sparql`` client (via ``send_query``) and cached per node for the duration
    of this call. A path ends at a node without outgoing triples, at a node
    whose binding type is a literal, or at a value contained in ``literals``.

    Side effects: every recorded path updates the module-level counters
    ``num_paths``, ``abs_depth`` and ``max_depth``; they must exist before the
    first path is recorded.

    Args:
        endpoint_url: Not read by this function; kept for call compatibility.
        default_graph: Not read by this function; kept for call compatibility.
        literals: Container of literal strings encoded as ``value``,
            ``value@lang`` or ``value^^datatype``.
        start_node: Value of the root node; it is queried as ``<start_node>``.

    Returns:
        list: one list of node strings per path, root node first.
    """
    literal_types = ("literal", "typed-literal")
    known_types = literal_types + ("uri", "iri", "bnode")
    neighbors_cache = {}  # str(node) -> list of (value, type) tuples
    paths = []  # all recorded paths

    def get_neighbors(node):
        """Return the ``(value, type)`` objects of all triples with subject ``node``."""
        node_str = str(node)

        # 1. answer from the cache when this node was queried before
        if node_str in neighbors_cache:
            return neighbors_cache[node_str]

        # an empty value cannot be used in a triple pattern
        if len(node_str) == 0:
            return []

        query = f"""
        SELECT DISTINCT ?next WHERE {{
            <{node}> ?p ?next .
        }}
        """
        results = send_query(query, sparql, JSON)

        # 2. collect the object of every returned triple
        neighbors = []
        for binding in results["results"]["bindings"]:
            next_obj = binding["next"]
            value = next_obj["value"]
            value_type = next_obj["type"]

            if value_type in literal_types:
                # encode literals the same way the entries of `literals` are encoded
                lang_tag = next_obj.get("xml:lang")
                datatype = next_obj.get("datatype")
                if lang_tag:
                    value = f"{value}@{lang_tag}"
                elif datatype:
                    value = f"{value}^^{datatype}"
            elif value_type not in known_types:
                print("OTHER VALUE TYPE: " + value_type)

            neighbors.append((value, value_type))

        neighbors_cache[node_str] = neighbors
        return neighbors

    def record_path(path):
        """Store a copy of ``path`` and update the module-level statistics."""
        global num_paths
        global abs_depth
        global max_depth

        depth = len(path) - 1
        paths.append(list(path))
        num_paths += 1
        abs_depth += depth
        max_depth = max(max_depth, depth)

    def dfs(path, node, node_type="uri"):
        """Extend ``path`` depth-first from ``node`` and record every finished path."""
        # skip node if it already is in path (avoiding cycles)
        if node in path:
            return

        path.append(node)

        # literals have no outgoing triples, so they are never queried
        if node_type in literal_types or node in literals:
            neighbors = []
        else:
            neighbors = get_neighbors(node)

        if not neighbors:
            record_path(path)
        else:
            for neighbor, neighbor_type in neighbors:
                if neighbor in literals:
                    # a literal ends the path right away
                    path.append(neighbor)
                    record_path(path)
                    path.pop()
                else:
                    dfs(path, neighbor, neighbor_type)

        # remove node from the path to find next path
        path.pop()

    dfs([], start_node, node_type="uri")

    return paths

In [None]:

# Path statistics; find_all_paths_endpoint() updates these module-level counters,
# so they are reset every time this cell runs.
num_paths = 0
abs_depth = 0
max_depth = 0

# Endpoint and named graph used by this cell.
# Alternative values this cell used to assign and then overwrite:
#   endpoint https://lov.linkeddata.es/dataset/lov/sparql with one of the graphs
#       http://purl.org/cwmo/#, https://w3id.org/ecfo, http://vocab.deri.ie/csp, http://purl.org/wf4ever/ro
#   endpoint https://sparql.europeana.eu/
#   endpoint https://nfdi4culture.de/sparql
#   graph http://def.seegrid.csiro.au/isotc211/iso19156/2011/observation
endpoint_url = "https://data.europa.eu/sparql"
default_graph = "http://data.europa.eu/88u/dataset/0800af55-8e56-49a0-8986-aa55151d0440"

# Browser-like request headers. They are not passed to the client created below;
# only the disabled SPARQLWrapper(endpoint_url, agent=agent_) variant used them.
agent_ = {
    'User-Agent': (
        'Mozilla/5.0 (X11; Linux x86_64) '
        'AppleWebKit/537.11 (KHTML, like Gecko) '
        'Chrome/23.0.1271.64 Safari/537.11'
    ),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,/;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Accept-Encoding': 'none',
    'Accept-Language': 'en-US,en;q=0.8',
    'Connection': 'keep-alive',
}

sparql = SPARQLWrapper(endpoint_url)
sparql.addDefaultGraph(default_graph)

print("STARTING GETTING LITERALS")

# Every distinct literal that occurs as the object of a triple.
query_literals = """
SELECT DISTINCT ?literal
WHERE {
?s ?p ?literal .
FILTER(isLiteral(?literal))
}
"""

results = send_query(query_literals, sparql, JSON)

# Each literal is stored as "value", "value@lang" or "value^^datatype".
literals = set()
i = 0  # leftover counter; nothing in this cell reads it
for res in results["results"]["bindings"]:
    literal_value = res["literal"]["value"]
    lang_tag = res["literal"].get("xml:lang")
    datatype = res["literal"].get("datatype")
    if lang_tag:
        suffix = f"@{lang_tag}"
    elif datatype:
        suffix = f"^^{datatype}"
    else:
        suffix = ""
    literal_value = f"{literal_value}{suffix}"
    literals.add(literal_value)

print(f"{len(literals)} Literals existing")

print("FINISHED GETTING LITERALS\n")

#sys.exit()

print("STARTING CALCULATING PATHS")

# Root nodes: subjects that never occur as the object of a triple.
query_roots = """
SELECT DISTINCT ?root
WHERE {
    ?root ?p ?o .
    FILTER NOT EXISTS {
        ?s ?p2 ?root .
    }
}
"""

results = send_query(query_roots, sparql, JSON)

# Collect all roots first, then report how many there are.
root_nodes = {str(res["root"]["value"]) for res in results["results"]["bindings"]}
print(f"Number of root nodes: {len(root_nodes)}")

# Maps every root node to the list of paths that start there.
all_paths = {}
for str_root in root_nodes:
    debug_print("Calculating paths for root node: " + str_root)
    all_paths[str_root] = find_all_paths_endpoint(endpoint_url, default_graph, literals, str_root)

print("FINISHED CALCULATING PATHS\n")

# Output found paths
print("FOUND PATHS:")
for root_node, paths in all_paths.items():
    for path in paths:
        debug_print(path)
        debug_print("Path length = " + str(len(path)-1))

print("RESULTS:")

print("-Number of Paths: " + str(num_paths))
print("-Absolute depth: " + str(abs_depth))

# Without any recorded path (for example when the root query returns nothing)
# the average would divide by zero, so it is reported as 0.0 instead.
avg_depth = abs_depth / num_paths if num_paths > 0 else 0.0

print("-Average depth: " + str(avg_depth))
print("-Maximal depth: " + str(max_depth))

STARTING GETTING LITERALS
104 Literals existing
FINISHED GETTING LITERALS

STARTING CALCULATING PATHS
Number of root nodes: 1
Calculating paths for root node: http://data.europa.eu/88u/record/0800af55-8e56-49a0-8986-aa55151d0440
FINISHED CALCULATING PATHS

FOUND PATHS:
['http://data.europa.eu/88u/record/0800af55-8e56-49a0-8986-aa55151d0440', 'http://www.w3.org/ns/dcat#CatalogRecord']
Path length = 1
['http://data.europa.eu/88u/record/0800af55-8e56-49a0-8986-aa55151d0440', '2025-02-08T17:27:04Z^^http://www.w3.org/2001/XMLSchema#dateTime']
Path length = 1
['http://data.europa.eu/88u/record/0800af55-8e56-49a0-8986-aa55151d0440', 'http://data.europa.eu/88u/dataset/0800af55-8e56-49a0-8986-aa55151d0440', 'http://www.w3.org/ns/dcat#Dataset']
Path length = 2
['http://data.europa.eu/88u/record/0800af55-8e56-49a0-8986-aa55151d0440', 'http://data.europa.eu/88u/dataset/0800af55-8e56-49a0-8986-aa55151d0440', '2021-05-17^^http://www.w3.org/2001/XMLSchema#date']
Path length = 2
['http://data.europa.e

In [None]:
# TODO: Cohesion

DEBUG = True

# Browser-like request headers; the client created below does not receive them.
agent_ = {
    'User-Agent': (
        'Mozilla/5.0 (X11; Linux x86_64) '
        'AppleWebKit/537.11 (KHTML, like Gecko) '
        'Chrome/23.0.1271.64 Safari/537.11'
    ),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,/;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Accept-Encoding': 'none',
    'Accept-Language': 'en-US,en;q=0.8',
    'Connection': 'keep-alive',
}

# New client for this cell; endpoint_url and default_graph are not assigned here
# and must already exist in the kernel.
sparql = SPARQLWrapper(endpoint_url)
sparql.addDefaultGraph(default_graph)

def format_literal_for_sparql(node_value, node_type, lang_tag, datatype):
    """Render a value as a double-quoted SPARQL literal.

    Backslashes, double quotes, line feeds and carriage returns are escaped in
    every branch, so the result can be embedded in query text.

    Args:
        node_value: Lexical value to quote.
        node_type: Not used; accepted so callers can pass a complete node tuple.
        lang_tag: Language tag; when truthy the result is ``"value"@lang``.
        datatype: Datatype IRI; when truthy (and no language tag is given) the
            result is ``"value"^^<datatype>``.

    Returns:
        str: the quoted literal, plain when neither tag nor datatype is given.
    """
    # Backslashes go first so the escapes added afterwards are not escaped again.
    escaped = (
        node_value.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )

    if lang_tag:
        return f'"{escaped}"@{lang_tag}'
    if datatype:
        return f'"{escaped}"^^<{datatype}>'
    return f'"{escaped}"'
    
def get_new_neighbors(node_value, node_type, lang_tag, datatype, sparql, all_nodes, visited):
  """Return the neighbours of one node that are known but not yet visited.

  A node is the tuple ``(value, type, language tag, datatype)``. For a literal
  the neighbours are the subjects of triples whose object is exactly that
  literal; for any other node they are the subjects and objects of all triples
  the node takes part in.

  Args:
      node_value: Lexical value or IRI of the node.
      node_type: ``type`` field of the node's result binding.
      lang_tag: Language tag of the node, or None.
      datatype: Datatype IRI of the node, or None.
      sparql: Client handed to ``send_query``.
      all_nodes: Collection of all node tuples of the graph.
      visited: Collection of node tuples already reached; not modified here.

  Returns:
      set: node tuples that are in ``all_nodes`` and not in ``visited``.
  """
  neighbors = set()

  if node_type in ["literal", "typed-literal"]:
    # All literal triples are fetched and compared client-side instead of
    # embedding the literal into the query text.
    query = """
        SELECT DISTINCT ?neighbor ?o WHERE {
            ?neighbor ?p ?o .
            FILTER(isLiteral(?o))
        }
        """
    all_nghs = send_query(query, sparql, JSON)

    for binding in all_nghs["results"]["bindings"]:
      pot_node = binding["o"]
      # Value, language tag and datatype must all agree. Combining the last
      # two with `or` would match equal values with different language tags,
      # because both datatypes are None in that case.
      same_literal = (
        pot_node["value"] == node_value
        and pot_node.get("xml:lang") == lang_tag
        and pot_node.get("datatype") == datatype
      )
      if not same_literal:
        continue

      subject = binding["neighbor"]
      neighbor_val = subject["value"]
      neighbor_type = subject["type"]
      neighbor_lang = subject.get("xml:lang")
      neighbor_dtype = subject.get("datatype")
      neighbor = (neighbor_val, neighbor_type, neighbor_lang, neighbor_dtype)
      debug_print(neighbor)

      if neighbor in visited:
        continue
      if neighbor in all_nodes:
        neighbors.add(neighbor)
        debug_print("-Found unvisited literal neighbor: " + format_literal_for_sparql(neighbor_val, neighbor_type, neighbor_lang, neighbor_dtype))
      else:
        # the parts are passed as separate arguments because language tag and datatype may be None
        debug_print("##### NEIGHBOR NOT IN ALL_NODES: ", neighbor_val, neighbor_type, neighbor_lang, neighbor_dtype, " #####")

  else:
    query = f"""
    SELECT DISTINCT ?neighbor WHERE {{
        {{ <{node_value}> ?p1 ?neighbor . }}
        UNION
        {{ ?neighbor ?p2 <{node_value}> . }}
    }}
    """

    pot_new_nghs = send_query(query, sparql, JSON)
    for binding in pot_new_nghs["results"]["bindings"]:
      pot_new_ngh_obj = binding["neighbor"]
      pot_new_ngh_val = pot_new_ngh_obj["value"]
      pot_new_ngh_type = pot_new_ngh_obj["type"]
      # .get() keeps the tuple shape identical to the entries of all_nodes
      pot_new_ngh_lang = pot_new_ngh_obj.get("xml:lang")
      pot_new_ngh_dtype = pot_new_ngh_obj.get("datatype")
      pot_new_ngh = (pot_new_ngh_val, pot_new_ngh_type, pot_new_ngh_lang, pot_new_ngh_dtype)

      if pot_new_ngh in visited:
        continue
      if pot_new_ngh in all_nodes:
        neighbors.add(pot_new_ngh)
        debug_print("-Found unvisited neighbor: " + format_literal_for_sparql(pot_new_ngh_val, pot_new_ngh_type, pot_new_ngh_lang, pot_new_ngh_dtype))
      else:
        debug_print("##### 1 NEIGHBOR NOT IN ALL_NODES: " + pot_new_ngh_val + " - " + pot_new_ngh_type + " - " + str(pot_new_ngh_lang) + " - " + str(pot_new_ngh_dtype) + " #####")

  return neighbors


def get_all_neighbors(node_value, node_type, lang_tag, datatype):
  """Query the neighbours of one node through the module-level ``sparql`` client.

  A literal is looked up as the object of a triple and its query text is
  printed; any other node is looked up as subject and as object.

  Args:
      node_value: Lexical value or IRI of the node.
      node_type: ``type`` field of the node's result binding.
      lang_tag: Language tag of the node, or None.
      datatype: Datatype IRI of the node, or None.

  Returns:
      The value returned by ``send_query`` for the JSON format.
  """
  if node_type not in ["literal", "typed-literal"]:
    query = f"""
    SELECT DISTINCT ?neighbor WHERE {{
        {{ <{node_value}> ?p1 ?neighbor . }}
        UNION
        {{ ?neighbor ?p2 <{node_value}> . }}
    }}
    """
    return send_query(query, sparql, JSON)

  # a literal can only be the object of a triple
  literal = format_literal_for_sparql(node_value, node_type, lang_tag, datatype)
  query = f"""
    SELECT DISTINCT ?neighbor WHERE {{
        ?neighbor ?p1 {literal} .
    }}
    """

  print("\nLITERAL QUEUE:")
  print(query)

  return send_query(query, sparql, JSON)

def check_literal_tags(node_obj, node_value, node_type):
  """Return ``node_value`` with its language tag or datatype appended.

  Args:
      node_obj: Result binding of the node; read with ``.get("xml:lang")`` and
          ``.get("datatype")``.
      node_value: Lexical value or IRI of the node.
      node_type: ``type`` field of the binding.

  Returns:
      str: ``value@lang`` or ``value^^datatype`` for a literal that carries a
      tag or datatype; otherwise ``node_value`` unchanged.
  """
  # Start from the plain value so every branch has something to return.
  node = node_value

  if node_type == "literal" or node_type == "typed-literal":
    lang_tag = node_obj.get("xml:lang")
    datatype = node_obj.get("datatype")
    debug_print("[check_literal_tags] " + node_value + " - lang_tag: " + str(lang_tag) + " - datatype: " + str(datatype))

    if lang_tag:  # add language tag
      node = f"{node_value}@{lang_tag}"
    elif datatype:  # add datatype tag
      node = f"{node_value}^^{datatype}"

    debug_print("[check_literal_tags] " + node)
  return node

# Getting all nodes

debug_print("Getting all nodes")

# Every term that occurs as subject or object of a triple.
query_all_nodes = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT DISTINCT ?node WHERE {
    { ?node ?p ?o } UNION { ?s ?p ?node }
}
"""
results = send_query(query_all_nodes, sparql, JSON)

# A node is identified by (value, type, language tag, datatype); the last two
# are None when the binding does not carry them.
all_nodes = set()
for binding in results["results"]["bindings"]:
  node_obj = binding["node"]
  all_nodes.add(
    (node_obj["value"], node_obj["type"], node_obj.get("xml:lang"), node_obj.get("datatype"))
  )

for node_value, node_type, lang_tag, datatype in all_nodes:
  debug_print(format_literal_for_sparql(node_value, node_type, lang_tag, datatype))
debug_print("Got all nodes")

## algorithm: searches for components of graph

# number of independent components found so far (subgraphs that are connected internally)
components = 0

# nodes that were already discovered
visited = set()

while visited != all_nodes:

  # start a new component at any node that was not discovered yet
  start_node = next(iter(all_nodes - visited))
  debug_print("NEXT RANDOM NODE TO SEARCH FOR: " + format_literal_for_sparql(*start_node))
  visited.add(start_node)
  frontier = {start_node}

  # breadth-first expansion, one frontier layer per pass
  while frontier:
    new_frontier = set()
    for current in frontier:

      if current != start_node:
        debug_print("next state to search for: " + format_literal_for_sparql(*current))

      # get_new_neighbors only returns nodes that are not in `visited` yet
      found = get_new_neighbors(*current, sparql, all_nodes, visited)
      visited |= found
      new_frontier |= found

      debug_print("New Frontier: ")
      for pending in new_frontier:
        debug_print("-- " + format_literal_for_sparql(*pending))
    frontier = new_frontier

  components += 1

  print(f"Component {components} completed. \nVISITED: {len(visited)}/{len(all_nodes)}")

print("Cohesion: " + str(components))

# TODO: Solve the following problems:
# LITERAL QUEUE:  SELECT DISTINCT ?neighbor WHERE { ?neighbor ?p2 ""download""@en .} does not work
# Addressing bnodes with <bnode-id> does not work for every endpoint
# Literals with \n and empty spaces do not work in neighbor search! (I think this is solved by format_literal_for_sparql)
# "download"@en does not work either

Getting all nodes
nodeID://b3793463705 bnode None None
nedladdning literal sv-t-en-t0-mtec None
http://www.w3.org/ns/dcat#DataService uri None None
Įkvėpimo paslauga plėtros planui XPlanning paslauga plano statybos linijoms Rindelbach Rattstadt Am Ortsweg Nr. 5 (XPlanGML 5.0.1) (INSPIRE GML) literal lt-t-de-t0-mtec None
íoslódáil literal ga-t-en-t0-mtec None
nodeID://b3793463702 bnode None None
τηλεφόρτωση literal el-t-en-t0-mtec None
scarica literal it-t-en-t0-mtec None
baixar literal pt-t-en-t0-mtec None
Inspirationstjänst för utvecklingsplanen XPlanning för Plan Construction Lines Rindelbach Rattstadt Am Ortsweg No. 5 (XPlanGML 5.0.1) (INSPIRE GML) literal sv-t-de-t0-mtec None
2024-09-23T04:54:37Z typed-literal None http://www.w3.org/2001/XMLSchema#dateTime
http://purl.org/dc/terms/MediaType uri None None
Inspiration Service for the Development Plan Plan Service (XPlanGML 5.0.1) Suunnitelmalinjat Rindelbach Rattstadt Am Ortsweg No. 5 Ellwangen (Jagst) (INSPIRE GML) literal fi-t-de-t

In [None]:
# Calculate Tangledness
# source 73 - page 4
# tangledness = mean number of classes with more than 1 direct ancestor, so two primitive
# measurements (number of classes and number of direct ancestors) are used for computing the metric

# Step 1: number of classes in the graph
query_classes = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>

SELECT (COUNT(DISTINCT ?class) AS ?num_classes)
WHERE {
  {
    # 1. explicitly/implicitly used RDF classes
    # explicitly: ?class a owl:Class . or ?class a rdfs:Class .
    # implicitly: ?any rdf:type ?class . (includes also explicitly used classes)

    ?any rdf:type ?class .
  }
  UNION
  {
    # 2. subclasses
    ?class rdfs:subClassOf ?any .
  }
  UNION
  {
    # 3. superclasses
    ?any rdfs:subClassOf ?class .
  }
  UNION
  {
    # 4. classes used with owl:equivalentClass
    { ?class owl:equivalentClass ?any . }
    UNION
    { ?any owl:equivalentClass ?class . }
  }
  UNION
  {
    # 5. OWL restriction classes
    ?class a owl:Restriction .
  }
  UNION
  {
    # 6. complex classes with using unionOf, intersectionOf etc.
    ?class owl:unionOf|owl:intersectionOf|owl:complementOf|owl:oneOf ?list .
  }
  UNION
  {
    # 7. OWL hasValue restrictions
    ?class owl:hasValue ?val .
  }
}"""

results = send_query(query_classes, sparql, JSON)

# num_classes stays 0 when the result has no binding
num_classes = 0
for binding in results["results"]["bindings"]:
    num_classes = int(binding["num_classes"]["value"])
    print(f"Number of classes in graph: {num_classes}")

# Variant 1: number of classes with more than one ingoing is-a (rdf:type) arc.
# Suggested by Dr. Jovanovik and by source 37, page 3. This query is defined
# but not sent; variant 2 below is the one that is executed.
query_var1 = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT (COUNT(?class) AS ?tangledCount)
WHERE
{
  SELECT ?class (COUNT(?s) AS ?numIngoing)
  WHERE {
    { ?s rdf:type ?class . }
  }
  GROUP BY ?class
  HAVING (COUNT(?s) > 1) # problem here was: i used ?numIngoing instead of COUNT(?s)
}
"""

# Variant 2: number of classes with more than one superclass (what source 73 asks for).
query_var2 = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT (COUNT(?class) AS ?tangledCount)
WHERE {
  SELECT ?class (COUNT(?super) AS ?numSupers)
  WHERE {
    ?class rdfs:subClassOf ?super .
  }
  GROUP BY ?class
  HAVING (COUNT(?super) > 1)
}
"""

results = send_query(query_var2, sparql, JSON)

# t stays 0 when the result has no binding
t = 0
for binding in results["results"]["bindings"]:
    t = int(binding["tangledCount"]["value"])
    print(f"Number of classes with more than one superclass: {t}")

print("\nRESULT:")

if num_classes <= 0:
    # TODO: decide how tangledness is reported for a graph without classes
    print("-Tangledness is INF")
else:
    # source 37 says num_classes / t
    # source 73 says denominator and numerator should be switched -> t / num_classes
    tangledness = t / num_classes
    print(f"-Tangledness: {tangledness}")


Number of classes in graph: 20
Number of classes with more than one superclass: 0

RESULT:
-Tangledness: 0.0


: 

: 

In [None]:
# Degree Distribution (Formula in: source 37, page 7)
# nG...number of nodes in graph
# nE...number of edges in graph

print("STARTING CALCULATING DEGREE DISTRIBUTION")

# nE: every triple is one edge
query_nE = """
SELECT (COUNT(*) AS ?tripleCount)
WHERE {
  ?s ?p ?o .
}
"""

results = send_query(query_nE, sparql, JSON)
nE = int(results["results"]["bindings"][0]["tripleCount"]["value"])
print("Number of edges in graph: ", nE)

# nG: every distinct subject or object is one node
query_nG = """
SELECT (COUNT(DISTINCT ?node) AS ?nodeCount)
WHERE {
  {
    SELECT ?node WHERE {
      { ?node ?p1 ?o }       
      UNION
      { ?s ?p2 ?node }       
    }
  }
}
"""

results = send_query(query_nG, sparql, JSON)
nG = int(results["results"]["bindings"][0]["nodeCount"]["value"])
print("Number of nodes in graph: ", nG)

# degree of a node: number of triples in which it is subject or object
query_degrees = """
SELECT ?node (COUNT(?any) AS ?degree)
WHERE {
  {
    { ?node ?p1 ?any }     
    UNION
    { ?any ?p2 ?node }   
  }
}
GROUP BY ?node
"""

results = send_query(query_degrees, sparql, JSON)

# list of (node value, degree) pairs
degrees = []
for binding in results["results"]["bindings"]:
    node = binding["node"]["value"]
    degree = int(binding["degree"]["value"])
    degrees.append((node, degree))
    debug_print(f"{node}: {degree}")

# Sum of all degrees
sum_of_degrees = sum(d for _, d in degrees)

# sum of degrees should be equal to 2 * nE
print("Sum of Degrees: " + str(sum_of_degrees))

# The formula divides by (nG - 1), so it needs at least two nodes.
# With fewer nodes the value is reported as 0.
if nG > 1:
    mean_degree = (2 * nE) / nG
    squared_diffs = [(deg_v - mean_degree) ** 2 for _, deg_v in degrees]
    degree_distribution = sum(squared_diffs) / (nG - 1)
else:
    degree_distribution = 0

print("FINISHED CALCULATING DEGREE DISTRIBUTION")

print("\nRESULT:")
print("-Degree Distribution: " + str(degree_distribution))

STARTING CALCULATING DEGREE DISTRIBUTION
Number of edges in graph:  179
Number of nodes in graph:  163
prenos: 1
http://purl.org/dc/terms/MediaType: 1
Serviço INSPIRE para o Serviço do Plano de Desenvolvimento (XPlanGML 5.0.1) para as linhas de construção do plano Rindelbach Rattstadt Am Ortsweg N.º 5 da cidade de Ellwangen (Jagst) (INSPIRE GML): 1
lataa: 1
http://inspire.ec.europa.eu/metadata-codelist/ResourceType/dataset: 1
INSPIRE-Dienst für den Bebauungsplan XPlanung-Dienst für den Plan Baulinien Rindelbach Rattstadt Am Ortsweg Nr. 5 (XPlanGML 5.0.1) (INSPIRE GML): 1
Usługa inspiracyjna dla planu rozwoju XPlanning dla Plan Construction Lines Rindelbach Rattstadt Am Ortsweg nr 5 (XPlanGML 5.0.1) (INSPIRE GML): 1
nodeID://b3793463703: 4
nodeID://b3793463696: 4
PlannedLandUse: 1
Seirbhís INSPIRE don tSeirbhís um Phlean Forbartha (XPlanGML 5.0.1) do Línte Tógála Phlean Rindelbach Rattstadt Am Ortsweg Uimh. 5 de chathair Ellwangen (Jagst) (INSPIRE GML): 1
nodeID://b3793463699: 7
http://

: 

: 

In [None]:
print("STARTING CALCULATING ENTITIES/CLASSES/PROPERTIES/...")

# TODO: Entities

# Number of instances per type
query_inst = """ 
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
SELECT ?type (COUNT(?s) AS ?count)
WHERE {
  ?s rdf:type ?type . 
}
GROUP BY ?type """

results = send_query(query_inst, sparql, JSON)
print(results)

# total over all types; a resource with several types is counted once per type
num_instances = 0
for binding in results["results"]["bindings"]:
    rdf_type = binding["type"]["value"]
    count = int(binding["count"]["value"])
    num_instances = num_instances + count
        
# Number of classes
# Definition of Class:
# source: 213, page: 5 - source 250, page 3
# TNOC (total number of classes/concepts) = classes, subclasses, superclasses, anonymous classes
# anonymous classes = equivalent/restriction/unionOf/intersectionOf/complementOf/oneOf/hasValue classes

query_classes = """ 
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>

SELECT DISTINCT ?class
WHERE {
  {
    # 1. explicitly/implicitly used RDF classes
    # explicitly: ?class a owl:Class . or ?class a rdfs:Class .
    # implicitly: ?any rdf:type ?class . (includes also explicitly used classes)

    ?any rdf:type ?class .
  }
  UNION
  {
    # 2. subclasses
    ?class rdfs:subClassOf ?any .
  }
  UNION
  {
    # 3. superclasses
    ?any rdfs:subClassOf ?class .
  }
  UNION
  {
    # 4. classes used with owl:equivalentClass
    { ?class owl:equivalentClass ?any . }
    UNION
    { ?any owl:equivalentClass ?class . }
  }
  UNION
  {
    # 5. OWL restriction classes
    ?class a owl:Restriction .
  }
  UNION
  {
    # 6. complex classes with using unionOf, intersectionOf etc.
    ?class owl:unionOf|owl:intersectionOf|owl:complementOf|owl:oneOf ?list .
  }
  UNION
  {
    # 7. OWL hasValue restrictions
    ?class owl:hasValue ?val .
  }
}
"""

results = send_query(query_classes, sparql, JSON)

# one binding per distinct class; count them while listing
num_classes = 0
for binding in results["results"]["bindings"]:
    rdf_class = binding["class"]["value"]
    num_classes += 1
    debug_print(f"Class {num_classes}: {rdf_class}")

# Number of properties in the T-Box: distinct resources typed as an OWL
# object, datatype or annotation property.
query_properties = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT (COUNT(DISTINCT ?property) AS ?NoP)
WHERE {
  VALUES ?type { owl:ObjectProperty owl:DatatypeProperty owl:AnnotationProperty }
  ?property rdf:type ?type .
}
"""

results = send_query(query_properties, sparql, JSON)

# Stays 0 when the endpoint returns no row; otherwise the value of the
# last returned row is kept.
property_rows = results["results"]["bindings"]
num_properties = 0
for binding in property_rows:
    num_properties = int(binding["NoP"]["value"])

# Number of object properties
# "Non-inheritance" properties only: inheritance relations such as
# rdfs:subPropertyOf or rdfs:subClassOf are not typed owl:ObjectProperty
# and are therefore not matched by this query.
query_object_properties = """
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT (COUNT(DISTINCT ?property) AS ?numObjectProperties)
WHERE {
  ?property rdf:type owl:ObjectProperty .
}
"""

# Reset first so the counter is defined even if the query call fails.
num_obj_properties = 0

results = send_query(query_object_properties, sparql, JSON)

object_property_rows = results["results"]["bindings"]
for binding in object_property_rows:
    num_obj_properties = int(binding["numObjectProperties"]["value"])

# Summary of the basic counts gathered so far.
print("RESULTS: ")
print(f"-Number of properties: {num_properties}")
print(f"-Number of classes: {num_classes}")
print(f"-Number of instances: {num_instances}")
print(f"-Number of object properties: {num_obj_properties}")
        

STARTING CALCULATING ENTITIES/CLASSES/PROPERTIES/...
{'head': {'link': [], 'vars': ['type', 'count']}, 'results': {'distinct': False, 'ordered': True, 'bindings': [{'type': {'type': 'uri', 'value': 'http://purl.org/dc/terms/Standard'}, 'count': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#integer', 'value': '3'}}, {'type': {'type': 'uri', 'value': 'http://spdx.org/rdf/terms#Checksum'}, 'count': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#integer', 'value': '1'}}, {'type': {'type': 'uri', 'value': 'http://www.w3.org/ns/locn#Address'}, 'count': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#integer', 'value': '1'}}, {'type': {'type': 'uri', 'value': 'http://www.w3.org/ns/dcat#DataService'}, 'count': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#integer', 'value': '1'}}, {'type': {'type': 'uri', 'value': 'http://www.w3.org/ns/dcat#Dataset'}, 'count': {'type': 'typed-literal', 'datatyp

: 

: 

In [6]:
# TODO: Depth of Inheritance Tree
# Calculating all paths from one root node (start node) for a sparql endpoint

def find_all_paths_subclasses(endpoint_url, default_graph,  start_node):
    """Collect every subclass path that starts at ``start_node``.

    Performs a depth-first search in which the children of a node are the
    results of ``?child rdfs:subClassOf <node>`` on the given endpoint. A path
    is recorded once it cannot be extended, i.e. when the current node has no
    subclass that is not already on the path.

    Side effects:
        For every recorded path the module-level counter
        ``num_paths_inh_tree`` is incremented and ``max_depth_inh_tree`` is
        raised to the longest path seen so far (length counted in edges).
        Both globals must exist before this function is called.

    Args:
        endpoint_url: URL of the SPARQL endpoint.
        default_graph: graph IRI handed to ``SPARQLWrapper.addDefaultGraph``.
        start_node: value (IRI string) of the class to start from.

    Returns:
        A list of paths; each path is a list of node values beginning with
        ``start_node``.
    """
    sparql = SPARQLWrapper(endpoint_url)
    sparql.addDefaultGraph(default_graph)
    #sparql.setTimeout(60)

    # node value -> list of (child value, child term type); avoids sending
    # the same query twice within one call of this function
    neighbors_cache = {}

    # list which stores all paths
    paths = []

    def get_neighbors(node):
        """Return the direct subclasses of ``node`` as (value, type) tuples."""
        node_str = str(node)

        if node_str in neighbors_cache:
            return neighbors_cache[node_str]

        # An empty value cannot be placed between <...> in the query.
        if not node_str:
            return []

        query = f"""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

        SELECT DISTINCT ?child
        WHERE {{
          ?child rdfs:subClassOf <{node_str}> .
        }}
        """

        results = send_query(query, sparql, JSON)

        # The query projects ?child, so bindings are keyed by "child"
        # (reading "next" here raised KeyError for every non-leaf class).
        # "type" is the term kind reported by the endpoint: uri, literal, bnode.
        neighbors = [
            (binding["child"]["value"], binding["child"]["type"])
            for binding in results["results"]["bindings"]
        ]

        neighbors_cache[node_str] = neighbors
        return neighbors

    def dfs(path, node, node_type="uri"):
        """Extend ``path`` by ``node`` and recurse into its subclasses.

        ``node_type`` is accepted for the caller's bookkeeping but is not
        used to decide how the node is queried.
        """
        global num_paths_inh_tree
        global max_depth_inh_tree

        # add current node to path
        path.append(node)

        # A child that is already on the path would close a cycle (this also
        # covers reflexive subClassOf statements), so it cannot extend the path.
        unvisited = [
            (child, child_type)
            for child, child_type in get_neighbors(node)
            if child not in path
        ]

        if not unvisited:
            # Nothing left to extend the path with: record it. This includes
            # nodes whose only subclasses lie on the path already, which
            # would otherwise yield no path at all.
            paths.append(list(path))
            num_paths_inh_tree += 1
            max_depth_inh_tree = max(max_depth_inh_tree, len(path) - 1)
        else:
            for child, child_type in unvisited:
                dfs(path, child, child_type)

        # remove node from the path to find next path
        path.pop()

    # TODO: node_type="uri" may not be accurate because blank nodes can occur
    # as well (it may still be fine, since the lookup is the same for both).
    dfs([], start_node, node_type="uri")

    return paths

# `sparql`, `endpoint_url` and `default_graph` must already be defined when
# this cell runs. Setup used during development, kept for reference:
#   endpoint_url = ""
#   default_graph = "http://def.seegrid.csiro.au/ontology/om/sam-lite"
#   sparql = SPARQLWrapper(endpoint_url); sparql.addDefaultGraph(default_graph)
#
# Root query:
#   ?root a owl:Class / rdfs:Class                       -> ensures the root is a class
#   FILTER NOT EXISTS {?root rdfs:subClassOf ?anyClass}  -> keeps only classes without a superclass
query_root = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT DISTINCT ?root
WHERE {
  {
    ?root rdf:type owl:Class .
  }
  UNION
  {
    ?root rdf:type rdfs:Class .
  }
  
  FILTER NOT EXISTS {
    ?root rdfs:subClassOf ?anyClass .
  }
}
"""

# Result nodes can be URIs/IRIs or blank nodes.
results = send_query(query_root, sparql, JSON)

tree_roots = set()
for binding in results["results"]["bindings"]:
    node = binding["root"]["value"]
    debug_print(f"Found Tree node: {node}")
    tree_roots.add(node)

# Report how many root nodes were found.
debug_print("Number of root nodes: " + str(len(tree_roots)))

# Counters updated as a side effect of find_all_paths_subclasses.
max_depth_inh_tree = num_paths_inh_tree = 0

# root node -> list of subclass paths starting at that root
all_paths = {}

# Compute the subclass paths for every collected root node.
for root in tree_roots:
    debug_print(f"Calculating subclass paths for tree node: {root}")
    all_paths[root] = find_all_paths_subclasses(endpoint_url, default_graph, root)

# Output found paths
print("FOUND PATHS:")
for root_node in all_paths:
    paths = all_paths[root_node]
    for path in paths:
        debug_print(path)
        debug_print(f"Path length = {len(path) - 1}")

debug_print(f"-Number of Paths: {num_paths_inh_tree}")
print(f"-Depth of Inheritance Tree: {max_depth_inh_tree}")

# Author's note: the depth is probably simply 0 for this dataset (see ChatGPT).

Number of root nodes: 0
FOUND PATHS:
-Number of Paths: 0
-Depth of Inheritance Tree: 0


In [None]:
# Property Class Ratio = number of properties / number of classes
if num_classes > 0:
    prop_class_ratio = num_properties / num_classes
    print(f"Property Class Ratio: {prop_class_ratio}")
else:
    # TODO: decide what to report when there are no classes
    print("Property Class Ratio is INF!")

# Class Property Ratio = number of classes / number of properties
if num_properties > 0:
    class_prop_ratio = num_classes / num_properties
    print(f"Class Property Ratio: {class_prop_ratio}")
else:
    # TODO: decide what to report when there are no properties
    print("Class Property Ratio is INF!")

# Inheritance Richness = average number of subclasses per class
# (source 227 - page 9). Computed here as the number of rdfs:subClassOf
# statements divided by the number of classes.
query_subclasses = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT (COUNT(*) AS ?numInheritanceRelations)
WHERE {
  ?subclass rdfs:subClassOf ?superclass .
}
"""

# Reset first so the counter is defined even if the query call fails.
num_subclasses = 0

results = send_query(query_subclasses, sparql, JSON)

inheritance_rows = results["results"]["bindings"]
for binding in inheritance_rows:
    num_subclasses = int(binding["numInheritanceRelations"]["value"])

if num_classes > 0:
    inheritance_richness = num_subclasses / num_classes
    print(f"Inheritance Richness: {inheritance_richness}")
else:
    # TODO: decide what to report when there are no classes
    print("Inheritance Richness is INF!")

# Attribute Richness = number of datatype properties / number of classes
query_datatype_properties = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT (COUNT(DISTINCT ?property) AS ?numDatatypeProperties)
WHERE {
  ?property rdf:type owl:DatatypeProperty .
}
"""

# Reset first so the counter is defined even if the query call fails.
num_datatype_properties = 0

results = send_query(query_datatype_properties, sparql, JSON)

datatype_property_rows = results["results"]["bindings"]
for binding in datatype_property_rows:
    num_datatype_properties = int(binding["numDatatypeProperties"]["value"])

debug_print(f"Number of Datatype properties: {num_datatype_properties}")

if num_classes > 0:
    attr_richness = num_datatype_properties / num_classes
    print(f"Attribute Richness: {attr_richness}")
else:
    print("Attribute Richness is INF!")


Property Class Ratio: 0.0
Class Property Ratio is INF!
Inheritance Richness: 0.0
Number of Datatype properties: 0
Attribute Richness: 0.0


: 

: 

In [None]:
# Average Population = number of instances / number of classes
if num_classes > 0:
    avg_population = num_instances / num_classes
    print(f"Average Population: {avg_population}")
else:
    print("Average Population is INF!")


Average Population: 1.15


: 

: 

In [None]:
# Average Class Connectivity
# The connectivity of a class is the total number of relationships that
# instances of the class have with instances of other classes
# (source 227 - page 10).
#
# Per class, the query counts the matches in which one of its instances is
# the subject or the object of a triple whose other end is an instance of a
# different class. rdf:type triples are excluded because relationships to
# classes themselves are not of interest here.
query_class_connectivity = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT ?class (COUNT(*) AS ?connectivity)
WHERE {
  {
    ?instance ?property ?target .

    ?instance rdf:type ?class .
    ?target rdf:type ?targetClass .

    FILTER(?property != rdf:type)
    FILTER(?class != ?targetClass)
  }
  UNION
  {
    ?instance ?property ?target .

    ?target rdf:type ?class .
    ?instance rdf:type ?targetClass .

    FILTER(?property != rdf:type)
    FILTER(?class != ?targetClass)
  }
}
GROUP BY ?class

"""
results = send_query(query_class_connectivity, sparql, JSON)

# (class IRI, connectivity) for every class the query returned
class_connectivity_list = [
    (row["class"]["value"], int(row["connectivity"]["value"]))
    for row in results["results"]["bindings"]
]
sum_connectivities = sum(count for _, count in class_connectivity_list)

for class_name, connectivity in class_connectivity_list:
    debug_print(f"Connectivity of Class {class_name}: {connectivity}")

# The average is taken over all counted classes (num_classes), not only
# over the classes that appear in the query result.
if num_classes > 0:
    avg_class_connectivity = sum_connectivities / num_classes
    print(f"Average Class Connectivity: {avg_class_connectivity}")
else:
    print("Average Class Connectivity is INF!")


Average Class Connectivity: 0.0


: 

: 

In [None]:
import requests

# Probe a SPARQL endpoint with a plain GET to find out which server software
# is behind it. Exactly one endpoint is active; the alternatives are kept as
# comments so they can be swapped in.
#endpoint_url = "https://data.europa.eu/sparql"
#endpoint_url = "https://lov.linkeddata.es/dataset/lov/sparql"
#endpoint_url = "https://sparql.europeana.eu/"
#endpoint_url = "https://nfdi4culture.de/sparql"
#endpoint_url = "https://graphdb.ontotext.com/repositories/ontotext_public?query=SELECT+*+WHERE+%7B%7D+LIMIT+1"
#endpoint_url = "https://sparql.bioontology.org/"
#endpoint_url = "http://rdf4j.org/sparql"
#endpoint_url = "https://franz.com/agraph/sparql"
endpoint_url = "https://query.wikidata.org/sparql"

# requests applies no timeout by default, so an unresponsive endpoint would
# block the notebook forever; fail with an exception after this many seconds.
PROBE_TIMEOUT_SECONDS = 30

r = requests.get(endpoint_url, timeout=PROBE_TIMEOUT_SECONDS)
print(r.headers.get("Server"))
print(r.text[:1000])  # the body sometimes contains a hint such as "Virtuoso" or "Fuseki"


nginx/1.18.0
<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF
	xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">

<rdf:Description rdf:nodeID="service">
	<rdf:type rdf:resource="http://www.w3.org/ns/sparql-service-description#Service"/>
	<endpoint xmlns="http://www.w3.org/ns/sparql-service-description#" rdf:resource="http://query.wikidata.org/bigdata/namespace/wdq/sparql"/>
	<supportedLanguage xmlns="http://www.w3.org/ns/sparql-service-description#" rdf:resource="http://www.w3.org/ns/sparql-service-description#SPARQL10Query"/>
	<supportedLanguage xmlns="http://www.w3.org/ns/sparql-service-description#" rdf:resource="http://www.w3.org/ns/sparql-service-description#SPARQL11Query"/>
	<supportedLanguage xmlns="http://www.w3.org/ns/sparql-service-description#" rdf:resource="http://www.w3.org/ns/sparql-service-description#SPARQL11Update"/>
	<feature xmlns="http://www.w3.org/ns/sparql-service-description#" rdf:resource="http://www.w3.org/ns/sparql-service-description#BasicFederatedQuery

: 

: 