In [2]:
import rdflib
from rdflib.namespace import RDF, RDFS, OWL

# Load the ontology from TTL file
g = rdflib.Graph()
g.parse("final_ontology_extension.ttl", format="turtle")

# Count classes (entities of type owl:Class or appearing in subclass relationships)
classes = {s for s, p, o in g.triples((None, RDF.type, OWL.Class))} 
for s, p, o in g.triples((None, RDFS.subClassOf, None)):
    classes.add(s); 
    if o != OWL.Class:  # superclass is also a class URI
        classes.add(o)

# Count individuals (entities of type owl:NamedIndividual or instances of classes)
individuals = {s for s,p,o in g.triples((None, RDF.type, OWL.NamedIndividual))}
for s, p, o in g.triples((None, RDF.type, None)):
    if o in classes:
        individuals.add(s)

# Count used properties (predicates that appear in at least one triple, excluding RDF/RDFS builtins)
properties = {p for s,p,o in g if p not in (RDF.type, RDFS.subClassOf)}

# Count total triples
total_triples = len(g)

# Compute annotation coverage: fraction of entities with an rdfs:label, rdfs:comment or dcterms:description
annot_props = {RDFS.label, RDFS.comment, rdflib.term.URIRef("http://purl.org/dc/terms/description")}
annotated_entities = {s for s,p,o in g.triples((None, None, None)) if p in annot_props}
entities = classes | individuals
coverage = len(entities & annotated_entities) / len(entities) if entities else 0.0

print(f"Classes: {len(classes)}")
print(f"Individuals: {len(individuals)}")
print(f"Entities (classes+individuals): {len(entities)}")
print(f"Properties (used): {len(properties)}")
print(f"Triples: {total_triples}")
print(f"Annotation coverage: {coverage:.2%}")

Classes: 24
Individuals: 217
Entities (classes+individuals): 241
Properties (used): 19
Triples: 1349
Annotation coverage: 70.95%


In [3]:
import networkx as nx

# Build a directed graph from the RDF triples
G = nx.DiGraph()
for subj, pred, obj in g:
    # Skip literal nodes (annotations) to focus on structural graph
    if isinstance(obj, rdflib.term.Literal):
        continue
    # Optionally, skip rdf:type edges to omit class membership (focus on instance network)
    if pred == RDF.type:
        continue
    G.add_edge(str(subj), str(obj), predicate=str(pred))

# Basic graph info
print(f"Graph nodes: {G.number_of_nodes()}") 
print(f"Graph edges: {G.number_of_edges()}")

Graph nodes: 521
Graph edges: 640


In [5]:
import numpy as np
import networkx as nx

# --- Compute Graph-Level Metrics ---
num_nodes = G.number_of_nodes()
num_edges = G.number_of_edges()
density = nx.density(G)  # Graph density

# Centrality measures
deg_centrality = nx.degree_centrality(G)            # degree centrality (as undirected)
in_deg_centrality = nx.in_degree_centrality(G)      # in-degree centrality
out_deg_centrality = nx.out_degree_centrality(G)    # out-degree centrality
closeness = nx.closeness_centrality(G)              # closeness (treating G as is or as undirected)
betweenness = nx.betweenness_centrality(G)          # betweenness centrality

# --- REPLACEMENT for eigenvector_centrality_numpy ---
# Use the iterative version that handles disconnected graphs gracefully:
try:
    eigenvector = nx.eigenvector_centrality(G, max_iter=1000, tol=1e-06)
except nx.PowerIterationFailedConvergence as e:
    print(f"[Warning] Eigenvector centrality did not converge: {e}")
    # Fallback: set all eigenvector centralities to 0 or handle as needed
    eigenvector = {n: 0 for n in G.nodes()}

# PageRank
pagerank = nx.pagerank(G, alpha=0.85)

# Clustering coefficient (treat as undirected)
clustering = nx.average_clustering(G.to_undirected())

print(f"Nodes: {num_nodes}, Edges: {num_edges}, Density: {density:.4f}")
print(f"Average clustering coefficient: {clustering:.4f}")

# Example: Show top 5 nodes by degree centrality & betweenness
top5_deg = sorted(deg_centrality.items(), key=lambda x: x[1], reverse=True)[:5]
top5_betw = sorted(betweenness.items(), key=lambda x: x[1], reverse=True)[:5]
print("Top 5 nodes by degree centrality:", [n for n, _ in top5_deg])
print("Top 5 nodes by betweenness centrality:", [n for n, _ in top5_betw])

Nodes: 521, Edges: 640, Density: 0.0024
Average clustering coefficient: 0.0156
Top 5 nodes by degree centrality: ['http://www.semanticweb.org/vbr240/ontologies/2022/4/untitled-ontology-51/processingInformation', 'https://github.com/EliasLiinamaa/kgst_project_group_3/10.3233/_actor_Human_Annotator', 'https://github.com/EliasLiinamaa/kgst_project_group_3/10.3233/_actor_Artificial_Agent', 'https://github.com/EliasLiinamaa/kgst_project_group_3/Market_Trading', 'http://www.semanticweb.org/vbr240/ontologies/2022/4/untitled-ontology-51/hasInteraction']
Top 5 nodes by betweenness centrality: ['http://www.semanticweb.org/vbr240/ontologies/2022/4/untitled-ontology-51/processingInformation', 'https://github.com/EliasLiinamaa/kgst_project_group_3/10.3233/_actor_Human_Annotator', 'https://github.com/EliasLiinamaa/kgst_project_group_3/10.3233/_actor_Artificial_Agent', 'https://github.com/EliasLiinamaa/kgst_project_group_3/Market_Trading', 'http://www.semanticweb.org/vbr240/ontologies/2022/4/untitled

In [6]:
import pandas as pd
centrality_df = pd.DataFrame({
    'in_degree': pd.Series(in_deg_centrality),
    'out_degree': pd.Series(out_deg_centrality),
    'closeness': pd.Series(closeness),
    'betweenness': pd.Series(betweenness),
    'eigenvector': pd.Series(eigenvector),
    'pagerank': pd.Series(pagerank)
}).fillna(0)
centrality_df.head(10)

Unnamed: 0,in_degree,out_degree,closeness,betweenness,eigenvector,pagerank
https://github.com/EliasLiinamaa/kgst_project_group_3/10.3233/_actor_AI_System_processing_information_https://github.com/EliasLiinamaa/kgst_project_group_3/Intelligent_Opinion_Sampling,0.0,0.013462,0.0,0.0,3.218165e-09,0.001471
https://github.com/EliasLiinamaa/kgst_project_group_3/Argument_Quality_Classification,0.001923,0.0,0.001923,0.0,1.84079e-06,0.00165
https://github.com/EliasLiinamaa/kgst_project_group_3/10.3233/_actor_Human_Annotator_processing_information_https://github.com/EliasLiinamaa/kgst_project_group_3/Topic_Assignment,0.0,0.011538,0.0,0.0,3.218165e-09,0.001471
http://www.semanticweb.org/vbr240/ontologies/2022/4/untitled-ontology-51/processingInformation,0.092308,0.003846,0.092308,0.000356,8.820668e-05,0.013224
https://github.com/EliasLiinamaa/kgst_project_group_3/User_Understanding,0.001923,0.003846,0.001923,7e-06,1.84079e-06,0.00168
http://dbpedia.org/resource/Understanding,0.001923,0.0,0.002564,0.0,0.0005255488,0.002185
https://github.com/EliasLiinamaa/kgst_project_group_3/_actor_Tutor_Agent_processing_information_https://github.com/EliasLiinamaa/kgst_project_group_3/Question-Answer_Processing,0.0,0.009615,0.0,0.0,3.218165e-09,0.001471
https://github.com/EliasLiinamaa/kgst_project_group_3/Question-Answer_Processing,0.003846,0.0,0.003846,0.0,3.678362e-06,0.0019
https://github.com/EliasLiinamaa/kgst_project_group_3/10.3233/N11f893214512447489651429b6f729c8_processing_information_https:/github.com/EliasLiinamaa/kgst_project_group_3/Interactive_Learning,0.0,0.013462,0.0,0.0,3.218165e-09,0.001471
https://github.com/EliasLiinamaa/kgst_project_group_3/Interactive_Learning,0.003846,0.0,0.003846,0.0,3.678362e-06,0.001789


In [8]:
# --- COMMUNITY DETECTION (without cdlib) ---

import networkx as nx

# Convert your directed graph G to undirected for community detection
G_und = G.to_undirected()

# Method 1: Greedy Modularity Communities
communities_generator = nx.algorithms.community.greedy_modularity_communities(G_und)
communities_list = list(communities_generator)
print(f"Detected {len(communities_list)} communities (using greedy_modularity_communities).")

for i, cset in enumerate(communities_list[:5], start=1):
    print(f"Community {i} has {len(cset)} nodes. Example nodes: {list(cset)[:5]} ...")



Detected 24 communities (using greedy_modularity_communities).
Community 1 has 69 nodes. Example nodes: ['http://dbpedia.org/resource/Human_being', 'https://github.com/EliasLiinamaa/kgst_project_group_3/Perspective_Alignment', 'https://api.conceptnet.io/c/en/artificial_person', 'https://github.com/EliasLiinamaa/kgst_project_group_3/10.3233/_scenario_Collaborative_Game_Play_interaction_https://github.com/EliasLiinamaa/kgst_project_group_3/Collaborative_Card_Game', 'https://github.com/EliasLiinamaa/kgst_project_group_3/Collaborative_Card_Game'] ...
Community 2 has 44 nodes. Example nodes: ['https://github.com/EliasLiinamaa/kgst_project_group_3/Intelligent_Merging', 'https://github.com/EliasLiinamaa/kgst_project_group_3/Pairwise_Annotation', 'https://github.com/EliasLiinamaa/kgst_project_group_3/Graph_Clustering', 'https://api.conceptnet.io/c/en/clusterise', 'https://github.com/EliasLiinamaa/kgst_project_group_3/Manual_Assignment'] ...
Community 3 has 42 nodes. Example nodes: ['http://dbp

In [19]:
from pyvis.network import Network
from IPython.display import IFrame

def short_label(uri, max_len=20):
    uri_str = str(uri)
    if '#' in uri_str:
        candidate = uri_str.rsplit('#', 1)[-1]
    else:
        candidate = uri_str.rsplit('/', 1)[-1]
    if len(candidate) > max_len:
        return candidate[:max_len] + "..."
    return candidate

# Create PyVis network
net = Network(height="600px",
              width="100%",
              notebook=True,
              cdn_resources='in_line',
              directed=True)

# This is valid JSON (not JavaScript) for net.set_options:
options_json = """
{
  "nodes": {
    "shape": "dot",
    "size": 10,
    "font": {
      "size": 12
    }
  },
  "edges": {
    "arrows": {
      "to": {
        "enabled": true,
        "scaleFactor": 0.5
      }
    }
  },
  "physics": {
    "forceAtlas2Based": {
      "gravitationalConstant": -50,
      "centralGravity": 0.01,
      "springLength": 100,
      "springConstant": 0.08
    },
    "maxVelocity": 50,
    "solver": "forceAtlas2Based",
    "timestep": 0.35,
    "stabilization": {
      "iterations": 150
    }
  }
}
"""

net.set_options(options_json)

# Suppose G is your graph. We'll add nodes/edges with shortened labels.
for node in G.nodes():
    s_label = short_label(node)
    net.add_node(
        str(node),
        label=s_label,
        title=str(node)
    )

for source, target in G.edges():
    net.add_edge(str(source), str(target))

net.show("graph.html")
display(IFrame(src="graph.html", width="100%", height="650px"))

graph.html
