# ABox Summary and Descriptive Statistics
This notebook provides an overview of the RDF knowledge graph defined in the ABox.
We compute statistics such as:

- Total number of triples
- Number of distinct classes and properties
- Instances per class
- Triple usage per property

In [None]:
# Run this once if rdflib, matplotlib, or pandas is not installed
# (%pip installs into the environment of the running kernel)
# %pip install rdflib matplotlib pandas

from rdflib import Graph, RDF, RDFS
from collections import defaultdict, Counter
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Path to the Turtle file holding the ABox (a relative path)
abox_path = "../GroupIvanWalter-B2-MartinezTroiani.ttl"

# Read the file into a fresh rdflib graph
g = Graph()
g.parse(abox_path, format="turtle")

# Report the size of the loaded graph (len(g) is its triple count)
print(f"Graph loaded with {len(g)} triples.")

In [None]:
# Count instances of each rdf:type (class).
# Every (subject, rdf:type, class) triple adds one to that class's tally.
# Counter is used here, as it is for the property counts, instead of a
# hand-written defaultdict loop.
class_counts = Counter(
    cls for _subj, _pred, cls in g.triples((None, RDF.type, None))
)

# Number of distinct classes that appear as the object of an rdf:type triple
print(f"Distinct classes: {len(class_counts)}")

# Display as a DataFrame sorted from most to least populated class
df_classes = (
    pd.DataFrame(list(class_counts.items()), columns=["Class", "Count"])
    .sort_values(by="Count", ascending=False)
    .reset_index(drop=True)
)
df_classes

In [None]:
# Bar chart of instances per class: one bar per row of df_classes
fig, ax = plt.subplots(figsize=(10, 5))

# Class identifiers are converted to plain strings so they can serve as bar labels
class_labels = df_classes["Class"].astype(str)
ax.bar(class_labels, df_classes["Count"], color="skyblue")

# Labels are rotated to vertical so neighbouring ones do not overlap
ax.tick_params(axis="x", labelrotation=90)
ax.set_title("Instances per Class")
ax.set_ylabel("Count")

fig.tight_layout()
plt.show()

In [None]:
# Count of properties (predicates): iterate over every triple in the graph
# and tally how many times each predicate occurs.
property_counts = Counter(pred for _subj, pred, _obj in g)

# Number of distinct properties used anywhere in the graph
print(f"Distinct properties: {len(property_counts)}")

# Display as a DataFrame sorted from most to least used property
df_props = (
    pd.DataFrame(list(property_counts.items()), columns=["Property", "Count"])
    .sort_values(by="Count", ascending=False)
    .reset_index(drop=True)
)
df_props

In [None]:
# Bar chart of property usage: one bar per row of df_props
fig, ax = plt.subplots(figsize=(10, 5))

# Property identifiers are converted to plain strings so they can serve as bar labels
property_labels = df_props["Property"].astype(str)
ax.bar(property_labels, df_props["Count"], color="salmon")

# Labels are rotated to vertical so neighbouring ones do not overlap
ax.tick_params(axis="x", labelrotation=90)
ax.set_title("Usage per Property")
ax.set_ylabel("Triple Count")

fig.tight_layout()
plt.show()

In [None]:
# Pie chart of the most frequent class types.
# Only classes whose instance count is strictly greater than 5 are drawn.
is_frequent = df_classes["Count"] > 5
df_top = df_classes[is_frequent]

fig, ax = plt.subplots(figsize=(6, 6))
ax.pie(
    df_top["Count"],
    labels=df_top["Class"].astype(str),
    autopct='%1.1f%%',  # percentage label with one decimal place on each wedge
)
ax.set_title("Distribution of Main Classes")
plt.show()