In [1]:
!pip install rdflib torch_geometric

Collecting rdflib
  Downloading rdflib-7.1.4-py3-none-any.whl.metadata (11 kB)
Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m683.3 kB/s[0m eta [36m0:00:00[0m
Downloading rdflib-7.1.4-py3-none-any.whl (565 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m565.1/565.1 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rdflib, torch_geometric
Successfully installed rdflib-7.1.4 torch_geometric-2.6.1


In [2]:
import os
import torch
import torch.nn.functional as F
from torch import nn
from torch_geometric.data import Data
from torch_geometric.nn import RGCNConv
from torch_geometric.utils import negative_sampling
import rdflib
from rdflib.namespace import Namespace, RDF
from shapely import wkt
import glob
import random
import re
import requests
from sklearn.metrics import precision_score
from rdflib.namespace import NamespaceManager

In [3]:
OWNER = "bxbalaban"
REPO = "ai-lab"
BRANCH = "db0c4603d6b5b78929720437b2bbf93ede6ff6b6"
FOLDER_PATH = "data"

os.makedirs("ttl_files", exist_ok=True)

api_url = f"https://api.github.com/repos/{OWNER}/{REPO}/contents/{FOLDER_PATH}?ref={BRANCH}"

response = requests.get(api_url)
response.raise_for_status()
files = response.json()

ttl_files = [f for f in files if f["name"].endswith(".ttl")]

print(f"Found {len(ttl_files)} TTL files.")

for file_info in ttl_files:
    download_url = file_info["download_url"]
    filename = file_info["name"]
    print(f"Downloading {filename} ...")
    r = requests.get(download_url)
    r.raise_for_status()
    with open(os.path.join("ttl_files", filename), "wb") as f:
        f.write(r.content)

print("All TTL files downloaded to ./ttl_files/")

Found 100 TTL files.
Downloading building1.ttl ...
Downloading building10.ttl ...
Downloading building100.ttl ...
Downloading building11.ttl ...
Downloading building12.ttl ...
Downloading building13.ttl ...
Downloading building14.ttl ...
Downloading building15.ttl ...
Downloading building16.ttl ...
Downloading building17.ttl ...
Downloading building18.ttl ...
Downloading building19.ttl ...
Downloading building2.ttl ...
Downloading building20.ttl ...
Downloading building21.ttl ...
Downloading building22.ttl ...
Downloading building23.ttl ...
Downloading building24.ttl ...
Downloading building25.ttl ...
Downloading building26.ttl ...
Downloading building27.ttl ...
Downloading building28.ttl ...
Downloading building29.ttl ...
Downloading building3.ttl ...
Downloading building30.ttl ...
Downloading building31.ttl ...
Downloading building32.ttl ...
Downloading building33.ttl ...
Downloading building34.ttl ...
Downloading building35.ttl ...
Downloading building36.ttl ...
Downloading building

In [10]:
BOT = Namespace("https://w3id.org/bot#")
BOTAI = Namespace("http://www.aiLab.org/botAiLab#")
GEO = Namespace("http://www.opengis.net/ont/geosparql#")
LOCAL = Namespace("http://example.org/building/")
relation_uris = [
        BOTAI.adjacentElement,
        BOTAI.isAbove,
        BOTAI.isBelow,
        BOTAI.intersectsElement
    ]
label_map = {
        BOTAI.Column: 0,
        BOTAI.Slab: 1,
        BOTAI.Wall: 2,
        BOTAI.FloorSlab: 3,
        BOTAI.RoofSlab: 4
    }
inv_label_map = {v: k for k, v in label_map.items()}
target_classes = list(label_map.keys())
num_features = 7

In [11]:
class GeoNodeClassifier(nn.Module):
    def __init__(self, in_channels, hidden_channels=128):
        super().__init__()
        num_relations = len(relation_uris)
        num_classes = len(label_map)

        self.conv1 = RGCNConv(in_channels, hidden_channels, num_relations)
        self.conv2 = RGCNConv(hidden_channels, hidden_channels, num_relations)
        self.classifier = nn.Linear(hidden_channels, num_classes)

    def forward(self, x, edge_index, edge_type):
        x = self.conv1(x, edge_index, edge_type).relu()
        x = self.conv2(x, edge_index, edge_type).relu()
        return self.classifier(x)

def parse_ttl_to_pyg(ttl_path):
    g = rdflib.Graph()
    g.parse(ttl_path, format="turtle")
    g.bind("botAiLab", BOTAI)
    g.bind("geo", GEO)

    nodes = list(set(
        s for cls in target_classes
        for s in g.subjects(RDF.type, cls)
    ))
    node_idx = {n: i for i, n in enumerate(nodes)}
    num_nodes = len(nodes)

    features = torch.zeros((num_nodes, num_features))
    labels = torch.full((num_nodes,), -1, dtype=torch.long)  # -1 means unlabeled

    for s in nodes:
        i = node_idx[s]

        # Features
        loc = g.value(s, GEO.asWKT)
        if loc:
            pt = wkt.loads(str(loc))
            features[i][:3] = torch.tensor([pt.x, pt.y, getattr(pt, 'z', 0.0)])

        size = g.value(s, BOTAI.hasSize)
        if size:
            try:
                w, h, d = map(float, str(size).split(","))
                features[i][3:6] = torch.tensor([w, h, d])
            except:
                pass

        rot = g.value(s, BOTAI.hasRotation)
        if rot:
            try:
                features[i][6] = float(str(rot))
            except:
                pass

        # Label
        for label_uri, label_id in label_map.items():
          if (s, RDF.type, label_uri) in g:
              labels[i] = label_id
              break

    relation_to_id = {rel: i for i, rel in enumerate(relation_uris)}

    edge_list = []
    edge_types = []
    for rel in relation_uris:
        for s, _, o in g.triples((None, rel, None)):
            if s in node_idx and o in node_idx:
                edge_list.append([node_idx[s], node_idx[o]])
                edge_types.append(relation_to_id[rel])

    if not edge_list:
        edge_index = torch.empty((2, 0), dtype=torch.long)
        edge_type = torch.empty((0,), dtype=torch.long)
    else:
        edge_index = torch.tensor(edge_list, dtype=torch.long).t().contiguous()
        edge_type = torch.tensor(edge_types, dtype=torch.long)

    return Data(x=features, edge_index=edge_index, edge_type=edge_type, y=labels)


def train(model, data, optimizer):
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index, data.edge_type)
    mask = data.y >= 0  # Only use labeled nodes
    loss = F.cross_entropy(out[mask], data.y[mask])
    loss.backward()
    optimizer.step()
    return loss.item()

def main(data_folder, epochs=200):
    ttl_files = glob.glob(os.path.join(data_folder, "*.ttl"))

    filtered_files = []
    for f in ttl_files:
        filename = os.path.basename(f)
        match = re.search(r'(\d+)\.ttl$', filename)
        if match:
            num = int(match.group(1))
            if 1 <= num <= 100:
                filtered_files.append(f)

    all_graphs = [parse_ttl_to_pyg(f) for f in filtered_files]

    model = GeoNodeClassifier(in_channels=7)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    for epoch in range(1, epochs + 1):
        total_loss = 0
        for graph in all_graphs:
            loss = train(model, graph, optimizer)
            total_loss += loss
        print(f"Epoch {epoch} - Loss: {total_loss:.4f}")

    torch.save(model.state_dict(), "node_classifier_model.pt")
    print("✅ Model saved to node_classifier_model.pt")

if __name__ == "__main__":
    main(data_folder="/content/ttl_files")

Epoch 1 - Loss: 98.2149
Epoch 2 - Loss: 59.2347
Epoch 3 - Loss: 52.8366
Epoch 4 - Loss: 47.4401
Epoch 5 - Loss: 45.3931
Epoch 6 - Loss: 44.6571
Epoch 7 - Loss: 43.2901
Epoch 8 - Loss: 48.9521
Epoch 9 - Loss: 44.5002
Epoch 10 - Loss: 45.3046
Epoch 11 - Loss: 43.1399
Epoch 12 - Loss: 42.6178
Epoch 13 - Loss: 39.9687
Epoch 14 - Loss: 39.1436
Epoch 15 - Loss: 38.3049
Epoch 16 - Loss: 39.1959
Epoch 17 - Loss: 42.3992
Epoch 18 - Loss: 38.9857
Epoch 19 - Loss: 40.5176
Epoch 20 - Loss: 37.9712
Epoch 21 - Loss: 38.4813
Epoch 22 - Loss: 39.2898
Epoch 23 - Loss: 37.7811
Epoch 24 - Loss: 36.5616
Epoch 25 - Loss: 43.2396
Epoch 26 - Loss: 36.8405
Epoch 27 - Loss: 38.3059
Epoch 28 - Loss: 37.1009
Epoch 29 - Loss: 35.9241
Epoch 30 - Loss: 35.2319
Epoch 31 - Loss: 34.0406
Epoch 32 - Loss: 35.8952
Epoch 33 - Loss: 33.8622
Epoch 34 - Loss: 36.0534
Epoch 35 - Loss: 36.1839
Epoch 36 - Loss: 39.2689
Epoch 37 - Loss: 36.7367
Epoch 38 - Loss: 35.8906
Epoch 39 - Loss: 35.1927
Epoch 40 - Loss: 33.1105
Epoch 41 

In [12]:
def load_graph_for_inference(ttl_file):
    g = rdflib.Graph()
    g.parse(ttl_file, format="turtle")
    g.bind("botAiLab", BOTAI)
    g.bind("geo", GEO)

    nodes = list(set(g.subjects()))
    node_index = {n: i for i, n in enumerate(nodes)}
    index_node = {i: n for n, i in node_index.items()}
    num_nodes = len(nodes)

    features = torch.zeros((num_nodes, num_features))

    for s in nodes:
        loc = g.value(s, BOTAI.hasLocation)
        if loc:
            try:
                coords = list(map(float, str(loc).split(",")))
                if len(coords) == 3:
                    x, y, z = coords
                    wkt_str = f"POINT Z({x} {y} {z})"
                    g.add((s, GEO.asWKT, rdflib.Literal(wkt_str, datatype=GEO.wktLiteral)))
            except:
                continue

    for s in nodes:
        i = node_index[s]
        loc = g.value(s, GEO.asWKT)
        if loc:
            pt = wkt.loads(str(loc))
            features[i][:3] = torch.tensor([pt.x, pt.y, getattr(pt, 'z', 0.0)])

        size = g.value(s, BOTAI.hasSize)
        if size:
            try:
                w, h, d = map(float, str(size).split(","))
                features[i][3:6] = torch.tensor([w, h, d])
            except:
                pass

        rot = g.value(s, BOTAI.hasRotation)
        if rot:
            try:
                features[i][6] = float(str(rot))
            except:
                pass

    relation_to_id = {rel: i for i, rel in enumerate(relation_uris)}

    edge_list = []
    edge_types = []

    for rel in relation_uris:
        for s, _, o in g.triples((None, rel, None)):
            if s in node_index and o in node_index:
                edge_list.append([node_index[s], node_index[o]])
                edge_types.append(relation_to_id[rel])

    if not edge_list:
        edge_index = torch.empty((2, 0), dtype=torch.long)
        edge_type = torch.empty((0,), dtype=torch.long)
    else:
        edge_index = torch.tensor(edge_list, dtype=torch.long).t().contiguous()
        edge_type = torch.tensor(edge_types, dtype=torch.long)

    return Data(x=features, edge_index=edge_index, edge_type=edge_type), index_node

In [13]:
def predict_node_classes(ttl_file, model_path):
    original_graph = rdflib.Graph()
    original_graph.parse(ttl_file, format="turtle")

    target_nodes = {}
    target_classes.append(BOT.Element)
    for cls in target_classes:
        for s in original_graph.subjects(RDF.type, cls):
            target_nodes[s] = cls

    if not target_nodes:
        print("🚫 No target-class nodes found.")
        return

    for node, original_class in target_nodes.items():
        original_graph.remove((node, RDF.type, original_class))
        original_graph.add((node, RDF.type, BOT.Element))

    temp_ttl = "temp_elementized.ttl"
    original_graph.serialize(temp_ttl, format="turtle")

    data, index_node = load_graph_for_inference(temp_ttl)
    node_index = {v: k for k, v in index_node.items()}

    element_indices = [
        node_index[s] for s in target_nodes.keys() if s in node_index
    ]
    if not element_indices:
        print("🚫 No valid bot:Element nodes found in graph.")
        return

    model = GeoNodeClassifier(in_channels=num_features)
    model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))
    model.eval()

    with torch.no_grad():
        logits = model(data.x, data.edge_index, data.edge_type)
        predictions = logits.argmax(dim=1)

    # Compare predictions to original class labels
    correct = 0
    total = 0
    print("📊 Predictions on bot:Element nodes:")
    for idx in element_indices:
        node_uri = rdflib.URIRef(index_node[idx])
        pred_class = predictions[idx].item()
        predicted_uri = inv_label_map[pred_class]
        original_uri = target_nodes.get(node_uri)

        # Add predicted class triple
        original_graph.add((node_uri, RDF.type, predicted_uri))

        # Accuracy check
        if original_uri == predicted_uri:
            correct += 1
        total += 1

        print(f"{node_uri} → Predicted: {predicted_uri.split('#')[-1]}, "
              f"Original: {original_uri.split('#')[-1]}")

    accuracy = correct / total if total > 0 else 0
    print(f"\n✅ Classification accuracy: {accuracy:.2%}")

    # Save updated TTL
    folder = os.path.dirname(ttl_file)
    filename = os.path.basename(ttl_file)
    enriched_path = os.path.join(folder, f"classified_{filename}")

    namespace_manager = NamespaceManager(original_graph)
    namespace_manager.bind("botAiLab", BOTAI)
    namespace_manager.bind("bot", BOT)
    namespace_manager.bind("local", LOCAL)
    original_graph.namespace_manager = namespace_manager
    original_graph.serialize(destination=enriched_path, format="turtle")

    print(f"✅ Enriched TTL saved to: {enriched_path}")
    return accuracy

In [14]:
def predict_node_classes_in_folder(folder_path, model_path):
    ttl_files = glob.glob(os.path.join(folder_path, "*.ttl"))

    filtered_files = []
    for f in ttl_files:
        filename = os.path.basename(f)
        match = re.search(r'^building(\d+)\.ttl$', filename)
        if match:
            num = int(match.group(1))
            if 1 <= num <= 100:
                filtered_files.append(f)

    if not filtered_files:
        print("🚫 No TTL files found in the folder.")
        return

    accuracies = []
    for ttl_file in filtered_files:
        full_path = os.path.join(folder_path, ttl_file)
        print(f"\n🔍 Processing: {ttl_file}")
        try:
            acc = predict_node_classes(
                ttl_file=full_path,
                model_path=model_path
            )
            if acc is not None:
                accuracies.append(acc)
        except Exception as e:
            print(f"⚠️ Failed to classify {ttl_file}: {e}")

    if accuracies:
        avg_acc = sum(accuracies) / len(accuracies)
        print(f"\n📊 Average accuracy over {len(accuracies)} files: {avg_acc:.2%}")
    else:
        print("\n🚫 No accuracy could be computed.")


In [15]:
predict_node_classes_in_folder(
    folder_path="/content/ttl_files",
    model_path="/content/node_classifier_model.pt"
)


🔍 Processing: /content/ttl_files/building46.ttl
📊 Predictions on bot:Element nodes:
http://example.org/building/storey_1_column_1 → Predicted: Column, Original: Column
http://example.org/building/storey_1_column_3 → Predicted: Column, Original: Column
http://example.org/building/storey_4_column_1 → Predicted: Column, Original: Column
http://example.org/building/storey_4_column_2 → Predicted: Column, Original: Column
http://example.org/building/storey_4_column_3 → Predicted: Column, Original: Column
http://example.org/building/storey_5_column_1 → Predicted: Column, Original: Column
http://example.org/building/storey_5_column_2 → Predicted: Column, Original: Column
http://example.org/building/storey_5_column_3 → Predicted: Column, Original: Column
http://example.org/building/storey_1_column_2 → Predicted: Column, Original: Column
http://example.org/building/storey_3_column_1 → Predicted: Column, Original: Column
http://example.org/building/storey_3_column_2 → Predicted: Column, Original