In [2]:
import xml.etree.ElementTree as ET
from io import StringIO
from lxml import etree
import numpy as np
import pandas as pd
import re

In [3]:
# Let long URIs show in full (up to 100 characters) when DataFrames are displayed.
pd.set_option("display.max_colwidth", 100)

In [4]:
# Parse the ontology with the standard-library ElementTree. Only `root` from this parse is
# used afterwards (to read xml:base); `tree` is re-bound to an lxml tree in a later cell.
tree = ET.parse('admingeo.owl')
root = tree.getroot()

In [5]:
# Read the ontology's base namespace from the xml:base attribute of the root element.
# ElementTree exposes an element's attributes through `.attrib`, keyed as "{namespace-uri}local-name".
# http://www.w3.org/XML/1998/namespace is the namespace bound by definition to the `xml:` prefix.
# Raises KeyError if the root element carries no xml:base attribute.

base_ns = root.attrib['{http://www.w3.org/XML/1998/namespace}base']

In [6]:
# Echo the xml:base value read from the root element.
print(f"The base namespace is {base_ns}")

The base namespace is http://data.ordnancesurvey.co.uk/ontology/admingeo/


In [7]:
# Collect every prefix -> URI namespace declaration found in the OWL file.
# With the 'start-ns' event, iterparse yields (event, (prefix, uri)) pairs; a prefix
# declared more than once keeps the URI of its last declaration.

my_namespaces = {
    prefix: uri
    for _event, (prefix, uri) in ET.iterparse("admingeo.owl", events=['start-ns'])
}

In [8]:
my_namespaces

{'': 'http://data.ordnancesurvey.co.uk/ontology/admingeo/',
 'dc': 'http://purl.org/dc/elements/1.1/',
 'rdf-schema': 'http://data.ordnancesurvey.co.uk/ontology/admingeo/http://www.w3.org/2000/01/rdf-schema#',
 'spatialrelations': 'http://data.ordnancesurvey.co.uk/ontology/spatialrelations/',
 'geometry': 'http://data.ordnancesurvey.co.uk/ontology/geometry/',
 'rdfs': 'http://www.w3.org/2000/01/rdf-schema#',
 'owl2xml': 'http://www.w3.org/2006/12/owl2-xml#',
 'dct': 'http://purl.org/dc/terms/',
 'owl': 'http://www.w3.org/2002/07/owl#',
 'xsd': 'http://www.w3.org/2001/XMLSchema#',
 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
 'core': 'http://data.ordnancesurvey.co.uk/ontology/admingeo/http://www.w3.org/2004/02/skos/core#',
 'skos': 'http://www.w3.org/2004/02/skos/core#',
 'cc': 'http://creativecommons.org/ns#'}

In [9]:
# Parse the same file again with lxml, whose tree objects provide the `.xpath()` method
# used further down. NOTE: this re-binds `tree`, replacing the ElementTree object parsed earlier.
tree = etree.parse('admingeo.owl')

In [10]:
tree

<lxml.etree._ElementTree at 0x7fad215f68c0>

In [11]:
# lxml's XPath support does not accept an empty namespace prefix, so the default
# namespace is exposed under an explicit 'admingeo' prefix instead.
# The membership test makes this cell safe to re-run: once the '' key has been moved,
# a second execution is a no-op instead of raising KeyError.
if '' in my_namespaces:
    my_namespaces['admingeo'] = my_namespaces.pop('')

In [12]:
my_namespaces

{'dc': 'http://purl.org/dc/elements/1.1/',
 'rdf-schema': 'http://data.ordnancesurvey.co.uk/ontology/admingeo/http://www.w3.org/2000/01/rdf-schema#',
 'spatialrelations': 'http://data.ordnancesurvey.co.uk/ontology/spatialrelations/',
 'geometry': 'http://data.ordnancesurvey.co.uk/ontology/geometry/',
 'rdfs': 'http://www.w3.org/2000/01/rdf-schema#',
 'owl2xml': 'http://www.w3.org/2006/12/owl2-xml#',
 'dct': 'http://purl.org/dc/terms/',
 'owl': 'http://www.w3.org/2002/07/owl#',
 'xsd': 'http://www.w3.org/2001/XMLSchema#',
 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
 'core': 'http://data.ordnancesurvey.co.uk/ontology/admingeo/http://www.w3.org/2004/02/skos/core#',
 'skos': 'http://www.w3.org/2004/02/skos/core#',
 'cc': 'http://creativecommons.org/ns#',
 'admingeo': 'http://data.ordnancesurvey.co.uk/ontology/admingeo/'}

In [13]:
#Xpath string to get all classes referenced on classes definitions.

paths_from_classes = """./owl:Class//owl:disjointWith[@rdf:resource]|
./owl:Class//owl:complementOf/owl:Class[@rdf:about]|
./owl:Class//owl:unionOf/owl:Class[@rdf:about]|
./owl:Class//owl:unionOf/rdf:Description[@rdf:about]|
./owl:Class//owl:intersectionOf/owl:Class[@rdf:about]|
./owl:Class//owl:intersectionOf/rdf:Description[@rdf:about]|
./owl:Class//rdfs:subClassOf/owl:Restriction/owl:allValuesFrom[@rdf:resource]|
./owl:Class//rdfs:subClassOf/owl:Restriction/owl:someValuesFrom[@rdf:resource]|
./owl:Class//rdfs:subClassOf/owl:Restriction/owl:onClass[@rdf:resource]|
./owl:Class/rdfs:subClassOf[@rdf:resource]"""

In [14]:
#Xpath string to get all classes referenced on properties definitions.

paths_from_properties = """./owl:ObjectProperty/rdfs:domain/owl:Class//owl:disjointWith[@rdf:resource]|
./owl:ObjectProperty/rdfs:domain/owl:Class//owl:complementOf/owl:Class[@rdf:about]|
./owl:ObjectProperty/rdfs:domain/owl:Class//owl:unionOf/owl:Class[@rdf:about]|
./owl:ObjectProperty/rdfs:domain/owl:Class//owl:unionOf/rdf:Description[@rdf:about]|
./owl:ObjectProperty/rdfs:domain/owl:Class//owl:intersectionOf/owl:Class[@rdf:about]|
./owl:ObjectProperty/rdfs:domain/owl:Class//owl:intersectionOf/rdf:Description[@rdf:about]|
./owl:ObjectProperty/rdfs:domain/owl:Class//rdfs:subClassOf/owl:Restriction/owl:allValuesFrom[@rdf:resource]|
./owl:ObjectProperty/rdfs:domain/owl:Class//rdfs:subClassOf/owl:Restriction/owl:someValuesFrom[@rdf:resource]|
./owl:ObjectProperty/rdfs:domain/owl:Class//rdfs:subClassOf/owl:Restriction/owl:onClass[@rdf:resource]|
./owl:ObjectProperty/rdfs:domain/owl:Class//rdfs:subClassOf[@rdf:resource]|
./owl:ObjectProperty/rdfs:domain/owl:Class[@rdf:about]|
./owl:ObjectProperty/rdfs:range/owl:Class//owl:disjointWith[@rdf:resource]|
./owl:ObjectProperty/rdfs:range/owl:Class//owl:complementOf/owl:Class[@rdf:about]|
./owl:ObjectProperty/rdfs:range/owl:Class//owl:unionOf/owl:Class[@rdf:about]|
./owl:ObjectProperty/rdfs:range/owl:Class//owl:unionOf/rdf:Description[@rdf:about]|
./owl:ObjectProperty/rdfs:range/owl:Class//owl:intersectionOf/owl:Class[@rdf:about]|
./owl:ObjectProperty/rdfs:range/owl:Class//owl:intersectionOf/rdf:Description[@rdf:about]|
./owl:ObjectProperty/rdfs:range/owl:Class//rdfs:subClassOf/owl:Restriction/owl:allValuesFrom[@rdf:resource]|
./owl:ObjectProperty/rdfs:range/owl:Class//rdfs:subClassOf/owl:Restriction/owl:someValuesFrom[@rdf:resource]|
./owl:ObjectProperty/rdfs:range/owl:Class//rdfs:subClassOf/owl:Restriction/owl:onClass[@rdf:resource]|
./owl:ObjectProperty/rdfs:range/owl:Class//rdfs:subClassOf[@rdf:resource]|
./owl:ObjectProperty/rdfs:range/owl:Class[@rdf:about]"""

In [15]:
# Combine both XPath unions into a single expression (alternatives separated by "|").
paths = "|\n".join([paths_from_classes, paths_from_properties])

In [16]:
# Print (rather than display the repr) so the newline between XPath alternatives is rendered.
print(paths)

./owl:Class//owl:disjointWith[@rdf:resource]|
./owl:Class//owl:complementOf/owl:Class[@rdf:about]|
./owl:Class//owl:unionOf/owl:Class[@rdf:about]|
./owl:Class//owl:unionOf/rdf:Description[@rdf:about]|
./owl:Class//owl:intersectionOf/owl:Class[@rdf:about]|
./owl:Class//owl:intersectionOf/rdf:Description[@rdf:about]|
./owl:Class//rdfs:subClassOf/owl:Restriction/owl:allValuesFrom[@rdf:resource]|
./owl:Class//rdfs:subClassOf/owl:Restriction/owl:someValuesFrom[@rdf:resource]|
./owl:Class//rdfs:subClassOf/owl:Restriction/owl:onClass[@rdf:resource]|
./owl:Class/rdfs:subClassOf[@rdf:resource]|
./owl:ObjectProperty/rdfs:domain/owl:Class//owl:disjointWith[@rdf:resource]|
./owl:ObjectProperty/rdfs:domain/owl:Class//owl:complementOf/owl:Class[@rdf:about]|
./owl:ObjectProperty/rdfs:domain/owl:Class//owl:unionOf/owl:Class[@rdf:about]|
./owl:ObjectProperty/rdfs:domain/owl:Class//owl:unionOf/rdf:Description[@rdf:about]|
./owl:ObjectProperty/rdfs:domain/owl:Class//owl:intersectionOf/owl:Class[@rdf:abou

In [17]:
# Evaluate the combined XPath union against the lxml tree and collect the matching
# element nodes; `my_namespaces` supplies the prefix -> URI bindings used in the expression.
classes = tree.xpath(paths, namespaces = my_namespaces)

In [18]:
# NOTE: this counts matched XML elements, i.e. class *references*. The same class URI can be
# matched several times, so this is not the number of distinct classes.
print(f"We found {len(classes)} classes.")

We found 94 classes.


In [19]:
def rdf_resource_or_about(element):
    """Return the class URI carried by an XML element of the ontology.

    The URI is read from the element's ``rdf:resource`` attribute when that
    attribute is present, otherwise from its ``rdf:about`` attribute.

    Parameters
    ----------
    element
        An XML element node exposing a mapping-like ``attrib``
        (attributes keyed as ``"{namespace-uri}local-name"``).

    Returns
    -------
    str or None
        The attribute value, or ``None`` when the element has neither attribute.
    """
    # Candidate attribute names in order of precedence: rdf:resource wins over rdf:about.
    candidate_keys = (
        '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource',
        '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about',
    )
    # Query the attribute mapping directly; no need to copy it into a dict first.
    attributes = element.attrib
    for key in candidate_keys:
        if key in attributes:
            return attributes[key]
    return None

In [20]:
def classes_uri(elements):
    """Return the rdf:resource (or rdf:about) value of every element, in input order.

    Each item of ``elements`` is passed to ``rdf_resource_or_about``; the results
    are collected into a new list.
    """
    return [rdf_resource_or_about(node) for node in elements]

In [21]:
def get_namespace(string):
    """Return the namespace part of a URI string.

    The namespace is everything from the start of ``string`` up to and including
    its last ``/`` or ``#`` (``.*`` is greedy, so the *last* separator is used).

    Parameters
    ----------
    string : str
        The URI to split.

    Returns
    -------
    str
        The namespace prefix, or ``''`` when ``string`` contains no ``/`` or ``#``
        (previously this case raised ``AttributeError`` on the failed match).
    """
    # re.match anchors at the start of the string, like the explicit "^".
    found = re.match(r"^.*[/#]", string)
    if found is None:
        return ""
    return found.group()

In [22]:
# Resolve each matched element to the class URI it refers to (duplicates are kept,
# one entry per matched element).
classes_names = classes_uri(classes)
classes_names

['http://data.ordnancesurvey.co.uk/ontology/admingeo/GreaterLondonAuthorityAssemblyConstituency',
 'http://data.ordnancesurvey.co.uk/ontology/admingeo/ScottishParliamentConstituency',
 'http://data.ordnancesurvey.co.uk/ontology/admingeo/WelshAssemblyConstituency',
 'http://data.ordnancesurvey.co.uk/ontology/admingeo/GreaterLondonAuthority',
 'http://data.ordnancesurvey.co.uk/ontology/admingeo/ScottishParliamentElectoralRegion',
 'http://data.ordnancesurvey.co.uk/ontology/admingeo/WelshAssemblyElectoralRegion',
 'http://data.ordnancesurvey.co.uk/ontology/admingeo/County',
 'http://data.ordnancesurvey.co.uk/ontology/admingeo/GreaterLondonAuthority',
 'http://data.ordnancesurvey.co.uk/ontology/admingeo/District',
 'http://data.ordnancesurvey.co.uk/ontology/admingeo/LondonBorough',
 'http://data.ordnancesurvey.co.uk/ontology/admingeo/MetropolitanDistrict',
 'http://data.ordnancesurvey.co.uk/ontology/admingeo/UnitaryAuthority',
 'http://data.ordnancesurvey.co.uk/ontology/admingeo/County',
 

In [23]:

# Build a two-column table: each referenced class URI and the namespace it belongs to.
s = pd.Series(np.array(classes_names), name='class')
df = s.to_frame().assign(namespace=s.apply(get_namespace))
df

Unnamed: 0,class,namespace
0,http://data.ordnancesurvey.co.uk/ontology/admingeo/GreaterLondonAuthorityAssemblyConstituency,http://data.ordnancesurvey.co.uk/ontology/admingeo/
1,http://data.ordnancesurvey.co.uk/ontology/admingeo/ScottishParliamentConstituency,http://data.ordnancesurvey.co.uk/ontology/admingeo/
2,http://data.ordnancesurvey.co.uk/ontology/admingeo/WelshAssemblyConstituency,http://data.ordnancesurvey.co.uk/ontology/admingeo/
3,http://data.ordnancesurvey.co.uk/ontology/admingeo/GreaterLondonAuthority,http://data.ordnancesurvey.co.uk/ontology/admingeo/
4,http://data.ordnancesurvey.co.uk/ontology/admingeo/ScottishParliamentElectoralRegion,http://data.ordnancesurvey.co.uk/ontology/admingeo/
...,...,...
89,http://data.ordnancesurvey.co.uk/ontology/admingeo/Constituency,http://data.ordnancesurvey.co.uk/ontology/admingeo/
90,http://data.ordnancesurvey.co.uk/ontology/admingeo/WelshAssemblyElectoralRegion,http://data.ordnancesurvey.co.uk/ontology/admingeo/
91,http://data.ordnancesurvey.co.uk/ontology/admingeo/CivilVotingArea,http://data.ordnancesurvey.co.uk/ontology/admingeo/
92,http://data.ordnancesurvey.co.uk/ontology/admingeo/WelshAssemblyConstituency,http://data.ordnancesurvey.co.uk/ontology/admingeo/


In [24]:
# Show a random sample of rows. The fixed seed keeps the displayed rows identical
# across re-runs, and the size is capped so a table with fewer than 10 rows does not raise.
df.sample(n=min(10, len(df)), random_state=0)

Unnamed: 0,class,namespace
31,http://data.ordnancesurvey.co.uk/ontology/admingeo/Ward,http://data.ordnancesurvey.co.uk/ontology/admingeo/
85,http://data.ordnancesurvey.co.uk/ontology/admingeo/District,http://data.ordnancesurvey.co.uk/ontology/admingeo/
23,http://data.ordnancesurvey.co.uk/ontology/admingeo/ScottishParliamentElectoralRegion,http://data.ordnancesurvey.co.uk/ontology/admingeo/
9,http://data.ordnancesurvey.co.uk/ontology/admingeo/LondonBorough,http://data.ordnancesurvey.co.uk/ontology/admingeo/
45,http://data.ordnancesurvey.co.uk/ontology/admingeo/CivilVotingArea,http://data.ordnancesurvey.co.uk/ontology/admingeo/
61,http://data.ordnancesurvey.co.uk/ontology/admingeo/GreaterLondonAuthorityAssemblyConstituency,http://data.ordnancesurvey.co.uk/ontology/admingeo/
22,http://data.ordnancesurvey.co.uk/ontology/admingeo/GreaterLondonAuthority,http://data.ordnancesurvey.co.uk/ontology/admingeo/
34,http://data.ordnancesurvey.co.uk/ontology/admingeo/EuropeanRegion,http://data.ordnancesurvey.co.uk/ontology/admingeo/
88,http://data.ordnancesurvey.co.uk/ontology/admingeo/UnitaryAuthority,http://data.ordnancesurvey.co.uk/ontology/admingeo/
25,http://data.ordnancesurvey.co.uk/ontology/admingeo/GreaterLondonAuthorityAssemblyConstituency,http://data.ordnancesurvey.co.uk/ontology/admingeo/


In [25]:
# Number of class references found per namespace.
df.groupby('namespace').count()

Unnamed: 0_level_0,class
namespace,Unnamed: 1_level_1
http://data.ordnancesurvey.co.uk/ontology/admingeo/,89
http://data.ordnancesurvey.co.uk/ontology/geometry/,3
http://www.w3.org/2002/07/owl#,2


In [26]:
# Keep only the references whose namespace differs from the ontology's own base namespace.
external_classes = df.loc[df['namespace'].ne(base_ns)]
external_classes

Unnamed: 0,class,namespace
32,http://www.w3.org/2002/07/owl#Thing,http://www.w3.org/2002/07/owl#
33,http://data.ordnancesurvey.co.uk/ontology/geometry/AbstractGeometry,http://data.ordnancesurvey.co.uk/ontology/geometry/
42,http://data.ordnancesurvey.co.uk/ontology/geometry/AbstractGeometry,http://data.ordnancesurvey.co.uk/ontology/geometry/
56,http://www.w3.org/2002/07/owl#Thing,http://www.w3.org/2002/07/owl#
57,http://data.ordnancesurvey.co.uk/ontology/geometry/AbstractGeometry,http://data.ordnancesurvey.co.uk/ontology/geometry/


In [27]:
# Number of references to each external class.
external_classes.groupby('class').count()

Unnamed: 0_level_0,namespace
class,Unnamed: 1_level_1
http://data.ordnancesurvey.co.uk/ontology/geometry/AbstractGeometry,3
http://www.w3.org/2002/07/owl#Thing,2
