In [60]:
# Install the third-party packages used by this notebook.
# NOTE(review): versions are not pinned; a later cell calls `.decode()` on the result of
# `Graph.serialize()`, which presumably assumes an rdflib release that returns bytes — confirm and pin.
!pip install rdflib
!pip install pylode
!pip install owlready2



In [68]:
import csv
import os
import re

import pylode
from owlready2 import get_ontology, sync_reasoner
from rdflib import URIRef, Graph, RDF, Literal, XSD
from rdflib.namespace import DCTERMS
from rdflib.void import generateVoID

In [69]:
# Namespace of the Scooters ontology; the class, property and individual IRIs below are built on it.
base_iri = 'http://www.semanticweb.org/Scooters#'

# Classes

In [70]:
# IRIs of the ontology classes that individuals are typed with (rdf:type objects).

# Core entities
scooter_class = URIRef(base_iri + 'Scooter')
brand_class = URIRef(base_iri + 'Brand')

# Rental / availability
rental_class = URIRef(base_iri + 'Rental')
availability_class = URIRef(base_iri + 'Availability')
place_class = URIRef(base_iri + 'Place')

configuration_class = URIRef(base_iri + 'Configuration')

wheel_configuration_class = URIRef(base_iri + 'Wheel_configuration')
two_wheel_class = URIRef(base_iri + 'Two_wheel')
three_wheel_class = URIRef(base_iri + 'Three_wheel')

engine_configuration_class = URIRef(base_iri + 'Engine_configuration')
single_class = URIRef(base_iri + 'Single')
double_class = URIRef(base_iri + 'Double')

construction_configuration_class = URIRef(base_iri + 'Construction_configuration')
# Each variable must point at the class it is named after; if the two IRIs are crossed,
# "with seat" individuals end up typed as Folding and vice versa.
folding_class = URIRef(base_iri + 'Folding')
with_seat_class = URIRef(base_iri + 'With_seat')

target_audience_class = URIRef(base_iri + 'Target_audience')
children_class = URIRef(base_iri + 'Children')
adult_class = URIRef(base_iri + 'Adult')

# NOTE(review): the N3 dump of the loaded ontology shows the rating levels as subclasses of
# ':Raiting' — confirm whether this IRI should use that spelling. It is not referenced by the
# cells visible in this notebook.
rating_class = URIRef(base_iri + 'Rating')
none_class = URIRef(base_iri + 'None')
low_class = URIRef(base_iri + 'Low')
medium_class = URIRef(base_iri + 'Medium')
high_class = URIRef(base_iri + 'High')

place_of_usage_class = URIRef(base_iri + 'Place_of_usage')
countryside_class = URIRef(base_iri + 'Countryside')
city_class = URIRef(base_iri + 'City')

# Object properties

In [71]:
# IRIs of the object properties (links between two individuals).
availableAt = URIRef(f'{base_iri}availableAtPlace')
for_audience = URIRef(f'{base_iri}for')
hasBrand = URIRef(f'{base_iri}hasBrand')
hasConstruction = URIRef(f'{base_iri}hasConstruction')
hasEngine = URIRef(f'{base_iri}hasEngine')
hasRate = URIRef(f'{base_iri}hasRate')
hasWheels = URIRef(f'{base_iri}hasWheels')
isAvailable = URIRef(f'{base_iri}isAvailable')
isRental = URIRef(f'{base_iri}isRental')
useIn = URIRef(f'{base_iri}useIn')

# Data properties

In [72]:
# IRIs of the data properties (links from an individual to a literal value).
hasName = URIRef(f'{base_iri}hasName')
# NOTE(review): the local name is spelled 'hasMaxDIstance' (capital I) — confirm it matches
# the property declared in Scooters.owl before changing it.
hasMaxDistance = URIRef(f'{base_iri}hasMaxDIstance')
hasAvailability = URIRef(f'{base_iri}hasAvailability')
hasMaxSpeed = URIRef(f'{base_iri}hasMaxSpeed')
hasPower = URIRef(f'{base_iri}hasPower')
hasWeight = URIRef(f'{base_iri}hasWeight')

# Create graph

In [73]:
# Load the ontology file into a fresh graph; the cells below add individuals to this same graph.
g = Graph()
g.parse('Scooters.owl')

# Last expression of the cell: displays the loaded graph in N3 for inspection.
g.serialize(format='n3')

b'@prefix : <http://www.semanticweb.org/Scooters#> .\n@prefix owl: <http://www.w3.org/2002/07/owl#> .\n@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .\n\n<http://www.semanticweb.org/Scooters> a owl:Ontology .\n\n:Adult a owl:Class ;\n    rdfs:label "Adult" ;\n    rdfs:subClassOf :Target_audience .\n\n:Children a owl:Class ;\n    rdfs:label "Children" ;\n    rdfs:subClassOf :Target_audience .\n\n:City a owl:Class ;\n    rdfs:label "City" ;\n    rdfs:subClassOf :Place_of_usage .\n\n:Countryside a owl:Class ;\n    rdfs:label "Countryside" ;\n    rdfs:subClassOf :Place_of_usage .\n\n:Double a owl:Class ;\n    rdfs:label "Double" ;\n    rdfs:subClassOf :Engine_configuration .\n\n:Folding a owl:Class ;\n    rdfs:label "Folding" ;\n    rdfs:subClassOf :Construction_configuration .\n\n:High a owl:Class ;\n    rdfs:label "High" ;\n    rdfs:subClassOf :Raiting .\n\n:Low a owl:Class ;\n    rdfs:label "Low" ;\n    rdfs:subClassOf :Raiting

# Functions for adding individuals

In [74]:
def add_scooter(name, max_speed, power, distance, weight):
    """Add a Scooter individual with its data properties to the global graph ``g``.

    The individual's IRI is ``base_iri + name``. ``name`` is stored as a plain
    ``hasName`` literal; the four numeric arguments are stored as ``xsd:integer``
    typed literals.

    Returns:
        The URIRef of the scooter individual.
    """
    node = URIRef(base_iri + name)

    g.add((node, RDF.type, scooter_class))
    g.add((node, hasName, Literal(name)))

    # All remaining data properties share the same integer datatype.
    integer_facts = (
        (hasMaxSpeed, max_speed),
        (hasPower, power),
        (hasMaxDistance, distance),
        (hasWeight, weight),
    )
    for predicate, raw_value in integer_facts:
        g.add((node, predicate, Literal(raw_value, datatype=XSD.integer)))

    return node


def add_brand(name):
    """Add a Brand individual named ``name`` to the global graph ``g``.

    The IRI is ``base_iri + name`` and the name is also attached as a
    ``hasName`` literal.

    Returns:
        The URIRef of the brand individual.
    """
    node = URIRef(base_iri + name)
    for predicate, value in ((RDF.type, brand_class), (hasName, Literal(name))):
        g.add((node, predicate, value))
    return node


def add_rental():
    """Add the Rental individual (fixed IRI ``base_iri + 'rental'``) to ``g``.

    Returns:
        The URIRef of the rental individual.
    """
    node = URIRef(base_iri + 'rental')
    type_triple = (node, RDF.type, rental_class)
    g.add(type_triple)
    return node


def add_place(name):
    """Add a Place individual named ``name`` to the global graph ``g``.

    The IRI is ``base_iri + name`` and the name is also attached as a
    ``hasName`` literal.

    Returns:
        The URIRef of the place individual.
    """
    node = URIRef(base_iri + name)
    for predicate, value in ((RDF.type, place_class), (hasName, Literal(name))):
        g.add((node, predicate, value))
    return node


def add_availability(is_available, scooter_name):
    """Add an Availability individual for one scooter to the global graph ``g``.

    The IRI is ``base_iri + scooter_name + '/' + str(is_available)``; the flag is
    stored as an ``xsd:boolean`` typed ``hasAvailability`` literal.

    Returns:
        The URIRef of the availability individual.
    """
    iri_text = base_iri + scooter_name + '/' + str(is_available)
    node = URIRef(iri_text)

    g.add((node, RDF.type, availability_class))
    flag = Literal(is_available, datatype=XSD.boolean)
    g.add((node, hasAvailability, flag))

    return node


def add_audience(specific_audience, auditory_type):
    """Add a target-audience individual typed as ``specific_audience`` to ``g``.

    The IRI is ``base_iri + 'audit/' + str(auditory_type)``.

    Returns:
        The URIRef of the audience individual.
    """
    iri_text = base_iri + 'audit/' + str(auditory_type)
    node = URIRef(iri_text)
    g.add((node, RDF.type, specific_audience))
    return node


def add_configuration(specific_configuration, type_config):
    """Add a configuration individual typed as ``specific_configuration`` to ``g``.

    The IRI is ``base_iri + 'config/' + str(type_config)``.

    Returns:
        The URIRef of the configuration individual.
    """
    iri_text = base_iri + 'config/' + str(type_config)
    node = URIRef(iri_text)
    g.add((node, RDF.type, specific_configuration))
    return node


def add_place_of_usage(specific_place, type_config):
    """Add a place-of-usage individual typed as ``specific_place`` to ``g``.

    The IRI is ``base_iri + 'usage/' + str(type_config)``.

    Returns:
        The URIRef of the place-of-usage individual.
    """
    iri_text = base_iri + 'usage/' + str(type_config)
    node = URIRef(iri_text)
    g.add((node, RDF.type, specific_place))
    return node


def add_rating(specific_rating, type_config):
    """Add a rating individual typed as ``specific_rating`` to ``g``.

    The IRI is ``base_iri + 'rating/' + str(type_config)``.

    Returns:
        The URIRef of the rating individual.
    """
    iri_text = base_iri + 'rating/' + str(type_config)
    node = URIRef(iri_text)
    g.add((node, RDF.type, specific_rating))
    return node


# Reading CSV data

In [75]:
# Read the tab-separated scooter data; every row becomes a dict keyed by the header line.
# The encoding is stated explicitly so the file decodes the same way on every platform
# (preprocess_name strips Cyrillic characters, so the names are presumably not pure ASCII).
with open('scooter-data.csv', newline='', encoding='utf-8') as tsvfile:
    rows = list(csv.DictReader(tsvfile, delimiter="\t"))

# Adding classes and data properties to graph

In [76]:
# Lookup tables from a key (a pre-processed name or a fixed label) to the URIRef of the
# individual created for it; filled by the population loop and read when linking individuals.
scooters = {}
configurations = {}
brands = {}
rentals = {}
places = {}
availabilities = {}
audiences = {}
places_of_usage = {}
ratings = {}

def preprocess_name(name):
    """Turn a raw CSV name into a string usable as an IRI local name and lookup key.

    Steps, in order:
      1. drop every character in the range U+0401..U+04F9 (Cyrillic letters);
      2. drop every run of digits immediately followed by ASCII letters (e.g. ``350W``);
      3. replace spaces with underscores and remove parentheses.

    Returns:
        The cleaned name.
    """
    without_cyrillic = re.sub(u'[\u0401-\u04f9]', '', name)
    without_digit_tokens = re.sub(r'([0-9]+[a-zA-Z]+)', '', without_cyrillic)
    # One pass: space -> underscore, parentheses removed.
    return without_digit_tokens.translate(str.maketrans({' ': '_', '(': None, ')': None}))


# Add all classes and data properties

# Fixed individuals do not depend on the CSV rows, so they are created once, before the loop,
# instead of being re-added for every row. Adding a triple that already exists is a no-op in
# the graph, so the resulting graph is the same whenever `rows` is non-empty.
for config_key, config_class, config_number in (
    ('With_seat', with_seat_class, 1),
    ('Folding', folding_class, 2),
    ('Two_wheel', two_wheel_class, 3),
    ('Three_wheel', three_wheel_class, 4),
    ('Single_engine', single_class, 5),
    ('Double_engine', double_class, 6),
):
    configurations[config_key] = add_configuration(
        specific_configuration=config_class,
        type_config=config_number
    )

for usage_key, usage_class, usage_number in (
    ('City', city_class, 1),
    ('Countryside', countryside_class, 2),
):
    places_of_usage[usage_key] = add_place_of_usage(
        specific_place=usage_class,
        type_config=usage_number
    )

for rating_key, rating_level_class, rating_number in (
    ('None', none_class, 1),
    ('Low', low_class, 2),
    ('Medium', medium_class, 3),
    ('High', high_class, 4),
):
    ratings[rating_key] = add_rating(
        specific_rating=rating_level_class,
        type_config=rating_number
    )

for audience_key, audience_class, audience_number in (
    ('Children', children_class, 1),
    ('Adult', adult_class, 2),
):
    audiences[audience_key] = add_audience(
        specific_audience=audience_class,
        auditory_type=audience_number
    )

rentals['Rental'] = add_rental()

# Row-dependent individuals: one scooter, brand, availability and place per CSV row.
for row in rows:
    # Each cleaned name is used both as the dict key and as the individual's name.
    scooter_key = preprocess_name(name=row['Name'])
    brand_key = preprocess_name(name=row['Brand'])
    city_key = preprocess_name(name=row['City'])

    # Only the text before the first space of each numeric column is kept
    # (presumably the number in front of a unit — confirm against the CSV).
    scooters[scooter_key] = add_scooter(
        name=scooter_key,
        max_speed=str(row['MaxSpeed']).split(' ')[0],
        power=str(row['Power']).split(' ')[0],
        distance=str(row['Distance']).split(' ')[0],
        weight=str(row['Weight']).split(' ')[0]
    )

    brands[brand_key] = add_brand(name=brand_key)

    availabilities[scooter_key] = add_availability(
        scooter_name=scooter_key,
        is_available=row['Availability']
    )

    places[city_key] = add_place(name=city_key)

# Adding object properties to graph

In [77]:
for row in rows:
    # Resolve the row's individuals once instead of re-running preprocess_name for every triple.
    scooter_key = preprocess_name(name=row['Name'])
    scooter = scooters[scooter_key]

    # Scooter -> for -> Target_audience
    # NOTE(review): a row flagged for both children and adults is linked to Children only —
    # confirm this precedence is intended.
    if row['Child'] == 'true':
        g.add((scooter, for_audience, audiences['Children']))
    elif row['Adult'] == 'true':
        g.add((scooter, for_audience, audiences['Adult']))

    # Scooter -> hasConstruction -> Construction_configuration
    # A seat takes precedence: With_seat is used whenever 'With seat' is true and 'Folding'
    # holds a valid flag; Folding is used only for folding scooters without a seat.
    with_seat_flag = row['With seat']
    folding_flag = row['Folding']
    if with_seat_flag == 'true' and folding_flag in ('true', 'false'):
        g.add((scooter, hasConstruction, configurations['With_seat']))
    elif with_seat_flag == 'false' and folding_flag == 'true':
        g.add((scooter, hasConstruction, configurations['Folding']))

    # Scooter -> hasEngine -> Engine_configuration
    if row['Double engine'] == 'true':
        g.add((scooter, hasEngine, configurations['Double_engine']))
    elif row['Double engine'] == 'false':
        g.add((scooter, hasEngine, configurations['Single_engine']))

    # Scooter -> hasWheels -> Wheel_configuration
    if row['Three-wheel'] == 'true':
        g.add((scooter, hasWheels, configurations['Three_wheel']))
    elif row['Three-wheel'] == 'false':
        g.add((scooter, hasWheels, configurations['Two_wheel']))

    # Scooter -> hasRate -> Rating
    # Buckets: >= 4.75 High, [3.0, 4.75) Medium, (0.0, 3.0) Low, exactly 0.0 None.
    # Negative or NaN ratings get no hasRate triple.
    rating_value = float(row['Rating'])
    if rating_value >= 4.75:
        g.add((scooter, hasRate, ratings['High']))
    elif rating_value >= 3.0:
        g.add((scooter, hasRate, ratings['Medium']))
    elif rating_value > 0.0:
        g.add((scooter, hasRate, ratings['Low']))
    elif rating_value == 0.0:
        g.add((scooter, hasRate, ratings['None']))

    # Scooter -> isRental -> Rental
    if row['Rental'] == 'true':
        g.add((scooter, isRental, rentals['Rental']))

    # Scooter -> hasBrand -> Brand
    g.add((scooter, hasBrand, brands[preprocess_name(name=row['Brand'])]))

    # Scooter -> isAvailable -> Availability
    availability = availabilities[scooter_key]
    g.add((scooter, isAvailable, availability))

    # Availability -> availableAtPlace -> Place
    if row['Availability'] == 'true':
        g.add((availability, availableAt, places[preprocess_name(name=row['City'])]))

    # Scooter -> useIn -> Place_of_usage
    # NOTE(review): a row flagged for both city and countryside is linked to City only —
    # confirm this precedence is intended.
    if row['For_city'] == 'true':
        g.add((scooter, useIn, places_of_usage['City']))
    elif row['Countryside'] == 'true':
        g.add((scooter, useIn, places_of_usage['Countryside']))

# SPARQL

In [78]:
# Find the most popular (High-rated) three-wheel scooters for children.
query_1 = g.query(
    """
    PREFIX scooters: <http://www.semanticweb.org/Scooters#>
    SELECT ?name WHERE {
        ?scooter rdf:type scooters:Scooter .
        ?scooter scooters:for ?a .
        ?a rdf:type scooters:Children .
        ?scooter scooters:hasWheels ?w .
        ?w rdf:type scooters:Three_wheel .
        ?scooter scooters:hasRate ?r .
        ?r rdf:type scooters:High .
        ?scooter scooters:hasName ?name
       }
       """)

# Each result row carries the single selected variable (?name); print it on its own line.
for row in query_1:
    print(row[0])

Halten_Kiddy


In [79]:
# Find scooters of brand X that are available in city Y (for purchase).
# Here X is "Xiaomi" and Y is "Moscow".
query_2 = g.query(
    """
    PREFIX scooters: <http://www.semanticweb.org/Scooters#>
    SELECT ?name WHERE {
        ?scooter rdf:type scooters:Scooter .
        ?scooter scooters:hasBrand ?b .
        ?b scooters:hasName "Xiaomi" .
        ?scooter scooters:isAvailable ?av .
        ?av scooters:hasAvailability true .
        ?av scooters:availableAtPlace ?pl .
        ?pl scooters:hasName "Moscow" .
        ?scooter scooters:hasName ?name
       }
       """)

# Each result row carries the single selected variable (?name); print it on its own line.
for row in query_2:
    print(row[0])

Xiaomi_Mijia_m365_EURO_


In [80]:
# Find the most popular city scooters that are used in rental.
# NOTE(review): unlike the other "most popular" queries, this one has no hasRate / High
# pattern, so it returns every city scooter that is in rental — confirm that is intended.
query_3 = g.query(
    """
    PREFIX scooters: <http://www.semanticweb.org/Scooters#>
    SELECT ?name WHERE {
        ?scooter rdf:type scooters:Scooter .
        ?scooter scooters:useIn ?p .
        ?p rdf:type scooters:City .
        ?scooter scooters:isRental ?r.
        ?scooter scooters:hasName ?name
       }
       """)

# Each result row carries the single selected variable (?name); print it on its own line.
for row in query_3:
    print(row[0])

Xiaomi_Mi_Electric_Scooter_Pro_2
Ninebot_eKickScooter_Zing_E8
Xiaomi_Mijia_m365_Pro
Xiaomi_Mi_Electric_Scooter_Essential
Ninebot_Air_t15
Ninebot_Kickscooter_E22
Ninebot_by_Segway_KickScooter_ES
Ninebot_ES2_V1.7
Ninebot_KickScooter_E25
Ninebot_Kickscooter_Max_G
Xiaomi_Mijia_m365_EURO_
Ninebot_eKickScooter_Zing_E10
Ninebot_KickScooter_Max
Xiaomi_Mijia_m365_
Xiaomi_Mijia_Electric_Scooter__


In [82]:
# Find the most popular (High-rated) children's scooters available in city X (for purchase).
# Here X is "Saint-Petersburg".
query_4 = g.query(
    """
    PREFIX scooters: <http://www.semanticweb.org/Scooters#>
    SELECT ?name WHERE {
        ?scooter rdf:type scooters:Scooter .
        ?scooter scooters:for ?a .
        ?a rdf:type scooters:Children .
        ?scooter scooters:isAvailable ?av .
        ?av scooters:hasAvailability true .
        ?av scooters:availableAtPlace ?pl .
        ?pl scooters:hasName "Saint-Petersburg" .
        ?scooter scooters:hasRate ?r .
        ?r rdf:type scooters:High .
        ?scooter scooters:hasName ?name
       }
       """)

# Each result row carries the single selected variable (?name); print it on its own line.
for row in query_4:
    print(row[0])

Razor_E_-
Halten_Kiddy


# Write populated ontology to new file

In [38]:
# The reasoner cell loads this file with Owlready2, which reads RDF/XML, so the format is
# requested explicitly instead of relying on rdflib's version-dependent default.
g.serialize(destination='scooter-result.rdf', format='xml')

# Provide Java

In [21]:
!apt-get install -y openjdk8-jdk-headless -qq > /dev/null
os.environ["JAVA_HOME"] = 'usr/lib/jvm/java8-openjdk-amd64'
!java -version

E: Unable to locate package openjdk8-jdk-headless
openjdk version "11.0.11" 2021-04-20
OpenJDK Runtime Environment (build 11.0.11+9-Ubuntu-0ubuntu2.18.04)
OpenJDK 64-Bit Server VM (build 11.0.11+9-Ubuntu-0ubuntu2.18.04, mixed mode, sharing)


# Reasoner

In [83]:
# Load the populated ontology under the same file name the serialization cell writes.
ontology = get_ontology('scooter-result.rdf').load()

# Run the reasoner inside the target ontology's context so the inferred facts are stored in
# `inferred_ontology`; creating that ontology only after reasoning would leave it empty.
inferred_ontology = get_ontology('http://inferences/')
with inferred_ontology:
    sync_reasoner()

# Last expression of the cell: displays the ontology that holds the inferences.
inferred_ontology

* Owlready2 * Running HermiT...
    java -Xmx2000M -cp /usr/local/lib/python3.7/dist-packages/owlready2/hermit:/usr/local/lib/python3.7/dist-packages/owlready2/hermit/HermiT.jar org.semanticweb.HermiT.cli.CommandLine -c -O -D -I file:////tmp/tmpqfe50knx
* Owlready2 * HermiT took 2.4276323318481445 seconds
* Owlready * (NB: only changes on entities loaded in Python are shown, other changes are done but not listed)


get_ontology("http://inferences/")

# Generate VoID

In [None]:
# IRI bound to the graph's default (empty) prefix; used as the subject of the dataset metadata.
prefix = dict(g.namespaces())['']

g.add((prefix, DCTERMS.title, Literal('Electric scooters catalog')))
g.add((prefix, DCTERMS.publisher, Literal('Aleksey Petrenko')))

# generateVoID returns the VoID graph plus a second value that is not needed here.
void_g, _ = generateVoID(g=g)

# serialize() returns bytes on older rdflib releases and str on newer ones; handle both.
# RDF/XML is requested explicitly so the printed format does not depend on the rdflib default.
void_xml = void_g.serialize(format='xml')
if isinstance(void_xml, bytes):
    void_xml = void_xml.decode('utf-8')
print(void_xml)

<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF
   xmlns:ns1="http://rdfs.org/ns/void#"
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
>
  <rdf:Description rdf:about="http://example.org/Dataset_property22">
    <ns1:triples rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">473</ns1:triples>
    <ns1:distinctObjects rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">52</ns1:distinctObjects>
    <rdf:type rdf:resource="http://rdfs.org/ns/void#Dataset"/>
    <ns1:property rdf:resource="http://www.semanticweb.org/Scooters#hasPower"/>
    <ns1:entities rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">472</ns1:entities>
    <ns1:classes rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">2</ns1:classes>
    <ns1:distinctSubjects rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">472</ns1:distinctSubjects>
    <ns1:properties rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1</ns1:properties>
  </rdf:Description>
  <rdf:Description rdf:about="http

# Generate documentation

In [None]:
# Render HTML documentation for the source ontology with pyLODE.
html = pylode.MakeDocco(
    input_data_file='Scooters.owl',
    outputformat="html"
).document()

# The context manager closes the file even if the write fails; the encoding is explicit so
# the output does not depend on the platform default.
with open('doc.html', 'w', encoding='utf-8') as f:
    f.write(html)