From d3466e88189e9574c65328f33086dc6909ebc6e1 Mon Sep 17 00:00:00 2001
From: Anna Bobasheva <33026767+AnnaBobasheva@users.noreply.github.com>
Date: Tue, 3 Dec 2024 12:55:14 +0100
Subject: [PATCH 1/3] updated example dir and notebook
---
.gitignore | 12 +-
.../data/beatles-validator.ttl | 0
.../data/beatles.rdf | 0
examples/example1.ipynb | 1506 +++++++++++++++++
{python_examples => examples}/simple_query.py | 0
python_examples/example1.ipynb | 446 -----
6 files changed, 1513 insertions(+), 451 deletions(-)
rename {python_examples => examples}/data/beatles-validator.ttl (100%)
rename {python_examples => examples}/data/beatles.rdf (100%)
create mode 100644 examples/example1.ipynb
rename {python_examples => examples}/simple_query.py (100%)
mode change 100755 => 100644
delete mode 100644 python_examples/example1.ipynb
diff --git a/.gitignore b/.gitignore
index 6947822..1d79d3f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -160,18 +160,20 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
-.gradle
-build
-log
# VSCode
.vscode/
-
# garbage
\#*
.\#*
# backup files
resources/
-*.bak
\ No newline at end of file
+*.bak
+
+# Java class files
+.gradle/
+build/
+log/
+bin/
diff --git a/python_examples/data/beatles-validator.ttl b/examples/data/beatles-validator.ttl
similarity index 100%
rename from python_examples/data/beatles-validator.ttl
rename to examples/data/beatles-validator.ttl
diff --git a/python_examples/data/beatles.rdf b/examples/data/beatles.rdf
similarity index 100%
rename from python_examples/data/beatles.rdf
rename to examples/data/beatles.rdf
diff --git a/examples/example1.ipynb b/examples/example1.ipynb
new file mode 100644
index 0000000..2351443
--- /dev/null
+++ b/examples/example1.ipynb
@@ -0,0 +1,1506 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github"
+ },
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "oCetcC1MQz13"
+ },
+ "source": [
+ "# Using pycorese\n",
+ "\n",
+ "This notebook demonstrates how to use the **pycorese** package:\n",
+ "\n",
+ "- to load a knowledge graph\n",
+ "- to perform a SPARQL query\n",
+ "- to validate a SHACL form\n",
+ "- to access the classes of Corese Java API"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "tZjvQGgGe64i"
+ },
+ "source": [
+ "## Install pycorese"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "nFeJr1PbQz18"
+ },
+ "source": [
+ "Java Runtime Environment (JRE) 11 or higher is required to run **pycorese**.\n",
+ "\n",
+ "If you don't have Java installed, please refer to the [official website](https://www.java.com/en/download/help/download_options.html) to download and install it."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "IKx255qaQz1_",
+ "outputId": "29b40851-6439-459b-c5f5-1e8cb89f7e84"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "openjdk version \"11.0.25\" 2024-10-15\n",
+ "OpenJDK Runtime Environment (build 11.0.25+9-post-Ubuntu-1ubuntu122.04)\n",
+ "OpenJDK 64-Bit Server VM (build 11.0.25+9-post-Ubuntu-1ubuntu122.04, mixed mode, sharing)\n"
+ ]
+ }
+ ],
+ "source": [
+ "!java -version"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "QzKUfvL8Qz2G"
+ },
+ "source": [
+ "**pycorese** is available on PyPI and can be installed using pip:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "OdY7kuBeQz2I",
+ "outputId": "f0deca77-241c-4c58-970c-2906bcbc4078"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Collecting pycorese\n",
+ " Downloading pycorese-1.0.1-py3-none-any.whl.metadata (32 kB)\n",
+ "Requirement already satisfied: py4j>=0.10.9.7 in /usr/local/lib/python3.10/dist-packages (from pycorese) (0.10.9.7)\n",
+ "Collecting jpype1>=1.5.0 (from pycorese)\n",
+ " Downloading jpype1-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n",
+ "Requirement already satisfied: pandas>=1.3.3 in /usr/local/lib/python3.10/dist-packages (from pycorese) (2.2.2)\n",
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from jpype1>=1.5.0->pycorese) (24.2)\n",
+ "Requirement already satisfied: numpy>=1.22.4 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.3.3->pycorese) (1.26.4)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.3.3->pycorese) (2.8.2)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.3.3->pycorese) (2024.2)\n",
+ "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.3.3->pycorese) (2024.2)\n",
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas>=1.3.3->pycorese) (1.16.0)\n",
+ "Downloading pycorese-1.0.1-py3-none-any.whl (95.3 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m95.3/95.3 MB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading jpype1-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (493 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m493.8/493.8 kB\u001b[0m \u001b[31m29.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hInstalling collected packages: jpype1, pycorese\n",
+ "Successfully installed jpype1-1.5.1 pycorese-1.0.1\n"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install pycorese"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "aKYekc7UQz2K"
+ },
+ "outputs": [],
+ "source": [
+ "# delete\n",
+ "import sys\n",
+ "if 'google.colab' in sys.modules:\n",
+ " !pip install git+https://github.com/corese-stack/corese-python.git@develop\n",
+ "else:\n",
+ " sys.path.append('..\\\\src')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "pRlL21fgQz2M"
+ },
+ "source": [
+ "Download the data files from the GitHub repository:\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "kOvrNs-ze64n",
+ "outputId": "731259ca-8854-4497-fadc-aca4b4ec3714"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "beatles.rdf beatles-validator.ttl\n"
+ ]
+ }
+ ],
+ "source": [
+ "# change the links to example production\n",
+ "import os\n",
+ "import sys\n",
+ "if not os.path.exists('./data/beatles.rdf'):\n",
+ " print('Downloading the data files...')\n",
+ " !mkdir -p ./data\n",
+ " !wget https://raw.githubusercontent.com/corese-stack/corese-python/main/examples/data/beatles.rdf -O ./data/beatles.rdf\n",
+ " !wget https://raw.githubusercontent.com/corese-stack/corese-python/main/examples/data/beatles-validator.ttl -O ./data/beatles-validator.ttl\n",
+ "\n",
+ "if sys.platform == 'win32':\n",
+ " !dir /b .\\data\\*.*\n",
+ "else:\n",
+ " !ls ./data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "PLBixnURe64o"
+ },
+ "source": [
+ "### Connect to Corese API\n",
+ "\n",
+ "This section demonstrates loading and querying data with CoreseAPI connected through the `Py4J` or `JPype` package. If you don't specify the Java bridge type, `Py4J` is used by default."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "id": "wN4TDhjXe64p"
+ },
+ "outputs": [],
+ "source": [
+ "#%%timeit -n 1 -r 1\n",
+ "from pycorese.api import CoreseAPI\n",
+ "\n",
+ "python_to_java_bridge = 'py4j'\n",
+ "corese = CoreseAPI(java_bridge=python_to_java_bridge)\n",
+ "corese.loadCorese()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "7WzP7gCle64p"
+ },
+ "source": [
+ "### High-level API"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "1hHYhnIve64p"
+ },
+ "source": [
+ "#### Run SELECT query"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 206
+ },
+ "id": "KiVYUBGhe64p",
+ "outputId": "786d7754-23a2-4ba6-800d-e36bd199adc7"
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "summary": "{\n \"name\": \"results\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"subject\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"http://example.com/Please_Please_Me\",\n \"http://example.com/McCartney\",\n \"http://example.com/Imagine\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"p\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"http://example.com/date\",\n \"http://example.com/artist\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"o\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"http://example.com/Paul_McCartney\",\n \"1970-04-17\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
+ "type": "dataframe",
+ "variable_name": "results"
+ },
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " subject | \n",
+ " p | \n",
+ " o | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " http://example.com/Please_Please_Me | \n",
+ " http://example.com/artist | \n",
+ " http://example.com/The_Beatles | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " http://example.com/McCartney | \n",
+ " http://example.com/artist | \n",
+ " http://example.com/Paul_McCartney | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " http://example.com/Imagine | \n",
+ " http://example.com/artist | \n",
+ " http://example.com/John_Lennon | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " http://example.com/Please_Please_Me | \n",
+ " http://example.com/date | \n",
+ " 1963-03-22 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " http://example.com/McCartney | \n",
+ " http://example.com/date | \n",
+ " 1970-04-17 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "text/plain": [
+ " subject p \\\n",
+ "0 http://example.com/Please_Please_Me http://example.com/artist \n",
+ "1 http://example.com/McCartney http://example.com/artist \n",
+ "2 http://example.com/Imagine http://example.com/artist \n",
+ "3 http://example.com/Please_Please_Me http://example.com/date \n",
+ "4 http://example.com/McCartney http://example.com/date \n",
+ "\n",
+ " o \n",
+ "0 http://example.com/The_Beatles \n",
+ "1 http://example.com/Paul_McCartney \n",
+ "2 http://example.com/John_Lennon \n",
+ "3 1963-03-22 \n",
+ "4 1970-04-17 "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import os\n",
+ "data_path = os.path.abspath('./data/beatles.rdf')\n",
+ "\n",
+ "query = '''\n",
+ "SELECT *\n",
+ "WHERE {?subject ?p ?o} LIMIT 5'''\n",
+ "\n",
+ "graph = corese.loadRDF(data_path)\n",
+ "results = corese.sparqlSelect(graph, query=query, return_dataframe=True)\n",
+ "\n",
+ "results"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "7iohNvoue64q"
+ },
+ "source": [
+ "#### Load inference rules"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "0kzC2BXSe64q",
+ "outputId": "67cdc27e-20b1-45ca-b045-a981abc2a6b5"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " s type\n",
+ "0 http://example.com/Please_Please_Me http://example.com/Album\n",
+ "1 http://example.com/McCartney http://example.com/Album\n",
+ "2 http://example.com/Imagine http://example.com/Album\n",
+ "3 http://example.com/The_Beatles http://example.com/Band\n",
+ "4 http://example.com/John_Lennon http://example.com/SoloArtist\n",
+ "5 http://example.com/Paul_McCartney http://example.com/SoloArtist\n",
+ "6 http://example.com/Ringo_Starr http://example.com/SoloArtist\n",
+ "7 http://example.com/George_Harrison http://example.com/SoloArtist\n",
+ "8 http://example.com/Love_Me_Do http://example.com/Song\n",
+ "Graph size: 29\n"
+ ]
+ }
+ ],
+ "source": [
+ "corese.resetRuleEngine(graph)\n",
+ "query = \"select * where {?s a ?type} order by ?type\"\n",
+ "print(corese.sparqlSelect(graph, query=query))\n",
+ "print(\"Graph size: \", graph.graphSize())\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "SbSrRfsWe64q"
+ },
+ "source": [
+ "Adding inference rules to the Corese engine should change the results of the query by adding new triples."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "op-vR8rge64q",
+ "outputId": "4eb133de-a58c-43e0-9d90-04722db0f6b3"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Graph size: 33\n"
+ ]
+ }
+ ],
+ "source": [
+ "corese.loadRuleEngine(graph, profile=corese.RuleEngine.Profile.RDFS)\n",
+ "print(\"Graph size: \", graph.graphSize())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "0ILJiiQJe64q"
+ },
+ "source": [
+ "Let's see what was added."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "X_rFryste64q",
+ "outputId": "b1f67902-a052-4075-f4d8-66ce54d0fefa"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " s type\n",
+ "0 http://example.com/Please_Please_Me http://example.com/Album\n",
+ "1 http://example.com/McCartney http://example.com/Album\n",
+ "2 http://example.com/Imagine http://example.com/Album\n",
+ "3 http://example.com/The_Beatles http://example.com/Band\n",
+ "4 http://example.com/John_Lennon http://example.com/Person\n",
+ "5 http://example.com/Paul_McCartney http://example.com/Person\n",
+ "6 http://example.com/Ringo_Starr http://example.com/Person\n",
+ "7 http://example.com/George_Harrison http://example.com/Person\n",
+ "8 http://example.com/John_Lennon http://example.com/SoloArtist\n",
+ "9 http://example.com/Paul_McCartney http://example.com/SoloArtist\n",
+ "10 http://example.com/Ringo_Starr http://example.com/SoloArtist\n",
+ "11 http://example.com/George_Harrison http://example.com/SoloArtist\n",
+ "12 http://example.com/Love_Me_Do http://example.com/Song\n",
+ "Graph size: 33\n"
+ ]
+ }
+ ],
+ "source": [
+ "query = \"select * where {?s a ?type} order by ?type\"\n",
+ "print(corese.sparqlSelect(graph, query=query))\n",
+ "print(\"Graph size: \", graph.graphSize())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "yvIlPQVue64r"
+ },
+ "source": [
+ "The rule engine inferred that every solo artist is also a person, although this was not explicitly stated in the data."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "enpoRBHve64r"
+ },
+ "source": [
+ "#### Run CONSTRUCT query"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "9Ii81qu1e64r",
+ "outputId": "c2e57c03-f591-433f-ebfb-397f49a2519e"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "prefixes = '@prefix ex: '\n",
+ "contruct = '''CONSTRUCT {?A_Beatle a ex:BandMember }\n",
+ " WHERE { ex:The_Beatles ex:member ?A_Beatle}'''\n",
+ "\n",
+ "results = corese.sparqlConstruct(graph, prefixes=prefixes, query=contruct)\n",
+ "\n",
+ "print(results)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "0PPF87F2e64r"
+ },
+ "source": [
+ "By default, the CONSTRUCT query returns the results in RDF/XML format. For a more concise format, convert the results to Turtle."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "TULma6_Ce64r",
+ "outputId": "88cd769a-f09f-49d4-f4cd-a0d031b73d60"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ " a .\n",
+ "\n",
+ " a .\n",
+ "\n",
+ " a .\n",
+ "\n",
+ " a .\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "ttl = corese.toTurtle(results)\n",
+ "\n",
+ "print(ttl)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "znZOJMz1e64r"
+ },
+ "source": [
+ "#### Run SHACL form validation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "liQCAMpFe64s"
+ },
+ "source": [
+ "In the example below, we will use the SHACL shape file that validates that the *beatles* graph follows the rules:\n",
+ "\n",
+ "- A band has a name and at least one member who is also a Solo Artist\n",
+ "- An album has one name, one date and one artist associated with it\n",
+ "- A song has one name, one duration and at least one writer and at least one performer associated with it\n",
+ "\n",
+ "The validation should fail because the *beatles* graph does not contain the required information. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "MOsEDfB3e64s",
+ "outputId": "1c2e0577-c583-41a2-f4da-50633daa5bee"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "PREFIX sh: \n",
+ "PREFIX xsd: \n",
+ "PREFIX ex: \n",
+ "\n",
+ "# Shape for Bands\n",
+ "ex:BandShape a sh:NodeShape ;\n",
+ " sh:targetClass ex:Band ;\n",
+ " sh:property [\n",
+ " sh:path ex:name ;\n",
+ " sh:datatype xsd:string ;\n",
+ " sh:minCount 1 ;\n",
+ " sh:maxCount 1 ;\n",
+ " ] ;\n",
+ " sh:property [\n",
+ " sh:path ex:member ;\n",
+ " sh:class ex:SoloArtist ;\n",
+ " sh:minCount 1 ;\n",
+ " ] .\n",
+ "\n",
+ "# Shape for Solo Artists\n",
+ "ex:SoloArtistShape a sh:NodeShape ;\n",
+ " sh:targetClass ex:SoloArtist .\n",
+ "\n",
+ "# Shape for Albums\n",
+ "ex:AlbumShape a sh:NodeShape ;\n",
+ " sh:targetClass ex:Album ;\n",
+ " sh:property [\n",
+ " sh:path ex:name ;\n",
+ " sh:datatype xsd:string ;\n",
+ " sh:minCount 1 ;\n",
+ " sh:maxCount 1 ;\n",
+ " ] ;\n",
+ " sh:property [\n",
+ " sh:path ex:date ;\n",
+ " sh:datatype xsd:date ;\n",
+ " sh:minCount 1 ;\n",
+ " sh:maxCount 1 ;\n",
+ " ] ;\n",
+ " sh:property [\n",
+ " sh:path ex:artist ;\n",
+ " sh:nodeKind sh:IRI ;\n",
+ " sh:minCount 1 ;\n",
+ " sh:maxCount 1 ;\n",
+ " ] .\n",
+ "\n",
+ "# Shape for Songs\n",
+ "ex:SongShape a sh:NodeShape ;\n",
+ " sh:targetClass ex:Song ;\n",
+ " sh:property [\n",
+ " sh:path ex:name ;\n",
+ " sh:datatype xsd:string ;\n",
+ " sh:minCount 1 ;\n",
+ " sh:maxCount 1 ;\n",
+ " ] ;\n",
+ " sh:property [\n",
+ " sh:path ex:length ;\n",
+ " sh:datatype xsd:integer ;\n",
+ " sh:minCount 1 ;\n",
+ " sh:maxCount 1 ;\n",
+ " ] ;\n",
+ " sh:property [\n",
+ " sh:path ex:performer ;\n",
+ " sh:nodeKind sh:IRI ;\n",
+ " sh:minCount 1 ;\n",
+ " ] ;\n",
+ " sh:property [\n",
+ " sh:path ex:writer ;\n",
+ " sh:nodeKind sh:IRI ;\n",
+ " sh:minCount 1 ;\n",
+ " ] .\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "data_shape_path = os.path.abspath('./data/beatles-validator.ttl')\n",
+ "\n",
+ "with open(data_shape_path, 'r') as file:\n",
+ " data_shape = file.read()\n",
+ " print(data_shape)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Oa-7_Q_qe64s",
+ "outputId": "d6764304-7a25-4274-d010-593cbbbbd5a2"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "@prefix xsh: .\n",
+ "@prefix sh: .\n",
+ "\n",
+ " a sh:ValidationResult ;\n",
+ " sh:focusNode ;\n",
+ " sh:resultMessage \"Fail at: [sh:minCount 1 ;\\n sh:nodeKind sh:IRI ;\\n sh:path ]\" ;\n",
+ " sh:resultPath ;\n",
+ " sh:resultSeverity sh:Violation ;\n",
+ " sh:sourceConstraintComponent sh:MinCountConstraintComponent ;\n",
+ " sh:sourceShape _:b7 ;\n",
+ " sh:value 0 .\n",
+ "\n",
+ "[a sh:ValidationReport ;\n",
+ " sh:conforms false ;\n",
+ " sh:result ] .\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "prefixes = '@prefix ex: '\n",
+ "report = corese.shaclValidate(graph, shacl_shape_ttl=data_shape_path, prefixes=prefixes)\n",
+ "\n",
+ "print(report)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ErOwy8yMe64s"
+ },
+ "source": [
+ "The SHACL validation report is verbose and can be reshaped into a DataFrame for readability."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 184
+ },
+ "id": "K4_fQT0Ye64s",
+ "outputId": "db9698e5-9f18-4714-ade4-93c6218b6047"
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "summary": "{\n \"name\": \"report_dataframe\",\n \"rows\": 1,\n \"fields\": [\n {\n \"column\": \"o\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"urn:uuid:66d7b5ea-0065-4f84-b0e4-d65ba0b16a11\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"type\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"http://www.w3.org/ns/shacl#ValidationResult\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"focusNode\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"http://example.com/Love_Me_Do\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"resultMessage\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Fail at: [sh:minCount 1 ;\\n sh:nodeKind sh:IRI ;\\n sh:path ]\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"resultPath\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"http://example.com/performer\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"resultSeverity\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"http://www.w3.org/ns/shacl#Violation\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sourceConstraintComponent\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"http://www.w3.org/ns/shacl#MinCountConstraintComponent\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sourceShape\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"_:b9\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"value\",\n 
\"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"0\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
+ "type": "dataframe",
+ "variable_name": "report_dataframe"
+ },
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " type | \n",
+ " focusNode | \n",
+ " resultMessage | \n",
+ " resultPath | \n",
+ " resultSeverity | \n",
+ " sourceConstraintComponent | \n",
+ " sourceShape | \n",
+ " value | \n",
+ "
\n",
+ " \n",
+ " | o | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | urn:uuid:66d7b5ea-0065-4f84-b0e4-d65ba0b16a11 | \n",
+ " http://www.w3.org/ns/shacl#ValidationResult | \n",
+ " http://example.com/Love_Me_Do | \n",
+ " Fail at: [sh:minCount 1 ;\n",
+ " sh:nodeKind sh:IRI... | \n",
+ " http://example.com/performer | \n",
+ " http://www.w3.org/ns/shacl#Violation | \n",
+ " http://www.w3.org/ns/shacl#MinCountConstraintC... | \n",
+ " _:b9 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "text/plain": [
+ " type \\\n",
+ "o \n",
+ "urn:uuid:66d7b5ea-0065-4f84-b0e4-d65ba0b16a11 http://www.w3.org/ns/shacl#ValidationResult \n",
+ "\n",
+ " focusNode \\\n",
+ "o \n",
+ "urn:uuid:66d7b5ea-0065-4f84-b0e4-d65ba0b16a11 http://example.com/Love_Me_Do \n",
+ "\n",
+ " resultMessage \\\n",
+ "o \n",
+ "urn:uuid:66d7b5ea-0065-4f84-b0e4-d65ba0b16a11 Fail at: [sh:minCount 1 ;\n",
+ " sh:nodeKind sh:IRI... \n",
+ "\n",
+ " resultPath \\\n",
+ "o \n",
+ "urn:uuid:66d7b5ea-0065-4f84-b0e4-d65ba0b16a11 http://example.com/performer \n",
+ "\n",
+ " resultSeverity \\\n",
+ "o \n",
+ "urn:uuid:66d7b5ea-0065-4f84-b0e4-d65ba0b16a11 http://www.w3.org/ns/shacl#Violation \n",
+ "\n",
+ " sourceConstraintComponent \\\n",
+ "o \n",
+ "urn:uuid:66d7b5ea-0065-4f84-b0e4-d65ba0b16a11 http://www.w3.org/ns/shacl#MinCountConstraintC... \n",
+ "\n",
+ " sourceShape value \n",
+ "o \n",
+ "urn:uuid:66d7b5ea-0065-4f84-b0e4-d65ba0b16a11 _:b9 0 "
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "report_dataframe = corese.shaclReportToDataFrame(report)\n",
+ "\n",
+ "report_dataframe"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "d6xlzpURe64t"
+ },
+ "source": [
+ "The report tells us that for the song *Love Me Do* a performer is not specified."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Nt-rvV1Pe64t"
+ },
+ "source": [
+ "## Low-level API"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "NgjIq4y-e64t"
+ },
+ "source": [
+ "### Adding triples manually to the graph."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "egIhirIce64t",
+ "outputId": "5a78aacf-06fd-4968-ad8d-9a3f7ed96147"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "JavaObject id=o37"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Namespace\n",
+ "ex = \"http://example.com/\"\n",
+ "\n",
+ "# Get the graph from either Graph or DataManager objects\n",
+ "graph = graph.getGraph()\n",
+ "\n",
+ "# Create and add statements: Help! is an album\n",
+ "new_album_IRI = graph.addResource(ex + \"Help\")\n",
+ "rdf_Type_Property = graph.addProperty(corese.Namespaces.RDF + 'type')\n",
+ "album_type_IRI = graph.addResource(ex + \"Album\")\n",
+ "\n",
+ "graph.addEdge(new_album_IRI, rdf_Type_Property, album_type_IRI)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "BqmK5N5Xe64t"
+ },
+ "source": [
+ "Let's see what was added."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "2l26SHjWe64u",
+ "outputId": "87add254-e354-427b-b614-4863a0be45d4"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "01 ?album = ; \n",
+ "02 ?album = ; \n",
+ "03 ?album = ; \n",
+ "04 ?album = ; \n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "query = f'''@prefix ex: <{ex}>\n",
+ " SELECT *\n",
+ " where {{?album a ex:Album }}'''\n",
+ "\n",
+ "exec = corese.QueryProcess.create(graph)\n",
+ "\n",
+ "results = exec.query(query)\n",
+ "\n",
+ "print(results)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "6IwJDXc4e64u"
+ },
+ "source": [
+ "The new triple (album *Help*) was added to the graph."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "o41J9o5We64u"
+ },
+ "source": [
+ "We can add some more details for the album *Help!* and see what was added."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "gSZwJp33e64u",
+ "outputId": "f2ab11b3-a89e-4d38-b5d7-0c5b40d6f26f"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "JavaObject id=o46"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Create and add statement: The name of the album is actually Help!\n",
+ "name_property_IRI = graph.addProperty(ex + \"name\")\n",
+ "name_literal = graph.addLiteral(\"Help!\")\n",
+ "\n",
+ "graph.addEdge(new_album_IRI, name_property_IRI, name_literal)\n",
+ "\n",
+ "# Create and add statement: The new album was released in 1965\n",
+ "xsd = \"http://www.w3.org/2001/XMLSchema#\"\n",
+ "release_property_IRI = graph.addProperty(ex + \"date\")\n",
+ "release_literal = graph.addLiteral(\"1965\", xsd + 'date')\n",
+ "\n",
+ "graph.addEdge(new_album_IRI, release_property_IRI, release_literal)\n",
+ "\n",
+ "\n",
+ "# Create and add statement: The Beatles is the creator of the album Help\n",
+ "artist_property_IRI = graph.addProperty(ex + \"artist\")\n",
+ "artist_IRI = graph.addLiteral(ex + \"The_Beatles\")\n",
+ "graph.addEdge(new_album_IRI, artist_property_IRI, artist_IRI)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "CTWzyXGUe64u",
+ "outputId": "7e2a01bd-3884-4694-a3c5-005a56fd2b60"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "@prefix xsd: .\n",
+ "@prefix ex: .\n",
+ "\n",
+ "ex:Help ex:artist \"http://example.com/The_Beatles\" ;\n",
+ " ex:date \"1965\"^^xsd:date ;\n",
+ " ex:name \"Help!\" ;\n",
+ " a ex:Album .\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "query = f'''@prefix ex: <{ex}>\n",
+ " CONSTRUCT {{ ?album ?p ?o }}\n",
+ " WHERE {{\n",
+ " VALUES ?album {{ ex:Help }}\n",
+ " ?album ?p ?o}} '''\n",
+ "\n",
+ "exec = corese.QueryProcess.create(graph)\n",
+ "\n",
+ "results = exec.query(query)\n",
+ "\n",
+ "results_ttl = corese.ResultFormat.create(results, corese.ResultFormat.TURTLE_FORMAT)\n",
+ "\n",
+ "print(results_ttl)"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "toc_visible": true
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.14"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/python_examples/simple_query.py b/examples/simple_query.py
old mode 100755
new mode 100644
similarity index 100%
rename from python_examples/simple_query.py
rename to examples/simple_query.py
diff --git a/python_examples/example1.ipynb b/python_examples/example1.ipynb
deleted file mode 100644
index 9055ddc..0000000
--- a/python_examples/example1.ipynb
+++ /dev/null
@@ -1,446 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
- "outputs": [],
- "source": [
- "import sys\n",
- "sys.path.append('..\\\\src')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Py4J or JPype\n",
- "\n",
- "Demonstrate loading and querying data with CoreseAPI connected through `Py4J` or `JPype` packages."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Connect to Corese API"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [],
- "source": [
- "python_to_java_bridge = 'py4j'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2024-09-25 15:46:35,984 - INFO - JPype: CORESE is stopped\n",
- "2024-09-25 15:46:36,030 - INFO - Py4J: Loading CORESE...\n",
- "2024-09-25 15:46:50,983 - INFO - Py4J: CORESE is loaded\n"
- ]
- }
- ],
- "source": [
- "#%%timeit -n 1 -r 1\n",
- "from pycorese.api import CoreseAPI\n",
- "\n",
- "corese = CoreseAPI(java_bridge=python_to_java_bridge)\n",
- "corese.loadCorese()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### High-level API"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Run SELECT query"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " s | \n",
- " p | \n",
- " o | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " http://example.com/Please_Please_Me | \n",
- " http://example.com/artist | \n",
- " http://example.com/The_Beatles | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " http://example.com/McCartney | \n",
- " http://example.com/artist | \n",
- " http://example.com/Paul_McCartney | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " http://example.com/Imagine | \n",
- " http://example.com/artist | \n",
- " http://example.com/John_Lennon | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " http://example.com/Please_Please_Me | \n",
- " http://example.com/date | \n",
- " 1963-03-22 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " http://example.com/McCartney | \n",
- " http://example.com/date | \n",
- " 1970-04-17 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " s p \\\n",
- "0 http://example.com/Please_Please_Me http://example.com/artist \n",
- "1 http://example.com/McCartney http://example.com/artist \n",
- "2 http://example.com/Imagine http://example.com/artist \n",
- "3 http://example.com/Please_Please_Me http://example.com/date \n",
- "4 http://example.com/McCartney http://example.com/date \n",
- "\n",
- " o \n",
- "0 http://example.com/The_Beatles \n",
- "1 http://example.com/Paul_McCartney \n",
- "2 http://example.com/John_Lennon \n",
- "3 1963-03-22 \n",
- "4 1970-04-17 "
- ]
- },
- "execution_count": 15,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "import os \n",
- "data_path = os.path.abspath('data\\\\beatles.rdf')\n",
- "\n",
- "query = '''\n",
- "SELECT *\n",
- "WHERE {?s ?p ?o} LIMIT 5'''\n",
- "\n",
- "graph = corese.loadRDF(data_path) \n",
- "results = corese.sparqlSelect(graph, query=query, return_dataframe=True)\n",
- "\n",
- "results"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Load inference rules "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " s type\n",
- "0 http://example.com/Please_Please_Me http://example.com/Album\n",
- "1 http://example.com/McCartney http://example.com/Album\n",
- "2 http://example.com/Imagine http://example.com/Album\n",
- "3 http://example.com/The_Beatles http://example.com/Band\n",
- "4 http://example.com/John_Lennon http://example.com/SoloArtist\n",
- "5 http://example.com/Paul_McCartney http://example.com/SoloArtist\n",
- "6 http://example.com/Ringo_Starr http://example.com/SoloArtist\n",
- "7 http://example.com/George_Harrison http://example.com/SoloArtist\n",
- "8 http://example.com/Love_Me_Do http://example.com/Song\n",
- "Graph size: 29\n"
- ]
- }
- ],
- "source": [
- "corese.resetRuleEngine(graph)\n",
- "query = \"select * where {?s a ?type} order by ?type\"\n",
- "print(corese.sparqlSelect(graph, query=query))\n",
- "print(\"Graph size: \", graph.graphSize())\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Adding inference rules to the Corese engine should change the results of the query by adding new triples."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Graph size: 33\n"
- ]
- }
- ],
- "source": [
- "corese.loadRuleEngine(graph, profile=corese.RuleEngine.Profile.RDFS)\n",
- "print(\"Graph size: \", graph.graphSize())"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Let's see what was added."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " s type\n",
- "0 http://example.com/Please_Please_Me http://example.com/Album\n",
- "1 http://example.com/McCartney http://example.com/Album\n",
- "2 http://example.com/Imagine http://example.com/Album\n",
- "3 http://example.com/The_Beatles http://example.com/Band\n",
- "4 http://example.com/John_Lennon http://example.com/Person\n",
- "5 http://example.com/Paul_McCartney http://example.com/Person\n",
- "6 http://example.com/Ringo_Starr http://example.com/Person\n",
- "7 http://example.com/George_Harrison http://example.com/Person\n",
- "8 http://example.com/John_Lennon http://example.com/SoloArtist\n",
- "9 http://example.com/Paul_McCartney http://example.com/SoloArtist\n",
- "10 http://example.com/Ringo_Starr http://example.com/SoloArtist\n",
- "11 http://example.com/George_Harrison http://example.com/SoloArtist\n",
- "12 http://example.com/Love_Me_Do http://example.com/Song\n",
- "Graph size: 33\n"
- ]
- }
- ],
- "source": [
- "query = \"select * where {?s a ?type} order by ?type\"\n",
- "print(corese.sparqlSelect(graph, query=query))\n",
- "print(\"Graph size: \", graph.graphSize())"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The inference was that the solo artist is also a person although it was not explicitly stated in the data."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Run CONSTRUCT query"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "\n",
- " \n",
- " \n",
- "\n",
- " \n",
- " \n",
- "\n",
- " \n",
- " \n",
- "\n",
- " \n",
- " \n",
- "\n",
- "\n"
- ]
- }
- ],
- "source": [
- "prefixes = '@prefix ex: '\n",
- "contruct = '''CONSTRUCT {?Beatle a ex:BandMember }\n",
- " WHERE { ex:The_Beatles ex:member ?Beatle}'''\n",
- "\n",
- "results = corese.sparqlConstruct(graph, prefixes=prefixes, query=contruct)\n",
- "\n",
- "print(results)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "By default, the CONSTRUCT query returns the RDF/XML format. For more concise format convert the results to Turtle."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- " a .\n",
- "\n",
- " a .\n",
- "\n",
- " a .\n",
- "\n",
- " a .\n",
- "\n",
- "\n"
- ]
- }
- ],
- "source": [
- "ttl = corese.toTurtle(results)\n",
- "\n",
- "print(ttl)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Low-level API"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Adding triples manually to the graph."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "01 ?s = ; ?type = ; \n",
- "02 ?s = ; ?type = ; \n",
- "03 ?s = ; ?type = ; \n",
- "04 ?s = ; ?type = ; \n",
- "\n"
- ]
- }
- ],
- "source": [
- "# Namespace\n",
- "ex = \"http://example.com/\"\n",
- "\n",
- "# Get the graph from either Graph or DataManager objects\n",
- "graph = graph.getGraph() \n",
- "\n",
- "# Create and add statement: Edith Piaf is an Singer\n",
- "new_album_IRI = graph.addResource(ex + \"Help\")\n",
- "rdf_Type_Property = graph.addProperty(corese.Namespaces.RDF + 'type')\n",
- "album_type_IRI = graph.addResource(ex + \"Album\")\n",
- "\n",
- "graph.addEdge(new_album_IRI, rdf_Type_Property, album_type_IRI)\n",
- "\n",
- "query = f'''@prefix ex: <{ex}>\n",
- " select * \n",
- " where {{?s a ?type filter (?type = ex:Album) }} \n",
- " order by ?type'''\n",
- "\n",
- "exec = corese.QueryProcess.create(graph)\n",
- "\n",
- "results = exec.query(query)\n",
- "\n",
- "print(results)\n"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.14"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
From 4761cbe1923abd3d4e5eaef180da4a3b4b31ee1b Mon Sep 17 00:00:00 2001
From: Anna Bobasheva <33026767+AnnaBobasheva@users.noreply.github.com>
Date: Tue, 3 Dec 2024 18:24:28 +0100
Subject: [PATCH 2/3] Update example1.ipynb
---
examples/example1.ipynb | 46 ++---------------------------------------
1 file changed, 2 insertions(+), 44 deletions(-)
diff --git a/examples/example1.ipynb b/examples/example1.ipynb
index 2351443..a8d1670 100644
--- a/examples/example1.ipynb
+++ b/examples/example1.ipynb
@@ -81,7 +81,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -89,50 +89,9 @@
"id": "OdY7kuBeQz2I",
"outputId": "f0deca77-241c-4c58-970c-2906bcbc4078"
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Collecting pycorese\n",
- " Downloading pycorese-1.0.1-py3-none-any.whl.metadata (32 kB)\n",
- "Requirement already satisfied: py4j>=0.10.9.7 in /usr/local/lib/python3.10/dist-packages (from pycorese) (0.10.9.7)\n",
- "Collecting jpype1>=1.5.0 (from pycorese)\n",
- " Downloading jpype1-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n",
- "Requirement already satisfied: pandas>=1.3.3 in /usr/local/lib/python3.10/dist-packages (from pycorese) (2.2.2)\n",
- "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from jpype1>=1.5.0->pycorese) (24.2)\n",
- "Requirement already satisfied: numpy>=1.22.4 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.3.3->pycorese) (1.26.4)\n",
- "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.3.3->pycorese) (2.8.2)\n",
- "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.3.3->pycorese) (2024.2)\n",
- "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.3.3->pycorese) (2024.2)\n",
- "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas>=1.3.3->pycorese) (1.16.0)\n",
- "Downloading pycorese-1.0.1-py3-none-any.whl (95.3 MB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m95.3/95.3 MB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hDownloading jpype1-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (493 kB)\n",
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m493.8/493.8 kB\u001b[0m \u001b[31m29.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25hInstalling collected packages: jpype1, pycorese\n",
- "Successfully installed jpype1-1.5.1 pycorese-1.0.1\n"
- ]
- }
- ],
- "source": [
- "!pip install pycorese"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "aKYekc7UQz2K"
- },
"outputs": [],
"source": [
- "# delete\n",
- "import sys\n",
- "if 'google.colab' in sys.modules:\n",
- " !pip install git+https://github.com/corese-stack/corese-python.git@develop\n",
- "else:\n",
- " sys.path.append('..\\\\src')"
+ "!pip install pycorese"
]
},
{
@@ -164,7 +123,6 @@
}
],
"source": [
- "# change the links to example production\n",
"import os\n",
"import sys\n",
"if not os.path.exists('./data/beatles.rdf'):\n",
From 4951e366382a0bee86b067a69927adefbb76d69e Mon Sep 17 00:00:00 2001
From: Anna Bobasheva <33026767+AnnaBobasheva@users.noreply.github.com>
Date: Tue, 3 Dec 2024 18:29:17 +0100
Subject: [PATCH 3/3] Changed GoogleColab link
---
examples/example1.ipynb | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/example1.ipynb b/examples/example1.ipynb
index a8d1670..0de6786 100644
--- a/examples/example1.ipynb
+++ b/examples/example1.ipynb
@@ -6,7 +6,7 @@
"id": "view-in-github"
},
"source": [
- "
"
+ "
"
]
},
{