diff --git a/CHANGES.md b/CHANGES.md index f738841..35c31d6 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,8 +1,19 @@ # Changelog +## Version 0.1.5 + +- added corese-python/pycorese API documentation +- wraps corese-core v4.6.0 + +## Version 0.1.4 + +- downgraded version to 0.1.4 since it's not ready for 1.0.0 yet +- updated markdown documentation +- wraps corese-core v4.6.0 + ## Version 1.0.1 -- document local installaiton process (INSTALL.md) +- document local installation process (INSTALL.md) - build jar file then running `python -m build` (may need more work) - add doc skeleton and github action files - add coreseVersion (not fully implemented yet) diff --git a/INSTALL.md b/INSTALL.md index 8c8ddf6..c8f2469 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -87,7 +87,7 @@ The custom `sdist` command adds the following steps: > [!NOTE] > - do not run `python setup.py` that will not build the full package. > - the versions of `pycorese` and Java libraries are maintained separately. -> - `corese-python` version should be the same as `corese-core` it depends on, for simplicity reasons. +> - `corese-python` version should be the same as `corese-core` it depends on, for simplicity reasons. > - the commands for the first two steps are provided in the [Obtaining Java libraries manually](#obtain-java-libraries-manually) section. 
## Testing the package @@ -134,7 +134,7 @@ pip install dist/pycorese-0.1.1.tar.gz ## Verifying the installation -``` +```bash $ pip list | grep corese pycorese 0.1.1 @@ -148,13 +148,13 @@ $ python -c 'import pycorese' Without installing the package you can run the following command (the default Java bridge is `py4j`): -``` +```bash ./examples/simple_query.py -j $PWD/build/libs/corese-python-4.6.0-jar-with-dependencies.jar ``` or change the bridge to `jpype`: -``` +```bash ./examples/simple_query.py -b jpype -j $PWD/build/libs/corese-core-4.6.0-jar-with-dependencies.jar ``` diff --git a/README.md b/README.md index 3a8e5d8..4da256d 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ **pycorese** is a Python package that provides a simple way to integrate the [corese-core](https://github.com/corese-stack/corese-core) Java library into Python applications. -**pycorese** provides an intuitive API to interact with Corese's capabilities such as storage, SPARQL engine, RDFS and OWL reasoning, and SHACL validation. +**pycorese** offers an intuitive API to interact with Corese's capabilities such as storage, SPARQL engine, RDFS and OWL reasoning, and SHACL validation. **pycorese** unlocks the potential of Semantic Web stack for applications such as semantic data analysis, knowledge graph construction, and Machine Learning. diff --git a/VERSION.txt b/VERSION.txt deleted file mode 100644 index 845639e..0000000 --- a/VERSION.txt +++ /dev/null @@ -1 +0,0 @@ -0.1.4 diff --git a/docs/README.md b/docs/README.md index 739336c..2a275bb 100644 --- a/docs/README.md +++ b/docs/README.md @@ -22,8 +22,8 @@ sphinx-multiversion docs/source build/html ## Switcher generation -- To navigate between versions by means of the switcher (the dropdown list indicating the available version), the switcher.json object must be generated. -- To improve navigability, a landing page must also be generated to redirect to the latest version of the documentation. 
+- To navigate between versions by means of the switcher (the dropdown list indicating the available version), the switcher.json object must be generated. +- To improve navigability, a landing page must also be generated to redirect to the latest version of the documentation. To this end a script must be executed and write the output to the output html directory: @@ -33,4 +33,4 @@ To this end a script must be executed and write the output to the output html di Both sphinx-multiversion and switcher_generator work on tags following the ```^v[0-9]+\.[0-9]+\.[0-9]+$``` syntax and ordered by refname. -The minimal version set in the switcher_generator allows to not generate entries in the switcher and landing page for unexisting or uncompatible documentation. +The minimal version set in the switcher_generator allows to not generate entries in the switcher and landing page for nonexisting or incompatible documentation. diff --git a/docs/requirements.txt b/docs/requirements.txt index b051c24..fec0404 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -6,6 +6,7 @@ docutils<0.21,>=0.18.1 Jinja2>=3.0 sphinx-design==0.5.0 myst-parser==2.0.0 +myst_nb==1.1.2 sphinxcontrib-mermaid==0.9.2 breathe==4.35.0 exhale==0.3.7 diff --git a/docs/source/.gitignore b/docs/source/.gitignore new file mode 100644 index 0000000..b19b9b1 --- /dev/null +++ b/docs/source/.gitignore @@ -0,0 +1,2 @@ +user_guide.ipynb +dev_install.md \ No newline at end of file diff --git a/docs/source/apis.rst b/docs/source/apis.rst index 1536ca1..69cf531 100644 --- a/docs/source/apis.rst +++ b/docs/source/apis.rst @@ -1,17 +1,18 @@ -CORESE APIs +.. The links for corese-command and corese-server are not available in the corese-stack at the moment. +.. Using the old repository documentation at https://wimmics.github.io/corese. + +Corese APIs ########### .. toctree:: :hidden: - Python API - .. grid:: 2 .. 
grid-item-card:: :shadow: sm :class-card: sd-rounded-3 - :link: https://corese-stack.github.io/corese-core/ + :link: https://corese-stack.github.io/corese-core/v4.6.0/java_api/library_root.html Corese Java API ^^^^^^^^^^^^^^^^^^^^^^^ @@ -31,7 +32,7 @@ CORESE APIs .. grid-item-card:: :shadow: sm :class-card: sd-rounded-3 - :link: https://corese-stack.github.io/corese-command/ + :link: https://wimmics.github.io/corese/cli_ref/cli_root.html Corese command-line interface ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -47,7 +48,7 @@ CORESE APIs .. grid-item-card:: :shadow: sm :class-card: sd-rounded-3 - :link: https://github.com/corese-stack/corese-server/ + :link: https://wimmics.github.io/corese/rest_api/api_root.html Corese REST API ^^^^^^^^^^^^^^^^^^^^^^^ @@ -62,7 +63,7 @@ CORESE APIs .. grid-item-card:: :shadow: sm :class-card: sd-rounded-3 - :link: https://github.com/corese-stack/corese-python/ + :link: python_api/api_root.html Corese Python API ^^^^^^^^^^^^^^^^^^^^^^^ @@ -73,4 +74,4 @@ CORESE APIs * run SPARQL queries (SELECT, CONSTRUCT, ASK, UPDATE) * validate RDF data against SHACL shapes - in development... + and more... diff --git a/docs/source/conf.py b/docs/source/conf.py index 0dc9c8f..decbb82 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -10,57 +10,49 @@ # add these directories to sys.path here. 
import pathlib import sys -import os +import shutil -sys.path.insert(0, pathlib.Path(__file__).parents[1].resolve().as_posix()) -sys.path.insert(0, pathlib.Path(__file__).parents[2].resolve().as_posix()) -#sys.path.insert(0, pathlib.Path(__file__).parents[2].joinpath('code').resolve().as_posix()) +# -- Path to the Python source code ------------------------------------------------ +sys.path.insert(0, pathlib.Path(__file__).parents[2].joinpath('src').resolve().as_posix()) +# -- Copy files for docs -------------------------------------------------- +# +# To avoid duplicating the information and symlinks +shutil.copyfile(pathlib.Path(__file__).parents[2].joinpath('INSTALL.md'), "dev_install.md") +shutil.copyfile(pathlib.Path(__file__).parents[2].joinpath('examples/example1.ipynb'), "user_guide.ipynb") -project = 'CORESE-PYTHON' +# -- Project information ----------------------------------------------------- +project = 'corese-python' copyright = '2024, WIMMICS' -author = 'WIMMICS' - -# -- General configuration --------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration - -extensions = [ - 'sphinx.ext.duration', # to display the duration of Sphinx processing - 'sphinx.ext.todo', # to include todo items in the documentation - # Uncomment the following lines if/when include the python code (not used in this project yet) - #'sphinx.ext.doctest', # to test code snippets in the documentation - #'sphinx.ext.autodoc', # to automatically generate documentation from docstrings - #'sphinx.ext.autosummary', # this extension generates function/method/attribute summary lists - #'sphinx.ext.autosectionlabel', # to automatically generate section labels - 'sphinx_multiversion', - 'sphinx_design', # to render panels - 'myst_parser', # to parse markdown - 'sphinxcontrib.mermaid', # to render mermaid diagrams - # Alternative ways to include markdown files, cannot be used together with myst_parser - # advantages 
of sphynx_mdinclude/m2r3: it can include partial markdown files - # - #'sphinx_mdinclude', # to include partial markdown files - #'m2r3', # to include markdown files - 'sphinx_copybutton', # to add copy buttons to code blocks - ] - -templates_path = ['_templates'] -exclude_patterns = [] +author = 'Corese Team' # The suffix(es) of source filenames. source_suffix = ['.rst', '.md'] +#exclude_patterns = [] -# -- Options for HTML output ------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] -html_theme = 'pydata_sphinx_theme' +# Add any paths that contain data files here, relative to this directory. html_static_path = ['_static'] +# Define the css files to include in the documentation html_css_files = [ "css/custom.css", ] + +# Define the js files to include in the documentation html_js_files = [] +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output +html_sidebars = { + #"corese": [], # syntax for hiding the sidebar +} + +# Tell sphinx what the pygments highlight language should be. +highlight_language = 'python' + # Project logo, to place at the top of the sidebar. html_logo = "_static/corese.svg" @@ -68,13 +60,14 @@ html_favicon = "_static/Corese-square-logo-transparent.svg" # Modify the title to get good social-media links -html_title = "CORESE-PYTHON" -html_short_title = "CORESE-PYTHON" +#html_title = "CORESE-PYTHON" +#html_short_title = "CORESE-PYTHON" # -- Theme Options ----------------------------------------------------------- # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the -# documentation. +# documentation. 
https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output +html_theme = 'pydata_sphinx_theme' html_theme_options = { "logo": { "image_relative": "_static/corese.svg", @@ -93,27 +86,78 @@ "switcher": {"json_url": "https://corese-stack.github.io/corese-python/switcher.json", "version_match": r"v\d+\.\d+\.\d+"} } -html_sidebars = { - "install": [], -} - -# -- MySt-parcer extension Options ------------------------------------------- -# https://myst-parser.readthedocs.io/en/latest/ - -myst_heading_anchors = 4 -myst_fence_as_directive = ["mermaid"] -# Tell sphinx what the primary language being documented is. -primary_domain = 'python' +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration +extensions = [ + 'sphinx.ext.duration', # to display the duration of Sphinx processing + 'sphinx.ext.todo', # to include todo items in the documentation + #'sphinx.ext.githubpages', # to deploy the documentation on GitHub pages ??? 
+ 'sphinx.ext.viewcode', # to add links to the source code + 'sphinx.ext.doctest', # to test code snippets in the documentation + 'sphinx.ext.autodoc', # to automatically generate documentation from docstrings + 'sphinx.ext.autosummary', # this extension generates function/method/attribute summary lists + 'sphinx.ext.autosectionlabel', # to automatically generate section labels + 'sphinx.ext.napoleon', # to parse Google-style docstrings + 'sphinx_design', # to render panels + #'myst_parser', # to parse markdown + "myst_nb", # to parse jupyter notebooks and markdown files + #'sphinxcontrib.mermaid', # to render mermaid diagrams + # Alternative ways to include markdown files, cannot be used together with myst_parser + # advantages of sphinx_mdinclude/m2r3: it can include partial markdown files + # + #'sphinx_mdinclude', # to include partial markdown files + #'m2r3', # to include markdown files + 'sphinx_copybutton', # to add copy buttons to code blocks + 'sphinx_multiversion', # to build documentation for multiple versions + ] -# Tell sphinx what the pygments highlight language should be. 
-highlight_language = 'python' +# -- Options for sphinx.ext.autodoc / sphinx.ext.autosummary----------------------------- +# generate autosummary even if no references +#autodoc_default_flags = ["members", "inherited-members"] +autosummary_generate = True + +autodoc_default_options = { + #"member-order": "bysource", + #"special-members": "__init__", + # "undoc-members": True, + # "show-inheritance": True, + # "template": "_templates/base.rst", # Path to your template +} -# Setup the sphinx.ext.todo extension +# -- Options for sphinx.ext.napoleon---------------------------------------- +napoleon_google_docstring = True +napoleon_numpy_docstring = True +napoleon_use_admonition_for_notes = True +napoleon_use_rtype = False -# Set to false in the final version +# -- Setup the sphinx.ext.todo extension ------------------------------------ +# TODO: Set to false in the final version todo_include_todos = True +# -- sphinx-multiversion extension configuration ----------------------------------- # Optional: Exclude certain branches or tags from multi-versioning #smv_branch_whitelist = r'develop' # TODO Build documentation only for feature/retrieve-doc, must be replaced with "main" for production smv_tag_whitelist = r'^v\d+\.\d+.*$' # Only build tags that match version pattern like v1.0 + +# -- MyST-NB configuration --------------------------------------------------- +# https://myst-nb.readthedocs.io/en/latest/ +# Take the example notebook as-is, without executing it +nb_execution_mode = "off" + +# Suppress warnings +suppress_warnings = [ + "myst.xref_missing", # Suppress warnings about missing references after fixing the one you can fix + "mystnb.unknown_mime_type" # Suppress warnings about Google Colab button in the notebook +] + +# Substitute the relative path in the markdown file for the GitHub repo root URL +def preprocess_markdown(app, docname, source): + base_url = "https://github.com/corese-stack/corese-python/blob/main/" + if docname == "dev_install": # Replace with 
the actual document using the Markdown file + content = source[0] + # Replace relative paths with appropriate links + source[0] = content.replace("./" , base_url) + +def setup(app): + app.connect("source-read", preprocess_markdown) \ No newline at end of file diff --git a/docs/source/corese.rst b/docs/source/corese.rst new file mode 100644 index 0000000..297afea --- /dev/null +++ b/docs/source/corese.rst @@ -0,0 +1,103 @@ +.. CORESE documentation master file, created by + sphinx-quickstart on Tue Apr 16 14:51:03 2024. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + + +.. image:: _static/corese.svg + :align: center + :width: 400px + +.. centered:: Software platform for the Semantic Web of Linked Data + +Corese is a software platform implementing and extending the standards of the Semantic Web. It allows to create, manipulate, parse, serialize, query, reason and validate RDF data. + + + +.. Define named hyperlinks for the references of W3C standards +.. _RDF: https://www.w3.org/RDF/ +.. _RDFS: https://www.w3.org/2001/sw/wiki/RDFS +.. _SPARQL1.1 Query & Update: https://www.w3.org/2001/sw/wiki/SPARQL +.. _OWL RL: https://www.w3.org/2005/rules/wiki/OWLRL +.. _SHACL: https://www.w3.org/TR/shacl/ + +.. Define named hyperlinks for the references of extensions +.. _STTL SPARQL: ./_static/extensions/sttl.html +.. _SPARQL Rule: ./_static/extensions/rule.html +.. _LDScript: ./_static/extensions/ldscript.html + +.. Original location of the extensions documentation +.. .. _STTL SPARQL: https://files.inria.fr/corese/doc/sttl.html +.. .. _SPARQL Rule: https://files.inria.fr/corese/doc/rule.html +.. .. _LDScript: https://files.inria.fr/corese/doc/ldscript.html + + +.. ############################################################################# +.. The statements below are to produce the grid of cards in the home page +.. grid:: 2 + + .. 
grid-item-card:: + :shadow: sm + :class-card: sd-rounded-3 + + Corese implements W3C standards and extensions + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + * W3C standards + * `RDF`_ + * `RDFS`_ + * `SPARQL1.1 Query & Update`_ + * `OWL RL`_ + * `SHACL`_ + * Extensions + * `STTL SPARQL`_ + * `SPARQL Rule`_ + * `LDScript`_ + + .. grid-item-card:: + :shadow: sm + :class-card: sd-rounded-3 + + Corese offers several interfaces + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + * `corese-core `_: Java library to process RDF data and use Corese features via an API. + * `corese-server `_: Tool to easily create, configure and manage SPARQL endpoints. + * `corese-GUI `_: Graphical interface that allows an easy and visual use of Corese features. + * `corese-python (beta) `_: Python wrapper for accessing and manipulating RDF data with Corese features using py4j. + * `corese-command (beta) `_: Command Line Interface for Corese that allows users to interact with Corese features from the terminal. + +.. .. raw:: html + +..

Contributions and discussions

+ +.. .. _discussion forum: https://github.com/Wimmics/corese/discussions/ +.. .. _issue reports: https://github.com/Wimmics/corese/issues/ +.. .. _pull requests: https://github.com/Wimmics/corese/pulls/ + +.. For support questions, comments, and any ideas for improvements you’d like to discuss, please use our `discussion forum`_. We welcome everyone to contribute to `issue reports`_, suggest new features, and create `pull requests`_. + + +.. ############################################################################# +.. The statements below are to produce the title of the page in the tab + and a menu with the links to the pages of the documentation + +.. raw html below is used to hide the title of the page but retain it in the + tab title. https://github.com/sphinx-doc/sphinx/issues/8356 +.. raw:: html + +
+ +About Corese +==================== + +.. raw:: html + +
+ +.. toctree:: + :maxdepth: 2 + + Corese APIs + +.. Install +.. User Guide +.. API diff --git a/docs/source/index.rst b/docs/source/index.rst index af9dad6..2fb7704 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,77 +1,40 @@ -.. CORESE documentation master file, created by - sphinx-quickstart on Tue Apr 16 14:51:03 2024. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - +.. pycorese documentation master file, created by + sphinx-quickstart on Thu Oct 14 2023. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. .. image:: _static/corese.svg :align: center :width: 400px -.. centered:: Software platform for the Semantic Web of Linked Data - -Corese is a software platform implementing and extending the standards of the Semantic Web. It allows to create, manipulate, parse, serialize, query, reason and validate RDF data. - - +.. centered:: Python API for Corese Semantic Web platform .. Define named hyperlinks for the references of W3C standards +.. _Corese: corese.html +.. _corese-core: https://github.com/corese-stack/corese-core + .. _RDF: https://www.w3.org/RDF/ .. _RDFS: https://www.w3.org/2001/sw/wiki/RDFS -.. _SPARQL1.1 Query & Update: https://www.w3.org/2001/sw/wiki/SPARQL +.. _SPARQL: https://www.w3.org/2001/sw/wiki/SPARQL .. _OWL RL: https://www.w3.org/2005/rules/wiki/OWLRL .. _SHACL: https://www.w3.org/TR/shacl/ -.. Define named hyperlinks for the references of extensions -.. _STTL SPARQL: ./_static/extensions/sttl.html -.. _SPARQL Rule: ./_static/extensions/rule.html -.. _LDScript: ./_static/extensions/ldscript.html +`Corese`_ is a software platform implementing and extending the standards of the Semantic Web. It allows to create, manipulate, parse, serialize, query, reason, and validate RDF data. Corese is based on the W3C standards `RDF`_, `RDFS`_, `OWL RL`_, `SPARQL`_ and `SHACL`_. 
Corese is implemented as a set of open-source Java libraries. -.. Original location of the extensions documentation -.. .. _STTL SPARQL: https://files.inria.fr/corese/doc/sttl.html -.. .. _SPARQL Rule: https://files.inria.fr/corese/doc/rule.html -.. .. _LDScript: https://files.inria.fr/corese/doc/ldscript.html +**pycorese** is a Python package that provides a simple way to integrate the `corese-core`_ Java library into Python applications. +**pycorese** offers an intuitive API to interact with Corese's capabilities such as storage, SPARQL engine, RDFS and OWL reasoning, and SHACL validation. -.. ############################################################################# -.. The statements below are to produce the grid of cards in the home page -.. grid:: 2 - - .. grid-item-card:: - :shadow: sm - :class-card: sd-rounded-3 - - Corese implements W3C standards and extensions - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - * W3C standards - * `RDF`_ - * `RDFS`_ - * `SPARQL1.1 Query & Update`_ - * `OWL RL`_ - * `SHACL`_ - * Extensions - * `STTL SPARQL`_ - * `SPARQL Rule`_ - * `LDScript`_ - - .. grid-item-card:: - :shadow: sm - :class-card: sd-rounded-3 - - Corese offers several interfaces - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - * `corese-core `_: Java library to process RDF data and use Corese features via an API. - * `corese-server `_: Tool to easily create, configure and manage SPARQL endpoints. - * `corese-GUI `_: Graphical interface that allows an easy and visual use of Corese features. - * `corese-python (beta) `_: Python wrapper for accessing and manipulating RDF data with Corese features using py4j. - * `corese-command (beta) `_: Command Line Interface for Corese that allows users to interact with Corese features from the terminal. +**pycorese** unlocks the potential of Semantic Web stack for applications such as semantic data analysis, knowledge graph construction, and Machine Learning. .. raw:: html

Contributions and discussions

-.. _discussion forum: https://github.com/Wimmics/corese/discussions/ -.. _issue reports: https://github.com/Wimmics/corese/issues/ -.. _pull requests: https://github.com/Wimmics/corese/pulls/ + +.. _discussion forum: https://github.com/corese-stack/corese-python/discussions/ +.. _issue reports: https://github.com/corese-stack/corese-python/issues/ +.. _pull requests: https://github.com/corese-stack/corese-python/pulls/ For support questions, comments, and any ideas for improvements you’d like to discuss, please use our `discussion forum`_. We welcome everyone to contribute to `issue reports`_, suggest new features, and create `pull requests`_. @@ -80,13 +43,13 @@ For support questions, comments, and any ideas for improvements you’d like to .. The statements below are to produce the title of the page in the tab and a menu with the links to the pages of the documentation -.. raw html below is used to hide the title of the page but retain it in the +.. raw html below is used to hide the title of the page but retain it in the tab title. https://github.com/sphinx-doc/sphinx/issues/8356 .. raw:: html
-CORESE documentation +pycorese doc =================================== .. raw:: html @@ -94,10 +57,10 @@ CORESE documentation
.. toctree:: + :maxdepth: 2 :hidden: - Installation - User Guide - API Reference - Demo - + User Guide + API + Install + About Corese diff --git a/docs/source/python_api/bridges.rst b/docs/source/python_api/bridges.rst new file mode 100644 index 0000000..940acc4 --- /dev/null +++ b/docs/source/python_api/bridges.rst @@ -0,0 +1,54 @@ +Java bridge packages +==================== + +Corese is a stack of Java libraries that implement the standards of the Semantic Web. +To run and access Java libraries we tested two off-the-shelf packages: + +- `py4j `_ - this package establishes a bridge between Python and Java using sockets (IPC). The +**corese-python** Java library implements the listener and wrapper for ``corese-core`` library and is built with ``gradle`` specifically for **pycorese**. + +- `jpype `_ - this package utilizes a single shared memory space by embedding the JVM directly into the Python process. The actual ``corese-core`` Java library is downloaded from the Maven repository. + +At first we tried both packages to decide which one is better for our purposes. We found that both `py4j` and `jpype` have their pros and cons. See the comparison table below: + +.. list-table:: Compare Python-Java bridge packages + :widths: 24 38 38 + :header-rows: 1 + + * - + - JPype + - Py4J + + * - Core Technology + - JNI (direct JVM embedding) + - Reflection & Socket IPC + + * - Communication + - Shared memory (in-process) + - Socket based (out-of-process) + + * - Performance + - High (no serialization) + - Moderate (due to IPC overhead) + + * - Setup + - Requires JVM in-process + - *Requires a GatewayServer* + + * - Use case + - High-performance scenarios + - Lightweight, flexible usage + + * - Crash Tolerance + - *Low: JVM crash affects Python (shared process) and cannot be restarted on its own* + - High: JVM and Python are separate processes, so a JVM crash won't take down Python. 
+ + * - Distribution + - Download ``corese-core-{version}.jar`` + - Build ``corese-python-{version}.jar`` + +Although ``jpype`` is more performant and does not require an extra wrapper library, we decided to use ``py4j`` as a default because it is more crash tolerant. The main reason is that ``jpype`` requires the JVM to be embedded in the Python process and prohibits the restart of this subprocess in case of a crash (by design). This could be a critical issue for long-running applications or services. + +However, we decided to leave both bridges in the package and let the user choose the one that fits their needs better. + +Both Java Libraries are installed with the package and can be used interchangeably. See the `User Guide <../user_guide.html>`_. diff --git a/docs/source/python_api/index.rst b/docs/source/python_api/index.rst new file mode 100644 index 0000000..b847899 --- /dev/null +++ b/docs/source/python_api/index.rst @@ -0,0 +1,86 @@ +.. .. currentmodule:: pycorese + + +Python API Reference +=================================== + +**pycorese** is a Python wrapper for accessing and manipulating RDF data with Corese features connected by one of the Java bridge packages: ``py4j`` or ``jpype``. + +.. note:: + + **pycorese** is still in beta version and is under active development. The API may change in future releases. + + +In the following sections, you will find the documentation of the Python API of **pycorese**. + +High-level API +-------------- + +High-level API is a set of convenience methods to facilitate the common tasks of working with Knowledge Graphs. + +.. automodule:: pycorese.api + :members: CoreseAPI + :undoc-members: + + +.. contents:: + :local: + :depth: 2 + +Low-level API +------------- + +Low-level API is a subset of ``corese-core`` classes exposed as Python objects. 
These are dynamically created classes +and can be accessed only after the Corese engine is loaded + +For the details of these classes and their methods, please refer to the `Corese Java documentation `_. + +.. autoattribute:: pycorese.api.CoreseAPI.Graph + :annotation: + + Corese ``fr.inria.corese.core.Graph`` object. + +.. autoattribute:: pycorese.api.CoreseAPI.Load + :annotation: + + Corese ``fr.inria.corese.core.load.Load`` object. + +.. autoattribute:: pycorese.api.CoreseAPI.QueryProcess + :annotation: + + Corese ``fr.inria.corese.core.query.QueryProcess`` object. + +.. autoattribute:: pycorese.api.CoreseAPI.ResultFormat + :annotation: + + Corese ``fr.inria.corese.core.print.ResultFormat`` object. + +.. autoattribute:: pycorese.api.CoreseAPI.RuleEngine + :annotation: + + Corese ``fr.inria.corese.core.rule.RuleEngine`` object. + +.. autoattribute:: pycorese.api.CoreseAPI.Transformer + :annotation: + + Corese ``fr.inria.corese.core.transform.Transformer`` object. + +.. autoattribute:: pycorese.api.CoreseAPI.Shacl + :annotation: + + Corese ``fr.inria.corese.core.shacl.Shacl`` object. + +.. autoattribute:: pycorese.api.CoreseAPI.DataManager + :annotation: + + Corese ``fr.inria.corese.core.storage.api.dataManager.DataManager`` object. + +.. autoattribute:: pycorese.api.CoreseAPI.CoreseGraphDataManager + :annotation: + + Corese ``fr.inria.corese.core.storage.CoreseGraphDataManager`` object. + +.. 
toctree:: + :maxdepth: 2 + + About Java bridges \ No newline at end of file diff --git a/pkg/env/corese-python.yaml b/pkg/env/corese-python.yaml index a8ae57e..08dd2de 100644 --- a/pkg/env/corese-python.yaml +++ b/pkg/env/corese-python.yaml @@ -6,6 +6,9 @@ dependencies: - python>=3.10 - pandas - py4j + - sphinx>=7.1.2 + - sphinx-multiversion>=0.2.4 + - sphinx-design==0.5.0 - pip - pip: - jpype1 @@ -14,5 +17,7 @@ dependencies: - build - pytest - pytest-cov - - sphinx - - pydata_sphinx_theme + - pydata_sphinx_theme>=0.14.4 + - sphinx-copybutton>=0.5.0 + - myst-parser==2.0.0 + - myst_nb==1.1.2 diff --git a/pyproject.toml b/pyproject.toml index 6fda984..33bb634 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,20 +4,24 @@ build-backend = "setuptools.build_meta" [tool.setuptools.dynamic] -version = { file = "VERSION.txt" } +version = { file = "src/pycorese/VERSION.txt" } +readme = { file = "README.md" } + [project] name = "pycorese" -dynamic = ["version"] +dynamic = ["version", "readme"] authors = [ + { name = "Corese Team", email = "corese@inria.fr" }, { name = "Anna Bobasheva", email = "anna.bobasheva@inria.fr" }, { name = "Jean-Luc Szpyrka", email = "jean-luc.szpyrka@inria.fr" }, - { name = "Remi Ceres", email = "remi.ceres@inria.fr"} + { name = "Remi Ceres", email = "remi.ceres@inria.fr"}, + { name = "Erwan Demairy", email = "erwan.demairy@inria.fr"}, ] description = "pycorese: Python API for CORESE Semantic Web platform" keywords = ["Query Engine", "SPARQL", "SHACL", "RDF", "RDFS", "OWL", "Reasoning", "Knowledge Graph"] -readme = "README.md" + license = {file = "LICENSE"} requires-python = ">=3.10" dependencies = [ @@ -64,9 +68,11 @@ include-package-data = true where = ["src"] # Adding the Java jars to the package. -# The jar files are built by the custom build step in the setup.py. -# One jar is built by the gradle build system, -# the other is downloaded from the maven repository. -# Both are copied to the `resources` directory. 
+# The jar files are obtained by the custom `sdist` step in the setup.py. +# (1) corese-python-{version}-with-dependencies.jar is built by the gradle build system, +# (2) corese-core-{version}-with-dependencies.jar is downloaded from the maven repository +# (https://mvnrepository.com/artifact/fr.inria.corese/corese-core). +# Both files are copied to the `resources` directory +# and installed to the `share/pycorese` directory of the venv. [tool.setuptools.data-files] "share/pycorese" = ["resources/*.jar"] diff --git a/src/pycorese/VERSION.txt b/src/pycorese/VERSION.txt new file mode 100644 index 0000000..9faa1b7 --- /dev/null +++ b/src/pycorese/VERSION.txt @@ -0,0 +1 @@ +0.1.5 diff --git a/src/pycorese/__init__.py b/src/pycorese/__init__.py index 5b1e960..b643642 100644 --- a/src/pycorese/__init__.py +++ b/src/pycorese/__init__.py @@ -1,10 +1,17 @@ """The module provides a wrapper to corese library (Software platform for the Semantic Web of Linked Data)""" +from .api import CoreseAPI -__version__ = '1.0.1' -__version_info__ = tuple([ int(num) for num in __version__.split('.')]) +all = ['CoreseAPI'] -from .api import CoreseAPI +# Read version of the package from the file +# that is located in the package directory +from pathlib import Path + +with open(Path(__file__).parent / "VERSION.txt") as _f: + __version__ = _f.read().strip() + +__version_info__ = tuple([ int(num) for num in __version__.split('.')]) import logging diff --git a/src/pycorese/api.py b/src/pycorese/api.py index 5cb47fd..c333fd0 100644 --- a/src/pycorese/api.py +++ b/src/pycorese/api.py @@ -1,6 +1,3 @@ -"""The module provides the capability to launch corese-python jar.""" - - import pandas as pd from io import StringIO import os @@ -29,14 +26,20 @@ def _is_turtle(content): class CoreseAPI: """ - Python implementation of Corese API. - - :param bridge: Bridge name to use for Java integration ('py4j' or 'jpype'). Default is 'py4j'. 
+ Simplified API to leverage functionality of Corese Java library (``corese-core``). + + Parameters + ---------- + java_bridge : str, optional + Package name to use for Java integration. Options: 'py4j', 'jpype'. Default is 'py4j'. + corese_path : str, optional + Path to the corese-python library. If not specified (default), the jar + file that was installed with the package is used. """ def __init__(self, java_bridge: str = 'py4j', - corese_path: str = None): + corese_path: str|None = None): if java_bridge.lower() not in ['py4j', 'jpype']: raise ValueError('Invalid java bridge. Only "py4j" and "jpype" are supported.') @@ -46,24 +49,39 @@ def __init__(self, self.java_gateway = None self._bridge = None - self.Graph = None - self.QueryProcess = None - self.ResultFormat = None - self.Load = None + # This is a minimum set of Corese classes required for the API to work + self.Graph = None # Corese ``fr.inria.corese.core.Graph`` object + self.QueryProcess = None # Corese ``fr.inria.corese.core.query.QueryProcess`` object + self.ResultFormat = None # Corese ``fr.inria.corese.core.print.ResultFormat`` object + self.Load = None # Corese ``fr.inria.corese.core.load.Load`` object + self.RuleEngine = None # Corese ``fr.inria.corese.core.rule.RuleEngine`` object + self.Transformer = None # Corese ``fr.inria.corese.core.transform.Transformer`` object + self.Shacl = None # Corese ``fr.inria.corese.core.shacl.Shacl`` object + self.DataManager = None # Corese ``fr.inria.corese.core.storage.api.dataManager.DataManager`` object + self.CoreseGraphDataManager = None # Corese ``fr.inria.corese.core.storage.CoreseGraphDataManager`` object + self.CoreseGraphDataManagerBuilder = None # Corese ``fr.inria.corese.core.storage.CoreseGraphDataManagerBuilder`` object - def coreseVersion(self): + + def coreseVersion(self)-> str|None: """ - returns the corese-version + Get the version of the corese-core library. - Remark: corese engine must be loaded first. 
+ Notes + ----- + Corese library must be loaded first. - TODO: implement this to call the coreseVersion() from - the corese engine (at the moment this method is staic and - may return bad result) + Returns + ------- + str + The version of the ``corese-core`` library used. If the library is not loaded, returns None. """ + #TODO: implement this to call the coreseVersion() from + # the corese engine (at the moment this method is static and + # may return bad result) + if self._bridge is None: - print(f"Corese engine not loaded yet") + print("Corese engine is not loaded yet") return None return self._bridge.coreseVersion() @@ -75,7 +93,9 @@ def unloadCorese(self): It's not necessary to call this method, as the library is automatically unloaded when the Python interpreter exits. - WARNING: After unloading Corese bridged by JPype it is not possible to restart it. + Warning + ------- + After unloading Corese bridged by ``JPype`` it is not possible to restart it. """ self._bridge.unloadCorese() @@ -87,8 +107,9 @@ def unloadCorese(self): self.Load = None def loadCorese(self) -> None: - """Load Corese library into JVM and expose the Corese classes.""" - + """Load Corese library into JVM and expose the Corese classes. + """ + #TODO: refactor if self.java_bridge == 'py4j': from .py4J_bridge import Py4JBridge @@ -136,21 +157,20 @@ def loadCorese(self) -> None: #TODO: Add support for the other RDF formats def loadRDF(self, rdf: str, graph=None)-> object: """ - Load RDF file/string into Corese graph. + Load RDF file/string into Corese graph. Supported formats are RDF/XML and Turtle. Parameters ---------- - rdf: str - Path to the RDF file or RDF content. - graph : object (fr.inria.corese.core.Graph or - fr.inria.corese.core.storage.CoreseGraphDataManager), optional - Corese graph object. Default is None. + rdf : str + Path to the RDF file or a string with RDF content. 
+ graph : object, optional + Corese object of either ``fr.inria.corese.core.Graph`` or ``fr.inria.core.storage.CoreseGraphDataManager`` type. + If an object is not provided (default), new Graph and GraphManager will be created. Returns ------- - object (fr.inria.corese.core.Graph or - fr.inria.core.storage.CoreseGraphDataManager) - Corese Graph object. + object + Corese ``fr.inria.core.storage.CoreseGraphDataManager`` object. """ if not self.java_gateway: self.loadCorese() @@ -179,56 +199,52 @@ def loadRDF(self, rdf: str, graph=None)-> object: return graph_mgr def loadRuleEngine(self, graph: object, - profile: object, - replace:bool = False)-> object: - """ - Load rule engine for the given graph. - - Parameters - ---------- - graph : object (fr.inria.corese.core.Graph or fr.inria.core.storage.CoreseGraphDataManager) - Corese graph object or DataManager. - profile : object - Profile object for the rule engine. Accepted values: - - RuleEngine.Profile.RDFS - - RuleEngine.Profile.OWLRL - - RuleEngine.Profile.OWLRL_LITE - - RuleEngine.Profile.OWLRL_EXT - replace : bool, optional - Replace the existing rule engine. Default is False. - - Returns - ------- - object (fr.inria.core.rule.RuleEngine) - RuleEngine object. - """ - assert self.RuleEngine, 'Corese classes are not loaded properly.' - assert graph, 'Graph object is required.' - assert profile, 'Profile object is required.' - #TODO: assert profile is valid - - if replace: - self.resetRuleEngine(graph) - - rule_engine = self.RuleEngine.create(graph) - - rule_engine.setProfile(profile) - rule_engine.process() - - return rule_engine - - def resetRuleEngine(self, graph: object)-> None: + profile: object, + replace:bool = False)-> object: """ - Reset the rule engine for the given graph. + Load the rule engine for a given graph. Parameters ---------- - graph : object (fr.inria.corese.core.Graph or fr.inria.core.storage.CoreseGraphDataManager) - Corese graph object or DataManager. 
+ graph : object + Corese Graph or DataManager object + profile : object + Profile object for the rule engine. Accepted values: + ``RuleEngine.Profile.RDFS``, + ``RuleEngine.Profile.OWLRL``, + ``RuleEngine.Profile.OWLRL_LITE``, + ``RuleEngine.Profile.OWLRL_EXT`` + replace : bool, optional + Replace the existing rule engine. Default is False. Returns ------- - None + object + Corese ``fr.inria.core.rule.RuleEngine`` object. + """ + assert self.RuleEngine, 'Corese classes are not loaded properly.' + assert graph, 'Graph object is required.' + assert profile, 'Profile object is required.' + #TODO: assert profile is valid + + if replace: + self.resetRuleEngine(graph) + + rule_engine = self.RuleEngine.create(graph) + + rule_engine.setProfile(profile) + rule_engine.process() + + return rule_engine + + def resetRuleEngine(self, graph: object)-> None: + """ + Reset the rule engine for a given graph. + + Parameters + ---------- + graph : object + Corese Graph or DataManager object """ assert self.RuleEngine, 'Corese classes are not loaded properly.' assert graph, 'Graph object is required.' @@ -241,23 +257,24 @@ def sparqlSelect(self, graph: object, query: str ='SELECT * WHERE {?s ?p ?o} LIMIT 5', return_dataframe: bool =True)-> object|pd.DataFrame: """ - Execute SPARQL SELECT or ASK query on Corese graph. + Execute SPARQL SELECT or ASK query on Corese graph. Optionally return the result as DataFrame. Parameters ---------- - graph : object (fr.inria.corese.core.Graph) - Corese graph object. - prefixes : str or list - SPARQL prefixes. Default is None. - query : str - SPARQL query. Default is 'SELECT * WHERE {?s ?p ?o} LIMIT 5'. - return_dataframe : bool, optional. Default is True. + graph : object + Corese Graph or DataManager object + prefixes : str or list, optional + namespace prefixes. Default is None. + query : str, optional + SPARQL query. By default five first triples of the graph are returned. + return_dataframe : bool, optional + Return the result as a DataFrame. 
Default is True. Returns ------- - object (fr.inria.core.print.ResultFormat) - Result of the SPARQL - + object or pd.DataFrame + Result of the SPARQL query in CSV-formatted ``fr.inria.core.print.ResultFormat`` + object or a DataFrame. """ assert self.QueryProcess, 'Corese classes are not loaded properly.' assert self.ResultFormat, 'Corese classes are not loaded properly.' @@ -285,21 +302,20 @@ def sparqlSelect(self, graph: object, def toDataFrame(self, queryResult: object, dtypes: list|dict|None = None)-> pd.DataFrame: """ - Convert Corese ResultFormat object to pandas DataFrame. + Convert Corese ResultFormat object to ``pandas.DataFrame``. Parameters ---------- - queryResult : csv resultFormat object (fr.inria.core.print.ResultFormat) - ResultFormat object. + queryResult : object + CSV-formatted ``fr.inria.core.print.ResultFormat`` object. dtypes : list or dict, optional - Data types for the columns in the format required by Pandas - read_csv method https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html. - Default is None. + Column data types in the format accepted by the ``pandas.read_csv`` method. + https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html Returns ------- pd.DataFrame - Result in DataFrame format. + Corese object converted to a DataFrame. """ assert self.ResultFormat, 'Corese classes are not loaded properly.' @@ -325,18 +341,18 @@ def sparqlConstruct(self, graph: object, Parameters ---------- - graph : object (fr.inria.corese.core.Graph) - Corese graph object. - prefixes : str or list - SPARQL prefixes. Default is None. - query : str + graph : object + Corese Graph or DataManager object + prefixes : str or list, optional + namespace prefixes. Default is None. + query : str, optional SPARQL query. Default is empty string resulting in empty graph. merge : bool, optional - Merge the result with the existing graph. Default is False.
Returns ------- - object (fr.inria.core.print.ResultFormat) + object Result of the SPARQL CONSTRUCT query in RDF/XML format. """ assert self.QueryProcess, 'Corese classes are not loaded properly.' @@ -367,14 +383,15 @@ def toTurtle(self, rdf:object)-> str: Parameters ---------- - rdf : object (fr.inria.corese.core.Graph) - Corese graph object. + rdf : object + Corese RDF object Returns ------- str RDF in Turtle format. """ + assert self.Transformer, 'Corese classes are not loaded properly.' # TODO: ASk Remi about getGraph, the Graph and the right way to do the transformation @@ -390,20 +407,22 @@ def shaclValidate(self, graph: object, """ Validate RDF graph against SHACL shape. - This Version supports only Turtle format. + This version supports only Turtle format to define a SHACL shape. Parameters ---------- - graph : object (fr.inria.corese.core.Graph) - Corese graph object. - shacl_shape_ttl : str - SHACL shape in Turtle format. + graph : object + Corese Graph or DataManager object prefixes : str or list, optional - Prefixes. Default is None. + namespace prefixes. Default is None. + shacl_shape_ttl : str, optional + SHACL shape in Turtle format. If not provided, the validation will be skipped. + return_dataframe : bool, optional + Return the validation report as a DataFrame. Default is False. Returns ------- - str + object SHACL validation report in Turtle format. """ assert self.Shacl, 'Corese classes are not loaded properly.' @@ -442,7 +461,7 @@ def shaclValidate(self, graph: object, # Parse validation report def shaclReportToDataFrame(self, validation_report: str)-> pd.DataFrame: """ - Convert SHACL validation report to pandas DataFrame. + Convert SHACL validation report to ``pandas.DataFrame``. Parameters ---------- @@ -452,7 +471,7 @@ def shaclReportToDataFrame(self, validation_report: str)-> pd.DataFrame: Returns ------- pd.DataFrame - Validation report in DataFrame format. + Validation report as a DataFrame. 
""" prefix_shacl = f'@prefix sh: <{self.Namespaces.SHACL}> .' diff --git a/src/pycorese/py4J_bridge.py b/src/pycorese/py4J_bridge.py index 4f0ab91..f4f9aa6 100644 --- a/src/pycorese/py4J_bridge.py +++ b/src/pycorese/py4J_bridge.py @@ -68,7 +68,7 @@ def coreseVersion(self): pass if version is None: - loggingWarning(f"Py4j: the CORESE library is too old. coreseVersion() is available since 4.6.0 only.") + logging.warning(f"Py4j: the CORESE library is too old. coreseVersion() is available since 4.6.0 only.") return version