diff --git a/INSTALL.rst b/INSTALL.rst index d70ef4c729..000e0aa80a 100644 --- a/INSTALL.rst +++ b/INSTALL.rst @@ -1,6 +1,6 @@ .. This file is part of Invenio. - Copyright (C) 2015-2018 CERN. + Copyright (C) 2015-2019 CERN. Invenio is free software; you can redistribute it and/or modify it under the terms of the MIT License; see LICENSE file for more details. @@ -8,19 +8,38 @@ Installation ============ -The best way to get an Invenio demo instance up and running immediately is by -using Docker or Vagrant, for example: +Please see our quick start guide on +https://invenio.readthedocs.io/en/latest/quickstart/index.html + +1. Scaffold +----------- .. code-block:: console - $ docker-compose build - $ docker-compose up -d - $ docker-compose run --rm web ./scripts/populate-instance.sh - $ firefox http://127.0.0.1/records/1 + # prerequisites: cookiecutter and pipenv + # scaffold my-site instance + $ cookiecutter gh:inveniosoftware/cookiecutter-invenio-instance \ + --checkout v3.2 + +2. Install +---------- + +.. code-block:: console -This will start an Invenio demo instance containing several example records and -all the needed services such as PostgreSQL, Elasticsearch, Redis, RabbitMQ. + $ cd my-site/ + # start services (db, es, mq, cache) + $ docker-compose up + # build and install my-site instance + $ ./scripts/bootstrap + +3. Run +------ + +.. code-block:: console -For a detailed walk-through on how to set up your Invenio instance, please see -our `installation documentation -`_. + # setup database and indexes + $ ./scripts/setup + # start webserver and task queue + $ ./scripts/server + # your site is running! + $ firefox https://127.0.0.1:5000/ diff --git a/README.rst b/README.rst index 32b77a5b81..1d443338b7 100644 --- a/README.rst +++ b/README.rst @@ -6,9 +6,9 @@ under the terms of the MIT License; see LICENSE file for more details. 
-============ - Invenio v3 -============ +====================== + Invenio Framework v3 +====================== **Open Source framework for large-scale digital repositories.** @@ -24,6 +24,6 @@ .. image:: https://badges.gitter.im/Join%20Chat.svg :target: https://gitter.im/inveniosoftware/invenio -Invenio is like a Swiss Army knife of battle-tested, safe and secure modules -providing you with all the features you need to run a trusted digital +Invenio Framework is like a Swiss Army knife of battle-tested, safe and secure +modules providing you with all the features you need to run a trusted digital repository. diff --git a/docs/_static/invenio-files-integration.png b/docs/_static/invenio-files-integration.png new file mode 100644 index 0000000000..0649689220 Binary files /dev/null and b/docs/_static/invenio-files-integration.png differ diff --git a/docs/_static/invenio-records-file.png b/docs/_static/invenio-records-file.png new file mode 100644 index 0000000000..d971179f0c Binary files /dev/null and b/docs/_static/invenio-records-file.png differ diff --git a/docs/conf.py b/docs/conf.py index be83729603..a9357b2728 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -320,19 +320,43 @@ # Example configuration for intersphinx: refer to the Python standard library. 
intersphinx_mapping = { - 'flask': ('https://flask.readthedocs.io/', None), + 'flask': ('https://flask.palletsprojects.com/en/1.1.x/', None), 'flaskassets': ('https://flask-assets.readthedocs.io/en/latest/', None), 'flaskregistry': ( 'https://flask-registry.readthedocs.io/en/latest/', None), 'flaskscript': ('https://flask-script.readthedocs.io/en/latest/', None), + 'invenio-app': ( + 'https://invenio-app.readthedocs.io/en/latest/', None), 'invenio-access': ( 'https://invenio-access.readthedocs.io/en/latest/', None), - 'jinja': ('https://jinja.readthedocs.io/', None), + 'invenio-celery': ( + 'https://invenio-celery.readthedocs.io/en/latest/', None), + 'invenio-config': ( + 'https://invenio-config.readthedocs.io/en/latest/', None), + 'invenio-db': ( + 'https://invenio-db.readthedocs.io/en/latest/', None), + 'invenio-formatter': ( + 'https://invenio-formatter.readthedocs.io/en/latest/', None), + 'invenio-indexer': ( + 'https://invenio-indexer.readthedocs.io/en/latest/', None), + 'invenio-files-rest': ( + 'https://invenio-files-rest.readthedocs.io/en/latest/', None), + 'invenio-previewer': ( + 'https://invenio-previewer.readthedocs.io/en/latest/', None), + 'invenio-iiif': ( + 'https://invenio-iiif.readthedocs.io/en/latest/', None), + 'invenio-records-files': ( + 'https://invenio-records-files.readthedocs.io/en/latest/', None), + 'invenio-records-rest': ( + 'https://invenio-records-rest.readthedocs.io/en/latest/', None), + 'invenio-theme': ( + 'https://invenio-theme.readthedocs.io/en/latest/', None), + 'jinja': ('https://jinja.palletsprojects.com/en/2.10.x/', None), 'python': ('https://docs.python.org/', None), 'sqlalchemy': ('http://docs.sqlalchemy.org/en/latest/', None), 'webassets': ('https://webassets.readthedocs.io/en/latest/', None), - 'werkzeug': ('https://werkzeug.readthedocs.io/', None), + 'werkzeug': ('https://werkzeug.palletsprojects.com/en/0.16.x/', None), } -# Autodoc configuraton. +# Autodoc configuration. 
autoclass_content = 'both' diff --git a/docs/general/bundles.rst b/docs/general/bundles.rst index 44bacfe153..d983dd995a 100644 --- a/docs/general/bundles.rst +++ b/docs/general/bundles.rst @@ -128,10 +128,9 @@ Included modules: Files bundle ------------ -The files bundle contains all modules related to management of files in -Invenio, including an object storage REST API, multiple supported storage -backends, file previewers, and IIIF image server and an integration layer -between files and records. +The files bundle contains all modules related to files management, +an object storage REST API, storage backends, file previewers, +IIIF image APIs and an integration layer between files and records. Included modules: @@ -233,10 +232,6 @@ to change prior to final release and in most cases are missing documentation. - `invenio-query-parser `_ - Invenio v1 compatible query parser for Invenio v3. Note the module is GPL licensed due to a GPL-licensed dependency. -- `invenio-records-editor `_ - - JSON record editor. -- `invenio-records-editor-js `_ - - Angular 4 application for editing JSON records. - `invenio-s3 `_ - Support for the S3 storage protocol in Invenio. - `invenio-saml `_ diff --git a/docs/general/introduction.rst b/docs/general/introduction.rst index a7090c8309..c83b83182f 100644 --- a/docs/general/introduction.rst +++ b/docs/general/introduction.rst @@ -1,6 +1,6 @@ .. This file is part of Invenio. - Copyright (C) 2018 CERN. + Copyright (C) 2019 CERN. Invenio is free software; you can redistribute it and/or modify it under the terms of the MIT License; see LICENSE file for more details. @@ -55,13 +55,13 @@ Internally Invenio natively store records as JSON documents whose structure can be validated and described with JSONSchemas. Records can easily be linked via JSONRef providing you with powerful tools to model your records. Invenio further comes with robust metadata transformation layer that can serialize -records to e.g. 
MARCXML, DataCite XML, JSON-LD, Citation Style Langauge (CSL) +records to e.g. MARCXML, DataCite XML, JSON-LD, Citation Style Language (CSL) JSON and many other formats. In addition Invenio provides a persistent identifier store and a resolver that allows you to use your preferred persistent identifier scheme for identifying records such as DOIs (Digital Object Identifiers), Handles, PURLs, -URNs or your own local identifier. The persistent identifer resolver further +URNs or your own local identifier. The persistent identifier resolver further has support for advanced features such as tombstone pages, redirection and merged records. @@ -74,7 +74,7 @@ full-text search, powerful query syntax, advanced stemming and aggregations, super-fast auto-completion suggesters as well as geospatial search. Invenio further leverages both instant indexing as well as extremely fast -distributed bulk indexing with rates beyond 10,0000 records/second. +distributed bulk indexing with rates beyond 10,000 records/second. File management --------------- diff --git a/docs/index.rst b/docs/index.rst index cc39361ae3..c7ee554412 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -16,6 +16,7 @@ tutorials/build-a-repository.rst tutorials/understanding-data-models.rst tutorials/managing-access.rst + tutorials/handling-files.rst tutorials/upgrade-marshmallow.rst deployment/securing-your-instance.rst architecture/infrastructure.rst @@ -23,10 +24,12 @@ migrating.rst orcid-login.rst history.rst + upgrading.rst releases/index.rst community/index.rst + .. toctree:: :hidden: diff --git a/docs/quickstart/crud-operations.rst b/docs/quickstart/crud-operations.rst index bf7abe4940..9bdf3e7fb0 100644 --- a/docs/quickstart/crud-operations.rst +++ b/docs/quickstart/crud-operations.rst @@ -1,6 +1,6 @@ .. This file is part of Invenio. - Copyright (C) 2015-2018 CERN. + Copyright (C) 2015-2019 CERN. Copyright (C) 2018 Northwestern University, Feinberg School of Medicine, Galter Health Sciences Library. 
Invenio is free software; you can redistribute it and/or modify it @@ -8,146 +8,241 @@ .. _crud-operations: -Create, Display, Search Records -=============================== +Create, display and search records +================================== .. _create-a-record: Create a record --------------- -By default, the toy data model has a records REST API endpoint configured, -which allows performing CRUD and search operations over records. Let's create a -simple record via ``curl``, by sending a ``POST`` request to ``/api/records`` -with some sample data: - -.. code-block:: shell - - $ curl -k --header "Content-Type: application/json" \ - --request POST \ - --data '{"title":"Some title", "contributors": [{"name": "Doe, John"}]}' \ - https://localhost:5000/api/records/?prettyprint=1 - -When the request was successful, the server returns the details of the created -record: - -.. code-block:: shell - - { - "created": "2018-05-23T13:28:19.426206+00:00", - "id": 1, - "links": { - "self": "https://localhost:5000/api/records/1" - }, - "metadata": { - "contributors": [ - { - "name": "Doe, John" - } - ], - "id": 1, - "title": "Some title" - }, - "revision": 0, - "updated": "2018-05-23T13:28:19.426213+00:00" - } +Invenio provides REST APIs to perform operations on records, such as +create, search or retrieve. -.. note:: +Let's create a simple record via ``curl``, by sending a ``POST`` request to +the ``/api/records`` endpoint with some sample data: + +.. code-block:: console + + $ curl -k --header "Content-Type: application/json" \ + --request POST \ + --data '{"title":"Some title", "contributors": [{"name": "Doe, John"}]}' \ + https://127.0.0.1:5000/api/records/?prettyprint=1 + +The response of the request contains the newly created record metadata: - Because we are using a self-signed SSL certificate to enable HTTPS, your - web browser will probably display a warning when you access the website. 
- You can usually get around this by following the browser's instructions in - the warning message. For CLI tools like ``curl``, you can ignore the SSL - verification via the ``-k/--insecure`` option. +.. code-block:: console + + { + "created": "2019-11-22T10:30:06.135431+00:00", + "id": "1", + "links": { + "files": "https://127.0.0.1:5000/api/records/1/files", + "self": "https://127.0.0.1:5000/api/records/1" + }, + "metadata": { + "contributors": [ + { + "name": "Doe, John" + } + ], + "id": "1", + "title": "Some title" + }, + "revision": 0, + "updated": "2019-11-22T10:30:06.135438+00:00" + } .. _display-a-record: Display a record ---------------- -You can now visit the record's page at https://localhost:5000/records/1, or -fetch it via the REST API: - -.. code-block:: shell - - # You can find this URL under the "links.self" key of the previous response - $ curl -k --header "Content-Type: application/json" \ - https://localhost:5000/api/records/1?prettyprint=1 - - { - "created": "2018-05-23T13:28:19.426206+00:00", - "id": 1, - "links": { - "self": "https://localhost:5000/api/records/1" - }, - "metadata": { - "contributors": [ - { - "name": "Doe, John" - } - ], - "id": 1, - "title": "Some title" - }, - "revision": 0, - "updated": "2018-05-23T13:28:19.426213+00:00" - } +You can now visit the record's page at https://127.0.0.1:5000/records/1. + +.. note:: + To enable HTTPS, Invenio uses a self-signed SSL certificate. + Your web browser should display a warning when accessing the website + since it will consider it insecure. You can safely ignore it when + developing locally. + +You can also fetch the record via REST APIs: + +.. 
code-block:: console + + $ curl -k --header "Content-Type: application/json" \ + https://127.0.0.1:5000/api/records/1?prettyprint=1 + + { + "created": "2019-11-22T10:30:06.135431+00:00", + "id": "1", + "links": { + "files": "https://127.0.0.1:5000/api/records/1/files", + "self": "https://127.0.0.1:5000/api/records/1" + }, + "metadata": { + "contributors": [ + { + "name": "Doe, John" + } + ], + "id": "1", + "title": "Some title" + }, + "revision": 0, + "updated": "2019-11-22T10:30:06.135438+00:00" + } .. _search-for-records: Search for records ------------------ +The record that you have created is safely stored in the database but +also indexed in Elasticsearch for fast searching. You can see the list of +records and perform search queries at https://127.0.0.1:5000/search, +or via the REST API from the ``/api/records`` endpoint: + +.. code-block:: console + + $ curl -k --header "Content-Type: application/json" \ + https://127.0.0.1:5000/api/records/?prettyprint=1 + + { + "aggregations": { + "keywords": { + "buckets": [], + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0 + }, + "type": { + "buckets": [], + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0 + } + }, + "hits": { + "hits": [ + { + "created": "2019-11-22T10:30:06.135431+00:00", + "id": "1", + "links": { + "files": "https://127.0.0.1:5000/api/records/1/files", + "self": "https://127.0.0.1:5000/api/records/1" + }, + "metadata": { + "contributors": [ + { + "name": "Doe, John" + } + ], + "id": "1", + "title": "Some title" + }, + "revision": 0, + "updated": "2019-11-22T10:30:06.135438+00:00" + } + ], + "total": 1 + }, + "links": { + "self": "https://127.0.0.1:5000/api/records/?sort=mostrecent&size=10&page=1" + } + } + +.. _upload-a-file: + +Upload a file +------------- +Invenio allows you to attach files to a record. Let's upload a file +to the previously created record. + +.. 
code-block:: console + + # create a sample file + + $ echo 'my file content' > example.txt -The record you created before, besides being inserted into the database, is -also indexed in Elasticsearch and available for searching. You can search for -it via the Search UI page at https://localhost:5000/search, or via the REST -API from the ``/api/records`` endpoint: + # Upload the file to the record with PID 1 -.. code-block:: shell + $ curl -k -X PUT https://127.0.0.1:5000/api/records/1/files/example.txt \ + -H "Content-Type: application/octet-stream" \ + --data-binary @example.txt - $ curl -k --header "Content-Type: application/json" \ - https://localhost:5000/api/records/?prettyprint=1 +The response of the request contains the uploaded file's metadata: - { - "aggregations": { - "keywords": { - "buckets": [], - "doc_count_error_upper_bound": 0, - "sum_other_doc_count": 0 +.. code-block:: console + + { + "version_id": "059a6706-632f-403a-beab-36e31e370737", + "is_head": true, + "mimetype": "text/plain", + "size": 8, + "key": "example.txt", + "delete_marker": false, + "links": { + "self": "https://127.0.0.1:5000/api/records/1/files/example.txt", + "version": "https://127.0.0.1:5000/api/records/1/files/example.txt?versionId=059a6706-632f-403a-beab-36e31e370737", + "uploads": "https://127.0.0.1:5000/api/records/1/files/example.txt?uploads" }, - "type": { - "buckets": [], - "doc_count_error_upper_bound": 0, - "sum_other_doc_count": 0 + "checksum": "md5:ddce269a1e3d054cae349621c198dd52", + "created": "2019-11-22T10:34:08.944425", + "tags": {}, + "updated": "2019-11-22T10:34:08.951942" + } + +.. _list-files-of-a-record: + +List the files of a record +-------------------------- +You can use REST APIs to retrieve all the files attached to a record: + +.. 
code-block:: console + + $ curl -k -X GET https://127.0.0.1:5000/api/records/1/files?prettyprint=1 + + { + "contents": [ + { + "version_id": "059a6706-632f-403a-beab-36e31e370737", + "is_head": true, + "mimetype": "text/plain", + "size": 8, + "key": "example.txt", + "delete_marker": false, + "links": { + "self": "https://127.0.0.1:5000/api/records/1/files?key=example.txt", + "version": "https://127.0.0.1:5000/api/records/1/files?key=example.txt&versionId=059a6706-632f-403a-beab-36e31e370737", + "uploads": "https://127.0.0.1:5000/api/records/1/files?key=example.txt?uploads" + }, + "checksum": "md5:ddce269a1e3d054cae349621c198dd52", + "created": "2019-11-22T10:34:08.944425", + "tags": {}, + "updated": "2019-11-22T10:34:08.951942" } - }, - "hits": { - "hits": [ - { - "created": "2018-05-23T13:28:19.426206+00:00", - "id": 1, - "links": { - "self": "https://localhost:5000/api/records/1" - }, - "metadata": { - "contributors": [ - { - "name": "Doe, John" - } - ], - "id": 1, - "title": "Some title" - }, - "revision": 0, - "updated": "2018-05-23T13:28:19.426213+00:00" - } ], - "total": 1 - }, - "links": { - "self": "https://localhost:5000/api/records/?size=10&sort=mostrecent&page=1" + "id": "9ae1c979-9c6a-4603-afb2-38074eb48a54", + "size": 16, + "locked": false, + "max_file_size": null, + "links": { + "self": "https://127.0.0.1:5000/api/records/1/files", + "versions": "https://127.0.0.1:5000/api/records/1/files?versions", + "uploads": "https://127.0.0.1:5000/api/records/1/files?uploads" + }, + "quota_size": null, + "created": "2019-11-22T10:30:06.118477", + "updated": "2019-11-22T10:34:08.962336" } - } -Continue tutorial -~~~~~~~~~~~~~~~~~ -:ref:`next-steps` +.. _download-a-file: + +Download a file +--------------- +Let's download the file that we have just uploaded: + +.. 
code-block:: console + + $ curl -k -X GET https://127.0.0.1:5000/api/records/1/files/example.txt -o example.txt + +Final steps +----------- +Complete the initialisation of your Invenio application: :ref:`final-steps`. diff --git a/docs/quickstart/final-steps.rst b/docs/quickstart/final-steps.rst new file mode 100644 index 0000000000..82d00cf082 --- /dev/null +++ b/docs/quickstart/final-steps.rst @@ -0,0 +1,98 @@ +.. + This file is part of Invenio. + Copyright (C) 2015-2019 CERN. + Copyright (C) 2018 Northwestern University, Feinberg School of Medicine, Galter Health Sciences Library. + + Invenio is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. + +.. _final-steps: + +Final steps +=========== + +You can now address the warnings displayed after completing the +``cookiecutter`` initialisation procedure. You can run the following +command to find all the ``TODO``: + +.. code-block:: console + + $ cd my-site/ + $ grep --color=always --recursive --context=3 --line-number TODO . + +Let's address them one by one: + +1. Python packages require a ``MANIFEST.in`` which specifies what files are + part of your application package. You can update the existing file by running + the following commands in your site directory: + + .. code-block:: console + + $ git init + $ git add --all + $ pipenv run check-manifest --update + +2. Translations configuration (``.tx/config``). You can generate + the necessary files to allow localization of the instance in different + languages via the `Transifex platform `_: + + .. code-block:: console + + # if you have activated the virtual environment skip `pipenv shell` + $ pipenv shell + (my-site)$ python setup.py extract_messages + (my-site)$ python setup.py init_catalog -l en + (my-site)$ python setup.py compile_catalog + + **Transifex** + + Make sure you edit ``.tx/config`` and sign-up for Transifex before trying + below steps. 
+ + Install the transifex-client + + .. code-block:: console + + $ pipenv install transifex-client + + Push source (.pot) and translations (.po) to Transifex: + + .. code-block:: console + + $ pipenv run tx push --skip --translations + + Pull translations for a single language from Transifex + + .. code-block:: console + + $ pipenv run tx pull --language en + +REST APIs permissions +^^^^^^^^^^^^^^^^^^^^^ +By default, a new Invenio instance has no permissions configured. This means that +any user can perform operations on records such as read, update, create and delete. +You can check the :ref:`managing-access` documentation to learn how to configure +permissions in Invenio. + +Testing +^^^^^^^ +Tests are available in the ``tests`` folder. You can add your own tests and then +run linting checks and tests using the script ``run-tests.sh``: + +.. code-block:: console + + $ ./run-tests.sh + # ...or to run individual tests + $ pipenv run pytest tests/test_version.py + +Documentation +^^^^^^^^^^^^^ +A basic documentation structure and configuration using ``Sphinx`` is available +in the ``docs`` folder. You can build it to generate the final ``HTML`` files by running: + +.. code-block:: console + + $ pipenv run python setup.py build_sphinx + +Then, open the file ``docs/_build/html/index.html`` in your browser to see the generated +documentation. diff --git a/docs/quickstart/index.rst b/docs/quickstart/index.rst index 5019d49c79..04cb6f6295 100644 --- a/docs/quickstart/index.rst +++ b/docs/quickstart/index.rst @@ -15,4 +15,4 @@ Quickstart quickstart crud-operations - next-steps + final-steps diff --git a/docs/quickstart/next-steps.rst b/docs/quickstart/next-steps.rst deleted file mode 100644 index 9e077cb0f6..0000000000 --- a/docs/quickstart/next-steps.rst +++ /dev/null @@ -1,102 +0,0 @@ -.. - This file is part of Invenio. - Copyright (C) 2015-2018 CERN. - Copyright (C) 2018 Northwestern University, Feinberg School of Medicine, Galter Health Sciences Library. 
- - Invenio is free software; you can redistribute it and/or modify it - under the terms of the MIT License; see LICENSE file for more details. - -.. _next-steps: - -Next Steps -========== - -Although we can run and interact with the instance, we're not quite there yet -in terms of having a proper Python package that's ready to be tested and -deployed to a production environment. - -You may have noticed that after running the ``cookiecutter`` command for the -instance and the data model, there was a note for checking out some of the -TODOs. You can run the following command in the code repository directory -to see a summary of the TODOs again: - -.. code-block:: console - - $ grep --color=always --recursive --context=3 --line-number TODO . - -Let's have a look at some of them one-by-one and explain what they are for: - -1. Python packages require a ``MANIFEST.in`` which specifies what files are - part of the distributed package. You can update the existing file by running - the following commands in your site directory: - - .. code-block:: console - - (my-site)$ git init - (my-site)$ git add --all - (my-site)$ check-manifest --update - -2. Translations configuration (``.tx/config``): You might also want to generate - the necessary files to allow localization of the instance in different - languages via the `Transifex platform `_: - - .. code-block:: console - - # if you have activated the virtual environment skip `pipenv shell` - $ pipenv shell - (my-site)$ python setup.py extract_messages - (my-site)$ python setup.py init_catalog -l en - (my-site)$ python setup.py compile_catalog - - **Transifex** - - Make sure you edit ``.tx/config`` and sign-up for Transifex before trying - below steps. - - Install the transifex-client - - .. code-block:: console - - (my-site)$ pipenv install transifex-client - - Push source (.pot) and translations (.po) to Transifex: - - .. 
code-block:: console - - (my-site)$ tx push --skip --translations - - Pull translations for a single language from Transifex - - .. code-block:: console - - (my-site)$ tx pull --language en - -3. REST API permissions: By default your Invenio instance have no permissions - enabled, which means that any user will be allowed to perform any operation - (read, update, create and delete) over the records. Check - :ref:`managing-access` for information on how to adapt the permissions to - your needs. - -Testing -^^^^^^^ -In order to run tests for the instance, you can run: - -.. code-block:: shell - - # run all the tests... - (my-site)$ ./run-tests.sh - # ...or to run individual tests - (my-site)$ pytest tests/test_version.py - -Documentation -^^^^^^^^^^^^^ - -In order to build and preview the instance's documentation, you can run the -`python setup.py build_sphinx` command: - -.. code-block:: shell - - (my-site)$ python setup.py build_sphinx - -Open up ``docs/_build/html/index.html`` in your browser to see the -documentation. diff --git a/docs/quickstart/quickstart.rst b/docs/quickstart/quickstart.rst index 0c990b8136..700b971a40 100644 --- a/docs/quickstart/quickstart.rst +++ b/docs/quickstart/quickstart.rst @@ -1,6 +1,6 @@ .. This file is part of Invenio. - Copyright (C) 2015-2018 CERN. + Copyright (C) 2015-2019 CERN. Copyright (C) 2018 Northwestern University, Feinberg School of Medicine, Galter Health Sciences Library. Invenio is free software; you can redistribute it and/or modify it @@ -15,97 +15,83 @@ Launch an Invenio instance Prerequisites ------------- -To be able to develop and run Invenio you will need the following installed and -configured on your system: +Invenio requires the following software installed in your system: - `Docker v1.18+ `_ and `Docker Compose v1.23+ `_ - `NodeJS v6.x+ and NPM v4.x+ `_ -- `Enough virtual memory `_ +- `Enough virtual memory `_ for Elasticsearch (when running in Docker). 
-- `Cookiecutter `_ +- The Python package `Cookiecutter `_ - `Pipenv `_ -Invenio uses Cookiecutter to scaffold the boilerplate for your new instance and -uses Pipenv to manage Python dependencies in a virtual environment. Above links -contain detailed installation instructions, but the impatient can use following -commands: - -.. code-block:: shell - - # Install cookiecutter if it is not already installed - $ sudo apt-get install cookiecutter - $ sudo apt-get install pipenv - # or e.g. - $ pip install --upgrade cookiecutter pipenv - .. _bootstrap: -Scaffold --------- -First step is to scaffold a new instance using the `official Invenio +Create an Invenio instance +-------------------------- +First step is to create your new Invenio instance using the `official Invenio cookiecutter template `_. .. code-block:: shell - $ cookiecutter gh:inveniosoftware/cookiecutter-invenio-instance --checkout v3.1 - # ...fill in the fields... + $ cookiecutter gh:inveniosoftware/cookiecutter-invenio-instance --checkout v3.2 -Note, the cookiecutter script will ask you to resolve some TODOs. These will -be covered in the :ref:`next-steps` section of this quick start guide. +The cookiecutter initialisation procedure will prompt you with a series of questions +aiming to customise your new instance, e.g. the name of your application. -The scaffolded instance comes by default with a toy example data model to help -you get started. +.. note:: + At the end of the initialisation, you will be warned to manually change some parts + of the generated code marked with ``TODOs``. These will be covered in the + :ref:`final-steps` section of this quick start guide. Install ------- -Now that we have our instance's source code ready we can proceed with the -initial setup of the services and dependencies of the project: +Now that your project is generated, you will have to install all needed Python dependencies +and initialise the application services such as the database and the search engine. 
-First, fire up the database, Elasticsearch, Redis and RabbitMQ: +From now on, the quick start guide will use the name ``my-site`` to refer to your +newly created Invenio application. -.. code-block:: shell +Let's run the services using ``docker-compose``: - $ cd my-site/ - $ docker-compose up -d - Creating mysite_cache_1 ... done - Creating mysite_db_1 ... done - Creating mysite_es_1 ... done - Creating mysite_mq_1 ... done +.. code-block:: console -If the Elasticsearch service fails to start mentioning that it requires more -virtual memory, see the following -`fix `_. + $ cd my-site/ + $ docker-compose up + Creating my-site_cache_1 ... done + Creating my-site_db_1 ... done + Creating my-site_es_1 ... done + Creating my-site_mq_1 ... done -Next, activate the virtualenv of the new project by running: +If the Elasticsearch service fails to start, it might be because it requires +more virtual memory than your system provides by default. +For more information, see +`Elasticsearch documentation `_. -.. code-block:: shell +Let's run the installation scripts: - $ pipenv shell +.. code-block:: console -Finally, install all dependencies, build the JS/CSS assets, create the database -tables and create the Elasticsearch indices by running the bootstrap and setup -scripts: + $ ./scripts/bootstrap + $ ./scripts/setup -.. code-block:: shell +This will: - (my-site)$ ./scripts/bootstrap - (my-site)$ ./scripts/setup +* install required Python packages +* build JS/CSS assets +* create and initialise the database and the search engine Run --- -You can now start the development web server and the background worker for your -new Invenio instance: +Let's run Invenio and open your browser to https://127.0.0.1:5000/: -.. code-block:: shell +.. 
code-block:: console - (my-site)$ ./scripts/server - * Environment: development - * Debug mode: on - * Running on https://127.0.0.1:5000/ (Press CTRL+C to quit) + $ ./scripts/server + * Environment: development + * Debug mode: on + * Running on https://127.0.0.1:5000/ (Press CTRL+C to quit) -You can now visit https://127.0.0.1:5000/ ! - -Continue tutorial -~~~~~~~~~~~~~~~~~ -:ref:`crud-operations` +Records +------- +Learn how to create and view records: :ref:`crud-operations`. diff --git a/docs/releases/index.rst b/docs/releases/index.rst index 70c9cb4897..2f9fd0b8a3 100644 --- a/docs/releases/index.rst +++ b/docs/releases/index.rst @@ -17,6 +17,7 @@ are here for the interested. :name: mastertoc maintenance-policy + v3.2.0 v3.1.1 v3.1.0 v3.0.2 diff --git a/docs/releases/v3.2.0.rst b/docs/releases/v3.2.0.rst new file mode 100644 index 0000000000..2739810d29 --- /dev/null +++ b/docs/releases/v3.2.0.rst @@ -0,0 +1,280 @@ +.. + This file is part of Invenio. + Copyright (C) 2019 CERN. + + Invenio is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. + +Version 3.2.0 +============= + +*Released 2019-12-20* + +We are proud to announce the release of Invenio v3.2.0. + +**Python compatibility** + +Invenio v3.2 supports Python 2.7 (until 2019-12-31), Python 3.5 and Python 3.6. + +Getting started +--------------- + +See our :ref:`quickstart` guide. + +What's new in Invenio v3.2? +--------------------------- + +Files bundle +~~~~~~~~~~~~ + +We have released four new modules as part of the new Files bundle: + +- `Invenio-Files-REST `_: Object store REST API for managing files in Invenio. +- `Invenio-Records-Files `_: Integration layer between records and files. +- `Invenio-Previewer `_: File previewers support. +- `Invenio-IIIF `_: IIIF Image API support for thumbnail and zooming on images. 
+ +To understand more about how to use the new Files bundle see our integration +guide in :ref:`integrating-files`. + + +Elasticsearch v7 support +~~~~~~~~~~~~~~~~~~~~~~~~ +Invenio now supports Elasticsearch v7. + + +Marshmallow 3 compatibility +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Invenio now supports both Marshmallow v2 and v3. The support is done via a +compatibility layer that allows Invenio to work with schemas from either v2 or +v3. This should allow users to upgrade to Invenio v3.2 without being forced to +upgrade their Marshmallow schemas immediately to v3. + +We advise all users to start planning an upgrade of their Marshmallow schemas +from v2 to v3 as the upgrade is non-trivial and needs proper testing due to +significant differences between Marshmallow v2 and v3. + +Invenio will continue support for both Marshmallow v2 and v3 for a transition +period to allow users to upgrade at their own pace. After the transition period +Marshmallow v2 support will be deprecated and removed from Invenio. + +For information about how to upgrade see :ref:`marshmallow`. + +Search index prefixing +~~~~~~~~~~~~~~~~~~~~~~ +Elasticsearch does not support the concept of virtual hosts and thus with +previous versions of Invenio it was not possible to share an Elasticsearch +cluster between multiple Invenio instances. + +We have now added support for index/alias/template prefixing, so that all +names can be prefixed with a string, which allows multiple Invenio instances +to share an Elasticsearch cluster. Note that this is only name prefixing, thus +technically two Invenio instances can read each other's indexes and thus +must trust each other. + +Read more about the new feature on +https://invenio-search.readthedocs.io/en/latest/configuration.html#index-prefixing. 
+ + +Minor changes in v3.2 +--------------------- + +**Documentation improvements** + +We've improved documentation for a couple of modules: + +- Invenio-Search: `Configuring your Elasticsearch connection `_ +- Invenio-Records: `Optimistic concurrency control `_ +- Invenio-DB: `Database session management `_ + +**Elasticsearch request library changed** + +Previously Invenio was using the Python library ``requests`` for connecting via +HTTP to Elasticsearch. The library has now been changed to use ``urllib3`` +instead as recommended by Elasticsearch. + +The old behavior can be restored by configuring the ``connection_class`` in +the Elasticsearch client configuration: + +.. code-block:: python + + from elasticsearch.connection import RequestsHttpConnection + + SEARCH_CLIENT_CONFIG = dict( + connection_class=RequestsHttpConnection + ) + +**Hide webpack warnings** + +Webpack warnings are now hidden by default. + +**HTML sanitization improvements** + +Invenio-Formatter has a new template filter +:py:func:`~invenio_formatter.filters.html.sanitize_html` to use for HTML +sanitization. + +The default configuration provided by Invenio-Config now sets default values +for :py:data:`~invenio_config.default.ALLOWED_HTML_TAGS` and +:py:data:`~invenio_config.default.ALLOWED_HTML_ATTRS` which are used by the +``bleach`` library for HTML sanitization. Please be careful if you change these +values as it is very easy to open up for Cross-Site Scripting (XSS) attacks. + +**Sentry now supported via Sentry SDK instead of Raven** + +Invenio-Logging still has support for Raven, but is being moved to Sentry SDK. +To use the old Raven library set the following variable in your configuration: + +.. code-block:: python + + SENTRY_SDK = False + +**Rate limiting improvements** + +There are now differentiated rate limits for authenticated (5000 per hour, +100 per minute) vs anonymous users (1000 per hour, 60 per minute).
The +limits can be changed via +:py:data:`~invenio_app.config.RATELIMIT_AUTHENTICATED_USER` and +:py:data:`~invenio_app.config.RATELIMIT_GUEST_USER` configuration variables. +You can also set up per-endpoint rate limits via the +:py:data:`~invenio_app.config.RATELIMIT_PER_ENDPOINT` configuration variable. + +There is also a new error template for when the HTTP 429 (rate limited) error code +is returned. It can be changed via the +:py:data:`~invenio_theme.config.THEME_429_TEMPLATE` configuration variable. + +**Records REST API improvements** + +Two new query parameters were added to the search REST API in order to make +implementation of infinite scroll on a frontend application easier: + +- ``from``: For pagination you can now use ``from`` (an item index) and + ``size`` instead of ``page`` (a page index) and ``size``. +- ``aggs``: The parameter can be used to completely turn off computation of all + aggregations (``?aggs=``) or select only specific aggregations to be shown + (``?aggs=year``). + +**CSV serializer** + +A new CSV serializer +(:py:class:`~invenio_records_rest.serializers.csv.CSVSerializer`) has been added +to Invenio-Records-REST. + +**Improved indexing signal** + +Introduced a new method ``.dynamic_connect()`` on the signal +:py:data:`~invenio_indexer.signals.before_record_index` which allows +conditional indexing. This is useful to e.g. only connect a receiver to a +specific index. + +**Bug fixes** + +- Invenio-OAuthClient: The ``next`` parameter which determines where to + redirect after a successful authentication could not have query string + parameters in the URL. + +Deprecations in v3.2 +-------------------- +The following features have been deprecated and will be removed in either +Invenio v3.2 or Invenio v3.3: + +Elasticsearch v2 and v5 support +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We did not remove Elasticsearch v2 support in Invenio v3.2 as originally +announced due to not having a user-friendly solution for upgrading +Elasticsearch indexes.
In Invenio v3.3 we plan to release the +Invenio-Index-Migrator module which makes migrating/upgrading Elasticsearch +indexes easy, and we thus plan to fully remove Elasticsearch v2 and v5 support +in either Invenio v3.3 or v3.4. + +Both Elasticsearch v2 and v5 have reached end of life and no longer receive +any bug or security fixes from Elastic. + +Both the support in Invenio-Search for creating indexes for v2/v5 as well as +any v2/v5 Elasticsearch mappings in other Invenio modules will be removed. + +AMD/RequireJS (reminder from v3.1) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Invenio's assets build system based on AMD/RequireJS will be removed in +Invenio v3.3 or v3.4. + +This involves e.g. the two CLI commands: + +.. code-block:: shell + + $ invenio npm + $ invenio assets build + +Several Python modules in Invenio-Assets will be removed, including (but not +limited to): + + - ``invenio_assets.npm`` + - ``invenio_assets.filters`` + - ``invenio_assets.glob`` + - ``invenio_assets.proxies`` + +Also, bundle definitions in other Invenio modules will be removed. These are +usually located in ``bundles.py`` files, e.g.: + + - ``invenio_theme.bundles`` + +Also, some static files will be removed from bundles, e.g.: + + - ``invenio_theme/static/js/*`` + - ``invenio_theme/static/scss/*`` + +AngularJS (reminder from v3.0) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In Invenio v3.0 we deprecated the AngularJS 1.4 application Invenio-Search-JS +as AngularJS by that time was already outdated. We have selected React and +SemanticUI as the replacement framework for AngularJS. + +The new Webpack build system released in Invenio v3.1 is part of the strategy +to move from AngularJS to React (note however that you can use Webpack with +your favorite framework, including AngularJS). + +We have started the rewrite of Invenio-Search-JS and have already released the +first version of +`React-SearchKit `_ which +eventually will replace Invenio-Search-JS.
+ +Features removed in v3.2 +------------------------ +These following *already* deprecated features have been removed in Invenio +v3.2. + +**Records CLI** + +The CLI in Invenio-Records module was removed. + +**DynamicPermission class** + +The ``DynamicPermission`` class was removed from Invenio-Access. The +functionality provided by the class is still available via the +:py:class:`~invenio_access.permissions.Permission` class. + + +Maintenance policy +------------------ + +Invenio v3.2 will be supported with bug and security fixes until the release of +Invenio v3.4 and minimum until 2020-11-29. + +See our :ref:`maintenance-policy`. + +What's next? +------------ +In Invenio v3.3 we are planning to release the **Statistics** bundle including: + +- `invenio-stats `_ + - Invenio module for statistical data processing and querying with support + for collecting COUNTER Research Data Usage Metrics compliant statistics. +- `counter-robots `_ + - Library for COUNTER-compliant detection of machines and robots. + +In addition to the Statistics bundle, we will also release final versions of +the following two modules: + +- `invenio-index-migrator `_ + - Elasticsearch index migrator for Invenio (aka zero down-time reindexing + and index migration). diff --git a/docs/tutorials/handling-files.rst b/docs/tutorials/handling-files.rst new file mode 100644 index 0000000000..1abcc69d19 --- /dev/null +++ b/docs/tutorials/handling-files.rst @@ -0,0 +1,597 @@ +.. + This file is part of Invenio. + Copyright (C) 2019 CERN. + + Invenio is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. + +.. _integrating-files: + +Integrating Files +================= +With Invenio, you can attach files to records and use powerful REST APIs to +upload and download files. 
You can also integrate previewers to nicely display +the content of a file in a webpage and use APIs to deliver different types of +images using the IIIF de facto standard. + +.. _handling-files-overview: + +Overview +-------- +The first step when setting up the files integration is to decide where +the files should be stored. +In Invenio, you can specify where files are stored by defining a +``Location``: a location is a representation of your storage system. +It can be, for example, a local folder in your machine or a remote storage. +It has a ``name`` and a ``URI`` to define the base path to the files. + +To access and manage files in a location, Invenio uses a ``Storage`` implementation: +this defines how to access files in your location. In other words, it knows how files +are stored, e.g. in a hierarchy of folders, and how to read and write them. + +You can define multiple locations and storage. This is useful, for example, +when you are dealing with online and offline file systems. + +A single specific file is represented in Invenio by a ``FileInstance`` object: +it defines its relative path to the Location where it is stored and also +other useful properties such as its size or checksum. + +To sum up, you can think of the Location, Storage and FileInstance as +the physical representation of your file system(s). + +Object storage +++++++++++++++ +Invenio provides an abstraction of your physical file system with an object +storage representation similar to Amazon S3. This allows great flexibility +and easy to use APIs. + +You can compare this implementation to a traditional file system +where files are contained in folders. In Invenio object storage, +``ObjectVersions`` are contained in ``Buckets``. + +Buckets are sets of objects, they are uniquely identified by an ID +and can define constraints on file sizes or quotas. A bucket also +defines the default Location to use when adding files, if none is +provided. 
+ +An ObjectVersion is a specific version (the ``version_id``) of a file +at a given moment in time. It has a reference to a FileInstance +and metadata such as the file name (the ``key``). The latest version +of a file is marked as the ``head``. + +ObjectVersions allow you to perform operations on your files without +accessing the file system. Let's see a common example: a user wants +to delete a previously uploaded file. +With Invenio, this means creating a new ObjectVersion for this +file, the new head, without reference to the FileInstance that +it had before. The file on disk is not accessed or removed. +This is also called a delete marker or soft deletion. + +You can read more in the +`Invenio-Files-REST `__ +module documentation. + +.. _handling-files-integration-with-records: + +Integration with records +------------------------ +Invenio integrates files and records by creating a reference +between a Record and a Bucket. By default in Invenio, +a record has a reference to one bucket and +a bucket to one record. + +.. image:: ../_static/invenio-files-integration.png + +Invenio allows you to set up different scenarios and have, for example, +multiple buckets per record and vice versa or buckets not attached +to any record. To achieve any of these, you will have to define +your own integration between records and files. + +The records-files integration provides a set of REST APIs to easily +add, retrieve or delete files for a given record. + +.. _handling-files-using-rest-apis: + +Using REST APIs +--------------- +If you haven't already done so, make sure you've followed the :ref:`quickstart` +so you have an Invenio instance to work on. + +Let's create a simple record: + +.. code-block:: console + + $ curl -k --header "Content-Type: application/json" \ + --request POST \ + --data '{"title":"Some title", "contributors": [{"name": "Doe, John"}]}' \ + https://127.0.0.1:5000/api/records/?prettyprint=1 + +Response: + +..
code-block:: json + + { + "created": "2019-11-25T15:02:24.379791+00:00", + "id": "1", + "links": { + "files": "https://127.0.0.1:5000/api/records/1/files", + "self": "https://127.0.0.1:5000/api/records/1" + }, + "metadata": { + "contributors": [ + { + "name": "Doe, John" + } + ], + "id": "1", + "title": "Some title" + }, + "revision": 0, + "updated": "2019-11-25T15:02:24.379798+00:00" + } + +You can now upload a file to this record (if you are not using the default +scripts to run the server, make sure your celery worker is running): + +.. code-block:: console + + $ echo "my file content" > my_file.txt + + $ curl -k -H "Content-Type: application/octet-stream" \ + --request PUT \ + --data-binary @my_file.txt \ + https://127.0.0.1:5000/api/records/1/files/my_file.txt?prettyprint=1 + +Response: + +.. code-block:: json + + { + "is_head": true, + "updated": "2019-11-25T15:21:07.276520", + "size": 16, + "version_id": "577a96b9-94a1-4abf-8f6a-a5c168ee6faa", + "key": "my_file.txt", + "tags": {}, + "links": { + "self": "https://127.0.0.1:5000/api/records/1/files/my_file.txt", + "version": "https://127.0.0.1:5000/api/records/1/files/my_file.txt?versionId=577a96b9-94a1-4abf-8f6a-a5c168ee6faa", + "uploads": "https://127.0.0.1:5000/api/records/1/files/my_file.txt?uploads" + }, + "mimetype": "text/plain", + "created": "2019-11-25T15:21:07.269683", + "delete_marker": false, + "checksum": "md5:1b7ea8126d278ecbfa9fcb9b0d7dc5af" + } + +If you now fetch the record again, you can see that the uploaded files +have been added to its metadata: + +.. code-block:: console + + $ curl -k --header "Content-Type: application/json" \ + https://localhost:5000/api/records/1?prettyprint=1 + +Response: + +.. 
code-block:: json + + { + "created": "2019-11-25T15:06:02.858325+00:00", + "files": [ + { + "bucket": "7ddc1409-35a3-4a65-8324-89da4245f2f9", + "checksum": "md5:1b7ea8126d278ecbfa9fcb9b0d7dc5af", + "file_id": "6f413750-82ca-45bb-aa5a-0f009b651843", + "key": "my_file.txt", + "size": 16, + "version_id": "577a96b9-94a1-4abf-8f6a-a5c168ee6faa" + } + ], + "id": "1", + "links": { + "files": "https://localhost:5000/api/records/1/files", + "self": "https://localhost:5000/api/records/1" + }, + "metadata": { + "contributors": [ + { + "name": "Doe, John" + } + ], + "id": "1", + "title": "Some title" + }, + "revision": 2, + "updated": "2019-11-25T15:21:07.453874+00:00" + } + +You can download the file by requesting it with its filename: + +.. code-block:: console + + $ curl -k --header "Content-Type: application/json" \ + https://localhost:5000/api/records/1/files/my_file.txt + +You can also delete the uploaded file: + +.. code-block:: console + + $ curl -k --header "Content-Type: application/json" \ + --request DELETE \ + https://localhost:5000/api/records/1/files/my_file.txt + +Integration details ++++++++++++++++++++ +When creating a new record, Invenio automatically creates and +assigns a new Bucket to the newly created record. Then, +when a new file is uploaded to the record, Invenio will: + +1. fetch the Bucket assigned to the Record +2. store the file in the bucket's default Location using + the configured Storage +3. create a new FileInstance with size, checksum and the URI path + pointing to the file +4. create a new ObjectVersion with a reference to the FileInstance + and the Bucket to which it belongs +5. update the record's metadata to add the metadata of the new file + +You can learn more on how record and files work together as well as +the available APIs by reading the documentation of +`Invenio-Records-Files `__ +and +`Invenio-Files-REST `__. + +.. 
_handling-files-setup-your-storage: + +Setup your storage +------------------ +With the quickstart application, a default Location is set up in the +same directory of your virtual environment. + +You can create your own locations by using the CLI. The only +constraint is that you will always have to define at least +one ``default`` Location. + +For example, you can define a new default location named ``shared`` in the path +``/mnt/shared``: + +.. code-block:: console + + $ pipenv run invenio files location shared /mnt/shared --default + +From now on, any new Bucket, besides the existing ones will use this location +and therefore files will be stored in ``/mnt/shared``. + +Invenio provides a default storage implementation based on +`PyFilesystem `__ and it will store files +in the path ``//data``. +The middle path ```` can be adjusted via configuration +variables. + +For example, the previously uploaded file ``my_file.txt`` will be saved on +disk in ``/mnt/shared/4j/0f/k7ss-h8k1-0k2h/data``. + +.. note:: + Every file in Invenio is stored on disk with the file name + ``data``. This is to avoid any possible issue with user input and + potentially unsupported special characters. The original file name is + stored in the ObjectVersion metadata and this internal implementation + is never exposed to the user. + +Custom storage +++++++++++++++ +The default storage implementation in Invenio uses +`PyFilesystem `__ +to access the file system. If this does not fulfill your +requirements, you can implement your own. + +The :py:class:`invenio_files_rest.storage.FileStorage` is the base class +interface that defines the operations used when accessing files. +You can create your own factory that will instantiate and return your +storage implementation. + +.. 
code-block:: python + + def my_storage_factory(fileinstance=None, default_location=None, + default_storage_class=None, + filestorage_class=MyFileStorage, fileurl=None, + size=None, modified=None, clean_dir=True): + fileurl = fileinstance.uri + return filestorage_class( + fileurl, size=size, modified=modified, clean_dir=clean_dir) + +Then, you can configure Invenio to use this new storage +by setting the related configuration variable in your ``config.py``: + +.. code-block:: python + + FILES_REST_STORAGE_FACTORY = "my_storage_factory" + + +If you are looking for an integration with a S3 object storage, you +can read more about it on the +`Invenio-S3 `__ documentation. + +.. _handling-files-permissions: + +Permissions +----------- +Files permissions relies on +`Invenio-Access `__ +to allow configured users or roles to perform actions. These concepts are +also described in the :ref:`managing-access` section. + +The integration with records does not set any particular permission on files: +it is your responsibility to decide how to give access to files based on your +record. + +The first step is to implement your own permission factory. As an example, +let's implement a factory that allows access to files only to the user that +is owner of the record (the record should have a field ``owner``). + +.. code-block:: python + + from flask_principal import UserNeed + from invenio_access import Permission, superuser_access + from invenio_files_rest.models import Bucket, MultipartObject, ObjectVersion + from invenio_records import Record + from invenio_records_files.models import RecordsBuckets + + def my_permission_factory(obj, action): + """Given an action, return the permission for the given object. + + :param obj: An instance of :class:`invenio_files_rest.models.Bucket` or + :class:`invenio_files_rest.models.ObjectVersion` or + :class:`invenio_files_rest.models.MultipartObject` or ``None`` if + the action is global. + :param action: The required action. 
+ :raises RuntimeError: If the object is unknown. + :returns: A :class:`invenio_access.permissions.Permission` instance. + """ + # apply the same permission to any `action` + # retrieve the bucket from the requested `obj` + bucket_id = None + if isinstance(obj, Bucket): + bucket_id = str(obj.id) + elif isinstance(obj, ObjectVersion) or isinstance(obj, MultipartObject): + bucket_id = str(obj.bucket_id) + + if bucket_id is not None: + # retrieve the record with this bucket attached + # we assume that there is only one + record_bucket = RecordsBuckets.query.filter_by(bucket_id=bucket_id).one_or_none() + if record_bucket is not None: + # retrieve the owner field + record = Record.get_record(record_bucket.record_id) + owner = record.get("owner") + if owner: + return Permission(UserNeed(record["owner"])) + # allow only admins + return Permission(superuser_access) + +Then, configure Invenio to use this function when validating permissions by +setting the related configuration variable in ``config.py``: + +.. code-block:: python + + FILES_REST_PERMISSION_FACTORY = "my_permission_factory" + +Response codes +++++++++++++++ +If the authorization for an action fails, Invenio will normally returns +a ``403`` response code for authenticated users, ``401`` otherwise. +For security reasons, when trying to retrieve an unauthorized file, +it will return a ``404`` instead to hide the existence or non-existence +of the file. + +.. _handling-files-upload-large-files: + +Large files upload +------------------ +When trying to upload a large file, it might happen that your HTTP +request aborts and returns a response code +:code:`413 (Request Entity Too Large)`. The maximum upload size +is limited by the default configuration of Flask and most probably +your web server. + +You can adjust these configurations according to your needs. + +For Flask, set the :code:`MAX_CONTENT_LENGTH` configuration variable. 
+Be aware that if the request does not specify a :code:`CONTENT_LENGTH`, +no data will be read. + +.. code-block:: python + + app.config['MAX_CONTENT_LENGTH'] = 25 * 1024 * 1024 # bytes + +Here is an example to tune the configuration of ``Nginx``. +In case you use another web server, please consult its documentation. + +.. code-block:: console + + http { + ... + client_max_body_size 25M; + } + +.. _handling-files-integrity-checks: + +Files integrity checks +---------------------- +To ensure that files in your file system are not damaged, it is +recommended to set up files integrity checks. This consists of a +periodic task that scans your files and re-computes each checksum, +comparing it with the one calculated when uploaded. In case +of mismatch, it will throw an exception. + +Configure the task in your ``config.py``: + +.. code-block:: python + + CELERY_BEAT_SCHEDULE = { + 'file-checks': { + 'task': 'invenio_files_rest.tasks.schedule_checksum_verification', + 'schedule': timedelta(hours=1), + } + } + +Make sure that `celery beat +`_ +is running: + +.. code-block:: console + + $ celery -A invenio_app.celery beat + +When the task +`schedule_checksum_verification `_ +runs, it will retrieve a number of files to check based on a set of constraints +in order to throttle the execution rate of the checks. For each file, +it will then spawn the task +`verify_checksum `_ +to calculate the checksum. + +Given that this task will constantly check files, it is recommended to +schedule these tasks on a separate low priority queue. + +Create a new queue called ``low`` in your ``config.py``: + +.. code-block:: python + + CELERY_TASK_ROUTES = { + 'invenio_files_rest.tasks.verify_checksum': {'queue': 'low'}, + } + +Then, spawn only one worker that will consume tasks sent to the ``low`` queue: + +.. code-block:: console + + $ celery -A invenio_app.celery worker -l info -Q low + +..
_handling-files-previewing: + +Previewing files +---------------- +Invenio has support for previewing many of the most popular file formats +including PDF, ZIP, Markdown, images and Jupyter Notebooks. + +Given an ObjectVersion with a filename (the ``key`` field), Invenio will +iterate through the available previewers and use the first matching the +file extension contained in the filename. The ordered list of previewers +can be configured via the configuration variable +`PREVIEWER_PREFERENCE `_. + +For example, given a ``thesis.pdf`` to preview and the following +configuration: + +.. code-block:: python + + PREVIEWER_PREFERENCE = [ + "simple_image", # previews .jpg and .png + "a_pdf_previewer", # previews .pdf + "another_pdf_previewer", # previews .pdf + ] + +only the ``a_pdf_previewer`` will be run as previewer. +``another_pdf_previewer`` will never be executed. + +To preview a file in your website, you can use the available +endpoint ``/records//preview/`` and the +view ``invenio_previewer.views:preview``. +In your ``config.py`` add: + +.. code:: python + + RECORDS_UI_ENDPOINTS=dict( + recid_previewer=dict( + pid_type='recid', + route='/records//preview/', + view_imp='invenio_previewer.views:preview', + record_class='invenio_records_files.api:Record', + ), + ) + +You can see the list of available previewers and learn how +to create your own previewer by reading the documentation of +`Invenio-Previewer `__. + +.. _handling-files-iiif: + +Handling images using IIIF +-------------------------- +Invenio implements the `IIIF Image APIs `_, a de facto +standard for delivering images on the web. It allows you to +generate thumbnails, resize, zoom and preview images. + +For example, you can resize on the fly images uploaded by the user to a +dimension that best suits your website layout. This is very useful, +for example, when displaying thumbnails in the list of search results.
+ +Let's say that you want to resize the large image ``large.png`` +uploaded by the user to ``640x480`` pixels. +You can use the available REST APIs and retrieve +the image as follows: + +.. code-block:: console + + # IIIF Image specification: /region/size/rotation/quality.format + /api/iiif/::large.png/full/640,480/0/default.png + +Let's say that you now want to achieve the same when previewing +the image in your website, and not via REST APIs. You can take advantage +of the files preview and integrate IIIF with it. + +Add the IIIF previewer ``iiif_image`` in your ``config.py``: + +.. code-block:: python + + PREVIEWER_PREFERENCE = [ + 'iiif_image', + 'pdfjs', + 'zip', + ] + +and configure it to resize to your needs. In ``config.py``: + +.. code-block:: python + + IIIF_PREVIEWER_PARAMS = { + 'size': '640,480' + } + +To learn more about the IIIF integration, see the +`Invenio-IIIF `__ +documentation. + +.. _handling-files-security: + +Security +-------- +When serving files, you will have to take into account any security +implications. Here you can find some recommendations to mitigate possible +vulnerabilities, such as Cross-Site Scripting (XSS): + +1. If possible, serve user uploaded files from a separate domain + (not a subdomain). + +2. By default, Invenio-Files-REST sets some response headers to prevent + the browser from rendering and executing HTML files. For files that + you consider safe and you need to have rendered, you can configure the + `MIMETYPE_WHITELIST `_. + See + `send_stream `_ + for more information. + +3. Prefer file download instead of allowing the browser to preview any file, + by adding the :code:`?download` URL query argument.
+ + +Next steps +---------- +You can have detailed information by reading the documentation of each module: + +- `Invenio-Files-REST `__ +- `Invenio-Records-Files `__ +- `Invenio-Previewer `__ +- `Invenio-IIIF `__ diff --git a/docs/tutorials/upgrade-marshmallow.rst b/docs/tutorials/upgrade-marshmallow.rst index cd0ce9cc70..78a6b769c7 100644 --- a/docs/tutorials/upgrade-marshmallow.rst +++ b/docs/tutorials/upgrade-marshmallow.rst @@ -1,10 +1,19 @@ -Marshmallow v3 compatibility -============================ +.. + This file is part of Invenio. + Copyright (C) 2019 CERN. + + Invenio is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. + +.. _marshmallow: + +Marshmallow v2/v3 compatibility +=============================== Marshmallow ----------- -Marhsmallow is a package for serialization and deserialization of complex data +Marshmallow is a package for serialization and deserialization of complex data structures from and to Python types. You can discover its usage within Invenio in the `documentation `_. diff --git a/docs/upgrading.rst b/docs/upgrading.rst new file mode 100644 index 0000000000..610e3f2ba2 --- /dev/null +++ b/docs/upgrading.rst @@ -0,0 +1,170 @@ +.. + This file is part of Invenio. + Copyright (C) 2015-2019 CERN. + + Invenio is free software; you can redistribute it and/or modify it + under the terms of the MIT License; see LICENSE file for more details. + +Upgrading to version 3.2 +======================== + +If you have your instance of Invenio v3.1 already up and running and +you would like to upgrade to version v3.2 you don't need to set up your +project from scratch. The goal of this guide is to show the steps to upgrade +your project without losing any of your work. + + +Pipfile modifications +--------------------- + +The most important changes that you will have to make are in ``Pipfile``. 
+ +First you need to change the Invenio version: + +:: + + invenio = { version = "==3.2.0", extras = ["base", "auth", "metadata", "files", "postgresql", "elasticsearch7" ]} + +If you want to use the new files bundle make sure you include the ``files`` +bundle. Add any additional :ref:`bundles` you would like in your project in +``extras``. + +Make sure that your database and Elasticsearch version matches your +installation. In above example the database is ``postgresql`` and the +Elasticsearch version is ``elasticsearch7``. + +To install the new packages in ``Pipfile`` run the following commands: + +.. code:: bash + + # Update Pipfile.lock + pipenv lock --dev + + # Install packages specified in Pipfile.lock + pipenv sync --dev + + # Install application code and entrypoints from 'setup.py' + pipenv run pip install -e . + + # Build assets + pipenv run invenio collect -v + pipenv run invenio webpack buildall + + +Database tables +--------------- +Changes have been made to the database from Invenio 3.1 so you will need to +upgrade the database by running the latest Alembic recipes: + +.. code:: bash + + invenio alembic upgrade + +Your database should now have the latest changes. + + +Files +----- +To integrate the files bundle with your Invenio instance, please see the guide +to configure files for Invenio 3.2. + +For files to work properly ensure that the config variables +``RECORDS_FILES_REST_ENDPOINTS`` and ``FILES_REST_PERMISSION_FACTORY`` have +been configured properly. + +.. note:: + + If you are upgrading from a previous cookiecutter instance and you updated + ``records/config.py``, please remember to update the changed config keys in + ``records/ext.py``. + +Uploading files +~~~~~~~~~~~~~~~ +Records created after you upgraded to Invenio 3.2 will support files +out-of-the-box as long as files are configured properly. 
+ +However, if you have records created by previous versions of Invenio they will +not work with files because there is no bucket attached to the record. +To support uploading files to an old record you first need to create +a bucket for each record you want to enable files support for and update the +record's metadata. + +Invenio currently doesn't provide a script for this migration. However, here is a +snippet that can help with the migration: + +.. code-block:: python + + from invenio_db import db + from invenio_records_files.api import Record + from invenio_records_files.models import RecordsBuckets + + # Get all old records as invenio_records_files.api:Record objects + old_records = # ... + for record in old_records: + # Create a bucket + if not record.bucket_id: + bucket = Record.create_bucket(record) + if bucket: + # Attach bucket to the record + Record.dump_bucket(record, bucket) + RecordsBuckets.create(record=record.model, bucket=bucket) + record.commit() + db.session.commit() + + +Elasticsearch +------------- +Invenio 3.2 comes with support for Elasticsearch 6 and 7. Support for +Elasticsearch v2 and v5 has been deprecated and will be removed in future +releases. It's recommended to upgrade your Elasticsearch version to stay +up-to-date. + +.. note:: + + If you're upgrading to Elasticsearch v7, don't forget to add mappings for + v7. + +There are currently two paths to upgrade to Elasticsearch v7: upgrade by +reindexing all your records or by using Elasticsearch rolling upgrades. + +Upgrade to v7 by reindexing +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The easiest way to upgrade to v7 is to upgrade your Invenio installation, +install Elasticsearch v7 and then reindex all your records stored in the +database with the following command: + +.. code-block:: console + + $ invenio index reindex -t + +.. warning:: + + This command will destroy your indexed records with the provided + ``pid_type`` and reindex all records.
+ +However, this means you have to reindex everything, which will require some +downtime. + +.. _rolling-upgrades: + +Upgrade by Elasticsearch rolling upgrades +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Elasticsearch supports `rolling upgrades <https://www.elastic.co/guide/en/elasticsearch/reference/current/rolling-upgrades.html>`_ +which can upgrade your Elasticsearch installation between certain versions +without any interruption to your service. This will allow you to upgrade from +v5 to v6 or v6 to v7, but not from v5 to v7 due to index incompatibilities. + +Upgrade by index migration +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: + + This section describes an unreleased feature. + +Invenio v3.3 will add support for online index migration. This will allow you +to upgrade between Elasticsearch versions, migrate indexes between clusters as +well as upgrade Elasticsearch mappings. You can read more about this upcoming +feature on: + +- `Keeping up with Elasticsearch `_ +- `Invenio-Index-Migrator `_ diff --git a/invenio/version.py b/invenio/version.py index e707fb3cc6..24c1c3d555 100644 --- a/invenio/version.py +++ b/invenio/version.py @@ -14,4 +14,4 @@ from __future__ import absolute_import, print_function -__version__ = "3.2.0a9" +__version__ = "3.2.0" diff --git a/setup.py b/setup.py index 917b0db152..516282ee90 100644 --- a/setup.py +++ b/setup.py @@ -20,50 +20,47 @@ 'isort>=4.3', 'pydocstyle>=3.0.0', 'pytest-cov>=2.7.1', + 'pytest-invenio>=1.2.1,<1.3.0', 'pytest-pep8>=1.0.6', 'pytest>=4.6.4,<5.0.0', - 'pytest-invenio>=1.2.0,<1.3.0', ] db_version = '>=1.0.4,<1.1.0' -search_version = '>=1.2.0,<1.3.0' +search_version = '>=1.2.3,<1.3.0' extras_require = { # Bundles 'base': [ - 'invenio-admin>=1.1.1,<1.2.0', + 'invenio-admin>=1.1.2,<1.2.0', 'invenio-assets>=1.1.3,<1.2.0', 'invenio-formatter>=1.0.2,<1.1.0', - 'invenio-logging>=1.1.0,<1.3.0', - # 'invenio-logging>=1.2.0,<1.3.0', + 'invenio-logging>=1.2.0,<1.3.0', 'invenio-mail>=1.0.2,<1.1.0', - 'invenio-rest>=1.1.0,<1.2.0', + 'invenio-rest>=1.1.2,<1.2.0', 'invenio-theme>=1.1.4,<1.2.0', ], 'auth': [
'invenio-access>=1.3.0,<1.4.0', 'invenio-accounts>=1.1.1,<1.2.0', - 'invenio-oauth2server>=1.0.3,<1.1.0', + 'invenio-oauth2server>=1.0.4,<1.1.0', 'invenio-oauthclient>=1.1.3,<1.2.0', 'invenio-userprofiles>=1.0.1,<1.1.0', ], 'metadata': [ - 'invenio-indexer>=1.1.0,<1.2.0', - 'invenio-jsonschemas>=1.0.0,<1.1.0', - 'invenio-oaiserver>=1.0.0,<1.2.0', - # 'invenio-oaiserver>=1.1.0,<1.2.0', - 'invenio-pidstore>=1.0.0,<1.2.0', - 'invenio-records-rest>=1.6.0,<1.7.0', - # 'invenio-records-rest>=1.5.0,<1.6.0', + 'invenio-indexer>=1.1.1,<1.2.0', + 'invenio-jsonschemas>=1.0.1,<1.1.0', + 'invenio-oaiserver>=1.1.1,<1.2.0', + 'invenio-pidstore>=1.1.0,<1.2.0', + 'invenio-records-rest>=1.6.4,<1.7.0', 'invenio-records-ui>=1.0.1,<1.1.0', 'invenio-records>=1.3.0,<1.4.0', 'invenio-search-ui>=1.1.1,<1.2.0', ], 'files': [ 'invenio-files-rest>=1.0.5,<1.1.0', - 'invenio-records-files>=1.2.0,<1.3.0', - 'invenio-previewer>=1.0.0,<1.1.0', 'invenio-iiif>=1.0.0,<1.1.0', + 'invenio-previewer>=1.1.0,<1.2.0', + 'invenio-records-files>=1.2.1,<1.3.0', ], # Database version 'postgresql': [ @@ -109,10 +106,10 @@ install_requires = [ 'Flask>=1.0.4', - 'invenio-app>=1.2.0,<1.3.0', - 'invenio-base>=1.1.0,<1.2.0', + 'invenio-app>=1.2.3,<1.3.0', + 'invenio-base>=1.2.0,<1.3.0', 'invenio-cache>=1.0.0,<1.1.0', - 'invenio-celery>=1.1.0,<1.2.0', + 'invenio-celery>=1.1.1,<1.2.0', 'invenio-config>=1.0.2,<1.1.0', 'invenio-i18n>=1.1.1,<1.2.0', ] @@ -157,7 +154,6 @@ 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', - 'Development Status :: 3 - Alpha', - # 'Development Status :: 5 - Production/Stable', + 'Development Status :: 5 - Production/Stable', ], )