Skip to content

Commit

Permalink
layout: Rename the Download to Download index for some files.
Browse files Browse the repository at this point in the history
  • Loading branch information
psaiz committed Mar 26, 2024
2 parents 0adde34 + 911ef23 commit 44585d0
Show file tree
Hide file tree
Showing 57 changed files with 25,819 additions and 1,043 deletions.
2 changes: 0 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,5 @@ target/
.idea/

# Locally generated certificates for development
elasticsearch-proxy/nginx.crt
elasticsearch-proxy/nginx.key
nginx/localhost.crt
nginx/localhost.key
1 change: 0 additions & 1 deletion .inveniorc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ export INVENIO_POSTGRESQL_DBNAME=cernopendata
export INVENIO_POSTGRESQL_DBUSER=cernopendata
export INVENIO_POSTGRESQL_DBPASS=dbpass123
export INVENIO_REDIS_HOST=192.168.50.12
export INVENIO_ELASTICSEARCH_HOST=192.168.50.13
export INVENIO_RABBITMQ_HOST=192.168.50.14
export INVENIO_WORKER_HOST=192.168.50.15
# sphinxdoc-kickstart-configuration-variables-end
Expand Down
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
# Use Invenio's alma image with Python-3.9
FROM registry.cern.ch/inveniosoftware/almalinux:1

# Use XRootD 5.6.6
ENV XROOTD_VERSION=5.6.6
# Use XRootD 5.6.8
ENV XROOTD_VERSION=5.6.8

# Install CERN Open Data Portal web node pre-requisites
# hadolint ignore=DL3033
Expand Down
3 changes: 0 additions & 3 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@ include .inveniorc
include Dockerfile
include LICENSE
include babel.ini
include elasticsearch-proxy/Dockerfile
include elasticsearch-proxy/nginx.conf
include nginx/Dockerfile
include pytest.ini
include sentry/Dockerfile
Expand Down Expand Up @@ -45,4 +43,3 @@ recursive-include sentry *.json
recursive-include sentry *.py
recursive-include sentry *.sh
recursive-include sentry *.yml
recursive-exclude elasticsearch-proxy *
19 changes: 14 additions & 5 deletions cernopendata/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@

# Piwik tracking code: set to None to disable it
THEME_PIWIK_ID = os.environ.get("PIWIK_ID", None)

ACCOUNTS_SESSION_ACTIVITY_ENABLED = None
SITE_URL = os.environ.get("CERNOPENDATA_SITE_URL", "opendata.cern.ch")

# Logging - Set up Sentry for Invenio-Logging
Expand Down Expand Up @@ -91,6 +91,17 @@
"data:",
"https://cdnjs.cloudflare.com",
],
"img-src": [
"'self'",
"cms-results.web.cern.ch",
"raw.githubusercontent.com",
"www.github.com",
"github.com",
"cms-docdb.cern.ch",
"mybinder.org",
"cms-results.web.cern.ch",
"cds.cern.ch",
],
},
"content_security_policy_report_uri": None,
"content_security_policy_report_only": False,
Expand Down Expand Up @@ -427,10 +438,8 @@ def _query_parser_and(qstr=None):
CERNOPENDATA_DISABLE_DOWNLOADS = os.environ.get("CERNOPENDATA_DISABLE_DOWNLOADS", False)
# Search
# ======
#: Default Elasticsearch document type.
#: Default OpenSearch document type.
SEARCH_DOC_TYPE_DEFAULT = None
#: Do not map any keywords.
SEARCH_ELASTIC_KEYWORD_MAPPING = {}

# This one can be used to have multiple instances on the same cluster
# SEARCH_INDEX_PREFIX = "opendata-dev-"
Expand All @@ -447,7 +456,7 @@ def _query_parser_and(qstr=None):
SEARCH_UI_SEARCH_VIEW = search_legacy
# OAI-PMH
# =======
#: Default Elasticsearch index.
#: Default OpenSearch index.
OAISERVER_RECORD_INDEX = "records"
#: OAI ID prefix.
OAISERVER_ID_PREFIX = "oai:opendata.cern.ch:recid/"
Expand Down
38 changes: 38 additions & 0 deletions cernopendata/jsonschemas/records/record-v1.0.0.json
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,44 @@
},
"type": "object"
},
"cross_section": {
"properties": {
"filter_efficiency": {
"description": "Cross section filter efficiency as given by GenXSecAnalyser",
"type": "string"
},
"matching_efficiency": {
"description": "Cross section matching efficiency as given by GenXSecAnalyser",
"type": "string"
},
"neg_weight_fraction": {
"description": "Cross section negative weight fraction as given by GenXSecAnalyser",
"type": "string"
},
"total_value": {
"description": "Cross section total value as given by GenXSecAnalyser (in pb)",
"type": "string"
},
"total_value_uncertainty": {
"description": "Cross section total uncertainty as given by GenXSecAnalyser (in pb)",
"type": "string"
}
},
"type": "object"
},
"dataset_semantics_files": {
"properties": {
"url": {
"description": "Url of the dataset content description html file",
"type": "string"
},
"json": {
"description": "Name of the dataset content description json file",
"type": "string"
}
},
"type": "object"
},
"dataset_semantics": {
"items": {
"properties": {
Expand Down
2 changes: 1 addition & 1 deletion cernopendata/mappings/os-v2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.

"""CERN Open Data Elasticsearch v7 Mappings."""
"""CERN Open Data OpenSearch v2 Mappings."""
95 changes: 70 additions & 25 deletions cernopendata/modules/fixtures/data/docs/cms-about/cms-about.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ The Compact Muon Solenoid (CMS) is one of the large particle detectors at CERN's
You can find usage instructions and suggestions of CMS Open Data for different scopes in:

* [Guide page to education use of CMS Open Data](/docs/cms-guide-for-education)
* In a separate [CMS Open Data guide](https://cms-opendata-guide.web.cern.ch/) (under construction).
* In a separate [CMS Open Data guide](https://cms-opendata-guide.web.cern.ch/).

This page gives a brief overview of CMS Open Data contents:

Expand All @@ -23,35 +23,79 @@ The following are provided through this portal:
* Examples of [simplified datasets](/search?page=1&size=20&subtype=Derived&type=Dataset&experiment=CMS) derived from the primary ones for use in different applications and analyses
* Tools
* Downloadable [container images](/docs/cms-guide-docker) with the CMS software environment through which the datasets can be accessed
* Alternatively, a downloadable [Virtual Machine (VM)](/docs/cms-virtual-machine-2015) image with the CMS software environment
* [Getting started instructions](/docs/cms-getting-started-2015) for reading the primary dataset and producing intermediate derived data for the final analysis
* Alternatively, a downloadable [Virtual Machine (VM)](https://opendata.cern.ch/search?page=1&size=20&tags=VM&experiment=CMS) image with the CMS software environment
* Getting started instructions for reading and processing primary data in the [AOD format (Run 1)](/docs/cms-getting-started-aod), [MiniAOD format (Run 2)](/docs/cms-getting-started-miniaod), or [NanoAOD format (Run 2)](/docs/cms-getting-started-nanoaod).
* Ready-to-use online applications, such as [an event display](/visualise/events/cms) and [simple histogramming software](/visualise/histograms/cms)
* Source code for the various examples and applications, available in the [CMS software](/search?page=1&size=20&q=&type=Software&experiment=CMS) collection
* Guides
* Set of [topical guide pages](http://opendata.cern.ch/search?page=1&size=20&q=&subtype=Guide&type=Documentation&experiment=CMS), a comprehensive set of instructions is being collected in a separate [CMS Open Data guide](https://cms-opendata-guide.web.cern.ch/) with links to the latest tutorials.
* Set of [topical guide pages](http://opendata.cern.ch/search?page=1&size=20&q=&subtype=Guide&type=Documentation&experiment=CMS)
* A comprehensive set of instructions is being collected in a separate [CMS Open Data guide](https://cms-opendata-guide.web.cern.ch/) with links to the latest tutorials.

## <a name="primary">Primary and simulated datasets</a>

* Collision data in the primary datasets are in a format known as AOD or Analysis Object Data, while simulated data are in a format called AODSIM.
* AOD/AODSIM files contain the information that is needed for analysis:
* all the high-level [physics objects](/docs/cms-physics-objects-2015) (such as muons, electrons, etc.);
* tracks with associated hits, calorimetric clusters with associated hits, vertices; and
* information about event selection (triggers), data needed for further selection and identification criteria for the physics objects.
* The file is not the final event interpretation with a simple list of particles.
* It contains several instances of the same physics object (i.e. a jet reconstructed with different algorithms).
* It may have double-counting (i.e. a physics object may appear as a single object of its own type, but it may also be part of a jet).
* Additional knowledge is needed to define a "good" physics object.
* Definition of same objects is different in each analysis.
* Some datasets, such as those containing heavy-ion data, are provided in a format called RECO, which contains more information than the AOD format. This is done when the original analyses by the CMS collaboration were performed using this particular format.
* Starting from Run2 (2015), datasets are provided in the MiniAOD/MiniAODSIM format, which has superseded the AOD format in physics analysis:
* MiniAOD/MiniAODSIM is approximately one tenth of the size of AOD/AODSIM.
* The reduction is obtained defining light-weight physics-object candidate representations, increasing transverse momentum thresholds for storing physics-object candidates, and reduced numerical precision when it is not required at the analysis level.
* More information on the MiniAOD format
* [Mini-AOD: A New Analysis Data Format for CMS](https://doi.org/10.1088/1742-6596/664/7/072052)
* [MiniAOD analysis documentation](https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookMiniAOD2015)
* The files can be read in [ROOT](http://root.cern.ch/), but they cannot be opened (and understood) as simple data tables.
* Only the runs that are validated by data quality monitoring should be used in any analysis. The [list of the validated runs](/search?page=1&size=20&q=&type=Environment&subtype=Validation) is provided.
* Small samples of [raw data](/search?page=1&size=20&q=&experiment=CMS&file_type=raw) are also provided.
Collision data in the primary datasets are typically in a format known as AOD or Analysis Object Data, while simulated data are in a format called AODSIM. Beginning in Run 2, smaller
data formats called MiniAOD and NanoAOD were developed in CMS to implement common physics object processing and remove information that is not often needed for analysis.

**AOD(SIM) and MiniAOD(SIM) files**

AOD/AODSIM files are provided for Run 1 primary datasets and contain the information that is needed for analysis:

* all the high-level [physics objects](/docs/cms-physics-objects-2015) (such as muons, electrons, etc.);
* tracks with associated hits, calorimetric clusters with associated hits, vertices;
* candidate particles created by the Particle Flow algorithm;
* information about event selection (triggers), data needed for further selection and identification criteria for the physics objects.

See the [Getting Started page for AOD data](/docs/cms-getting-started-aod) to learn more about analyzing AOD files.

Starting from Run 2 (2015), MiniAOD/MiniAODSIM files are provided. These files contain similar information to AOD, but physics objects are processed to include more
identification and selection information within a lighter C++ object, transverse momentum thresholds for storing objects are increased,
and some lower-level information has been removed.
MiniAOD datasets are approximately one tenth of the size of AOD datasets. More information about MiniAOD:

* [Mini-AOD: A New Analysis Data Format for CMS](https://doi.org/10.1088/1742-6596/664/7/072052)
* [MiniAOD analysis documentation](https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookMiniAOD2015)
* [Getting Started with CMS MiniAOD data](/docs/cms-getting-started-miniaod)

AOD and MiniAOD files do not contain the final event interpretation with a simple list of particles.
The files can be read in [ROOT](http://root.cern.ch/), but they cannot be opened (and understood) as simple data tables.
A file typically contains several instances of the same physics object
(e.g. a jet reconstructed with different algorithms), and some physics objects may be "double-counted" (i.e. a physics object may appear as a single object of its own type, but
it may also be part of a jet).

Additional knowledge is needed to define a "good" physics object, and this definition can be different in each analysis.
Only the runs that are validated by data quality monitoring should be used in any analysis.
The [list of the validated runs](/search?page=1&size=20&q=&type=Environment&subtype=Validation) is provided.

**NanoAOD(SIM) files**

Starting from data collected in 2016, datasets in NanoAOD format are provided alongside MiniAOD.
Only a limited set of observables for each physics object is kept, with limited numerical precision.
For example, detector information is typically dropped in favor of pre-computed identification algorithm results.
The Particle Flow candidates are also dropped, since they are primarily used as inputs to higher-level physics object
reconstruction. The NanoAOD format is about 20 times smaller than MiniAOD, or about 200 times smaller than AOD.
NanoAOD files can be read in [ROOT](http://root.cern.ch) as a basic `TTree` containing standard data types.
More information about NanoAOD:

* [The NanoAOD event data format in CMS](https://doi.org/10.1088/1742-6596/1525/1/012038)
* [NanoAOD file documentation](https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookNanoAOD)
* [Getting Started with CMS NanoAOD data](/docs/cms-getting-started-nanoaod)

NanoAOD files may still contain several instances of the same physics object
(e.g. a jet reconstructed with different algorithms), and some physics objects may be "double-counted" (i.e. a physics object may appear as a single object of its own type, but
it may also be part of a jet).

Additional knowledge is needed to define a "good" physics object, and this definition can be different in each analysis.
Only the runs that are validated by data quality monitoring should be used in any analysis.
The [list of the validated runs](/search?page=1&size=20&q=&type=Environment&subtype=Validation) is provided.

**RECO files**

Some datasets, such as those containing heavy-ion data, are provided in a format called RECO, which contains more information than the AOD format.
This is done when the original analyses by the CMS collaboration were performed using this particular format.

**Raw data**

Small samples of [raw data](/search?page=1&size=20&q=&experiment=CMS&file_type=raw) are also provided.

## <a name="disclaimer">Disclaimer</a>

Expand All @@ -67,6 +111,7 @@ The following are provided through this portal:
* The release of 2012 data includes a larger sample of simulated data. A part of 2012 simulated data is released with the bibliographic information content only, and these datasets will be made available online on demand.
* The release of 2013 heavy-ion-related data includes simulated data corresponding to different collision types and centre-of-mass energies.
* The release of 2015 data includes a large collection of simulated data, reprocessed with a software release compatible with the 2015 collision data, but it may still happen that some simulated data did not make it to this reprocessing and are therefore not available in this collection.
* The release of 2016 data includes a large collection of simulated data, reprocessed with a software release compatible with the 2016 collision data.
* If you are interested in joining the CMS Collaboration, please read [How to join CMS](https://cms.cern/collaboration/how-join-cms).

## <a name="other">Other CMS open data</a>
Expand All @@ -77,4 +122,4 @@ The following are provided through this portal:
## <a name="policies">Policies</a>

* [Data preservation and open access policy](/record/415)
* [Papers by CMS members using public data [internal]](https://cms-docdb.cern.ch/cgi-bin/DocDB/ShowDocument?docid=14372)
* [Papers by CMS members using public data [internal]](https://cms-docdb.cern.ch/cgi-bin/DocDB/ShowDocument?docid=14372)
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ $ cmsenv # do not execute this command if you are working in the container
<details>
<summary> 2016 </summary>
<p>
To analyse CMS data collected in 2016, you need <b>version 10.6.30</b> of CMSSW, supported on Scientific Linux 7. Once you have installed the <a href="/docs/cms-guide-docker">CMS open data container</a> or the <a href="/docs/cms-virtual-machine-2016-2018">CMS-specific CERN Virtual Machine (VM)</a>, you need to open a terminal.
To analyse CMS data collected in 2016, you need <b>version 10.6.30</b> of CMSSW, supported on Scientific Linux 7. Once you have installed the <a href="/docs/cms-guide-docker">CMS open data container</a> or the <a href="/docs/cms-virtual-machine-cc7">CMS-specific CERN Virtual Machine (VM)</a>, you need to open a terminal.
</p>
If you are using the VM, always use the "CMS shell" terminal available from the "CMS Shell" icon on the desktop for all CMSSW-specific commands, such as compilation and run. In the VM, execute the following command in the terminal if you haven't done so before; it ensures that you have this version of CMSSW running:

Expand Down

0 comments on commit 44585d0

Please sign in to comment.