diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 09fc3e2..573e987 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -21,14 +21,17 @@ jobs:
steps:
- name: Checkout the repository
- uses: actions/checkout@v1
+ uses: actions/checkout@v4
with:
submodules: true
- name: Setup dependencies
run: |
sudo apt update
- sudo apt install texlive-latex-base texlive-latex-recommended texlive-latex-extra texlive-fonts-recommended xsltproc latexmk cm-super texlive-extra-utils
+ sudo apt install texlive-latex-base texlive-latex-recommended \
+ texlive-latex-extra texlive-fonts-recommended \
+ librsvg2-bin latexmk \
+ pdftk xsltproc cm-super
- name: Build the document
run: make
@@ -39,7 +42,7 @@ jobs:
test -f ${{ env.doc_name }}.bbl
- name: Keep the PDF artefact
- uses: actions/upload-artifact@v1
+ uses: actions/upload-artifact@v4
with:
name: PDF Preview
path: ${{ env.doc_name }}.pdf
diff --git a/.github/workflows/preview.yml b/.github/workflows/preview.yml
index d437c43..17169ad 100644
--- a/.github/workflows/preview.yml
+++ b/.github/workflows/preview.yml
@@ -7,7 +7,9 @@
name: Update PDF Preview
env:
doc_name: data-origin
+ branch_name: ${{ github.head_ref || github.ref_name }}
+ tag_preview: auto-pdf-preview
on:
push:
@@ -22,15 +24,17 @@ jobs:
steps:
- name: Checkout the repository
- uses: actions/checkout@v1
+ uses: actions/checkout@v4
with:
submodules: true
- name: Setup dependencies
run: |
sudo apt update
- sudo apt install texlive-latex-base texlive-latex-recommended texlive-latex-extra texlive-fonts-recommended xsltproc latexmk cm-super texlive-extra-utils
- sudo snap install pdftk
+ sudo apt install texlive-latex-base texlive-latex-recommended \
+ texlive-latex-extra texlive-fonts-recommended \
+ librsvg2-bin latexmk \
+ pdftk xsltproc cm-super
- name: Build the document
run: make ${{ env.doc_name }}-draft.pdf
@@ -40,27 +44,31 @@ jobs:
test -f ${{ env.doc_name }}-draft.pdf
test -f ${{ env.doc_name }}.bbl
- - name: Move the auto-pdf-preview tag
- uses: weareyipyip/walking-tag-action@v2
- with:
- tag-name: auto-pdf-preview
- tag-message: |
- Last commit taken into account for the automatically updated PDF preview of this IVOA document.
+ - name: Remove the former PDF preview (if any)
+ run: |
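+ # Look up the tag of an existing preview pre-release (empty if none has been published yet)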
+ existingTag=$( gh release list --exclude-drafts --json 'isPrerelease,tagName' \
+ --jq '.[] | select(.isPrerelease == true and .tagName == "${{ env.tag_preview }}") | .tagName' \
+ | xargs -n 1 echo )
+ if [ -n "$existingTag" ];
+ then
+ gh release delete "$existingTag" --cleanup-tag --yes
+ fi
env:
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- - name: Update the PDF preview
- uses: Xotl/cool-github-releases@v1
- with:
- mode: update
- isPrerelease: true
- tag_name: auto-pdf-preview
- release_name: "Auto PDF Preview"
- body_mrkdwn: |
- This release aims to provide a PDF preview of the last commit applied on this repository.
+ - name: Upload the new PDF preview
+ run: |
+ RELEASE_NOTES="This release aims to provide a PDF preview of the last commit applied on this repository.
It will be updated automatically after each merge of a PullRequest.
- **DO NOT PUBLISH THIS PRE-RELEASE!**"
- _Corresponding commit: ${{ github.sha }}_
- assets: ${{ env.doc_name }}-draft.pdf
- replace_assets: true
- github_token: ${{ secrets.GITHUB_TOKEN }}
+ **DO NOT PUBLISH THIS PRE-RELEASE!**
+ _Corresponding commit: ${{ github.sha }}_"
+
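+ # Publish a fresh pre-release on the preview tag, attaching the newly built draft PDF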
+ gh release create ${{ env.tag_preview }} \
+ ${{ env.doc_name }}-draft.pdf \
+ --prerelease \
+ --target "${{ env.branch_name }}" \
+ --title 'Auto PDF Preview' \
+ --notes "$RELEASE_NOTES"
+ env:
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
diff --git a/README.md b/README.md
index a08ecd6..bab602d 100644
--- a/README.md
+++ b/README.md
@@ -79,7 +79,6 @@ The following metadata can be repeated and could follow a controlled vocabulary.
- Author: name or ORCID
- Organization: name or URL
-- Editor: name or URL
- Journal: name or URL
- Datacenter that provides the result: name or URL
- Contact: email
@@ -115,7 +114,6 @@ For queries on evolving dataset, the version or the date must complete the infor
|version | Dataset version (or release date) | |
|service_protocol| Protcol access with version | |
|request| Request url | |
-|request_post| (POST Request) POST arguments **new** | |
|request_date| Query execution date | |
|contact| email or URL contact | |
|landing_page| Dataset landing page | |
@@ -133,12 +131,11 @@ Dataset-origin completes the "Query information" -
|Publication-id| Dataset identifier that can be used for citation | yes |
|Curation-level| Controled vocabulary | |
|Resource-version| Dataset version od last release | |
-|Rights| Licence URI | |
-|Rights-type| Licence type (eg: CC-by, CC-0, private, public) | |
-|Copyrights| Copyright text | |
+|Rights_URI| Licence URI | |
+|Rights| Licence type (eg: CC-by, CC-0, private, public) or copyright text| |
|Creator| Dataset Author(s) or group | |
|Publication-ref| Identifier of the original resource that can be an article or the origin Data Center|
-|Editor| editor name| |
+|Journal or Editor| journal or editor name| |
|Relation_type | controled vocabulary (VOResource: relationshipType ? ) to specify relation to related resource **new**|
|related_resource | Original resource **new**|
|Publication-date| Date of the original publication | |
@@ -152,33 +149,6 @@ eg: bibcode:...,
Serialisation example: <info> tag makes the jobs. see SCS example
-- Complex output involving **several tables** (eg: TAP query, ObsCore result)
-
-Dataset-origin depends on each table used for the output. Datamodels like Last-step -Provenance or DatasetDM allows to gather the metadata.
-
-DatasetDM Example:
-
-|meta-data| Description| Mandatory |
-|--- |:-: |:-: |
-|dataset:productType|||
-|dataset:productSubType| controled vocabulary||
-|dataset:DataID.datasetDID| dataset ivoid|yes|
-|dataset:DataID.title| dataset title||
-|dataset:DataID.creationType| type of resource ||
-|dataset:DataID.date| Publication date of original dataset/article||
-|dataset:Party.name| (first)Author | |
-|dataset:Curation.publisherDID| data-center identifier (ivoid)|yes|
-|dataset:Curation.rights| rights text| |
-|dataset:Curation.releaseDate| Data-center publication date|yes|
-|party.Organisation.email|Data-center contact||
-|dataset:Curation.doi| Dataset DOI| |
-|dataset:Curation.bibcode| Dataset bibcode||
-
-Serialisation example: DatasetDM serialisation. see TAP example
-
-(see also: datasetDM in TAP (ivoa-talk)
-
-
# About
This document describes simple means to declare basic provenance
diff --git a/data-origin.tex b/data-origin.tex
index f80b385..52657ca 100644
--- a/data-origin.tex
+++ b/data-origin.tex
@@ -1,5 +1,6 @@
\documentclass[11pt,a4paper]{ivoa}
\input tthdefs
+\lstset{flexiblecolumns=true}
\usepackage{todonotes}
\usepackage{array}
\marginparwidth=4cm
@@ -36,23 +37,24 @@
\section*{Acknowledgments}
-Alberto Accomazzi (ADS), Anne Catherine Raugh (University of Maryland), Rafaele d'Abrusco (CfA), Mihaela Buga (CDS), Nicolas Moreau (ObsParis)
+Alberto Accomazzi (ADS), Anne Catherine Raugh (University of Maryland), Rafaele d'Abrusco (CfA), Mihaela Buga (CDS), Mathieu Servillat (ObsParis), Nicolas Moreau (ObsParis)
\section*{Conformance-related definitions}
\section{Introduction}
-Information on the origin of a piece of data is important for end users to understand data, for meaningful data citation and to improved their reusability. It is a part of provenance, which in turn is as a mandatory criterion in the GOFair\footnote{https://www.go-fair.org/} or RDA FAIR definition\footnote{https://doi.org/10.15497/rda00050}.
+Information on the origin of a piece of data is important for end users to understand data, for meaningful data citation and to improve reusability. It is a part of provenance, which in turn is a mandatory criterion in the GOFair\footnote{https://www.go-fair.org/} or RDA FAIR definition\footnote{https://doi.org/10.15497/rda00050}.
-The Virtual Observatory (VO) provides an advanced framework to search for, query, and consume astronomical data. The specification of Data Origin proposed here for VOTable output include both metadata originating at the data producer (e.g, author, space agency, observatory) and at the data centre (publisher) hosting the resource.
+The Virtual Observatory (VO) provides an advanced framework to search for, query, and consume astronomical data. The specification of Data Origin proposed here for VOTable output includes both metadata originating at the data producer (e.g., author, space agency, observatory) and at the data centre (publisher) hosting the resource.
-At this point, depending of the implementation, users can find the information conveyed in Data Origin in the data centre web pages (landing pages) or in the VO Registry. For citation, the ADS (NASA Astrophysics Data System) offers comprehensive bibliographic capabilities, including the production of BibTeX records for publications known to ADS. However, there are no VO standards to communicate this type of information yet.
+At this point, depending on the implementation, users can find the information conveyed in Data Origin in the data centre web pages (landing pages) or in the VO Registry. For citation, the ADS (NASA Astrophysics Data System) offers comprehensive bibliographic capabilities, including the production of BibTeX records for publications known to ADS. However, there are no VO standards to communicate this type of information yet.
+%However, there are standards for how to locate these types of information, and often it is not available machine-readably.
-A list of basic added metadata, reliably findable in a convenient location (i.e.,
+A list of basic data origin metadata, reliably findable in a convenient location (i.e.,
the VOTable produced by a query) will help users to properly cite or
acknowledge the data resources contributing to new or derived works.
-Tracing Data Origin, from the producer of the query to the production of the response, also allows an end users to determine the different agents implied in data preservation (authors, data centre, space agencies, journal), which is particularly helpful when debugging. A typical scenario here is when mirrored data is the subject to potentially differing curation actions in the different publication processes.
+Tracing Data Origin, from the producer of the query to the production of the response, also allows an end user to determine the different agents that contribute to data curation (authors, data centre, space agencies, journal), which is particularly helpful when debugging. A typical scenario here is when mirrored data is subject to potentially differing curation actions in the different publication processes.
The list of metadata items proposed here is designed to meet the needs of basic provenance
tracking when using current VO protocols.\\
@@ -64,8 +66,8 @@ \section{Introduction}
section~\ref{sec:data-origin-in-votable}, which
describes the VOTable serialisation.
-To complete the picture, the document includes in appendixes, a mapping of Data Origin items with the IVOA Registry schema (see appendix~\ref{sec:appendixB})
-and a citation example (appendix \ref{sec:appendixC}) that illustrates how this information can be used.
+Non-normatively, in appendix~\ref{sec:appendixB}, we give a mapping of Data Origin items to entities from the IVOA Registry schema, and a citation template in appendix \ref{sec:appendixC}
+that illustrates how data origin information can be used in practice.
@@ -73,7 +75,7 @@ \section{Use cases}
\subsection{Data Origin information}
-Scenario: Researchers have data in a VOTable that shows an odd feature. They would now like investigate whether that feature is physical or an artefact.
+Scenario: Researchers have data in a VOTable that shows an odd feature. They would now like to investigate whether that feature is physical or an artefact.
Derived requirements:
@@ -85,7 +87,7 @@ \subsection{Data Origin information}
\item When data provided by the service is derived from external resources, those external resources are clearly identified. In that case, additional curation applied by the publisher can be detected.
\end{itemize}
-For instance, a table published in a journal or by a space agency is also hosted in multiple data centre. The details of the table schema may depend on the data centre, which can add associated data, enrich metadata, or make a sub-selection of columns.
+For instance, a table published in a journal or by a space agency may also be hosted in multiple data centres. The details of the table schema may depend on the data centre, which can add associated data, enrich metadata, or make a sub-selection of columns.
\subsection{Reproducibility}
@@ -127,9 +129,9 @@ \subsection{Workflow bibliography}
\section{State of the Art}
-Neither VOTable \citep{2019ivoa.spec.1021O} nor IVOA data access protocols at this point provide standard facilities for conveying Data Origin information. While protocols such as TAP \citep{2019ivoa.spec.0927D} have standard interfaces to retrieve table metadata (e.g., unit, type and description of columns) or metadata on service endpoints (``capabilities'') by virtue of providing VOSI \citep{2017ivoa.spec.0524G} endpoints, for basic metadata like authors or publication dates, clients have to consult the VO Registry. Even that may be difficult, because there is not even a standard way to obtain an identifier from a service itself.
+Neither VOTable \citep{2019ivoa.spec.1021O} nor IVOA data access protocols at this point provide standard facilities for conveying Data Origin information. While protocols such as TAP \citep{2019ivoa.spec.0927D} have standard interfaces to retrieve table metadata (e.g., unit, type and description of columns) or metadata on service endpoints (``capabilities'') by virtue of providing VOSI \citep{2017ivoa.spec.0524G} endpoints, for basic metadata like authors or publication dates, clients have to consult the VO Registry. Even that may be difficult, because a service's IVOA identifier cannot, in general, be obtained from the service itself.
-HiPS \citep{2017ivoa.spec.0519F} is a more recent protocol which includes for each Dataset a list of standardized metadata. HiPS metadata includes authors, publication year, data centre identifier or licenses.
+HiPS \citep{2017ivoa.spec.0519F} is a more recent protocol which includes for each dataset a list of standardized metadata. HiPS metadata includes authors, publication year, data centre identifier or licenses.
\begin{figure}
\centering
@@ -152,16 +154,14 @@ \subsection{Data Origin in IVOA Registry}
The Registry makes this information available through several interfaces, partly
hosted by the data centres themselves, partly provided by a central
infrastructure.
-The VO Registry is an open framework without any moderators.
-The IVOA hence does not guarantee the resources' continued availability.
-In consequence, there are no guarantees as to the continued availability
-of any metadata in central IVOA infrastructure; the Registry is
+Since the VO Registry is an open framework without any central curators,
+there are no guarantees as to the continued availability of the resources or even of their descriptions; the Registry is
specifically \emph{not} designed as a persistent metadata repository
that artefacts could reliably refer to as metadata sources.
The IVOA Registry uses a unique identifier, the IVOID
\citep{2016ivoa.spec.0523D}, as the primary key for its resource
-collection. By the above considerations, this IVOID is not suitable as a means of citation, because it is a technical identifier with no provisions for persistence.
+collection. By the above considerations, this IVOID is not suitable as a means of citation.%, because it is a technical identifier with no provisions for persistence today. remove 2025-11-03
Both the Registry's metadata schema and the DataCite
\citep{std:DataCite40} metadata schema have been
@@ -177,20 +177,31 @@ \subsection{Data Origin and Provenance}
%The Provenance \citep{2020ivoa.spec.0411S} and Dataset Data Models can
%be used to express Data Origin.
-The Provenance Data Model \citep{2020ivoa.spec.0411S} is based on Entities, Agents and Activities as defined in the W3C Provenance model. The model's main focus is the detailed documentation of workflows.
+%Data origins information is intended to be provided in the results of queries. This information can be used to populate steps in a Provenance workflow.
+%Dataset Origin (see \ref{sec:dataset-origin}) can be serialized with Entities and Agent. The query information including information such as URL and parameters (see \ref{sec:query-information}) can be set with the configuration extension of the Provenance DM of the Virtual Observatory \citep{2020ivoa.spec.0411S}.
-For the serialisation of ProvDM instances within VOTables, MIVOT \citep{2023ivoa.spec.0620M} is available. At this point, however, the relatively complex model and many free parameters (for instance: serialisation) are obstacles for a wide and direct adoption of ProvDM+MIVOT to represent Data Origin, in particular when compared to the very straightforward mechanisms proposed here.
+Data Origin information is intended to be provided in the results of queries.
+DataOrigin records can be used to build a provenance graph describing how the
+main entity was derived from the originating resource, typically in a single step
+(an entity generated by an activity that used another entity as its origin).
+
+This mapping onto the VO Provenance Model \citep{2020ivoa.spec.0411S} is illustrated in appendix~\ref{sec:appendixD}.
+
+%The Provenance Data Model \citep{2020ivoa.spec.0411S} is based on Entities, Agents and Activities as defined in the W3C Provenance model. The model's main focus is the detailed documentation of workflows.
+
+%For the serialisation of ProvDM instances within VOTables, MIVOT \citep{2023ivoa.spec.0620M} is available. At this point, however, the relatively complex model and many free parameters are obstacles for a wide and direct adoption of ProvDM+MIVOT to represent Data Origin, in particular when compared to the very straightforward mechanisms proposed here.
%``Last-Step-Provenance'' is a Provenance extension currently under discussion which would define a list of metadata corresponding to Data Origin. Its output will not be recursive and could be easily serialized in a table.\todo{If we write this here, everyone will ask: Well, so why don't we wait for that? Perhaps we ought to just drop this?}
-Other initiatives, in working progress, such as ``DatasetDM'', or
-``Last-Step-Provenance'' show the growing interest of adding a piece of
-Provenance. The metadata listed in Data Origin can be a reference for current
-and future models interested by the information.
+%Other initiatives, in working progress, such as ``DatasetDM'', or
+%``Last-Step-Provenance'' show the growing interest of adding pieces of
+%provenance to published datasets.
+%The metadata listed in Data Origin can be a reference for current
+%and future models interested by the information.
\subsection{DALI}
DALI \citep{2017ivoa.spec.0517D} defines common conventions for all
-modern IVOA data access protocols.
+modern IVOA data access protocols.
%A part of it defines in-band signalling of error conditions or overflows -- an important part of Data Origin -- for VOTables.
It also defines bespoke names for \xmlel{INFO} elements used to convey
@@ -204,9 +215,10 @@ \section{Expected Data Origin}
It includes reproducibility metadata (see Table~\ref{tab:query-names})
that reflects the context in which a query was executed. The information
-includes parameters allowing users to execute the query again as well as
-parameters that will aid debugging in case a later execution of the
-query does yield different results, such as version or the execution
+includes metadata allowing users to execute the query again as well as
+metadata that will aid debugging in case a later execution of the
+query does yield different results, such as versions of data and software
+or the execution
date, which is particularly relevant when the resources' data content
can evolve.
@@ -214,51 +226,30 @@ \section{Expected Data Origin}
The information is complemented by provenance metadata (see Table~\ref{tab:origin-names}) like authors, licence, references, or identifiers
for the resource or related resources (which could be IVOIDs, Bibcodes, or DOIs).
-Most provenance metadata is generally provided through the Registry.
-While giving relevant IVOID(s) would therefore in principle be
-sufficient for the definition of the metadata, as discussed above for
+Much of this information is already provided by the VO Registry.
+While giving relevant IVOID(s) would therefore in principle go a long way
+towards supplying relevant metadata, as discussed above for
persistent availability of the metadata, a serialisation
-directly into the VO output is preferred
+directly into the VO output is desirable
(see Table~\ref{sec:data-origin-in-votable}).
-% end
-% please don't comment out things in version control; it only makes
-% changes harder to follow -- and you can always recover deleted
-% material from the history if necessary.
-
-% remove 23-nov-2023
-
-\subsection{Condition for citation}
-% MD: since I don't know what to do with this section, I've not done
-% any editorial work on it.
-
-The DOI is the privileged persistent identifier to cite resources.\\
-
-%Data Citation requires a sustainable URL which is not guarantied in IVOA resources.
-%Unlike ivoid, the DOI guaranties a sustainable URL and should be used for citation. \\
-Data citation requires a persistent identifier and a sustainable URL.
-Both are guaranteed by DOI, but resource provided with an ivoid (the IVOA identifier)
-is not guaranteed to be sustainable.\\
-
-BibTeX requires curation that needs metadata-like identifier, authors, title, publisher and date of publication.
-ADS (NASA) provides a citation capability for its indexed resources. This curation quality has to be privileged or may be taken as example for any data providers and users.
-
-For instance, DOI providers like Datacite, provides a BibTeX capability. The BibTeX quality depends of the DOI metadata filled by the Data producers and publishers.\\
-
-The IVOA registry which contains metadata for any resources could be used to get the expected quality for citation if the following conditions are met:
-\begin{itemize}
-\item the registry resource includes a persistent identifier (DOI), typically in an \xmlel{altIdentifier} element
-\item the registry resource includes the metadata which meets the BibTeX requirements
-\end{itemize}
-
-% move metadata list : 23-nov-2023
-
\section{Data Origin in VOTable}
\label{sec:data-origin-in-votable}
The metadata listed below combines terms from DALI \citep{2017ivoa.spec.0517D}, Dublin Core \citep{std:DUBLINCORE} and extensions in order to provide Data Origin information to end users.
+\paragraph{Note about the following items.}
+While it is desirable that publishers provide the full set of
+metadata,
+%we consider the following items the minimally viable set:
+we consider the following items to have the highest impact (for reuse or citation):
+data\_ivoid, publisher, service\_protocol, request, request\_date,
+citation, resource\_version, rights\_uri, creator, publication\_date,
+last\_update\_date.
+
+
\subsection{Query information}
+\label{sec:query-information}
Table~\ref{tab:query-names} lists the metadata items defined here to
convey query-related information in Data Origin.
@@ -267,26 +258,28 @@ \subsection{Query information}
For queries against evolving datasets, the request\_date item clearly is
particularly important.
+
\begin{table}
\hbox to\textwidth{\hss
-\begin{tabular}{|l|>{\raggedright}p{6cm}|l|l|} \hline
-\textbf{\vrule width0pt height 12pt depth 7pt Key} & \textbf{Description} & \textbf{Level} & \textbf{Dublin Core}\\ \hline
-publisher & Data centre that produced the VOTable & R & publisher\\ \hline
+\begin{tabular}{|l|>{\raggedright}p{7cm}|l|} \hline
+\textbf{\vrule width0pt height 12pt depth 7pt Key} & \textbf{Description} & \textbf{Dublin Core}\\ \hline
+% removed ivoid & IVOID of underlying data collection & R & \\ \hline
+publisher & Data centre that produced the VOTable & publisher\\ \hline
%rename 23-nov-2023 version & Software version (*) & & \\ \hline
-server\_software & Software version (*) & & \\ \hline
-service\_ivoid & IVOID of the service through which the data was retrieved & R & \\ \hline
-service\_protocol & IVOID of the protocol through which the data was
-retrieved& R& \\ \hline
-request & Full request URL including a query string (**)& R& \\ \hline
-query & An input query in a formal language (e.g, ADQL) & & \\ \hline
+server\_software & Software version (*) & \\ \hline
+service\_protocol & IVOID of the protocol through which the data was retrieved & \\ \hline
+service\_ivoid & IVOID of the service through which the data was retrieved & \\ \hline
+request & Full request URL including a query string (**)& \\ \hline
+query & An input query in a formal language (e.g., ADQL) & \\ \hline
% removed in 23-nov-2023
%request\_post & (POST Request) POST arguments & & \\ \hline
% end
-request\_date & Query execution date & R&\\ \hline
-contact & Email or URL to contact publisher & & \\ \hline
-\multicolumn{4}{p{\textwidth}}{\vskip 2pt\footnotesize(*) Operators are
-encouraged to follow \citet{note:opid} in this item} \\
-\multicolumn{4}{p{\textwidth}}{\footnotesize(**) For ``Simple''
+request\_date & Query execution date &\\ \hline
+contact & Email or URL to contact publisher & \\ \hline
+
+\multicolumn{3}{p{\textwidth}}{\vskip 2pt\footnotesize(*) Operators are
+encouraged to follow \citet{note:opid} in this item.} \\
+\multicolumn{3}{p{\textwidth}}{\footnotesize(**) For ``Simple''
protocols (regardless of the HTTP method), put the
application/x-www-form-urlencoded form of the query parameters in the
query part of the URL here.
@@ -301,10 +294,12 @@ \subsection{Query information}
\subsection{Dataset Origin}
+\label{sec:dataset-origin}
Dataset origin complements the query-related information to improve the
-understandability of the underlying data. This information is intended
-for end users. If the resource is also described in the Registry, care
-must be taken that in-response metadata remains reflects the metadata
+understandability of the underlying data. Clients should make
+sure that end users can easily access and inspect this information.
+If the resource is also described in the Registry, care
+must be taken that the in-response metadata reflects the metadata
available there at the time the response is produced.
@@ -313,52 +308,57 @@ \subsection{Dataset Origin}
\begin{table}
\hbox to\textwidth{\hss
-\begin{tabular}{|l|>{\raggedright}p{6cm}|l|l|} \hline
-\textbf{\vrule width0pt height 12pt depth 7pt Key} & \textbf{Description} & \textbf{Level} & \textbf{Dublin Core}\\ \hline
+\begin{tabular}{|l|>{\raggedright}p{7cm}|l|} \hline
+\textbf{\vrule width0pt height 12pt depth 7pt Key} & \textbf{Description} & \textbf{Dublin Core}\\ \hline
% removed 23-nov-2023 publication\_id & Dataset identifier that can be used for citation& M & identifier\\ \hline
-ivoid & IVOID of underlying data collection & R & \\ \hline
-citation & Dataset identifier that can be used for citation (e.g. dataset DOI) & R & identifier\\ \hline
-reference\_url & Dataset landing page & & \\ \hline % previously landing_page
+data\_ivoid & IVOID of underlying data collection & \\ \hline
+ivoid & (deprecated) use data\_ivoid & \\ \hline
+citation & Dataset identifier that can be used for citation (e.g. dataset DOI) & identifier\\ \hline
+reference\_url & Dataset landing page & \\ \hline % previously landing_page
% removed in 23-nov-2023
%curation\_level & Controled vocabulary
% (IVOA rdf, content\_level) & & \\ \hline
% end
% modifier 23-nov-2023 resource\_version & Dataset version or last release & R & \\ \hline
-resource\_version & Dataset version & R & \\ \hline
+resource\_version & Dataset version & \\ \hline
%rename 23-nov-2023 rights & (*) Licence URI & R & rights\\ \hline
-rights\_uri & Licence URI (*) & R & rights\\ \hline
+rights\_uri & Licence URI (*) & rights\\ \hline
% removed 23-nov-2023 rights\_type & (*) Licence type (eg: CC-by, CC-0, private, public) & & \\ \hline
%rename 23-nov-2023 copyrights & Copyright text & & \\ \hline
-rights & Licence or Copyright text & & rights\\ \hline
+rights & Licence or Copyright text & rights\\ \hline
creator & \raggedright The person(s) mainly involved in the
-creation of the resource; generally, the author(s).
- & R & creator\\ \hline
-editor & Editor name (article)& & \\ \hline
+creation of the resource; generally, the author(s)
+ & creator\\ \hline
+journal & Designation of the medium the originating scholarly publication was
+ published in. In general, that is a journal name. Common
+ abbreviations (ApJ, A\&A, \dots) are encouraged. & \\ \hline
% removed 15-dec-2023 to use cites or is_derived_from
%relation\_type & An identifier of a second resource and its relationship to the
% present resource.
% Controlled vocabulary (**)& & relation\\ \hline
%related\_resource & Information about a second resource from which the present resource
% is derived. The source is an identifier that can be prefixed with the identifier type: eg: bibcode:, doi:, ror: & & source\\ \hline
-article & Bibcode or DOI of a reference article & & relation\\ \hline
-cites & An Identifier (ivoid, DOI, bibcode) of second resource
- using relation type ``cites'' (**)& & relation\\ \hline
-is\_derived\_from & An Identifier (ivoid, DOI, bibcode) of second resource
- using relation type ``is\_derived\_from'' (**)& & relation\\ \hline
+article & Bibcode or DOI of a publication relevant for the data & relation\\ \hline
+cites & An Identifier (ivoid, DOI, bibcode) of a resource
+ being in a ``cites'' (**) relationship to the
+ originating resource & relation\\ \hline
+is\_derived\_from & An Identifier (ivoid, DOI, bibcode) of a resource
+ being in an ``is\_derived\_from'' (**) relationship
+ to the originating resource & relation\\ \hline
% remove 23-nov-2023
%publication\_date & Date of publication (DALI timestamp) & R & \\ \hline
%resource\_date & Date of the original publication (DALI timestamp) & R & date\\ \hline
%
-original\_date & Date of the original resource from which the present resource is derived (DALI timestamp) & & \\ \hline
-publication\_date & Date of first publication in the data centre (DALI timestamp) (***) & R & \\ \hline
-last\_update\_date & Last data centre update (DALI timestamp) (****) & R & date\\ \hline
-\multicolumn{4}{p{\textwidth}}{\vskip2pt\footnotesize(*) Following Registry
+original\_date & Date of the original resource from which the present resource is derived (DALI timestamp) & \\ \hline
+publication\_date & Date of first publication in the data centre (DALI timestamp) (***) & \\ \hline
+last\_update\_date & Last data centre update (DALI timestamp) (****) & date\\ \hline
+\multicolumn{3}{p{\textwidth}}{\vskip2pt\footnotesize(*) Following Registry
practice, this should come from
SPDX \url{https://spdx.org/licenses/}, though Creative Commons URLs
\url{https://creativecommons.org} are also admitted}\\
-\multicolumn{4}{p{\textwidth}}{\footnotesize(**) \url{https://www.ivoa.net/rdf/voresource/relationship\_type/}}\\
-\multicolumn{4}{p{\textwidth}}{\footnotesize(***) Equivalent to curation/date[@role='created'] in registry}\\
-\multicolumn{4}{p{\textwidth}}{\footnotesize(****) Equivalent to curation/date[@role='updated'] in registry}
+\multicolumn{3}{p{\textwidth}}{\footnotesize(**) \url{https://www.ivoa.net/rdf/voresource/relationship\_type/}}\\
+\multicolumn{3}{p{\textwidth}}{\footnotesize(***) Equivalent to curation/date[@role='created'] in registry}\\
+\multicolumn{3}{p{\textwidth}}{\footnotesize(****) Equivalent to curation/date[@role='updated'] in registry}
\end{tabular}\hss}
\caption{\xmlel{INFO} names available for specifying information
related to the origin of the data set(s) a VOTable was generated from}
@@ -368,64 +368,57 @@ \subsection{Dataset Origin}
\subsection{VOTable serialization}
-In this document, we focused on a basic serialization that allows data
+In this document, we focus on a basic serialization that allows data
providers to describe individual tables.
-This output is particularly suitable for protocols like Simple Cone Search.
+This is particularly suitable for protocols like Simple Cone Search.
The basic serialization uses INFO tags to populate Data Origin (see the example of a ConeSearch result in appendix \ref{sec:appendixA}).
INFO tags are allowed in VOTable under \xmlel{VOTABLE} or in \xmlel{RESOURCE} elements.
-Thus, it becomes possible to annotate a collection of \xmlel{TABLE} which are in different resources.
+It is expressly allowed to supply data origin in individual
+\xmlel{TABLE} or \xmlel{RESOURCE} elements in more complex VOTables.
-This specification at this point does not constrain the multiplicities of individual INFO items, and clients should not fail hard if any given INFO item occurs multiple times.
+This specification does not at this point constrain the multiplicities of individual INFO items, and clients should not fail hard if any given INFO item occurs multiple times.
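+
+As a non-normative illustration (the data centre, creator, and identifier
+values below are purely hypothetical), a minimal serialization covering a
+few of the items from Tables~\ref{tab:query-names}
+and~\ref{tab:origin-names} could look like this:
+
+\begin{lstlisting}[basicstyle=\footnotesize\ttfamily]
+<VOTABLE version="1.4">
+  <RESOURCE type="results">
+    <INFO name="publisher" value="Example Data Centre"/>
+    <INFO name="request_date" value="2024-05-17T09:30:00"/>
+    <INFO name="creator" value="Doe, J."/>
+    <INFO name="rights_uri"
+          value="https://spdx.org/licenses/CC-BY-4.0.html"/>
+    <INFO name="citation" value="doi:10.0000/example">
+      Dataset identifier that can be used for citation</INFO>
+    <TABLE>
+      <!-- FIELDs and table data as usual -->
+    </TABLE>
+  </RESOURCE>
+</VOTABLE>
+\end{lstlisting}
+
+In line with the recommendation at the end of this section, the
+\xmlel{INFO} element named \emph{citation} above also carries a short
+human-readable description in its body, while the machine-readable
+content is kept in the \xmlel{name} and \xmlel{value} attributes.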
-Complex queries (for instance, resulting from ADQL JOIN-s) need an advanced output serialization to gather metadata by resource.
+Complex queries (for instance, resulting from ADQL JOIN-s) need an advanced output serialization to gather the full metadata of all contributing resources.
Mechanisms to manage this requirement are being developed in the IVOA
(MIVOT).
The mechanisms defined here are generally still applicable in these
cases, but the authors acknowledge that they are certainly stretched to
-their limits there.
+their limits in such cases.
As a service to human readers, it is recommended to put descriptions, possibly derived from definitions provided in this document, into the bodies of the INFO elements.
-\section{Data Origin in Registry}
-%The ivo-id, now available in VOTable, allows to query the resource metadata which are in the VO registry.\\
-%The registry schema \citep{2018ivoa.spec.0625P} can be mapped with Data Origin items.
-The VO registry schema, which contains most of the Data Origin information, is completed by metadata described in VOResource \citep{2018ivoa.spec.0625P}.
-This information (assuming it has been filled in by the issuer of the registration) can be requested via the ivo-id now available in VOTable.
-
-The table \ref{tab:voresourcemapping} (see appendix \ref{sec:appendixB}) establishes the mapping between VOResource and Data Origin items.
-
-% may be in an other note?
+%\section{Data Origin in Registry} REMOVE 2025-11-03
+%The VO registry schema, which contains most of the Data Origin information, is completed by metadata described in VOResource \citep{2018ivoa.spec.0625P}.
+%This information (assuming it has been filled in by the issuer of the registration) can be requested via the ivo-id now available in VOTable.
%
-% UPDATE - licenses : type, uri as Datacite
-% ADD - copyrights
-% ADD - akcnowledgement
-% ADD - curation text.. added values, column selection...., extract....
-% list of Data Origin metadata in registry
+%The table \ref{tab:voresourcemapping} (see appendix \ref{sec:appendixB}) establishes the mapping between VOResource and Data Origin items.
\appendix
\section{Appendix, Cone search serialization}\label{sec:appendixA}
Simple Conesearch with its VOTable serialization. Data Origin are specified using INFO.
-\begin{verbatim}
+\begin{lstlisting}[basicstyle=\footnotesize\ttfamily]
- Query execution date
- Query execution date
+
- Publisher contact
- Software version
+ Publisher contact
+ Software version
117 exoplanets in habitable zone with Kepler DR25
-
+
ivoid identifier to link registry
data centre
@@ -443,7 +436,7 @@ \section{Appendix, Cone search serialization}\label{sec:appendixA}
Reference article
-
+
Journal of the reference article
@@ -454,7 +447,7 @@ \section{Appendix, Cone search serialization}\label{sec:appendixA}
-\end{verbatim}
+\end{lstlisting}
\section{Appendix, VOResource and Data origin}\label{sec:appendixB}
Expected metadata (VOResource) with their equivalent in Datacite schema (version 4.4) to provide Data Origin in the registry.\\
@@ -462,57 +455,72 @@ \section{Appendix, VOResource and Data origin}\label{sec:appendixB}
%\begin{table}
\label{tab:voresourcemapping}
-\begin{tabular}{|p{3cm}|p{3cm}|p{3cm}|p{5cm}|} \hline
-\textbf{VOResource} & \textbf{Data Origin} & \textbf{DataCite} & \textbf{Explain} \\ \hline
-identifier &ivoid & Identifier & ivoid of resource(s) hosted by the service\\ \hline
-title & & Title& resource title\\ \hline
-shortName &&& Resource short name\\ \hline
-altIdentifier & & AlternateIdentifier&
- Alternate identifier accepts bibcode, DOI or URL. DOI should be privileged to facilitate citation and link with DataCite or Crossref..eg: DOI \\ \hline
+
+The following table is a non-normative crosswalk for metadata
+concepts between the VO Registry (VOResource and VODataService),
+data origin, and DataCite.\footnote{This is not a full mapping of
+VOResource to DataCite. To the extent this is possible (i.e., without
+having to mint DOIs), the XSLT at \url{https://github.com/ivoa/vor-doi}
+gives such a mapping.} It is
+intended to simplify the implementation of data origin for
+publishers that already have machine-readable resource descriptions
+available.
+
+\paragraph{}
+% 2025-11-06 gilles/Markus remove data_origin empty cells
+
+\begin{tabular}{|p{2.8cm}|p{3cm}|p{3cm}|p{4cm}|} \hline
+\textbf{VO Registry} & \textbf{Data Origin} & \textbf{DataCite} & \textbf{Explanation} \\ \hline
+identifier &ivoid & Identifier & The ivoid of resource(s) hosted by the service\\ \hline
+%title & & Title& The resource title\\ \hline
+%shortName &&& The short name of the resource\\ \hline
+%altIdentifier & & AlternateIdentifier&
+% Alternate identifier accepts bibcode, DOI or URL. DOI should be privileged to facilitate cross identification \\ \hline
\end{tabular}
-\begin{tabular}{|p{3cm}|p{3cm}|p{3cm}|p{5cm}|} \hline
+\paragraph{}
+\begin{tabular}{|p{2.8cm}|p{3cm}|p{3cm}|p{4cm}|} \hline
\multicolumn{4}{|l|}{\textbf{curation}} \\ \hline
-publisher & publisher & Publisher &publisher (*)\\ \hline
-creator & creator & Creator & author(s) (*)\\ \hline
-contributor & & Contributor & contributor(s) (*)\\ \hline
-date [Created]& publication\_date & Dates [created] & creation date (in data centre)\\ \hline
-date [Updated]& last\_update\_date & Dates [updated] & last modification\\ \hline
- ? & original\_date & PublicationYear & publication year in data centre\\ \hline
+publisher & publisher & Publisher & The publisher (*)\\ \hline
+creator & creator & Creator & The author(s) (*)\\ \hline
+%contributor & & Contributor & The contributor(s) (*)\\ \hline
+date [Created]& publication\_date & Dates [created] & The creation date (in data centre)\\ \hline
+date [Updated]& last\_update\_date & Dates [updated] & The date of the last modification\\ \hline
+ ? & original\_date & PublicationYear & The year of publication in data centre\\ \hline
version & resource\_version & Version &\\ \hline
contact & contact &&\\ \hline
\multicolumn{4}{l}{\small \footnotesize(*) terms allowing name and Orcid (\xmlel{altIdentifier} in VOResurce)} \\
\end{tabular}
-\begin{tabular}{|p{3cm}|p{3cm}|p{3cm}|p{5cm}|} \hline
+\paragraph{}
+\begin{tabular}{|p{2.8cm}|p{3cm}|p{3cm}|p{4cm}|} \hline
\multicolumn{4}{|l|}{\textbf{content} } \\ \hline
-source & article & RelatedIdentifier (*) & bibcode\\ \hline
-referenceURL & reference\_url & & Landing page \\ \hline
-type & & ResourceType & Resource type (catalog, etc)\\ \hline
-description & & Description & Abstract\\ \hline
-relationShip & & RelatedIdentifiers &Link to remote resource (Recommended to link Original data centre) \\ \hline
-relationshipType & cites, is\_derived\_from & relationType &\\ \hline
+source & article & RelatedIdentifier (*) & The reference article (bibcode or DOI)\\ \hline
+referenceURL & reference\_url & & The landing page URL\\ \hline
+%type & & ResourceType & The Resource type (catalog, etc)\\ \hline
+%description & & Description & Usually, it is the abstract\\ \hline
+%relationship & & RelatedIdentifiers & The Link to remote resource (it is recommended to link Original data centre) \\ \hline
+relationshipType & cites, is\_derived\_from & relationType &\\ \hline
relatedResource & cites, is\_derived\_from & RelatedIdentifier & \\ \hline
\multicolumn{4}{l}{\small \footnotesize(*) DataCite sub-properties type=bibcode, relationType=IsSupplementTo} \\
\end{tabular}
-\begin{tabular}{|p{3cm}|p{3cm}|p{3cm}|p{5cm}|} \hline
+\paragraph{}
+\begin{tabular}{|p{2.8cm}|p{3cm}|p{3cm}|p{4cm}|} \hline
\multicolumn{4}{|l|}{\textbf{rights}} \\ \hline
-rights & rights & Rights& license
- The right element accepts free text. However, it is preferable to provide a machine-readable License (*)
- \\ \hline
-URI & rights\_uri& rightsURI & license URL\\ \hline
- & & rightsIdentifier & standard license name .ex CC-by.
- Copyright is accepted by FAIR principle. But copyright is only a link to the data producer. It gives the contact point to any users who would like to use data. Copyright is more simple to implement for data-centre that provides a copy of original resource, but its use is not well integrated in an interoperable workflow.
+rights & rights & Rights&
+ The rights element accepts free text; however, a machine-readable licence (*) is preferable
\\ \hline
+URI & rights\_uri& rightsURI & The License URL\\ \hline
+% & & rightsIdentifier & The Standard license name .ex CC-by.
+% Copyright is accepted by FAIR principle. But copyright is only a link to the data producer. It gives the contact point to any users who would like to use data. Copyright is more simple to implement for data-centre that provides a copy of original resource, but its use is not well integrated in an interoperable workflow.
+% \\ \hline
\multicolumn{4}{p{\textwidth}}{\small \footnotesize(*) See SPDX list \url{https://spdx.org/licenses/} or Creative Commons licenses \url{https://creativecommons.org}}
\end{tabular}\\
%\caption{Expected metadata (VOResource) with their equivalent in Datacite schema (version 4.4) to provide Data Origin in the registry.}
%\end{table}
-
-
%%\textbf{Examples}
%%
%%Examples of rights serialization:
@@ -560,11 +568,25 @@ \section{Appendix, Citation Template} \label{sec:appendixC}
via services (ivoa resource=, )
using (version , executed at \\
-
\textbf{Example}:\\
We extract data published in bibcode:2021AJ....161...36B (Bryson S., 2021),
via CDS services (ivoa resource=ivo://cds.vizier/j/aj/161/36, 2021-03-16)
-using Simple Cone Search 1.03 (version 7.294, executed at 2022-10-30)
+using Simple Cone Search 1.03 (version 7.294, executed at 2022-10-30)\\
+
+\textbf{Example}: We can construct a citation (in APA style) from Data Origin. However, we emphasise that ADS remains the standard for citation.\\
+
+"APA: (). [Dataset]. .
+
+
+
+\section{Appendix, DataOrigin and ProvDM}\label{sec:appendixD}
+This is an example of provenance extracted from a Simple Cone Search result containing Data Origin information.
+The figure is a graphical representation of this provenance, created using the VOPROV Python package.
+
+\begin{figure}[htbp]
+ \includegraphics[width=1.2\textwidth]{voprov_example.png}
+ \caption{Provenance serialization generated from DataOrigin}
+\end{figure}
\section{Appendix, Changes from Previous Versions}
@@ -576,7 +598,8 @@ \section{Appendix, Changes from Previous Versions}
\subsection{Difference between versions 1.1 and 1.2}
\begin{itemize}
\item New item: \textit{service\_ivoid}
- \item Move ivoid from Table1 (query) to Table2 (Origin)
+ \item Rename \textit{ivoid} to \textit{data\_ivoid} (now in Table~2, Origin)
+ \item Rename \textit{editor} to \textit{journal}
\end{itemize}
\subsection{Difference between versions 1.0 and 1.1}
@@ -588,8 +611,8 @@ \subsection{Difference between versions 1.0 and 1.1}
\textit{publication\_id} becomes \textit{citation} (DALI)
\textit{landing\_page} becomes \textit{reference\_url} (VOResource)
\textit{relation\_type} and \textit{related\_resource} to specific relation : \textit{cites} and \textit{is\_derived\_from} (VOResource)
-\item Remove \textit{curation\_level},
- \textit{request\_post},
+\item Remove \textit{curation\_level},
+ \textit{request\_post},
\textit{rights\_type}
\item New item: \textit{original\_date}, \textit{article}
\item Clarify date items and multiple INFO in VOTable.
@@ -597,7 +620,6 @@ \subsection{Difference between versions 1.0 and 1.1}
\item Language smoothing
\end{itemize}
-
\bibliography{ivoatex/ivoabib,ivoatex/docrepo,local}
diff --git a/fig-ext-ids.pdf b/fig-ext-ids.pdf
new file mode 100644
index 0000000..5fa6d7d
Binary files /dev/null and b/fig-ext-ids.pdf differ
diff --git a/ivoatex b/ivoatex
index 9a4d63b..d63b379 160000
--- a/ivoatex
+++ b/ivoatex
@@ -1 +1 @@
-Subproject commit 9a4d63b3ce6c5cafedd1e94c96d38b8a8f838a78
+Subproject commit d63b3792a5761d89fb9be4dcf338b710e99c69ea
diff --git a/reports/schema-doi.png b/reports/schema-doi.png
deleted file mode 100644
index 02ddf51..0000000
Binary files a/reports/schema-doi.png and /dev/null differ
diff --git a/tests/J_AJ_161_36_table8.xml b/tests/J_AJ_161_36_table8.xml
deleted file mode 100644
index 38cb95b..0000000
--- a/tests/J_AJ_161_36_table8.xml
+++ /dev/null
@@ -1,128 +0,0 @@
-
-
-
- VizieR Astronomical Server vizier.cds.unistra.fr
- Date: 2022-10-30T11:00:36 [V7.294]
- Explanations and Statistics of UCDs: See LINK below
- In case of problem, please report to: cds-question@unistra.fr
- In this version, NULL integer columns are written as an empty string
- <TD></TD>, explicitely possible from VOTable-1.3
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 117 exoplanets in habitable zone with Kepler DR25 (Bryson+, 2021)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Planet candidate properties
-
-
- Distance from center (285.36700+39.28000)[FK5/J2000], at Epoch=J2000.0
-
-
- Record number assigned by the VizieR team. Should Not be used for identification.
-
-
- Kepler Object Identifier
-
-
- [b] Flag on KOI (1)
-
-
- [0.79/5.02] Planetary radius
-
-
- [0.03/5964.94] Upper uncertainty in Radius
-
-
- [0.04/2.05] Lower uncertainty in Radius
-
-
- [28.46/623.71] Orbital period
-
-
- [0.24/3.21] Instellation in Earth units
-
-
- [0.02/0.52] Upper uncertainty in Instel
-
-
- [0.02/0.49] Lower uncertainty in Instel
-
-
- [3926/6753] Host star effective temperature
-
-
- [43/272] Upper uncertainty in Teff
-
-
- [45/320] Lower uncertainty in Teff
-
-
- [0.01/1] Reliability
-
-
- [0.00026/1] Inclusion probability
-
-
- Simbad column added by the CDS
-
-
-
-
- Position from SIMBAD (right ascension part)
-
-
- Position from SIMBAD (declination part)
-
-
-| 0.000347 | 1 | 4742.01 | b | 1.35 | 0.08 | 0.08 | 112.30 | 1.01 | 0.08 | 0.07 | 4602 | 84 | 76 | 0.91 | 1.00000 | Simbad | 285.36656 | +39.28006 |
-| 0.776606 | 10 | 8242.01 | b | 1.48 | 0.10 | 0.21 | 331.55 | 0.89 | 0.07 | 0.07 | 5736 | 105 | 97 | 0.53 | 1.00000 | Simbad | 286.04396 | +38.70877 |
-| 0.352560 | 20 | 8246.01 | b | 1.72 | 0.12 | 0.22 | 425.65 | 1.70 | 0.16 | 0.15 | 6091 | 125 | 123 | 0.36 | 0.99898 | Simbad | 285.60824 | +39.57930 |
-| 0.562212 | 37 | 4084.01 | b | 2.32 | 0.16 | 0.09 | 214.88 | 1.14 | 0.10 | 0.09 | 5288 | 94 | 89 | 0.99 | 0.86571 | Simbad | 286.05244 | +39.09604 |
-| 0.551218 | 56 | 812.03 | | 2.10 | 0.11 | 0.07 | 46.18 | 2.24 | 0.24 | 0.22 | 4293 | 82 | 90 | 1.00 | 0.43624 | Simbad | 286.07910 | +39.27832 |
-| 0.972806 | 113 | 401.02 | | 4.17 | 0.27 | 0.53 | 160.02 | 2.13 | 0.16 | 0.15 | 5516 | 90 | 87 | 0.95 | 0.00057 | Simbad | 285.85371 | +38.38411 |
-
-
-
-
diff --git a/tests/README.md b/tests/README.md
deleted file mode 100644
index 7f905d2..0000000
--- a/tests/README.md
+++ /dev/null
@@ -1,43 +0,0 @@
-# VOTable Serialisation example
-
-2 examples of Data-origin serialication.
-
-- simple case based on <INFO> for VOTable having a unique table (eg: SCS)
-- case based on Data-model (DatasetDM) for VOTable having 2 tables
-
-
-## Python implementation
-
-the current implementation exploits VOTable and registry metadata. It is in developement phase.
-The code enables to get Data-origin information and provides an api to cite the resource. Citing requires DOI.
-The code returns the ADS bibtex (completed with ivoid value) or makes a bibtex from meta-data in registry.
-
-When exists, the ADS bibtex is preferred to the registry for its curation and its clean integration into journals.
-
-**Note:** the mivot and ads library are not public , but just here as example -
-
-adsbib library uses the ADS API and the ADS token authorisation. You need to create a ".ads" file in the repository which contains the ADS token.
-see ADS: https://ui.adsabs.harvard.edu/help/api/
-
-```
-$ python3 dataorigin.py -h
-```
-
-Data-origin from registry using ivoid
-```
-$ python3 dataorigin.py -i ivo://nasa.heasarc/gc47tuccxo
-$ python3 dataorigin.py -i "ivo://cds.vizier/i/355"
-```
-
-Data-origin from Simple-cone-search
-```
-$ python3 dataorigin.py -f J_AJ_161_36_table8.xml
-```
-
-Data-origin from DatasetDM serialisation
-```
-$ python3 dataorigin.py -f tap.xml -t VODML
-```
-
-
-
diff --git a/tests/adsbib.py b/tests/adsbib.py
deleted file mode 100644
index 549c820..0000000
--- a/tests/adsbib.py
+++ /dev/null
@@ -1,96 +0,0 @@
-""" Get metadata from ADS
- G.Landais (CDS) 17-apr-2018
-"""
-
-import sys
-import os
-import logging
-import getopt
-import json
-
-from urllib import request, parse
-
-ADS_META = ["abstract", "ack", "aff", "alternate_bibcode", "alternate_title", "arxiv_class", "author",
- "bibcode", "bibgroup", "bibstem", "body*", "citation_count", "copyright", "data", "database",
- "doi", "doctype", "first_author", "grant", "id", "identifier", "indexstamp", "issue", "keyword",
- "lang*", "orcid_pub", "orcid_user", "orcid_other", "page", "property", "pub", "pubdate",
- "read_count", "title", "vizier", "volume", "year"]
-
-ADS_DEFAULT_META = ["author", "first_author", "id", "identifier", "doi", "orcid_pub", "orcid_user", "orcid_other", "copyright"]
-
-ADS_URL = "https://api.adsabs.harvard.edu"
-
-
-class adsbib:
-
- def __init__(self, certs=None):
- """Constructor
- :param certs: ADS token file
- """
- self.__csrf = self.__getcsrf(certs)
-
- def __getcsrf(self, certs):
- try:
- if certs is None:
- certs = ".ads"
-
- with open(certs, "r") as fd:
- csrf = fd.read().strip()
- except Exception as e:
- raise Exception("Error getting file certification {0}: {1}".format(certs, str(e)))
-
- return csrf
-
- def get(self, bibcode:str=None, doi:str=None)->str:
- """get ADS data
- :param name: list of metadata to retrieve (default see ADS_DEFAULT_META)
- :return: the result in a dictionary
- """
- url = f"{ADS_URL}/v1/export/bibtex"
- logging.info(f"url {url}")
-
- if bibcode:
- enc_bibcode = bibcode.replace("&","%26")
- data = bytes('{"bibcode":["'+enc_bibcode+'"]}', "utf-8")
- elif doi:
- data = bytes('{"bibcode":["'+bibcode+'"]}', "utf-8")
- else:
- raise Exception("bibcode or DOI are required")
- try:
- req = request.Request(url, data=data)
- req.add_header("Authorization", "Bearer:"+self.__csrf)
- logging.debug(f"url {url} -H Authorization: Bearer:{self.__csrf}")
-
- fd = request.urlopen(req)
- data = json.loads(fd.read().decode('utf8'))
- fd.close()
-
- if "export" in data:
- return data["export"]
- return None
- except Exception as e:
- raise Exception(f"Error getting ADS {e}")
-
-
-if __name__ == "__main__":
-
- try:
- __opt, __args = getopt.getopt(sys.argv[1:], 'b:d:h', ["help", "bibcode=", "doi="])
- except getopt.GetoptError as err:
- logging.error(str(err))
- sys.exit(1)
-
- __bibcode = None
- __doi = None
- for __o, __a in __opt:
- if __o in ("-h", "--help"):
- help("__main__")
- sys.exit(0)
- elif __o in ("-b", "--bibcode"):
- __bibcode = __a
- elif __o in ("-d", "--doi"):
- __doi = __a
-
- ads = adsbib()
- if __bibcode:
- print(ads.get(bibcode=__bibcode))
diff --git a/tests/dataorigin.py b/tests/dataorigin.py
deleted file mode 100644
index bed1b93..0000000
--- a/tests/dataorigin.py
+++ /dev/null
@@ -1,534 +0,0 @@
-"""by G.Landais (CDS) 2022-11-01
- Data Origin (test)
-
-
- Usage: dataoroigin.py [-h] [-f votable [-t type]] | [-i ivoid]
- -h: help
- -f: VOTable filename
- -t: VOTable type:("VOTable" or "VODML")
- -i: ivoid (search into registry)
-
-
- Notes:
- - howto extract XML from astropy.table.Table ? <- not possible!..
- - check data origin usinf relatedIdentifier ?
- - : http://www.ivoa.net/rdf/voresource/relationship_type eg: isDerivedFrom
- - Make a acknowledgment template
- - in registry: could we have a new field ?
- - mivot: use L.Michel library..
- - datasetDM: hoto add related identifier ?
- - use LastStepProvenance insteadof DatasetDM ?
-"""
-
-
-import astropy.io.votable as vot
-from astropy.table import Table
-import numpy as np
-import urllib.request
-import urllib.parse
-import requests as rq
-import xml.etree.ElementTree as ET
-import lxml.etree as etree
-import re
-import logging
-
-import adsbib
-
-DATA_ORIGIN_REQ = ("ivoid", "publisher", "version", "protocol", "request", "request_date", "contact", "landing_page")
-DATA_ORIGIN_PROV = ("publication_id", "curation_level", "resource_version", "rights", "title", # DataCenter info
- "creator", "related_resource", "editor", "publication_date", "resource_date") # bibliographic/origin info
-DATA_ORIGIN_DML= ("product_type", "product_sub_type", "did", "title", "creationType", "date", "author", "pud_did", "rights", "release_date", "contact", "doi", "bibcode")
-
-REGISTRY_SERVER = "https://dc.zah.uni-heidelberg.de/rr/q/pmh/pubreg.xml"
-ADS_URL = "https://api.adsabs.harvard.edu/v1/"
-
-
-class DataOrigin:
- """interface"""
- def print(self):
- print(str(self))
- def print_info(self):
- pass
- def cite(self):
- raise Exception("Cite not available yet")
- def ack(self):
- raise Exception("Ack not available yet")
-
-
-class DataOriginRegistry(DataOrigin):
- """Extract Data Origin from IVOA registry
- """
- def __init__(self, ivoid:str, registry:str=REGISTRY_SERVER, filename:str=None):
- self.__ivoid = ivoid
- self.__registry = registry
- if filename:
- with open(filename, "r") as fd:
- xml = fd.read().decode("utf-8")
- else:
- #ivoid = urllib.parse.quote(ivoid)
- req = urllib.request.Request(url=f"{registry}?verb=GetRecord&metadataPrefix=ivo_vor&identifier={ivoid}")
- with urllib.request.urlopen(req) as f:
- xml = f.read().decode("utf-8")
-
- self.__xml = ET.fromstring(xml)
- self.info = self.__get_registry_info()
-
-
- def __get_registry_info(self):
- root = self.__xml
- h = {}
- elt = root.findall(".//curation/publisher")[0].attrib["ivo-id"]
- if elt: h["publisher"] = root.findall(".//curation/publisher")[0].text
-
- author = []
- for elt in root.findall(".//curation//creator/name"):
- author.append(elt.text)
- if len(author)>0:
- h["creator"] = ",".join(author)
-
- elt = root.findall(".//curation/date[@role='Created']")
- if elt:
- h["publication_date"] = elt[0].text
-
- elt = root.findall(".//curation/contact/email")
- if elt:
- h["contact"] = elt[0].text
-
- elt = root.findall(".//content/type")
- if elt:
- h["type"] = elt[0].text
-
- elt = root.findall(".//content/contentLevel")
- if elt:
- h["curation_level"] = elt[0].text
-
- elt = root.findall(".//rights")
- if elt:
- h["rights"] = elt[0].text
-
- elt = root.findall(".//content/referenceURL")
- if elt:
- h["landing_page"] = elt[0].text
-
- elt = root.findall(".//identifier")
- if elt:
- h["ivoid"] = elt[0].text
-
-
- elt = root.findall(".//title")
- if elt:
- h["title"] = elt[0].text
-
- for elt in root.findall(".//altIdentifier"):
- if elt.text.find("doi:") > -1:
- h["doi"] = elt.text # only one DOI
-
- source=[]
- for elt in root.findall(".//content/source"):
- source.append(f"{elt.attrib['format']}:{elt.text}")
- h["source_origin"] = ",".join(source)
-
-
- return h
-
- def print_info(self):
- print(f"RESOURCE {self.__ivoid} (from registry):")
- for key in self.info:
- print(f"{key:<20}:{self.info[key]}")
-
- def cite(self):
- """Citation using registry metadata requires DOI
- """
- if "doi" not in self.info:
- raise Exception("DOI is required for citation")
-
- print("@dataset{")
- if "doi" in self.info:
- print(f" doi={{{self.info['doi']}}},")
- if "title" in self.info:
- print(f" title={{{self.info['title']}}}")
- if "creator" in self.info:
- print(f" authors={{{self.info['creator']}}},")
-
- ivoid_cite = f"{self.__registry}?verb=GetRecord&metadataPrefix=ivo_vor&identifier={self.__ivoid}"
- print(f" url={{{ivoid_cite}}},")
- if "publisher" in self.info:
- print(" publisher={"+self.info["publisher"]+"},")
- version = ""
- print(" ivoid={"+self.__ivoid+"},")
- if "version" in self.info:
- version += "(version "+self.info["version"]+")"
- if "publication_date" in self.info:
- version += " executed at "+self.info["publication_date"]
- if version != "":
- print(" version={"+version+"}\n")
-
- print("}")
-
-
-class VOFileDataOrigin(DataOrigin):
- """extract Data Origin from VOTable
- """
- def __init__(self, votable:vot.tree.VOTableFile, registry:str=REGISTRY_SERVER):
- self.__votable = votable
- self.ivoid = None
- self.data_origin = self.__extract_data_origin_from_info()
- self.data_request = self.__extract_request_from_info()
-
- def __extract_data_origin_from_info(self):
- """the idea is to populate a table with data origin info"""
- data = []
- for key in DATA_ORIGIN_PROV:
- try:
- elt = self.__votable.get_info_by_id(key)
- except Exception as e:
- logging.warning(e)
- data.append(None)
- continue
- data.append(elt.value)
- return Table(rows=[data], names=DATA_ORIGIN_PROV)
-
- def __extract_request_from_info(self):
- h = {}
- try:
- # Note ivoid could be multiple in case of xmatch/TAP
- self.ivoid = [self.__votable.get_info_by_id("ivoid").value]
- except :
- name = self.__votable.resources[0].name
- if name and name[0] in ('IVXJB') and name[1] == '/': #vizier
- self.ivoid = ["ivo://cds.vizier/"+name.lower()]
- else:
- raise Exception("ivoid is required")
-
- for key in DATA_ORIGIN_REQ:
- try:
- h[key] = self.__votable.get_info_by_id(key).value
- except Exception as e:
- logging.warning(e)
- return h
-
-
- def print(self):
- for key in self.data_request:
- print(f"{key}: {self.data_request[key]}")
-
- def print_info(self):
- for rec in self.data_origin:
- print(f"RESOURCE {self.ivoid[0]} (from VOTable)")
- print(f"{'ivoid':<20}:{self.ivoid[0]}")
- for key in DATA_ORIGIN_PROV:
- if rec[key] != None:
- print(f"{key:<20}:{rec[key]}")
-
- def cite(self) :
- """use VOTable header only"""
- bibtex = "% Protoype to cite a VO-query ?\n"
- bibtex += "@query{\n"
- bibtex += " ivoa={"+",".join(self.ivoid)+"}\n"
- if "publisher" in self.data_request:
- bibtex += " publisher={"+self.data_request["publisher"]+"}\n"
- version = ""
- if "protocol" in self.data_request:
- version += self.data_request["protocol"]+" "
- if "version" in self.data_request:
- version += "(version "+self.data_request["version"]+")"
- if "request_date" in self.data_request:
- version += " executed at "+self.data_request["request_date"]
- if version != "":
- bibtex += " version={"+version+"}\n"
- if "request" in self.data_request:
- bibtex += " url={"+self.data_request["request"]+"}\n"
- bibtex += "}"
- print(bibtex)
-
- def ack(self):
- """acknowledgment using VOTable header only
- needs: ivoid, publisher, protocol, version
- source_origin , author, date_origin
- """
- protocol = "?"
- if "protocol" in self.data_request:
- protocol = self.data_request["protocol"]
- datacenter = "?"
- if "publisher" in self.data_request:
- datacenter = self.data_request["publisher"]
- version = "?"
- if "version" in self.data_request:
- version = self.data_request["version"]
- qdate = "?"
- if "request_date" in self.data_request:
- qdate = self.data_request["request_date"]
-
-
- author = "?"
- if "creator" in self.data_origin:
- author = self.data_origin["creator"]
- source_origin = "?"
- if "source" in self.data_origin:
- source_origin = self.data_origin["source"]
- date_origin = "?"
- if "date" in self.data_origin:
- date_origin = self.data_origin["date"]
-
-
- import textwrap
- print("\n".join(textwrap.wrap(f"""(from VOTable)\nWe extract data published in {source_origin} ({author}, {date_origin}),
-via {datacenter} services (ivoa resource={self.__ivoid}, {pubdate})
-using {protocol} (version {version}, executed at {qdate})""", width=80)))
-
-
-
-class VODMLFileDataOrigin(DataOrigin):
- """extract Data Origin from VOTable (VODMLite/mivot)
- """
- def __init__(self, filename:str):
- self.__filename = filename
- self.ivoid = None
- self.data_origin = self.__extract_data_origin_from_info()
- #self.data_request = self.__extract_request_from_info()
-
- def __extract_data_origin_from_info(self):
- """the idea is to populate a table with data origin info"""
-
- import mivot
-
- with open(self.__filename, "r") as fd:
- xml = fd.read()
-
- xml_piece = mivot.extract_vodml_from_votable(xml)
- parser = mivot.vodml_parser(xml_piece)
- root = parser.parse()
-
- data = []
- coll = parser.search("dataset:Dataset")
- if coll is None :
- return
-
- for dset in coll[0]:
- product_type = parser.search("dataset:productType", dset)[0].value
- product_sub_type = parser.search("dataset:productSubType", dset)[0].value
- did = parser.search("dataset:DataID.datasetDID", dset)[0].value
- title = parser.search("dataset:DataID.title", dset)[0].value
- creationType = parser.search("dataset:DataID.creationType", dset)[0].value
- date = parser.search("dataset:DataID.date", dset)[0].value
- author = parser.search("dataset:Party.name", dset)[0].value
- pud_did = parser.search("dataset:Curation.publisherDID", dset)[0].value
- rights = parser.search("dataset:Curation.rights", dset)[0].value
- release_date = parser.search("dataset:Curation.releaseDate", dset)[0].value
- contact = parser.search("party.Organisation.email", dset)[0].value
- doi = parser.search("dataset:Curation.doi", dset)[0].value
- bibcode = parser.search("dataset:Curation.bibcode", dset)[0].value
-
- data.append((product_type,product_sub_type,did,title,creationType,date,author,pud_did,rights,release_date,contact,doi,bibcode))
- return Table(np.array(data), names=DATA_ORIGIN_DML)
-
- def print_info(self):
- for rec in self.data_origin:
- print("-----------------------------------")
- print(f"RESOURCE {rec['did']} (from VOTable)")
- print(f"{'ivoid':<20}:{rec['did']}")
- for key in DATA_ORIGIN_DML:
- if rec[key] != None:
- print(f"{key:<20}:{rec[key]}")
-
-
-
-class VOCite:
- def __init__(self, ivoid):
- self.__ivoid = ivoid
- self.__registry_info = None
-
- def get_registry_info(self):
- if self.__registry_info:
- return self.__registry_info
-
- self.__registry_info = DataOriginRegistry(self.__ivoid)
-
- return self.__registry_info
-
- def cite_source_origin(self):
- """- use ADS if resource exist : from bibcode/DOI
- - use registry (if DOI exists?)
- """
- info = self.get_registry_info()
-
- if "source_origin" in info.info:
- source = info.info["source_origin"].replace("\n","")
- mo = re.match(".*bibcode:([^ ]+).*$", source)
- if mo is None:
- mo = re.match(".*(\d{4}[a-zA-Z]+[a-zA-Z0-9.]+).*$", source)
- if mo:
- ads = adsbib.adsbib()
- try:
- export_citation = ads.get(bibcode=mo.group(1))
- if export_citation:
- print("%ADS export (from bibcode)\n")
- export_citation = re.sub("adsnote ="," ivoid = {"+self.__ivoid+"},\n adsnote =", export_citation)
- print(export_citation)
- return
- except Exception as e:
- logging.debug(e)
-
- mo = re.match(".*doi:([^ ]+).*$", info.info["source_origin"])
- if mo:
- ads = adsbib.adsbib()
- try:
- export_citation = ads.get(doi=mo.group(1))
- if export_citation:
- print("%ADS export (from doi)\n")
- export_citation = re.sub("adsnote ="," ivoid = {"+self.__ivoid+"},\n adsnote =", export_citation)
- print(export_citation)
- return
- except Exception as e:
- logging.debug(e)
- return
-
- def cite(self):
- info = self.get_registry_info()
- info.cite()
-
- def ack(self, vofile:VOFileDataOrigin=None):
- data_request = vofile.data_request
- protocol = "?"
- if "protocol" in data_request:
- protocol = data_request["protocol"]
- datacenter = "?"
- if "publisher" in data_request:
- datacenter = data_request["publisher"]
- version = "?"
- if "version" in data_request:
- version = data_request["version"]
- qdate = "?"
- if "request_date" in data_request:
- qdate = data_request["request_date"]
-
- reg_info = self.get_registry_info()
- info = reg_info.info
- pubdate = ""
- if "version" in info:
- pubdate += "(version "+info["version"]+")"
- if "publication_date" in info:
- pubdate += info["publication_date"]
- source_origin = ""
- if "creator" in info:
- author = info["creator"].split(",")[0]
- if "source_origin" in info:
- source_origin = info["source_origin"]
- if "datacenter" in info:
- datacenter = info["datacenter"]
- else:
- print("not available yet")
-
- date_origin = "???"
- if "date" in info:
- date_origin = info["date"]
-
- import textwrap
- print("\n".join(textwrap.wrap(f"""We extract data published in {source_origin} ({author}, {date_origin}),
-via {datacenter} services (ivoa resource={self.__ivoid}, {pubdate})
-using {protocol} (version {version}, executed at {qdate})""", width=80)))
-
-
-
-
-if __name__ == "__main__":
- import sys
- import getopt
-
- try:
- __opts, __args = getopt.getopt(sys.argv[1:], "aht:f:i:", ["help", "type=", "file=", "ivoid=", "ads"])
- except getopt.GetoptError as err:
- help("__main__")
- sys.exit(1)
-
- __filename = None
- __ivoid = None
- __type = "VOTable"
- __use_ads = False
-
- for __o, __a in __opts:
- if __o in ("-f", "--file"):
- __filename = __a
- elif __o in ("-h", "--help"):
- help("__main__")
- sys.exit(0)
- elif __o in ("-i", "--ivoid"):
- __ivoid= __a
- elif __o in ("-t", "--type"):
- __type = __a
- elif __o in ("-a", "--ads"):
- __use_ads = True
-
- if __ivoid :
- regdataorig = DataOriginRegistry(__ivoid)
-
- print("\nGET Data Origin (from registry)")
- regdataorig.print_info()
-
- print("\nCITE a resource (from Registry)")
- try:
- regdataorig.cite()
- except Exception as e:
- logging.error(e)
-
- if __use_ads:
- try:
- vocite = VOCite(__ivoid)
- print("\nCITE the \"source origin\" (using ADS or registry)")
- vocite.cite_source_origin()
- except Exception as e:
- logging.error(e)
-
- elif __filename and __type == "VOTable":
- table = vot.parse(__filename)
- vodatorig = VOFileDataOrigin(table)
- regdataorig = DataOriginRegistry(vodatorig.ivoid[0])
-
- print("GET Data Origin - basic (from VOTable)")
- vodatorig.print()
- print("\nGET Data Origin (from VOTable)")
- vodatorig.print_info()
-
- print("\nGET Data Origin (from registry)")
- regdataorig.print_info()
- print("\nCITE a VO query (from VOTable)")
- vodatorig.cite()
-
- try:
- print("\nCITE a resource (from Registry)")
- regdataorig.cite()
- except Exception as e:
- logging.error(e)
-
- try:
- vocite = VOCite(vodatorig.ivoid[0])
- if __use_ads:
- print("\nCITE the \"source origin\" (using ADS or registry)")
- vocite.cite_source_origin()
- except Exception as e:
- logging.error(e)
-
- print("\nAck (from VOTable+registry)")
- vocite.ack(vodatorig)
-
- elif __filename and __type == "VODML":
- vodatorig = VODMLFileDataOrigin(__filename) #TODO use votable astropy
-
- print("\nGET Data Origin (from VOTable)")
- vodatorig.print_info()
-
- print(vodatorig.data_origin)
-
- #print("\nCITE a VO query (from VOTable)") TODO
- #vodatorig.cite()
-
- #for ivoid in vodatorig.data_origin["did"]: TODO
- # vocite = VOCite(ivoid)
- # print("\nCITE the \"source origin\" (using ADS or registry)")
- # vocite.cite_source_origin()
-
- # print("\nAck (from VOTable+registry)")
- # vocite.ack(vodatorig)
-
-
-
diff --git a/tests/ex_heasarc.rst b/tests/ex_heasarc.rst
deleted file mode 100644
index 257f9d0..0000000
--- a/tests/ex_heasarc.rst
+++ /dev/null
@@ -1,42 +0,0 @@
-# result from dataorigin.py -i ivo://nasa.heasarc/atlascscpt
-#
-
-GET Data Origin (from registry)
-RESOURCE ivo://nasa.heasarc/atlascscpt (from registry):
-datacenter :NASA/GSFC HEASARC
-Author :Norris et al.
-DATA-CENTER-CONTACT :heasarc-vo at athena.gsfc.nasa.gov
-type :Catalog
-Curation-level :Research
-LANDING-PAGE :https://heasarc.gsfc.nasa.gov/W3Browse/all/atlascscpt.html
-IVOID :ivo://nasa.heasarc/atlascscpt
-title :AT Large Area Survey (ATLAS) CDF-S/SWIRE 1.4-GHz Components Catalog
-source_origin ::
- 2006AJ....132.2409N
-
-
-CITE a resource (from Registry)
-
-CITE the "source origin" (using ADS or registry)
-%ADS export (from bibcode)
-
-@ARTICLE{2006AJ....132.2409N,
- author = {{Norris}, Ray P. and {Afonso}, Jos{\'e} and {Appleton}, Phil N. and {Boyle}, Brian J. and {Ciliegi}, Paolo and {Croom}, Scott M. and {Huynh}, Minh T. and {Jackson}, Carole A. and {Koekemoer}, Anton M. and {Lonsdale}, Carol J. and {Middelberg}, Enno and {Mobasher}, Bahram and {Oliver}, Seb J. and {Polletta}, Mari and {Siana}, Brian D. and {Smail}, Ian and {Voronkov}, Maxim A.},
- title = "{Deep ATLAS Radio Observations of the Chandra Deep Field-South/Spitzer Wide-Area Infrared Extragalactic Field}",
- journal = {\aj},
- keywords = {Catalogs, Galaxies: Active, Galaxies: Evolution, Radio Continuum: Galaxies, Surveys, Astrophysics},
- year = 2006,
- month = dec,
- volume = {132},
- number = {6},
- pages = {2409-2423},
- doi = {10.1086/508275},
-archivePrefix = {arXiv},
- eprint = {astro-ph/0610538},
- primaryClass = {astro-ph},
- adsurl = {https://ui.adsabs.harvard.edu/abs/2006AJ....132.2409N},
- ivoid = {ivo://nasa.heasarc/atlascscpt},
- adsnote = {Provided by the SAO/NASA Astrophysics Data System}
-}
-
-
diff --git a/tests/ex_vizier.rst b/tests/ex_vizier.rst
deleted file mode 100644
index 665ff58..0000000
--- a/tests/ex_vizier.rst
+++ /dev/null
@@ -1,88 +0,0 @@
-# result of dataorigin.py -f J_AJ_161_36_table8.xml
-#
-GET Data Origin - basic (from VOTable)
-IVOID: ivo://cds.vizier/j/aj/161/36
-DATA-CENTER: ivo://cds.vizier
-VERSION: 7.294
-ACCESS-PROTOCOL: Simple Cone Search 1.03
-QUERY: https://vizier.cds.unistra.fr/viz-bin/conesearch/J/AJ/161/36/table8?RA=285.367%26DEC=39.28%26SR=1
-QUERY-DATE: 2022-10-30T12:08:00
-DATA-CENTER-CONTACT: cds-question@unistra.fr
-LANDING-PAGE: https://cdsarc.cds.unistra.fr/viz-bin/cat/J/AJ/161/36
-
-GET Data Origin (from VOTable)
-RESOURCE ivo://cds.vizier/j/aj/161/36 (from VOTable)
-IVOID :ivo://cds.vizier/j/aj/161/36
-Publication-id :doi:10.26093/cds/vizier.51610036
-Curation-level :Research
-Resource-version :2022-10-07T07:50:31Z
-Rights :https://cds.unistra.fr/vizier-org/licences_vizier.html
-Author :Bryson S.
-Publication-ref :2021AJ....161...36B
-Editor :Astronomical Journal
-
-GET Data Origin (from registry)
-RESOURCE ivo://cds.vizier/j/aj/161/36 (from registry):
-datacenter :CDS
-Author :Bryson S.,Kunimoto M.,Kopparapu R.K.,Coughlin J.L.,Borucki W.J.,Koch D.,Aguirre V.S.,Allen C.,Barentsen G.,Batalha N.M.,Berger T.,Boss A.,Buchhave L.A.,Burke C.J.,Caldwell D.A.,Campbell J.R.,Catanzarite J.,Chandrasekaran H.,Chaplin W.J.,Christiansen J.L.,Christensen-Dalsgaard J.,Ciardi D.R.,Clarke B.D.,Cochran W.D.,Dotson J.L.,Doyle L.R.,Duarte E.S.,Dunham E.W.,Dupree A.K.,Endl M.,Fanson J.L.,Ford E.B.,Fujieh M.,Gautier III T.N.,Geary J.C.,Gilliland R.L.,Girouard F.R.,Gould A.,Haas M.R.,Henze C.E.,Holman M.J.,Howard A.W.,Howell S.B.,Huber D.,Hunter R.C.,Jenkins J.M.,Kjeldsen H.,Kolodziejczak J.,Larson K.,Latham D.W.,Li J.,Mathur S.,Meibom S.,Middour C.,Morris R.L.,Morton T.D.,Mullally F.,Mullally S.E.,Pletcher D.,Prsa A.,Quinn S.N.,Quintana E.V.,Ragozzine D.,Ramirez S.V.,Sanderfer D.T.,Sasselov D.,Seader S.E.,Shabram M.,Shporer A.,Smith J.C.,Steffen J.H.,Still M.,Torres G.,Troeltzsch J.,Twicken J.D.,Uddin A.K.,Van Cleve J.E.,Voss J.,Weiss L.M.,Welsh W.F.,Wohler B.,Zamudio K.A.
-Publication-date :2021-03-16T13:00:26Z
-DATA-CENTER-CONTACT :cds-question@unistra.fr
-type :Catalog
-Curation-level :Research
-Rights :https://cds.unistra.fr/vizier-org/licences_vizier.html
-LANDING-PAGE :https://cdsarc.cds.unistra.fr/viz-bin/cat/J/AJ/161/36
-IVOID :ivo://CDS.VizieR/J/AJ/161/36
-title :117 exoplanets in habitable zone with Kepler DR25
-doi :doi:10.26093/cds/vizier.51610036
-source_origin :bibcode:2021AJ....161...36B
-
-CITE a VO query (from VOTable)
-% Prototype to cite a VO-query ?
-@query{
- ivoa={ivo://cds.vizier/j/aj/161/36}
- publisher={ivo://cds.vizier}
- version={Simple Cone Search 1.03 (version 7.294) executed at 2022-10-30T12:08:00}
- url={https://vizier.cds.unistra.fr/viz-bin/conesearch/J/AJ/161/36/table8?RA=285.367%26DEC=39.28%26SR=1}
-}
-
-CITE a resource (from Registry)
-@dataset{
- doi={doi:10.26093/cds/vizier.51610036},
- title={117 exoplanets in habitable zone with Kepler DR25}
- authors={Bryson S.,Kunimoto M.,Kopparapu R.K.,Coughlin J.L.,Borucki W.J.,Koch D.,Aguirre V.S.,Allen C.,Barentsen G.,Batalha N.M.,Berger T.,Boss A.,Buchhave L.A.,Burke C.J.,Caldwell D.A.,Campbell J.R.,Catanzarite J.,Chandrasekaran H.,Chaplin W.J.,Christiansen J.L.,Christensen-Dalsgaard J.,Ciardi D.R.,Clarke B.D.,Cochran W.D.,Dotson J.L.,Doyle L.R.,Duarte E.S.,Dunham E.W.,Dupree A.K.,Endl M.,Fanson J.L.,Ford E.B.,Fujieh M.,Gautier III T.N.,Geary J.C.,Gilliland R.L.,Girouard F.R.,Gould A.,Haas M.R.,Henze C.E.,Holman M.J.,Howard A.W.,Howell S.B.,Huber D.,Hunter R.C.,Jenkins J.M.,Kjeldsen H.,Kolodziejczak J.,Larson K.,Latham D.W.,Li J.,Mathur S.,Meibom S.,Middour C.,Morris R.L.,Morton T.D.,Mullally F.,Mullally S.E.,Pletcher D.,Prsa A.,Quinn S.N.,Quintana E.V.,Ragozzine D.,Ramirez S.V.,Sanderfer D.T.,Sasselov D.,Seader S.E.,Shabram M.,Shporer A.,Smith J.C.,Steffen J.H.,Still M.,Torres G.,Troeltzsch J.,Twicken J.D.,Uddin A.K.,Van Cleve J.E.,Voss J.,Weiss L.M.,Welsh W.F.,Wohler B.,Zamudio K.A.},
- url={https://dc.zah.uni-heidelberg.de/rr/q/pmh/pubreg.xml?verb=GetRecord&metadataPrefix=ivo_vor&identifier=ivo://cds.vizier/j/aj/161/36},
- ivoid={ivo://cds.vizier/j/aj/161/36},
- version={ executed at 2021-03-16T13:00:26Z}
-
-}
-
-CITE the "source origin" (using ADS or registry)
-%ADS export (from bibcode)
-
-@ARTICLE{2021AJ....161...36B,
- author = {{Bryson}, Steve and {Kunimoto}, Michelle and {Kopparapu}, Ravi K. and {Coughlin}, Jeffrey L. and {Borucki}, William J. and {Koch}, David and {Aguirre}, Victor Silva and {Allen}, Christopher and {Barentsen}, Geert and {Batalha}, Natalie M. and {Berger}, Travis and {Boss}, Alan and {Buchhave}, Lars A. and {Burke}, Christopher J. and {Caldwell}, Douglas A. and {Campbell}, Jennifer R. and {Catanzarite}, Joseph and {Chandrasekaran}, Hema and {Chaplin}, William J. and {Christiansen}, Jessie L. and {Christensen-Dalsgaard}, J{\o}rgen and {Ciardi}, David R. and {Clarke}, Bruce D. and {Cochran}, William D. and {Dotson}, Jessie L. and {Doyle}, Laurance R. and {Duarte}, Eduardo Seperuelo and {Dunham}, Edward W. and {Dupree}, Andrea K. and {Endl}, Michael and {Fanson}, James L. and {Ford}, Eric B. and {Fujieh}, Maura and {Gautier}, Thomas N., III and {Geary}, John C. and {Gilliland}, Ronald L. and {Girouard}, Forrest R. and {Gould}, Alan and {Haas}, Michael R. and {Henze}, Christopher E. and {Holman}, Matthew J. and {Howard}, Andrew W. and {Howell}, Steve B. and {Huber}, Daniel and {Hunter}, Roger C. and {Jenkins}, Jon M. and {Kjeldsen}, Hans and {Kolodziejczak}, Jeffery and {Larson}, Kipp and {Latham}, David W. and {Li}, Jie and {Mathur}, Savita and {Meibom}, S{\o}ren and {Middour}, Chris and {Morris}, Robert L. and {Morton}, Timothy D. and {Mullally}, Fergal and {Mullally}, Susan E. and {Pletcher}, David and {Prsa}, Andrej and {Quinn}, Samuel N. and {Quintana}, Elisa V. and {Ragozzine}, Darin and {Ramirez}, Solange V. and {Sanderfer}, Dwight T. and {Sasselov}, Dimitar and {Seader}, Shawn E. and {Shabram}, Megan and {Shporer}, Avi and {Smith}, Jeffrey C. and {Steffen}, Jason H. and {Still}, Martin and {Torres}, Guillermo and {Troeltzsch}, John and {Twicken}, Joseph D. and {Uddin}, Akm Kamal and {Van Cleve}, Jeffrey E. and {Voss}, Janice and {Weiss}, Lauren M. and {Welsh}, William F. and {Wohler}, Bill and {Zamudio}, Khadeejah A.},
- title = "{The Occurrence of Rocky Habitable-zone Planets around Solar-like Stars from Kepler Data}",
- journal = {\aj},
- keywords = {Exoplanets, 498, Astrophysics - Earth and Planetary Astrophysics, Astrophysics - Solar and Stellar Astrophysics},
- year = 2021,
- month = jan,
- volume = {161},
- number = {1},
- eid = {36},
- pages = {36},
- doi = {10.3847/1538-3881/abc418},
-archivePrefix = {arXiv},
- eprint = {2010.14812},
- primaryClass = {astro-ph.EP},
- adsurl = {https://ui.adsabs.harvard.edu/abs/2021AJ....161...36B},
- ivoid = {ivo://cds.vizier/j/aj/161/36},
- adsnote = {Provided by the SAO/NASA Astrophysics Data System}
-}
-
-
-
-Ack (from VOTable+registry)
-We extract data published in bibcode:2021AJ....161...36B (Bryson S., ???), via
-CDS services (ivoa resource=ivo://cds.vizier/j/aj/161/36, 2021-03-16T13:00:26Z)
-using Simple Cone Search 1.03 (version 7.294, executed at 2022-10-30T12:08:00)
-
diff --git a/tests/mivot.py b/tests/mivot.py
deleted file mode 100755
index 621286c..0000000
--- a/tests/mivot.py
+++ /dev/null
@@ -1,727 +0,0 @@
-#!/usr/bin/python3
-""" G.Landais (CDS)
- apr-2021
- manage vodmlinstance in VOTable
-
- parse vodmlinstance and store in classes.
-
- parser = vodml_parser(xml_piece)
- root = parser.parse()
- print(root)
- elements = parser.get("dmrole")
- ...
-
- main node is model_instance which contains global/table_mapping/template
- cf global, table_mapping, template which inherits from node class
-
- main test programme:
-
- ./mivot.py votable.vot
- cat file | ./mivot.py
-"""
-
-import logging
-import re, sys
-
-NODE_ATT = 1
-NODE_INSTANCE = 2
-NODE_COLLECTION = 3
-NODE_GLOBAL = 4
-NODE_MODEL = 5
-NODE_TEMPLATES = 6
-NODE_REF = 7
-
-NODE_NAME = {
-NODE_ATT : "attribute",
-NODE_INSTANCE : "instance",
-NODE_COLLECTION : "collection",
-NODE_GLOBAL :"global",
-NODE_MODEL : "model", #
-NODE_TEMPLATES : "templates", #
-NODE_REF : "reference",
-}
-
-class Out:
- def __init__(self, out=sys.stdout):
- self.out = out
- self.indent = 0
-
- def write(self,s):
- self.out.write(2*self.indent*" "+s)
-
-STDOut = Out(sys.stdout)
-
-class Node:
- """base Node"""
- def __init__(self, nodetype:int, dmrole:str=None, dmtype:str=None, id:str=None):
- """Node Constructor
- :param nodetype: NODE type (int)
- :param dmrole: DataModel role
- :param dmtype: DataModel type
- :param id: identifier (dmid) of the node
- """
- self.__type = nodetype
- self.__dmtype = dmtype
- self.__dmrole = dmrole
- self.id = id
-
- def nodetype(self)->int:
- """get Node type
- :return: Node type (int)
- """
- return self.__type
-
- def dmtype(self)->str:
- """get DataModel type (instance type)
- :return: DataModel type
- """
- return self.__dmtype
-
- def dmrole(self)->str:
- """get DataModel role
- :return: DataModel role
- """
- return self.__dmrole
-
- def print(self, out=STDOut):
- """print out
- :param out: output stream
- """
- opt = ""
- if self.__dmrole:
- opt= f" dmrole='{self.__dmrole}'"
- if self.__dmtype:
- opt= f" dmtype='{self.__dmtype}'"
- if self.id:
- opt= f" dmid='{self.id}'"
- out.write(f"<{NODE_NAME[self.__type]} {opt}/>")
-
- def __str__(self)->str:
- return f""
-
-
-class NodeIterable(Node):
- """Iterable Node"""
- def __init__(self, nodetype:int, dmrole:str=None, dmtype:str=None, id:str=None):
- """Iterable Node Constructor ()
- :param nodetype: NODE type (int)
- :param dmrole: DataModel role
- :param dmtype: DataModel type
- :param id: identifier (dmid) of the node
- """
- super().__init__(nodetype, dmrole, dmtype, id)
- self.__child = []
-
- def append(self, child:Node):
- """add a new node
- :param child: new node
- """
- self.__child.append(child)
-
- def __iter__(self):
- self.__n = 0
- return self
-
- def __next__(self):
- if self.__n >= len(self.__child):
- raise StopIteration
- self.__n += 1
- return self.__child[self.__n-1]
-
- def __len__(self):
- return len(self.__child)
-
- def print(self, out=STDOut):
- """print out"""
- out.write(f"<{NODE_NAME[self.nodetype()]}>")
-
- def __str__(self)->str:
- s = f"<{NODE_NAME[self.nodetype()]} ({len(self)} children)>"
- return s
-
-
-class Attribute(Node):
- def __init__(self, dmrole, dmtype, id=None, value:str=None):
- super().__init__(NODE_ATT, dmrole, dmtype, id)
- self.value = value
-
- def print(self, out=STDOut):
- opt = ""
- if self.dmrole():
- opt += f" dmrole='{self.dmrole()}'"
- if self.dmtype():
- opt += f" dmtype='{self.dmtype()}'"
- if self.id:
- opt += f" dmid='{self.id}'"
- if self.value:
- opt += f" value='{self.value}'"
- out.write(f"\n")
-
- def __str__(self):
- return f""
-
-
-class Instance(NodeIterable):
- def __init__(self, dmrole:str=None, dmtype:str=None, id:str=None):
- super().__init__(NODE_INSTANCE, dmrole, dmtype, id)
-
- def print(self, out=STDOut):
- opt = ""
- if self.dmrole():
- opt += f" dmrole='{self.dmrole()}'"
- if self.dmtype():
- opt += f" dmtype='{self.dmtype()}'"
- if self.id:
- opt += f" dmid='{self.id}'"
- out.write(f"\n")
- out.indent +=1
- for node in self:
- node.print(out)
- out.indent -=1
- out.write("\n")
-
- def get(self, dmrole:str)->Node:
- """get the dmrole in an instancenode (unique by definition)
- :param dmrole: dmrole to search
- :return: the node
- """
- for node in self:
- if node.dmrole() == dmrole: return node
-
-
-class Collection(NodeIterable):
- def __init__(self, dmrole:str):
- super().__init__(NODE_COLLECTION, dmrole)
-
- def append(self, child:Node):
- """add a new node
- :param child: new node
- """
- # check consistency
- ctype = child.nodetype()
- if ctype == NODE_COLLECTION:
- logging.error("Collection can't be child of Collection")
- return
- elif ctype in (NODE_INSTANCE, NODE_REF) :
- if child.dmrole() != None and child.dmrole() != "":
- logging.warning("dmrole is not accepted in Instance is a Collection child "+str(child))
-
- super().append(child)
-
- def print(self, out=STDOut):
- opt = ""
- if self.dmrole():
- opt += f" dmrole='{self.dmrole()}'"
- out.write(f"\n")
- out.indent +=1
- for node in self:
- node.print(out)
- out.indent -=1
- out.write("\n")
-
- def __str__(self):
- s = f""
- return s
-
-
-class Vodml:
- def __init__(self):
- self.models = None
- self.globals = Globals()
- self.templates = []
-
- def set_default_model(self):
- self.models = Model()
- self.models.name = "ivoa"
- self.models.uri = "https://www.ivoa.net/xml/VODML/IVOA-v1.vo-dml.xml"
-
- def print(self, out=STDOut):
- out.write("<VODML>")
- out.write("<REPORT status='OK'>")
- out.write("mivot.py mapping</REPORT>")
- if self.models:
- self.models.print(out)
- if self.globals:
- self.globals.print(out)
- for template in self.templates:
- template.print(out)
- out.write("</VODML>")
-
-
-class Globals(NodeIterable):
- def __init__(self):
- super().__init__(NODE_GLOBAL)
-
- def print(self, out=STDOut):
- out.write("\n\n")
- for node in self:
- node.print(out)
- out.write("\n")
-
- def __str__(self):
- s = ""
- return s
-
-
-class Reference(Node):
- def __init__(self, dmrole:str, ref:str=None, sourceref:str=None):
- super().__init__(NODE_REF, dmrole)
- self.ref = ref # INSTANCE
- self.sourceref = sourceref
-
- def print(self, out:Out):
- opt = ""
- if self.dmrole():
- opt += f" dmrole='{self.dmrole()}'"
- #if self.dmtype():
- # opt += f" dmtype='{self.dmtype()}'"
- if self.sourceref:
- opt += f" sourceref='{self.sourceref}'"
- if self.ref:
- opt += f" dmref='{self.ref}'"
- out.write(f"\n")
-
-
-class Model(Node):
- def __init__(self):
- super().__init__(NODE_MODEL)
- self.name = None
- self.syntax = None
- self.uri = None
-
- def print(self, out):
- opt = ""
- if self.name:
- opt += f" name='{self.name}'"
- if self.syntax:
- opt += f" syntax='{self.syntax}'"
- if self.uri:
- opt += f" url='{self.uri}'"
-
- out.write(f"")
-
- def __str__(self):
- return f""
-
-
-
-class Template(NodeIterable):
- def __init__(self, table_ref:str):
- super().__init__(NODE_TEMPLATES)
- self.table_ref = table_ref
-
- def print(self, out:Out):
- opt = ""
- if self.table_ref:
- opt += f" tableref='{self.table_ref}'"
- out.write(f"\n")
- out.indent += 1
- for node in self:
- node.print(out)
- out.indent -= 1
- out.write("\n")
-
- def __str__(self):
- s = f""
- return s
-
-""" Parser """
-import xmltodict
-#from lxml import etree
-
-class vodml_parser:
- def __init__(self, doc:str):
- """Constructor
- :param doc: piece of XML
- """
- self.__doc = doc
- self.__xml = xmltodict.parse(doc)['VODML']
-
- self.__ids = {} # list of IDs available for dmref usage
- self.__dmrefs = [] # list of instance having dmref
- self.__index = {} # dmrole index
-
-
- def get(self, dmrole:str) -> list:
- """get all node matching with dmrole
- :param dmrole: dmrole name
- :return: list of nodes
- """
- if dmrole in self.__index:
- return self.__index[dmrole]
- return None
-
- def parse(self):
- """parse xml and memorize
- """
- self.model = self.__fill_models()
- self.globals = self.__fill_globals()
- self.template = self.__fill_template()
- self.__resolve_dmrefs()
-
- def __resolve_dmrefs(self):
- for inst in self.__dmrefs:
- logging.debug("resolve dmref {}".format(inst.ref))
- if inst.ref not in self.__ids:
- logging.error("dmref {} is not resolved".format(inst.ref))
- else:
- inst.ref = self.__ids[inst.ref]
-
- def __get_att(self, doc, param):
- if param in doc: return doc[param]
- low = param.lower()
- if low in doc: return doc[low]
-
- def __add_index(self, node:Node):
- print(f"add index {node}")
- role = node.dmrole()
- if role is None:
- return
- if role not in self.__index:
- self.__index[role] = []
- self.__index[role].append(node)
-
-
- def __fill_models(self):
- doc = self.__xml
- if "MODEL" not in doc:
- return None
-
- models = []
- if isinstance(self.__xml["MODEL"], list):
- lmodels = self.__xml["MODEL"]
- else:
- lmodels = [self.__xml["MODEL"]]
-
- for instxml in lmodels:
- model = Model()
- if "@syntax" in instxml:
- model.syntax = instxml["@syntax"]
- if "@name" in instxml:
- model.name = instxml["@name"]
- if "@url" in instxml:
- model.uri = instxml["@url"]
- models.append(model)
- return models
-
-
- def __fill_template(self, doc):
- if "INSTANCE" in doc:
- logging.debug("add table_row_template/instance")
- return self.__fill_instance(doc["INSTANCE"])
- raise Exception("only INSTANCE are acceptable under TEMPLATES")
-
- def __fill_instance(self, inst)->Instance:
- if "@dmrole" in inst:
- dmrole = inst["@dmrole"]
- else:
- dmrole = None
- if "@dmtype" in inst:
- dmtype = inst["@dmtype"]
- else:
- dmtype = None
- if "@dmid" in inst:
- id = inst["@dmid"]
- else:
- id = None
-
- instance = Instance(dmrole, dmtype, id)
- if id:
- self.__ids[id] = instance
-
- self.__add_index(instance)
- return instance
-
- def __fill_attribute(self,att)->Attribute:
- if "@dmrole" in att:
- dmrole = att["@dmrole"]
- else:
- dmrole = None
- if "@dmtype" in att:
- dmtype = att["@dmtype"]
- else:
- dmtype = None
- if "@value" in att:
- value = att["@value"]
- else:
- value = None
- if "@dmid" in att:
- id = att["@dmid"]
- else:
- id = None
-
- attribute = Attribute(dmrole, dmtype, id, value=value)
- if id :
- self.__ids[id] = attribute
-
- self.__add_index(attribute)
- return attribute
-
- def __fill_reference(self,ref)->Reference:
- if "@dmrole" in ref:
- dmrole = ref["@dmrole"]
- else:
- dmrole = None
-# if "@dmtype" in ref:
-# dmtype = ref["@dmtype"]
-# else:
-# dmtype = None
- dmref = sourceref = None
- if "@dmref" in ref:
- dmref = ref["@dmref"]
- elif "@sourceref" in ref:
- sourceref = ref["@sourceref"]
- else:
- raise Exception("reference is needed")
-
- reference = Reference(dmrole, dmref, sourceref)
- self.__dmrefs.append(reference)
- self.__add_index(reference)
- return reference
-
- def __fill_collection(self, collection):
- if "@dmrole" in collection:
- dmrole = collection["@dmrole"]
- else:
- dmrole = None
- coll = Collection(dmrole)
- self.__add_index(coll)
- return coll
-
-
- def __fill_nodes(self, node:Node, doc):
- for name in doc:
- elt = doc[name]
-
- if name == "INSTANCE":
- if isinstance(elt, list):
- for inst in elt:
- instance = self.__fill_instance(inst)
- node.append(instance)
- self.__fill_nodes(instance, inst)
- else:
- instance = self.__fill_instance(elt)
- node.append(instance)
- self.__fill_nodes(instance, elt)
-
-
- elif name == "ATTRIBUTE":
- if isinstance(elt, list):
- for atts in elt:
- att = self.__fill_attribute(atts)
- node.append(att)
- else:
- att = self.__fill_attribute(elt)
- node.append(att)
-
- elif name == "COLLECTION":
- if isinstance(elt, list):
- for colls in elt:
- coll = self.__fill_collection(colls)
- node.append(coll)
- self.__fill_nodes(coll, colls)
- else:
- coll = self.__fill_collection(elt)
- node.append(coll)
- self.__fill_nodes(coll, elt)
-
- elif name == "REFERENCE":
- if isinstance(elt, list):
- for refs in elt:
- ref = self.__fill_reference(refs)
- node.append(ref)
- else:
- ref = self.__fill_reference(elt)
- node.append(ref)
-
-
- def __fill_globals(self)->Globals:
- glob = Globals()
- self.__fill_nodes(glob, self.__xml["GLOBALS"])
- return glob
-
-
- def __fill_template(self):
- if "@tableref" in self.__xml["TEMPLATES"]:
- tableref = self.__xml["TEMPLATES"]["@tableref"]
- else:
- tableref = ""
- template = Template(tableref)
- self.__fill_nodes(template, self.__xml["TEMPLATES"])
- return template
-
- def __search_index(self, role:str)->list:
- if role in self.__index:
- return self.__index[role]
- return None
-
- def search(self, role:str, node:Node=None)->Node:
- #logging.debug(f"..search {role} in {node}")
- matches = self.__search_index(role)
- if node is None:
- return matches
- if matches is None:
- return None
- nmatches = len(matches)
-
- result = []
- if isinstance(node, NodeIterable):
- for n in node:
- if n in matches:
- result.append(n)
- if len(result) == nmatches:
- break
-
- if n.nodetype() == NODE_REF:
- logging.debug("I add the ref")
- if n.ref in matches:
- result.append(n)
- if len(result) == nmatches:
- break
- continue
-
- if n.nodetype() == NODE_ATT:
- continue
-
- f = self.search(role, n)
- if f:
- result += f
- else:
- if node in matches:
- return [node]
- if node.nodetype() == NODE_REF:
- if node.ref in matches:
- result.append(node)
-
- f = self.search(role, node)
- if f:
- return f
-
- if len(result) == 0:
- return None
- return result
-
-# def xpath(self, expression:str):
-# if self.__lxml is None:
-# self.__lxml = etree.parse(self.__doc)
-# l = tree.xpath(expression)
-# return l
-
-
-def extract_vodml_from_votable(doc:str):
- """extract vodml piece
- :param doc: piece of xml(votable)
- :return: vodml sectionmeas:GenericMeasure.coord.value
- """
- # try tag
- begin = doc.find("")
- if end < 0:
- raise Exception("NO VODML found")
- return doc[begin:end+8]
-
-
-# -----------------------------------------------------------------------------
-# TEST
-# -----------------------------------------------------------------------------
-def test_parser(filename:str):
- with open(filename, "r") as fd:
- data = extract_vodml_from_votable(fd.read())
- parser = vodml_parser(data)
- parser.parse()
- parser.globals.print()
-
- coll = parser.search("dataset:Dataset")
- print(coll)
- if coll is None:
- return
- for inst in coll[0]:
- print(f"look in {inst}")
- s = parser.search('dataset:Curation.bibcode', inst)
- print("search dataset:Curation.bibcode = "+str(s))
- print("SEARCH dataset:Dataset.dataID = "+str(parser.search('dataset:Dataset.dataID')))
-
-
-# -----------------------------------------------------------------------------
-if __name__ == "__main__":
- logging.basicConfig(level=logging.DEBUG)
-
- test_parser("tap.xml")
-
- #create
- vodml = Vodml()
- vodml.set_default_model()
- global_node = vodml.globals
-
- collection = Collection(dmrole='dataset:Dataset')
- global_node.append(collection)
-
- # create first DatasetDM instance
- dset = Instance(dmtype='dataset:Dataset')
- collection.append(dset)
-
- dset.append(Attribute(dmrole='dataset:productType', dmtype='ivoa:string', value='CATALOGUE'))
- dset.append(Attribute(dmrole='dataset:productSubType', dmtype='ivoa:string', value='Critical Comp.'))
-
- inst = Instance(dmrole='dataset:Dataset.dataID', dmtype='dataset:DataID')
- inst.append(Attribute(dmrole='dataset:DataID.datasetDID', dmtype='ivoa:string', value='ivo://CDS.VizieR.IV/39'))
- inst.append(Attribute(dmrole='dataset:DataID.title', dmtype='ivoa:string', value='TESS Input Catalog version 8.2 (TIC v8.2)'))
- inst.append(Attribute(dmrole='dataset:DataID.creationType', dmtype='ivoa:string', value='CATALOG_EXTRACTION'))
- inst.append(Attribute(dmrole='dataset:DataID.date', dmtype='ivoa:date', value='2021'))
- instcr = Instance(dmrole='dataset:DataID.creator', dmtype='Party')
- inst.append(instcr)
- instcr.append(Attribute(dmrole='dataset:Party.name', dmtype='ivoa:string', value='Paegert M.'))
- dset.append(inst)
-
- inst = Instance(dmrole='dataset:Dataset.curation', dmtype='dataset:Curation')
- dset.append(inst)
- inst.append(Attribute(dmrole='dataset:Curation.publisherDID', dmtype='ivoa:string', value='ivo://CDS'))
- inst.append(Attribute(dmrole='dataset:Curation.rights', dmtype='ivoa:string', value='https://cds.unistra.fr/vizier-org/licenses_vizier.html'))
- inst.append(Attribute(dmrole='dataset:Curation.releaseDate', dmtype='ivoa:date', value='2022-10-20T17:15:00Z'))
- instct = Instance(dmrole='dataset:Curation.contact', dmtype='party.Organisation')
- inst.append(instct)
- instct.append(Attribute(dmrole='party.Organisation.email', dmtype='ivoa:string', value='cds-question@unistra.fr'))
- inst.append(Attribute(dmrole='dataset:Curation.doi', dmtype='ivoa:string', value=''))
- inst.append(Attribute(dmrole='dataset:Curation.bibcode', dmtype='ivoa:string', value='2021arXiv210804778P'))
-
- # create 2D DatasetDM instance
- dset = Instance(dmtype='dataset:Dataset')
- collection.append(dset)
-
- dset.append(Attribute(dmrole='dataset:productType', dmtype='ivoa:string', value='CATALOGUE'))
- dset.append(Attribute(dmrole='dataset:productSubType', dmtype='ivoa:string', value='General Comp.'))
-
- inst = Instance(dmrole='dataset:Dataset.dataID', dmtype='dataset:DataID')
- inst.append(Attribute(dmrole='dataset:DataID.datasetDID', dmtype='ivoa:string', value='ivo://CDS.VizieR/J/AJ/161/36'))
- inst.append(Attribute(dmrole='dataset:DataID.title', dmtype='ivoa:string', value='117 exoplanets in habitable zone with Kepler DR25'))
- inst.append(Attribute(dmrole='dataset:DataID.creationType', dmtype='ivoa:string', value='COPY_ORIGINAL'))
- inst.append(Attribute(dmrole='dataset:DataID.date', dmtype='ivoa:date', value='2021'))
- instcr = Instance(dmrole='dataset:DataID.creator', dmtype='Party')
- inst.append(instcr)
- instcr.append(Attribute(dmrole='dataset:Party.name', dmtype='ivoa:string', value='Bryson S.'))
- dset.append(inst)
-
- inst = Instance(dmrole='dataset:Dataset.curation', dmtype='dataset:Curation')
- dset.append(inst)
- inst.append(Attribute(dmrole='dataset:Curation.publisherDID', dmtype='ivoa:string', value='ivo://CDS'))
- inst.append(Attribute(dmrole='dataset:Curation.rights', dmtype='ivoa:string', value='https://cds.unistra.fr/vizier-org/licenses_vizier.html'))
- inst.append(Attribute(dmrole='dataset:Curation.releaseDate', dmtype='ivoa:date', value='2022-10-07T06:50:31Z'))
- instct = Instance(dmrole='dataset:Curation.contact', dmtype='party.Organisation')
- inst.append(instct)
- instct.append(Attribute(dmrole='party.Organisation.email', dmtype='ivoa:string', value='cds-question@unistra.fr'))
- inst.append(Attribute(dmrole='dataset:Curation.doi', dmtype='ivoa:string', value='10.26093/cds/vizier.51610036'))
- inst.append(Attribute(dmrole='dataset:Curation.bibcode', dmtype='ivoa:string', value='2021AJ....161...36B'))
- vodml.print()
-
diff --git a/tests/tap.xml b/tests/tap.xml
deleted file mode 100644
index 400e0bf..0000000
--- a/tests/tap.xml
+++ /dev/null
@@ -1,999 +0,0 @@
-[deleted file content: a VOTable test fixture carrying a hand-made MIVOT mapping
- block and a result table of TESS Input Catalog identifiers, J2000 positions
- (RA/Dec), SDSS DR9 object identifiers, positions from SIMBAD and Kepler Object
- Identifiers]
diff --git a/voprov_example.png b/voprov_example.png
new file mode 100644
index 0000000..9172ab8
Binary files /dev/null and b/voprov_example.png differ