Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(xmlupload): forbid empty strings in some tags of the XML, et al. (DEV-2439) #433

Merged
merged 7 commits into from
Jul 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/check-pr-title.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
steps:
- uses: deepakputhraya/action-pr-title@master
with:
regex: '[a-z]+(\([0-9a-z\-_, ]+\))?!?: [a-z].+\(DEV-\d+\)$|chore: release \d+\.\d+\.\d+$|chore\(deps.*\): bump .+ from .+ to .+$'
regex: '[a-z]+(\([0-9a-z\-_, ]+\))?!?: .+\(DEV-\d+\)$|chore: release \d+\.\d+\.\d+$|chore\(deps.*\): bump .+ from .+ to .+$'
allowed_prefixes: "fix,refactor,feat,docs,chore,style,test"
disallowed_prefixes: "feature,hotfix"
prefix_case_sensitive: true
Expand Down
3 changes: 2 additions & 1 deletion src/dsp_tools/fast_xmlupload/upload_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ def _upload_without_processing(
with open(file, "rb") as bitstream:
try:
response_upload = requests.post(
url=f"{regex.sub(r'/$', '', sipi_url)}/upload_without_processing?token={con.get_token()}",
url=f"{regex.sub(r'/$', '', sipi_url)}/upload_without_processing",
headers={"Authorization": f"Bearer {con.get_token()}"},
files={"file": bitstream},
timeout=8 * 60,
)
Expand Down
3 changes: 2 additions & 1 deletion src/dsp_tools/models/sipi.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def upload_bitstream(self, filepath: str) -> dict[Any, Any]:
"file": (os.path.basename(filepath), bitstream_file),
}
response = requests.post(
self.sipi_server + "/upload?token=" + self.token,
self.sipi_server + "/upload",
headers={"Authorization": "Bearer " + self.token},
files=files,
timeout=5 * 60,
)
Expand Down
18 changes: 13 additions & 5 deletions src/dsp_tools/resources/schema/data.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@
targetNamespace="https://dasch.swiss/schema"
elementFormDefault="qualified">

<!-- nonEmptyString: a string that must contain at least one non-whitespace character.
     minLength="1" rejects the empty string; the pattern additionally rejects content
     that consists of whitespace only.
     NOTE(review): in XSD regular expressions "." does not match line breaks, so content
     spanning several lines is presumably rejected as well. Confirm that this is intended. -->
<xs:simpleType name="nonEmptyString">
<xs:restriction base="xs:string">
<xs:minLength value="1"/>
<xs:pattern value=".*\S+.*"/>
</xs:restriction>
</xs:simpleType>

<!-- data type for resrefs "ID|ID|..." -->
<xs:simpleType name="resrefs_type">
<xs:restriction base="xs:token">
Expand Down Expand Up @@ -118,7 +126,7 @@
<!-- geoname value type -->
<xs:complexType name="geoname_type">
<xs:simpleContent>
<xs:extension base="xs:string">
<xs:extension base="nonEmptyString">
<xs:attribute name="comment" type="xs:string"/>
<xs:attribute name="permissions" type="xs:NCName" use="optional"/>
</xs:extension>
Expand All @@ -128,7 +136,7 @@
<!-- list value type -->
<xs:complexType name="list_type">
<xs:simpleContent>
<xs:extension base="xs:string">
<xs:extension base="nonEmptyString">
<xs:attribute name="comment" type="xs:string"/>
<xs:attribute name="permissions" type="xs:NCName" use="optional"/>
</xs:extension>
Expand Down Expand Up @@ -158,7 +166,7 @@
<!-- period value type -->
<xs:complexType name="period_type">
<xs:simpleContent>
<xs:extension base="xs:string">
<xs:extension base="nonEmptyString">
<xs:attribute name="comment" type="xs:string"/>
<xs:attribute name="permissions" type="xs:NCName" use="optional"/>
</xs:extension>
Expand All @@ -168,7 +176,7 @@
<!-- resptr_type (link to other resource) value type -->
<xs:complexType name="resptr_type">
<xs:simpleContent>
<xs:extension base="xs:string">
<xs:extension base="nonEmptyString">
<xs:attribute name="comment" type="xs:string"/>
<xs:attribute name="permissions" type="xs:NCName" use="optional"/>
</xs:extension>
Expand Down Expand Up @@ -379,7 +387,7 @@
<!-- bitstream tag -->
<xs:complexType name="bitstream_type">
<xs:simpleContent>
<xs:extension base="xs:string">
<xs:extension base="nonEmptyString">
<xs:attribute name="permissions" type="xs:NCName" use="optional"/>
</xs:extension>
</xs:simpleContent>
Expand Down
25 changes: 25 additions & 0 deletions src/dsp_tools/utils/xml_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,29 @@ def _check_consistency_with_ontology(
logger.info("Resource types and properties are consistent with the ontology.")


def _check_if_bitstreams_exist(
    root: etree._Element,
    imgdir: str,
) -> None:
    """
    Make sure that all bitstreams referenced in the XML file exist in the imgdir.

    Every direct child of the root is treated as a resource.
    All elements below it whose tag is exactly "bitstream" are inspected,
    and the element text is interpreted as a path relative to the imgdir
    (an absolute path is used as-is).
    Bitstream elements without text are ignored here instead of crashing with an IndexError;
    rejecting empty tags is left to whatever validation runs before this check.

    Args:
        root: parsed XML file
        imgdir: folder where the bitstreams are stored

    Raises:
        UserError: if a bitstream does not exist in the imgdir
    """
    # Convert once, so that the join below does not depend on str / Path operator fallback.
    base_dir = Path(imgdir)
    for res in root:
        # Single traversal per resource: look at every bitstream element, not only the first one.
        for elem in res.iter():
            if elem.tag != "bitstream" or not elem.text:
                continue
            pth = Path(elem.text)
            if not (base_dir / pth).is_file():
                raise UserError(
                    f"Bitstream '{pth!s}' of resource '{res.attrib['label']}' does not exist in the imgdir '{imgdir}'."
                )


def xml_upload(
input_file: Union[str, Path, etree._ElementTree[Any]],
server: str,
Expand Down Expand Up @@ -496,6 +519,8 @@ def xml_upload(
# parse the XML file
validate_xml_against_schema(input_file=input_file)
root = _parse_xml_file(input_file=input_file)
if not preprocessing_done:
_check_if_bitstreams_exist(root=root, imgdir=imgdir)
shortcode = root.attrib["shortcode"]
default_ontology = root.attrib["default-ontology"]
logger.info(f"Validated and parsed the XML file. Shortcode='{shortcode}' and default_ontology='{default_ontology}'")
Expand Down