From 0ad742c19c5150c6e0129ac4d3aeca8dc4ff9e20 Mon Sep 17 00:00:00 2001
From: Joana Sousa
Date: Tue, 1 Mar 2022 11:28:19 +0100
Subject: [PATCH 01/73] [maven-release-plugin] prepare for next development
iteration
---
metis-authentication/metis-authentication-common/pom.xml | 2 +-
metis-authentication/metis-authentication-rest-client/pom.xml | 2 +-
metis-authentication/metis-authentication-rest/pom.xml | 2 +-
metis-authentication/metis-authentication-service/pom.xml | 2 +-
metis-authentication/pom.xml | 2 +-
metis-common/metis-common-mongo/pom.xml | 2 +-
metis-common/metis-common-network/pom.xml | 2 +-
metis-common/metis-common-solr/pom.xml | 2 +-
metis-common/metis-common-utils/pom.xml | 2 +-
metis-common/metis-common-zoho/pom.xml | 2 +-
metis-common/pom.xml | 2 +-
metis-core/metis-core-common/pom.xml | 2 +-
metis-core/metis-core-rest/pom.xml | 2 +-
metis-core/metis-core-service/pom.xml | 2 +-
metis-core/pom.xml | 2 +-
metis-dereference/metis-dereference-common/pom.xml | 2 +-
metis-dereference/metis-dereference-import/pom.xml | 2 +-
metis-dereference/metis-dereference-rest/pom.xml | 2 +-
metis-dereference/metis-dereference-service/pom.xml | 2 +-
metis-dereference/pom.xml | 2 +-
metis-enrichment/metis-enrichment-client/pom.xml | 2 +-
metis-enrichment/metis-enrichment-common/pom.xml | 2 +-
metis-enrichment/metis-enrichment-rest/pom.xml | 2 +-
metis-enrichment/metis-enrichment-service/pom.xml | 2 +-
metis-enrichment/pom.xml | 2 +-
metis-harvesting/pom.xml | 2 +-
metis-indexing/pom.xml | 2 +-
metis-media-service/pom.xml | 2 +-
metis-normalization/pom.xml | 2 +-
metis-repository/pom.xml | 2 +-
metis-schema/pom.xml | 2 +-
metis-transformation/metis-transformation-service/pom.xml | 2 +-
metis-transformation/pom.xml | 2 +-
metis-validation/metis-validation-client/pom.xml | 2 +-
metis-validation/metis-validation-common/pom.xml | 2 +-
metis-validation/metis-validation-rest/pom.xml | 2 +-
metis-validation/metis-validation-service/pom.xml | 2 +-
metis-validation/pom.xml | 2 +-
pom.xml | 4 ++--
39 files changed, 40 insertions(+), 40 deletions(-)
diff --git a/metis-authentication/metis-authentication-common/pom.xml b/metis-authentication/metis-authentication-common/pom.xml
index a1ebce213..478002639 100644
--- a/metis-authentication/metis-authentication-common/pom.xml
+++ b/metis-authentication/metis-authentication-common/pom.xml
@@ -4,7 +4,7 @@
metis-authentication
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-authentication-common
diff --git a/metis-authentication/metis-authentication-rest-client/pom.xml b/metis-authentication/metis-authentication-rest-client/pom.xml
index fa5dcbd8f..bc73c59bc 100644
--- a/metis-authentication/metis-authentication-rest-client/pom.xml
+++ b/metis-authentication/metis-authentication-rest-client/pom.xml
@@ -4,7 +4,7 @@
metis-authentication
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-authentication-rest-client
diff --git a/metis-authentication/metis-authentication-rest/pom.xml b/metis-authentication/metis-authentication-rest/pom.xml
index c566c4380..eda05fa57 100644
--- a/metis-authentication/metis-authentication-rest/pom.xml
+++ b/metis-authentication/metis-authentication-rest/pom.xml
@@ -4,7 +4,7 @@
metis-authentication
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-authentication-rest
war
diff --git a/metis-authentication/metis-authentication-service/pom.xml b/metis-authentication/metis-authentication-service/pom.xml
index 0dea6f614..59c1fdcba 100644
--- a/metis-authentication/metis-authentication-service/pom.xml
+++ b/metis-authentication/metis-authentication-service/pom.xml
@@ -4,7 +4,7 @@
metis-authentication
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-authentication-service
diff --git a/metis-authentication/pom.xml b/metis-authentication/pom.xml
index a91b9daa4..3c276c648 100644
--- a/metis-authentication/pom.xml
+++ b/metis-authentication/pom.xml
@@ -4,7 +4,7 @@
metis-framework
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-authentication
pom
diff --git a/metis-common/metis-common-mongo/pom.xml b/metis-common/metis-common-mongo/pom.xml
index 9f45d1369..180197dbb 100644
--- a/metis-common/metis-common-mongo/pom.xml
+++ b/metis-common/metis-common-mongo/pom.xml
@@ -4,7 +4,7 @@
metis-common
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-common-mongo
diff --git a/metis-common/metis-common-network/pom.xml b/metis-common/metis-common-network/pom.xml
index 4184a069e..c13c1ed7e 100644
--- a/metis-common/metis-common-network/pom.xml
+++ b/metis-common/metis-common-network/pom.xml
@@ -4,7 +4,7 @@
metis-common
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-common-network
diff --git a/metis-common/metis-common-solr/pom.xml b/metis-common/metis-common-solr/pom.xml
index 041ed3dcc..1ac8f2188 100644
--- a/metis-common/metis-common-solr/pom.xml
+++ b/metis-common/metis-common-solr/pom.xml
@@ -4,7 +4,7 @@
metis-common
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-common-solr
diff --git a/metis-common/metis-common-utils/pom.xml b/metis-common/metis-common-utils/pom.xml
index ae8ccaa39..706b1b911 100644
--- a/metis-common/metis-common-utils/pom.xml
+++ b/metis-common/metis-common-utils/pom.xml
@@ -4,7 +4,7 @@
metis-common
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-common-utils
diff --git a/metis-common/metis-common-zoho/pom.xml b/metis-common/metis-common-zoho/pom.xml
index b3ba39426..728ea62c4 100644
--- a/metis-common/metis-common-zoho/pom.xml
+++ b/metis-common/metis-common-zoho/pom.xml
@@ -4,7 +4,7 @@
metis-common
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-common-zoho
diff --git a/metis-common/pom.xml b/metis-common/pom.xml
index 6e7f111a8..e1059bf25 100644
--- a/metis-common/pom.xml
+++ b/metis-common/pom.xml
@@ -4,7 +4,7 @@
metis-framework
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-common
pom
diff --git a/metis-core/metis-core-common/pom.xml b/metis-core/metis-core-common/pom.xml
index 612171884..50760dd35 100644
--- a/metis-core/metis-core-common/pom.xml
+++ b/metis-core/metis-core-common/pom.xml
@@ -4,7 +4,7 @@
metis-core
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-core-common
diff --git a/metis-core/metis-core-rest/pom.xml b/metis-core/metis-core-rest/pom.xml
index a01e06941..0ae469d7e 100644
--- a/metis-core/metis-core-rest/pom.xml
+++ b/metis-core/metis-core-rest/pom.xml
@@ -4,7 +4,7 @@
metis-core
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-core-rest
war
diff --git a/metis-core/metis-core-service/pom.xml b/metis-core/metis-core-service/pom.xml
index 1d8d2a29c..366ae33de 100644
--- a/metis-core/metis-core-service/pom.xml
+++ b/metis-core/metis-core-service/pom.xml
@@ -4,7 +4,7 @@
metis-core
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-core-service
diff --git a/metis-core/pom.xml b/metis-core/pom.xml
index ffba399b3..4cc3c771d 100644
--- a/metis-core/pom.xml
+++ b/metis-core/pom.xml
@@ -4,7 +4,7 @@
metis-framework
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-core
pom
diff --git a/metis-dereference/metis-dereference-common/pom.xml b/metis-dereference/metis-dereference-common/pom.xml
index e485103b5..1b6c4a6d6 100644
--- a/metis-dereference/metis-dereference-common/pom.xml
+++ b/metis-dereference/metis-dereference-common/pom.xml
@@ -4,7 +4,7 @@
metis-dereference
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-dereference-common
diff --git a/metis-dereference/metis-dereference-import/pom.xml b/metis-dereference/metis-dereference-import/pom.xml
index 6201b2a4c..5ef621cc5 100644
--- a/metis-dereference/metis-dereference-import/pom.xml
+++ b/metis-dereference/metis-dereference-import/pom.xml
@@ -3,7 +3,7 @@
metis-dereference
eu.europeana.metis
- 6
+ 7-SNAPSHOT
4.0.0
metis-dereference-import
diff --git a/metis-dereference/metis-dereference-rest/pom.xml b/metis-dereference/metis-dereference-rest/pom.xml
index 436e5bed8..87bfb7fe1 100644
--- a/metis-dereference/metis-dereference-rest/pom.xml
+++ b/metis-dereference/metis-dereference-rest/pom.xml
@@ -4,7 +4,7 @@
metis-dereference
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-dereference-rest
war
diff --git a/metis-dereference/metis-dereference-service/pom.xml b/metis-dereference/metis-dereference-service/pom.xml
index 5875c1722..62bc2d4f6 100644
--- a/metis-dereference/metis-dereference-service/pom.xml
+++ b/metis-dereference/metis-dereference-service/pom.xml
@@ -4,7 +4,7 @@
metis-dereference
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-dereference-service
diff --git a/metis-dereference/pom.xml b/metis-dereference/pom.xml
index f7a9345af..3e1cc3f38 100644
--- a/metis-dereference/pom.xml
+++ b/metis-dereference/pom.xml
@@ -3,7 +3,7 @@
4.0.0
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-framework
diff --git a/metis-enrichment/metis-enrichment-client/pom.xml b/metis-enrichment/metis-enrichment-client/pom.xml
index 55ff01ac5..040678062 100644
--- a/metis-enrichment/metis-enrichment-client/pom.xml
+++ b/metis-enrichment/metis-enrichment-client/pom.xml
@@ -4,7 +4,7 @@
metis-enrichment
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-enrichment-client
jar
diff --git a/metis-enrichment/metis-enrichment-common/pom.xml b/metis-enrichment/metis-enrichment-common/pom.xml
index 222ef65b6..2d4736c50 100644
--- a/metis-enrichment/metis-enrichment-common/pom.xml
+++ b/metis-enrichment/metis-enrichment-common/pom.xml
@@ -4,7 +4,7 @@
metis-enrichment
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-enrichment-common
diff --git a/metis-enrichment/metis-enrichment-rest/pom.xml b/metis-enrichment/metis-enrichment-rest/pom.xml
index 06cf038d8..6a8bed946 100644
--- a/metis-enrichment/metis-enrichment-rest/pom.xml
+++ b/metis-enrichment/metis-enrichment-rest/pom.xml
@@ -4,7 +4,7 @@
metis-enrichment
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-enrichment-rest
war
diff --git a/metis-enrichment/metis-enrichment-service/pom.xml b/metis-enrichment/metis-enrichment-service/pom.xml
index e7a20c1f0..93508ac5c 100644
--- a/metis-enrichment/metis-enrichment-service/pom.xml
+++ b/metis-enrichment/metis-enrichment-service/pom.xml
@@ -4,7 +4,7 @@
metis-enrichment
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-enrichment-service
jar
diff --git a/metis-enrichment/pom.xml b/metis-enrichment/pom.xml
index cc5ebd82f..7dedc1224 100644
--- a/metis-enrichment/pom.xml
+++ b/metis-enrichment/pom.xml
@@ -4,7 +4,7 @@
metis-framework
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-enrichment
pom
diff --git a/metis-harvesting/pom.xml b/metis-harvesting/pom.xml
index 018490942..643242ff5 100644
--- a/metis-harvesting/pom.xml
+++ b/metis-harvesting/pom.xml
@@ -4,7 +4,7 @@
metis-framework
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-harvesting
diff --git a/metis-indexing/pom.xml b/metis-indexing/pom.xml
index f44956ee8..3c2fd7601 100644
--- a/metis-indexing/pom.xml
+++ b/metis-indexing/pom.xml
@@ -4,7 +4,7 @@
metis-framework
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-indexing
diff --git a/metis-media-service/pom.xml b/metis-media-service/pom.xml
index eb178dceb..50cb6bc63 100644
--- a/metis-media-service/pom.xml
+++ b/metis-media-service/pom.xml
@@ -4,7 +4,7 @@
metis-framework
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-media-service
diff --git a/metis-normalization/pom.xml b/metis-normalization/pom.xml
index 8612e2382..d0fe155b5 100644
--- a/metis-normalization/pom.xml
+++ b/metis-normalization/pom.xml
@@ -4,7 +4,7 @@
metis-framework
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-normalization
diff --git a/metis-repository/pom.xml b/metis-repository/pom.xml
index c3ce88566..d60b3a2a7 100644
--- a/metis-repository/pom.xml
+++ b/metis-repository/pom.xml
@@ -3,7 +3,7 @@
metis-framework
eu.europeana.metis
- 6
+ 7-SNAPSHOT
4.0.0
metis-repository
diff --git a/metis-schema/pom.xml b/metis-schema/pom.xml
index 533590b08..f3620553b 100644
--- a/metis-schema/pom.xml
+++ b/metis-schema/pom.xml
@@ -4,7 +4,7 @@
metis-framework
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-schema
diff --git a/metis-transformation/metis-transformation-service/pom.xml b/metis-transformation/metis-transformation-service/pom.xml
index a425a454e..6435ce338 100644
--- a/metis-transformation/metis-transformation-service/pom.xml
+++ b/metis-transformation/metis-transformation-service/pom.xml
@@ -4,7 +4,7 @@
metis-transformation
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-transformation-service
diff --git a/metis-transformation/pom.xml b/metis-transformation/pom.xml
index 347166941..d7faa76dd 100644
--- a/metis-transformation/pom.xml
+++ b/metis-transformation/pom.xml
@@ -4,7 +4,7 @@
metis-framework
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-transformation
pom
diff --git a/metis-validation/metis-validation-client/pom.xml b/metis-validation/metis-validation-client/pom.xml
index f94e32e56..a08e74249 100644
--- a/metis-validation/metis-validation-client/pom.xml
+++ b/metis-validation/metis-validation-client/pom.xml
@@ -4,7 +4,7 @@
metis-validation
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-validation-client
diff --git a/metis-validation/metis-validation-common/pom.xml b/metis-validation/metis-validation-common/pom.xml
index 11f9cbf88..0e0e7c860 100644
--- a/metis-validation/metis-validation-common/pom.xml
+++ b/metis-validation/metis-validation-common/pom.xml
@@ -4,7 +4,7 @@
metis-validation
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-validation-common
diff --git a/metis-validation/metis-validation-rest/pom.xml b/metis-validation/metis-validation-rest/pom.xml
index 98d21ae0f..f808edc03 100644
--- a/metis-validation/metis-validation-rest/pom.xml
+++ b/metis-validation/metis-validation-rest/pom.xml
@@ -4,7 +4,7 @@
metis-validation
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-validation-rest
war
diff --git a/metis-validation/metis-validation-service/pom.xml b/metis-validation/metis-validation-service/pom.xml
index 9d5fe568d..00b46299d 100644
--- a/metis-validation/metis-validation-service/pom.xml
+++ b/metis-validation/metis-validation-service/pom.xml
@@ -4,7 +4,7 @@
metis-validation
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-validation-service
diff --git a/metis-validation/pom.xml b/metis-validation/pom.xml
index ebdd67a72..5c02f40ac 100644
--- a/metis-validation/pom.xml
+++ b/metis-validation/pom.xml
@@ -4,7 +4,7 @@
metis-framework
eu.europeana.metis
- 6
+ 7-SNAPSHOT
metis-validation
pom
diff --git a/pom.xml b/pom.xml
index 35d42ede1..70d297100 100644
--- a/pom.xml
+++ b/pom.xml
@@ -4,7 +4,7 @@
eu.europeana.metis
metis-framework
- 6
+ 7-SNAPSHOT
pom
@@ -27,7 +27,7 @@
scm:git:https://github.com/europeana/metis-framework
https://github.com/europeana/metis-framework
- v6
+ HEAD
scm:git:https://github.com/europeana/metis-framework
From 656c9338f1b5df680cfe116a26c1f460465ee1d0 Mon Sep 17 00:00:00 2001
From: Joana Sousa
Date: Thu, 3 Mar 2022 11:37:51 +0100
Subject: [PATCH 02/73] MET-4159 Created new method that returns HttpIterator
with InputStream
---
.../metis/harvesting/http/HttpHarvester.java | 2 +
.../harvesting/http/HttpHarvesterImpl.java | 62 ++++++++++---------
2 files changed, 35 insertions(+), 29 deletions(-)
diff --git a/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvester.java b/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvester.java
index 0b428f273..4d0658b3f 100644
--- a/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvester.java
+++ b/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvester.java
@@ -47,6 +47,8 @@ void harvestRecords(InputStream inputStream, CompressedFileExtension compressedF
*/
void setMaxNumberOfIterations(int maxOfIterations);
+ HttpRecordIterator createHttpHarvestIterator(InputStream input, CompressedFileExtension compressedFileType) throws HarvesterException;
+
/**
* An object representing an entry in a file archive.
*/
diff --git a/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvesterImpl.java b/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvesterImpl.java
index 7a4078180..f4faf5a23 100644
--- a/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvesterImpl.java
+++ b/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvesterImpl.java
@@ -49,26 +49,10 @@ public class HttpHarvesterImpl implements HttpHarvester {
public void harvestRecords(InputStream inputStream, CompressedFileExtension compressedFileType,
Consumer action) throws HarvesterException {
- // We chose where to store the temporary file.
- @SuppressWarnings("findsecbugs:PATH_TRAVERSAL_IN")
- Path tempDir = null;
- try {
-
- // Save the zip file in a temporary directory (and close the input stream).
- final String prefix = UUID.randomUUID().toString();
- final Path tempFile;
- try {
- tempDir = Files.createTempDirectory(prefix);
- tempFile = Files.createTempFile(tempDir, prefix, compressedFileType.getExtension());
- FileUtils.copyInputStreamToFile(inputStream, tempFile.toFile());
- } catch (IOException e) {
- throw new HarvesterException("Problem saving archive.", e);
- }
-
AtomicInteger currentNumberOfIterations = new AtomicInteger();
// Now perform the harvesting - go by each file.
- final HttpRecordIterator iterator = harvestRecords(tempFile);
+ final HttpRecordIterator iterator = createHttpHarvestIterator(inputStream, compressedFileType);
List> exception = new ArrayList<>(1);
iterator.forEach(path -> {
try (InputStream content = Files.newInputStream(path)) {
@@ -88,18 +72,6 @@ public void harvestRecords(InputStream inputStream, CompressedFileExtension comp
throw new HarvesterException("Could not process path " + exception.get(0).getKey() + ".",
exception.get(0).getValue());
}
-
- } finally {
-
- // Finally, attempt to delete the files.
- if (tempDir != null) {
- try {
- FileUtils.deleteDirectory(tempDir.toFile());
- } catch (IOException e) {
- LOGGER.warn("Could not delete temporary directory.", e);
- }
- }
- }
}
@Override
@@ -126,6 +98,38 @@ public HttpRecordIterator harvestRecords(String archiveUrl, String downloadDirec
return harvestRecords(downloadedFile);
}
+ @Override
+ public HttpRecordIterator createHttpHarvestIterator(InputStream input, CompressedFileExtension compressedFileType) throws HarvesterException {
+ // We chose where to store the temporary file.
+ @SuppressWarnings("findsecbugs:PATH_TRAVERSAL_IN")
+ Path tempDir = null;
+ final Path tempFile;
+ try {
+ // Save the zip file in a temporary directory (and close the input stream).
+ final String prefix = UUID.randomUUID().toString();
+
+ tempDir = Files.createTempDirectory(prefix);
+ tempFile = Files.createTempFile(tempDir, prefix, compressedFileType.getExtension());
+ FileUtils.copyInputStreamToFile(input, tempFile.toFile());
+
+ return harvestRecords(tempFile);
+ } catch (IOException e) {
+ throw new HarvesterException("Problem saving archive.", e);
+ } finally {
+
+ // Finally, attempt to delete the files.
+ if (tempDir != null) {
+ try {
+ FileUtils.deleteDirectory(tempDir.toFile());
+ } catch (IOException e) {
+ LOGGER.warn("Could not delete temporary directory.", e);
+ }
+ }
+ }
+
+
+ }
+
private HttpRecordIterator harvestRecords(Path archiveFile) throws HarvesterException {
// Extract the archive.
From 597d0f58035202df33a628853487d62990d71051 Mon Sep 17 00:00:00 2001
From: Joana Sousa
Date: Thu, 3 Mar 2022 16:33:14 +0100
Subject: [PATCH 03/73] MET-4159 Created a new method for HttpRecordIterator to
delete its temporary content
---
.../metis/harvesting/http/HttpHarvester.java | 10 +++++-
.../harvesting/http/HttpHarvesterImpl.java | 33 ++++++++++---------
.../harvesting/http/HttpRecordIterator.java | 2 ++
3 files changed, 29 insertions(+), 16 deletions(-)
diff --git a/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvester.java b/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvester.java
index 4d0658b3f..de9fcbd97 100644
--- a/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvester.java
+++ b/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvester.java
@@ -47,7 +47,15 @@ void harvestRecords(InputStream inputStream, CompressedFileExtension compressedF
*/
void setMaxNumberOfIterations(int maxOfIterations);
- HttpRecordIterator createHttpHarvestIterator(InputStream input, CompressedFileExtension compressedFileType) throws HarvesterException;
+ /**
+ * It creates a {@link HttpRecordIterator} with a InputStream into a temporary file directory.
+ * It is needed to use the {@link HttpRecordIterator#deleteIteratorContent()} method if this method is used.
+ * @param input The input stream from which we create the iterator
+ * @param compressedFileType The type of compressed file type
+ * @return A HttpRecordIterator based on a temporary file location
+ * @throws HarvesterException In case there is an issue while using the input stream
+ */
+ HttpRecordIterator createTemporaryHttpHarvestIterator(InputStream input, CompressedFileExtension compressedFileType) throws HarvesterException;
/**
* An object representing an entry in a file archive.
diff --git a/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvesterImpl.java b/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvesterImpl.java
index f4faf5a23..1b61617e8 100644
--- a/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvesterImpl.java
+++ b/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvesterImpl.java
@@ -52,7 +52,7 @@ public void harvestRecords(InputStream inputStream, CompressedFileExtension comp
AtomicInteger currentNumberOfIterations = new AtomicInteger();
// Now perform the harvesting - go by each file.
- final HttpRecordIterator iterator = createHttpHarvestIterator(inputStream, compressedFileType);
+ final HttpRecordIterator iterator = createTemporaryHttpHarvestIterator(inputStream, compressedFileType);
List> exception = new ArrayList<>(1);
iterator.forEach(path -> {
try (InputStream content = Files.newInputStream(path)) {
@@ -68,6 +68,9 @@ public void harvestRecords(InputStream inputStream, CompressedFileExtension comp
return IterationResult.TERMINATE;
}
});
+
+ iterator.deleteIteratorContent();
+
if (!exception.isEmpty()) {
throw new HarvesterException("Could not process path " + exception.get(0).getKey() + ".",
exception.get(0).getValue());
@@ -99,10 +102,10 @@ public HttpRecordIterator harvestRecords(String archiveUrl, String downloadDirec
}
@Override
- public HttpRecordIterator createHttpHarvestIterator(InputStream input, CompressedFileExtension compressedFileType) throws HarvesterException {
+ public HttpRecordIterator createTemporaryHttpHarvestIterator(InputStream input, CompressedFileExtension compressedFileType) throws HarvesterException {
// We chose where to store the temporary file.
@SuppressWarnings("findsecbugs:PATH_TRAVERSAL_IN")
- Path tempDir = null;
+ Path tempDir;
final Path tempFile;
try {
// Save the zip file in a temporary directory (and close the input stream).
@@ -115,18 +118,7 @@ public HttpRecordIterator createHttpHarvestIterator(InputStream input, Compresse
return harvestRecords(tempFile);
} catch (IOException e) {
throw new HarvesterException("Problem saving archive.", e);
- } finally {
-
- // Finally, attempt to delete the files.
- if (tempDir != null) {
- try {
- FileUtils.deleteDirectory(tempDir.toFile());
- } catch (IOException e) {
- LOGGER.warn("Could not delete temporary directory.", e);
- }
- }
- }
-
+ }
}
@@ -210,6 +202,17 @@ public FileIterator(Path extractedDirectory) {
this.extractedDirectory = extractedDirectory;
}
+ @Override
+ public void deleteIteratorContent() {
+ if (extractedDirectory != null) {
+ try {
+ FileUtils.deleteDirectory(extractedDirectory.toFile());
+ } catch (IOException e) {
+ LOGGER.warn("Could not delete directory.", e);
+ }
+ }
+ }
+
@Override
public void forEach(ReportingIteration action) throws HarvesterException {
try {
diff --git a/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpRecordIterator.java b/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpRecordIterator.java
index 43be9b0ba..e81fb7b04 100644
--- a/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpRecordIterator.java
+++ b/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpRecordIterator.java
@@ -13,6 +13,8 @@
*/
public interface HttpRecordIterator {
+ void deleteIteratorContent();
+
/**
* Iterate through the decompressed records.
*
From 68492d10dcdc20cd16c3d884361ed3c9deb36a7c Mon Sep 17 00:00:00 2001
From: Joana Sousa
Date: Thu, 3 Mar 2022 16:43:59 +0100
Subject: [PATCH 04/73] MET-4159 Removed maxNumberOfIterations variable. It is
not needed
---
.../metis/harvesting/http/HttpHarvester.java | 8 --------
.../metis/harvesting/http/HttpHarvesterImpl.java | 14 --------------
2 files changed, 22 deletions(-)
diff --git a/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvester.java b/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvester.java
index de9fcbd97..4a1a64e40 100644
--- a/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvester.java
+++ b/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvester.java
@@ -39,14 +39,6 @@ HttpRecordIterator harvestRecords(String archiveUrl, String downloadDirectory)
void harvestRecords(InputStream inputStream, CompressedFileExtension compressedFileType,
Consumer action) throws HarvesterException;
- /**
- * Method to set up the maximum number of iterations through records during harvesting.
- * If there is none, the harvesting iterate through all records.
- *
- * @param maxOfIterations The maximum number of iterations
- */
- void setMaxNumberOfIterations(int maxOfIterations);
-
/**
* It creates a {@link HttpRecordIterator} with a InputStream into a temporary file directory.
* It is needed to use the {@link HttpRecordIterator#deleteIteratorContent()} method if this method is used.
diff --git a/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvesterImpl.java b/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvesterImpl.java
index 1b61617e8..262dc54da 100644
--- a/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvesterImpl.java
+++ b/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvesterImpl.java
@@ -23,7 +23,6 @@
import java.util.List;
import java.util.Set;
import java.util.UUID;
-import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;
import java.util.stream.Stream;
import org.apache.commons.io.FileUtils;
@@ -43,14 +42,10 @@ public class HttpHarvesterImpl implements HttpHarvester {
private static final Logger LOGGER = LoggerFactory.getLogger(HttpHarvesterImpl.class);
- private int maxNumberOfIterations = 0;
-
@Override
public void harvestRecords(InputStream inputStream, CompressedFileExtension compressedFileType,
Consumer action) throws HarvesterException {
- AtomicInteger currentNumberOfIterations = new AtomicInteger();
-
// Now perform the harvesting - go by each file.
final HttpRecordIterator iterator = createTemporaryHttpHarvestIterator(inputStream, compressedFileType);
List> exception = new ArrayList<>(1);
@@ -58,10 +53,6 @@ public void harvestRecords(InputStream inputStream, CompressedFileExtension comp
try (InputStream content = Files.newInputStream(path)) {
action.accept(new ArchiveEntryImpl(path.getFileName().toString(),
new ByteArrayInputStream(IOUtils.toByteArray(content))));
- currentNumberOfIterations.getAndIncrement();
- if (maxNumberOfIterations > 0 && currentNumberOfIterations.get() > maxNumberOfIterations) {
- return IterationResult.TERMINATE;
- }
return IterationResult.CONTINUE;
} catch (IOException | RuntimeException e) {
exception.add(new ImmutablePair<>(path, e));
@@ -77,11 +68,6 @@ public void harvestRecords(InputStream inputStream, CompressedFileExtension comp
}
}
- @Override
- public void setMaxNumberOfIterations(int maxOfIterations) {
- this.maxNumberOfIterations = maxOfIterations;
- }
-
@Override
public HttpRecordIterator harvestRecords(String archiveUrl, String downloadDirectory)
throws HarvesterException {
From af653bc9837e3cd41a205c558cca73129f4275b5 Mon Sep 17 00:00:00 2001
From: Adolfo Peixinho
Date: Thu, 3 Mar 2022 17:46:32 +0100
Subject: [PATCH 05/73] MET-4237 Updated pom.xml to point to the correct
E-Cloud version. Added implementation of depublication to indexPostProcess
method. Refactor of TestObjectFactory to align with E-Cloud SubTaskInfo
---
.../core/rest/utils/TestObjectFactory.java | 10 +-
.../core/execution/WorkflowExecutor.java | 3 +-
.../core/execution/WorkflowPostProcessor.java | 99 +++++++++++++------
.../metis/core/utils/TestObjectFactory.java | 43 ++++----
pom.xml | 2 +-
5 files changed, 96 insertions(+), 61 deletions(-)
diff --git a/metis-core/metis-core-rest/src/test/java/eu/europeana/metis/core/rest/utils/TestObjectFactory.java b/metis-core/metis-core-rest/src/test/java/eu/europeana/metis/core/rest/utils/TestObjectFactory.java
index fdc1ed04d..4dc8d7598 100644
--- a/metis-core/metis-core-rest/src/test/java/eu/europeana/metis/core/rest/utils/TestObjectFactory.java
+++ b/metis-core/metis-core-rest/src/test/java/eu/europeana/metis/core/rest/utils/TestObjectFactory.java
@@ -253,11 +253,11 @@ public static MetisUserView createMetisUser(String email) {
* @return the created sub task info
*/
public static List createListOfSubTaskInfo() {
- SubTaskInfo subTaskInfo1 = new SubTaskInfo(1, "some_resource_id1", RecordState.SUCCESS, "",
- "Sensitive Information");
- final int resourceNum = 2;
- SubTaskInfo subTaskInfo2 = new SubTaskInfo(resourceNum, "some_resource_id1", RecordState.SUCCESS, "",
- "Sensitive Information");
+
+ SubTaskInfo subTaskInfo1 = new SubTaskInfo(1, "some_resource_id1", RecordState.SUCCESS, "info",
+ "additional info", "europeanaId", 0L);
+ SubTaskInfo subTaskInfo2 = new SubTaskInfo(2, "some_resource_id2", RecordState.SUCCESS, "info",
+ "additional info", "europeanaId", 0L);
ArrayList subTaskInfos = new ArrayList<>();
subTaskInfos.add(subTaskInfo1);
subTaskInfos.add(subTaskInfo2);
diff --git a/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowExecutor.java b/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowExecutor.java
index 2c9393d0c..6281b87d0 100644
--- a/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowExecutor.java
+++ b/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowExecutor.java
@@ -24,6 +24,7 @@
import eu.europeana.metis.core.workflow.plugins.ExecutablePluginType;
import eu.europeana.metis.core.workflow.plugins.PluginStatus;
import eu.europeana.metis.core.workflow.plugins.PluginType;
+import eu.europeana.metis.exception.BadContentException;
import eu.europeana.metis.exception.ExternalTaskException;
import eu.europeana.metis.network.ExternalRequestUtil;
import java.util.Date;
@@ -491,7 +492,7 @@ private boolean applyPostProcessing(MonitorResult monitorResult, AbstractExecuta
if (monitorResult.getTaskState() == TaskState.PROCESSED) {
try {
this.workflowPostProcessor.performPluginPostProcessing(plugin, datasetId);
- } catch (DpsException | InvalidIndexPluginException | RuntimeException e) {
+ } catch (DpsException | InvalidIndexPluginException | BadContentException | RuntimeException e) {
processingAppliedOrNotRequired = false;
LOGGER.warn("Problem occurred during Metis post-processing.", e);
plugin.setFinishedDate(null);
diff --git a/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java b/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java
index 2cbc2fc9d..ceb8d6046 100644
--- a/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java
+++ b/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java
@@ -18,6 +18,8 @@
import eu.europeana.metis.core.dataset.DepublishRecordId.DepublicationStatus;
import eu.europeana.metis.core.exceptions.InvalidIndexPluginException;
import eu.europeana.metis.core.service.OrchestratorService;
+import eu.europeana.metis.core.util.DepublishRecordIdSortField;
+import eu.europeana.metis.core.util.SortDirection;
import eu.europeana.metis.core.workflow.WorkflowExecution;
import eu.europeana.metis.core.workflow.plugins.AbstractExecutablePlugin;
import eu.europeana.metis.core.workflow.plugins.AbstractMetisPlugin;
@@ -27,6 +29,7 @@
import eu.europeana.metis.core.workflow.plugins.IndexToPublishPlugin;
import eu.europeana.metis.core.workflow.plugins.MetisPlugin;
import eu.europeana.metis.core.workflow.plugins.PluginType;
+import eu.europeana.metis.exception.BadContentException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
@@ -40,7 +43,7 @@
import org.slf4j.LoggerFactory;
/**
- * This object can perform post processing for workflows.
+ * This object can perform post-processing for workflows.
*/
public class WorkflowPostProcessor {
@@ -56,10 +59,10 @@ public class WorkflowPostProcessor {
/**
* Constructor.
*
- * @param depublishRecordIdDao The DAO for depublished records.
- * @param datasetDao The DAO for datasets
- * @param workflowExecutionDao The DAO for workflow executions.
- * @param dpsClient the dps client
+ * @param depublishRecordIdDao The DAO for de-published records
+ * @param datasetDao The DAO for datasets
+ * @param workflowExecutionDao The DAO for workflow executions
+ * @param dpsClient the dps client
*/
public WorkflowPostProcessor(DepublishRecordIdDao depublishRecordIdDao,
DatasetDao datasetDao, WorkflowExecutionDao workflowExecutionDao, DpsClient dpsClient) {
@@ -70,13 +73,17 @@ public WorkflowPostProcessor(DepublishRecordIdDao depublishRecordIdDao,
}
/**
- * This method performs post processing after an individual workflow step.
+ * This method performs post-processing after an individual workflow step.
*
- * @param plugin The plugin that was successfully executed.
- * @param datasetId The dataset ID to which the plugin belongs.
+ * @param plugin The plugin that was successfully executed
+ * @param datasetId The dataset ID to which the plugin belongs
+ * @throws DpsException If communication with e-cloud dps failed
+ * @throws InvalidIndexPluginException If the plugin is not one of the supported index plugin types
+ * @throws BadContentException In case the records would violate the maximum number of de-published records that each
+ * dataset can have.
*/
void performPluginPostProcessing(AbstractExecutablePlugin> plugin, String datasetId)
- throws DpsException, InvalidIndexPluginException {
+ throws DpsException, InvalidIndexPluginException, BadContentException {
final PluginType pluginType = plugin.getPluginType();
LOGGER.info("Starting postprocessing of plugin {} in dataset {}.", pluginType, datasetId);
@@ -91,12 +98,15 @@ void performPluginPostProcessing(AbstractExecutablePlugin> plugin, String data
/**
* Performs post-processing for indexing plugins
*
- * @param indexPlugin the index plugin
- * @param datasetId the dataset id
- * @throws DpsException if communication with ecloud dps failed
+ * @param indexPlugin The index plugin
+ * @param datasetId The dataset id
+ * @throws DpsException If communication with e-cloud dps failed
+ * @throws InvalidIndexPluginException If the plugin is not one of the supported index plugin types
+ * @throws BadContentException In case the records would violate the maximum number of de-published records that each
+ * dataset can have.
*/
private void indexPostProcess(AbstractExecutablePlugin> indexPlugin, String datasetId)
- throws DpsException, InvalidIndexPluginException {
+ throws DpsException, InvalidIndexPluginException, BadContentException {
TargetIndexingDatabase targetIndexingDatabase;
TargetIndexingEnvironment targetIndexingEnvironment;
if (indexPlugin instanceof IndexToPreviewPlugin) {
@@ -105,8 +115,21 @@ private void indexPostProcess(AbstractExecutablePlugin> indexPlugin, String da
} else if (indexPlugin instanceof IndexToPublishPlugin) {
targetIndexingDatabase = ((IndexToPublishPlugin) indexPlugin).getTargetIndexingDatabase();
targetIndexingEnvironment = ((IndexToPublishPlugin) indexPlugin).getTargetIndexingEnvironment();
- //Reset depublish status
- depublishRecordIdDao.markRecordIdsWithDepublicationStatus(datasetId, null,
+
+ // get the detailed task report for the plugin's topology name and external task id
+ List taskReport = dpsClient.getDetailedTaskReport(indexPlugin.getTopologyName(),
+ Long.parseLong(indexPlugin.getExternalTaskId()));
+ // get all currently de-published records ids
+ Set depublishedRecordIds = depublishRecordIdDao
+ .getAllDepublishRecordIdsWithStatus(datasetId, DepublishRecordIdSortField.DEPUBLICATION_STATE, SortDirection.ASCENDING,
+ DepublicationStatus.DEPUBLISHED);
+ // filter the record ids that are a part of the given report, to be de-published
+ Set recordIdsToDepublish = taskReport.stream()
+ .filter(taskInfo -> depublishedRecordIds.contains(taskInfo.getEuropeanaId()))
+ .map(SubTaskInfo::getEuropeanaId).collect(Collectors.toSet());
+
+ // reset de-publish status
+ depublishRecordIdDao.markRecordIdsWithDepublicationStatus(datasetId, recordIdsToDepublish,
DepublicationStatus.PENDING_DEPUBLICATION, null);
} else {
throw new InvalidIndexPluginException("Plugin is not of the types supported");
@@ -118,13 +141,14 @@ private void indexPostProcess(AbstractExecutablePlugin> indexPlugin, String da
}
/**
- * Performs post processing for depublish plugins
+ * Performs post-processing for de-publish plugins
*
- * @param depublishPlugin the depublish plugin
- * @param datasetId the dataset id
- * @throws DpsException if communication with ecloud dps failed
+ * @param depublishPlugin The de-publish plugin
+ * @param datasetId The dataset id
+ * @throws DpsException If communication with e-cloud dps failed
*/
- private void depublishPostProcess(DepublishPlugin depublishPlugin, String datasetId) throws DpsException {
+ private void depublishPostProcess(DepublishPlugin depublishPlugin, String datasetId)
+ throws DpsException {
if (depublishPlugin.getPluginMetadata().isDatasetDepublish()) {
depublishDatasetPostProcess(datasetId);
} else {
@@ -132,12 +156,14 @@ private void depublishPostProcess(DepublishPlugin depublishPlugin, String datase
}
}
+ /**
+ * @param datasetId The dataset id
+ */
private void depublishDatasetPostProcess(String datasetId) {
// Set all depublished records back to PENDING.
depublishRecordIdDao.markRecordIdsWithDepublicationStatus(datasetId, null,
DepublicationStatus.PENDING_DEPUBLICATION, null);
-
// Find latest PUBLISH Type Plugin and set dataStatus to DELETED.
final PluginWithExecutionId latestSuccessfulPlugin = workflowExecutionDao
.getLatestSuccessfulPlugin(datasetId, OrchestratorService.PUBLISH_TYPES);
@@ -152,32 +178,45 @@ private void depublishDatasetPostProcess(String datasetId) {
workflowExecutionDao.updateWorkflowPlugins(workflowExecutionToUpdate);
}
}
-
// Set publication fitness to UNFIT.
final Dataset dataset = datasetDao.getDatasetByDatasetId(datasetId);
dataset.setPublicationFitness(PublicationFitness.UNFIT);
datasetDao.update(dataset);
}
- private void depublishRecordPostProcess(DepublishPlugin depublishPlugin, String datasetId) throws DpsException {
+ /**
+ * @param depublishPlugin The de-publish plugin
+ * @param datasetId The dataset id
+ * @throws DpsException If communication with e-cloud dps failed
+ */
+ private void depublishRecordPostProcess(DepublishPlugin depublishPlugin, String datasetId)
+ throws DpsException {
// Retrieve the successfully depublished records.
final long externalTaskId = Long.parseLong(depublishPlugin.getExternalTaskId());
final List subTasks = new ArrayList<>();
List subTasksBatch;
do {
- subTasksBatch = retryableExternalRequestForNetworkExceptionsThrowing(() -> dpsClient.getDetailedTaskReportBetweenChunks(
- depublishPlugin.getTopologyName(), externalTaskId, subTasks.size(),
- subTasks.size() + ECLOUD_REQUEST_BATCH_SIZE));
+ // need to change dpsClient call
+ subTasksBatch = retryableExternalRequestForNetworkExceptionsThrowing(
+ () -> dpsClient.getDetailedTaskReportBetweenChunks(
+ depublishPlugin.getTopologyName(), externalTaskId, subTasks.size(),
+ subTasks.size() + ECLOUD_REQUEST_BATCH_SIZE));
subTasks.addAll(subTasksBatch);
} while (subTasksBatch.size() == ECLOUD_REQUEST_BATCH_SIZE);
// Mark the records as DEPUBLISHED.
final Map> successfulRecords = subTasks.stream()
- .filter(subTask -> subTask.getRecordState() == RecordState.SUCCESS)
- .map(SubTaskInfo::getResource).map(DepublishRecordIdUtils::decomposeFullRecordId)
- .collect(Collectors.groupingBy(Pair::getLeft,
- Collectors.mapping(Pair::getRight, Collectors.toSet())));
+ .filter(subTask ->
+ subTask.getRecordState()
+ == RecordState.SUCCESS)
+ .map(SubTaskInfo::getResource).map(
+ DepublishRecordIdUtils::decomposeFullRecordId)
+ .collect(Collectors.groupingBy(
+ Pair::getLeft,
+ Collectors.mapping(
+ Pair::getRight,
+ Collectors.toSet())));
successfulRecords.forEach((dataset, records) ->
depublishRecordIdDao.markRecordIdsWithDepublicationStatus(dataset, records,
DepublicationStatus.DEPUBLISHED, new Date()));
diff --git a/metis-core/metis-core-service/src/test/java/eu/europeana/metis/core/utils/TestObjectFactory.java b/metis-core/metis-core-service/src/test/java/eu/europeana/metis/core/utils/TestObjectFactory.java
index 17b7f9833..38d389e4b 100644
--- a/metis-core/metis-core-service/src/test/java/eu/europeana/metis/core/utils/TestObjectFactory.java
+++ b/metis-core/metis-core-service/src/test/java/eu/europeana/metis/core/utils/TestObjectFactory.java
@@ -16,8 +16,8 @@
import eu.europeana.metis.core.common.Language;
import eu.europeana.metis.core.dao.WorkflowExecutionDao.ExecutionDatasetPair;
import eu.europeana.metis.core.dataset.Dataset;
-import eu.europeana.metis.core.dataset.DatasetXslt;
import eu.europeana.metis.core.dataset.Dataset.PublicationFitness;
+import eu.europeana.metis.core.dataset.DatasetXslt;
import eu.europeana.metis.core.rest.Record;
import eu.europeana.metis.core.workflow.ScheduleFrequence;
import eu.europeana.metis.core.workflow.ScheduledWorkflow;
@@ -134,8 +134,7 @@ private static WorkflowExecution createWorkflowExecutionObject(Dataset dataset)
}
/**
- * Create a list of dummy workflow executions. The dataset name will have a suffix number for each
- * dataset.
+ * Create a list of dummy workflow executions. The dataset name will have a suffix number for each dataset.
*
* @param size the number of dummy workflow executions to create
* @return the created list
@@ -146,8 +145,7 @@ public static List createListOfWorkflowExecutions(int size) {
}
/**
- * Create a list of dummy execution overviews. The dataset name will have a suffix number for each
- * dataset.
+ * Create a list of dummy execution overviews. The dataset name will have a suffix number for each dataset.
*
* @param size the number of dummy execution overviews to create
* @return the created list
@@ -180,8 +178,7 @@ public static ScheduledWorkflow createScheduledWorkflowObject() {
}
/**
- * Create a list of dummy scheduled workflows. The dataset name will have a suffix number for each
- * dataset.
+ * Create a list of dummy scheduled workflows. The dataset name will have a suffix number for each dataset.
*
* @param size the number of dummy scheduled workflows to create
* @return the created list
@@ -198,11 +195,11 @@ public static List createListOfScheduledWorkflows(int size) {
}
/**
- * Create a list of dummy scheduled workflows with pointer date and frequency. The dataset name
- * will have a suffix number for each dataset.
+ * Create a list of dummy scheduled workflows with pointer date and frequency. The dataset name will have a suffix number for
+ * each dataset.
*
- * @param size the number of dummy scheduled workflows to create
- * @param date the pointer date
+ * @param size the number of dummy scheduled workflows to create
+ * @param date the pointer date
* @param scheduleFrequence the schedule frequence
* @return the created list
*/
@@ -270,17 +267,15 @@ public static MetisUserView createMetisUser(String email) {
}
/**
- * Create a dummy sub task info
+ * Create a dummy subtask info
*
- * @return the created sub task info
+ * @return the created subtask info
*/
public static List createListOfSubTaskInfo() {
- SubTaskInfo subTaskInfo1 = new SubTaskInfo(1, "some_resource_id1", RecordState.SUCCESS, "",
- "Sensitive Information");
- final int resourceNum = 2;
- SubTaskInfo subTaskInfo2 = new SubTaskInfo(resourceNum, "some_resource_id1",
- RecordState.SUCCESS, "",
- "Sensitive Information");
+ SubTaskInfo subTaskInfo1 = new SubTaskInfo(1, "some_resource_id1", RecordState.SUCCESS, "info",
+ "additional info", "europeanaId", 0L);
+ SubTaskInfo subTaskInfo2 = new SubTaskInfo(2, "some_resource_id2", RecordState.SUCCESS, "info",
+ "additional info", "europeanaId", 0L);
ArrayList subTaskInfos = new ArrayList<>();
subTaskInfos.add(subTaskInfo1);
subTaskInfos.add(subTaskInfo2);
@@ -304,8 +299,8 @@ public static TaskErrorsInfo createTaskErrorsInfoListWithoutIdentifiers(int numb
}
/**
- * Create a task errors info object, which contains a list of {@link TaskErrorInfo} objects. These
- * will also contain a list of {@link ErrorDetails} that in turn contain dummy identifiers.
+ * Create a task errors info object, which contains a list of {@link TaskErrorInfo} objects. These will also contain a list of
+ * {@link ErrorDetails} that in turn contain dummy identifiers.
*
* @param numberOfErrorTypes the number of dummy error types
* @return the created task errors info
@@ -325,11 +320,11 @@ public static TaskErrorsInfo createTaskErrorsInfoListWithIdentifiers(int numberO
}
/**
- * Create a task errors info object, which contains a list of {@link TaskErrorInfo} objects. These
- * will also contain a list of {@link ErrorDetails} that in turn contain dummy identifiers.
+ * Create a task errors info object, which contains a list of {@link TaskErrorInfo} objects. These will also contain a list of
+ * {@link ErrorDetails} that in turn contain dummy identifiers.
*
* @param errorType the error type to be used for the internal {@link TaskErrorInfo}
- * @param message the message type to be used for the internal {@link TaskErrorInfo}
+ * @param message the message type to be used for the internal {@link TaskErrorInfo}
* @return the created task errors info
*/
public static TaskErrorsInfo createTaskErrorsInfoWithIdentifiers(String errorType,
diff --git a/pom.xml b/pom.xml
index 70d297100..15f414c74 100644
--- a/pom.xml
+++ b/pom.xml
@@ -137,7 +137,7 @@
1.0
4.2.0
- 6-SNAPSHOT
+ 7-SNAPSHOT
3.0.0
1.3
From cab7e8acc865cc6b0811c6644d3fae8b5d4e20b1 Mon Sep 17 00:00:00 2001
From: Simon Tzanakis
Date: Mon, 7 Mar 2022 14:49:52 +0100
Subject: [PATCH 06/73] MET-4159 Update javadoc
---
.../metis/harvesting/http/HttpHarvester.java | 35 +++++++++----------
1 file changed, 17 insertions(+), 18 deletions(-)
diff --git a/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvester.java b/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvester.java
index 4a1a64e40..f464841e7 100644
--- a/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvester.java
+++ b/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvester.java
@@ -6,30 +6,28 @@
import java.util.function.Consumer;
/**
- * Implementations of this interface provide the functionality to harvest from HTTP (compressed
- * archive).
+ * Implementations of this interface provide the functionality to harvest from HTTP (compressed archive).
*/
public interface HttpHarvester {
/**
* Harvest from HTTP (compressed archive).
*
- * @param archiveUrl The URL location of the compressed archive. The URL can use either the
- * http(s) protocol or the file protocol.
- * @param downloadDirectory The directory to which we download and extract the archive. Note: the
- * class does not clean up the downloaded or decompressed files. The caller is responsible for
- * providing a directory that is safe (i.e. on the right file system).
+ * @param archiveUrl The URL location of the compressed archive. The URL can use either the http(s) protocol or the file
+ * protocol.
+ * @param downloadDirectory The directory to which we download and extract the archive. Note: the class does not clean up the
+ * downloaded or decompressed files. The caller is responsible for providing a directory that is safe (i.e. on the right file
+ * system).
* @return An iterator that provides access to the decompressed records.
* @throws HarvesterException In case there was an issue during the harvest.
*/
HttpRecordIterator harvestRecords(String archiveUrl, String downloadDirectory)
- throws HarvesterException;
+ throws HarvesterException;
/**
- * Harvest from HTTP (compressed archive). This is a convenience method for {@link
- * #harvestRecords(String, String)} that copies the input stream to a temporary file (in the
- * system's temporary directory) first. An attempt will be made to remove the temporary file
- * before this method returns.
+ * Harvest from HTTP (compressed archive). This is a convenience method for {@link #harvestRecords(String, String)} that copies
+ * the input stream to a temporary file (in the system's temporary directory) first. An attempt will be made to remove the
+ * temporary file before this method returns.
*
* @param inputStream The input stream containing the compressed archive.
* @param compressedFileType The type of the archive.
@@ -37,17 +35,19 @@ HttpRecordIterator harvestRecords(String archiveUrl, String downloadDirectory)
* @throws HarvesterException In case there was an issue during the harvest.
*/
void harvestRecords(InputStream inputStream, CompressedFileExtension compressedFileType,
- Consumer action) throws HarvesterException;
+ Consumer action) throws HarvesterException;
/**
- * It creates a {@link HttpRecordIterator} with a InputStream into a temporary file directory.
- * It is needed to use the {@link HttpRecordIterator#deleteIteratorContent()} method if this method is used.
+ * It creates a {@link HttpRecordIterator} with an InputStream into a temporary file directory. When finished using the created
+ * iterator, the method {@link HttpRecordIterator#deleteIteratorContent()} should be used to clean up leftover files.
+ *
* @param input The input stream from which we create the iterator
* @param compressedFileType The type of compressed file type
* @return A HttpRecordIterator based on a temporary file location
* @throws HarvesterException In case there is an issue while using the input stream
*/
- HttpRecordIterator createTemporaryHttpHarvestIterator(InputStream input, CompressedFileExtension compressedFileType) throws HarvesterException;
+ HttpRecordIterator createTemporaryHttpHarvestIterator(InputStream input, CompressedFileExtension compressedFileType)
+ throws HarvesterException;
/**
* An object representing an entry in a file archive.
@@ -55,8 +55,7 @@ void harvestRecords(InputStream inputStream, CompressedFileExtension compressedF
interface ArchiveEntry {
/**
- * @return The name of the entry. This is the file name (including extension, excluding the
- * path).
+ * @return The name of the entry. This is the file name (including extension, excluding the path).
*/
String getEntryName();
From 51eb07d13190ef1cbe76303388d619b3015db054 Mon Sep 17 00:00:00 2001
From: Jorge Ortiz
Date: Mon, 7 Mar 2022 17:47:40 +0100
Subject: [PATCH 07/73] MET-4159 increased logging message
---
.../eu/europeana/metis/harvesting/http/HttpHarvesterImpl.java | 2 ++
1 file changed, 2 insertions(+)
diff --git a/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvesterImpl.java b/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvesterImpl.java
index 262dc54da..adccc46fe 100644
--- a/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvesterImpl.java
+++ b/metis-harvesting/src/main/java/eu/europeana/metis/harvesting/http/HttpHarvesterImpl.java
@@ -196,6 +196,8 @@ public void deleteIteratorContent() {
} catch (IOException e) {
LOGGER.warn("Could not delete directory.", e);
}
+ } else {
+ LOGGER.warn("Extracted directory undefined, nothing removed.");
}
}
From 32a19edb1789d54321190ab8d75bbca803a87ce3 Mon Sep 17 00:00:00 2001
From: Simon Tzanakis
Date: Tue, 8 Mar 2022 10:56:32 +0100
Subject: [PATCH 08/73] MET-4257 Remove alternative indexing environment fields
---
.../plugins/AbstractExecutablePlugin.java | 3 +--
.../plugins/AbstractIndexPluginMetadata.java | 9 ---------
.../workflow/plugins/DepublishPlugin.java | 4 ----
.../plugins/DepublishPluginMetadata.java | 9 ---------
.../plugins/IndexToPreviewPlugin.java | 12 -----------
.../plugins/IndexToPublishPlugin.java | 12 -----------
.../config/ConfigurationPropertiesHolder.java | 6 ------
.../core/rest/config/OrchestratorConfig.java | 2 --
.../main/resources/metis.properties.example | 3 ---
.../core/execution/WorkflowPostProcessor.java | 7 +------
.../service/WorkflowExecutionFactory.java | 20 -------------------
11 files changed, 2 insertions(+), 85 deletions(-)
diff --git a/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/workflow/plugins/AbstractExecutablePlugin.java b/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/workflow/plugins/AbstractExecutablePlugin.java
index c6543115f..918ba2cf1 100644
--- a/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/workflow/plugins/AbstractExecutablePlugin.java
+++ b/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/workflow/plugins/AbstractExecutablePlugin.java
@@ -156,7 +156,7 @@ DpsTask createDpsTaskForProcessPlugin(EcloudBasePluginParameters ecloudBasePlugi
}
DpsTask createDpsTaskForIndexPlugin(EcloudBasePluginParameters ecloudBasePluginParameters, String datasetId,
- boolean incrementalIndexing, Date harvestDate, boolean useAlternativeIndexingEnvironment, boolean preserveTimestamps,
+ boolean incrementalIndexing, Date harvestDate, boolean preserveTimestamps,
List datasetIdsToRedirectFrom, boolean performRedirects, String targetDatabase) {
final DateFormat dateFormat = new SimpleDateFormat(CommonStringValues.DATE_FORMAT_Z, Locale.US);
dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
@@ -165,7 +165,6 @@ DpsTask createDpsTaskForIndexPlugin(EcloudBasePluginParameters ecloudBasePluginP
extraParameters.put(PluginParameterKeys.INCREMENTAL_INDEXING, String.valueOf(incrementalIndexing));
extraParameters.put(PluginParameterKeys.HARVEST_DATE, dateFormat.format(harvestDate));
extraParameters.put(PluginParameterKeys.METIS_TARGET_INDEXING_DATABASE, targetDatabase);
- extraParameters.put(PluginParameterKeys.METIS_USE_ALT_INDEXING_ENV, String.valueOf(useAlternativeIndexingEnvironment));
extraParameters.put(PluginParameterKeys.METIS_RECORD_DATE, dateFormat.format(getStartedDate()));
extraParameters.put(PluginParameterKeys.METIS_PRESERVE_TIMESTAMPS, String.valueOf(preserveTimestamps));
extraParameters.put(PluginParameterKeys.DATASET_IDS_TO_REDIRECT_FROM, String.join(",", datasetIdsToRedirectFrom));
diff --git a/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/workflow/plugins/AbstractIndexPluginMetadata.java b/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/workflow/plugins/AbstractIndexPluginMetadata.java
index 2fd8d4c64..ccf8a7c77 100644
--- a/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/workflow/plugins/AbstractIndexPluginMetadata.java
+++ b/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/workflow/plugins/AbstractIndexPluginMetadata.java
@@ -10,7 +10,6 @@
*/
public abstract class AbstractIndexPluginMetadata extends AbstractExecutablePluginMetadata {
- private boolean useAlternativeIndexingEnvironment;
private boolean preserveTimestamps;
private boolean performRedirects;
private List datasetIdsToRedirectFrom = new ArrayList<>();
@@ -21,14 +20,6 @@ public AbstractIndexPluginMetadata() {
//Required for json serialization
}
- public boolean isUseAlternativeIndexingEnvironment() {
- return useAlternativeIndexingEnvironment;
- }
-
- public void setUseAlternativeIndexingEnvironment(boolean useAlternativeIndexingEnvironment) {
- this.useAlternativeIndexingEnvironment = useAlternativeIndexingEnvironment;
- }
-
public boolean isPreserveTimestamps() {
return preserveTimestamps;
}
diff --git a/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/workflow/plugins/DepublishPlugin.java b/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/workflow/plugins/DepublishPlugin.java
index 155e69582..4e4bb705e 100644
--- a/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/workflow/plugins/DepublishPlugin.java
+++ b/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/workflow/plugins/DepublishPlugin.java
@@ -49,13 +49,9 @@ public String getTopologyName() {
@Override
public DpsTask prepareDpsTask(String datasetId,
EcloudBasePluginParameters ecloudBasePluginParameters) {
- boolean useAlternativeIndexingEnvironment = getPluginMetadata()
- .isUseAlternativeIndexingEnvironment();
Map extraParameters = new HashMap<>();
extraParameters.put(PluginParameterKeys.METIS_DATASET_ID, datasetId);
- extraParameters.put(PluginParameterKeys.METIS_USE_ALT_INDEXING_ENV,
- String.valueOf(useAlternativeIndexingEnvironment));
//Do set the records ids parameter only if record ids depublication enabled and there are record ids
if (!getPluginMetadata().isDatasetDepublish()) {
if (CollectionUtils.isEmpty(getPluginMetadata().getRecordIdsToDepublish())) {
diff --git a/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/workflow/plugins/DepublishPluginMetadata.java b/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/workflow/plugins/DepublishPluginMetadata.java
index 890a47209..0d615b393 100644
--- a/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/workflow/plugins/DepublishPluginMetadata.java
+++ b/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/workflow/plugins/DepublishPluginMetadata.java
@@ -15,7 +15,6 @@
public class DepublishPluginMetadata extends AbstractExecutablePluginMetadata {
private static final ExecutablePluginType pluginType = ExecutablePluginType.DEPUBLISH;
- private boolean useAlternativeIndexingEnvironment;
private boolean datasetDepublish;
private Set recordIdsToDepublish;
@@ -28,14 +27,6 @@ public ExecutablePluginType getExecutablePluginType() {
return pluginType;
}
- public boolean isUseAlternativeIndexingEnvironment() {
- return useAlternativeIndexingEnvironment;
- }
-
- public void setUseAlternativeIndexingEnvironment(boolean useAlternativeIndexingEnvironment) {
- this.useAlternativeIndexingEnvironment = useAlternativeIndexingEnvironment;
- }
-
public boolean isDatasetDepublish() {
return datasetDepublish;
}
diff --git a/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/workflow/plugins/IndexToPreviewPlugin.java b/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/workflow/plugins/IndexToPreviewPlugin.java
index 01bc8aa7f..a828690ae 100644
--- a/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/workflow/plugins/IndexToPreviewPlugin.java
+++ b/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/workflow/plugins/IndexToPreviewPlugin.java
@@ -2,7 +2,6 @@
import eu.europeana.cloud.service.dps.DpsTask;
import eu.europeana.cloud.service.dps.metis.indexing.TargetIndexingDatabase;
-import eu.europeana.cloud.service.dps.metis.indexing.TargetIndexingEnvironment;
/**
* Index to Preview Plugin.
@@ -38,7 +37,6 @@ public DpsTask prepareDpsTask(String datasetId, EcloudBasePluginParameters eclou
return createDpsTaskForIndexPlugin(ecloudBasePluginParameters, datasetId,
getPluginMetadata().isIncrementalIndexing(),
getPluginMetadata().getHarvestDate(),
- getPluginMetadata().isUseAlternativeIndexingEnvironment(),
getPluginMetadata().isPreserveTimestamps(),
getPluginMetadata().getDatasetIdsToRedirectFrom(),
getPluginMetadata().isPerformRedirects(), getTargetIndexingDatabase().name());
@@ -57,14 +55,4 @@ public String getTopologyName() {
public TargetIndexingDatabase getTargetIndexingDatabase() {
return TargetIndexingDatabase.PREVIEW;
}
-
- /**
- * Get the target indexing environment.
- *
- * @return the target indexing environment
- */
- public TargetIndexingEnvironment getTargetIndexingEnvironment() {
- return getPluginMetadata().isUseAlternativeIndexingEnvironment() ? TargetIndexingEnvironment.ALTERNATIVE
- : TargetIndexingEnvironment.DEFAULT;
- }
}
diff --git a/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/workflow/plugins/IndexToPublishPlugin.java b/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/workflow/plugins/IndexToPublishPlugin.java
index d57c81ba5..368ba7128 100644
--- a/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/workflow/plugins/IndexToPublishPlugin.java
+++ b/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/workflow/plugins/IndexToPublishPlugin.java
@@ -2,7 +2,6 @@
import eu.europeana.cloud.service.dps.DpsTask;
import eu.europeana.cloud.service.dps.metis.indexing.TargetIndexingDatabase;
-import eu.europeana.cloud.service.dps.metis.indexing.TargetIndexingEnvironment;
/**
* Index to Publish Plugin.
@@ -37,7 +36,6 @@ public DpsTask prepareDpsTask(String datasetId, EcloudBasePluginParameters eclou
return createDpsTaskForIndexPlugin(ecloudBasePluginParameters, datasetId,
getPluginMetadata().isIncrementalIndexing(),
getPluginMetadata().getHarvestDate(),
- getPluginMetadata().isUseAlternativeIndexingEnvironment(),
getPluginMetadata().isPreserveTimestamps(),
getPluginMetadata().getDatasetIdsToRedirectFrom(),
getPluginMetadata().isPerformRedirects(), getTargetIndexingDatabase().name());
@@ -56,14 +54,4 @@ public String getTopologyName() {
public TargetIndexingDatabase getTargetIndexingDatabase() {
return TargetIndexingDatabase.PUBLISH;
}
-
- /**
- * Get the target indexing environment.
- *
- * @return the target indexing environment
- */
- public TargetIndexingEnvironment getTargetIndexingEnvironment() {
- return getPluginMetadata().isUseAlternativeIndexingEnvironment() ? TargetIndexingEnvironment.ALTERNATIVE
- : TargetIndexingEnvironment.DEFAULT;
- }
}
diff --git a/metis-core/metis-core-rest/src/main/java/eu/europeana/metis/core/rest/config/ConfigurationPropertiesHolder.java b/metis-core/metis-core-rest/src/main/java/eu/europeana/metis/core/rest/config/ConfigurationPropertiesHolder.java
index dafee7c30..151297a1f 100644
--- a/metis-core/metis-core-rest/src/main/java/eu/europeana/metis/core/rest/config/ConfigurationPropertiesHolder.java
+++ b/metis-core/metis-core-rest/src/main/java/eu/europeana/metis/core/rest/config/ConfigurationPropertiesHolder.java
@@ -95,8 +95,6 @@ public class ConfigurationPropertiesHolder {
private int maxDepublishRecordIdsPerDataset;
// Ecloud configuration
- @Value("${metis.use.alternative.indexing.environment}")
- private boolean metisUseAlternativeIndexingEnvironment;
@Value("${metis.link.checking.default.sampling.size}")
private int metisLinkCheckingDefaultSamplingSize;
@Value("${solr.commit.period.in.mins}")
@@ -304,10 +302,6 @@ public int getMaxDepublishRecordIdsPerDataset() {
return maxDepublishRecordIdsPerDataset;
}
- public boolean isMetisUseAlternativeIndexingEnvironment() {
- return metisUseAlternativeIndexingEnvironment;
- }
-
public int getMetisLinkCheckingDefaultSamplingSize() {
return metisLinkCheckingDefaultSamplingSize;
}
diff --git a/metis-core/metis-core-rest/src/main/java/eu/europeana/metis/core/rest/config/OrchestratorConfig.java b/metis-core/metis-core-rest/src/main/java/eu/europeana/metis/core/rest/config/OrchestratorConfig.java
index c69fbd5b5..a3069107b 100644
--- a/metis-core/metis-core-rest/src/main/java/eu/europeana/metis/core/rest/config/OrchestratorConfig.java
+++ b/metis-core/metis-core-rest/src/main/java/eu/europeana/metis/core/rest/config/OrchestratorConfig.java
@@ -139,8 +139,6 @@ public WorkflowExecutionFactory getWorkflowExecutionFactory(
.setValidationExternalProperties(propertiesHolder.getValidationExternalProperties());
workflowExecutionFactory
.setValidationInternalProperties(propertiesHolder.getValidationInternalProperties());
- workflowExecutionFactory.setMetisUseAlternativeIndexingEnvironment(
- propertiesHolder.isMetisUseAlternativeIndexingEnvironment());
workflowExecutionFactory.setDefaultSamplingSizeForLinkChecking(
propertiesHolder.getMetisLinkCheckingDefaultSamplingSize());
return workflowExecutionFactory;
diff --git a/metis-core/metis-core-rest/src/main/resources/metis.properties.example b/metis-core/metis-core-rest/src/main/resources/metis.properties.example
index f7f7050da..07bec7656 100644
--- a/metis-core/metis-core-rest/src/main/resources/metis.properties.example
+++ b/metis-core/metis-core-rest/src/main/resources/metis.properties.example
@@ -88,7 +88,4 @@ metis.core.baseUrl=
#Metis Core (regardless on whether the list is paginated).
metis.core.max.served.execution.list.length=
metis.core.max.depublish.record.ids.per.dataset=
-#In the combination of TEST and ACCEPTANCE, TEST=false, ACCEPTANCE=true
-#For the production environment it should be false
-metis.use.alternative.indexing.environment=
metis.link.checking.default.sampling.size=
\ No newline at end of file
diff --git a/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java b/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java
index 2cbc2fc9d..d6b27d2e4 100644
--- a/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java
+++ b/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java
@@ -7,7 +7,6 @@
import eu.europeana.cloud.common.model.dps.SubTaskInfo;
import eu.europeana.cloud.service.dps.exception.DpsException;
import eu.europeana.cloud.service.dps.metis.indexing.TargetIndexingDatabase;
-import eu.europeana.cloud.service.dps.metis.indexing.TargetIndexingEnvironment;
import eu.europeana.metis.core.common.DepublishRecordIdUtils;
import eu.europeana.metis.core.dao.DatasetDao;
import eu.europeana.metis.core.dao.DepublishRecordIdDao;
@@ -98,13 +97,10 @@ void performPluginPostProcessing(AbstractExecutablePlugin> plugin, String data
private void indexPostProcess(AbstractExecutablePlugin> indexPlugin, String datasetId)
throws DpsException, InvalidIndexPluginException {
TargetIndexingDatabase targetIndexingDatabase;
- TargetIndexingEnvironment targetIndexingEnvironment;
if (indexPlugin instanceof IndexToPreviewPlugin) {
targetIndexingDatabase = ((IndexToPreviewPlugin) indexPlugin).getTargetIndexingDatabase();
- targetIndexingEnvironment = ((IndexToPreviewPlugin) indexPlugin).getTargetIndexingEnvironment();
} else if (indexPlugin instanceof IndexToPublishPlugin) {
targetIndexingDatabase = ((IndexToPublishPlugin) indexPlugin).getTargetIndexingDatabase();
- targetIndexingEnvironment = ((IndexToPublishPlugin) indexPlugin).getTargetIndexingEnvironment();
//Reset depublish status
depublishRecordIdDao.markRecordIdsWithDepublicationStatus(datasetId, null,
DepublicationStatus.PENDING_DEPUBLICATION, null);
@@ -112,8 +108,7 @@ private void indexPostProcess(AbstractExecutablePlugin> indexPlugin, String da
throw new InvalidIndexPluginException("Plugin is not of the types supported");
}
final Integer databaseTotalRecords = retryableExternalRequestForNetworkExceptionsThrowing(() ->
- (int) dpsClient.getTotalMetisDatabaseRecords(datasetId, targetIndexingDatabase,
- targetIndexingEnvironment));
+ (int) dpsClient.getTotalMetisDatabaseRecords(datasetId, targetIndexingDatabase));
indexPlugin.getExecutionProgress().setTotalDatabaseRecords(databaseTotalRecords);
}
diff --git a/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/service/WorkflowExecutionFactory.java b/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/service/WorkflowExecutionFactory.java
index bac712f4a..d0f0c644f 100644
--- a/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/service/WorkflowExecutionFactory.java
+++ b/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/service/WorkflowExecutionFactory.java
@@ -50,7 +50,6 @@ public class WorkflowExecutionFactory {
private ValidationProperties validationExternalProperties; // Use getter and setter!
private ValidationProperties validationInternalProperties; // Use getter and setter!
- private boolean metisUseAlternativeIndexingEnvironment; // Use getter and setter for this field!
private int defaultSamplingSizeForLinkChecking; // Use getter and setter for this field!
/**
@@ -112,24 +111,18 @@ private AbstractExecutablePlugin> createWorkflowExecutionPlugin(Dataset datase
this.setupValidationInternalForPluginMetadata(
(ValidationInternalPluginMetadata) pluginMetadata, getValidationInternalProperties());
} else if (pluginMetadata instanceof IndexToPreviewPluginMetadata) {
- ((IndexToPreviewPluginMetadata) pluginMetadata)
- .setUseAlternativeIndexingEnvironment(isMetisUseAlternativeIndexingEnvironment());
((IndexToPreviewPluginMetadata) pluginMetadata)
.setDatasetIdsToRedirectFrom(dataset.getDatasetIdsToRedirectFrom());
boolean performRedirects = shouldRedirectsBePerformed(dataset, workflowPredecessor,
ExecutablePluginType.PREVIEW, typesInWorkflowBeforeThisPlugin);
((IndexToPreviewPluginMetadata) pluginMetadata).setPerformRedirects(performRedirects);
} else if (pluginMetadata instanceof IndexToPublishPluginMetadata) {
- ((IndexToPublishPluginMetadata) pluginMetadata)
- .setUseAlternativeIndexingEnvironment(isMetisUseAlternativeIndexingEnvironment());
((IndexToPublishPluginMetadata) pluginMetadata)
.setDatasetIdsToRedirectFrom(dataset.getDatasetIdsToRedirectFrom());
boolean performRedirects = shouldRedirectsBePerformed(dataset, workflowPredecessor,
ExecutablePluginType.PUBLISH, typesInWorkflowBeforeThisPlugin);
((IndexToPublishPluginMetadata) pluginMetadata).setPerformRedirects(performRedirects);
} else if (pluginMetadata instanceof DepublishPluginMetadata) {
- ((DepublishPluginMetadata) pluginMetadata)
- .setUseAlternativeIndexingEnvironment(isMetisUseAlternativeIndexingEnvironment());
setupDepublishPluginMetadata(dataset, ((DepublishPluginMetadata) pluginMetadata));
} else if (pluginMetadata instanceof LinkCheckingPluginMetadata) {
((LinkCheckingPluginMetadata) pluginMetadata)
@@ -306,19 +299,6 @@ public void setValidationInternalProperties(ValidationProperties validationInter
}
}
- private boolean isMetisUseAlternativeIndexingEnvironment() {
- synchronized (this) {
- return metisUseAlternativeIndexingEnvironment;
- }
- }
-
- public void setMetisUseAlternativeIndexingEnvironment(
- boolean metisUseAlternativeIndexingEnvironment) {
- synchronized (this) {
- this.metisUseAlternativeIndexingEnvironment = metisUseAlternativeIndexingEnvironment;
- }
- }
-
private int getDefaultSamplingSizeForLinkChecking() {
synchronized (this) {
return defaultSamplingSizeForLinkChecking;
From 9961a3d807d587feeb345f55e1ef9d386e6e8bad Mon Sep 17 00:00:00 2001
From: Simon Tzanakis
Date: Tue, 8 Mar 2022 14:20:55 +0100
Subject: [PATCH 09/73] MET-4310 Reduce connection pool on mongo clients
---
.../mongo/connection/MongoClientProvider.java | 92 ++++++++++++-------
.../mongo/connection/MongoProperties.java | 22 ++++-
.../connection/MongoClientProviderTest.java | 43 +++++----
.../mongo/connection/MongoPropertiesTest.java | 3 +-
4 files changed, 105 insertions(+), 55 deletions(-)
diff --git a/metis-common/metis-common-mongo/src/main/java/eu/europeana/metis/mongo/connection/MongoClientProvider.java b/metis-common/metis-common-mongo/src/main/java/eu/europeana/metis/mongo/connection/MongoClientProvider.java
index 7b6ae4333..fb79ff0ed 100644
--- a/metis-common/metis-common-mongo/src/main/java/eu/europeana/metis/mongo/connection/MongoClientProvider.java
+++ b/metis-common/metis-common-mongo/src/main/java/eu/europeana/metis/mongo/connection/MongoClientProvider.java
@@ -9,6 +9,7 @@
import com.mongodb.ServerAddress;
import com.mongodb.client.MongoClient;
import com.mongodb.client.MongoClients;
+import com.mongodb.connection.ConnectionPoolSettings;
import eu.europeana.metis.mongo.connection.MongoProperties.ReadPreferenceValue;
import java.util.List;
import java.util.Optional;
@@ -17,8 +18,7 @@
import java.util.function.Supplier;
/**
- * This class can set up and provide a Mongo client given the Mongo properties. It applies the
- * following default values:
+ * This class can set up and provide a Mongo client given the Mongo properties. It applies the following default values:
*
* -
* The read preference for the connection is defaulted to {@link ReadPreference#secondaryPreferred()}.
@@ -46,6 +46,7 @@ public class MongoClientProvider {
private static final ReadPreference DEFAULT_READ_PREFERENCE = ReadPreference.secondaryPreferred();
private static final int DEFAULT_MAX_CONNECTION_IDLE_MILLIS = 30_000;
+ private static final int DEFAULT_MAX_CONNECTIONS = 20;
private static final boolean DEFAULT_RETRY_WRITES = false;
private static final String DEFAULT_APPLICATION_NAME = "Europeana Application Suite";
@@ -53,8 +54,8 @@ public class MongoClientProvider {
private final String authenticationDatabase;
/**
- * Constructor from a connection URI string (see the documentation of {@link MongoClientURI} for
- * the details). The connection URL can provide settings that will override the default settings.
+ * Constructor from a connection URI string (see the documentation of {@link MongoClientURI} for the details). The connection
+ * URL can provide settings that will override the default settings.
*
* @param connectionUri The connection URI as a string
* @param exceptionCreator How to report exceptions.
@@ -79,28 +80,27 @@ public MongoClientProvider(String connectionUri, Function exceptionCr
}
/**
- * Constructor from a {@link MongoProperties} object. The caller needs to provide settings that
- * will be used instead of the default settings.
+ * Constructor from a {@link MongoProperties} object. The caller needs to provide settings that will be used instead of the
+ * default settings.
*
- * @param properties The properties of the Mongo connection. Note that if the passed properties
- * object is changed after calling this method, those changes will not be reflected when creating
- * mongo clients.
- * @param clientSettingsBuilder The settings to be applied. The default settings will not be used.
- * The caller can however choose to incorporate the default settings as needed by using a client
- * settings builder obtained from {@link #getDefaultClientSettingsBuilder()} as input.
+ * @param properties The properties of the Mongo connection. Note that if the passed properties object is changed after calling
+ * this method, those changes will not be reflected when creating mongo clients.
+ * @param clientSettingsBuilder The settings to be applied. The default settings will not be used. The caller can however choose
+ * to incorporate the default settings as needed by using a client settings builder obtained from {@link
+ * #getDefaultClientSettingsBuilder()} as input.
* @throws E In case the properties are wrong
*/
public MongoClientProvider(MongoProperties properties, Builder clientSettingsBuilder)
throws E {
final ReadPreference readPreference = Optional.ofNullable(properties.getReadPreferenceValue())
- .map(ReadPreferenceValue::getReadPreferenceSupplier).map(Supplier::get)
- .orElse(DEFAULT_READ_PREFERENCE);
+ .map(ReadPreferenceValue::getReadPreferenceSupplier).map(Supplier::get)
+ .orElse(DEFAULT_READ_PREFERENCE);
clientSettingsBuilder.readPreference(readPreference);
final List mongoHosts = properties.getMongoHosts();
final MongoCredential mongoCredential = properties.getMongoCredentials();
this.authenticationDatabase = Optional.ofNullable(mongoCredential)
- .map(MongoCredential::getSource).orElse(null);
+ .map(MongoCredential::getSource).orElse(null);
clientSettingsBuilder
.applyToSslSettings(builder -> builder.enabled(properties.mongoEnableSsl()));
clientSettingsBuilder.applyToClusterSettings(builder -> builder.hosts(mongoHosts));
@@ -109,6 +109,9 @@ public MongoClientProvider(MongoProperties properties, Builder clientSettings
}
Optional.ofNullable(properties.getApplicationName()).filter(name -> !name.isBlank())
.ifPresent(clientSettingsBuilder::applicationName);
+
+ clientSettingsBuilder.applyToConnectionPoolSettings(
+ builder -> builder.applySettings(createConnectionPoolSettings(properties.getMaxConnectionPoolSize())));
final MongoClientSettings mongoClientSettings = clientSettingsBuilder.build();
this.creator = () -> MongoClients.create(mongoClientSettings);
@@ -117,9 +120,8 @@ public MongoClientProvider(MongoProperties properties, Builder clientSettings
/**
* Constructor from a {@link MongoProperties} object, using the default settings.
*
- * @param properties The properties of the Mongo connection. Note that if the passed properties
- * object is changed after calling this method, those changes will not be reflected when calling
- * {@link #createMongoClient()}.
+ * @param properties The properties of the Mongo connection. Note that if the passed properties object is changed after calling
+ * this method, those changes will not be reflected when calling {@link #createMongoClient()}.
* @throws E In case the properties are wrong
*/
public MongoClientProvider(MongoProperties properties) throws E {
@@ -131,19 +133,17 @@ public MongoClientProvider(MongoProperties properties) throws E {
*
* @return A new instance of {@link Builder} with the default settings.
*/
- public static Builder getDefaultClientSettingsBuilder() {
+ public static MongoClientSettings.Builder getDefaultClientSettingsBuilder() {
return MongoClientSettings.builder()
- // TODO: 7/16/20 Remove default retry writes after upgrade to mongo server version 4.2
- .retryWrites(DEFAULT_RETRY_WRITES)
- .applyToConnectionPoolSettings(builder -> builder
- .maxConnectionIdleTime(DEFAULT_MAX_CONNECTION_IDLE_MILLIS, TimeUnit.MILLISECONDS))
- .readPreference(DEFAULT_READ_PREFERENCE)
- .applicationName(DEFAULT_APPLICATION_NAME);
+ // TODO: 7/16/20 Remove default retry writes after upgrade to mongo server version 4.2
+ .retryWrites(DEFAULT_RETRY_WRITES)
+ .applyToConnectionPoolSettings(builder -> builder.applySettings(getDefaultConnectionPoolSettings()))
+ .readPreference(DEFAULT_READ_PREFERENCE)
+ .applicationName(DEFAULT_APPLICATION_NAME);
}
/**
- * Convenience method for {@link #MongoClientProvider(String, Function)}. See that
- * constructor for the details.
+ * Convenience method for {@link #MongoClientProvider(String, Function)}. See that constructor for the details.
*
* @param connectionUri The connection URI.
* @return An instance.
@@ -153,8 +153,7 @@ public static MongoClientProvider create(String connec
}
/**
- * Convenience method for {@link #MongoClientProvider(String, Function)}. See that
- * constructor for the details.
+ * Convenience method for {@link #MongoClientProvider(String, Function)}. See that constructor for the details.
*
* @param connectionUri The connection URI.
* @return A supplier for {@link MongoClient} instances based on this class.
@@ -164,8 +163,17 @@ public static Supplier createAsSupplier(String connectionUri) {
}
/**
- * Returns the authentication database for mongo connections that are provided. Can be null
- * (signifying that the default is to be used or that no authentication is specified).
+ * Get the default connection pool settings
+ *
+ * @return the default connection pool settings
+ */
+ private static ConnectionPoolSettings getDefaultConnectionPoolSettings() {
+ return createConnectionPoolSettings(null);
+ }
+
+ /**
+ * Returns the authentication database for mongo connections that are provided. Can be null (signifying that the default is to
+ * be used or that no authentication is specified).
*
* @return The authentication database.
*/
@@ -174,9 +182,8 @@ public final String getAuthenticationDatabase() {
}
/**
- * Creates a Mongo client. This method can be called multiple times and will create and return a
- * different client each time. The calling code is responsible for properly closing the created
- * client.
+ * Creates a Mongo client. This method can be called multiple times and will create and return a different client each time. The
+ * calling code is responsible for properly closing the created client.
*
* @return A mongo client.
* @throws E In case there is a problem with creating the client.
@@ -185,6 +192,23 @@ public final MongoClient createMongoClient() throws E {
return creator.createMongoClient();
}
+ /**
+ * Create a connection pool settings object. A null or non-positive maximum pool size falls back to the default size.
+ *
+ * @param maxPoolSize the maximum connection pool size
+ * @return the connection pool settings
+ */
+ static ConnectionPoolSettings createConnectionPoolSettings(Integer maxPoolSize) {
+ final ConnectionPoolSettings.Builder builder = ConnectionPoolSettings.builder();
+ builder.maxConnectionIdleTime(DEFAULT_MAX_CONNECTION_IDLE_MILLIS, TimeUnit.MILLISECONDS);
+ if (maxPoolSize != null && maxPoolSize > 0) {
+ builder.maxSize(maxPoolSize);
+ } else {
+ builder.maxSize(DEFAULT_MAX_CONNECTIONS);
+ }
+ return builder.build();
+ }
+
private interface MongoClientCreator {
MongoClient createMongoClient() throws E;
diff --git a/metis-common/metis-common-mongo/src/main/java/eu/europeana/metis/mongo/connection/MongoProperties.java b/metis-common/metis-common-mongo/src/main/java/eu/europeana/metis/mongo/connection/MongoProperties.java
index 497215e66..e71ee360d 100644
--- a/metis-common/metis-common-mongo/src/main/java/eu/europeana/metis/mongo/connection/MongoProperties.java
+++ b/metis-common/metis-common-mongo/src/main/java/eu/europeana/metis/mongo/connection/MongoProperties.java
@@ -28,6 +28,7 @@ public class MongoProperties {
private MongoCredential mongoCredentials;
private boolean mongoEnableSsl;
private ReadPreferenceValue readPreferenceValue;
+ private Integer maxConnectionPoolSize;
private String applicationName;
/**
@@ -150,8 +151,16 @@ public void setReadPreferenceValue(ReadPreferenceValue readPreferenceValue) {
}
/**
- * Set the application name. Can be null, in which case a default generic application name is
- * to be used.
+ * Get the maximum connection pool size
+ *
+ * @return the maximum connection pool size
+ */
+ public Integer getMaxConnectionPoolSize() {
+ return maxConnectionPoolSize;
+ }
+
+ /**
+ * Set the application name. Can be null, in which case a default generic application name is to be used.
*
* @param applicationName The application name, or null for the default.
*/
@@ -221,6 +230,15 @@ public ReadPreferenceValue getReadPreferenceValue() {
return readPreferenceValue;
}
+ /**
+ * Set the maximum connection pool size. Can be null, in which case the default applies.
+ *
+ * @param maxConnectionPoolSize the maximum connection pool size
+ */
+ public void setMaxConnectionPoolSize(Integer maxConnectionPoolSize) {
+ this.maxConnectionPoolSize = maxConnectionPoolSize;
+ }
+
/**
* This method returns the value of the application name (or null for the default).
*
diff --git a/metis-common/metis-common-mongo/src/test/java/eu/europeana/metis/mongo/connection/MongoClientProviderTest.java b/metis-common/metis-common-mongo/src/test/java/eu/europeana/metis/mongo/connection/MongoClientProviderTest.java
index 4b68389e5..145bba9fc 100644
--- a/metis-common/metis-common-mongo/src/test/java/eu/europeana/metis/mongo/connection/MongoClientProviderTest.java
+++ b/metis-common/metis-common-mongo/src/test/java/eu/europeana/metis/mongo/connection/MongoClientProviderTest.java
@@ -8,8 +8,7 @@
import com.mongodb.MongoClientSettings;
import com.mongodb.ReadPreference;
import com.mongodb.client.MongoClient;
-import eu.europeana.metis.mongo.connection.MongoClientProvider;
-import eu.europeana.metis.mongo.connection.MongoProperties;
+import com.mongodb.connection.ConnectionPoolSettings;
import eu.europeana.metis.mongo.embedded.EmbeddedLocalhostMongo;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;
@@ -40,14 +39,26 @@ static void tearDown() {
embeddedLocalhostMongo.stop();
}
- @Test
- void getDefaultClientSettingsBuilder() {
- MongoClientSettings.Builder actual = MongoClientProvider.getDefaultClientSettingsBuilder();
+ private static MongoProperties getMongoProperties() {
+ final String mongoHost = embeddedLocalhostMongo.getMongoHost();
+ final int mongoPort = embeddedLocalhostMongo.getMongoPort();
+ final MongoProperties mongoProperties = new MongoProperties<>(
+ IllegalArgumentException::new);
+ mongoProperties.setMongoHosts(new String[]{mongoHost}, new int[]{mongoPort});
+ mongoProperties.setMongoCredentials("user", "wachtwoord", "authenticationDB");
+ mongoProperties.setApplicationName(DATABASE_NAME);
+ mongoProperties.setMaxConnectionPoolSize(10);
+ return mongoProperties;
+ }
- assertFalse(actual.build().getRetryWrites());
- assertEquals(ReadPreference.secondaryPreferred(), actual.build().getReadPreference());
- assertEquals("Europeana Application Suite", actual.build().getApplicationName());
- assertEquals(30_000, actual.build().getConnectionPoolSettings().getMaxConnectionIdleTime(TimeUnit.MILLISECONDS));
+ @Test
+ void getClientSettingsBuilder() {
+ final MongoClientSettings mongoClientSettings = MongoClientProvider.getDefaultClientSettingsBuilder().build();
+ assertFalse(mongoClientSettings.getRetryWrites());
+ assertEquals(ReadPreference.secondaryPreferred(), mongoClientSettings.getReadPreference());
+ assertEquals("Europeana Application Suite", mongoClientSettings.getApplicationName());
+ assertEquals(30_000, mongoClientSettings.getConnectionPoolSettings().getMaxConnectionIdleTime(TimeUnit.MILLISECONDS));
+ assertEquals(20, mongoClientSettings.getConnectionPoolSettings().getMaxSize());
}
@Test
@@ -87,14 +98,10 @@ void createMongoClient() {
assertTrue(mongoClient instanceof MongoClient);
}
- private static MongoProperties getMongoProperties() {
- final String mongoHost = embeddedLocalhostMongo.getMongoHost();
- final int mongoPort = embeddedLocalhostMongo.getMongoPort();
- final MongoProperties mongoProperties = new MongoProperties<>(
- IllegalArgumentException::new);
- mongoProperties.setMongoHosts(new String[]{mongoHost}, new int[]{mongoPort});
- mongoProperties.setMongoCredentials("user","wachtwoord","authenticationDB");
- mongoProperties.setApplicationName(DATABASE_NAME);
- return mongoProperties;
+ @Test
+ void createConnectionPoolSettings() {
+ final ConnectionPoolSettings connectionPoolSettings = MongoClientProvider.createConnectionPoolSettings(10);
+ assertEquals(30_000, connectionPoolSettings.getMaxConnectionIdleTime(TimeUnit.MILLISECONDS));
+ assertEquals(10, connectionPoolSettings.getMaxSize());
}
}
\ No newline at end of file
diff --git a/metis-common/metis-common-mongo/src/test/java/eu/europeana/metis/mongo/connection/MongoPropertiesTest.java b/metis-common/metis-common-mongo/src/test/java/eu/europeana/metis/mongo/connection/MongoPropertiesTest.java
index 4df4b749a..321694e9a 100644
--- a/metis-common/metis-common-mongo/src/test/java/eu/europeana/metis/mongo/connection/MongoPropertiesTest.java
+++ b/metis-common/metis-common-mongo/src/test/java/eu/europeana/metis/mongo/connection/MongoPropertiesTest.java
@@ -5,7 +5,6 @@
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
-import eu.europeana.metis.mongo.connection.MongoProperties;
import eu.europeana.metis.mongo.connection.MongoProperties.ReadPreferenceValue;
import java.net.InetSocketAddress;
import org.junit.jupiter.api.Test;
@@ -33,6 +32,8 @@ void setAllProperties() throws Exception {
"testAplication");
assertMongoProperties(mongoProperties);
+ mongoProperties.setMaxConnectionPoolSize(10);
+ assertEquals(10, mongoProperties.getMaxConnectionPoolSize());
}
@Test
From 65086ba80a8ce3d3d5fefca54450b286096d25e2 Mon Sep 17 00:00:00 2001
From: Adolfo Peixinho
Date: Thu, 10 Mar 2022 15:44:37 +0100
Subject: [PATCH 10/73] MET-4237 Refactored implementation to get chunked tasks
from e-cloud client.
---
.../core/execution/WorkflowPostProcessor.java | 64 +++++++++++--------
1 file changed, 39 insertions(+), 25 deletions(-)
diff --git a/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java b/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java
index ceb8d6046..26046cd6f 100644
--- a/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java
+++ b/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java
@@ -49,7 +49,7 @@ public class WorkflowPostProcessor {
private static final Logger LOGGER = LoggerFactory.getLogger(WorkflowPostProcessor.class);
- private static final int ECLOUD_REQUEST_BATCH_SIZE = 100;
+ private static final int ECLOUD_REQUEST_BATCH_SIZE = 1000;
private final DepublishRecordIdDao depublishRecordIdDao;
private final DatasetDao datasetDao;
@@ -116,21 +116,35 @@ private void indexPostProcess(AbstractExecutablePlugin> indexPlugin, String da
targetIndexingDatabase = ((IndexToPublishPlugin) indexPlugin).getTargetIndexingDatabase();
targetIndexingEnvironment = ((IndexToPublishPlugin) indexPlugin).getTargetIndexingEnvironment();
- // get all tasks from dataset id and topology name
- List taskReport = dpsClient.getDetailedTaskReport(indexPlugin.getTopologyName(),
- Long.parseLong(indexPlugin.getExternalTaskId()));
- // get all currently de-published records ids
- Set depublishedRecordIds = depublishRecordIdDao
- .getAllDepublishRecordIdsWithStatus(datasetId, DepublishRecordIdSortField.DEPUBLICATION_STATE, SortDirection.ASCENDING,
- DepublicationStatus.DEPUBLISHED);
- // filter the record ids that are a part of the given report, to be de-published
- Set recordIdsToDepublish = taskReport.stream()
- .filter(taskInfo -> depublishedRecordIds.contains(taskInfo.getEuropeanaId()))
- .map(SubTaskInfo::getEuropeanaId).collect(Collectors.toSet());
-
- // reset de-publish status
- depublishRecordIdDao.markRecordIdsWithDepublicationStatus(datasetId, recordIdsToDepublish,
- DepublicationStatus.PENDING_DEPUBLICATION, null);
+ final long totalRecords = dpsClient.getTotalMetisDatabaseRecords(indexPlugin.getExternalTaskId(),
+ ((IndexToPublishPlugin) indexPlugin).getTargetIndexingDatabase());
+ List subTaskInfoList;
+
+ // get chunked tasks from dataset id and topology name
+ for (int i = 0; i < totalRecords; i = +ECLOUD_REQUEST_BATCH_SIZE) {
+ subTaskInfoList = dpsClient.getDetailedTaskReportBetweenChunks(indexPlugin.getTopologyName(),
+ Long.parseLong(indexPlugin.getExternalTaskId()), i, i + ECLOUD_REQUEST_BATCH_SIZE);
+ if (i >= totalRecords) {
+ subTaskInfoList = dpsClient.getDetailedTaskReportBetweenChunks(indexPlugin.getTopologyName(),
+ Long.parseLong(indexPlugin.getExternalTaskId()), (int) (totalRecords - (totalRecords % ECLOUD_REQUEST_BATCH_SIZE)),
+ (int) totalRecords);
+ }
+ // get all currently de-published records ids
+ Set depublishedRecordIds = depublishRecordIdDao
+ .getAllDepublishRecordIdsWithStatus(datasetId, DepublishRecordIdSortField.DEPUBLICATION_STATE,
+ SortDirection.ASCENDING,
+ DepublicationStatus.DEPUBLISHED);
+
+ // TODO: what if it's incremental
+ // filter the record ids that are a part of the given report, to be de-published
+ Set recordIdsToDepublish = subTaskInfoList.stream()
+ .filter(taskInfo -> depublishedRecordIds.contains(
+ taskInfo.getEuropeanaId()))
+ .map(SubTaskInfo::getEuropeanaId).collect(Collectors.toSet());
+ // reset de-publish status
+ depublishRecordIdDao.markRecordIdsWithDepublicationStatus(datasetId, recordIdsToDepublish,
+ DepublicationStatus.PENDING_DEPUBLICATION, null);
+ }
} else {
throw new InvalidIndexPluginException("Plugin is not of the types supported");
}
@@ -207,16 +221,16 @@ private void depublishRecordPostProcess(DepublishPlugin depublishPlugin, String
// Mark the records as DEPUBLISHED.
final Map> successfulRecords = subTasks.stream()
- .filter(subTask ->
- subTask.getRecordState()
- == RecordState.SUCCESS)
- .map(SubTaskInfo::getResource).map(
+ .filter(subTask ->
+ subTask.getRecordState()
+ == RecordState.SUCCESS)
+ .map(SubTaskInfo::getResource).map(
DepublishRecordIdUtils::decomposeFullRecordId)
- .collect(Collectors.groupingBy(
- Pair::getLeft,
- Collectors.mapping(
- Pair::getRight,
- Collectors.toSet())));
+ .collect(Collectors.groupingBy(
+ Pair::getLeft,
+ Collectors.mapping(
+ Pair::getRight,
+ Collectors.toSet())));
successfulRecords.forEach((dataset, records) ->
depublishRecordIdDao.markRecordIdsWithDepublicationStatus(dataset, records,
DepublicationStatus.DEPUBLISHED, new Date()));
From 53812ed805c3aa1ce81718346c6319cf612860b4 Mon Sep 17 00:00:00 2001
From: Adolfo Peixinho
Date: Thu, 10 Mar 2022 17:57:00 +0100
Subject: [PATCH 11/73] MET-4237 Wrong logic in increment. Math is not
Programming. Commutative Identity does not apply a+b==b+a
---
.../europeana/metis/core/execution/WorkflowPostProcessor.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java b/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java
index 26046cd6f..3e022327b 100644
--- a/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java
+++ b/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java
@@ -121,7 +121,7 @@ private void indexPostProcess(AbstractExecutablePlugin> indexPlugin, String da
List subTaskInfoList;
// get chunked tasks from dataset id and topology name
- for (int i = 0; i < totalRecords; i = +ECLOUD_REQUEST_BATCH_SIZE) {
+ for (int i = 0; i < totalRecords; i +=ECLOUD_REQUEST_BATCH_SIZE) {
subTaskInfoList = dpsClient.getDetailedTaskReportBetweenChunks(indexPlugin.getTopologyName(),
Long.parseLong(indexPlugin.getExternalTaskId()), i, i + ECLOUD_REQUEST_BATCH_SIZE);
if (i >= totalRecords) {
From ae7cd0c914517c7a1bb2b40fd5b73adf552eec0a Mon Sep 17 00:00:00 2001
From: Adolfo Peixinho
Date: Thu, 10 Mar 2022 18:01:13 +0100
Subject: [PATCH 12/73] MET-4237 Code cleanup
---
.../eu/europeana/metis/core/execution/WorkflowPostProcessor.java | 1 -
1 file changed, 1 deletion(-)
diff --git a/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java b/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java
index 3e022327b..264efbe81 100644
--- a/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java
+++ b/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java
@@ -211,7 +211,6 @@ private void depublishRecordPostProcess(DepublishPlugin depublishPlugin, String
final List subTasks = new ArrayList<>();
List subTasksBatch;
do {
- // need to change dpsCline call
subTasksBatch = retryableExternalRequestForNetworkExceptionsThrowing(
() -> dpsClient.getDetailedTaskReportBetweenChunks(
depublishPlugin.getTopologyName(), externalTaskId, subTasks.size(),
From 0cd58faab18093045d1595e76eb88bc410706e1d Mon Sep 17 00:00:00 2001
From: Simon Tzanakis
Date: Tue, 15 Mar 2022 14:16:43 +0100
Subject: [PATCH 13/73] Debt/met 4250 refactor code to remove mock maker inline
(#508)
* MET-4250 Update NetworkUtil
* MET-4250 Update RdfConversionUtils
* MET-4250 Javadocs and cleanup
* MET-4250 Remove mockito inline from root pom
---
.../rest/client/TestAuthenticationClient.java | 2 +-
.../embedded/EmbeddedLocalhostMongo.java | 2 +-
metis-common/metis-common-network/pom.xml | 5 -
.../europeana/metis/network/NetworkUtil.java | 25 +--
.../network/ExternalRequestUtilTest.java | 22 +--
.../metis/network/NetworkUtilTest.java | 21 +--
.../metis/network/StringHttpClientTest.java | 14 +-
.../europeana/metis/zoho/ZohoUtilsTest.java | 4 +-
.../core/service/TestDatasetService.java | 4 +-
.../core/service/TestProxiesService.java | 2 +-
.../rest/client/EnrichmentWorkerImpl.java | 9 +-
.../enrichment/MetisRecordParserTest.java | 7 +-
.../utils/EntityMergeEngineTest.java | 6 +-
.../oaipmh/CloseableHttpOaiClientTest.java | 2 +-
.../oaipmh/OaiHarvesterImplTest.java | 2 +-
.../eu/europeana/indexing/IndexerImpl.java | 101 ++++++------
.../RecordTierCalculationViewGenerator.java | 15 +-
.../metadata/ContextualClassesClassifier.java | 4 +-
.../metadata/EnablingElementsClassifier.java | 6 +-
.../tiers/metadata/LanguageClassifier.java | 4 +-
.../media/AbstractMediaClassifierTest.java | 4 +-
.../extraction/AudioVideoProcessorTest.java | 10 +-
metis-schema/pom.xml | 6 -
.../schema/convert/RdfConversionUtils.java | 145 +++++++++---------
.../convert/RdfConversionUtilsTest.java | 79 ++++------
.../src/test/java/TestValidationClient.java | 2 +-
.../src/test/java/TestApplication.java | 2 +-
.../src/test/java/TestApplication.java | 2 +-
.../src/test/java/TestSchemaProvider.java | 2 +-
.../test/java/TestValidationExecution.java | 1 -
pom.xml | 6 -
31 files changed, 250 insertions(+), 266 deletions(-)
diff --git a/metis-authentication/metis-authentication-rest-client/src/test/java/eu/europeana/metis/authentication/rest/client/TestAuthenticationClient.java b/metis-authentication/metis-authentication-rest-client/src/test/java/eu/europeana/metis/authentication/rest/client/TestAuthenticationClient.java
index 07e4633b5..16fd1c40b 100644
--- a/metis-authentication/metis-authentication-rest-client/src/test/java/eu/europeana/metis/authentication/rest/client/TestAuthenticationClient.java
+++ b/metis-authentication/metis-authentication-rest-client/src/test/java/eu/europeana/metis/authentication/rest/client/TestAuthenticationClient.java
@@ -28,7 +28,7 @@ class TestAuthenticationClient {
static {
try {
- portForWireMock = NetworkUtil.getAvailableLocalPort();
+ portForWireMock = new NetworkUtil().getAvailableLocalPort();
} catch (IOException e) {
e.printStackTrace();
}
diff --git a/metis-common/metis-common-mongo/src/main/java/eu/europeana/metis/mongo/embedded/EmbeddedLocalhostMongo.java b/metis-common/metis-common-mongo/src/main/java/eu/europeana/metis/mongo/embedded/EmbeddedLocalhostMongo.java
index 3667fb795..5249b59a3 100644
--- a/metis-common/metis-common-mongo/src/main/java/eu/europeana/metis/mongo/embedded/EmbeddedLocalhostMongo.java
+++ b/metis-common/metis-common-mongo/src/main/java/eu/europeana/metis/mongo/embedded/EmbeddedLocalhostMongo.java
@@ -39,7 +39,7 @@ public EmbeddedLocalhostMongo() {
public void start() {
if (mongodExecutable == null) {
try {
- mongoPort = NetworkUtil.getAvailableLocalPort();
+ mongoPort = new NetworkUtil().getAvailableLocalPort();
RuntimeConfig runtimeConfig = Defaults.runtimeConfigFor(Command.MongoD, LOGGER)
.processOutput(ProcessOutput.getDefaultInstanceSilent())
.build();
diff --git a/metis-common/metis-common-network/pom.xml b/metis-common/metis-common-network/pom.xml
index c13c1ed7e..362a47011 100644
--- a/metis-common/metis-common-network/pom.xml
+++ b/metis-common/metis-common-network/pom.xml
@@ -52,11 +52,6 @@
org.mockito
mockito-core
-
- org.mockito
- mockito-inline
- test
-
org.glassfish.jersey.core
jersey-common
diff --git a/metis-common/metis-common-network/src/main/java/eu/europeana/metis/network/NetworkUtil.java b/metis-common/metis-common-network/src/main/java/eu/europeana/metis/network/NetworkUtil.java
index efce93017..ee3ac060d 100644
--- a/metis-common/metis-common-network/src/main/java/eu/europeana/metis/network/NetworkUtil.java
+++ b/metis-common/metis-common-network/src/main/java/eu/europeana/metis/network/NetworkUtil.java
@@ -3,6 +3,7 @@
import java.io.IOException;
import java.net.InetAddress;
import java.net.ServerSocket;
+import javax.net.ServerSocketFactory;
import javax.net.ssl.SSLServerSocketFactory;
/**
@@ -11,26 +12,30 @@
* @author Simon Tzanakis (Simon.Tzanakis@europeana.eu)
* @since 2017-02-24
*/
-public final class NetworkUtil {
+public class NetworkUtil {
private static final int BACKLOG = 100;
- private NetworkUtil() {
- }
-
/**
- * This method can be used in JUnit tests to get a random available port on localhost to run a
- * service. It should not be used for normal operation, otherwise ssl checks should be followed to
- * avoid man-in-the-middle attacks.
+ * This method can be used in JUnit tests to get a random available port on localhost to run a service. It should not be used
+ * for normal operation, otherwise ssl checks should be followed to avoid man-in-the-middle attacks.
*
* @return the available port number
* @throws IOException if the specified localhost is not available
*/
- public static int getAvailableLocalPort() throws IOException {
- ServerSocket s = SSLServerSocketFactory.getDefault()
- .createServerSocket(0, BACKLOG, InetAddress.getByName("localhost"));
+ public int getAvailableLocalPort() throws IOException {
+ ServerSocket s = getServerSocketFactory().createServerSocket(0, BACKLOG, InetAddress.getByName("localhost"));
int localPort = s.getLocalPort();
s.close();
return localPort;
}
+
+ /**
+ * Get a server socket factory.
+ *
+ * @return the server socket factory
+ */
+ ServerSocketFactory getServerSocketFactory() {
+ return SSLServerSocketFactory.getDefault();
+ }
}
diff --git a/metis-common/metis-common-network/src/test/java/eu/europeana/metis/network/ExternalRequestUtilTest.java b/metis-common/metis-common-network/src/test/java/eu/europeana/metis/network/ExternalRequestUtilTest.java
index 8a06425b5..eb254c8f0 100644
--- a/metis-common/metis-common-network/src/test/java/eu/europeana/metis/network/ExternalRequestUtilTest.java
+++ b/metis-common/metis-common-network/src/test/java/eu/europeana/metis/network/ExternalRequestUtilTest.java
@@ -64,25 +64,19 @@ void testRetryableExternalRequestWithMap() {
@Test
void testRetryableExternalRequestThrowsExceptionOutOfSpecifiedMap() {
- assertThrows(RuntimeException.class, () -> {
- ExternalRequestUtil.retryableExternalRequest(
- () -> {
- throw new RuntimeException(new ClassNotFoundException("Class pointer test exception"));
- },
- UNMODIFIABLE_MAP_WITH_TEST_EXCEPTIONS);
- });
+ assertThrows(RuntimeException.class, () -> ExternalRequestUtil.retryableExternalRequest(
+ () -> {
+ throw new RuntimeException(new ClassNotFoundException("Class pointer test exception"));
+ }, UNMODIFIABLE_MAP_WITH_TEST_EXCEPTIONS));
}
@Disabled("TODO: MET-4255 Improve execution time")
@Test
void testRetryableExternalRequestThrowsException() {
- assertThrows(RuntimeException.class, () -> {
- ExternalRequestUtil.retryableExternalRequest(
- () -> {
- throw new RuntimeException(new ClassNotFoundException("Class pointer test exception"));
- },
- null);
- });
+ assertThrows(RuntimeException.class, () -> ExternalRequestUtil.retryableExternalRequest(
+ () -> {
+ throw new RuntimeException(new ClassNotFoundException("Class pointer test exception"));
+ }, null));
}
@Test
diff --git a/metis-common/metis-common-network/src/test/java/eu/europeana/metis/network/NetworkUtilTest.java b/metis-common/metis-common-network/src/test/java/eu/europeana/metis/network/NetworkUtilTest.java
index 3d47f33a7..ae091ef99 100644
--- a/metis-common/metis-common-network/src/test/java/eu/europeana/metis/network/NetworkUtilTest.java
+++ b/metis-common/metis-common-network/src/test/java/eu/europeana/metis/network/NetworkUtilTest.java
@@ -3,16 +3,13 @@
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.mockStatic;
+import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.when;
import java.io.IOException;
import java.net.InetAddress;
import javax.net.ServerSocketFactory;
-import javax.net.ssl.SSLServerSocketFactory;
-import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
-import org.mockito.MockedStatic;
/**
* Unit test for {@link NetworkUtil}
@@ -24,21 +21,17 @@ class NetworkUtilTest {
@Test
void getAvailableLocalPort() throws IOException {
- int availableLocalPort = NetworkUtil.getAvailableLocalPort();
-
+ int availableLocalPort = new NetworkUtil().getAvailableLocalPort();
assertTrue(availableLocalPort > 0);
}
- @Disabled("TODO: MET-4250 Handle MockMaker in Jenkins")
@Test
void getAvailableLocalPortWithException() throws IOException {
final int BACKLOG = 100;
- try (MockedStatic sslServerSocketFactory = mockStatic(SSLServerSocketFactory.class)) {
- ServerSocketFactory serverSocketFactory = mock(ServerSocketFactory.class);
- sslServerSocketFactory.when(SSLServerSocketFactory::getDefault).thenReturn(serverSocketFactory);
- when(serverSocketFactory.createServerSocket(0, BACKLOG, InetAddress.getByName("localhost"))).thenThrow(IOException.class);
-
- assertThrows(IOException.class, () -> NetworkUtil.getAvailableLocalPort());
- }
+ final ServerSocketFactory sslServerSocketFactory = mock(ServerSocketFactory.class);
+ when(sslServerSocketFactory.createServerSocket(0, BACKLOG, InetAddress.getByName("localhost"))).thenThrow(IOException.class);
+ final NetworkUtil networkUtil = spy(NetworkUtil.class);
+ when(networkUtil.getServerSocketFactory()).thenReturn(sslServerSocketFactory);
+ assertThrows(IOException.class, networkUtil::getAvailableLocalPort);
}
}
\ No newline at end of file
diff --git a/metis-common/metis-common-network/src/test/java/eu/europeana/metis/network/StringHttpClientTest.java b/metis-common/metis-common-network/src/test/java/eu/europeana/metis/network/StringHttpClientTest.java
index c58837c58..4181574f7 100644
--- a/metis-common/metis-common-network/src/test/java/eu/europeana/metis/network/StringHttpClientTest.java
+++ b/metis-common/metis-common-network/src/test/java/eu/europeana/metis/network/StringHttpClientTest.java
@@ -10,7 +10,6 @@
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.IOException;
-import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
@@ -19,7 +18,6 @@
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.io.entity.BasicHttpEntity;
import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
/**
@@ -56,8 +54,7 @@ void getResourceUrlWithException() {
void createResult() throws URISyntaxException, IOException {
List closeables = new ArrayList<>();
HttpEntity responseEntity = new BasicHttpEntity(new ByteArrayInputStream("content".getBytes()), ContentType.TEXT_PLAIN);
- final ContentRetriever contentRetriever = ContentRetriever.forNonCloseableContent(
- responseEntity == null ? InputStream::nullInputStream : responseEntity::getContent,
+ final ContentRetriever contentRetriever = ContentRetriever.forNonCloseableContent(responseEntity::getContent,
closeables::add);
StringContent actualContent = stringHttpClient.createResult(new URI("/resource/provided"), new URI("/resource/actual"),
@@ -65,18 +62,17 @@ void createResult() throws URISyntaxException, IOException {
assertEquals("content", actualContent.getContent());
assertEquals("text/plain", actualContent.getContentType());
+ assertEquals(1, closeables.size());
}
- @Disabled("TODO: MET-4250 Handle MockMaker in Jenkins")
@Test
void createResultWithException() throws IOException {
final ContentRetriever contentRetriever = mock(ContentRetriever.class);
when(contentRetriever.getContent()).thenThrow(IOException.class);
- assertThrows(IOException.class, () -> {
- stringHttpClient.createResult(new URI("/resource/provided"), new URI("/resource/actual"),
- "text/plain", 7L, contentRetriever);
- });
+ assertThrows(IOException.class,
+ () -> stringHttpClient.createResult(new URI("/resource/provided"), new URI("/resource/actual"),
+ "text/plain", 7L, contentRetriever));
}
@Test
diff --git a/metis-common/metis-common-zoho/src/test/java/eu/europeana/metis/zoho/ZohoUtilsTest.java b/metis-common/metis-common-zoho/src/test/java/eu/europeana/metis/zoho/ZohoUtilsTest.java
index 54ae0b2df..dc638df13 100644
--- a/metis-common/metis-common-zoho/src/test/java/eu/europeana/metis/zoho/ZohoUtilsTest.java
+++ b/metis-common/metis-common-zoho/src/test/java/eu/europeana/metis/zoho/ZohoUtilsTest.java
@@ -33,9 +33,7 @@ void stringListSupplier() {
final Record recordOrganization = new Record();
final List expectedChoiceList = List.of("Organization1Role", "Organization2Role");
recordOrganization.addKeyValue(ZohoConstants.ORGANIZATION_ROLE_FIELD,
- expectedChoiceList.stream()
- .map(choice -> new Choice<>(choice))
- .collect(Collectors.toList()));
+ expectedChoiceList.stream().map(Choice::new).collect(Collectors.toList()));
final List organizationRoleStringList = ZohoUtils.stringListSupplier(
recordOrganization.getKeyValue(ZohoConstants.ORGANIZATION_ROLE_FIELD));
diff --git a/metis-core/metis-core-service/src/test/java/eu/europeana/metis/core/service/TestDatasetService.java b/metis-core/metis-core-service/src/test/java/eu/europeana/metis/core/service/TestDatasetService.java
index 678e3a3ff..74f6754e4 100644
--- a/metis-core/metis-core-service/src/test/java/eu/europeana/metis/core/service/TestDatasetService.java
+++ b/metis-core/metis-core-service/src/test/java/eu/europeana/metis/core/service/TestDatasetService.java
@@ -22,7 +22,6 @@
import static org.mockito.Mockito.when;
import com.github.tomakehurst.wiremock.WireMockServer;
-import eu.europeana.metis.utils.RestEndpoints;
import eu.europeana.metis.authentication.user.MetisUserView;
import eu.europeana.metis.core.dao.DatasetDao;
import eu.europeana.metis.core.dao.DatasetXsltDao;
@@ -41,6 +40,7 @@
import eu.europeana.metis.exception.GenericMetisException;
import eu.europeana.metis.exception.UserUnauthorizedException;
import eu.europeana.metis.network.NetworkUtil;
+import eu.europeana.metis.utils.RestEndpoints;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
@@ -66,7 +66,7 @@ class TestDatasetService {
static {
try {
- portForWireMock = NetworkUtil.getAvailableLocalPort();
+ portForWireMock = new NetworkUtil().getAvailableLocalPort();
} catch (IOException e) {
e.printStackTrace();
}
diff --git a/metis-core/metis-core-service/src/test/java/eu/europeana/metis/core/service/TestProxiesService.java b/metis-core/metis-core-service/src/test/java/eu/europeana/metis/core/service/TestProxiesService.java
index 0a004009b..c42e800a3 100644
--- a/metis-core/metis-core-service/src/test/java/eu/europeana/metis/core/service/TestProxiesService.java
+++ b/metis-core/metis-core-service/src/test/java/eu/europeana/metis/core/service/TestProxiesService.java
@@ -564,7 +564,7 @@ void testGetRecord() throws MCSException, ExternalTaskException {
// Create representation
final Representation representation = mock(Representation.class);
- final String contentUri = "http://example.com";
+ final String contentUri = "https://example.com";
final File file = new File();
file.setContentUri(URI.create(contentUri));
when(representation.getFiles()).thenReturn(Collections.singletonList(file));
diff --git a/metis-enrichment/metis-enrichment-client/src/main/java/eu/europeana/enrichment/rest/client/EnrichmentWorkerImpl.java b/metis-enrichment/metis-enrichment-client/src/main/java/eu/europeana/enrichment/rest/client/EnrichmentWorkerImpl.java
index 7f944fd97..316eabc5f 100644
--- a/metis-enrichment/metis-enrichment-client/src/main/java/eu/europeana/enrichment/rest/client/EnrichmentWorkerImpl.java
+++ b/metis-enrichment/metis-enrichment-client/src/main/java/eu/europeana/enrichment/rest/client/EnrichmentWorkerImpl.java
@@ -20,6 +20,7 @@
public class EnrichmentWorkerImpl implements EnrichmentWorker {
private static final Logger LOGGER = LoggerFactory.getLogger(EnrichmentWorkerImpl.class);
+ private static final RdfConversionUtils rdfConversionUtils = new RdfConversionUtils();
private final Enricher enricher;
private final Dereferencer dereferencer;
@@ -169,18 +170,18 @@ private String convertRdfToStringForLogging(final RDF rdf) {
String convertRdfToString(RDF rdf) throws SerializationException {
- return RdfConversionUtils.convertRdfToString(rdf);
+ return rdfConversionUtils.convertRdfToString(rdf);
}
byte[] convertRdfToBytes(RDF rdf) throws SerializationException {
- return RdfConversionUtils.convertRdfToBytes(rdf);
+ return rdfConversionUtils.convertRdfToBytes(rdf);
}
RDF convertStringToRdf(String xml) throws SerializationException {
- return RdfConversionUtils.convertStringToRdf(xml);
+ return rdfConversionUtils.convertStringToRdf(xml);
}
RDF convertInputStreamToRdf(InputStream xml) throws SerializationException {
- return RdfConversionUtils.convertInputStreamToRdf(xml);
+ return rdfConversionUtils.convertInputStreamToRdf(xml);
}
}
diff --git a/metis-enrichment/metis-enrichment-client/src/test/java/eu/europeana/enrichment/rest/client/enrichment/MetisRecordParserTest.java b/metis-enrichment/metis-enrichment-client/src/test/java/eu/europeana/enrichment/rest/client/enrichment/MetisRecordParserTest.java
index d83e6874c..6cf493987 100644
--- a/metis-enrichment/metis-enrichment-client/src/test/java/eu/europeana/enrichment/rest/client/enrichment/MetisRecordParserTest.java
+++ b/metis-enrichment/metis-enrichment-client/src/test/java/eu/europeana/enrichment/rest/client/enrichment/MetisRecordParserTest.java
@@ -24,6 +24,7 @@
import eu.europeana.metis.schema.jibx.Subject;
import eu.europeana.metis.schema.jibx.Temporal;
import eu.europeana.metis.schema.jibx.Type;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Set;
import org.apache.commons.io.IOUtils;
@@ -31,6 +32,8 @@
public class MetisRecordParserTest {
+ private static final RdfConversionUtils rdfConversionUtils = new RdfConversionUtils();
+
@Test
public void testExtractedFieldValuesForEnrichment() {
RDF rdf = new RDF();
@@ -177,8 +180,8 @@ public void testExtractedFieldValuesForEnrichment() {
@Test
public void testSetAdditionalData() throws Exception {
String xml = IOUtils
- .toString(getClass().getClassLoader().getResourceAsStream("sample_completeness.rdf"), "UTF-8");
- RDF rdf = RdfConversionUtils.convertStringToRdf(xml);
+ .toString(getClass().getClassLoader().getResourceAsStream("sample_completeness.rdf"), StandardCharsets.UTF_8);
+ RDF rdf = rdfConversionUtils.convertStringToRdf(xml);
EnrichmentUtils.setAdditionalData(rdf);
EuropeanaAggregationType europeanaAggregationType = rdf.getEuropeanaAggregationList().stream()
.findAny().orElse(null);
diff --git a/metis-enrichment/metis-enrichment-client/src/test/java/eu/europeana/enrichment/utils/EntityMergeEngineTest.java b/metis-enrichment/metis-enrichment-client/src/test/java/eu/europeana/enrichment/utils/EntityMergeEngineTest.java
index 1760a1a6a..3dc87169d 100644
--- a/metis-enrichment/metis-enrichment-client/src/test/java/eu/europeana/enrichment/utils/EntityMergeEngineTest.java
+++ b/metis-enrichment/metis-enrichment-client/src/test/java/eu/europeana/enrichment/utils/EntityMergeEngineTest.java
@@ -44,6 +44,8 @@
public class EntityMergeEngineTest {
+ private static final RdfConversionUtils rdfConversionUtils = new RdfConversionUtils();
+
private static Place createPlace() {
Place place = new Place();
@@ -605,7 +607,7 @@ public void testMergePlace() throws SerializationException {
verifyPlace((Place) inputList.get(2), rdf.getPlaceList().get(2));
// Convert RDF to string as extra test that everything is OK.
- RdfConversionUtils.convertRdfToString(rdf);
+ rdfConversionUtils.convertRdfToString(rdf);
}
@Test
@@ -645,7 +647,7 @@ public void testMergeOtherTypes() throws SerializationException {
verifyOrganization((Organization) inputList.get(5), rdf.getOrganizationList().get(0));
// Convert RDF to string as extra test that everything is OK.
- RdfConversionUtils.convertRdfToString(rdf);
+ rdfConversionUtils.convertRdfToString(rdf);
}
@Test
diff --git a/metis-harvesting/src/test/java/eu/europeana/metis/harvesting/oaipmh/CloseableHttpOaiClientTest.java b/metis-harvesting/src/test/java/eu/europeana/metis/harvesting/oaipmh/CloseableHttpOaiClientTest.java
index e6cc24087..458dcc7e6 100644
--- a/metis-harvesting/src/test/java/eu/europeana/metis/harvesting/oaipmh/CloseableHttpOaiClientTest.java
+++ b/metis-harvesting/src/test/java/eu/europeana/metis/harvesting/oaipmh/CloseableHttpOaiClientTest.java
@@ -29,7 +29,7 @@ public class CloseableHttpOaiClientTest {
@BeforeAll
static void prepare() throws IOException {
- int portForWireMock = NetworkUtil.getAvailableLocalPort();
+ int portForWireMock = new NetworkUtil().getAvailableLocalPort();
final String localhostUrl = "http://127.0.0.1:" + portForWireMock;
URL = localhostUrl + PATH;
CONNECTION_CLIENT_FACTORY = () -> TestHelper.CONNECTION_CLIENT_FACTORY.apply(ENDPOINT);
diff --git a/metis-harvesting/src/test/java/eu/europeana/metis/harvesting/oaipmh/OaiHarvesterImplTest.java b/metis-harvesting/src/test/java/eu/europeana/metis/harvesting/oaipmh/OaiHarvesterImplTest.java
index bea665715..2fee29bf5 100644
--- a/metis-harvesting/src/test/java/eu/europeana/metis/harvesting/oaipmh/OaiHarvesterImplTest.java
+++ b/metis-harvesting/src/test/java/eu/europeana/metis/harvesting/oaipmh/OaiHarvesterImplTest.java
@@ -29,7 +29,7 @@ class OaiHarvesterImplTest {
@BeforeAll
static void prepare() throws IOException {
- int portForWireMock = NetworkUtil.getAvailableLocalPort();
+ int portForWireMock = new NetworkUtil().getAvailableLocalPort();
final String localhostUrl = "http://127.0.0.1:" + portForWireMock;
OAI_PMH_ENDPOINT = localhostUrl + "/oai-phm/";
CONNECTION_CLIENT_FACTORY = TestHelper.CONNECTION_CLIENT_FACTORY::apply;
diff --git a/metis-indexing/src/main/java/eu/europeana/indexing/IndexerImpl.java b/metis-indexing/src/main/java/eu/europeana/indexing/IndexerImpl.java
index 9e48a6638..4c2d29b11 100644
--- a/metis-indexing/src/main/java/eu/europeana/indexing/IndexerImpl.java
+++ b/metis-indexing/src/main/java/eu/europeana/indexing/IndexerImpl.java
@@ -5,6 +5,11 @@
import eu.europeana.indexing.exception.SetupRelatedIndexingException;
import eu.europeana.indexing.fullbean.StringToFullBeanConverter;
import eu.europeana.indexing.tiers.ClassifierFactory;
+import eu.europeana.indexing.tiers.model.MediaTier;
+import eu.europeana.indexing.tiers.model.MetadataTier;
+import eu.europeana.indexing.tiers.model.TierClassifier;
+import eu.europeana.indexing.tiers.view.ContentTierBreakdown;
+import eu.europeana.indexing.tiers.view.MetadataTierBreakdown;
import eu.europeana.indexing.utils.RdfTierUtils;
import eu.europeana.indexing.utils.RdfWrapper;
import eu.europeana.metis.schema.jibx.RDF;
@@ -31,6 +36,8 @@ class IndexerImpl implements Indexer {
private final AbstractConnectionProvider connectionProvider;
private final IndexingSupplier stringToRdfConverterSupplier;
+ private final TierClassifier mediaClassifier = ClassifierFactory.getMediaClassifier();
+ private final TierClassifier metadataClassifier = ClassifierFactory.getMetadataClassifier();
/**
* Constructor.
@@ -45,8 +52,8 @@ class IndexerImpl implements Indexer {
* Constructor for testing purposes.
*
* @param connectionProvider The connection provider for this indexer.
- * @param stringToRdfConverterSupplier Supplies an instance of {@link StringToFullBeanConverter}
- * used to parse strings to instances of {@link RDF}. Will be called once during every index.
+ * @param stringToRdfConverterSupplier Supplies an instance of {@link StringToFullBeanConverter} used to convert a string to an
+ * instance of {@link RDF}. Will be called once during every index.
*/
IndexerImpl(AbstractConnectionProvider connectionProvider,
IndexingSupplier stringToRdfConverterSupplier) {
@@ -54,8 +61,38 @@ class IndexerImpl implements Indexer {
this.stringToRdfConverterSupplier = stringToRdfConverterSupplier;
}
+ @Override
+ public void indexRdfs(List records, IndexingProperties indexingProperties)
+ throws IndexingException {
+ indexRecords(records, indexingProperties);
+ }
+
+ @Override
+ public void index(List records, IndexingProperties indexingProperties)
+ throws IndexingException {
+ LOGGER.info("Parsing {} records...", records.size());
+ final StringToFullBeanConverter stringToRdfConverter = stringToRdfConverterSupplier.get();
+ final List wrappedRecords = new ArrayList<>(records.size());
+ for (String record : records) {
+ wrappedRecords.add(stringToRdfConverter.convertStringToRdf(record));
+ }
+ indexRecords(wrappedRecords, indexingProperties);
+ }
+
+ @Override
+ public void index(InputStream record, IndexingProperties indexingProperties)
+ throws IndexingException {
+ final StringToFullBeanConverter stringToRdfConverter = stringToRdfConverterSupplier.get();
+ indexRdf(stringToRdfConverter.convertToRdf(record), indexingProperties);
+ }
+
+ @Override
+ public void indexRdf(RDF record, IndexingProperties indexingProperties) throws IndexingException {
+ indexRdfs(List.of(record), indexingProperties);
+ }
+
private void indexRecords(List records, IndexingProperties properties)
- throws IndexingException {
+ throws IndexingException {
if (properties.isPerformRedirects() && connectionProvider.getRecordRedirectDao() == null) {
throw new SetupRelatedIndexingException(
"Record redirect dao has not been initialized and performing redirects is requested");
@@ -68,60 +105,30 @@ private void indexRecords(List records, IndexingProperties properties)
preprocessRecord(record, properties.isPerformTierCalculation());
if (properties.isPerformRedirects()) {
publisher.publishWithRedirects(new RdfWrapper(record), properties.getRecordDate(),
- properties.getDatasetIdsForRedirection());
+ properties.getDatasetIdsForRedirection());
} else {
publisher.publish(new RdfWrapper(record), properties.getRecordDate(),
- properties.getDatasetIdsForRedirection());
+ properties.getDatasetIdsForRedirection());
}
}
LOGGER.info("Successfully processed {} records.", records.size());
}
- private static void preprocessRecord(RDF rdf, boolean performTierCalculation)
- throws IndexingException {
-
- // Perform the tier classification
- if (performTierCalculation) {
- final RdfWrapper rdfWrapper = new RdfWrapper(rdf);
- RdfTierUtils.setTier(rdf, ClassifierFactory.getMediaClassifier().classify(rdfWrapper).getTier());
- RdfTierUtils.setTier(rdf, ClassifierFactory.getMetadataClassifier().classify(rdfWrapper).getTier());
- }
- }
-
- @Override
- public void indexRdfs(List records, IndexingProperties indexingProperties)
- throws IndexingException {
- indexRecords(records, indexingProperties);
- }
-
- @Override
- public void indexRdf(RDF record, IndexingProperties indexingProperties) throws IndexingException {
- indexRdfs(List.of(record), indexingProperties);
- }
-
- @Override
- public void index(List records, IndexingProperties indexingProperties)
- throws IndexingException {
- LOGGER.info("Parsing {} records...", records.size());
- final StringToFullBeanConverter stringToRdfConverter = stringToRdfConverterSupplier.get();
- final List wrappedRecords = new ArrayList<>(records.size());
- for (String record : records) {
- wrappedRecords.add(stringToRdfConverter.convertStringToRdf(record));
- }
- indexRecords(wrappedRecords, indexingProperties);
- }
-
@Override
public void index(String record, IndexingProperties indexingProperties) throws IndexingException {
index(List.of(record), indexingProperties);
}
- @Override
- public void index(InputStream record, IndexingProperties indexingProperties)
- throws IndexingException {
- final StringToFullBeanConverter stringToRdfConverter = stringToRdfConverterSupplier.get();
- indexRdf(stringToRdfConverter.convertToRdf(record), indexingProperties);
+ private void preprocessRecord(RDF rdf, boolean performTierCalculation)
+ throws IndexingException {
+
+ // Perform the tier classification
+ if (performTierCalculation) {
+ final RdfWrapper rdfWrapper = new RdfWrapper(rdf);
+ RdfTierUtils.setTier(rdf, mediaClassifier.classify(rdfWrapper).getTier());
+ RdfTierUtils.setTier(rdf, metadataClassifier.classify(rdfWrapper).getTier());
+ }
}
@Override
@@ -168,8 +175,7 @@ public long countRecords(String datasetId) {
}
/**
- * Similar to the Java interface {@link Supplier}, but one that may throw an {@link
- * IndexerRelatedIndexingException}.
+ * Similar to the Java interface {@link Supplier}, but one that may throw an {@link IndexerRelatedIndexingException}.
*
* @param The type of the object to be supplied.
* @author jochen
@@ -181,8 +187,7 @@ interface IndexingSupplier {
* Gets a result.
*
* @return A result.
- * @throws IndexerRelatedIndexingException In case something went wrong while getting the
- * result.
+ * @throws IndexerRelatedIndexingException In case something went wrong while getting the result.
*/
T get() throws IndexerRelatedIndexingException;
}
diff --git a/metis-indexing/src/main/java/eu/europeana/indexing/tiers/RecordTierCalculationViewGenerator.java b/metis-indexing/src/main/java/eu/europeana/indexing/tiers/RecordTierCalculationViewGenerator.java
index 010afc645..c50b3ee9c 100644
--- a/metis-indexing/src/main/java/eu/europeana/indexing/tiers/RecordTierCalculationViewGenerator.java
+++ b/metis-indexing/src/main/java/eu/europeana/indexing/tiers/RecordTierCalculationViewGenerator.java
@@ -3,6 +3,7 @@
import eu.europeana.indexing.exception.TierCalculationException;
import eu.europeana.indexing.tiers.model.MediaTier;
import eu.europeana.indexing.tiers.model.MetadataTier;
+import eu.europeana.indexing.tiers.model.TierClassifier;
import eu.europeana.indexing.tiers.model.TierClassifier.TierClassification;
import eu.europeana.indexing.tiers.view.ContentTierBreakdown;
import eu.europeana.indexing.tiers.view.MetadataTierBreakdown;
@@ -22,6 +23,10 @@
*/
public class RecordTierCalculationViewGenerator {
+ private static final RdfConversionUtils rdfConversionUtils = new RdfConversionUtils();
+ private static final TierClassifier mediaClassifier = ClassifierFactory.getMediaClassifier();
+ private static final TierClassifier metadataClassifier = ClassifierFactory.getMetadataClassifier();
+
private final String europeanaId;
private final String providerId;
private final String stringRdf;
@@ -59,13 +64,11 @@ private RecordTierCalculationView tierClassification(final String xml) {
final RDF rdf;
try {
// Perform the tier classification
- rdf = RdfConversionUtils.convertStringToRdf(xml);
+ rdf = rdfConversionUtils.convertStringToRdf(xml);
final RdfWrapper rdfWrapper = new RdfWrapper(rdf);
- final TierClassification mediaTierClassification = ClassifierFactory.getMediaClassifier()
- .classify(rdfWrapper);
- final TierClassification metadataTierClassification = ClassifierFactory.getMetadataClassifier()
- .classify(
- rdfWrapper);
+ final TierClassification mediaTierClassification = mediaClassifier.classify(rdfWrapper);
+ final TierClassification metadataTierClassification = metadataClassifier.classify(
+ rdfWrapper);
RecordTierCalculationSummary recordTierCalculationSummary = new RecordTierCalculationSummary();
recordTierCalculationSummary.setEuropeanaRecordId(europeanaId);
recordTierCalculationSummary.setProviderRecordId(providerId);
diff --git a/metis-indexing/src/main/java/eu/europeana/indexing/tiers/metadata/ContextualClassesClassifier.java b/metis-indexing/src/main/java/eu/europeana/indexing/tiers/metadata/ContextualClassesClassifier.java
index 4363dcc8c..dcb53a3cd 100644
--- a/metis-indexing/src/main/java/eu/europeana/indexing/tiers/metadata/ContextualClassesClassifier.java
+++ b/metis-indexing/src/main/java/eu/europeana/indexing/tiers/metadata/ContextualClassesClassifier.java
@@ -33,6 +33,8 @@
*/
public class ContextualClassesClassifier implements TierClassifierBreakdown {
+ private static final RdfConversionUtils rdfConversionUtils = new RdfConversionUtils();
+
private static Set getResourceLinks(ProxyType proxy) {
return Stream.of(ResourceLinkFromProxy.values())
.map(ResourceLinkFromProxy::getLinkAndValueGetter)
@@ -64,7 +66,7 @@ public ContextualClassesBreakdown classifyBreakdown(RdfWrapper entity) {
final Set uniqueContextualClasses = contextualClassesStatistics.getDistinctClassesSet().stream()
.map(ContextualClassGroup::getContextualClass)
.map(
- RdfConversionUtils::getQualifiedElementNameForClass)
+ rdfConversionUtils::getQualifiedElementNameForClass)
.collect(Collectors.toSet());
return new ContextualClassesBreakdown(contextualClassesStatistics.getCompleteContextualResources(), uniqueContextualClasses,
diff --git a/metis-indexing/src/main/java/eu/europeana/indexing/tiers/metadata/EnablingElementsClassifier.java b/metis-indexing/src/main/java/eu/europeana/indexing/tiers/metadata/EnablingElementsClassifier.java
index ac218d229..162d6577c 100644
--- a/metis-indexing/src/main/java/eu/europeana/indexing/tiers/metadata/EnablingElementsClassifier.java
+++ b/metis-indexing/src/main/java/eu/europeana/indexing/tiers/metadata/EnablingElementsClassifier.java
@@ -21,6 +21,8 @@
*/
public class EnablingElementsClassifier implements TierClassifierBreakdown {
+ private static final RdfConversionUtils rdfConversionUtils = new RdfConversionUtils();
+
private static final int MIN_ELEMENTS_TIER_A = 1;
private static final int MIN_ELEMENTS_TIER_B = 3;
private static final int MIN_ELEMENTS_TIER_C = 4;
@@ -36,10 +38,10 @@ public EnablingElementsBreakdown classifyBreakdown(RdfWrapper entity) {
final MetadataTier metadataTier = calculateMetadataTier(inventory);
final Set distinctEnablingElementsList = inventory.getElements().stream().map(EnablingElement::getTypedClass)
- .map(RdfConversionUtils::getQualifiedElementNameForClass)
+ .map(rdfConversionUtils::getQualifiedElementNameForClass)
.collect(Collectors.toSet());
final Set metadataGroupsList = inventory.getGroups().stream().map(ContextualClassGroup::getContextualClass)
- .map(RdfConversionUtils::getQualifiedElementNameForClass)
+ .map(rdfConversionUtils::getQualifiedElementNameForClass)
.collect(Collectors.toSet());
return new EnablingElementsBreakdown(distinctEnablingElementsList, metadataGroupsList, metadataTier);
diff --git a/metis-indexing/src/main/java/eu/europeana/indexing/tiers/metadata/LanguageClassifier.java b/metis-indexing/src/main/java/eu/europeana/indexing/tiers/metadata/LanguageClassifier.java
index 18a883bd9..3eb4d821a 100644
--- a/metis-indexing/src/main/java/eu/europeana/indexing/tiers/metadata/LanguageClassifier.java
+++ b/metis-indexing/src/main/java/eu/europeana/indexing/tiers/metadata/LanguageClassifier.java
@@ -19,6 +19,8 @@
*/
public class LanguageClassifier implements TierClassifierBreakdown {
+ private static final RdfConversionUtils rdfConversionUtils = new RdfConversionUtils();
+
private static final float MIN_RATE_FOR_T1 = 0.25F;
private static final float MIN_RATE_FOR_T2 = 0.5F;
private static final float MIN_RATE_FOR_T3 = 0.75F;
@@ -42,7 +44,7 @@ public LanguageBreakdown classifyBreakdown(RdfWrapper entity) {
return new LanguageBreakdown(qualifiedProperties.size(),
qualifiedPropertiesWithoutLanguage.stream().map(PropertyType::getTypedClass)
- .map(RdfConversionUtils::getQualifiedElementNameForClass).collect(Collectors.toSet()),
+ .map(rdfConversionUtils::getQualifiedElementNameForClass).collect(Collectors.toSet()),
metadataTier);
}
diff --git a/metis-indexing/src/test/java/eu/europeana/indexing/tiers/media/AbstractMediaClassifierTest.java b/metis-indexing/src/test/java/eu/europeana/indexing/tiers/media/AbstractMediaClassifierTest.java
index 49d2e3953..6e9fdfbd4 100644
--- a/metis-indexing/src/test/java/eu/europeana/indexing/tiers/media/AbstractMediaClassifierTest.java
+++ b/metis-indexing/src/test/java/eu/europeana/indexing/tiers/media/AbstractMediaClassifierTest.java
@@ -89,7 +89,7 @@ void testClassify_WithoutWebResources() {
doReturn(Collections.emptyList()).when(entity).getWebResourceWrappers(
EnumSet.of(WebResourceLinkType.HAS_VIEW, WebResourceLinkType.IS_SHOWN_BY));
//Has embeddable media will be true
- doReturn(Set.of("http://soundcloud.com/")).when(entity).getUrlsOfTypes(Set.of(WebResourceLinkType.IS_SHOWN_BY));
+ doReturn(Set.of("https://soundcloud.com/")).when(entity).getUrlsOfTypes(Set.of(WebResourceLinkType.IS_SHOWN_BY));
doReturn(MediaTier.T0).when(classifier).classifyEntityWithoutWebResources(entity, hasLandingPage);
assertEquals(MediaTier.T0, classifier.classify(entity).getTier());
}
@@ -126,7 +126,7 @@ void testClassify_WithWebResources() {
doReturn(entityLicense).when(entity).getLicenseType();
doReturn(null).when(classifier).preClassifyEntity(entity);
//Has embeddable media will be true
- doReturn(Set.of("http://soundcloud.com/")).when(entity).getUrlsOfTypes(Set.of(WebResourceLinkType.IS_SHOWN_BY));
+ doReturn(Set.of("https://soundcloud.com/")).when(entity).getUrlsOfTypes(Set.of(WebResourceLinkType.IS_SHOWN_BY));
// Create web resources.
doReturn(mediaResourceTechnicalMetadata1).when(classifier)
diff --git a/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/extraction/AudioVideoProcessorTest.java b/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/extraction/AudioVideoProcessorTest.java
index 0c79c7e86..567daee30 100644
--- a/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/extraction/AudioVideoProcessorTest.java
+++ b/metis-media-service/src/test/java/eu/europeana/metis/mediaprocessing/extraction/AudioVideoProcessorTest.java
@@ -33,8 +33,8 @@
import eu.europeana.metis.mediaprocessing.model.Resource;
import eu.europeana.metis.mediaprocessing.model.ResourceExtractionResultImpl;
import eu.europeana.metis.mediaprocessing.model.VideoResourceMetadata;
-import eu.europeana.metis.schema.model.MediaType;
import eu.europeana.metis.network.NetworkUtil;
+import eu.europeana.metis.schema.model.MediaType;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
@@ -62,7 +62,7 @@ class AudioVideoProcessorTest {
static {
try {
- portForWireMock = NetworkUtil.getAvailableLocalPort();
+ portForWireMock = new NetworkUtil().getAvailableLocalPort();
} catch (IOException e) {
e.printStackTrace();
}
@@ -130,7 +130,7 @@ void testCreateAudioVideoAnalysisCommand() throws IOException, MediaExtractionEx
// Create resource
final Resource resource = mock(Resource.class);
- doReturn("http://valid.url.nl/test").when(resource).getResourceUrl();
+ doReturn("https://valid.url.nl/test").when(resource).getResourceUrl();
doReturn(Paths.get("content path")).when(resource).getContentPath();
// test resource with content
@@ -158,10 +158,10 @@ void testCreateAudioVideoAnalysisCommand() throws IOException, MediaExtractionEx
doReturn("valid.without.prefix.nl/").when(resource).getResourceUrl();
assertThrows(MediaExtractionException.class,
() -> audioVideoProcessor.createAudioVideoAnalysisCommand(resource));
- doReturn("http://invalid.characters.nl/!@#$%^&*()_").when(resource).getResourceUrl();
+ doReturn("https://invalid.characters.nl/!@#$%^&*()_").when(resource).getResourceUrl();
assertThrows(MediaExtractionException.class,
() -> audioVideoProcessor.createAudioVideoAnalysisCommand(resource));
- doReturn("http://valid.url.nl/test").when(resource).getResourceUrl();
+ doReturn("https://valid.url.nl/test").when(resource).getResourceUrl();
// test if hasContent fails.
doThrow(new IOException()).when(resource).hasContent();
diff --git a/metis-schema/pom.xml b/metis-schema/pom.xml
index f3620553b..66d77bdb4 100644
--- a/metis-schema/pom.xml
+++ b/metis-schema/pom.xml
@@ -86,10 +86,6 @@
org.junit.jupiter
junit-jupiter-engine
-
- mockito-inline
- org.mockito
-
@@ -169,12 +165,10 @@
src/main/java
-
true
true
false
-
diff --git a/metis-schema/src/main/java/eu/europeana/metis/schema/convert/RdfConversionUtils.java b/metis-schema/src/main/java/eu/europeana/metis/schema/convert/RdfConversionUtils.java
index 78826b40c..e15e3788d 100644
--- a/metis-schema/src/main/java/eu/europeana/metis/schema/convert/RdfConversionUtils.java
+++ b/metis-schema/src/main/java/eu/europeana/metis/schema/convert/RdfConversionUtils.java
@@ -23,20 +23,36 @@
/**
* Utility class for converting {@link RDF} to String and vice versa.
*/
-public final class RdfConversionUtils {
+public class RdfConversionUtils {
private static final int INDENTATION_SPACE = 2;
private static final String UTF8 = StandardCharsets.UTF_8.name();
- private static IBindingFactory rdfBindingFactory;
- private static Map rdfXmlElementMetadataMap;
@SuppressWarnings("java:S5852") //This regex is safe, and it's only meant for internal use without use input
private static final Pattern complexTypePattern = Pattern.compile("^\\{(.*)}:(.*)$");
+ private final IBindingFactory rdfBindingFactory;
+ private final Map rdfXmlElementMetadataMap;
- static {
- initializeStaticComponents();
+ /**
+ * Default constructor
+ */
+ public RdfConversionUtils() {
+ this(RDF.class);
}
- private RdfConversionUtils() {
+ /**
+ * Constructor supplying class type for the binding factory.
+ * At the current state this is used for assisting testing
+ *
+ * @param classType the class object type
+ * @param the class type
+ */
+ RdfConversionUtils(Class classType) {
+ try {
+ rdfBindingFactory = BindingDirectory.getFactory(classType);
+ rdfXmlElementMetadataMap = initializeRdfXmlElementMetadataMap();
+ } catch (JiBXException e) {
+ throw new IllegalStateException("No binding factory available.", e);
+ }
}
/**
@@ -46,7 +62,7 @@ private RdfConversionUtils() {
* @return An XML string representation of the RDF object
* @throws SerializationException if during marshalling there is a failure
*/
- public static byte[] convertRdfToBytes(RDF rdf) throws SerializationException {
+ public byte[] convertRdfToBytes(RDF rdf) throws SerializationException {
try {
IMarshallingContext context = rdfBindingFactory.createMarshallingContext();
context.setIndent(INDENTATION_SPACE);
@@ -66,7 +82,7 @@ public static byte[] convertRdfToBytes(RDF rdf) throws SerializationException {
* @param objectClass the jibx object class to search for
* @return the xml representation
*/
- public static String getQualifiedElementNameForClass(Class> objectClass) {
+ public String getQualifiedElementNameForClass(Class> objectClass) {
final RdfXmlElementMetadata rdfXmlElementMetadata = rdfXmlElementMetadataMap.get(objectClass.getCanonicalName());
Objects.requireNonNull(rdfXmlElementMetadata,
String.format("Element metadata not found for class: %s", objectClass.getCanonicalName()));
@@ -80,7 +96,7 @@ public static String getQualifiedElementNameForClass(Class> objectClass) {
* @return the RDF object
* @throws SerializationException if during unmarshalling there is a failure
*/
- public static RDF convertInputStreamToRdf(InputStream inputStream) throws SerializationException {
+ public RDF convertInputStreamToRdf(InputStream inputStream) throws SerializationException {
try {
final IUnmarshallingContext context = rdfBindingFactory.createUnmarshallingContext();
return (RDF) context.unmarshalDocument(inputStream, UTF8);
@@ -91,33 +107,18 @@ public static RDF convertInputStreamToRdf(InputStream inputStream) throws Serial
}
/**
- * Collect all information that we can get for jibx classes from the {@link IBindingFactory}.
+ * Convert an {@link RDF} to a UTF-8 encoded XML
+ *
+ * @param rdf The RDF object to convert
+ * @return An XML string representation of the RDF object
+ * @throws SerializationException if during marshalling there is a failure
*/
- private static Map initializeRdfXmlElementMetadataMap() {
- Map rdfXmlElementMetadataMap = new HashMap<>();
- for (int i = 0; i < rdfBindingFactory.getMappedClasses().length; i++) {
- final String canonicalName;
- final String elementNamespace;
- final String elementName;
- final Matcher matcher = complexTypePattern.matcher(rdfBindingFactory.getMappedClasses()[i]);
- if (matcher.matches()) {
- //Complex type search
- elementNamespace = matcher.group(1);
- elementName = matcher.group(2);
- final Pattern canonicalClassNamePattern = Pattern.compile(String.format("^(.*)\\.(%s)$", elementName));
- canonicalName = Arrays.stream(rdfBindingFactory.getAbstractMappings()).flatMap(Arrays::stream)
- .filter(Objects::nonNull)
- .filter(input -> canonicalClassNamePattern.matcher(input).matches())
- .findFirst().orElse(null);
- } else {
- //Simple type search
- elementNamespace = rdfBindingFactory.getElementNamespaces()[i];
- elementName = rdfBindingFactory.getElementNames()[i];
- canonicalName = rdfBindingFactory.getMappedClasses()[i];
- }
- checkAndStoreMetadataInMap(rdfXmlElementMetadataMap, canonicalName, elementNamespace, elementName);
+ public String convertRdfToString(RDF rdf) throws SerializationException {
+ try {
+ return new String(convertRdfToBytes(rdf), UTF8);
+ } catch (UnsupportedEncodingException e) {
+ throw new IllegalStateException("Unexpected exception - should not occur.", e);
}
- return rdfXmlElementMetadataMap;
}
static class RdfXmlElementMetadata {
@@ -152,21 +153,52 @@ public String getName() {
}
/**
- * Convert an {@link RDF} to a UTF-8 encoded XML
+ * Convert a UTF-8 encoded XML to {@link RDF}
*
- * @param rdf The RDF object to convert
- * @return An XML string representation of the RDF object
- * @throws SerializationException if during marshalling there is a failure
+ * @param xml the xml string
+ * @return the RDF object
+ * @throws SerializationException if during unmarshalling there is a failure
*/
- public static String convertRdfToString(RDF rdf) throws SerializationException {
- try {
- return new String(convertRdfToBytes(rdf), UTF8);
- } catch (UnsupportedEncodingException e) {
- throw new IllegalStateException("Unexpected exception - should not occur.", e);
+ public RDF convertStringToRdf(String xml) throws SerializationException {
+ try (final InputStream inputStream = new ByteArrayInputStream(
+ xml.getBytes(StandardCharsets.UTF_8))) {
+ return convertInputStreamToRdf(inputStream);
+ } catch (IOException e) {
+ throw new SerializationException("Unexpected issue with byte stream.", e);
+ }
+ }
+
+ /**
+ * Collect all information that we can get for jibx classes from the {@link IBindingFactory}.
+ */
+ private Map initializeRdfXmlElementMetadataMap() {
+ Map elementMetadataMap = new HashMap<>();
+ for (int i = 0; i < rdfBindingFactory.getMappedClasses().length; i++) {
+ final String canonicalName;
+ final String elementNamespace;
+ final String elementName;
+ final Matcher matcher = complexTypePattern.matcher(rdfBindingFactory.getMappedClasses()[i]);
+ if (matcher.matches()) {
+ //Complex type search
+ elementNamespace = matcher.group(1);
+ elementName = matcher.group(2);
+ final Pattern canonicalClassNamePattern = Pattern.compile(String.format("^(.*)\\.(%s)$", elementName));
+ canonicalName = Arrays.stream(rdfBindingFactory.getAbstractMappings()).flatMap(Arrays::stream)
+ .filter(Objects::nonNull)
+ .filter(input -> canonicalClassNamePattern.matcher(input).matches())
+ .findFirst().orElse(null);
+ } else {
+ //Simple type search
+ elementNamespace = rdfBindingFactory.getElementNamespaces()[i];
+ elementName = rdfBindingFactory.getElementNames()[i];
+ canonicalName = rdfBindingFactory.getMappedClasses()[i];
+ }
+ checkAndStoreMetadataInMap(elementMetadataMap, canonicalName, elementNamespace, elementName);
}
+ return elementMetadataMap;
}
- private static void checkAndStoreMetadataInMap(final Map rdfXmlElementMetadataMap,
+ private void checkAndStoreMetadataInMap(final Map rdfXmlElementMetadataMap,
String canonicalName, String elementNamespace, String elementName) {
//Store only if we could find the canonical name properly
if (canonicalName != null) {
@@ -179,29 +211,4 @@ private static void checkAndStoreMetadataInMap(final Map bindingDirectoryMockedStatic = Mockito.mockStatic(BindingDirectory.class)) {
- bindingDirectoryMockedStatic.when(() -> BindingDirectory.getFactory(RDF.class)).thenThrow(JiBXException.class);
- try {
- RdfConversionUtils.initializeStaticComponents();
- } catch (Throwable throwable) {
- //Check for two possibilities because based on the execution order of the tests this can throw a different exception
- assertTrue(throwable instanceof ExceptionInInitializerError || throwable instanceof IllegalStateException);
- assertTrue(throwable.getCause() instanceof IllegalStateException || throwable.getCause() instanceof JiBXException);
- }
- }
+ void failRdfConversionUtilsInitialization() {
+ //Force failure
+ assertThrows(IllegalStateException.class, () -> new RdfConversionUtils(RdfConversionUtils.class));
}
@Test
void getQualifiedElementNameForClass_ContextualClasses() {
//Check contextual classes
- assertEquals("edm:AgentType", RdfConversionUtils.getQualifiedElementNameForClass(AgentType.class));
- assertEquals("edm:TimeSpanType", RdfConversionUtils.getQualifiedElementNameForClass(TimeSpanType.class));
- assertEquals("edm:PlaceType", RdfConversionUtils.getQualifiedElementNameForClass(PlaceType.class));
- assertEquals("skos:Concept", RdfConversionUtils.getQualifiedElementNameForClass(Concept.class));
+ final RdfConversionUtils rdfConversionUtils = new RdfConversionUtils();
+ assertEquals("edm:AgentType", rdfConversionUtils.getQualifiedElementNameForClass(AgentType.class));
+ assertEquals("edm:TimeSpanType", rdfConversionUtils.getQualifiedElementNameForClass(TimeSpanType.class));
+ assertEquals("edm:PlaceType", rdfConversionUtils.getQualifiedElementNameForClass(PlaceType.class));
+ assertEquals("skos:Concept", rdfConversionUtils.getQualifiedElementNameForClass(Concept.class));
}
@Test
void getQualifiedElementNameForClass_Dc() {
//Check dc elements
- assertEquals("dc:coverage", RdfConversionUtils.getQualifiedElementNameForClass(Coverage.class));
- assertEquals("dc:description", RdfConversionUtils.getQualifiedElementNameForClass(Description.class));
- assertEquals("dc:format", RdfConversionUtils.getQualifiedElementNameForClass(Format.class));
- assertEquals("dc:relation", RdfConversionUtils.getQualifiedElementNameForClass(Relation.class));
- assertEquals("dc:rights", RdfConversionUtils.getQualifiedElementNameForClass(Rights.class));
- assertEquals("dc:source", RdfConversionUtils.getQualifiedElementNameForClass(Source.class));
- assertEquals("dc:subject", RdfConversionUtils.getQualifiedElementNameForClass(Subject.class));
- assertEquals("dc:title", RdfConversionUtils.getQualifiedElementNameForClass(Title.class));
- assertEquals("dc:type", RdfConversionUtils.getQualifiedElementNameForClass(Type.class));
+ final RdfConversionUtils rdfConversionUtils = new RdfConversionUtils();
+ assertEquals("dc:coverage", rdfConversionUtils.getQualifiedElementNameForClass(Coverage.class));
+ assertEquals("dc:description", rdfConversionUtils.getQualifiedElementNameForClass(Description.class));
+ assertEquals("dc:format", rdfConversionUtils.getQualifiedElementNameForClass(Format.class));
+ assertEquals("dc:relation", rdfConversionUtils.getQualifiedElementNameForClass(Relation.class));
+ assertEquals("dc:rights", rdfConversionUtils.getQualifiedElementNameForClass(Rights.class));
+ assertEquals("dc:source", rdfConversionUtils.getQualifiedElementNameForClass(Source.class));
+ assertEquals("dc:subject", rdfConversionUtils.getQualifiedElementNameForClass(Subject.class));
+ assertEquals("dc:title", rdfConversionUtils.getQualifiedElementNameForClass(Title.class));
+ assertEquals("dc:type", rdfConversionUtils.getQualifiedElementNameForClass(Type.class));
}
@Test
void getQualifiedElementNameForClass_Dcterms() {
//Check dcterms elements
- assertEquals("dcterms:alternative", RdfConversionUtils.getQualifiedElementNameForClass(Alternative.class));
- assertEquals("dcterms:hasPart", RdfConversionUtils.getQualifiedElementNameForClass(HasPart.class));
- assertEquals("dcterms:isPartOf", RdfConversionUtils.getQualifiedElementNameForClass(IsPartOf.class));
- assertEquals("dcterms:isReferencedBy", RdfConversionUtils.getQualifiedElementNameForClass(IsReferencedBy.class));
- assertEquals("dcterms:medium", RdfConversionUtils.getQualifiedElementNameForClass(Medium.class));
- assertEquals("dcterms:provenance", RdfConversionUtils.getQualifiedElementNameForClass(Provenance.class));
- assertEquals("dcterms:references", RdfConversionUtils.getQualifiedElementNameForClass(References.class));
- assertEquals("dcterms:spatial", RdfConversionUtils.getQualifiedElementNameForClass(Spatial.class));
- assertEquals("dcterms:tableOfContents", RdfConversionUtils.getQualifiedElementNameForClass(TableOfContents.class));
- assertEquals("dcterms:temporal", RdfConversionUtils.getQualifiedElementNameForClass(Temporal.class));
+ final RdfConversionUtils rdfConversionUtils = new RdfConversionUtils();
+ assertEquals("dcterms:alternative", rdfConversionUtils.getQualifiedElementNameForClass(Alternative.class));
+ assertEquals("dcterms:hasPart", rdfConversionUtils.getQualifiedElementNameForClass(HasPart.class));
+ assertEquals("dcterms:isPartOf", rdfConversionUtils.getQualifiedElementNameForClass(IsPartOf.class));
+ assertEquals("dcterms:isReferencedBy", rdfConversionUtils.getQualifiedElementNameForClass(IsReferencedBy.class));
+ assertEquals("dcterms:medium", rdfConversionUtils.getQualifiedElementNameForClass(Medium.class));
+ assertEquals("dcterms:provenance", rdfConversionUtils.getQualifiedElementNameForClass(Provenance.class));
+ assertEquals("dcterms:references", rdfConversionUtils.getQualifiedElementNameForClass(References.class));
+ assertEquals("dcterms:spatial", rdfConversionUtils.getQualifiedElementNameForClass(Spatial.class));
+ assertEquals("dcterms:tableOfContents", rdfConversionUtils.getQualifiedElementNameForClass(TableOfContents.class));
+ assertEquals("dcterms:temporal", rdfConversionUtils.getQualifiedElementNameForClass(Temporal.class));
}
@Test
void getQualifiedElementNameForClass_Edm() {
//Check edm elements
- assertEquals("edm:currentLocation", RdfConversionUtils.getQualifiedElementNameForClass(CurrentLocation.class));
- assertEquals("edm:hasType", RdfConversionUtils.getQualifiedElementNameForClass(HasType.class));
- assertEquals("edm:isRelatedTo", RdfConversionUtils.getQualifiedElementNameForClass(IsRelatedTo.class));
+ final RdfConversionUtils rdfConversionUtils = new RdfConversionUtils();
+ assertEquals("edm:currentLocation", rdfConversionUtils.getQualifiedElementNameForClass(CurrentLocation.class));
+ assertEquals("edm:hasType", rdfConversionUtils.getQualifiedElementNameForClass(HasType.class));
+ assertEquals("edm:isRelatedTo", rdfConversionUtils.getQualifiedElementNameForClass(IsRelatedTo.class));
}
}
\ No newline at end of file
diff --git a/metis-validation/metis-validation-client/src/test/java/TestValidationClient.java b/metis-validation/metis-validation-client/src/test/java/TestValidationClient.java
index 90574be1c..0a9ceb84b 100644
--- a/metis-validation/metis-validation-client/src/test/java/TestValidationClient.java
+++ b/metis-validation/metis-validation-client/src/test/java/TestValidationClient.java
@@ -28,7 +28,7 @@ class TestValidationClient {
static {
try {
- portForWireMock = NetworkUtil.getAvailableLocalPort();
+ portForWireMock = new NetworkUtil().getAvailableLocalPort();
} catch (IOException e) {
e.printStackTrace();
}
diff --git a/metis-validation/metis-validation-rest/src/test/java/TestApplication.java b/metis-validation/metis-validation-rest/src/test/java/TestApplication.java
index ccb2dabe0..eaaf89029 100644
--- a/metis-validation/metis-validation-rest/src/test/java/TestApplication.java
+++ b/metis-validation/metis-validation-rest/src/test/java/TestApplication.java
@@ -19,7 +19,7 @@ public class TestApplication {
static {
try {
- portForWireMock = NetworkUtil.getAvailableLocalPort();
+ portForWireMock = new NetworkUtil().getAvailableLocalPort();
} catch (IOException e) {
e.printStackTrace();
}
diff --git a/metis-validation/metis-validation-service/src/test/java/TestApplication.java b/metis-validation/metis-validation-service/src/test/java/TestApplication.java
index 51888196d..52db600d1 100644
--- a/metis-validation/metis-validation-service/src/test/java/TestApplication.java
+++ b/metis-validation/metis-validation-service/src/test/java/TestApplication.java
@@ -19,7 +19,7 @@ public class TestApplication {
static {
try {
- portForWireMock = NetworkUtil.getAvailableLocalPort();
+ portForWireMock = new NetworkUtil().getAvailableLocalPort();
} catch (IOException e) {
e.printStackTrace();
}
diff --git a/metis-validation/metis-validation-service/src/test/java/TestSchemaProvider.java b/metis-validation/metis-validation-service/src/test/java/TestSchemaProvider.java
index 6e0ec4020..2ffa8c3ad 100644
--- a/metis-validation/metis-validation-service/src/test/java/TestSchemaProvider.java
+++ b/metis-validation/metis-validation-service/src/test/java/TestSchemaProvider.java
@@ -30,7 +30,7 @@ class TestSchemaProvider {
static {
try {
- portForWireMock = NetworkUtil.getAvailableLocalPort();
+ portForWireMock = new NetworkUtil().getAvailableLocalPort();
} catch (IOException e) {
e.printStackTrace();
}
diff --git a/metis-validation/metis-validation-service/src/test/java/TestValidationExecution.java b/metis-validation/metis-validation-service/src/test/java/TestValidationExecution.java
index d0f19151e..85f045c1a 100644
--- a/metis-validation/metis-validation-service/src/test/java/TestValidationExecution.java
+++ b/metis-validation/metis-validation-service/src/test/java/TestValidationExecution.java
@@ -26,7 +26,6 @@
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import net.lingala.zip4j.ZipFile;
-import net.lingala.zip4j.exception.ZipException;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.AfterAll;
diff --git a/pom.xml b/pom.xml
index 15f414c74..57995f0eb 100644
--- a/pom.xml
+++ b/pom.xml
@@ -483,12 +483,6 @@
${version.mockito.core}
test
-
- mockito-inline
- org.mockito
- test
- ${version.mockito.core}
-
org.springframework
From 3401cd8022a45449ebc6c780b25b505f6f00d7a8 Mon Sep 17 00:00:00 2001
From: JoanaCMS <70145179+JoanaCMS@users.noreply.github.com>
Date: Tue, 15 Mar 2022 14:32:35 +0100
Subject: [PATCH 14/73] MET-4233 Secure Dereference service from url request
attacks (#507)
* MET-4233 Created DereferenceValidationUtils class
* MET-4233 Improvement in exception being thrown
Refactoring in DereferenceValidationUtils
* MET-4233 Refactoring code
* MET-4233 Refactoring of code
* MET-4233 Created new unit tests
* MET-4233 Added new unit tests
Code refactoring
* MET-4233 Removing code smells
* MET-4233 Removing code smell
* MET-4233 Code review changes
* MET-4233 Changed application configuration and updated the code accordingly
* MET-4233 Removed code smells
* MET-4233 Code cleanup
* MET-4233 Removed DereferenceValidationUtils class
* MET-4233 Code review changes
* MET-4233 Created new unit tests
* MET-4233 Removed code smells
---
.../VocabularyCollectionImporterFactory.java | 33 +++++++++++++-
.../VocabularyCollectionMavenRule.java | 21 ++++++++-
...cabularyCollectionImporterFactoryTest.java | 45 +++++++++++++++++++
.../dereference/rest/config/Application.java | 12 +++++
.../dereferencing.properties.example | 4 +-
.../MongoDereferencingManagementService.java | 11 ++++-
...ngoDereferencingManagementServiceTest.java | 4 +-
7 files changed, 123 insertions(+), 7 deletions(-)
create mode 100644 metis-dereference/metis-dereference-import/src/test/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterFactoryTest.java
diff --git a/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterFactory.java b/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterFactory.java
index 2f491bb68..c7db33197 100644
--- a/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterFactory.java
+++ b/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterFactory.java
@@ -1,26 +1,46 @@
package eu.europeana.metis.dereference.vocimport;
+import eu.europeana.metis.dereference.vocimport.exception.VocabularyImportException;
import eu.europeana.metis.dereference.vocimport.model.Location;
+import org.apache.commons.collections.CollectionUtils;
+
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
/**
* This class is the factory for instances of {@link VocabularyCollectionImporter}.
*/
public class VocabularyCollectionImporterFactory {
+ private final List validUrlPrefixes;
+
+ /**
+ * Constructor for the factory
+ *
+ * @param validUrlPrefixes The URL prefixes that a remote directory location must start with to be accepted
+ */
+ public VocabularyCollectionImporterFactory(List validUrlPrefixes) {
+ this.validUrlPrefixes = new ArrayList<>(validUrlPrefixes);
+ }
+
/**
* Create a vocabulary importer for remote web addresses, indicated by instances of {@link URI}.
* Note that this method can only be used for locations that are also a valid {@link
* java.net.URL}.
*
* @param directoryLocation The location of the directory to import.
+ * @throws VocabularyImportException if the directory location does not start with any of the valid URL prefixes (or none are configured)
* @return A vocabulary importer.
*/
- public VocabularyCollectionImporter createImporter(URI directoryLocation) {
+ public VocabularyCollectionImporter createImporter(URI directoryLocation) throws VocabularyImportException {
+ if(isUrlPrefixNotValid(directoryLocation.toString())){
+ throw new VocabularyImportException("The location of the directory to import is not valid.");
+ }
return new VocabularyCollectionImporterImpl(new UriLocation(directoryLocation));
}
@@ -47,6 +67,17 @@ public VocabularyCollectionImporter createImporter(Path baseDirectory, Path dire
return new VocabularyCollectionImporterImpl(new PathLocation(baseDirectory, directoryLocation));
}
+ private boolean isUrlPrefixNotValid(String directoryToEvaluate) {
+ boolean result;
+
+ if (CollectionUtils.isEmpty(validUrlPrefixes)) {
+ result = true;
+ } else {
+ result = validUrlPrefixes.stream().noneMatch(directoryToEvaluate::startsWith);
+ }
+ return result;
+ }
+
private static final class UriLocation implements Location {
private final URI uri;
diff --git a/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionMavenRule.java b/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionMavenRule.java
index de4bbd747..f69238b83 100644
--- a/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionMavenRule.java
+++ b/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionMavenRule.java
@@ -1,6 +1,7 @@
package eu.europeana.metis.dereference.vocimport;
import eu.europeana.metis.dereference.vocimport.exception.VocabularyImportException;
+
import java.nio.file.Path;
import org.apache.maven.enforcer.rule.api.EnforcerRule;
import org.apache.maven.enforcer.rule.api.EnforcerRuleException;
@@ -8,6 +9,8 @@
import org.apache.maven.plugin.logging.Log;
import org.apache.maven.project.MavenProject;
import org.codehaus.plexus.component.repository.exception.ComponentLookupException;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Component;
/**
* This is a Maven-enabled enforcer rule that can be used in a maven project. For an example of how
@@ -44,6 +47,7 @@
*
* }
*/
+@Component
public class VocabularyCollectionMavenRule implements EnforcerRule {
/**
@@ -69,12 +73,24 @@ public class VocabularyCollectionMavenRule implements EnforcerRule {
*/
private String vocabularyDirectoryFile = null;
+ private VocabularyCollectionImporterFactory vocabularyCollectionImporterFactory;
+
/**
* No-arguments constructor, required for maven instantiation.
*/
public VocabularyCollectionMavenRule() {
}
+ /**
+ * Constructor. Used to inject the factory
+ *
+ * @param vocabularyCollectionImporterFactory The vocabulary collection importer factory
+ */
+ @Autowired
+ public VocabularyCollectionMavenRule(VocabularyCollectionImporterFactory vocabularyCollectionImporterFactory){
+ this.vocabularyCollectionImporterFactory = vocabularyCollectionImporterFactory;
+ }
+
/**
* Constructor.
*
@@ -113,8 +129,9 @@ public void execute(EnforcerRuleHelper enforcerRuleHelper) throws EnforcerRuleEx
final Path baseDirectory = project.getBasedir().toPath();
final Path vocabularyDirectory = baseDirectory.resolve(vocabularyDirectoryFile);
+ try {
// Prepare validation
- final VocabularyCollectionImporter importer = new VocabularyCollectionImporterFactory()
+ final VocabularyCollectionImporter importer = vocabularyCollectionImporterFactory
.createImporter(baseDirectory, vocabularyDirectory);
final VocabularyCollectionValidatorImpl validator = new VocabularyCollectionValidatorImpl(
importer, lenientOnLackOfExamples, lenientOnMappingTestFailures,
@@ -123,7 +140,7 @@ public void execute(EnforcerRuleHelper enforcerRuleHelper) throws EnforcerRuleEx
log.info("Validating vocabulary collection: " + importer.getDirectoryLocation().toString());
// Perform validation
- try {
+
validator.validate(vocabulary -> log.info(" Vocabulary found: " + vocabulary.getName()),
log::warn);
} catch (VocabularyImportException e) {
diff --git a/metis-dereference/metis-dereference-import/src/test/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterFactoryTest.java b/metis-dereference/metis-dereference-import/src/test/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterFactoryTest.java
new file mode 100644
index 000000000..a3b76174c
--- /dev/null
+++ b/metis-dereference/metis-dereference-import/src/test/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterFactoryTest.java
@@ -0,0 +1,45 @@
+package eu.europeana.metis.dereference.vocimport;
+
+import eu.europeana.metis.dereference.vocimport.exception.VocabularyImportException;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+class VocabularyCollectionImporterFactoryTest {
+
+ private VocabularyCollectionImporterFactory factory;
+
+ @BeforeEach
+ void setUp() {
+ List validUrlsPrefixes = new ArrayList<>();
+ validUrlsPrefixes.add("https://validprefix");
+ factory = new VocabularyCollectionImporterFactory(validUrlsPrefixes);
+ }
+
+ @Test
+ void createImporterWithUri_expectSuccess() throws URISyntaxException, VocabularyImportException {
+ VocabularyCollectionImporter result = factory.createImporter(new URI("https://validprefix/test/call"));
+ assertEquals("https://validprefix/test/call", result.getDirectoryLocation().toString());
+ }
+
+ @Test
+ void createImporterWithUri_expectFail() {
+ assertThrows(VocabularyImportException.class,
+ () -> factory.createImporter(new URI("https://anotherprefix/test/call")));
+ }
+
+ @Test
+ void createImporterWithPath_expectSuccess() {
+ VocabularyCollectionImporter result = factory.createImporter(Paths.get("/path/test/random"));
+ assertEquals("/path/test/random", result.getDirectoryLocation().toString());
+ }
+
+}
diff --git a/metis-dereference/metis-dereference-rest/src/main/java/eu/europeana/metis/dereference/rest/config/Application.java b/metis-dereference/metis-dereference-rest/src/main/java/eu/europeana/metis/dereference/rest/config/Application.java
index 819a647bd..922cca604 100644
--- a/metis-dereference/metis-dereference-rest/src/main/java/eu/europeana/metis/dereference/rest/config/Application.java
+++ b/metis-dereference/metis-dereference-rest/src/main/java/eu/europeana/metis/dereference/rest/config/Application.java
@@ -4,8 +4,11 @@
import eu.europeana.corelib.web.socks.SocksProxy;
import eu.europeana.metis.dereference.service.dao.ProcessedEntityDao;
import eu.europeana.metis.dereference.service.dao.VocabularyDao;
+import eu.europeana.metis.dereference.vocimport.VocabularyCollectionImporterFactory;
import eu.europeana.metis.mongo.connection.MongoClientProvider;
import eu.europeana.metis.mongo.connection.MongoProperties;
+
+import java.util.Arrays;
import java.util.Collections;
import javax.annotation.PreDestroy;
import org.springframework.beans.factory.InitializingBean;
@@ -66,6 +69,10 @@ public class Application implements WebMvcConfigurer, InitializingBean {
@Value("${vocabulary.db}")
private String vocabularyDb;
+ //Valid directories list
+ @Value("${valid.url.prefixes}")
+ private String[] validUrlPrefixes;
+
private MongoClient mongoClientEntity;
private MongoClient mongoClientVocabulary;
@@ -121,6 +128,11 @@ public static PropertySourcesPlaceholderConfigurer propertySourcesPlaceholderCon
return new PropertySourcesPlaceholderConfigurer();
}
+ @Bean
+ public VocabularyCollectionImporterFactory getVocabularyCollectionImporterFactory(){
+ return new VocabularyCollectionImporterFactory(Arrays.asList(validUrlPrefixes));
+ }
+
/**
* Closes any connections previous acquired.
*/
diff --git a/metis-dereference/metis-dereference-rest/src/main/resources/dereferencing.properties.example b/metis-dereference/metis-dereference-rest/src/main/resources/dereferencing.properties.example
index 3ee045635..93a843124 100644
--- a/metis-dereference/metis-dereference-rest/src/main/resources/dereferencing.properties.example
+++ b/metis-dereference/metis-dereference-rest/src/main/resources/dereferencing.properties.example
@@ -12,4 +12,6 @@ mongo.username=
mongo.password=
mongo.application.name=
entity.db=
-vocabulary.db=
\ No newline at end of file
+vocabulary.db=
+
+valid.url.prefixes=
\ No newline at end of file
diff --git a/metis-dereference/metis-dereference-service/src/main/java/eu/europeana/metis/dereference/service/MongoDereferencingManagementService.java b/metis-dereference/metis-dereference-service/src/main/java/eu/europeana/metis/dereference/service/MongoDereferencingManagementService.java
index 19fd99634..a0d608d3b 100644
--- a/metis-dereference/metis-dereference-service/src/main/java/eu/europeana/metis/dereference/service/MongoDereferencingManagementService.java
+++ b/metis-dereference/metis-dereference-service/src/main/java/eu/europeana/metis/dereference/service/MongoDereferencingManagementService.java
@@ -8,6 +8,7 @@
import eu.europeana.metis.dereference.vocimport.VocabularyCollectionValidator;
import eu.europeana.metis.dereference.vocimport.VocabularyCollectionValidatorImpl;
import eu.europeana.metis.dereference.vocimport.exception.VocabularyImportException;
+
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
@@ -22,6 +23,7 @@ public class MongoDereferencingManagementService implements DereferencingManagem
private final VocabularyDao vocabularyDao;
private final ProcessedEntityDao processedEntityDao;
+ private final VocabularyCollectionImporterFactory vocabularyCollectionImporterFactory;
/**
* Constructor.
@@ -30,9 +32,10 @@ public class MongoDereferencingManagementService implements DereferencingManagem
*/
@Autowired
public MongoDereferencingManagementService(VocabularyDao vocabularyDao,
- ProcessedEntityDao processedEntityDao) {
+ ProcessedEntityDao processedEntityDao, VocabularyCollectionImporterFactory vocabularyCollectionImporterFactory) {
this.vocabularyDao = vocabularyDao;
this.processedEntityDao = processedEntityDao;
+ this.vocabularyCollectionImporterFactory = vocabularyCollectionImporterFactory;
}
@Override
@@ -48,9 +51,10 @@ public void emptyCache() {
@Override
public void loadVocabularies(URI directoryUrl) throws VocabularyImportException {
+ try {
// Import and validate the vocabularies
final List vocabularies = new ArrayList<>();
- final VocabularyCollectionImporter importer = new VocabularyCollectionImporterFactory()
+ final VocabularyCollectionImporter importer = vocabularyCollectionImporterFactory
.createImporter(directoryUrl);
final VocabularyCollectionValidator validator = new VocabularyCollectionValidatorImpl(importer,
true, true, true);
@@ -58,6 +62,9 @@ public void loadVocabularies(URI directoryUrl) throws VocabularyImportException
// All vocabularies are loaded well. Now we replace the vocabularies.
vocabularyDao.replaceAll(vocabularies);
+ } catch (VocabularyImportException e) {
+ throw new VocabularyImportException("An error as occurred while loading the vocabularies", e);
+ }
}
private static Vocabulary convertVocabulary(
diff --git a/metis-dereference/metis-dereference-service/src/test/java/eu/europeana/metis/dereference/service/MongoDereferencingManagementServiceTest.java b/metis-dereference/metis-dereference-service/src/test/java/eu/europeana/metis/dereference/service/MongoDereferencingManagementServiceTest.java
index f77c0a64a..b6439ee87 100644
--- a/metis-dereference/metis-dereference-service/src/test/java/eu/europeana/metis/dereference/service/MongoDereferencingManagementServiceTest.java
+++ b/metis-dereference/metis-dereference-service/src/test/java/eu/europeana/metis/dereference/service/MongoDereferencingManagementServiceTest.java
@@ -9,6 +9,7 @@
import eu.europeana.metis.dereference.Vocabulary;
import eu.europeana.metis.dereference.service.dao.ProcessedEntityDao;
import eu.europeana.metis.dereference.service.dao.VocabularyDao;
+import eu.europeana.metis.dereference.vocimport.VocabularyCollectionImporterFactory;
import eu.europeana.metis.mongo.embedded.EmbeddedLocalhostMongo;
import java.util.Collections;
import java.util.List;
@@ -40,7 +41,8 @@ void prepare() {
}
};
ProcessedEntityDao processedEntityDao = mock(ProcessedEntityDao.class);
- service = new MongoDereferencingManagementService(vocDao, processedEntityDao);
+ VocabularyCollectionImporterFactory vocabularyCollectionImporterFactory = mock(VocabularyCollectionImporterFactory.class);
+ service = new MongoDereferencingManagementService(vocDao, processedEntityDao, vocabularyCollectionImporterFactory);
}
@Test
From 3a6af4c1942360735a1d0bd4557d5f79b19ccc1f Mon Sep 17 00:00:00 2001
From: Simon Tzanakis
Date: Thu, 17 Mar 2022 09:49:37 +0100
Subject: [PATCH 15/73] MET-4233 Secure Dereference service from url request
attacks (#515)
---
.../VocabularyCollectionImporterFactory.java | 40 ++------------
.../VocabularyCollectionImporterImpl.java | 6 +-
.../VocabularyCollectionMavenRule.java | 12 ----
.../VocabularyCollectionValidatorImpl.java | 8 +--
...cabularyCollectionImporterFactoryTest.java | 55 +++++++------------
.../DereferencingManagementController.java | 22 +++++++-
.../dereference/rest/config/Application.java | 12 ++--
...DereferencingManagementControllerTest.java | 28 ++++++++--
8 files changed, 81 insertions(+), 102 deletions(-)
diff --git a/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterFactory.java b/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterFactory.java
index c7db33197..93f2d46d5 100644
--- a/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterFactory.java
+++ b/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterFactory.java
@@ -1,46 +1,25 @@
package eu.europeana.metis.dereference.vocimport;
-import eu.europeana.metis.dereference.vocimport.exception.VocabularyImportException;
import eu.europeana.metis.dereference.vocimport.model.Location;
-import org.apache.commons.collections.CollectionUtils;
-
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
-import java.util.ArrayList;
-import java.util.List;
/**
* This class is the factory for instances of {@link VocabularyCollectionImporter}.
*/
public class VocabularyCollectionImporterFactory {
- private final List validUrlPrefixes;
-
- /**
- * Constructor for the factory
- *
- * @param validUrlPrefixes The utils class used to verify input values
- */
- public VocabularyCollectionImporterFactory(List validUrlPrefixes) {
- this.validUrlPrefixes = new ArrayList<>(validUrlPrefixes);
- }
-
/**
- * Create a vocabulary importer for remote web addresses, indicated by instances of {@link URI}.
- * Note that this method can only be used for locations that are also a valid {@link
- * java.net.URL}.
+ * Create a vocabulary importer for remote web addresses, indicated by instances of {@link URI}. Note that this method can only
+ * be used for locations that are also a valid {@link java.net.URL}.
*
* @param directoryLocation The location of the directory to import.
- * @throws VocabularyImportException if a problem occurs when verifying directory
* @return A vocabulary importer.
*/
- public VocabularyCollectionImporter createImporter(URI directoryLocation) throws VocabularyImportException {
- if(isUrlPrefixNotValid(directoryLocation.toString())){
- throw new VocabularyImportException("The location of the directory to import is not valid.");
- }
+ public VocabularyCollectionImporter createImporter(URI directoryLocation) {
return new VocabularyCollectionImporterImpl(new UriLocation(directoryLocation));
}
@@ -67,28 +46,19 @@ public VocabularyCollectionImporter createImporter(Path baseDirectory, Path dire
return new VocabularyCollectionImporterImpl(new PathLocation(baseDirectory, directoryLocation));
}
- private boolean isUrlPrefixNotValid(String directoryToEvaluate) {
- boolean result;
-
- if (CollectionUtils.isEmpty(validUrlPrefixes)) {
- result = true;
- } else {
- result = validUrlPrefixes.stream().noneMatch(directoryToEvaluate::startsWith);
- }
- return result;
- }
-
private static final class UriLocation implements Location {
private final URI uri;
UriLocation(URI uri) {
this.uri = uri;
+
}
@Override
public InputStream read() throws IOException {
return uri.toURL().openStream();
+
}
@Override
diff --git a/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterImpl.java b/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterImpl.java
index 02bbcb5dc..37ce3a47c 100644
--- a/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterImpl.java
+++ b/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterImpl.java
@@ -28,16 +28,18 @@ final class VocabularyCollectionImporterImpl implements VocabularyCollectionImpo
}
@Override
- public Iterable importVocabularies() throws VocabularyImportException {
+ public Iterable importVocabularies()
+ throws VocabularyImportException {
// Obtain the directory entries.
final ObjectMapper mapper = new ObjectMapper(new YAMLFactory());
final VocabularyDirectoryEntry[] directoryEntries;
+
try (final InputStream input = directoryLocation.read()) {
directoryEntries = mapper.readValue(input, VocabularyDirectoryEntry[].class);
} catch (IOException e) {
throw new VocabularyImportException(
- "Could not read vocabulary directory at [" + directoryLocation + "].", e);
+ "Could not read vocabulary directory at [" + directoryLocation + "].", e);
}
// Compile the vocabulary loaders
diff --git a/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionMavenRule.java b/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionMavenRule.java
index f69238b83..9d21ffb86 100644
--- a/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionMavenRule.java
+++ b/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionMavenRule.java
@@ -1,7 +1,6 @@
package eu.europeana.metis.dereference.vocimport;
import eu.europeana.metis.dereference.vocimport.exception.VocabularyImportException;
-
import java.nio.file.Path;
import org.apache.maven.enforcer.rule.api.EnforcerRule;
import org.apache.maven.enforcer.rule.api.EnforcerRuleException;
@@ -9,7 +8,6 @@
import org.apache.maven.plugin.logging.Log;
import org.apache.maven.project.MavenProject;
import org.codehaus.plexus.component.repository.exception.ComponentLookupException;
-import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
/**
@@ -81,16 +79,6 @@ public class VocabularyCollectionMavenRule implements EnforcerRule {
public VocabularyCollectionMavenRule() {
}
- /**
- * Constructor. Used to inject the factory
- *
- * @param vocabularyCollectionImporterFactory The vocabulary collection importer factory
- */
- @Autowired
- public VocabularyCollectionMavenRule(VocabularyCollectionImporterFactory vocabularyCollectionImporterFactory){
- this.vocabularyCollectionImporterFactory = vocabularyCollectionImporterFactory;
- }
-
/**
* Constructor.
*
diff --git a/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionValidatorImpl.java b/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionValidatorImpl.java
index dfa92b86b..62ac84952 100644
--- a/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionValidatorImpl.java
+++ b/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionValidatorImpl.java
@@ -51,20 +51,18 @@ public void validate(Consumer vocabularyReceiver, Consumer w
}
@Override
- public void validateVocabularyOnly(Consumer vocabularyReceiver)
- throws VocabularyImportException {
+ public void validateVocabularyOnly(Consumer vocabularyReceiver) throws VocabularyImportException {
validateInternal(vocabularyReceiver, null, false);
}
private void validateInternal(Consumer vocabularyReceiver,
- Consumer warningReceiver, boolean validateExamples)
- throws VocabularyImportException {
+ Consumer warningReceiver, boolean validateExamples) throws VocabularyImportException {
final DuplicationChecker duplicationChecker = new DuplicationChecker();
final Iterable vocabularyLoaders = importer.importVocabularies();
for (VocabularyLoader loader : vocabularyLoaders) {
final Vocabulary vocabulary = loader.load();
final IncomingRecordToEdmConverter converter = validateVocabulary(vocabulary,
- duplicationChecker);
+ duplicationChecker);
if (validateExamples) {
validateExamples(vocabulary, warningReceiver, converter);
}
diff --git a/metis-dereference/metis-dereference-import/src/test/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterFactoryTest.java b/metis-dereference/metis-dereference-import/src/test/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterFactoryTest.java
index a3b76174c..7d725eb46 100644
--- a/metis-dereference/metis-dereference-import/src/test/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterFactoryTest.java
+++ b/metis-dereference/metis-dereference-import/src/test/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterFactoryTest.java
@@ -1,45 +1,32 @@
package eu.europeana.metis.dereference.vocimport;
-import eu.europeana.metis.dereference.vocimport.exception.VocabularyImportException;
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Test;
+import static org.junit.jupiter.api.Assertions.assertEquals;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.Paths;
-import java.util.ArrayList;
-import java.util.List;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertThrows;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
class VocabularyCollectionImporterFactoryTest {
- private VocabularyCollectionImporterFactory factory;
-
- @BeforeEach
- void setUp() {
- List validUrlsPrefixes = new ArrayList<>();
- validUrlsPrefixes.add("https://validprefix");
- factory = new VocabularyCollectionImporterFactory(validUrlsPrefixes);
- }
-
- @Test
- void createImporterWithUri_expectSuccess() throws URISyntaxException, VocabularyImportException {
- VocabularyCollectionImporter result = factory.createImporter(new URI("https://validprefix/test/call"));
- assertEquals("https://validprefix/test/call", result.getDirectoryLocation().toString());
- }
-
- @Test
- void createImporterWithUri_expectFail() {
- assertThrows(VocabularyImportException.class,
- () -> factory.createImporter(new URI("https://anotherprefix/test/call")));
- }
-
- @Test
- void createImporterWithPath_expectSuccess() {
- VocabularyCollectionImporter result = factory.createImporter(Paths.get("/path/test/random"));
- assertEquals("/path/test/random", result.getDirectoryLocation().toString());
- }
+ private VocabularyCollectionImporterFactory factory;
+
+ @BeforeEach
+ void setUp() {
+ factory = new VocabularyCollectionImporterFactory();
+ }
+
+ @Test
+ void createImporterWithUri_expectSuccess() throws URISyntaxException {
+ VocabularyCollectionImporter result = factory.createImporter(new URI("https://validprefix/test/call"));
+ assertEquals("https://validprefix/test/call", result.getDirectoryLocation().toString());
+ }
+
+ @Test
+ void createImporterWithPath_expectSuccess() {
+ VocabularyCollectionImporter result = factory.createImporter(Paths.get("/path/test/random"));
+ assertEquals("/path/test/random", result.getDirectoryLocation().toString());
+ }
}
diff --git a/metis-dereference/metis-dereference-rest/src/main/java/eu/europeana/metis/dereference/rest/DereferencingManagementController.java b/metis-dereference/metis-dereference-rest/src/main/java/eu/europeana/metis/dereference/rest/DereferencingManagementController.java
index 6d45846b3..90dd5a664 100644
--- a/metis-dereference/metis-dereference-rest/src/main/java/eu/europeana/metis/dereference/rest/DereferencingManagementController.java
+++ b/metis-dereference/metis-dereference-rest/src/main/java/eu/europeana/metis/dereference/rest/DereferencingManagementController.java
@@ -1,9 +1,9 @@
package eu.europeana.metis.dereference.rest;
-import eu.europeana.metis.utils.RestEndpoints;
import eu.europeana.metis.dereference.Vocabulary;
import eu.europeana.metis.dereference.service.DereferencingManagementService;
import eu.europeana.metis.dereference.vocimport.exception.VocabularyImportException;
+import eu.europeana.metis.utils.RestEndpoints;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import io.swagger.annotations.ApiParam;
@@ -11,7 +11,9 @@
import io.swagger.annotations.ApiResponses;
import java.net.URI;
import java.net.URISyntaxException;
+import java.util.ArrayList;
import java.util.List;
+import org.apache.commons.collections.CollectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
@@ -34,10 +36,12 @@ public class DereferencingManagementController {
private static final Logger LOGGER = LoggerFactory.getLogger(DereferencingManagementController.class);
private final DereferencingManagementService service;
+ private final List validUrlPrefixes;
@Autowired
- public DereferencingManagementController(DereferencingManagementService service) {
+ public DereferencingManagementController(DereferencingManagementService service, List validUrlPrefixes) {
this.service = service;
+ this.validUrlPrefixes = new ArrayList<>(validUrlPrefixes);
}
/**
@@ -79,6 +83,9 @@ public void emptyCache() {
}) public ResponseEntity loadVocabularies(
@ApiParam("directory_url") @RequestParam("directory_url") String directoryUrl) {
try {
+ if (isUrlPrefixNotValid(directoryUrl)) {
+ return ResponseEntity.badRequest().body("The url of the directory to import is not valid.");
+ }
service.loadVocabularies(new URI(directoryUrl));
return ResponseEntity.ok().build();
} catch (URISyntaxException e) {
@@ -89,4 +96,15 @@ public void emptyCache() {
return ResponseEntity.status(HttpStatus.BAD_GATEWAY).body(e.getMessage());
}
}
+
+ private boolean isUrlPrefixNotValid(String directoryToEvaluate) {
+ boolean result;
+
+ if (CollectionUtils.isEmpty(validUrlPrefixes)) {
+ result = true;
+ } else {
+ result = validUrlPrefixes.stream().noneMatch(directoryToEvaluate::startsWith);
+ }
+ return result;
+ }
}
diff --git a/metis-dereference/metis-dereference-rest/src/main/java/eu/europeana/metis/dereference/rest/config/Application.java b/metis-dereference/metis-dereference-rest/src/main/java/eu/europeana/metis/dereference/rest/config/Application.java
index 922cca604..58cb63fb7 100644
--- a/metis-dereference/metis-dereference-rest/src/main/java/eu/europeana/metis/dereference/rest/config/Application.java
+++ b/metis-dereference/metis-dereference-rest/src/main/java/eu/europeana/metis/dereference/rest/config/Application.java
@@ -4,12 +4,10 @@
import eu.europeana.corelib.web.socks.SocksProxy;
import eu.europeana.metis.dereference.service.dao.ProcessedEntityDao;
import eu.europeana.metis.dereference.service.dao.VocabularyDao;
-import eu.europeana.metis.dereference.vocimport.VocabularyCollectionImporterFactory;
import eu.europeana.metis.mongo.connection.MongoClientProvider;
import eu.europeana.metis.mongo.connection.MongoProperties;
-
-import java.util.Arrays;
import java.util.Collections;
+import java.util.List;
import javax.annotation.PreDestroy;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Value;
@@ -124,13 +122,13 @@ VocabularyDao getVocabularyDao() {
}
@Bean
- public static PropertySourcesPlaceholderConfigurer propertySourcesPlaceholderConfigurer() {
- return new PropertySourcesPlaceholderConfigurer();
+ List getValidUrlPrefixes() {
+ return List.of(validUrlPrefixes);
}
@Bean
- public VocabularyCollectionImporterFactory getVocabularyCollectionImporterFactory(){
- return new VocabularyCollectionImporterFactory(Arrays.asList(validUrlPrefixes));
+ public static PropertySourcesPlaceholderConfigurer propertySourcesPlaceholderConfigurer() {
+ return new PropertySourcesPlaceholderConfigurer();
}
/**
diff --git a/metis-dereference/metis-dereference-rest/src/test/java/eu/europeana/metis/dereference/rest/DereferencingManagementControllerTest.java b/metis-dereference/metis-dereference-rest/src/test/java/eu/europeana/metis/dereference/rest/DereferencingManagementControllerTest.java
index 96f2f4589..bd950d707 100644
--- a/metis-dereference/metis-dereference-rest/src/test/java/eu/europeana/metis/dereference/rest/DereferencingManagementControllerTest.java
+++ b/metis-dereference/metis-dereference-rest/src/test/java/eu/europeana/metis/dereference/rest/DereferencingManagementControllerTest.java
@@ -2,19 +2,24 @@
import static org.hamcrest.core.Is.is;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.doNothing;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.delete;
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get;
+import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
import eu.europeana.metis.dereference.Vocabulary;
import eu.europeana.metis.dereference.rest.exceptions.RestResponseExceptionHandler;
import eu.europeana.metis.dereference.service.DereferencingManagementService;
+import java.net.URI;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.List;
import org.bson.types.ObjectId;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@@ -33,7 +38,7 @@ void setUp() {
dereferencingManagementServiceMock = mock(DereferencingManagementService.class);
DereferencingManagementController dereferencingManagementController = new DereferencingManagementController(
- dereferencingManagementServiceMock);
+ dereferencingManagementServiceMock, List.of("http://correctUrl"));
dereferencingManagementControllerMock = MockMvcBuilders
.standaloneSetup(dereferencingManagementController)
@@ -60,9 +65,22 @@ void testGetAllVocabularies() throws Exception {
when(dereferencingManagementServiceMock.getAllVocabularies()).thenReturn(dummyVocabList);
dereferencingManagementControllerMock.perform(get("/vocabularies"))
- .andExpect(jsonPath("$[0].uris[0]", is("http://dummy1.org/path1")))
- .andExpect(jsonPath("$[1].uris[0]", is("http://dummy2.org/path2")))
- .andExpect(status().is(200));
+ .andExpect(jsonPath("$[0].uris[0]", is("http://dummy1.org/path1")))
+ .andExpect(jsonPath("$[1].uris[0]", is("http://dummy2.org/path2")))
+ .andExpect(status().is(200));
+ }
+
+ @Test
+ void testLoadVocabularies_validPrefix_expectSuccess() throws Exception {
+ doNothing().when(dereferencingManagementServiceMock).loadVocabularies(any(URI.class));
+ dereferencingManagementControllerMock.perform(post("/load_vocabularies")
+ .param("directory_url", "http://correctUrl/test/call")).andExpect(status().is(200));
+ }
+
+ @Test
+ void testLoadVocabularies_invalidPrefix_expectFail() throws Exception {
+ dereferencingManagementControllerMock.perform(post("/load_vocabularies")
+ .param("directory_url", "http://wrongUrl")).andExpect(status().is(400));
}
@Test
@@ -73,7 +91,7 @@ void testEmptyCache() throws Exception {
}).when(dereferencingManagementServiceMock).emptyCache();
dereferencingManagementControllerMock.perform(delete("/cache"))
- .andExpect(status().is(200));
+ .andExpect(status().is(200));
assertEquals("OK", testEmptyCacheResult);
}
From 1aea2e92bc043b03d6dfe96df049af3516cb73f9 Mon Sep 17 00:00:00 2001
From: Simon Tzanakis
Date: Thu, 17 Mar 2022 16:57:16 +0100
Subject: [PATCH 16/73] MET-4233 MET-4233B Reproduce sonar issue on branch
(#519)
* MET-4233 MET-4233B Reproduce sonar issue on branch
* MET-4233 MET-4233B Reproduce sonar issue on branch2
* MET-4233 MET-4233B 1st try to fix issue
* MET-4233 MET-4233B 2nd try to fix issue
* MET-4233 MET-4233B Re-organize code
* MET-4233 MET-4233B Cleanup
* MET-4233 MET-4233B Push small change, github didn't pick last change on PR
---
.../VocabularyCollectionImporterFactory.java | 36 +++++----
.../VocabularyCollectionImporterImpl.java | 46 ++++++-----
.../dereference/vocimport/model/Location.java | 9 ++-
...cabularyCollectionImporterFactoryTest.java | 32 --------
.../DereferencingManagementController.java | 81 +++++++++++++------
.../dereference/rest/config/Application.java | 10 +--
.../rest/config/ServletInitializer.java | 4 +-
.../dereferencing.properties.example | 3 +-
...DereferencingManagementControllerTest.java | 24 +++---
.../DereferencingManagementService.java | 7 +-
.../MongoDereferencingManagementService.java | 19 +++--
11 files changed, 142 insertions(+), 129 deletions(-)
delete mode 100644 metis-dereference/metis-dereference-import/src/test/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterFactoryTest.java
diff --git a/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterFactory.java b/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterFactory.java
index 93f2d46d5..97eeea888 100644
--- a/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterFactory.java
+++ b/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterFactory.java
@@ -1,9 +1,13 @@
package eu.europeana.metis.dereference.vocimport;
import eu.europeana.metis.dereference.vocimport.model.Location;
+import eu.europeana.metis.exception.BadContentException;
import java.io.IOException;
import java.io.InputStream;
+import java.net.MalformedURLException;
import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
@@ -19,8 +23,8 @@ public class VocabularyCollectionImporterFactory {
* @param directoryLocation The location of the directory to import.
* @return A vocabulary importer.
*/
- public VocabularyCollectionImporter createImporter(URI directoryLocation) {
- return new VocabularyCollectionImporterImpl(new UriLocation(directoryLocation));
+ public VocabularyCollectionImporter createImporter(URL directoryLocation) {
+ return new VocabularyCollectionImporterImpl(new UrlLocation(directoryLocation));
}
/**
@@ -34,9 +38,8 @@ public VocabularyCollectionImporter createImporter(Path directoryLocation) {
}
/**
- * Create a vocabulary importer for local files, indicated by instances of {@link Path}. This
- * method provides a way to set a base directory that will be assumed known (so that output and
- * logs will only include the relative location).
+ * Create a vocabulary importer for local files, indicated by instances of {@link Path}. This method provides a way to set a
+ * base directory that will be assumed known (so that output and logs will only include the relative location).
*
* @param baseDirectory The base directory of the project or collection. Can be null.
* @param directoryLocation The full location of the directory file to import.
@@ -46,29 +49,32 @@ public VocabularyCollectionImporter createImporter(Path baseDirectory, Path dire
return new VocabularyCollectionImporterImpl(new PathLocation(baseDirectory, directoryLocation));
}
- private static final class UriLocation implements Location {
+ private static final class UrlLocation implements Location {
- private final URI uri;
-
- UriLocation(URI uri) {
- this.uri = uri;
+ private final URL url;
+ UrlLocation(URL url) {
+ this.url = url;
}
@Override
public InputStream read() throws IOException {
- return uri.toURL().openStream();
-
+ return url.openStream();
}
@Override
- public Location resolve(String relativeLocation) {
- return new UriLocation(uri.resolve(relativeLocation));
+ public Location resolve(String relativeLocation) throws BadContentException {
+ try {
+ return new UrlLocation(url.toURI().resolve(relativeLocation).toURL());
+ } catch (URISyntaxException | MalformedURLException e) {
+ throw new BadContentException(
+ String.format("Provided url '%s' and relative location %s, failed to parse.", url, relativeLocation), e);
+ }
}
@Override
public String toString() {
- return uri.toString();
+ return url.toString();
}
}
diff --git a/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterImpl.java b/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterImpl.java
index 37ce3a47c..733305b98 100644
--- a/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterImpl.java
+++ b/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterImpl.java
@@ -9,6 +9,7 @@
import eu.europeana.metis.dereference.vocimport.model.VocabularyDirectoryEntry;
import eu.europeana.metis.dereference.vocimport.model.VocabularyLoader;
import eu.europeana.metis.dereference.vocimport.model.VocabularyMetadata;
+import eu.europeana.metis.exception.BadContentException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
@@ -21,15 +22,14 @@
*/
final class VocabularyCollectionImporterImpl implements VocabularyCollectionImporter {
- private Location directoryLocation;
+ private final Location directoryLocation;
VocabularyCollectionImporterImpl(Location directoryLocation) {
this.directoryLocation = directoryLocation;
}
@Override
- public Iterable importVocabularies()
- throws VocabularyImportException {
+ public Iterable importVocabularies() throws VocabularyImportException {
// Obtain the directory entries.
final ObjectMapper mapper = new ObjectMapper(new YAMLFactory());
@@ -45,8 +45,16 @@ public Iterable importVocabularies()
// Compile the vocabulary loaders
final List result = new ArrayList<>(directoryEntries.length);
for (VocabularyDirectoryEntry entry : directoryEntries) {
- final Location metadataLocation = directoryLocation.resolve(entry.getMetadata());
- final Location mappingLocation = directoryLocation.resolve(entry.getMapping());
+ final Location metadataLocation;
+ final Location mappingLocation;
+ try {
+ metadataLocation = directoryLocation.resolve(entry.getMetadata());
+ mappingLocation = directoryLocation.resolve(entry.getMapping());
+ } catch (BadContentException e) {
+ throw new VocabularyImportException(
+ String.format("Could not read vocabulary directory at [%s] and entry metadata [%s], entry mapping [%s].",
+ directoryLocation, entry.getMetadata(), entry.getMapping()), e);
+ }
result.add(() -> loadVocabulary(metadataLocation, mappingLocation, mapper));
}
@@ -55,7 +63,7 @@ public Iterable importVocabularies()
}
private Vocabulary loadVocabulary(Location metadataLocation, Location mappingLocation,
- ObjectMapper mapper) throws VocabularyImportException {
+ ObjectMapper mapper) throws VocabularyImportException {
// Read the metadata file.
final VocabularyMetadata metadata;
@@ -63,7 +71,7 @@ private Vocabulary loadVocabulary(Location metadataLocation, Location mappingLoc
metadata = mapper.readValue(input, VocabularyMetadata.class);
} catch (IOException e) {
throw new VocabularyImportException(
- "Could not read vocabulary metadata at [" + metadataLocation + "].", e);
+ "Could not read vocabulary metadata at [" + metadataLocation + "].", e);
}
// Read the mapping file.
@@ -72,22 +80,22 @@ private Vocabulary loadVocabulary(Location metadataLocation, Location mappingLoc
mapping = IOUtils.toString(input, StandardCharsets.UTF_8);
} catch (IOException e) {
throw new VocabularyImportException(
- "Could not read vocabulary mapping at [" + mappingLocation + "].", e);
+ "Could not read vocabulary mapping at [" + mappingLocation + "].", e);
}
// Compile the vocabulary.
return Vocabulary.builder()
- .setName(metadata.getName())
- .setTypes(metadata.getTypes())
- .setPaths(metadata.getPaths())
- .setParentIterations(metadata.getParentIterations())
- .setSuffix(metadata.getSuffix())
- .setExamples(metadata.getExamples())
- .setCounterExamples(metadata.getCounterExamples())
- .setTransformation(mapping)
- .setReadableMetadataLocation(metadataLocation.toString())
- .setReadableMappingLocation(mappingLocation.toString())
- .build();
+ .setName(metadata.getName())
+ .setTypes(metadata.getTypes())
+ .setPaths(metadata.getPaths())
+ .setParentIterations(metadata.getParentIterations())
+ .setSuffix(metadata.getSuffix())
+ .setExamples(metadata.getExamples())
+ .setCounterExamples(metadata.getCounterExamples())
+ .setTransformation(mapping)
+ .setReadableMetadataLocation(metadataLocation.toString())
+ .setReadableMappingLocation(mappingLocation.toString())
+ .build();
}
public Location getDirectoryLocation() {
diff --git a/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/model/Location.java b/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/model/Location.java
index f6970584b..f2a1039da 100644
--- a/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/model/Location.java
+++ b/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/model/Location.java
@@ -1,5 +1,6 @@
package eu.europeana.metis.dereference.vocimport.model;
+import eu.europeana.metis.exception.BadContentException;
import java.io.IOException;
import java.io.InputStream;
@@ -14,14 +15,14 @@ public interface Location {
InputStream read() throws IOException;
/**
- * Resolve a relative location against the given location. The given location can be assumed to be
- * a file (as opposed to a path/directory) so that essentially the relative location is resolved
- * against the parent of the given location.
+ * Resolve a relative location against the given location. The given location can be assumed to be a file (as opposed to a
+ * path/directory) so that essentially the relative location is resolved against the parent of the given location.
*
* @param relativeLocation The relative location to resolve.
* @return The resolved location.
+ * @throws BadContentException if the relative location could not be resolved
*/
- Location resolve(String relativeLocation);
+ Location resolve(String relativeLocation) throws BadContentException;
/**
* @return A human-readable representation of the location.
diff --git a/metis-dereference/metis-dereference-import/src/test/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterFactoryTest.java b/metis-dereference/metis-dereference-import/src/test/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterFactoryTest.java
deleted file mode 100644
index 7d725eb46..000000000
--- a/metis-dereference/metis-dereference-import/src/test/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionImporterFactoryTest.java
+++ /dev/null
@@ -1,32 +0,0 @@
-package eu.europeana.metis.dereference.vocimport;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.nio.file.Paths;
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Test;
-
-class VocabularyCollectionImporterFactoryTest {
-
- private VocabularyCollectionImporterFactory factory;
-
- @BeforeEach
- void setUp() {
- factory = new VocabularyCollectionImporterFactory();
- }
-
- @Test
- void createImporterWithUri_expectSuccess() throws URISyntaxException {
- VocabularyCollectionImporter result = factory.createImporter(new URI("https://validprefix/test/call"));
- assertEquals("https://validprefix/test/call", result.getDirectoryLocation().toString());
- }
-
- @Test
- void createImporterWithPath_expectSuccess() {
- VocabularyCollectionImporter result = factory.createImporter(Paths.get("/path/test/random"));
- assertEquals("/path/test/random", result.getDirectoryLocation().toString());
- }
-
-}
diff --git a/metis-dereference/metis-dereference-rest/src/main/java/eu/europeana/metis/dereference/rest/DereferencingManagementController.java b/metis-dereference/metis-dereference-rest/src/main/java/eu/europeana/metis/dereference/rest/DereferencingManagementController.java
index 90dd5a664..43b046ba3 100644
--- a/metis-dereference/metis-dereference-rest/src/main/java/eu/europeana/metis/dereference/rest/DereferencingManagementController.java
+++ b/metis-dereference/metis-dereference-rest/src/main/java/eu/europeana/metis/dereference/rest/DereferencingManagementController.java
@@ -3,17 +3,21 @@
import eu.europeana.metis.dereference.Vocabulary;
import eu.europeana.metis.dereference.service.DereferencingManagementService;
import eu.europeana.metis.dereference.vocimport.exception.VocabularyImportException;
+import eu.europeana.metis.exception.BadContentException;
import eu.europeana.metis.utils.RestEndpoints;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import io.swagger.annotations.ApiParam;
import io.swagger.annotations.ApiResponse;
import io.swagger.annotations.ApiResponses;
+import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
-import java.util.ArrayList;
+import java.net.URL;
+import java.util.HashSet;
import java.util.List;
-import org.apache.commons.collections.CollectionUtils;
+import java.util.Optional;
+import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
@@ -36,12 +40,16 @@ public class DereferencingManagementController {
private static final Logger LOGGER = LoggerFactory.getLogger(DereferencingManagementController.class);
private final DereferencingManagementService service;
- private final List validUrlPrefixes;
+ private final Set allowedUrlDomains;
+ /**
+ * @param service the dereferencing management service
+ * @param allowedUrlDomains the url domains (hosts) from which vocabularies are allowed to be loaded
+ */
@Autowired
- public DereferencingManagementController(DereferencingManagementService service, List validUrlPrefixes) {
+ public DereferencingManagementController(DereferencingManagementService service, Set allowedUrlDomains) {
this.service = service;
- this.validUrlPrefixes = new ArrayList<>(validUrlPrefixes);
+ this.allowedUrlDomains = new HashSet<>(allowedUrlDomains);
}
/**
@@ -57,9 +65,8 @@ public List getAllVocabularies() {
}
/**
- * Empty Cache. This will remove ALL entries in the cache (Redis). If the same redis
- * instance/cluster is used for multiple services then the cache for other services is cleared as
- * well.
+ * Empty Cache. This will remove ALL entries in the cache (Redis). If the same redis instance/cluster is used for multiple
+ * services then the cache for other services is cleared as well.
*/
@DeleteMapping(value = RestEndpoints.CACHE_EMPTY)
@ResponseBody
@@ -71,24 +78,27 @@ public void emptyCache() {
/**
* Load the vocabularies from an online source. This does NOT purge the cache.
*
- * @param directoryUrl The online location of the vocabulary directory.
+ * @param directoryUrl The online location of the vocabulary directory
+ * @return the response, containing an error message on failure and otherwise empty
*/
@PostMapping(value = RestEndpoints.LOAD_VOCABULARIES)
@ResponseBody
@ApiOperation(value = "Load and replace the vocabularies listed by the given vocabulary directory. Does NOT purge the cache.")
@ApiResponses(value = {
- @ApiResponse(code = 200, message = "Vocabularies loaded successfully."),
- @ApiResponse(code = 400, message = "Bad request parameters."),
- @ApiResponse(code = 502, message = "Problem accessing vocabulary repository.")
- }) public ResponseEntity loadVocabularies(
- @ApiParam("directory_url") @RequestParam("directory_url") String directoryUrl) {
+ @ApiResponse(code = 200, message = "Vocabularies loaded successfully."),
+ @ApiResponse(code = 400, message = "Bad request parameters."),
+ @ApiResponse(code = 502, message = "Problem accessing vocabulary repository.")
+ })
+ public ResponseEntity loadVocabularies(
+ @ApiParam("directory_url") @RequestParam("directory_url") String directoryUrl) {
try {
- if (isUrlPrefixNotValid(directoryUrl)) {
- return ResponseEntity.badRequest().body("The url of the directory to import is not valid.");
+ final Optional validatedLocationUrl = getValidatedLocationUrl(directoryUrl);
+ if (validatedLocationUrl.isPresent()) {
+ service.loadVocabularies(validatedLocationUrl.get());
+ return ResponseEntity.ok().build();
}
- service.loadVocabularies(new URI(directoryUrl));
- return ResponseEntity.ok().build();
- } catch (URISyntaxException e) {
+ return ResponseEntity.badRequest().body("The url of the directory to import is not valid.");
+ } catch (BadContentException e) {
LOGGER.warn("Could not load vocabularies", e);
return ResponseEntity.badRequest().body(e.getMessage());
} catch (VocabularyImportException e) {
@@ -97,14 +107,33 @@ public void emptyCache() {
}
}
- private boolean isUrlPrefixNotValid(String directoryToEvaluate) {
- boolean result;
+ /**
+ * Validates a String representation of a URL.
+ * The method will check that the url is:
+ *
+ * - valid according to the protocol
+ * - of https scheme
+ * - part of the allowed domains
+ *
+ * Only a url that passes all of these checks is returned for the application to further access it.
+ *
+ * @param directoryUrl the url to validate
+ * @return the validated URL, or empty if the scheme is not https or the domain is not allowed
+ * @throws BadContentException if the url failed during parsing
+ */
+ private Optional getValidatedLocationUrl(String directoryUrl) throws BadContentException {
+ try {
+ URI uri = new URI(directoryUrl);
+ String scheme = uri.getScheme();
+ String remoteHost = uri.getHost();
- if (CollectionUtils.isEmpty(validUrlPrefixes)) {
- result = true;
- } else {
- result = validUrlPrefixes.stream().noneMatch(directoryToEvaluate::startsWith);
+ if ("https".equals(scheme) && allowedUrlDomains.contains(remoteHost)) {
+ return Optional.of(uri.toURL());
+ }
+ } catch (URISyntaxException | MalformedURLException e) {
+ throw new BadContentException(String.format("Provided directoryUrl '%s', failed to parse.", directoryUrl), e);
}
- return result;
+
+ return Optional.empty();
}
}
diff --git a/metis-dereference/metis-dereference-rest/src/main/java/eu/europeana/metis/dereference/rest/config/Application.java b/metis-dereference/metis-dereference-rest/src/main/java/eu/europeana/metis/dereference/rest/config/Application.java
index 58cb63fb7..d951ae798 100644
--- a/metis-dereference/metis-dereference-rest/src/main/java/eu/europeana/metis/dereference/rest/config/Application.java
+++ b/metis-dereference/metis-dereference-rest/src/main/java/eu/europeana/metis/dereference/rest/config/Application.java
@@ -7,7 +7,7 @@
import eu.europeana.metis.mongo.connection.MongoClientProvider;
import eu.europeana.metis.mongo.connection.MongoProperties;
import java.util.Collections;
-import java.util.List;
+import java.util.Set;
import javax.annotation.PreDestroy;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Value;
@@ -68,8 +68,8 @@ public class Application implements WebMvcConfigurer, InitializingBean {
private String vocabularyDb;
//Valid directories list
- @Value("${valid.url.prefixes}")
- private String[] validUrlPrefixes;
+ @Value("${allowed.url.domains}")
+ private String[] allowedUrlDomains;
private MongoClient mongoClientEntity;
private MongoClient mongoClientVocabulary;
@@ -122,8 +122,8 @@ VocabularyDao getVocabularyDao() {
}
@Bean
- List getValidUrlPrefixes() {
- return List.of(validUrlPrefixes);
+ Set getAllowedUrlDomains() {
+ return Set.of(allowedUrlDomains);
}
@Bean
diff --git a/metis-dereference/metis-dereference-rest/src/main/java/eu/europeana/metis/dereference/rest/config/ServletInitializer.java b/metis-dereference/metis-dereference-rest/src/main/java/eu/europeana/metis/dereference/rest/config/ServletInitializer.java
index 20d9dc309..22baa6a3a 100644
--- a/metis-dereference/metis-dereference-rest/src/main/java/eu/europeana/metis/dereference/rest/config/ServletInitializer.java
+++ b/metis-dereference/metis-dereference-rest/src/main/java/eu/europeana/metis/dereference/rest/config/ServletInitializer.java
@@ -1,8 +1,9 @@
package eu.europeana.metis.dereference.rest.config;
+import eu.europeana.metis.dereference.RdfRetriever;
import eu.europeana.metis.dereference.service.MongoDereferenceService;
import eu.europeana.metis.dereference.service.MongoDereferencingManagementService;
-import eu.europeana.metis.dereference.RdfRetriever;
+import eu.europeana.metis.dereference.vocimport.VocabularyCollectionImporterFactory;
import org.springframework.util.ClassUtils;
import org.springframework.web.context.WebApplicationContext;
import org.springframework.web.context.support.AnnotationConfigWebApplicationContext;
@@ -19,6 +20,7 @@ protected WebApplicationContext createServletApplicationContext() {
context.scan(ClassUtils.getPackageName(getClass()));
context.register(MongoDereferenceService.class);
context.register(MongoDereferencingManagementService.class);
+ context.register(VocabularyCollectionImporterFactory.class);
context.register(RdfRetriever.class);
return context;
diff --git a/metis-dereference/metis-dereference-rest/src/main/resources/dereferencing.properties.example b/metis-dereference/metis-dereference-rest/src/main/resources/dereferencing.properties.example
index 93a843124..a7f04f76d 100644
--- a/metis-dereference/metis-dereference-rest/src/main/resources/dereferencing.properties.example
+++ b/metis-dereference/metis-dereference-rest/src/main/resources/dereferencing.properties.example
@@ -14,4 +14,5 @@ mongo.application.name=
entity.db=
vocabulary.db=
-valid.url.prefixes=
\ No newline at end of file
+#The allowed domains for loading vocabularies, without the scheme (always validated against https), e.g. raw.githubusercontent.com
+allowed.url.domains=
\ No newline at end of file
diff --git a/metis-dereference/metis-dereference-rest/src/test/java/eu/europeana/metis/dereference/rest/DereferencingManagementControllerTest.java b/metis-dereference/metis-dereference-rest/src/test/java/eu/europeana/metis/dereference/rest/DereferencingManagementControllerTest.java
index bd950d707..a7e99e76b 100644
--- a/metis-dereference/metis-dereference-rest/src/test/java/eu/europeana/metis/dereference/rest/DereferencingManagementControllerTest.java
+++ b/metis-dereference/metis-dereference-rest/src/test/java/eu/europeana/metis/dereference/rest/DereferencingManagementControllerTest.java
@@ -16,10 +16,10 @@
import eu.europeana.metis.dereference.Vocabulary;
import eu.europeana.metis.dereference.rest.exceptions.RestResponseExceptionHandler;
import eu.europeana.metis.dereference.service.DereferencingManagementService;
-import java.net.URI;
+import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
-import java.util.List;
+import java.util.Set;
import org.bson.types.ObjectId;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@@ -38,7 +38,7 @@ void setUp() {
dereferencingManagementServiceMock = mock(DereferencingManagementService.class);
DereferencingManagementController dereferencingManagementController = new DereferencingManagementController(
- dereferencingManagementServiceMock, List.of("http://correctUrl"));
+ dereferencingManagementServiceMock, Set.of("valid.domain.com"));
dereferencingManagementControllerMock = MockMvcBuilders
.standaloneSetup(dereferencingManagementController)
@@ -51,12 +51,12 @@ void testGetAllVocabularies() throws Exception {
Vocabulary dummyVocab1 = new Vocabulary();
dummyVocab1.setId(new ObjectId());
dummyVocab1.setName("Dummy1");
- dummyVocab1.setUris(Collections.singleton("http://dummy1.org/path1"));
+ dummyVocab1.setUris(Collections.singleton("https://dummy1.org/path1"));
Vocabulary dummyVocab2 = new Vocabulary();
dummyVocab2.setId(new ObjectId());
dummyVocab2.setName("Dummy2");
- dummyVocab2.setUris(Collections.singleton("http://dummy2.org/path2"));
+ dummyVocab2.setUris(Collections.singleton("https://dummy2.org/path2"));
ArrayList dummyVocabList = new ArrayList<>();
dummyVocabList.add(dummyVocab1);
@@ -65,22 +65,22 @@ void testGetAllVocabularies() throws Exception {
when(dereferencingManagementServiceMock.getAllVocabularies()).thenReturn(dummyVocabList);
dereferencingManagementControllerMock.perform(get("/vocabularies"))
- .andExpect(jsonPath("$[0].uris[0]", is("http://dummy1.org/path1")))
- .andExpect(jsonPath("$[1].uris[0]", is("http://dummy2.org/path2")))
+ .andExpect(jsonPath("$[0].uris[0]", is("https://dummy1.org/path1")))
+ .andExpect(jsonPath("$[1].uris[0]", is("https://dummy2.org/path2")))
.andExpect(status().is(200));
}
@Test
- void testLoadVocabularies_validPrefix_expectSuccess() throws Exception {
- doNothing().when(dereferencingManagementServiceMock).loadVocabularies(any(URI.class));
+ void testLoadVocabularies_validDomain_expectSuccess() throws Exception {
+ doNothing().when(dereferencingManagementServiceMock).loadVocabularies(any(URL.class));
dereferencingManagementControllerMock.perform(post("/load_vocabularies")
- .param("directory_url", "http://correctUrl/test/call")).andExpect(status().is(200));
+ .param("directory_url", "https://valid.domain.com/test/call")).andExpect(status().is(200));
}
@Test
- void testLoadVocabularies_invalidPrefix_expectFail() throws Exception {
+ void testLoadVocabularies_invalidDomain_expectFail() throws Exception {
dereferencingManagementControllerMock.perform(post("/load_vocabularies")
- .param("directory_url", "http://wrongUrl")).andExpect(status().is(400));
+ .param("directory_url", "https://invalid.domain.com")).andExpect(status().is(400));
}
@Test
diff --git a/metis-dereference/metis-dereference-service/src/main/java/eu/europeana/metis/dereference/service/DereferencingManagementService.java b/metis-dereference/metis-dereference-service/src/main/java/eu/europeana/metis/dereference/service/DereferencingManagementService.java
index 7248f6a63..9a057884e 100644
--- a/metis-dereference/metis-dereference-service/src/main/java/eu/europeana/metis/dereference/service/DereferencingManagementService.java
+++ b/metis-dereference/metis-dereference-service/src/main/java/eu/europeana/metis/dereference/service/DereferencingManagementService.java
@@ -2,7 +2,7 @@
import eu.europeana.metis.dereference.Vocabulary;
import eu.europeana.metis.dereference.vocimport.exception.VocabularyImportException;
-import java.net.URI;
+import java.net.URL;
import java.util.List;
/**
@@ -26,8 +26,7 @@ public interface DereferencingManagementService {
* Load the vocabularies from an online source. This does NOT purge the cache.
*
* @param directoryUrl The online location of the vocabulary directory.
- * @throws VocabularyImportException In case some issue occurred while importing the
- * vocabularies.
+ * @throws VocabularyImportException In case some issue occurred while importing the vocabularies.
*/
- void loadVocabularies(URI directoryUrl) throws VocabularyImportException;
+ void loadVocabularies(URL directoryUrl) throws VocabularyImportException;
}
diff --git a/metis-dereference/metis-dereference-service/src/main/java/eu/europeana/metis/dereference/service/MongoDereferencingManagementService.java b/metis-dereference/metis-dereference-service/src/main/java/eu/europeana/metis/dereference/service/MongoDereferencingManagementService.java
index a0d608d3b..b6d98362f 100644
--- a/metis-dereference/metis-dereference-service/src/main/java/eu/europeana/metis/dereference/service/MongoDereferencingManagementService.java
+++ b/metis-dereference/metis-dereference-service/src/main/java/eu/europeana/metis/dereference/service/MongoDereferencingManagementService.java
@@ -8,8 +8,7 @@
import eu.europeana.metis.dereference.vocimport.VocabularyCollectionValidator;
import eu.europeana.metis.dereference.vocimport.VocabularyCollectionValidatorImpl;
import eu.europeana.metis.dereference.vocimport.exception.VocabularyImportException;
-
-import java.net.URI;
+import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import org.springframework.beans.factory.annotation.Autowired;
@@ -49,16 +48,16 @@ public void emptyCache() {
}
@Override
- public void loadVocabularies(URI directoryUrl) throws VocabularyImportException {
+ public void loadVocabularies(URL directoryUrl) throws VocabularyImportException {
try {
- // Import and validate the vocabularies
- final List vocabularies = new ArrayList<>();
- final VocabularyCollectionImporter importer = vocabularyCollectionImporterFactory
- .createImporter(directoryUrl);
- final VocabularyCollectionValidator validator = new VocabularyCollectionValidatorImpl(importer,
- true, true, true);
- validator.validateVocabularyOnly(vocabulary -> vocabularies.add(convertVocabulary(vocabulary)));
+ // Import and validate the vocabularies
+ final List vocabularies = new ArrayList<>();
+ final VocabularyCollectionImporter importer = vocabularyCollectionImporterFactory
+ .createImporter(directoryUrl);
+ final VocabularyCollectionValidator validator = new VocabularyCollectionValidatorImpl(importer,
+ true, true, true);
+ validator.validateVocabularyOnly(vocabulary -> vocabularies.add(convertVocabulary(vocabulary)));
// All vocabularies are loaded well. Now we replace the vocabularies.
vocabularyDao.replaceAll(vocabularies);
From 641b046d7848519844bcd386556902bb6dfc09db Mon Sep 17 00:00:00 2001
From: Simon Tzanakis
Date: Thu, 24 Mar 2022 14:12:50 +0100
Subject: [PATCH 17/73] MET-4237 Align depublication status in metis (#520)
* MET-4237 Implementation of indexPostProcessing de-publication status, with incremental processing
* MET-4237 final implementation
---
.../core/execution/WorkflowPostProcessor.java | 152 ++++++++----------
1 file changed, 70 insertions(+), 82 deletions(-)
diff --git a/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java b/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java
index 4c5e7e9d9..3eecebde1 100644
--- a/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java
+++ b/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java
@@ -31,6 +31,7 @@
import eu.europeana.metis.exception.BadContentException;
import java.util.ArrayList;
import java.util.Date;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
@@ -71,38 +72,15 @@ public WorkflowPostProcessor(DepublishRecordIdDao depublishRecordIdDao,
this.dpsClient = dpsClient;
}
- /**
- * This method performs post-processing after an individual workflow step.
- *
- * @param plugin The plugin that was successfully executed
- * @param datasetId The dataset ID to which the plugin belongs
- * @throws DpsException If communication with e-cloud dps failed
- * @throws InvalidIndexPluginException If invalid type of plugin
- * @throws BadContentException In case the records would violate the maximum number of de-published records that each dataset
- * can have.
- */
- void performPluginPostProcessing(AbstractExecutablePlugin> plugin, String datasetId)
- throws DpsException, InvalidIndexPluginException, BadContentException {
-
- final PluginType pluginType = plugin.getPluginType();
- LOGGER.info("Starting postprocessing of plugin {} in dataset {}.", pluginType, datasetId);
- if (pluginType == PluginType.PREVIEW || pluginType == PluginType.PUBLISH) {
- indexPostProcess(plugin, datasetId);
- } else if (pluginType == PluginType.DEPUBLISH) {
- depublishPostProcess((DepublishPlugin) plugin, datasetId);
- }
- LOGGER.info("Finished postprocessing of plugin {} in dataset {}.", pluginType, datasetId);
- }
-
/**
* Performs post-processing for indexing plugins
*
* @param indexPlugin The index plugin
- * @param datasetId The dataset id
- * @throws DpsException If communication with e-cloud dps failed
+ * @param datasetId The dataset id
+ * @throws DpsException If communication with e-cloud dps failed
* @throws InvalidIndexPluginException If invalid type of plugin
- * @throws BadContentException In case the records would violate the maximum number of de-published records that each dataset
- * can have.
+ * @throws BadContentException In case the records would violate the maximum number of de-published records that each
+ * dataset can have.
*/
private void indexPostProcess(AbstractExecutablePlugin> indexPlugin, String datasetId)
throws DpsException, InvalidIndexPluginException, BadContentException {
@@ -111,37 +89,24 @@ private void indexPostProcess(AbstractExecutablePlugin> indexPlugin, String da
targetIndexingDatabase = ((IndexToPreviewPlugin) indexPlugin).getTargetIndexingDatabase();
} else if (indexPlugin instanceof IndexToPublishPlugin) {
targetIndexingDatabase = ((IndexToPublishPlugin) indexPlugin).getTargetIndexingDatabase();
- //Reset depublish status
- depublishRecordIdDao.markRecordIdsWithDepublicationStatus(datasetId, null,
- DepublicationStatus.PENDING_DEPUBLICATION, null);
-
- final long totalRecords = dpsClient.getTotalMetisDatabaseRecords(indexPlugin.getExternalTaskId(),
- ((IndexToPublishPlugin) indexPlugin).getTargetIndexingDatabase());
- List subTaskInfoList;
-
- // get chunked tasks from dataset id and topology name
- for (int i = 0; i < totalRecords; i += ECLOUD_REQUEST_BATCH_SIZE) {
- subTaskInfoList = dpsClient.getDetailedTaskReportBetweenChunks(indexPlugin.getTopologyName(),
- Long.parseLong(indexPlugin.getExternalTaskId()), i, i + ECLOUD_REQUEST_BATCH_SIZE);
- if (i >= totalRecords) {
- subTaskInfoList = dpsClient.getDetailedTaskReportBetweenChunks(indexPlugin.getTopologyName(),
- Long.parseLong(indexPlugin.getExternalTaskId()), (int) (totalRecords - (totalRecords % ECLOUD_REQUEST_BATCH_SIZE)),
- (int) totalRecords);
- }
+
+ final boolean isIncremental = ((IndexToPublishPlugin) indexPlugin).getPluginMetadata().isIncrementalIndexing();
+
+ if (isIncremental) {
// get all currently de-published records ids
Set depublishedRecordIds = depublishRecordIdDao
.getAllDepublishRecordIdsWithStatus(datasetId, DepublishRecordIdSortField.DEPUBLICATION_STATE,
SortDirection.ASCENDING,
DepublicationStatus.DEPUBLISHED);
- // TODO: what if it's incremental
- // filter the record ids that are a part of the given report, to be de-published
- Set recordIdsToDepublish = subTaskInfoList.stream()
- .filter(taskInfo -> depublishedRecordIds.contains(
- taskInfo.getEuropeanaId()))
- .map(SubTaskInfo::getEuropeanaId).collect(Collectors.toSet());
- // reset de-publish status
- depublishRecordIdDao.markRecordIdsWithDepublicationStatus(datasetId, recordIdsToDepublish,
+ List publishedDatasetRecordIds = dpsClient.searchPublishedDatasetRecords(indexPlugin.getExternalTaskId(),
+ new ArrayList<>(depublishedRecordIds));
+ // reset de-publish status, pass recordIds to be de-published
+ depublishRecordIdDao.markRecordIdsWithDepublicationStatus(datasetId, new HashSet<>(publishedDatasetRecordIds),
+ DepublicationStatus.PENDING_DEPUBLICATION, null);
+ } else {
+ // reset de-publish status, pass null, all records will be de-published
+ depublishRecordIdDao.markRecordIdsWithDepublicationStatus(datasetId, null,
DepublicationStatus.PENDING_DEPUBLICATION, null);
}
} else {
@@ -156,7 +121,7 @@ private void indexPostProcess(AbstractExecutablePlugin> indexPlugin, String da
* Performs post-processing for de-publish plugins
*
* @param depublishPlugin The de-publish plugin
- * @param datasetId The dataset id
+ * @param datasetId The dataset id
* @throws DpsException If communication with e-cloud dps failed
*/
private void depublishPostProcess(DepublishPlugin depublishPlugin, String datasetId)
@@ -168,37 +133,9 @@ private void depublishPostProcess(DepublishPlugin depublishPlugin, String datase
}
}
- /**
- * @param datasetId The dataset id
- */
- private void depublishDatasetPostProcess(String datasetId) {
-
- // Set all depublished records back to PENDING.
- depublishRecordIdDao.markRecordIdsWithDepublicationStatus(datasetId, null,
- DepublicationStatus.PENDING_DEPUBLICATION, null);
- // Find latest PUBLISH Type Plugin and set dataStatus to DELETED.
- final PluginWithExecutionId latestSuccessfulPlugin = workflowExecutionDao
- .getLatestSuccessfulPlugin(datasetId, OrchestratorService.PUBLISH_TYPES);
- if (Objects.nonNull(latestSuccessfulPlugin) && Objects
- .nonNull(latestSuccessfulPlugin.getPlugin())) {
- final WorkflowExecution workflowExecutionToUpdate = workflowExecutionDao
- .getById(latestSuccessfulPlugin.getExecutionId());
- final Optional metisPluginWithType = workflowExecutionToUpdate
- .getMetisPluginWithType(latestSuccessfulPlugin.getPlugin().getPluginType());
- if (metisPluginWithType.isPresent()) {
- metisPluginWithType.get().setDataStatus(DataStatus.DELETED);
- workflowExecutionDao.updateWorkflowPlugins(workflowExecutionToUpdate);
- }
- }
- // Set publication fitness to UNFIT.
- final Dataset dataset = datasetDao.getDatasetByDatasetId(datasetId);
- dataset.setPublicationFitness(PublicationFitness.UNFIT);
- datasetDao.update(dataset);
- }
-
/**
* @param depublishPlugin The de-publish plugin
- * @param datasetId The dataset id
+ * @param datasetId The dataset id
* @throws DpsException If communication with e-cloud dps failed
*/
private void depublishRecordPostProcess(DepublishPlugin depublishPlugin, String datasetId)
@@ -239,4 +176,55 @@ private void depublishRecordPostProcess(DepublishPlugin depublishPlugin, String
datasetDao.update(dataset);
}
}
+
+ /**
+ * @param datasetId The dataset id
+ */
+ private void depublishDatasetPostProcess(String datasetId) {
+
+ // Set all depublished records back to PENDING.
+ depublishRecordIdDao.markRecordIdsWithDepublicationStatus(datasetId, null,
+ DepublicationStatus.PENDING_DEPUBLICATION, null);
+ // Find latest PUBLISH Type Plugin and set dataStatus to DELETED.
+ final PluginWithExecutionId latestSuccessfulPlugin = workflowExecutionDao
+ .getLatestSuccessfulPlugin(datasetId, OrchestratorService.PUBLISH_TYPES);
+ if (Objects.nonNull(latestSuccessfulPlugin) && Objects
+ .nonNull(latestSuccessfulPlugin.getPlugin())) {
+ final WorkflowExecution workflowExecutionToUpdate = workflowExecutionDao
+ .getById(latestSuccessfulPlugin.getExecutionId());
+ final Optional metisPluginWithType = workflowExecutionToUpdate
+ .getMetisPluginWithType(latestSuccessfulPlugin.getPlugin().getPluginType());
+ if (metisPluginWithType.isPresent()) {
+ metisPluginWithType.get().setDataStatus(DataStatus.DELETED);
+ workflowExecutionDao.updateWorkflowPlugins(workflowExecutionToUpdate);
+ }
+ }
+ // Set publication fitness to UNFIT.
+ final Dataset dataset = datasetDao.getDatasetByDatasetId(datasetId);
+ dataset.setPublicationFitness(PublicationFitness.UNFIT);
+ datasetDao.update(dataset);
+ }
+
+ /**
+ * This method performs post-processing after an individual workflow step.
+ *
+ * @param plugin The plugin that was successfully executed
+ * @param datasetId The dataset ID to which the plugin belongs
+ * @throws DpsException If communication with e-cloud dps failed
+ * @throws InvalidIndexPluginException If invalid type of plugin
+ * @throws BadContentException In case the records would violate the maximum number of de-published records that each dataset
+ * can have.
+ */
+ void performPluginPostProcessing(AbstractExecutablePlugin> plugin, String datasetId)
+ throws DpsException, InvalidIndexPluginException, BadContentException {
+
+ final PluginType pluginType = plugin.getPluginType();
+ LOGGER.info("Starting postprocessing of plugin {} in dataset {}.", pluginType, datasetId);
+ if (pluginType == PluginType.PREVIEW || pluginType == PluginType.PUBLISH) {
+ indexPostProcess(plugin, datasetId);
+ } else if (pluginType == PluginType.DEPUBLISH) {
+ depublishPostProcess((DepublishPlugin) plugin, datasetId);
+ }
+ LOGGER.info("Finished postprocessing of plugin {} in dataset {}.", pluginType, datasetId);
+ }
}
\ No newline at end of file
From 2dc505c7db12cadb689f421ad74e17970c8ce72d Mon Sep 17 00:00:00 2001
From: Simon Tzanakis
Date: Fri, 25 Mar 2022 10:32:41 +0100
Subject: [PATCH 18/73] Update CONTRIBUTING.MD for squash merges
---
CONTRIBUTING.MD | 34 +++++++++++++++++++++++++++++-----
1 file changed, 29 insertions(+), 5 deletions(-)
diff --git a/CONTRIBUTING.MD b/CONTRIBUTING.MD
index 1e59b1733..e6d450de1 100644
--- a/CONTRIBUTING.MD
+++ b/CONTRIBUTING.MD
@@ -128,14 +128,38 @@ If there is any doubt on fixing the merge conflicts while merging, the implement
worked on the relevant changes that were introduced on the destination branch.
The merge can be performed on the github pull request page or manually(especially for conflicts).
-To do this manually, checkout the destination branch(usually `develop`) and execute the merge command with the `--no-ff`
-parameter:
+To do this manually, checkout the destination branch(usually `develop`).
+We prefer squash merging or alternatively non fast forward merging.
+
+- A squash merge can be performed with the `--squash` parameter:
+
+ `git merge --squash `
+
+ To complete the squash merge, a commit has to also be performed if done locally.
+ The commit should be formatted as the following template(replicating github squashed commits):
+ ```/ (#)
+ (optionally as description)
+ * List of all commit messages from the pull requst
+ ```
+ Example message:
+ ```
+ Debt/met 4250 refactor code to remove mock maker inline (#508)
+ * MET-4250 Update NetworkUtil
+
+ * MET-4250 Update RdfConversionUtils
+
+ * MET-4250 Javadocs and cleanup
+
+ * MET-4250 Remove mockito inline from root pom```
+
+
+- A non fast forward merge can be performed with the `--no-ff` parameter:
`git merge --no-ff `
-The merger should check that the local branch is building before and after merging. If there were merge conflicts that were
-resolved during the merge, then a local deployment should be triggered and verified. If the build succeeds the destination branch
-can be pushed to the remote repository and the pull request will be resolved.
+The merger should check that the local branch is building before and after merging.
+If there were merge conflicts that were resolved during the merge, then a local deployment should be triggered and verified.
+If the build succeeds the destination branch can be pushed to the remote repository and the pull request will be resolved.
The reviewer can now move the ticket ahead in the board and re-assign it to the implementor.
From 3dc9004fc2dfc895ed28b34143bc7a39d43cc307 Mon Sep 17 00:00:00 2001
From: Simon Tzanakis
Date: Fri, 25 Mar 2022 10:49:39 +0100
Subject: [PATCH 19/73] Update CONTRIBUTING.MD to fix line breaks
---
CONTRIBUTING.MD | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/CONTRIBUTING.MD b/CONTRIBUTING.MD
index e6d450de1..1b650ed48 100644
--- a/CONTRIBUTING.MD
+++ b/CONTRIBUTING.MD
@@ -137,7 +137,8 @@ We prefer squash merging or alternatively non fast forward merging.
To complete the squash merge, a commit has to also be performed if done locally.
The commit should be formatted as the following template(replicating github squashed commits):
- ```/ (#)
+ ```
+ / (#)
(optionally as description)
* List of all commit messages from the pull requst
```
@@ -150,7 +151,8 @@ We prefer squash merging or alternatively non fast forward merging.
* MET-4250 Javadocs and cleanup
- * MET-4250 Remove mockito inline from root pom```
+ * MET-4250 Remove mockito inline from root pom
+ ```
- A non fast forward merge can be performed with the `--no-ff` parameter:
From dba23ae5a67b486ef60192719fa3a5de78d3cc89 Mon Sep 17 00:00:00 2001
From: Simon Tzanakis
Date: Fri, 8 Apr 2022 11:17:38 +0200
Subject: [PATCH 20/73] MET-4374 Secure regex backtracking in dereference
(#521)
* MET-4374 Reproduce error
* MET-4374 Reproduce error
* MET-4374 Solve backtracking
* MET-4374 Cleanup to remove further issues
* MET-4374 Fix issues and add some tests
* MET-4374 Handle review
---
.../IncomingRecordToEdmConverter.java | 94 -----------
.../IncomingRecordToEdmTransformer.java | 148 ++++++++++++++++++
.../IncomingRecordToEdmConverterTest.java | 35 -----
.../IncomingRecordToEdmTransformerTest.java | 81 ++++++++++
.../src/test/resources/copy_xml.xslt | 11 ++
.../src/test/resources/invalid_xml.xml | 3 +
.../src/test/resources/produce_empty.xslt | 4 +
.../test/resources/produce_invalid_xml.xslt | 7 +
.../src/test/resources/yso_p105069.xml | 45 ++++++
.../VocabularyCollectionMavenRule.java | 2 +-
.../VocabularyCollectionValidatorImpl.java | 110 ++++++-------
.../service/MongoDereferenceService.java | 131 ++++++++--------
12 files changed, 421 insertions(+), 250 deletions(-)
delete mode 100644 metis-dereference/metis-dereference-common/src/main/java/eu/europeana/metis/dereference/IncomingRecordToEdmConverter.java
create mode 100644 metis-dereference/metis-dereference-common/src/main/java/eu/europeana/metis/dereference/IncomingRecordToEdmTransformer.java
delete mode 100644 metis-dereference/metis-dereference-common/src/test/java/eu/europeana/metis/dereference/IncomingRecordToEdmConverterTest.java
create mode 100644 metis-dereference/metis-dereference-common/src/test/java/eu/europeana/metis/dereference/IncomingRecordToEdmTransformerTest.java
create mode 100644 metis-dereference/metis-dereference-common/src/test/resources/copy_xml.xslt
create mode 100644 metis-dereference/metis-dereference-common/src/test/resources/invalid_xml.xml
create mode 100644 metis-dereference/metis-dereference-common/src/test/resources/produce_empty.xslt
create mode 100644 metis-dereference/metis-dereference-common/src/test/resources/produce_invalid_xml.xslt
create mode 100644 metis-dereference/metis-dereference-common/src/test/resources/yso_p105069.xml
diff --git a/metis-dereference/metis-dereference-common/src/main/java/eu/europeana/metis/dereference/IncomingRecordToEdmConverter.java b/metis-dereference/metis-dereference-common/src/main/java/eu/europeana/metis/dereference/IncomingRecordToEdmConverter.java
deleted file mode 100644
index ed4170896..000000000
--- a/metis-dereference/metis-dereference-common/src/main/java/eu/europeana/metis/dereference/IncomingRecordToEdmConverter.java
+++ /dev/null
@@ -1,94 +0,0 @@
-package eu.europeana.metis.dereference;
-
-import java.io.StringReader;
-import java.io.StringWriter;
-import java.nio.charset.StandardCharsets;
-import java.util.regex.Pattern;
-import javax.xml.XMLConstants;
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.Source;
-import javax.xml.transform.Templates;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerException;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.stream.StreamResult;
-import javax.xml.transform.stream.StreamSource;
-import net.sf.saxon.BasicTransformerFactory;
-
-/**
- * Convert an incoming record to EDM.
- */
-public class IncomingRecordToEdmConverter {
-
- private static final String EMPTY_XML_REGEX = "\\A(<\\?.*?\\?>||\\s)*\\Z";
- private static final Pattern EMPTY_XML_CHECKER = Pattern.compile(EMPTY_XML_REGEX, Pattern.DOTALL);
-
- /** Vocabulary XSLs require the resource ID as a parameter. This is the parameter name. **/
- private static final String TARGET_ID_PARAMETER_NAME = "targetId";
-
- private final Templates template;
-
- /**
- * Create a converter for the given vocabulary.
- *
- * @param vocabulary The vocabulary for which to perform the conversion.
- * @throws TransformerException In case the input could not be parsed or the conversion could not
- * be set up.
- */
- public IncomingRecordToEdmConverter(Vocabulary vocabulary) throws TransformerException {
- this(vocabulary.getXslt());
- }
-
- /**
- * Create a converter for the transformation.
- *
- * @param xslt The xslt representing the conversion to perform.
- * @throws TransformerException In case the input could not be parsed or the conversion could not
- * be set up.
- */
- public IncomingRecordToEdmConverter(String xslt) throws TransformerException {
- final Source xsltSource = new StreamSource(new StringReader(xslt));
- // Ensure that the Saxon library is used by choosing the right transformer factory.
- final TransformerFactory factory = new BasicTransformerFactory();
- factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
- this.template = factory.newTemplates(xsltSource);
- }
-
- /**
- * Convert the given record.
- *
- * @param record The incoming record (that comes from the vocabulary).
- * @param recordId The record ID of the incoming record.
- * @return The EDM record, or null if the record couldn't be transformed.
- * @throws TransformerException In case there is a problem performing the transformation.
- */
- public String convert(String record, String recordId) throws TransformerException {
-
- // Set up the transformer
- final Source source = new StreamSource(new StringReader(record));
- final StringWriter stringWriter = new StringWriter();
- final Transformer transformer = template.newTransformer();
- transformer.setParameter(TARGET_ID_PARAMETER_NAME, recordId);
- transformer.setOutputProperty(OutputKeys.INDENT, "yes");
- transformer.setOutputProperty(OutputKeys.ENCODING, StandardCharsets.UTF_8.name());
-
- // Perform the transformation.
- transformer.transform(source, new StreamResult(stringWriter));
- final String result = stringWriter.toString();
-
- // Check whether there is a result (any tag in the file).
- return isEmptyXml(result) ? null : result;
- }
-
- /**
- * This method analyzes the XML file and decides whether or not it has any content. Excluded are
- * space characters, the XML header and XML comments. Note: if this method returns true, the input
- * is not technically a valid XML as it doesn't have a root node.
- *
- * @param file The input XML.
- * @return Whether the XML has any content.
- */
- static boolean isEmptyXml(String file) {
- return EMPTY_XML_CHECKER.matcher(file).matches();
- }
-}
diff --git a/metis-dereference/metis-dereference-common/src/main/java/eu/europeana/metis/dereference/IncomingRecordToEdmTransformer.java b/metis-dereference/metis-dereference-common/src/main/java/eu/europeana/metis/dereference/IncomingRecordToEdmTransformer.java
new file mode 100644
index 000000000..8d2677b9a
--- /dev/null
+++ b/metis-dereference/metis-dereference-common/src/main/java/eu/europeana/metis/dereference/IncomingRecordToEdmTransformer.java
@@ -0,0 +1,148 @@
+package eu.europeana.metis.dereference;
+
+import static eu.europeana.metis.utils.CommonStringValues.CRLF_PATTERN;
+
+import eu.europeana.metis.exception.BadContentException;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.StringReader;
+import java.io.StringWriter;
+import java.nio.charset.StandardCharsets;
+import java.util.Optional;
+import java.util.regex.Pattern;
+import javax.xml.XMLConstants;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Source;
+import javax.xml.transform.Templates;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.stream.StreamResult;
+import javax.xml.transform.stream.StreamSource;
+import net.sf.saxon.BasicTransformerFactory;
+import org.jetbrains.annotations.NotNull;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.SAXException;
+
+/**
+ * Convert an incoming record to EDM.
+ */
+public class IncomingRecordToEdmTransformer {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(IncomingRecordToEdmTransformer.class);
+ private static final Pattern XML_DECLARATION_CHECKER = Pattern.compile("\\A<\\?[^?]*\\?>\\s*\\z");
+
+ /**
+ * Vocabulary XSLs require the resource ID as a parameter. This is the parameter name.
+ **/
+ private static final String TARGET_ID_PARAMETER_NAME = "targetId";
+
+ private final Templates template;
+ private final DocumentBuilderFactory documentBuilderFactory;
+
+ /**
+ * Create a converter for the transformation.
+ *
+ * @param xslt The xslt representing the conversion to perform.
+ * @throws TransformerException if the transformer could not be initialized
+ * @throws ParserConfigurationException if the xml builder could not be initialized
+ */
+ public IncomingRecordToEdmTransformer(String xslt) throws TransformerException, ParserConfigurationException {
+ final Source xsltSource = new StreamSource(new StringReader(xslt));
+ // Ensure that the Saxon library is used by choosing the right transformer factory.
+ final TransformerFactory transformerFactory = new BasicTransformerFactory();
+ transformerFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+ this.template = transformerFactory.newTemplates(xsltSource);
+
+ documentBuilderFactory = DocumentBuilderFactory.newInstance();
+ documentBuilderFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+ documentBuilderFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
+ documentBuilderFactory.setNamespaceAware(true);
+ }
+
+ /**
+ * Transform the given xmlRecord.
+ *
+ * @param xmlRecord The incoming xmlRecord (that comes from the vocabulary).
+ * @param resourceId The xmlRecord ID of the incoming xmlRecord.
+ * @return An optional containing the EDM xmlRecord, or an empty optional if the transformation resulted in an empty XML.
+ * @throws BadContentException if there was a problem performing the transformation.
+ */
+ public Optional transform(String xmlRecord, String resourceId) throws BadContentException {
+ // Set up the transformer
+ final Source source = new StreamSource(new StringReader(xmlRecord));
+ final StringWriter transformedXmlWriter = new StringWriter();
+ final Transformer transformer;
+ try {
+ transformer = template.newTransformer();
+ transformer.setParameter(TARGET_ID_PARAMETER_NAME, resourceId);
+ transformer.setOutputProperty(OutputKeys.INDENT, "yes");
+ transformer.setOutputProperty(OutputKeys.ENCODING, StandardCharsets.UTF_8.name());
+
+ // Perform the transformation.
+ transformer.transform(source, new StreamResult(transformedXmlWriter));
+ } catch (TransformerException e) {
+ throw new BadContentException("Transformation failure", e);
+ }
+ return getValidatedXml(resourceId, transformedXmlWriter.toString());
+ }
+
+ /**
+ * Returns an optional which is empty if the provided xml is a validated empty xml or contains the xml itself if it's a valid
+ * parsable xml.
+ *
+ * @param resourceId the resource id
+ * @param xml the xml
+ * @return the optional being empty or with the xml contents
+ * @throws BadContentException if the xml parsing failed
+ */
+ @NotNull
+ private Optional getValidatedXml(String resourceId, String xml) throws BadContentException {
+ final Optional xmlResponse;
+ if (isEmptyXml(xml)) {
+ xmlResponse = Optional.empty();
+ if (LOGGER.isInfoEnabled()) {
+ LOGGER.info("Transformed entity {} results to an empty XML.",
+ CRLF_PATTERN.matcher(resourceId).replaceAll(""));
+ }
+ } else {
+ try {
+ assertXmlValidity(xml);
+ xmlResponse = Optional.of(xml);
+ } catch (ParserConfigurationException | IOException | SAXException e) {
+ throw new BadContentException("Transformed xml is not valid", e);
+ }
+ }
+
+ return xmlResponse;
+ }
+
+ /**
+ * Asserts that the provided xml is well-formed, i.e. that it can be parsed.
+ *
+ * @param xml the xml string
+ * @throws ParserConfigurationException if xml parsing failed
+ * @throws IOException if xml parsing failed
+ * @throws SAXException if xml parsing failed
+ */
+ private void assertXmlValidity(String xml) throws ParserConfigurationException, IOException, SAXException {
+ documentBuilderFactory.newDocumentBuilder().parse(new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)));
+ }
+
+ /**
+ * Checks if the provided xml is empty.
+ *
+ * The xml is considered empty if only the xml header declaration is present. Note: if this method returns true, the input is not
+ * technically a valid XML as it doesn't have a root node.
+ *
+ *
+ * @param xml the input XML.
+ * @return true if xml is empty
+ */
+ private boolean isEmptyXml(String xml) {
+ return XML_DECLARATION_CHECKER.matcher(xml).matches();
+ }
+}
diff --git a/metis-dereference/metis-dereference-common/src/test/java/eu/europeana/metis/dereference/IncomingRecordToEdmConverterTest.java b/metis-dereference/metis-dereference-common/src/test/java/eu/europeana/metis/dereference/IncomingRecordToEdmConverterTest.java
deleted file mode 100644
index 5e6d2ce78..000000000
--- a/metis-dereference/metis-dereference-common/src/test/java/eu/europeana/metis/dereference/IncomingRecordToEdmConverterTest.java
+++ /dev/null
@@ -1,35 +0,0 @@
-package eu.europeana.metis.dereference;
-
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-import eu.europeana.metis.dereference.IncomingRecordToEdmConverter;
-import org.junit.jupiter.api.Test;
-
-class IncomingRecordToEdmConverterTest {
-
-
- @Test
- void testIsEmptyXml() {
-
- assertTrue(IncomingRecordToEdmConverter.isEmptyXml(""));
- assertTrue(IncomingRecordToEdmConverter.isEmptyXml("?>"));
- assertTrue(
- IncomingRecordToEdmConverter.isEmptyXml(""));
- assertTrue(
- IncomingRecordToEdmConverter.isEmptyXml(" "));
- assertTrue(IncomingRecordToEdmConverter
- .isEmptyXml("\n"));
- assertTrue(IncomingRecordToEdmConverter
- .isEmptyXml("\n"));
- assertTrue(IncomingRecordToEdmConverter
- .isEmptyXml(" \n "));
-
- assertFalse(IncomingRecordToEdmConverter.isEmptyXml("A"));
- assertFalse(IncomingRecordToEdmConverter.isEmptyXml(
- " \n \n \n "));
- assertFalse(IncomingRecordToEdmConverter
- .isEmptyXml(""));
-
- }
-}
diff --git a/metis-dereference/metis-dereference-common/src/test/java/eu/europeana/metis/dereference/IncomingRecordToEdmTransformerTest.java b/metis-dereference/metis-dereference-common/src/test/java/eu/europeana/metis/dereference/IncomingRecordToEdmTransformerTest.java
new file mode 100644
index 000000000..0438de292
--- /dev/null
+++ b/metis-dereference/metis-dereference-common/src/test/java/eu/europeana/metis/dereference/IncomingRecordToEdmTransformerTest.java
@@ -0,0 +1,81 @@
+package eu.europeana.metis.dereference;
+
+
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import eu.europeana.metis.exception.BadContentException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Objects;
+import java.util.Optional;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+
+class IncomingRecordToEdmTransformerTest {
+
+ private static final String copyXmlXsltFileName = "copy_xml.xslt";
+ private static final String produceEmptyXsltFileName = "produce_empty.xslt";
+ private static final String produceInvalidXmlXsltFileName = "produce_invalid_xml.xslt";
+ private static final String ysoP105069FileName = "yso_p105069.xml";
+ private static final String invalidXmlFileName = "invalid_xml.xml";
+
+ private static String copyXmlXsltString;
+ private static String produceEmptyXsltString;
+ private static String produceInvalidXmlXsltString;
+ private static String ysoP105069String;
+ private static String invalidXmlString;
+
+ @BeforeAll
+ static void setUp() throws Exception {
+ ClassLoader classLoader = IncomingRecordToEdmTransformerTest.class.getClassLoader();
+ Path path = Paths.get(Objects.requireNonNull(classLoader.getResource(copyXmlXsltFileName)).toURI());
+ copyXmlXsltString = Files.readString(path, StandardCharsets.UTF_8);
+
+ path = Paths.get(Objects.requireNonNull(classLoader.getResource(produceEmptyXsltFileName)).toURI());
+ produceEmptyXsltString = Files.readString(path, StandardCharsets.UTF_8);
+
+ path = Paths.get(Objects.requireNonNull(classLoader.getResource(produceInvalidXmlXsltFileName)).toURI());
+ produceInvalidXmlXsltString = Files.readString(path, StandardCharsets.UTF_8);
+
+ path = Paths.get(Objects.requireNonNull(classLoader.getResource(ysoP105069FileName)).toURI());
+ ysoP105069String = Files.readString(path, StandardCharsets.UTF_8);
+
+ path = Paths.get(Objects.requireNonNull(classLoader.getResource(invalidXmlFileName)).toURI());
+ invalidXmlString = Files.readString(path, StandardCharsets.UTF_8);
+ }
+
+ @Test
+ void transform() throws Exception {
+ IncomingRecordToEdmTransformer incomingRecordToEdmTransformer = new IncomingRecordToEdmTransformer(copyXmlXsltString);
+ final Optional transformedOptional = incomingRecordToEdmTransformer.transform(ysoP105069String,
+ "http://www.yso.fi/onto/yso/p105069");
+ assertTrue(transformedOptional.isPresent());
+ }
+
+ @Test
+ void transform_EmptyXslt() throws Exception {
+ IncomingRecordToEdmTransformer incomingRecordToEdmTransformer = new IncomingRecordToEdmTransformer(produceEmptyXsltString);
+ final Optional transformedOptional = incomingRecordToEdmTransformer.transform(ysoP105069String,
+ "http://www.yso.fi/onto/yso/p105069");
+ assertTrue(transformedOptional.isEmpty());
+ }
+
+ @Test
+ void transform_InvalidSourceXml_BadContentException() throws Exception {
+ IncomingRecordToEdmTransformer incomingRecordToEdmTransformer = new IncomingRecordToEdmTransformer(copyXmlXsltString);
+ assertThrows(BadContentException.class, () -> incomingRecordToEdmTransformer.transform(invalidXmlString,
+ "http://www.yso.fi/onto/yso/p105069"));
+ }
+
+ @Test
+ void transform_InvalidXml_BadContentException() throws Exception {
+ IncomingRecordToEdmTransformer incomingRecordToEdmTransformer = new IncomingRecordToEdmTransformer(
+ produceInvalidXmlXsltString);
+ assertThrows(BadContentException.class, () -> incomingRecordToEdmTransformer.transform(ysoP105069String,
+ "http://www.yso.fi/onto/yso/p105069"));
+ }
+}
+
diff --git a/metis-dereference/metis-dereference-common/src/test/resources/copy_xml.xslt b/metis-dereference/metis-dereference-common/src/test/resources/copy_xml.xslt
new file mode 100644
index 000000000..28082a2d9
--- /dev/null
+++ b/metis-dereference/metis-dereference-common/src/test/resources/copy_xml.xslt
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/metis-dereference/metis-dereference-common/src/test/resources/invalid_xml.xml b/metis-dereference/metis-dereference-common/src/test/resources/invalid_xml.xml
new file mode 100644
index 000000000..7f7e28205
--- /dev/null
+++ b/metis-dereference/metis-dereference-common/src/test/resources/invalid_xml.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/metis-dereference/metis-dereference-common/src/test/resources/produce_empty.xslt b/metis-dereference/metis-dereference-common/src/test/resources/produce_empty.xslt
new file mode 100644
index 000000000..777a7600d
--- /dev/null
+++ b/metis-dereference/metis-dereference-common/src/test/resources/produce_empty.xslt
@@ -0,0 +1,4 @@
+
+
+
\ No newline at end of file
diff --git a/metis-dereference/metis-dereference-common/src/test/resources/produce_invalid_xml.xslt b/metis-dereference/metis-dereference-common/src/test/resources/produce_invalid_xml.xslt
new file mode 100644
index 000000000..e81c022af
--- /dev/null
+++ b/metis-dereference/metis-dereference-common/src/test/resources/produce_invalid_xml.xslt
@@ -0,0 +1,7 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/metis-dereference/metis-dereference-common/src/test/resources/yso_p105069.xml b/metis-dereference/metis-dereference-common/src/test/resources/yso_p105069.xml
new file mode 100644
index 000000000..8998f86a4
--- /dev/null
+++ b/metis-dereference/metis-dereference-common/src/test/resources/yso_p105069.xml
@@ -0,0 +1,45 @@
+
+
+
+
+
+
+
+
+
+
+ Tjeckien
+ Tšekki
+ Czech Republic
+
+
+
+ Källa för positionsinformation: Wikidata.
+ Location information source: Wikidata.
+ Sijaintitietojen lähde: Wikidata.
+ 1990-06-18
+ 2016-05-23T16:13:36+03:00
+
+
+
+
+
+
+
+
+
+ Praha
+ Prague
+ Prag
+ 50.08333
+ 14.41667
+
+
+
+
diff --git a/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionMavenRule.java b/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionMavenRule.java
index 9d21ffb86..68195cf59 100644
--- a/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionMavenRule.java
+++ b/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionMavenRule.java
@@ -71,7 +71,7 @@ public class VocabularyCollectionMavenRule implements EnforcerRule {
*/
private String vocabularyDirectoryFile = null;
- private VocabularyCollectionImporterFactory vocabularyCollectionImporterFactory;
+ private final VocabularyCollectionImporterFactory vocabularyCollectionImporterFactory = new VocabularyCollectionImporterFactory();
/**
* No-arguments constructor, required for maven instantiation.
diff --git a/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionValidatorImpl.java b/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionValidatorImpl.java
index 62ac84952..36b4ef706 100644
--- a/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionValidatorImpl.java
+++ b/metis-dereference/metis-dereference-import/src/main/java/eu/europeana/metis/dereference/vocimport/VocabularyCollectionValidatorImpl.java
@@ -1,22 +1,27 @@
package eu.europeana.metis.dereference.vocimport;
import eu.europeana.enrichment.utils.EnrichmentBaseConverter;
-import eu.europeana.metis.dereference.IncomingRecordToEdmConverter;
+import eu.europeana.metis.dereference.IncomingRecordToEdmTransformer;
import eu.europeana.metis.dereference.RdfRetriever;
import eu.europeana.metis.dereference.vocimport.exception.VocabularyImportException;
import eu.europeana.metis.dereference.vocimport.model.Vocabulary;
import eu.europeana.metis.dereference.vocimport.model.VocabularyLoader;
import eu.europeana.metis.dereference.vocimport.utils.NonCollidingPathVocabularyTrie;
+import eu.europeana.metis.exception.BadContentException;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
+import java.util.Optional;
import java.util.function.Consumer;
import javax.xml.bind.JAXBException;
+import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
-import org.apache.commons.lang3.StringUtils;
+/**
+ * Class that contains functionality to validate vocabularies using a {@link VocabularyCollectionImporter}.
+ */
public class VocabularyCollectionValidatorImpl implements VocabularyCollectionValidator {
private final VocabularyCollectionImporter importer;
@@ -28,16 +33,15 @@ public class VocabularyCollectionValidatorImpl implements VocabularyCollectionVa
* Constructor.
*
* @param importer Vocabulary importer.
- * @param lenientOnLackOfExamples Whether the the validator is lenient on vocabulary mappings
- * without examples.
- * @param lenientOnMappingTestFailures Whether the validator is lenient on errors and unmet
- * expectations when applying the mapping to the example and counterexample values.
- * @param lenientOnExampleRetrievalFailures Whether the validator is lenient on example or
- * counterexample retrieval (download) issues.
+ * @param lenientOnLackOfExamples Whether the validator is lenient on vocabulary mappings without examples.
+ * @param lenientOnMappingTestFailures Whether the validator is lenient on errors and unmet expectations when applying the
+ * mapping to the example and counterexample values.
+ * @param lenientOnExampleRetrievalFailures Whether the validator is lenient on example or counterexample retrieval (download)
+ * issues.
*/
public VocabularyCollectionValidatorImpl(VocabularyCollectionImporter importer,
- boolean lenientOnLackOfExamples, boolean lenientOnMappingTestFailures,
- boolean lenientOnExampleRetrievalFailures) {
+ boolean lenientOnLackOfExamples, boolean lenientOnMappingTestFailures,
+ boolean lenientOnExampleRetrievalFailures) {
this.importer = importer;
this.lenientOnLackOfExamples = lenientOnLackOfExamples;
this.lenientOnMappingTestFailures = lenientOnMappingTestFailures;
@@ -46,7 +50,7 @@ public VocabularyCollectionValidatorImpl(VocabularyCollectionImporter importer,
@Override
public void validate(Consumer vocabularyReceiver, Consumer warningReceiver)
- throws VocabularyImportException {
+ throws VocabularyImportException {
validateInternal(vocabularyReceiver, warningReceiver, true);
}
@@ -61,7 +65,7 @@ private void validateInternal(Consumer vocabularyReceiver,
final Iterable vocabularyLoaders = importer.importVocabularies();
for (VocabularyLoader loader : vocabularyLoaders) {
final Vocabulary vocabulary = loader.load();
- final IncomingRecordToEdmConverter converter = validateVocabulary(vocabulary,
+ final IncomingRecordToEdmTransformer converter = validateVocabulary(vocabulary,
duplicationChecker);
if (validateExamples) {
validateExamples(vocabulary, warningReceiver, converter);
@@ -70,29 +74,29 @@ private void validateInternal(Consumer vocabularyReceiver,
}
}
- private IncomingRecordToEdmConverter validateVocabulary(Vocabulary vocabulary,
- DuplicationChecker duplicationChecker) throws VocabularyImportException {
+ private IncomingRecordToEdmTransformer validateVocabulary(Vocabulary vocabulary,
+ DuplicationChecker duplicationChecker) throws VocabularyImportException {
// Check the presence of the required fields.
if (vocabulary.getName() == null) {
throw new VocabularyImportException(
- String.format("No vocabulary name given in metadata at [%s].",
- vocabulary.getReadableMetadataLocation()));
+ String.format("No vocabulary name given in metadata at [%s].",
+ vocabulary.getReadableMetadataLocation()));
}
if (vocabulary.getTypes().isEmpty()) {
throw new VocabularyImportException(
- String.format("No vocabulary type(s) given in metadata at [%s].",
- vocabulary.getReadableMetadataLocation()));
+ String.format("No vocabulary type(s) given in metadata at [%s].",
+ vocabulary.getReadableMetadataLocation()));
}
if (vocabulary.getPaths().isEmpty()) {
throw new VocabularyImportException(
- String.format("No vocabulary path(s) given in metadata at [%s].",
- vocabulary.getReadableMetadataLocation()));
+ String.format("No vocabulary path(s) given in metadata at [%s].",
+ vocabulary.getReadableMetadataLocation()));
}
if (vocabulary.getTransformation() == null) {
throw new VocabularyImportException(
- String.format("No transformation given in mapping at [%s].",
- vocabulary.getReadableMappingLocation()));
+ String.format("No transformation given in mapping at [%s].",
+ vocabulary.getReadableMappingLocation()));
}
// Check whether name and links are unique.
@@ -100,21 +104,21 @@ private IncomingRecordToEdmConverter validateVocabulary(Vocabulary vocabulary,
// Verifying the xslt - compile it.
try {
- return new IncomingRecordToEdmConverter(vocabulary.getTransformation());
- } catch (TransformerException e) {
+ return new IncomingRecordToEdmTransformer(vocabulary.getTransformation());
+ } catch (TransformerException | ParserConfigurationException e) {
throw new VocabularyImportException(
- String.format("Error in the transformation given in mapping at [%s].",
- vocabulary.getReadableMappingLocation()), e);
+ String.format("Error in the transformation given in mapping at [%s].",
+ vocabulary.getReadableMappingLocation()), e);
}
}
private void validateExamples(Vocabulary vocabulary, Consumer warningReceiver,
- IncomingRecordToEdmConverter converter) throws VocabularyImportException {
+ IncomingRecordToEdmTransformer converter) throws VocabularyImportException {
// Testing the examples (if there are any - otherwise issue warning).
if (vocabulary.getExamples().isEmpty()) {
final String message = String.format("No examples specified for metadata at [%s].",
- vocabulary.getReadableMetadataLocation());
+ vocabulary.getReadableMetadataLocation());
if (lenientOnLackOfExamples) {
warningReceiver.accept(message);
} else {
@@ -123,26 +127,26 @@ private void validateExamples(Vocabulary vocabulary, Consumer warningRec
}
for (String example : vocabulary.getExamples()) {
testExample(converter, example, vocabulary.getSuffix(), false,
- vocabulary.getReadableMetadataLocation(), warningReceiver);
+ vocabulary.getReadableMetadataLocation(), warningReceiver);
}
// Testing the counter examples (if there are any).
for (String example : vocabulary.getCounterExamples()) {
testExample(converter, example, vocabulary.getSuffix(), true,
- vocabulary.getReadableMetadataLocation(), warningReceiver);
+ vocabulary.getReadableMetadataLocation(), warningReceiver);
}
}
private String getTestErrorMessage(String example, boolean isCounterExample,
- String readableMetadataLocation, String sentenceContinuation, Exception exception) {
+ String readableMetadataLocation, String sentenceContinuation, Exception exception) {
final String sentence = String.format("%s '%s' in metadata at [%s] %s.",
- isCounterExample ? "Counterexample" : "Example", example, readableMetadataLocation,
- sentenceContinuation);
- return sentence + (exception == null ? "" : " Error: " + exception.getMessage());
+ isCounterExample ? "Counterexample" : "Example", example, readableMetadataLocation,
+ sentenceContinuation);
+ return sentence + (exception == null ? "" : String.format(" Error: %s", exception.getMessage()));
}
private void processTestError(String message, boolean isWarning, Consumer warningReceiver,
- Exception originalException) throws VocabularyImportException {
+ Exception originalException) throws VocabularyImportException {
if (isWarning) {
warningReceiver.accept(message);
} else {
@@ -150,9 +154,9 @@ private void processTestError(String message, boolean isWarning, Consumer warningReceiver) throws VocabularyImportException {
+ private void testExample(IncomingRecordToEdmTransformer incomingRecordToEdmTransformer, String example, String suffix,
+ boolean isCounterExample, String readableMetadataLocation,
+ Consumer warningReceiver) throws VocabularyImportException {
// Retrieve the example - is not null.
final String exampleContent;
@@ -160,40 +164,40 @@ private void testExample(IncomingRecordToEdmConverter converter, String example,
exampleContent = new RdfRetriever().retrieve(example, suffix);
} catch (IOException | URISyntaxException e) {
final String message = getTestErrorMessage(example, isCounterExample,
- readableMetadataLocation, "could not be retrieved", e);
+ readableMetadataLocation, "could not be retrieved", e);
processTestError(message, lenientOnExampleRetrievalFailures, warningReceiver, e);
return;
}
// Convert the example
- final String result;
+ final Optional result;
try {
- result = converter.convert(exampleContent, example);
- } catch (TransformerException e) {
+ result = incomingRecordToEdmTransformer.transform(exampleContent, example);
+ } catch (BadContentException e) {
final String message = getTestErrorMessage(example, isCounterExample,
- readableMetadataLocation, "could not be mapped", e);
+ readableMetadataLocation, "could not be mapped", e);
processTestError(message, lenientOnMappingTestFailures, warningReceiver, e);
return;
}
// Check whether the example yielded a mapped entity or not
- if (StringUtils.isNotBlank(result) && isCounterExample) {
+ if (result.isPresent() && isCounterExample) {
final String message = getTestErrorMessage(example, isCounterExample,
- readableMetadataLocation, "yielded a mapped result, but is expected not to", null);
+ readableMetadataLocation, "yielded a mapped result, but is expected not to", null);
processTestError(message, lenientOnMappingTestFailures, warningReceiver, null);
- } else if (StringUtils.isBlank(result) && !isCounterExample) {
+ } else if (result.isEmpty() && !isCounterExample) {
final String message = getTestErrorMessage(example, isCounterExample,
- readableMetadataLocation, "did not yield a mapped result, but is expected to", null);
+ readableMetadataLocation, "did not yield a mapped result, but is expected to", null);
processTestError(message, lenientOnMappingTestFailures, warningReceiver, null);
}
// Check whether the example yielded valid XML
- if (StringUtils.isNotBlank(result)) {
+ if (result.isPresent()) {
try {
- EnrichmentBaseConverter.convertToEnrichmentBase(result);
+ EnrichmentBaseConverter.convertToEnrichmentBase(result.get());
} catch (JAXBException e) {
final String message = getTestErrorMessage(example, isCounterExample,
- readableMetadataLocation, "did not yield a valid XML", e);
+ readableMetadataLocation, "did not yield a valid XML", e);
throw new VocabularyImportException(message, e);
}
}
@@ -211,11 +215,11 @@ void checkAndRegister(Vocabulary vocabulary) throws VocabularyImportException {
// Handle the name uniqueness
final String nameToCheck = vocabulary.getName().trim().replaceAll("\\s", " ")
- .toLowerCase(Locale.ENGLISH);
+ .toLowerCase(Locale.ENGLISH);
if (knownNames.containsKey(nameToCheck)) {
final String message = String.format("Duplicate name '%s' detected in metadata at [%s]:"
- + " metadata at [%s] contains a name that is similar.", vocabulary.getName(),
- vocabulary.getReadableMetadataLocation(), knownNames.get(nameToCheck));
+ + " metadata at [%s] contains a name that is similar.", vocabulary.getName(),
+ vocabulary.getReadableMetadataLocation(), knownNames.get(nameToCheck));
throw new VocabularyImportException(message);
}
knownNames.put(nameToCheck, vocabulary.getReadableMetadataLocation());
diff --git a/metis-dereference/metis-dereference-service/src/main/java/eu/europeana/metis/dereference/service/MongoDereferenceService.java b/metis-dereference/metis-dereference-service/src/main/java/eu/europeana/metis/dereference/service/MongoDereferenceService.java
index 5d8f88ae0..2d0db505f 100644
--- a/metis-dereference/metis-dereference-service/src/main/java/eu/europeana/metis/dereference/service/MongoDereferenceService.java
+++ b/metis-dereference/metis-dereference-service/src/main/java/eu/europeana/metis/dereference/service/MongoDereferenceService.java
@@ -9,7 +9,7 @@
import eu.europeana.enrichment.api.external.model.Resource;
import eu.europeana.enrichment.api.external.model.TimeSpan;
import eu.europeana.enrichment.utils.EnrichmentBaseConverter;
-import eu.europeana.metis.dereference.IncomingRecordToEdmConverter;
+import eu.europeana.metis.dereference.IncomingRecordToEdmTransformer;
import eu.europeana.metis.dereference.ProcessedEntity;
import eu.europeana.metis.dereference.RdfRetriever;
import eu.europeana.metis.dereference.Vocabulary;
@@ -17,6 +17,7 @@
import eu.europeana.metis.dereference.service.dao.VocabularyDao;
import eu.europeana.metis.dereference.service.utils.GraphUtils;
import eu.europeana.metis.dereference.service.utils.VocabularyCandidates;
+import eu.europeana.metis.exception.BadContentException;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
@@ -32,6 +33,7 @@
import java.util.function.Function;
import java.util.stream.Stream;
import javax.xml.bind.JAXBException;
+import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
@@ -108,7 +110,7 @@ public List dereference(String resourceId)
* @return A collection of dereferenced resources. Is not null, but could be empty.
*/
private Collection dereferenceResource(String resourceId)
- throws JAXBException, TransformerException, URISyntaxException {
+ throws JAXBException, URISyntaxException {
// Get the main object to dereference. If null, we are done.
final Pair resource = computeEnrichmentBaseVocabularyPair(
@@ -123,7 +125,7 @@ private Collection dereferenceResource(String resourceId)
try {
result = computeEnrichmentBaseVocabularyPair(key);
return result == null ? null : result.getLeft();
- } catch (JAXBException | TransformerException | URISyntaxException e) {
+ } catch (JAXBException | URISyntaxException e) {
LOGGER.warn(String.format("Problem occurred while dereferencing broader resource %s.", key),
e);
return null;
@@ -166,25 +168,6 @@ private static Stream getStream(Collection collection) {
return collection == null ? Stream.empty() : collection.stream();
}
- Pair computeEnrichmentBaseVocabularyPair(String resourceId)
- throws JAXBException, TransformerException, URISyntaxException {
-
- // Try to get the entity and its vocabulary from the cache.
- final ProcessedEntity cachedEntity = processedEntityDao.get(resourceId);
- final Pair entityVocabularyPair = computeEntityVocabularyPair(resourceId,
- cachedEntity);
-
- // Parse the entity.
- final Pair enrichmentBaseVocabularyPair;
- if (entityVocabularyPair.getLeft() == null || entityVocabularyPair.getRight() == null) {
- enrichmentBaseVocabularyPair = null;
- } else {
- enrichmentBaseVocabularyPair = convertToEnrichmentBaseVocabularyPair(
- entityVocabularyPair.getLeft(), entityVocabularyPair.getRight());
- }
- return enrichmentBaseVocabularyPair;
- }
-
/**
* Computes the entity and vocabulary.
* It will use the cache if it's still valid, otherwise it will retrieve(if applicable) the
@@ -208,7 +191,7 @@ Pair computeEnrichmentBaseVocabularyPair(String reso
* @throws TransformerException if an exception occurred during transformation of the original entity
*/
private Pair computeEntityVocabularyPair(String resourceId,
- ProcessedEntity cachedEntity) throws URISyntaxException, TransformerException {
+ ProcessedEntity cachedEntity) throws URISyntaxException {
final Pair transformedEntityVocabularyPair;
@@ -233,35 +216,7 @@ private Pair computeEntityVocabularyPair(String resourceId,
return transformedEntityVocabularyPair;
}
- private void saveEntity(String resourceId, ProcessedEntity cachedEntity,
- Pair transformedEntityAndVocabularyPair) {
-
- final String entityXml = transformedEntityAndVocabularyPair.getLeft();
- final Vocabulary vocabulary = transformedEntityAndVocabularyPair.getRight();
- final String vocabularyIdString = Optional.ofNullable(vocabulary).map(Vocabulary::getId)
- .map(ObjectId::toString).orElse(null);
- //Save entity
- ProcessedEntity entityToCache = (cachedEntity == null) ? new ProcessedEntity() : cachedEntity;
- entityToCache.setResourceId(resourceId);
- entityToCache.setXml(entityXml);
- entityToCache.setVocabularyId(vocabularyIdString);
- processedEntityDao.save(entityToCache);
- }
-
- private Pair convertToEnrichmentBaseVocabularyPair(String entityXml,
- Vocabulary entityVocabulary) throws JAXBException {
- final Pair result;
- if (entityXml == null || entityVocabulary == null) {
- result = null;
- } else {
- result = new ImmutablePair<>(EnrichmentBaseConverter.convertToEnrichmentBase(entityXml),
- entityVocabulary);
- }
- return result;
- }
-
- private Pair retrieveAndTransformEntity(String resourceId)
- throws TransformerException, URISyntaxException {
+ private Pair retrieveAndTransformEntity(String resourceId) throws URISyntaxException {
final VocabularyCandidates vocabularyCandidates = VocabularyCandidates
.findVocabulariesForUrl(resourceId, vocabularyDao::getByUriSearch);
@@ -296,6 +251,47 @@ private Pair retrieveAndTransformEntity(String resourceId)
return entityVocabularyPair;
}
+  /**
+   * Saves the transformed entity xml and its vocabulary id (null when absent) through the processed entity dao.
+   */
+  private void saveEntity(String resourceId, ProcessedEntity cachedEntity,
+      Pair<String, Vocabulary> transformedEntityAndVocabularyPair) {
+    // Reuse the cached entity object when one was supplied, otherwise start from a fresh one.
+    final ProcessedEntity entityToCache = cachedEntity != null ? cachedEntity : new ProcessedEntity();
+    final String vocabularyIdString = Optional.ofNullable(transformedEntityAndVocabularyPair.getRight())
+        .map(Vocabulary::getId).map(ObjectId::toString).orElse(null);
+    entityToCache.setResourceId(resourceId);
+    entityToCache.setXml(transformedEntityAndVocabularyPair.getLeft());
+    entityToCache.setVocabularyId(vocabularyIdString);
+    processedEntityDao.save(entityToCache);
+  }
+
+  /**
+   * Parses the entity xml into an enrichment base and pairs it with the vocabulary; null if either input is null.
+   */
+  private Pair<EnrichmentBase, Vocabulary> convertToEnrichmentBaseVocabularyPair(String entityXml,
+      Vocabulary entityVocabulary) throws JAXBException {
+    // Without both the xml and the vocabulary there is nothing to pair up.
+    if (entityXml == null || entityVocabulary == null) {
+      return null;
+    }
+    return new ImmutablePair<>(EnrichmentBaseConverter.convertToEnrichmentBase(entityXml), entityVocabulary);
+  }
+
+  /**
+   * Transforms the original entity with the xslt of the vocabulary; null if that fails or yields no result.
+   */
+  private String transformEntity(Vocabulary vocabulary, String originalEntity, String resourceId) {
+    try {
+      return new IncomingRecordToEdmTransformer(vocabulary.getXslt())
+          .transform(originalEntity, resourceId).orElse(null);
+    } catch (TransformerException | BadContentException | ParserConfigurationException e) {
+      LOGGER.warn("Error transforming entity: {} with message: {}", resourceId, e.getMessage());
+      LOGGER.debug("Transformation issue: ", e);
+      return null;
+    }
+  }
+
private String retrieveOriginalEntity(String resourceId, VocabularyCandidates candidates)
throws URISyntaxException {
@@ -323,21 +319,22 @@ private String retrieveOriginalEntity(String resourceId, VocabularyCandidates ca
return originalEntity;
}
- private String transformEntity(Vocabulary vocabulary, String originalEntity, String resourceId)
- throws TransformerException {
- final IncomingRecordToEdmConverter converter = new IncomingRecordToEdmConverter(vocabulary);
- final String result;
- try {
- result = converter.convert(originalEntity, resourceId);
- if (result == null && LOGGER.isInfoEnabled()) {
- LOGGER.info("Could not transform entity {} as it results is an empty XML.",
- CRLF_PATTERN.matcher(resourceId).replaceAll(""));
- }
- } catch (TransformerException e) {
- LOGGER.warn("Error transforming entity: {} with message: {}", resourceId, e.getMessage());
- LOGGER.debug("Transformation issue: ", e);
- return null;
+ Pair computeEnrichmentBaseVocabularyPair(String resourceId)
+ throws JAXBException, URISyntaxException {
+
+ // Try to get the entity and its vocabulary from the cache.
+ final ProcessedEntity cachedEntity = processedEntityDao.get(resourceId);
+ final Pair entityVocabularyPair = computeEntityVocabularyPair(resourceId,
+ cachedEntity);
+
+ // Parse the entity.
+ final Pair enrichmentBaseVocabularyPair;
+ if (entityVocabularyPair.getLeft() == null || entityVocabularyPair.getRight() == null) {
+ enrichmentBaseVocabularyPair = null;
+ } else {
+ enrichmentBaseVocabularyPair = convertToEnrichmentBaseVocabularyPair(
+ entityVocabularyPair.getLeft(), entityVocabularyPair.getRight());
}
- return result;
+ return enrichmentBaseVocabularyPair;
}
}
From 6ed88d5fa00e3f973f2a9961cc94b8f74751f2c3 Mon Sep 17 00:00:00 2001
From: Simon Tzanakis
Date: Fri, 8 Apr 2022 16:41:37 +0200
Subject: [PATCH 21/73] =?UTF-8?q?MET-4285=20Initial=20implementation=20of?=
=?UTF-8?q?=20geo=20parsing=20and=20record=20geo=20locatio=E2=80=A6=20(#52?=
=?UTF-8?q?3)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* MET-4285 Initial implementation of geo parsing and record geo location extraction for solr
* MET-4285 Geo parsing support altitude
* MET-4285 Add some relevant tests for Solr
* MET-4285 Parse geo uris during solr document creation
* MET-4285 Fix xmls so that tests can rdf parse
* MET-4285 Fix xmls so that tests can rdf parse
* MET-4285 Truncate long decimals geo, re-organize solr properties
* MET-4285 Cleanup
* MET-4285 Fix analysis
* MET-4285 Handle review
---
.../metis/utils/GeoUriWGS84Parser.java | 219 ++++++++++++++++++
.../metis/utils/GeoUriWGS84ParserTest.java | 77 ++++++
.../eu/europeana/indexing/solr/EdmLabel.java | 27 ++-
.../indexing/solr/SolrDocumentPopulator.java | 57 ++---
.../solr/property/FullBeanSolrProperties.java | 183 +++++++++++++++
.../solr/SolrDocumentPopulatorTest.java | 86 +++++++
.../europeana_record_with_geospatial_data.xml | 127 ++++++++++
...eana_record_with_geospatial_data_wgs84.xml | 128 ++++++++++
8 files changed, 858 insertions(+), 46 deletions(-)
create mode 100644 metis-common/metis-common-utils/src/main/java/eu/europeana/metis/utils/GeoUriWGS84Parser.java
create mode 100644 metis-common/metis-common-utils/src/test/java/eu/europeana/metis/utils/GeoUriWGS84ParserTest.java
create mode 100644 metis-indexing/src/main/java/eu/europeana/indexing/solr/property/FullBeanSolrProperties.java
create mode 100644 metis-indexing/src/test/java/eu/europeana/indexing/solr/SolrDocumentPopulatorTest.java
create mode 100644 metis-indexing/src/test/resources/europeana_record_with_geospatial_data.xml
create mode 100644 metis-indexing/src/test/resources/europeana_record_with_geospatial_data_wgs84.xml
diff --git a/metis-common/metis-common-utils/src/main/java/eu/europeana/metis/utils/GeoUriWGS84Parser.java b/metis-common/metis-common-utils/src/main/java/eu/europeana/metis/utils/GeoUriWGS84Parser.java
new file mode 100644
index 000000000..a2981ae50
--- /dev/null
+++ b/metis-common/metis-common-utils/src/main/java/eu/europeana/metis/utils/GeoUriWGS84Parser.java
@@ -0,0 +1,219 @@
+package eu.europeana.metis.utils;
+
+import eu.europeana.metis.exception.BadContentException;
+import java.util.Arrays;
+import java.util.LinkedList;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * Contains functionality to parse and validate geo uri
+ */
+public final class GeoUriWGS84Parser {
+
+ private static final String DECIMAL_POINT_REGEX = "(?:\\.\\d+)?";
+ private static final String ZEROES_DECIMAL_POINT_REGEX = "(?:\\.0+)?";
+ private static final String LATITUDE_REGEX =
+ "^[+-]?(?:90" + ZEROES_DECIMAL_POINT_REGEX + "|(?:[0-9]|[1-8][0-9])" + DECIMAL_POINT_REGEX + ")$";
+ private static final Pattern LATITUDE_PATTERN = Pattern.compile(LATITUDE_REGEX);
+ private static final String LONGITUDE_REGEX =
+ "^[+-]?(?:180" + ZEROES_DECIMAL_POINT_REGEX + "|(?:[0-9]|[1-9][0-9]|1[0-7][0-9])" + DECIMAL_POINT_REGEX + ")$";
+ private static final Pattern LONGITUDE_PATTERN = Pattern.compile(LONGITUDE_REGEX);
+ private static final String ALTITUDE_REGEX = "^[+-]?\\d+" + DECIMAL_POINT_REGEX + "$";
+ private static final Pattern ALTITUDE_PATTERN = Pattern.compile(ALTITUDE_REGEX);
+ private static final String CRS_WGS_84 = "wgs84";
+ private static final int MAX_NUMBER_COORDINATES = 3;
+ private static final int MAX_DECIMAL_POINTS_TO_KEEP = 7;
+
+ private GeoUriWGS84Parser() {
+ }
+
+ /**
+ * Parse a provided geo uri in wgs84 coordinate reference system (CRS) and validate its contents.
+ * The parsing of the string follows closely but not exhaustively the specification located at
+ * https://datatracker.ietf.org/doc/html/rfc5870
+ * The checks that are performed on the provided string are as follows:
+ * <ul>
+ * <li>There should not be any whitespace characters</li>
+ * <li>It should start with "geo:"</li>
+ * <li>There should be at least one part after the scheme and that should be the coordinates</li>
+ * <li>If the crs parameter is present it should be the first parameter and its value should be "wgs84"</li>
+ * <li>The "u" parameter, if present, should be the first or the second parameter after the coordinates</li>
+ * <li>The coordinates should have 2 or 3 dimensions</li>
+ * <li>The latitude and longitude should be of valid structure and valid range</li>
+ * <li>The coordinates, if they have decimal points, will be truncated after the 7th decimal</li>
+ * </ul>
+ * Parameters are validated but not returned. An altitude that is not a valid number is returned as null.
+ *
+ * @param geoUriString the geo uri string
+ * @return the geo coordinates, null will never be returned
+ * @throws BadContentException if the geo uri parsing encountered an error
+ */
+ public static GeoCoordinates parse(String geoUriString) throws BadContentException {
+ final String[] geoUriParts = validateGeoUriAndGetParts(geoUriString);
+
+ //Finally, check the coordinates part and validate
+ return validateGeoCoordinatesAndGet(geoUriParts[0]);
+ }
+
+  /**
+   * Validates the geo uri (whitespace, scheme, parameter positions, crs value) and splits it into its parts.
+   * @param geoUriString the geo uri string
+   * @return the ";" separated parts that follow the scheme, the first part being the coordinates
+   * @throws BadContentException if the geo uri is not valid
+   */
+  private static String[] validateGeoUriAndGetParts(String geoUriString) throws BadContentException {
+    //Validate that there aren't any space characters in the URI
+    if (!geoUriString.matches("^\\S+$")) {
+      throw new BadContentException("URI cannot have spaces");
+    }
+    //Validate geo URI
+    if (!geoUriString.matches("^geo:.*$")) {
+      throw new BadContentException("Invalid scheme value");
+    }
+    final String[] schemeAndParts = geoUriString.split(":");
+    if (schemeAndParts.length <= 1) {
+      throw new BadContentException("There are no parts in the geo URI");
+    }
+    //Find all parts, there must be at least one part available
+    final String[] geoUriParts = schemeAndParts[1].split(";");
+    if (geoUriParts.length < 1) {
+      throw new BadContentException("Invalid geo uri parts length");
+    }
+    //Find all other parameters, a parameter without "=value" (allowed by RFC 5870) gets an empty value
+    final LinkedList<GeoUriParameter> geoUriParameters = Arrays.stream(geoUriParts, 1, geoUriParts.length).map(s -> {
+      final String[] split = s.split("=", 2);
+      return new GeoUriParameter(split[0], split.length > 1 ? split[1] : "");
+    }).collect(Collectors.toCollection(LinkedList::new));
+    //If crs present, it must be the exact first after the dimensions. If not present then there is a default
+    String crs = CRS_WGS_84;
+    for (int i = 0; i < geoUriParameters.size(); i++) {
+      if ("crs".equalsIgnoreCase(geoUriParameters.get(i).getName())) {
+        crs = geoUriParameters.get(i).getValue();
+        if (i != 0) {
+          throw new BadContentException("Invalid geo uri 'crs' parameter position");
+        }
+      }
+      if ("u".equalsIgnoreCase(geoUriParameters.get(i).getName()) && i > 1) {
+        throw new BadContentException("Invalid geo uri 'u' parameter position");
+      }
+    }
+    if (!CRS_WGS_84.equalsIgnoreCase(crs)) {
+      throw new BadContentException(String.format("Crs parameter value is not %s", CRS_WGS_84));
+    }
+    return geoUriParts;
+  }
+
+  /**
+   * Generate geo coordinates from the coordinates part of a geo uri.
+   * The provided string is validated against:
+   * <ul>
+   * <li>the total coordinates available (2 or 3, separated by commas)</li>
+   * <li>the validity of the latitude and the longitude and their range</li>
+   * <li>the convertibility to a {@link Double}</li>
+   * </ul>
+   * The decimal points are also truncated up to a maximum allowed.
+   * A third coordinate (the altitude) is only used if it is a plain decimal number, otherwise the altitude
+   * of the result is null.
+   *
+   * @param geoUriPart the string that should contain the coordinates
+   * @return the geo coordinates
+   * @throws BadContentException if the geo coordinates were not valid
+   */
+  private static GeoCoordinates validateGeoCoordinatesAndGet(String geoUriPart) throws BadContentException {
+    //Only latitude,longitude or latitude,longitude,altitude is acceptable
+    final String[] dimensions = geoUriPart.split(",");
+    if (dimensions.length < 2 || dimensions.length > MAX_NUMBER_COORDINATES) {
+      throw new BadContentException("Coordinates are not of valid length");
+    }
+
+    //Structure and range of the two mandatory dimensions
+    final String latitude = dimensions[0];
+    final String longitude = dimensions[1];
+    if (!LATITUDE_PATTERN.matcher(latitude).matches() || !LONGITUDE_PATTERN.matcher(longitude).matches()) {
+      throw new BadContentException("Coordinates are invalid");
+    }
+
+    //An altitude that does not have the expected structure is left out instead of being rejected
+    Double altitude = null;
+    if (dimensions.length == MAX_NUMBER_COORDINATES && ALTITUDE_PATTERN.matcher(dimensions[2]).matches()) {
+      altitude = Double.parseDouble(truncateDecimalPoints(dimensions[2]));
+    }
+
+    return new GeoCoordinates(Double.parseDouble(truncateDecimalPoints(latitude)),
+        Double.parseDouble(truncateDecimalPoints(longitude)), altitude);
+  }
+
+  /**
+   * Keeps the integer part of the decimal number and at most {@link #MAX_DECIMAL_POINTS_TO_KEEP} digits of the
+   * fractional part (the text between the first and second dot), dropping the remaining digits without rounding.
+   */
+  private static String truncateDecimalPoints(String decimalNumber) {
+    final String[] parts = decimalNumber.split("\\.");
+    //No fractional part (or nothing at all): return what is there
+    if (parts.length <= 1) {
+      return parts.length == 0 ? "" : parts[0];
+    }
+    final String fraction = parts[1];
+    return parts[0] + "." + fraction.substring(0, Math.min(fraction.length(), MAX_DECIMAL_POINTS_TO_KEEP));
+  }
+
+ /**
+ * Class containing geo coordinates (latitude, longitude and an optional altitude)
+ */
+ public static class GeoCoordinates {
+
+ private final Double latitude;
+ private final Double longitude;
+ private final Double altitude;
+
+ /**
+ * Constructor with all parameters
+ *
+ * @param latitude the latitude
+ * @param longitude the longitude
+ * @param altitude the altitude, null if not available
+ */
+ public GeoCoordinates(Double latitude, Double longitude, Double altitude) {
+ this.latitude = latitude;
+ this.longitude = longitude;
+ this.altitude = altitude;
+ }
+
+ public Double getLatitude() {
+ return latitude;
+ }
+
+ public Double getLongitude() {
+ return longitude;
+ }
+
+ public Double getAltitude() {
+ return altitude;
+ }
+ }
+
+ /**
+ * Class wrapping the name and value of a geo uri parameter, i.e. a "name=value" part following the coordinates.
+ */
+ private static class GeoUriParameter {
+
+ private final String name;
+ private final String value;
+
+ public GeoUriParameter(String name, String value) {
+ this.name = name;
+ this.value = value;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public String getValue() {
+ return value;
+ }
+ }
+
+}
diff --git a/metis-common/metis-common-utils/src/test/java/eu/europeana/metis/utils/GeoUriWGS84ParserTest.java b/metis-common/metis-common-utils/src/test/java/eu/europeana/metis/utils/GeoUriWGS84ParserTest.java
new file mode 100644
index 000000000..c060c26bc
--- /dev/null
+++ b/metis-common/metis-common-utils/src/test/java/eu/europeana/metis/utils/GeoUriWGS84ParserTest.java
@@ -0,0 +1,77 @@
+package eu.europeana.metis.utils;
+
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import eu.europeana.metis.exception.BadContentException;
+import eu.europeana.metis.utils.GeoUriWGS84Parser.GeoCoordinates;
+import org.junit.jupiter.api.Test;
+
+class GeoUriWGS84ParserTest {
+
+ @Test
+ void parse_invalid() {
+
+ //URI cannot have spaces
+ assertThrows(BadContentException.class, () -> GeoUriWGS84Parser.parse("geo: 37.786971,-122.399677"));
+ assertThrows(BadContentException.class, () -> GeoUriWGS84Parser.parse("geo:37.786971,-122.399677; u=35"));
+
+ //Scheme is not geo
+ assertThrows(BadContentException.class, () -> GeoUriWGS84Parser.parse("test:"));
+
+ //URI cannot be without coordinates
+ assertThrows(BadContentException.class, () -> GeoUriWGS84Parser.parse("geo:"));
+ //URI must have at least one part
+ assertThrows(BadContentException.class, () -> GeoUriWGS84Parser.parse("geo:;"));
+
+ //Validate order of crs and u parameters
+ assertThrows(BadContentException.class, () -> GeoUriWGS84Parser.parse("geo:37.786971,-122.399677;u=35;crs=wgs84"));
+ assertThrows(BadContentException.class,
+ () -> GeoUriWGS84Parser.parse("geo:37.786971,-122.399677;crs=wgs84;parameter1=value1;u=35"));
+ assertThrows(BadContentException.class,
+ () -> GeoUriWGS84Parser.parse("geo:37.786971,-122.399677;parameter1=value1;crs=wgs84;u=35"));
+
+ //Validate crs value
+ assertThrows(BadContentException.class, () -> GeoUriWGS84Parser.parse("geo:37.786971,-122.399677;crs=Moon-2011;u=35"));
+
+ //Coordinates must be present and of correct length
+ assertThrows(BadContentException.class, () -> GeoUriWGS84Parser.parse("geo:;crs=wgs84"));
+ assertThrows(BadContentException.class, () -> GeoUriWGS84Parser.parse("geo:37.786971,;crs=wgs84"));
+ assertThrows(BadContentException.class, () -> GeoUriWGS84Parser.parse("geo:37.786971;crs=wgs84"));
+ assertThrows(BadContentException.class, () -> GeoUriWGS84Parser.parse("geo:37.786971,100,100,10;crs=wgs84"));
+ //Coordinate is not a number
+ assertThrows(BadContentException.class, () -> GeoUriWGS84Parser.parse("geo:test,-122.399677;crs=wgs84"));
+ //Coordinates out of range (latitude beyond 90, longitude beyond 180)
+ assertThrows(BadContentException.class, () -> GeoUriWGS84Parser.parse("geo:-100,200;crs=wgs84"));
+ }
+
+ @Test
+ void parse_valid() throws Exception {
+ assertDoesNotThrow(() -> GeoUriWGS84Parser.parse("geo:37.786971,-122.399677;crs=wgs84;u=35"));
+ assertDoesNotThrow(() -> GeoUriWGS84Parser.parse("geo:37.786971,-122.399677;u=35"));
+ assertDoesNotThrow(() -> GeoUriWGS84Parser.parse("geo:37.786971,-122.399677;crs=wgs84;u=35;parameter1=value1"));
+ assertDoesNotThrow(() -> GeoUriWGS84Parser.parse("geo:37.786971,-122.399677;u=35;parameter1=value1"));
+
+ assertDoesNotThrow(() -> GeoUriWGS84Parser.parse("geo:37.786971,-122.399677"));
+ assertDoesNotThrow(() -> GeoUriWGS84Parser.parse("geo:37.786971,-122.399677,10"));
+ assertDoesNotThrow(() -> GeoUriWGS84Parser.parse("geo:37.1234567,-122.1234567,10"));
+ assertDoesNotThrow(() -> GeoUriWGS84Parser.parse("geo:37,-122"));
+
+ final GeoCoordinates geoCoordinates = GeoUriWGS84Parser.parse("geo:37.786971,-122.399677");
+ assertEquals(Double.parseDouble("37.786971"), geoCoordinates.getLatitude());
+ assertEquals(Double.parseDouble("-122.399677"), geoCoordinates.getLongitude());
+
+ final GeoCoordinates geoCoordinatesWithAltitude = GeoUriWGS84Parser.parse("geo:37.786971,-122.399677,1000.500600");
+ assertEquals(Double.parseDouble("37.786971"), geoCoordinatesWithAltitude.getLatitude());
+ assertEquals(Double.parseDouble("-122.399677"), geoCoordinatesWithAltitude.getLongitude());
+ assertEquals(Double.parseDouble("1000.500600"), geoCoordinatesWithAltitude.getAltitude());
+
+ //Should truncate (not round) the decimals beyond the 7th
+ final GeoCoordinates geoCoordinatesWithLongDecimalPoints = GeoUriWGS84Parser.parse(
+ "geo:40.123456789,45.123456789,1000.123456789");
+ assertEquals(Double.parseDouble("40.1234567"), geoCoordinatesWithLongDecimalPoints.getLatitude());
+ assertEquals(Double.parseDouble("45.1234567"), geoCoordinatesWithLongDecimalPoints.getLongitude());
+ assertEquals(Double.parseDouble("1000.1234567"), geoCoordinatesWithLongDecimalPoints.getAltitude());
+ }
+}
\ No newline at end of file
diff --git a/metis-indexing/src/main/java/eu/europeana/indexing/solr/EdmLabel.java b/metis-indexing/src/main/java/eu/europeana/indexing/solr/EdmLabel.java
index e94eea556..ef0131b64 100644
--- a/metis-indexing/src/main/java/eu/europeana/indexing/solr/EdmLabel.java
+++ b/metis-indexing/src/main/java/eu/europeana/indexing/solr/EdmLabel.java
@@ -85,25 +85,30 @@ public enum EdmLabel {
PROXY_DCTERMS_HAS_PART("proxy_dcterms_hasPart"),
PROXY_DCTERMS_IS_PART_OF("proxy_dcterms_isPartOf"),
PROXY_DCTERMS_ISSUED("proxy_dcterms_issued"),
- PROXY_DCTERMS_MEDIUM("proxy_dcterms_medium"),
- PROXY_DCTERMS_PROVENANCE("proxy_dcterms_provenance"),
- PROXY_DCTERMS_SPATIAL("proxy_dcterms_spatial"),
- PROXY_DCTERMS_TEMPORAL("proxy_dcterms_temporal"),
+ PROXY_DCTERMS_MEDIUM("proxy_dcterms_medium"),
+ PROXY_DCTERMS_PROVENANCE("proxy_dcterms_provenance"),
+ PROXY_DCTERMS_SPATIAL("proxy_dcterms_spatial"),
+ PROXY_DCTERMS_TEMPORAL("proxy_dcterms_temporal"),
EDM_UGC("edm_UGC"),
PROXY_EDM_CURRENT_LOCATION("proxy_edm_currentLocation"),
PROXY_EDM_HAS_MET("proxy_edm_hasMet"),
- PROXY_EDM_ISRELATEDTO("proxy_edm_isRelatedTo"),
+ PROXY_EDM_ISRELATEDTO("proxy_edm_isRelatedTo"),
PROXY_EDM_YEAR("proxy_edm_year"),
PROVIDER_EDM_TYPE("proxy_edm_type"),
+ //GEO LOCATION FIELDS
+ CURRENT_LOCATION_WGS("currentLocation_wgs"),
+ COVERAGE_LOCATION_WGS("coverageLocation_wgs"),
+ LOCATION_WGS("location_wgs"),
+
//SKOS_CONCEPT
- SKOS_CONCEPT("skos_concept"),
- CC_SKOS_PREF_LABEL("cc_skos_prefLabel"),
- CC_SKOS_ALT_LABEL("cc_skos_altLabel"),
-
+ SKOS_CONCEPT("skos_concept"),
+ CC_SKOS_PREF_LABEL("cc_skos_prefLabel"),
+ CC_SKOS_ALT_LABEL("cc_skos_altLabel"),
+
//PLACE
- EDM_PLACE("edm_place"),
- PL_SKOS_PREF_LABEL("pl_skos_prefLabel"),
+ EDM_PLACE("edm_place"),
+ PL_SKOS_PREF_LABEL("pl_skos_prefLabel"),
PL_SKOS_ALT_LABEL("pl_skos_altLabel"),
PL_WGS84_POS_LAT("pl_wgs84_pos_lat"),
PL_WGS84_POS_LONG("pl_wgs84_pos_long"),
diff --git a/metis-indexing/src/main/java/eu/europeana/indexing/solr/SolrDocumentPopulator.java b/metis-indexing/src/main/java/eu/europeana/indexing/solr/SolrDocumentPopulator.java
index d17d8ad4c..a0ca28ad2 100644
--- a/metis-indexing/src/main/java/eu/europeana/indexing/solr/SolrDocumentPopulator.java
+++ b/metis-indexing/src/main/java/eu/europeana/indexing/solr/SolrDocumentPopulator.java
@@ -12,6 +12,7 @@
import eu.europeana.indexing.solr.property.AggregationSolrCreator;
import eu.europeana.indexing.solr.property.ConceptSolrCreator;
import eu.europeana.indexing.solr.property.EuropeanaAggregationSolrCreator;
+import eu.europeana.indexing.solr.property.FullBeanSolrProperties;
import eu.europeana.indexing.solr.property.LicenseSolrCreator;
import eu.europeana.indexing.solr.property.PlaceSolrCreator;
import eu.europeana.indexing.solr.property.ProvidedChoSolrCreator;
@@ -41,39 +42,35 @@
import org.apache.solr.common.SolrInputDocument;
/**
- * This class provides functionality to populate Solr documents. Both methods in this class should
- * be called to fill the Solr document. The method {@link #populateWithProperties(SolrInputDocument,
- * FullBeanImpl)} copies properties from the source to the Solr document. The method {@link
- * #populateWithFacets(SolrInputDocument, RdfWrapper)} on the other hand performs some analysis and
- * sets technical metadata.
+ * This class provides functionality to populate Solr documents. Both methods in this class should be called to fill the Solr
+ * document. The method {@link #populateWithProperties(SolrInputDocument, FullBeanImpl)} copies properties from the source to the
+ * Solr document. The method {@link #populateWithFacets(SolrInputDocument, RdfWrapper)} on the other hand performs some analysis
+ * and sets technical metadata.
*
* @author jochen
*/
public class SolrDocumentPopulator {
/**
- * Populates a Solr document with the properties of the full bean. Please note: this method should
- * only be called once on a given document, otherwise the behavior is not defined.
+ * Populates a Solr document with the properties of the full bean. Please note: this method should only be called once on a
+ * given document, otherwise the behavior is not defined.
*
* @param document The Solr document to populate.
* @param fullBean The FullBean to populate from.
*/
public void populateWithProperties(SolrInputDocument document, FullBeanImpl fullBean) {
- // Get the type: filter duplicates
- final String[] types = Optional.ofNullable(fullBean.getProxies()).stream().flatMap(List::stream)
- .filter(Objects::nonNull).map(ProxyImpl::getEdmType).filter(Objects::nonNull).distinct()
- .toArray(String[]::new);
- SolrPropertyUtils.addValues(document, EdmLabel.PROVIDER_EDM_TYPE, types);
+ new FullBeanSolrProperties().setProperties(document, fullBean);
// Gather the licenses.
final List licenses = Optional.ofNullable(fullBean.getLicenses()).stream()
- .flatMap(List::stream).filter(Objects::nonNull).collect(Collectors.toList());
+ .flatMap(List::stream).filter(Objects::nonNull).collect(Collectors.toList());
// Gather the quality annotations.
final Set acceptableTargets = Optional.ofNullable(fullBean.getAggregations()).stream()
- .flatMap(Collection::stream).filter(Objects::nonNull).map(AggregationImpl::getAbout)
- .filter(Objects::nonNull).collect(Collectors.toSet());
+ .flatMap(Collection::stream).filter(Objects::nonNull)
+ .map(AggregationImpl::getAbout)
+ .filter(Objects::nonNull).collect(Collectors.toSet());
final Predicate hasAcceptableTarget = annotation -> Optional
.ofNullable(annotation.getTarget()).stream().flatMap(Arrays::stream)
.anyMatch(acceptableTargets::contains);
@@ -99,23 +96,15 @@ public void populateWithProperties(SolrInputDocument document, FullBeanImpl full
// Add the licenses.
final Set defRights = fullBean.getAggregations().stream()
- .map(AggregationImpl::getEdmRights).filter(Objects::nonNull)
- .flatMap(SolrPropertyUtils::getRightsFromMap).collect(Collectors.toSet());
+ .map(AggregationImpl::getEdmRights).filter(Objects::nonNull)
+ .flatMap(SolrPropertyUtils::getRightsFromMap).collect(Collectors.toSet());
new LicenseSolrCreator(license -> defRights.contains(license.getAbout()))
.addAllToDocument(document, fullBean.getLicenses());
-
- // Add the top-level properties.
- document
- .addField(EdmLabel.EUROPEANA_COMPLETENESS.toString(), fullBean.getEuropeanaCompleteness());
- document.addField(EdmLabel.EUROPEANA_COLLECTIONNAME.toString(),
- fullBean.getEuropeanaCollectionName()[0]);
- document.addField(EdmLabel.TIMESTAMP_CREATED.toString(), fullBean.getTimestampCreated());
- document.addField(EdmLabel.TIMESTAMP_UPDATED.toString(), fullBean.getTimestampUpdated());
}
/**
- * Populates a Solr document with the CRF fields of the RDF. Please note: this method should only
- * be called once on a given document, otherwise the behavior is not defined.
+ * Populates a Solr document with the CRF fields of the RDF. Please note: this method should only be called once on a given
+ * document, otherwise the behavior is not defined.
*
* @param document The document to populate.
* @param rdf The RDF to populate from.
@@ -131,7 +120,7 @@ public void populateWithFacets(SolrInputDocument document, RdfWrapper rdf) {
final List webResourcesWithMedia = rdf.getWebResourceWrappers(
EnumSet.of(WebResourceLinkType.IS_SHOWN_BY, WebResourceLinkType.HAS_VIEW));
final boolean hasMedia = webResourcesWithMedia.stream().map(WebResourceWrapper::getMediaType)
- .anyMatch(type -> type != MediaType.OTHER);
+ .anyMatch(type -> type != MediaType.OTHER);
document.addField(EdmLabel.FACET_HAS_MEDIA.toString(), hasMedia);
// has_landingPage is true if and only if there is at least one web resource of type
@@ -141,7 +130,7 @@ public void populateWithFacets(SolrInputDocument document, RdfWrapper rdf) {
// is_fulltext is true if and only if there is at least one web resource of type 'isShownBy'
// or 'hasView' with 'rdf:type' equal to 'edm:FullTextResource'.
final boolean isFullText = webResourcesWithMedia.stream().map(WebResourceWrapper::getType)
- .anyMatch("http://www.europeana.eu/schemas/edm/FullTextResource"::equals);
+ .anyMatch("http://www.europeana.eu/schemas/edm/FullTextResource"::equals);
document.addField(EdmLabel.FACET_IS_FULL_TEXT.toString(), isFullText);
// Compose the filter and facet tags. Only use the web resources of type 'isShownBy' or 'hasView'.
@@ -163,14 +152,12 @@ public void populateWithFacets(SolrInputDocument document, RdfWrapper rdf) {
}
private List getDataProviderAggregations(FullBeanImpl fullBean) {
-
List proxyInResult = fullBean.getProxies().stream()
- .filter(not(ProxyImpl::isEuropeanaProxy))
- .filter(proxy -> ArrayUtils.isEmpty(proxy.getLineage())).map(ProxyImpl::getProxyIn)
- .map(Arrays::asList).flatMap(List::stream).collect(Collectors.toList());
+ .filter(not(ProxyImpl::isEuropeanaProxy))
+ .filter(proxy -> ArrayUtils.isEmpty(proxy.getLineage())).map(ProxyImpl::getProxyIn)
+ .map(Arrays::asList).flatMap(List::stream).collect(Collectors.toList());
return fullBean.getAggregations().stream().filter(x -> proxyInResult.contains(x.getAbout()))
- .collect(Collectors.toList());
-
+ .collect(Collectors.toList());
}
}
diff --git a/metis-indexing/src/main/java/eu/europeana/indexing/solr/property/FullBeanSolrProperties.java b/metis-indexing/src/main/java/eu/europeana/indexing/solr/property/FullBeanSolrProperties.java
new file mode 100644
index 000000000..8653e5177
--- /dev/null
+++ b/metis-indexing/src/main/java/eu/europeana/indexing/solr/property/FullBeanSolrProperties.java
@@ -0,0 +1,183 @@
+package eu.europeana.indexing.solr.property;
+
+import static eu.europeana.indexing.solr.EdmLabel.COVERAGE_LOCATION_WGS;
+import static eu.europeana.indexing.solr.EdmLabel.CURRENT_LOCATION_WGS;
+import static eu.europeana.indexing.solr.EdmLabel.LOCATION_WGS;
+import static java.lang.String.format;
+
+import eu.europeana.corelib.solr.bean.impl.FullBeanImpl;
+import eu.europeana.corelib.solr.entity.PlaceImpl;
+import eu.europeana.corelib.solr.entity.ProxyImpl;
+import eu.europeana.indexing.solr.EdmLabel;
+import eu.europeana.metis.exception.BadContentException;
+import eu.europeana.metis.utils.GeoUriWGS84Parser;
+import eu.europeana.metis.utils.GeoUriWGS84Parser.GeoCoordinates;
+import java.text.DecimalFormat;
+import java.text.DecimalFormatSymbols;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.solr.common.SolrInputDocument;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Class that creates Solr properties related to the FullBean and properties that need to be retrieved and computed from multiple
+ * sub-elements.
+ */
+public class FullBeanSolrProperties {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(FullBeanSolrProperties.class);
+
+  /**
+   * Computes and creates all properties relevant to fullbean as a whole.
+   *
+   * @param document the solr document
+   * @param fullBean the fullbean to analyze
+   */
+  public void setProperties(SolrInputDocument document, FullBeanImpl fullBean) {
+    // Get the type: filter duplicates
+    final String[] types = Optional.ofNullable(fullBean.getProxies()).stream().flatMap(List::stream)
+        .filter(Objects::nonNull).map(ProxyImpl::getEdmType).filter(Objects::nonNull).distinct()
+        .toArray(String[]::new);
+    SolrPropertyUtils.addValues(document, EdmLabel.PROVIDER_EDM_TYPE, types);
+
+    setGeospatialFields(document, fullBean);
+
+    document.addField(EdmLabel.EUROPEANA_COMPLETENESS.toString(), fullBean.getEuropeanaCompleteness());
+    document.addField(EdmLabel.EUROPEANA_COLLECTIONNAME.toString(), fullBean.getEuropeanaCollectionName()[0]);
+    document.addField(EdmLabel.TIMESTAMP_CREATED.toString(), fullBean.getTimestampCreated());
+    document.addField(EdmLabel.TIMESTAMP_UPDATED.toString(), fullBean.getTimestampUpdated());
+  }
+
+  // Fills the current, coverage and combined WGS location fields from the location values of the proxies. Null lists and
+  // null elements are skipped, consistent with the proxy handling in setProperties.
+  private void setGeospatialFields(SolrInputDocument document, FullBeanImpl fullBean) {
+    final Map<String, PlaceImpl> placesAboutMap = Optional.ofNullable(fullBean.getPlaces()).stream()
+        .flatMap(Collection::stream).filter(place -> place != null && place.getAbout() != null)
+        .collect(Collectors.toMap(PlaceImpl::getAbout, Function.identity(), (place1, place2) -> place1));
+    final Set<String> currentLocationStrings = new HashSet<>();
+    final Set<String> coverageLocationStrings = new HashSet<>();
+    Optional.ofNullable(fullBean.getProxies()).stream().flatMap(List::stream).filter(Objects::nonNull).forEach(proxy -> {
+      currentLocationStrings.addAll(getCurrentLocationStrings(proxy));
+      coverageLocationStrings.addAll(getCoverageLocationStrings(proxy));
+    });
+
+    final Set<LocationPoint> currentLocationPoints = getLocationPoints(placesAboutMap, currentLocationStrings);
+    final Set<LocationPoint> coverageLocationPoints = getLocationPoints(placesAboutMap, coverageLocationStrings);
+    final Set<LocationPoint> locationPointsCombined = new HashSet<>(currentLocationPoints);
+    locationPointsCombined.addAll(coverageLocationPoints);
+
+    SolrPropertyUtils.addValues(document, CURRENT_LOCATION_WGS, toStringArray(currentLocationPoints));
+    SolrPropertyUtils.addValues(document, COVERAGE_LOCATION_WGS, toStringArray(coverageLocationPoints));
+    SolrPropertyUtils.addValues(document, LOCATION_WGS, toStringArray(locationPointsCombined));
+  }
+
+  // Resolves the location strings both as place references and as geo URIs; equal points are kept only once.
+  private Set<LocationPoint> getLocationPoints(Map<String, PlaceImpl> placesAboutMap, Set<String> locationStrings) {
+    final Set<LocationPoint> locationPoints = new HashSet<>(getReferencedPlacesLocationPoints(placesAboutMap, locationStrings));
+    locationPoints.addAll(getWGS84LocationPoints(locationStrings));
+    return locationPoints;
+  }
+
+  // Formats the points as the "latitude,longitude" strings that are written to the document.
+  private static String[] toStringArray(Set<LocationPoint> locationPoints) {
+    return locationPoints.stream().map(LocationPoint::toString).toArray(String[]::new);
+  }
+
+  // Looks up the location strings as 'about' values of places and returns the points of the places that have coordinates.
+  private Set<LocationPoint> getReferencedPlacesLocationPoints(Map<String, PlaceImpl> placesAboutMap,
+      Set<String> locationStrings) {
+    return locationStrings.stream().map(placesAboutMap::get).filter(Objects::nonNull)
+        .map(this::getPlaceLocationPoint).filter(Objects::nonNull).collect(Collectors.toSet());
+  }
+
+  // Returns the points of those location strings that can be parsed as geo URIs.
+  private Set<LocationPoint> getWGS84LocationPoints(Set<String> locationStrings) {
+    return locationStrings.stream().map(this::getValidGeoCoordinates).filter(Objects::nonNull)
+        .map(LocationPoint::new).collect(Collectors.toSet());
+  }
+
+  // Collects the non-blank current location values of the proxy.
+  private Set<String> getCurrentLocationStrings(ProxyImpl proxy) {
+    final Set<String> currentLocations = new HashSet<>();
+    addNonBlankValues(proxy.getEdmCurrentLocation(), currentLocations);
+    return currentLocations;
+  }
+
+  // Collects the non-blank spatial and coverage values of the proxy.
+  private Set<String> getCoverageLocationStrings(ProxyImpl proxy) {
+    final Set<String> coverageLocations = new HashSet<>();
+    addNonBlankValues(proxy.getDctermsSpatial(), coverageLocations);
+    addNonBlankValues(proxy.getDcCoverage(), coverageLocations);
+    return coverageLocations;
+  }
+
+  // Adds all non-blank values of the (possibly null) map to the target set; null value collections are skipped.
+  private static void addNonBlankValues(Map<?, ? extends Collection<String>> valuesByKey, Set<String> target) {
+    if (valuesByKey != null) {
+      valuesByKey.values().stream().filter(Objects::nonNull).flatMap(Collection::stream)
+          .filter(StringUtils::isNotBlank).forEach(target::add);
+    }
+  }
+
+  // Returns the point of the place, or null if the place lacks a latitude or a longitude.
+  private LocationPoint getPlaceLocationPoint(PlaceImpl place) {
+    if (place.getLatitude() != null && place.getLongitude() != null) {
+      return new LocationPoint(place.getLatitude().doubleValue(), place.getLongitude().doubleValue());
+    }
+    return null;
+  }
+
+  // Returns the parsed coordinates, or null if parsing fails with a BadContentException.
+  private GeoCoordinates getValidGeoCoordinates(String s) {
+    try {
+      return GeoUriWGS84Parser.parse(s);
+    } catch (BadContentException e) {
+      LOGGER.debug("Geo parsing failed {}", s, e);
+    }
+    return null;
+  }
+
+  /**
+   * A point whose identity is its "latitude,longitude" text with at most 7 decimal places. The text is computed once so that
+   * {@link #toString()}, {@link #equals(Object)} and {@link #hashCode()} always agree with each other.
+   */
+  private static class LocationPoint {
+
+    private final String formatted;
+
+    public LocationPoint(Double latitude, Double longitude) {
+      // DecimalFormat is not thread-safe, so an instance is created here instead of sharing a static one.
+      final DecimalFormat decimalFormat = new DecimalFormat("#.#######", new DecimalFormatSymbols(Locale.US));
+      this.formatted = format(Locale.US, "%s,%s", decimalFormat.format(latitude), decimalFormat.format(longitude));
+    }
+
+    public LocationPoint(GeoCoordinates geoCoordinates) {
+      this(geoCoordinates.getLatitude(), geoCoordinates.getLongitude());
+    }
+
+    @Override
+    public String toString() {
+      return formatted;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      return this == o || (o instanceof LocationPoint && formatted.equals(((LocationPoint) o).formatted));
+    }
+
+    @Override
+    public int hashCode() {
+      return formatted.hashCode();
+    }
+  }
+}
diff --git a/metis-indexing/src/test/java/eu/europeana/indexing/solr/SolrDocumentPopulatorTest.java b/metis-indexing/src/test/java/eu/europeana/indexing/solr/SolrDocumentPopulatorTest.java
new file mode 100644
index 000000000..a0e0fcab3
--- /dev/null
+++ b/metis-indexing/src/test/java/eu/europeana/indexing/solr/SolrDocumentPopulatorTest.java
@@ -0,0 +1,86 @@
+package eu.europeana.indexing.solr;
+
+import static eu.europeana.indexing.solr.EdmLabel.COVERAGE_LOCATION_WGS;
+import static eu.europeana.indexing.solr.EdmLabel.CURRENT_LOCATION_WGS;
+import static eu.europeana.indexing.solr.EdmLabel.EUROPEANA_ID;
+import static eu.europeana.indexing.solr.EdmLabel.LOCATION_WGS;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import eu.europeana.corelib.solr.bean.impl.FullBeanImpl;
+import eu.europeana.indexing.fullbean.RdfToFullBeanConverter;
+import eu.europeana.indexing.tiers.ClassifierFactory;
+import eu.europeana.indexing.utils.RdfTierUtils;
+import eu.europeana.indexing.utils.RdfWrapper;
+import eu.europeana.metis.schema.convert.RdfConversionUtils;
+import eu.europeana.metis.schema.jibx.RDF;
+import java.io.File;
+import java.nio.file.Files;
+import java.util.List;
+import java.util.Objects;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.solr.common.SolrInputDocument;
+import org.junit.jupiter.api.Test;
+
+class SolrDocumentPopulatorTest {
+
+ @Test
+ void populateWithProperties_PlaceCoordinates() throws Exception {
+ ClassLoader classLoader = SolrDocumentPopulatorTest.class.getClassLoader();
+ File file = new File(Objects.requireNonNull(classLoader.getResource("europeana_record_with_geospatial_data.xml")).getFile());
+ String xml = new String(Files.readAllBytes(file.toPath()));
+ final RDF rdf = new RdfConversionUtils().convertStringToRdf(xml);
+
+ // Perform the tier classification
+ final RdfWrapper rdfWrapper = new RdfWrapper(rdf);
+ RdfTierUtils.setTier(rdf, ClassifierFactory.getMediaClassifier().classify(rdfWrapper).getTier());
+ RdfTierUtils.setTier(rdf, ClassifierFactory.getMetadataClassifier().classify(rdfWrapper).getTier());
+
+ final RdfToFullBeanConverter fullBeanConverter = new RdfToFullBeanConverter();
+ final FullBeanImpl fullBean = fullBeanConverter.convertRdfToFullBean(rdfWrapper);
+
+ // Create the Solr document and populate it with both the properties and the facets.
+ final SolrDocumentPopulator documentPopulator = new SolrDocumentPopulator();
+ final SolrInputDocument document = new SolrInputDocument();
+ documentPopulator.populateWithProperties(document, fullBean);
+ documentPopulator.populateWithFacets(document, rdfWrapper);
+ // Check the ID field and that each location field holds exactly the expected values, in any order.
+ assertTrue(document.get(EUROPEANA_ID.toString()).getValues().contains(fullBean.getAbout()));
+ assertTrue(CollectionUtils.isEqualCollection(document.get(CURRENT_LOCATION_WGS.toString()).getValues(),
+ List.of("50.75,4.5")));
+ assertTrue(CollectionUtils.isEqualCollection(document.get(COVERAGE_LOCATION_WGS.toString()).getValues(),
+ List.of("50,50", "40,40")));
+ assertTrue(CollectionUtils.isEqualCollection(document.get(LOCATION_WGS.toString()).getValues(),
+ List.of("50,50", "40,40", "50.75,4.5")));
+ }
+
+ @Test
+ void populateWithProperties_WGS84Coordinates() throws Exception {
+ ClassLoader classLoader = SolrDocumentPopulatorTest.class.getClassLoader();
+ File file = new File(
+ Objects.requireNonNull(classLoader.getResource("europeana_record_with_geospatial_data_wgs84.xml")).getFile());
+ String xml = new String(Files.readAllBytes(file.toPath()));
+ final RDF rdf = new RdfConversionUtils().convertStringToRdf(xml);
+
+ // Perform the tier classification
+ final RdfWrapper rdfWrapper = new RdfWrapper(rdf);
+ RdfTierUtils.setTier(rdf, ClassifierFactory.getMediaClassifier().classify(rdfWrapper).getTier());
+ RdfTierUtils.setTier(rdf, ClassifierFactory.getMetadataClassifier().classify(rdfWrapper).getTier());
+
+ final RdfToFullBeanConverter fullBeanConverter = new RdfToFullBeanConverter();
+ final FullBeanImpl fullBean = fullBeanConverter.convertRdfToFullBean(rdfWrapper);
+
+ // Create the Solr document and populate it with both the properties and the facets.
+ final SolrDocumentPopulator documentPopulator = new SolrDocumentPopulator();
+ final SolrInputDocument document = new SolrInputDocument();
+ documentPopulator.populateWithProperties(document, fullBean);
+ documentPopulator.populateWithFacets(document, rdfWrapper);
+ // Check the ID field and that each location field holds exactly the expected values, in any order.
+ assertTrue(document.get(EUROPEANA_ID.toString()).getValues().contains(fullBean.getAbout()));
+ assertTrue(CollectionUtils.isEqualCollection(document.get(CURRENT_LOCATION_WGS.toString()).getValues(),
+ List.of("50.75,4.5")));
+ assertTrue(CollectionUtils.isEqualCollection(document.get(COVERAGE_LOCATION_WGS.toString()).getValues(),
+ List.of("50,50", "40,40", "40.123456,40.1234567")));
+ assertTrue(CollectionUtils.isEqualCollection(document.get(LOCATION_WGS.toString()).getValues(),
+ List.of("50,50", "40,40", "40.123456,40.1234567", "50.75,4.5")));
+ }
+}
\ No newline at end of file
diff --git a/metis-indexing/src/test/resources/europeana_record_with_geospatial_data.xml b/metis-indexing/src/test/resources/europeana_record_with_geospatial_data.xml
new file mode 100644
index 000000000..456c99b21
--- /dev/null
+++ b/metis-indexing/src/test/resources/europeana_record_with_geospatial_data.xml
@@ -0,0 +1,127 @@
+
+
+
+
+
+
+ text/html
+ 197506
+
+
+ image/jpeg
+ 6643
+ 640
+ 480
+ sRGB
+ #F0E68C
+ #B22222
+ #FF4500
+ #2F4F4F
+ #ADFF2F
+ #87CEEB
+ landscape
+
+
+ 50.75
+ 4.5
+
+
+ 40
+ 40
+
+
+ 50
+ 50
+
+
+ 40
+ 40
+
+
+
+ 1914-1919 के मध्य मुख्यतः यूरोप में व्याप्त इस महायुद्ध को प्रथम विश्व युद्ध कहते हैं । यह महायुद्ध
+ यूरोप, एशिया व अफ्रीका तीन महाद्वीपों और जल, थल तथा आकाश में लड़ा गया। इसमें भाग लेने वाले देशों की संख्या, इसका क्षेत्र
+ (जिसमें यह लड़ा गया) तथा इससे हुई क्षति के अभूतपूर्व आंकड़ों के कारण ही इसे विश्वयुद्ध कहते हैं ।
+
+ Første verdenskrig
+
+
+
+
+
+
+
+ EFG - The European Film Gateway
+ J.M.P.- Trends
+
+
+
+ 12944
+ nld
+
+
+
+
+
+
+ true
+ 1984
+
+
+
+
+
+ PYLYSER, JEAN-MARIE
+ Newsitems West Flanders. Day of the Navy in Ostend, with a wreath-laying memorial service at the
+ monument of the sailors on the dike. Thereafter, a military parade takes place at the Wapenplein in the presence of Prince
+ Albert, Princess Paola, Governor Vanneste, mayor Goekindt and certain naval officers. The navy admiral gives a speech,
+ followed by a short parade. The princess salutes the flag bearers of the veterans' associations.
+
+ Westvlaams filmjournaal - WAF (Westvlaamse Actualiteiten Films). Dag van de zeemacht te
+ Oostende, met een kranslegging aan het monument van de zeelieden op de dijk. Naast enkele zeemachtofficieren legt ook
+ burgemeester Goekindt een krans. Nadien vindt een wapenschouwing plaats op het Wapenplein in aanwezigheid van prins Albert,
+ prinses Paola, gouverneur Vanneste en de eerder genoemde personaliteiten. De zeemachtadmiraal houdt een toespraak. Er vindt
+ een korte parade plaats en de prinsen groeten de vaandeldragers van de oud-strijdersverenigingen.
+
+ Newsitems West Flanders. Day of the Navy in Ostend, with a wreath-laying memorial service at the
+ monument of the sailors on the dike. Thereafter, a military parade takes place at the Wapenplein in the presence of Prince
+ Albert, Princess Paola, Governor Vanneste, mayor Goekindt and certain naval officers. The navy admiral gives a speech,
+ followed by a short parade. The princess salutes the flag bearers of the veterans' associations.
+
+ 12944
+ nl
+ EFG1914
+ World War I
+ WESTVLAAMS FILMJOURNAAL - 164
+ Newsreel
+
+
+ 1984
+ Belgium
+
+ false
+
+
+ VIDEO
+
+
+
+ Europeana Foundation
+ Europeana Foundation
+ 1_test
+ Netherlands
+
+ nl
+ 10
+
+
+ The Royal Belgian Film Archives
+ Cinémathèque royale de Belgique
+ Koninklijk Belgisch Filmarchief
+
+
\ No newline at end of file
diff --git a/metis-indexing/src/test/resources/europeana_record_with_geospatial_data_wgs84.xml b/metis-indexing/src/test/resources/europeana_record_with_geospatial_data_wgs84.xml
new file mode 100644
index 000000000..5ce8b91ab
--- /dev/null
+++ b/metis-indexing/src/test/resources/europeana_record_with_geospatial_data_wgs84.xml
@@ -0,0 +1,128 @@
+
+
+
+
+
+
+ text/html
+ 197506
+
+
+ image/jpeg
+ 6643
+ 640
+ 480
+ sRGB
+ #F0E68C
+ #B22222
+ #FF4500
+ #2F4F4F
+ #ADFF2F
+ #87CEEB
+ landscape
+
+
+ 50.75
+ 4.5
+
+
+ 40
+ 40
+
+
+ 50
+ 50
+
+
+ 40
+ 40
+
+
+
+ 1914-1919 के मध्य मुख्यतः यूरोप में व्याप्त इस महायुद्ध को प्रथम विश्व युद्ध कहते हैं । यह महायुद्ध
+ यूरोप, एशिया व अफ्रीका तीन महाद्वीपों और जल, थल तथा आकाश में लड़ा गया। इसमें भाग लेने वाले देशों की संख्या, इसका क्षेत्र
+ (जिसमें यह लड़ा गया) तथा इससे हुई क्षति के अभूतपूर्व आंकड़ों के कारण ही इसे विश्वयुद्ध कहते हैं ।
+
+ Første verdenskrig
+
+
+
+
+
+
+
+ EFG - The European Film Gateway
+ J.M.P.- Trends
+
+
+
+
+ 12944
+ nld
+
+
+
+
+
+
+ true
+ 1984
+
+
+
+
+
+ PYLYSER, JEAN-MARIE
+ Newsitems West Flanders. Day of the Navy in Ostend, with a wreath-laying memorial service at the
+ monument of the sailors on the dike. Thereafter, a military parade takes place at the Wapenplein in the presence of Prince
+ Albert, Princess Paola, Governor Vanneste, mayor Goekindt and certain naval officers. The navy admiral gives a speech,
+ followed by a short parade. The princess salutes the flag bearers of the veterans' associations.
+
+ Westvlaams filmjournaal - WAF (Westvlaamse Actualiteiten Films). Dag van de zeemacht te
+ Oostende, met een kranslegging aan het monument van de zeelieden op de dijk. Naast enkele zeemachtofficieren legt ook
+ burgemeester Goekindt een krans. Nadien vindt een wapenschouwing plaats op het Wapenplein in aanwezigheid van prins Albert,
+ prinses Paola, gouverneur Vanneste en de eerder genoemde personaliteiten. De zeemachtadmiraal houdt een toespraak. Er vindt
+ een korte parade plaats en de prinsen groeten de vaandeldragers van de oud-strijdersverenigingen.
+
+ Newsitems West Flanders. Day of the Navy in Ostend, with a wreath-laying memorial service at the
+ monument of the sailors on the dike. Thereafter, a military parade takes place at the Wapenplein in the presence of Prince
+ Albert, Princess Paola, Governor Vanneste, mayor Goekindt and certain naval officers. The navy admiral gives a speech,
+ followed by a short parade. The princess salutes the flag bearers of the veterans' associations.
+
+ 12944
+ nl
+ EFG1914
+ World War I
+ WESTVLAAMS FILMJOURNAAL - 164
+ Newsreel
+
+
+ 1984
+ Belgium
+
+ false
+
+
+ VIDEO
+
+
+
+ Europeana Foundation
+ Europeana Foundation
+ 1_test
+ Netherlands
+
+ nl
+ 10
+
+
+ The Royal Belgian Film Archives
+ Cinémathèque royale de Belgique
+ Koninklijk Belgisch Filmarchief
+
+
\ No newline at end of file
From 5617f3d73a498386af55016caad92fe74ed03aa4 Mon Sep 17 00:00:00 2001
From: Simon Tzanakis
Date: Mon, 11 Apr 2022 11:45:05 +0200
Subject: [PATCH 22/73] MET-4285 Fix path resolution for tests in Jenkins
---
.../solr/SolrDocumentPopulatorTest.java | 17 +++++++----------
1 file changed, 7 insertions(+), 10 deletions(-)
diff --git a/metis-indexing/src/test/java/eu/europeana/indexing/solr/SolrDocumentPopulatorTest.java b/metis-indexing/src/test/java/eu/europeana/indexing/solr/SolrDocumentPopulatorTest.java
index a0e0fcab3..541d5a549 100644
--- a/metis-indexing/src/test/java/eu/europeana/indexing/solr/SolrDocumentPopulatorTest.java
+++ b/metis-indexing/src/test/java/eu/europeana/indexing/solr/SolrDocumentPopulatorTest.java
@@ -13,11 +13,11 @@
import eu.europeana.indexing.utils.RdfWrapper;
import eu.europeana.metis.schema.convert.RdfConversionUtils;
import eu.europeana.metis.schema.jibx.RDF;
-import java.io.File;
-import java.nio.file.Files;
+import java.io.FileInputStream;
+import java.nio.charset.StandardCharsets;
import java.util.List;
-import java.util.Objects;
import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.io.IOUtils;
import org.apache.solr.common.SolrInputDocument;
import org.junit.jupiter.api.Test;
@@ -25,9 +25,8 @@ class SolrDocumentPopulatorTest {
@Test
void populateWithProperties_PlaceCoordinates() throws Exception {
- ClassLoader classLoader = SolrDocumentPopulatorTest.class.getClassLoader();
- File file = new File(Objects.requireNonNull(classLoader.getResource("europeana_record_with_geospatial_data.xml")).getFile());
- String xml = new String(Files.readAllBytes(file.toPath()));
+ String xml = IOUtils.toString(new FileInputStream("src/test/resources/europeana_record_with_geospatial_data.xml"),
+ StandardCharsets.UTF_8);
final RDF rdf = new RdfConversionUtils().convertStringToRdf(xml);
// Perform the tier classification
@@ -55,10 +54,8 @@ void populateWithProperties_PlaceCoordinates() throws Exception {
@Test
void populateWithProperties_WGS84Coordinates() throws Exception {
- ClassLoader classLoader = SolrDocumentPopulatorTest.class.getClassLoader();
- File file = new File(
- Objects.requireNonNull(classLoader.getResource("europeana_record_with_geospatial_data_wgs84.xml")).getFile());
- String xml = new String(Files.readAllBytes(file.toPath()));
+ String xml = IOUtils.toString(new FileInputStream("src/test/resources/europeana_record_with_geospatial_data_wgs84.xml"),
+ StandardCharsets.UTF_8);
final RDF rdf = new RdfConversionUtils().convertStringToRdf(xml);
// Perform the tier classification
From 098f611382443081964549a6e55070269e585a27 Mon Sep 17 00:00:00 2001
From: jochen_vermeulen
Date: Tue, 12 Apr 2022 10:42:23 +0200
Subject: [PATCH 23/73] MET-4237: Fix bugs in the post-processing code.
---
.../core/execution/WorkflowPostProcessor.java | 35 ++++++++++++-------
1 file changed, 23 insertions(+), 12 deletions(-)
diff --git a/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java b/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java
index 3eecebde1..5864a613c 100644
--- a/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java
+++ b/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/execution/WorkflowPostProcessor.java
@@ -31,16 +31,17 @@
import eu.europeana.metis.exception.BadContentException;
import java.util.ArrayList;
import java.util.Date;
-import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
+import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.springframework.util.CollectionUtils;
/**
* This object can perform post-processing for workflows.
@@ -93,17 +94,27 @@ private void indexPostProcess(AbstractExecutablePlugin> indexPlugin, String da
final boolean isIncremental = ((IndexToPublishPlugin) indexPlugin).getPluginMetadata().isIncrementalIndexing();
if (isIncremental) {
- // get all currently de-published records ids
- Set depublishedRecordIds = depublishRecordIdDao
- .getAllDepublishRecordIdsWithStatus(datasetId, DepublishRecordIdSortField.DEPUBLICATION_STATE,
- SortDirection.ASCENDING,
- DepublicationStatus.DEPUBLISHED);
-
- List publishedDatasetRecordIds = dpsClient.searchPublishedDatasetRecords(indexPlugin.getExternalTaskId(),
- new ArrayList<>(depublishedRecordIds));
- // reset de-publish status, pass recordIds to be de-published
- depublishRecordIdDao.markRecordIdsWithDepublicationStatus(datasetId, new HashSet<>(publishedDatasetRecordIds),
- DepublicationStatus.PENDING_DEPUBLICATION, null);
+ // get all currently de-published records IDs from the database and create their full versions
+ final Set depublishedRecordIds = depublishRecordIdDao.getAllDepublishRecordIdsWithStatus(
+ datasetId, DepublishRecordIdSortField.DEPUBLICATION_STATE, SortDirection.ASCENDING,
+ DepublicationStatus.DEPUBLISHED);
+ final Map depublishedRecordIdsByFullId = depublishedRecordIds.stream()
+ .collect(Collectors.toMap(id -> DepublishRecordIdUtils.composeFullRecordId(datasetId, id),
+ Function.identity()));
+
+ // Check which have been published by the index action - use full record IDs for eCloud.
+ if (!CollectionUtils.isEmpty(depublishedRecordIdsByFullId)) {
+ final List publishedRecordIds = dpsClient.searchPublishedDatasetRecords(datasetId,
+ new ArrayList<>(depublishedRecordIdsByFullId.keySet()));
+
+ // Remove the 'depublished' status. Note: we need to check for an empty result (otherwise
+ // the DAO call will update all records). Use the simple record IDs again.
+ if (!CollectionUtils.isEmpty(publishedRecordIds)) {
+ depublishRecordIdDao.markRecordIdsWithDepublicationStatus(datasetId,
+ publishedRecordIds.stream().map(depublishedRecordIdsByFullId::get)
+ .collect(Collectors.toSet()), DepublicationStatus.PENDING_DEPUBLICATION, null);
+ }
+ }
} else {
// reset de-publish status, pass null, all records will be de-published
depublishRecordIdDao.markRecordIdsWithDepublicationStatus(datasetId, null,
From 054fde753354e1a8ee1e8d30ad30cb207a53252a Mon Sep 17 00:00:00 2001
From: Simon Tzanakis
Date: Fri, 15 Apr 2022 16:21:18 +0200
Subject: [PATCH 24/73] MET-4449 Initial implementation of record patterns
analysis (#524)
* MET-4449 Initial implementation of record patterns analysis
* MET-4449 Updates on problem patterns
* MET-4449 Make ExecutionStep generic
* MET-4449 Make ExecutionStep generic and refactor
* MET-4449 Fix spelling mistake
---
metis-pattern-analysis/pom.xml | 51 ++++++++
.../PatternAnalysisService.java | 92 ++++++++++++++
.../ProblemPatternAnalyzer.java | 111 ++++++++++++++++
.../exception/PatternAnalysisException.java | 19 +++
.../view/DatasetProblemPatternAnalysis.java | 52 ++++++++
.../view/ProblemOccurrence.java | 47 +++++++
.../patternanalysis/view/ProblemPattern.java | 40 ++++++
.../view/ProblemPatternDescription.java | 77 ++++++++++++
.../patternanalysis/view/RecordAnalysis.java | 32 +++++
.../ProblemPatternAnalyzerTest.java | 49 ++++++++
.../DatasetProblemPatternAnalysisTest.java | 42 +++++++
.../view/ProblemOccurrenceTest.java | 24 ++++
.../view/ProblemPatternDescriptionTest.java | 44 +++++++
.../view/ProblemPatternTest.java | 33 +++++
.../view/RecordAnalysisTest.java | 27 ++++
.../resources/europeana_record_with_P2.xml | 119 ++++++++++++++++++
.../resources/europeana_record_with_P6.xml | 102 +++++++++++++++
pom.xml | 2 +
18 files changed, 963 insertions(+)
create mode 100644 metis-pattern-analysis/pom.xml
create mode 100644 metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/PatternAnalysisService.java
create mode 100644 metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/ProblemPatternAnalyzer.java
create mode 100644 metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/exception/PatternAnalysisException.java
create mode 100644 metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/DatasetProblemPatternAnalysis.java
create mode 100644 metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/ProblemOccurrence.java
create mode 100644 metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/ProblemPattern.java
create mode 100644 metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/ProblemPatternDescription.java
create mode 100644 metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/RecordAnalysis.java
create mode 100644 metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/ProblemPatternAnalyzerTest.java
create mode 100644 metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/DatasetProblemPatternAnalysisTest.java
create mode 100644 metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/ProblemOccurrenceTest.java
create mode 100644 metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/ProblemPatternDescriptionTest.java
create mode 100644 metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/ProblemPatternTest.java
create mode 100644 metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/RecordAnalysisTest.java
create mode 100644 metis-pattern-analysis/src/test/resources/europeana_record_with_P2.xml
create mode 100644 metis-pattern-analysis/src/test/resources/europeana_record_with_P6.xml
diff --git a/metis-pattern-analysis/pom.xml b/metis-pattern-analysis/pom.xml
new file mode 100644
index 000000000..6dfeaf2e2
--- /dev/null
+++ b/metis-pattern-analysis/pom.xml
@@ -0,0 +1,51 @@
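+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <parent>
+    <artifactId>metis-framework</artifactId>
+    <groupId>eu.europeana.metis</groupId>
+    <version>7-SNAPSHOT</version>
+  </parent>
+  <modelVersion>4.0.0</modelVersion>
+  <artifactId>metis-pattern-analysis</artifactId>
+
+  <dependencies>
+    <dependency>
+      <groupId>eu.europeana.metis</groupId>
+      <artifactId>metis-schema</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.fasterxml.jackson.core</groupId>
+      <artifactId>jackson-annotations</artifactId>
+      <version>${version.jackson}</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-collections</groupId>
+      <artifactId>commons-collections</artifactId>
+      <version>${version.commons.collections}</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-lang3</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.junit.jupiter</groupId>
+      <artifactId>junit-jupiter-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.junit.jupiter</groupId>
+      <artifactId>junit-jupiter-engine</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.fasterxml.jackson.core</groupId>
+      <artifactId>jackson-databind</artifactId>
+      <version>${version.jackson}</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+</project>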
\ No newline at end of file
diff --git a/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/PatternAnalysisService.java b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/PatternAnalysisService.java
new file mode 100644
index 000000000..1847e52d7
--- /dev/null
+++ b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/PatternAnalysisService.java
@@ -0,0 +1,92 @@
+package eu.europeana.patternanalysis;
+
+import eu.europeana.metis.schema.jibx.RDF;
+import eu.europeana.patternanalysis.exception.PatternAnalysisException;
+import eu.europeana.patternanalysis.view.DatasetProblemPatternAnalysis;
+import eu.europeana.patternanalysis.view.ProblemPattern;
+import java.time.LocalDateTime;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * Interface with all methods required for a pattern analysis service.
+ *
+ * @param <T> the type of the execution step
+ */
+public interface PatternAnalysisService<T> {
+
+  /**
+   * Generates the analysis of the record in RDF format.
+   * <p>
+   * It will compute patterns and store all relevant information in the database.
+   * </p>
+   *
+   * @param datasetId the datasetId
+   * @param executionStep the constant value of the step (similar to eu.europeana.metis.core.workflow.plugins.PluginType from
+   * metis-core and eu.europeana.metis.sandbox.common.Step from metis-sandbox).
+   * @param executionTimestamp the execution timestamp for the execution of the dataset (this should be the same for all records).
+   * @param rdfRecord the rdf record
+   * @throws PatternAnalysisException if an error occurred during the analysis
+   */
+  void generateRecordPatternAnalysis(String datasetId, T executionStep, LocalDateTime executionTimestamp, RDF rdfRecord)
+      throws PatternAnalysisException;
+
+  /**
+   * Generates the analysis of the record in String format.
+   * <p>
+   * It will compute patterns and store all relevant information in the database.
+   * </p>
+   *
+   * @param datasetId the datasetId
+   * @param executionStep the constant value of the step (similar to eu.europeana.metis.core.workflow.plugins.PluginType from
+   * metis-core and eu.europeana.metis.sandbox.common.Step from metis-sandbox).
+   * @param executionTimestamp the execution timestamp for the execution of the dataset (this should be the same for all records).
+   * @param rdfRecord the rdf record as a string
+   * @throws PatternAnalysisException if an error occurred during the analysis
+   */
+  void generateRecordPatternAnalysis(String datasetId, T executionStep, LocalDateTime executionTimestamp, String rdfRecord)
+      throws PatternAnalysisException;
+
+  /**
+   * Finalizes the computation of the analysis for the dataset.
+   * This method should be called at the end (post-processing) of the dataset execution, to perform the final calculations.
+   *
+   * @param datasetId the datasetId
+   * @param executionStep the constant value of the step (similar to eu.europeana.metis.core.workflow.plugins.PluginType from
+   * metis-core and eu.europeana.metis.sandbox.common.Step from metis-sandbox).
+   * @param executionTimestamp the execution timestamp for the execution of the dataset (this should be the same for all records).
+   * @throws PatternAnalysisException if an error occurred during the analysis
+   */
+  void finalizeDatasetPatternAnalysis(String datasetId, T executionStep, LocalDateTime executionTimestamp)
+      throws PatternAnalysisException;
+
+  /**
+   * Get the dataset pattern analysis for a specific execution.
+   * <p>
+   * This method will generate the dataset pattern analysis for a dataset and a specific execution from the data in the database.
+   * An in memory cache could be implemented internally.
+   * </p>
+   *
+   * @param datasetId the dataset identifier
+   * @param executionStep the execution step
+   * @param executionTimestamp the execution timestamp
+   * @return the dataset pattern analysis, wrapped in an {@link Optional}
+   */
+  Optional<DatasetProblemPatternAnalysis<T>> getDatasetPatternAnalysis(String datasetId, T executionStep,
+      LocalDateTime executionTimestamp);
+
+  /**
+   * Get a list of problem patterns for a particular record without storing them in the database.
+   * <p>Internally this method could check first if the analysis is present in the database and retrieve that.
+   * If not, it should generate it on the fly. An in memory cache could be implemented internally.
+   * </p>
+   *
+   * @param datasetId the dataset identifier
+   * @param executionStep the execution step
+   * @param executionTimestamp the execution timestamp
+   * @param rdfRecord the RDF record
+   * @return the list of problem patterns
+   */
+  List<ProblemPattern> getRecordPatternAnalysis(String datasetId, T executionStep, LocalDateTime executionTimestamp,
+      RDF rdfRecord);
+}
diff --git a/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/ProblemPatternAnalyzer.java b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/ProblemPatternAnalyzer.java
new file mode 100644
index 000000000..665b89823
--- /dev/null
+++ b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/ProblemPatternAnalyzer.java
@@ -0,0 +1,111 @@
+package eu.europeana.patternanalysis;
+
+import static java.lang.String.format;
+import static java.util.function.Predicate.not;
+
+import eu.europeana.metis.schema.convert.RdfConversionUtils;
+import eu.europeana.metis.schema.convert.SerializationException;
+import eu.europeana.metis.schema.jibx.Description;
+import eu.europeana.metis.schema.jibx.EuropeanaType;
+import eu.europeana.metis.schema.jibx.EuropeanaType.Choice;
+import eu.europeana.metis.schema.jibx.ProvidedCHOType;
+import eu.europeana.metis.schema.jibx.ProxyType;
+import eu.europeana.metis.schema.jibx.RDF;
+import eu.europeana.metis.schema.jibx.Title;
+import eu.europeana.patternanalysis.view.ProblemOccurrence;
+import eu.europeana.patternanalysis.view.ProblemPattern;
+import eu.europeana.patternanalysis.view.ProblemPatternDescription;
+import eu.europeana.patternanalysis.view.RecordAnalysis;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.lang3.StringUtils;
+
+/**
+ * Class that contains functionality to analyze a record and retrieve all problem patterns.
+ */
+public class ProblemPatternAnalyzer {
+
+  /** Titles of at most this many characters are reported as non meaningful (pattern P6). */
+  public static final int MIN_TITLE_LENGTH = 2;
+
+  /**
+   * Analyzes a record for problem patterns.
+   *
+   * @param rdfString the rdf record as a string
+   * @return a list of problem patterns
+   * @throws SerializationException if the record could not be converted to {@link RDF}
+   */
+  public List<ProblemPattern> analyzeRecord(String rdfString) throws SerializationException {
+    return analyzeRecord(new RdfConversionUtils().convertStringToRdf(rdfString));
+  }
+
+  /**
+   * Analyzes a record for problem patterns. Only the titles and descriptions of the provider proxies are inspected.
+   *
+   * @param rdf the rdf record
+   * @return a list of problem patterns
+   */
+  public List<ProblemPattern> analyzeRecord(RDF rdf) {
+    final List<ProxyType> providerProxies = getProviderProxies(rdf);
+    final List<String> titles = providerProxies.stream().map(EuropeanaType::getChoiceList).flatMap(Collection::stream)
+        .filter(Choice::ifTitle).map(Choice::getTitle).map(Title::getString)
+        .filter(StringUtils::isNotBlank).map(String::trim)
+        .collect(Collectors.toList());
+    final List<String> descriptions = providerProxies.stream().map(EuropeanaType::getChoiceList).flatMap(Collection::stream)
+        .filter(Choice::ifDescription).map(Choice::getDescription).map(Description::getString)
+        .filter(StringUtils::isNotBlank).map(String::trim)
+        .collect(Collectors.toList());
+    // The about value of the first non-null provided CHO is used as record id; it is null when there is none.
+    final String rdfAbout = rdf.getProvidedCHOList().stream().filter(Objects::nonNull).findFirst()
+        .map(ProvidedCHOType::getAbout).orElse(null);
+    return computeProblemPatterns(rdfAbout, titles, descriptions);
+  }
+
+  /**
+   * Runs all implemented checks and collects the problem patterns that have at least one occurrence.
+   */
+  private List<ProblemPattern> computeProblemPatterns(String rdfAbout, List<String> titles, List<String> descriptions) {
+    final List<ProblemPattern> problemPatterns = new ArrayList<>();
+    constructProblemPattern(rdfAbout, ProblemPatternDescription.P2, checkP2(titles, descriptions))
+        .ifPresent(problemPatterns::add);
+    constructProblemPattern(rdfAbout, ProblemPatternDescription.P6, checkP6(titles)).ifPresent(problemPatterns::add);
+    return problemPatterns;
+  }
+
+  private List<ProxyType> getProviderProxies(RDF rdf) {
+    return rdf.getProxyList().stream().filter(proxyType -> proxyType.getEuropeanaProxy() != null)
+        .filter(not(proxyType -> proxyType.getEuropeanaProxy().isEuropeanaProxy())).collect(Collectors.toList());
+  }
+
+  private List<ProblemOccurrence> checkP2(List<String> titles, List<String> descriptions) {
+    // Lower case with Locale.ROOT so that the outcome does not depend on the default locale of the JVM.
+    final Set<String> equalTitlesAndDescriptions = titles.stream()
+        .map(title -> title.toLowerCase(java.util.Locale.ROOT)).collect(Collectors.toCollection(HashSet::new));
+    final Set<String> uniqueDescriptions = descriptions.stream()
+        .map(description -> description.toLowerCase(java.util.Locale.ROOT)).collect(Collectors.toSet());
+    equalTitlesAndDescriptions.retainAll(uniqueDescriptions);
+    return equalTitlesAndDescriptions.stream().map(value -> new ProblemOccurrence(
+        format("Equal(lower cased) title and description: %s", value))).collect(Collectors.toList());
+  }
+
+  private List<ProblemOccurrence> checkP6(List<String> titles) {
+    return titles.stream().filter(title -> title.length() <= MIN_TITLE_LENGTH)
+        .map(title -> new ProblemOccurrence(format("Non meaningful title: %s", title))).collect(Collectors.toList());
+  }
+
+  private Optional<ProblemPattern> constructProblemPattern(String recordId, ProblemPatternDescription problemPatternDescription,
+      List<ProblemOccurrence> problemOccurrences) {
+    if (CollectionUtils.isNotEmpty(problemOccurrences)) {
+      return Optional.of(new ProblemPattern(
+          problemPatternDescription, 1, List.of(new RecordAnalysis(recordId, problemOccurrences))));
+    }
+    return Optional.empty();
+  }
+}
diff --git a/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/exception/PatternAnalysisException.java b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/exception/PatternAnalysisException.java
new file mode 100644
index 000000000..e2e54bfa4
--- /dev/null
+++ b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/exception/PatternAnalysisException.java
@@ -0,0 +1,19 @@
+package eu.europeana.patternanalysis.exception;
+
+/**
+ * Checked exception used for a pattern analysis error.
+ */
+public class PatternAnalysisException extends Exception {
+
+  /**
+   * Constructs a new exception with the specified detail message and cause.
+   *
+   * @param message the detail message (which is saved for later retrieval by the {@link #getMessage()} method).
+   * @param cause the cause (which is saved for later retrieval by the {@link #getCause()} method). (A null value is
+   * permitted, and indicates that the cause is nonexistent or unknown.)
+   */
+  public PatternAnalysisException(String message, Throwable cause) {
+    super(message, cause);
+  }
+
+}
diff --git a/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/DatasetProblemPatternAnalysis.java b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/DatasetProblemPatternAnalysis.java
new file mode 100644
index 000000000..286444e60
--- /dev/null
+++ b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/DatasetProblemPatternAnalysis.java
@@ -0,0 +1,52 @@
+package eu.europeana.patternanalysis.view;
+
+import com.fasterxml.jackson.annotation.JsonFormat;
+import java.time.LocalDateTime;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Class containing the dataset analysis for problem patterns.
+ *
+ * @param the type of the execution step
+ */
+public class DatasetProblemPatternAnalysis {
+
+ private final String datasetId;
+ private final T executionStep;
+ @JsonFormat(pattern = "yyyy-MM-dd'T'HH:mm:ss.SSSXXX")
+ private final LocalDateTime executionTimestamp;
+ private final List problemPatternList;
+
+ /**
+ * Constructor with required parameters.
+ *
+ * @param datasetId the dataset id
+ * @param executionTimestamp the execution timestamp
+ * @param executionStep the execution step
+ * @param problemPatternList the problem pattern list
+ */
+ public DatasetProblemPatternAnalysis(String datasetId, T executionStep, LocalDateTime executionTimestamp,
+ List problemPatternList) {
+ this.datasetId = datasetId;
+ this.executionStep = executionStep;
+ this.executionTimestamp = executionTimestamp;
+ this.problemPatternList = problemPatternList == null ? new ArrayList<>() : new ArrayList<>(problemPatternList);
+ }
+
+ public String getDatasetId() {
+ return datasetId;
+ }
+
+ public T getExecutionStep() {
+ return executionStep;
+ }
+
+ public LocalDateTime getExecutionTimestamp() {
+ return executionTimestamp;
+ }
+
+ public List getProblemPatternList() {
+ return new ArrayList<>(problemPatternList);
+ }
+}
diff --git a/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/ProblemOccurrence.java b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/ProblemOccurrence.java
new file mode 100644
index 000000000..b09aca9a4
--- /dev/null
+++ b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/ProblemOccurrence.java
@@ -0,0 +1,47 @@
+package eu.europeana.patternanalysis.view;
+
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonInclude.Include;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Class containing the problem occurrence report.
+ * It also contains {@link #affectedRecordIds}, which lists the other records that are part of this problem with this record.
+ * The list is empty if the problem is only related to the current record.
+ */
+public class ProblemOccurrence {
+
+  private final String messageReport;
+  @JsonInclude(Include.NON_NULL) // NOTE(review): the list is never null, so NON_NULL has no effect - NON_EMPTY may be intended
+  private final List<String> affectedRecordIds;
+
+  /**
+   * Constructor with required parameters.
+   *
+   * @param message the problem message
+   * @param affectedRecordIds the affected record ids. Can be null if the problem spans only to the current record.
+   */
+  public ProblemOccurrence(String message, List<String> affectedRecordIds) {
+    this.messageReport = message;
+    this.affectedRecordIds = affectedRecordIds == null ? new ArrayList<>() : new ArrayList<>(affectedRecordIds);
+  }
+
+  /**
+   * Constructor for a problem that has no other affected records.
+   *
+   * @param messageReport the message report
+   */
+  public ProblemOccurrence(String messageReport) {
+    this(messageReport, null);
+  }
+
+  public String getMessageReport() {
+    return messageReport;
+  }
+
+  /** Returns a copy of the affected record ids, never null. */
+  public List<String> getAffectedRecordIds() {
+    return new ArrayList<>(affectedRecordIds);
+  }
+}
diff --git a/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/ProblemPattern.java b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/ProblemPattern.java
new file mode 100644
index 000000000..4c73a75e1
--- /dev/null
+++ b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/ProblemPattern.java
@@ -0,0 +1,40 @@
+package eu.europeana.patternanalysis.view;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Class containing the problem pattern including its {@link RecordAnalysis}.
+ */
+public class ProblemPattern {
+
+ private final ProblemPatternDescription problemPatternDescription;
+ private final int recordOccurrences;
+ private final List recordAnalysisList;
+
+ /**
+ * Constructor with required parameters.
+ *
+ * @param problemPatternId the problem pattern id
+ * @param recordOccurrences the record occurrences
+ * @param recordAnalysisList the record analysis list
+ */
+ public ProblemPattern(ProblemPatternDescription problemPatternId, int recordOccurrences,
+ List recordAnalysisList) {
+ this.problemPatternDescription = problemPatternId;
+ this.recordOccurrences = recordOccurrences;
+ this.recordAnalysisList = recordAnalysisList == null ? new ArrayList<>() : new ArrayList<>(recordAnalysisList);
+ }
+
+ public ProblemPatternDescription getProblemPatternDescription() {
+ return problemPatternDescription;
+ }
+
+ public int getRecordOccurrences() {
+ return recordOccurrences;
+ }
+
+ public List getRecordAnalysisList() {
+ return new ArrayList<>(recordAnalysisList);
+ }
+}
diff --git a/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/ProblemPatternDescription.java b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/ProblemPatternDescription.java
new file mode 100644
index 000000000..58dc8c886
--- /dev/null
+++ b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/ProblemPatternDescription.java
@@ -0,0 +1,77 @@
+package eu.europeana.patternanalysis.view;
+
+import com.fasterxml.jackson.annotation.JsonFormat;
+import java.util.Arrays;
+
+/**
+ * Enum containing all available problem patterns.
+ */
+@JsonFormat(shape = JsonFormat.Shape.OBJECT)
+public enum ProblemPatternDescription {
+
+  P1(ProblemPatternId.P1, ProblemPatternSeverity.WARNING, ProblemPatternQualityDimension.CONCISENESS),
+  P2(ProblemPatternId.P2, ProblemPatternSeverity.WARNING, ProblemPatternQualityDimension.CONCISENESS),
+  P3(ProblemPatternId.P3, ProblemPatternSeverity.NOTICE, ProblemPatternQualityDimension.CONCISENESS),
+  P5(ProblemPatternId.P5, ProblemPatternSeverity.NOTICE, ProblemPatternQualityDimension.ACCURACY),
+  P6(ProblemPatternId.P6, ProblemPatternSeverity.NOTICE, ProblemPatternQualityDimension.ACCURACY),
+  P7(ProblemPatternId.P7, ProblemPatternSeverity.NOTICE, ProblemPatternQualityDimension.COMPLETENESS),
+  P9(ProblemPatternId.P9, ProblemPatternSeverity.WARNING, ProblemPatternQualityDimension.ACCURACY),
+  P12(ProblemPatternId.P12, ProblemPatternSeverity.NOTICE, ProblemPatternQualityDimension.ACCURACY);
+
+  private final ProblemPatternId problemPatternId;
+  private final ProblemPatternSeverity problemPatternSeverity;
+  private final ProblemPatternQualityDimension problemPatternQualityDimension;
+
+  ProblemPatternDescription(ProblemPatternId problemPatternId,
+      ProblemPatternSeverity problemPatternSeverity,
+      ProblemPatternQualityDimension problemPatternQualityDimension) {
+    this.problemPatternId = problemPatternId;
+    this.problemPatternSeverity = problemPatternSeverity;
+    this.problemPatternQualityDimension = problemPatternQualityDimension;
+  }
+
+  public ProblemPatternId getProblemPatternId() {
+    return problemPatternId;
+  }
+
+  public ProblemPatternSeverity getProblemPatternSeverity() {
+    return problemPatternSeverity;
+  }
+
+  public ProblemPatternQualityDimension getProblemPatternQualityDimension() {
+    return problemPatternQualityDimension;
+  }
+
+  /**
+   * Retrieves an instance of the enum based on the provided enum name (ignoring case) or else throws a runtime exception.
+   *
+   * @param name the enum name
+   * @return the enum object
+   * @throws java.util.NoSuchElementException if no enum value has the provided name
+   */
+  public static ProblemPatternDescription fromName(String name) {
+    return Arrays.stream(ProblemPatternDescription.values()).filter(value -> value.name().equalsIgnoreCase(name)).findFirst()
+        .orElseThrow(() -> new java.util.NoSuchElementException("Unknown problem pattern name: " + name));
+  }
+
+  /**
+   * The problem pattern ids
+   */
+  public enum ProblemPatternId {
+    P1, P2, P3, P5, P6, P7, P9, P12
+  }
+
+  /**
+   * The problem pattern severities
+   */
+  public enum ProblemPatternSeverity {
+    NOTICE, WARNING, ERROR, FATAL
+  }
+
+  /**
+   * The problem pattern quality dimensions
+   */
+  public enum ProblemPatternQualityDimension {
+    ACCURACY, AVAILABILITY, COMPLETENESS, CONCISENESS, COMPLIANCE, CONSISTENCY, TIMELINESS, LICENSING, INTERLINKING, UNDERSTANDABILITY, REPRESENTATIONAL
+  }
+}
diff --git a/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/RecordAnalysis.java b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/RecordAnalysis.java
new file mode 100644
index 000000000..7c47ab4ba
--- /dev/null
+++ b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/RecordAnalysis.java
@@ -0,0 +1,32 @@
+package eu.europeana.patternanalysis.view;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Class containing the record analysis, i.e. the problem occurrences found for one record.
+ */
+public class RecordAnalysis {
+
+  private final String recordId;
+  private final List<ProblemOccurrence> problemOccurrenceList;
+
+  /**
+   * Constructor with required parameters.
+   *
+   * @param recordId the record id
+   * @param problemOccurrenceList the problem occurrences list, a null value results in an empty list
+   */
+  public RecordAnalysis(String recordId, List<ProblemOccurrence> problemOccurrenceList) {
+    this.recordId = recordId;
+    this.problemOccurrenceList = problemOccurrenceList == null ? new ArrayList<>() : new ArrayList<>(problemOccurrenceList);
+  }
+
+  public String getRecordId() {
+    return recordId;
+  }
+
+  public List<ProblemOccurrence> getProblemOccurrenceList() {
+    return new ArrayList<>(problemOccurrenceList);
+  }
+}
diff --git a/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/ProblemPatternAnalyzerTest.java b/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/ProblemPatternAnalyzerTest.java
new file mode 100644
index 000000000..60d801f47
--- /dev/null
+++ b/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/ProblemPatternAnalyzerTest.java
@@ -0,0 +1,49 @@
+package eu.europeana.patternanalysis;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import eu.europeana.metis.schema.convert.RdfConversionUtils;
+import eu.europeana.metis.schema.jibx.RDF;
+import eu.europeana.patternanalysis.view.ProblemPattern;
+import eu.europeana.patternanalysis.view.ProblemPatternDescription;
+import java.io.FileInputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+import org.apache.commons.io.IOUtils;
+import org.junit.jupiter.api.Test;
+
+class ProblemPatternAnalyzerTest {
+
+  @Test
+  void analyzeRecord_P2() throws Exception {
+    //Should contain two provider proxies that each contain a pair of identical title and description. All four values are identical on the two proxies.
+    final List<ProblemPattern> problemPatterns = analyzeRecordFromFile("src/test/resources/europeana_record_with_P2.xml");
+
+    assertNotNull(problemPatterns);
+    assertEquals(1, problemPatterns.size());
+    assertEquals(ProblemPatternDescription.P2, problemPatterns.get(0).getProblemPatternDescription());
+    assertEquals(1, problemPatterns.get(0).getRecordAnalysisList().get(0).getProblemOccurrenceList().size());
+  }
+
+  @Test
+  void analyzeRecord_P6() throws Exception {
+    //Should contain one title that is not meaningful(too short)
+    final List<ProblemPattern> problemPatterns = analyzeRecordFromFile("src/test/resources/europeana_record_with_P6.xml");
+
+    assertNotNull(problemPatterns);
+    assertEquals(1, problemPatterns.size());
+    assertEquals(ProblemPatternDescription.P6, problemPatterns.get(0).getProblemPatternDescription());
+    assertEquals(1, problemPatterns.get(0).getRecordAnalysisList().get(0).getProblemOccurrenceList().size());
+  }
+
+  /**
+   * Reads the record from the given file, closing the stream afterwards, and analyzes it for problem patterns.
+   */
+  private static List<ProblemPattern> analyzeRecordFromFile(String path) throws Exception {
+    try (FileInputStream inputStream = new FileInputStream(path)) {
+      final RDF rdf = new RdfConversionUtils().convertStringToRdf(IOUtils.toString(inputStream, StandardCharsets.UTF_8));
+      return new ProblemPatternAnalyzer().analyzeRecord(rdf);
+    }
+  }
+}
\ No newline at end of file
diff --git a/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/DatasetProblemPatternAnalysisTest.java b/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/DatasetProblemPatternAnalysisTest.java
new file mode 100644
index 000000000..d287b30c6
--- /dev/null
+++ b/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/DatasetProblemPatternAnalysisTest.java
@@ -0,0 +1,42 @@
+package eu.europeana.patternanalysis.view;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.time.LocalDateTime;
+import java.util.List;
+import org.apache.commons.collections.CollectionUtils;
+import org.junit.jupiter.api.Test;
+
+class DatasetProblemPatternAnalysisTest {
+
+  @Test
+  void objectCreationTest() {
+    final ProblemOccurrence problemOccurrence1 = new ProblemOccurrence("Duplicate titleA", List.of("recordId1", "recordId2"));
+    final ProblemOccurrence problemOccurrence2 = new ProblemOccurrence("Duplicate titleB");
+    final RecordAnalysis recordAnalysis1 = new RecordAnalysis("recordId1", List.of(problemOccurrence1, problemOccurrence2));
+
+    final RecordAnalysis recordAnalysis2 = new RecordAnalysis("recordId1", null);
+
+    final ProblemPattern problemPattern1 = new ProblemPattern(
+        ProblemPatternDescription.P2, 2, List.of(recordAnalysis1, recordAnalysis2));
+    final ProblemPattern problemPattern2 = new ProblemPattern(ProblemPatternDescription.P2, 2, null);
+
+    final LocalDateTime currentDate = LocalDateTime.now();
+    final DatasetProblemPatternAnalysis<String> datasetProblemPatternAnalysis1 = new DatasetProblemPatternAnalysis<>(
+        "datasetId1", "VALIDATION_EXTERNAL", currentDate, List.of(problemPattern1, problemPattern2));
+
+    assertEquals("datasetId1", datasetProblemPatternAnalysis1.getDatasetId());
+    assertEquals(0, currentDate.compareTo(datasetProblemPatternAnalysis1.getExecutionTimestamp()));
+    assertEquals("VALIDATION_EXTERNAL", datasetProblemPatternAnalysis1.getExecutionStep());
+    assertTrue(CollectionUtils.isEqualCollection(List.of(problemPattern1, problemPattern2),
+        datasetProblemPatternAnalysis1.getProblemPatternList()));
+
+    // A null problem pattern list must still result in a non-null list.
+    final DatasetProblemPatternAnalysis<String> datasetProblemPatternAnalysis2 = new DatasetProblemPatternAnalysis<>(
+        "datasetId1", "VALIDATION_EXTERNAL", currentDate, null);
+    assertNotNull(datasetProblemPatternAnalysis2.getProblemPatternList());
+  }
+
+}
\ No newline at end of file
diff --git a/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/ProblemOccurrenceTest.java b/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/ProblemOccurrenceTest.java
new file mode 100644
index 000000000..a8a73e73f
--- /dev/null
+++ b/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/ProblemOccurrenceTest.java
@@ -0,0 +1,24 @@
+package eu.europeana.patternanalysis.view;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.List;
+import org.apache.commons.collections.CollectionUtils;
+import org.junit.jupiter.api.Test;
+
+class ProblemOccurrenceTest {
+
+  @Test
+  void objectCreationTest() {
+    final ProblemOccurrence withAffectedRecords = new ProblemOccurrence("Duplicate titleA", List.of("recordId1", "recordId2"));
+    assertEquals("Duplicate titleA", withAffectedRecords.getMessageReport());
+    assertTrue(CollectionUtils.isEqualCollection(List.of("recordId2", "recordId1"), withAffectedRecords.getAffectedRecordIds()));
+
+    final ProblemOccurrence withoutAffectedRecords = new ProblemOccurrence("Duplicate titleB");
+    assertNotNull(withoutAffectedRecords.getAffectedRecordIds());
+    assertEquals(0, withoutAffectedRecords.getAffectedRecordIds().size());
+  }
+
+}
\ No newline at end of file
diff --git a/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/ProblemPatternDescriptionTest.java b/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/ProblemPatternDescriptionTest.java
new file mode 100644
index 000000000..793c7935d
--- /dev/null
+++ b/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/ProblemPatternDescriptionTest.java
@@ -0,0 +1,44 @@
+package eu.europeana.patternanalysis.view;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import org.junit.jupiter.api.Test;
+
+class ProblemPatternDescriptionTest {
+
+ @Test
+ void checkValues() {
+ assertEquals("P1", ProblemPatternDescription.P1.getProblemPatternId().toString());
+ assertEquals("WARNING", ProblemPatternDescription.P1.getProblemPatternSeverity().toString());
+ assertEquals("CONCISENESS", ProblemPatternDescription.P1.getProblemPatternQualityDimension().toString());
+
+ assertEquals("P2", ProblemPatternDescription.P2.getProblemPatternId().toString());
+ assertEquals("WARNING", ProblemPatternDescription.P2.getProblemPatternSeverity().toString());
+ assertEquals("CONCISENESS", ProblemPatternDescription.P2.getProblemPatternQualityDimension().toString());
+
+ assertEquals("P3", ProblemPatternDescription.P3.getProblemPatternId().toString());
+ assertEquals("NOTICE", ProblemPatternDescription.P3.getProblemPatternSeverity().toString());
+ assertEquals("CONCISENESS", ProblemPatternDescription.P3.getProblemPatternQualityDimension().toString());
+
+ assertEquals("P5", ProblemPatternDescription.P5.getProblemPatternId().toString());
+ assertEquals("NOTICE", ProblemPatternDescription.P5.getProblemPatternSeverity().toString());
+ assertEquals("ACCURACY", ProblemPatternDescription.P5.getProblemPatternQualityDimension().toString());
+
+ assertEquals("P6", ProblemPatternDescription.P6.getProblemPatternId().toString());
+ assertEquals("NOTICE", ProblemPatternDescription.P6.getProblemPatternSeverity().toString());
+ assertEquals("ACCURACY", ProblemPatternDescription.P6.getProblemPatternQualityDimension().toString());
+
+ assertEquals("P7", ProblemPatternDescription.P7.getProblemPatternId().toString());
+ assertEquals("NOTICE", ProblemPatternDescription.P7.getProblemPatternSeverity().toString());
+ assertEquals("COMPLETENESS", ProblemPatternDescription.P7.getProblemPatternQualityDimension().toString());
+
+ assertEquals("P9", ProblemPatternDescription.P9.getProblemPatternId().toString());
+ assertEquals("WARNING", ProblemPatternDescription.P9.getProblemPatternSeverity().toString());
+ assertEquals("ACCURACY", ProblemPatternDescription.P9.getProblemPatternQualityDimension().toString());
+
+ assertEquals("P12", ProblemPatternDescription.P12.getProblemPatternId().toString());
+ assertEquals("NOTICE", ProblemPatternDescription.P12.getProblemPatternSeverity().toString());
+ assertEquals("ACCURACY", ProblemPatternDescription.P12.getProblemPatternQualityDimension().toString());
+ }
+
+}
\ No newline at end of file
diff --git a/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/ProblemPatternTest.java b/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/ProblemPatternTest.java
new file mode 100644
index 000000000..d4112cbff
--- /dev/null
+++ b/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/ProblemPatternTest.java
@@ -0,0 +1,33 @@
+package eu.europeana.patternanalysis.view;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.List;
+import org.apache.commons.collections.CollectionUtils;
+import org.junit.jupiter.api.Test;
+
+class ProblemPatternTest {
+
+ @Test
+ void objectCreationTest() {
+ final ProblemOccurrence problemOccurrence1 = new ProblemOccurrence("Duplicate titleA", List.of("recordId1", "recordId2"));
+ final ProblemOccurrence problemOccurrence2 = new ProblemOccurrence("Duplicate titleB");
+ final RecordAnalysis recordAnalysis1 = new RecordAnalysis("recordId1", List.of(problemOccurrence1, problemOccurrence2));
+
+ final RecordAnalysis recordAnalysis2 = new RecordAnalysis("recordId1", null);
+
+ final ProblemPattern problemPattern1 = new ProblemPattern(
+ ProblemPatternDescription.P2, 2, List.of(recordAnalysis1, recordAnalysis2));
+
+ assertEquals(ProblemPatternDescription.P2, problemPattern1.getProblemPatternDescription());
+ assertEquals(2, problemPattern1.getRecordOccurrences());
+ assertTrue(
+ CollectionUtils.isEqualCollection(List.of(recordAnalysis1, recordAnalysis2), problemPattern1.getRecordAnalysisList()));
+
+ final ProblemPattern problemPattern2 = new ProblemPattern(ProblemPatternDescription.P2, 2, null);
+ assertNotNull(problemPattern2.getRecordAnalysisList());
+ }
+
+}
\ No newline at end of file
diff --git a/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/RecordAnalysisTest.java b/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/RecordAnalysisTest.java
new file mode 100644
index 000000000..3bcb60b29
--- /dev/null
+++ b/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/RecordAnalysisTest.java
@@ -0,0 +1,27 @@
+package eu.europeana.patternanalysis.view;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.List;
+import org.apache.commons.collections.CollectionUtils;
+import org.junit.jupiter.api.Test;
+
+class RecordAnalysisTest {
+
+ @Test
+ void objectCreationTest() {
+ final ProblemOccurrence problemOccurrence1 = new ProblemOccurrence("Duplicate titleA", List.of("recordId1", "recordId2"));
+ final ProblemOccurrence problemOccurrence2 = new ProblemOccurrence("Duplicate titleB");
+ final RecordAnalysis recordAnalysis1 = new RecordAnalysis("recordId1", List.of(problemOccurrence1, problemOccurrence2));
+
+ assertEquals("recordId1", recordAnalysis1.getRecordId());
+ assertTrue(CollectionUtils.isEqualCollection(List.of(problemOccurrence1, problemOccurrence2),
+ recordAnalysis1.getProblemOccurrenceList()));
+
+ final RecordAnalysis recordAnalysis2 = new RecordAnalysis("recordId1", null);
+ assertNotNull(recordAnalysis2.getProblemOccurrenceList());
+ }
+
+}
\ No newline at end of file
diff --git a/metis-pattern-analysis/src/test/resources/europeana_record_with_P2.xml b/metis-pattern-analysis/src/test/resources/europeana_record_with_P2.xml
new file mode 100644
index 000000000..129932bc3
--- /dev/null
+++ b/metis-pattern-analysis/src/test/resources/europeana_record_with_P2.xml
@@ -0,0 +1,119 @@
+
+
+
+
+
+
+ text/html
+ 197506
+
+
+ image/jpeg
+ 6643
+ 640
+ 480
+ sRGB
+ #F0E68C
+ #B22222
+ #FF4500
+ #2F4F4F
+ #ADFF2F
+ #87CEEB
+ landscape
+
+
+ 50.75
+ 4.5
+
+
+
+ 1914-1919 के मध्य मुख्यतः यूरोप में व्याप्त इस महायुद्ध को प्रथम विश्व युद्ध कहते हैं । यह महायुद्ध
+ यूरोप, एशिया व अफ्रीका तीन महाद्वीपों और जल, थल तथा आकाश में लड़ा गया। इसमें भाग लेने वाले देशों की संख्या, इसका क्षेत्र
+ (जिसमें यह लड़ा गया) तथा इससे हुई क्षति के अभूतपूर्व आंकड़ों के कारण ही इसे विश्वयुद्ध कहते हैं ।
+
+ Første verdenskrig
+
+
+
+
+
+
+
+ EFG - The European Film Gateway
+ J.M.P.- Trends
+
+
+
+ 12944
+ nld
+
+
+
+ true
+ 1984
+
+
+
+
+
+ PYLYSER, JEAN-MARIE
+ same title and Description
+ 12944
+ nl
+ EFG1914
+ World War I
+ WESTVLAAMS FILMJOURNAAL - 164
+ Same title and Description
+ Newsreel
+
+
+ 1984
+ Belgium
+
+ false
+
+
+ VIDEO
+
+
+ PYLYSER, JEAN-MARIE
+ same title and Description
+ 12944
+ nl
+ EFG1914
+ World War I
+ WESTVLAAMS FILMJOURNAAL - 164
+ Same title and Description
+ Newsreel
+
+
+ 1984
+ Belgium
+
+ false
+
+
+
+ VIDEO
+
+
+
+ Europeana Foundation
+ Europeana Foundation
+ 1_test
+ Netherlands
+
+ nl
+ 10
+
+
+ The Royal Belgian Film Archives
+ Cinémathèque royale de Belgique
+ Koninklijk Belgisch Filmarchief
+
+
\ No newline at end of file
diff --git a/metis-pattern-analysis/src/test/resources/europeana_record_with_P6.xml b/metis-pattern-analysis/src/test/resources/europeana_record_with_P6.xml
new file mode 100644
index 000000000..381c8b6f9
--- /dev/null
+++ b/metis-pattern-analysis/src/test/resources/europeana_record_with_P6.xml
@@ -0,0 +1,102 @@
+
+
+
+
+
+
+ text/html
+ 197506
+
+
+ image/jpeg
+ 6643
+ 640
+ 480
+ sRGB
+ #F0E68C
+ #B22222
+ #FF4500
+ #2F4F4F
+ #ADFF2F
+ #87CEEB
+ landscape
+
+
+ 50.75
+ 4.5
+
+
+
+ 1914-1919 के मध्य मुख्यतः यूरोप में व्याप्त इस महायुद्ध को प्रथम विश्व युद्ध कहते हैं । यह महायुद्ध
+ यूरोप, एशिया व अफ्रीका तीन महाद्वीपों और जल, थल तथा आकाश में लड़ा गया। इसमें भाग लेने वाले देशों की संख्या, इसका क्षेत्र
+ (जिसमें यह लड़ा गया) तथा इससे हुई क्षति के अभूतपूर्व आंकड़ों के कारण ही इसे विश्वयुद्ध कहते हैं ।
+
+ Første verdenskrig
+
+
+
+
+
+
+
+ EFG - The European Film Gateway
+ J.M.P.- Trends
+
+
+
+ 12944
+ nld
+
+
+
+ true
+ 1984
+
+
+
+
+
+ PYLYSER, JEAN-MARIE
+ Newsitems West Flanders. Day of the Navy in Ostend, with a wreath-laying memorial service at the
+ monument of the sailors on the dike. Thereafter, a military parade takes place at the Wapenplein in the presence of Prince
+ Albert, Princess Paola, Governor Vanneste, mayor Goekindt and certain naval officers. The navy admiral gives a speech,
+ followed by a short parade. The princess salutes the flag bearers of the veterans' associations.
+
+ 12944
+ nl
+ EFG1914
+ World War I
+ WESTVLAAMS FILMJOURNAAL - 164
+ AB
+ Newsreel
+
+
+ 1984
+ Belgium
+
+ false
+
+
+ VIDEO
+
+
+
+ Europeana Foundation
+ Europeana Foundation
+ 1_test
+ Netherlands
+
+ nl
+ 10
+
+
+ The Royal Belgian Film Archives
+ Cinémathèque royale de Belgique
+ Koninklijk Belgisch Filmarchief
+
+
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 57995f0eb..1e0c90eb6 100644
--- a/pom.xml
+++ b/pom.xml
@@ -21,6 +21,7 @@
metis-transformation
metis-harvesting
metis-repository
+ metis-pattern-analysis
@@ -131,6 +132,7 @@
1.4
2.9.0
3.12.0
+ 3.2.2
1.11
2.15.3
From 603923f1fd95733bf37401ce01cf44292ce8ef4a Mon Sep 17 00:00:00 2001
From: Joana Sousa
Date: Fri, 22 Apr 2022 12:17:11 +0200
Subject: [PATCH 25/73] MET-4240 Removed input variables from
PatternAnalysisService interface
---
.../europeana/patternanalysis/PatternAnalysisService.java | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/PatternAnalysisService.java b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/PatternAnalysisService.java
index 1847e52d7..e08b08158 100644
--- a/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/PatternAnalysisService.java
+++ b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/PatternAnalysisService.java
@@ -81,12 +81,8 @@ Optional> getDatasetPatternAnalysis(String data
* If not, it should generate it on the fly. An in memory cache could be implemented internally.
*
*
- * @param datasetId the dataset identifier
- * @param executionStep the execution step
- * @param executionTimestamp the execution timestamp
* @param rdfRecord the RDF record
* @return the list of problem patterns
*/
- List getRecordPatternAnalysis(String datasetId, T executionStep, LocalDateTime executionTimestamp,
- RDF rdfRecord);
+ List getRecordPatternAnalysis(RDF rdfRecord);
}
From f75047347b4dbb0fd424232b9a230305cc782e55 Mon Sep 17 00:00:00 2001
From: JoanaCMS <70145179+JoanaCMS@users.noreply.github.com>
Date: Tue, 3 May 2022 13:30:58 +0200
Subject: [PATCH 26/73] Debt/MET-4418 Change Macedonia to North Macedonia
(#526)
* MET-4418 Update Macedonia to North Macedonia
* MET-4418 Fixed Language value
---
.../src/main/java/eu/europeana/metis/core/common/Country.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/common/Country.java b/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/common/Country.java
index 3301b436d..2c2022cdd 100644
--- a/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/common/Country.java
+++ b/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/common/Country.java
@@ -44,7 +44,7 @@ public enum Country {
LIECHTENSTEIN("Liechtenstein", "LI"),
LITHUANIA("Lithuania", "LT"),
LUXEMBOURG("Luxembourg", "LU"),
- MACEDONIA("Macedonia", "MK"),
+ NORTH_MACEDONIA("North Macedonia", "MK"),
MALTA("Malta", "MT"),
MOLDOVA("Moldova", "MD"),
MONACO("Monaco", "MC"),
From b6f29755383df53b099366684341b6c3dcfe83e6 Mon Sep 17 00:00:00 2001
From: Simon Tzanakis
Date: Tue, 3 May 2022 15:54:31 +0200
Subject: [PATCH 27/73] MET-4470 Generify execution point for pattern service
(#525)
---
.../PatternAnalysisService.java | 36 ++++++++++---------
1 file changed, 20 insertions(+), 16 deletions(-)
diff --git a/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/PatternAnalysisService.java b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/PatternAnalysisService.java
index e08b08158..b70ac6165 100644
--- a/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/PatternAnalysisService.java
+++ b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/PatternAnalysisService.java
@@ -12,23 +12,33 @@
* Interface with all methods required for a pattern analysis service
*
* @param the type of the execution step
+ * @param the type of the execution point
*/
-public interface PatternAnalysisService {
+public interface PatternAnalysisService {
/**
- * Generates the analysis of the record in RDF format.
- *
- * It will compute patterns and store all relevant information in the database
- *
+ * Initializes the pattern analysis execution and creates a unique execution point.
+ * This method should be called once, at the beginning (pre-processing) of the dataset execution.
*
* @param datasetId the datasetId
* @param executionStep the constant value of the step (Similar to eu.europeana.metis.core.workflow.plugins.PluginType from
* metis-core and eu.europeana.metis.sandbox.common.Step from metis-sandbox
* @param executionTimestamp the execution timestamp for the execution of the dataset(this should be the same for all records).
+ * @return the execution point that can be used on other calls
+ */
+ K initializePatternAnalysisExecution(String datasetId, T executionStep, LocalDateTime executionTimestamp);
+
+ /**
+ * Generates the analysis of the record in RDF format.
+ *
+ * It will compute patterns and store all relevant information in the database
+ *
+ *
+ * @param executionPoint the execution point
* @param rdfRecord the rdf record
* @throws PatternAnalysisException if an error occurred during the analysis
*/
- void generateRecordPatternAnalysis(String datasetId, T executionStep, LocalDateTime executionTimestamp, RDF rdfRecord)
+ void generateRecordPatternAnalysis(K executionPoint, RDF rdfRecord)
throws PatternAnalysisException;
/**
@@ -37,27 +47,21 @@ void generateRecordPatternAnalysis(String datasetId, T executionStep, LocalDateT
* It will compute patterns and store all relevant information in the database
*
*
- * @param datasetId the datasetId
- * @param executionStep the constant value of the step (Similar to eu.europeana.metis.core.workflow.plugins.PluginType from
- * metis-core and eu.europeana.metis.sandbox.common.Step from metis-sandbox
- * @param executionTimestamp the execution timestamp for the execution of the dataset(this should be the same for all records).
+ * @param executionPoint the execution point
* @param rdfRecord the rdf record
* @throws PatternAnalysisException if an error occurred during the analysis
*/
- void generateRecordPatternAnalysis(String datasetId, T executionStep, LocalDateTime executionTimestamp, String rdfRecord)
+ void generateRecordPatternAnalysis(K executionPoint, String rdfRecord)
throws PatternAnalysisException;
/**
* Finalizes the computation of the analysis for the dataset.
* This method should be called at the end(post-processing) of the dataset execution, to perform the final calculations
*
- * @param datasetId the datasetId
- * @param executionStep the constant value of the step (Similar to eu.europeana.metis.core.workflow.plugins.PluginType from
- * metis-core and eu.europeana.metis.sandbox.common.Step from metis-sandbox).
- * @param executionTimestamp the execution timestamp for the execution of the dataset(this should be the same for all records).
+ * @param executionPoint the execution point
* @throws PatternAnalysisException if an error occurred during the analysis
*/
- void finalizeDatasetPatternAnalysis(String datasetId, T executionStep, LocalDateTime executionTimestamp)
+ void finalizeDatasetPatternAnalysis(K executionPoint)
throws PatternAnalysisException;
/**
From d00c518ebb0ad593cd207bb03513feeab3ad2a5e Mon Sep 17 00:00:00 2001
From: Simon Tzanakis
Date: Tue, 10 May 2022 09:13:24 +0200
Subject: [PATCH 28/73] Feat/met 4457 implement problem patterns p3 p5 p7 p9
p12 (#527)
* MET-4457 Implement P5
* MET-4457 Combine choices code
* MET-4457 Implement P7
* MET-4457 Implement P9
* MET-4457 Fix P5
* MET-4457 Refactor tests
* MET-4457 Implement P12
* MET-4457 Implement P3
* MET-4457 Cover case where europeanaProxy null
* MET-4457 Do not capture identical pairs(Like P2) on P3
* MET-4457 Unit test unicode codes case
unicode codes are counted as one character
* MET-4457 Update comment
* MET-4457 Process review
---
metis-pattern-analysis/pom.xml | 10 +
.../ProblemPatternAnalyzer.java | 199 ++++++++++++++++--
.../view/ProblemPatternDescription.java | 24 +++
.../ProblemPatternAnalyzerTest.java | 92 +++++---
.../europeana_record_empty_proxy_choices.xml | 100 +++++++++
.../europeana_record_no_problem_patterns.xml | 101 +++++++++
.../resources/europeana_record_with_P12.xml | 100 +++++++++
.../resources/europeana_record_with_P2.xml | 9 +-
.../resources/europeana_record_with_P3.xml | 126 +++++++++++
.../resources/europeana_record_with_P5.xml | 114 ++++++++++
.../resources/europeana_record_with_P6.xml | 6 +-
.../resources/europeana_record_with_P7.xml | 95 +++++++++
...eana_record_with_P7_descriptions_empty.xml | 99 +++++++++
.../resources/europeana_record_with_P9.xml | 98 +++++++++
pom.xml | 1 +
15 files changed, 1116 insertions(+), 58 deletions(-)
create mode 100644 metis-pattern-analysis/src/test/resources/europeana_record_empty_proxy_choices.xml
create mode 100644 metis-pattern-analysis/src/test/resources/europeana_record_no_problem_patterns.xml
create mode 100644 metis-pattern-analysis/src/test/resources/europeana_record_with_P12.xml
create mode 100644 metis-pattern-analysis/src/test/resources/europeana_record_with_P3.xml
create mode 100644 metis-pattern-analysis/src/test/resources/europeana_record_with_P5.xml
create mode 100644 metis-pattern-analysis/src/test/resources/europeana_record_with_P7.xml
create mode 100644 metis-pattern-analysis/src/test/resources/europeana_record_with_P7_descriptions_empty.xml
create mode 100644 metis-pattern-analysis/src/test/resources/europeana_record_with_P9.xml
diff --git a/metis-pattern-analysis/pom.xml b/metis-pattern-analysis/pom.xml
index 6dfeaf2e2..00ffc9309 100644
--- a/metis-pattern-analysis/pom.xml
+++ b/metis-pattern-analysis/pom.xml
@@ -31,6 +31,12 @@
org.apache.commons
commons-lang3
+
+ org.apache.commons
+ commons-text
+ ${version.apache.commons.text}
+
+
org.junit.jupiter
@@ -40,6 +46,10 @@
org.junit.jupiter
junit-jupiter-engine
+
+ org.junit.jupiter
+ junit-jupiter-params
+
com.fasterxml.jackson.core
jackson-databind
diff --git a/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/ProblemPatternAnalyzer.java b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/ProblemPatternAnalyzer.java
index 665b89823..6960374de 100644
--- a/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/ProblemPatternAnalyzer.java
+++ b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/ProblemPatternAnalyzer.java
@@ -2,37 +2,55 @@
import static java.lang.String.format;
import static java.util.function.Predicate.not;
+import static java.util.stream.Collectors.toList;
+import static java.util.stream.Collectors.toMap;
+import static java.util.stream.Collectors.toSet;
+import static org.apache.commons.lang3.BooleanUtils.isFalse;
import eu.europeana.metis.schema.convert.RdfConversionUtils;
import eu.europeana.metis.schema.convert.SerializationException;
-import eu.europeana.metis.schema.jibx.Description;
import eu.europeana.metis.schema.jibx.EuropeanaType;
import eu.europeana.metis.schema.jibx.EuropeanaType.Choice;
+import eu.europeana.metis.schema.jibx.LiteralType;
import eu.europeana.metis.schema.jibx.ProvidedCHOType;
import eu.europeana.metis.schema.jibx.ProxyType;
import eu.europeana.metis.schema.jibx.RDF;
-import eu.europeana.metis.schema.jibx.Title;
+import eu.europeana.metis.schema.jibx.ResourceOrLiteralType;
import eu.europeana.patternanalysis.view.ProblemOccurrence;
import eu.europeana.patternanalysis.view.ProblemPattern;
import eu.europeana.patternanalysis.view.ProblemPatternDescription;
import eu.europeana.patternanalysis.view.RecordAnalysis;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
+import java.util.function.Function;
+import java.util.function.Predicate;
+import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.text.similarity.LongestCommonSubsequence;
/**
* Class that contains functionality to analyze a record and retrieve all problem patterns.
*/
public class ProblemPatternAnalyzer {
- public static final int MIN_TITLE_LENGTH = 2;
+ private static final int MIN_TITLE_LENGTH = 2;
+ private static final int MAX_TITLE_LENGTH = 70;
+ private static final int MIN_DESCRIPTION_LENGTH = 50;
+ private static final int UNRECOGNIZABLE_CHARACTERS_THRESHOLD = 5;
+ private static final double LCS_CALCULATION_THRESHOLD = 0.9;
+ private static final int TITLE_DESCRIPTION_LENGTH_DISTANCE = 20;
+ // Match anything that is not alphanumeric in all languages or literal spaces. We cannot just use \\w
+ private static final String UNRECOGNIZABLE_CHARACTERS_REGEX = "[^\\p{IsAlphabetic}\\p{IsDigit} ]";
+ private static final Pattern UNRECOGNIZABLE_CHARACTERS_PATTERN = Pattern.compile(UNRECOGNIZABLE_CHARACTERS_REGEX);
/**
* Analyzes a record for problem patterns.
@@ -53,51 +71,186 @@ public List analyzeRecord(String rdfString) throws Serialization
*/
public List analyzeRecord(RDF rdf) {
final List providerProxies = getProviderProxies(rdf);
- final List titles = providerProxies.stream().map(EuropeanaType::getChoiceList).flatMap(Collection::stream)
- .filter(Choice::ifTitle).map(Choice::getTitle)
- .map(Title::getString)
- .filter(StringUtils::isNotBlank)
- .map(String::trim)
- .collect(Collectors.toList());
- final List descriptions = providerProxies.stream().map(EuropeanaType::getChoiceList).flatMap(Collection::stream)
- .filter(Choice::ifDescription).map(Choice::getDescription)
- .map(Description::getString)
- .filter(StringUtils::isNotBlank)
- .map(String::trim)
- .collect(Collectors.toList());
+ final List choices = providerProxies.stream().map(EuropeanaType::getChoiceList)
+ .filter(Objects::nonNull)
+ .flatMap(Collection::stream)
+ .collect(toList());
+
+ final List titles = getChoicesInStringList(choices, Choice::ifTitle, Choice::getTitle, LiteralType::getString);
+ final List descriptions = getChoicesInStringList(choices, Choice::ifDescription, Choice::getDescription,
+ ResourceOrLiteralType::getString);
+ final List identifiers = getChoicesInStringList(choices, Choice::ifIdentifier, Choice::getIdentifier,
+ LiteralType::getString);
final String rdfAbout = rdf.getProvidedCHOList().stream().filter(Objects::nonNull).findFirst()
.map(ProvidedCHOType::getAbout).orElse(null);
- return computeProblemPatterns(rdfAbout, titles, descriptions);
+ return computeProblemPatterns(rdfAbout, titles, descriptions, identifiers);
+ }
+
+ private List getChoicesInStringList(List choices, Predicate choicePredicate,
+ Function choiceGetter, Function getString) {
+ return choices.stream().filter(Objects::nonNull).filter(choicePredicate).map(choiceGetter).map(getString).collect(toList());
}
- private ArrayList computeProblemPatterns(String rdfAbout, List titles, List descriptions) {
+ private ArrayList computeProblemPatterns(String rdfAbout, List titles, List descriptions,
+ List identifiers) {
final ArrayList problemPatterns = new ArrayList<>();
constructProblemPattern(rdfAbout, ProblemPatternDescription.P2, checkP2(titles, descriptions)).ifPresent(
problemPatterns::add);
+ constructProblemPattern(rdfAbout, ProblemPatternDescription.P3, checkP3(titles, descriptions)).ifPresent(
+ problemPatterns::add);
+ constructProblemPattern(rdfAbout, ProblemPatternDescription.P5, checkP5(titles, identifiers)).ifPresent(problemPatterns::add);
constructProblemPattern(rdfAbout, ProblemPatternDescription.P6, checkP6(titles)).ifPresent(problemPatterns::add);
+ constructProblemPattern(rdfAbout, ProblemPatternDescription.P7, checkP7(descriptions)).ifPresent(problemPatterns::add);
+ constructProblemPattern(rdfAbout, ProblemPatternDescription.P9, checkP9(descriptions)).ifPresent(problemPatterns::add);
+ constructProblemPattern(rdfAbout, ProblemPatternDescription.P12, checkP12(titles)).ifPresent(problemPatterns::add);
return problemPatterns;
}
+ private static boolean isProviderProxy(ProxyType proxy) {
+ return proxy.getEuropeanaProxy() == null || isFalse(proxy.getEuropeanaProxy().isEuropeanaProxy());
+ }
+
private List getProviderProxies(RDF rdf) {
- return rdf.getProxyList().stream().filter(proxyType -> proxyType.getEuropeanaProxy() != null)
- .filter(not(proxyType -> proxyType.getEuropeanaProxy().isEuropeanaProxy())).collect(Collectors.toList());
+ return Optional.ofNullable(rdf.getProxyList()).stream().flatMap(Collection::stream)
+ .filter(Objects::nonNull).filter(ProblemPatternAnalyzer::isProviderProxy)
+ .collect(Collectors.toList());
}
+ /**
+ * Check whether there is a title - description pair for which the values are equal, ignoring letter (upper or lower) case.
+ * It will report a single occurrence for multiple same fields
+ *
+ * @param titles the list of titles
+ * @param descriptions the list of descriptions
+ * @return the list of problem occurrences encountered
+ */
private List checkP2(List titles, List descriptions) {
- final Set uniqueTitles = titles.stream().map(String::toLowerCase).collect(Collectors.toSet());
- final Set uniqueDescriptions = descriptions.stream().map(String::toLowerCase).collect(Collectors.toSet());
+ final Set uniqueTitles = titles.stream().map(String::toLowerCase).collect(toSet());
+ final Set uniqueDescriptions = descriptions.stream().map(String::toLowerCase).collect(toSet());
final HashSet equalTitlesAndDescriptions = new HashSet<>(uniqueTitles);
equalTitlesAndDescriptions.retainAll(uniqueDescriptions);
return equalTitlesAndDescriptions.stream().map(
value -> new ProblemOccurrence(format("Equal(lower cased) title and description: %s", value))
- ).collect(Collectors.toList());
+ ).collect(toList());
}
+ /**
+ * Check whether there is a title - description pair for which the values are too similar.
+ *
+ * The solution is based on the LCS algorithm (Longest
+ * Common Subsequence).
+ *
+ * The formula chosen is:
+ *
+ * {@code LCS(title, description) / minimum(length(title), length(desc)) >= 0.9 && |length(title)-length(desc)| <= 20}
+ *
+ * Blank values are filtered out. Titles and descriptions that are equal, ignoring letter (upper or lower) case, are filtered
+ * out. A title that occurs more than once is reported only once, so its near-identical descriptions are not duplicated.
+ *
+ * @param titles the list of titles
+ * @param descriptions the list of descriptions
+ * @return the list of problem occurrences encountered
+ */
+ private List checkP3(List titles, List descriptions) {
+ final Map> nearIdenticalTitleDescriptionsMap =
+ titles.stream().filter(StringUtils::isNotBlank)
+ .collect(toMap(title -> title, title -> nearIdenticalDescriptions(title, descriptions), (t1, t2) -> t1));
+
+ return nearIdenticalTitleDescriptionsMap.entrySet().stream().flatMap(
+ entry -> entry.getValue().stream().map(
+ value -> new ProblemOccurrence(format("Near-Identical title and description fields: %s | %s", entry.getKey(), value))
+ )
+ ).collect(toList());
+ }
+
+ private List nearIdenticalDescriptions(String title, List descriptions) {
+ final LongestCommonSubsequence longestCommonSubsequence = new LongestCommonSubsequence();
+ final Predicate lcsPredicate = description ->
+ ((double) longestCommonSubsequence.apply(title, description) / Math.min(title.length(), description.length()))
+ >= LCS_CALCULATION_THRESHOLD;
+ final Predicate distancePredicate = description -> Math.abs(title.length() - description.length())
+ <= TITLE_DESCRIPTION_LENGTH_DISTANCE;
+ return descriptions.stream().filter(StringUtils::isNotBlank).filter(not(title::equalsIgnoreCase))
+ .filter(lcsPredicate.and(distancePredicate)).collect(toList());
+ }
+
+ /**
+ * Check whether a title is not human-readable (unrecognizable).
+ *
+ * A title is reported when at least one of the following holds:
+ *
+ * - It has more than 5 characters that are not valid.
+ * Non valid characters are considered characters that are not alphanumeric and are not simple "literal" spaces(tabs,
+ * new lines etc. are considered invalid characters).
+ * This is performed with regex unicode matching {@link #UNRECOGNIZABLE_CHARACTERS_REGEX} and should support all languages.
+ * For more information check unicode regex
+ * - It contains one of the non-blank identifiers as a substring.
+ * Blank identifiers are skipped: every string contains the empty string, so one would make every title be reported.
+ *
+ *
+ * @param titles the list of titles
+ * @param identifiers the list of identifiers
+ * @return the list of problem occurrences encountered
+ */
+ private List checkP5(List titles, List identifiers) {
+ final Predicate moreThanThresholdUnrecognizableCharacters = s ->
+ UNRECOGNIZABLE_CHARACTERS_PATTERN.matcher(s).results().count() > UNRECOGNIZABLE_CHARACTERS_THRESHOLD;
+ final Predicate containsIdentifier = s -> identifiers.stream().filter(StringUtils::isNotBlank).anyMatch(s::contains);
+ return titles.stream().filter(moreThanThresholdUnrecognizableCharacters.or(containsIdentifier))
+ .map(title -> new ProblemOccurrence(format("Unrecognized title: %s", title))
+ ).collect(toList());
+ }
+
+ /**
+ * Check whether the record has titles of {@link #MIN_TITLE_LENGTH} characters or fewer.
+ *
+ * @param titles the list of titles
+ * @return the list of problem occurrences encountered
+ */
private List checkP6(List titles) {
return titles.stream().filter(title -> title.length() <= MIN_TITLE_LENGTH)
- .map(title -> new ProblemOccurrence(format("Non meaningful title: %s", title))).collect(Collectors.toList());
+ .map(title -> new ProblemOccurrence(format("Non meaningful title: %s", title))).collect(toList());
+ }
+
+ /**
+ * Check whether the record is lacking a description (or only has empty descriptions).
+ *
+ * @param descriptions the list of descriptions
+ * @return the list of problem occurrences encountered
+ */
+ private List checkP7(List descriptions) {
+ if (CollectionUtils.isEmpty(descriptions) || descriptions.stream().allMatch(StringUtils::isBlank)) {
+ return List.of(new ProblemOccurrence("Missing description fields"));
+ }
+ return Collections.emptyList();
+ }
+
+ /**
+ * Check whether the record has descriptions of {@link #MIN_DESCRIPTION_LENGTH} characters or fewer.
+ * Blank values are filtered out
+ *
+ * @param descriptions the list of descriptions
+ * @return the list of problem occurrences encountered
+ */
+ private List checkP9(List descriptions) {
+ return descriptions.stream().filter(StringUtils::isNotBlank)
+ .filter(description -> description.length() <= MIN_DESCRIPTION_LENGTH)
+ .map(description -> new ProblemOccurrence(format("Very short description: %s", description)))
+ .collect(toList());
+ }
+
+ /**
+ * Check whether the record has titles of more than {@link #MAX_TITLE_LENGTH} characters.
+ * Unicode codes are converted to relevant characters(counted as one character) and the length of that is checked.
+ *
+ * @param titles the list of titles
+ * @return the list of problem occurrences encountered
+ */
+ private List checkP12(List titles) {
+ return titles.stream().filter(title -> title.length() > MAX_TITLE_LENGTH)
+ .map(title -> new ProblemOccurrence(format("Extremely long title: %s", title))).collect(toList());
}
private Optional constructProblemPattern(String recordId, ProblemPatternDescription problemPatternDescription,
diff --git a/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/ProblemPatternDescription.java b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/ProblemPatternDescription.java
index 58dc8c886..3983d9d64 100644
--- a/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/ProblemPatternDescription.java
+++ b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/ProblemPatternDescription.java
@@ -9,13 +9,37 @@
@JsonFormat(shape = JsonFormat.Shape.OBJECT)
public enum ProblemPatternDescription {
+ /**
+ * Systematic use of the same title
+ */
P1(ProblemPatternId.P1, ProblemPatternSeverity.WARNING, ProblemPatternQualityDimension.CONCISENESS),
+ /**
+ * Equal title and description fields
+ */
P2(ProblemPatternId.P2, ProblemPatternSeverity.WARNING, ProblemPatternQualityDimension.CONCISENESS),
+ /**
+ * Near-Identical title and description fields
+ */
P3(ProblemPatternId.P3, ProblemPatternSeverity.NOTICE, ProblemPatternQualityDimension.CONCISENESS),
+ /**
+ * Unrecognizable title
+ */
P5(ProblemPatternId.P5, ProblemPatternSeverity.NOTICE, ProblemPatternQualityDimension.ACCURACY),
+ /**
+ * Non-meaningful title
+ */
P6(ProblemPatternId.P6, ProblemPatternSeverity.NOTICE, ProblemPatternQualityDimension.ACCURACY),
+ /**
+ * Missing description fields
+ */
P7(ProblemPatternId.P7, ProblemPatternSeverity.NOTICE, ProblemPatternQualityDimension.COMPLETENESS),
+ /**
+ * Very short description
+ */
P9(ProblemPatternId.P9, ProblemPatternSeverity.WARNING, ProblemPatternQualityDimension.ACCURACY),
+ /**
+ * Extremely long values
+ */
P12(ProblemPatternId.P12, ProblemPatternSeverity.NOTICE, ProblemPatternQualityDimension.ACCURACY);
private final ProblemPatternId problemPatternId;
diff --git a/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/ProblemPatternAnalyzerTest.java b/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/ProblemPatternAnalyzerTest.java
index 60d801f47..b23f8c499 100644
--- a/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/ProblemPatternAnalyzerTest.java
+++ b/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/ProblemPatternAnalyzerTest.java
@@ -3,47 +3,89 @@
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
-import eu.europeana.metis.schema.convert.RdfConversionUtils;
-import eu.europeana.metis.schema.jibx.RDF;
+import eu.europeana.metis.schema.convert.SerializationException;
import eu.europeana.patternanalysis.view.ProblemPattern;
import eu.europeana.patternanalysis.view.ProblemPatternDescription;
import java.io.FileInputStream;
+import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;
+import java.util.stream.Stream;
import org.apache.commons.io.IOUtils;
-import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
class ProblemPatternAnalyzerTest {
- @Test
- void analyzeRecord_P2() throws Exception {
- //Should contain two provider proxies that each contain a pair of identical title and description. All four values are identical on the two proxies.
- String xml = IOUtils.toString(new FileInputStream("src/test/resources/europeana_record_with_P2.xml"),
- StandardCharsets.UTF_8);
- final RDF rdf = new RdfConversionUtils().convertStringToRdf(xml);
+ public static final String FILE_XML_NO_PROBLEM_PATTERNS_LOCATION = "src/test/resources/europeana_record_no_problem_patterns.xml";
+ public static final String FILE_XML_EMPTY_CHOICES_LOCATION = "src/test/resources/europeana_record_empty_proxy_choices.xml";
+ public static final String FILE_XML_P2_LOCATION = "src/test/resources/europeana_record_with_P2.xml";
+ public static final String FILE_XML_P3_LOCATION = "src/test/resources/europeana_record_with_P3.xml";
+ public static final String FILE_XML_P5_LOCATION = "src/test/resources/europeana_record_with_P5.xml";
+ public static final String FILE_XML_P6_LOCATION = "src/test/resources/europeana_record_with_P6.xml";
+ public static final String FILE_XML_P7_LOCATION = "src/test/resources/europeana_record_with_P7.xml";
+ public static final String FILE_XML_P7_DESCRIPTIONS_EMPTY_LOCATION = "src/test/resources/europeana_record_with_P7_descriptions_empty.xml";
+ public static final String FILE_XML_P9_LOCATION = "src/test/resources/europeana_record_with_P9.xml";
+ public static final String FILE_XML_P12_LOCATION = "src/test/resources/europeana_record_with_P12.xml";
- final ProblemPatternAnalyzer problemPatternAnalyzer = new ProblemPatternAnalyzer();
- final List problemPatterns = problemPatternAnalyzer.analyzeRecord(rdf);
+ private static Stream analyzeRecord() {
+ return Stream.of(
+ //Should not have any problem patterns generated
+ Arguments.of(FILE_XML_NO_PROBLEM_PATTERNS_LOCATION, 0, null, 0),
+ //Should not have any choices on the provider proxy (to check for a null list), therefore reporting only P7
+ Arguments.of(FILE_XML_EMPTY_CHOICES_LOCATION, 1, ProblemPatternDescription.P7, 1),
+ //Should contain two provider proxies that each contain a pair of identical title and description. All four values are identical on the two proxies.
+ Arguments.of(FILE_XML_P2_LOCATION, 1, ProblemPatternDescription.P2, 1),
+ //Should contain identical titles, very similar ones and also completely different ones
+ Arguments.of(FILE_XML_P3_LOCATION, 2, ProblemPatternDescription.P3, 1),
+ //Should contain valid titles in different languages and unrecognizable titles
+ Arguments.of(FILE_XML_P5_LOCATION, 1, ProblemPatternDescription.P5, 3),
+ //Should contain one title that is not meaningful(too short)
+ Arguments.of(FILE_XML_P6_LOCATION, 1, ProblemPatternDescription.P6, 1),
+ //Should not contain any descriptions
+ Arguments.of(FILE_XML_P7_LOCATION, 1, ProblemPatternDescription.P7, 1),
+ //Should contain multiple descriptions that are "empty"
+ Arguments.of(FILE_XML_P7_DESCRIPTIONS_EMPTY_LOCATION, 1, ProblemPatternDescription.P7, 1),
+ //Should contain descriptions with a length less than or equal to the threshold
+ Arguments.of(FILE_XML_P9_LOCATION, 1, ProblemPatternDescription.P9, 2),
+ //Should contain a title with length more than threshold
+ Arguments.of(FILE_XML_P12_LOCATION, 1, ProblemPatternDescription.P12, 1)
+ );
+ }
- assertNotNull(problemPatterns);
- assertEquals(1, problemPatterns.size());
- assertEquals(ProblemPatternDescription.P2, problemPatterns.get(0).getProblemPatternDescription());
- assertEquals(1, problemPatterns.get(0).getRecordAnalysisList().get(0).getProblemOccurrenceList().size());
+ private ProblemPatternDescription getRequestedProblemPattern(ProblemPatternDescription problemPatternDescription,
+ List problemPatterns) {
+ return problemPatterns.stream()
+ .map(ProblemPattern::getProblemPatternDescription)
+ .filter(patternDescription -> patternDescription == problemPatternDescription).findFirst().orElse(null);
}
- @Test
- void analyzeRecord_P6() throws Exception {
- //Should contain one title that is not meaningful(too short)
- String xml = IOUtils.toString(new FileInputStream("src/test/resources/europeana_record_with_P6.xml"),
- StandardCharsets.UTF_8);
- final RDF rdf = new RdfConversionUtils().convertStringToRdf(xml);
+ private List analyzeProblemPatternsForFile(String fileLocation) throws IOException, SerializationException {
+ String xml = IOUtils.toString(new FileInputStream(fileLocation), StandardCharsets.UTF_8);
final ProblemPatternAnalyzer problemPatternAnalyzer = new ProblemPatternAnalyzer();
- final List problemPatterns = problemPatternAnalyzer.analyzeRecord(rdf);
+ return problemPatternAnalyzer.analyzeRecord(xml);
+ }
+
+ private int getRequestedProblemOccurrencesSize(ProblemPatternDescription problemPatternDescription,
+ List problemPatterns) {
+ return problemPatterns.stream()
+ .filter(problemPattern -> problemPattern.getProblemPatternDescription()
+ == problemPatternDescription)
+ .map(problemPattern -> problemPattern.getRecordAnalysisList().get(0).getProblemOccurrenceList().size())
+ .findFirst().orElse(0);
+ }
+
+ @ParameterizedTest(name = "[{index}] - For file:{0}, totalPatterns:{1}, patternId:{2}, totalOccurrences:{3}")
+ @MethodSource
+ void analyzeRecord(String fileLocation, int totalPatterns, ProblemPatternDescription problemPatternDescription,
+ int totalOccurrences) throws Exception {
+ final List problemPatterns = analyzeProblemPatternsForFile(fileLocation);
assertNotNull(problemPatterns);
- assertEquals(1, problemPatterns.size());
- assertEquals(ProblemPatternDescription.P6, problemPatterns.get(0).getProblemPatternDescription());
- assertEquals(1, problemPatterns.get(0).getRecordAnalysisList().get(0).getProblemOccurrenceList().size());
+ assertEquals(totalPatterns, problemPatterns.size());
+ assertEquals(problemPatternDescription, getRequestedProblemPattern(problemPatternDescription, problemPatterns));
+ assertEquals(totalOccurrences, getRequestedProblemOccurrencesSize(problemPatternDescription, problemPatterns));
}
}
\ No newline at end of file
diff --git a/metis-pattern-analysis/src/test/resources/europeana_record_empty_proxy_choices.xml b/metis-pattern-analysis/src/test/resources/europeana_record_empty_proxy_choices.xml
new file mode 100644
index 000000000..de7343397
--- /dev/null
+++ b/metis-pattern-analysis/src/test/resources/europeana_record_empty_proxy_choices.xml
@@ -0,0 +1,100 @@
+
+
+
+
+
+
+ text/html
+ 197506
+
+
+ image/jpeg
+ 6643
+ 640
+ 480
+ sRGB
+ #F0E68C
+ #B22222
+ #FF4500
+ #2F4F4F
+ #ADFF2F
+ #87CEEB
+ landscape
+
+
+ 50.75
+ 4.5
+
+
+
+ 1914-1919 के मध्य मुख्यतः यूरोप में व्याप्त इस महायुद्ध को प्रथम विश्व युद्ध कहते हैं । यह महायुद्ध
+ यूरोप, एशिया व अफ्रीका तीन महाद्वीपों और जल, थल तथा आकाश में लड़ा गया। इसमें भाग लेने वाले देशों की संख्या, इसका क्षेत्र
+ (जिसमें यह लड़ा गया) तथा इससे हुई क्षति के अभूतपूर्व आंकड़ों के कारण ही इसे विश्वयुद्ध कहते हैं ।
+
+ Første verdenskrig
+
+
+
+
+
+
+
+ EFG - The European Film Gateway
+ J.M.P.- Trends
+
+
+
+ 12944
+ nld
+
+
+
+ true
+ 1984
+
+
+
+
+
+
+
+
+
+ PYLYSER, JEAN-MARIE
+ 12944
+ nl
+ EFG1914
+ World War I
+ WESTVLAAMS FILMJOURNAAL - 164
+ Newsreel
+
+
+ 1984
+ Belgium
+
+
+
+
+ VIDEO
+
+
+
+ Europeana Foundation
+ Europeana Foundation
+ 1_test
+ Netherlands
+
+ nl
+ 10
+
+
+ The Royal Belgian Film Archives
+ Cinémathèque royale de Belgique
+ Koninklijk Belgisch Filmarchief
+
+
\ No newline at end of file
diff --git a/metis-pattern-analysis/src/test/resources/europeana_record_no_problem_patterns.xml b/metis-pattern-analysis/src/test/resources/europeana_record_no_problem_patterns.xml
new file mode 100644
index 000000000..de7da14ec
--- /dev/null
+++ b/metis-pattern-analysis/src/test/resources/europeana_record_no_problem_patterns.xml
@@ -0,0 +1,101 @@
+
+
+
+
+
+
+ text/html
+ 197506
+
+
+ image/jpeg
+ 6643
+ 640
+ 480
+ sRGB
+ #F0E68C
+ #B22222
+ #FF4500
+ #2F4F4F
+ #ADFF2F
+ #87CEEB
+ landscape
+
+
+ 50.75
+ 4.5
+
+
+
+ 1914-1919 के मध्य मुख्यतः यूरोप में व्याप्त इस महायुद्ध को प्रथम विश्व युद्ध कहते हैं । यह महायुद्ध
+ यूरोप, एशिया व अफ्रीका तीन महाद्वीपों और जल, थल तथा आकाश में लड़ा गया। इसमें भाग लेने वाले देशों की संख्या, इसका क्षेत्र
+ (जिसमें यह लड़ा गया) तथा इससे हुई क्षति के अभूतपूर्व आंकड़ों के कारण ही इसे विश्वयुद्ध कहते हैं ।
+
+ Første verdenskrig
+
+
+
+
+
+
+
+ EFG - The European Film Gateway
+ J.M.P.- Trends
+
+
+
+ 12944
+ nld
+
+
+
+ true
+ 1984
+
+
+
+
+
+ This description is more 50 charactersAAAAAAAAAAAAA
+
+
+
+
+ PYLYSER, JEAN-MARIE
+ 12944
+ nl
+ EFG1914
+ World War I
+ WESTVLAAMS FILMJOURNAAL - 164
+ Newsreel
+
+
+ 1984
+ Belgium
+
+
+
+
+ VIDEO
+
+
+
+ Europeana Foundation
+ Europeana Foundation
+ 1_test
+ Netherlands
+
+ nl
+ 10
+
+
+ The Royal Belgian Film Archives
+ Cinémathèque royale de Belgique
+ Koninklijk Belgisch Filmarchief
+
+
\ No newline at end of file
diff --git a/metis-pattern-analysis/src/test/resources/europeana_record_with_P12.xml b/metis-pattern-analysis/src/test/resources/europeana_record_with_P12.xml
new file mode 100644
index 000000000..aa9be5b82
--- /dev/null
+++ b/metis-pattern-analysis/src/test/resources/europeana_record_with_P12.xml
@@ -0,0 +1,100 @@
+
+
+
+
+
+
+ text/html
+ 197506
+
+
+ image/jpeg
+ 6643
+ 640
+ 480
+ sRGB
+ #F0E68C
+ #B22222
+ #FF4500
+ #2F4F4F
+ #ADFF2F
+ #87CEEB
+ landscape
+
+
+ 50.75
+ 4.5
+
+
+
+ 1914-1919 के मध्य मुख्यतः यूरोप में व्याप्त इस महायुद्ध को प्रथम विश्व युद्ध कहते हैं । यह महायुद्ध
+ यूरोप, एशिया व अफ्रीका तीन महाद्वीपों और जल, थल तथा आकाश में लड़ा गया। इसमें भाग लेने वाले देशों की संख्या, इसका क्षेत्र
+ (जिसमें यह लड़ा गया) तथा इससे हुई क्षति के अभूतपूर्व आंकड़ों के कारण ही इसे विश्वयुद्ध कहते हैं ।
+
+ Første verdenskrig
+
+
+
+
+
+
+
+ EFG - The European Film Gateway
+ J.M.P.- Trends
+
+
+
+ 12944
+ nld
+
+
+
+ true
+ 1984
+
+
+
+
+
+ PYLYSER, JEAN-MARIE
+ This description is more than 50 charactersAAAAAAAA
+
+ 12944
+ nl
+ EFG1914
+ World War I
+ WESTVLAAMS FILMJOURNAAL - 164
+ This title is more than 70 characters long AAAAAAAAAAAAAAAAAAAAAAAAAAAA
+
+ This title is more than 70 characters long
+ Newsreel
+
+
+ 1984
+ Belgium
+
+
+
+ VIDEO
+
+
+
+ Europeana Foundation
+ Europeana Foundation
+ 1_test
+ Netherlands
+
+ nl
+ 10
+
+
+ The Royal Belgian Film Archives
+ Cinémathèque royale de Belgique
+ Koninklijk Belgisch Filmarchief
+
+
\ No newline at end of file
diff --git a/metis-pattern-analysis/src/test/resources/europeana_record_with_P2.xml b/metis-pattern-analysis/src/test/resources/europeana_record_with_P2.xml
index 129932bc3..0fbd36718 100644
--- a/metis-pattern-analysis/src/test/resources/europeana_record_with_P2.xml
+++ b/metis-pattern-analysis/src/test/resources/europeana_record_with_P2.xml
@@ -62,13 +62,13 @@
PYLYSER, JEAN-MARIE
- same title and Description
+ same title and Element more than 50 charactersAAAAA
12944
nl
EFG1914
World War I
WESTVLAAMS FILMJOURNAAL - 164
- Same title and Description
+ Same title and Element more than 50 charactersAAAAA
Newsreel
@@ -82,20 +82,19 @@
PYLYSER, JEAN-MARIE
- same title and Description
+
12944
nl
EFG1914
World War I
WESTVLAAMS FILMJOURNAAL - 164
- Same title and Description
+
Newsreel
1984
Belgium
- false
diff --git a/metis-pattern-analysis/src/test/resources/europeana_record_with_P3.xml b/metis-pattern-analysis/src/test/resources/europeana_record_with_P3.xml
new file mode 100644
index 000000000..be1f7e116
--- /dev/null
+++ b/metis-pattern-analysis/src/test/resources/europeana_record_with_P3.xml
@@ -0,0 +1,126 @@
+
+
+
+
+
+
+ text/html
+ 197506
+
+
+ image/jpeg
+ 6643
+ 640
+ 480
+ sRGB
+ #F0E68C
+ #B22222
+ #FF4500
+ #2F4F4F
+ #ADFF2F
+ #87CEEB
+ landscape
+
+
+ 50.75
+ 4.5
+
+
+
+ 1914-1919 के मध्य मुख्यतः यूरोप में व्याप्त इस महायुद्ध को प्रथम विश्व युद्ध कहते हैं । यह महायुद्ध
+ यूरोप, एशिया व अफ्रीका तीन महाद्वीपों और जल, थल तथा आकाश में लड़ा गया। इसमें भाग लेने वाले देशों की संख्या, इसका क्षेत्र
+ (जिसमें यह लड़ा गया) तथा इससे हुई क्षति के अभूतपूर्व आंकड़ों के कारण ही इसे विश्वयुद्ध कहते हैं ।
+
+ Første verdenskrig
+
+
+
+
+
+
+
+ EFG - The European Film Gateway
+ J.M.P.- Trends
+
+
+
+ 12944
+ nld
+
+
+
+ true
+ 1984
+
+
+
+
+
+ PYLYSER, JEAN-MARIE
+ same title and Element more than 50 charactersAAAAA
+ 12944
+ nl
+ EFG1914
+ World War I
+ WESTVLAAMS FILMJOURNAAL - 164
+ Same title and Element more than 50 charactersAAAAA
+ Newsreel
+
+
+ 1984
+ Belgium
+
+ false
+
+
+ VIDEO
+
+
+ PYLYSER, JEAN-MARIE
+ same title and Element more than 50 charactersAAAAA
+
+ AThis is an element to try B a near identical occurrence C
+
+ AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+ 12944
+ nl
+ EFG1914
+ World War I
+ WESTVLAAMS FILMJOURNAAL - 164
+ Same title and Element more than 50 charactersAAAAA
+ This is an element to try a near identical occurrence
+ This is title not achieving near identicality
+
+ AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+ Newsreel
+
+
+ 1984
+ Belgium
+
+
+
+
+ VIDEO
+
+
+
+ Europeana Foundation
+ Europeana Foundation
+ 1_test
+ Netherlands
+
+ nl
+ 10
+
+
+ The Royal Belgian Film Archives
+ Cinémathèque royale de Belgique
+ Koninklijk Belgisch Filmarchief
+
+
\ No newline at end of file
diff --git a/metis-pattern-analysis/src/test/resources/europeana_record_with_P5.xml b/metis-pattern-analysis/src/test/resources/europeana_record_with_P5.xml
new file mode 100644
index 000000000..89b90d661
--- /dev/null
+++ b/metis-pattern-analysis/src/test/resources/europeana_record_with_P5.xml
@@ -0,0 +1,114 @@
+
+
+
+
+
+
+ text/html
+ 197506
+
+
+ image/jpeg
+ 6643
+ 640
+ 480
+ sRGB
+ #F0E68C
+ #B22222
+ #FF4500
+ #2F4F4F
+ #ADFF2F
+ #87CEEB
+ landscape
+
+
+ 50.75
+ 4.5
+
+
+
+ 1914-1919 के मध्य मुख्यतः यूरोप में व्याप्त इस महायुद्ध को प्रथम विश्व युद्ध कहते हैं । यह महायुद्ध
+ यूरोप, एशिया व अफ्रीका तीन महाद्वीपों और जल, थल तथा आकाश में लड़ा गया। इसमें भाग लेने वाले देशों की संख्या, इसका क्षेत्र
+ (जिसमें यह लड़ा गया) तथा इससे हुई क्षति के अभूतपूर्व आंकड़ों के कारण ही इसे विश्वयुद्ध कहते हैं ।
+
+ Første verdenskrig
+
+
+
+
+
+
+
+ EFG - The European Film Gateway
+ J.M.P.- Trends
+
+
+
+ 12944
+ nld
+
+
+
+ true
+ 1984
+
+
+
+
+
+ PYLYSER, JEAN-MARIE
+ This description is more than 50 charactersAAAAAAAA
+
+ 12944
+ nl
+ EFG1914
+ World War I
+ WESTVLAAMS FILMJOURNAAL - 164
+ के मध्य मुख्यतः यूरोप में व्याप्त इस
+ Καλημέρα Ελλάδα
+ Καλημέρα - Ελλάδα
+
+ Καλημέρα
+ Ελλάδα
+
+ Contains identifier 12944
+ Contains more than 5 non-alphanumeric ?!.?!.
+ Contains more than 5 non-alphanumeric(new lines)
+
+
+
+
+
+
+ မြန်မာဘာသာ
+ Newsreel
+
+
+ 1984
+ Belgium
+
+
+
+ VIDEO
+
+
+
+ Europeana Foundation
+ Europeana Foundation
+ 1_test
+ Netherlands
+
+ nl
+ 10
+
+
+ The Royal Belgian Film Archives
+ Cinémathèque royale de Belgique
+ Koninklijk Belgisch Filmarchief
+
+
\ No newline at end of file
diff --git a/metis-pattern-analysis/src/test/resources/europeana_record_with_P6.xml b/metis-pattern-analysis/src/test/resources/europeana_record_with_P6.xml
index 381c8b6f9..e5b598e5a 100644
--- a/metis-pattern-analysis/src/test/resources/europeana_record_with_P6.xml
+++ b/metis-pattern-analysis/src/test/resources/europeana_record_with_P6.xml
@@ -62,10 +62,7 @@
PYLYSER, JEAN-MARIE
- Newsitems West Flanders. Day of the Navy in Ostend, with a wreath-laying memorial service at the
- monument of the sailors on the dike. Thereafter, a military parade takes place at the Wapenplein in the presence of Prince
- Albert, Princess Paola, Governor Vanneste, mayor Goekindt and certain naval officers. The navy admiral gives a speech,
- followed by a short parade. The princess salutes the flag bearers of the veterans' associations.
+ This description is more than 50 charactersAAAAAAAA
12944
nl
@@ -79,7 +76,6 @@
1984
Belgium
- false
VIDEO
diff --git a/metis-pattern-analysis/src/test/resources/europeana_record_with_P7.xml b/metis-pattern-analysis/src/test/resources/europeana_record_with_P7.xml
new file mode 100644
index 000000000..527bdf088
--- /dev/null
+++ b/metis-pattern-analysis/src/test/resources/europeana_record_with_P7.xml
@@ -0,0 +1,95 @@
+
+
+
+
+
+
+ text/html
+ 197506
+
+
+ image/jpeg
+ 6643
+ 640
+ 480
+ sRGB
+ #F0E68C
+ #B22222
+ #FF4500
+ #2F4F4F
+ #ADFF2F
+ #87CEEB
+ landscape
+
+
+ 50.75
+ 4.5
+
+
+
+ 1914-1919 के मध्य मुख्यतः यूरोप में व्याप्त इस महायुद्ध को प्रथम विश्व युद्ध कहते हैं । यह महायुद्ध
+ यूरोप, एशिया व अफ्रीका तीन महाद्वीपों और जल, थल तथा आकाश में लड़ा गया। इसमें भाग लेने वाले देशों की संख्या, इसका क्षेत्र
+ (जिसमें यह लड़ा गया) तथा इससे हुई क्षति के अभूतपूर्व आंकड़ों के कारण ही इसे विश्वयुद्ध कहते हैं ।
+
+ Første verdenskrig
+
+
+
+
+
+
+
+ EFG - The European Film Gateway
+ J.M.P.- Trends
+
+
+
+ 12944
+ nld
+
+
+
+ true
+ 1984
+
+
+
+
+
+ PYLYSER, JEAN-MARIE
+ 12944
+ nl
+ EFG1914
+ World War I
+ WESTVLAAMS FILMJOURNAAL - 164
+ Newsreel
+
+
+ 1984
+ Belgium
+
+
+
+ VIDEO
+
+
+
+ Europeana Foundation
+ Europeana Foundation
+ 1_test
+ Netherlands
+
+ nl
+ 10
+
+
+ The Royal Belgian Film Archives
+ Cinémathèque royale de Belgique
+ Koninklijk Belgisch Filmarchief
+
+
\ No newline at end of file
diff --git a/metis-pattern-analysis/src/test/resources/europeana_record_with_P7_descriptions_empty.xml b/metis-pattern-analysis/src/test/resources/europeana_record_with_P7_descriptions_empty.xml
new file mode 100644
index 000000000..4b5913567
--- /dev/null
+++ b/metis-pattern-analysis/src/test/resources/europeana_record_with_P7_descriptions_empty.xml
@@ -0,0 +1,99 @@
+
+
+
+
+
+
+ text/html
+ 197506
+
+
+ image/jpeg
+ 6643
+ 640
+ 480
+ sRGB
+ #F0E68C
+ #B22222
+ #FF4500
+ #2F4F4F
+ #ADFF2F
+ #87CEEB
+ landscape
+
+
+ 50.75
+ 4.5
+
+
+
+ 1914-1919 के मध्य मुख्यतः यूरोप में व्याप्त इस महायुद्ध को प्रथम विश्व युद्ध कहते हैं । यह महायुद्ध
+ यूरोप, एशिया व अफ्रीका तीन महाद्वीपों और जल, थल तथा आकाश में लड़ा गया। इसमें भाग लेने वाले देशों की संख्या, इसका क्षेत्र
+ (जिसमें यह लड़ा गया) तथा इससे हुई क्षति के अभूतपूर्व आंकड़ों के कारण ही इसे विश्वयुद्ध कहते हैं ।
+
+ Første verdenskrig
+
+
+
+
+
+
+
+ EFG - The European Film Gateway
+ J.M.P.- Trends
+
+
+
+ 12944
+ nld
+
+
+
+ true
+ 1984
+
+
+
+
+
+ PYLYSER, JEAN-MARIE
+
+
+
+
+ 12944
+ nl
+ EFG1914
+ World War I
+ WESTVLAAMS FILMJOURNAAL - 164
+ Newsreel
+
+
+ 1984
+ Belgium
+
+
+
+ VIDEO
+
+
+
+ Europeana Foundation
+ Europeana Foundation
+ 1_test
+ Netherlands
+
+ nl
+ 10
+
+
+ The Royal Belgian Film Archives
+ Cinémathèque royale de Belgique
+ Koninklijk Belgisch Filmarchief
+
+
\ No newline at end of file
diff --git a/metis-pattern-analysis/src/test/resources/europeana_record_with_P9.xml b/metis-pattern-analysis/src/test/resources/europeana_record_with_P9.xml
new file mode 100644
index 000000000..ca2e4dd83
--- /dev/null
+++ b/metis-pattern-analysis/src/test/resources/europeana_record_with_P9.xml
@@ -0,0 +1,98 @@
+
+
+
+
+
+
+ text/html
+ 197506
+
+
+ image/jpeg
+ 6643
+ 640
+ 480
+ sRGB
+ #F0E68C
+ #B22222
+ #FF4500
+ #2F4F4F
+ #ADFF2F
+ #87CEEB
+ landscape
+
+
+ 50.75
+ 4.5
+
+
+
+ 1914-1919 के मध्य मुख्यतः यूरोप में व्याप्त इस महायुद्ध को प्रथम विश्व युद्ध कहते हैं । यह महायुद्ध
+ यूरोप, एशिया व अफ्रीका तीन महाद्वीपों और जल, थल तथा आकाश में लड़ा गया। इसमें भाग लेने वाले देशों की संख्या, इसका क्षेत्र
+ (जिसमें यह लड़ा गया) तथा इससे हुई क्षति के अभूतपूर्व आंकड़ों के कारण ही इसे विश्वयुद्ध कहते हैं ।
+
+ Første verdenskrig
+
+
+
+
+
+
+
+ EFG - The European Film Gateway
+ J.M.P.- Trends
+
+
+
+ 12944
+ nld
+
+
+
+ true
+ 1984
+
+
+
+
+
+ PYLYSER, JEAN-MARIE
+ This description is less than 50 characters
+ This description is exactly 50 charactersAAAAAAAAA
+ This description is more 50 charactersAAAAAAAAAAAAA
+ 12944
+ nl
+ EFG1914
+ World War I
+ WESTVLAAMS FILMJOURNAAL - 164
+ Newsreel
+
+
+ 1984
+ Belgium
+
+
+
+ VIDEO
+
+
+
+ Europeana Foundation
+ Europeana Foundation
+ 1_test
+ Netherlands
+
+ nl
+ 10
+
+
+ The Royal Belgian Film Archives
+ Cinémathèque royale de Belgique
+ Koninklijk Belgisch Filmarchief
+
+
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 1e0c90eb6..d4aedc5ed 100644
--- a/pom.xml
+++ b/pom.xml
@@ -133,6 +133,7 @@
2.9.0
3.12.0
3.2.2
+ 1.9
1.11
2.15.3
From 7a6244ca8779950fd08c465f03534ab303304a3c Mon Sep 17 00:00:00 2001
From: JoanaCMS <70145179+JoanaCMS@users.noreply.github.com>
Date: Wed, 11 May 2022 14:57:51 +0200
Subject: [PATCH 29/73] MET-4418 Fix sorting of Countries list (#529)
---
.../eu/europeana/metis/core/common/Country.java | 15 +++++++++++++++
.../metis/core/rest/DatasetController.java | 3 ++-
2 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/common/Country.java b/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/common/Country.java
index 2c2022cdd..ac842da5a 100644
--- a/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/common/Country.java
+++ b/metis-core/metis-core-common/src/main/java/eu/europeana/metis/core/common/Country.java
@@ -1,5 +1,9 @@
package eu.europeana.metis.core.common;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.List;
+
/**
* Countries supported by METIS
*/
@@ -115,4 +119,15 @@ public static Country getCountryFromIsoCode(String isoCode) {
}
return null;
}
+
+ /**
+ * Provides the countries sorted by the {@link #getName()} field
+ *
+ * @return the list of countries, sorted by name
+ */
+ public static List getCountryListSortedByName() {
+ final Country[] sortedCountries = Country.values();
+ Arrays.sort(sortedCountries, Comparator.comparing(Country::getName));
+ return Arrays.asList(sortedCountries);
+ }
}
diff --git a/metis-core/metis-core-rest/src/main/java/eu/europeana/metis/core/rest/DatasetController.java b/metis-core/metis-core-rest/src/main/java/eu/europeana/metis/core/rest/DatasetController.java
index 28c1d5074..9f7b82e0d 100644
--- a/metis-core/metis-core-rest/src/main/java/eu/europeana/metis/core/rest/DatasetController.java
+++ b/metis-core/metis-core-rest/src/main/java/eu/europeana/metis/core/rest/DatasetController.java
@@ -613,7 +613,8 @@ public ResponseListWrapper getAllDatasetsByOrganizationName(
public List getDatasetsCountries(
@RequestHeader("Authorization") String authorization) throws GenericMetisException {
authenticationClient.getUserByAccessTokenInHeader(authorization);
- return Arrays.stream(Country.values()).map(CountryView::new).collect(Collectors.toList());
+ return Country.getCountryListSortedByName().stream().map(CountryView::new)
+ .collect(Collectors.toList());
}
/**
From 7e724784088de6774c77b348118c760a012ac779 Mon Sep 17 00:00:00 2001
From: Simon Tzanakis
Date: Fri, 13 May 2022 10:08:22 +0200
Subject: [PATCH 30/73] MET-4457 Set default truncate on elements for report
(#532)
---
.../ProblemPatternAnalyzer.java | 23 ++++++++++++++-----
1 file changed, 17 insertions(+), 6 deletions(-)
diff --git a/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/ProblemPatternAnalyzer.java b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/ProblemPatternAnalyzer.java
index 6960374de..7df496029 100644
--- a/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/ProblemPatternAnalyzer.java
+++ b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/ProblemPatternAnalyzer.java
@@ -6,6 +6,7 @@
import static java.util.stream.Collectors.toMap;
import static java.util.stream.Collectors.toSet;
import static org.apache.commons.lang3.BooleanUtils.isFalse;
+import static org.apache.commons.lang3.StringUtils.truncate;
import eu.europeana.metis.schema.convert.RdfConversionUtils;
import eu.europeana.metis.schema.convert.SerializationException;
@@ -48,6 +49,7 @@ public class ProblemPatternAnalyzer {
private static final int UNRECOGNIZABLE_CHARACTERS_THRESHOLD = 5;
private static final double LCS_CALCULATION_THRESHOLD = 0.9;
private static final int TITLE_DESCRIPTION_LENGTH_DISTANCE = 20;
+ private static final int DEFAULT_MAX_CHARACTERS_ELEMENT_LENGTH_FOR_REPORT = 50;
// Match anything that is not alphanumeric in all languages or literal spaces. We cannot just use \\w
private static final String UNRECOGNIZABLE_CHARACTERS_REGEX = "[^\\p{IsAlphabetic}\\p{IsDigit} ]";
private static final Pattern UNRECOGNIZABLE_CHARACTERS_PATTERN = Pattern.compile(UNRECOGNIZABLE_CHARACTERS_REGEX);
@@ -132,7 +134,8 @@ private List checkP2(List titles, List descri
equalTitlesAndDescriptions.retainAll(uniqueDescriptions);
return equalTitlesAndDescriptions.stream().map(
- value -> new ProblemOccurrence(format("Equal(lower cased) title and description: %s", value))
+ value -> new ProblemOccurrence(format("Equal(lower cased) title and description: %s(...)",
+ truncate(value, DEFAULT_MAX_CHARACTERS_ELEMENT_LENGTH_FOR_REPORT)))
).collect(toList());
}
@@ -160,7 +163,9 @@ private List checkP3(List titles, List descri
return nearIdenticalTitleDescriptionsMap.entrySet().stream().flatMap(
entry -> entry.getValue().stream().map(
- value -> new ProblemOccurrence(format("Near-Identical title and description fields: %s | %s", entry.getKey(), value))
+ value -> new ProblemOccurrence(format("Near-Identical title and description fields: %s(...) | %s(...)",
+ truncate(entry.getKey(), DEFAULT_MAX_CHARACTERS_ELEMENT_LENGTH_FOR_REPORT),
+ truncate(value, DEFAULT_MAX_CHARACTERS_ELEMENT_LENGTH_FOR_REPORT)))
)
).collect(toList());
}
@@ -199,7 +204,8 @@ private List checkP5(List titles, List identi
UNRECOGNIZABLE_CHARACTERS_PATTERN.matcher(s).results().count() > UNRECOGNIZABLE_CHARACTERS_THRESHOLD;
final Predicate containsIdentifier = s -> identifiers.stream().anyMatch(s::contains);
return titles.stream().filter(moreThanThresholdUnrecognizableCharacters.or(containsIdentifier))
- .map(title -> new ProblemOccurrence(format("Unrecognized title: %s", title))
+ .map(title -> new ProblemOccurrence(
+ format("Unrecognized title: %s(...)", truncate(title, DEFAULT_MAX_CHARACTERS_ELEMENT_LENGTH_FOR_REPORT)))
).collect(toList());
}
@@ -211,7 +217,9 @@ private List checkP5(List titles, List identi
*/
private List checkP6(List titles) {
return titles.stream().filter(title -> title.length() <= MIN_TITLE_LENGTH)
- .map(title -> new ProblemOccurrence(format("Non meaningful title: %s", title))).collect(toList());
+ .map(title -> new ProblemOccurrence(
+ format("Non meaningful title: %s(...)", truncate(title, DEFAULT_MAX_CHARACTERS_ELEMENT_LENGTH_FOR_REPORT))))
+ .collect(toList());
}
/**
@@ -237,7 +245,8 @@ private List checkP7(List descriptions) {
private List checkP9(List descriptions) {
return descriptions.stream().filter(StringUtils::isNotBlank)
.filter(description -> description.length() <= MIN_DESCRIPTION_LENGTH)
- .map(description -> new ProblemOccurrence(format("Very short description: %s", description)))
+ .map(description -> new ProblemOccurrence(format("Very short description: %s(...)",
+ truncate(description, DEFAULT_MAX_CHARACTERS_ELEMENT_LENGTH_FOR_REPORT))))
.collect(toList());
}
@@ -250,7 +259,9 @@ private List checkP9(List descriptions) {
*/
private List checkP12(List titles) {
return titles.stream().filter(title -> title.length() > MAX_TITLE_LENGTH)
- .map(title -> new ProblemOccurrence(format("Extremely long title: %s", title))).collect(toList());
+ .map(title -> new ProblemOccurrence(
+ format("Extremely long title: %s(...)", truncate(title, DEFAULT_MAX_CHARACTERS_ELEMENT_LENGTH_FOR_REPORT))))
+ .collect(toList());
}
private Optional constructProblemPattern(String recordId, ProblemPatternDescription problemPatternDescription,
From e2c3aeab0f3f43d01b97d3285dedaf2f6e162829 Mon Sep 17 00:00:00 2001
From: Joana Sousa
Date: Fri, 13 May 2022 12:23:39 +0200
Subject: [PATCH 31/73] MET-4418 Fixing the schema for North Macedonia
---
metis-schema/src/main/resources/schema_xsds/EDM-COMMON-MAIN.xsd | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/metis-schema/src/main/resources/schema_xsds/EDM-COMMON-MAIN.xsd b/metis-schema/src/main/resources/schema_xsds/EDM-COMMON-MAIN.xsd
index 039e9b802..31c9cf21f 100644
--- a/metis-schema/src/main/resources/schema_xsds/EDM-COMMON-MAIN.xsd
+++ b/metis-schema/src/main/resources/schema_xsds/EDM-COMMON-MAIN.xsd
@@ -168,7 +168,7 @@
-
+
From cfbefed31e723b25df538cf54137544365056e93 Mon Sep 17 00:00:00 2001
From: JoanaCMS <70145179+JoanaCMS@users.noreply.github.com>
Date: Wed, 18 May 2022 08:33:18 +0200
Subject: [PATCH 32/73] MET-4472 New Tier Calculation for 3D (#530)
* MET-4472 Started implementing tier calculation for 3D
* MET-4472 Fixed unit tests
* MET-4472 Changes for tier 1 calculation
* MET-4472 Changed code to remove bug from sonarqube
* MET-4472 Trying to fix bug from sonarqube
* MET-4472 Fixing unit tests and code
* MET-4472 Added new unit test for null values
* MET-4472 Code review changes
Changed classification of webresource for 3D. Updated unit tests
* MET-4472 Removed code smell
---
.../tiers/media/ThreeDClassifier.java | 37 ++++++++++++--
.../tiers/media/ThreeDClassifierTest.java | 48 +++++++++++++------
2 files changed, 67 insertions(+), 18 deletions(-)
diff --git a/metis-indexing/src/main/java/eu/europeana/indexing/tiers/media/ThreeDClassifier.java b/metis-indexing/src/main/java/eu/europeana/indexing/tiers/media/ThreeDClassifier.java
index 0d2deb16c..ac827ad73 100644
--- a/metis-indexing/src/main/java/eu/europeana/indexing/tiers/media/ThreeDClassifier.java
+++ b/metis-indexing/src/main/java/eu/europeana/indexing/tiers/media/ThreeDClassifier.java
@@ -4,9 +4,11 @@
import eu.europeana.indexing.tiers.view.ResolutionTierMetadata;
import eu.europeana.indexing.tiers.view.ResolutionTierMetadata.ResolutionTierMetadataBuilder;
import eu.europeana.indexing.utils.RdfWrapper;
+import eu.europeana.indexing.utils.WebResourceLinkType;
import eu.europeana.indexing.utils.WebResourceWrapper;
import eu.europeana.metis.schema.model.MediaType;
-import org.apache.commons.lang3.StringUtils;
+
+import java.util.Set;
/**
* Classifier for 3D objects.
@@ -30,8 +32,19 @@ MediaTier classifyEntityWithoutWebResources(RdfWrapper entity, boolean hasLandin
@Override
MediaTier classifyWebResource(WebResourceWrapper webResource, boolean hasLandingPage, boolean hasEmbeddableMedia) {
- // T2-T4 if there is a mime type (any whatsoever), T0 otherwise.
- return StringUtils.isNotBlank(webResource.getMimeType()) ? MediaTier.T4 : MediaTier.T0;
+ // Tier rules (hasEmbeddableMedia is not consulted):
+ // - T0 when there is no web resource;
+ // - T4 when the resource has a mime type, is neither an image nor a PDF, and is linked as IS_SHOWN_BY or HAS_VIEW;
+ // - T1 when the record has a landing page and the resource is linked as IS_SHOWN_AT but not IS_SHOWN_BY/HAS_VIEW;
+ // - T0 in every other case.
+ final MediaTier result;
+
+ if(webResource == null){
+ result = MediaTier.T0;
+ } else if(mimeTypeIsNotImageOrApplicationPdf(webResource) && containsIsShownByOrHasViewWebResource(webResource)){
+ result = MediaTier.T4;
+ } else if(hasLandingPage && onlyContainsShownAtWebResource(webResource)){
+ result = MediaTier.T1;
+ } else {
+ result = MediaTier.T0;
+ }
+
+ return result;
}
@Override
@@ -43,4 +56,45 @@ ResolutionTierMetadata extractResolutionTierMetadata(WebResourceWrapper webResou
MediaType getMediaType() {
return MediaType.THREE_D;
}
+
+  /**
+   * Checks whether the web resource has a mime type and is neither an image nor a PDF.
+   *
+   * @param webResource the web resource to check
+   * @return true if the mime type is not null, the media type is not {@link MediaType#IMAGE} and the mime type does
+   * not start with "application/pdf"
+   */
+  private boolean mimeTypeIsNotImageOrApplicationPdf(WebResourceWrapper webResource) {
+    final String mimeType = webResource.getMimeType();
+    return mimeType != null && webResource.getMediaType() != MediaType.IMAGE && !mimeType.startsWith("application/pdf");
+  }
+
+  /**
+   * Checks whether the link types of the web resource include {@link WebResourceLinkType#IS_SHOWN_BY} or
+   * {@link WebResourceLinkType#HAS_VIEW}.
+   *
+   * @param webResource the web resource to check
+   * @return true if either link type is present, false otherwise (including when the link types are null)
+   */
+  private boolean containsIsShownByOrHasViewWebResource(WebResourceWrapper webResource) {
+    final Set<WebResourceLinkType> linkTypes = webResource.getLinkTypes();
+    return linkTypes != null
+        && (linkTypes.contains(WebResourceLinkType.IS_SHOWN_BY) || linkTypes.contains(WebResourceLinkType.HAS_VIEW));
+  }
+
+  /**
+   * Checks whether {@link WebResourceLinkType#IS_SHOWN_AT} is among the link types of the web resource while neither
+   * {@link WebResourceLinkType#IS_SHOWN_BY} nor {@link WebResourceLinkType#HAS_VIEW} is.
+   *
+   * @param webResource the web resource to check
+   * @return true if the condition holds, false otherwise (including when the link types are null)
+   */
+  private boolean onlyContainsShownAtWebResource(WebResourceWrapper webResource) {
+    final Set<WebResourceLinkType> linkTypes = webResource.getLinkTypes();
+    // Null guard, in line with containsIsShownByOrHasViewWebResource: a resource without link types must yield false
+    // rather than a NullPointerException when the record has a landing page.
+    return linkTypes != null && linkTypes.contains(WebResourceLinkType.IS_SHOWN_AT)
+        && !linkTypes.contains(WebResourceLinkType.IS_SHOWN_BY)
+        && !linkTypes.contains(WebResourceLinkType.HAS_VIEW);
+  }
}
diff --git a/metis-indexing/src/test/java/eu/europeana/indexing/tiers/media/ThreeDClassifierTest.java b/metis-indexing/src/test/java/eu/europeana/indexing/tiers/media/ThreeDClassifierTest.java
index 53efbc152..6493c6fbe 100644
--- a/metis-indexing/src/test/java/eu/europeana/indexing/tiers/media/ThreeDClassifierTest.java
+++ b/metis-indexing/src/test/java/eu/europeana/indexing/tiers/media/ThreeDClassifierTest.java
@@ -3,22 +3,20 @@
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
-import static org.mockito.Mockito.doReturn;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.reset;
-import static org.mockito.Mockito.spy;
+import static org.mockito.Mockito.*;
import eu.europeana.indexing.tiers.model.MediaTier;
import eu.europeana.indexing.utils.RdfWrapper;
+import eu.europeana.indexing.utils.WebResourceLinkType;
import eu.europeana.indexing.utils.WebResourceWrapper;
import eu.europeana.metis.schema.model.MediaType;
+
+import java.util.Set;
import java.util.stream.Stream;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
-import org.junit.jupiter.params.provider.MethodSource;
class ThreeDClassifierTest {
@@ -65,16 +63,36 @@ private static Stream testClassifyWebResource() {
);
}
- @ParameterizedTest(name = "[{index}] - expectedTier:{0} for mimeType:{1}")
- @MethodSource("testClassifyWebResource")
- void testClassifyWebResource(MediaTier expectedTier, String mimeType) {
+ // A resource linked as HAS_VIEW and IS_SHOWN_BY, with a non-null mime type and getMediaType() left unstubbed,
+ // is expected to be classified as T4.
+ @Test
+ void testClassifyWebResource_tier4Result() {
+ final WebResourceWrapper webResource = mock(WebResourceWrapper.class);
+ Set mockSetResponse = Set.of(WebResourceLinkType.HAS_VIEW, WebResourceLinkType.IS_SHOWN_BY);
+ when(webResource.getLinkTypes()).thenReturn(mockSetResponse);
+ when(webResource.getMimeType()).thenReturn("video");
+ assertEquals(MediaTier.T4, classifier.classifyWebResource(webResource, true, false));
+ }
+
+ // A resource linked only as IS_SHOWN_AT, on a record that has a landing page, is expected to be classified as T1.
+ @Test
+ void testClassifyWebResource_tier1Result() {
+ final WebResourceWrapper webResource = mock(WebResourceWrapper.class);
+ when(webResource.getLinkTypes()).thenReturn(Set.of(WebResourceLinkType.IS_SHOWN_AT));
+ when(webResource.getMimeType()).thenReturn("video");
+ assertEquals(MediaTier.T1, classifier.classifyWebResource(webResource, true, false));
+ }
+
+ // A resource with an empty set of link types, on a record without a landing page, is expected to be classified as T0.
+ @Test
+ void testClassifyWebResource_tier0Result() {
+ final WebResourceWrapper webResource = mock(WebResourceWrapper.class);
+ when(webResource.getLinkTypes()).thenReturn(Set.of());
+ when(webResource.getMimeType()).thenReturn("video");
+ assertEquals(MediaTier.T0, classifier.classifyWebResource(webResource, false, false));
+ }
+
+ @Test
+ void testClassifyWebResource_tier0NullValuesResult() {
final WebResourceWrapper webResource = mock(WebResourceWrapper.class);
- doReturn(mimeType).when(webResource).getMimeType();
- //Any combination of hasLandingPage and hasEmbeddableMedia should not change the result
- assertEquals(expectedTier, classifier.classifyWebResource(webResource, false, false));
- assertEquals(expectedTier, classifier.classifyWebResource(webResource, true, false));
- assertEquals(expectedTier, classifier.classifyWebResource(webResource, false, true));
- assertEquals(expectedTier, classifier.classifyWebResource(webResource, true, true));
+ // getMimeType() is left unstubbed and the link types are null: without a landing page the expected tier is T0.
+ when(webResource.getLinkTypes()).thenReturn(null);
+ assertEquals(MediaTier.T0, classifier.classifyWebResource(webResource, false, false));
}
@Test
From 98bc8b3f0ee217270363abe1f3fcb948b21367d6 Mon Sep 17 00:00:00 2001
From: Jorge Ortiz
Date: Thu, 19 May 2022 11:08:53 +0200
Subject: [PATCH 33/73] MET-4452 depublication counter after incremental
processing (#533)
---
.../eu/europeana/metis/core/service/OrchestratorService.java | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/service/OrchestratorService.java b/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/service/OrchestratorService.java
index 8a237a8a3..37447732f 100644
--- a/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/service/OrchestratorService.java
+++ b/metis-core/metis-core-service/src/main/java/eu/europeana/metis/core/service/OrchestratorService.java
@@ -774,11 +774,9 @@ private void setPublishInformation(DatasetExecutionInformation executionInfo,
final int depublishedRecordCount;
if (datasetCurrentlyDepublished) {
depublishedRecordCount = executionInfo.getLastPublishedRecords();
- } else if (depublishHappenedAfterLatestExecutablePublish) {
+ } else {
depublishedRecordCount = (int) depublishRecordIdDao
.countSuccessfullyDepublishedRecordIdsForDataset(datasetId);
- } else {
- depublishedRecordCount = 0;
}
//Compute more general information of the plugin
From 7d5dfc97930ffea20aeec91fc383b71e8dd5c686 Mon Sep 17 00:00:00 2001
From: JoanaCMS <70145179+JoanaCMS@users.noreply.github.com>
Date: Fri, 20 May 2022 09:59:55 +0200
Subject: [PATCH 34/73] MET-4531 Started writing unit tests (#535)
* MET-4531 Started writing unit tests
* MET-4531 Added new unit tests
* MET-4531 Removed code smell
---
metis-repository/pom.xml | 10 +-
.../metis/repository/dao/RecordTest.java | 92 +++++++++++++++++++
2 files changed, 101 insertions(+), 1 deletion(-)
create mode 100644 metis-repository/src/test/java/eu/europeana/metis/repository/dao/RecordTest.java
diff --git a/metis-repository/pom.xml b/metis-repository/pom.xml
index d60b3a2a7..49f2783a0 100644
--- a/metis-repository/pom.xml
+++ b/metis-repository/pom.xml
@@ -77,6 +77,14 @@
metis-harvesting
${project.version}
+
+ org.junit.jupiter
+ junit-jupiter-api
+
+
+ org.junit.jupiter
+ junit-jupiter-engine
+
@@ -90,4 +98,4 @@
-
\ No newline at end of file
+
diff --git a/metis-repository/src/test/java/eu/europeana/metis/repository/dao/RecordTest.java b/metis-repository/src/test/java/eu/europeana/metis/repository/dao/RecordTest.java
new file mode 100644
index 000000000..13c4b657e
--- /dev/null
+++ b/metis-repository/src/test/java/eu/europeana/metis/repository/dao/RecordTest.java
@@ -0,0 +1,97 @@
+package eu.europeana.metis.repository.dao;
+
+import org.bson.types.ObjectId;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.time.Instant;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+/**
+ * Unit tests for the constructor, getters and setters of {@link Record}.
+ */
+class RecordTest {
+
+  private Record recordToTest;
+  private final Instant instantUsedForTest = Instant.now();
+
+  @BeforeEach
+  void setUp() {
+    recordToTest = new Record("recordId", "datasetId", instantUsedForTest, false, "edmRecord");
+  }
+
+  @Test
+  void testGetRecordId() {
+    assertEquals("recordId", recordToTest.getRecordId());
+  }
+
+  @Test
+  void testGetDatasetId() {
+    assertEquals("datasetId", recordToTest.getDatasetId());
+  }
+
+  @Test
+  void testGetDateStamp() {
+    assertEquals(instantUsedForTest, recordToTest.getDateStamp());
+  }
+
+  @Test
+  void testIsDeleted() {
+    assertFalse(recordToTest.isDeleted());
+  }
+
+  @Test
+  void testEdmRecord() {
+    assertEquals("edmRecord", recordToTest.getEdmRecord());
+  }
+
+  @Test
+  void testSetAndGetId() {
+    assertNull(recordToTest.getId());
+    ObjectId objectId = new ObjectId();
+    recordToTest.setId(objectId);
+    assertEquals(objectId, recordToTest.getId());
+  }
+
+  @Test
+  void testSetRecordId() {
+    assertEquals("recordId", recordToTest.getRecordId());
+    recordToTest.setRecordId("newRecordId");
+    assertEquals("newRecordId", recordToTest.getRecordId());
+  }
+
+  @Test
+  void testSetDatasetId() {
+    assertEquals("datasetId", recordToTest.getDatasetId());
+    recordToTest.setDatasetId("newDatasetId");
+    assertEquals("newDatasetId", recordToTest.getDatasetId());
+  }
+
+  @Test
+  void testSetDateStamp() {
+    assertEquals(instantUsedForTest, recordToTest.getDateStamp());
+    // Derive the new value from the original: a second Instant.now() call may return the very same instant, in which
+    // case this test would also pass with a setter that does nothing.
+    final Instant instantNew = instantUsedForTest.plusSeconds(1);
+    recordToTest.setDateStamp(instantNew);
+    assertEquals(instantNew, recordToTest.getDateStamp());
+  }
+
+  @Test
+  void testSetDeleted() {
+    assertFalse(recordToTest.isDeleted());
+    recordToTest.setDeleted(true);
+    assertTrue(recordToTest.isDeleted());
+  }
+
+  @Test
+  void testSetEdmRecord() {
+    assertEquals("edmRecord", recordToTest.getEdmRecord());
+    recordToTest.setEdmRecord("newEdmRecord");
+    assertEquals("newEdmRecord", recordToTest.getEdmRecord());
+  }
+}
From f0234bf997fedbdc571596253a87954b280ba25b Mon Sep 17 00:00:00 2001
From: Simon Tzanakis
Date: Fri, 20 May 2022 12:14:40 +0200
Subject: [PATCH 35/73] Feat/met 4458 implement p1 problem pattern (#537)
* MET-4458 Introduce wrapper ProblemPatternAnalysis class
* MET-4458 Centralize patterns titles
* MET-4458 Compute static sets of global and non-global patterns
* MET-4458 Add more unit tests
* MET-4458 Fix sonar issues
---
.../ProblemPatternAnalyzer.java | 74 +++++++++++--------
.../view/ProblemPatternAnalysis.java | 44 +++++++++++
.../view/ProblemPatternDescription.java | 52 +++++--------
.../ProblemPatternAnalyzerTest.java | 15 ++--
.../view/ProblemOccurrenceTest.java | 9 ++-
.../view/ProblemPatternAnalysisTest.java | 27 +++++++
.../view/ProblemPatternDescriptionTest.java | 69 +++++++++--------
7 files changed, 187 insertions(+), 103 deletions(-)
create mode 100644 metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/ProblemPatternAnalysis.java
create mode 100644 metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/ProblemPatternAnalysisTest.java
diff --git a/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/ProblemPatternAnalyzer.java b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/ProblemPatternAnalyzer.java
index 7df496029..047b69a65 100644
--- a/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/ProblemPatternAnalyzer.java
+++ b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/ProblemPatternAnalyzer.java
@@ -6,7 +6,7 @@
import static java.util.stream.Collectors.toMap;
import static java.util.stream.Collectors.toSet;
import static org.apache.commons.lang3.BooleanUtils.isFalse;
-import static org.apache.commons.lang3.StringUtils.truncate;
+import static org.apache.commons.lang3.StringUtils.abbreviate;
import eu.europeana.metis.schema.convert.RdfConversionUtils;
import eu.europeana.metis.schema.convert.SerializationException;
@@ -19,11 +19,14 @@
import eu.europeana.metis.schema.jibx.ResourceOrLiteralType;
import eu.europeana.patternanalysis.view.ProblemOccurrence;
import eu.europeana.patternanalysis.view.ProblemPattern;
+import eu.europeana.patternanalysis.view.ProblemPatternAnalysis;
import eu.europeana.patternanalysis.view.ProblemPatternDescription;
+import eu.europeana.patternanalysis.view.ProblemPatternDescription.ProblemPatternId;
import eu.europeana.patternanalysis.view.RecordAnalysis;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
+import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
@@ -54,6 +57,11 @@ public class ProblemPatternAnalyzer {
private static final String UNRECOGNIZABLE_CHARACTERS_REGEX = "[^\\p{IsAlphabetic}\\p{IsDigit} ]";
private static final Pattern UNRECOGNIZABLE_CHARACTERS_PATTERN = Pattern.compile(UNRECOGNIZABLE_CHARACTERS_REGEX);
+ /**
+ * Ids of the global problem patterns, i.e. those that are not computed by the per-record analysis in this class;
+ * currently only P1.
+ */
+ public static final Set globalProblemPatterns = Collections.unmodifiableSet(EnumSet.of(ProblemPatternId.P1));
+ /**
+ * Ids of all problem patterns except P1, i.e. the complement of {@link #globalProblemPatterns}.
+ */
+ public static final Set nonGlobalProblemPatterns = Collections.unmodifiableSet(
+ EnumSet.complementOf(EnumSet.of(ProblemPatternId.P1)));
+
+
/**
* Analyzes a record for problem patterns.
*
@@ -61,7 +69,7 @@ public class ProblemPatternAnalyzer {
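- * @return a list of problem patterns
+ * @return the problem pattern analysis of the record, holding its rdf about, its non-global problem patterns and its titles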
* @throws SerializationException if the record could not be converted to {@link RDF}
*/
- public List analyzeRecord(String rdfString) throws SerializationException {
+ public ProblemPatternAnalysis analyzeRecord(String rdfString) throws SerializationException {
return analyzeRecord(new RdfConversionUtils().convertStringToRdf(rdfString));
}
@@ -71,7 +79,7 @@ public List analyzeRecord(String rdfString) throws Serialization
* @param rdf the rdf record
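- * @return a list of problem patterns
+ * @return the problem pattern analysis of the record, holding its rdf about, its non-global problem patterns and its titles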
*/
- public List analyzeRecord(RDF rdf) {
+ public ProblemPatternAnalysis analyzeRecord(RDF rdf) {
final List providerProxies = getProviderProxies(rdf);
final List choices = providerProxies.stream().map(EuropeanaType::getChoiceList)
.filter(Objects::nonNull)
@@ -85,7 +93,8 @@ public List analyzeRecord(RDF rdf) {
LiteralType::getString);
final String rdfAbout = rdf.getProvidedCHOList().stream().filter(Objects::nonNull).findFirst()
.map(ProvidedCHOType::getAbout).orElse(null);
- return computeProblemPatterns(rdfAbout, titles, descriptions, identifiers);
+ final ArrayList problemPatterns = computeProblemPatterns(rdfAbout, titles, descriptions, identifiers);
+ return new ProblemPatternAnalysis(rdfAbout, problemPatterns, Set.copyOf(titles));
}
private List getChoicesInStringList(List choices, Predicate choicePredicate,
@@ -97,6 +106,7 @@ private ArrayList computeProblemPatterns(String rdfAbout, List identifiers) {
final ArrayList problemPatterns = new ArrayList<>();
+ //We can only compute non-global patterns here
constructProblemPattern(rdfAbout, ProblemPatternDescription.P2, checkP2(titles, descriptions)).ifPresent(
problemPatterns::add);
constructProblemPattern(rdfAbout, ProblemPatternDescription.P3, checkP3(titles, descriptions)).ifPresent(
@@ -119,6 +129,18 @@ private List getProviderProxies(RDF rdf) {
.collect(Collectors.toList());
}
+ /**
+ * Abbreviates an element to at most {@link #DEFAULT_MAX_CHARACTERS_ELEMENT_LENGTH_FOR_REPORT} characters by delegating
+ * to {@link StringUtils#abbreviate(String, int)}, which marks shortened values with a trailing ellipsis.
+ * It is used locally and is public so that it can also be used for global problem patterns like P1.
+ *
+ * @param element the string element
+ * @return the abbreviated string
+ */
+ public String abbreviateElement(String element) {
+ return abbreviate(element, DEFAULT_MAX_CHARACTERS_ELEMENT_LENGTH_FOR_REPORT);
+ }
+
/**
* Check whether there is a title - description pair for which the values are equal, ignoring letter (upper or lower) case.
* It will report a single occurrence for multiple same fields
@@ -134,11 +156,21 @@ private List checkP2(List titles, List descri
equalTitlesAndDescriptions.retainAll(uniqueDescriptions);
return equalTitlesAndDescriptions.stream().map(
- value -> new ProblemOccurrence(format("Equal(lower cased) title and description: %s(...)",
- truncate(value, DEFAULT_MAX_CHARACTERS_ELEMENT_LENGTH_FOR_REPORT)))
+ value -> new ProblemOccurrence(abbreviateElement(value))
).collect(toList());
}
+ /**
+ * Collects the descriptions that are near-identical to a title.
+ * A description is kept when it is not blank, does not equal the title (ignoring case), its length differs from the
+ * title length by at most TITLE_DESCRIPTION_LENGTH_DISTANCE, and the longest common subsequence result divided by the
+ * shorter of the two lengths is at least LCS_CALCULATION_THRESHOLD.
+ *
+ * @param title the title to compare with
+ * @param descriptions the candidate descriptions
+ * @return the descriptions that are near-identical to the title
+ */
+ private List nearIdenticalDescriptions(String title, List descriptions) {
+ final LongestCommonSubsequence longestCommonSubsequence = new LongestCommonSubsequence();
+ final Predicate lcsPredicate = description ->
+ ((double) longestCommonSubsequence.apply(title, description) / Math.min(title.length(), description.length()))
+ >= LCS_CALCULATION_THRESHOLD;
+ final Predicate distancePredicate = description -> Math.abs(title.length() - description.length())
+ <= TITLE_DESCRIPTION_LENGTH_DISTANCE;
+ return descriptions.stream().filter(StringUtils::isNotBlank).filter(not(title::equalsIgnoreCase))
+ .filter(lcsPredicate.and(distancePredicate)).collect(toList());
+ }
+
/**
* Check whether there is a title - description pair for which the values are too similar.
*
@@ -163,24 +195,11 @@ private List checkP3(List titles, List descri
return nearIdenticalTitleDescriptionsMap.entrySet().stream().flatMap(
entry -> entry.getValue().stream().map(
- value -> new ProblemOccurrence(format("Near-Identical title and description fields: %s(...) | %s(...)",
- truncate(entry.getKey(), DEFAULT_MAX_CHARACTERS_ELEMENT_LENGTH_FOR_REPORT),
- truncate(value, DEFAULT_MAX_CHARACTERS_ELEMENT_LENGTH_FOR_REPORT)))
+ value -> new ProblemOccurrence(format("%s <--> %s", abbreviateElement(entry.getKey()), abbreviateElement(value)))
)
).collect(toList());
}
- private List nearIdenticalDescriptions(String title, List descriptions) {
- final LongestCommonSubsequence longestCommonSubsequence = new LongestCommonSubsequence();
- final Predicate lcsPredicate = description ->
- ((double) longestCommonSubsequence.apply(title, description) / Math.min(title.length(), description.length()))
- >= LCS_CALCULATION_THRESHOLD;
- final Predicate distancePredicate = description -> Math.abs(title.length() - description.length())
- <= TITLE_DESCRIPTION_LENGTH_DISTANCE;
- return descriptions.stream().filter(StringUtils::isNotBlank).filter(not(title::equalsIgnoreCase))
- .filter(lcsPredicate.and(distancePredicate)).collect(toList());
- }
-
/**
* Check whether a title is not human-readable.
*
@@ -204,8 +223,7 @@ private List checkP5(List titles, List identi
UNRECOGNIZABLE_CHARACTERS_PATTERN.matcher(s).results().count() > UNRECOGNIZABLE_CHARACTERS_THRESHOLD;
final Predicate containsIdentifier = s -> identifiers.stream().anyMatch(s::contains);
return titles.stream().filter(moreThanThresholdUnrecognizableCharacters.or(containsIdentifier))
- .map(title -> new ProblemOccurrence(
- format("Unrecognized title: %s(...)", truncate(title, DEFAULT_MAX_CHARACTERS_ELEMENT_LENGTH_FOR_REPORT)))
+ .map(title -> new ProblemOccurrence(abbreviateElement(title))
).collect(toList());
}
@@ -217,8 +235,7 @@ private List checkP5(List titles, List identi
*/
private List checkP6(List titles) {
return titles.stream().filter(title -> title.length() <= MIN_TITLE_LENGTH)
- .map(title -> new ProblemOccurrence(
- format("Non meaningful title: %s(...)", truncate(title, DEFAULT_MAX_CHARACTERS_ELEMENT_LENGTH_FOR_REPORT))))
+ .map(title -> new ProblemOccurrence(abbreviateElement(title)))
.collect(toList());
}
@@ -230,7 +247,7 @@ private List checkP6(List titles) {
*/
private List checkP7(List descriptions) {
if (CollectionUtils.isEmpty(descriptions) || descriptions.stream().allMatch(StringUtils::isBlank)) {
- return List.of(new ProblemOccurrence("Missing description fields"));
+ return List.of(new ProblemOccurrence(abbreviateElement("Missing description fields")));
}
return Collections.emptyList();
}
@@ -245,8 +262,7 @@ private List checkP7(List descriptions) {
private List checkP9(List descriptions) {
return descriptions.stream().filter(StringUtils::isNotBlank)
.filter(description -> description.length() <= MIN_DESCRIPTION_LENGTH)
- .map(description -> new ProblemOccurrence(format("Very short description: %s(...)",
- truncate(description, DEFAULT_MAX_CHARACTERS_ELEMENT_LENGTH_FOR_REPORT))))
+ .map(description -> new ProblemOccurrence(abbreviateElement(description)))
.collect(toList());
}
@@ -259,8 +275,7 @@ private List checkP9(List descriptions) {
*/
private List checkP12(List titles) {
return titles.stream().filter(title -> title.length() > MAX_TITLE_LENGTH)
- .map(title -> new ProblemOccurrence(
- format("Extremely long title: %s(...)", truncate(title, DEFAULT_MAX_CHARACTERS_ELEMENT_LENGTH_FOR_REPORT))))
+ .map(title -> new ProblemOccurrence(abbreviateElement(title)))
.collect(toList());
}
@@ -272,4 +287,5 @@ private Optional constructProblemPattern(String recordId, Proble
}
return Optional.empty();
}
+
}
diff --git a/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/ProblemPatternAnalysis.java b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/ProblemPatternAnalysis.java
new file mode 100644
index 000000000..338ed8107
--- /dev/null
+++ b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/ProblemPatternAnalysis.java
@@ -0,0 +1,55 @@
+package eu.europeana.patternanalysis.view;
+
+import static java.util.Objects.requireNonNull;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Class containing the problem pattern analysis for a record.
+ */
+public class ProblemPatternAnalysis {
+
+  private final String rdfAbout;
+  private final List<ProblemPattern> problemPatterns;
+  private final Set<String> titles;
+
+  /**
+   * Constructor with required parameters. The supplied collections are copied.
+   *
+   * @param rdfAbout the rdf about, must not be null
+   * @param problemPatterns the problem patterns, null is treated as empty
+   * @param titles the record titles, null is treated as empty
+   * @throws NullPointerException if rdfAbout is null
+   */
+  public ProblemPatternAnalysis(String rdfAbout, List<ProblemPattern> problemPatterns, Set<String> titles) {
+    this.rdfAbout = requireNonNull(rdfAbout, "rdfAbout");
+    // Copy on the way in: the getters hand out copies, so keeping a reference to a caller-owned collection would
+    // still let the caller change the state of this object after construction.
+    this.problemPatterns = problemPatterns == null ? new ArrayList<>() : new ArrayList<>(problemPatterns);
+    this.titles = titles == null ? new HashSet<>() : new HashSet<>(titles);
+  }
+
+  /**
+   * @return the rdf about of the record, never null
+   */
+  public String getRdfAbout() {
+    return rdfAbout;
+  }
+
+  /**
+   * @return a new list containing the problem patterns
+   */
+  public List<ProblemPattern> getProblemPatterns() {
+    return new ArrayList<>(problemPatterns);
+  }
+
+  /**
+   * @return a new set containing the record titles
+   */
+  public Set<String> getTitles() {
+    return new HashSet<>(titles);
+  }
+}
diff --git a/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/ProblemPatternDescription.java b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/ProblemPatternDescription.java
index 3983d9d64..d94d7b6ec 100644
--- a/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/ProblemPatternDescription.java
+++ b/metis-pattern-analysis/src/main/java/eu/europeana/patternanalysis/view/ProblemPatternDescription.java
@@ -9,48 +9,30 @@
@JsonFormat(shape = JsonFormat.Shape.OBJECT)
public enum ProblemPatternDescription {
- /**
- * Systematic use of the same title
- */
- P1(ProblemPatternId.P1, ProblemPatternSeverity.WARNING, ProblemPatternQualityDimension.CONCISENESS),
- /**
- * Equal title and description fields
- */
- P2(ProblemPatternId.P2, ProblemPatternSeverity.WARNING, ProblemPatternQualityDimension.CONCISENESS),
- /**
- * Near-Identical title and description fields
- */
- P3(ProblemPatternId.P3, ProblemPatternSeverity.NOTICE, ProblemPatternQualityDimension.CONCISENESS),
- /**
- * Unrecognizable title
- */
- P5(ProblemPatternId.P5, ProblemPatternSeverity.NOTICE, ProblemPatternQualityDimension.ACCURACY),
- /**
- * Non-meaningful title
- */
- P6(ProblemPatternId.P6, ProblemPatternSeverity.NOTICE, ProblemPatternQualityDimension.ACCURACY),
- /**
- * Missing description fields
- */
- P7(ProblemPatternId.P7, ProblemPatternSeverity.NOTICE, ProblemPatternQualityDimension.COMPLETENESS),
- /**
- * Very short description
- */
- P9(ProblemPatternId.P9, ProblemPatternSeverity.WARNING, ProblemPatternQualityDimension.ACCURACY),
- /**
- * Extremely long values
- */
- P12(ProblemPatternId.P12, ProblemPatternSeverity.NOTICE, ProblemPatternQualityDimension.ACCURACY);
+ P1(ProblemPatternId.P1, "Systematic use of the same title", ProblemPatternSeverity.WARNING,
+ ProblemPatternQualityDimension.CONCISENESS),
+ P2(ProblemPatternId.P2, "Equal title and description fields", ProblemPatternSeverity.WARNING,
+ ProblemPatternQualityDimension.CONCISENESS),
+ P3(ProblemPatternId.P3, "Near-Identical title and description fields", ProblemPatternSeverity.NOTICE,
+ ProblemPatternQualityDimension.CONCISENESS),
+ P5(ProblemPatternId.P5, "Unrecognizable title", ProblemPatternSeverity.NOTICE, ProblemPatternQualityDimension.ACCURACY),
+ P6(ProblemPatternId.P6, "Non-meaningful title", ProblemPatternSeverity.NOTICE, ProblemPatternQualityDimension.ACCURACY),
+ P7(ProblemPatternId.P7, "Missing description fields", ProblemPatternSeverity.NOTICE,
+ ProblemPatternQualityDimension.COMPLETENESS),
+ P9(ProblemPatternId.P9, "Very short description", ProblemPatternSeverity.WARNING, ProblemPatternQualityDimension.ACCURACY),
+ P12(ProblemPatternId.P12, "Extremely long values", ProblemPatternSeverity.NOTICE, ProblemPatternQualityDimension.ACCURACY);
private final ProblemPatternId problemPatternId;
+ private final String problemPatternTitle;
private final ProblemPatternSeverity problemPatternSeverity;
private final ProblemPatternQualityDimension problemPatternQualityDimension;
ProblemPatternDescription(ProblemPatternId problemPatternId,
- ProblemPatternSeverity problemPatternSeverity,
+ String problemPatternTitle, ProblemPatternSeverity problemPatternSeverity,
ProblemPatternQualityDimension problemPatternQualityDimension) {
this.problemPatternId = problemPatternId;
+ this.problemPatternTitle = problemPatternTitle;
this.problemPatternSeverity = problemPatternSeverity;
this.problemPatternQualityDimension = problemPatternQualityDimension;
}
@@ -59,6 +41,10 @@ public ProblemPatternId getProblemPatternId() {
return problemPatternId;
}
+ public String getProblemPatternTitle() {
+ return problemPatternTitle;
+ }
+
public ProblemPatternSeverity getProblemPatternSeverity() {
return problemPatternSeverity;
}
diff --git a/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/ProblemPatternAnalyzerTest.java b/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/ProblemPatternAnalyzerTest.java
index b23f8c499..1d84a4189 100644
--- a/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/ProblemPatternAnalyzerTest.java
+++ b/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/ProblemPatternAnalyzerTest.java
@@ -5,6 +5,7 @@
import eu.europeana.metis.schema.convert.SerializationException;
import eu.europeana.patternanalysis.view.ProblemPattern;
+import eu.europeana.patternanalysis.view.ProblemPatternAnalysis;
import eu.europeana.patternanalysis.view.ProblemPatternDescription;
import java.io.FileInputStream;
import java.io.IOException;
@@ -61,7 +62,7 @@ private ProblemPatternDescription getRequestedProblemPattern(ProblemPatternDescr
.filter(patternDescription -> patternDescription == problemPatternDescription).findFirst().orElse(null);
}
- private List analyzeProblemPatternsForFile(String fileLocation) throws IOException, SerializationException {
+ private ProblemPatternAnalysis analyzeProblemPatternsForFile(String fileLocation) throws IOException, SerializationException {
String xml = IOUtils.toString(new FileInputStream(fileLocation), StandardCharsets.UTF_8);
final ProblemPatternAnalyzer problemPatternAnalyzer = new ProblemPatternAnalyzer();
@@ -81,11 +82,13 @@ private int getRequestedProblemOccurrencesSize(ProblemPatternDescription problem
@MethodSource
void analyzeRecord(String fileLocation, int totalPatterns, ProblemPatternDescription problemPatternDescription,
int totalOccurrences) throws Exception {
- final List problemPatterns = analyzeProblemPatternsForFile(fileLocation);
+ final ProblemPatternAnalysis problemPatternAnalysis = analyzeProblemPatternsForFile(fileLocation);
- assertNotNull(problemPatterns);
- assertEquals(totalPatterns, problemPatterns.size());
- assertEquals(problemPatternDescription, getRequestedProblemPattern(problemPatternDescription, problemPatterns));
- assertEquals(totalOccurrences, getRequestedProblemOccurrencesSize(problemPatternDescription, problemPatterns));
+ assertNotNull(problemPatternAnalysis);
+ assertEquals(totalPatterns, problemPatternAnalysis.getProblemPatterns().size());
+ assertEquals(problemPatternDescription,
+ getRequestedProblemPattern(problemPatternDescription, problemPatternAnalysis.getProblemPatterns()));
+ assertEquals(totalOccurrences,
+ getRequestedProblemOccurrencesSize(problemPatternDescription, problemPatternAnalysis.getProblemPatterns()));
}
}
\ No newline at end of file
diff --git a/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/ProblemOccurrenceTest.java b/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/ProblemOccurrenceTest.java
index a8a73e73f..f92d04a3e 100644
--- a/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/ProblemOccurrenceTest.java
+++ b/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/ProblemOccurrenceTest.java
@@ -16,9 +16,12 @@ void objectCreationTest() {
assertEquals("Duplicate titleA", problemOccurrence1.getMessageReport());
assertTrue(CollectionUtils.isEqualCollection(List.of("recordId2", "recordId1"), problemOccurrence1.getAffectedRecordIds()));
- final ProblemOccurrence problemOccurrence2 = new ProblemOccurrence("Duplicate titleB");
- assertNotNull(problemOccurrence2.getAffectedRecordIds());
+ final ProblemOccurrence problemOccurrence2 = new ProblemOccurrence("Duplicate titleA", null);
+ assertEquals("Duplicate titleA", problemOccurrence2.getMessageReport());
assertEquals(0, problemOccurrence2.getAffectedRecordIds().size());
- }
+ final ProblemOccurrence problemOccurrence3 = new ProblemOccurrence("Duplicate titleB");
+ assertNotNull(problemOccurrence3.getAffectedRecordIds());
+ assertEquals(0, problemOccurrence3.getAffectedRecordIds().size());
+ }
}
\ No newline at end of file
diff --git a/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/ProblemPatternAnalysisTest.java b/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/ProblemPatternAnalysisTest.java
new file mode 100644
index 000000000..3ce6ae647
--- /dev/null
+++ b/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/ProblemPatternAnalysisTest.java
@@ -0,0 +1,31 @@
+package eu.europeana.patternanalysis.view;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.util.List;
+import java.util.Set;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Unit tests for {@link ProblemPatternAnalysis}.
+ */
+class ProblemPatternAnalysisTest {
+
+  @Test
+  void objectCreationTest() {
+    final List<ProblemPattern> problemPatterns = List.of(new ProblemPattern(ProblemPatternDescription.P2, 1,
+        List.of(new RecordAnalysis("recordId", List.of(new ProblemOccurrence("message"))))));
+    final Set<String> titles = Set.of("titleA");
+    final ProblemPatternAnalysis problemPatternAnalysis = new ProblemPatternAnalysis("rdfAbout", problemPatterns, titles);
+
+    assertEquals("rdfAbout", problemPatternAnalysis.getRdfAbout());
+    // Expected value first, so that a failure message reports expected and actual the right way round.
+    assertEquals(problemPatterns.size(), problemPatternAnalysis.getProblemPatterns().size());
+    assertEquals(titles.size(), problemPatternAnalysis.getTitles().size());
+
+    assertThrows(NullPointerException.class, () -> new ProblemPatternAnalysis(null, problemPatterns, titles));
+    assertEquals(0, new ProblemPatternAnalysis("rdfAbout", null, titles).getProblemPatterns().size());
+    assertEquals(0, new ProblemPatternAnalysis("rdfAbout", problemPatterns, null).getTitles().size());
+  }
+}
\ No newline at end of file
diff --git a/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/ProblemPatternDescriptionTest.java b/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/ProblemPatternDescriptionTest.java
index 793c7935d..fe1b82e70 100644
--- a/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/ProblemPatternDescriptionTest.java
+++ b/metis-pattern-analysis/src/test/java/eu/europeana/patternanalysis/view/ProblemPatternDescriptionTest.java
@@ -1,44 +1,49 @@
package eu.europeana.patternanalysis.view;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import java.util.NoSuchElementException;
import org.junit.jupiter.api.Test;
class ProblemPatternDescriptionTest {
+  /** Asserts the id, title, severity and quality dimension exposed by one description constant. */
+  private void assertProblemPatternDescription(ProblemPatternDescription description, String expectedId,
+      String expectedTitle, String expectedSeverity, String expectedQualityDimension) {
+    assertEquals(expectedId, description.getProblemPatternId().toString());
+    assertEquals(expectedTitle, description.getProblemPatternTitle());
+    assertEquals(expectedSeverity, description.getProblemPatternSeverity().toString());
+    assertEquals(expectedQualityDimension, description.getProblemPatternQualityDimension().toString());
+  }
+
+ @Test
+ void checkValuesTest() { // pins the id, title, severity and quality dimension of each listed constant
+ assertProblemPatternDescription(ProblemPatternDescription.P1, "P1", "Systematic use of the same title", "WARNING",
+ "CONCISENESS");
+ assertProblemPatternDescription(ProblemPatternDescription.P2, "P2", "Equal title and description fields", "WARNING",
+ "CONCISENESS");
+ assertProblemPatternDescription(ProblemPatternDescription.P3, "P3", "Near-Identical title and description fields", "NOTICE",
+ "CONCISENESS");
+ assertProblemPatternDescription(ProblemPatternDescription.P5, "P5", "Unrecognizable title", "NOTICE", "ACCURACY");
+ assertProblemPatternDescription(ProblemPatternDescription.P6, "P6", "Non-meaningful title", "NOTICE", "ACCURACY");
+ assertProblemPatternDescription(ProblemPatternDescription.P7, "P7", "Missing description fields", "NOTICE", "COMPLETENESS");
+ assertProblemPatternDescription(ProblemPatternDescription.P9, "P9", "Very short description", "WARNING", "ACCURACY");
+ assertProblemPatternDescription(ProblemPatternDescription.P12, "P12", "Extremely long values", "NOTICE", "ACCURACY");
+ }
+
@Test
- void checkValues() {
- assertEquals("P1", ProblemPatternDescription.P1.getProblemPatternId().toString());
- assertEquals("WARNING", ProblemPatternDescription.P1.getProblemPatternSeverity().toString());
- assertEquals("CONCISENESS", ProblemPatternDescription.P1.getProblemPatternQualityDimension().toString());
-
- assertEquals("P2", ProblemPatternDescription.P2.getProblemPatternId().toString());
- assertEquals("WARNING", ProblemPatternDescription.P2.getProblemPatternSeverity().toString());
- assertEquals("CONCISENESS", ProblemPatternDescription.P2.getProblemPatternQualityDimension().toString());
-
- assertEquals("P3", ProblemPatternDescription.P3.getProblemPatternId().toString());
- assertEquals("NOTICE", ProblemPatternDescription.P3.getProblemPatternSeverity().toString());
- assertEquals("CONCISENESS", ProblemPatternDescription.P3.getProblemPatternQualityDimension().toString());
-
- assertEquals("P5", ProblemPatternDescription.P5.getProblemPatternId().toString());
- assertEquals("NOTICE", ProblemPatternDescription.P5.getProblemPatternSeverity().toString());
- assertEquals("ACCURACY", ProblemPatternDescription.P5.getProblemPatternQualityDimension().toString());
-
- assertEquals("P6", ProblemPatternDescription.P6.getProblemPatternId().toString());
- assertEquals("NOTICE", ProblemPatternDescription.P6.getProblemPatternSeverity().toString());
- assertEquals("ACCURACY", ProblemPatternDescription.P6.getProblemPatternQualityDimension().toString());
-
- assertEquals("P7", ProblemPatternDescription.P7.getProblemPatternId().toString());
- assertEquals("NOTICE", ProblemPatternDescription.P7.getProblemPatternSeverity().toString());
- assertEquals("COMPLETENESS", ProblemPatternDescription.P7.getProblemPatternQualityDimension().toString());
-
- assertEquals("P9", ProblemPatternDescription.P9.getProblemPatternId().toString());
- assertEquals("WARNING", ProblemPatternDescription.P9.getProblemPatternSeverity().toString());
- assertEquals("ACCURACY", ProblemPatternDescription.P9.getProblemPatternQualityDimension().toString());
-
- assertEquals("P12", ProblemPatternDescription.P12.getProblemPatternId().toString());
- assertEquals("NOTICE", ProblemPatternDescription.P12.getProblemPatternSeverity().toString());
- assertEquals("ACCURACY", ProblemPatternDescription.P12.getProblemPatternQualityDimension().toString());
+ void fromNameTest() { // each listed id string must resolve to the constant of the same name
+ assertEquals(ProblemPatternDescription.P1, ProblemPatternDescription.fromName("P1"));
+ assertEquals(ProblemPatternDescription.P2, ProblemPatternDescription.fromName("P2"));
+ assertEquals(ProblemPatternDescription.P3, ProblemPatternDescription.fromName("P3"));
+ assertEquals(ProblemPatternDescription.P5, ProblemPatternDescription.fromName("P5"));
+ assertEquals(ProblemPatternDescription.P6, ProblemPatternDescription.fromName("P6"));
+ assertEquals(ProblemPatternDescription.P7, ProblemPatternDescription.fromName("P7"));
+ assertEquals(ProblemPatternDescription.P9, ProblemPatternDescription.fromName("P9"));
+ assertEquals(ProblemPatternDescription.P12, ProblemPatternDescription.fromName("P12"));
+
+ assertThrows(NoSuchElementException.class, () -> ProblemPatternDescription.fromName("invalid")); // unknown names are rejected
}
}
\ No newline at end of file
From 7530667d811826cbdc34b0fdc39e7589aa2e896e Mon Sep 17 00:00:00 2001
From: Jorge Ortiz
Date: Fri, 20 May 2022 12:34:51 +0200
Subject: [PATCH 36/73] MET-4375_MET-4541 RecordView unit test (#534)
---
metis-repository/pom.xml | 199 +++++++++---------
.../metis/repository/rest/RecordViewTest.java | 47 +++++
2 files changed, 146 insertions(+), 100 deletions(-)
create mode 100644 metis-repository/src/test/java/eu/europeana/metis/repository/rest/RecordViewTest.java
diff --git a/metis-repository/pom.xml b/metis-repository/pom.xml
index 49f2783a0..418f2a7b6 100644
--- a/metis-repository/pom.xml
+++ b/metis-repository/pom.xml
@@ -1,101 +1,100 @@
-
-
- metis-framework
- eu.europeana.metis
- 7-SNAPSHOT
-
- 4.0.0
- metis-repository
- war
-
-
- javax.xml.bind
- jaxb-api
-
-
- org.glassfish.jaxb
- jaxb-runtime
-
-
- org.apache.logging.log4j
- log4j-slf4j-impl
-
-
- org.springframework
- spring-core
- ${version.spring}
-
-
- org.springframework
- spring-webmvc
- ${version.spring}
-
-
- javax.servlet
- javax.servlet-api
- ${version.servlet.api}
- provided
-
-
- io.springfox
- springfox-swagger2
- ${version.swagger}
-
-
- io.springfox
- springfox-swagger-ui
- ${version.swagger}
-
-
- com.fasterxml.jackson.core
- jackson-annotations
- ${version.jackson}
-
-
- com.fasterxml.jackson.dataformat
- jackson-dataformat-xml
- ${version.jackson}
-
-
- com.fasterxml.jackson.datatype
- jackson-datatype-jsr310
- ${version.jackson}
-
-
- eu.europeana.metis
- metis-common-utils
- ${project.version}
-
-
- eu.europeana.metis
- metis-common-mongo
- ${project.version}
-
-
- eu.europeana.metis
- metis-harvesting
- ${project.version}
-
-
- org.junit.jupiter
- junit-jupiter-api
-
-
- org.junit.jupiter
- junit-jupiter-engine
-
-
-
-
-
- org.apache.maven.plugins
- maven-war-plugin
- ${version.maven.war.plugin}
-
- false
-
-
-
-
-
+
+
+ metis-framework
+ eu.europeana.metis
+ 7-SNAPSHOT
+
+ 4.0.0
+ metis-repository
+ war
+
+
+ javax.xml.bind
+ jaxb-api
+
+
+ org.glassfish.jaxb
+ jaxb-runtime
+
+
+ org.apache.logging.log4j
+ log4j-slf4j-impl
+
+
+ org.springframework
+ spring-core
+ ${version.spring}
+
+
+ org.springframework
+ spring-webmvc
+ ${version.spring}
+
+
+ javax.servlet
+ javax.servlet-api
+ ${version.servlet.api}
+ provided
+
+
+ io.springfox
+ springfox-swagger2
+ ${version.swagger}
+
+
+ io.springfox
+ springfox-swagger-ui
+ ${version.swagger}
+
+
+ com.fasterxml.jackson.core
+ jackson-annotations
+ ${version.jackson}
+
+
+ com.fasterxml.jackson.dataformat
+ jackson-dataformat-xml
+ ${version.jackson}
+
+
+ com.fasterxml.jackson.datatype
+ jackson-datatype-jsr310
+ ${version.jackson}
+
+
+ eu.europeana.metis
+ metis-common-utils
+ ${project.version}
+
+
+ eu.europeana.metis
+ metis-common-mongo
+ ${project.version}
+
+
+ eu.europeana.metis
+ metis-harvesting
+ ${project.version}
+
+
+ org.junit.jupiter
+ junit-jupiter
+ ${version.junit}
+ test
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-war-plugin
+ ${version.maven.war.plugin}
+
+ false
+
+
+
+
+
\ No newline at end of file
diff --git a/metis-repository/src/test/java/eu/europeana/metis/repository/rest/RecordViewTest.java b/metis-repository/src/test/java/eu/europeana/metis/repository/rest/RecordViewTest.java
new file mode 100644
index 000000000..ead7b8c8a
--- /dev/null
+++ b/metis-repository/src/test/java/eu/europeana/metis/repository/rest/RecordViewTest.java
@@ -0,0 +1,47 @@
+package eu.europeana.metis.repository.rest;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+
+import java.time.Instant;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Checks that every {@link RecordView} getter returns the value passed to the constructor.
+ */
+class RecordViewTest {
+
+  private RecordView viewUnderTest;
+
+  @BeforeEach
+  void setup() {
+    viewUnderTest = new RecordView("recordId", "datasetId",
+        Instant.parse("2022-05-19T08:28:00.823118Z"), false, "edmRecord");
+  }
+
+  @Test
+  void getRecordId() {
+    assertEquals("recordId", viewUnderTest.getRecordId());
+  }
+
+  @Test
+  void getDatasetId() {
+    assertEquals("datasetId", viewUnderTest.getDatasetId());
+  }
+
+  @Test
+  void getDateStamp() {
+    assertEquals("2022-05-19T08:28:00.823118Z", viewUnderTest.getDateStamp().toString());
+  }
+
+  @Test
+  void isMarkedAsDeleted() {
+    assertFalse(viewUnderTest.isMarkedAsDeleted());
+  }
+
+  @Test
+  void getEdmRecord() {
+    assertEquals("edmRecord", viewUnderTest.getEdmRecord());
+  }
+}
From a563905fe9b0af34f284e6ffc7c032fe23722dc4 Mon Sep 17 00:00:00 2001
From: Jorge Ortiz
Date: Fri, 20 May 2022 12:49:57 +0200
Subject: [PATCH 37/73] MET-4375_MET-4539 RecordController unit tests (#536)
* MET-4375_MET-4539 RecordController unit tests
* MET-4375_MET-4539 resource files for unit tests
* MET-4375_MET-4539 remove mockito code smells
---
metis-repository/pom.xml | 18 ++
.../repository/rest/RecordControllerTest.java | 281 ++++++++++++++++++
.../test/resources/repository-test-error.zip | Bin 0 -> 3274 bytes
.../src/test/resources/repository-test.zip | Bin 0 -> 3282 bytes
4 files changed, 299 insertions(+)
create mode 100644 metis-repository/src/test/java/eu/europeana/metis/repository/rest/RecordControllerTest.java
create mode 100644 metis-repository/src/test/resources/repository-test-error.zip
create mode 100644 metis-repository/src/test/resources/repository-test.zip
diff --git a/metis-repository/pom.xml b/metis-repository/pom.xml
index 418f2a7b6..13f332383 100644
--- a/metis-repository/pom.xml
+++ b/metis-repository/pom.xml
@@ -84,6 +84,24 @@
${version.junit}
test
+
+ org.mockito
+ mockito-core
+
+
+ com.jayway.jsonpath
+ json-path
+ test
+
+
+ org.springframework
+ spring-webmvc
+ ${version.spring}
+
+
+ org.springframework
+ spring-test
+
diff --git a/metis-repository/src/test/java/eu/europeana/metis/repository/rest/RecordControllerTest.java b/metis-repository/src/test/java/eu/europeana/metis/repository/rest/RecordControllerTest.java
new file mode 100644
index 000000000..681293507
--- /dev/null
+++ b/metis-repository/src/test/java/eu/europeana/metis/repository/rest/RecordControllerTest.java
@@ -0,0 +1,281 @@
+package eu.europeana.metis.repository.rest;
+
+import static org.hamcrest.core.Is.is;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.reset;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.delete;
+import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get;
+import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.multipart;
+import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post;
+import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.put;
+import static org.springframework.test.web.servlet.result.MockMvcResultHandlers.print;
+import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.content;
+import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath;
+import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
+
+import eu.europeana.metis.repository.dao.Record;
+import eu.europeana.metis.repository.dao.RecordDao;
+import eu.europeana.metis.utils.RestEndpoints;
+import java.io.InputStream;
+import java.time.Instant;
+import org.jetbrains.annotations.NotNull;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.springframework.http.MediaType;
+import org.springframework.mock.web.MockMultipartFile;
+import org.springframework.test.web.servlet.MockMvc;
+import org.springframework.test.web.servlet.setup.MockMvcBuilders;
+import org.springframework.web.server.ResponseStatusException;
+
+/**
+ * Unit test for {@link RecordController} class
+ */
+class RecordControllerTest {
+
+ private static RecordDao recordDaoMock;
+ private static MockMvc recordControllerMock;
+ private static RecordController recordController;
+
+  /** Creates the shared DAO mock and a standalone MockMvc around one controller instance, once for all tests. */
+  @BeforeAll
+  static void setup() {
+    recordDaoMock = mock(RecordDao.class);
+    recordController = new RecordController();
+    recordControllerMock = MockMvcBuilders.standaloneSetup(recordController).build();
+  }
+
+ @AfterEach
+ void cleanUp() {
+ reset(recordDaoMock); // the mock is static and shared, so stubbing and recorded calls are cleared after each test
+ }
+
+  @Test
+  void setRecordDaoAndGetRecord() {
+    // Arrange: the DAO hands out a known record for the requested id.
+    final Record storedRecord = getTestRecord();
+    when(recordDaoMock.getRecord("recordId")).thenReturn(storedRecord);
+    recordController.setRecordDao(recordDaoMock);
+    // Act: call the controller method directly, bypassing MockMvc.
+    final RecordView returnedView = recordController.getRecord("recordId");
+    // Assert: every field of the view mirrors the stored record.
+    assertEquals(storedRecord.getRecordId(), returnedView.getRecordId());
+    assertEquals(storedRecord.getDatasetId(), returnedView.getDatasetId());
+    assertEquals(storedRecord.getDateStamp(), returnedView.getDateStamp());
+    assertEquals(storedRecord.isDeleted(), returnedView.isMarkedAsDeleted());
+    assertEquals(storedRecord.getEdmRecord(), returnedView.getEdmRecord());
+  }
+
+  @Test
+  void setRecordDaoAndGetRecord_expectException() {
+    // The DAO finds nothing for the id; the controller is expected to raise a 404 ResponseStatusException.
+    when(recordDaoMock.getRecord("recordId")).thenReturn(null);
+    recordController.setRecordDao(recordDaoMock);
+
+    final ResponseStatusException notFound =
+        assertThrows(ResponseStatusException.class, () -> recordController.getRecord("recordId"));
+
+    assertEquals("404 NOT_FOUND \"No record found for this identifier.\"", notFound.getMessage());
+  }
+
+  @Test
+  void getRecordViaController() throws Exception {
+    // Routes the lookup through MockMvc so that the HTTP status and the serialized body are checked.
+    when(recordDaoMock.getRecord("recordId")).thenReturn(getTestRecord());
+    recordController.setRecordDao(recordDaoMock);
+
+    recordControllerMock.perform(get(RestEndpoints.REPOSITORY_RECORDS_RECORD_ID, "recordId")
+            .content(""))
+        .andDo(print())
+        .andExpect(status().isOk())
+        .andExpect(content().string(getXMLTestRecord()));
+
+    verify(recordDaoMock).getRecord("recordId");
+  }
+
+  @Test
+  void getRecordViaController_notFound() throws Exception {
+    when(recordDaoMock.getRecord("recordId")).thenReturn(null);
+    recordController.setRecordDao(recordDaoMock);
+    // A missing record must surface as 404 with an empty response body.
+    recordControllerMock.perform(get(RestEndpoints.REPOSITORY_RECORDS_RECORD_ID, "recordId").content(""))
+        .andDo(print())
+        .andExpect(status().isNotFound())
+        .andExpect(content().string(""));
+
+    verify(recordDaoMock).getRecord("recordId");
+  }
+
+  @Test
+  void saveRecord() throws Exception {
+    when(recordDaoMock.createRecord(any(Record.class))).thenReturn(true);
+    recordController.setRecordDao(recordDaoMock);
+    recordControllerMock.perform(post(RestEndpoints.REPOSITORY_RECORDS_RECORD_ID, "recordId")
+            .contentType(MediaType.APPLICATION_XML)
+            .param("datasetId", "datasetId")
+            // NOTE(review): sibling tests send "dateStamp"; confirm this lower-case name is really bound.
+            .param("datestamp", "+1000000000-12-31T23:59:59.999999999Z")
+            .param("markAsDeleted", "false").content("edmRecord"))
+        .andDo(print())
+        .andExpect(status().isOk())
+        .andExpect(jsonPath("$.datasetId", is("datasetId")))
+        .andExpect(jsonPath("$.dateStamp").exists())
+        .andExpect(jsonPath("$.insertedRecords", is(1)))
+        .andExpect(jsonPath("$.updatedRecords", is(0)))
+        .andExpect(jsonPath("$.insertedRecordIds").isArray())
+        .andExpect(jsonPath("$.insertedRecordIds").isNotEmpty())
+        .andExpect(jsonPath("$.updatedRecordIds").isArray())
+        .andExpect(jsonPath("$.updatedRecordIds").isEmpty());
+    verify(recordDaoMock).createRecord(any());
+  }
+
+  @Test
+  void saveRecord_Exception() throws Exception {
+    // A DAO failure while storing is expected to surface as an HTTP 500.
+    when(recordDaoMock.createRecord(any(Record.class))).thenThrow(new RuntimeException("Fail to save record"));
+    recordController.setRecordDao(recordDaoMock);
+    recordControllerMock.perform(post(RestEndpoints.REPOSITORY_RECORDS_RECORD_ID, "recordId")
+            .contentType(MediaType.APPLICATION_XML)
+            .param("datasetId", "datasetId")
+            .param("dateStamp", "+1000000000-12-31T23:59:59.999999999Z")
+            .param("markAsDeleted", "false").content("edmRecord"))
+        .andDo(print())
+        .andExpect(status().isInternalServerError());
+    verify(recordDaoMock).createRecord(any());
+  }
+
+  @Test
+  void saveRecords() throws Exception {
+    // repository-test.zip is a test resource; the assertions below expect two records to be stored from it.
+    final InputStream zipContent = getClass().getClassLoader().getResourceAsStream("repository-test.zip");
+    final MockMultipartFile zipUpload =
+        new MockMultipartFile("recordsZipFile", "repository-test.zip", "application/zip", zipContent);
+    when(recordDaoMock.createRecord(any(Record.class))).thenReturn(true);
+    recordController.setRecordDao(recordDaoMock);
+
+    recordControllerMock.perform(multipart(RestEndpoints.REPOSITORY_RECORDS)
+            .file(zipUpload)
+            .param("datasetId", "datasetId")
+            .param("dateStamp", "+1000000000-12-31T23:59:59.999999999Z")
+            .contentType(MediaType.MULTIPART_FORM_DATA_VALUE))
+        .andDo(print())
+        .andExpect(status().isOk())
+        .andExpect(jsonPath("$.datasetId", is("datasetId")))
+        .andExpect(jsonPath("$.dateStamp").exists())
+        .andExpect(jsonPath("$.insertedRecords", is(2)))
+        .andExpect(jsonPath("$.updatedRecords", is(0)))
+        .andExpect(jsonPath("$.insertedRecordIds").isArray())
+        .andExpect(jsonPath("$.insertedRecordIds").isNotEmpty())
+        .andExpect(jsonPath("$.updatedRecordIds").isArray())
+        .andExpect(jsonPath("$.updatedRecordIds").isEmpty());
+    verify(recordDaoMock, times(2)).createRecord(any());
+  }
+
+  @Test
+  void saveRecords_Exception() throws Exception {
+    // With repository-test-error.zip the upload is expected to fail with a 500 before anything is stored.
+    final InputStream zipContent = getClass().getClassLoader().getResourceAsStream("repository-test-error.zip");
+    final MockMultipartFile zipUpload =
+        new MockMultipartFile("recordsZipFile", "repository-test-error.zip", "application/zip", zipContent);
+    when(recordDaoMock.createRecord(any(Record.class))).thenReturn(true);
+    recordController.setRecordDao(recordDaoMock);
+
+    recordControllerMock.perform(multipart(RestEndpoints.REPOSITORY_RECORDS)
+            .file(zipUpload)
+            .param("datasetId", "datasetId")
+            .param("dateStamp", "+1000000000-12-31T23:59:59.999999999Z")
+            .contentType(MediaType.MULTIPART_FORM_DATA_VALUE))
+        .andDo(print())
+        .andExpect(status().isInternalServerError());
+    verify(recordDaoMock, times(0)).createRecord(any());
+  }
+
+  @Test
+  void updateRecordHeader() throws Exception {
+    when(recordDaoMock.getRecord("recordId")).thenReturn(getTestRecord());
+    when(recordDaoMock.createRecord(any(Record.class))).thenReturn(false);
+    recordController.setRecordDao(recordDaoMock);
+    recordControllerMock.perform(put(RestEndpoints.REPOSITORY_RECORDS_RECORD_ID_HEADER, "recordId")
+            .contentType(MediaType.APPLICATION_XML)
+            .param("datasetId", "datasetId")
+            // NOTE(review): sibling tests send "dateStamp"; confirm this lower-case name is really bound.
+            .param("datestamp", "+1000000000-12-31T23:59:59.999999999Z")
+            .param("markAsDeleted", "false").content("edmRecord"))
+        .andDo(print())
+        .andExpect(status().isOk())
+        .andExpect(jsonPath("$.datasetId", is("datasetId")))
+        .andExpect(jsonPath("$.dateStamp").exists())
+        .andExpect(jsonPath("$.insertedRecords", is(0)))
+        .andExpect(jsonPath("$.updatedRecords", is(1)))
+        .andExpect(jsonPath("$.insertedRecordIds").isArray())
+        .andExpect(jsonPath("$.insertedRecordIds").isEmpty())
+        .andExpect(jsonPath("$.updatedRecordIds").isArray())
+        .andExpect(jsonPath("$.updatedRecordIds").isNotEmpty());
+    verify(recordDaoMock).getRecord("recordId");
+    verify(recordDaoMock).createRecord(any());
+  }
+
+  @Test
+  void updateRecordHeader_Exception() throws Exception {
+    when(recordDaoMock.getRecord("recordId")).thenReturn(null);
+    recordController.setRecordDao(recordDaoMock);
+    recordControllerMock.perform(put(RestEndpoints.REPOSITORY_RECORDS_RECORD_ID_HEADER, "recordId")
+            .contentType(MediaType.APPLICATION_XML)
+            .param("datasetId", "datasetId")
+            // NOTE(review): sibling tests send "dateStamp"; confirm this lower-case name is really bound.
+            .param("datestamp", "+1000000000-12-31T23:59:59.999999999Z")
+            .param("markAsDeleted", "false").content("edmRecord"))
+        .andDo(print())
+        .andExpect(status().is(404));
+    verify(recordDaoMock, times(1)).getRecord("recordId");
+    verify(recordDaoMock, times(0)).createRecord(any()); // a header update for an unknown record must not write anything
+  }
+
+  @Test
+  void deleteRecord() throws Exception {
+    when(recordDaoMock.deleteRecord("recordId")).thenReturn(true);
+    recordController.setRecordDao(recordDaoMock);
+    recordControllerMock.perform(delete(RestEndpoints.REPOSITORY_RECORDS_RECORD_ID, "recordId").content(""))
+        .andExpect(status().is(200))
+        .andExpect(content().string(""));
+    verify(recordDaoMock, times(1)).deleteRecord("recordId"); // the DAO interaction is checked as in the other endpoint tests
+  }
+
+  @Test
+  void deleteRecord_notFound() throws Exception {
+    when(recordDaoMock.deleteRecord("recordId")).thenReturn(false);
+    recordController.setRecordDao(recordDaoMock);
+    recordControllerMock.perform(delete(RestEndpoints.REPOSITORY_RECORDS_RECORD_ID, "recordId").content(""))
+        .andExpect(status().is(404))
+        .andExpect(content().string(""));
+    verify(recordDaoMock, times(1)).deleteRecord("recordId"); // the 404 must come from the DAO reporting nothing deleted
+  }
+
+  @NotNull
+  private Record getTestRecord() {
+    final Record fixture = new Record();
+    fixture.setDatasetId("datasetId");
+    fixture.setRecordId("recordId");
+    fixture.setDateStamp(Instant.MAX); // Instant.MAX prints as +1000000000-12-31T23:59:59.999999999Z
+    fixture.setDeleted(false);
+    fixture.setEdmRecord("edmRecord");
+    return fixture;
+  }
+
+ @NotNull
+ private String getXMLTestRecord() { // expected response body for the record built by getTestRecord()
+ return "" + // NOTE(review): the XML element tags seem to be missing from these literals in this copy of the patch - verify against the repository
+ "recordId" +
+ "datasetId" +
+ "+1000000000-12-31T23:59:59.999999999Z" +
+ "false" +
+ "edmRecord" +
+ "";
+ }
+}
diff --git a/metis-repository/src/test/resources/repository-test-error.zip b/metis-repository/src/test/resources/repository-test-error.zip
new file mode 100644
index 0000000000000000000000000000000000000000..81e4d83617f51657e798c0353ba8bee9cd779b4b
GIT binary patch
literal 3274
zcmbW3c{mjM8plUtNS35&Vyq=(Z5Yc)_DJ@u31yGL*s~;*P%+lAO|r#E_QAmrS&wC!
zEK@Xgk}abUS%#Z)Zs(qR@45GR?tS0qxBc_|KJP!D-xt7%sWt=rsn`Jm0AFWsFF!Yb
zFW+EEe`i1c3+86507jTkv;+LdIy?h{m;rS33qSzix6N1&NDl@a{n6dtu79ZM9|r)=
z{e_w$94;**4VRZhxdxg#$XQ4Qd3yZj&}}B-0Q#YRxcaQG4Ay<9cq+-d4veN%FrulH|G?zud%oYjFZaUBOK*TlnrH9s`H(Upn`dy1#jV
zoU6WFXQk_6S4v)TE8_olksds(@$$Rdd_qF{cnYa{AQCPp+XMU4#`!Y}wfEUAcF=@~F6Uy1Ho^ZDZ3zpRXVrGi!;O0|nMIy<^i
z1iQ$H@G6D4-15mEzN{!|GLes#U6W}HLEBXja|bZ+Hy;qm$o$1Vu#7P&
z|21mNfj97GhHU*AL=-4`a}*h=QlmAYs;pedxo6j}^|-tY<5)NT3z>;DmOT?#Y;Liq5`DH#751Y34YW{Ckl0
zwz1IMx9{IgB^RSW{Ly?vT~E1jM1JGki_a&mdgGqy`j$V;Y)^+1AT;S6DCWVFCi^{i
zy3OnnML0|-$4U$q^C@gd_0mlsV{c1k3r!L*#xxGTdxRATg<
zut_EadOgFv*8A=4`dT(^JXHWwy)5CZxpG*9nLzLn-RGQ?!4$RWLG%5C@tlKwA?kj7
zcPgF)RyWthx9J+LVa)ZGDF_^o^hMCwq#3ryfXRN7>0V|r1Cxex>$-&;9F5g#29Qcs
zuvGC2QrpEP7a3uS6A>MH#eJtXF?7j^fv(C%_2ZMa5Qf!e0ONET}i0-TdRp$jIUV
zXkkXjc_UGWR-LrL#7;%
z-rQ+_p7J6bIw!UtA27)PinYfq%RVYQ>ZwuL|4z7O1KAj*dDnIaLSVC}2Z>MP7=WV8
zsRS>+Uwqk}Lm;K4V0m;m!sm`#ei)vu1Y)6qINBbPXSsCb^yLZ*vq&h?Lwf(nm}i2L
z{CQG;K>qqz-6CkIBMDKz3nA1tdDvs;5oX`WVeD5G0S
zONM}N#3@HzfjL``ntma{VOJIV^5XJMYF|9nHgjo#5=Iv{1nEXaq2x|7(=uxz)lu
zE-&?zoIk#103}=swC}nby6m)cB3~^mKu2Xk7St|RJ+iz|e1{FV&EgR)E%-WDPE|X|
zy0a)B!xbEAEHw5($y(0#2q9_xV}XlPkaLvZfv3{dU>eZ7j%63Hj2L!4jeZVmn69r
zlWgx|g&q0kI2rgdl8(%;UjV(+XWN3fp7$v}rLjbv`WPhsD5`eeys%TwW*|LcGA`WyW5eatphpHhT5{5a(jb%5!d~LN709tNzD`R=LXZLFj+7-pk6t&aRF9xjlU^&M>Am_!={Z{Va;@*g2=g9Y;>9L{s>4%**}ndjcgmUFpk#mL!!
zNguwa&6}g%+JEv|zw)`0T1ECmD+EO$l`E}`?i$#Sr$g3&VHXltxfBuQeL03V@!1db
zW35zD{9JXx980Yb6o20~v{w?Az`0QYqezxfW1N-@8Kt*0vCRcRO$pO?lxA&?weOxs
z_xJ0`J`p~TCFY2@$z(;4QvItRJD3_@?f)=e`+#N_Q-;gpCgC2FGxJu#;M`Z5QMCF_
zVJaRkM)*N~p=dV35AyqKsUQ>;^EmgsN3NiotzF+XZ#J5UAl4b5aK{s7`q<<`pipH>
zEbFMV#QP>V2{=AQJx+R8&2Enz#N$(ZQ~MZ{8UK}o4*#eZk6IiyVLXAXaf~7t
zZe5Q0PF4Z^S|*{48sG&gzy&hRKuPg16X>#}HB3ui`x2jMX6w>j&(F%#aTQCV;@N_A
z3tuA%1d)C7R1!9HccntLqfYfCM^GVzquTP!v&ajlq)=*t5T+vgBK%Z5^Ji^-Set<}
z?Sa*Ogk=9U<2E6KM^6(eNiJOwVuI!X3(e$e*s3$#iKJMMH$9NnfL@JH0bbyD_tKD11?0!(*fG0UXZ8hC0&)J|((g
z;|E(8z9X*2TrFM?hjG8WQN`0$cl7AW|2
zLTzF_qmsJyY}3{p)JYH6?vuntn{
zHe$Gi*qb@uan$$amz78xnRs9fUeChp32zd2A3aZ}e2eFeG@FNM%4bz)n^YybQ*SH?
zuJs^p!S?VxaoQRxE?mvm9>%cCZnHeFxqh;LIC@eYHn9=nAG+u=>UZJQ1KXBUvuY?7
zww0zu(u-PIwOoB#xbtE
z7P7GAujSQOjC+|hD#hwqPQ1b$x#=qYAgvk=_KF+`&bID$WJcN2c?j~a*fQStEUau$
zmS-!!z)k=$dByd~QoaTUy(um^%a&Dqf1~GH`>X9uC2L*I;;IUmIo>Cnc^sEY4RJx<
zqc#p<41!G@IO`2LSXIv5Jci8R;VmWVAm_!;7=&l2OfosRt-Q$}rG%Gk|H3pxT-o}n
zNm_uUw&Z~!`IkM$6i
zON0W=MtT6?5Q)ZmbVtBI@NbFxud?^gC{9y9y#4P<{CD-AS^UqKKFs1jtLDDpU#R~-
v$^7R$hmME%|MRT>>}bI7@ALfK@qhXZjP*c2lmGzWkJg|A0794#U&sCiqU**|
literal 0
HcmV?d00001
diff --git a/metis-repository/src/test/resources/repository-test.zip b/metis-repository/src/test/resources/repository-test.zip
new file mode 100644
index 0000000000000000000000000000000000000000..d0658faf2297b2dcb4f24ca80617d58316d5d8c6
GIT binary patch
literal 3282
zcmbW3c{mjM8plUtEXR^GO^mgKtPNus$)1vZUqW^@OJg_@bVY)aq{-LU}DS!poQIzvWI`~qw|g*1AvNp0SEy6*z|v>(E;cH-JULg
zsA(Su0M7k|nj9Q1At?cu5ktB78re&miu<|W`r*)RENBnqTntkm~bquJZ{AY2(Sns{U{|pFsQZ3*z*eBJyDD
zoSysx@9o5awzhnYf|(OT>?WxJly)Kzzei@1pS>HYaz|!B0Oyiw#NaF-H*C!k$56&p
z;^|~@MI+Wv74E4G`}LWrZsEmL+eOvD?S|s=xRsuMowAjvuC?2rok+_HxF$uk%3z9Z
ztlrttjY8N(S{&QK(^SDkiH*ySKD0fT-yzw&KUy^41h6`$A$>1vESX|7JEO?}k{oQlKKcd1qsa=Rxw
zJ*zTqi${L6&Zl{{$`93$)gWYWU2Ssl1PbdG=%5T`qNLU&8v@X_<$XE*82Ib=h)2l0
z#aCcSePZ5A)R;Y&?~Qb+x-*DKP}JrqGF-7*eL_h=p@41AmZbimtQ6x=JN|-9PaMmd
z2{5$`OumnvSVwbzii~oHLy!~D`7Bh$V3YTq4}2Se*Wb)_JG}AKh+0KwU89Ja-;Bf-BSAb-+=E?@IdFYE`Z*V$PFnQFJ<;?ki^ynCgA*Y85<5^#
z*uzGNaztU+|maJV?I91Upnv@DtcTDA|
z2zp(8xg7O=85
zR4MB~DwM$DMbC(B7nhtR1x6hE(1BOoc4`s>mmF!RDs81B3^e5ATFD7}B=bzDC!WRR
z!^!aQqC3!n^p5j-LiVj1sRPT~VBa}(p`KGIA=jlPfSeVYSXZz)(j8l=1XmgB7M+lc
za8vH89^%;CX@8phEEqZ`d=P(Uk_HrOhgp`2DW!8)tsuP>sNO&}M5^7h-hmL9?WjQ_
z(>NNS5JL*VgZqUSi&Frkqy#L3?nd0c>zWsYXD)`Asv_vx19Hulj-9?-ZfYD3Mc$G)
zI5y^4ffL?LRzv-|=R~4Sl)_*TLXguUL|+
z9u-LIn%t7k>lJd!L6djR`c^fGpKr*8kyGxozw*x8F<3|Qkas6It1&eYusj+9jOY0h
zDR*wQ;I8v?EqSL8FKIvtmwfHI?gcJ8E}h6z4!WbExF7{;m#!LKUMRZD4BTeC6(zy<
zGDlkJil1d?VIGFvKU`Li^Hcai&!RzXaKIkhc!T3W&U_^6LZmL&7-d+MJ-*PY#zz+H
zkhJ40o#_*^L#%{^s33u+bFekEN!9+Kj4bDghFdMH!0Exav1zNsg(W#rZ9n-+wMe4coSfekxtnQ~KmTl8aV`Dh?SpLK)x84l8rw+-4PDzT224
zj=yxX{H3Iv)s5DymYzWMrTlp8Eo^^%%X%t;?LecSKT*8ZaiPKEmWo||$H%Xu_`AWt
zabpoNqH7V+`W{xmfqRaPhC4m!*!=nh&|7WhEr`qc+eN2Tmnc&o{6u0RYvxTFYO;m*
zas1ZV+DbP+9+stwjc!Z!r-e+$1>1e7zntP1qtl}TA)`8STWhp+Nw^5#;}gfBSotf8bw|>3`y1`ZxT0%lv`=qP+0`j(;uhQ5lxz
zc{5u~vAVw3jd#Pn2;DIpS}Y#XH`Af2q2)=*5HjTQbhJ(Jn
zw`!tq*Xy$D9B(m(d}$h&Gt%$5Q@s^YasZ8c>|NW%`$Q}5wGeiW9Qs6y8O1|A_>hCa
z&B}U+zL!<;DacPC{U+JP1JlEgAQ;>1PtF&6Z!NvgmqNu>D#kZ64sN95fY|U{w@&8l
z%atp7PCE43@VzTs*($B1lLjP(b0^gc?fNVbBeE&%sijl~U~kS2DIJg}VoMF%obT6|Fnw2k
z*6Nq`-ScP?NlWUXz=1>ot>S^3;AqE<9&A0BS3GD>qwSHMk&zun
z?`uhO7U4A7SsxMzzSlQwi^?}Q*Uz~(YdL$4{75PB_!Rfl0S0BvbM>&pC-PZLi~S~y
z)0ZiZR`7z&<;ZVjMbNLMq6(;fE}$%&H^Uf|6c00mE{j>h)U~f%;ugwiUApJ~Nr5u1
zXx1lpHhbYWp?IC_=QvAC}lnfeW6_;ekz{f
z(-j_An~u|dm_;Z;jAWqS#;+6eIB_J&xeL;lpw`d0Z+IQ&y)AHACTt<`s{G0{EwiU!tlyJ=0ISGO)19epXq-W25c^?9avq
zI#KyP?sLV)540|PLtKl#R!7z;9EN!6o$@ty95Xz%hB=MpC
zLyZbk6x>|Dvia~LRq<4ETOZ#(w&=C^dWNqEpV>=k@&aO1PC|JTMxsWorD8D~aP3HD{+xi~IFdtB6#Evr4m#
zN}}B<*BAKKdJs0SJv?XJ6;(xN_GW{KXcnn$#xN`Mlcb@jNfp?{Mu1P?qVuTtg{Cm;
zmQ%CJC`RU$#zo?@8jvLh#{AwRzI~A5nwhyjTfz8rYG})2^8TfQ<@q|{MPk{QD9JKC
zh$h-0uB-;Ku;ruf@u~>-JZV&%$-S)aDo6OHi%3{%6&maj-tV7f+3mo9vZlJl$FpKh
z8|q$AQLiAwTy}wl0HXJZdnGmU#ozCBQSn*k%%aeZp0Djq+ne&1nruat%7sC;NtBU
z^n-mXTVIq2NZ+{BV)Bh+L6Skgc5_%AAMmRaJRRGI4QRcoBh*_axo1;K7)<%L8!a9T
zKxgkU9l>&mkgwK24FDV=QD2Md7#Iltk+%OTd;iSh`zlAbzn{c^Q~#O8|BUIQEdH~q
zLv{Z`{r^ekKj%4eJi`B}EfxasN0Ko4{l?ng|U^sgH1Nphe
A{r~^~
literal 0
HcmV?d00001
From eab08601ab8e788d058f675522f45bcd66a0cf5c Mon Sep 17 00:00:00 2001
From: JoanaCMS <70145179+JoanaCMS@users.noreply.github.com>
Date: Fri, 20 May 2022 16:38:02 +0200
Subject: [PATCH 38/73] MET-4532 Created new unit tests for RecordDao (#538)
---
.../metis/repository/dao/RecordDaoTest.java | 116 ++++++++++++++++++
1 file changed, 116 insertions(+)
create mode 100644 metis-repository/src/test/java/eu/europeana/metis/repository/dao/RecordDaoTest.java
diff --git a/metis-repository/src/test/java/eu/europeana/metis/repository/dao/RecordDaoTest.java b/metis-repository/src/test/java/eu/europeana/metis/repository/dao/RecordDaoTest.java
new file mode 100644
index 000000000..1fc17bd4b
--- /dev/null
+++ b/metis-repository/src/test/java/eu/europeana/metis/repository/dao/RecordDaoTest.java
@@ -0,0 +1,116 @@
+package eu.europeana.metis.repository.dao;
+
+import com.mongodb.client.MongoClient;
+import com.mongodb.client.MongoClients;
+import eu.europeana.metis.mongo.embedded.EmbeddedLocalhostMongo;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.time.Instant;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+class RecordDaoTest {
+
+  private static final String DATABASE_NAME = "dbTest";
+
+  private RecordDao recordDao;
+  private MongoClient mongoClient; // kept as a field so tearDown can close it
+  private EmbeddedLocalhostMongo embeddedLocalhostMongo;
+
+  @BeforeEach
+  void setup() { // every test gets its own embedded Mongo instance, client and DAO
+    embeddedLocalhostMongo = new EmbeddedLocalhostMongo();
+    embeddedLocalhostMongo.start();
+    mongoClient = MongoClients.create(String.format("mongodb://%s:%s",
+        embeddedLocalhostMongo.getMongoHost(), embeddedLocalhostMongo.getMongoPort()));
+    recordDao = new RecordDao(mongoClient, DATABASE_NAME);
+  }
+
+  @AfterEach
+  void tearDown() {
+    mongoClient.close(); // release the driver's connections and threads before the server goes away
+    embeddedLocalhostMongo.stop();
+  }
+
+  @Test
+  void createRecordTest_expectTrue() {
+    // Nothing was stored before this call, so creating the record is expected to report true.
+    final Record firstVersion = new Record("recordId", "datasetId", Instant.now(), false, "edmRecord");
+
+    assertTrue(recordDao.createRecord(firstVersion));
+  }
+
+  @Test
+  void createRecordTest_expectFalse() {
+    final Instant now = Instant.now();
+    // Storing a second record under an id that already exists is expected to report false.
+    recordDao.createRecord(new Record("recordId", "datasetId", now, false, "edmRecord"));
+    final Record sameIdNewContent = new Record("recordId", "datasetId", now, false, "newEdmRecord");
+
+    assertFalse(recordDao.createRecord(sameIdNewContent));
+  }
+
+  @Test
+  void getAllRecordsFromDatasetTest() {
+    Instant dateStamp = Instant.now();
+    Record recordToTest = new Record("recordId", "datasetId", dateStamp, false, "edmRecord");
+    Record otherRecordToTest = new Record("otherRecordId", "datasetId", dateStamp, false, "otherEdmRecord");
+    recordDao.createRecord(recordToTest);
+    recordDao.createRecord(otherRecordToTest);
+    // The element type is required for the getters below; the index checks rely on insertion order being kept.
+    List<Record> result = recordDao.getAllRecordsFromDataset("datasetId").collect(Collectors.toUnmodifiableList());
+    assertEquals(2, result.size());
+    assertEquals("datasetId", result.get(0).getDatasetId());
+    assertEquals("datasetId", result.get(1).getDatasetId());
+    assertEquals("recordId", result.get(0).getRecordId());
+    assertEquals("otherRecordId", result.get(1).getRecordId());
+    assertEquals(dateStamp.toEpochMilli(), result.get(0).getDateStamp().toEpochMilli());
+    assertEquals(dateStamp.toEpochMilli(), result.get(1).getDateStamp().toEpochMilli());
+    assertFalse(result.get(0).isDeleted());
+    assertFalse(result.get(1).isDeleted());
+    assertEquals("edmRecord", result.get(0).getEdmRecord());
+    assertEquals("otherEdmRecord", result.get(1).getEdmRecord());
+  }
+
+  @Test
+  void getRecordTest_expectResult() {
+    // Round trip: store one record, read it back by id, compare field by field.
+    final Instant storedAt = Instant.now();
+    recordDao.createRecord(new Record("recordId", "datasetId", storedAt, false, "edmRecord"));
+
+    final Record loaded = recordDao.getRecord("recordId");
+
+    assertEquals("recordId", loaded.getRecordId());
+    assertEquals("datasetId", loaded.getDatasetId());
+    // Only milliseconds are compared; presumably the store does not keep nanosecond precision - confirm.
+    assertEquals(storedAt.toEpochMilli(), loaded.getDateStamp().toEpochMilli());
+    assertFalse(loaded.isDeleted());
+    assertEquals("edmRecord", loaded.getEdmRecord());
+  }
+
+ @Test
+ void getRecordTest_expectNull(){
+ assertNull(recordDao.getRecord("recordId")); // nothing was stored in this test, so the lookup must yield null
+ }
+
+  @Test
+  void deleteRecord_expectTrue() {
+    // Deleting an id that was stored just before is expected to report true.
+    final Record stored = new Record("recordId", "datasetId", Instant.now(), false, "edmRecord");
+    recordDao.createRecord(stored);
+
+    assertTrue(recordDao.deleteRecord("recordId"));
+  }
+
+ @Test
+ void deleteRecord_expectFalse(){
+ assertFalse(recordDao.deleteRecord("recordId")); // nothing was stored in this test, so there is nothing to delete
+ }
+}
From 096df6b3dac825bb018617b27c1718df42bb4174 Mon Sep 17 00:00:00 2001
From: JoanaCMS <70145179+JoanaCMS@users.noreply.github.com>
Date: Fri, 20 May 2022 16:46:44 +0200
Subject: [PATCH 39/73] MET-4534 Created unit tests for InsertionResult (#539)
---
.../repository/rest/InsertionResultTest.java | 75 +++++++++++++++++++
1 file changed, 75 insertions(+)
create mode 100644 metis-repository/src/test/java/eu/europeana/metis/repository/rest/InsertionResultTest.java
diff --git a/metis-repository/src/test/java/eu/europeana/metis/repository/rest/InsertionResultTest.java b/metis-repository/src/test/java/eu/europeana/metis/repository/rest/InsertionResultTest.java
new file mode 100644
index 000000000..7bcba3aeb
--- /dev/null
+++ b/metis-repository/src/test/java/eu/europeana/metis/repository/rest/InsertionResultTest.java
@@ -0,0 +1,75 @@
+package eu.europeana.metis.repository.rest;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.time.Instant;
+import java.util.Set;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/** Unit tests for the getters and the inserted/updated record bookkeeping of InsertionResult. */
+class InsertionResultTest {
+  private InsertionResult insertionResultToTest;
+  private final Instant instantForTest = Instant.now();
+
+  @BeforeEach
+  void setUp() {
+    insertionResultToTest = new InsertionResult("datasetId", instantForTest);
+  }
+
+  @Test
+  void testGetDatasetId() {
+    assertEquals("datasetId", insertionResultToTest.getDatasetId());
+  }
+
+  @Test
+  void testGetDateStamp() {
+    assertEquals(instantForTest, insertionResultToTest.getDateStamp());
+  }
+
+  @Test
+  void testAddInsertedRecordAndGetInsertedRecords() {
+    setInsertedRecords();
+    assertEquals(3, insertionResultToTest.getInsertedRecords());
+  }
+
+  @Test
+  void testAddUpdatedRecordAndGetUpdatedRecords() {
+    setUpdatedRecords();
+    assertEquals(3, insertionResultToTest.getUpdatedRecords());
+  }
+
+  @Test
+  void testGetInsertedRecordIds() {
+    setInsertedRecords();
+    Set<String> result = insertionResultToTest.getInsertedRecordIds();
+    assertEquals(3, result.size()); // exactly the three added IDs, nothing extra
+    assertTrue(result.contains("recordId1"));
+    assertTrue(result.contains("recordId2"));
+    assertTrue(result.contains("recordId3"));
+  }
+
+  @Test
+  void testGetUpdatedRecordIds() {
+    setUpdatedRecords();
+    Set<String> result = insertionResultToTest.getUpdatedRecordIds();
+    assertEquals(3, result.size()); // exactly the three added IDs, nothing extra
+    assertTrue(result.contains("recordId1"));
+    assertTrue(result.contains("recordId2"));
+    assertTrue(result.contains("recordId3"));
+  }
+
+  private void setInsertedRecords() {
+    insertionResultToTest.addInsertedRecord("recordId1");
+    insertionResultToTest.addInsertedRecord("recordId2");
+    insertionResultToTest.addInsertedRecord("recordId3");
+  }
+
+  private void setUpdatedRecords() {
+    insertionResultToTest.addUpdatedRecord("recordId1");
+    insertionResultToTest.addUpdatedRecord("recordId2");
+    insertionResultToTest.addUpdatedRecord("recordId3");
+  }
+}
From 45d8d020759fb53cefda89238ee0ee90688fe36d Mon Sep 17 00:00:00 2001
From: Jorge Ortiz
Date: Fri, 20 May 2022 17:21:51 +0200
Subject: [PATCH 40/73] MET-4375_MET-4537 OaiPmhController unit tests (#540)
---
metis-repository/pom.xml | 241 +++++++++--------
.../repository/rest/OaiPmhControllerTest.java | 252 ++++++++++++++++++
.../src/test/resources/record-test.xml | 52 ++++
3 files changed, 428 insertions(+), 117 deletions(-)
create mode 100644 metis-repository/src/test/java/eu/europeana/metis/repository/rest/OaiPmhControllerTest.java
create mode 100644 metis-repository/src/test/resources/record-test.xml
diff --git a/metis-repository/pom.xml b/metis-repository/pom.xml
index 13f332383..998498158 100644
--- a/metis-repository/pom.xml
+++ b/metis-repository/pom.xml
@@ -1,118 +1,125 @@
-