package com.evolvedbinary.bblValidator.controller;

import com.evolvedbinary.bblValidator.dto.SchemaInfo;
import com.evolvedbinary.bblValidator.service.SchemaService;
import io.micronaut.http.MediaType;
import io.micronaut.http.annotation.Controller;
import io.micronaut.http.annotation.Get;
import io.micronaut.http.annotation.Produces;

import java.util.List;

/**
 * HTTP endpoint exposing the CSV Schemas known to this validator.
 *
 * <p>{@code GET /schema} returns a JSON array of {@link SchemaInfo}
 * metadata entries describing every schema loaded at startup.</p>
 */
@Controller("/schema")
public class SchemaController {

    private final SchemaService schemaService;

    /**
     * @param schemaService service that loads and caches schema metadata
     *                      (constructor-injected by Micronaut)
     */
    public SchemaController(SchemaService schemaService) {
        this.schemaService = schemaService;
    }

    /**
     * Lists all schemas discovered on the classpath.
     *
     * @return a snapshot list of schema metadata; never {@code null},
     *         empty if no schemas were loaded
     */
    @Get
    @Produces(MediaType.APPLICATION_JSON)
    public List<SchemaInfo> listSchemas() {
        return schemaService.listSchemas();
    }
}
package com.evolvedbinary.bblValidator.dto;

import io.micronaut.serde.annotation.Serdeable;

import java.util.Objects;

/**
 * Metadata describing a single CSV Schema (.csvs file) available to the
 * validator: an identifier, display name, version, publication date, source
 * URL, and a free-text description.
 *
 * <p>Mutable JavaBean-style DTO: the no-arg constructor plus setters are
 * required for Micronaut Serde deserialization of the {@code schemas/*.json}
 * metadata files. Not thread-safe; treat instances as effectively immutable
 * after loading.</p>
 */
@Serdeable
public class SchemaInfo {

    private String id;
    private String name;
    private String version;
    private String date;
    private String url;
    private String description;

    /** No-arg constructor required for JSON deserialization. */
    public SchemaInfo() {
    }

    /**
     * Creates a fully populated schema descriptor.
     *
     * @param id          unique identifier of the schema
     * @param name        human-readable schema name
     * @param version     schema version string (e.g. "1.0.0")
     * @param date        publication date as a string (format set by the metadata file)
     * @param url         source URL for the schema document
     * @param description free-text description
     */
    public SchemaInfo(String id, String name, String version, String date, String url, String description) {
        this.id = id;
        this.name = name;
        this.version = version;
        this.date = date;
        this.url = url;
        this.description = description;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getVersion() {
        return version;
    }

    public void setVersion(String version) {
        this.version = version;
    }

    public String getDate() {
        return date;
    }

    public void setDate(String date) {
        this.date = date;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public String getDescription() {
        return description;
    }

    public void setDescription(String description) {
        this.description = description;
    }

    /** Value equality over all six fields (useful in tests and de-duplication). */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof SchemaInfo)) {
            return false;
        }
        final SchemaInfo other = (SchemaInfo) o;
        return Objects.equals(id, other.id)
                && Objects.equals(name, other.name)
                && Objects.equals(version, other.version)
                && Objects.equals(date, other.date)
                && Objects.equals(url, other.url)
                && Objects.equals(description, other.description);
    }

    @Override
    public int hashCode() {
        return Objects.hash(id, name, version, date, url, description);
    }

    /** Diagnostic representation; safe to log (no secrets among the fields). */
    @Override
    public String toString() {
        return "SchemaInfo{id='" + id + "', name='" + name + "', version='" + version
                + "', date='" + date + "', url='" + url + "', description='" + description + "'}";
    }
}
jakarta.annotation.PostConstruct; +import jakarta.inject.Singleton; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; + +@Singleton +public class SchemaService { + + private static final Logger LOG = LoggerFactory.getLogger(SchemaService.class); + private static final String SCHEMA_DIRECTORY = "schemas"; + + private final List schemas = new ArrayList<>(); + private final ObjectMapper objectMapper = new ObjectMapper(); + + @PostConstruct + public void loadSchemas() { + try { + // Load schemas from classpath + ClassLoader classLoader = getClass().getClassLoader(); + + // Get all .json files from the schemas directory + try (InputStream is = classLoader.getResourceAsStream(SCHEMA_DIRECTORY)) { + if (is == null) { + LOG.warn("Schemas directory not found in classpath"); + return; + } + } + + // Scan for schema metadata files + loadSchemasFromClasspath(); + + LOG.info("Loaded {} schemas from disk", schemas.size()); + } catch (Exception e) { + LOG.error("Error loading schemas from disk", e); + } + } + + private void loadSchemasFromClasspath() { + try { + // Get resource URL and list files + ClassLoader classLoader = getClass().getClassLoader(); + var resource = classLoader.getResource(SCHEMA_DIRECTORY); + + if (resource != null) { + Path schemaPath = Paths.get(resource.toURI()); + + try (Stream paths = Files.walk(schemaPath, 1)) { + paths.filter(path -> path.toString().endsWith(".json")) + .forEach(this::loadSchemaMetadata); + } + } + } catch (Exception e) { + LOG.error("Error scanning schema directory", e); + } + } + + private void loadSchemaMetadata(Path metadataPath) { + try { + String content = Files.readString(metadataPath, StandardCharsets.UTF_8); + SchemaInfo schemaInfo = 
objectMapper.readValue(content, SchemaInfo.class); + + // Load corresponding schema file + String schemaFileName = metadataPath.getFileName().toString().replace(".json", ".csvs"); + Path schemaFilePath = metadataPath.getParent().resolve(schemaFileName); + + if (Files.exists(schemaFilePath)) { + schemas.add(schemaInfo); + LOG.debug("Loaded schema: {}", schemaInfo.getId()); + } else { + LOG.warn("Schema file not found for metadata: {}", schemaFileName); + } + } catch (IOException e) { + LOG.error("Error loading schema metadata from: {}", metadataPath, e); + } + } + + public List listSchemas() { + return new ArrayList<>(schemas); + } +} + From 76b2e958dd8d97da45cc4a73eaeafe64e23da30a Mon Sep 17 00:00:00 2001 From: marmoure Date: Mon, 8 Dec 2025 22:48:27 +0100 Subject: [PATCH 2/2] [feature] sample schema for testing --- ...cquisition-with-minimal-transcription.csvs | 68 +++++++++++++++++++ ...cquisition-with-minimal-transcription.json | 8 +++ .../schemas/thunder-stone-sample-csvs.csvs | 13 ++++ .../schemas/thunder-stone-sample-csvs.json | 8 +++ 4 files changed, 97 insertions(+) create mode 100644 src/main/resources/schemas/ADM_362-technical-acquisition-with-minimal-transcription.csvs create mode 100644 src/main/resources/schemas/ADM_362-technical-acquisition-with-minimal-transcription.json create mode 100644 src/main/resources/schemas/thunder-stone-sample-csvs.csvs create mode 100644 src/main/resources/schemas/thunder-stone-sample-csvs.json diff --git a/src/main/resources/schemas/ADM_362-technical-acquisition-with-minimal-transcription.csvs b/src/main/resources/schemas/ADM_362-technical-acquisition-with-minimal-transcription.csvs new file mode 100644 index 0000000..7698428 --- /dev/null +++ b/src/main/resources/schemas/ADM_362-technical-acquisition-with-minimal-transcription.csvs @@ -0,0 +1,68 @@ +version 1.0 +@totalColumns 42 +/*------------------------------------------------------------------------------- +|Schema: 
ADM_363-technical-acquisition-with-minimal-transcription.csvs | +|Authors: Nicki Welch | +| David Underdown | +|Purpose: To capture metadata about the digitisation of the ADM 363 series | +| Primarily technical metadata, but with a minimal amount of | +| transcription to verify that the records may be publicly released | +| after receipt by The National Archives | +|Revision: 1.0 first release | +| 1.1 update as some official numbers only single digit | +| 1.2 allow M as official number prefix too | +| 1.3 further additions to prefixes, L, S, SS, SSX | +| 1.4 allow for asterisk and ? in official number | +| 1.5 further prefixes MX, KX, JX, and longer volume number | +| 1.6 add explicit check that checksum is not that for a 0 byte file | +| 1.7 Fix errors eg use correct not(), rather than isNot() | +| 1.8 Allow brackets etc in comments, range checking for birth year | +| ???? for birth year | +| 1.9 Add piece check in ordinal: unique($piece,$item,$ordinal) | +| Remove and in($resource_uri) from item: | +| resource_uri, change starts(...) 
to | +| regex("...") | +| 2.0 Allow LX as a prefix too | +|-------------------------------------------------------------------------------*/ +batch_code: length(10) regex("^ADM362B([0-9]{3})$") +department: (is("ADM") if($file_path/notEmpty,in($file_path) and in($resource_uri))) +series: is("362") and if($file_path/notEmpty,in($file_path) and in($resource_uri)) +piece: range(1,69720) if($file_path/notEmpty,in($file_path) and in($resource_uri)) +item: ((positiveInteger unique($piece,$item,$ordinal)) or empty) if($file_path/notEmpty,in($file_path)) +ordinal: if($item/empty,empty,unique($piece,$item,$ordinal)) +file_uuid: if($ordinal/empty,empty,uuid4 unique) +file_path: uri if($ordinal/empty,empty,unique fileExists regex("^file:\/\/\/ADM_362\/[0-9]{1,5}\/[1-9][0-9]{0,4}\/[1-9][0-9]{0,4}_[0-9]{1,4}\.jp2$")) +file_checksum: if($ordinal/empty,empty,not("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855") and checksum(file($file_path),"SHA-256")) +resource_uri: if($ordinal/notEmpty,uri and regex("^http://datagov.nationalarchives.gov.uk/66/ADM/362/[1-9][0-9]*/[a-f0-9]{8}-[a-f0-9]{4}-4[a-f0-9]{3}-[89ab][a-f0-9]{3}-[a-f0-9]{12}$")) +scan_operator: if($ordinal/empty,empty,length(1,12) regex("^[0-9a-zA-Z]{1,12}$")) +scan_id: if($ordinal/empty,empty,length(1,12) regex("^[0-9a-zA-Z_]{1,12}$")) +scan_location: if($ordinal/empty,empty,regex("[-\w\s,]+")) +scan_native_format: if($ordinal/empty,empty,regex("[0-9\w\s,.:]+")) +scan_timestamp: if($ordinal/empty,empty,xDateTime) +image_resolution: if($ordinal/empty,empty,is("300")) +image_width: if($ordinal/empty,empty,positiveInteger) +image_height: if($ordinal/empty,empty,positiveInteger) +image_tonal_resolution: if($ordinal/empty,empty,is("24-bit colour")) +image_format: if($ordinal/empty,empty,is("x-fmt/392")) +image_colour_space: if($ordinal/empty,empty,is("sRGB")) +process_location: if($ordinal/empty,empty,regex("[-\w\s,]+")) +jp2_creation_timestamp: if($ordinal/empty,empty,xDateTime) +uuid_timestamp: 
if($ordinal/empty,empty,xDateTime) +embed_timestamp: if($ordinal/empty,empty,xDateTime) +image_split: if($ordinal/empty,empty,is("yes") or is("no")) +image_split_other_uuid: if($ordinal/empty,empty,if($image_split/is("yes"),uuid4,is(""))) +image_split_operator: if($ordinal/empty,empty,if($image_split/is("yes"),length(1,12) and regex("^[0-9a-zA-Z]{1,12}$"),is(""))) +image_split_timestamp: if($ordinal/empty,empty,if($image_split/is("yes"),xDateTime,is(""))) +image_crop: if($ordinal/empty,empty,is("auto") or is("manual") or is("none")) +image_crop_operator: if($ordinal/empty,empty,if($image_split/is("manual"),length(1,12) and regex("^[0-9a-zA-Z]{1,12}$"),is(""))) +image_crop_timestamp: if($ordinal/empty,empty,if($image_crop/is("none"),empty,xDateTime)) +image_deskew: if($ordinal/empty,empty,is("yes") or is("no")) +image_deskew_operator: if($ordinal/empty,empty,if($image_deskew/is("yes"),regex("^[0-9a-zA-Z]{1,12}$"),is(""))) +image_deskew_timestamp: if($ordinal/empty,empty,if($image_deskew/is("yes"),xDateTime,is(""))) +QA-code: regex("^[0-9/,]{1,2}$") @optional +comments: regex("[-\w\s,\.\(\)\/'":\?]+") @optional +transcribed_volume_number: if($item/empty,regex("[0-9A-Z\-\s]{1,19}"),is("")) +transcribed_birth_date_day: if(($ordinal/empty and $item/notEmpty),regex("^\*|([0\?][1-9\?])|([1-2\?][0-9\?])|([3\?][0-1\?])$"),is("")) +transcribed_birth_date_month: if(($ordinal/empty and $item/notEmpty),is("*") or is("?") or is("January") or is("February") or is("March") or is("April") or is("May") or is("June") or is("July") or is("August") or is("September") or is("October") or is("November") or is("December"), is("")) +transcribed_birth_date_year: if(($ordinal/empty and $item/notEmpty),if(positiveInteger,range(1850,1914),regex("^1[7-9][0-9\?]{2}|\*|\?{4}$")),is("")) +transcribed_official_number: if(($ordinal/empty and $item/notEmpty),regex("^(([CDP]\/)?([FJKLMS]|LX|MX|JX|KX|SS|SSX)[/?0-9]{1,6}|[/?1-9][/?0-9]{5}|\*)$"),is("")) \ No newline at end of file diff --git 
a/src/main/resources/schemas/ADM_362-technical-acquisition-with-minimal-transcription.json b/src/main/resources/schemas/ADM_362-technical-acquisition-with-minimal-transcription.json new file mode 100644 index 0000000..5a7e1d4 --- /dev/null +++ b/src/main/resources/schemas/ADM_362-technical-acquisition-with-minimal-transcription.json @@ -0,0 +1,8 @@ +{ + "id": "ADM_362-technical-acquisition-with-minimal-transcription", + "name": "ADM_362-technical-acquisition-with-minimal-transcription", + "version": "1.0.0", + "date": "2015-11-01", + "url": "https://github.com/digital-preservation/csv-schema/blob/master/example-schemas/ADM_362-technical-acquisition-with-minimal-transcription.csvs", + "description": "Minor updates and bug fixes" +} diff --git a/src/main/resources/schemas/thunder-stone-sample-csvs.csvs b/src/main/resources/schemas/thunder-stone-sample-csvs.csvs new file mode 100644 index 0000000..658a08d --- /dev/null +++ b/src/main/resources/schemas/thunder-stone-sample-csvs.csvs @@ -0,0 +1,13 @@ +database /tmp/testdb +table customer +# indicate csv format with a delimiter of | +csv | +# Name Type Tag +field CustID varchar(10) 1 +field Company varchar(80) 2 +field Address varchar(80) 3 +field City varchar(20) 4 +field State varchar(10) 5 +field Zip varchar(10) 6 +field Country varchar(10) 7 +field Phone varchar(20) 8 \ No newline at end of file diff --git a/src/main/resources/schemas/thunder-stone-sample-csvs.json b/src/main/resources/schemas/thunder-stone-sample-csvs.json new file mode 100644 index 0000000..8ccca3b --- /dev/null +++ b/src/main/resources/schemas/thunder-stone-sample-csvs.json @@ -0,0 +1,8 @@ +{ + "id": "thunder-stone-sample-csvs", + "name": "thunder-stone-sample-csvs", + "version": "1.0.0", + "date": "2015-11-01", + "url": "https://docs.thunderstone.com/site/texisman/example_schema_comma_separated.html", + "description": "sample file for testing" +} \ No newline at end of file