Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,13 @@
<version>2.0</version>
<scope>runtime</scope>
</dependency>

<!-- Jackson Databind for JSON parsing -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.15.3</version>
</dependency>
</dependencies>

<build>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package com.evolvedbinary.bblValidator.controller;

import com.evolvedbinary.bblValidator.dto.SchemaInfo;
import com.evolvedbinary.bblValidator.service.SchemaService;
import io.micronaut.http.MediaType;
import io.micronaut.http.annotation.Controller;
import io.micronaut.http.annotation.Get;
import io.micronaut.http.annotation.Produces;

import java.util.List;

/**
 * HTTP endpoint exposing the catalogue of CSV Schemas known to this server.
 *
 * <p>Delegates all lookups to {@link SchemaService}, which loads the
 * catalogue from the classpath at startup.</p>
 */
@Controller("/schema")
public class SchemaController {

    private final SchemaService schemaService;

    /**
     * @param schemaService the service holding the loaded schema catalogue
     */
    public SchemaController(final SchemaService schemaService) {
        this.schemaService = schemaService;
    }

    /**
     * Lists every schema that was successfully loaded at startup.
     *
     * @return the available schemas, serialized as JSON
     */
    @Get
    @Produces(MediaType.APPLICATION_JSON)
    public List<SchemaInfo> listSchemas() {
        return schemaService.listSchemas();
    }
}
75 changes: 75 additions & 0 deletions src/main/java/com/evolvedbinary/bblValidator/dto/SchemaInfo.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package com.evolvedbinary.bblValidator.dto;

import io.micronaut.serde.annotation.Serdeable;

/**
 * Metadata describing one CSV Schema available for validation.
 *
 * <p>A mutable JavaBean: the no-arg constructor and setters are required
 * because instances are populated reflectively from JSON metadata files
 * (see the {@code ObjectMapper.readValue} call in {@code SchemaService}).</p>
 */
@Serdeable
public class SchemaInfo {

    private String id;           // unique identifier, matches the metadata file name
    private String name;         // human-readable schema name
    private String version;      // version string of the schema
    private String date;         // date string as read from the metadata JSON
    private String url;          // link to the canonical schema source
    private String description;  // free-text description

    /** No-arg constructor for reflective JSON deserialization. */
    public SchemaInfo() {
    }

    /** Convenience constructor setting every field at once. */
    public SchemaInfo(final String id, final String name, final String version,
            final String date, final String url, final String description) {
        this.id = id;
        this.name = name;
        this.version = version;
        this.date = date;
        this.url = url;
        this.description = description;
    }

    public String getId() {
        return id;
    }

    public void setId(final String id) {
        this.id = id;
    }

    public String getName() {
        return name;
    }

    public void setName(final String name) {
        this.name = name;
    }

    public String getVersion() {
        return version;
    }

    public void setVersion(final String version) {
        this.version = version;
    }

    public String getDate() {
        return date;
    }

    public void setDate(final String date) {
        this.date = date;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(final String url) {
        this.url = url;
    }

    public String getDescription() {
        return description;
    }

    public void setDescription(final String description) {
        this.description = description;
    }
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package com.evolvedbinary.bblValidator.service;

import com.evolvedbinary.bblValidator.dto.SchemaInfo;
import com.fasterxml.jackson.databind.ObjectMapper;
import jakarta.annotation.PostConstruct;
import jakarta.inject.Singleton;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Stream;

@Singleton
public class SchemaService {

    private static final Logger LOG = LoggerFactory.getLogger(SchemaService.class);

    /** Classpath directory holding paired {@code <name>.json} metadata and {@code <name>.csvs} schema files. */
    private static final String SCHEMA_DIRECTORY = "schemas";

    private final List<SchemaInfo> schemas = new ArrayList<>();
    private final ObjectMapper objectMapper = new ObjectMapper();

    /**
     * Scans the classpath schema directory at startup and registers every
     * metadata JSON file that has a matching {@code .csvs} schema beside it.
     *
     * <p>Errors are logged, never propagated, so a broken schema directory
     * cannot prevent application startup.</p>
     */
    @PostConstruct
    public void loadSchemas() {
        try {
            // Single existence check. The previous implementation also opened
            // getResourceAsStream(SCHEMA_DIRECTORY) purely to null-check it,
            // which duplicated this check and could falsely abort: some
            // classloaders return null streams for directory resources even
            // when getResource succeeds.
            final var resource = getClass().getClassLoader().getResource(SCHEMA_DIRECTORY);
            if (resource == null) {
                LOG.warn("Schemas directory '{}' not found in classpath", SCHEMA_DIRECTORY);
                return;
            }

            // NOTE(review): Paths.get(uri) only works while resources are
            // exploded on the file system; inside a packaged JAR this throws
            // FileSystemNotFoundException (caught below). TODO: support
            // jar: URIs via FileSystems.newFileSystem.
            final Path schemaPath = Paths.get(resource.toURI());

            // Depth 1: only direct children of the schemas directory.
            // isRegularFile guards against a directory whose name ends in ".json".
            try (Stream<Path> paths = Files.walk(schemaPath, 1)) {
                paths.filter(Files::isRegularFile)
                        .filter(path -> path.getFileName().toString().endsWith(".json"))
                        .forEach(this::loadSchemaMetadata);
            }

            LOG.info("Loaded {} schemas from classpath", schemas.size());
        } catch (Exception e) {
            // Boundary catch: log and continue with whatever loaded so far.
            LOG.error("Error loading schemas from classpath", e);
        }
    }

    /**
     * Parses one metadata JSON file and registers it only when the companion
     * {@code .csvs} schema file exists in the same directory.
     *
     * @param metadataPath path to a {@code .json} metadata file
     */
    private void loadSchemaMetadata(final Path metadataPath) {
        try {
            final String content = Files.readString(metadataPath, StandardCharsets.UTF_8);
            final SchemaInfo schemaInfo = objectMapper.readValue(content, SchemaInfo.class);

            // The schema file shares the metadata file's base name.
            final String schemaFileName = metadataPath.getFileName().toString().replace(".json", ".csvs");
            final Path schemaFilePath = metadataPath.getParent().resolve(schemaFileName);

            if (Files.exists(schemaFilePath)) {
                schemas.add(schemaInfo);
                LOG.debug("Loaded schema: {}", schemaInfo.getId());
            } else {
                LOG.warn("Schema file not found for metadata: {}", schemaFileName);
            }
        } catch (IOException e) {
            LOG.error("Error loading schema metadata from: {}", metadataPath, e);
        }
    }

    /**
     * @return a defensive copy of the loaded schema catalogue
     */
    public List<SchemaInfo> listSchemas() {
        return new ArrayList<>(schemas);
    }
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
version 1.0
@totalColumns 42
/*-------------------------------------------------------------------------------
|Schema: ADM_362-technical-acquisition-with-minimal-transcription.csvs |
|Authors: Nicki Welch |
| David Underdown |
|Purpose: To capture metadata about the digitisation of the ADM 362 series |
| Primarily technical metadata, but with a minimal amount of |
| transcription to verify that the records may be publicly released |
| after receipt by The National Archives |
|Revision: 1.0 first release |
| 1.1 update as some official numbers only single digit |
| 1.2 allow M as official number prefix too |
| 1.3 further additions to prefixes, L, S, SS, SSX |
| 1.4 allow for asterisk and ? in official number |
| 1.5 further prefixes MX, KX, JX, and longer volume number |
| 1.6 add explicit check that checksum is not that for a 0 byte file |
| 1.7 Fix errors eg use correct not(), rather than isNot() |
| 1.8 Allow brackets etc in comments, range checking for birth year |
| ???? for birth year |
| 1.9 Add piece check in ordinal: unique($piece,$item,$ordinal) |
| Remove and in($resource_uri) from item: |
| resource_uri, change starts(...) to |
| regex("...") |
| 2.0 Allow LX as a prefix too |
|-------------------------------------------------------------------------------*/
batch_code: length(10) regex("^ADM362B([0-9]{3})$")
department: (is("ADM") if($file_path/notEmpty,in($file_path) and in($resource_uri)))
series: is("362") and if($file_path/notEmpty,in($file_path) and in($resource_uri))
piece: range(1,69720) if($file_path/notEmpty,in($file_path) and in($resource_uri))
item: ((positiveInteger unique($piece,$item,$ordinal)) or empty) if($file_path/notEmpty,in($file_path))
ordinal: if($item/empty,empty,unique($piece,$item,$ordinal))
file_uuid: if($ordinal/empty,empty,uuid4 unique)
file_path: uri if($ordinal/empty,empty,unique fileExists regex("^file:\/\/\/ADM_362\/[0-9]{1,5}\/[1-9][0-9]{0,4}\/[1-9][0-9]{0,4}_[0-9]{1,4}\.jp2$"))
file_checksum: if($ordinal/empty,empty,not("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855") and checksum(file($file_path),"SHA-256"))
resource_uri: if($ordinal/notEmpty,uri and regex("^http://datagov.nationalarchives.gov.uk/66/ADM/362/[1-9][0-9]*/[a-f0-9]{8}-[a-f0-9]{4}-4[a-f0-9]{3}-[89ab][a-f0-9]{3}-[a-f0-9]{12}$"))
scan_operator: if($ordinal/empty,empty,length(1,12) regex("^[0-9a-zA-Z]{1,12}$"))
scan_id: if($ordinal/empty,empty,length(1,12) regex("^[0-9a-zA-Z_]{1,12}$"))
scan_location: if($ordinal/empty,empty,regex("[-\w\s,]+"))
scan_native_format: if($ordinal/empty,empty,regex("[0-9\w\s,.:]+"))
scan_timestamp: if($ordinal/empty,empty,xDateTime)
image_resolution: if($ordinal/empty,empty,is("300"))
image_width: if($ordinal/empty,empty,positiveInteger)
image_height: if($ordinal/empty,empty,positiveInteger)
image_tonal_resolution: if($ordinal/empty,empty,is("24-bit colour"))
image_format: if($ordinal/empty,empty,is("x-fmt/392"))
image_colour_space: if($ordinal/empty,empty,is("sRGB"))
process_location: if($ordinal/empty,empty,regex("[-\w\s,]+"))
jp2_creation_timestamp: if($ordinal/empty,empty,xDateTime)
uuid_timestamp: if($ordinal/empty,empty,xDateTime)
embed_timestamp: if($ordinal/empty,empty,xDateTime)
image_split: if($ordinal/empty,empty,is("yes") or is("no"))
image_split_other_uuid: if($ordinal/empty,empty,if($image_split/is("yes"),uuid4,is("")))
image_split_operator: if($ordinal/empty,empty,if($image_split/is("yes"),length(1,12) and regex("^[0-9a-zA-Z]{1,12}$"),is("")))
image_split_timestamp: if($ordinal/empty,empty,if($image_split/is("yes"),xDateTime,is("")))
image_crop: if($ordinal/empty,empty,is("auto") or is("manual") or is("none"))
image_crop_operator: if($ordinal/empty,empty,if($image_split/is("manual"),length(1,12) and regex("^[0-9a-zA-Z]{1,12}$"),is("")))
image_crop_timestamp: if($ordinal/empty,empty,if($image_crop/is("none"),empty,xDateTime))
image_deskew: if($ordinal/empty,empty,is("yes") or is("no"))
image_deskew_operator: if($ordinal/empty,empty,if($image_deskew/is("yes"),regex("^[0-9a-zA-Z]{1,12}$"),is("")))
image_deskew_timestamp: if($ordinal/empty,empty,if($image_deskew/is("yes"),xDateTime,is("")))
QA-code: regex("^[0-9/,]{1,2}$") @optional
comments: regex("[-\w\s,\.\(\)\/'":\?]+") @optional
transcribed_volume_number: if($item/empty,regex("[0-9A-Z\-\s]{1,19}"),is(""))
transcribed_birth_date_day: if(($ordinal/empty and $item/notEmpty),regex("^\*|([0\?][1-9\?])|([1-2\?][0-9\?])|([3\?][0-1\?])$"),is(""))
transcribed_birth_date_month: if(($ordinal/empty and $item/notEmpty),is("*") or is("?") or is("January") or is("February") or is("March") or is("April") or is("May") or is("June") or is("July") or is("August") or is("September") or is("October") or is("November") or is("December"), is(""))
transcribed_birth_date_year: if(($ordinal/empty and $item/notEmpty),if(positiveInteger,range(1850,1914),regex("^1[7-9][0-9\?]{2}|\*|\?{4}$")),is(""))
transcribed_official_number: if(($ordinal/empty and $item/notEmpty),regex("^(([CDP]\/)?([FJKLMS]|LX|MX|JX|KX|SS|SSX)[/?0-9]{1,6}|[/?1-9][/?0-9]{5}|\*)$"),is(""))
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"id": "ADM_362-technical-acquisition-with-minimal-transcription",
"name": "ADM_362-technical-acquisition-with-minimal-transcription",
"version": "1.0.0",
"date": "2015-11-01",
"url": "https://github.com/digital-preservation/csv-schema/blob/master/example-schemas/ADM_362-technical-acquisition-with-minimal-transcription.csvs",
"description": "Minor updates and bug fixes"
}
13 changes: 13 additions & 0 deletions src/main/resources/schemas/thunder-stone-sample-csvs.csvs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
database /tmp/testdb
table customer
# indicate csv format with a delimiter of |
csv |
# Name Type Tag
field CustID varchar(10) 1
field Company varchar(80) 2
field Address varchar(80) 3
field City varchar(20) 4
field State varchar(10) 5
field Zip varchar(10) 6
field Country varchar(10) 7
field Phone varchar(20) 8
8 changes: 8 additions & 0 deletions src/main/resources/schemas/thunder-stone-sample-csvs.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"id": "thunder-stone-sample-csvs",
"name": "thunder-stone-sample-csvs",
"version": "1.0.0",
"date": "2015-11-01",
"url": "https://docs.thunderstone.com/site/texisman/example_schema_comma_separated.html",
"description": "sample file for testing"
}