From 62bd9315273f4903fa3f47eef8fb13c778b8b874 Mon Sep 17 00:00:00 2001 From: roll Date: Fri, 24 Oct 2025 08:05:47 +0100 Subject: [PATCH 01/19] Added fixture --- .../fixtures/issue-153/datapackage.json | 223 ++++++++++++++++++ .../fixtures/issue-153/deployments.csv | 4 + .../fixtures/issue-153/observations_1.tsv | 4 + .../fixtures/issue-153/observations_2.tsv | 6 + 4 files changed, 237 insertions(+) create mode 100644 lib/package/fixtures/issue-153/datapackage.json create mode 100644 lib/package/fixtures/issue-153/deployments.csv create mode 100644 lib/package/fixtures/issue-153/observations_1.tsv create mode 100644 lib/package/fixtures/issue-153/observations_2.tsv diff --git a/lib/package/fixtures/issue-153/datapackage.json b/lib/package/fixtures/issue-153/datapackage.json new file mode 100644 index 00000000..f346652c --- /dev/null +++ b/lib/package/fixtures/issue-153/datapackage.json @@ -0,0 +1,223 @@ +{ + "name": "example_package", + "id": "115f49c1-8603-463e-a908-68de98327266", + "licenses": [ + { + "name": "CC0-1.0", + "path": "https://creativecommons.org/publicdomain/zero/1.0/", + "title": "CC0 1.0" + } + ], + "version": "1.0", + "created": "2021-03-02T17:22:33Z", + "spatial": null, + "temporal": { + "start": "2020-01-01", + "end": "2021-01-10" + }, + "resources": [ + { + "name": "deployments", + "path": "deployments.csv", + "profile": "tabular-data-resource", + "title": "Camera trap deployments", + "format": "csv", + "mediatype": "text/csv", + "encoding": "utf-8", + "schema": { + "fields": [ + { + "name": "deployment_id", + "type": "string", + "constraints": { + "required": true, + "unique": true + } + }, + { + "name": "longitude", + "type": "number", + "constraints": { + "required": true, + "minimum": -180, + "maximum": 180 + } + }, + { + "name": "latitude", + "constraints": { + "required": true + } + }, + { + "name": "start", + "type": "date", + "format": "%x", + "constraints": { + "required": true + } + }, + { + "name": "comments", + "type": "string", + "constraints": { + "required": false + } + } + ], + "missingValues": ["", "NA", "NaN"], + "primaryKey": "deployment_id" + } + }, + { + "name": "observations", + "path": ["observations_1.tsv", "observations_2.tsv"], + "profile": "tabular-data-resource", + "title": "Camera trap observations", + "format": "csv", + "mediatype": "text/csv", + "encoding": "utf-8", + "dialect": { + "delimiter": "\t" + }, + "schema": { + "fields": [ + { + "name": "observation_id", + "type": "string", + "constraints": { + "required": true, + "unique": true + } + }, + { + "name": "deployment_id", + "type": "string", + "constraints": { + "required": true + } + }, + { + "name": "timestamp", + "type": "datetime", + "format": "%Y-%m-%dT%H:%M:%S%z", + "constraints": { + "required": true + } + }, + { + "name": "scientific_name", + "type": "string", + "constraints": { + "required": false + } + }, + { + "name": "count", + "type": "integer", + "constraints": { + "required": false, + "minimum": 1 + } + }, + { + "name": "life_stage", + "type": "string", + "constraints": { + "required": false, + "enum": ["adult", "subadult", "juvenile", "offspring", "unknown"] + } + }, + { + "name": "comments", + "type": "string", + "constraints": { + "required": false + } + } + ], + "missingValues": ["", "NA", "NaN"], + "primaryKey": "observation_id", + "foreignKeys": [ + { + "fields": "deployment_id", + "reference": { + "resource": "deployments", + "fields": "deployment_id" + } + } + ] + } + }, + { + "name": "media", + "data": [ + { + "media_id": 
"aed5fa71-3ed4-4284-a6ba-3550d1a4de8d", + "deployment_id": "1", + "observation_id": "1-1", + "timestamp": "2020-09-28 02:14:59+02:00", + "file_path": "https://multimedia.agouti.eu/assets/aed5fa71-3ed4-4284-a6ba-3550d1a4de8d/file" + }, + { + "media_id": "da81a501-8236-4cbd-aa95-4bc4b10a05df", + "deployment_id": "1", + "observation_id": "1-1", + "timestamp": "2020-09-28 02:15:00+02:00", + "file_path": "https://multimedia.agouti.eu/assets/da81a501-8236-4cbd-aa95-4bc4b10a05df/file" + }, + { + "media_id": "0ba57608-3cf1-49d6-a5a2-fe680851024d", + "deployment_id": "1", + "observation_id": "1-1", + "timestamp": "2020-09-28 02:15:01+02:00", + "file_path": "https://multimedia.agouti.eu/assets/0ba57608-3cf1-49d6-a5a2-fe680851024d/file" + } + ], + "profile": "tabular-data-resource", + "title": "Camera trap media files", + "schema": { + "fields": [ + { + "name": "media_id", + "type": "string" + }, + { + "name": "deployment_id", + "type": "string" + }, + { + "name": "observation_id", + "type": "string" + }, + { + "name": "timestamp", + "type": "datetime", + "format": "%Y-%m-%d %H:%M:%S%z" + }, + { + "name": "file_path", + "type": "string" + } + ], + "primaryKey": "media_id", + "foreignKeys": [ + { + "fields": "deployment_id", + "reference": { + "resource": "deployments", + "fields": "deployment_id" + } + }, + { + "fields": "observation_id", + "reference": { + "resource": "observations", + "fields": "observation_id" + } + } + ] + } + } + ] +} diff --git a/lib/package/fixtures/issue-153/deployments.csv b/lib/package/fixtures/issue-153/deployments.csv new file mode 100644 index 00000000..9197708e --- /dev/null +++ b/lib/package/fixtures/issue-153/deployments.csv @@ -0,0 +1,4 @@ +deployment_id,longitude,latitude,start,comments +1,4.61612,50.76698,09/25/20, +2,4.64286,50.82716,10/01/20,"On ""forêt"" road." 
+3,bad,50.81860,10/05/20,"Malfunction/no photos, data" diff --git a/lib/package/fixtures/issue-153/observations_1.tsv b/lib/package/fixtures/issue-153/observations_1.tsv new file mode 100644 index 00000000..047d10b5 --- /dev/null +++ b/lib/package/fixtures/issue-153/observations_1.tsv @@ -0,0 +1,4 @@ +observation_id deployment_id timestamp scientific_name count life_stage comments +1-1 1 2020-09-28T00:13:07Z Capreolus capreolus 1 juvenile Comment 1 +1-2 1 2020-09-28T15:59:17Z Capreolus capreolus 1 adult Comment 2 +1-3 1 2020-09-28T16:35:23Z Lepus europaeus 1 adult Comment 3 diff --git a/lib/package/fixtures/issue-153/observations_2.tsv b/lib/package/fixtures/issue-153/observations_2.tsv new file mode 100644 index 00000000..883ec4b3 --- /dev/null +++ b/lib/package/fixtures/issue-153/observations_2.tsv @@ -0,0 +1,6 @@ +observation_id deployment_id timestamp scientific_name count life_stage comments +1-4 1 2020-09-28T17:04:04Z Lepus europaeus 1 adult NA +1-5 1 2020-09-28T19:19:54Z Sus scrofa 2 unknown NA +2-1 2 2021-10-01T01:25:06Z Sus scrofa 1 unknown Duplicate +2-2 2 2021-10-01T01:25:06Z Sus scrofa 1 unknown Duplicate +2-3 2 2021-10-01T04:47:30Z Sus scrofa 1 unknown NA From bae58c36f2cbbc8a2187e55357d073b6d04a278d Mon Sep 17 00:00:00 2001 From: roll Date: Fri, 24 Oct 2025 08:11:01 +0100 Subject: [PATCH 02/19] Added test --- lib/package/validate.spec.ts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/package/validate.spec.ts b/lib/package/validate.spec.ts index 8f787ada..cc6b788d 100644 --- a/lib/package/validate.spec.ts +++ b/lib/package/validate.spec.ts @@ -170,4 +170,13 @@ describe("validatePackage", () => { expect(error.resource).toBe("error-resource") }) }) + + it("should detect tabular validation errors (issue-153)", async () => { + const dataPackage = "lib/package/fixtures/issue-153/datapackage.json" + + const result = await validatePackage(dataPackage) + + expect(result.valid).toBe(false) + expect(result.errors.length).toBe(1) + }) }) From 7422eac2a8058c17b009ef33d1a5f8c4f58f1c15 Mon Sep 17 00:00:00 2001 From: roll Date: Fri, 24 Oct 2025 09:07:03 +0100 Subject: [PATCH 03/19] Added concatFileStreams --- file/package.json | 4 ++++ file/stream/concat.ts | 6 ++++++ file/stream/index.ts | 1 + lib/resource/validate.ts | 9 +++++--- lib/table/load.ts | 6 +++--- lib/table/save.ts | 2 +- pnpm-lock.yaml | 44 ++++++++++++++++++++++++++++++++++++++++ 7 files changed, 65 insertions(+), 7 deletions(-) create mode 100644 file/stream/concat.ts diff --git a/file/package.json b/file/package.json index a5899235..6f3f7b47 100644 --- a/file/package.json +++ b/file/package.json @@ -29,7 +29,11 @@ "exit-hook": "^4.0.0", "hasha": "^6.0.0", "isbinaryfile": "^5.0.4", + "multistream": "^4.1.0", "tempy": "3.1.0", "tiny-invariant": "^1.3.3" + }, + "devDependencies": { + "@types/multistream": "4.1.3" } } diff --git a/file/stream/concat.ts b/file/stream/concat.ts new file mode 100644 index 00000000..4ee77fa8 --- /dev/null +++ b/file/stream/concat.ts @@ -0,0 +1,6 @@ +import type { Readable } from "node:stream" +import { default as Multistream } from "multistream" + +export function concatFileStreams(streams: Readable[]) { + return new Multistream(streams) +} diff --git a/file/stream/index.ts b/file/stream/index.ts index 471bbcc2..9bb335db 100644 --- a/file/stream/index.ts +++ b/file/stream/index.ts @@ -1,2 +1,3 @@ +export { concatFileStreams } from "./concat.ts" export { loadFileStream } from "./load.ts" export { saveFileStream } from "./save.ts" diff --git a/lib/resource/validate.ts 
b/lib/resource/validate.ts index fae3a089..ac2ad09f 100644 --- a/lib/resource/validate.ts +++ b/lib/resource/validate.ts @@ -40,13 +40,16 @@ export async function validateResource( } } + const table = await loadTable(resource, { denormalized: true }) + if (table) { + let schema = await loadResourceSchema(resource.schema) + if (!schema) schema = await inferSchema(resource, options) + } + try { // TODO: rebase on not-rasing? // It will raise if the resource is not a table - let schema = await loadResourceSchema(resource.schema) - if (!schema) schema = await inferSchema(resource, options) - const table = await loadTable(resource, { denormalized: true }) return await validateTable(table, { schema }) } catch {} diff --git a/lib/table/load.ts b/lib/table/load.ts index 029e6dc7..4dcd484b 100644 --- a/lib/table/load.ts +++ b/lib/table/load.ts @@ -1,11 +1,11 @@ import type { Resource } from "@dpkit/core" -import type { LoadTableOptions, Table } from "@dpkit/table" +import type { LoadTableOptions } from "@dpkit/table" import { dpkit } from "../plugin.ts" export async function loadTable( resource: Partial, options?: LoadTableOptions, -): Promise { +) { for (const plugin of dpkit.plugins) { const table = await plugin.loadTable?.(resource, options) if (table) { @@ -13,5 +13,5 @@ export async function loadTable( } } - throw new Error(`No plugin can load the table: ${resource}`) + return undefined } diff --git a/lib/table/save.ts b/lib/table/save.ts index 204ed4ec..2332c080 100644 --- a/lib/table/save.ts +++ b/lib/table/save.ts @@ -9,5 +9,5 @@ export async function saveTable(table: Table, options: SaveTableOptions) { } } - throw new Error(`No plugin can save the table to the path: ${options.path}`) + throw new Error(`No plugin can save the table: ${options.path}`) } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 682983d2..627dbfba 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -434,12 +434,19 @@ importers: isbinaryfile: specifier: ^5.0.4 version: 5.0.6 + multistream: + specifier: ^4.1.0 + version: 4.1.0 tempy: specifier: 3.1.0 version: 3.1.0 tiny-invariant: specifier: ^1.3.3 version: 1.3.3 + devDependencies: + '@types/multistream': + specifier: 4.1.3 + version: 4.1.3 folder: dependencies: @@ -2661,6 +2668,9 @@ packages: '@types/ms@2.1.0': resolution: {integrity: sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA==} + '@types/multistream@4.1.3': + resolution: {integrity: sha512-t57vmDEJOZuC0M3IrZYfCd9wolTcr3ZTCGk1iwHNosvgBX+7/SMvCGcR8wP9lidpelBZQ12crSuINOxkk0azPA==} + '@types/nlcst@2.0.3': resolution: {integrity: sha512-vSYNSDe6Ix3q+6Z7ri9lyWqgGhJTmzRjZRqyq15N0Z/1/UnVsno9G/N40NBijoYx2seFDIl0+B2mgAb9mezUCA==} @@ -4608,6 +4618,9 @@ packages: ms@2.1.3: resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==} + multistream@4.1.0: + resolution: {integrity: sha512-J1XDiAmmNpRCBfIWJv+n0ymC4ABcf/Pl+5YvC5B/D2f/2+8PtHvCNxMPKiQcZyi922Hq69J2YOpb1pTywfifyw==} + mysql2@3.15.1: resolution: {integrity: sha512-WZMIRZstT2MFfouEaDz/AGFnGi1A2GwaDe7XvKTdRJEYiAHbOrh4S3d8KFmQeh11U85G+BFjIvS1Di5alusZsw==} engines: {node: '>= 8.0'} @@ -4882,6 +4895,9 @@ packages: resolution: {integrity: sha512-737ZY3yNnXy37FHkQxPzt4UZ2UWPWiCZWLvFZ4fu5cueciegX0zGPnrlY6bwRg4FdQOe9YU8MkmJwGhoMybl8A==} engines: {node: '>= 0.8'} + once@1.4.0: + resolution: {integrity: sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==} + onetime@5.1.2: resolution: {integrity: 
sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==} engines: {node: '>=6'} @@ -5377,6 +5393,10 @@ packages: readable-stream@2.3.8: resolution: {integrity: sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==} + readable-stream@3.6.2: + resolution: {integrity: sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==} + engines: {node: '>= 6'} + readdirp@3.6.0: resolution: {integrity: sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==} engines: {node: '>=8.10.0'} @@ -6523,6 +6543,9 @@ packages: resolution: {integrity: sha512-42AtmgqjV+X1VpdOfyTGOYRi0/zsoLqtXQckTmqTeybT+BDIbM/Guxo7x3pE2vtpr1ok6xRqM9OpBe+Jyoqyww==} engines: {node: '>=18'} + wrappy@1.0.2: + resolution: {integrity: sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==} + ws@7.5.10: resolution: {integrity: sha512-+dbF1tHwZpXcbOJdVOkzLDxZP1ailvSxM6ZweXTegylPny803bFhA+vqBYw4s31NSAk4S2Qz+AKXK9a4wkdjcQ==} engines: {node: '>=8.3.0'} @@ -8578,6 +8601,10 @@ snapshots: '@types/ms@2.1.0': {} + '@types/multistream@4.1.3': + dependencies: + '@types/node': 24.2.0 + '@types/nlcst@2.0.3': dependencies: '@types/unist': 3.0.3 @@ -11047,6 +11074,11 @@ snapshots: ms@2.1.3: {} + multistream@4.1.0: + dependencies: + once: 1.4.0 + readable-stream: 3.6.2 + mysql2@3.15.1: dependencies: aws-ssl-profiles: 1.1.2 @@ -11236,6 +11268,10 @@ snapshots: on-headers@1.1.0: {} + once@1.4.0: + dependencies: + wrappy: 1.0.2 + onetime@5.1.2: dependencies: mimic-fn: 2.1.0 @@ -11702,6 +11738,12 @@ snapshots: string_decoder: 1.1.1 util-deprecate: 1.0.2 + readable-stream@3.6.2: + dependencies: + inherits: 2.0.4 + string_decoder: 1.1.1 + util-deprecate: 1.0.2 + readdirp@3.6.0: dependencies: picomatch: 2.3.1 @@ -12912,6 +12954,8 @@ snapshots: string-width: 7.2.0 strip-ansi: 7.1.2 + wrappy@1.0.2: {} + ws@7.5.10: {} ws@8.18.0: {} From 1f4c17159be41111d44c43519f8b6a05e8507022 Mon Sep 17 00:00:00 2001 From: roll Date: Fri, 24 Oct 2025 09:27:36 +0100 Subject: [PATCH 04/19] Support multipart path in validateFile --- file/file/infer.ts | 42 ++++++++++++++++++++++++++++-------------- file/file/validate.ts | 12 +++++------- file/package.json | 1 + file/stream/load.ts | 16 ++++++++-------- pnpm-lock.yaml | 3 +++ 5 files changed, 45 insertions(+), 29 deletions(-) diff --git a/file/file/infer.ts b/file/file/infer.ts index 6aaaf087..de7b2564 100644 --- a/file/file/infer.ts +++ b/file/file/infer.ts @@ -1,38 +1,52 @@ import { stat } from "node:fs/promises" import chardet from "chardet" -import { hashFile } from "hasha" +import * as hasha from "hasha" import { isBinaryFile } from "isbinaryfile" -import { prefetchFile } from "./fetch.ts" +import pMap from "p-map" +import { concatFileStreams } from "../stream/concat.ts" +import { loadFileStream } from "../stream/index.ts" +import { prefetchFiles } from "./fetch.ts" import { loadFile } from "./load.ts" export type HashType = "md5" | "sha1" | "sha256" | "sha512" +export async function inferFileBytes(path: string | string[]) { + const localPaths = await prefetchFiles(path) + + let bytes = 0 + for (const localPath of localPaths) { + const result = await stat(localPath) + bytes += result.size + } + + return bytes +} + export async function inferFileHash( - path: string, + path: string | string[], options?: { hashType?: HashType }, ) { - const localPath = await prefetchFile(path) const algorithm = options?.hashType ?? 
"sha256" + const localPaths = await prefetchFiles(path) - const result = await hashFile(localPath, { algorithm }) - return `${algorithm}:${result}` -} - -export async function inferFileBytes(path: string) { - const localPath = await prefetchFile(path) + const streams = await pMap(localPaths, async path => loadFileStream(path)) + const stream = concatFileStreams(streams) - const result = await stat(localPath) - return result.size + const hash = await hasha.hash(stream, { algorithm }) + return `${algorithm}:${hash}` } export async function inferFileEncoding( - path: string, + path: string | string[], options?: { sampleBytes?: number; confidencePercent?: number }, ) { const maxBytes = options?.sampleBytes ?? 10_000 const confidencePercent = options?.confidencePercent ?? 75 - const buffer = await loadFile(path, { maxBytes }) + const firstPath = Array.isArray(path) ? path[0] : path + if (!firstPath) return undefined + + const buffer = await loadFile(firstPath, { maxBytes }) const isBinary = await isBinaryFile(buffer) if (!isBinary) { diff --git a/file/file/validate.ts b/file/file/validate.ts index fc892dc6..8d2cd132 100644 --- a/file/file/validate.ts +++ b/file/file/validate.ts @@ -1,16 +1,16 @@ import type { FileError } from "../error/index.ts" -import { prefetchFile } from "./fetch.ts" +import { prefetchFiles } from "./fetch.ts" import { inferFileBytes, inferFileHash } from "./infer.ts" export async function validateFile( - path: string, + path: string | string[], options?: { bytes?: number; hash?: string }, ) { const errors: FileError[] = [] - const localPath = await prefetchFile(path) + const localPaths = await prefetchFiles(path) if (options?.bytes) { - const bytes = await inferFileBytes(localPath) + const bytes = await inferFileBytes(localPaths) if (bytes !== options.bytes) { errors.push({ type: "file/bytes", @@ -22,9 +22,7 @@ export async function validateFile( if (options?.hash) { const [_hashValue, hashType = "md5"] = options.hash.split(":").toReversed() - // TODO: figure out how we should handle other hash types - // @ts-ignore - const hash = await inferFileHash(localPath, { hashType }) + const hash = await inferFileHash(localPaths, { hashType: hashType as any }) if (hash !== options.hash) { errors.push({ type: "file/hash", diff --git a/file/package.json b/file/package.json index 6f3f7b47..c99ced6f 100644 --- a/file/package.json +++ b/file/package.json @@ -30,6 +30,7 @@ "hasha": "^6.0.0", "isbinaryfile": "^5.0.4", "multistream": "^4.1.0", + "p-map": "^7.0.3", "tempy": "3.1.0", "tiny-invariant": "^1.3.3" }, diff --git a/file/stream/load.ts b/file/stream/load.ts index 5a426130..4a96e43d 100644 --- a/file/stream/load.ts +++ b/file/stream/load.ts @@ -3,7 +3,7 @@ import { Readable, Transform } from "node:stream" import { isRemotePath } from "@dpkit/core" export async function loadFileStream( - pathOrPaths: string | string[], + path: string | string[], options?: { index?: number maxBytes?: number @@ -11,17 +11,17 @@ export async function loadFileStream( ) { const index = options?.index ?? 0 - const paths = Array.isArray(pathOrPaths) ? pathOrPaths : [pathOrPaths] - const path = paths[index] + const paths = Array.isArray(path) ? path : [path] + const indexPath = paths[index] - if (!path) { - throw new Error(`Cannot stream resource ${path} at index ${index}`) + if (!indexPath) { + throw new Error(`Cannot stream resource ${indexPath} at index ${index}`) } - const isRemote = isRemotePath(path) + const isRemote = isRemotePath(indexPath) const stream = isRemote - ? 
await loadRemoteFileStream(path, options) - : await loadLocalFileStream(path, options) + ? await loadRemoteFileStream(indexPath, options) + : await loadLocalFileStream(indexPath, options) return stream } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 627dbfba..26c67621 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -437,6 +437,9 @@ importers: multistream: specifier: ^4.1.0 version: 4.1.0 + p-map: + specifier: ^7.0.3 + version: 7.0.3 tempy: specifier: 3.1.0 version: 3.1.0 From 1f87253ad1c252f00a752ed85da6374ed9ce1483 Mon Sep 17 00:00:00 2001 From: roll Date: Fri, 24 Oct 2025 10:18:44 +0100 Subject: [PATCH 05/19] Added aliases validateResource/PackageMetadata --- core/package/assert.ts | 7 ++----- core/package/index.ts | 5 ++++- core/package/validate.spec.ts | 10 +++++----- core/package/validate.ts | 2 +- core/resource/assert.ts | 4 ++-- core/resource/index.ts | 5 ++++- core/resource/validate.spec.ts | 8 ++++---- core/resource/validate.ts | 2 +- lib/resource/validate.ts | 21 +++++++++++---------- 9 files changed, 34 insertions(+), 30 deletions(-) diff --git a/core/package/assert.ts b/core/package/assert.ts index 26d5a44c..d8eb1595 100644 --- a/core/package/assert.ts +++ b/core/package/assert.ts @@ -1,7 +1,7 @@ import { AssertionError } from "../error/index.ts" import type { Descriptor } from "../general/index.ts" import type { Package } from "./Package.ts" -import { validatePackageDescriptor } from "./validate.ts" +import { validatePackageMetadata } from "./validate.ts" /** * Assert a Package descriptor (JSON Object) against its profile @@ -12,10 +12,7 @@ export async function assertPackage( basepath?: string }, ) { - const { errors, dataPackage } = await validatePackageDescriptor( - source, - options, - ) + const { errors, dataPackage } = await validatePackageMetadata(source, options) if (!dataPackage) throw new AssertionError(errors) return dataPackage diff --git a/core/package/index.ts b/core/package/index.ts index 924098f7..0d6e37bc 100644 --- a/core/package/index.ts +++ b/core/package/index.ts @@ -2,8 +2,11 @@ export type { Package } from "./Package.ts" export { assertPackage } from "./assert.ts" export { loadPackageDescriptor } from "./load.ts" export { savePackageDescriptor } from "./save.ts" -export { validatePackageDescriptor } from "./validate.ts" +export { validatePackageMetadata } from "./validate.ts" export { convertPackageFromDescriptor } from "./convert/fromDescriptor.ts" export { convertPackageToDescriptor } from "./convert/toDescriptor.ts" export type { Contributor } from "./Contributor.ts" export { mergePackages } from "./merge.ts" + +// TODO: Remove in v2 +export { validatePackageMetadata as validatePackageDescriptor } from "./validate.ts" diff --git a/core/package/validate.spec.ts b/core/package/validate.spec.ts index efbf54a7..4f54e79e 100644 --- a/core/package/validate.spec.ts +++ b/core/package/validate.spec.ts @@ -1,11 +1,11 @@ import { useRecording } from "@dpkit/test" import { describe, expect, it } from "vitest" import { loadDescriptor } from "../general/index.ts" -import { validatePackageDescriptor } from "./validate.ts" +import { validatePackageMetadata } from "./validate.ts" useRecording() -describe("validatePackageDescriptor", () => { +describe("validatePackageMetadata", () => { it("returns valid result for valid package", async () => { const descriptor = { name: "example-package", @@ -17,7 +17,7 @@ describe("validatePackageDescriptor", () => { ], } - const { valid, errors } = await validatePackageDescriptor(descriptor) + const { valid, errors } = 
await validatePackageMetadata(descriptor) expect(valid).toBe(true) expect(errors).toEqual([]) @@ -29,7 +29,7 @@ describe("validatePackageDescriptor", () => { resources: "not-an-array", // Should be an array } - const { valid, errors } = await validatePackageDescriptor(descriptor) + const { valid, errors } = await validatePackageMetadata(descriptor) expect(valid).toBe(false) expect(errors.length).toBeGreaterThan(0) @@ -46,7 +46,7 @@ describe("validatePackageDescriptor", () => { "https://raw.githubusercontent.com/tdwg/camtrap-dp/refs/tags/1.0.2/example/datapackage.json", ) - const { valid } = await validatePackageDescriptor(descriptor) + const { valid } = await validatePackageMetadata(descriptor) expect(valid).toBe(true) }) }) diff --git a/core/package/validate.ts b/core/package/validate.ts index 40236bc5..ef36e28f 100644 --- a/core/package/validate.ts +++ b/core/package/validate.ts @@ -8,7 +8,7 @@ const DEFAULT_PROFILE = "https://datapackage.org/profiles/1.0/datapackage.json" /** * Validate a Package descriptor (JSON Object) against its profile */ -export async function validatePackageDescriptor( +export async function validatePackageMetadata( source: Descriptor | Package, options?: { basepath?: string diff --git a/core/resource/assert.ts b/core/resource/assert.ts index f30207a7..a05c0ae1 100644 --- a/core/resource/assert.ts +++ b/core/resource/assert.ts @@ -1,7 +1,7 @@ import { AssertionError } from "../error/index.ts" import type { Descriptor } from "../general/index.ts" import type { Resource } from "./Resource.ts" -import { validateResourceDescriptor } from "./validate.ts" +import { validateResourceMetadata } from "./validate.ts" /** * Assert a Resource descriptor (JSON Object) against its profile @@ -12,7 +12,7 @@ export async function assertResource( basepath?: string }, ) { - const { errors, resource } = await validateResourceDescriptor(source, options) + const { errors, resource } = await validateResourceMetadata(source, options) if (!resource) throw new AssertionError(errors) return resource diff --git a/core/resource/index.ts b/core/resource/index.ts index b5b8abe1..085b83e2 100644 --- a/core/resource/index.ts +++ b/core/resource/index.ts @@ -3,7 +3,7 @@ export { inferResourceName, inferResourceFormat } from "./infer.ts" export { assertResource } from "./assert.ts" export { loadResourceDescriptor } from "./load.ts" export { saveResourceDescriptor } from "./save.ts" -export { validateResourceDescriptor } from "./validate.ts" +export { validateResourceMetadata } from "./validate.ts" export { convertResourceFromDescriptor } from "./convert/fromDescriptor.ts" export { convertResourceToDescriptor } from "./convert/toDescriptor.ts" export type { Source } from "./Source.ts" @@ -11,3 +11,6 @@ export type { License } from "./License.ts" export { loadResourceDialect } from "./dialect.ts" export { loadResourceSchema } from "./schema.ts" export { isRemoteResource } from "./helpers.ts" + +// TODO: Remove in v2 +export { validateResourceMetadata as validateResourceDescriptor } from "./validate.ts" diff --git a/core/resource/validate.spec.ts b/core/resource/validate.spec.ts index a6bf646d..04a06d73 100644 --- a/core/resource/validate.spec.ts +++ b/core/resource/validate.spec.ts @@ -1,7 +1,7 @@ import { describe, expect, it } from "vitest" -import { validateResourceDescriptor } from "./validate.ts" +import { validateResourceMetadata } from "./validate.ts" -describe("validateResourceDescriptor", () => { +describe("validateResourceMetadata", () => { it("returns valid result for valid resource", 
async () => { const descriptor = { name: "example-resource", type: "table", path: "data.csv", encoding: "utf-8", } - const result = await validateResourceDescriptor(descriptor) + const result = await validateResourceMetadata(descriptor) expect(result.valid).toBe(true) expect(result.errors).toEqual([]) @@ -22,7 +22,7 @@ path: true, // Should be a string or array of strings } - const result = await validateResourceDescriptor(invalidResource) + const result = await validateResourceMetadata(invalidResource) expect(result.valid).toBe(false) expect(result.errors.length).toBeGreaterThan(0) diff --git a/core/resource/validate.ts b/core/resource/validate.ts index 10cc2efa..7ef1d3eb 100644 --- a/core/resource/validate.ts +++ b/core/resource/validate.ts @@ -11,7 +11,7 @@ const DEFAULT_PROFILE = "https://datapackage.org/profiles/1.0/dataresource.json" /** * Validate a Resource descriptor (JSON Object) against its profile */ -export async function validateResourceDescriptor( +export async function validateResourceMetadata( source: Descriptor | Resource, options?: { basepath?: string diff --git a/lib/resource/validate.ts b/lib/resource/validate.ts index ac2ad09f..e8f33f43 100644 --- a/lib/resource/validate.ts +++ b/lib/resource/validate.ts @@ -1,6 +1,6 @@ import type { Descriptor, Resource } from "@dpkit/core" import { loadResourceSchema } from "@dpkit/core" -import { loadDescriptor, validateResourceDescriptor } from "@dpkit/core" +import { loadDescriptor, validateResourceMetadata } from "@dpkit/core" import { validateFile } from "@dpkit/file" import { validateTable } from "@dpkit/table" import type { InferSchemaOptions } from "@dpkit/table" @@ -22,7 +22,7 @@ export async function validateResource( basepath = result.basepath } - const { valid, errors, resource } = await validateResourceDescriptor( + const { valid, errors, resource } = await validateResourceMetadata( descriptor, { basepath }, ) @@ -31,14 +31,10 @@ export async function validateResource( return { valid, errors } } - if (resource.bytes || resource.hash) { - if (typeof resource.path === "string") { - return await validateFile(resource.path, { - bytes: resource.bytes, - hash: resource.hash, - }) - } - } + const fileReport = await validateFile(resource.path, { + bytes: resource.bytes, + hash: resource.hash, + }) const table = await loadTable(resource, { denormalized: true }) if (table) { @@ -55,3 +51,8 @@ export async function validateResource( return { valid: true, errors: [] } } + +export async function validateResourceData( + source: Partial, + options?: InferSchemaOptions, +) {} From 4500e7cce0fc26525fb9492dc84690e14be1ac10 Mon Sep 17 00:00:00 2001 From: roll Date: Fri, 24 Oct 2025 10:37:42 +0100 Subject: [PATCH 06/19] Don't raise if a table can't be loaded --- cli/commands/schema/infer.tsx | 7 ++++++- cli/commands/table/convert.tsx | 5 +++++ cli/commands/table/describe.tsx | 5 +++++ cli/commands/table/explore.tsx | 5 +++++ cli/commands/table/script.tsx | 5 +++++ cli/commands/table/validate.tsx | 5 +++++ file/file/validate.ts | 6 ++++-- lib/resource/validate.ts | 30 ++++++++++++++++-------------- lib/schema/infer.ts | 8 ++++++-- lib/table/infer.ts | 13 ++++++------- 10 files changed, 63 insertions(+), 26 deletions(-) diff --git a/cli/commands/schema/infer.tsx b/cli/commands/schema/infer.tsx index dfbd5ff9..ee770f61 100644 --- a/cli/commands/schema/infer.tsx +++ b/cli/commands/schema/infer.tsx @@ -79,6 +79,11 @@ export const inferSchemaCommand = new Command("infer") 
loadTable(resource, { denormalized: true }), ) + if (!table) { + session.terminate("Could not load table") + process.exit(1) + } + const inferredSchema = await session.task( "Inferring schema", inferSchemaFromTable(table, options), @@ -86,7 +91,7 @@ export const inferSchemaCommand = new Command("infer") if (isEmptyObject(inferredSchema)) { session.terminate("Could not infer schema") - process.exit(1) // typescript ignore never return type above + process.exit(1) } await session.render(inferredSchema, ) diff --git a/cli/commands/table/convert.tsx b/cli/commands/table/convert.tsx index 6f45d941..5c0649d2 100644 --- a/cli/commands/table/convert.tsx +++ b/cli/commands/table/convert.tsx @@ -142,6 +142,11 @@ export const convertTableCommand = new Command("convert") loadTable(resource, options), ) + if (!table) { + session.terminate("Could not load table") + process.exit(1) + } + if (options.query) { table = queryTable(table, options.query) } diff --git a/cli/commands/table/describe.tsx b/cli/commands/table/describe.tsx index 63fc4a56..8b5ba6da 100644 --- a/cli/commands/table/describe.tsx +++ b/cli/commands/table/describe.tsx @@ -93,6 +93,11 @@ export const describeTableCommand = new Command("describe") loadTable(resource, options), ) + if (!table) { + session.terminate("Could not load table") + process.exit(1) + } + if (options.query) { table = queryTable(table, options.query) } diff --git a/cli/commands/table/explore.tsx b/cli/commands/table/explore.tsx index 39b28687..ee4db088 100644 --- a/cli/commands/table/explore.tsx +++ b/cli/commands/table/explore.tsx @@ -92,6 +92,11 @@ export const exploreTableCommand = new Command("explore") loadTable(resource, { denormalized: true }), ) + if (!table) { + session.terminate("Could not load table") + process.exit(1) + } + if (!schema && resource.schema) { schema = await session.task( "Loading schema", diff --git a/cli/commands/table/script.tsx b/cli/commands/table/script.tsx index 4718bad4..c8f5dddf 100644 --- a/cli/commands/table/script.tsx +++ b/cli/commands/table/script.tsx @@ -94,6 +94,11 @@ export const scriptTableCommand = new Command("script") loadTable(resource, options), ) + if (!table) { + session.terminate("Could not load table") + process.exit(1) + } + if (options.query) { table = queryTable(table, options.query) } diff --git a/cli/commands/table/validate.tsx b/cli/commands/table/validate.tsx index 7d553c84..386f5dd6 100644 --- a/cli/commands/table/validate.tsx +++ b/cli/commands/table/validate.tsx @@ -99,6 +99,11 @@ export const validateTableCommand = new Command("validate") loadTable(resource, { denormalized: true }), ) + if (!table) { + session.terminate("Could not load table") + process.exit(1) + } + if (!schema && resource.schema) { schema = await session.task( "Loading schema", diff --git a/file/file/validate.ts b/file/file/validate.ts index 8d2cd132..191996d6 100644 --- a/file/file/validate.ts +++ b/file/file/validate.ts @@ -3,7 +3,7 @@ import { prefetchFiles } from "./fetch.ts" import { inferFileBytes, inferFileHash } from "./infer.ts" export async function validateFile( - path: string | string[], + path?: string | string[], options?: { bytes?: number; hash?: string }, ) { const errors: FileError[] = [] @@ -22,7 +22,9 @@ export async function validateFile( if (options?.hash) { const [_hashValue, hashType = "md5"] = options.hash.split(":").toReversed() - const hash = await inferFileHash(localPaths, { hashType: hashType as any }) + const hash = await inferFileHash(localPaths, { + hashType: hashType as any, + }) if (hash !== options.hash) 
{ errors.push({ type: "file/hash", diff --git a/lib/resource/validate.ts b/lib/resource/validate.ts index e8f33f43..d9b5cf7f 100644 --- a/lib/resource/validate.ts +++ b/lib/resource/validate.ts @@ -7,8 +7,6 @@ import type { InferSchemaOptions } from "@dpkit/table" import { inferSchema } from "../schema/index.ts" import { loadTable } from "../table/index.ts" -// TODO: Support multipart resources? (clarify on the specs level) - export async function validateResource( source: string | Descriptor | Partial, options?: InferSchemaOptions & { basepath?: string }, @@ -31,28 +29,32 @@ export async function validateResource( return { valid, errors } } + return await validateResourceData(resource, options) +} + +export async function validateResourceData( + resource: Partial, + options?: InferSchemaOptions, +) { const fileReport = await validateFile(resource.path, { bytes: resource.bytes, hash: resource.hash, }) + if (!fileReport.valid) { + return fileReport + } + const table = await loadTable(resource, { denormalized: true }) if (table) { let schema = await loadResourceSchema(resource.schema) if (!schema) schema = await inferSchema(resource, options) - } + const tableReport = await validateTable(table, { schema }) - try { - // TODO: rebase on not-rasing? - // It will raise if the resource is not a table - - return await validateTable(table, { schema }) - } catch {} + if (!tableReport.valid) { + return tableReport + } + } return { valid: true, errors: [] } } - -export async function validateResourceData( - source: Partial, - options?: InferSchemaOptions, -) {} diff --git a/lib/schema/infer.ts b/lib/schema/infer.ts index bd8b7f8b..aa2f1b85 100644 --- a/lib/schema/infer.ts +++ b/lib/schema/infer.ts @@ -1,4 +1,4 @@ -import type { Resource, Schema } from "@dpkit/core" +import type { Resource } from "@dpkit/core" import type { InferSchemaOptions } from "@dpkit/table" import { inferSchemaFromTable } from "@dpkit/table" import { dpkit } from "../plugin.ts" @@ -7,7 +7,7 @@ import { loadTable } from "../table/index.ts" export async function inferSchema( resource: Partial, options?: InferSchemaOptions, -): Promise { +) { for (const plugin of dpkit.plugins) { const schema = await plugin.inferSchema?.(resource, options) if (schema) { @@ -16,6 +16,10 @@ export async function inferSchema( } const table = await loadTable(resource, { denormalized: true }) + if (!table) { + return undefined + } + const schema = await inferSchemaFromTable(table, options) return schema } diff --git a/lib/table/infer.ts b/lib/table/infer.ts index 42970275..dc25105e 100644 --- a/lib/table/infer.ts +++ b/lib/table/infer.ts @@ -1,15 +1,10 @@ -import type { Dialect, Resource, Schema } from "@dpkit/core" +import type { Resource } from "@dpkit/core" import { loadResourceDialect, loadResourceSchema } from "@dpkit/core" -import type { Table } from "@dpkit/table" import { inferSchemaFromTable } from "@dpkit/table" import { inferDialect } from "../dialect/index.ts" import { loadTable } from "./load.ts" -// TODO: Allow non-tabular resources returning undefined? 
- -export async function inferTable( - resource: Partial, -): Promise<{ dialect: Dialect; schema: Schema; table: Table }> { +export async function inferTable(resource: Partial) { let dialect = await loadResourceDialect(resource.dialect) if (!dialect) { dialect = await inferDialect(resource) @@ -20,6 +15,10 @@ export async function inferTable( { denormalized: true }, ) + if (!table) { + return undefined + } + let schema = await loadResourceSchema(resource.schema) if (!schema) { schema = await inferSchemaFromTable(table) From af4626fd0491dbc9893df7f5cb683d1148c2c8ab Mon Sep 17 00:00:00 2001 From: roll Date: Fri, 24 Oct 2025 10:52:56 +0100 Subject: [PATCH 07/19] Added encoding validation --- file/error/Encoding.ts | 7 +++++++ file/error/File.ts | 3 ++- file/file/validate.ts | 17 +++++++++++++++-- lib/resource/validate.ts | 1 + 4 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 file/error/Encoding.ts diff --git a/file/error/Encoding.ts b/file/error/Encoding.ts new file mode 100644 index 00000000..57217338 --- /dev/null +++ b/file/error/Encoding.ts @@ -0,0 +1,7 @@ +import type { BaseFileError } from "./Base.ts" + +export interface EncodingError extends BaseFileError { + type: "file/encoding" + encoding: string + actualEncoding: string +} diff --git a/file/error/File.ts b/file/error/File.ts index 997380d7..d079a57b 100644 --- a/file/error/File.ts +++ b/file/error/File.ts @@ -1,4 +1,5 @@ import type { BytesError } from "./Bytes.ts" +import type { EncodingError } from "./Encoding.ts" import type { HashError } from "./Hash.ts" -export type FileError = BytesError | HashError +export type FileError = BytesError | HashError | EncodingError diff --git a/file/file/validate.ts b/file/file/validate.ts index 191996d6..ea0e5cbb 100644 --- a/file/file/validate.ts +++ b/file/file/validate.ts @@ -1,10 +1,10 @@ import type { FileError } from "../error/index.ts" import { prefetchFiles } from "./fetch.ts" -import { inferFileBytes, inferFileHash } from "./infer.ts" +import { inferFileBytes, inferFileEncoding, inferFileHash } from "./infer.ts" export async function validateFile( path?: string | string[], - options?: { bytes?: number; hash?: string }, + options?: { bytes?: number; hash?: string; encoding?: string }, ) { const errors: FileError[] = [] const localPaths = await prefetchFiles(path) @@ -25,6 +25,7 @@ export async function validateFile( const hash = await inferFileHash(localPaths, { hashType: hashType as any, }) + if (hash !== options.hash) { errors.push({ type: "file/hash", @@ -34,6 +35,18 @@ export async function validateFile( } } + if (options?.encoding) { + const encoding = await inferFileEncoding(localPaths) + + if (encoding && encoding !== options.encoding) { + errors.push({ + type: "file/encoding", + encoding: options.encoding, + actualEncoding: encoding, + }) + } + } + const valid = errors.length === 0 return { valid, errors } } diff --git a/lib/resource/validate.ts b/lib/resource/validate.ts index d9b5cf7f..ec74b4e4 100644 --- a/lib/resource/validate.ts +++ b/lib/resource/validate.ts @@ -39,6 +39,7 @@ export async function validateResourceData( const fileReport = await validateFile(resource.path, { bytes: resource.bytes, hash: resource.hash, + encoding: resource.encoding, }) if (!fileReport.valid) { From 86185444b22d346a6e18023e6b74857a59c3f51e Mon Sep 17 00:00:00 2001 From: roll Date: Fri, 24 Oct 2025 11:23:18 +0100 Subject: [PATCH 08/19] Fixed test --- lib/package/validate.spec.ts | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git 
a/lib/package/validate.spec.ts b/lib/package/validate.spec.ts index cc6b788d..c5f42996 100644 --- a/lib/package/validate.spec.ts +++ b/lib/package/validate.spec.ts @@ -171,12 +171,11 @@ describe("validatePackage", () => { }) }) - it("should detect tabular validation errors (issue-153)", async () => { + it("should throw on invalid table encoding (issue-153)", async () => { const dataPackage = "lib/package/fixtures/issue-153/datapackage.json" - const result = await validatePackage(dataPackage) - - expect(result.valid).toBe(false) - expect(result.errors.length).toBe(1) + await expect(validatePackage(dataPackage)).rejects.toThrow( + "encoding not utf-8 not implemented", + ) }) }) From 5a78fcb890cc4e08e0e7a0552c8a190b03a1faf4 Mon Sep 17 00:00:00 2001 From: roll Date: Fri, 24 Oct 2025 11:33:36 +0100 Subject: [PATCH 09/19] Improved error message --- file/file/infer.ts | 2 +- lib/package/index.ts | 2 +- lib/package/validate.ts | 24 +++++++++++++++--------- lib/resource/index.ts | 2 +- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/file/file/infer.ts b/file/file/infer.ts index de7b2564..0f5acb98 100644 --- a/file/file/infer.ts +++ b/file/file/infer.ts @@ -41,7 +41,7 @@ export async function inferFileEncoding( options?: { sampleBytes?: number; confidencePercent?: number }, ) { const maxBytes = options?.sampleBytes ?? 10_000 - const confidencePercent = options?.confidencePercent ?? 75 + const confidencePercent = options?.confidencePercent ?? 80 const firstPath = Array.isArray(path) ? path[0] : path if (!firstPath) return undefined diff --git a/lib/package/index.ts b/lib/package/index.ts index 39f8f5db..9786294d 100644 --- a/lib/package/index.ts +++ b/lib/package/index.ts @@ -1,4 +1,4 @@ export { loadPackage } from "./load.ts" export { savePackage } from "./save.ts" export { inferPackage } from "./infer.ts" -export { validatePackage } from "./validate.ts" +export { validatePackage, validatePackageData } from "./validate.ts" diff --git a/lib/package/validate.ts b/lib/package/validate.ts index 56941400..cf8f2109 100644 --- a/lib/package/validate.ts +++ b/lib/package/validate.ts @@ -1,10 +1,7 @@ import type { Descriptor, Package } from "@dpkit/core" import { loadDescriptor, validatePackageDescriptor } from "@dpkit/core" import { dpkit } from "../plugin.ts" -import { validateResource } from "../resource/index.ts" - -// TODO: Improve implementation -// TODO: Support multipart resources? (clarify on the specs level) +import { validateResourceData } from "../resource/index.ts" export async function validatePackage( source: string | Descriptor | Partial, @@ -43,15 +40,24 @@ export async function validatePackage( } } - const resourceErrors = ( + return await validatePackageData(dataPackage) +} + +export async function validatePackageData(dataPackage: Package) { + const errors = ( await Promise.all( dataPackage.resources.map(async resource => { - const { errors } = await validateResource(resource) - return errors.map(error => ({ ...error, resource: resource.name })) + try { + const { errors } = await validateResourceData(resource) + return errors.map(error => ({ ...error, resource: resource.name })) + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error) + throw new Error(`[${resource.name}] ${message}`) + } }), ) ).flat() - const resourceValid = !resourceErrors.length - return { valid: resourceValid, errors: resourceErrors } + const valid = !errors.length + return { valid, errors: errors } } diff --git a/lib/resource/index.ts b/lib/resource/index.ts index ea494c50..1f1da403 100644 --- a/lib/resource/index.ts +++ b/lib/resource/index.ts @@ -1,2 +1,2 @@ export { inferResource } from "./infer.ts" -export { validateResource } from "./validate.ts" +export { validateResource, validateResourceData } from "./validate.ts" From d9db657f920d382d641c56d4c08a1e22249526de Mon Sep 17 00:00:00 2001 From: roll Date: Fri, 24 Oct 2025 12:06:11 +0100 Subject: [PATCH 10/19] Fixed polars encoding --- csv/table/load.ts | 9 +++++++++ lib/package/validate.spec.ts | 11 +++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/csv/table/load.ts b/csv/table/load.ts index f5d61c85..b451dc66 100644 --- a/csv/table/load.ts +++ b/csv/table/load.ts @@ -71,6 +71,15 @@ function getScanOptions(resource: Partial, dialect?: Dialect) { if (resource.encoding) { options.encoding = resource.encoding + + // Polars supports only utf-8 and utf-8-lossy encodings + if (options.encoding === "utf-8") { + options.encoding = "utf8" + } + + if (options.encoding !== "utf8") { + throw new Error(`Encoding ${options.encoding} for CSV files is not supported`) + } } options.skipRows = getRowsToSkip(dialect) diff --git a/lib/package/validate.spec.ts b/lib/package/validate.spec.ts index c5f42996..d5ce3e31 100644 --- a/lib/package/validate.spec.ts +++ b/lib/package/validate.spec.ts @@ -171,11 +171,14 @@ describe("validatePackage", () => { }) }) - it("should throw on invalid table encoding (issue-153)", async () => { + it("should detect tabular validation errors (issue-153)", async () => { const dataPackage = "lib/package/fixtures/issue-153/datapackage.json" - await expect(validatePackage(dataPackage)).rejects.toThrow( - "encoding not utf-8 not implemented", - ) + const result = await validatePackage(dataPackage) + + console.log(result.errors) + + expect(result.valid).toBe(false) + expect(result.errors.length).toBe(1) }) }) From 4f73e3afaff9101ff59969471a93560f0e286425 Mon Sep 17 00:00:00 2001 From: roll Date: Fri, 24 Oct 2025 12:44:21 +0100 Subject: [PATCH 11/19] Fixed fixture for issue #153 --- lib/package/fixtures/issue-153/datapackage.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/package/fixtures/issue-153/datapackage.json b/lib/package/fixtures/issue-153/datapackage.json index f346652c..44a2ca9f 100644 --- a/lib/package/fixtures/issue-153/datapackage.json +++ b/lib/package/fixtures/issue-153/datapackage.json @@ -100,7 +100,7 @@ { "name": "timestamp", "type": "datetime", - "format": "%Y-%m-%dT%H:%M:%S%z", + "format": "%Y-%m-%dT%H:%M:%S%Z", "constraints": { "required": true } From 7f5b161eceeb9d8b05d3af455aa3872d68da0c72 Mon Sep 17 00:00:00 2001 From: roll Date: Fri, 24 Oct 2025 12:55:17 +0100 Subject: [PATCH 12/19] Updated test --- lib/package/validate.spec.ts | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/lib/package/validate.spec.ts b/lib/package/validate.spec.ts index d5ce3e31..4d91f373 100644 --- a/lib/package/validate.spec.ts +++ b/lib/package/validate.spec.ts @@ -171,14 +171,20 @@ describe("validatePackage", () => { }) }) - it("should detect tabular validation errors (issue-153)", async () => { + it.fails("should detect bad cell type (issue-153)", async () => { const dataPackage = 
"lib/package/fixtures/issue-153/datapackage.json" const result = await validatePackage(dataPackage) - console.log(result.errors) - expect(result.valid).toBe(false) - expect(result.errors.length).toBe(1) + expect(result.errors).toEqual([ + { + rowNumber: 3, + type: "cell/type", + fieldName: "longitude", + cell: "bad", + resource: "deployments", + }, + ]) }) }) From 7630b65c2135cd68cf2a05b34c3d4a615a86e63e Mon Sep 17 00:00:00 2001 From: roll Date: Fri, 24 Oct 2025 13:05:51 +0100 Subject: [PATCH 13/19] Fixed type erors --- cloud/components/Report/Error/Error.tsx | 4 +- cloud/components/Report/Error/File.tsx | 17 ++ cloud/locales/de.json | 1 + cloud/locales/en.json | 1 + cloud/locales/es.json | 1 + cloud/locales/fr.json | 1 + cloud/locales/it.json | 1 + cloud/locales/pt.json | 1 + cloud/locales/ru.json | 1 + cloud/locales/uk.json | 1 + .../recording.har | 156 ++++++++++++++++++ file/error/index.ts | 1 + 12 files changed, 185 insertions(+), 1 deletion(-) create mode 100644 core/package/fixtures/generated/validatePackageMetadata-should-validate-camtrap-dp-144_2984677073/recording.har diff --git a/cloud/components/Report/Error/Error.tsx b/cloud/components/Report/Error/Error.tsx index db079bd3..3c2a8471 100644 --- a/cloud/components/Report/Error/Error.tsx +++ b/cloud/components/Report/Error/Error.tsx @@ -14,7 +14,7 @@ import { } from "./Cell.tsx" import { FieldNameError, FieldTypeError } from "./Field.tsx" import { FieldsExtraError, FieldsMissingError } from "./Fields.tsx" -import { BytesError, HashError } from "./File.tsx" +import { BytesError, EncodingError, HashError } from "./File.tsx" import { MetadataError } from "./Metadata.tsx" import { RowUniqueError } from "./Row.tsx" @@ -31,6 +31,8 @@ export function Error(props: { return case "file/hash": return + case "file/encoding": + return case "fields/missing": return case "fields/extra": diff --git a/cloud/components/Report/Error/File.tsx b/cloud/components/Report/Error/File.tsx index 2e907788..179c1fbb 100644 --- a/cloud/components/Report/Error/File.tsx +++ b/cloud/components/Report/Error/File.tsx @@ -35,3 +35,20 @@ export function HashError(props: { error: errorTypes.HashError }) { ) } + +export function EncodingError(props: { error: errorTypes.EncodingError }) { + const { t } = useTranslation() + + return ( + + {t("File encoding")} {t("is expected to be")}{" "} + + {props.error.encoding} + {" "} + {t("but it is actually")}{" "} + + {props.error.actualEncoding} + + + ) +} diff --git a/cloud/locales/de.json b/cloud/locales/de.json index 3de71ba1..14b4b813 100644 --- a/cloud/locales/de.json +++ b/cloud/locales/de.json @@ -51,6 +51,7 @@ "are not expected": "werden nicht erwartet", "File size": "Dateigröße", "File hash": "Datei-Hash", + "File encoding": "Datei-Kodierung", "Field name": "Feldname", "at": "bei", "The cell values of the fields": "Die Zellwerte der Felder", diff --git a/cloud/locales/en.json b/cloud/locales/en.json index e5d066e5..72ea6d2d 100644 --- a/cloud/locales/en.json +++ b/cloud/locales/en.json @@ -51,6 +51,7 @@ "are not expected": "are not expected", "File size": "File size", "File hash": "File hash", + "File encoding": "File encoding", "Field name": "Field name", "at": "at", "The cell values of the fields": "The cell values of the fields", diff --git a/cloud/locales/es.json b/cloud/locales/es.json index c8f6eb9d..9810ec87 100644 --- a/cloud/locales/es.json +++ b/cloud/locales/es.json @@ -51,6 +51,7 @@ "are not expected": "no se esperan", "File size": "Tamaño del archivo", "File hash": "Hash del archivo", + "File encoding": 
"Codificación del archivo", "Field name": "Nombre del campo", "at": "en", "The cell values of the fields": "Los valores de celda de los campos", diff --git a/cloud/locales/fr.json b/cloud/locales/fr.json index cd5a3d7a..81f2078d 100644 --- a/cloud/locales/fr.json +++ b/cloud/locales/fr.json @@ -51,6 +51,7 @@ "are not expected": "ne sont pas attendus", "File size": "Taille du fichier", "File hash": "Hash du fichier", + "File encoding": "Encodage du fichier", "Field name": "Nom du champ", "at": "à", "The cell values of the fields": "Les valeurs de cellule des champs", diff --git a/cloud/locales/it.json b/cloud/locales/it.json index 65f9d679..150a4122 100644 --- a/cloud/locales/it.json +++ b/cloud/locales/it.json @@ -51,6 +51,7 @@ "are not expected": "non sono previsti", "File size": "Dimensione file", "File hash": "Hash file", + "File encoding": "Codifica file", "Field name": "Nome campo", "at": "a", "The cell values of the fields": "I valori delle celle dei campi", diff --git a/cloud/locales/pt.json b/cloud/locales/pt.json index e1fe124d..f80054a9 100644 --- a/cloud/locales/pt.json +++ b/cloud/locales/pt.json @@ -51,6 +51,7 @@ "are not expected": "não são esperados", "File size": "Tamanho do arquivo", "File hash": "Hash do arquivo", + "File encoding": "Codificação do arquivo", "Field name": "Nome do campo", "at": "em", "The cell values of the fields": "Os valores das células dos campos", diff --git a/cloud/locales/ru.json b/cloud/locales/ru.json index 8323b0b2..fac2a1fa 100644 --- a/cloud/locales/ru.json +++ b/cloud/locales/ru.json @@ -51,6 +51,7 @@ "are not expected": "не ожидаются", "File size": "Размер файла", "File hash": "Хеш файла", + "File encoding": "Кодировка файла", "Field name": "Имя поля", "at": "в", "The cell values of the fields": "Значения ячеек полей", diff --git a/cloud/locales/uk.json b/cloud/locales/uk.json index 37ada8a0..9bd3e518 100644 --- a/cloud/locales/uk.json +++ b/cloud/locales/uk.json @@ -51,6 +51,7 @@ "are not expected": "не очікуються", "File size": "Розмір файлу", "File hash": "Хеш файлу", + "File encoding": "Кодування файлу", "Field name": "Ім'я поля", "at": "в", "The cell values of the fields": "Значення комірок полів", diff --git a/core/package/fixtures/generated/validatePackageMetadata-should-validate-camtrap-dp-144_2984677073/recording.har b/core/package/fixtures/generated/validatePackageMetadata-should-validate-camtrap-dp-144_2984677073/recording.har new file mode 100644 index 00000000..7ae6732d --- /dev/null +++ b/core/package/fixtures/generated/validatePackageMetadata-should-validate-camtrap-dp-144_2984677073/recording.har @@ -0,0 +1,156 @@ +{ + "log": { + "_recordingName": "validatePackageMetadata-should validate camtrap dp (#144)", + "creator": { + "comment": "persister:fs", + "name": "Polly.JS", + "version": "6.0.6" + }, + "entries": [ + { + "_id": "4306f83bec3be19c1183804f9d081277", + "_order": 0, + "cache": {}, + "request": { + "bodySize": 0, + "cookies": [], + "headers": [], + "headersSize": 109, + "httpVersion": "HTTP/1.1", + "method": "GET", + "queryString": [], + "url": "https://raw.githubusercontent.com/tdwg/camtrap-dp/refs/tags/1.0.2/example/datapackage.json" + }, + "response": { + "bodySize": 2603, + "content": { + "mimeType": "text/plain; charset=utf-8", + "size": 2603, + "text": "{\n \"resources\": [\n {\n \"name\": \"deployments\",\n \"path\": \"deployments.csv\",\n \"profile\": \"tabular-data-resource\",\n \"format\": \"csv\",\n \"mediatype\": \"text/csv\",\n \"encoding\": \"utf-8\",\n \"schema\": 
\"https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.2/deployments-table-schema.json\"\n },\n {\n \"name\": \"media\",\n \"path\": \"media.csv\",\n \"profile\": \"tabular-data-resource\",\n \"format\": \"csv\",\n \"mediatype\": \"text/csv\",\n \"encoding\": \"utf-8\",\n \"schema\": \"https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.2/media-table-schema.json\"\n },\n {\n \"name\": \"observations\",\n \"path\": \"observations.csv\",\n \"profile\": \"tabular-data-resource\",\n \"format\": \"csv\",\n \"mediatype\": \"text/csv\",\n \"encoding\": \"utf-8\",\n \"schema\": \"https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.2/observations-table-schema.json\"\n },\n {\n \"name\": \"individuals\",\n \"description\": \"Custom table/resource not part of the Camtrap DP model. Included to showcase that extending with more resources is possible.\",\n \"data\": [\n {\n \"id\": 1,\n \"individualName\": \"Reinaert\",\n \"scientificName\": \"Vulpes vulpes\"\n }\n ]\n }\n ],\n \"profile\": \"https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0.2/camtrap-dp-profile.json\",\n \"name\": \"camtrap-dp-example-dataset\",\n \"id\": \"7cca70f5-ef8c-4f86-85fb-8f070937d7ab\",\n \"created\": \"2023-02-06T11:23:03Z\",\n \"title\": \"Sample from: MICA - Muskrat and coypu camera trap observations in Belgium, the Netherlands and Germany\",\n \"contributors\": [\n {\n \"title\": \"Axel Neukermans\",\n \"email\": \"axel.neukermans@inbo.be\",\n \"path\": \"https://orcid.org/0000-0003-0272-9180\",\n \"role\": \"contributor\",\n \"organization\": \"Research Institute for Nature and Forest (INBO)\"\n },\n {\n \"title\": \"Danny Van der beeck\",\n \"email\": \"daniel.vanderbeeck@gmail.com\"\n },\n {\n \"title\": \"Emma Cartuyvels\",\n \"email\": \"emma.cartuyvels@inbo.be\",\n \"role\": \"principalInvestigator\",\n \"organization\": \"Research Institute for Nature and Forest (INBO)\"\n },\n {\n \"title\": \"Peter Desmet\",\n \"email\": \"peter.desmet@inbo.be\",\n \"path\": \"https://orcid.org/0000-0002-8442-8025\",\n \"role\": \"contact\",\n \"organization\": \"Research Institute for Nature and Forest (INBO)\"\n },\n {\n \"title\": \"Research Institute for Nature and Forest (INBO)\",\n \"path\": \"https://inbo.be\",\n \"role\": \"rightsHolder\"\n },\n {\n \"title\": \"Research Institute for Nature and Forest (INBO)\",\n \"path\": \"https://inbo.be\",\n \"role\": \"publisher\"\n }\n ],\n \"description\": \"MICA - Muskrat and coypu camera trap observations in Belgium, the Netherlands and Germany is an occurrence dataset published by the Research Institute of Nature and Forest (INBO). It is part of the LIFE project MICA, in which innovative techniques are tested for a more efficient control of muskrat and coypu populations, both invasive species. 
This dataset is a sample of the original dataset and serves as an example of a Camera Trap Data Package (Camtrap DP).\",\n \"version\": \"1.0.2\",\n \"keywords\": [\n \"camera traps\",\n \"public awareness campaign\",\n \"flood protection\",\n \"flood control\",\n \"damage prevention\",\n \"animal damage\",\n \"pest control\",\n \"invasive alien species\",\n \"muskrat\",\n \"coypu\"\n ],\n \"image\": \"\",\n \"homepage\": \"https://camtrap-dp.tdwg.org/example/\",\n \"sources\": [\n {\n \"title\": \"Agouti\",\n \"path\": \"https://www.agouti.eu\",\n \"email\": \"agouti@wur.nl\",\n \"version\": \"v3.21\"\n }\n ],\n \"licenses\": [\n {\n \"name\": \"CC0-1.0\",\n \"scope\": \"data\"\n },\n {\n \"path\": \"http://creativecommons.org/licenses/by/4.0/\",\n \"scope\": \"media\"\n }\n ],\n \"bibliographicCitation\": \"Desmet P, Neukermans A, Van der beeck D, Cartuyvels E (2022). Sample from: MICA - Muskrat and coypu camera trap observations in Belgium, the Netherlands and Germany. Version 1.0.2. Research Institute for Nature and Forest (INBO). Dataset. https://camtrap-dp.tdwg.org/example/\",\n \"project\": {\n \"id\": \"86cabc14-d475-4439-98a7-e7b590bed60e\",\n \"title\": \"Management of Invasive Coypu and muskrAt in Europe\",\n \"acronym\": \"MICA\",\n \"description\": \"Invasive alien species such as the coypu and muskrat pose a major threat to biodiversity and cost millions of euros annually. By feeding on rushes and reeds, these animals cause serious damage to the environment in which they live and endangered species suffer from habitat loss. The disappearance of reeds and digging in dikes represents a safety risk for humans in the lowland areas. With the LIFE project MICA (), the partners from the participating countries want to develop a transnational plan for the management of coypu and muskrat populations in Europe and aim to reduce their population. The objective of an effective population control of coypu and muskrat is to protect lowlands from flooding, to prevent crop damage and loss of biodiversity. The objective of the project is to serve as a pilot and demonstration project in which ‘best practices’ are tested and new techniques are developed for a more efficient control of muskrat and coypu populations. 
By involving organisations from Belgium, Germany and the Netherlands, the project also promotes international cooperation and knowledge exchange in the field of muskrat and coypu management.\",\n \"samplingDesign\": \"targeted\",\n \"path\": \"https://lifemica.eu\",\n \"captureMethod\": [\n \"activityDetection\",\n \"timeLapse\"\n ],\n \"individualAnimals\": false,\n \"observationLevel\": [\n \"media\",\n \"event\"\n ]\n },\n \"coordinatePrecision\": 0.001,\n \"spatial\": {\n \"type\": \"Polygon\",\n \"coordinates\": [\n [\n [\n 4.013,\n 50.699\n ],\n [\n 5.659,\n 50.699\n ],\n [\n 5.659,\n 51.496\n ],\n [\n 4.013,\n 51.496\n ],\n [\n 4.013,\n 50.699\n ]\n ]\n ]\n },\n \"temporal\": {\n \"start\": \"2020-05-30\",\n \"end\": \"2021-04-18\"\n },\n \"taxonomic\": [\n {\n \"scientificName\": \"Anas platyrhynchos\",\n \"taxonID\": \"https://www.checklistbank.org/dataset/COL2023/taxon/DGP6\",\n \"taxonRank\": \"species\",\n \"vernacularNames\": {\n \"eng\": \"mallard\",\n \"nld\": \"wilde eend\"\n }\n },\n {\n \"scientificName\": \"Anas strepera\",\n \"taxonID\": \"https://www.checklistbank.org/dataset/COL2023/taxon/DGPL\",\n \"taxonRank\": \"species\",\n \"vernacularNames\": {\n \"eng\": \"gadwall\",\n \"nld\": \"krakeend\"\n }\n },\n {\n \"scientificName\": \"Ardea\",\n \"taxonID\": \"https://www.checklistbank.org/dataset/COL2023/taxon/32FH\",\n \"taxonRank\": \"genus\",\n \"vernacularNames\": {\n \"eng\": \"great herons\",\n \"nld\": \"reigers\"\n }\n },\n {\n \"scientificName\": \"Ardea cinerea\",\n \"taxonID\": \"https://www.checklistbank.org/dataset/COL2023/taxon/GCHS\",\n \"taxonRank\": \"species\",\n \"vernacularNames\": {\n \"eng\": \"grey heron\",\n \"nld\": \"blauwe reiger\"\n }\n },\n {\n \"scientificName\": \"Aves\",\n \"taxonID\": \"https://www.checklistbank.org/dataset/COL2023/taxon/V2\",\n \"taxonRank\": \"class\",\n \"vernacularNames\": {\n \"eng\": \"bird sp.\",\n \"nld\": \"vogel\"\n }\n },\n {\n \"scientificName\": \"Homo sapiens\",\n \"taxonID\": \"https://www.checklistbank.org/dataset/COL2023/taxon/6MB3T\",\n \"taxonRank\": \"species\",\n \"vernacularNames\": {\n \"eng\": \"human\",\n \"nld\": \"mens\"\n }\n },\n {\n \"scientificName\": \"Martes foina\",\n \"taxonID\": \"https://www.checklistbank.org/dataset/COL2023/taxon/3Y9VW\",\n \"taxonRank\": \"species\",\n \"vernacularNames\": {\n \"eng\": \"beech marten\",\n \"nld\": \"steenmarter\"\n }\n },\n {\n \"scientificName\": \"Mustela putorius\",\n \"taxonID\": \"https://www.checklistbank.org/dataset/COL2023/taxon/44QYC\",\n \"taxonRank\": \"species\",\n \"vernacularNames\": {\n \"eng\": \"European polecat\",\n \"nld\": \"bunzing\"\n }\n },\n {\n \"scientificName\": \"Rattus norvegicus\",\n \"taxonID\": \"https://www.checklistbank.org/dataset/COL2023/taxon/4RM67\",\n \"taxonRank\": \"species\",\n \"vernacularNames\": {\n \"eng\": \"brown rat\",\n \"nld\": \"bruine rat\"\n }\n },\n {\n \"scientificName\": \"Vulpes vulpes\",\n \"taxonID\": \"https://www.checklistbank.org/dataset/COL2023/taxon/5BSG3\",\n \"taxonRank\": \"species\",\n \"vernacularNames\": {\n \"eng\": \"red fox\",\n \"nld\": \"vos\"\n }\n }\n ],\n \"relatedIdentifiers\": [\n {\n \"relationType\": \"IsDerivedFrom\",\n \"relatedIdentifier\": \"https://doi.org/10.15468/5tb6ze\",\n \"resourceTypeGeneral\": \"Dataset\",\n \"relatedIdentifierType\": \"DOI\"\n },\n {\n \"relationType\": \"IsSupplementTo\",\n \"relatedIdentifier\": \"https://inbo.github.io/camtrapdp/\",\n \"resourceTypeGeneral\": \"Software\",\n \"relatedIdentifierType\": \"URL\"\n }\n ],\n 
\"references\": [ ]\n}\n" + }, + "cookies": [], + "headers": [ + { + "name": "accept-ranges", + "value": "bytes" + }, + { + "name": "access-control-allow-origin", + "value": "*" + }, + { + "name": "cache-control", + "value": "max-age=300" + }, + { + "name": "connection", + "value": "keep-alive" + }, + { + "name": "content-encoding", + "value": "gzip" + }, + { + "name": "content-length", + "value": "2603" + }, + { + "name": "content-security-policy", + "value": "default-src 'none'; style-src 'unsafe-inline'; sandbox" + }, + { + "name": "content-type", + "value": "text/plain; charset=utf-8" + }, + { + "name": "cross-origin-resource-policy", + "value": "cross-origin" + }, + { + "name": "date", + "value": "Fri, 24 Oct 2025 12:04:41 GMT" + }, + { + "name": "etag", + "value": "W/\"568d81aa4b4f1148ac5358387a8367b5853f76cb76dfb282d001809dbe43e173\"" + }, + { + "name": "expires", + "value": "Fri, 24 Oct 2025 12:09:41 GMT" + }, + { + "name": "source-age", + "value": "0" + }, + { + "name": "strict-transport-security", + "value": "max-age=31536000" + }, + { + "name": "vary", + "value": "Authorization,Accept-Encoding" + }, + { + "name": "via", + "value": "1.1 varnish" + }, + { + "name": "x-cache", + "value": "MISS" + }, + { + "name": "x-cache-hits", + "value": "0" + }, + { + "name": "x-content-type-options", + "value": "nosniff" + }, + { + "name": "x-fastly-request-id", + "value": "c27c933a46fb9b15459264fba7573facffd24599" + }, + { + "name": "x-frame-options", + "value": "deny" + }, + { + "name": "x-github-request-id", + "value": "54DE:431DC:29FED3:2E50D4:68FB6B59" + }, + { + "name": "x-served-by", + "value": "cache-lis1490035-LIS" + }, + { + "name": "x-timer", + "value": "S1761307482.672912,VS0,VE181" + }, + { + "name": "x-xss-protection", + "value": "1; mode=block" + } + ], + "headersSize": 902, + "httpVersion": "HTTP/1.1", + "redirectURL": "", + "status": 200, + "statusText": "OK" + }, + "startedDateTime": "2025-10-24T12:04:41.482Z", + "time": 398, + "timings": { + "blocked": -1, + "connect": -1, + "dns": -1, + "receive": 0, + "send": 0, + "ssl": -1, + "wait": 398 + } + } + ], + "pages": [], + "version": "1.2" + } +} diff --git a/file/error/index.ts b/file/error/index.ts index 1e4f6ce1..547d82e2 100644 --- a/file/error/index.ts +++ b/file/error/index.ts @@ -1,3 +1,4 @@ export type * from "./File.ts" export type * from "./Bytes.ts" +export type * from "./Encoding.ts" export type * from "./Hash.ts" From a32a061f43f7743bd47be01191535af2dc447b22 Mon Sep 17 00:00:00 2001 From: roll Date: Fri, 24 Oct 2025 13:10:24 +0100 Subject: [PATCH 14/19] Fixed validateFile spec --- file/file/validate.spec.ts | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/file/file/validate.spec.ts b/file/file/validate.spec.ts index fb8e79fe..e6e219a5 100644 --- a/file/file/validate.spec.ts +++ b/file/file/validate.spec.ts @@ -5,24 +5,24 @@ import { writeTempFile } from "./temp.ts" import { validateFile } from "./validate.ts" vi.mock("./fetch.ts", () => ({ - prefetchFile: vi.fn(), + prefetchFiles: vi.fn(), })) describe("validateFile", () => { - let mockPrefetchFile: ReturnType + let mockPrefetchFiles: ReturnType beforeEach(() => { - mockPrefetchFile = vi.mocked(fetchModule.prefetchFile) + mockPrefetchFiles = vi.mocked(fetchModule.prefetchFiles) vi.clearAllMocks() }) it("should return valid result when no validation options provided", async () => { const tempFilePath = await writeTempFile("Hello, World!") - mockPrefetchFile.mockResolvedValue(tempFilePath) + 
mockPrefetchFiles.mockResolvedValue([tempFilePath]) const result = await validateFile("https://example.com/file.txt") - expect(mockPrefetchFile).toHaveBeenCalledWith( + expect(mockPrefetchFiles).toHaveBeenCalledWith( "https://example.com/file.txt", ) expect(result).toEqual({ valid: true, errors: [] }) @@ -30,7 +30,7 @@ describe("validateFile", () => { it("should validate bytes successfully when they match", async () => { const tempFilePath = await writeTempFile("Hello, World!") - mockPrefetchFile.mockResolvedValue(tempFilePath) + mockPrefetchFiles.mockResolvedValue([tempFilePath]) const result = await validateFile("https://example.com/file.txt", { bytes: 13, @@ -41,7 +41,7 @@ describe("validateFile", () => { it("should return error when bytes do not match", async () => { const tempFilePath = await writeTempFile("Hello, World!") - mockPrefetchFile.mockResolvedValue(tempFilePath) + mockPrefetchFiles.mockResolvedValue([tempFilePath]) const result = await validateFile("https://example.com/file.txt", { bytes: 1024, @@ -58,7 +58,7 @@ describe("validateFile", () => { it("should validate hash successfully when it matches", async () => { const tempFilePath = await writeTempFile("Hello, World!") - mockPrefetchFile.mockResolvedValue(tempFilePath) + mockPrefetchFiles.mockResolvedValue([tempFilePath]) const actualHash = await inferFileHash(tempFilePath, { hashType: "md5" }) @@ -71,7 +71,7 @@ describe("validateFile", () => { it("should return error when hash does not match", async () => { const tempFilePath = await writeTempFile("Hello, World!") - mockPrefetchFile.mockResolvedValue(tempFilePath) + mockPrefetchFiles.mockResolvedValue([tempFilePath]) const actualHash = await inferFileHash(tempFilePath, { hashType: "md5" }) @@ -90,7 +90,7 @@ describe("validateFile", () => { it("should validate sha256 hash", async () => { const tempFilePath = await writeTempFile("Hello, World!") - mockPrefetchFile.mockResolvedValue(tempFilePath) + mockPrefetchFiles.mockResolvedValue([tempFilePath]) const actualHash = await inferFileHash(tempFilePath, { hashType: "sha256" }) @@ -103,7 +103,7 @@ describe("validateFile", () => { it("should validate sha1 hash", async () => { const tempFilePath = await writeTempFile("Hello, World!") - mockPrefetchFile.mockResolvedValue(tempFilePath) + mockPrefetchFiles.mockResolvedValue([tempFilePath]) const actualHash = await inferFileHash(tempFilePath, { hashType: "sha1" }) @@ -116,7 +116,7 @@ describe("validateFile", () => { it("should validate sha512 hash", async () => { const tempFilePath = await writeTempFile("Hello, World!") - mockPrefetchFile.mockResolvedValue(tempFilePath) + mockPrefetchFiles.mockResolvedValue([tempFilePath]) const actualHash = await inferFileHash(tempFilePath, { hashType: "sha512" }) @@ -129,7 +129,7 @@ describe("validateFile", () => { it("should validate both bytes and hash when both match", async () => { const tempFilePath = await writeTempFile("Hello, World!") - mockPrefetchFile.mockResolvedValue(tempFilePath) + mockPrefetchFiles.mockResolvedValue([tempFilePath]) const actualHash = await inferFileHash(tempFilePath, { hashType: "md5" }) @@ -143,7 +143,7 @@ describe("validateFile", () => { it("should return multiple errors when both bytes and hash do not match", async () => { const tempFilePath = await writeTempFile("Hello, World!") - mockPrefetchFile.mockResolvedValue(tempFilePath) + mockPrefetchFiles.mockResolvedValue([tempFilePath]) const actualHash = await inferFileHash(tempFilePath, { hashType: "md5" }) @@ -168,7 +168,7 @@ describe("validateFile", () => { 
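// The renamed mock above reflects the contract these specs now assume:
// prefetchFiles generalizes the old prefetchFile to one-or-many paths and
// resolves each to a local copy, in call order. A minimal sketch of that
// contract follows; isRemotePath and downloadToTemp are assumed helpers for
// illustration, not confirmed API of this codebase:

declare function isRemotePath(path: string): boolean
declare function downloadToTemp(path: string): Promise<string>

async function prefetchFiles(...paths: string[]): Promise<string[]> {
  // Remote paths are materialized as temp files; local paths pass through,
  // so callers can stat/hash the returned paths uniformly.
  return Promise.all(
    paths.map(async path => (isRemotePath(path) ? downloadToTemp(path) : path)),
  )
}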
it("should return error when only bytes mismatch", async () => { const tempFilePath = await writeTempFile("Hello, World!") - mockPrefetchFile.mockResolvedValue(tempFilePath) + mockPrefetchFiles.mockResolvedValue([tempFilePath]) const actualHash = await inferFileHash(tempFilePath, { hashType: "md5" }) @@ -184,7 +184,7 @@ describe("validateFile", () => { it("should return error when only hash mismatch", async () => { const tempFilePath = await writeTempFile("Hello, World!") - mockPrefetchFile.mockResolvedValue(tempFilePath) + mockPrefetchFiles.mockResolvedValue([tempFilePath]) const result = await validateFile("https://example.com/file.txt", { bytes: 13, @@ -198,17 +198,17 @@ describe("validateFile", () => { it("should handle local file paths", async () => { const tempFilePath = await writeTempFile("x".repeat(2048)) - mockPrefetchFile.mockResolvedValue(tempFilePath) + mockPrefetchFiles.mockResolvedValue([tempFilePath]) const result = await validateFile("/local/path/file.txt", { bytes: 2048 }) - expect(mockPrefetchFile).toHaveBeenCalledWith("/local/path/file.txt") + expect(mockPrefetchFiles).toHaveBeenCalledWith("/local/path/file.txt") expect(result).toEqual({ valid: true, errors: [] }) }) it("should handle empty file validation", async () => { const tempFilePath = await writeTempFile("") - mockPrefetchFile.mockResolvedValue(tempFilePath) + mockPrefetchFiles.mockResolvedValue([tempFilePath]) const result = await validateFile("https://example.com/empty.txt", { bytes: 0, From 2c9503f745a0168afd613cbba122cd5ee9592951 Mon Sep 17 00:00:00 2001 From: roll Date: Fri, 24 Oct 2025 13:12:38 +0100 Subject: [PATCH 15/19] Fixed inferFile spec --- file/file/infer.spec.ts | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/file/file/infer.spec.ts b/file/file/infer.spec.ts index 0068483c..aca85bad 100644 --- a/file/file/infer.spec.ts +++ b/file/file/infer.spec.ts @@ -4,71 +4,71 @@ import { inferFileBytes, inferFileEncoding, inferFileHash } from "./infer.ts" import { writeTempFile } from "./temp.ts" vi.mock("./fetch.ts", () => ({ - prefetchFile: vi.fn(), + prefetchFiles: vi.fn(), })) describe("inferFileHash", () => { - let mockPrefetchFile: ReturnType + let mockPrefetchFiles: ReturnType let tempFilePath: string beforeEach(async () => { - mockPrefetchFile = vi.mocked(fetchModule.prefetchFile) + mockPrefetchFiles = vi.mocked(fetchModule.prefetchFiles) tempFilePath = await writeTempFile("Hello, World!") vi.clearAllMocks() }) it("should compute sha256 hash by default", async () => { - mockPrefetchFile.mockResolvedValue(tempFilePath) + mockPrefetchFiles.mockResolvedValue([tempFilePath]) const result = await inferFileHash("https://example.com/file.txt") - expect(mockPrefetchFile).toHaveBeenCalledWith( + expect(mockPrefetchFiles).toHaveBeenCalledWith( "https://example.com/file.txt", ) expect(result).toMatch(/^sha256:[a-f0-9]{64}$/) }) it("should compute md5 hash when specified", async () => { - mockPrefetchFile.mockResolvedValue(tempFilePath) + mockPrefetchFiles.mockResolvedValue([tempFilePath]) const result = await inferFileHash("https://example.com/file.txt", { hashType: "md5", }) - expect(mockPrefetchFile).toHaveBeenCalledWith( + expect(mockPrefetchFiles).toHaveBeenCalledWith( "https://example.com/file.txt", ) expect(result).toMatch(/^md5:[a-f0-9]{32}$/) }) it("should compute sha1 hash when specified", async () => { - mockPrefetchFile.mockResolvedValue(tempFilePath) + mockPrefetchFiles.mockResolvedValue([tempFilePath]) const result = await 
inferFileHash("https://example.com/file.txt", { hashType: "sha1", }) - expect(mockPrefetchFile).toHaveBeenCalledWith( + expect(mockPrefetchFiles).toHaveBeenCalledWith( "https://example.com/file.txt", ) expect(result).toMatch(/^sha1:[a-f0-9]{40}$/) }) it("should compute sha512 hash when specified", async () => { - mockPrefetchFile.mockResolvedValue(tempFilePath) + mockPrefetchFiles.mockResolvedValue([tempFilePath]) const result = await inferFileHash("https://example.com/file.txt", { hashType: "sha512", }) - expect(mockPrefetchFile).toHaveBeenCalledWith( + expect(mockPrefetchFiles).toHaveBeenCalledWith( "https://example.com/file.txt", ) expect(result).toMatch(/^sha512:[a-f0-9]{128}$/) }) it("should compute consistent hashes for same content", async () => { - mockPrefetchFile.mockResolvedValue(tempFilePath) + mockPrefetchFiles.mockResolvedValue([tempFilePath]) const result1 = await inferFileHash("https://example.com/file.txt") const result2 = await inferFileHash("https://example.com/file.txt") @@ -78,20 +78,20 @@ describe("inferFileHash", () => { }) describe("inferFileBytes", () => { - let mockPrefetchFile: ReturnType + let mockPrefetchFiles: ReturnType beforeEach(() => { - mockPrefetchFile = vi.mocked(fetchModule.prefetchFile) + mockPrefetchFiles = vi.mocked(fetchModule.prefetchFiles) vi.clearAllMocks() }) it("should return file size in bytes", async () => { const tempFilePath = await writeTempFile("Hello, World!") - mockPrefetchFile.mockResolvedValue(tempFilePath) + mockPrefetchFiles.mockResolvedValue([tempFilePath]) const result = await inferFileBytes("https://example.com/file.txt") - expect(mockPrefetchFile).toHaveBeenCalledWith( + expect(mockPrefetchFiles).toHaveBeenCalledWith( "https://example.com/file.txt", ) expect(result).toBe(13) @@ -99,7 +99,7 @@ describe("inferFileBytes", () => { it("should handle empty files", async () => { const tempFilePath = await writeTempFile("") - mockPrefetchFile.mockResolvedValue(tempFilePath) + mockPrefetchFiles.mockResolvedValue([tempFilePath]) const result = await inferFileBytes("https://example.com/empty.txt") @@ -108,7 +108,7 @@ describe("inferFileBytes", () => { it("should handle larger files", async () => { const tempFilePath = await writeTempFile("x".repeat(10000)) - mockPrefetchFile.mockResolvedValue(tempFilePath) + mockPrefetchFiles.mockResolvedValue([tempFilePath]) const result = await inferFileBytes("https://example.com/large.txt") @@ -119,11 +119,11 @@ describe("inferFileBytes", () => { const tempFilePath = await writeTempFile( Buffer.from([0xff, 0xd8, 0xff, 0xe0]), ) - mockPrefetchFile.mockResolvedValue(tempFilePath) + mockPrefetchFiles.mockResolvedValue([tempFilePath]) const result = await inferFileBytes("https://example.com/file.bin") - expect(mockPrefetchFile).toHaveBeenCalledWith( + expect(mockPrefetchFiles).toHaveBeenCalledWith( "https://example.com/file.bin", ) expect(result).toBe(4) From 6bc8ef8960f249d345fefdcd4e0ddbe15c49f54b Mon Sep 17 00:00:00 2001 From: roll Date: Fri, 24 Oct 2025 13:16:05 +0100 Subject: [PATCH 16/19] Fixed validateResourceCommand spec --- cli/commands/resource/validate.spec.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cli/commands/resource/validate.spec.ts b/cli/commands/resource/validate.spec.ts index 9bd3b432..4dbc78da 100644 --- a/cli/commands/resource/validate.spec.ts +++ b/cli/commands/resource/validate.spec.ts @@ -9,9 +9,10 @@ useRecording() describe("resource validate", () => { it("should validate a valid resource", async () => { + const csvPath = await 
writeTempFile("id,name\n1,alice\n2,bob") const resourceContent = JSON.stringify({ name: "test-resource", - path: "data.csv", + path: basename(csvPath), }) const resourcePath = await writeTempFile(resourceContent) From cb169b26d780789b3c4eb385f38bfc99d5b346e6 Mon Sep 17 00:00:00 2001 From: roll Date: Fri, 24 Oct 2025 13:24:26 +0100 Subject: [PATCH 17/19] Enable failing test --- lib/package/validate.spec.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/package/validate.spec.ts b/lib/package/validate.spec.ts index 4d91f373..ef097e63 100644 --- a/lib/package/validate.spec.ts +++ b/lib/package/validate.spec.ts @@ -171,7 +171,7 @@ describe("validatePackage", () => { }) }) - it.fails("should detect bad cell type (issue-153)", async () => { + it("should detect bad cell type (issue-153)", async () => { const dataPackage = "lib/package/fixtures/issue-153/datapackage.json" const result = await validatePackage(dataPackage) From d9b695142f9d4f9f4d376050af5867c536614a5a Mon Sep 17 00:00:00 2001 From: roll Date: Fri, 24 Oct 2025 19:15:11 +0100 Subject: [PATCH 18/19] Fixed missing values being considered cell/type errors --- table/field/denormalize.ts | 23 ++++++++++++++ table/field/index.ts | 2 ++ table/field/normalize.ts | 31 +++++++++++++++++++ table/field/parse.ts | 15 +-------- table/field/stringify.ts | 63 +++++++++++--------------------------- table/table/denormalize.ts | 5 +-- table/table/normalize.ts | 17 +++++----- table/table/validate.ts | 4 +-- 8 files changed, 89 insertions(+), 71 deletions(-) create mode 100644 table/field/denormalize.ts create mode 100644 table/field/normalize.ts diff --git a/table/field/denormalize.ts b/table/field/denormalize.ts new file mode 100644 index 00000000..e0c3ec2e --- /dev/null +++ b/table/field/denormalize.ts @@ -0,0 +1,23 @@ +import type { Field } from "@dpkit/core" +import { col, lit, when } from "nodejs-polars" +import type { Expr } from "nodejs-polars" +import { stringifyField } from "./stringify.ts" + +const DEFAULT_MISSING_VALUE = "" + +export function denormalizeField(field: Field, expr?: Expr) { + expr = expr ?? col(field.name) + expr = stringifyField(field, expr) + + const flattenMissingValues = field.missingValues?.map(it => + typeof it === "string" ? it : it.value, + ) + + const missingValue = flattenMissingValues?.[0] ?? DEFAULT_MISSING_VALUE + expr = when(expr.isNull()) + .then(lit(missingValue)) + .otherwise(expr) + .alias(field.name) + + return expr +} diff --git a/table/field/index.ts b/table/field/index.ts index c0ecec8e..990d8eb1 100644 --- a/table/field/index.ts +++ b/table/field/index.ts @@ -1,5 +1,7 @@ +export { denormalizeField } from "./denormalize.ts" export { parseField } from "./parse.ts" export { validateField } from "./validate.ts" export { matchField } from "./match.ts" +export { normalizeField } from "./normalize.ts" export { stringifyField } from "./stringify.ts" export type { PolarsField } from "./Field.ts" diff --git a/table/field/normalize.ts b/table/field/normalize.ts new file mode 100644 index 00000000..f1329b67 --- /dev/null +++ b/table/field/normalize.ts @@ -0,0 +1,31 @@ +import type { Field } from "@dpkit/core" +import { col, lit, when } from "nodejs-polars" +import type { Expr } from "nodejs-polars" +import { parseField } from "./parse.ts" + +const DEFAULT_MISSING_VALUES = [""] + +export function normalizeField( + field: Field, + expr?: Expr, + options?: { dontParse?: boolean }, +) { + expr = expr ?? 
col(field.name) + + const flattenMissingValues = + field.missingValues?.map(it => (typeof it === "string" ? it : it.value)) ?? + DEFAULT_MISSING_VALUES + + if (flattenMissingValues.length) { + expr = when(expr.isIn(flattenMissingValues)) + .then(lit(null)) + .otherwise(expr) + .alias(field.name) + } + + if (options?.dontParse) { + return expr + } + + return parseField(field, expr) +} diff --git a/table/field/parse.ts b/table/field/parse.ts index 0ac6c831..64a8eb2d 100644 --- a/table/field/parse.ts +++ b/table/field/parse.ts @@ -1,5 +1,5 @@ import type { Field } from "@dpkit/core" -import { col, lit, when } from "nodejs-polars" +import { col } from "nodejs-polars" import type { Expr } from "nodejs-polars" import { parseArrayField } from "./types/array.ts" import { parseBooleanField } from "./types/boolean.ts" @@ -17,22 +17,9 @@ import { parseTimeField } from "./types/time.ts" import { parseYearField } from "./types/year.ts" import { parseYearmonthField } from "./types/yearmonth.ts" -const DEFAULT_MISSING_VALUES = [""] - export function parseField(field: Field, expr?: Expr) { expr = expr ?? col(field.name) - const flattenMissingValues = - field.missingValues?.map(it => (typeof it === "string" ? it : it.value)) ?? - DEFAULT_MISSING_VALUES - - if (flattenMissingValues.length) { - expr = when(expr.isIn(flattenMissingValues)) - .then(lit(null)) - .otherwise(expr) - .alias(field.name) - } - switch (field.type) { case "array": return parseArrayField(field, expr) diff --git a/table/field/stringify.ts b/table/field/stringify.ts index 6b62ad0b..f2d2ad8c 100644 --- a/table/field/stringify.ts +++ b/table/field/stringify.ts @@ -1,5 +1,5 @@ import type { Field } from "@dpkit/core" -import { col, lit, when } from "nodejs-polars" +import { col } from "nodejs-polars" import type { Expr } from "nodejs-polars" import { stringifyArrayField } from "./types/array.ts" import { stringifyBooleanField } from "./types/boolean.ts" @@ -17,68 +17,41 @@ import { stringifyTimeField } from "./types/time.ts" import { stringifyYearField } from "./types/year.ts" import { stringifyYearmonthField } from "./types/yearmonth.ts" -const DEFAULT_MISSING_VALUE = "" - export function stringifyField(field: Field, expr?: Expr) { expr = expr ?? 
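// After this refactor, stringifyField is a pure type-to-string dispatch:
// each case returns directly instead of falling through to a shared
// missing-value substitution, which now lives in denormalizeField (added
// earlier in this patch) so that serialization and missing-value handling
// stay separable.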
col(field.name) switch (field.type) { case "array": - expr = stringifyArrayField(field, expr) - break + return stringifyArrayField(field, expr) case "boolean": - expr = stringifyBooleanField(field, expr) - break + return stringifyBooleanField(field, expr) case "date": - expr = stringifyDateField(field, expr) - break + return stringifyDateField(field, expr) case "datetime": - expr = stringifyDatetimeField(field, expr) - break + return stringifyDatetimeField(field, expr) case "duration": - expr = stringifyDurationField(field, expr) - break + return stringifyDurationField(field, expr) case "geojson": - expr = stringifyGeojsonField(field, expr) - break + return stringifyGeojsonField(field, expr) case "geopoint": - expr = stringifyGeopointField(field, expr) - break + return stringifyGeopointField(field, expr) case "integer": - expr = stringifyIntegerField(field, expr) - break + return stringifyIntegerField(field, expr) case "list": - expr = stringifyListField(field, expr) - break + return stringifyListField(field, expr) case "number": - expr = stringifyNumberField(field, expr) - break + return stringifyNumberField(field, expr) case "object": - expr = stringifyObjectField(field, expr) - break + return stringifyObjectField(field, expr) case "string": - expr = stringifyStringField(field, expr) - break + return stringifyStringField(field, expr) case "time": - expr = stringifyTimeField(field, expr) - break + return stringifyTimeField(field, expr) case "year": - expr = stringifyYearField(field, expr) - break + return stringifyYearField(field, expr) case "yearmonth": - expr = stringifyYearmonthField(field, expr) - break + return stringifyYearmonthField(field, expr) + default: + return expr } - - const flattenMissingValues = field.missingValues?.map(it => - typeof it === "string" ? it : it.value, - ) - - const missingValue = flattenMissingValues?.[0] ?? DEFAULT_MISSING_VALUE - expr = when(expr.isNull()) - .then(lit(missingValue)) - .otherwise(expr) - .alias(field.name) - - return expr } diff --git a/table/table/denormalize.ts b/table/table/denormalize.ts index 29ffa762..f39be815 100644 --- a/table/table/denormalize.ts +++ b/table/table/denormalize.ts @@ -1,7 +1,7 @@ import type { Field, Schema } from "@dpkit/core" import { col, lit } from "nodejs-polars" import type { Expr } from "nodejs-polars" -import { stringifyField } from "../field/index.ts" +import { denormalizeField } from "../field/index.ts" import type { PolarsSchema } from "../schema/index.ts" import { getPolarsSchema } from "../schema/index.ts" import type { Table } from "./Table.ts" @@ -40,10 +40,11 @@ export function denormalizeFields( if (polarsField) { expr = col(polarsField.name).alias(field.name) + // TODO: Move this logic to denormalizeField? if (!nativeTypes?.includes(field.type ?? "any")) { const missingValues = field.missingValues ?? 
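// The fallback from field.missingValues to schema.missingValues appears to
// mirror the Table Schema rule that field-level missingValues, when present,
// take precedence over schema-level ones; only the merged result is passed
// on to denormalizeField.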
schema.missingValues const mergedField = { ...field, missingValues } - expr = stringifyField(mergedField, expr) + expr = denormalizeField(mergedField, expr) } } diff --git a/table/table/normalize.ts b/table/table/normalize.ts index 2121af3d..39cded33 100644 --- a/table/table/normalize.ts +++ b/table/table/normalize.ts @@ -3,7 +3,7 @@ import type { Expr } from "nodejs-polars" import { DataType } from "nodejs-polars" import { col, lit } from "nodejs-polars" import { matchField } from "../field/index.ts" -import { parseField } from "../field/index.ts" +import { normalizeField } from "../field/index.ts" import { getPolarsSchema } from "../schema/index.ts" import type { PolarsSchema } from "../schema/index.ts" import type { Table } from "./Table.ts" @@ -14,16 +14,16 @@ export async function normalizeTable( table: Table, schema: Schema, options?: { - noParse?: boolean + dontParse?: boolean }, ) { - const { noParse } = options ?? {} + const { dontParse } = options ?? {} const head = await table.head(HEAD_ROWS).collect() const polarsSchema = getPolarsSchema(head.schema) return table.select( - ...Object.values(normalizeFields(schema, polarsSchema, { noParse })), + ...Object.values(normalizeFields(schema, polarsSchema, { dontParse })), ) } @@ -31,10 +31,10 @@ export function normalizeFields( schema: Schema, polarsSchema: PolarsSchema, options?: { - noParse?: boolean + dontParse?: boolean }, ) { - const { noParse } = options ?? {} + const { dontParse } = options ?? {} const exprs: Record = {} for (const [index, field] of schema.fields.entries()) { @@ -44,10 +44,11 @@ export function normalizeFields( if (polarsField) { expr = col(polarsField.name).alias(field.name) - if (!noParse && polarsField.type.equals(DataType.String)) { + // TODO: Move this logic to normalizeField? + if (polarsField.type.equals(DataType.String)) { const missingValues = field.missingValues ?? 
schema.missingValues const mergedField = { ...field, missingValues } - expr = parseField(mergedField, expr) + expr = normalizeField(mergedField, expr, { dontParse }) } } diff --git a/table/table/validate.ts b/table/table/validate.ts index 8cfd7179..9812ae76 100644 --- a/table/table/validate.ts +++ b/table/table/validate.ts @@ -137,13 +137,13 @@ async function validateFields( const targetNames: string[] = [] const sources = Object.entries( - normalizeFields(schema, polarsSchema, { noParse: true }), + normalizeFields(schema, polarsSchema, { dontParse: true }), ).map(([name, expr]) => { return expr.alias(`source:${name}`) }) const targets = Object.entries( - normalizeFields(schema, polarsSchema, { noParse: false }), + normalizeFields(schema, polarsSchema, { dontParse: false }), ).map(([name, expr]) => { const targetName = `target:${name}` targetNames.push(targetName) From 41f7a54d85ed9d0f9bc73b90016859b506c0ea3f Mon Sep 17 00:00:00 2001 From: roll Date: Fri, 24 Oct 2025 19:28:37 +0100 Subject: [PATCH 19/19] Fixed cell/type-required duplication --- table/table/validate.ts | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/table/table/validate.ts b/table/table/validate.ts index 9812ae76..9ee59b89 100644 --- a/table/table/validate.ts +++ b/table/table/validate.ts @@ -179,20 +179,27 @@ async function validateFields( .collect() for (const record of errorFrame.toRecords() as any[]) { + const typeErrorInFields: string[] = [] for (const [key, value] of Object.entries(record)) { const [kind, type, name] = key.split(":") - if (kind === "error" && value === true && type && name) { const rowNumber = record.row_nr // Cell-level errors if (type.startsWith("cell/")) { - errors.push({ - rowNumber, - type: type as any, - fieldName: name as any, - cell: (record[`source:${name}`] ?? "").toString(), - }) + if (!typeErrorInFields.includes(name)) { + errors.push({ + rowNumber, + type: type as any, + fieldName: name as any, + cell: (record[`source:${name}`] ?? "").toString(), + }) + } + + // Type error is a terminating error for a cell + if (type === "cell/type") { + typeErrorInFields.push(name) + } } // Row-level errors
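// With the typeErrorInFields guard above, a failed type parse yields exactly
// one cell/type error and suppresses any follow-up cell/* errors for that
// field in the same row. Hypothetical trace for an integer field `count`
// with required=true receiving the cell "abc":
//   before this patch: cell/type + cell/required reported for the same cell
//   after this patch:  cell/type only, since a type error terminates the cell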