diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 7f40d14d..64a52ba7 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,4 +1,4 @@ -- fixes # +- Fixes # --- diff --git a/.github/workflows/general.yaml b/.github/workflows/general.yaml index 83f2004c..660d3731 100644 --- a/.github/workflows/general.yaml +++ b/.github/workflows/general.yaml @@ -34,7 +34,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: binaries - path: cli/compile/binaries/*.zip + path: terminal/compile/binaries/*.zip test-linux: needs: [test-compile] @@ -70,9 +70,10 @@ jobs: uses: actions/download-artifact@v5 with: name: binaries - path: cli/compile/binaries + path: terminal/compile/binaries - name: Test Binaries - run: unzip -j cli/compile/binaries/*linux-x64.zip && ./dp --version + working-directory: terminal/compile/binaries + run: unzip -j *linux-x64.zip && ./dpkit --version services: postgres: @@ -119,9 +120,10 @@ jobs: uses: actions/download-artifact@v5 with: name: binaries - path: cli/compile/binaries + path: terminal/compile/binaries - name: Test Binaries - run: unzip -j cli/compile/binaries/*macos-arm64.zip && ./dp --version + working-directory: terminal/compile/binaries + run: unzip -j *macos-arm64.zip && ./dpkit --version test-windows: needs: [test-compile] @@ -149,10 +151,11 @@ jobs: uses: actions/download-artifact@v5 with: name: binaries - path: cli/compile/binaries + path: terminal/compile/binaries - name: Test Binaries shell: bash - run: unzip -j cli/compile/binaries/*windows-x64.zip && ./dp.exe --version + working-directory: terminal/compile/binaries + run: unzip -j *windows-x64.zip && ./dpkit.exe --version # We have to split the release step because for some reason # using semantic-release before compilation inflates the binary sizes @@ -228,7 +231,7 @@ jobs: uses: softprops/action-gh-release@v2 with: draft: false - files: cli/compile/binaries/*.zip + files: terminal/compile/binaries/*.zip tag_name: v${{ needs.release-draft.outputs.version }} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore index d6e40e6c..80321017 100644 --- a/.gitignore +++ b/.gitignore @@ -69,4 +69,3 @@ dist/ .serena/ .mcp.json .env -dp diff --git a/.releaserc.json b/.releaserc.json index 35285663..d986442f 100644 --- a/.releaserc.json +++ b/.releaserc.json @@ -1,5 +1,10 @@ { - "branches": ["main"], + "branches": [ + { + "name": "main", + "prerelease": "rc" + } + ], "plugins": [ ["@semantic-release/commit-analyzer", { "preset": "conventionalcommits" }], [ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 557f4f78..512864cd 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -9,13 +9,14 @@ Thank you for your interest in contributing to dpkit! 
This document provides guidelines for contributing to the project. Project is a monorepo with the following packages: -- `@dpkit/core`: Core metadata functionality -- `@dpkit/file`: File-related functionality +- `@dpkit/metadata`: Core metadata functionality +- `@dpkit/dataset`: File-related functionality - `@dpkit/table`: Table-related functionality -- `@dpkit/cli`: Command-line interface for running tasks - `@dpkit/<domain>`: Domain-specific functionality -- `@dpkit/lib`: All-in-one package that re-exports all functionality -- `dpkit`: Meta-package that re-exports `lib` and `cli` +- `@dpkit/library`: All-in-one package that re-exports all functionality +- `@dpkit/terminal`: Terminal interface for running tasks +- `@dpkit/browser`: Browser-related functionality +- `dpkit`: Meta-package that re-exports the underlying functionality ## Development Environment diff --git a/README.md b/README.md index 31029069..511953db 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![Coverage](https://img.shields.io/codecov/c/github/datisthq/dpkit/main)](https://codecov.io/gh/datisthq/dpkit) [![Version](https://img.shields.io/npm/v/dpkit)](https://www.npmjs.com/package/dpkit) -dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev). +dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [project's website](https://dpkit.app). ## Funding diff --git a/arrow/package.json b/arrow/package.json deleted file mode 100644 index 8e8b03ee..00000000 --- a/arrow/package.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "name": "@dpkit/arrow", - "type": "module", - "version": "0.0.0-dev", - "exports": "./build/index.js", - "sideEffects": false, - "license": "MIT", - "author": "Evgeny Karev", - "repository": "https://github.com/datisthq/dpkit", - "description": "Fast TypeScript data management framework built on top of the Data Package standard and Polars DataFrames", - "keywords": [ - "data", - "polars", - "dataframe", - "datapackage", - "tableschema", - "typescript", - "validation", - "quality", - "fair", - "parquet" - ], - "scripts": { - "build": "tsc" - }, - "dependencies": { - "@dpkit/core": "workspace:*", - "@dpkit/file": "workspace:*", - "@dpkit/table": "workspace:*", - "csv-sniffer": "^0.1.1", - "nodejs-polars": "^0.22.1" - }, - "devDependencies": { - "@dpkit/test": "workspace:*" - } -} diff --git a/lib/README.md b/audio/README.md similarity index 78% rename from lib/README.md rename to audio/README.md index 5ee449cf..aac0d22c 100644 --- a/lib/README.md +++ b/audio/README.md @@ -1,3 +1,3 @@ -# @dpkit/lib +# @dpkit/audio -dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev). +dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. 
For more information, please visit the [project's website](https://dpkit.app). diff --git a/audio/index.ts b/audio/index.ts new file mode 100644 index 00000000..e69de29b diff --git a/folder/package.json b/audio/package.json similarity index 80% rename from folder/package.json rename to audio/package.json index 7d87560a..51d48a2a 100644 --- a/folder/package.json +++ b/audio/package.json @@ -1,9 +1,10 @@ { - "name": "@dpkit/folder", + "name": "@dpkit/audio", "type": "module", "version": "0.0.0-dev", "exports": "./build/index.js", "sideEffects": false, + "files": ["build"], "license": "MIT", "author": "Evgeny Karev", "repository": "https://github.com/datisthq/dpkit", @@ -18,14 +19,13 @@ "validation", "quality", "fair", - "folder" + "audio" ], "scripts": { "build": "tsc" }, "dependencies": { - "@dpkit/core": "workspace:*", - "@dpkit/file": "workspace:*", - "exit-hook": "^4.0.0" + "@dpkit/metadata": "workspace:*", + "@dpkit/dataset": "workspace:*" } } diff --git a/arrow/tsconfig.json b/audio/tsconfig.json similarity index 100% rename from arrow/tsconfig.json rename to audio/tsconfig.json diff --git a/arrow/typedoc.json b/audio/typedoc.json similarity index 100% rename from arrow/typedoc.json rename to audio/typedoc.json diff --git a/browser/README.md b/browser/README.md index d99e6749..bd4055bf 100644 --- a/browser/README.md +++ b/browser/README.md @@ -1,3 +1,3 @@ # @dpkit/browser -dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev). +dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [project's website](https://dpkit.app). 
diff --git a/browser/components/Report/Error/Cell.tsx b/browser/components/Report/Error/Cell.tsx index dd99239a..3417d251 100644 --- a/browser/components/Report/Error/Cell.tsx +++ b/browser/components/Report/Error/Cell.tsx @@ -1,8 +1,8 @@ -import type * as errorTypes from "@dpkit/lib" +import type * as library from "@dpkit/library" import { Code, Text } from "@mantine/core" import { useTranslation } from "react-i18next" -export function CellTypeError(props: { error: errorTypes.CellTypeError }) { +export function CellTypeError(props: { error: library.CellTypeError }) { const { t } = useTranslation() const { error } = props @@ -30,7 +30,7 @@ export function CellTypeError(props: { error: errorTypes.CellTypeError }) { } export function CellRequiredError(props: { - error: errorTypes.CellRequiredError + error: library.CellRequiredError }) { const { t } = useTranslation() const { error } = props @@ -51,7 +51,7 @@ export function CellRequiredError(props: { } export function CellMinimumError(props: { - error: errorTypes.CellMinimumError + error: library.CellMinimumError }) { const { t } = useTranslation() const { error } = props @@ -80,7 +80,7 @@ export function CellMinimumError(props: { } export function CellMaximumError(props: { - error: errorTypes.CellMaximumError + error: library.CellMaximumError }) { const { t } = useTranslation() const { error } = props @@ -109,7 +109,7 @@ export function CellMaximumError(props: { } export function CellExclusiveMinimumError(props: { - error: errorTypes.CellExclusiveMinimumError + error: library.CellExclusiveMinimumError }) { const { t } = useTranslation() const { error } = props @@ -138,7 +138,7 @@ export function CellExclusiveMinimumError(props: { } export function CellExclusiveMaximumError(props: { - error: errorTypes.CellExclusiveMaximumError + error: library.CellExclusiveMaximumError }) { const { t } = useTranslation() const { error } = props @@ -167,7 +167,7 @@ export function CellExclusiveMaximumError(props: { } export function CellMinLengthError(props: { - error: errorTypes.CellMinLengthError + error: library.CellMinLengthError }) { const { t } = useTranslation() const { error } = props @@ -196,7 +196,7 @@ export function CellMinLengthError(props: { } export function CellMaxLengthError(props: { - error: errorTypes.CellMaxLengthError + error: library.CellMaxLengthError }) { const { t } = useTranslation() const { error } = props @@ -225,7 +225,7 @@ export function CellMaxLengthError(props: { } export function CellPatternError(props: { - error: errorTypes.CellPatternError + error: library.CellPatternError }) { const { t } = useTranslation() const { error } = props @@ -253,7 +253,7 @@ export function CellPatternError(props: { ) } -export function CellUniqueError(props: { error: errorTypes.CellUniqueError }) { +export function CellUniqueError(props: { error: library.CellUniqueError }) { const { t } = useTranslation() const { error } = props @@ -276,7 +276,7 @@ export function CellUniqueError(props: { error: errorTypes.CellUniqueError }) { ) } -export function CellEnumError(props: { error: errorTypes.CellEnumError }) { +export function CellEnumError(props: { error: library.CellEnumError }) { const { t } = useTranslation() const { error } = props @@ -304,7 +304,7 @@ export function CellEnumError(props: { error: errorTypes.CellEnumError }) { } export function CellJsonSchemaError(props: { - error: errorTypes.CellJsonSchemaError + error: library.CellJsonSchemaError }) { const { t } = useTranslation() const { error } = props diff --git 
a/browser/components/Report/Error/Data.tsx b/browser/components/Report/Error/Data.tsx index e728f4fd..2f9f3944 100644 --- a/browser/components/Report/Error/Data.tsx +++ b/browser/components/Report/Error/Data.tsx @@ -1,8 +1,8 @@ -import type * as errorTypes from "@dpkit/lib" +import type * as library from "@dpkit/library" import { Text } from "@mantine/core" import { useTranslation } from "react-i18next" -export function DataError(props: { error: errorTypes.DataError }) { +export function DataError(props: { error: library.DataError }) { const { t } = useTranslation() return {t(props.error.message as any)} diff --git a/browser/components/Report/Error/Error.tsx b/browser/components/Report/Error/Error.tsx index de7acc12..27971e47 100644 --- a/browser/components/Report/Error/Error.tsx +++ b/browser/components/Report/Error/Error.tsx @@ -1,4 +1,4 @@ -import type * as errorTypes from "@dpkit/lib" +import type { UnboundError } from "@dpkit/library" import { CellEnumError, CellExclusiveMaximumError, @@ -22,12 +22,7 @@ import { MetadataError } from "./Metadata.tsx" import { RowUniqueError } from "./Row.tsx" export function Error(props: { - // TODO: should be lib.Error - error: - | errorTypes.MetadataError - | errorTypes.DataError - | errorTypes.FileError - | errorTypes.TableError + error: UnboundError }) { const { error } = props diff --git a/browser/components/Report/Error/Field.tsx b/browser/components/Report/Error/Field.tsx index d8321342..2e8b9452 100644 --- a/browser/components/Report/Error/Field.tsx +++ b/browser/components/Report/Error/Field.tsx @@ -1,8 +1,8 @@ -import type * as errorTypes from "@dpkit/lib" +import type * as library from "@dpkit/library" import { Code, Text } from "@mantine/core" import { useTranslation } from "react-i18next" -export function FieldNameError(props: { error: errorTypes.FieldNameError }) { +export function FieldNameError(props: { error: library.FieldNameError }) { const { t } = useTranslation() return ( @@ -18,7 +18,7 @@ export function FieldNameError(props: { error: errorTypes.FieldNameError }) { ) } -export function FieldTypeError(props: { error: errorTypes.FieldTypeError }) { +export function FieldTypeError(props: { error: library.FieldTypeError }) { const { t } = useTranslation() return ( diff --git a/browser/components/Report/Error/Fields.tsx b/browser/components/Report/Error/Fields.tsx index 79f6fb17..a580be6d 100644 --- a/browser/components/Report/Error/Fields.tsx +++ b/browser/components/Report/Error/Fields.tsx @@ -1,9 +1,9 @@ -import type * as errorTypes from "@dpkit/lib" +import type * as library from "@dpkit/library" import { Code, Text } from "@mantine/core" import { useTranslation } from "react-i18next" export function FieldsMissingError(props: { - error: errorTypes.FieldsMissingError + error: library.FieldsMissingError }) { const { t } = useTranslation() return ( @@ -18,7 +18,7 @@ export function FieldsMissingError(props: { } export function FieldsExtraError(props: { - error: errorTypes.FieldsExtraError + error: library.FieldsExtraError }) { const { t } = useTranslation() return ( diff --git a/browser/components/Report/Error/File.tsx b/browser/components/Report/Error/File.tsx index 179c1fbb..8430f798 100644 --- a/browser/components/Report/Error/File.tsx +++ b/browser/components/Report/Error/File.tsx @@ -1,8 +1,8 @@ -import type * as errorTypes from "@dpkit/lib" +import type * as library from "@dpkit/library" import { Code, Text } from "@mantine/core" import { useTranslation } from "react-i18next" -export function BytesError(props: { error: 
errorTypes.BytesError }) { +export function BytesError(props: { error: library.BytesError }) { const { t } = useTranslation() return ( @@ -19,7 +19,7 @@ export function BytesError(props: { error: errorTypes.BytesError }) { ) } -export function HashError(props: { error: errorTypes.HashError }) { +export function HashError(props: { error: library.HashError }) { const { t } = useTranslation() return ( @@ -36,7 +36,7 @@ export function HashError(props: { error: errorTypes.HashError }) { ) } -export function EncodingError(props: { error: errorTypes.EncodingError }) { +export function EncodingError(props: { error: library.EncodingError }) { const { t } = useTranslation() return ( diff --git a/browser/components/Report/Error/ForeignKey.tsx b/browser/components/Report/Error/ForeignKey.tsx index bdd2bf8f..5e709823 100644 --- a/browser/components/Report/Error/ForeignKey.tsx +++ b/browser/components/Report/Error/ForeignKey.tsx @@ -1,9 +1,9 @@ -import type * as errorTypes from "@dpkit/lib" +import type * as library from "@dpkit/library" import { Code, Text } from "@mantine/core" import { useTranslation } from "react-i18next" // TODO: improve error message -export function ForeignKeyError(props: { error: errorTypes.ForeignKeyError }) { +export function ForeignKeyError(props: { error: library.ForeignKeyError }) { const { t } = useTranslation() return ( diff --git a/browser/components/Report/Error/Metadata.tsx b/browser/components/Report/Error/Metadata.tsx index d9e0fb3b..9fe2880a 100644 --- a/browser/components/Report/Error/Metadata.tsx +++ b/browser/components/Report/Error/Metadata.tsx @@ -1,25 +1,17 @@ -import type * as errorTypes from "@dpkit/lib" +import type * as library from "@dpkit/library" import { Code, Text } from "@mantine/core" +import { capitalize } from "es-toolkit" import { useTranslation } from "react-i18next" -export function MetadataError(props: { error: errorTypes.MetadataError }) { +export function MetadataError(props: { error: library.MetadataError }) { const { t } = useTranslation() return ( + {capitalize(t(props.error.message as any))} {t("at")}{" "} - {props.error.keyword} + {props.error.pointer} - {props.error.message && ` ${t(props.error.message as any)}`} - {props.error.instancePath && ( - <> - {" "} - {t("at")}{" "} - - {props.error.instancePath} - - - )} ) } diff --git a/browser/components/Report/Error/Row.tsx b/browser/components/Report/Error/Row.tsx index 0ee6f372..92f4e334 100644 --- a/browser/components/Report/Error/Row.tsx +++ b/browser/components/Report/Error/Row.tsx @@ -1,8 +1,8 @@ -import type * as errorTypes from "@dpkit/lib" +import type * as library from "@dpkit/library" import { Code, Text } from "@mantine/core" import { useTranslation } from "react-i18next" -export function RowUniqueError(props: { error: errorTypes.RowUniqueError }) { +export function RowUniqueError(props: { error: library.RowUniqueError }) { const { t } = useTranslation() return ( diff --git a/browser/components/Report/Report.tsx b/browser/components/Report/Report.tsx index 23e7431f..88664624 100644 --- a/browser/components/Report/Report.tsx +++ b/browser/components/Report/Report.tsx @@ -1,9 +1,4 @@ -import type { - DataError, - FileError, - MetadataError, - TableError, -} from "@dpkit/lib" +import type { UnboundError } from "@dpkit/library" import { Card, Divider, ScrollArea, Stack, Tabs } from "@mantine/core" import { groupBy } from "es-toolkit" import { useState } from "react" @@ -12,7 +7,7 @@ import { objectKeys } from "ts-extras" import { Error } from "./Error/Error.tsx" export function 
Report(props: { - errors?: (MetadataError | DataError | FileError | TableError)[] + errors?: UnboundError[] }) { const { t } = useTranslation() const { errors } = props diff --git a/browser/package.json b/browser/package.json index 1ff7f8a2..15c16ac3 100644 --- a/browser/package.json +++ b/browser/package.json @@ -19,7 +19,7 @@ "devDependencies": { "@cloudflare/containers": "0.0.28", "@cloudflare/vite-plugin": "1.13.13", - "@dpkit/lib": "workspace:*", + "@dpkit/library": "workspace:*", "@dpkit/service": "workspace:*", "@loglayer/transport-tslog": "3.0.4", "@mantine/core": "8.3.5", diff --git a/browser/public/install.sh b/browser/public/install.sh index d9902bca..e5af1875 100755 --- a/browser/public/install.sh +++ b/browser/public/install.sh @@ -2,7 +2,7 @@ # This script installs dpkit # -# Quick install: `curl -fsSL https://dpkit.dev/install.sh | sh` +# Quick install: `curl -fsSL https://dpkit.app/install.sh | sh` # # Acknowledgments: # - eget (https://github.com/zyedidia/eget) @@ -11,7 +11,7 @@ set -e -u # Program -echo "Program: dp" +echo "Program: dpkit" # Version @@ -68,7 +68,7 @@ re-run this script. For example: $ export DPKIT_PLATFORM=linux-x64 - $ curl -fsSL https://dpkit.dev/install.sh | sh + $ curl -fsSL https://dpkit.app/install.sh | sh EOM exit 1 else @@ -77,7 +77,7 @@ fi # Download -archive="dp-$version-$platform.zip" +archive="dpkit-terminal-$version-$platform.zip" source="https://github.com/datisthq/dpkit/releases/download/v$version/$archive" echo "Downloading: $source" @@ -99,8 +99,8 @@ unlink $archive # Done case "$platform" in - windows*) command="./dp.exe" ;; - *) command="./dp" ;; + windows*) command="./dpkit.exe" ;; + *) command="./dpkit" ;; esac echo "Done: run it with \"$command\"" diff --git a/ckan/package.json b/ckan/package.json deleted file mode 100644 index 986eab72..00000000 --- a/ckan/package.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "name": "@dpkit/ckan", - "type": "module", - "version": "0.0.0-dev", - "exports": "./build/index.js", - "sideEffects": false, - "license": "MIT", - "author": "Evgeny Karev", - "repository": "https://github.com/datisthq/dpkit", - "description": "Fast TypeScript data management framework built on top of the Data Package standard and Polars DataFrames", - "keywords": [ - "data", - "polars", - "dataframe", - "datapackage", - "tableschema", - "typescript", - "validation", - "quality", - "fair", - "ckan" - ], - "scripts": { - "build": "tsc" - }, - "dependencies": { - "@dpkit/core": "workspace:*", - "@dpkit/file": "workspace:*" - }, - "devDependencies": { - "@dpkit/test": "workspace:*" - } -} diff --git a/cli/.npmignore b/cli/.npmignore deleted file mode 100644 index 35d88b1c..00000000 --- a/cli/.npmignore +++ /dev/null @@ -1 +0,0 @@ -compile/ diff --git a/core/dialect/validate.spec.ts b/core/dialect/validate.spec.ts deleted file mode 100644 index 4fb4cad6..00000000 --- a/core/dialect/validate.spec.ts +++ /dev/null @@ -1,35 +0,0 @@ -import { describe, expect, it } from "vitest" -import { validateDialect } from "./validate.ts" - -describe("validateDialect", () => { - it("returns valid result for valid dialect", async () => { - const descriptor = { - delimiter: ";", - } - - const result = await validateDialect({ - descriptor, - }) - - expect(result.valid).toBe(true) - expect(result.errors).toEqual([]) - }) - - it("returns validation errors for invalid dialect", async () => { - const invalidDialect = { - delimiter: 1, // Should be a string - } - - const result = await validateDialect(invalidDialect) - - expect(result.valid).toBe(false) - 
expect(result.errors.length).toBeGreaterThan(0) - - const error = result.errors[0] - expect(error).toBeDefined() - if (error) { - expect(error.keyword).toBe("type") - expect(error.instancePath).toBe("/delimiter") - } - }) -}) diff --git a/core/error/Assertion.ts b/core/error/Assertion.ts deleted file mode 100644 index 4287e235..00000000 --- a/core/error/Assertion.ts +++ /dev/null @@ -1,13 +0,0 @@ -import type { MetadataError } from "./Metadata.ts" - -/** - * Thrown when a descriptor assertion fails - */ -export class AssertionError extends Error { - readonly errors: MetadataError[] - - constructor(errors: MetadataError[]) { - super("Assertion failed") - this.errors = errors - } -} diff --git a/core/error/Metadata.ts b/core/error/Metadata.ts deleted file mode 100644 index 91512062..00000000 --- a/core/error/Metadata.ts +++ /dev/null @@ -1,17 +0,0 @@ -import type { BaseError } from "./Base.ts" - -/** - * A descriptor error - */ -export interface MetadataError extends BaseError { - type: "metadata" - keyword: string - instancePath: string - schemaPath: string - params: object - propertyName?: string - message?: string - schema?: any - parentSchema?: object - data?: any -} diff --git a/core/error/index.ts b/core/error/index.ts deleted file mode 100644 index cf171be0..00000000 --- a/core/error/index.ts +++ /dev/null @@ -1,4 +0,0 @@ -export { AssertionError } from "./Assertion.ts" -export type { BaseError } from "./Base.ts" -export type { DataError } from "./Data.ts" -export type { MetadataError } from "./Metadata.ts" diff --git a/core/index.ts b/core/index.ts deleted file mode 100644 index c5c07974..00000000 --- a/core/index.ts +++ /dev/null @@ -1,12 +0,0 @@ -export * from "./descriptor/index.ts" -export * from "./metadata/index.ts" -export * from "./path/index.ts" -export * from "./profile/index.ts" -export * from "./node/index.ts" -export * from "./error/index.ts" -export * from "./dialect/index.ts" -export * from "./field/index.ts" -export * from "./package/index.ts" -export * from "./resource/index.ts" -export * from "./schema/index.ts" -export * from "./plugin.ts" diff --git a/core/node/load.spec.ts b/core/node/load.spec.ts deleted file mode 100644 index 159da363..00000000 --- a/core/node/load.spec.ts +++ /dev/null @@ -1,24 +0,0 @@ -import { describe, expect, it } from "vitest" -import { loadNodeApis } from "./load.ts" - -describe("loadNodeApis", () => { - it("should return node APIs when running in Node.js environment", async () => { - const result = await loadNodeApis() - - expect(result).toBeDefined() - expect(result?.fs).toBeDefined() - expect(result?.path).toBeDefined() - }) - - it("should have fs.readFile function", async () => { - const result = await loadNodeApis() - - expect(typeof result?.fs.readFile).toBe("function") - }) - - it("should have path.join function", async () => { - const result = await loadNodeApis() - - expect(typeof result?.path.join).toBe("function") - }) -}) diff --git a/core/node/node.ts b/core/node/node.ts deleted file mode 100644 index 7f4fc626..00000000 --- a/core/node/node.ts +++ /dev/null @@ -1,3 +0,0 @@ -import { loadNodeApis } from "./load.ts" - -export const node = await loadNodeApis() diff --git a/core/profile/cache.ts b/core/profile/cache.ts deleted file mode 100644 index a5b0d1af..00000000 --- a/core/profile/cache.ts +++ /dev/null @@ -1,8 +0,0 @@ -import QuickLRU from "quick-lru" -import type { Profile } from "./Profile.ts" -import { profileRegistry } from "./registry.ts" - -export const cache = new QuickLRU({ maxSize: 100 }) -for (const { path, 
profile } of Object.values(profileRegistry)) { - cache.set(path, profile) -} diff --git a/core/profile/load.ts b/core/profile/load.ts deleted file mode 100644 index 0f79d9d8..00000000 --- a/core/profile/load.ts +++ /dev/null @@ -1,19 +0,0 @@ -import { loadDescriptor } from "../descriptor/index.ts" -import { cache } from "./cache.ts" -import type { ProfileType } from "./Profile.ts" -import { assertProfile } from "./assert.ts" - -export async function loadProfile( - path: string, - options?: { type?: ProfileType }, -) { - let profile = cache.get(path) - - if (!profile) { - const descriptor = await loadDescriptor(path, { onlyRemote: true }) - profile = await assertProfile(descriptor, { path, type: options?.type }) - cache.set(path, profile) - } - - return profile -} diff --git a/core/profile/validate.spec.ts b/core/profile/validate.spec.ts deleted file mode 100644 index 540e3f69..00000000 --- a/core/profile/validate.spec.ts +++ /dev/null @@ -1,173 +0,0 @@ -import { describe, expect, it } from "vitest" -import { validateDescriptor } from "./validate.ts" - -describe("validateDescriptor", () => { - it("returns empty array for valid descriptor", async () => { - const descriptor = { - name: "test-package", - version: "1.0.0", - description: "A test package", - } - - const profile = { - type: "object", - required: ["name", "version"], - properties: { - name: { type: "string" }, - version: { type: "string" }, - description: { type: "string" }, - }, - } - - const result = await validateDescriptor(descriptor, { profile }) - - expect(result.valid).toBe(true) - expect(result.errors).toEqual([]) - }) - - it("returns validation errors for invalid descriptor", async () => { - const profile = { - type: "object", - required: ["name", "version"], - properties: { - name: { type: "string" }, - version: { type: "string" }, - description: { type: "string" }, - }, - } - - const descriptor = { - name: "test-package", - version: 123, - description: "A test package with wrong version type", - } - - const result = await validateDescriptor(descriptor, { profile }) - - expect(result.valid).toBe(false) - expect(result.errors.length).toBeGreaterThan(0) - - const error = result.errors[0] - expect(error).toBeDefined() - if (error) { - expect(error.keyword).toBe("type") - expect(error.instancePath).toBe("/version") - } - }) - - it("returns errors when required fields are missing", async () => { - const profile = { - type: "object", - required: ["name", "version", "required_field"], - properties: { - name: { type: "string" }, - version: { type: "string" }, - required_field: { type: "string" }, - }, - } - - const descriptor = { - name: "test-package", - version: "1.0.0", - } - - const result = await validateDescriptor(descriptor, { profile }) - - expect(result.valid).toBe(false) - expect(result.errors.length).toBeGreaterThan(0) - - const error = result.errors[0] - expect(error).toBeDefined() - if (error) { - expect(error.keyword).toBe("required") - expect(error.params).toBeDefined() - if (error.params) { - // @ts-ignore - expect(error.params.missingProperty).toBe("required_field") - } - } - }) - - it("validates nested objects in the descriptor", async () => { - const profile = { - type: "object", - required: ["name", "version", "author"], - properties: { - name: { type: "string" }, - version: { type: "string" }, - author: { - type: "object", - required: ["name", "email"], - properties: { - name: { type: "string" }, - email: { - type: "string", - pattern: "^[^@]+@[^@]+\\.[^@]+$", - }, - }, - }, - }, - } - - const descriptor = { 
- name: "test-package", - version: "1.0.0", - author: { - name: "Test Author", - email: "invalid-email", - }, - } - - const result = await validateDescriptor(descriptor, { profile }) - - expect(result.valid).toBe(false) - expect(result.errors.length).toBeGreaterThan(0) - - const hasEmailPatternError = result.errors.some( - error => - error && - error.instancePath === "/author/email" && - error.keyword === "pattern", - ) - - expect(hasEmailPatternError).toBe(true) - }) - - it("returns multiple errors for descriptor with multiple issues", async () => { - const profile = { - type: "object", - required: ["name", "version", "license"], - additionalProperties: false, - properties: { - name: { type: "string", minLength: 3 }, - version: { type: "string", pattern: "^\\d+\\.\\d+\\.\\d+$" }, - license: { type: "string" }, - description: { type: "string" }, - keywords: { - type: "array", - items: { type: "string" }, - }, - }, - } - - const descriptor = { - name: "ab", - version: "not-a-version", - description: 123, - keywords: ["valid", 456, "another"], - extra_field: "should not be here", - } - - const result = await validateDescriptor(descriptor, { profile }) - - expect(result.valid).toBe(false) - expect(result.errors.length).toBeGreaterThan(3) - - const errorKeywords = result.errors.map(err => err?.keyword) - expect(errorKeywords).toContain("required") - expect(errorKeywords).toContain("minLength") - expect(errorKeywords).toContain("pattern") - expect(errorKeywords).toContain("type") - expect(errorKeywords).toContain("additionalProperties") - }) -}) diff --git a/core/profile/validate.ts b/core/profile/validate.ts deleted file mode 100644 index 8badeb2a..00000000 --- a/core/profile/validate.ts +++ /dev/null @@ -1,31 +0,0 @@ -import type { Descriptor } from "../descriptor/index.ts" -import type { MetadataError } from "../error/index.ts" -import { ajv } from "../profile/ajv.ts" -import type { Profile } from "./Profile.ts" -import { loadProfile } from "./load.ts" - -/** - * Validate a descriptor (JSON Object) against a JSON Schema - * It uses Ajv for JSON Schema validation under the hood - * It returns a list of errors (empty if valid) - */ -export async function validateDescriptor( - descriptor: Descriptor, - options: { - profile: Profile | string - }, -) { - const profile = - typeof options.profile === "string" - ? await loadProfile(options.profile) - : options.profile - - const validate = await ajv.compileAsync(profile) - const valid = validate(descriptor) - - const errors: MetadataError[] = validate.errors - ? 
validate.errors?.map(error => ({ ...error, type: "metadata" })) - : [] - - return { valid, errors } -} diff --git a/core/resource/validate.ts b/core/resource/validate.ts deleted file mode 100644 index 28bab261..00000000 --- a/core/resource/validate.ts +++ /dev/null @@ -1,79 +0,0 @@ -import type { Descriptor } from "../descriptor/index.ts" -import { loadDialect } from "../dialect/index.ts" -import { AssertionError } from "../error/index.ts" -import { validateDescriptor } from "../profile/index.ts" -import { loadSchema } from "../schema/index.ts" -import type { Resource } from "./Resource.ts" -import { convertResourceFromDescriptor } from "./convert/fromDescriptor.ts" - -const DEFAULT_PROFILE = "https://datapackage.org/profiles/1.0/dataresource.json" - -/** - * Validate a Resource descriptor (JSON Object) against its profile - */ -export async function validateResourceMetadata( - source: Descriptor | Resource, - options?: { - basepath?: string - }, -) { - const descriptor = source as Descriptor - - const profile = - typeof descriptor.$schema === "string" - ? descriptor.$schema - : DEFAULT_PROFILE - - let { valid, errors } = await validateDescriptor(descriptor, { profile }) - - let resource: Resource | undefined = undefined - if (valid) { - // Validation + normalization = we can cast it - resource = convertResourceFromDescriptor(descriptor, { - basepath: options?.basepath, - }) as unknown as Resource - } - - if (resource) { - const dialectErorrs = await validateDialectIfExternal(resource) - if (dialectErorrs) errors.push(...dialectErorrs) - - const schemaErorrs = await validateSchemaIfExternal(resource) - if (schemaErorrs) errors.push(...schemaErorrs) - - if (errors.length) { - resource = undefined - valid = false - } - } - - return { valid, errors, resource } -} - -async function validateDialectIfExternal(resource: Resource) { - if (typeof resource.dialect === "string") { - try { - await loadDialect(resource.dialect) - } catch (error) { - if (error instanceof AssertionError) { - return error.errors - } - } - } - - return undefined -} - -async function validateSchemaIfExternal(resource: Resource) { - if (typeof resource.schema === "string") { - try { - await loadSchema(resource.schema) - } catch (error) { - if (error instanceof AssertionError) { - return error.errors - } - } - } - - return undefined -} diff --git a/csv/README.md b/csv/README.md deleted file mode 100644 index 48f759f2..00000000 --- a/csv/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# @dpkit/csv - -dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev). 
diff --git a/csv/package.json b/csv/package.json deleted file mode 100644 index 8b6ed3be..00000000 --- a/csv/package.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "name": "@dpkit/csv", - "type": "module", - "version": "0.0.0-dev", - "exports": "./build/index.js", - "sideEffects": false, - "license": "MIT", - "author": "Evgeny Karev", - "repository": "https://github.com/datisthq/dpkit", - "description": "Fast TypeScript data management framework built on top of the Data Package standard and Polars DataFrames", - "keywords": [ - "data", - "polars", - "dataframe", - "datapackage", - "tableschema", - "typescript", - "validation", - "quality", - "fair", - "csv" - ], - "scripts": { - "build": "tsc" - }, - "dependencies": { - "@dpkit/core": "workspace:*", - "@dpkit/file": "workspace:*", - "@dpkit/table": "workspace:*", - "csv-sniffer": "^0.1.1", - "nodejs-polars": "^0.22.1" - }, - "devDependencies": { - "@dpkit/test": "workspace:*" - } -} diff --git a/database/README.md b/database/README.md index 27106983..bb3f51e2 100644 --- a/database/README.md +++ b/database/README.md @@ -1,3 +1,3 @@ # @dpkit/database -dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev). +dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [project's website](https://dpkit.app). diff --git a/database/adapters/base.ts b/database/adapters/base.ts index df089d1d..ea4ddde4 100644 --- a/database/adapters/base.ts +++ b/database/adapters/base.ts @@ -1,4 +1,4 @@ -import type { Field, FieldType, Schema } from "@dpkit/core" +import type { Field, FieldType, Schema } from "@dpkit/metadata" import type { Dialect } from "kysely" import { Kysely } from "kysely" import { LRUCache } from "lru-cache" diff --git a/database/adapters/mysql.spec.ts b/database/adapters/mysql.spec.ts index 630b9840..b0800279 100644 --- a/database/adapters/mysql.spec.ts +++ b/database/adapters/mysql.spec.ts @@ -1,6 +1,6 @@ -import { useRecording } from "@dpkit/test" -import { DataFrame, DataType, Series } from "nodejs-polars" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" +import { useRecording } from "vitest-polly" import { loadPackageFromDatabase } from "../package/index.ts" import { inferDatabaseSchema } from "../schema/index.ts" import { loadDatabaseTable, saveDatabaseTable } from "../table/index.ts" @@ -19,11 +19,13 @@ describe.skipIf(!path)("MysqlAdapter", () => { if (!path) return it("should infer schema", async () => { - const source = DataFrame([ - Series("string", ["string"], DataType.Utf8), - Series("integer", [1], DataType.Int32), - Series("number", [1.1], DataType.Float64), - ]).lazy() + const source = pl + .DataFrame([ + pl.Series("string", ["string"], pl.Utf8), + pl.Series("integer", [1], pl.Int32), + pl.Series("number", [1.1], pl.Float64), + ]) + .lazy() await saveDatabaseTable(source, { path, @@ -48,7 +50,7 @@ describe.skipIf(!path)("MysqlAdapter", () => { }) it("should save/load table", async () => { - const source = DataFrame([record1, record2]).lazy() + const source = pl.DataFrame([record1, record2]).lazy() await saveDatabaseTable(source, { path, 
@@ -67,23 +69,25 @@ describe.skipIf(!path)("MysqlAdapter", () => { }) it("should save/load table with various data types", async () => { - const source = DataFrame([ - Series("array", ["[1, 2, 3]"], DataType.String), - Series("boolean", [true], DataType.Bool), - Series("date", [new Date(Date.UTC(2025, 0, 1))], DataType.Date), - Series("datetime", [new Date(Date.UTC(2025, 0, 1))], DataType.Datetime), - Series("duration", ["P23DT23H"], DataType.String), - Series("geojson", ['{"value": 1}'], DataType.String), - Series("geopoint", [[40.0, 50.0]], DataType.List(DataType.Float32)), - Series("integer", [1], DataType.Int32), - Series("list", [[1.0, 2.0, 3.0]], DataType.List(DataType.Float32)), - Series("number", [1.1], DataType.Float64), - Series("object", ['{"value": 1}']), - Series("string", ["string"], DataType.String), - Series("time", [new Date(Date.UTC(2025, 0, 1))], DataType.Time), - Series("year", [2025], DataType.Int32), - Series("yearmonth", [[2025, 1]], DataType.List(DataType.Int16)), - ]).lazy() + const source = pl + .DataFrame([ + pl.Series("array", ["[1, 2, 3]"], pl.String), + pl.Series("boolean", [true], pl.Bool), + pl.Series("date", [new Date(Date.UTC(2025, 0, 1))], pl.Date), + pl.Series("datetime", [new Date(Date.UTC(2025, 0, 1))], pl.Datetime), + pl.Series("duration", ["P23DT23H"], pl.String), + pl.Series("geojson", ['{"value": 1}'], pl.String), + pl.Series("geopoint", [[40.0, 50.0]], pl.List(pl.Float32)), + pl.Series("integer", [1], pl.Int32), + pl.Series("list", [[1.0, 2.0, 3.0]], pl.List(pl.Float32)), + pl.Series("number", [1.1], pl.Float64), + pl.Series("object", ['{"value": 1}']), + pl.Series("string", ["string"], pl.String), + pl.Series("time", [new Date(Date.UTC(2025, 0, 1))], pl.Time), + pl.Series("year", [2025], pl.Int32), + pl.Series("yearmonth", [[2025, 1]], pl.List(pl.Int16)), + ]) + .lazy() await saveDatabaseTable(source, { path, diff --git a/database/adapters/mysql.ts b/database/adapters/mysql.ts index 3e79f337..04088192 100644 --- a/database/adapters/mysql.ts +++ b/database/adapters/mysql.ts @@ -1,4 +1,4 @@ -import type { FieldType } from "@dpkit/core" +import type { FieldType } from "@dpkit/metadata" import { MysqlDialect } from "kysely" import { createPool } from "mysql2" import type { DatabaseType } from "../field/index.ts" diff --git a/database/adapters/postgresql.spec.ts b/database/adapters/postgresql.spec.ts index e83fead6..c2f3b357 100644 --- a/database/adapters/postgresql.spec.ts +++ b/database/adapters/postgresql.spec.ts @@ -1,6 +1,6 @@ -import { useRecording } from "@dpkit/test" -import { DataFrame, DataType, Series } from "nodejs-polars" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" +import { useRecording } from "vitest-polly" import { loadPackageFromDatabase } from "../package/index.ts" import { inferDatabaseSchema } from "../schema/index.ts" import { loadDatabaseTable, saveDatabaseTable } from "../table/index.ts" @@ -19,11 +19,13 @@ describe.skipIf(!path)("PostgresqlAdapter", () => { if (!path) return it("should infer schema", async () => { - const source = DataFrame([ - Series("string", ["string"], DataType.Utf8), - Series("integer", [1], DataType.Int32), - Series("number", [1.1], DataType.Float64), - ]).lazy() + const source = pl + .DataFrame([ + pl.Series("string", ["string"], pl.Utf8), + pl.Series("integer", [1], pl.Int32), + pl.Series("number", [1.1], pl.Float64), + ]) + .lazy() await saveDatabaseTable(source, { path, @@ -48,7 +50,7 @@ describe.skipIf(!path)("PostgresqlAdapter", () => { }) it("should save/load 
table", async () => { - const source = DataFrame([record1, record2]).lazy() + const source = pl.DataFrame([record1, record2]).lazy() await saveDatabaseTable(source, { path, @@ -67,23 +69,25 @@ describe.skipIf(!path)("PostgresqlAdapter", () => { }) it("should save/load table with various data types", async () => { - const source = DataFrame([ - Series("array", ["[1, 2, 3]"], DataType.String), - Series("boolean", [true], DataType.Bool), - Series("date", [new Date(Date.UTC(2025, 0, 1))], DataType.Date), - Series("datetime", [new Date(Date.UTC(2025, 0, 1))], DataType.Datetime), - Series("duration", ["P23DT23H"], DataType.String), - Series("geojson", ['{"value": 1}'], DataType.String), - Series("geopoint", [[40.0, 50.0]], DataType.List(DataType.Float32)), - Series("integer", [1], DataType.Int32), - Series("list", [[1.0, 2.0, 3.0]], DataType.List(DataType.Float32)), - Series("number", [1.1], DataType.Float64), - Series("object", ['{"value": 1}']), - Series("string", ["string"], DataType.String), - Series("time", [new Date(Date.UTC(2025, 0, 1))], DataType.Time), - Series("year", [2025], DataType.Int32), - Series("yearmonth", [[2025, 1]], DataType.List(DataType.Int16)), - ]).lazy() + const source = pl + .DataFrame([ + pl.Series("array", ["[1, 2, 3]"], pl.String), + pl.Series("boolean", [true], pl.Bool), + pl.Series("date", [new Date(Date.UTC(2025, 0, 1))], pl.Date), + pl.Series("datetime", [new Date(Date.UTC(2025, 0, 1))], pl.Datetime), + pl.Series("duration", ["P23DT23H"], pl.String), + pl.Series("geojson", ['{"value": 1}'], pl.String), + pl.Series("geopoint", [[40.0, 50.0]], pl.List(pl.Float32)), + pl.Series("integer", [1], pl.Int32), + pl.Series("list", [[1.0, 2.0, 3.0]], pl.List(pl.Float32)), + pl.Series("number", [1.1], pl.Float64), + pl.Series("object", ['{"value": 1}']), + pl.Series("string", ["string"], pl.String), + pl.Series("time", [new Date(Date.UTC(2025, 0, 1))], pl.Time), + pl.Series("year", [2025], pl.Int32), + pl.Series("yearmonth", [[2025, 1]], pl.List(pl.Int16)), + ]) + .lazy() await saveDatabaseTable(source, { path, diff --git a/database/adapters/postgresql.ts b/database/adapters/postgresql.ts index bf494555..ff0b5ae9 100644 --- a/database/adapters/postgresql.ts +++ b/database/adapters/postgresql.ts @@ -1,4 +1,4 @@ -import type { FieldType } from "@dpkit/core" +import type { FieldType } from "@dpkit/metadata" import { PostgresDialect } from "kysely" import { Pool } from "pg" import type { DatabaseType } from "../field/index.ts" diff --git a/database/adapters/sqlite.spec.ts b/database/adapters/sqlite.spec.ts index 5f16f8d3..448b9b09 100644 --- a/database/adapters/sqlite.spec.ts +++ b/database/adapters/sqlite.spec.ts @@ -1,8 +1,8 @@ -import type { Package } from "@dpkit/core" -import { getTempFilePath } from "@dpkit/file" -import { useRecording } from "@dpkit/test" -import { DataFrame, DataType, Series } from "nodejs-polars" +import { getTempFilePath } from "@dpkit/dataset" +import type { Package } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" +import { useRecording } from "vitest-polly" import { loadPackageFromDatabase } from "../package/index.ts" import { savePackageToDatabase } from "../package/index.ts" import { inferDatabaseSchema } from "../schema/index.ts" @@ -20,11 +20,13 @@ describe("SqliteAdapter", () => { it("should infer schema", async () => { const path = getTempFilePath() - const source = DataFrame([ - Series("string", ["string"], DataType.Utf8), - Series("integer", [1], DataType.Int32), - Series("number", 
[1.1], DataType.Float64), - ]).lazy() + const source = pl + .DataFrame([ + pl.Series("string", ["string"], pl.Utf8), + pl.Series("integer", [1], pl.Int32), + pl.Series("number", [1.1], pl.Float64), + ]) + .lazy() await saveDatabaseTable(source, { path, @@ -51,7 +53,7 @@ describe("SqliteAdapter", () => { it("should save/load table", async () => { const path = getTempFilePath() - const source = DataFrame([record1, record2]).lazy() + const source = pl.DataFrame([record1, record2]).lazy() await saveDatabaseTable(source, { path, dialect, @@ -66,7 +68,7 @@ describe("SqliteAdapter", () => { it("should save/load table with protocol", async () => { const path = `sqlite://${getTempFilePath()}` - const source = DataFrame([record1, record2]).lazy() + const source = pl.DataFrame([record1, record2]).lazy() await saveDatabaseTable(source, { path, dialect, @@ -81,23 +83,25 @@ describe("SqliteAdapter", () => { it("should save/load table with various data types", async () => { const path = `sqlite://${getTempFilePath()}` - const source = DataFrame([ - Series("array", ["[1, 2, 3]"], DataType.String), - Series("boolean", [true], DataType.Bool), - Series("date", [new Date(Date.UTC(2025, 0, 1))], DataType.Date), - Series("datetime", [new Date(Date.UTC(2025, 0, 1))], DataType.Datetime), - Series("duration", ["P23DT23H"], DataType.String), - Series("geojson", ['{"value": 1}'], DataType.String), - Series("geopoint", [[40.0, 50.0]], DataType.List(DataType.Float32)), - Series("integer", [1], DataType.Int32), - Series("list", [[1.0, 2.0, 3.0]], DataType.List(DataType.Float32)), - Series("number", [1.1], DataType.Float64), - Series("object", ['{"value": 1}']), - Series("string", ["string"], DataType.String), - Series("time", [new Date(Date.UTC(2025, 0, 1))], DataType.Time), - Series("year", [2025], DataType.Int32), - Series("yearmonth", [[2025, 1]], DataType.List(DataType.Int16)), - ]).lazy() + const source = pl + .DataFrame([ + pl.Series("array", ["[1, 2, 3]"], pl.String), + pl.Series("boolean", [true], pl.Bool), + pl.Series("date", [new Date(Date.UTC(2025, 0, 1))], pl.Date), + pl.Series("datetime", [new Date(Date.UTC(2025, 0, 1))], pl.Datetime), + pl.Series("duration", ["P23DT23H"], pl.String), + pl.Series("geojson", ['{"value": 1}'], pl.String), + pl.Series("geopoint", [[40.0, 50.0]], pl.List(pl.Float32)), + pl.Series("integer", [1], pl.Int32), + pl.Series("list", [[1.0, 2.0, 3.0]], pl.List(pl.Float32)), + pl.Series("number", [1.1], pl.Float64), + pl.Series("object", ['{"value": 1}']), + pl.Series("string", ["string"], pl.String), + pl.Series("time", [new Date(Date.UTC(2025, 0, 1))], pl.Time), + pl.Series("year", [2025], pl.Int32), + pl.Series("yearmonth", [[2025, 1]], pl.List(pl.Int16)), + ]) + .lazy() await saveDatabaseTable(source, { path, @@ -236,18 +240,22 @@ describe("SqliteAdapter", () => { { loadTable: async resource => { if (resource.name === "table1") { - return DataFrame([ - Series("id", [1, 2]), - Series("name", ["english", "中文"]), - ]).lazy() + return pl + .DataFrame([ + pl.Series("id", [1, 2]), + pl.Series("name", ["english", "中文"]), + ]) + .lazy() } if (resource.name === "table2") { - return DataFrame([ - Series("id", [1, 2]), - Series("number", [1.1, 2.2]), - Series("boolean", ["true", "false"]), - ]).lazy() + return pl + .DataFrame([ + pl.Series("id", [1, 2]), + pl.Series("number", [1.1, 2.2]), + pl.Series("boolean", ["true", "false"]), + ]) + .lazy() } return undefined diff --git a/database/adapters/sqlite.ts b/database/adapters/sqlite.ts index edade86e..c93eb291 100644 --- 
a/database/adapters/sqlite.ts +++ b/database/adapters/sqlite.ts @@ -1,5 +1,5 @@ -import type { FieldType } from "@dpkit/core" -import { isLocalPathExist } from "@dpkit/file" +import { isLocalPathExist } from "@dpkit/dataset" +import type { FieldType } from "@dpkit/metadata" import type { DatabaseType } from "../field/index.ts" import { BaseAdapter } from "./base.ts" diff --git a/database/index.ts b/database/index.ts index e5beef8e..6d4ebc6b 100644 --- a/database/index.ts +++ b/database/index.ts @@ -1,4 +1,7 @@ -export * from "./schema/index.ts" -export * from "./package/index.ts" -export * from "./table/index.ts" -export * from "./plugin.ts" +export { DatabasePlugin } from "./plugin.ts" + +export { inferDatabaseSchema } from "./schema/index.ts" +export { loadDatabaseTable } from "./table/index.ts" +export { loadPackageFromDatabase } from "./package/index.ts" +export { saveDatabaseTable } from "./table/index.ts" +export { savePackageToDatabase } from "./package/index.ts" diff --git a/database/package.json b/database/package.json index 816ea334..249227a3 100644 --- a/database/package.json +++ b/database/package.json @@ -4,6 +4,7 @@ "version": "0.0.0-dev", "exports": "./build/index.js", "sideEffects": false, + "files": ["build"], "license": "MIT", "author": "Evgeny Karev", "repository": "https://github.com/datisthq/dpkit", @@ -24,19 +25,18 @@ "build": "tsc" }, "dependencies": { - "@dpkit/core": "workspace:*", + "@dpkit/metadata": "workspace:*", "@dpkit/table": "workspace:*", "kysely": "^0.28.5", "kysely-bun-sqlite": "^0.4.0", "kysely-generic-sqlite": "^1.2.1", "lru-cache": "^11.2.1", "mysql2": "^3.14.4", - "nodejs-polars": "^0.22.1", + "nodejs-polars": "^0.22.2", "pg": "^8.16.3" }, "devDependencies": { - "@dpkit/file": "workspace:*", - "@dpkit/test": "workspace:*", + "@dpkit/dataset": "workspace:*", "@types/pg": "^8.15.5" } } diff --git a/database/package/load.ts b/database/package/load.ts index a38c9238..c0e83a8b 100644 --- a/database/package/load.ts +++ b/database/package/load.ts @@ -1,4 +1,4 @@ -import type { Package } from "@dpkit/core" +import type { Package } from "@dpkit/metadata" import { createAdapter } from "../adapters/create.ts" import type { DatabaseFormat } from "../resource/index.ts" diff --git a/database/package/save.ts b/database/package/save.ts index c74bc06b..0c8c5a0d 100644 --- a/database/package/save.ts +++ b/database/package/save.ts @@ -1,7 +1,7 @@ -import type { Package } from "@dpkit/core" -import { resolveSchema } from "@dpkit/core" -import { isRemoteResource } from "@dpkit/core" -import type { SavePackageOptions } from "@dpkit/core" +import type { SavePackageOptions } from "@dpkit/dataset" +import type { Package } from "@dpkit/metadata" +import { resolveSchema } from "@dpkit/metadata" +import { isRemoteResource } from "@dpkit/metadata" import type { TablePlugin } from "@dpkit/table" import type { DatabaseFormat } from "../resource/index.ts" import { saveDatabaseTable } from "../table/index.ts" diff --git a/database/plugin.spec.ts b/database/plugin.spec.ts index 9dcb044b..a3cafcd9 100644 --- a/database/plugin.spec.ts +++ b/database/plugin.spec.ts @@ -1,5 +1,5 @@ -import type { Package, Resource } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Package, Resource } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { beforeEach, describe, expect, it, vi } from "vitest" import * as packageModule from "./package/index.ts" import { DatabasePlugin } from "./plugin.ts" @@ -196,7 +196,7 @@ describe("DatabasePlugin", () => { const 
resource: Partial<Resource> = { path: "postgresql://localhost/testdb", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadDatabaseTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -212,7 +212,7 @@ const resource: Partial<Resource> = { path: "mysql://localhost/testdb", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadDatabaseTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -228,7 +228,7 @@ const resource: Partial<Resource> = { path: "sqlite://test.db", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadDatabaseTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -256,7 +256,7 @@ path: "test.txt", format: "sqlite", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadDatabaseTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -271,7 +271,7 @@ describe("saveTable", () => { it("should save table to postgresql database", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "postgresql://localhost/testdb" } mockSaveDatabaseTable.mockResolvedValue("postgresql://localhost/testdb") @@ -285,7 +285,7 @@ }) it("should save table to mysql database", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "mysql://localhost/testdb" } mockSaveDatabaseTable.mockResolvedValue("mysql://localhost/testdb") @@ -299,7 +299,7 @@ }) it("should save table to sqlite database", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "sqlite://test.db" } mockSaveDatabaseTable.mockResolvedValue("sqlite://test.db") @@ -313,7 +313,7 @@ }) it("should return undefined for non-database paths", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.csv" } const result = await plugin.saveTable(table, options) @@ -323,7 +323,7 @@ }) it("should handle explicit format specification", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "test.txt", format: "sqlite" as const } mockSaveDatabaseTable.mockResolvedValue("test.txt") diff --git a/database/plugin.ts b/database/plugin.ts index df49d6ee..74858a2d 100644 --- a/database/plugin.ts +++ b/database/plugin.ts @@ -1,6 +1,6 @@ -import type { Package, Resource } from "@dpkit/core" -import { inferResourceFormat } from "@dpkit/core" -import type { SavePackageOptions } from "@dpkit/core" +import type { SavePackageOptions } from "@dpkit/dataset" +import type { Package, Resource } from "@dpkit/metadata" +import { inferFormat } from "@dpkit/metadata" import type { TablePlugin } from "@dpkit/table" import type { SaveTableOptions, Table } from "@dpkit/table" import { loadPackageFromDatabase } from "./package/index.ts" @@ -60,7 +60,7 @@ export class DatabasePlugin implements TablePlugin { } function getDatabaseFormat(resource: Partial<Resource>) { - const format = inferResourceFormat(resource) + const format = inferFormat(resource) return format === "postgresql" || format ===
"mysql" || format === "sqlite" ? format : undefined diff --git a/database/schema/infer.ts b/database/schema/infer.ts index 7bf0dbd8..f82159e3 100644 --- a/database/schema/infer.ts +++ b/database/schema/infer.ts @@ -1,5 +1,5 @@ -import type { Resource } from "@dpkit/core" -import { resolveDialect } from "@dpkit/core" +import type { Resource } from "@dpkit/metadata" +import { resolveDialect } from "@dpkit/metadata" import { createAdapter } from "../adapters/create.ts" export async function inferDatabaseSchema( diff --git a/database/table/load.ts b/database/table/load.ts index 1e8a29f0..c6999474 100644 --- a/database/table/load.ts +++ b/database/table/load.ts @@ -1,8 +1,8 @@ -import { resolveDialect, resolveSchema } from "@dpkit/core" -import type { Resource } from "@dpkit/core" +import { resolveDialect, resolveSchema } from "@dpkit/metadata" +import type { Resource } from "@dpkit/metadata" import { normalizeTable } from "@dpkit/table" import type { LoadTableOptions } from "@dpkit/table" -import { DataFrame } from "nodejs-polars" +import * as pl from "nodejs-polars" import { createAdapter } from "../adapters/create.ts" import { inferDatabaseSchema } from "../schema/index.ts" @@ -27,7 +27,7 @@ export async function loadDatabaseTable( const database = await adapter.connectDatabase(path) const records = await database.selectFrom(dialect.table).selectAll().execute() - let table = DataFrame(records).lazy() + let table = pl.DataFrame(records).lazy() if (!options?.denormalized) { let schema = await resolveSchema(resource.schema) diff --git a/database/table/save.spec.ts b/database/table/save.spec.ts index 74bac46b..34a3f78e 100644 --- a/database/table/save.spec.ts +++ b/database/table/save.spec.ts @@ -1,9 +1,9 @@ -import { DataFrame } from "nodejs-polars" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" import { saveDatabaseTable } from "./save.js" describe("saveDatabaseTable", () => { - const mockTable = DataFrame({ col1: [1, 2, 3] }).lazy() + const mockTable = pl.DataFrame({ col1: [1, 2, 3] }).lazy() it("throws error when table name is not defined in dialect", async () => { await expect( diff --git a/database/table/save.ts b/database/table/save.ts index 81e20815..6df0c47e 100644 --- a/database/table/save.ts +++ b/database/table/save.ts @@ -75,9 +75,9 @@ async function populateTable( table: Table, ) { let offset = 0 - const df = await table.collect({ streaming: true }) + const frame = await table.collect({ streaming: true }) while (true) { - const buffer = df.slice(offset, offset + BUFFER_SIZE) + const buffer = frame.slice(offset, offset + BUFFER_SIZE) offset += BUFFER_SIZE const records = buffer.toRecords() diff --git a/datahub/README.md b/datahub/README.md deleted file mode 100644 index ec17ee2e..00000000 --- a/datahub/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# @dpkit/datahub - -dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev). diff --git a/file/README.md b/dataset/README.md similarity index 78% rename from file/README.md rename to dataset/README.md index b4e1a26e..0b8014d7 100644 --- a/file/README.md +++ b/dataset/README.md @@ -1,3 +1,3 @@ -# @dpkit/data +# @dpkit/dataset -dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. 
It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev). +dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [project's website](https://dpkit.app). diff --git a/file/file/copy.ts b/dataset/file/copy.ts similarity index 100% rename from file/file/copy.ts rename to dataset/file/copy.ts diff --git a/file/file/describe.ts b/dataset/file/describe.ts similarity index 55% rename from file/file/describe.ts rename to dataset/file/describe.ts index 4188a2b6..47a09a78 100644 --- a/file/file/describe.ts +++ b/dataset/file/describe.ts @@ -1,5 +1,5 @@ import { prefetchFile } from "./fetch.ts" -import { inferFileBytes, inferFileHash } from "./infer.ts" +import { inferBytes, inferHash } from "./infer.ts" import type { HashType } from "./infer.ts" export async function describeFile( @@ -8,8 +8,11 @@ export async function describeFile( ) { const localPath = await prefetchFile(path) - const bytes = await inferFileBytes(localPath) - const hash = await inferFileHash(localPath, { hashType: options?.hashType }) + const bytes = await inferBytes({ path: localPath }) + const hash = await inferHash( + { path: localPath }, + { hashType: options?.hashType }, + ) return { bytes, hash } } diff --git a/file/file/fetch.ts b/dataset/file/fetch.ts similarity index 64% rename from file/file/fetch.ts rename to dataset/file/fetch.ts index 87616634..f6ea467c 100644 --- a/file/file/fetch.ts +++ b/dataset/file/fetch.ts @@ -1,11 +1,20 @@ -import { isRemotePath } from "@dpkit/core" +import os from "node:os" +import { isRemotePath } from "@dpkit/metadata" +import pAll from "p-all" import { copyFile } from "./copy.ts" import { getTempFilePath } from "./temp.ts" export async function prefetchFiles(path?: string | string[]) { if (!path) return [] + const paths = Array.isArray(path) ? 
path : [path] - const newPaths = await Promise.all(paths.map(prefetchFile)) + const concurrency = os.cpus().length + + const newPaths = await pAll( + paths.map(path => () => prefetchFile(path)), + { concurrency }, + ) + return newPaths } diff --git a/file/file/index.ts b/dataset/file/index.ts similarity index 83% rename from file/file/index.ts rename to dataset/file/index.ts index ed15776b..3f58548b 100644 --- a/file/file/index.ts +++ b/dataset/file/index.ts @@ -4,6 +4,6 @@ export { saveFile } from "./save.ts" export { getTempFilePath, writeTempFile } from "./temp.ts" export { assertLocalPathVacant, isLocalPathExist } from "./path.ts" export { prefetchFile, prefetchFiles } from "./fetch.ts" -export { inferFileEncoding, inferFileBytes, inferFileHash } from "./infer.ts" +export { inferEncoding, inferBytes, inferHash } from "./infer.ts" export { describeFile } from "./describe.ts" export { validateFile } from "./validate.ts" diff --git a/file/file/infer.spec.ts b/dataset/file/infer.spec.ts similarity index 72% rename from file/file/infer.spec.ts rename to dataset/file/infer.spec.ts index aca85bad..d8c81d51 100644 --- a/file/file/infer.spec.ts +++ b/dataset/file/infer.spec.ts @@ -1,13 +1,13 @@ import { beforeEach, describe, expect, it, vi } from "vitest" import * as fetchModule from "./fetch.ts" -import { inferFileBytes, inferFileEncoding, inferFileHash } from "./infer.ts" +import { inferBytes, inferEncoding, inferHash } from "./infer.ts" import { writeTempFile } from "./temp.ts" vi.mock("./fetch.ts", () => ({ prefetchFiles: vi.fn(), })) -describe("inferFileHash", () => { +describe("inferHash", () => { let mockPrefetchFiles: ReturnType let tempFilePath: string @@ -20,7 +20,7 @@ describe("inferFileHash", () => { it("should compute sha256 hash by default", async () => { mockPrefetchFiles.mockResolvedValue([tempFilePath]) - const result = await inferFileHash("https://example.com/file.txt") + const result = await inferHash({ path: "https://example.com/file.txt" }) expect(mockPrefetchFiles).toHaveBeenCalledWith( "https://example.com/file.txt", @@ -31,9 +31,12 @@ describe("inferFileHash", () => { it("should compute md5 hash when specified", async () => { mockPrefetchFiles.mockResolvedValue([tempFilePath]) - const result = await inferFileHash("https://example.com/file.txt", { - hashType: "md5", - }) + const result = await inferHash( + { path: "https://example.com/file.txt" }, + { + hashType: "md5", + }, + ) expect(mockPrefetchFiles).toHaveBeenCalledWith( "https://example.com/file.txt", @@ -44,9 +47,12 @@ describe("inferFileHash", () => { it("should compute sha1 hash when specified", async () => { mockPrefetchFiles.mockResolvedValue([tempFilePath]) - const result = await inferFileHash("https://example.com/file.txt", { - hashType: "sha1", - }) + const result = await inferHash( + { path: "https://example.com/file.txt" }, + { + hashType: "sha1", + }, + ) expect(mockPrefetchFiles).toHaveBeenCalledWith( "https://example.com/file.txt", @@ -57,9 +63,12 @@ describe("inferFileHash", () => { it("should compute sha512 hash when specified", async () => { mockPrefetchFiles.mockResolvedValue([tempFilePath]) - const result = await inferFileHash("https://example.com/file.txt", { - hashType: "sha512", - }) + const result = await inferHash( + { path: "https://example.com/file.txt" }, + { + hashType: "sha512", + }, + ) expect(mockPrefetchFiles).toHaveBeenCalledWith( "https://example.com/file.txt", @@ -70,14 +79,14 @@ describe("inferFileHash", () => { it("should compute consistent hashes for same content", async () => { 
mockPrefetchFiles.mockResolvedValue([tempFilePath]) - const result1 = await inferFileHash("https://example.com/file.txt") - const result2 = await inferFileHash("https://example.com/file.txt") + const result1 = await inferHash({ path: "https://example.com/file.txt" }) + const result2 = await inferHash({ path: "https://example.com/file.txt" }) expect(result1).toBe(result2) }) }) -describe("inferFileBytes", () => { +describe("inferBytes", () => { let mockPrefetchFiles: ReturnType beforeEach(() => { @@ -89,7 +98,7 @@ const tempFilePath = await writeTempFile("Hello, World!") mockPrefetchFiles.mockResolvedValue([tempFilePath]) - const result = await inferFileBytes("https://example.com/file.txt") + const result = await inferBytes({ path: "https://example.com/file.txt" }) expect(mockPrefetchFiles).toHaveBeenCalledWith( "https://example.com/file.txt", @@ -101,7 +110,7 @@ const tempFilePath = await writeTempFile("") mockPrefetchFiles.mockResolvedValue([tempFilePath]) - const result = await inferFileBytes("https://example.com/empty.txt") + const result = await inferBytes({ path: "https://example.com/empty.txt" }) expect(result).toBe(0) }) @@ -110,7 +119,7 @@ const tempFilePath = await writeTempFile("x".repeat(10000)) mockPrefetchFiles.mockResolvedValue([tempFilePath]) - const result = await inferFileBytes("https://example.com/large.txt") + const result = await inferBytes({ path: "https://example.com/large.txt" }) expect(result).toBe(10000) }) @@ -121,7 +130,7 @@ ) mockPrefetchFiles.mockResolvedValue([tempFilePath]) - const result = await inferFileBytes("https://example.com/file.bin") + const result = await inferBytes({ path: "https://example.com/file.bin" }) expect(mockPrefetchFiles).toHaveBeenCalledWith( "https://example.com/file.bin", @@ -130,13 +139,13 @@ }) }) -describe("inferFileEncoding", () => { +describe("inferEncoding", () => { it("should detect utf-8 encoding", async () => { const tempFilePath = await writeTempFile( "Hello, World! This is UTF-8 text.", ) - const result = await inferFileEncoding(tempFilePath) + const result = await inferEncoding({ path: tempFilePath }) expect(result).toBeDefined() expect(["utf-8", "utf8", "ascii"]).toContain(result) @@ -147,7 +156,7 @@ Buffer.from([0xff, 0xd8, 0xff, 0xe0, 0x00]), ) - const result = await inferFileEncoding(tempFilePath) + const result = await inferEncoding({ path: tempFilePath }) expect(result).toBeUndefined() }) @@ -157,7 +166,10 @@ "This is a test file with UTF-8 content.", ) - const result = await inferFileEncoding(tempFilePath, { sampleBytes: 20 }) + const result = await inferEncoding( + { path: tempFilePath }, + { sampleBytes: 20 }, + ) expect(result).toBeDefined() }) @@ -165,9 +177,12 @@ it("should use custom confidence threshold", async () => { const tempFilePath = await writeTempFile("Sample text content") - const result = await inferFileEncoding(tempFilePath, { - confidencePercent: 50, - }) + const result = await inferEncoding( + { path: tempFilePath }, + { + confidencePercent: 50, + }, + ) expect(result).toBeDefined() }) @@ -175,7 +190,7 @@ it("should handle large text files", async () => { const tempFilePath = await writeTempFile("Hello World! ".repeat(1000)) - const result = await inferFileEncoding(tempFilePath) + const result = await inferEncoding({ path: tempFilePath }) expect(result).toBeDefined() expect(["utf-8", "utf8", "ascii"]).toContain(result) @@ -186,7 +201,7 @@ "Test content for encoding detection", ) - const result = await inferFileEncoding(tempFilePath) + const result = await inferEncoding({ path: tempFilePath }) if (result) { expect(result).toBe(result.toLowerCase()) @@ -196,7 +211,7 @@ it("should handle empty files", async () => { const tempFilePath = await writeTempFile("") - const result = await inferFileEncoding(tempFilePath) + const result = await inferEncoding({ path: tempFilePath }) expect([undefined, "utf-8", "utf8", "ascii"]).toContain(result) }) @@ -204,7 +219,7 @@ it("should handle files with special characters", async () => { const tempFilePath = await writeTempFile("Special: é, ñ, ü, ö, à") - const result = await inferFileEncoding(tempFilePath) + const result = await inferEncoding({ path: tempFilePath }) expect(result).toBeDefined() }) @@ -212,9 +227,12 @@ it("should detect encoding with low confidence threshold", async () => { const tempFilePath = await writeTempFile("Simple text") - const result = await inferFileEncoding(tempFilePath, { - confidencePercent: 30, - }) + const result = await inferEncoding( + { path: tempFilePath }, + { + confidencePercent: 30, + }, + ) expect(result).toBeDefined() }) diff --git a/file/file/infer.ts b/dataset/file/infer.ts similarity index 73% rename from file/file/infer.ts rename to dataset/file/infer.ts index 0f5acb98..b7157790 100644 --- a/file/file/infer.ts +++ b/dataset/file/infer.ts @@ -1,4 +1,5 @@ import { stat } from "node:fs/promises" +import type { Resource } from "@dpkit/metadata" import chardet from "chardet" import * as hasha from "hasha" import { isBinaryFile } from "isbinaryfile" @@ -10,8 +11,8 @@ import { loadFile } from "./load.ts" export type HashType = "md5" | "sha1" | "sha256" | "sha512" -export async function inferFileBytes(path: string | string[]) { - const localPaths = await prefetchFiles(path) +export async function inferBytes(resource: Partial<Resource>) { + const localPaths = await prefetchFiles(resource.path) let bytes = 0 for (const localPath of localPaths) { @@ -22,12 +23,12 @@ export async function inferFileBytes(path: string | string[]) { return bytes } -export async function inferFileHash( - path: string | string[], +export async function inferHash( + resource: Partial<Resource>, options?: { hashType?: HashType }, ) { const algorithm = options?.hashType ?? "sha256" - const localPaths = await prefetchFiles(path) + const localPaths = await prefetchFiles(resource.path) const streams = await pMap(localPaths, async path => loadFileStream(path)) const stream = concatFileStreams(streams) @@ -36,15 +37,20 @@ return `${algorithm}:${hash}` } -export async function inferFileEncoding( - path: string | string[], +export async function inferEncoding( + resource: Partial<Resource>, options?: { sampleBytes?: number; confidencePercent?: number }, ) { const maxBytes = options?.sampleBytes ?? 10_000 const confidencePercent = options?.confidencePercent ?? 80 - const firstPath = Array.isArray(path) ? path[0] : path - if (!firstPath) return undefined + const firstPath = Array.isArray(resource.path) ?
resource.path[0] + : resource.path + + if (!firstPath) { + return undefined + } const buffer = await loadFile(firstPath, { maxBytes }) const isBinary = await isBinaryFile(buffer) diff --git a/file/file/load.ts b/dataset/file/load.ts similarity index 100% rename from file/file/load.ts rename to dataset/file/load.ts diff --git a/file/file/path.ts b/dataset/file/path.ts similarity index 100% rename from file/file/path.ts rename to dataset/file/path.ts diff --git a/file/file/save.ts b/dataset/file/save.ts similarity index 100% rename from file/file/save.ts rename to dataset/file/save.ts diff --git a/file/file/temp.ts b/dataset/file/temp.ts similarity index 60% rename from file/file/temp.ts rename to dataset/file/temp.ts index 5fbae9ed..2b1866f8 100644 --- a/file/file/temp.ts +++ b/dataset/file/temp.ts @@ -6,15 +6,23 @@ import { temporaryFile } from "tempy" export async function writeTempFile( content: string | Buffer, - options?: { persist?: boolean }, + options?: { persist?: boolean; filename?: string; format?: string }, ) { const path = getTempFilePath(options) await writeFile(path, content) return path } -export function getTempFilePath(options?: { persist?: boolean }) { - const path = temporaryFile() +export function getTempFilePath(options?: { + persist?: boolean + filename?: string + format?: string +}) { + const { filename, format } = options ?? {} + + const path = temporaryFile( + filename ? { name: filename } : { extension: format }, + ) if (!options?.persist) { exitHook(() => { diff --git a/file/file/validate.spec.ts b/dataset/file/validate.spec.ts similarity index 54% rename from file/file/validate.spec.ts rename to dataset/file/validate.spec.ts index e6e219a5..22c1e4f0 100644 --- a/file/file/validate.spec.ts +++ b/dataset/file/validate.spec.ts @@ -1,6 +1,6 @@ import { beforeEach, describe, expect, it, vi } from "vitest" import * as fetchModule from "./fetch.ts" -import { inferFileHash } from "./infer.ts" +import { inferHash } from "./infer.ts" import { writeTempFile } from "./temp.ts" import { validateFile } from "./validate.ts" @@ -16,40 +16,42 @@ describe("validateFile", () => { vi.clearAllMocks() }) - it("should return valid result when no validation options provided", async () => { + it("should return valid report when no validation options provided", async () => { const tempFilePath = await writeTempFile("Hello, World!") mockPrefetchFiles.mockResolvedValue([tempFilePath]) - const result = await validateFile("https://example.com/file.txt") + const report = await validateFile({ path: "https://example.com/file.txt" }) expect(mockPrefetchFiles).toHaveBeenCalledWith( "https://example.com/file.txt", ) - expect(result).toEqual({ valid: true, errors: [] }) + expect(report).toEqual({ valid: true, errors: [] }) }) it("should validate bytes successfully when they match", async () => { const tempFilePath = await writeTempFile("Hello, World!") mockPrefetchFiles.mockResolvedValue([tempFilePath]) - const result = await validateFile("https://example.com/file.txt", { + const report = await validateFile({ + path: "https://example.com/file.txt", bytes: 13, }) - expect(result).toEqual({ valid: true, errors: [] }) + expect(report).toEqual({ valid: true, errors: [] }) }) it("should return error when bytes do not match", async () => { const tempFilePath = await writeTempFile("Hello, World!") mockPrefetchFiles.mockResolvedValue([tempFilePath]) - const result = await validateFile("https://example.com/file.txt", { + const report = await validateFile({ + path: "https://example.com/file.txt", bytes: 
1024, }) - expect(result.valid).toBe(false) - expect(result.errors).toHaveLength(1) - expect(result.errors[0]).toEqual({ + expect(report.valid).toBe(false) + expect(report.errors).toHaveLength(1) + expect(report.errors[0]).toEqual({ type: "file/bytes", bytes: 1024, actualBytes: 13, @@ -60,28 +62,36 @@ describe("validateFile", () => { const tempFilePath = await writeTempFile("Hello, World!") mockPrefetchFiles.mockResolvedValue([tempFilePath]) - const actualHash = await inferFileHash(tempFilePath, { hashType: "md5" }) + const actualHash = await inferHash( + { path: tempFilePath }, + { hashType: "md5" }, + ) - const result = await validateFile("https://example.com/file.txt", { + const report = await validateFile({ + path: "https://example.com/file.txt", hash: actualHash, }) - expect(result).toEqual({ valid: true, errors: [] }) + expect(report).toEqual({ valid: true, errors: [] }) }) it("should return error when hash does not match", async () => { const tempFilePath = await writeTempFile("Hello, World!") mockPrefetchFiles.mockResolvedValue([tempFilePath]) - const actualHash = await inferFileHash(tempFilePath, { hashType: "md5" }) + const actualHash = await inferHash( + { path: tempFilePath }, + { hashType: "md5" }, + ) - const result = await validateFile("https://example.com/file.txt", { + const report = await validateFile({ + path: "https://example.com/file.txt", hash: "md5:wronghash", }) - expect(result.valid).toBe(false) - expect(result.errors).toHaveLength(1) - expect(result.errors[0]).toEqual({ + expect(report.valid).toBe(false) + expect(report.errors).toHaveLength(1) + expect(report.errors[0]).toEqual({ type: "file/hash", hash: "md5:wronghash", actualHash, @@ -92,74 +102,94 @@ describe("validateFile", () => { const tempFilePath = await writeTempFile("Hello, World!") mockPrefetchFiles.mockResolvedValue([tempFilePath]) - const actualHash = await inferFileHash(tempFilePath, { hashType: "sha256" }) + const actualHash = await inferHash( + { path: tempFilePath }, + { hashType: "sha256" }, + ) - const result = await validateFile("https://example.com/file.txt", { + const report = await validateFile({ + path: "https://example.com/file.txt", hash: actualHash, }) - expect(result).toEqual({ valid: true, errors: [] }) + expect(report).toEqual({ valid: true, errors: [] }) }) it("should validate sha1 hash", async () => { const tempFilePath = await writeTempFile("Hello, World!") mockPrefetchFiles.mockResolvedValue([tempFilePath]) - const actualHash = await inferFileHash(tempFilePath, { hashType: "sha1" }) + const actualHash = await inferHash( + { path: tempFilePath }, + { hashType: "sha1" }, + ) - const result = await validateFile("https://example.com/file.txt", { + const report = await validateFile({ + path: "https://example.com/file.txt", hash: actualHash, }) - expect(result).toEqual({ valid: true, errors: [] }) + expect(report).toEqual({ valid: true, errors: [] }) }) it("should validate sha512 hash", async () => { const tempFilePath = await writeTempFile("Hello, World!") mockPrefetchFiles.mockResolvedValue([tempFilePath]) - const actualHash = await inferFileHash(tempFilePath, { hashType: "sha512" }) + const actualHash = await inferHash( + { path: tempFilePath }, + { hashType: "sha512" }, + ) - const result = await validateFile("https://example.com/file.txt", { + const report = await validateFile({ + path: "https://example.com/file.txt", hash: actualHash, }) - expect(result).toEqual({ valid: true, errors: [] }) + expect(report).toEqual({ valid: true, errors: [] }) }) it("should validate both bytes and 
hash when both match", async () => { const tempFilePath = await writeTempFile("Hello, World!") mockPrefetchFiles.mockResolvedValue([tempFilePath]) - const actualHash = await inferFileHash(tempFilePath, { hashType: "md5" }) + const actualHash = await inferHash( + { path: tempFilePath }, + { hashType: "md5" }, + ) - const result = await validateFile("https://example.com/file.txt", { + const report = await validateFile({ + path: "https://example.com/file.txt", bytes: 13, hash: actualHash, }) - expect(result).toEqual({ valid: true, errors: [] }) + expect(report).toEqual({ valid: true, errors: [] }) }) it("should return multiple errors when both bytes and hash do not match", async () => { const tempFilePath = await writeTempFile("Hello, World!") mockPrefetchFiles.mockResolvedValue([tempFilePath]) - const actualHash = await inferFileHash(tempFilePath, { hashType: "md5" }) + const actualHash = await inferHash( + { path: tempFilePath }, + { hashType: "md5" }, + ) - const result = await validateFile("https://example.com/file.txt", { + const report = await validateFile({ + path: "https://example.com/file.txt", bytes: 1024, hash: "md5:wronghash", }) - expect(result.valid).toBe(false) - expect(result.errors).toHaveLength(2) - expect(result.errors[0]).toEqual({ + expect(report.valid).toBe(false) + expect(report.errors).toHaveLength(2) + expect(report.errors[0]).toEqual({ type: "file/bytes", bytes: 1024, actualBytes: 13, }) - expect(result.errors[1]).toEqual({ + expect(report.errors[1]).toEqual({ type: "file/hash", hash: "md5:wronghash", actualHash, @@ -170,50 +200,59 @@ describe("validateFile", () => { const tempFilePath = await writeTempFile("Hello, World!") mockPrefetchFiles.mockResolvedValue([tempFilePath]) - const actualHash = await inferFileHash(tempFilePath, { hashType: "md5" }) + const actualHash = await inferHash( + { path: tempFilePath }, + { hashType: "md5" }, + ) - const result = await validateFile("https://example.com/file.txt", { + const report = await validateFile({ + path: "https://example.com/file.txt", bytes: 1024, hash: actualHash, }) - expect(result.valid).toBe(false) - expect(result.errors).toHaveLength(1) - expect(result.errors[0]?.type).toBe("file/bytes") + expect(report.valid).toBe(false) + expect(report.errors).toHaveLength(1) + expect(report.errors[0]?.type).toBe("file/bytes") }) it("should return error when only hash mismatch", async () => { const tempFilePath = await writeTempFile("Hello, World!") mockPrefetchFiles.mockResolvedValue([tempFilePath]) - const result = await validateFile("https://example.com/file.txt", { + const report = await validateFile({ + path: "https://example.com/file.txt", bytes: 13, hash: "md5:wronghash", }) - expect(result.valid).toBe(false) - expect(result.errors).toHaveLength(1) - expect(result.errors[0]?.type).toBe("file/hash") + expect(report.valid).toBe(false) + expect(report.errors).toHaveLength(1) + expect(report.errors[0]?.type).toBe("file/hash") }) it("should handle local file paths", async () => { const tempFilePath = await writeTempFile("x".repeat(2048)) mockPrefetchFiles.mockResolvedValue([tempFilePath]) - const result = await validateFile("/local/path/file.txt", { bytes: 2048 }) + const report = await validateFile({ + path: "/local/path/file.txt", + bytes: 2048, + }) expect(mockPrefetchFiles).toHaveBeenCalledWith("/local/path/file.txt") - expect(result).toEqual({ valid: true, errors: [] }) + expect(report).toEqual({ valid: true, errors: [] }) }) it("should handle empty file validation", async () => { const tempFilePath = await 
writeTempFile("") mockPrefetchFiles.mockResolvedValue([tempFilePath]) - const result = await validateFile("https://example.com/empty.txt", { + const report = await validateFile({ + path: "https://example.com/empty.txt", bytes: 0, }) - expect(result).toEqual({ valid: true, errors: [] }) + expect(report).toEqual({ valid: true, errors: [] }) }) }) diff --git a/dataset/file/validate.ts b/dataset/file/validate.ts new file mode 100644 index 00000000..fc6b619b --- /dev/null +++ b/dataset/file/validate.ts @@ -0,0 +1,60 @@ +import type { FileError } from "@dpkit/metadata" +import { createReport } from "@dpkit/metadata" +import type { Resource } from "@dpkit/metadata" +import { prefetchFiles } from "./fetch.ts" +import { inferBytes, inferEncoding, inferHash } from "./infer.ts" + +export async function validateFile(resource: Partial<Resource>) { + const errors: FileError[] = [] + const localPaths = await prefetchFiles(resource.path) + + if (resource.bytes) { + const bytes = resource.bytes + const actualBytes = await inferBytes({ path: localPaths }) + + if (bytes !== actualBytes) { + errors.push({ + type: "file/bytes", + bytes, + actualBytes, + }) + } + } + + if (resource.hash) { + const [hashValue, hashType = "md5"] = resource.hash.split(":").toReversed() + + const hash = `${hashType}:${hashValue}` + const actualHash = await inferHash( + { path: localPaths }, + { + hashType: hashType as any, + }, + ) + + if (hash !== actualHash) { + errors.push({ + type: "file/hash", + hash, + actualHash, + }) + } + } + + if (resource.encoding) { + const encoding = resource.encoding + const actualEncoding = await inferEncoding({ path: localPaths }) + + if (actualEncoding) { + if (encoding !== actualEncoding) { + errors.push({ + type: "file/encoding", + encoding, + actualEncoding, + }) + } + } + } + + return createReport(errors) +} diff --git a/folder/folder/create.ts b/dataset/folder/create.ts similarity index 100% rename from folder/folder/create.ts rename to dataset/folder/create.ts diff --git a/folder/folder/index.ts b/dataset/folder/index.ts similarity index 100% rename from folder/folder/index.ts rename to dataset/folder/index.ts diff --git a/folder/folder/temp.ts b/dataset/folder/temp.ts similarity index 100% rename from folder/folder/temp.ts rename to dataset/folder/temp.ts diff --git a/dataset/index.ts b/dataset/index.ts new file mode 100644 index 00000000..0cb13d03 --- /dev/null +++ b/dataset/index.ts @@ -0,0 +1,38 @@ +export type { DatasetPlugin } from "./plugin.ts" +export type { SavePackageOptions } from "./plugin.ts" + +export { CkanPlugin } from "./plugins/ckan/index.ts" +export { DatahubPlugin } from "./plugins/datahub/index.ts" +export { DescriptorPlugin } from "./plugins/descriptor/index.ts" +export { FolderPlugin } from "./plugins/folder/index.ts" +export { GithubPlugin } from "./plugins/github/index.ts" +export { ZenodoPlugin } from "./plugins/zenodo/index.ts" +export { ZipPlugin } from "./plugins/zip/index.ts" + +export { assertLocalPathVacant } from "./file/index.ts" +export { copyFile } from "./file/index.ts" +export { describeFile } from "./file/index.ts" +export { getTempFilePath } from "./file/index.ts" +export { getTempFolderPath } from "./folder/index.ts" +export { inferBytes } from "./file/index.ts" +export { inferEncoding } from "./file/index.ts" +export { inferHash } from "./file/index.ts" +export { isLocalPathExist } from "./file/index.ts" +export { loadFile } from "./file/index.ts" +export { loadFileStream } from "./stream/index.ts" +export { loadPackageFromCkan } from "./plugins/ckan/index.ts"
+export { loadPackageFromDatahub } from "./plugins/datahub/index.ts" +export { loadPackageFromFolder } from "./plugins/folder/index.ts" +export { loadPackageFromGithub } from "./plugins/github/index.ts" +export { loadPackageFromZenodo } from "./plugins/zenodo/index.ts" +export { loadPackageFromZip } from "./plugins/zip/index.ts" +export { prefetchFile } from "./file/index.ts" +export { prefetchFiles } from "./file/index.ts" +export { saveFile } from "./file/index.ts" +export { savePackageToCkan } from "./plugins/ckan/index.ts" +export { savePackageToFolder } from "./plugins/folder/index.ts" +export { savePackageToGithub } from "./plugins/github/index.ts" +export { savePackageToZenodo } from "./plugins/zenodo/index.ts" +export { savePackageToZip } from "./plugins/zip/index.ts" +export { validateFile } from "./file/index.ts" +export { writeTempFile } from "./file/index.ts" diff --git a/file/package.json b/dataset/package.json similarity index 82% rename from file/package.json rename to dataset/package.json index c99ced6f..a7014bf3 100644 --- a/file/package.json +++ b/dataset/package.json @@ -1,9 +1,10 @@ { - "name": "@dpkit/file", + "name": "@dpkit/dataset", "type": "module", "version": "0.0.0-dev", "exports": "./build/index.js", "sideEffects": false, + "files": ["build"], "license": "MIT", "author": "Evgeny Karev", "repository": "https://github.com/datisthq/dpkit", @@ -18,18 +19,22 @@ "validation", "quality", "fair", - "file" + "dataset", + "ckan", + "datahub" ], "scripts": { "build": "tsc" }, "dependencies": { - "@dpkit/core": "workspace:*", + "@dpkit/metadata": "workspace:*", "chardet": "^2.1.0", "exit-hook": "^4.0.0", + "fflate": "^0.8.2", "hasha": "^6.0.0", "isbinaryfile": "^5.0.4", "multistream": "^4.1.0", + "p-all": "^5.0.1", "p-map": "^7.0.3", "tempy": "3.1.0", "tiny-invariant": "^1.3.3" diff --git a/file/package/index.ts b/dataset/package/index.ts similarity index 52% rename from file/package/index.ts rename to dataset/package/index.ts index 8b08f57a..933dd45b 100644 --- a/file/package/index.ts +++ b/dataset/package/index.ts @@ -1 +1,2 @@ export { getPackageBasepath } from "./path.ts" +export { mergePackages } from "./merge.ts" diff --git a/core/package/merge.ts b/dataset/package/merge.ts similarity index 75% rename from core/package/merge.ts rename to dataset/package/merge.ts index 88d841d4..cf5ec041 100644 --- a/core/package/merge.ts +++ b/dataset/package/merge.ts @@ -1,7 +1,5 @@ -import type { Package } from "./Package.ts" -import { loadPackageDescriptor } from "./load.ts" - -// TODO: Move to @dpkit/dataset? 
+import type { Package } from "@dpkit/metadata" +import { loadPackageDescriptor } from "@dpkit/metadata" /** * Merges a system data package into a user data package if provided diff --git a/file/package/path.spec.ts b/dataset/package/path.spec.ts similarity index 100% rename from file/package/path.spec.ts rename to dataset/package/path.spec.ts diff --git a/file/package/path.ts b/dataset/package/path.ts similarity index 99% rename from file/package/path.ts rename to dataset/package/path.ts index 9300d965..c46c78fb 100644 --- a/file/package/path.ts +++ b/dataset/package/path.ts @@ -1,5 +1,5 @@ import { join, relative, resolve, sep } from "node:path" -import { type Package, getBasepath, isRemotePath } from "@dpkit/core" +import { type Package, getBasepath, isRemotePath } from "@dpkit/metadata" export function getPackageBasepath(dataPackage: Package) { const paths: string[] = [] diff --git a/core/plugin.ts b/dataset/plugin.ts similarity index 64% rename from core/plugin.ts rename to dataset/plugin.ts index 22f91c64..ca49dd50 100644 --- a/core/plugin.ts +++ b/dataset/plugin.ts @@ -1,15 +1,13 @@ -import type { Package } from "./package/index.ts" +import type { Package } from "@dpkit/metadata" export type SavePackageOptions = { target: string withRemote?: boolean } -export interface Plugin { - // TODO: move to @dpkit/dataset? +export interface DatasetPlugin { loadPackage?(source: string): Promise<Package | undefined> - // TODO: move to @dpkit/dataset? savePackage?( dataPackage: Package, options: SavePackageOptions, diff --git a/ckan/ckan/index.ts b/dataset/plugins/ckan/ckan/index.ts similarity index 100% rename from ckan/ckan/index.ts rename to dataset/plugins/ckan/ckan/index.ts diff --git a/ckan/ckan/request.ts b/dataset/plugins/ckan/ckan/request.ts similarity index 95% rename from ckan/ckan/request.ts rename to dataset/plugins/ckan/ckan/request.ts index 8df70809..8e8c8cf9 100644 --- a/ckan/ckan/request.ts +++ b/dataset/plugins/ckan/ckan/request.ts @@ -1,4 +1,4 @@ -import type { Descriptor } from "@dpkit/core" +import type { Descriptor } from "@dpkit/metadata" export async function makeCkanApiRequest(options: { ckanUrl: string diff --git a/ckan/index.ts b/dataset/plugins/ckan/index.ts similarity index 100% rename from ckan/index.ts rename to dataset/plugins/ckan/index.ts diff --git a/ckan/package/Organization.ts b/dataset/plugins/ckan/package/Organization.ts similarity index 100% rename from ckan/package/Organization.ts rename to dataset/plugins/ckan/package/Organization.ts diff --git a/ckan/package/Package.ts b/dataset/plugins/ckan/package/Package.ts similarity index 100% rename from ckan/package/Package.ts rename to dataset/plugins/ckan/package/Package.ts diff --git a/ckan/package/Tag.ts b/dataset/plugins/ckan/package/Tag.ts similarity index 100% rename from ckan/package/Tag.ts rename to dataset/plugins/ckan/package/Tag.ts diff --git a/ckan/package/convert/fromCkan.spec.ts b/dataset/plugins/ckan/package/convert/fromCkan.spec.ts similarity index 100% rename from ckan/package/convert/fromCkan.spec.ts rename to dataset/plugins/ckan/package/convert/fromCkan.spec.ts diff --git a/ckan/package/convert/fromCkan.ts b/dataset/plugins/ckan/package/convert/fromCkan.ts similarity index 94% rename from ckan/package/convert/fromCkan.ts rename to dataset/plugins/ckan/package/convert/fromCkan.ts index 456329a7..51a91495 100644 --- a/ckan/package/convert/fromCkan.ts +++ b/dataset/plugins/ckan/package/convert/fromCkan.ts @@ -1,5 +1,5 @@ -import type { Contributor, Package } from "@dpkit/core" -import type { License } from
"@dpkit/core" +import type { Contributor, Package } from "@dpkit/metadata" +import type { License } from "@dpkit/metadata" import { convertResourceFromCkan } from "../../resource/index.ts" import type { CkanPackage } from "../Package.ts" diff --git a/ckan/package/convert/toCkan.spec.ts b/dataset/plugins/ckan/package/convert/toCkan.spec.ts similarity index 99% rename from ckan/package/convert/toCkan.spec.ts rename to dataset/plugins/ckan/package/convert/toCkan.spec.ts index cf4e0090..ce15d915 100644 --- a/ckan/package/convert/toCkan.spec.ts +++ b/dataset/plugins/ckan/package/convert/toCkan.spec.ts @@ -1,4 +1,4 @@ -import type { Package } from "@dpkit/core" +import type { Package } from "@dpkit/metadata" import { describe, expect, it } from "vitest" import type { CkanPackage } from "../Package.ts" import ckanPackageFixture from "../fixtures/ckan-package.json" with { diff --git a/ckan/package/convert/toCkan.ts b/dataset/plugins/ckan/package/convert/toCkan.ts similarity index 97% rename from ckan/package/convert/toCkan.ts rename to dataset/plugins/ckan/package/convert/toCkan.ts index 3787332a..a1c58826 100644 --- a/ckan/package/convert/toCkan.ts +++ b/dataset/plugins/ckan/package/convert/toCkan.ts @@ -1,4 +1,4 @@ -import type { Package } from "@dpkit/core" +import type { Package } from "@dpkit/metadata" import type { SetRequired } from "type-fest" import type { CkanResource } from "../../resource/Resource.ts" import { convertResourceToCkan } from "../../resource/index.ts" diff --git a/ckan/package/fixtures/ckan-package.json b/dataset/plugins/ckan/package/fixtures/ckan-package.json similarity index 100% rename from ckan/package/fixtures/ckan-package.json rename to dataset/plugins/ckan/package/fixtures/ckan-package.json diff --git a/ckan/package/fixtures/data.csv b/dataset/plugins/ckan/package/fixtures/data.csv similarity index 100% rename from ckan/package/fixtures/data.csv rename to dataset/plugins/ckan/package/fixtures/data.csv diff --git a/ckan/package/fixtures/generated/load.spec.ts.snap b/dataset/plugins/ckan/package/fixtures/generated/load.spec.ts.snap similarity index 100% rename from ckan/package/fixtures/generated/load.spec.ts.snap rename to dataset/plugins/ckan/package/fixtures/generated/load.spec.ts.snap diff --git a/ckan/package/fixtures/generated/loadPackageFromCkan-should-load-a-package_3615031657/recording.har b/dataset/plugins/ckan/package/fixtures/generated/loadPackageFromCkan-should-load-a-package_3615031657/recording.har similarity index 100% rename from ckan/package/fixtures/generated/loadPackageFromCkan-should-load-a-package_3615031657/recording.har rename to dataset/plugins/ckan/package/fixtures/generated/loadPackageFromCkan-should-load-a-package_3615031657/recording.har diff --git a/ckan/package/index.ts b/dataset/plugins/ckan/package/index.ts similarity index 100% rename from ckan/package/index.ts rename to dataset/plugins/ckan/package/index.ts diff --git a/ckan/package/load.spec.ts b/dataset/plugins/ckan/package/load.spec.ts similarity index 89% rename from ckan/package/load.spec.ts rename to dataset/plugins/ckan/package/load.spec.ts index 46bc6c91..03ee0e39 100644 --- a/ckan/package/load.spec.ts +++ b/dataset/plugins/ckan/package/load.spec.ts @@ -1,5 +1,5 @@ -import { useRecording } from "@dpkit/test" import { describe, expect, it } from "vitest" +import { useRecording } from "vitest-polly" import { loadPackageFromCkan } from "./load.ts" useRecording() diff --git a/ckan/package/load.ts b/dataset/plugins/ckan/package/load.ts similarity index 97% rename from 
ckan/package/load.ts rename to dataset/plugins/ckan/package/load.ts index e31e5d9f..0f012b57 100644 --- a/ckan/package/load.ts +++ b/dataset/plugins/ckan/package/load.ts @@ -1,4 +1,4 @@ -import { mergePackages } from "@dpkit/core" +import { mergePackages } from "../../../package/index.ts" import { makeCkanApiRequest } from "../ckan/index.ts" import type { CkanPackage } from "./Package.ts" import { convertPackageFromCkan } from "./convert/fromCkan.ts" diff --git a/ckan/package/save.spec.ts b/dataset/plugins/ckan/package/save.spec.ts similarity index 99% rename from ckan/package/save.spec.ts rename to dataset/plugins/ckan/package/save.spec.ts index 5d730738..19c0a483 100644 --- a/ckan/package/save.spec.ts +++ b/dataset/plugins/ckan/package/save.spec.ts @@ -1,6 +1,6 @@ import { relative } from "node:path" -import type { Package } from "@dpkit/core" -import { loadPackageDescriptor } from "@dpkit/core" +import type { Package } from "@dpkit/metadata" +import { loadPackageDescriptor } from "@dpkit/metadata" import { afterEach, beforeEach, describe, expect, it, vi } from "vitest" import { savePackageToCkan } from "./save.ts" diff --git a/ckan/package/save.ts b/dataset/plugins/ckan/package/save.ts similarity index 90% rename from ckan/package/save.ts rename to dataset/plugins/ckan/package/save.ts index 0858f09a..c07639e7 100644 --- a/ckan/package/save.ts +++ b/dataset/plugins/ckan/package/save.ts @@ -1,16 +1,14 @@ import { blob } from "node:stream/consumers" -import type { Descriptor, Package } from "@dpkit/core" +import type { Descriptor, Package } from "@dpkit/metadata" import { convertPackageToDescriptor, getFilename, getFormat, stringifyDescriptor, -} from "@dpkit/core" -import { - getPackageBasepath, - loadFileStream, - saveResourceFiles, -} from "@dpkit/file" +} from "@dpkit/metadata" +import { getPackageBasepath } from "../../../package/index.ts" +import { saveResourceFiles } from "../../../resource/index.ts" +import { loadFileStream } from "../../../stream/index.ts" import { makeCkanApiRequest } from "../ckan/index.ts" import type { CkanResource } from "../resource/index.ts" import { convertResourceToCkan } from "../resource/index.ts" diff --git a/ckan/plugin.spec.ts b/dataset/plugins/ckan/plugin.spec.ts similarity index 98% rename from ckan/plugin.spec.ts rename to dataset/plugins/ckan/plugin.spec.ts index 9af58790..a5efd47c 100644 --- a/ckan/plugin.spec.ts +++ b/dataset/plugins/ckan/plugin.spec.ts @@ -1,4 +1,4 @@ -import type { Package } from "@dpkit/core" +import type { Package } from "@dpkit/metadata" import { beforeEach, describe, expect, it, vi } from "vitest" import * as packageModule from "./package/load.ts" import { CkanPlugin } from "./plugin.ts" diff --git a/ckan/plugin.ts b/dataset/plugins/ckan/plugin.ts similarity index 72% rename from ckan/plugin.ts rename to dataset/plugins/ckan/plugin.ts index cf84f1de..4bd30f22 100644 --- a/ckan/plugin.ts +++ b/dataset/plugins/ckan/plugin.ts @@ -1,8 +1,8 @@ -import type { Plugin } from "@dpkit/core" -import { isRemotePath } from "@dpkit/core" +import { isRemotePath } from "@dpkit/metadata" +import type { DatasetPlugin } from "../../plugin.ts" import { loadPackageFromCkan } from "./package/load.ts" -export class CkanPlugin implements Plugin { +export class CkanPlugin implements DatasetPlugin { async loadPackage(source: string) { const isCkan = getIsCkan(source) if (!isCkan) return undefined diff --git a/ckan/resource/Resource.ts b/dataset/plugins/ckan/resource/Resource.ts similarity index 100% rename from ckan/resource/Resource.ts rename to 
dataset/plugins/ckan/resource/Resource.ts diff --git a/ckan/resource/convert/fromCkan.ts b/dataset/plugins/ckan/resource/convert/fromCkan.ts similarity index 92% rename from ckan/resource/convert/fromCkan.ts rename to dataset/plugins/ckan/resource/convert/fromCkan.ts index a48de2ec..4fa9aeeb 100644 --- a/ckan/resource/convert/fromCkan.ts +++ b/dataset/plugins/ckan/resource/convert/fromCkan.ts @@ -1,5 +1,5 @@ -import type { Resource } from "@dpkit/core" -import { getFilename } from "@dpkit/core" +import type { Resource } from "@dpkit/metadata" +import { getFilename } from "@dpkit/metadata" import { convertSchemaFromCkan } from "../../schema/index.ts" import type { CkanResource } from "../Resource.ts" diff --git a/ckan/resource/convert/toCkan.ts b/dataset/plugins/ckan/resource/convert/toCkan.ts similarity index 92% rename from ckan/resource/convert/toCkan.ts rename to dataset/plugins/ckan/resource/convert/toCkan.ts index 18aa43ee..0175d6fe 100644 --- a/ckan/resource/convert/toCkan.ts +++ b/dataset/plugins/ckan/resource/convert/toCkan.ts @@ -1,4 +1,4 @@ -import type { Resource } from "@dpkit/core" +import type { Resource } from "@dpkit/metadata" import type { CkanResource } from "../Resource.ts" export function convertResourceToCkan(resource: Resource) { diff --git a/ckan/resource/index.ts b/dataset/plugins/ckan/resource/index.ts similarity index 100% rename from ckan/resource/index.ts rename to dataset/plugins/ckan/resource/index.ts diff --git a/ckan/schema/Field.ts b/dataset/plugins/ckan/schema/Field.ts similarity index 100% rename from ckan/schema/Field.ts rename to dataset/plugins/ckan/schema/Field.ts diff --git a/ckan/schema/Schema.ts b/dataset/plugins/ckan/schema/Schema.ts similarity index 100% rename from ckan/schema/Schema.ts rename to dataset/plugins/ckan/schema/Schema.ts diff --git a/ckan/schema/convert/fixtures/ckan-schema.json b/dataset/plugins/ckan/schema/convert/fixtures/ckan-schema.json similarity index 100% rename from ckan/schema/convert/fixtures/ckan-schema.json rename to dataset/plugins/ckan/schema/convert/fixtures/ckan-schema.json diff --git a/ckan/schema/convert/fromCkan.spec.ts b/dataset/plugins/ckan/schema/convert/fromCkan.spec.ts similarity index 100% rename from ckan/schema/convert/fromCkan.spec.ts rename to dataset/plugins/ckan/schema/convert/fromCkan.spec.ts diff --git a/ckan/schema/convert/fromCkan.ts b/dataset/plugins/ckan/schema/convert/fromCkan.ts similarity index 95% rename from ckan/schema/convert/fromCkan.ts rename to dataset/plugins/ckan/schema/convert/fromCkan.ts index cf760c95..1332b0d5 100644 --- a/ckan/schema/convert/fromCkan.ts +++ b/dataset/plugins/ckan/schema/convert/fromCkan.ts @@ -1,4 +1,4 @@ -import type { Field, Schema } from "@dpkit/core" +import type { Field, Schema } from "@dpkit/metadata" import type { ArrayField, BooleanField, @@ -9,7 +9,7 @@ import type { ObjectField, StringField, TimeField, -} from "@dpkit/core" +} from "@dpkit/metadata" import type { CkanField } from "../Field.ts" import type { CkanSchema } from "../Schema.ts" diff --git a/ckan/schema/convert/toCkan.spec.ts b/dataset/plugins/ckan/schema/convert/toCkan.spec.ts similarity index 99% rename from ckan/schema/convert/toCkan.spec.ts rename to dataset/plugins/ckan/schema/convert/toCkan.spec.ts index 246772a0..2a2c1d0f 100644 --- a/ckan/schema/convert/toCkan.spec.ts +++ b/dataset/plugins/ckan/schema/convert/toCkan.spec.ts @@ -1,4 +1,4 @@ -import type { Schema } from "@dpkit/core" +import type { Schema } from "@dpkit/metadata" import { describe, expect, it } from "vitest" import 
type { CkanSchema } from "../Schema.ts" import ckanSchemaFixture from "./fixtures/ckan-schema.json" with { diff --git a/ckan/schema/convert/toCkan.ts b/dataset/plugins/ckan/schema/convert/toCkan.ts similarity index 95% rename from ckan/schema/convert/toCkan.ts rename to dataset/plugins/ckan/schema/convert/toCkan.ts index c21f8232..889c5f2e 100644 --- a/ckan/schema/convert/toCkan.ts +++ b/dataset/plugins/ckan/schema/convert/toCkan.ts @@ -1,4 +1,4 @@ -import type { Field, Schema } from "@dpkit/core" +import type { Field, Schema } from "@dpkit/metadata" import type { CkanField, CkanFieldInfo } from "../Field.ts" import type { CkanSchema } from "../Schema.ts" diff --git a/ckan/schema/index.ts b/dataset/plugins/ckan/schema/index.ts similarity index 100% rename from ckan/schema/index.ts rename to dataset/plugins/ckan/schema/index.ts diff --git a/datahub/index.ts b/dataset/plugins/datahub/index.ts similarity index 100% rename from datahub/index.ts rename to dataset/plugins/datahub/index.ts diff --git a/datahub/package/index.ts b/dataset/plugins/datahub/package/index.ts similarity index 100% rename from datahub/package/index.ts rename to dataset/plugins/datahub/package/index.ts diff --git a/datahub/package/load.spec.ts b/dataset/plugins/datahub/package/load.spec.ts similarity index 89% rename from datahub/package/load.spec.ts rename to dataset/plugins/datahub/package/load.spec.ts index 8cc75902..b2a801a4 100644 --- a/datahub/package/load.spec.ts +++ b/dataset/plugins/datahub/package/load.spec.ts @@ -1,5 +1,5 @@ -import { useRecording } from "@dpkit/test" import { describe, expect, it } from "vitest" +import { useRecording } from "vitest-polly" import { loadPackageFromDatahub } from "./load.ts" useRecording() diff --git a/datahub/package/load.ts b/dataset/plugins/datahub/package/load.ts similarity index 81% rename from datahub/package/load.ts rename to dataset/plugins/datahub/package/load.ts index 7d6c30f7..a7a80ff0 100644 --- a/datahub/package/load.ts +++ b/dataset/plugins/datahub/package/load.ts @@ -1,4 +1,4 @@ -import { loadPackageDescriptor } from "@dpkit/core" +import { loadPackageDescriptor } from "@dpkit/metadata" export async function loadPackageFromDatahub(datasetUrl: string) { const url = new URL(datasetUrl) diff --git a/datahub/plugin.spec.ts b/dataset/plugins/datahub/plugin.spec.ts similarity index 98% rename from datahub/plugin.spec.ts rename to dataset/plugins/datahub/plugin.spec.ts index faff59aa..197f4c5b 100644 --- a/datahub/plugin.spec.ts +++ b/dataset/plugins/datahub/plugin.spec.ts @@ -1,4 +1,4 @@ -import type { Package } from "@dpkit/core" +import type { Package } from "@dpkit/metadata" import { beforeEach, describe, expect, it, vi } from "vitest" import * as packageModule from "./package/index.ts" import { DatahubPlugin } from "./plugin.ts" diff --git a/datahub/plugin.ts b/dataset/plugins/datahub/plugin.ts similarity index 73% rename from datahub/plugin.ts rename to dataset/plugins/datahub/plugin.ts index 14b0950f..37c8101b 100644 --- a/datahub/plugin.ts +++ b/dataset/plugins/datahub/plugin.ts @@ -1,8 +1,8 @@ -import type { Plugin } from "@dpkit/core" -import { isRemotePath } from "@dpkit/core" +import { isRemotePath } from "@dpkit/metadata" +import type { DatasetPlugin } from "../../plugin.ts" import { loadPackageFromDatahub } from "./package/index.ts" -export class DatahubPlugin implements Plugin { +export class DatahubPlugin implements DatasetPlugin { async loadPackage(source: string) { const isDatahub = getIsDatahub(source) if (!isDatahub) return undefined diff --git 
a/dataset/plugins/descriptor/index.ts b/dataset/plugins/descriptor/index.ts new file mode 100644 index 00000000..3959722b --- /dev/null +++ b/dataset/plugins/descriptor/index.ts @@ -0,0 +1 @@ +export * from "./plugin.ts" diff --git a/dataset/plugins/descriptor/plugin.ts b/dataset/plugins/descriptor/plugin.ts new file mode 100644 index 00000000..10e1e2b3 --- /dev/null +++ b/dataset/plugins/descriptor/plugin.ts @@ -0,0 +1,38 @@ +import { inferFormat } from "@dpkit/metadata" +import type { Package } from "@dpkit/metadata" +import { isRemotePath } from "@dpkit/metadata" +import { loadPackageDescriptor } from "@dpkit/metadata" +import { savePackageDescriptor } from "@dpkit/metadata" +import type { DatasetPlugin } from "../../plugin.ts" + +export class DescriptorPlugin implements DatasetPlugin { + async loadPackage(source: string) { + const isLocalJson = await getIsLocalJson(source) + if (!isLocalJson) return undefined + + const dataPackage = await loadPackageDescriptor(source) + return dataPackage + } + + async savePackage( + dataPackage: Package, + options: { target: string; withRemote?: boolean }, + ) { + const isLocalJson = await getIsLocalJson(options.target) + if (!isLocalJson) return undefined + + if (!options.target.endsWith("datapackage.json")) { + return undefined + } + + await savePackageDescriptor(dataPackage, { path: options.target }) + + return { path: options.target } + } +} + +async function getIsLocalJson(path: string) { + const isRemote = isRemotePath(path) + const format = inferFormat({ path }) + return !isRemote && format === "json" +} diff --git a/zip/index.ts b/dataset/plugins/folder/index.ts similarity index 100% rename from zip/index.ts rename to dataset/plugins/folder/index.ts diff --git a/folder/package/index.ts b/dataset/plugins/folder/package/index.ts similarity index 100% rename from folder/package/index.ts rename to dataset/plugins/folder/package/index.ts diff --git a/dataset/plugins/folder/package/load.spec.ts b/dataset/plugins/folder/package/load.spec.ts new file mode 100644 index 00000000..29994bfa --- /dev/null +++ b/dataset/plugins/folder/package/load.spec.ts @@ -0,0 +1,205 @@ +import type { Package } from "@dpkit/metadata" +import { beforeEach, describe, expect, it } from "vitest" +import { getTempFilePath, writeTempFile } from "../../../file/index.ts" +import { loadPackageFromFolder } from "./load.ts" +import { savePackageToFolder } from "./save.ts" + +describe("loadPackageFromFolder", () => { + let tempFolderPath: string + + beforeEach(() => { + tempFolderPath = getTempFilePath() + }) + + it("should load a basic package from folder", async () => { + const originalPackage: Package = { + name: "test-package", + resources: [ + { + name: "empty-resource", + data: [], + }, + ], + } + + await savePackageToFolder(originalPackage, { folderPath: tempFolderPath }) + const loadedPackage = await loadPackageFromFolder(tempFolderPath) + + expect(loadedPackage).toBeDefined() + expect(loadedPackage.name).toBe("test-package") + expect(loadedPackage.resources).toHaveLength(1) + }) + + it("should load package with metadata", async () => { + const originalPackage: Package = { + name: "test-package", + title: "Test Package", + description: "A test data package", + version: "1.0.0", + resources: [ + { + name: "test-resource", + data: [], + }, + ], + } + + await savePackageToFolder(originalPackage, { folderPath: tempFolderPath }) + const loadedPackage = await loadPackageFromFolder(tempFolderPath) + + expect(loadedPackage.name).toBe("test-package") + 
expect(loadedPackage.title).toBe("Test Package") + expect(loadedPackage.description).toBe("A test data package") + expect(loadedPackage.version).toBe("1.0.0") + }) + + it("should load package with inline data resources", async () => { + const originalPackage: Package = { + name: "test-package", + resources: [ + { + name: "test-resource", + data: [ + { id: 1, name: "alice" }, + { id: 2, name: "bob" }, + ], + }, + ], + } + + await savePackageToFolder(originalPackage, { folderPath: tempFolderPath }) + const loadedPackage = await loadPackageFromFolder(tempFolderPath) + + expect(loadedPackage).toBeDefined() + expect(loadedPackage.resources).toHaveLength(1) + expect(loadedPackage.resources[0]?.name).toBe("test-resource") + expect(loadedPackage.resources[0]?.data).toEqual([ + { id: 1, name: "alice" }, + { id: 2, name: "bob" }, + ]) + }) + + it("should load package with file resources", async () => { + const csvContent = "id,name\n1,alice\n2,bob" + const csvPath = await writeTempFile(csvContent) + + const originalPackage: Package = { + name: "test-package", + resources: [ + { + name: "test-resource", + path: csvPath, + format: "csv", + }, + ], + } + + await savePackageToFolder(originalPackage, { folderPath: tempFolderPath }) + const loadedPackage = await loadPackageFromFolder(tempFolderPath) + + expect(loadedPackage).toBeDefined() + expect(loadedPackage.resources).toHaveLength(1) + expect(loadedPackage.resources[0]?.name).toBe("test-resource") + expect(loadedPackage.resources[0]?.format).toBe("csv") + }) + + it("should load package with schema", async () => { + const originalPackage: Package = { + name: "test-package", + resources: [ + { + name: "test-resource", + data: [{ id: 1, name: "alice" }], + schema: { + fields: [ + { name: "id", type: "integer" }, + { name: "name", type: "string" }, + ], + }, + }, + ], + } + + await savePackageToFolder(originalPackage, { folderPath: tempFolderPath }) + const loadedPackage = await loadPackageFromFolder(tempFolderPath) + + expect(loadedPackage.resources[0]?.schema).toBeDefined() + const schema = loadedPackage.resources[0]?.schema + expect(typeof schema === "object" && "fields" in schema).toBe(true) + if (typeof schema === "object" && "fields" in schema) { + expect(schema.fields).toHaveLength(2) + } + }) + + it("should load package with multiple resources", async () => { + const csvContent = "id,name\n1,alice\n2,bob" + const csvPath = await writeTempFile(csvContent) + + const originalPackage: Package = { + name: "test-package", + resources: [ + { + name: "resource-1", + path: csvPath, + format: "csv", + }, + { + name: "resource-2", + data: [{ id: 1, value: 100 }], + }, + ], + } + + await savePackageToFolder(originalPackage, { folderPath: tempFolderPath }) + const loadedPackage = await loadPackageFromFolder(tempFolderPath) + + expect(loadedPackage).toBeDefined() + expect(loadedPackage.name).toBe("test-package") + expect(loadedPackage.resources).toHaveLength(2) + expect(loadedPackage.resources[0]?.name).toBe("resource-1") + expect(loadedPackage.resources[1]?.name).toBe("resource-2") + }) + + it("should load package with dialect", async () => { + const csvContent = "id;name\n1;alice\n2;bob" + const csvPath = await writeTempFile(csvContent) + + const originalPackage: Package = { + name: "test-package", + resources: [ + { + name: "test-resource", + path: csvPath, + format: "csv", + dialect: { + delimiter: ";", + }, + }, + ], + } + + await savePackageToFolder(originalPackage, { folderPath: tempFolderPath }) + const loadedPackage = await 
loadPackageFromFolder(tempFolderPath) + + expect(loadedPackage.resources[0]?.dialect).toBeDefined() + const dialect = loadedPackage.resources[0]?.dialect + expect(typeof dialect === "object" && "delimiter" in dialect).toBe(true) + if (typeof dialect === "object" && "delimiter" in dialect) { + expect(dialect.delimiter).toBe(";") + } + }) + + it("should throw error for non-existent folder", async () => { + const nonExistentPath = "/non/existent/folder" + + await expect(loadPackageFromFolder(nonExistentPath)).rejects.toThrow() + }) + + it("should throw error for folder without datapackage.json", async () => { + const emptyFolderPath = getTempFilePath() + const fs = await import("node:fs/promises") + await fs.mkdir(emptyFolderPath, { recursive: true }) + + await expect(loadPackageFromFolder(emptyFolderPath)).rejects.toThrow() + }) +}) diff --git a/folder/package/load.ts b/dataset/plugins/folder/package/load.ts similarity index 75% rename from folder/package/load.ts rename to dataset/plugins/folder/package/load.ts index e39f4a10..703b8461 100644 --- a/folder/package/load.ts +++ b/dataset/plugins/folder/package/load.ts @@ -1,5 +1,5 @@ import { join } from "node:path" -import { loadPackageDescriptor } from "@dpkit/core" +import { loadPackageDescriptor } from "@dpkit/metadata" export async function loadPackageFromFolder(folderPath: string) { return loadPackageDescriptor(join(folderPath, "datapackage.json")) diff --git a/dataset/plugins/folder/package/save.spec.ts b/dataset/plugins/folder/package/save.spec.ts new file mode 100644 index 00000000..c8e81d42 --- /dev/null +++ b/dataset/plugins/folder/package/save.spec.ts @@ -0,0 +1,389 @@ +import { access, readFile } from "node:fs/promises" +import { join } from "node:path" +import type { Package } from "@dpkit/metadata" +import { beforeEach, describe, expect, it } from "vitest" +import { getTempFilePath, writeTempFile } from "../../../file/index.ts" +import { loadPackageFromFolder } from "./load.ts" +import { savePackageToFolder } from "./save.ts" + +describe("savePackageToFolder", () => { + let tempFolderPath: string + + beforeEach(() => { + tempFolderPath = getTempFilePath() + }) + + it("should save a basic package to folder", async () => { + const dataPackage: Package = { + name: "test-package", + resources: [ + { + name: "test-resource", + data: [], + }, + ], + } + + await savePackageToFolder(dataPackage, { folderPath: tempFolderPath }) + + const descriptorPath = join(tempFolderPath, "datapackage.json") + await expect(access(descriptorPath)).resolves.toBeUndefined() + }) + + it("should save package with metadata", async () => { + const dataPackage: Package = { + name: "test-package", + title: "Test Package", + description: "A test package", + version: "1.0.0", + resources: [ + { + name: "test-resource", + data: [], + }, + ], + } + + await savePackageToFolder(dataPackage, { folderPath: tempFolderPath }) + + const descriptorPath = join(tempFolderPath, "datapackage.json") + await expect(access(descriptorPath)).resolves.toBeUndefined() + }) + + it("should save package with inline data resources", async () => { + const dataPackage: Package = { + name: "test-package", + resources: [ + { + name: "test-resource", + data: [ + { id: 1, name: "alice" }, + { id: 2, name: "bob" }, + ], + }, + ], + } + + await savePackageToFolder(dataPackage, { folderPath: tempFolderPath }) + + const descriptorPath = join(tempFolderPath, "datapackage.json") + await expect(access(descriptorPath)).resolves.toBeUndefined() + }) + + it("should save package with file resources", 
async () => { + const csvContent = "id,name\n1,alice\n2,bob" + const csvPath = await writeTempFile(csvContent) + + const dataPackage: Package = { + name: "test-package", + resources: [ + { + name: "test-resource", + path: csvPath, + format: "csv", + }, + ], + } + + await savePackageToFolder(dataPackage, { folderPath: tempFolderPath }) + + const descriptorPath = join(tempFolderPath, "datapackage.json") + await expect(access(descriptorPath)).resolves.toBeUndefined() + }) + + it("should save package with multiple resources", async () => { + const csvContent = "id,name\n1,alice\n2,bob" + const csvPath = await writeTempFile(csvContent) + + const dataPackage: Package = { + name: "test-package", + resources: [ + { + name: "resource-1", + path: csvPath, + format: "csv", + }, + { + name: "resource-2", + data: [{ id: 1, value: 100 }], + }, + ], + } + + await savePackageToFolder(dataPackage, { folderPath: tempFolderPath }) + + const descriptorPath = join(tempFolderPath, "datapackage.json") + await expect(access(descriptorPath)).resolves.toBeUndefined() + }) + + it("should save package with schema", async () => { + const dataPackage: Package = { + name: "test-package", + resources: [ + { + name: "test-resource", + data: [{ id: 1, name: "alice" }], + schema: { + fields: [ + { name: "id", type: "integer" }, + { name: "name", type: "string" }, + ], + }, + }, + ], + } + + await savePackageToFolder(dataPackage, { folderPath: tempFolderPath }) + + const descriptorPath = join(tempFolderPath, "datapackage.json") + await expect(access(descriptorPath)).resolves.toBeUndefined() + }) + + it("should save package with dialect", async () => { + const csvContent = "id;name\n1;alice\n2;bob" + const csvPath = await writeTempFile(csvContent) + + const dataPackage: Package = { + name: "test-package", + resources: [ + { + name: "test-resource", + path: csvPath, + format: "csv", + dialect: { + delimiter: ";", + }, + }, + ], + } + + await savePackageToFolder(dataPackage, { folderPath: tempFolderPath }) + + const descriptorPath = join(tempFolderPath, "datapackage.json") + await expect(access(descriptorPath)).resolves.toBeUndefined() + }) + + it("should save and reload package with same structure", async () => { + const originalPackage: Package = { + name: "test-package", + title: "Test Package", + description: "A test package", + resources: [ + { + name: "test-resource", + data: [{ id: 1, name: "alice" }], + }, + ], + } + + await savePackageToFolder(originalPackage, { folderPath: tempFolderPath }) + const reloadedPackage = await loadPackageFromFolder(tempFolderPath) + + expect(reloadedPackage).toBeDefined() + expect(reloadedPackage.name).toBe("test-package") + expect(reloadedPackage.title).toBe("Test Package") + expect(reloadedPackage.description).toBe("A test package") + expect(reloadedPackage.resources).toHaveLength(1) + expect(reloadedPackage.resources[0]?.name).toBe("test-resource") + }) + + it("should save and reload package preserving metadata", async () => { + const originalPackage: Package = { + name: "test-package", + title: "Test Package", + description: "A test package", + version: "1.0.0", + keywords: ["test", "package"], + resources: [ + { + name: "test-resource", + data: [{ id: 1 }], + }, + ], + } + + await savePackageToFolder(originalPackage, { folderPath: tempFolderPath }) + const reloadedPackage = await loadPackageFromFolder(tempFolderPath) + + expect(reloadedPackage.name).toBe("test-package") + expect(reloadedPackage.title).toBe("Test Package") + expect(reloadedPackage.version).toBe("1.0.0") + 
expect(reloadedPackage.keywords).toEqual(["test", "package"]) + }) + + it("should save and reload package with schema", async () => { + const originalPackage: Package = { + name: "test-package", + resources: [ + { + name: "test-resource", + data: [{ id: 1, name: "alice" }], + schema: { + fields: [ + { name: "id", type: "integer" }, + { name: "name", type: "string" }, + ], + }, + }, + ], + } + + await savePackageToFolder(originalPackage, { folderPath: tempFolderPath }) + const reloadedPackage = await loadPackageFromFolder(tempFolderPath) + + const schema = reloadedPackage.resources[0]?.schema + expect(schema).toBeDefined() + expect(typeof schema === "object" && "fields" in schema).toBe(true) + if (typeof schema === "object" && "fields" in schema) { + expect(schema.fields).toHaveLength(2) + expect(schema.fields?.[0]?.name).toBe("id") + expect(schema.fields?.[1]?.name).toBe("name") + } + }) + + it("should save and reload package with file resources", async () => { + const csvContent = "id,name\n1,alice\n2,bob" + const csvPath = await writeTempFile(csvContent) + + const originalPackage: Package = { + name: "test-package", + resources: [ + { + name: "test-resource", + path: csvPath, + format: "csv", + }, + ], + } + + await savePackageToFolder(originalPackage, { folderPath: tempFolderPath }) + const reloadedPackage = await loadPackageFromFolder(tempFolderPath) + + expect(reloadedPackage.resources).toHaveLength(1) + expect(reloadedPackage.resources[0]?.name).toBe("test-resource") + expect(reloadedPackage.resources[0]?.format).toBe("csv") + }) + + it("should throw error when saving to existing folder", async () => { + const dataPackage: Package = { + name: "test-package", + resources: [ + { + name: "test-resource", + data: [], + }, + ], + } + + const fs = await import("node:fs/promises") + await fs.mkdir(tempFolderPath, { recursive: true }) + await fs.writeFile(join(tempFolderPath, "existing.txt"), "content") + + await expect( + savePackageToFolder(dataPackage, { folderPath: tempFolderPath }), + ).rejects.toThrow() + }) + + it("should create valid folder structure", async () => { + const dataPackage: Package = { + name: "test-package", + resources: [ + { + name: "test-resource", + data: [{ id: 1 }], + }, + ], + } + + await savePackageToFolder(dataPackage, { folderPath: tempFolderPath }) + const reloadedPackage = await loadPackageFromFolder(tempFolderPath) + + expect(reloadedPackage).toMatchObject({ + name: "test-package", + resources: [ + { + name: "test-resource", + }, + ], + }) + }) + + it("should save package with multiple file resources", async () => { + const csv1Content = "id,name\n1,alice" + const csv2Content = "id,value\n1,100" + const csv1Path = await writeTempFile(csv1Content) + const csv2Path = await writeTempFile(csv2Content) + + const originalPackage: Package = { + name: "test-package", + resources: [ + { + name: "resource-1", + path: csv1Path, + format: "csv", + }, + { + name: "resource-2", + path: csv2Path, + format: "csv", + }, + ], + } + + await savePackageToFolder(originalPackage, { folderPath: tempFolderPath }) + const reloadedPackage = await loadPackageFromFolder(tempFolderPath) + + expect(reloadedPackage.resources).toHaveLength(2) + expect(reloadedPackage.resources[0]?.name).toBe("resource-1") + expect(reloadedPackage.resources[1]?.name).toBe("resource-2") + }) + + it("should create datapackage.json in folder", async () => { + const dataPackage: Package = { + name: "test-package", + resources: [ + { + name: "test-resource", + data: [{ id: 1 }], + }, + ], + } + + await 
savePackageToFolder(dataPackage, { folderPath: tempFolderPath }) + + const descriptorPath = join(tempFolderPath, "datapackage.json") + const descriptorContent = await readFile(descriptorPath, "utf-8") + const descriptor = JSON.parse(descriptorContent) + + expect(descriptor.name).toBe("test-package") + expect(descriptor.resources).toHaveLength(1) + }) + + it("should copy file resources to folder", async () => { + const csvContent = "id,name\n1,alice\n2,bob" + const csvPath = await writeTempFile(csvContent) + + const dataPackage: Package = { + name: "test-package", + resources: [ + { + name: "test-resource", + path: csvPath, + format: "csv", + }, + ], + } + + await savePackageToFolder(dataPackage, { folderPath: tempFolderPath }) + + const descriptorPath = join(tempFolderPath, "datapackage.json") + const descriptorContent = await readFile(descriptorPath, "utf-8") + const descriptor = JSON.parse(descriptorContent) + + const resourcePath = descriptor.resources[0].path + const resourceFilePath = join(tempFolderPath, resourcePath) + const resourceContent = await readFile(resourceFilePath, "utf-8") + + expect(resourceContent).toBe(csvContent) + }) +}) diff --git a/folder/package/save.ts b/dataset/plugins/folder/package/save.ts similarity index 77% rename from folder/package/save.ts rename to dataset/plugins/folder/package/save.ts index de9e4331..e5d51bad 100644 --- a/folder/package/save.ts +++ b/dataset/plugins/folder/package/save.ts @@ -1,13 +1,10 @@ import { join } from "node:path" -import { convertPackageToDescriptor, saveDescriptor } from "@dpkit/core" -import type { Descriptor, Package } from "@dpkit/core" -import { - assertLocalPathVacant, - copyFile, - getPackageBasepath, - saveResourceFiles, -} from "@dpkit/file" -import { createFolder } from "../folder/index.ts" +import { convertPackageToDescriptor, saveDescriptor } from "@dpkit/metadata" +import type { Descriptor, Package } from "@dpkit/metadata" +import { assertLocalPathVacant, copyFile } from "../../../file/index.ts" +import { createFolder } from "../../../folder/index.ts" +import { getPackageBasepath } from "../../../package/index.ts" +import { saveResourceFiles } from "../../../resource/index.ts" export async function savePackageToFolder( dataPackage: Package, diff --git a/folder/plugin.spec.ts b/dataset/plugins/folder/plugin.spec.ts similarity index 97% rename from folder/plugin.spec.ts rename to dataset/plugins/folder/plugin.spec.ts index 29629b74..3226a8e3 100644 --- a/folder/plugin.spec.ts +++ b/dataset/plugins/folder/plugin.spec.ts @@ -1,4 +1,4 @@ -import type { Package } from "@dpkit/core" +import type { Package } from "@dpkit/metadata" import { beforeEach, describe, expect, it, vi } from "vitest" import * as packageModule from "./package/index.ts" import { FolderPlugin } from "./plugin.ts" diff --git a/folder/plugin.ts b/dataset/plugins/folder/plugin.ts similarity index 71% rename from folder/plugin.ts rename to dataset/plugins/folder/plugin.ts index 932c3d02..58f6f7f5 100644 --- a/folder/plugin.ts +++ b/dataset/plugins/folder/plugin.ts @@ -1,9 +1,9 @@ import { stat } from "node:fs/promises" -import type { Plugin } from "@dpkit/core" -import { isRemotePath } from "@dpkit/core" +import { isRemotePath } from "@dpkit/metadata" +import type { DatasetPlugin } from "../../plugin.ts" import { loadPackageFromFolder } from "./package/index.ts" -export class FolderPlugin implements Plugin { +export class FolderPlugin implements DatasetPlugin { async loadPackage(source: string) { const isFolder = await getIsFolder(source) if (!isFolder) 
return undefined @@ -11,6 +11,8 @@ export class FolderPlugin implements Plugin { const dataPackage = await loadPackageFromFolder(source) return dataPackage } + + // TODO: implement savePackage? } async function getIsFolder(path: string) { diff --git a/github/github/index.ts b/dataset/plugins/github/github/index.ts similarity index 100% rename from github/github/index.ts rename to dataset/plugins/github/github/index.ts diff --git a/github/github/path.ts b/dataset/plugins/github/github/path.ts similarity index 100% rename from github/github/path.ts rename to dataset/plugins/github/github/path.ts diff --git a/github/github/request.ts b/dataset/plugins/github/github/request.ts similarity index 96% rename from github/github/request.ts rename to dataset/plugins/github/github/request.ts index f14610e5..d070066f 100644 --- a/github/github/request.ts +++ b/dataset/plugins/github/github/request.ts @@ -1,4 +1,4 @@ -import type { Descriptor } from "@dpkit/core" +import type { Descriptor } from "@dpkit/metadata" /** * Makes a request to the Github API diff --git a/github/index.ts b/dataset/plugins/github/index.ts similarity index 100% rename from github/index.ts rename to dataset/plugins/github/index.ts diff --git a/github/package/License.ts b/dataset/plugins/github/package/License.ts similarity index 100% rename from github/package/License.ts rename to dataset/plugins/github/package/License.ts diff --git a/github/package/Owner.ts b/dataset/plugins/github/package/Owner.ts similarity index 100% rename from github/package/Owner.ts rename to dataset/plugins/github/package/Owner.ts diff --git a/github/package/Package.ts b/dataset/plugins/github/package/Package.ts similarity index 100% rename from github/package/Package.ts rename to dataset/plugins/github/package/Package.ts diff --git a/github/package/convert/fromGithub.ts b/dataset/plugins/github/package/convert/fromGithub.ts similarity index 96% rename from github/package/convert/fromGithub.ts rename to dataset/plugins/github/package/convert/fromGithub.ts index a5fb3993..098fbcc2 100644 --- a/github/package/convert/fromGithub.ts +++ b/dataset/plugins/github/package/convert/fromGithub.ts @@ -1,4 +1,4 @@ -import type { Contributor, License, Package } from "@dpkit/core" +import type { Contributor, License, Package } from "@dpkit/metadata" import { convertResourceFromGithub } from "../../resource/index.ts" import type { GithubPackage } from "../Package.ts" diff --git a/github/package/convert/toGithub.ts b/dataset/plugins/github/package/convert/toGithub.ts similarity index 94% rename from github/package/convert/toGithub.ts rename to dataset/plugins/github/package/convert/toGithub.ts index b00da142..c0246cba 100644 --- a/github/package/convert/toGithub.ts +++ b/dataset/plugins/github/package/convert/toGithub.ts @@ -1,4 +1,4 @@ -import type { Package } from "@dpkit/core" +import type { Package } from "@dpkit/metadata" import type { GithubPackage } from "../Package.ts" export function convertPackageToGithub(dataPackage: Package) { diff --git a/github/package/fixtures/data.csv b/dataset/plugins/github/package/fixtures/data.csv similarity index 100% rename from github/package/fixtures/data.csv rename to dataset/plugins/github/package/fixtures/data.csv diff --git a/github/package/fixtures/generated/load.spec.ts.snap b/dataset/plugins/github/package/fixtures/generated/load.spec.ts.snap similarity index 100% rename from github/package/fixtures/generated/load.spec.ts.snap rename to dataset/plugins/github/package/fixtures/generated/load.spec.ts.snap diff --git 
a/github/package/fixtures/generated/loadPackageFromGithub-should-load-a-package_1044819575/recording.har b/dataset/plugins/github/package/fixtures/generated/loadPackageFromGithub-should-load-a-package_1044819575/recording.har similarity index 100% rename from github/package/fixtures/generated/loadPackageFromGithub-should-load-a-package_1044819575/recording.har rename to dataset/plugins/github/package/fixtures/generated/loadPackageFromGithub-should-load-a-package_1044819575/recording.har diff --git a/github/package/fixtures/generated/loadPackageFromGithub-should-merge-datapackage-json-if-present_618226504/recording.har b/dataset/plugins/github/package/fixtures/generated/loadPackageFromGithub-should-merge-datapackage-json-if-present_618226504/recording.har similarity index 100% rename from github/package/fixtures/generated/loadPackageFromGithub-should-merge-datapackage-json-if-present_618226504/recording.har rename to dataset/plugins/github/package/fixtures/generated/loadPackageFromGithub-should-merge-datapackage-json-if-present_618226504/recording.har diff --git a/github/package/index.ts b/dataset/plugins/github/package/index.ts similarity index 100% rename from github/package/index.ts rename to dataset/plugins/github/package/index.ts diff --git a/github/package/load.spec.ts b/dataset/plugins/github/package/load.spec.ts similarity index 92% rename from github/package/load.spec.ts rename to dataset/plugins/github/package/load.spec.ts index 74cecdd1..93585c2e 100644 --- a/github/package/load.spec.ts +++ b/dataset/plugins/github/package/load.spec.ts @@ -1,5 +1,5 @@ -import { useRecording } from "@dpkit/test" import { describe, expect, it } from "vitest" +import { useRecording } from "vitest-polly" import { loadPackageFromGithub } from "./load.ts" useRecording() diff --git a/github/package/load.ts b/dataset/plugins/github/package/load.ts similarity index 97% rename from github/package/load.ts rename to dataset/plugins/github/package/load.ts index 468f03ce..7a8533d7 100644 --- a/github/package/load.ts +++ b/dataset/plugins/github/package/load.ts @@ -1,4 +1,4 @@ -import { mergePackages } from "@dpkit/core" +import { mergePackages } from "../../../package/index.ts" import { makeGithubApiRequest } from "../github/index.ts" import type { GithubResource } from "../resource/index.ts" import type { GithubPackage } from "./Package.ts" diff --git a/github/package/save.spec.ts b/dataset/plugins/github/package/save.spec.ts similarity index 99% rename from github/package/save.spec.ts rename to dataset/plugins/github/package/save.spec.ts index 685facaa..c3054eb7 100644 --- a/github/package/save.spec.ts +++ b/dataset/plugins/github/package/save.spec.ts @@ -1,6 +1,6 @@ import { relative } from "node:path" -import type { Package } from "@dpkit/core" -import { loadPackageDescriptor } from "@dpkit/core" +import type { Package } from "@dpkit/metadata" +import { loadPackageDescriptor } from "@dpkit/metadata" import { afterEach, beforeEach, describe, expect, it, vi } from "vitest" import { savePackageToGithub } from "./save.ts" diff --git a/github/package/save.ts b/dataset/plugins/github/package/save.ts similarity index 87% rename from github/package/save.ts rename to dataset/plugins/github/package/save.ts index 367cbe62..b2c28dbe 100644 --- a/github/package/save.ts +++ b/dataset/plugins/github/package/save.ts @@ -1,9 +1,11 @@ import { Buffer } from "node:buffer" import { buffer } from "node:stream/consumers" -import type { Descriptor, Package } from "@dpkit/core" -import { convertPackageToDescriptor, 
stringifyDescriptor } from "@dpkit/core" -import { getPackageBasepath, loadFileStream } from "@dpkit/file" -import { saveResourceFiles } from "@dpkit/file" +import type { Descriptor, Package } from "@dpkit/metadata" +import { stringifyDescriptor } from "@dpkit/metadata" +import { convertPackageToDescriptor } from "@dpkit/metadata" +import { getPackageBasepath } from "../../../package/index.ts" +import { saveResourceFiles } from "../../../resource/index.ts" +import { loadFileStream } from "../../../stream/index.ts" import { makeGithubApiRequest } from "../github/index.ts" import type { GithubPackage } from "./Package.ts" diff --git a/github/plugin.spec.ts b/dataset/plugins/github/plugin.spec.ts similarity index 98% rename from github/plugin.spec.ts rename to dataset/plugins/github/plugin.spec.ts index c9ec48fa..c2a9aac5 100644 --- a/github/plugin.spec.ts +++ b/dataset/plugins/github/plugin.spec.ts @@ -1,4 +1,4 @@ -import type { Package } from "@dpkit/core" +import type { Package } from "@dpkit/metadata" import { beforeEach, describe, expect, it, vi } from "vitest" import * as packageModule from "./package/load.ts" import { GithubPlugin } from "./plugin.ts" diff --git a/github/plugin.ts b/dataset/plugins/github/plugin.ts similarity index 73% rename from github/plugin.ts rename to dataset/plugins/github/plugin.ts index 543d193f..256d97ec 100644 --- a/github/plugin.ts +++ b/dataset/plugins/github/plugin.ts @@ -1,8 +1,8 @@ -import type { Plugin } from "@dpkit/core" -import { isRemotePath } from "@dpkit/core" +import { isRemotePath } from "@dpkit/metadata" +import type { DatasetPlugin } from "../../plugin.ts" import { loadPackageFromGithub } from "./package/load.ts" -export class GithubPlugin implements Plugin { +export class GithubPlugin implements DatasetPlugin { async loadPackage(source: string) { const isGithub = getIsGithub(source) if (!isGithub) return undefined diff --git a/github/resource/Resource.ts b/dataset/plugins/github/resource/Resource.ts similarity index 100% rename from github/resource/Resource.ts rename to dataset/plugins/github/resource/Resource.ts diff --git a/github/resource/convert/fromGithub.ts b/dataset/plugins/github/resource/convert/fromGithub.ts similarity index 88% rename from github/resource/convert/fromGithub.ts rename to dataset/plugins/github/resource/convert/fromGithub.ts index 0660c1bc..2b68625f 100644 --- a/github/resource/convert/fromGithub.ts +++ b/dataset/plugins/github/resource/convert/fromGithub.ts @@ -1,5 +1,5 @@ -import type { Resource } from "@dpkit/core" -import { getFilename, getFormat, getName } from "@dpkit/core" +import type { Resource } from "@dpkit/metadata" +import { getFilename, getFormat, getName } from "@dpkit/metadata" import type { GithubResource } from "../Resource.ts" export function convertResourceFromGithub( diff --git a/github/resource/convert/toGithub.ts b/dataset/plugins/github/resource/convert/toGithub.ts similarity index 86% rename from github/resource/convert/toGithub.ts rename to dataset/plugins/github/resource/convert/toGithub.ts index 238de87e..c03fb8c0 100644 --- a/github/resource/convert/toGithub.ts +++ b/dataset/plugins/github/resource/convert/toGithub.ts @@ -1,4 +1,4 @@ -import type { Resource } from "@dpkit/core" +import type { Resource } from "@dpkit/metadata" import type { GithubResource } from "../Resource.ts" export function convertResourceToGithub( diff --git a/github/resource/index.ts b/dataset/plugins/github/resource/index.ts similarity index 100% rename from github/resource/index.ts rename to 
dataset/plugins/github/resource/index.ts diff --git a/zenodo/index.ts b/dataset/plugins/zenodo/index.ts similarity index 100% rename from zenodo/index.ts rename to dataset/plugins/zenodo/index.ts diff --git a/zenodo/package/Creator.ts b/dataset/plugins/zenodo/package/Creator.ts similarity index 100% rename from zenodo/package/Creator.ts rename to dataset/plugins/zenodo/package/Creator.ts diff --git a/zenodo/package/Package.ts b/dataset/plugins/zenodo/package/Package.ts similarity index 100% rename from zenodo/package/Package.ts rename to dataset/plugins/zenodo/package/Package.ts diff --git a/zenodo/package/convert/fromZenodo.ts b/dataset/plugins/zenodo/package/convert/fromZenodo.ts similarity index 95% rename from zenodo/package/convert/fromZenodo.ts rename to dataset/plugins/zenodo/package/convert/fromZenodo.ts index badc6bd1..68549359 100644 --- a/zenodo/package/convert/fromZenodo.ts +++ b/dataset/plugins/zenodo/package/convert/fromZenodo.ts @@ -1,4 +1,4 @@ -import type { Contributor, License, Package } from "@dpkit/core" +import type { Contributor, License, Package } from "@dpkit/metadata" import { convertResourceFromZenodo } from "../../resource/index.ts" import type { ZenodoPackage } from "../Package.ts" diff --git a/zenodo/package/convert/toZenodo.ts b/dataset/plugins/zenodo/package/convert/toZenodo.ts similarity index 97% rename from zenodo/package/convert/toZenodo.ts rename to dataset/plugins/zenodo/package/convert/toZenodo.ts index d2116d6a..805d0b74 100644 --- a/zenodo/package/convert/toZenodo.ts +++ b/dataset/plugins/zenodo/package/convert/toZenodo.ts @@ -1,4 +1,4 @@ -import type { Package } from "@dpkit/core" +import type { Package } from "@dpkit/metadata" import type { ZenodoCreator } from "../Creator.ts" import type { ZenodoPackage } from "../Package.ts" diff --git a/zenodo/package/fixtures/data.csv b/dataset/plugins/zenodo/package/fixtures/data.csv similarity index 100% rename from zenodo/package/fixtures/data.csv rename to dataset/plugins/zenodo/package/fixtures/data.csv diff --git a/zenodo/package/fixtures/generated/load.spec.ts.snap b/dataset/plugins/zenodo/package/fixtures/generated/load.spec.ts.snap similarity index 100% rename from zenodo/package/fixtures/generated/load.spec.ts.snap rename to dataset/plugins/zenodo/package/fixtures/generated/load.spec.ts.snap diff --git a/zenodo/package/fixtures/generated/loadPackageFromZenodo-should-load-a-package_3167400519/recording.har b/dataset/plugins/zenodo/package/fixtures/generated/loadPackageFromZenodo-should-load-a-package_3167400519/recording.har similarity index 100% rename from zenodo/package/fixtures/generated/loadPackageFromZenodo-should-load-a-package_3167400519/recording.har rename to dataset/plugins/zenodo/package/fixtures/generated/loadPackageFromZenodo-should-load-a-package_3167400519/recording.har diff --git a/zenodo/package/fixtures/generated/loadPackageFromZenodo-shoule-merge-datapackage-json-if-present_2160001855/recording.har b/dataset/plugins/zenodo/package/fixtures/generated/loadPackageFromZenodo-shoule-merge-datapackage-json-if-present_2160001855/recording.har similarity index 100% rename from zenodo/package/fixtures/generated/loadPackageFromZenodo-shoule-merge-datapackage-json-if-present_2160001855/recording.har rename to dataset/plugins/zenodo/package/fixtures/generated/loadPackageFromZenodo-shoule-merge-datapackage-json-if-present_2160001855/recording.har diff --git a/zenodo/package/index.ts b/dataset/plugins/zenodo/package/index.ts similarity index 100% rename from zenodo/package/index.ts rename to 
dataset/plugins/zenodo/package/index.ts diff --git a/zenodo/package/load.spec.ts b/dataset/plugins/zenodo/package/load.spec.ts similarity index 92% rename from zenodo/package/load.spec.ts rename to dataset/plugins/zenodo/package/load.spec.ts index 399f9b34..0412fc0c 100644 --- a/zenodo/package/load.spec.ts +++ b/dataset/plugins/zenodo/package/load.spec.ts @@ -1,5 +1,5 @@ -import { useRecording } from "@dpkit/test" import { describe, expect, it } from "vitest" +import { useRecording } from "vitest-polly" import { loadPackageFromZenodo } from "./load.ts" useRecording() diff --git a/zenodo/package/load.ts b/dataset/plugins/zenodo/package/load.ts similarity index 96% rename from zenodo/package/load.ts rename to dataset/plugins/zenodo/package/load.ts index 5298157f..27a51f94 100644 --- a/zenodo/package/load.ts +++ b/dataset/plugins/zenodo/package/load.ts @@ -1,4 +1,4 @@ -import { mergePackages } from "@dpkit/core" +import { mergePackages } from "../../../package/index.ts" import { makeZenodoApiRequest } from "../zenodo/index.ts" import type { ZenodoPackage } from "./Package.ts" import { convertPackageFromZenodo } from "./convert/fromZenodo.ts" diff --git a/zenodo/package/save.spec.ts b/dataset/plugins/zenodo/package/save.spec.ts similarity index 99% rename from zenodo/package/save.spec.ts rename to dataset/plugins/zenodo/package/save.spec.ts index 31647dd2..019771be 100644 --- a/zenodo/package/save.spec.ts +++ b/dataset/plugins/zenodo/package/save.spec.ts @@ -1,6 +1,6 @@ import { relative } from "node:path" -import type { Package } from "@dpkit/core" -import { loadPackageDescriptor } from "@dpkit/core" +import type { Package } from "@dpkit/metadata" +import { loadPackageDescriptor } from "@dpkit/metadata" import { afterEach, beforeEach, describe, expect, it, vi } from "vitest" import { savePackageToZenodo } from "./save.ts" diff --git a/zenodo/package/save.ts b/dataset/plugins/zenodo/package/save.ts similarity index 87% rename from zenodo/package/save.ts rename to dataset/plugins/zenodo/package/save.ts index 07f44c42..3f22c1f7 100644 --- a/zenodo/package/save.ts +++ b/dataset/plugins/zenodo/package/save.ts @@ -1,8 +1,10 @@ import { blob } from "node:stream/consumers" -import type { Descriptor, Package } from "@dpkit/core" -import { convertPackageToDescriptor, stringifyDescriptor } from "@dpkit/core" -import { loadFileStream, saveResourceFiles } from "@dpkit/file" -import { getPackageBasepath } from "@dpkit/file" +import type { Descriptor, Package } from "@dpkit/metadata" +import { stringifyDescriptor } from "@dpkit/metadata" +import { convertPackageToDescriptor } from "@dpkit/metadata" +import { getPackageBasepath } from "../../../package/index.ts" +import { saveResourceFiles } from "../../../resource/index.ts" +import { loadFileStream } from "../../../stream/index.ts" import { makeZenodoApiRequest } from "../zenodo/index.ts" import type { ZenodoPackage } from "./Package.ts" import { convertPackageToZenodo } from "./convert/toZenodo.ts" diff --git a/zenodo/plugin.spec.ts b/dataset/plugins/zenodo/plugin.spec.ts similarity index 98% rename from zenodo/plugin.spec.ts rename to dataset/plugins/zenodo/plugin.spec.ts index 8ca2eef1..06023847 100644 --- a/zenodo/plugin.spec.ts +++ b/dataset/plugins/zenodo/plugin.spec.ts @@ -1,4 +1,4 @@ -import type { Package } from "@dpkit/core" +import type { Package } from "@dpkit/metadata" import { beforeEach, describe, expect, it, vi } from "vitest" import * as packageModule from "./package/load.ts" import { ZenodoPlugin } from "./plugin.ts" diff --git 
a/zenodo/plugin.ts b/dataset/plugins/zenodo/plugin.ts similarity index 73% rename from zenodo/plugin.ts rename to dataset/plugins/zenodo/plugin.ts index d02cee52..3ef62a64 100644 --- a/zenodo/plugin.ts +++ b/dataset/plugins/zenodo/plugin.ts @@ -1,8 +1,8 @@ -import type { Plugin } from "@dpkit/core" -import { isRemotePath } from "@dpkit/core" +import { isRemotePath } from "@dpkit/metadata" +import type { DatasetPlugin } from "../../plugin.ts" import { loadPackageFromZenodo } from "./package/load.ts" -export class ZenodoPlugin implements Plugin { +export class ZenodoPlugin implements DatasetPlugin { async loadPackage(source: string) { const isZenodo = getIsZenodo(source) if (!isZenodo) return undefined diff --git a/zenodo/resource/Resource.ts b/dataset/plugins/zenodo/resource/Resource.ts similarity index 100% rename from zenodo/resource/Resource.ts rename to dataset/plugins/zenodo/resource/Resource.ts diff --git a/zenodo/resource/convert/fromZenodo.ts b/dataset/plugins/zenodo/resource/convert/fromZenodo.ts similarity index 91% rename from zenodo/resource/convert/fromZenodo.ts rename to dataset/plugins/zenodo/resource/convert/fromZenodo.ts index ab6b7e39..077812fe 100644 --- a/zenodo/resource/convert/fromZenodo.ts +++ b/dataset/plugins/zenodo/resource/convert/fromZenodo.ts @@ -1,4 +1,4 @@ -import { getFormat, getName } from "@dpkit/core" +import { getFormat, getName } from "@dpkit/metadata" import type { ZenodoResource } from "../Resource.ts" export function convertResourceFromZenodo(zenodoResource: ZenodoResource) { diff --git a/zenodo/resource/convert/toZenodo.ts b/dataset/plugins/zenodo/resource/convert/toZenodo.ts similarity index 88% rename from zenodo/resource/convert/toZenodo.ts rename to dataset/plugins/zenodo/resource/convert/toZenodo.ts index 2d735e73..f67f0cc7 100644 --- a/zenodo/resource/convert/toZenodo.ts +++ b/dataset/plugins/zenodo/resource/convert/toZenodo.ts @@ -1,4 +1,4 @@ -import type { Resource } from "@dpkit/core" +import type { Resource } from "@dpkit/metadata" import type { ZenodoResource } from "../Resource.ts" export function convertResourceToZenodo(resource: Resource) { diff --git a/zenodo/resource/index.ts b/dataset/plugins/zenodo/resource/index.ts similarity index 100% rename from zenodo/resource/index.ts rename to dataset/plugins/zenodo/resource/index.ts diff --git a/zenodo/zenodo/index.ts b/dataset/plugins/zenodo/zenodo/index.ts similarity index 100% rename from zenodo/zenodo/index.ts rename to dataset/plugins/zenodo/zenodo/index.ts diff --git a/zenodo/zenodo/request.ts b/dataset/plugins/zenodo/zenodo/request.ts similarity index 96% rename from zenodo/zenodo/request.ts rename to dataset/plugins/zenodo/zenodo/request.ts index d8d9ee64..eb9a85c8 100644 --- a/zenodo/zenodo/request.ts +++ b/dataset/plugins/zenodo/zenodo/request.ts @@ -1,4 +1,4 @@ -import type { Descriptor } from "@dpkit/core" +import type { Descriptor } from "@dpkit/metadata" export async function makeZenodoApiRequest(options: { endpoint: string diff --git a/folder/index.ts b/dataset/plugins/zip/index.ts similarity index 64% rename from folder/index.ts rename to dataset/plugins/zip/index.ts index 502a8f4a..8e03d380 100644 --- a/folder/index.ts +++ b/dataset/plugins/zip/index.ts @@ -1,3 +1,2 @@ export * from "./package/index.ts" -export * from "./folder/index.ts" export * from "./plugin.ts" diff --git a/zip/package/index.ts b/dataset/plugins/zip/package/index.ts similarity index 100% rename from zip/package/index.ts rename to dataset/plugins/zip/package/index.ts diff --git a/zip/package/load.spec.ts 
b/dataset/plugins/zip/package/load.spec.ts similarity index 98% rename from zip/package/load.spec.ts rename to dataset/plugins/zip/package/load.spec.ts index 0efe6b1e..cc4c253b 100644 --- a/zip/package/load.spec.ts +++ b/dataset/plugins/zip/package/load.spec.ts @@ -1,6 +1,6 @@ -import type { Package } from "@dpkit/core" -import { getTempFilePath, writeTempFile } from "@dpkit/file" +import type { Package } from "@dpkit/metadata" import { beforeEach, describe, expect, it } from "vitest" +import { getTempFilePath, writeTempFile } from "../../../file/index.ts" import { loadPackageFromZip } from "./load.ts" import { savePackageToZip } from "./save.ts" diff --git a/zip/package/load.ts b/dataset/plugins/zip/package/load.ts similarity index 89% rename from zip/package/load.ts rename to dataset/plugins/zip/package/load.ts index cc493691..dc42ea9d 100644 --- a/zip/package/load.ts +++ b/dataset/plugins/zip/package/load.ts @@ -1,8 +1,8 @@ import { readFile, writeFile } from "node:fs/promises" import { mkdir } from "node:fs/promises" import { dirname, join } from "node:path" -import { loadPackageDescriptor } from "@dpkit/core" -import { getTempFolderPath } from "@dpkit/folder" +import { loadPackageDescriptor } from "@dpkit/metadata" +import { getTempFolderPath } from "../../../folder/index.ts" import { unzip } from "fflate" export async function loadPackageFromZip(archivePath: string) { diff --git a/zip/package/save.spec.ts b/dataset/plugins/zip/package/save.spec.ts similarity index 98% rename from zip/package/save.spec.ts rename to dataset/plugins/zip/package/save.spec.ts index 166e3033..81fddd68 100644 --- a/zip/package/save.spec.ts +++ b/dataset/plugins/zip/package/save.spec.ts @@ -1,7 +1,7 @@ import { readFile } from "node:fs/promises" -import type { Package } from "@dpkit/core" -import { getTempFilePath, writeTempFile } from "@dpkit/file" +import type { Package } from "@dpkit/metadata" import { beforeEach, describe, expect, it } from "vitest" +import { getTempFilePath, writeTempFile } from "../../../file/index.ts" import { loadPackageFromZip } from "./load.ts" import { savePackageToZip } from "./save.ts" diff --git a/zip/package/save.ts b/dataset/plugins/zip/package/save.ts similarity index 78% rename from zip/package/save.ts rename to dataset/plugins/zip/package/save.ts index c492645d..0a85d38c 100644 --- a/zip/package/save.ts +++ b/dataset/plugins/zip/package/save.ts @@ -1,11 +1,14 @@ import { Buffer } from "node:buffer" import { writeFile } from "node:fs/promises" import type { Readable } from "node:stream" -import type { Descriptor, Package } from "@dpkit/core" -import { convertPackageToDescriptor, stringifyDescriptor } from "@dpkit/core" -import { loadFileStream, saveResourceFiles } from "@dpkit/file" -import { assertLocalPathVacant, getPackageBasepath } from "@dpkit/file" +import type { Descriptor, Package } from "@dpkit/metadata" +import { stringifyDescriptor } from "@dpkit/metadata" +import { convertPackageToDescriptor } from "@dpkit/metadata" import { zip } from "fflate" +import { assertLocalPathVacant } from "../../../file/index.ts" +import { getPackageBasepath } from "../../../package/index.ts" +import { saveResourceFiles } from "../../../resource/index.ts" +import { loadFileStream } from "../../../stream/index.ts" export async function savePackageToZip( dataPackage: Package, diff --git a/zip/plugin.spec.ts b/dataset/plugins/zip/plugin.spec.ts similarity index 99% rename from zip/plugin.spec.ts rename to dataset/plugins/zip/plugin.spec.ts index d267c57f..16b293d3 100644 --- 
a/zip/plugin.spec.ts +++ b/dataset/plugins/zip/plugin.spec.ts @@ -1,4 +1,4 @@ -import type { Package } from "@dpkit/core" +import type { Package } from "@dpkit/metadata" import { beforeEach, describe, expect, it, vi } from "vitest" import * as packageModule from "./package/index.ts" import { ZipPlugin } from "./plugin.ts" diff --git a/zip/plugin.ts b/dataset/plugins/zip/plugin.ts similarity index 81% rename from zip/plugin.ts rename to dataset/plugins/zip/plugin.ts index e575d8e4..8ded7102 100644 --- a/zip/plugin.ts +++ b/dataset/plugins/zip/plugin.ts @@ -1,7 +1,8 @@ -import type { Package, Plugin } from "@dpkit/core" +import type { Package } from "@dpkit/metadata" +import type { DatasetPlugin } from "../../plugin.ts" import { loadPackageFromZip, savePackageToZip } from "./package/index.ts" -export class ZipPlugin implements Plugin { +export class ZipPlugin implements DatasetPlugin { async loadPackage(source: string) { const isZip = getIsZip(source) if (!isZip) return undefined diff --git a/file/resource/index.ts b/dataset/resource/index.ts similarity index 100% rename from file/resource/index.ts rename to dataset/resource/index.ts diff --git a/file/resource/save.spec.ts b/dataset/resource/save.spec.ts similarity index 100% rename from file/resource/save.spec.ts rename to dataset/resource/save.spec.ts diff --git a/file/resource/save.ts b/dataset/resource/save.ts similarity index 96% rename from file/resource/save.ts rename to dataset/resource/save.ts index 7d943aa3..7c5fdbb0 100644 --- a/file/resource/save.ts +++ b/dataset/resource/save.ts @@ -1,10 +1,10 @@ -import type { Resource } from "@dpkit/core" +import type { Resource } from "@dpkit/metadata" import { convertResourceToDescriptor, denormalizePath, getFilename, isRemotePath, -} from "@dpkit/core" +} from "@dpkit/metadata" import invariant from "tiny-invariant" export type SaveFile = (options: { diff --git a/file/stream/concat.ts b/dataset/stream/concat.ts similarity index 100% rename from file/stream/concat.ts rename to dataset/stream/concat.ts diff --git a/file/stream/index.ts b/dataset/stream/index.ts similarity index 100% rename from file/stream/index.ts rename to dataset/stream/index.ts diff --git a/file/stream/load.ts b/dataset/stream/load.ts similarity index 97% rename from file/stream/load.ts rename to dataset/stream/load.ts index 4a96e43d..54cb43c1 100644 --- a/file/stream/load.ts +++ b/dataset/stream/load.ts @@ -1,6 +1,6 @@ import { createReadStream } from "node:fs" import { Readable, Transform } from "node:stream" -import { isRemotePath } from "@dpkit/core" +import { isRemotePath } from "@dpkit/metadata" export async function loadFileStream( path: string | string[], diff --git a/file/stream/save.ts b/dataset/stream/save.ts similarity index 100% rename from file/stream/save.ts rename to dataset/stream/save.ts diff --git a/ckan/tsconfig.json b/dataset/tsconfig.json similarity index 100% rename from ckan/tsconfig.json rename to dataset/tsconfig.json diff --git a/ckan/typedoc.json b/dataset/typedoc.json similarity index 100% rename from ckan/typedoc.json rename to dataset/typedoc.json diff --git a/docs/astro.config.ts b/docs/astro.config.ts index e5c220d5..78257e19 100644 --- a/docs/astro.config.ts +++ b/docs/astro.config.ts @@ -9,25 +9,15 @@ import starlightTypeDoc from "starlight-typedoc" const PACKAGES = { dpkit: "../dpkit", - "@dpkit/lib": "../lib", - "@dpkit/arrow": "../arrow", - "@dpkit/ckan": "../ckan", - "@dpkit/core": "../core", - "@dpkit/csv": "../csv", + "@dpkit/library": "../library", + "@dpkit/terminal": 
"../terminal", "@dpkit/database": "../database", - "@dpkit/datahub": "../datahub", - "@dpkit/file": "../file", - "@dpkit/github": "../github", - "@dpkit/html": "../html", - "@dpkit/inline": "../inline", - "@dpkit/json": "../json", - "@dpkit/markdown": "../markdown", - "@dpkit/ods": "../ods", - "@dpkit/parquet": "../parquet", + "@dpkit/audio": "../audio", + "@dpkit/video": "../video", + "@dpkit/image": "../image", "@dpkit/table": "../table", - "@dpkit/xlsx": "../xlsx", - "@dpkit/zenodo": "../zenodo", - "@dpkit/zip": "../zip", + "@dpkit/dataset": "../dataset", + "@dpkit/metadata": "../metadata", } export default defineConfig({ diff --git a/docs/content/docs/guides/ods.md b/docs/content/docs/guides/ods.md index ea6869ce..9b2373c3 100644 --- a/docs/content/docs/guides/ods.md +++ b/docs/content/docs/guides/ods.md @@ -56,8 +56,8 @@ const table = await loadOdsTable({ }) // Table is a Polars LazyDataFrame -const df = table.collect() -df.describe() +const frame = table.collect() +frame.describe() ``` ### Saving ODS Files diff --git a/docs/content/docs/guides/table.md b/docs/content/docs/guides/table.md index 93652e4e..886ef28f 100644 --- a/docs/content/docs/guides/table.md +++ b/docs/content/docs/guides/table.md @@ -12,9 +12,9 @@ The `@dpkit/table` package provides high-performance data validation and processing ### Basic Table Validation ```typescript -import { DataFrame } from "nodejs-polars" +import * as pl from "nodejs-polars" import { validateTable } from "@dpkit/table" -import type { Schema } from "@dpkit/core" +import type { Schema } from "@dpkit/metadata" // Create a table from data -const table = DataFrame({ +const table = pl.DataFrame({ @@ -139,7 +139,7 @@ result.errors.forEach(error => { The package uses `LazyDataFrame` from nodejs-polars as its core table representation, enabling lazy evaluation and efficient processing of large datasets through vectorized operations. ### Schema Integration -Integrates seamlessly with `@dpkit/core` schemas, bridging Data Package field definitions with Polars data types for comprehensive validation workflows. +Integrates seamlessly with `@dpkit/metadata` schemas, bridging Data Package field definitions with Polars data types for comprehensive validation workflows. 
## Key Features diff --git a/docs/content/docs/guides/xlsx.md b/docs/content/docs/guides/xlsx.md index 9289068f..7f7ba4c6 100644 --- a/docs/content/docs/guides/xlsx.md +++ b/docs/content/docs/guides/xlsx.md @@ -56,8 +56,8 @@ const table = await loadXlsxTable({ }) // Table is a Polars LazyDataFrame -const df = table.collect() -df.describe() +const frame = table.collect() +frame.describe() ``` ### Saving XLSX Files diff --git a/docs/content/docs/index.md b/docs/content/docs/index.md index 79998d50..59fa6a4e 100644 --- a/docs/content/docs/index.md +++ b/docs/content/docs/index.md @@ -18,7 +18,7 @@ dpkit and all its packages support all the prominent TypeScript runtimes: - **Deno v2+** - **Bun v1+** -The core package `@dpkit/core` additionally supports browser environments: +The core package `@dpkit/metadata` additionally supports browser environments: - **Edge v92+** - **Chrome v92+** @@ -45,13 +45,13 @@ npm install @dpkit/lib -You car cherry-pick from individual packages: +You can cherry-pick from individual packages: ```bash -npm install @dpkit/core @dpkit/zenodo +npm install @dpkit/metadata @dpkit/zenodo ``` -In the browser, the core package can be just imported using NPM CDNs: +In the browser, the core package can just be imported using NPM CDNs: ```js -import { loadPackageDescriptor } from "https://esm.sh/@dpkit/core" +import { loadPackageDescriptor } from "https://esm.sh/@dpkit/metadata" ``` ## TypeScript diff --git a/docs/package.json b/docs/package.json index 58e5ad39..0cc47b84 100644 --- a/docs/package.json +++ b/docs/package.json @@ -12,7 +12,7 @@ "@astrojs/starlight": "0.36.0", "astro": "5.14.1", "dpkit": "workspace:*", - "nodejs-polars": "0.22.1", + "nodejs-polars": "0.22.2", "sharp": "0.34.2", "starlight-changelogs": "0.1.1", "starlight-github-alerts": "0.1.0", diff --git a/arrow/README.md b/document/README.md similarity index 77% rename from arrow/README.md rename to document/README.md index c6ae6be1..2c700cc7 100644 --- a/arrow/README.md +++ b/document/README.md @@ -1,3 +1,3 @@ -# @dpkit/arrow +# @dpkit/document -dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev). +dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [project's website](https://dpkit.app). 
diff --git a/document/document/Document.ts b/document/document/Document.ts new file mode 100644 index 00000000..396dabb9 --- /dev/null +++ b/document/document/Document.ts @@ -0,0 +1,3 @@ +import type { JsonDocument } from "./types/Json.ts" + +export type Document = JsonDocument diff --git a/document/document/index.ts b/document/document/index.ts new file mode 100644 index 00000000..a3f6d661 --- /dev/null +++ b/document/document/index.ts @@ -0,0 +1,3 @@ +export type { Document } from "./Document.ts" +export { validateDocument } from "./validate.ts" +export * from "./types/Json.ts" diff --git a/document/document/types/Base.ts b/document/document/types/Base.ts new file mode 100644 index 00000000..2fdda134 --- /dev/null +++ b/document/document/types/Base.ts @@ -0,0 +1,3 @@ +export interface BaseDocument { + type: string +} diff --git a/document/document/types/Json.ts b/document/document/types/Json.ts new file mode 100644 index 00000000..224059a7 --- /dev/null +++ b/document/document/types/Json.ts @@ -0,0 +1,6 @@ +import type { BaseDocument } from "./Base.ts" + +export interface JsonDocument extends BaseDocument { + type: "json" + data: Record<string, unknown> +} diff --git a/document/document/validate.spec.ts b/document/document/validate.spec.ts new file mode 100644 index 00000000..05b5b91a --- /dev/null +++ b/document/document/validate.spec.ts @@ -0,0 +1,79 @@ +import { describe, expect, it } from "vitest" +import { validateDocument } from "./validate.ts" + +describe("validateDocument", () => { + it("should return valid report when data matches jsonSchema", async () => { + const resource = { + name: "test-document", + data: { + name: "test-package", + version: "1.0.0", + }, + jsonSchema: { + type: "object", + required: ["name", "version"], + properties: { + name: { type: "string" }, + version: { type: "string" }, + }, + }, + } + + const report = await validateDocument(resource) + + expect(report.valid).toBe(true) + expect(report.errors).toEqual([]) + }) + + it("should return error when data is missing with jsonSchema", async () => { + const resource = { + name: "test-document", + jsonSchema: { + type: "object", + required: ["name"], + properties: { + name: { type: "string" }, + }, + }, + } + + const report = await validateDocument(resource) + + expect(report.valid).toBe(false) + expect(report.errors).toEqual([ + { + type: "data", + message: "missing test-document data", + }, + ]) + }) + + it("should return validation errors when data does not match jsonSchema", async () => { + const resource = { + name: "test-document", + data: { + name: "test-package", + version: 123, + }, + jsonSchema: { + type: "object", + required: ["name", "version"], + properties: { + name: { type: "string" }, + version: { type: "string" }, + }, + }, + } + + const report = await validateDocument(resource) + + expect(report.valid).toBe(false) + expect(report.errors).toEqual([ + { + type: "document/json", + pointer: "/version", + message: "must be string", + }, + ]) + }) +}) diff --git a/document/document/validate.ts b/document/document/validate.ts new file mode 100644 index 00000000..106ea74f --- /dev/null +++ b/document/document/validate.ts @@ -0,0 +1,34 @@ +import type { Resource } from "@dpkit/metadata" +import type { DataError } from "@dpkit/metadata" +import type { JsonDocumentError } from "@dpkit/metadata" +import { createReport } from "@dpkit/metadata" +import { resolveJsonSchema } from "@dpkit/metadata" +import { inspectJsonValue } from "@dpkit/metadata" + +export async function validateDocument(resource: Partial<Resource>) { + if 
(resource.jsonSchema) { + const jsonSchema = await resolveJsonSchema(resource.jsonSchema) + + if (!resource.data) { + return createReport([ + { + type: "data", + message: `missing ${resource.name} data`, + }, + ]) + } + + if (jsonSchema) { + const errors = await inspectJsonValue(resource.data, { jsonSchema }) + + return createReport( + errors.map(error => ({ + type: "document/json", + ...error, + })), + ) + } + } + + return createReport() +} diff --git a/document/index.ts b/document/index.ts new file mode 100644 index 00000000..f4f31d7e --- /dev/null +++ b/document/index.ts @@ -0,0 +1,6 @@ +export type { Document } from "./document/index.ts" +export type { JsonDocument } from "./document/index.ts" + +export { convertSchemaToHtml } from "./schema/index.ts" +export { convertSchemaToMarkdown } from "./schema/index.ts" +export { validateDocument } from "./document/index.ts" diff --git a/html/package.json b/document/package.json similarity index 73% rename from html/package.json rename to document/package.json index c2ee081f..17eca4ab 100644 --- a/html/package.json +++ b/document/package.json @@ -1,9 +1,10 @@ { - "name": "@dpkit/html", + "name": "@dpkit/document", "type": "module", "version": "0.0.0-dev", "exports": "./build/index.js", "sideEffects": false, + "files": ["build"], "license": "MIT", "author": "Evgeny Karev", "repository": "https://github.com/datisthq/dpkit", @@ -18,19 +19,23 @@ "validation", "quality", "fair", + "document", "html" ], "scripts": { "build": "tsc" }, "dependencies": { - "@dpkit/core": "workspace:*", + "@dpkit/metadata": "workspace:*", + "@dpkit/dataset": "workspace:*", "htmlfy": "^1.0.0", "react": "^19.2.0", - "react-dom": "^19.2.0" + "react-dom": "^19.2.0", + "remark": "^15.0.1", + "remark-gfm": "^4.0.0" }, "devDependencies": { - "@dpkit/test": "workspace:*", + "@types/mdast": "^4.0.0", "@types/react": "^19.2.0", "@types/react-dom": "^19.2.0" } diff --git a/document/plugin.ts b/document/plugin.ts new file mode 100644 index 00000000..dc071d3d --- /dev/null +++ b/document/plugin.ts @@ -0,0 +1 @@ +// TODO: consider having a plugin here (e.g. 
plugin.convertSchema) diff --git a/html/schema/convert/toHtml.spec.tsx b/document/schema/convert/toHtml.spec.tsx similarity index 99% rename from html/schema/convert/toHtml.spec.tsx rename to document/schema/convert/toHtml.spec.tsx index bd4f30c3..1803fc23 100644 --- a/html/schema/convert/toHtml.spec.tsx +++ b/document/schema/convert/toHtml.spec.tsx @@ -1,4 +1,4 @@ -import type { Schema } from "@dpkit/core" +import type { Schema } from "@dpkit/metadata" import { describe, expect, it } from "vitest" import { convertSchemaToHtml } from "./toHtml.tsx" diff --git a/html/schema/convert/toHtml.tsx b/document/schema/convert/toHtml.tsx similarity index 99% rename from html/schema/convert/toHtml.tsx rename to document/schema/convert/toHtml.tsx index caab0fa6..364603e3 100644 --- a/html/schema/convert/toHtml.tsx +++ b/document/schema/convert/toHtml.tsx @@ -1,4 +1,4 @@ -import type { Field, Schema } from "@dpkit/core" +import type { Field, Schema } from "@dpkit/metadata" import { prettify } from "htmlfy" import React from "react" import { renderToStaticMarkup } from "react-dom/server" diff --git a/markdown/schema/convert/toMarkdown.spec.ts b/document/schema/convert/toMarkdown.spec.ts similarity index 98% rename from markdown/schema/convert/toMarkdown.spec.ts rename to document/schema/convert/toMarkdown.spec.ts index 64ddc303..d6ceb173 100644 --- a/markdown/schema/convert/toMarkdown.spec.ts +++ b/document/schema/convert/toMarkdown.spec.ts @@ -1,4 +1,4 @@ -import type { Schema } from "@dpkit/core" +import type { Schema } from "@dpkit/metadata" import { describe, expect, it } from "vitest" import { convertSchemaToMarkdown } from "./toMarkdown.ts" diff --git a/markdown/schema/convert/toMarkdown.ts b/document/schema/convert/toMarkdown.ts similarity index 98% rename from markdown/schema/convert/toMarkdown.ts rename to document/schema/convert/toMarkdown.ts index 9d5a9101..edf5d34b 100644 --- a/markdown/schema/convert/toMarkdown.ts +++ b/document/schema/convert/toMarkdown.ts @@ -1,4 +1,4 @@ -import type { Schema } from "@dpkit/core" +import type { Schema } from "@dpkit/metadata" export function convertSchemaToMarkdown( schema: Schema, diff --git a/markdown/schema/index.ts b/document/schema/index.ts similarity index 52% rename from markdown/schema/index.ts rename to document/schema/index.ts index f7d2ddfa..65bb6ec7 100644 --- a/markdown/schema/index.ts +++ b/document/schema/index.ts @@ -1 +1,2 @@ +export { convertSchemaToHtml } from "./convert/toHtml.tsx" export { convertSchemaToMarkdown } from "./convert/toMarkdown.ts" diff --git a/cli/tsconfig.json b/document/tsconfig.json similarity index 100% rename from cli/tsconfig.json rename to document/tsconfig.json diff --git a/core/typedoc.json b/document/typedoc.json similarity index 100% rename from core/typedoc.json rename to document/typedoc.json diff --git a/dpkit/README.md b/dpkit/README.md index f8d84925..aab80ffe 100644 --- a/dpkit/README.md +++ b/dpkit/README.md @@ -1,3 +1,3 @@ # dpkit -dpkit CLI is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev). +dpkit CLI is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. 
For more information, please visit the [project's website](https://dpkit.app). diff --git a/dpkit/entrypoints/run.ts b/dpkit/entrypoints/run.ts index a4c62422..a2825590 100755 --- a/dpkit/entrypoints/run.ts +++ b/dpkit/entrypoints/run.ts @@ -1,3 +1,11 @@ #!/usr/bin/env node -await import("@dpkit/cli/build/entrypoints/run.js") +process.removeAllListeners("warning") +process.on("warning", warning => { + if (warning.name === "ExperimentalWarning") { + return + } + console.warn(warning) +}) + +await import("../main.ts") diff --git a/dpkit/index.ts b/dpkit/index.ts index 9a6a6e72..2b1e9342 100644 --- a/dpkit/index.ts +++ b/dpkit/index.ts @@ -1 +1 @@ -export * from "@dpkit/lib" +export * from "@dpkit/library" diff --git a/dpkit/main.ts b/dpkit/main.ts new file mode 100644 index 00000000..6679ddb7 --- /dev/null +++ b/dpkit/main.ts @@ -0,0 +1,3 @@ +import { program } from "./program.ts" + +program.parse() diff --git a/dpkit/package.json b/dpkit/package.json index 4365eeaf..8e214ca2 100644 --- a/dpkit/package.json +++ b/dpkit/package.json @@ -4,8 +4,9 @@ "version": "0.0.0-dev", "exports": "./build/index.js", "sideEffects": false, + "files": ["build"], "bin": { - "dp": "./build/entrypoints/run.js" + "dpkit": "./build/entrypoints/run.js" }, "license": "MIT", "author": "Evgeny Karev", @@ -21,8 +22,8 @@ "validation", "quality", "fair", - "lib", - "cli" + "library", + "terminal" ], "scripts": { "build": "tsc && pnpm build:copy && pnpm build:mode", @@ -30,7 +31,7 @@ "build:mode": "chmod +x ./build/entrypoints/*.js" }, "dependencies": { - "@dpkit/cli": "workspace:*", - "@dpkit/lib": "workspace:*" + "@dpkit/library": "workspace:*", + "@dpkit/terminal": "workspace:*" } } diff --git a/dpkit/program.ts b/dpkit/program.ts new file mode 100644 index 00000000..256d13e4 --- /dev/null +++ b/dpkit/program.ts @@ -0,0 +1,4 @@ +import * as terminal from "@dpkit/terminal" + +// TODO: add ui/api commands +export const program = terminal.program diff --git a/file/error/Base.ts b/file/error/Base.ts deleted file mode 100644 index a517c9cf..00000000 --- a/file/error/Base.ts +++ /dev/null @@ -1,3 +0,0 @@ -import type { BaseError } from "@dpkit/core" - -export interface BaseFileError extends BaseError {} diff --git a/file/error/Bytes.ts b/file/error/Bytes.ts deleted file mode 100644 index fbf5cce0..00000000 --- a/file/error/Bytes.ts +++ /dev/null @@ -1,7 +0,0 @@ -import type { BaseFileError } from "./Base.ts" - -export interface BytesError extends BaseFileError { - type: "file/bytes" - bytes: number - actualBytes: number -} diff --git a/file/error/Encoding.ts b/file/error/Encoding.ts deleted file mode 100644 index 57217338..00000000 --- a/file/error/Encoding.ts +++ /dev/null @@ -1,7 +0,0 @@ -import type { BaseFileError } from "./Base.ts" - -export interface EncodingError extends BaseFileError { - type: "file/encoding" - encoding: string - actualEncoding: string -} diff --git a/file/error/Hash.ts b/file/error/Hash.ts deleted file mode 100644 index ed8bc4ab..00000000 --- a/file/error/Hash.ts +++ /dev/null @@ -1,7 +0,0 @@ -import type { BaseFileError } from "./Base.ts" - -export interface HashError extends BaseFileError { - type: "file/hash" - hash: string - actualHash: string -} diff --git a/file/error/index.ts b/file/error/index.ts deleted file mode 100644 index 547d82e2..00000000 --- a/file/error/index.ts +++ /dev/null @@ -1,4 +0,0 @@ -export type * from "./File.ts" -export type * from "./Bytes.ts" -export type * from "./Encoding.ts" -export type * from "./Hash.ts" diff --git a/file/file/validate.ts b/file/file/validate.ts 
deleted file mode 100644 index 3535a467..00000000 --- a/file/file/validate.ts +++ /dev/null @@ -1,59 +0,0 @@ -import type { FileError } from "../error/index.ts" -import { prefetchFiles } from "./fetch.ts" -import { inferFileBytes, inferFileEncoding, inferFileHash } from "./infer.ts" - -export async function validateFile( - path?: string | string[], - options?: { bytes?: number; hash?: string; encoding?: string }, -) { - const errors: FileError[] = [] - const localPaths = await prefetchFiles(path) - - if (options?.bytes) { - const bytes = options.bytes - const actualBytes = await inferFileBytes(localPaths) - - if (bytes !== actualBytes) { - errors.push({ - type: "file/bytes", - bytes, - actualBytes, - }) - } - } - - if (options?.hash) { - const [hashValue, hashType = "md5"] = options.hash.split(":").toReversed() - - const hash = `${hashType}:${hashValue}` - const actualHash = await inferFileHash(localPaths, { - hashType: hashType as any, - }) - - if (hash !== actualHash) { - errors.push({ - type: "file/hash", - hash, - actualHash, - }) - } - } - - if (options?.encoding) { - const encoding = options.encoding - const actualEncoding = await inferFileEncoding(localPaths) - - if (actualEncoding) { - if (encoding !== actualEncoding) { - errors.push({ - type: "file/encoding", - encoding, - actualEncoding, - }) - } - } - } - - const valid = errors.length === 0 - return { valid, errors } -} diff --git a/file/index.ts b/file/index.ts deleted file mode 100644 index 00ec5705..00000000 --- a/file/index.ts +++ /dev/null @@ -1,5 +0,0 @@ -export * from "./error/index.ts" -export * from "./file/index.ts" -export * from "./package/index.ts" -export * from "./resource/index.ts" -export * from "./stream/index.ts" diff --git a/folder/README.md b/folder/README.md deleted file mode 100644 index ee77d848..00000000 --- a/folder/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# @dpkit/folder - -dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev). diff --git a/github/README.md b/github/README.md deleted file mode 100644 index 5b303d0d..00000000 --- a/github/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# @dpkit/github - -dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev). 
diff --git a/github/package.json b/github/package.json
deleted file mode 100644
index 5428a0eb..00000000
--- a/github/package.json
+++ /dev/null
@@ -1,33 +0,0 @@
-{
-  "name": "@dpkit/github",
-  "type": "module",
-  "version": "0.0.0-dev",
-  "exports": "./build/index.js",
-  "sideEffects": false,
-  "license": "MIT",
-  "author": "Evgeny Karev",
-  "repository": "https://github.com/datisthq/dpkit",
-  "description": "Fast TypeScript data management framework built on top of the Data Package standard and Polars DataFrames",
-  "keywords": [
-    "data",
-    "polars",
-    "dataframe",
-    "datapackage",
-    "tableschema",
-    "typescript",
-    "validation",
-    "quality",
-    "fair",
-    "github"
-  ],
-  "scripts": {
-    "build": "tsc"
-  },
-  "dependencies": {
-    "@dpkit/core": "workspace:*",
-    "@dpkit/file": "workspace:*"
-  },
-  "devDependencies": {
-    "@dpkit/test": "workspace:*"
-  }
-}
diff --git a/github/tsconfig.json b/github/tsconfig.json
deleted file mode 100644
index 3c43903c..00000000
--- a/github/tsconfig.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-  "extends": "../tsconfig.json"
-}
diff --git a/github/typedoc.json b/github/typedoc.json
deleted file mode 100644
index f8e49f3a..00000000
--- a/github/typedoc.json
+++ /dev/null
@@ -1,4 +0,0 @@
-{
-  "entryPoints": ["index.ts"],
-  "skipErrorChecking": true
-}
diff --git a/html/README.md b/html/README.md
deleted file mode 100644
index d47ac386..00000000
--- a/html/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# @dpkit/html
-
-dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev).
diff --git a/html/index.ts b/html/index.ts
deleted file mode 100644
index 352ef322..00000000
--- a/html/index.ts
+++ /dev/null
@@ -1 +0,0 @@
-export * from "./schema/index.ts"
diff --git a/html/schema/index.ts b/html/schema/index.ts
deleted file mode 100644
index bbc86c5d..00000000
--- a/html/schema/index.ts
+++ /dev/null
@@ -1 +0,0 @@
-export * from "./convert/toHtml.tsx"
diff --git a/html/tsconfig.json b/html/tsconfig.json
deleted file mode 100644
index 3c43903c..00000000
--- a/html/tsconfig.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-  "extends": "../tsconfig.json"
-}
diff --git a/html/typedoc.json b/html/typedoc.json
deleted file mode 100644
index f8e49f3a..00000000
--- a/html/typedoc.json
+++ /dev/null
@@ -1,4 +0,0 @@
-{
-  "entryPoints": ["index.ts"],
-  "skipErrorChecking": true
-}
diff --git a/ods/README.md b/image/README.md
similarity index 78%
rename from ods/README.md
rename to image/README.md
index 9e815067..8e6ca3e5 100644
--- a/ods/README.md
+++ b/image/README.md
@@ -1,3 +1,3 @@
-# @dpkit/ods
+# @dpkit/image
 
-dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev).
+dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [project's website](https://dpkit.app).
diff --git a/image/index.ts b/image/index.ts
new file mode 100644
index 00000000..e69de29b
diff --git a/datahub/package.json b/image/package.json
similarity index 80%
rename from datahub/package.json
rename to image/package.json
index 0846f1c6..e62d7069 100644
--- a/datahub/package.json
+++ b/image/package.json
@@ -1,9 +1,10 @@
 {
-  "name": "@dpkit/datahub",
+  "name": "@dpkit/image",
   "type": "module",
   "version": "0.0.0-dev",
   "exports": "./build/index.js",
   "sideEffects": false,
+  "files": ["build"],
   "license": "MIT",
   "author": "Evgeny Karev",
   "repository": "https://github.com/datisthq/dpkit",
@@ -18,15 +19,13 @@
     "validation",
     "quality",
     "fair",
-    "datahub"
+    "image"
   ],
   "scripts": {
     "build": "tsc"
   },
   "dependencies": {
-    "@dpkit/core": "workspace:*"
-  },
-  "devDependencies": {
-    "@dpkit/test": "workspace:*"
+    "@dpkit/metadata": "workspace:*",
+    "@dpkit/dataset": "workspace:*"
   }
 }
diff --git a/core/tsconfig.json b/image/tsconfig.json
similarity index 100%
rename from core/tsconfig.json
rename to image/tsconfig.json
diff --git a/csv/typedoc.json b/image/typedoc.json
similarity index 100%
rename from csv/typedoc.json
rename to image/typedoc.json
diff --git a/inline/README.md b/inline/README.md
deleted file mode 100644
index 96acecbc..00000000
--- a/inline/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# @dpkit/inline
-
-dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev).
diff --git a/inline/package.json b/inline/package.json
deleted file mode 100644
index 0f76ffb5..00000000
--- a/inline/package.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "name": "@dpkit/inline",
-  "type": "module",
-  "version": "0.0.0-dev",
-  "exports": "./build/index.js",
-  "sideEffects": false,
-  "license": "MIT",
-  "author": "Evgeny Karev",
-  "repository": "https://github.com/datisthq/dpkit",
-  "description": "Data Package implementation in TypeScript.",
-  "keywords": [
-    "data",
-    "polars",
-    "dataframe",
-    "datapackage",
-    "tableschema",
-    "typescript",
-    "validation",
-    "quality",
-    "fair",
-    "inline"
-  ],
-  "scripts": {
-    "build": "tsc"
-  },
-  "dependencies": {
-    "@dpkit/core": "workspace:*",
-    "@dpkit/table": "workspace:*",
-    "nodejs-polars": "^0.22.1"
-  }
-}
diff --git a/inline/tsconfig.json b/inline/tsconfig.json
deleted file mode 100644
index 3c43903c..00000000
--- a/inline/tsconfig.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-  "extends": "../tsconfig.json"
-}
diff --git a/inline/typedoc.json b/inline/typedoc.json
deleted file mode 100644
index f8e49f3a..00000000
--- a/inline/typedoc.json
+++ /dev/null
@@ -1,4 +0,0 @@
-{
-  "entryPoints": ["index.ts"],
-  "skipErrorChecking": true
-}
diff --git a/json/README.md b/json/README.md
deleted file mode 100644
index 106a8222..00000000
--- a/json/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# @dpkit/json
-
-dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev).
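Judging by its dependencies (`@dpkit/table`, `nodejs-polars`), the `@dpkit/inline` package removed above exposed a resource's inline `data` rows as the lazy Polars frames the rest of the framework consumes, a job absorbed by the consolidated packages elsewhere in this diff. A rough sketch of the idea, not the package's actual code:

```ts
// Hypothetical sketch: inline rows become a lazy nodejs-polars frame,
// which is the Table shape used throughout dpkit.
import { DataFrame } from "nodejs-polars"

const table = DataFrame({
  id: [1, 2],
  name: ["alice", "bob"],
}).lazy()
```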
diff --git a/json/tsconfig.json b/json/tsconfig.json
deleted file mode 100644
index 3c43903c..00000000
--- a/json/tsconfig.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-  "extends": "../tsconfig.json"
-}
diff --git a/json/typedoc.json b/json/typedoc.json
deleted file mode 100644
index f8e49f3a..00000000
--- a/json/typedoc.json
+++ /dev/null
@@ -1,4 +0,0 @@
-{
-  "entryPoints": ["index.ts"],
-  "skipErrorChecking": true
-}
diff --git a/lib/index.ts b/lib/index.ts
deleted file mode 100644
index f8d5ea08..00000000
--- a/lib/index.ts
+++ /dev/null
@@ -1,26 +0,0 @@
-export * from "@dpkit/arrow"
-export * from "@dpkit/ckan"
-export * from "@dpkit/core"
-export * from "@dpkit/csv"
-export * from "@dpkit/database"
-export * from "@dpkit/datahub"
-export * from "@dpkit/file"
-export * from "@dpkit/folder"
-export * from "@dpkit/github"
-export * from "@dpkit/html"
-export * from "@dpkit/inline"
-export * from "@dpkit/json"
-export * from "@dpkit/markdown"
-export * from "@dpkit/ods"
-export * from "@dpkit/parquet"
-export * from "@dpkit/table"
-export * from "@dpkit/xlsx"
-export * from "@dpkit/zenodo"
-export * from "@dpkit/zip"
-
-export * from "./dialect/index.ts"
-export * from "./package/index.ts"
-export * from "./resource/index.ts"
-export * from "./schema/index.ts"
-export * from "./table/index.ts"
-export * from "./plugin.ts"
diff --git a/lib/package.json b/lib/package.json
deleted file mode 100644
index b7219603..00000000
--- a/lib/package.json
+++ /dev/null
@@ -1,47 +0,0 @@
-{
-  "name": "@dpkit/lib",
-  "type": "module",
-  "version": "0.0.0-dev",
-  "exports": "./build/index.js",
-  "sideEffects": false,
-  "license": "MIT",
-  "author": "Evgeny Karev",
-  "repository": "https://github.com/datisthq/dpkit",
-  "description": "Fast TypeScript data management framework built on top of the Data Package standard and Polars DataFrames",
-  "keywords": [
-    "data",
-    "polars",
-    "dataframe",
-    "datapackage",
-    "tableschema",
-    "typescript",
-    "validation",
-    "quality",
-    "fair",
-    "lib"
-  ],
-  "dependencies": {
-    "@dpkit/arrow": "workspace:*",
-    "@dpkit/ckan": "workspace:*",
-    "@dpkit/core": "workspace:*",
-    "@dpkit/csv": "workspace:*",
-    "@dpkit/database": "workspace:*",
-    "@dpkit/datahub": "workspace:*",
-    "@dpkit/file": "workspace:*",
-    "@dpkit/folder": "workspace:*",
-    "@dpkit/github": "workspace:*",
-    "@dpkit/html": "workspace:*",
-    "@dpkit/inline": "workspace:*",
-    "@dpkit/json": "workspace:*",
-    "@dpkit/markdown": "workspace:*",
-    "@dpkit/ods": "workspace:*",
-    "@dpkit/parquet": "workspace:*",
-    "@dpkit/table": "workspace:*",
-    "@dpkit/xlsx": "workspace:*",
-    "@dpkit/zenodo": "workspace:*",
-    "@dpkit/zip": "workspace:*"
-  },
-  "scripts": {
-    "build": "tsc"
-  }
-}
diff --git a/lib/package/load.ts b/lib/package/load.ts
deleted file mode 100644
index bfa090fe..00000000
--- a/lib/package/load.ts
+++ /dev/null
@@ -1,12 +0,0 @@
-import { loadPackageDescriptor } from "@dpkit/core"
-import { dpkit } from "../plugin.ts"
-
-export async function loadPackage(source: string) {
-  for (const plugin of dpkit.plugins) {
-    const result = await plugin.loadPackage?.(source)
-    if (result) return result
-  }
-
-  const dataPackage = await loadPackageDescriptor(source)
-  return dataPackage
-}
diff --git a/lib/package/save.ts b/lib/package/save.ts
deleted file mode 100644
index 2373936d..00000000
--- a/lib/package/save.ts
+++ /dev/null
@@ -1,23 +0,0 @@
-import type { Package, SavePackageOptions } from "@dpkit/core"
-import { savePackageDescriptor } from "@dpkit/core"
-import { dpkit } from "../plugin.ts"
-
-export async function savePackage(
-  dataPackage: Package,
-  options: SavePackageOptions,
-) {
-  for (const plugin of dpkit.plugins) {
-    const result = await plugin.savePackage?.(dataPackage, {
-      plugins: dpkit.plugins,
-      ...options,
-    })
-
-    if (result) return result
-  }
-
-  if (options.target.endsWith("datapackage.json")) {
-    return await savePackageDescriptor(dataPackage, { path: options.target })
-  }
-
-  throw new Error(`No plugin can save the package: ${options.target}`)
-}
diff --git a/lib/plugin.ts b/lib/plugin.ts
deleted file mode 100644
index eb5acebe..00000000
--- a/lib/plugin.ts
+++ /dev/null
@@ -1,45 +0,0 @@
-import { ArrowPlugin } from "@dpkit/arrow"
-import { CkanPlugin } from "@dpkit/ckan"
-import { CsvPlugin } from "@dpkit/csv"
-import { DatabasePlugin } from "@dpkit/database"
-import { DatahubPlugin } from "@dpkit/datahub"
-import { FolderPlugin } from "@dpkit/folder"
-import { GithubPlugin } from "@dpkit/github"
-import { InlinePlugin } from "@dpkit/inline"
-import { JsonPlugin } from "@dpkit/json"
-import { OdsPlugin } from "@dpkit/ods"
-import { ParquetPlugin } from "@dpkit/parquet"
-import type { TablePlugin } from "@dpkit/table"
-import { XlsxPlugin } from "@dpkit/xlsx"
-import { ZenodoPlugin } from "@dpkit/zenodo"
-import { ZipPlugin } from "@dpkit/zip"
-
-export class Dpkit {
-  plugins: TablePlugin[] = []
-
-  register(PluginClass: new () => TablePlugin) {
-    this.plugins.unshift(new PluginClass())
-  }
-}
-
-export const dpkit = new Dpkit()
-
-// Core functions
-dpkit.register(CkanPlugin)
-dpkit.register(DatahubPlugin)
-dpkit.register(GithubPlugin)
-dpkit.register(ZenodoPlugin)
-dpkit.register(FolderPlugin)
-dpkit.register(ZipPlugin)
-
-// Table functions
-dpkit.register(ArrowPlugin)
-dpkit.register(CsvPlugin)
-dpkit.register(InlinePlugin)
-dpkit.register(JsonPlugin)
-dpkit.register(OdsPlugin)
-dpkit.register(ParquetPlugin)
-dpkit.register(XlsxPlugin)
-
-// Mixed functions
-dpkit.register(DatabasePlugin)
diff --git a/lib/resource/validate.ts b/lib/resource/validate.ts
deleted file mode 100644
index c46631bc..00000000
--- a/lib/resource/validate.ts
+++ /dev/null
@@ -1,74 +0,0 @@
-import type { DataError, Descriptor, Resource } from "@dpkit/core"
-import { resolveSchema } from "@dpkit/core"
-import { loadDescriptor, validateResourceMetadata } from "@dpkit/core"
-import { resolveBasepath } from "@dpkit/core"
-import { validateFile } from "@dpkit/file"
-import { validateTable } from "@dpkit/table"
-import type { InferSchemaOptions } from "@dpkit/table"
-import { inferSchema } from "../schema/index.ts"
-import { loadTable } from "../table/index.ts"
-
-export async function validateResource(
-  source: string | Descriptor | Partial<Resource>,
-  options?: InferSchemaOptions & { basepath?: string },
-) {
-  let descriptor = source
-  let basepath = options?.basepath
-
-  if (typeof descriptor === "string") {
-    basepath = await resolveBasepath(descriptor)
-    descriptor = await loadDescriptor(descriptor)
-  }
-
-  const { valid, errors, resource } = await validateResourceMetadata(
-    descriptor,
-    { basepath },
-  )
-
-  if (!resource) {
-    return { valid, errors }
-  }
-
-  return await validateResourceData(resource, options)
-}
-
-export async function validateResourceData(
-  resource: Partial<Resource>,
-  options?: InferSchemaOptions,
-) {
-  const errors: DataError[] = []
-
-  // TODO: validateFileFromResource?
-  const fileReport = await validateFile(resource.path, {
-    bytes: resource.bytes,
-    hash: resource.hash,
-    encoding: resource.encoding,
-  })
-
-  if (!fileReport.valid) {
-    return fileReport
-  }
-
-  // TODO: validateTableFromResource?
-  const table = await loadTable(resource, { denormalized: true })
-  if (table) {
-    let schema = await resolveSchema(resource.schema)
-    if (!schema) schema = await inferSchema(resource, options)
-    const tableReport = await validateTable(table, { schema })
-
-    if (!tableReport.valid) {
-      return tableReport
-    }
-  }
-
-  // TODO: Add document validation here
-
-  if (!table && resource.schema) {
-    errors.push({
-      type: "data",
-      message: "missing table",
-    })
-  }
-
-  return { valid: errors.length === 0, errors }
-}
diff --git a/lib/tsconfig.json b/lib/tsconfig.json
deleted file mode 100644
index 3c43903c..00000000
--- a/lib/tsconfig.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-  "extends": "../tsconfig.json"
-}
diff --git a/lib/typedoc.json b/lib/typedoc.json
deleted file mode 100644
index f8e49f3a..00000000
--- a/lib/typedoc.json
+++ /dev/null
@@ -1,4 +0,0 @@
-{
-  "entryPoints": ["index.ts"],
-  "skipErrorChecking": true
-}
diff --git a/ckan/README.md b/library/README.md
similarity index 78%
rename from ckan/README.md
rename to library/README.md
index 76b64cf8..bad5c8a4 100644
--- a/ckan/README.md
+++ b/library/README.md
@@ -1,3 +1,3 @@
-# @dpkit/ckan
+# @dpkit/library
 
-dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev).
+dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [project's website](https://dpkit.app).
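The registry deleted above in `lib/plugin.ts` reappears later in this diff as `library/system.ts`, and the dispatch contract stays the same: `register` unshifts, so the most recently registered plugin is consulted first, and consumers take the first non-empty result. A standalone sketch of that first-match loop (the `Plugin` interface here is simplified, not the real `TablePlugin`):

```ts
// Simplified first-match plugin dispatch, mirroring the loops shown above.
interface Plugin {
  loadPackage?: (source: string) => Promise<object | undefined>
}

const plugins: Plugin[] = []

// unshift means later registrations take precedence over built-ins
function register(plugin: Plugin) {
  plugins.unshift(plugin)
}

async function loadPackage(source: string) {
  for (const plugin of plugins) {
    const result = await plugin.loadPackage?.(source)
    if (result) return result // first plugin that recognizes the source wins
  }
  throw new Error(`No plugin can load the package: ${source}`)
}
```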
diff --git a/lib/dialect/index.ts b/library/dialect/index.ts similarity index 100% rename from lib/dialect/index.ts rename to library/dialect/index.ts diff --git a/lib/dialect/infer.spec.ts b/library/dialect/infer.spec.ts similarity index 98% rename from lib/dialect/infer.spec.ts rename to library/dialect/infer.spec.ts index 5f40d9fc..51e5d064 100644 --- a/lib/dialect/infer.spec.ts +++ b/library/dialect/infer.spec.ts @@ -1,4 +1,4 @@ -import { writeTempFile } from "@dpkit/file" +import { writeTempFile } from "@dpkit/dataset" import { describe, expect, it } from "vitest" import { inferDialect } from "./infer.ts" diff --git a/lib/dialect/infer.ts b/library/dialect/infer.ts similarity index 73% rename from lib/dialect/infer.ts rename to library/dialect/infer.ts index fdf55d27..505d4c19 100644 --- a/lib/dialect/infer.ts +++ b/library/dialect/infer.ts @@ -1,6 +1,6 @@ -import type { Dialect, Resource } from "@dpkit/core" +import type { Dialect, Resource } from "@dpkit/metadata" import type { InferDialectOptions } from "@dpkit/table" -import { dpkit } from "../plugin.ts" +import { system } from "../system.ts" // TODO: review default values being {} vs undefined @@ -10,7 +10,7 @@ export async function inferDialect( ) { let dialect: Dialect = {} - for (const plugin of dpkit.plugins) { + for (const plugin of system.plugins) { const result = await plugin.inferDialect?.(resource, options) if (result) { dialect = result diff --git a/library/index.ts b/library/index.ts new file mode 100644 index 00000000..2ba7a224 --- /dev/null +++ b/library/index.ts @@ -0,0 +1,25 @@ +export * from "@dpkit/audio" +export * from "@dpkit/dataset" +export * from "@dpkit/database" +export * from "@dpkit/document" +export * from "@dpkit/image" +export * from "@dpkit/metadata" +export * from "@dpkit/table" +export * from "@dpkit/video" + +export type { Plugin } from "./plugin.ts" + +export { System } from "./system.ts" + +export { inferDialect } from "./dialect/index.ts" +export { inferPackage } from "./package/index.ts" +export { inferResource } from "./resource/index.ts" +export { inferSchema } from "./schema/index.ts" +export { loadPackage } from "./package/index.ts" +export { loadTable } from "./table/index.ts" +export { savePackage } from "./package/index.ts" +export { saveTable } from "./table/index.ts" +export { system } from "./system.ts" +export { validatePackage } from "./package/index.ts" +export { validateResource } from "./resource/index.ts" +export { validateTable } from "./table/index.ts" diff --git a/json/package.json b/library/package.json similarity index 63% rename from json/package.json rename to library/package.json index 37fdb1d7..1cd81f3e 100644 --- a/json/package.json +++ b/library/package.json @@ -1,9 +1,10 @@ { - "name": "@dpkit/json", + "name": "@dpkit/library", "type": "module", "version": "0.0.0-dev", "exports": "./build/index.js", "sideEffects": false, + "files": ["build"], "license": "MIT", "author": "Evgeny Karev", "repository": "https://github.com/datisthq/dpkit", @@ -18,19 +19,20 @@ "validation", "quality", "fair", - "json" + "lib" ], - "scripts": { - "build": "tsc" - }, "dependencies": { - "@dpkit/core": "workspace:*", - "@dpkit/file": "workspace:*", + "@dpkit/audio": "workspace:*", + "@dpkit/dataset": "workspace:*", + "@dpkit/database": "workspace:*", + "@dpkit/document": "workspace:*", + "@dpkit/image": "workspace:*", + "@dpkit/metadata": "workspace:*", "@dpkit/table": "workspace:*", - "csv-sniffer": "^0.1.1", - "nodejs-polars": "^0.22.1" + "@dpkit/video": "workspace:*", + "p-all": "^5.0.1" 
}, - "devDependencies": { - "@dpkit/test": "workspace:*" + "scripts": { + "build": "tsc" } } diff --git a/lib/package/fixtures/issue-153/datapackage.json b/library/package/fixtures/issue-153/datapackage.json similarity index 100% rename from lib/package/fixtures/issue-153/datapackage.json rename to library/package/fixtures/issue-153/datapackage.json diff --git a/lib/package/fixtures/issue-153/deployments.csv b/library/package/fixtures/issue-153/deployments.csv similarity index 100% rename from lib/package/fixtures/issue-153/deployments.csv rename to library/package/fixtures/issue-153/deployments.csv diff --git a/lib/package/fixtures/issue-153/observations_1.tsv b/library/package/fixtures/issue-153/observations_1.tsv similarity index 100% rename from lib/package/fixtures/issue-153/observations_1.tsv rename to library/package/fixtures/issue-153/observations_1.tsv diff --git a/lib/package/fixtures/issue-153/observations_2.tsv b/library/package/fixtures/issue-153/observations_2.tsv similarity index 100% rename from lib/package/fixtures/issue-153/observations_2.tsv rename to library/package/fixtures/issue-153/observations_2.tsv diff --git a/lib/package/index.ts b/library/package/index.ts similarity index 100% rename from lib/package/index.ts rename to library/package/index.ts diff --git a/lib/package/infer.spec.ts b/library/package/infer.spec.ts similarity index 98% rename from lib/package/infer.spec.ts rename to library/package/infer.spec.ts index 74126660..2fec6d68 100644 --- a/lib/package/infer.spec.ts +++ b/library/package/infer.spec.ts @@ -1,4 +1,4 @@ -import { writeTempFile } from "@dpkit/file" +import { writeTempFile } from "@dpkit/dataset" import { describe, expect, it } from "vitest" import { inferPackage } from "./infer.ts" diff --git a/lib/package/infer.ts b/library/package/infer.ts similarity index 55% rename from lib/package/infer.ts rename to library/package/infer.ts index 0a4322d5..147af729 100644 --- a/lib/package/infer.ts +++ b/library/package/infer.ts @@ -1,9 +1,11 @@ -import type { Package, Resource } from "@dpkit/core" +import os from "node:os" +import type { Package, Resource } from "@dpkit/metadata" import type { InferDialectOptions } from "@dpkit/table" import type { InferSchemaOptions } from "@dpkit/table" +import pAll from "p-all" import { inferResource } from "../resource/index.ts" -// TODO: Move PartialPackage/Resource to @dpkit/core? +// TODO: Move PartialPackage/Resource to @dpkit/metadata? 
interface PartialPackage extends Omit { resources: Partial[] @@ -13,13 +15,18 @@ export async function inferPackage( dataPackage: PartialPackage, options?: InferDialectOptions & InferSchemaOptions, ) { + const concurrency = os.cpus().length + + const resources = await pAll( + dataPackage.resources.map( + resource => () => inferResource(resource, options), + ), + { concurrency }, + ) + const result = { ...dataPackage, - resources: await Promise.all( - dataPackage.resources.map(async resource => { - return await inferResource(resource, options) - }), - ), + resources, } return result diff --git a/table/package/validate.spec.ts b/library/package/integrity.spec.ts similarity index 66% rename from table/package/validate.spec.ts rename to library/package/integrity.spec.ts index c73a0b76..c69210db 100644 --- a/table/package/validate.spec.ts +++ b/library/package/integrity.spec.ts @@ -1,10 +1,8 @@ -import type { Package, Resource } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Package } from "@dpkit/metadata" import { describe, expect, it } from "vitest" -import type { Table } from "../table/Table.ts" -import { validatePackageForeignKeys } from "./validate.ts" +import { validatePackageIntegrity } from "./integrity.ts" -describe("validatePackageForeignKeys", () => { +describe("validatePackageIntegrity", () => { it("should validate package with valid foreign keys", async () => { const dataPackage: Package = { name: "test-package", @@ -12,6 +10,11 @@ describe("validatePackageForeignKeys", () => { { name: "users", type: "table" as const, + data: [ + { id: 1, name: "Alice" }, + { id: 2, name: "Bob" }, + { id: 3, name: "Charlie" }, + ], schema: { fields: [ { name: "id", type: "integer" as const }, @@ -22,6 +25,11 @@ describe("validatePackageForeignKeys", () => { { name: "posts", type: "table" as const, + data: [ + { id: 1, user_id: 1, title: "Post 1" }, + { id: 2, user_id: 2, title: "Post 2" }, + { id: 3, user_id: 3, title: "Post 3" }, + ], schema: { fields: [ { name: "id", type: "integer" as const }, @@ -42,26 +50,10 @@ describe("validatePackageForeignKeys", () => { ], } - const tables: Record = { - users: DataFrame({ - id: [1, 2, 3], - name: ["Alice", "Bob", "Charlie"], - }).lazy(), - posts: DataFrame({ - id: [1, 2, 3], - user_id: [1, 2, 3], - title: ["Post 1", "Post 2", "Post 3"], - }).lazy(), - } - - const loadTable = async (resource: Resource): Promise => { - return tables[resource.name] - } + const report = await validatePackageIntegrity(dataPackage) - const result = await validatePackageForeignKeys(dataPackage, { loadTable }) - - expect(result.valid).toBe(true) - expect(result.errors).toEqual([]) + expect(report.valid).toBe(true) + expect(report.errors).toEqual([]) }) it("should detect foreign key violations", async () => { @@ -71,6 +63,10 @@ describe("validatePackageForeignKeys", () => { { name: "users", type: "table" as const, + data: [ + { id: 1, name: "Alice" }, + { id: 2, name: "Bob" }, + ], schema: { fields: [ { name: "id", type: "integer" as const }, @@ -81,6 +77,11 @@ describe("validatePackageForeignKeys", () => { { name: "posts", type: "table" as const, + data: [ + { id: 1, user_id: 1, title: "Post 1" }, + { id: 2, user_id: 2, title: "Post 2" }, + { id: 3, user_id: 999, title: "Post 3" }, + ], schema: { fields: [ { name: "id", type: "integer" as const }, @@ -101,26 +102,10 @@ describe("validatePackageForeignKeys", () => { ], } - const tables: Record = { - users: DataFrame({ - id: [1, 2], - name: ["Alice", "Bob"], - }).lazy(), - posts: DataFrame({ - id: [1, 
2, 3], - user_id: [1, 2, 999], - title: ["Post 1", "Post 2", "Post 3"], - }).lazy(), - } + const report = await validatePackageIntegrity(dataPackage) - const loadTable = async (resource: Resource): Promise
=> { - return tables[resource.name] - } - - const result = await validatePackageForeignKeys(dataPackage, { loadTable }) - - expect(result.valid).toBe(false) - expect(result.errors).toEqual([ + expect(report.valid).toBe(false) + expect(report.errors).toEqual([ { type: "foreignKey", foreignKey: { @@ -131,6 +116,7 @@ describe("validatePackageForeignKeys", () => { }, }, cells: ["999"], + resource: "posts", }, ]) }) @@ -142,6 +128,11 @@ describe("validatePackageForeignKeys", () => { { name: "categories", type: "table" as const, + data: [ + { id: 1, parent_id: 1, name: "Root" }, + { id: 2, parent_id: 1, name: "Child 1" }, + { id: 3, parent_id: 2, name: "Child 2" }, + ], schema: { fields: [ { name: "id", type: "integer" as const }, @@ -161,22 +152,10 @@ describe("validatePackageForeignKeys", () => { ], } - const tables: Record = { - categories: DataFrame({ - id: [1, 2, 3], - parent_id: [1, 1, 2], - name: ["Root", "Child 1", "Child 2"], - }).lazy(), - } - - const loadTable = async (resource: Resource): Promise
=> { - return tables[resource.name] - } - - const result = await validatePackageForeignKeys(dataPackage, { loadTable }) + const report = await validatePackageIntegrity(dataPackage) - expect(result.valid).toBe(true) - expect(result.errors).toEqual([]) + expect(report.valid).toBe(true) + expect(report.errors).toEqual([]) }) it("should detect violations in self-referencing foreign keys", async () => { @@ -186,6 +165,11 @@ describe("validatePackageForeignKeys", () => { { name: "categories", type: "table" as const, + data: [ + { id: 1, parent_id: 1, name: "Root" }, + { id: 2, parent_id: 1, name: "Child 1" }, + { id: 3, parent_id: 999, name: "Child 2" }, + ], schema: { fields: [ { name: "id", type: "integer" as const }, @@ -205,22 +189,10 @@ describe("validatePackageForeignKeys", () => { ], } - const tables: Record = { - categories: DataFrame({ - id: [1, 2, 3], - parent_id: [1, 1, 999], - name: ["Root", "Child 1", "Child 2"], - }).lazy(), - } - - const loadTable = async (resource: Resource): Promise
=> { - return tables[resource.name] - } - - const result = await validatePackageForeignKeys(dataPackage, { loadTable }) + const report = await validatePackageIntegrity(dataPackage) - expect(result.valid).toBe(false) - expect(result.errors).toEqual([ + expect(report.valid).toBe(false) + expect(report.errors).toEqual([ { type: "foreignKey", foreignKey: { @@ -230,6 +202,7 @@ describe("validatePackageForeignKeys", () => { }, }, cells: ["999"], + resource: "categories", }, ]) }) @@ -241,6 +214,7 @@ describe("validatePackageForeignKeys", () => { { name: "users", type: "table" as const, + data: [{ id: 1 }], schema: { fields: [{ name: "id", type: "integer" as const }], }, @@ -248,6 +222,13 @@ describe("validatePackageForeignKeys", () => { { name: "posts", type: "table" as const, + data: [ + { id: 1, user_id: 999 }, + { id: 2, user_id: 998 }, + { id: 3, user_id: 997 }, + { id: 4, user_id: 996 }, + { id: 5, user_id: 995 }, + ], schema: { fields: [ { name: "id", type: "integer" as const }, @@ -267,27 +248,12 @@ describe("validatePackageForeignKeys", () => { ], } - const tables: Record = { - users: DataFrame({ - id: [1], - }).lazy(), - posts: DataFrame({ - id: [1, 2, 3, 4, 5], - user_id: [999, 998, 997, 996, 995], - }).lazy(), - } - - const loadTable = async (resource: Resource): Promise
=> { - return tables[resource.name] - } - - const result = await validatePackageForeignKeys(dataPackage, { - loadTable, + const report = await validatePackageIntegrity(dataPackage, { maxErrors: 3, }) - expect(result.valid).toBe(false) - expect(result.errors).toEqual([ + expect(report.valid).toBe(false) + expect(report.errors).toEqual([ { type: "foreignKey", foreignKey: { @@ -298,6 +264,7 @@ describe("validatePackageForeignKeys", () => { }, }, cells: ["999"], + resource: "posts", }, { type: "foreignKey", @@ -309,6 +276,7 @@ describe("validatePackageForeignKeys", () => { }, }, cells: ["998"], + resource: "posts", }, { type: "foreignKey", @@ -320,6 +288,7 @@ describe("validatePackageForeignKeys", () => { }, }, cells: ["997"], + resource: "posts", }, ]) }) @@ -331,6 +300,7 @@ describe("validatePackageForeignKeys", () => { { name: "users", type: "table" as const, + data: [{ id: 1 }, { id: 2 }], schema: { fields: [{ name: "id", type: "integer" as const }], }, @@ -338,6 +308,7 @@ describe("validatePackageForeignKeys", () => { { name: "categories", type: "table" as const, + data: [{ id: 10 }, { id: 20 }], schema: { fields: [{ name: "id", type: "integer" as const }], }, @@ -345,6 +316,10 @@ describe("validatePackageForeignKeys", () => { { name: "posts", type: "table" as const, + data: [ + { id: 1, user_id: 1, category_id: 10 }, + { id: 2, user_id: 2, category_id: 20 }, + ], schema: { fields: [ { name: "id", type: "integer" as const }, @@ -372,28 +347,10 @@ describe("validatePackageForeignKeys", () => { ], } - const tables: Record = { - users: DataFrame({ - id: [1, 2], - }).lazy(), - categories: DataFrame({ - id: [10, 20], - }).lazy(), - posts: DataFrame({ - id: [1, 2], - user_id: [1, 2], - category_id: [10, 20], - }).lazy(), - } - - const loadTable = async (resource: Resource): Promise
=> { - return tables[resource.name] - } - - const result = await validatePackageForeignKeys(dataPackage, { loadTable }) + const report = await validatePackageIntegrity(dataPackage) - expect(result.valid).toBe(true) - expect(result.errors).toEqual([]) + expect(report.valid).toBe(true) + expect(report.errors).toEqual([]) }) it("should detect violations in multiple foreign keys", async () => { @@ -403,6 +360,7 @@ describe("validatePackageForeignKeys", () => { { name: "users", type: "table" as const, + data: [{ id: 1 }], schema: { fields: [{ name: "id", type: "integer" as const }], }, @@ -410,6 +368,7 @@ describe("validatePackageForeignKeys", () => { { name: "categories", type: "table" as const, + data: [{ id: 10 }], schema: { fields: [{ name: "id", type: "integer" as const }], }, @@ -417,6 +376,10 @@ describe("validatePackageForeignKeys", () => { { name: "posts", type: "table" as const, + data: [ + { id: 1, user_id: 999, category_id: 10 }, + { id: 2, user_id: 1, category_id: 888 }, + ], schema: { fields: [ { name: "id", type: "integer" as const }, @@ -444,28 +407,10 @@ describe("validatePackageForeignKeys", () => { ], } - const tables: Record = { - users: DataFrame({ - id: [1], - }).lazy(), - categories: DataFrame({ - id: [10], - }).lazy(), - posts: DataFrame({ - id: [1, 2], - user_id: [999, 1], - category_id: [10, 888], - }).lazy(), - } - - const loadTable = async (resource: Resource): Promise
=> { - return tables[resource.name] - } - - const result = await validatePackageForeignKeys(dataPackage, { loadTable }) + const report = await validatePackageIntegrity(dataPackage) - expect(result.valid).toBe(false) - expect(result.errors).toEqual([ + expect(report.valid).toBe(false) + expect(report.errors).toEqual([ { type: "foreignKey", foreignKey: { @@ -476,6 +421,7 @@ describe("validatePackageForeignKeys", () => { }, }, cells: ["999"], + resource: "posts", }, { type: "foreignKey", @@ -487,6 +433,7 @@ describe("validatePackageForeignKeys", () => { }, }, cells: ["888"], + resource: "posts", }, ]) }) @@ -498,10 +445,12 @@ describe("validatePackageForeignKeys", () => { { name: "no-schema", type: "table" as const, + data: [{ value: 1 }], }, { name: "users", type: "table" as const, + data: [{ id: 1 }], schema: { fields: [{ name: "id", type: "integer" as const }], }, @@ -509,20 +458,10 @@ describe("validatePackageForeignKeys", () => { ], } - const tables: Record = { - users: DataFrame({ - id: [1], - }).lazy(), - } - - const loadTable = async (resource: Resource): Promise
=> { - return tables[resource.name] - } + const report = await validatePackageIntegrity(dataPackage) - const result = await validatePackageForeignKeys(dataPackage, { loadTable }) - - expect(result.valid).toBe(true) - expect(result.errors).toEqual([]) + expect(report.valid).toBe(true) + expect(report.errors).toEqual([]) }) it("should skip resources without foreign keys", async () => { @@ -532,6 +471,10 @@ describe("validatePackageForeignKeys", () => { { name: "users", type: "table" as const, + data: [ + { id: 1, name: "Alice" }, + { id: 2, name: "Bob" }, + ], schema: { fields: [ { name: "id", type: "integer" as const }, @@ -542,21 +485,10 @@ describe("validatePackageForeignKeys", () => { ], } - const tables: Record = { - users: DataFrame({ - id: [1, 2], - name: ["Alice", "Bob"], - }).lazy(), - } - - const loadTable = async (resource: Resource): Promise
=> { - return tables[resource.name] - } - - const result = await validatePackageForeignKeys(dataPackage, { loadTable }) + const report = await validatePackageIntegrity(dataPackage) - expect(result.valid).toBe(true) - expect(result.errors).toEqual([]) + expect(report.valid).toBe(true) + expect(report.errors).toEqual([]) }) it("should handle composite foreign keys", async () => { @@ -566,6 +498,10 @@ describe("validatePackageForeignKeys", () => { { name: "users", type: "table" as const, + data: [ + { first_name: "Alice", last_name: "Smith" }, + { first_name: "Bob", last_name: "Jones" }, + ], schema: { fields: [ { name: "first_name", type: "string" as const }, @@ -576,6 +512,10 @@ describe("validatePackageForeignKeys", () => { { name: "posts", type: "table" as const, + data: [ + { id: 1, author_first: "Alice", author_last: "Smith" }, + { id: 2, author_first: "Bob", author_last: "Jones" }, + ], schema: { fields: [ { name: "id", type: "integer" as const }, @@ -596,26 +536,10 @@ describe("validatePackageForeignKeys", () => { ], } - const tables: Record = { - users: DataFrame({ - first_name: ["Alice", "Bob"], - last_name: ["Smith", "Jones"], - }).lazy(), - posts: DataFrame({ - id: [1, 2], - author_first: ["Alice", "Bob"], - author_last: ["Smith", "Jones"], - }).lazy(), - } - - const loadTable = async (resource: Resource): Promise
=> { - return tables[resource.name] - } + const report = await validatePackageIntegrity(dataPackage) - const result = await validatePackageForeignKeys(dataPackage, { loadTable }) - - expect(result.valid).toBe(true) - expect(result.errors).toEqual([]) + expect(report.valid).toBe(true) + expect(report.errors).toEqual([]) }) it("should detect violations in composite foreign keys", async () => { @@ -625,6 +549,10 @@ describe("validatePackageForeignKeys", () => { { name: "users", type: "table" as const, + data: [ + { first_name: "Alice", last_name: "Smith" }, + { first_name: "Bob", last_name: "Jones" }, + ], schema: { fields: [ { name: "first_name", type: "string" as const }, @@ -635,6 +563,10 @@ describe("validatePackageForeignKeys", () => { { name: "posts", type: "table" as const, + data: [ + { id: 1, author_first: "Alice", author_last: "Smith" }, + { id: 2, author_first: "Charlie", author_last: "Brown" }, + ], schema: { fields: [ { name: "id", type: "integer" as const }, @@ -655,26 +587,10 @@ describe("validatePackageForeignKeys", () => { ], } - const tables: Record = { - users: DataFrame({ - first_name: ["Alice", "Bob"], - last_name: ["Smith", "Jones"], - }).lazy(), - posts: DataFrame({ - id: [1, 2], - author_first: ["Alice", "Charlie"], - author_last: ["Smith", "Brown"], - }).lazy(), - } - - const loadTable = async (resource: Resource): Promise
=> { - return tables[resource.name] - } - - const result = await validatePackageForeignKeys(dataPackage, { loadTable }) + const report = await validatePackageIntegrity(dataPackage) - expect(result.valid).toBe(false) - expect(result.errors).toEqual([ + expect(report.valid).toBe(false) + expect(report.errors).toEqual([ { type: "foreignKey", foreignKey: { @@ -685,6 +601,7 @@ describe("validatePackageForeignKeys", () => { }, }, cells: ["Charlie", "Brown"], + resource: "posts", }, ]) }) diff --git a/table/package/validate.ts b/library/package/integrity.ts similarity index 75% rename from table/package/validate.ts rename to library/package/integrity.ts index cec50a7f..1da0683f 100644 --- a/table/package/validate.ts +++ b/library/package/integrity.ts @@ -1,22 +1,20 @@ -import type { Package, Resource } from "@dpkit/core" -import { resolveSchema } from "@dpkit/core" -import type { DataError } from "@dpkit/core" -import type { ForeignKeyError } from "../error/index.ts" -import type { Table } from "../table/Table.ts" +import type { Package } from "@dpkit/metadata" +import { createReport } from "@dpkit/metadata" +import { resolveSchema } from "@dpkit/metadata" +import type { BoundError } from "@dpkit/metadata" +import type { Table } from "@dpkit/table" +import { loadTable } from "../table/index.ts" // TODO: foreign key fields definition should be validated as well (metadata/here?) // TODO: review temporary files creation from validatePackage call -export async function validatePackageForeignKeys( +export async function validatePackageIntegrity( dataPackage: Package, - options: { - maxErrors?: number - loadTable: (resource: Resource) => Promise
- }, + options?: { maxErrors?: number }, ) { - const { loadTable, maxErrors = 1000 } = options + const { maxErrors = 1000 } = options ?? {} - const errors: (DataError | ForeignKeyError)[] = [] + const errors: BoundError[] = [] const tables: Record = {} for (const resource of dataPackage.resources) { @@ -38,6 +36,7 @@ export async function validatePackageForeignKeys( errors.push({ type: "data", message: `missing ${name} resource`, + resource: name, }) continue @@ -50,6 +49,7 @@ export async function validatePackageForeignKeys( errors.push({ type: "data", message: `missing ${resource.name} table`, + resource: name, }) continue @@ -82,13 +82,11 @@ export async function validatePackageForeignKeys( type: "foreignKey", foreignKey, cells: Object.values(row).map(String), + resource: resource.name, }) } } } - return { - errors: errors.slice(0, maxErrors), - valid: errors.length === 0, - } + return createReport(errors, { maxErrors }) } diff --git a/lib/package/load.spec.ts b/library/package/load.spec.ts similarity index 84% rename from lib/package/load.spec.ts rename to library/package/load.spec.ts index 5512f56d..3fa6015e 100644 --- a/lib/package/load.spec.ts +++ b/library/package/load.spec.ts @@ -1,5 +1,5 @@ import { basename } from "node:path" -import { writeTempFile } from "@dpkit/file" +import { writeTempFile } from "@dpkit/dataset" import { describe, expect, it } from "vitest" import { loadPackage } from "./load.ts" @@ -14,7 +14,10 @@ describe("loadPackage", () => { }, ], }) - const packagePath = await writeTempFile(packageContent) + + const packagePath = await writeTempFile(packageContent, { + filename: "datapackage.json", + }) const dataPackage = await loadPackage(packagePath) @@ -40,7 +43,10 @@ describe("loadPackage", () => { }, ], }) - const packagePath = await writeTempFile(packageContent) + + const packagePath = await writeTempFile(packageContent, { + filename: "datapackage.json", + }) const dataPackage = await loadPackage(packagePath) @@ -62,7 +68,10 @@ describe("loadPackage", () => { }, ], }) - const packagePath = await writeTempFile(packageContent) + + const packagePath = await writeTempFile(packageContent, { + filename: "datapackage.json", + }) const dataPackage = await loadPackage(packagePath) @@ -85,7 +94,10 @@ describe("loadPackage", () => { }, ], }) - const packagePath = await writeTempFile(packageContent) + + const packagePath = await writeTempFile(packageContent, { + filename: "datapackage.json", + }) const dataPackage = await loadPackage(packagePath) @@ -109,7 +121,10 @@ describe("loadPackage", () => { }, ], }) - const packagePath = await writeTempFile(packageContent) + + const packagePath = await writeTempFile(packageContent, { + filename: "datapackage.json", + }) const dataPackage = await loadPackage(packagePath) diff --git a/library/package/load.ts b/library/package/load.ts new file mode 100644 index 00000000..fe7de194 --- /dev/null +++ b/library/package/load.ts @@ -0,0 +1,10 @@ +import { system } from "../system.ts" + +export async function loadPackage(source: string) { + for (const plugin of system.plugins) { + const result = await plugin.loadPackage?.(source) + if (result) return result + } + + throw new Error(`No plugin can load the package: ${source}`) +} diff --git a/lib/package/save.spec.ts b/library/package/save.spec.ts similarity index 85% rename from lib/package/save.spec.ts rename to library/package/save.spec.ts index 1f56d207..e60d45bf 100644 --- a/lib/package/save.spec.ts +++ b/library/package/save.spec.ts @@ -1,7 +1,7 @@ import { access } from 
"node:fs/promises" import { join } from "node:path" -import { writeTempFile } from "@dpkit/file" -import { getTempFolderPath } from "@dpkit/folder" +import { writeTempFile } from "@dpkit/dataset" +import { getTempFolderPath } from "@dpkit/dataset" import { describe, expect, it } from "vitest" import { loadPackage } from "./load.ts" import { savePackage } from "./save.ts" @@ -17,7 +17,11 @@ describe("savePackage", () => { }, ], }) - const packagePath = await writeTempFile(packageContent) + + const packagePath = await writeTempFile(packageContent, { + filename: "datapackage.json", + }) + const dataPackage = await loadPackage(packagePath) const tempDir = getTempFolderPath() const targetPath = join(tempDir, "datapackage.json") @@ -47,7 +51,11 @@ describe("savePackage", () => { }, ], }) - const packagePath = await writeTempFile(packageContent) + + const packagePath = await writeTempFile(packageContent, { + filename: "datapackage.json", + }) + const dataPackage = await loadPackage(packagePath) const tempDir = getTempFolderPath() const targetPath = join(tempDir, "datapackage.json") @@ -74,7 +82,11 @@ describe("savePackage", () => { }, ], }) - const packagePath = await writeTempFile(packageContent) + + const packagePath = await writeTempFile(packageContent, { + filename: "datapackage.json", + }) + const originalPackage = await loadPackage(packagePath) const tempDir = getTempFolderPath() const targetPath = join(tempDir, "datapackage.json") @@ -100,7 +112,11 @@ describe("savePackage", () => { }, ], }) - const packagePath = await writeTempFile(packageContent) + + const packagePath = await writeTempFile(packageContent, { + filename: "datapackage.json", + }) + const dataPackage = await loadPackage(packagePath) const tempDir = getTempFolderPath() const targetPath = join(tempDir, "datapackage.json") @@ -132,7 +148,11 @@ describe("savePackage", () => { }, ], }) - const packagePath = await writeTempFile(packageContent) + + const packagePath = await writeTempFile(packageContent, { + filename: "datapackage.json", + }) + const dataPackage = await loadPackage(packagePath) const tempDir = getTempFolderPath() const targetPath = join(tempDir, "datapackage.json") diff --git a/library/package/save.ts b/library/package/save.ts new file mode 100644 index 00000000..8ce9661b --- /dev/null +++ b/library/package/save.ts @@ -0,0 +1,19 @@ +import type { SavePackageOptions } from "@dpkit/dataset" +import type { Package } from "@dpkit/metadata" +import { system } from "../system.ts" + +export async function savePackage( + dataPackage: Package, + options: SavePackageOptions, +) { + for (const plugin of system.plugins) { + const result = await plugin.savePackage?.(dataPackage, { + plugins: system.plugins, + ...options, + }) + + if (result) return result + } + + throw new Error(`No plugin can save the package: ${options.target}`) +} diff --git a/lib/package/validate.spec.ts b/library/package/validate.spec.ts similarity index 74% rename from lib/package/validate.spec.ts rename to library/package/validate.spec.ts index ade13e8a..02719523 100644 --- a/lib/package/validate.spec.ts +++ b/library/package/validate.spec.ts @@ -1,3 +1,4 @@ +import { join } from "node:path" import { describe, expect, it } from "vitest" import { validatePackage } from "./validate.ts" @@ -23,10 +24,10 @@ describe("validatePackage", () => { ], } - const result = await validatePackage(dataPackage) + const report = await validatePackage(dataPackage) - expect(result.valid).toBe(true) - expect(result.errors).toEqual([]) + expect(report.valid).toBe(true) + 
expect(report.errors).toEqual([]) }) it("should detect invalid resource data", async () => { @@ -50,11 +51,11 @@ describe("validatePackage", () => { ], } - const result = await validatePackage(dataPackage) + const report = await validatePackage(dataPackage) - expect(result.valid).toBe(false) - expect(result.errors.length).toBeGreaterThan(0) - expect(result.errors?.[0]?.resource).toBe("test-resource") + expect(report.valid).toBe(false) + expect(report.errors.length).toBeGreaterThan(0) + expect(report.errors?.[0]?.resource).toBe("test-resource") }) it("should validate multiple resources", async () => { @@ -86,10 +87,10 @@ describe("validatePackage", () => { ], } - const result = await validatePackage(dataPackage) + const report = await validatePackage(dataPackage) - expect(result.valid).toBe(true) - expect(result.errors).toEqual([]) + expect(report.valid).toBe(true) + expect(report.errors).toEqual([]) }) it("should detect errors in multiple resources", async () => { @@ -121,12 +122,12 @@ describe("validatePackage", () => { ], } - const result = await validatePackage(dataPackage) + const report = await validatePackage(dataPackage) - expect(result.valid).toBe(false) - expect(result.errors.length).toBeGreaterThan(1) - expect(result.errors.some(e => e.resource === "resource-1")).toBe(true) - expect(result.errors.some(e => e.resource === "resource-2")).toBe(true) + expect(report.valid).toBe(false) + expect(report.errors.length).toBeGreaterThan(1) + expect(report.errors.some(e => e.resource === "resource-1")).toBe(true) + expect(report.errors.some(e => e.resource === "resource-2")).toBe(true) }) it("should reject package with no resources", async () => { @@ -135,11 +136,11 @@ describe("validatePackage", () => { resources: [], } - const result = await validatePackage(dataPackage) + const report = await validatePackage(dataPackage) - expect(result.valid).toBe(false) - expect(result.errors.length).toBeGreaterThan(0) - const firstError = result.errors?.[0] + expect(report.valid).toBe(false) + expect(report.errors.length).toBeGreaterThan(0) + const firstError = report.errors?.[0] if (firstError && "message" in firstError) { expect(firstError.message).toContain("must NOT have fewer than 1 items") } @@ -163,21 +164,24 @@ describe("validatePackage", () => { ], } - const result = await validatePackage(dataPackage) + const report = await validatePackage(dataPackage) - expect(result.valid).toBe(false) - result.errors.forEach(error => { + expect(report.valid).toBe(false) + report.errors.forEach(error => { expect(error.resource).toBe("error-resource") }) }) it("should detect bad cell type (issue-153)", async () => { - const dataPackage = "lib/package/fixtures/issue-153/datapackage.json" + const dataPackage = join( + import.meta.dirname, + "fixtures/issue-153/datapackage.json", + ) - const result = await validatePackage(dataPackage) + const report = await validatePackage(dataPackage) - expect(result.valid).toBe(false) - expect(result.errors).toEqual([ + expect(report.valid).toBe(false) + expect(report.errors).toEqual([ { rowNumber: 3, type: "cell/type", diff --git a/lib/package/validate.ts b/library/package/validate.ts similarity index 59% rename from lib/package/validate.ts rename to library/package/validate.ts index 8ed390d3..904ca8be 100644 --- a/lib/package/validate.ts +++ b/library/package/validate.ts @@ -1,10 +1,13 @@ -import type { Descriptor, Package } from "@dpkit/core" -import { loadDescriptor, validatePackageMetadata } from "@dpkit/core" -import { resolveBasepath } from "@dpkit/core" -import { 
validatePackageForeignKeys } from "@dpkit/table" -import { dpkit } from "../plugin.ts" +import os from "node:os" +import type { BoundError } from "@dpkit/metadata" +import { createReport } from "@dpkit/metadata" +import type { Descriptor, Package } from "@dpkit/metadata" +import { loadDescriptor, validatePackageMetadata } from "@dpkit/metadata" +import { resolveBasepath } from "@dpkit/metadata" +import pAll from "p-all" import { validateResourceData } from "../resource/index.ts" -import { loadTable } from "../table/index.ts" +import { system } from "../system.ts" +import { validatePackageIntegrity } from "./integrity.ts" export async function validatePackage( source: string | Descriptor | Partial, @@ -16,7 +19,7 @@ export async function validatePackage( if (typeof source !== "string") { descriptor = source } else { - for (const plugin of dpkit.plugins) { + for (const plugin of system.plugins) { const result = await plugin.loadPackage?.(source) if (result) { descriptor = result as unknown as Descriptor @@ -45,30 +48,34 @@ export async function validatePackage( } const dataReport = await validatePackageData(metadataReport.dataPackage) - const fkReport = await validatePackageForeignKeys( + const integrityReport = await validatePackageIntegrity( metadataReport.dataPackage, - { loadTable }, ) - const errors = [...dataReport.errors, ...fkReport.errors] - return { valid: errors.length === 0, errors } + const errors = [...dataReport.errors, ...integrityReport.errors] + return createReport(errors) } export async function validatePackageData(dataPackage: Package) { - const errors = ( - await Promise.all( - dataPackage.resources.map(async resource => { + const concurrency = os.cpus().length + + const errors: BoundError[] = ( + await pAll( + dataPackage.resources.map(resource => async () => { try { - const { errors } = await validateResourceData(resource) - return errors.map(error => ({ ...error, resource: resource.name })) + const report = await validateResourceData(resource) + return report.errors.map(error => ({ + ...error, + resource: resource.name, + })) } catch (error) { const message = error instanceof Error ? 
error.message : String(error) throw new Error(`[${resource.name}] ${message}`) } }), + { concurrency }, ) ).flat() - const valid = !errors.length - return { valid, errors: errors } + return createReport(errors) } diff --git a/library/plugin.ts b/library/plugin.ts new file mode 100644 index 00000000..7cb04d39 --- /dev/null +++ b/library/plugin.ts @@ -0,0 +1,3 @@ +import type { TablePlugin } from "@dpkit/table" + +export type Plugin = TablePlugin diff --git a/lib/resource/index.ts b/library/resource/index.ts similarity index 100% rename from lib/resource/index.ts rename to library/resource/index.ts diff --git a/lib/resource/infer.spec.ts b/library/resource/infer.spec.ts similarity index 99% rename from lib/resource/infer.spec.ts rename to library/resource/infer.spec.ts index 3c22bab2..afaa9ed8 100644 --- a/lib/resource/infer.spec.ts +++ b/library/resource/infer.spec.ts @@ -1,4 +1,4 @@ -import { writeTempFile } from "@dpkit/file" +import { writeTempFile } from "@dpkit/dataset" import { describe, expect, it } from "vitest" import { inferResource } from "./infer.ts" diff --git a/lib/resource/infer.ts b/library/resource/infer.ts similarity index 65% rename from lib/resource/infer.ts rename to library/resource/infer.ts index b496ee6b..48db23cb 100644 --- a/lib/resource/infer.ts +++ b/library/resource/infer.ts @@ -1,25 +1,23 @@ -import type { Resource } from "@dpkit/core" -import { inferResourceFormat, inferResourceName } from "@dpkit/core" -import { prefetchFile } from "@dpkit/file" -import { inferFileBytes, inferFileEncoding, inferFileHash } from "@dpkit/file" +import { prefetchFile } from "@dpkit/dataset" +import { inferBytes, inferEncoding, inferHash } from "@dpkit/dataset" +import type { Resource } from "@dpkit/metadata" +import { inferFormat, inferName } from "@dpkit/metadata" import type { InferDialectOptions } from "@dpkit/table" import type { InferSchemaOptions } from "@dpkit/table" import { inferDialect } from "../dialect/index.ts" import { inferSchema } from "../schema/index.ts" -// TODO: Support multipart resources? (clarify on the specs level) - export async function inferResource( resource: Partial, options?: InferDialectOptions & InferSchemaOptions, ) { const result = { ...resource, - name: resource.name ?? inferResourceName(resource), + name: resource.name ?? 
inferName(resource), } if (!result.format) { - result.format = inferResourceFormat(resource) + result.format = inferFormat(resource) } if (typeof resource.path === "string") { @@ -27,18 +25,18 @@ export async function inferResource( const localResource = { ...resource, path: localPath } if (!result.encoding) { - const encoding = await inferFileEncoding(localPath) + const encoding = await inferEncoding(localResource) if (encoding) { result.encoding = encoding } } if (!result.bytes) { - result.bytes = await inferFileBytes(localPath) + result.bytes = await inferBytes(localResource) } if (!result.hash) { - result.hash = await inferFileHash(localPath) + result.hash = await inferHash(localResource) } if (!result.dialect) { diff --git a/lib/resource/validate.spec.ts b/library/resource/validate.spec.ts similarity index 60% rename from lib/resource/validate.spec.ts rename to library/resource/validate.spec.ts index 9cff0490..f125e76f 100644 --- a/lib/resource/validate.spec.ts +++ b/library/resource/validate.spec.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from "vitest" -import { validateResource, validateResourceData } from "./validate.ts" +import { validateResource } from "./validate.ts" describe("validateResource", () => { it("should catch validation errors for invalid tabular data", async () => { @@ -78,7 +78,7 @@ describe("validateResource", () => { it("should catch missing table error when schema is defined but table cannot be loaded", async () => { const resource = { name: "test", - path: "https://example.com/table.bad", + path: "table.bad", schema: { fields: [ { name: "id", type: "integer" as const }, @@ -93,7 +93,71 @@ describe("validateResource", () => { expect(report.errors).toEqual([ { type: "data", - message: "missing table", + message: "missing test table", + }, + ]) + }) + + it("should validate document with jsonSchema", async () => { + const resource = { + name: "test-document", + data: { + name: "test-package", + version: "1.0.0", + author: { + name: "John Doe", + email: "john@example.com", + }, + }, + jsonSchema: { + type: "object", + required: ["name", "version", "author"], + properties: { + name: { type: "string" }, + version: { type: "string" }, + author: { + type: "object", + required: ["name", "email"], + properties: { + name: { type: "string" }, + email: { type: "string" }, + }, + }, + }, + }, + } + + const report = await validateResource(resource) + + expect(report.valid).toBe(true) + expect(report.errors).toEqual([]) + }) + + it("should catch validation errors for document with invalid jsonSchema data", async () => { + const resource = { + name: "test-document", + data: { + name: "test-package", + version: 123, + }, + jsonSchema: { + type: "object", + required: ["name", "version"], + properties: { + name: { type: "string" }, + version: { type: "string" }, + }, + }, + } + + const report = await validateResource(resource) + + expect(report.valid).toBe(false) + expect(report.errors).toEqual([ + { + type: "document/json", + pointer: "/version", + message: "must be string", }, ]) }) diff --git a/library/resource/validate.ts b/library/resource/validate.ts new file mode 100644 index 00000000..0d48e6d7 --- /dev/null +++ b/library/resource/validate.ts @@ -0,0 +1,51 @@ +import { validateFile } from "@dpkit/dataset" +import { validateDocument } from "@dpkit/document" +import type { Descriptor, Resource } from "@dpkit/metadata" +import { createReport } from "@dpkit/metadata" +import { loadDescriptor, validateResourceMetadata } from "@dpkit/metadata" +import { resolveBasepath } 
from "@dpkit/metadata"
+import type { InferSchemaOptions } from "@dpkit/table"
+import { validateTable } from "../table/index.ts"
+
+export async function validateResource(
+  source: string | Descriptor | Partial<Resource>,
+  options?: InferSchemaOptions & { basepath?: string },
+) {
+  let descriptor = source
+  let basepath = options?.basepath
+
+  if (typeof descriptor === "string") {
+    basepath = await resolveBasepath(descriptor)
+    descriptor = await loadDescriptor(descriptor)
+  }
+
+  const report = await validateResourceMetadata(descriptor, { basepath })
+
+  if (!report.resource) {
+    return report
+  }
+
+  return await validateResourceData(report.resource, options)
+}
+
+export async function validateResourceData(
+  resource: Partial<Resource>,
+  options?: InferSchemaOptions,
+) {
+  const fileReport = await validateFile(resource)
+  if (!fileReport.valid) {
+    return fileReport
+  }
+
+  const tableReport = await validateTable(resource, options)
+  if (!tableReport.valid) {
+    return tableReport
+  }
+
+  const documentReport = await validateDocument(resource)
+  if (!documentReport.valid) {
+    return documentReport
+  }
+
+  return createReport()
+}
diff --git a/lib/schema/index.ts b/library/schema/index.ts
similarity index 100%
rename from lib/schema/index.ts
rename to library/schema/index.ts
diff --git a/lib/schema/infer.spec.ts b/library/schema/infer.spec.ts
similarity index 98%
rename from lib/schema/infer.spec.ts
rename to library/schema/infer.spec.ts
index b51607aa..6aefe328 100644
--- a/lib/schema/infer.spec.ts
+++ b/library/schema/infer.spec.ts
@@ -1,4 +1,4 @@
-import { writeTempFile } from "@dpkit/file"
+import { writeTempFile } from "@dpkit/dataset"
 import { describe, expect, it } from "vitest"
 import { inferSchema } from "./infer.ts"
@@ -33,6 +33,7 @@ describe("inferSchema", () => {
   it("should infer schema from inline data", async () => {
     const resource = {
       name: "test-resource",
+      type: "table" as const,
       data: [
         { id: 1, name: "alice" },
         { id: 2, name: "bob" },
@@ -146,6 +147,7 @@ describe("inferSchema", () => {
   it("should infer schema from complex inline data", async () => {
     const resource = {
       name: "test-resource",
+      type: "table" as const,
       data: [
         {
           id: 1,
diff --git a/lib/schema/infer.ts b/library/schema/infer.ts
similarity index 81%
rename from lib/schema/infer.ts
rename to library/schema/infer.ts
index aa2f1b85..281e9527 100644
--- a/lib/schema/infer.ts
+++ b/library/schema/infer.ts
@@ -1,14 +1,14 @@
-import type { Resource } from "@dpkit/core"
+import type { Resource } from "@dpkit/metadata"
 import type { InferSchemaOptions } from "@dpkit/table"
 import { inferSchemaFromTable } from "@dpkit/table"
-import { dpkit } from "../plugin.ts"
+import { system } from "../system.ts"
 import { loadTable } from "../table/index.ts"

 export async function inferSchema(
   resource: Partial<Resource>,
   options?: InferSchemaOptions,
 ) {
-  for (const plugin of dpkit.plugins) {
+  for (const plugin of system.plugins) {
     const schema = await plugin.inferSchema?.(resource, options)
     if (schema) {
       return schema
diff --git a/library/system.ts b/library/system.ts
new file mode 100644
index 00000000..337552d5
--- /dev/null
+++ b/library/system.ts
@@ -0,0 +1,50 @@
+import { DatabasePlugin } from "@dpkit/database"
+import { CkanPlugin } from "@dpkit/dataset"
+import { DatahubPlugin } from "@dpkit/dataset"
+import { DescriptorPlugin } from "@dpkit/dataset"
+import { FolderPlugin } from "@dpkit/dataset"
+import { GithubPlugin } from "@dpkit/dataset"
+import { ZenodoPlugin } from "@dpkit/dataset"
+import { ZipPlugin } from "@dpkit/dataset"
+import { 
CsvPlugin } from "@dpkit/table"
+import { ArrowPlugin } from "@dpkit/table"
+import { InlinePlugin } from "@dpkit/table"
+import { JsonPlugin } from "@dpkit/table"
+import { OdsPlugin } from "@dpkit/table"
+import { ParquetPlugin } from "@dpkit/table"
+import { XlsxPlugin } from "@dpkit/table"
+import type { Plugin } from "./plugin.ts"
+
+export class System {
+  plugins: Plugin[] = []
+
+  register(PluginClass: new () => Plugin) {
+    this.plugins.unshift(new PluginClass())
+  }
+}
+
+export const system = new System()
+
+// Dataset
+
+system.register(CkanPlugin)
+system.register(DatahubPlugin)
+system.register(DescriptorPlugin)
+system.register(GithubPlugin)
+system.register(ZenodoPlugin)
+system.register(FolderPlugin)
+system.register(ZipPlugin)
+
+// Table
+
+system.register(ArrowPlugin)
+system.register(CsvPlugin)
+system.register(InlinePlugin)
+system.register(JsonPlugin)
+system.register(OdsPlugin)
+system.register(ParquetPlugin)
+system.register(XlsxPlugin)
+
+// Mixed
+
+system.register(DatabasePlugin)
diff --git a/lib/table/index.ts b/library/table/index.ts
similarity index 62%
rename from lib/table/index.ts
rename to library/table/index.ts
index 310e12ef..f493ed6c 100644
--- a/lib/table/index.ts
+++ b/library/table/index.ts
@@ -1,2 +1,3 @@
 export { loadTable } from "./load.ts"
 export { saveTable } from "./save.ts"
+export { validateTable } from "./validate.ts"
diff --git a/lib/table/infer.ts b/library/table/infer.ts
similarity index 84%
rename from lib/table/infer.ts
rename to library/table/infer.ts
index ad282128..3dc619fc 100644
--- a/lib/table/infer.ts
+++ b/library/table/infer.ts
@@ -1,5 +1,5 @@
-import type { Resource } from "@dpkit/core"
-import { resolveDialect, resolveSchema } from "@dpkit/core"
+import type { Resource } from "@dpkit/metadata"
+import { resolveDialect, resolveSchema } from "@dpkit/metadata"
 import { inferSchemaFromTable } from "@dpkit/table"
 import { inferDialect } from "../dialect/index.ts"
 import { loadTable } from "./load.ts"
diff --git a/lib/table/load.spec.ts b/library/table/load.spec.ts
similarity index 98%
rename from lib/table/load.spec.ts
rename to library/table/load.spec.ts
index 6ba12a48..f6257108 100644
--- a/lib/table/load.spec.ts
+++ b/library/table/load.spec.ts
@@ -1,4 +1,4 @@
-import { writeTempFile } from "@dpkit/file"
+import { writeTempFile } from "@dpkit/dataset"
 import { describe, expect, it } from "vitest"
 import { loadTable } from "./load.ts"
@@ -16,6 +16,7 @@ describe("loadTable", () => {
   it("should load table from inline data", async () => {
     const resource = {
       name: "test-resource",
+      type: "table" as const,
       data: [
         { id: 1, name: "alice" },
         { id: 2, name: "bob" },
diff --git a/lib/table/load.ts b/library/table/load.ts
similarity index 68%
rename from lib/table/load.ts
rename to library/table/load.ts
index 4dcd484b..ec5a3129 100644
--- a/lib/table/load.ts
+++ b/library/table/load.ts
@@ -1,12 +1,12 @@
-import type { Resource } from "@dpkit/core"
+import type { Resource } from "@dpkit/metadata"
 import type { LoadTableOptions } from "@dpkit/table"
-import { dpkit } from "../plugin.ts"
+import { system } from "../system.ts"

 export async function loadTable(
   resource: Partial<Resource>,
   options?: LoadTableOptions,
 ) {
-  for (const plugin of dpkit.plugins) {
+  for (const plugin of system.plugins) {
     const table = await plugin.loadTable?.(resource, options)
     if (table) {
       return table
diff --git a/lib/table/save.spec.ts b/library/table/save.spec.ts
similarity index 92%
rename from lib/table/save.spec.ts
rename to library/table/save.spec.ts
index 
bf0cb848..198dabb5 100644 --- a/lib/table/save.spec.ts +++ b/library/table/save.spec.ts @@ -1,6 +1,7 @@ import { access, unlink } from "node:fs/promises" -import { writeTempFile } from "@dpkit/file" +import { writeTempFile } from "@dpkit/dataset" import { describe, expect, it } from "vitest" +import { assert } from "vitest" import { loadTable } from "./load.ts" import { saveTable } from "./save.ts" @@ -12,6 +13,7 @@ describe("saveTable", () => { const outputPath = await writeTempFile("") await unlink(outputPath) + assert(table, "table is not defined") const savedPath = await saveTable(table, { path: outputPath, format: "csv" as const, @@ -34,6 +36,7 @@ describe("saveTable", () => { const outputPath = await writeTempFile("") await unlink(outputPath) + assert(table, "table is not defined") const savedPath = await saveTable(table, { path: outputPath, format: "csv" as const, @@ -55,6 +58,7 @@ describe("saveTable", () => { const outputPath = await writeTempFile("") await unlink(outputPath) + assert(originalTable, "table is not defined") await saveTable(originalTable, { path: outputPath, format: "csv" as const, @@ -78,6 +82,7 @@ describe("saveTable", () => { const outputPath = await writeTempFile("") await unlink(outputPath) + assert(table, "table is not defined") const savedPath = await saveTable(table, { path: outputPath, format: "csv" as const, @@ -99,6 +104,7 @@ describe("saveTable", () => { const outputPath = await writeTempFile("") await unlink(outputPath) + assert(table, "table is not defined") const savedPath = await saveTable(table, { path: outputPath, format: "csv" as const, @@ -117,6 +123,7 @@ describe("saveTable", () => { it("should save table from inline data", async () => { const resource = { name: "test-resource", + type: "table" as const, data: [ { id: 1, name: "alice" }, { id: 2, name: "bob" }, @@ -126,6 +133,7 @@ describe("saveTable", () => { const outputPath = await writeTempFile("") await unlink(outputPath) + assert(table, "table is not defined") const savedPath = await saveTable(table, { path: outputPath, format: "csv" as const, @@ -149,6 +157,7 @@ describe("saveTable", () => { const outputPath = await writeTempFile("") await unlink(outputPath) + assert(table, "table is not defined") const savedPath = await saveTable(table, { path: outputPath, format: "csv" as const, @@ -170,6 +179,7 @@ describe("saveTable", () => { const outputPath = await writeTempFile("") await unlink(outputPath) + assert(table, "table is not defined") const savedPath = await saveTable(table, { path: outputPath, format: "csv" as const, @@ -191,6 +201,7 @@ describe("saveTable", () => { const outputPath = await writeTempFile("") await unlink(outputPath) + assert(table, "table is not defined") const savedPath = await saveTable(table, { path: outputPath, format: "csv" as const, @@ -214,6 +225,7 @@ describe("saveTable", () => { const outputPath = await writeTempFile("") await unlink(outputPath) + assert(table, "table is not defined") const savedPath = await saveTable(table, { path: outputPath, format: "csv" as const, diff --git a/lib/table/save.ts b/library/table/save.ts similarity index 79% rename from lib/table/save.ts rename to library/table/save.ts index 2332c080..12d1f932 100644 --- a/lib/table/save.ts +++ b/library/table/save.ts @@ -1,8 +1,8 @@ import type { SaveTableOptions, Table } from "@dpkit/table" -import { dpkit } from "../plugin.ts" +import { system } from "../system.ts" export async function saveTable(table: Table, options: SaveTableOptions) { - for (const plugin of dpkit.plugins) { + for (const 
plugin of system.plugins) {
     const path = await plugin.saveTable?.(table, options)
     if (path) {
       return path
diff --git a/library/table/validate.ts b/library/table/validate.ts
new file mode 100644
index 00000000..f8870319
--- /dev/null
+++ b/library/table/validate.ts
@@ -0,0 +1,35 @@
+import type { Resource } from "@dpkit/metadata"
+import type { UnboundError } from "@dpkit/metadata"
+import { resolveSchema } from "@dpkit/metadata"
+import { createReport } from "@dpkit/metadata"
+import { inspectTable } from "@dpkit/table"
+import type { LoadTableOptions } from "@dpkit/table"
+import { inferSchema } from "../schema/index.ts"
+import { loadTable } from "./load.ts"
+
+export async function validateTable(
+  resource: Partial<Resource>,
+  options?: LoadTableOptions & { maxErrors?: number },
+) {
+  const { maxErrors } = options ?? {}
+
+  const errors: UnboundError[] = []
+  const table = await loadTable(resource, { denormalized: true })
+
+  if (table) {
+    let schema = await resolveSchema(resource.schema)
+    if (!schema) schema = await inferSchema(resource, options)
+    const tableErrors = await inspectTable(table, { schema, maxErrors })
+    errors.push(...tableErrors)
+  }
+
+  // TODO: review
+  if (!table && resource.schema) {
+    errors.push({
+      type: "data",
+      message: `missing ${resource.name} table`,
+    })
+  }
+
+  return createReport(errors, { maxErrors })
+}
diff --git a/csv/tsconfig.json b/library/tsconfig.json
similarity index 100%
rename from csv/tsconfig.json
rename to library/tsconfig.json
diff --git a/datahub/typedoc.json b/library/typedoc.json
similarity index 100%
rename from datahub/typedoc.json
rename to library/typedoc.json
diff --git a/markdown/README.md b/markdown/README.md
deleted file mode 100644
index d50fcaf3..00000000
--- a/markdown/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# @dpkit/markdown
-
-dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev).
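The new `library/table/validate.ts` above routes table validation through the plugin system: it loads the table in denormalized form, resolves or infers a schema, folds `inspectTable` findings into a report, and emits a synthetic "missing ... table" data error when a schema is declared but no table can be loaded. A minimal usage sketch, assuming `validateTable` is consumed via the `@dpkit/library` package as re-exported from `library/table/index.ts`; the resource name, file, and schema below are illustrative, not part of this PR:

```ts
// Hypothetical usage of the validateTable introduced in this PR.
// "users" and "users.csv" are made-up placeholders.
import { validateTable } from "@dpkit/library"

const report = await validateTable(
  {
    name: "users",
    path: "users.csv",
    schema: { fields: [{ name: "id", type: "integer" as const }] },
  },
  { maxErrors: 100 },
)

// valid is false either when inspectTable finds schema/cell errors or when
// the schema is defined but the table cannot be loaded ("missing users table").
console.log(report.valid, report.errors)
```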
diff --git a/markdown/index.ts b/markdown/index.ts deleted file mode 100644 index 352ef322..00000000 --- a/markdown/index.ts +++ /dev/null @@ -1 +0,0 @@ -export * from "./schema/index.ts" diff --git a/markdown/package.json b/markdown/package.json deleted file mode 100644 index e5f533b0..00000000 --- a/markdown/package.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "name": "@dpkit/markdown", - "type": "module", - "version": "0.0.0-dev", - "exports": "./build/index.js", - "sideEffects": false, - "license": "MIT", - "author": "Evgeny Karev", - "repository": "https://github.com/datisthq/dpkit", - "description": "Fast TypeScript data management framework built on top of the Data Package standard and Polars DataFrames", - "keywords": [ - "data", - "polars", - "dataframe", - "datapackage", - "tableschema", - "typescript", - "validation", - "quality", - "fair", - "markdown" - ], - "scripts": { - "build": "tsc" - }, - "dependencies": { - "@dpkit/core": "workspace:*", - "@types/mdast": "^4.0.0", - "remark": "^15.0.1", - "remark-gfm": "^4.0.0" - }, - "devDependencies": { - "@dpkit/test": "workspace:*" - } -} diff --git a/markdown/tsconfig.json b/markdown/tsconfig.json deleted file mode 100644 index 3c43903c..00000000 --- a/markdown/tsconfig.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "extends": "../tsconfig.json" -} diff --git a/markdown/typedoc.json b/markdown/typedoc.json deleted file mode 100644 index f8e49f3a..00000000 --- a/markdown/typedoc.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "entryPoints": ["index.ts"], - "skipErrorChecking": true -} diff --git a/core/profile/scripts/prune.ts b/metadata/@minify.ts similarity index 96% rename from core/profile/scripts/prune.ts rename to metadata/@minify.ts index f682de9b..df5e9c5d 100644 --- a/core/profile/scripts/prune.ts +++ b/metadata/@minify.ts @@ -2,7 +2,7 @@ import { readFile, writeFile } from "node:fs/promises" import fs from "node:fs/promises" import { join } from "node:path" -const registryDir = join(import.meta.dirname, "..", "registry") +const registryDir = join(import.meta.dirname, "profile", "registry") const files = (await fs.readdir(registryDir)).filter(file => file.endsWith(".json"), ) diff --git a/core/README.md b/metadata/README.md similarity index 77% rename from core/README.md rename to metadata/README.md index e84ee17c..e4c12bd8 100644 --- a/core/README.md +++ b/metadata/README.md @@ -1,3 +1,3 @@ -# @dpkit/core +# @dpkit/metadata -dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev). +dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [project's website](https://dpkit.app). 
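The `metadata/@minify.ts` hunk above only shows the relocated `registryDir` plus the surrounding imports and file filter; the rest of the script is elided by the diff. For orientation, a self-contained sketch of the read-filter-rewrite pass the renamed script suggests: only the imports, `registryDir`, and `files` lines are taken from the diff, while the loop body is an assumption implied by the "@minify" name:

```ts
// Sketch of a registry post-processing pass in the spirit of metadata/@minify.ts.
// The re-serialization loop is an assumed, illustrative body.
import { readFile, writeFile } from "node:fs/promises"
import fs from "node:fs/promises"
import { join } from "node:path"

const registryDir = join(import.meta.dirname, "profile", "registry")
const files = (await fs.readdir(registryDir)).filter(file =>
  file.endsWith(".json"),
)

for (const file of files) {
  const path = join(registryDir, file)
  const data = JSON.parse(await readFile(path, "utf-8"))
  // Rewrite each profile JSON without whitespace (assumed minification step)
  await writeFile(path, JSON.stringify(data))
}
```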
diff --git a/core/profile/registry/dialect-1.0.json b/metadata/assets/dialect-1.0.json similarity index 100% rename from core/profile/registry/dialect-1.0.json rename to metadata/assets/dialect-1.0.json diff --git a/core/profile/registry/dialect-2.0.json b/metadata/assets/dialect-2.0.json similarity index 100% rename from core/profile/registry/dialect-2.0.json rename to metadata/assets/dialect-2.0.json diff --git a/core/profile/registry/package-1.0.json b/metadata/assets/package-1.0.json similarity index 100% rename from core/profile/registry/package-1.0.json rename to metadata/assets/package-1.0.json diff --git a/core/profile/registry/package-2.0.json b/metadata/assets/package-2.0.json similarity index 100% rename from core/profile/registry/package-2.0.json rename to metadata/assets/package-2.0.json diff --git a/core/profile/registry/resource-1.0.json b/metadata/assets/resource-1.0.json similarity index 100% rename from core/profile/registry/resource-1.0.json rename to metadata/assets/resource-1.0.json diff --git a/core/profile/registry/resource-2.0.json b/metadata/assets/resource-2.0.json similarity index 100% rename from core/profile/registry/resource-2.0.json rename to metadata/assets/resource-2.0.json diff --git a/core/profile/registry/schema-1.0.json b/metadata/assets/schema-1.0.json similarity index 100% rename from core/profile/registry/schema-1.0.json rename to metadata/assets/schema-1.0.json diff --git a/core/profile/registry/schema-2.0.json b/metadata/assets/schema-2.0.json similarity index 100% rename from core/profile/registry/schema-2.0.json rename to metadata/assets/schema-2.0.json diff --git a/core/descriptor/Descriptor.ts b/metadata/descriptor/Descriptor.ts similarity index 100% rename from core/descriptor/Descriptor.ts rename to metadata/descriptor/Descriptor.ts diff --git a/core/descriptor/fixtures/schema.json b/metadata/descriptor/fixtures/schema.json similarity index 100% rename from core/descriptor/fixtures/schema.json rename to metadata/descriptor/fixtures/schema.json diff --git a/core/descriptor/index.ts b/metadata/descriptor/index.ts similarity index 100% rename from core/descriptor/index.ts rename to metadata/descriptor/index.ts diff --git a/core/descriptor/load.spec.ts b/metadata/descriptor/load.spec.ts similarity index 100% rename from core/descriptor/load.spec.ts rename to metadata/descriptor/load.spec.ts diff --git a/core/descriptor/load.ts b/metadata/descriptor/load.ts similarity index 96% rename from core/descriptor/load.ts rename to metadata/descriptor/load.ts index 4b9e8978..c3949167 100644 --- a/core/descriptor/load.ts +++ b/metadata/descriptor/load.ts @@ -1,4 +1,4 @@ -import { node } from "../node/index.ts" +import { node } from "../platform/index.ts" import { getProtocol, isRemotePath } from "../path/index.ts" import { parseDescriptor } from "./process/parse.ts" diff --git a/core/descriptor/process/parse.ts b/metadata/descriptor/process/parse.ts similarity index 100% rename from core/descriptor/process/parse.ts rename to metadata/descriptor/process/parse.ts diff --git a/core/descriptor/process/stringify.ts b/metadata/descriptor/process/stringify.ts similarity index 100% rename from core/descriptor/process/stringify.ts rename to metadata/descriptor/process/stringify.ts diff --git a/core/descriptor/save.spec.ts b/metadata/descriptor/save.spec.ts similarity index 100% rename from core/descriptor/save.spec.ts rename to metadata/descriptor/save.spec.ts diff --git a/core/descriptor/save.ts b/metadata/descriptor/save.ts similarity index 94% rename from 
core/descriptor/save.ts rename to metadata/descriptor/save.ts index d5733e06..c63edbe2 100644 --- a/core/descriptor/save.ts +++ b/metadata/descriptor/save.ts @@ -1,4 +1,4 @@ -import { node } from "../node/index.ts" +import { node } from "../platform/index.ts" import type { Descriptor } from "./Descriptor.ts" import { stringifyDescriptor } from "./process/stringify.ts" diff --git a/core/dialect/Dialect.ts b/metadata/dialect/Dialect.ts similarity index 100% rename from core/dialect/Dialect.ts rename to metadata/dialect/Dialect.ts diff --git a/core/dialect/assert.spec.ts b/metadata/dialect/assert.spec.ts similarity index 72% rename from core/dialect/assert.spec.ts rename to metadata/dialect/assert.spec.ts index 86b46829..f91ea1ad 100644 --- a/core/dialect/assert.spec.ts +++ b/metadata/dialect/assert.spec.ts @@ -1,5 +1,4 @@ import { describe, expect, expectTypeOf, it } from "vitest" -import { AssertionError } from "../error/index.ts" import type { Dialect } from "./Dialect.ts" import { assertDialect } from "./assert.ts" @@ -17,12 +16,12 @@ describe("assertDialect", () => { expect(dialect).toEqual(descriptor) }) - it("throws ValidationError when dialect is invalid", async () => { + it("throws Error when dialect is invalid", async () => { const invalidDialect = { - delimiter: 1, // Should be a string - header: "yes", // Should be a boolean + delimiter: 1, + header: "yes", } - await expect(assertDialect(invalidDialect)).rejects.toThrow(AssertionError) + await expect(assertDialect(invalidDialect)).rejects.toThrow(Error) }) }) diff --git a/core/dialect/assert.ts b/metadata/dialect/assert.ts similarity index 59% rename from core/dialect/assert.ts rename to metadata/dialect/assert.ts index 471131f5..6c5dbc24 100644 --- a/core/dialect/assert.ts +++ b/metadata/dialect/assert.ts @@ -1,5 +1,4 @@ import type { Descriptor } from "../descriptor/index.ts" -import { AssertionError } from "../error/index.ts" import type { Dialect } from "./Dialect.ts" import { validateDialect } from "./validate.ts" @@ -7,7 +6,13 @@ import { validateDialect } from "./validate.ts" * Assert a Dialect descriptor (JSON Object) against its profile */ export async function assertDialect(source: Descriptor | Dialect) { - const { dialect, errors } = await validateDialect(source) - if (!dialect) throw new AssertionError(errors) - return dialect + const report = await validateDialect(source) + + if (!report.dialect) { + throw new Error( + `Dialect "${JSON.stringify(source).slice(0, 100)}" is not valid`, + ) + } + + return report.dialect } diff --git a/core/dialect/convert/fromDescriptor.ts b/metadata/dialect/convert/fromDescriptor.ts similarity index 100% rename from core/dialect/convert/fromDescriptor.ts rename to metadata/dialect/convert/fromDescriptor.ts diff --git a/core/dialect/convert/toDescriptor.ts b/metadata/dialect/convert/toDescriptor.ts similarity index 100% rename from core/dialect/convert/toDescriptor.ts rename to metadata/dialect/convert/toDescriptor.ts diff --git a/core/dialect/fixtures/dialect-invalid.json b/metadata/dialect/fixtures/dialect-invalid.json similarity index 100% rename from core/dialect/fixtures/dialect-invalid.json rename to metadata/dialect/fixtures/dialect-invalid.json diff --git a/core/dialect/fixtures/dialect.json b/metadata/dialect/fixtures/dialect.json similarity index 100% rename from core/dialect/fixtures/dialect.json rename to metadata/dialect/fixtures/dialect.json diff --git a/core/dialect/index.ts b/metadata/dialect/index.ts similarity index 100% rename from core/dialect/index.ts rename to 
metadata/dialect/index.ts diff --git a/core/dialect/load.spec.ts b/metadata/dialect/load.spec.ts similarity index 100% rename from core/dialect/load.spec.ts rename to metadata/dialect/load.spec.ts diff --git a/core/dialect/load.ts b/metadata/dialect/load.ts similarity index 100% rename from core/dialect/load.ts rename to metadata/dialect/load.ts diff --git a/core/dialect/resolve.ts b/metadata/dialect/resolve.ts similarity index 100% rename from core/dialect/resolve.ts rename to metadata/dialect/resolve.ts diff --git a/core/dialect/save.spec.ts b/metadata/dialect/save.spec.ts similarity index 100% rename from core/dialect/save.spec.ts rename to metadata/dialect/save.spec.ts diff --git a/core/dialect/save.ts b/metadata/dialect/save.ts similarity index 100% rename from core/dialect/save.ts rename to metadata/dialect/save.ts diff --git a/metadata/dialect/validate.spec.ts b/metadata/dialect/validate.spec.ts new file mode 100644 index 00000000..9bf48a9c --- /dev/null +++ b/metadata/dialect/validate.spec.ts @@ -0,0 +1,34 @@ +import { describe, expect, it } from "vitest" +import { validateDialect } from "./validate.ts" + +describe("validateDialect", () => { + it("returns valid report for valid dialect", async () => { + const descriptor = { + delimiter: ";", + } + + const report = await validateDialect({ + descriptor, + }) + + expect(report.valid).toBe(true) + expect(report.errors).toEqual([]) + }) + + it("returns validation errors for invalid dialect", async () => { + const invalidDialect = { + delimiter: 1, // Should be a string + } + + const report = await validateDialect(invalidDialect) + + expect(report.valid).toBe(false) + expect(report.errors.length).toBeGreaterThan(0) + + const error = report.errors[0] + expect(error).toBeDefined() + if (error) { + expect(error.pointer).toBe("/delimiter") + } + }) +}) diff --git a/core/dialect/validate.ts b/metadata/dialect/validate.ts similarity index 63% rename from core/dialect/validate.ts rename to metadata/dialect/validate.ts index 01bbf8d9..59196b29 100644 --- a/core/dialect/validate.ts +++ b/metadata/dialect/validate.ts @@ -1,4 +1,5 @@ import type { Descriptor } from "../descriptor/index.ts" +import { loadDescriptor } from "../descriptor/index.ts" import { validateDescriptor } from "../profile/index.ts" import type { Dialect } from "./Dialect.ts" import { convertDialectFromDescriptor } from "./convert/fromDescriptor.ts" @@ -8,21 +9,26 @@ const DEFAULT_PROFILE = "https://datapackage.org/profiles/1.0/tabledialect.json" /** * Validate a Dialect descriptor (JSON Object) against its profile */ -export async function validateDialect(source: Descriptor | Dialect) { - const descriptor = source as Descriptor +export async function validateDialect(source: Dialect | Descriptor | string) { + const descriptor = + typeof source === "string" + ? await loadDescriptor(source) + : (source as Descriptor) const profile = typeof descriptor.$schema === "string" ? 
descriptor.$schema : DEFAULT_PROFILE - const { valid, errors } = await validateDescriptor(descriptor, { profile }) + const report = await validateDescriptor(descriptor, { + profile, + }) let dialect: Dialect | undefined = undefined - if (valid) { + if (report.valid) { // Validation + normalization = we can cast it dialect = convertDialectFromDescriptor(descriptor) as Dialect } - return { valid, errors, dialect } + return { ...report, dialect } } diff --git a/metadata/error/Bound.ts b/metadata/error/Bound.ts new file mode 100644 index 00000000..4de1a642 --- /dev/null +++ b/metadata/error/Bound.ts @@ -0,0 +1,3 @@ +import type { UnboundError } from "./Unbound.ts" + +export type BoundError = UnboundError & { resource: string } diff --git a/metadata/error/Error.ts b/metadata/error/Error.ts new file mode 100644 index 00000000..5a863999 --- /dev/null +++ b/metadata/error/Error.ts @@ -0,0 +1,4 @@ +import type { BoundError } from "./Bound.ts" +import type { UnboundError } from "./Unbound.ts" + +export type DpkitError = BoundError | UnboundError diff --git a/metadata/error/Unbound.ts b/metadata/error/Unbound.ts new file mode 100644 index 00000000..549b4f43 --- /dev/null +++ b/metadata/error/Unbound.ts @@ -0,0 +1,12 @@ +import type { DataError } from "./types/Data.ts" +import type { DocumentError } from "./types/Document.ts" +import type { FileError } from "./types/File.ts" +import type { MetadataError } from "./types/Metadata.ts" +import type { TableError } from "./types/Table.ts" + +export type UnboundError = + | MetadataError + | DataError + | FileError + | TableError + | DocumentError diff --git a/metadata/error/index.ts b/metadata/error/index.ts new file mode 100644 index 00000000..b53a3824 --- /dev/null +++ b/metadata/error/index.ts @@ -0,0 +1,17 @@ +export type { DpkitError } from "./Error.ts" +export type { BoundError } from "./Bound.ts" +export type { UnboundError } from "./Unbound.ts" + +export type * from "./types/Bytes.ts" +export type * from "./types/Cell.ts" +export type * from "./types/Data.ts" +export type * from "./types/Document.ts" +export type * from "./types/Encoding.ts" +export type * from "./types/Field.ts" +export type * from "./types/Fields.ts" +export type * from "./types/File.ts" +export type * from "./types/ForeignKey.ts" +export type * from "./types/Hash.ts" +export type * from "./types/Metadata.ts" +export type * from "./types/Row.ts" +export type * from "./types/Table.ts" diff --git a/core/error/Base.ts b/metadata/error/types/Base.ts similarity index 100% rename from core/error/Base.ts rename to metadata/error/types/Base.ts diff --git a/metadata/error/types/Bytes.ts b/metadata/error/types/Bytes.ts new file mode 100644 index 00000000..82530b93 --- /dev/null +++ b/metadata/error/types/Bytes.ts @@ -0,0 +1,7 @@ +import type { BaseError } from "./Base.ts" + +export interface BytesError extends BaseError { + type: "file/bytes" + bytes: number + actualBytes: number +} diff --git a/table/error/Cell.ts b/metadata/error/types/Cell.ts similarity index 89% rename from table/error/Cell.ts rename to metadata/error/types/Cell.ts index ae179dde..34e5a4fa 100644 --- a/table/error/Cell.ts +++ b/metadata/error/types/Cell.ts @@ -1,5 +1,5 @@ -import type { FieldType } from "@dpkit/core" -import type { BaseTableError } from "./Base.ts" +import type { FieldType } from "../../field/index.ts" +import type { BaseError } from "./Base.ts" export type CellError = | CellTypeError @@ -15,7 +15,7 @@ export type CellError = | CellEnumError | CellJsonSchemaError -export interface BaseCellError extends 
BaseTableError { +export interface BaseCellError extends BaseError { fieldName: string rowNumber: number cell: string @@ -77,5 +77,6 @@ export interface CellEnumError extends BaseCellError { export interface CellJsonSchemaError extends BaseCellError { type: "cell/jsonSchema" - jsonSchema: Record + pointer: string + message: string } diff --git a/core/error/Data.ts b/metadata/error/types/Data.ts similarity index 100% rename from core/error/Data.ts rename to metadata/error/types/Data.ts diff --git a/metadata/error/types/Document.ts b/metadata/error/types/Document.ts new file mode 100644 index 00000000..277d42dc --- /dev/null +++ b/metadata/error/types/Document.ts @@ -0,0 +1,9 @@ +import type { BaseError } from "./Base.ts" + +export type DocumentError = JsonDocumentError + +export interface JsonDocumentError extends BaseError { + type: "document/json" + pointer: string + message: string +} diff --git a/metadata/error/types/Encoding.ts b/metadata/error/types/Encoding.ts new file mode 100644 index 00000000..515ee12d --- /dev/null +++ b/metadata/error/types/Encoding.ts @@ -0,0 +1,7 @@ +import type { BaseError } from "./Base.ts" + +export interface EncodingError extends BaseError { + type: "file/encoding" + encoding: string + actualEncoding: string +} diff --git a/table/error/Field.ts b/metadata/error/types/Field.ts similarity index 68% rename from table/error/Field.ts rename to metadata/error/types/Field.ts index df3af39f..41b57831 100644 --- a/table/error/Field.ts +++ b/metadata/error/types/Field.ts @@ -1,9 +1,9 @@ -import type { FieldType } from "@dpkit/core" -import type { BaseTableError } from "./Base.ts" +import type { FieldType } from "../../field/index.ts" +import type { BaseError } from "./Base.ts" export type FieldError = FieldNameError | FieldTypeError -export interface BaseFieldError extends BaseTableError { +export interface BaseFieldError extends BaseError { fieldName: string } diff --git a/table/error/Fields.ts b/metadata/error/types/Fields.ts similarity index 71% rename from table/error/Fields.ts rename to metadata/error/types/Fields.ts index 0a30ca84..9307c47a 100644 --- a/table/error/Fields.ts +++ b/metadata/error/types/Fields.ts @@ -1,8 +1,8 @@ -import type { BaseTableError } from "./Base.ts" +import type { BaseError } from "./Base.ts" export type FieldsError = FieldsMissingError | FieldsExtraError -export interface BaseFieldsError extends BaseTableError { +export interface BaseFieldsError extends BaseError { fieldNames: string[] } diff --git a/file/error/File.ts b/metadata/error/types/File.ts similarity index 100% rename from file/error/File.ts rename to metadata/error/types/File.ts diff --git a/metadata/error/types/ForeignKey.ts b/metadata/error/types/ForeignKey.ts new file mode 100644 index 00000000..1d62dbaf --- /dev/null +++ b/metadata/error/types/ForeignKey.ts @@ -0,0 +1,8 @@ +import type { ForeignKey } from "../../schema/index.ts" +import type { BaseError } from "./Base.ts" + +export interface ForeignKeyError extends BaseError { + type: "foreignKey" + foreignKey: ForeignKey + cells: string[] +} diff --git a/metadata/error/types/Hash.ts b/metadata/error/types/Hash.ts new file mode 100644 index 00000000..48d70787 --- /dev/null +++ b/metadata/error/types/Hash.ts @@ -0,0 +1,7 @@ +import type { BaseError } from "./Base.ts" + +export interface HashError extends BaseError { + type: "file/hash" + hash: string + actualHash: string +} diff --git a/metadata/error/types/Metadata.ts b/metadata/error/types/Metadata.ts new file mode 100644 index 00000000..228adc3b --- /dev/null +++ 
b/metadata/error/types/Metadata.ts @@ -0,0 +1,10 @@ +import type { BaseError } from "./Base.ts" + +/** + * A descriptor error + */ +export interface MetadataError extends BaseError { + type: "metadata" + pointer: string + message: string +} diff --git a/table/error/Row.ts b/metadata/error/types/Row.ts similarity index 64% rename from table/error/Row.ts rename to metadata/error/types/Row.ts index c29b1ffe..6a871f0d 100644 --- a/table/error/Row.ts +++ b/metadata/error/types/Row.ts @@ -1,8 +1,8 @@ -import type { BaseTableError } from "./Base.ts" +import type { BaseError } from "./Base.ts" export type RowError = RowUniqueError -export interface BaseRowError extends BaseTableError { +export interface BaseRowError extends BaseError { rowNumber: number } diff --git a/table/error/Table.ts b/metadata/error/types/Table.ts similarity index 100% rename from table/error/Table.ts rename to metadata/error/types/Table.ts diff --git a/core/field/Field.ts b/metadata/field/Field.ts similarity index 100% rename from core/field/Field.ts rename to metadata/field/Field.ts diff --git a/core/field/Type.ts b/metadata/field/Type.ts similarity index 100% rename from core/field/Type.ts rename to metadata/field/Type.ts diff --git a/core/field/convert/fromDescriptor.ts b/metadata/field/convert/fromDescriptor.ts similarity index 100% rename from core/field/convert/fromDescriptor.ts rename to metadata/field/convert/fromDescriptor.ts diff --git a/core/field/convert/toDescriptor.ts b/metadata/field/convert/toDescriptor.ts similarity index 100% rename from core/field/convert/toDescriptor.ts rename to metadata/field/convert/toDescriptor.ts diff --git a/core/field/index.ts b/metadata/field/index.ts similarity index 100% rename from core/field/index.ts rename to metadata/field/index.ts diff --git a/core/field/types/Any.ts b/metadata/field/types/Any.ts similarity index 100% rename from core/field/types/Any.ts rename to metadata/field/types/Any.ts diff --git a/core/field/types/Array.ts b/metadata/field/types/Array.ts similarity index 100% rename from core/field/types/Array.ts rename to metadata/field/types/Array.ts diff --git a/core/field/types/Base.ts b/metadata/field/types/Base.ts similarity index 93% rename from core/field/types/Base.ts rename to metadata/field/types/Base.ts index ba32a92d..3f6e8cfb 100644 --- a/core/field/types/Base.ts +++ b/metadata/field/types/Base.ts @@ -9,6 +9,11 @@ export interface BaseField extends Metadata { */ name: string + /** + * Field format -- optional addition to the type + */ + format?: string + /** * Human-readable title */ diff --git a/core/field/types/Boolean.ts b/metadata/field/types/Boolean.ts similarity index 100% rename from core/field/types/Boolean.ts rename to metadata/field/types/Boolean.ts diff --git a/core/field/types/Date.ts b/metadata/field/types/Date.ts similarity index 100% rename from core/field/types/Date.ts rename to metadata/field/types/Date.ts diff --git a/core/field/types/Datetime.ts b/metadata/field/types/Datetime.ts similarity index 100% rename from core/field/types/Datetime.ts rename to metadata/field/types/Datetime.ts diff --git a/core/field/types/Duration.ts b/metadata/field/types/Duration.ts similarity index 100% rename from core/field/types/Duration.ts rename to metadata/field/types/Duration.ts diff --git a/core/field/types/Geojson.ts b/metadata/field/types/Geojson.ts similarity index 100% rename from core/field/types/Geojson.ts rename to metadata/field/types/Geojson.ts diff --git a/core/field/types/Geopoint.ts b/metadata/field/types/Geopoint.ts similarity index 
100% rename from core/field/types/Geopoint.ts rename to metadata/field/types/Geopoint.ts diff --git a/core/field/types/Integer.ts b/metadata/field/types/Integer.ts similarity index 100% rename from core/field/types/Integer.ts rename to metadata/field/types/Integer.ts diff --git a/core/field/types/List.ts b/metadata/field/types/List.ts similarity index 100% rename from core/field/types/List.ts rename to metadata/field/types/List.ts diff --git a/core/field/types/Number.ts b/metadata/field/types/Number.ts similarity index 100% rename from core/field/types/Number.ts rename to metadata/field/types/Number.ts diff --git a/core/field/types/Object.ts b/metadata/field/types/Object.ts similarity index 100% rename from core/field/types/Object.ts rename to metadata/field/types/Object.ts diff --git a/core/field/types/String.ts b/metadata/field/types/String.ts similarity index 100% rename from core/field/types/String.ts rename to metadata/field/types/String.ts diff --git a/core/field/types/Time.ts b/metadata/field/types/Time.ts similarity index 100% rename from core/field/types/Time.ts rename to metadata/field/types/Time.ts diff --git a/core/field/types/Year.ts b/metadata/field/types/Year.ts similarity index 100% rename from core/field/types/Year.ts rename to metadata/field/types/Year.ts diff --git a/core/field/types/Yearmonth.ts b/metadata/field/types/Yearmonth.ts similarity index 100% rename from core/field/types/Yearmonth.ts rename to metadata/field/types/Yearmonth.ts diff --git a/core/field/types/index.ts b/metadata/field/types/index.ts similarity index 100% rename from core/field/types/index.ts rename to metadata/field/types/index.ts diff --git a/metadata/index.ts b/metadata/index.ts new file mode 100644 index 00000000..8d6e70c3 --- /dev/null +++ b/metadata/index.ts @@ -0,0 +1,113 @@ +export type { AnyConstraints } from "./field/index.ts" +export type { AnyField } from "./field/index.ts" +export type { ArrayConstraints } from "./field/index.ts" +export type { ArrayField } from "./field/index.ts" +export type { BaseCellError } from "./error/index.ts" +export type { BaseFieldError } from "./error/index.ts" +export type { BaseFieldsError } from "./error/index.ts" +export type { BaseRowError } from "./error/index.ts" +export type { BooleanConstraints } from "./field/index.ts" +export type { BooleanField } from "./field/index.ts" +export type { BoundError } from "./error/index.ts" +export type { BytesError } from "./error/index.ts" +export type { CellEnumError } from "./error/index.ts" +export type { CellError } from "./error/index.ts" +export type { CellExclusiveMaximumError } from "./error/index.ts" +export type { CellExclusiveMinimumError } from "./error/index.ts" +export type { CellJsonSchemaError } from "./error/index.ts" +export type { CellMaxLengthError } from "./error/index.ts" +export type { CellMaximumError } from "./error/index.ts" +export type { CellMinLengthError } from "./error/index.ts" +export type { CellMinimumError } from "./error/index.ts" +export type { CellPatternError } from "./error/index.ts" +export type { CellRequiredError } from "./error/index.ts" +export type { CellTypeError } from "./error/index.ts" +export type { CellUniqueError } from "./error/index.ts" +export type { Contributor } from "./package/index.ts" +export type { DataError } from "./error/index.ts" +export type { DateConstraints } from "./field/index.ts" +export type { DateField } from "./field/index.ts" +export type { DatetimeConstraints } from "./field/index.ts" +export type { DatetimeField } from 
"./field/index.ts" +export type { Descriptor } from "./descriptor/index.ts" +export type { Dialect } from "./dialect/index.ts" +export type { DocumentError } from "./error/index.ts" +export type { DpkitError } from "./error/index.ts" +export type { DurationConstraints } from "./field/index.ts" +export type { DurationField } from "./field/index.ts" +export type { EncodingError } from "./error/index.ts" +export type { Field } from "./field/index.ts" +export type { FieldError } from "./error/index.ts" +export type { FieldNameError } from "./error/index.ts" +export type { FieldType } from "./field/index.ts" +export type { FieldTypeError } from "./error/index.ts" +export type { FieldsError } from "./error/index.ts" +export type { FieldsExtraError } from "./error/index.ts" +export type { FieldsMissingError } from "./error/index.ts" +export type { FileError } from "./error/index.ts" +export type { ForeignKeyError } from "./error/index.ts" +export type { GeojsonConstraints } from "./field/index.ts" +export type { GeojsonField } from "./field/index.ts" +export type { GeopointConstraints } from "./field/index.ts" +export type { GeopointField } from "./field/index.ts" +export type { HashError } from "./error/index.ts" +export type { IntegerConstraints } from "./field/index.ts" +export type { IntegerField } from "./field/index.ts" +export type { JsonDocumentError } from "./error/index.ts" +export type { License } from "./resource/index.ts" +export type { ListConstraints } from "./field/index.ts" +export type { ListField } from "./field/index.ts" +export type { MetadataError } from "./error/index.ts" +export type { NumberConstraints } from "./field/index.ts" +export type { NumberField } from "./field/index.ts" +export type { ObjectConstraints } from "./field/index.ts" +export type { ObjectField } from "./field/index.ts" +export type { Package } from "./package/index.ts" +export type { Report } from "./report/index.ts" +export type { Resource } from "./resource/index.ts" +export type { RowError } from "./error/index.ts" +export type { RowUniqueError } from "./error/index.ts" +export type { Schema } from "./schema/Schema.ts" +export type { Source } from "./resource/index.ts" +export type { StringConstraints } from "./field/index.ts" +export type { StringField } from "./field/index.ts" +export type { TableError } from "./error/index.ts" +export type { TimeConstraints } from "./field/index.ts" +export type { TimeField } from "./field/index.ts" +export type { UnboundError } from "./error/index.ts" +export type { YearConstraints } from "./field/index.ts" +export type { YearField } from "./field/index.ts" +export type { YearmonthConstraints } from "./field/index.ts" +export type { YearmonthField } from "./field/index.ts" + +export { convertPackageToDescriptor } from "./package/index.ts" +export { convertResourceToDescriptor } from "./resource/index.ts" +export { convertSchemaFromJsonSchema } from "./schema/index.ts" +export { convertSchemaToJsonSchema } from "./schema/index.ts" +export { createReport } from "./report/index.ts" +export { denormalizePath } from "./path/index.ts" +export { getBasepath } from "./path/index.ts" +export { getFilename } from "./path/index.ts" +export { getFormat } from "./path/index.ts" +export { getName } from "./path/index.ts" +export { inferFormat } from "./resource/index.ts" +export { inferName } from "./resource/index.ts" +export { inspectJsonValue } from "./json/index.ts" +export { isRemotePath } from "./path/index.ts" +export { isRemoteResource } from "./resource/index.ts" 
+export { loadDescriptor } from "./descriptor/index.ts"
+export { loadDialect } from "./dialect/index.ts"
+export { loadPackageDescriptor } from "./package/index.ts"
+export { loadResourceDescriptor } from "./resource/index.ts"
+export { loadSchema } from "./schema/index.ts"
+export { resolveBasepath } from "./path/index.ts"
+export { resolveDialect } from "./dialect/index.ts"
+export { resolveJsonSchema } from "./json/index.ts"
+export { resolveSchema } from "./schema/index.ts"
+export { saveDescriptor } from "./descriptor/index.ts"
+export { savePackageDescriptor } from "./package/index.ts"
+export { stringifyDescriptor } from "./descriptor/index.ts"
+export { validateDialect } from "./dialect/index.ts"
+export { validatePackageMetadata } from "./package/index.ts"
+export { validateResourceMetadata } from "./resource/index.ts"
+export { validateSchema } from "./schema/index.ts"
diff --git a/metadata/json/Schema.ts b/metadata/json/Schema.ts
new file mode 100644
index 00000000..9bb9f8dc
--- /dev/null
+++ b/metadata/json/Schema.ts
@@ -0,0 +1,4 @@
+import type { Descriptor } from "../descriptor/index.ts"
+
+// TODO: Narrow to valid JSON Schema
+export type JsonSchema = Descriptor
diff --git a/metadata/json/Value.ts b/metadata/json/Value.ts
new file mode 100644
index 00000000..92a7e575
--- /dev/null
+++ b/metadata/json/Value.ts
@@ -0,0 +1 @@
+export type JsonValue = unknown
diff --git a/core/profile/ajv.ts b/metadata/json/ajv.ts
similarity index 66%
rename from core/profile/ajv.ts
rename to metadata/json/ajv.ts
index 5ede6f1b..d898e37f 100644
--- a/core/profile/ajv.ts
+++ b/metadata/json/ajv.ts
@@ -1,10 +1,10 @@
 import { Ajv } from "ajv"
-import { loadProfile } from "./load.ts"
+import { loadJsonSchema } from "./load.ts"

 export const ajv = new Ajv({
   strict: false,
   allErrors: true,
   validateSchema: false,
   validateFormats: false,
-  loadSchema: loadProfile,
+  loadSchema: loadJsonSchema,
 })
diff --git a/metadata/json/assert.ts b/metadata/json/assert.ts
new file mode 100644
index 00000000..7c670657
--- /dev/null
+++ b/metadata/json/assert.ts
@@ -0,0 +1,16 @@
+import type { Descriptor } from "../descriptor/index.ts"
+import type { JsonSchema } from "./Schema.ts"
+import { inspectJsonSchema } from "./inspect/schema.ts"
+
+export async function assertJsonSchema(descriptor: Descriptor) {
+  const errors = await inspectJsonSchema(descriptor)
+
+  // TODO: Improve consolidated error message
+  if (errors.length) {
+    throw new Error(
+      `JsonSchema "${JSON.stringify(descriptor).slice(0, 100)}" is not valid`,
+    )
+  }
+
+  return descriptor as JsonSchema
+}
diff --git a/metadata/json/cache.ts b/metadata/json/cache.ts
new file mode 100644
index 00000000..e7f0fc44
--- /dev/null
+++ b/metadata/json/cache.ts
@@ -0,0 +1,6 @@
+import QuickLRU from "quick-lru"
+import type { JsonSchema } from "./Schema.ts"
+
+export const cache = new QuickLRU<string, JsonSchema>({
+  maxSize: 100,
+})
diff --git a/metadata/json/index.ts b/metadata/json/index.ts
new file mode 100644
index 00000000..caf75400
--- /dev/null
+++ b/metadata/json/index.ts
@@ -0,0 +1,5 @@
+export { inspectJsonValue } from "./inspect/value.ts"
+export type { JsonSchema } from "./Schema.ts"
+export type { JsonValue } from "./Value.ts"
+export { resolveJsonSchema } from "./resolve.ts"
+export { loadJsonSchema } from "./load.ts"
diff --git a/metadata/json/inspect/schema.spec.ts b/metadata/json/inspect/schema.spec.ts
new file mode 100644
index 00000000..9c50cb6e
--- /dev/null
+++ b/metadata/json/inspect/schema.spec.ts
@@ -0,0 +1,161 @@
+import { describe, expect, it } from "vitest"
+import { inspectJsonSchema } from "./schema.ts" + +describe("inspectJsonSchema", () => { + it("returns empty array for valid JSON Schema", async () => { + const descriptor = { + type: "object", + properties: { + name: { type: "string" }, + version: { type: "string" }, + }, + } + + const errors = await inspectJsonSchema(descriptor) + + expect(errors).toEqual([]) + }) + + it("returns empty array for valid JSON Schema with required fields", async () => { + const descriptor = { + type: "object", + required: ["name", "version"], + properties: { + name: { type: "string" }, + version: { type: "string" }, + description: { type: "string" }, + }, + } + + const errors = await inspectJsonSchema(descriptor) + + expect(errors).toEqual([]) + }) + + it("returns empty array for nested JSON Schema", async () => { + const descriptor = { + type: "object", + properties: { + author: { + type: "object", + properties: { + name: { type: "string" }, + email: { type: "string", format: "email" }, + }, + }, + }, + } + + const errors = await inspectJsonSchema(descriptor) + + expect(errors).toEqual([]) + }) + + it("returns empty array for JSON Schema with array items", async () => { + const descriptor = { + type: "object", + properties: { + keywords: { + type: "array", + items: { type: "string" }, + }, + }, + } + + const errors = await inspectJsonSchema(descriptor) + + expect(errors).toEqual([]) + }) + + it("returns validation errors for invalid JSON Schema structure", async () => { + const descriptor = { + type: "invalid-type", + properties: { + name: { type: "string" }, + }, + } + + const errors = await inspectJsonSchema(descriptor) + + expect(errors.length).toBeGreaterThan(0) + }) + + it("returns validation errors for malformed properties", async () => { + const descriptor = { + type: "object", + properties: { + name: { + type: "string", + minLength: "not-a-number", + }, + }, + } + + const errors = await inspectJsonSchema(descriptor) + + expect(errors.length).toBeGreaterThan(0) + }) + + it("returns validation errors for invalid required field", async () => { + const descriptor = { + type: "object", + required: "should-be-an-array", + properties: { + name: { type: "string" }, + }, + } + + const errors = await inspectJsonSchema(descriptor) + + expect(errors.length).toBeGreaterThan(0) + }) + + it("returns multiple errors for JSON Schema with multiple issues", async () => { + const descriptor = { + type: "invalid-type", + required: "should-be-array", + properties: { + field1: { + type: "string", + minLength: "not-a-number", + }, + field2: { + type: "unknown-type", + }, + }, + } + + const errors = await inspectJsonSchema(descriptor) + + expect(errors.length).toBeGreaterThan(1) + }) + + it("returns empty array for empty JSON Schema", async () => { + const descriptor = {} + + const errors = await inspectJsonSchema(descriptor) + + expect(errors).toEqual([]) + }) + + it("returns empty array for JSON Schema with definitions", async () => { + const descriptor = { + type: "object", + properties: { + user: { $ref: "#/definitions/User" }, + }, + definitions: { + User: { + type: "object", + properties: { + name: { type: "string" }, + }, + }, + }, + } + + const errors = await inspectJsonSchema(descriptor) + + expect(errors).toEqual([]) + }) +}) diff --git a/metadata/json/inspect/schema.ts b/metadata/json/inspect/schema.ts new file mode 100644 index 00000000..98bda8e5 --- /dev/null +++ b/metadata/json/inspect/schema.ts @@ -0,0 +1,13 @@ +import type { Descriptor } from "../../descriptor/index.ts" +import { ajv } from "../ajv.ts" + 
+export async function inspectJsonSchema(descriptor: Descriptor) { + const errors: { message: string }[] = [] + await ajv.validateSchema(descriptor) + + for (const error of ajv.errors ?? []) { + errors.push({ message: error.message ?? error.keyword }) + } + + return errors +} diff --git a/metadata/json/inspect/value.spec.ts b/metadata/json/inspect/value.spec.ts new file mode 100644 index 00000000..08d466e9 --- /dev/null +++ b/metadata/json/inspect/value.spec.ts @@ -0,0 +1,146 @@ +import { describe, expect, it } from "vitest" +import { inspectJsonValue } from "./value.ts" + +describe("inspectJsonValue", () => { + it("returns empty array for valid value", async () => { + const value = { + name: "test-package", + version: "1.0.0", + description: "A test package", + } + + const jsonSchema = { + type: "object", + required: ["name", "version"], + properties: { + name: { type: "string" }, + version: { type: "string" }, + description: { type: "string" }, + }, + } + + const errors = await inspectJsonValue(value, { jsonSchema }) + + expect(errors).toEqual([]) + }) + + it("returns validation errors for invalid value", async () => { + const jsonSchema = { + type: "object", + properties: { + name: { type: "string" }, + version: { type: "string" }, + }, + } + + const value = { + name: "test-package", + version: 123, + } + + const errors = await inspectJsonValue(value, { jsonSchema }) + + expect(errors.length).toBeGreaterThan(0) + expect(errors[0]?.pointer).toBe("/version") + expect(errors[0]?.message).toContain("string") + }) + + it("returns errors when required fields are missing", async () => { + const jsonSchema = { + type: "object", + required: ["name", "version", "required_field"], + properties: { + name: { type: "string" }, + version: { type: "string" }, + required_field: { type: "string" }, + }, + } + + const value = { + name: "test-package", + version: "1.0.0", + } + + const errors = await inspectJsonValue(value, { jsonSchema }) + + expect(errors.length).toBeGreaterThan(0) + expect(errors[0]?.pointer).toBe("") + expect(errors[0]?.message).toContain("required_field") + }) + + it("validates nested objects in the value", async () => { + const jsonSchema = { + type: "object", + properties: { + name: { type: "string" }, + version: { type: "string" }, + author: { + type: "object", + properties: { + name: { type: "string" }, + email: { + type: "string", + pattern: "^[^@]+@[^@]+\\.[^@]+$", + }, + }, + }, + }, + } + + const value = { + name: "test-package", + version: "1.0.0", + author: { + name: "Test Author", + email: "invalid-email", + }, + } + + const errors = await inspectJsonValue(value, { jsonSchema }) + + expect(errors.length).toBeGreaterThan(0) + expect( + errors.some( + error => + error.pointer === "/author/email" && + error.message.includes("pattern"), + ), + ).toBe(true) + }) + + it("returns multiple errors for value with multiple issues", async () => { + const jsonSchema = { + type: "object", + required: ["license"], + additionalProperties: false, + properties: { + name: { type: "string", minLength: 3 }, + version: { type: "string", pattern: "^\\d+\\.\\d+\\.\\d+$" }, + license: { type: "string" }, + description: { type: "string" }, + keywords: { + type: "array", + items: { type: "string" }, + }, + }, + } + + const value = { + name: "ab", + version: "not-a-version", + description: 123, + keywords: ["valid", 456, "another"], + extra_field: "should not be here", + } + + const errors = await inspectJsonValue(value, { jsonSchema }) + + expect(errors.length).toBeGreaterThan(3) + + const 
errorPointers = errors.map(err => err.pointer) + expect(errorPointers).toContain("") + expect(errorPointers).toContain("/name") + expect(errorPointers).toContain("/version") + expect(errorPointers).toContain("/description") + }) +}) diff --git a/metadata/json/inspect/value.ts b/metadata/json/inspect/value.ts new file mode 100644 index 00000000..744d9159 --- /dev/null +++ b/metadata/json/inspect/value.ts @@ -0,0 +1,31 @@ +import type { JsonSchema } from "../Schema.ts" +import { ajv } from "../ajv.ts" +import { loadJsonSchema } from "../load.ts" + +/** + * Validate a value against a JSON Schema + * It uses Ajv for JSON Schema validation under the hood + */ +export async function inspectJsonValue( + value: unknown, + options: { + jsonSchema: JsonSchema | string + }, +) { + const jsonSchema = + typeof options.jsonSchema === "string" + ? await loadJsonSchema(options.jsonSchema) + : options.jsonSchema + + const validate = await ajv.compileAsync(jsonSchema) + validate(value) + + const errors = validate.errors + ? validate.errors?.map(error => ({ + pointer: error.instancePath ?? "/", + message: error.message ?? "error", + })) + : [] + + return errors +} diff --git a/metadata/json/load.ts b/metadata/json/load.ts new file mode 100644 index 00000000..41a21cc0 --- /dev/null +++ b/metadata/json/load.ts @@ -0,0 +1,18 @@ +import { loadDescriptor } from "../descriptor/index.ts" +import { cache } from "./cache.ts" +import { assertJsonSchema } from "./assert.ts" + +export async function loadJsonSchema( + path: string, + options?: { onlyRemote?: boolean }, +) { + let jsonSchema = cache.get(path) + + if (!jsonSchema) { + const descriptor = await loadDescriptor(path, options) + jsonSchema = await assertJsonSchema(descriptor) + cache.set(path, jsonSchema) + } + + return jsonSchema +} diff --git a/metadata/json/resolve.ts b/metadata/json/resolve.ts new file mode 100644 index 00000000..ff201cf4 --- /dev/null +++ b/metadata/json/resolve.ts @@ -0,0 +1,14 @@ +import type { JsonSchema } from "./Schema.ts" +import { loadJsonSchema } from "./load.ts" + +export async function resolveJsonSchema(jsonSchema?: JsonSchema | string) { + if (!jsonSchema) { + return undefined + } + + if (typeof jsonSchema !== "string") { + return jsonSchema + } + + return await loadJsonSchema(jsonSchema) +} diff --git a/core/metadata/Metadata.ts b/metadata/metadata/Metadata.ts similarity index 100% rename from core/metadata/Metadata.ts rename to metadata/metadata/Metadata.ts diff --git a/core/metadata/index.ts b/metadata/metadata/index.ts similarity index 100% rename from core/metadata/index.ts rename to metadata/metadata/index.ts diff --git a/core/package.json b/metadata/package.json similarity index 88% rename from core/package.json rename to metadata/package.json index 90706846..1022e7b2 100644 --- a/core/package.json +++ b/metadata/package.json @@ -1,9 +1,10 @@ { - "name": "@dpkit/core", + "name": "@dpkit/metadata", "type": "module", "version": "0.0.0-dev", "exports": "./build/index.js", "sideEffects": false, + "files": ["build"], "license": "MIT", "author": "Evgeny Karev", "repository": "https://github.com/datisthq/dpkit", @@ -18,7 +19,7 @@ "validation", "quality", "fair", - "core" + "metadata" ], "scripts": { "build": "tsc" @@ -29,8 +30,5 @@ "ajv": "^8.17.1", "quick-lru": "^7.0.1", "tiny-invariant": "^1.3.3" - }, - "devDependencies": { - "@dpkit/test": "workspace:*" } } diff --git a/core/package/Contributor.ts b/metadata/package/Contributor.ts similarity index 100% rename from core/package/Contributor.ts rename to 
metadata/package/Contributor.ts diff --git a/core/package/Package.ts b/metadata/package/Package.ts similarity index 100% rename from core/package/Package.ts rename to metadata/package/Package.ts diff --git a/core/package/assert.spec.ts b/metadata/package/assert.spec.ts similarity index 67% rename from core/package/assert.spec.ts rename to metadata/package/assert.spec.ts index e5253b77..34791144 100644 --- a/core/package/assert.spec.ts +++ b/metadata/package/assert.spec.ts @@ -1,5 +1,4 @@ import { describe, expect, expectTypeOf, it } from "vitest" -import { AssertionError } from "../error/index.ts" import type { Package } from "./Package.ts" import { assertPackage } from "./assert.ts" @@ -21,12 +20,12 @@ describe("assertPackage", () => { expect(datapackage).toEqual(descriptor) }) - it("throws AssertionError when package is invalid", async () => { + it("throws Error when package is invalid", async () => { const descriptor = { - name: 123, // Should be a string - resources: "not-an-array", // Should be an array + name: 123, + resources: "not-an-array", } - await expect(assertPackage(descriptor)).rejects.toThrow(AssertionError) + await expect(assertPackage(descriptor)).rejects.toThrow(Error) }) }) diff --git a/core/package/assert.ts b/metadata/package/assert.ts similarity index 60% rename from core/package/assert.ts rename to metadata/package/assert.ts index b4980d35..58b3dfd3 100644 --- a/core/package/assert.ts +++ b/metadata/package/assert.ts @@ -1,5 +1,4 @@ import type { Descriptor } from "../descriptor/index.ts" -import { AssertionError } from "../error/index.ts" import type { Package } from "./Package.ts" import { validatePackageMetadata } from "./validate.ts" @@ -12,8 +11,13 @@ export async function assertPackage( basepath?: string }, ) { - const { errors, dataPackage } = await validatePackageMetadata(source, options) + const report = await validatePackageMetadata(source, options) - if (!dataPackage) throw new AssertionError(errors) - return dataPackage + if (!report.dataPackage) { + throw new Error( + `Package "${JSON.stringify(source).slice(0, 100)}" is not valid`, + ) + } + + return report.dataPackage } diff --git a/core/package/convert/fromDescriptor.ts b/metadata/package/convert/fromDescriptor.ts similarity index 100% rename from core/package/convert/fromDescriptor.ts rename to metadata/package/convert/fromDescriptor.ts diff --git a/core/package/convert/toDescriptor.ts b/metadata/package/convert/toDescriptor.ts similarity index 100% rename from core/package/convert/toDescriptor.ts rename to metadata/package/convert/toDescriptor.ts diff --git a/core/package/fixtures/dialect.json b/metadata/package/fixtures/dialect.json similarity index 100% rename from core/package/fixtures/dialect.json rename to metadata/package/fixtures/dialect.json diff --git a/core/package/fixtures/generated/validatePackageDescriptor-should-validate-camtrap-dp-144_965007081/recording.har b/metadata/package/fixtures/generated/validatePackageDescriptor-should-validate-camtrap-dp-144_965007081/recording.har similarity index 100% rename from core/package/fixtures/generated/validatePackageDescriptor-should-validate-camtrap-dp-144_965007081/recording.har rename to metadata/package/fixtures/generated/validatePackageDescriptor-should-validate-camtrap-dp-144_965007081/recording.har diff --git a/core/package/fixtures/generated/validatePackageMetadata-should-validate-camtrap-dp-144_2984677073/recording.har b/metadata/package/fixtures/generated/validatePackageMetadata-should-validate-camtrap-dp-144_2984677073/recording.har 
similarity index 100% rename from core/package/fixtures/generated/validatePackageMetadata-should-validate-camtrap-dp-144_2984677073/recording.har rename to metadata/package/fixtures/generated/validatePackageMetadata-should-validate-camtrap-dp-144_2984677073/recording.har diff --git a/core/package/fixtures/package-invalid.json b/metadata/package/fixtures/package-invalid.json similarity index 100% rename from core/package/fixtures/package-invalid.json rename to metadata/package/fixtures/package-invalid.json diff --git a/core/package/fixtures/package.json b/metadata/package/fixtures/package.json similarity index 100% rename from core/package/fixtures/package.json rename to metadata/package/fixtures/package.json diff --git a/core/package/fixtures/schema.json b/metadata/package/fixtures/schema.json similarity index 100% rename from core/package/fixtures/schema.json rename to metadata/package/fixtures/schema.json diff --git a/core/package/fixtures/table.csv b/metadata/package/fixtures/table.csv similarity index 100% rename from core/package/fixtures/table.csv rename to metadata/package/fixtures/table.csv diff --git a/core/package/index.ts b/metadata/package/index.ts similarity index 91% rename from core/package/index.ts rename to metadata/package/index.ts index c5596f4d..ed0997be 100644 --- a/core/package/index.ts +++ b/metadata/package/index.ts @@ -6,4 +6,3 @@ export { validatePackageMetadata } from "./validate.ts" export { convertPackageFromDescriptor } from "./convert/fromDescriptor.ts" export { convertPackageToDescriptor } from "./convert/toDescriptor.ts" export type { Contributor } from "./Contributor.ts" -export { mergePackages } from "./merge.ts" diff --git a/core/package/load.spec.ts b/metadata/package/load.spec.ts similarity index 100% rename from core/package/load.spec.ts rename to metadata/package/load.spec.ts diff --git a/core/package/load.ts b/metadata/package/load.ts similarity index 100% rename from core/package/load.ts rename to metadata/package/load.ts diff --git a/core/package/save.ts b/metadata/package/save.ts similarity index 100% rename from core/package/save.ts rename to metadata/package/save.ts diff --git a/core/package/validate.spec.ts b/metadata/package/validate.spec.ts similarity index 64% rename from core/package/validate.spec.ts rename to metadata/package/validate.spec.ts index a4233ea3..87f065d3 100644 --- a/core/package/validate.spec.ts +++ b/metadata/package/validate.spec.ts @@ -1,5 +1,5 @@ -import { useRecording } from "@dpkit/test" import { describe, expect, it } from "vitest" +import { useRecording } from "vitest-polly" import { loadDescriptor } from "../descriptor/index.ts" import { validatePackageMetadata } from "./validate.ts" @@ -17,10 +17,10 @@ describe("validatePackageMetadata", () => { ], } - const { valid, errors } = await validatePackageMetadata(descriptor) + const report = await validatePackageMetadata(descriptor) - expect(valid).toBe(true) - expect(errors).toEqual([]) + expect(report.valid).toBe(true) + expect(report.errors).toEqual([]) }) it("returns validation errors for invalid package", async () => { @@ -29,15 +29,15 @@ describe("validatePackageMetadata", () => { resources: "not-an-array", // Should be an array } - const { valid, errors } = await validatePackageMetadata(descriptor) + const report = await validatePackageMetadata(descriptor) - expect(valid).toBe(false) - expect(errors.length).toBeGreaterThan(0) + expect(report.valid).toBe(false) + expect(report.errors.length).toBeGreaterThan(0) - const error = errors[0] + const error = 
report.errors[0] expect(error).toBeDefined() if (error) { - expect(error.keyword).toBe("type") + expect(error.pointer).toBe("/name") } }) @@ -46,7 +46,7 @@ describe("validatePackageMetadata", () => { "https://raw.githubusercontent.com/tdwg/camtrap-dp/refs/tags/1.0.2/example/datapackage.json", ) - const { valid } = await validatePackageMetadata(descriptor) - expect(valid).toBe(true) + const report = await validatePackageMetadata(descriptor) + expect(report.valid).toBe(true) }) }) diff --git a/core/package/validate.ts b/metadata/package/validate.ts similarity index 70% rename from core/package/validate.ts rename to metadata/package/validate.ts index d58ecf63..258efcab 100644 --- a/core/package/validate.ts +++ b/metadata/package/validate.ts @@ -1,4 +1,5 @@ import type { Descriptor } from "../descriptor/index.ts" +import { loadDescriptor } from "../descriptor/index.ts" import { validateDescriptor } from "../profile/index.ts" import type { Package } from "./Package.ts" import { convertPackageFromDescriptor } from "./convert/fromDescriptor.ts" @@ -9,27 +10,32 @@ const DEFAULT_PROFILE = "https://datapackage.org/profiles/1.0/datapackage.json" * Validate a Package descriptor (JSON Object) against its profile */ export async function validatePackageMetadata( - source: Descriptor | Package, + source: Package | Descriptor | string, options?: { basepath?: string }, ) { - const descriptor = source as Descriptor + const descriptor = + typeof source === "string" + ? await loadDescriptor(source) + : (source as Descriptor) const profile = typeof descriptor.$schema === "string" ? descriptor.$schema : DEFAULT_PROFILE - const { valid, errors } = await validateDescriptor(descriptor, { profile }) + const report = await validateDescriptor(descriptor, { + profile, + }) let dataPackage: Package | undefined = undefined - if (valid) { + if (report.valid) { // Validation + normalization = we can cast it dataPackage = convertPackageFromDescriptor(descriptor, { basepath: options?.basepath, }) as unknown as Package } - return { valid, errors, dataPackage } + return { ...report, dataPackage } } diff --git a/core/path/basepath.spec.ts b/metadata/path/basepath.spec.ts similarity index 100% rename from core/path/basepath.spec.ts rename to metadata/path/basepath.spec.ts diff --git a/core/path/basepath.ts b/metadata/path/basepath.ts similarity index 94% rename from core/path/basepath.ts rename to metadata/path/basepath.ts index 6cfc93ae..f212baa9 100644 --- a/core/path/basepath.ts +++ b/metadata/path/basepath.ts @@ -1,4 +1,4 @@ -import { node } from "../node/index.ts" +import { node } from "../platform/index.ts" import { isRemotePath } from "./path.ts" export async function resolveBasepath(path: string) { diff --git a/core/path/denormalize.spec.ts b/metadata/path/denormalize.spec.ts similarity index 100% rename from core/path/denormalize.spec.ts rename to metadata/path/denormalize.spec.ts diff --git a/core/path/denormalize.ts b/metadata/path/denormalize.ts similarity index 96% rename from core/path/denormalize.ts rename to metadata/path/denormalize.ts index 06f29a52..35da9d8b 100644 --- a/core/path/denormalize.ts +++ b/metadata/path/denormalize.ts @@ -1,4 +1,4 @@ -import { node } from "../node/index.ts" +import { node } from "../platform/index.ts" import { isRemotePath } from "./path.ts" export function denormalizePath(path: string, options: { basepath?: string }) { diff --git a/core/path/index.ts b/metadata/path/index.ts similarity index 100% rename from core/path/index.ts rename to metadata/path/index.ts diff --git 
a/core/path/normalize.spec.ts b/metadata/path/normalize.spec.ts similarity index 100% rename from core/path/normalize.spec.ts rename to metadata/path/normalize.spec.ts diff --git a/core/path/normalize.ts b/metadata/path/normalize.ts similarity index 98% rename from core/path/normalize.ts rename to metadata/path/normalize.ts index dd91dbfb..3e28d79e 100644 --- a/core/path/normalize.ts +++ b/metadata/path/normalize.ts @@ -1,4 +1,4 @@ -import { node } from "../node/index.ts" +import { node } from "../platform/index.ts" import { isRemotePath } from "./path.ts" export function normalizePath(path: string, options: { basepath?: string }) { diff --git a/core/path/path.spec.ts b/metadata/path/path.spec.ts similarity index 100% rename from core/path/path.spec.ts rename to metadata/path/path.spec.ts diff --git a/core/path/path.ts b/metadata/path/path.ts similarity index 96% rename from core/path/path.ts rename to metadata/path/path.ts index 79732851..81c361d6 100644 --- a/core/path/path.ts +++ b/metadata/path/path.ts @@ -1,5 +1,5 @@ import slugify from "@sindresorhus/slugify" -import { node } from "../node/index.ts" +import { node } from "../platform/index.ts" export function isRemotePath(path: string) { const protocol = getProtocol(path) diff --git a/core/node/index.ts b/metadata/platform/index.ts similarity index 100% rename from core/node/index.ts rename to metadata/platform/index.ts diff --git a/metadata/platform/node.spec.ts b/metadata/platform/node.spec.ts new file mode 100644 index 00000000..a72f07d9 --- /dev/null +++ b/metadata/platform/node.spec.ts @@ -0,0 +1,18 @@ +import { describe, expect, it } from "vitest" +import { node } from "./node.ts" + +describe("loadNodeApis", () => { + it("should return node APIs when running in Node.js environment", async () => { + expect(node).toBeDefined() + expect(node?.fs).toBeDefined() + expect(node?.path).toBeDefined() + }) + + it("should have fs.readFile function", async () => { + expect(typeof node?.fs.readFile).toBe("function") + }) + + it("should have path.join function", async () => { + expect(typeof node?.path.join).toBe("function") + }) +}) diff --git a/core/node/load.ts b/metadata/platform/node.ts similarity index 69% rename from core/node/load.ts rename to metadata/platform/node.ts index c628b02b..fdcba00d 100644 --- a/core/node/load.ts +++ b/metadata/platform/node.ts @@ -1,4 +1,6 @@ -export async function loadNodeApis() { +export const node = await loadNodeApis() + +async function loadNodeApis() { if (globalThis.process) { const fs = await import("node:fs/promises") const path = await import("node:path") diff --git a/core/profile/Profile.ts b/metadata/profile/Profile.ts similarity index 58% rename from core/profile/Profile.ts rename to metadata/profile/Profile.ts index 2da02e2b..054db37c 100644 --- a/core/profile/Profile.ts +++ b/metadata/profile/Profile.ts @@ -1,7 +1,6 @@ -import type { Descriptor } from "../descriptor/index.ts" +import type { JsonSchema } from "../json/index.ts" -// TODO: Narrow to valid JSON Schema -export type Profile = Descriptor +export type Profile = JsonSchema export type ProfileType = "dialect" | "package" | "resource" | "schema" export type ProfileRegistry = { type: ProfileType diff --git a/core/profile/assert.spec.ts b/metadata/profile/assert.spec.ts similarity index 66% rename from core/profile/assert.spec.ts rename to metadata/profile/assert.spec.ts index 038ad5e3..4dd8aa4c 100644 --- a/core/profile/assert.spec.ts +++ b/metadata/profile/assert.spec.ts @@ -1,12 +1,7 @@ -import { beforeAll, describe, expect, it, vi } 
from "vitest" -import { ajv } from "./ajv.ts" +import { describe, expect, it } from "vitest" import { assertProfile } from "./assert.ts" describe("assertProfile", () => { - beforeAll(() => { - vi.spyOn(ajv, "validateSchema") - }) - it("returns profile for valid descriptor without options", async () => { const descriptor = { name: "test", @@ -17,48 +12,17 @@ describe("assertProfile", () => { expect(profile).toEqual(descriptor) }) - it("throws error for invalid schema", async () => { + it("throws error for custom profile path with mismatched type", async () => { const descriptor = { name: "test", } - vi.mocked(ajv.validateSchema).mockImplementationOnce(async () => { - ajv.errors = [ - { - keyword: "type", - instancePath: "/name", - schemaPath: "#/properties/name/type", - params: {}, - message: "must be string", - }, - ] - }) - - await expect(assertProfile(descriptor)).rejects.toThrow( - "Profile at path undefined is invalid", - ) - }) - - it("throws error when error message is not available", async () => { - const descriptor = { - name: "test", - } - - vi.mocked(ajv.validateSchema).mockImplementationOnce(async () => { - ajv.errors = [ - { - keyword: "required", - instancePath: "", - schemaPath: "#/required", - params: {}, - message: undefined, - }, - ] - }) - - await expect(assertProfile(descriptor)).rejects.toThrow( - "Profile at path undefined is invalid", - ) + await expect( + assertProfile(descriptor, { + path: "custom-profile.json", + type: "package", + }), + ).rejects.toThrow("Profile at path custom-profile.json is invalid") }) it("returns profile for official profile path", async () => { @@ -87,17 +51,19 @@ describe("assertProfile", () => { expect(profile).toEqual(descriptor) }) - it("throws error for profile type mismatch", async () => { + it("throws error when profile path does not match the specified type", async () => { const descriptor = { name: "test", } await expect( assertProfile(descriptor, { - path: "custom-profile.json", + path: "https://datapackage.org/profiles/1.0/tableschema.json", type: "package", }), - ).rejects.toThrow("Profile at path custom-profile.json is invalid") + ).rejects.toThrow( + "Profile at path https://datapackage.org/profiles/1.0/tableschema.json is invalid", + ) }) it("returns profile when only path is provided", async () => { @@ -124,23 +90,26 @@ describe("assertProfile", () => { expect(profile).toEqual(descriptor) }) - it("throws error when both schema and type validation fail", async () => { + it("returns profile when descriptor extends official profile via allOf", async () => { const descriptor = { name: "test", + allOf: ["https://datapackage.org/profiles/1.0/datapackage.json"], } - vi.mocked(ajv.validateSchema).mockImplementationOnce(async () => { - ajv.errors = [ - { - keyword: "type", - instancePath: "/name", - schemaPath: "#/properties/name/type", - params: {}, - message: "must be string", - }, - ] + const profile = await assertProfile(descriptor, { + path: "custom-profile.json", + type: "package", }) + expect(profile).toEqual(descriptor) + }) + + it("throws error when custom profile does not extend matching official profile", async () => { + const descriptor = { + name: "test", + allOf: ["https://datapackage.org/profiles/1.0/tableschema.json"], + } + await expect( assertProfile(descriptor, { path: "custom-profile.json", diff --git a/core/profile/assert.ts b/metadata/profile/assert.ts similarity index 71% rename from core/profile/assert.ts rename to metadata/profile/assert.ts index 0ca8e7b1..adb820fd 100644 --- a/core/profile/assert.ts +++ 
b/metadata/profile/assert.ts @@ -1,11 +1,12 @@ -import type { Descriptor } from "../descriptor/index.ts" +import type { JsonSchema } from "../json/index.ts" import type { Profile } from "./Profile.ts" import type { ProfileType } from "./Profile.ts" -import { ajv } from "./ajv.ts" import { profileRegistry } from "./registry.ts" +// TODO: It should narrow to JSON Schema + export async function assertProfile( - descriptor: Descriptor, + jsonSchema: JsonSchema, options?: { path?: string type?: ProfileType @@ -13,12 +14,7 @@ export async function assertProfile( ) { const errors: { message: string }[] = [] - await ajv.validateSchema(descriptor) - for (const error of ajv.errors ?? []) { - errors.push({ message: error.message ?? error.keyword }) - } - - if (!checkProfileType(descriptor, options)) { + if (!checkProfileType(jsonSchema, options)) { errors.push({ message: `Profile at ${options?.path} is not a valid ${options?.type} profile`, }) @@ -29,11 +25,11 @@ export async function assertProfile( throw new Error(`Profile at path ${options?.path} is invalid`) } - return descriptor as Profile + return jsonSchema as Profile } function checkProfileType( - descriptor: Descriptor, + jsonSchema: JsonSchema, options?: { path?: string type?: ProfileType @@ -53,8 +49,8 @@ function checkProfileType( if (options.path === typeProfile.path) return true // The profile extends one of the official profiles - if (Array.isArray(descriptor.allOf)) { - for (const ref of Object.values(descriptor.allOf)) { + if (Array.isArray(jsonSchema.allOf)) { + for (const ref of Object.values(jsonSchema.allOf)) { if (ref === typeProfile.path) return true } } diff --git a/core/profile/index.ts b/metadata/profile/index.ts similarity index 53% rename from core/profile/index.ts rename to metadata/profile/index.ts index 9184eb8a..b560bca5 100644 --- a/core/profile/index.ts +++ b/metadata/profile/index.ts @@ -1 +1,2 @@ +export type { Profile } from "./Profile.ts" export { validateDescriptor } from "./validate.ts" diff --git a/metadata/profile/load.ts b/metadata/profile/load.ts new file mode 100644 index 00000000..33d00292 --- /dev/null +++ b/metadata/profile/load.ts @@ -0,0 +1,17 @@ +import { loadJsonSchema } from "../json/index.ts" +import type { ProfileType } from "./Profile.ts" +import { assertProfile } from "./assert.ts" +import { profileRegistry } from "./registry.ts" + +export async function loadProfile( + path: string, + options?: { type?: ProfileType }, +) { + const profile = profileRegistry.find(profile => profile.path === path)?.profile + if (profile) { + return profile + } + + const jsonSchema = await loadJsonSchema(path, { onlyRemote: true }) + return await assertProfile(jsonSchema, { path, type: options?.type }) +} diff --git a/core/profile/registry.ts b/metadata/profile/registry.ts similarity index 75% rename from core/profile/registry.ts rename to metadata/profile/registry.ts index 36d74293..7369edc2 100644 --- a/core/profile/registry.ts +++ b/metadata/profile/registry.ts @@ -1,12 +1,12 @@ +import dialect_1_0 from "../assets/dialect-1.0.json" with { type: "json" } +import dialect_2_0 from "../assets/dialect-2.0.json" with { type: "json" } +import package_1_0 from "../assets/package-1.0.json" with { type: "json" } +import package_2_0 from "../assets/package-2.0.json" with { type: "json" } +import resource_1_0 from "../assets/resource-1.0.json" with { type: "json" } +import resource_2_0 from "../assets/resource-2.0.json" with { type: "json" } +import schema_1_0 from "../assets/schema-1.0.json" with { type: "json" } +import 
schema_2_0 from "../assets/schema-2.0.json" with { type: "json" } import type { ProfileRegistry } from "./Profile.ts" -import dialect_1_0 from "./registry/dialect-1.0.json" with { type: "json" } -import dialect_2_0 from "./registry/dialect-2.0.json" with { type: "json" } -import package_1_0 from "./registry/package-1.0.json" with { type: "json" } -import package_2_0 from "./registry/package-2.0.json" with { type: "json" } -import resource_1_0 from "./registry/resource-1.0.json" with { type: "json" } -import resource_2_0 from "./registry/resource-2.0.json" with { type: "json" } -import schema_1_0 from "./registry/schema-1.0.json" with { type: "json" } -import schema_2_0 from "./registry/schema-2.0.json" with { type: "json" } export const profileRegistry: ProfileRegistry = [ { diff --git a/metadata/profile/resolve.ts b/metadata/profile/resolve.ts new file mode 100644 index 00000000..0548ab9d --- /dev/null +++ b/metadata/profile/resolve.ts @@ -0,0 +1,10 @@ +import type { Profile } from "./Profile.ts" +import { loadProfile } from "./load.ts" + +export async function resolveProfile(profile: Profile | string) { + if (typeof profile !== "string") { + return profile + } + + return await loadProfile(profile) +} diff --git a/metadata/profile/validate.ts b/metadata/profile/validate.ts new file mode 100644 index 00000000..e101704d --- /dev/null +++ b/metadata/profile/validate.ts @@ -0,0 +1,23 @@ +import type { Descriptor } from "../descriptor/index.ts" +import type { MetadataError } from "../error/index.ts" +import { inspectJsonValue } from "../json/index.ts" +import { createReport } from "../report/index.ts" +import type { Profile } from "./Profile.ts" +import { resolveProfile } from "./resolve.ts" + +export async function validateDescriptor( + descriptor: Descriptor, + options: { + profile: Profile | string + }, +) { + const profile = await resolveProfile(options.profile) + const errors = await inspectJsonValue(descriptor, { jsonSchema: profile }) + + return createReport<MetadataError>( + errors.map(error => ({ + type: "metadata", + ...error, + })), + ) +} diff --git a/metadata/report/Report.ts b/metadata/report/Report.ts new file mode 100644 index 00000000..1100eac9 --- /dev/null +++ b/metadata/report/Report.ts @@ -0,0 +1,6 @@ +import type { DpkitError } from "../error/index.ts" + +export interface Report<T extends DpkitError = DpkitError> { + valid: boolean + errors: T[] +} diff --git a/metadata/report/create.ts b/metadata/report/create.ts new file mode 100644 index 00000000..fbefee52 --- /dev/null +++ b/metadata/report/create.ts @@ -0,0 +1,11 @@ +import type { DpkitError } from "../error/index.ts" + +export function createReport<T extends DpkitError = DpkitError>( + errors?: T[], + options?: { maxErrors?: number }, +) { + errors = (errors ?? 
[]).slice(0, options?.maxErrors) + const valid = errors.length === 0 + + return { errors, valid } +} diff --git a/metadata/report/index.ts b/metadata/report/index.ts new file mode 100644 index 00000000..20b5e503 --- /dev/null +++ b/metadata/report/index.ts @@ -0,0 +1,2 @@ +export type { Report } from "./Report.ts" +export { createReport } from "./create.ts" diff --git a/core/resource/License.ts b/metadata/resource/License.ts similarity index 100% rename from core/resource/License.ts rename to metadata/resource/License.ts diff --git a/core/resource/Resource.ts b/metadata/resource/Resource.ts similarity index 86% rename from core/resource/Resource.ts rename to metadata/resource/Resource.ts index 1df4a803..f60a1008 100644 --- a/core/resource/Resource.ts +++ b/metadata/resource/Resource.ts @@ -1,6 +1,7 @@ -import type { Dialect } from "../dialect/Dialect.ts" +import type { Dialect } from "../dialect/index.ts" import type { Metadata } from "../metadata/index.ts" -import type { Schema } from "../schema/Schema.ts" +import type { Profile } from "../profile/index.ts" +import type { Schema } from "../schema/index.ts" import type { License } from "./License.ts" import type { Source } from "./Source.ts" @@ -99,4 +100,11 @@ export interface Resource extends Metadata { * @see https://datapackage.org/standard/table-schema/ */ schema?: string | Schema + + /** + * Schema for the json data + * Describes fields in the json, constraints, etc. + * @see https://json-schema.org/ + */ + jsonSchema?: string | Profile } diff --git a/core/resource/Source.ts b/metadata/resource/Source.ts similarity index 100% rename from core/resource/Source.ts rename to metadata/resource/Source.ts diff --git a/core/resource/assert.spec.ts b/metadata/resource/assert.spec.ts similarity index 71% rename from core/resource/assert.spec.ts rename to metadata/resource/assert.spec.ts index d3a58358..a9369348 100644 --- a/core/resource/assert.spec.ts +++ b/metadata/resource/assert.spec.ts @@ -1,5 +1,4 @@ import { describe, expect, expectTypeOf, it } from "vitest" -import { AssertionError } from "../error/index.ts" import type { Resource } from "./Resource.ts" import { assertResource } from "./assert.ts" @@ -18,14 +17,12 @@ describe("assertResource", () => { expect(resource).toEqual(descriptor) }) - it("throws AssertionError when resource is invalid", async () => { + it("throws Error when resource is invalid", async () => { const invalidResource = { - name: 123, // Should be a string - path: true, // Should be a string or array of strings + name: 123, + path: true, } - await expect(assertResource(invalidResource)).rejects.toThrow( - AssertionError, - ) + await expect(assertResource(invalidResource)).rejects.toThrow(Error) }) }) diff --git a/core/resource/assert.ts b/metadata/resource/assert.ts similarity index 61% rename from core/resource/assert.ts rename to metadata/resource/assert.ts index 50a3ebb5..55cdb89f 100644 --- a/core/resource/assert.ts +++ b/metadata/resource/assert.ts @@ -1,5 +1,4 @@ import type { Descriptor } from "../descriptor/index.ts" -import { AssertionError } from "../error/index.ts" import type { Resource } from "./Resource.ts" import { validateResourceMetadata } from "./validate.ts" @@ -12,8 +11,13 @@ export async function assertResource( basepath?: string }, ) { - const { errors, resource } = await validateResourceMetadata(source, options) + const report = await validateResourceMetadata(source, options) - if (!resource) throw new AssertionError(errors) - return resource + if (!report.resource) { + throw new Error( + 
`Resource "${JSON.stringify(source).slice(0, 100)}" is not valid`, + ) + } + + return report.resource } diff --git a/core/resource/convert/fromDescriptor.ts b/metadata/resource/convert/fromDescriptor.ts similarity index 100% rename from core/resource/convert/fromDescriptor.ts rename to metadata/resource/convert/fromDescriptor.ts diff --git a/core/resource/convert/toDescriptor.ts b/metadata/resource/convert/toDescriptor.ts similarity index 100% rename from core/resource/convert/toDescriptor.ts rename to metadata/resource/convert/toDescriptor.ts diff --git a/core/resource/fixtures/resource-invalid.json b/metadata/resource/fixtures/resource-invalid.json similarity index 100% rename from core/resource/fixtures/resource-invalid.json rename to metadata/resource/fixtures/resource-invalid.json diff --git a/core/resource/fixtures/resource.json b/metadata/resource/fixtures/resource.json similarity index 100% rename from core/resource/fixtures/resource.json rename to metadata/resource/fixtures/resource.json diff --git a/core/resource/helpers.ts b/metadata/resource/helpers.ts similarity index 100% rename from core/resource/helpers.ts rename to metadata/resource/helpers.ts diff --git a/core/resource/index.ts b/metadata/resource/index.ts similarity index 88% rename from core/resource/index.ts rename to metadata/resource/index.ts index 095f3d1f..48b20fce 100644 --- a/core/resource/index.ts +++ b/metadata/resource/index.ts @@ -1,5 +1,5 @@ export type { Resource } from "./Resource.ts" -export { inferResourceName, inferResourceFormat } from "./infer.ts" +export { inferName, inferFormat } from "./infer.ts" export { assertResource } from "./assert.ts" export { loadResourceDescriptor } from "./load.ts" export { saveResourceDescriptor } from "./save.ts" diff --git a/core/resource/infer.spec.ts b/metadata/resource/infer.spec.ts similarity index 65% rename from core/resource/infer.spec.ts rename to metadata/resource/infer.spec.ts index 564aeec7..5f0413e1 100644 --- a/core/resource/infer.spec.ts +++ b/metadata/resource/infer.spec.ts @@ -1,125 +1,125 @@ import { describe, expect, it } from "vitest" -import { inferResourceFormat, inferResourceName } from "./infer.ts" +import { inferFormat, inferName } from "./infer.ts" -describe("inferResourceName", () => { +describe("inferName", () => { it("returns existing name when provided", () => { const resource = { name: "existing-name" } - expect(inferResourceName(resource)).toBe("existing-name") + expect(inferName(resource)).toBe("existing-name") }) it("infers name from single string path", () => { const resource = { path: "/data/users.csv" } - expect(inferResourceName(resource)).toBe("users") + expect(inferName(resource)).toBe("users") }) it("infers name from first path in array", () => { const resource = { path: ["/data/users.csv", "/data/backup.csv"] } - expect(inferResourceName(resource)).toBe("users") + expect(inferName(resource)).toBe("users") }) it("infers name from URL path", () => { const resource = { path: "https://example.com/data/products.json" } - expect(inferResourceName(resource)).toBe("products") + expect(inferName(resource)).toBe("products") }) it("returns default name when no path or name", () => { const resource = {} - expect(inferResourceName(resource)).toBe("resource") + expect(inferName(resource)).toBe("resource") }) it("returns default name when path has no filename", () => { const resource = { path: "/data/folder/" } - expect(inferResourceName(resource)).toBe("resource") + expect(inferName(resource)).toBe("resource") }) it("handles complex filename 
with multiple dots", () => { const resource = { path: "/data/file.backup.csv" } - expect(inferResourceName(resource)).toBe("file") + expect(inferName(resource)).toBe("file") }) it("slugifies filename with spaces and special characters", () => { const resource = { path: "/data/My Data File!.csv" } - expect(inferResourceName(resource)).toBe("my-data-file") + expect(inferName(resource)).toBe("my-data-file") }) }) -describe("inferResourceFormat", () => { +describe("inferFormat", () => { it("returns existing format when provided", () => { const resource = { format: "json" } - expect(inferResourceFormat(resource)).toBe("json") + expect(inferFormat(resource)).toBe("json") }) it("infers format from single string path", () => { const resource = { path: "/data/users.csv" } - expect(inferResourceFormat(resource)).toBe("csv") + expect(inferFormat(resource)).toBe("csv") }) it("infers format from first path in array", () => { const resource = { path: ["/data/users.xlsx", "/data/backup.csv"] } - expect(inferResourceFormat(resource)).toBe("xlsx") + expect(inferFormat(resource)).toBe("xlsx") }) it("infers format from URL path", () => { const resource = { path: "https://example.com/data/products.json" } - expect(inferResourceFormat(resource)).toBe("json") + expect(inferFormat(resource)).toBe("json") }) it("returns lowercase format", () => { const resource = { path: "/data/file.CSV" } - expect(inferResourceFormat(resource)).toBe("csv") + expect(inferFormat(resource)).toBe("csv") }) it("handles multiple extensions", () => { const resource = { path: "/data/file.tar.gz" } - expect(inferResourceFormat(resource)).toBe("gz") + expect(inferFormat(resource)).toBe("gz") }) it("returns undefined when no path", () => { const resource = {} - expect(inferResourceFormat(resource)).toBeUndefined() + expect(inferFormat(resource)).toBeUndefined() }) it("returns undefined when path has no extension", () => { const resource = { path: "/data/file" } - expect(inferResourceFormat(resource)).toBeUndefined() + expect(inferFormat(resource)).toBeUndefined() }) it("returns undefined when filename cannot be determined", () => { const resource = { path: "/data/folder/" } - expect(inferResourceFormat(resource)).toBeUndefined() + expect(inferFormat(resource)).toBeUndefined() }) it("infers postgresql protocol from connection string", () => { const resource = { path: "postgresql://user:password@localhost:5432/database", } - expect(inferResourceFormat(resource)).toBe("postgresql") + expect(inferFormat(resource)).toBe("postgresql") }) it("infers mysql protocol from connection string", () => { const resource = { path: "mysql://user:password@localhost:3306/database" } - expect(inferResourceFormat(resource)).toBe("mysql") + expect(inferFormat(resource)).toBe("mysql") }) it("infers sqlite protocol from file path", () => { const resource = { path: "sqlite:///path/to/database.db" } - expect(inferResourceFormat(resource)).toBe("sqlite") + expect(inferFormat(resource)).toBe("sqlite") }) it("infers sqlite protocol with file scheme", () => { const resource = { path: "sqlite://localhost/path/to/database.db" } - expect(inferResourceFormat(resource)).toBe("sqlite") + expect(inferFormat(resource)).toBe("sqlite") }) it("handles postgres protocol with ssl parameters", () => { const resource = { path: "postgresql://user:pass@host:5432/db?sslmode=require", } - expect(inferResourceFormat(resource)).toBe("postgresql") + expect(inferFormat(resource)).toBe("postgresql") }) it("handles mysql protocol with options", () => { const resource = { path: 
"mysql://user:pass@host:3306/db?charset=utf8" } - expect(inferResourceFormat(resource)).toBe("mysql") + expect(inferFormat(resource)).toBe("mysql") }) }) diff --git a/core/resource/infer.ts b/metadata/resource/infer.ts similarity index 87% rename from core/resource/infer.ts rename to metadata/resource/infer.ts index a025b64f..600ed510 100644 --- a/core/resource/infer.ts +++ b/metadata/resource/infer.ts @@ -1,7 +1,7 @@ import { getFilename, getFormat, getName, getProtocol } from "../path/index.ts" import type { Resource } from "./Resource.ts" -export function inferResourceName(resource: Partial) { +export function inferName(resource: Partial) { let name = resource.name if (!name) { @@ -15,7 +15,7 @@ export function inferResourceName(resource: Partial) { return name ?? "resource" } -export function inferResourceFormat(resource: Partial) { +export function inferFormat(resource: Partial) { let format = resource.format if (!format) { diff --git a/core/resource/load.spec.ts b/metadata/resource/load.spec.ts similarity index 100% rename from core/resource/load.spec.ts rename to metadata/resource/load.spec.ts diff --git a/core/resource/load.ts b/metadata/resource/load.ts similarity index 100% rename from core/resource/load.ts rename to metadata/resource/load.ts diff --git a/core/resource/save.ts b/metadata/resource/save.ts similarity index 100% rename from core/resource/save.ts rename to metadata/resource/save.ts diff --git a/core/resource/validate.spec.ts b/metadata/resource/validate.spec.ts similarity index 56% rename from core/resource/validate.spec.ts rename to metadata/resource/validate.spec.ts index 04a06d73..c7f9af7c 100644 --- a/core/resource/validate.spec.ts +++ b/metadata/resource/validate.spec.ts @@ -2,7 +2,7 @@ import { describe, expect, it } from "vitest" import { validateResourceMetadata } from "./validate.ts" describe("validateResourceMetadata", () => { - it("returns valid result for valid resource", async () => { + it("returns valid report for valid resource", async () => { const descriptor = { name: "example-resource", path: "data.csv", @@ -10,10 +10,10 @@ describe("validateResourceMetadata", () => { encoding: "utf-8", } - const result = await validateResourceMetadata(descriptor) + const report = await validateResourceMetadata(descriptor) - expect(result.valid).toBe(true) - expect(result.errors).toEqual([]) + expect(report.valid).toBe(true) + expect(report.errors).toEqual([]) }) it("returns validation errors for invalid resource", async () => { @@ -22,15 +22,15 @@ describe("validateResourceMetadata", () => { path: true, // Should be a string or array of strings } - const result = await validateResourceMetadata(invalidResource) + const report = await validateResourceMetadata(invalidResource) - expect(result.valid).toBe(false) - expect(result.errors.length).toBeGreaterThan(0) + expect(report.valid).toBe(false) + expect(report.errors.length).toBeGreaterThan(0) - const error = result.errors[0] + const error = report.errors[0] expect(error).toBeDefined() if (error) { - expect(error.keyword).toBe("type") + expect(error.pointer).toBe("/name") } }) }) diff --git a/metadata/resource/validate.ts b/metadata/resource/validate.ts new file mode 100644 index 00000000..37c25e16 --- /dev/null +++ b/metadata/resource/validate.ts @@ -0,0 +1,82 @@ +import type { Descriptor } from "../descriptor/index.ts" +import { loadDescriptor } from "../descriptor/index.ts" +import { validateDialect } from "../dialect/index.ts" +import type { MetadataError } from "../error/index.ts" +import { validateDescriptor } 
from "../profile/index.ts" +import { validateSchema } from "../schema/index.ts" +import type { Resource } from "./Resource.ts" +import { convertResourceFromDescriptor } from "./convert/fromDescriptor.ts" + +const DEFAULT_PROFILE = "https://datapackage.org/profiles/1.0/dataresource.json" + +/** + * Validate a Resource descriptor (JSON Object) against its profile + */ +export async function validateResourceMetadata( + source: Resource | Descriptor | string, + options?: { + basepath?: string + }, +) { + const descriptor = + typeof source === "string" + ? await loadDescriptor(source) + : (source as Descriptor) + + const profile = + typeof descriptor.$schema === "string" + ? descriptor.$schema + : DEFAULT_PROFILE + + const report = await validateDescriptor(descriptor, { + profile, + }) + + let resource: Resource | undefined = undefined + if (report.valid) { + // Validation + normalization = we can cast it + resource = convertResourceFromDescriptor(descriptor, { + basepath: options?.basepath, + }) as unknown as Resource + } + + if (resource) { + const dialectErorrs = await inspectDialectIfExternal(resource) + report.errors.push(...dialectErorrs) + + const schemaErorrs = await inspectSchemaIfExternal(resource) + report.errors.push(...schemaErorrs) + + // TODO: Support external JSON Schema validation as well + // https://github.com/frictionlessdata/datapackage/issues/937 + + if (report.errors.length) { + resource = undefined + report.valid = false + } + } + + return { ...report, resource } +} + +async function inspectDialectIfExternal(resource: Resource) { + const errors: MetadataError[] = [] + + if (typeof resource.dialect === "string") { + const report = await validateDialect(resource.dialect) + errors.push(...report.errors) + } + + return errors +} + +async function inspectSchemaIfExternal(resource: Resource) { + const errors: MetadataError[] = [] + + if (typeof resource.schema === "string") { + const report = await validateSchema(resource.schema) + errors.push(...report.errors) + } + + return errors +} diff --git a/core/schema/ForeignKey.ts b/metadata/schema/ForeignKey.ts similarity index 100% rename from core/schema/ForeignKey.ts rename to metadata/schema/ForeignKey.ts diff --git a/core/schema/Schema.ts b/metadata/schema/Schema.ts similarity index 100% rename from core/schema/Schema.ts rename to metadata/schema/Schema.ts diff --git a/core/schema/assert.spec.ts b/metadata/schema/assert.spec.ts similarity index 74% rename from core/schema/assert.spec.ts rename to metadata/schema/assert.spec.ts index 3ba795f2..29ea9d6f 100644 --- a/core/schema/assert.spec.ts +++ b/metadata/schema/assert.spec.ts @@ -1,5 +1,4 @@ import { describe, expect, expectTypeOf, it } from "vitest" -import { AssertionError } from "../error/index.ts" import type { Schema } from "./Schema.ts" import { assertSchema } from "./assert.ts" @@ -25,16 +24,16 @@ describe("assertSchema", () => { expect(schema).toEqual(descriptor) }) - it("throws ValidationError when schema is invalid", async () => { + it("throws Error when schema is invalid", async () => { const descriptor = { fields: [ { name: "id", - type: 123, // Should be a string + type: 123, }, ], } - await expect(assertSchema(descriptor)).rejects.toThrow(AssertionError) + await expect(assertSchema(descriptor)).rejects.toThrow(Error) }) }) diff --git a/core/schema/assert.ts b/metadata/schema/assert.ts similarity index 59% rename from core/schema/assert.ts rename to metadata/schema/assert.ts index da7ded6b..e3f5a626 100644 --- a/core/schema/assert.ts +++ 
b/metadata/schema/assert.ts @@ -1,5 +1,4 @@ import type { Descriptor } from "../descriptor/index.ts" -import { AssertionError } from "../error/index.ts" import type { Schema } from "./Schema.ts" import { validateSchema } from "./validate.ts" @@ -7,7 +6,13 @@ import { validateSchema } from "./validate.ts" * Assert a Schema descriptor (JSON Object) against its profile */ export async function assertSchema(source: Descriptor | Schema) { - const { schema, errors } = await validateSchema(source) - if (!schema) throw new AssertionError(errors) - return schema + const report = await validateSchema(source) + + if (!report.schema) { + throw new Error( + `Schema "${JSON.stringify(source).slice(0, 100)}" is not valid`, + ) + } + + return report.schema } diff --git a/core/schema/convert/fromDescriptor.ts b/metadata/schema/convert/fromDescriptor.ts similarity index 100% rename from core/schema/convert/fromDescriptor.ts rename to metadata/schema/convert/fromDescriptor.ts diff --git a/core/schema/convert/fromJsonSchema.spec.ts b/metadata/schema/convert/fromJsonSchema.spec.ts similarity index 100% rename from core/schema/convert/fromJsonSchema.spec.ts rename to metadata/schema/convert/fromJsonSchema.spec.ts diff --git a/core/schema/convert/fromJsonSchema.ts b/metadata/schema/convert/fromJsonSchema.ts similarity index 100% rename from core/schema/convert/fromJsonSchema.ts rename to metadata/schema/convert/fromJsonSchema.ts diff --git a/core/schema/convert/toDescriptor.ts b/metadata/schema/convert/toDescriptor.ts similarity index 100% rename from core/schema/convert/toDescriptor.ts rename to metadata/schema/convert/toDescriptor.ts diff --git a/core/schema/convert/toJsonSchema.spec.ts b/metadata/schema/convert/toJsonSchema.spec.ts similarity index 100% rename from core/schema/convert/toJsonSchema.spec.ts rename to metadata/schema/convert/toJsonSchema.spec.ts diff --git a/core/schema/convert/toJsonSchema.ts b/metadata/schema/convert/toJsonSchema.ts similarity index 100% rename from core/schema/convert/toJsonSchema.ts rename to metadata/schema/convert/toJsonSchema.ts diff --git a/core/schema/fixtures/schema-invalid.json b/metadata/schema/fixtures/schema-invalid.json similarity index 100% rename from core/schema/fixtures/schema-invalid.json rename to metadata/schema/fixtures/schema-invalid.json diff --git a/core/schema/fixtures/schema.json b/metadata/schema/fixtures/schema.json similarity index 100% rename from core/schema/fixtures/schema.json rename to metadata/schema/fixtures/schema.json diff --git a/core/schema/index.ts b/metadata/schema/index.ts similarity index 100% rename from core/schema/index.ts rename to metadata/schema/index.ts diff --git a/core/schema/load.spec.ts b/metadata/schema/load.spec.ts similarity index 100% rename from core/schema/load.spec.ts rename to metadata/schema/load.spec.ts diff --git a/core/schema/load.ts b/metadata/schema/load.ts similarity index 100% rename from core/schema/load.ts rename to metadata/schema/load.ts diff --git a/core/schema/resolve.ts b/metadata/schema/resolve.ts similarity index 100% rename from core/schema/resolve.ts rename to metadata/schema/resolve.ts diff --git a/core/schema/save.spec.ts b/metadata/schema/save.spec.ts similarity index 100% rename from core/schema/save.spec.ts rename to metadata/schema/save.spec.ts diff --git a/core/schema/save.ts b/metadata/schema/save.ts similarity index 100% rename from core/schema/save.ts rename to metadata/schema/save.ts diff --git a/core/schema/validate.spec.ts b/metadata/schema/validate.spec.ts similarity index 64% 
rename from core/schema/validate.spec.ts rename to metadata/schema/validate.spec.ts index 9fdf814a..5b6a9122 100644 --- a/core/schema/validate.spec.ts +++ b/metadata/schema/validate.spec.ts @@ -16,10 +16,10 @@ describe("validateSchema", () => { ], } - const result = await validateSchema(descriptor) + const report = await validateSchema(descriptor) - expect(result.valid).toBe(true) - expect(result.errors).toEqual([]) + expect(report.valid).toBe(true) + expect(report.errors).toEqual([]) }) it("returns validation errors for invalid schema", async () => { @@ -32,17 +32,16 @@ describe("validateSchema", () => { ], } - const result = await validateSchema(descriptor) + const report = await validateSchema(descriptor) - expect(result.valid).toBe(false) - expect(result.errors.length).toBeGreaterThan(0) + expect(report.valid).toBe(false) + expect(report.errors.length).toBeGreaterThan(0) - const error = result.errors[0] + const error = report.errors[0] expect(error).toBeDefined() if (error) { // The error could be either type or enum depending on schema validation - expect(["type", "enum"]).toContain(error.keyword) - expect(error.instancePath).toContain("/fields/0/type") + expect(error.pointer).toContain("/fields/0/type") } }) }) diff --git a/core/schema/validate.ts b/metadata/schema/validate.ts similarity index 64% rename from core/schema/validate.ts rename to metadata/schema/validate.ts index 6d470da7..8ab91923 100644 --- a/core/schema/validate.ts +++ b/metadata/schema/validate.ts @@ -1,4 +1,5 @@ import type { Descriptor } from "../descriptor/index.ts" +import { loadDescriptor } from "../descriptor/index.ts" import { validateDescriptor } from "../profile/index.ts" import type { Schema } from "./Schema.ts" import { convertSchemaFromDescriptor } from "./convert/fromDescriptor.ts" @@ -8,21 +9,26 @@ const DEFAULT_PROFILE = "https://datapackage.org/profiles/1.0/tableschema.json" /** * Validate a Schema descriptor (JSON Object) against its profile */ -export async function validateSchema(source: Descriptor | Schema) { - const descriptor = source as Descriptor +export async function validateSchema(source: Schema | Descriptor | string) { + const descriptor = + typeof source === "string" + ? await loadDescriptor(source) + : (source as Descriptor) const profile = typeof descriptor.$schema === "string" ? 
descriptor.$schema : DEFAULT_PROFILE - const { valid, errors } = await validateDescriptor(descriptor, { profile }) + const report = await validateDescriptor(descriptor, { + profile, + }) let schema: Schema | undefined = undefined - if (valid) { + if (report.valid) { // Validation + normalization = we can cast it schema = convertSchemaFromDescriptor(descriptor) as unknown as Schema } - return { valid, errors, schema } + return { ...report, schema } } diff --git a/datahub/tsconfig.json b/metadata/tsconfig.json similarity index 100% rename from datahub/tsconfig.json rename to metadata/tsconfig.json diff --git a/file/typedoc.json b/metadata/typedoc.json similarity index 100% rename from file/typedoc.json rename to metadata/typedoc.json diff --git a/ods/package.json b/ods/package.json deleted file mode 100644 index 2bc6eed5..00000000 --- a/ods/package.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "name": "@dpkit/ods", - "type": "module", - "version": "0.0.0-dev", - "exports": "./build/index.js", - "sideEffects": false, - "license": "MIT", - "author": "Evgeny Karev", - "repository": "https://github.com/datisthq/dpkit", - "description": "Fast TypeScript data management framework built on top of the Data Package standard and Polars DataFrames", - "keywords": [ - "data", - "polars", - "dataframe", - "datapackage", - "tableschema", - "typescript", - "validation", - "quality", - "fair", - "ods" - ], - "scripts": { - "build": "tsc" - }, - "dependencies": { - "@dpkit/core": "workspace:*", - "@dpkit/file": "workspace:*", - "@dpkit/table": "workspace:*", - "nodejs-polars": "^0.22.1", - "xlsx": "https://cdn.sheetjs.com/xlsx-0.20.3/xlsx-0.20.3.tgz" - }, - "devDependencies": { - "@dpkit/test": "workspace:*" - } -} diff --git a/ods/tsconfig.json b/ods/tsconfig.json deleted file mode 100644 index 3c43903c..00000000 --- a/ods/tsconfig.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "extends": "../tsconfig.json" -} diff --git a/ods/typedoc.json b/ods/typedoc.json deleted file mode 100644 index f8e49f3a..00000000 --- a/ods/typedoc.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "entryPoints": ["index.ts"], - "skipErrorChecking": true -} diff --git a/package.json b/package.json index e36527e8..d7917fbc 100644 --- a/package.json +++ b/package.json @@ -11,7 +11,7 @@ "build": "pnpm -F !browser -F !docs -F !site build", "bump": "ncu -ws -u", "clean": "rm -rf **/node_modules", - "compile": "pnpm -F cli compile", + "compile": "pnpm -F terminal compile", "coverage": "sensible-browser coverage/index.html", "format": "biome check --write", "lint": "biome check", @@ -40,7 +40,8 @@ "type-fest": "4.41.0", "typescript": "5.9.2", "vite": "7.1.10", - "vitest": "3.2.4" + "vitest": "3.2.4", + "vitest-polly": "1.3.0" }, "packageManager": "pnpm@10.11.0+sha512.6540583f41cc5f628eb3d9773ecee802f4f9ef9923cc45b69890fb47991d4b092964694ec3a4f738a420c918a333062c8b925d312f42e4f0c263eb603551f977" } diff --git a/parquet/README.md b/parquet/README.md deleted file mode 100644 index 9e7195a5..00000000 --- a/parquet/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# @dpkit/parquet - -dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev). 
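As a usage note for the refactor above: the validate functions in `@dpkit/metadata` (`validatePackageMetadata`, `validateResourceMetadata`, `validateSchema`) now share one pattern: resolve the descriptor (loading it when a string path is given), validate it against its `$schema` profile, and return a report object plus the typed entity, which is only set when the report is valid. A minimal sketch of the new API, assuming these functions are re-exported from the package root; the descriptor values and the schema URL below are hypothetical:

import { validatePackageMetadata, validateSchema } from "@dpkit/metadata"

// The functions now return a report object instead of destructured values
const packageReport = await validatePackageMetadata({
  name: "example-package",
  resources: [{ name: "data", path: "data.csv" }],
})

if (!packageReport.valid) {
  for (const error of packageReport.errors) {
    // Errors carry a JSON pointer (e.g. "/name") instead of an Ajv keyword
    console.error(`${error.pointer}: ${error.message}`)
  }
}

// Descriptors can also be validated directly from a path or URL
const schemaReport = await validateSchema("https://example.com/tableschema.json")
console.log(schemaReport.valid, schemaReport.schema)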
diff --git a/parquet/package.json b/parquet/package.json deleted file mode 100644 index 7f99502e..00000000 --- a/parquet/package.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "name": "@dpkit/parquet", - "type": "module", - "version": "0.0.0-dev", - "exports": "./build/index.js", - "sideEffects": false, - "license": "MIT", - "author": "Evgeny Karev", - "repository": "https://github.com/datisthq/dpkit", - "description": "Fast TypeScript data management framework built on top of the Data Package standard and Polars DataFrames", - "keywords": [ - "data", - "polars", - "dataframe", - "datapackage", - "tableschema", - "typescript", - "validation", - "quality", - "fair", - "parquet" - ], - "scripts": { - "build": "tsc" - }, - "dependencies": { - "@dpkit/core": "workspace:*", - "@dpkit/file": "workspace:*", - "@dpkit/table": "workspace:*", - "csv-sniffer": "^0.1.1", - "nodejs-polars": "^0.22.1" - }, - "devDependencies": { - "@dpkit/test": "workspace:*" - } -} diff --git a/parquet/tsconfig.json b/parquet/tsconfig.json deleted file mode 100644 index 3c43903c..00000000 --- a/parquet/tsconfig.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "extends": "../tsconfig.json" -} diff --git a/parquet/typedoc.json b/parquet/typedoc.json deleted file mode 100644 index f8e49f3a..00000000 --- a/parquet/typedoc.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "entryPoints": ["index.ts"], - "skipErrorChecking": true -} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 1acc9a93..222c3786 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -53,28 +53,18 @@ importers: vitest: specifier: 3.2.4 version: 3.2.4(@types/debug@4.1.12)(@types/node@24.2.0)(@vitest/ui@3.2.4)(sass@1.93.2)(sugarss@5.0.1(postcss@8.5.6))(tsx@4.20.3)(yaml@2.8.1) + vitest-polly: + specifier: 1.3.0 + version: 1.3.0(vitest@3.2.4) - arrow: + audio: dependencies: - '@dpkit/core': + '@dpkit/dataset': specifier: workspace:* - version: link:../core - '@dpkit/file': + version: link:../dataset + '@dpkit/metadata': specifier: workspace:* - version: link:../file - '@dpkit/table': - specifier: workspace:* - version: link:../table - csv-sniffer: - specifier: ^0.1.1 - version: 0.1.1 - nodejs-polars: - specifier: ^0.22.1 - version: 0.22.1 - devDependencies: - '@dpkit/test': - specifier: workspace:* - version: link:../test + version: link:../metadata browser: devDependencies: @@ -84,9 +74,9 @@ importers: '@cloudflare/vite-plugin': specifier: 1.13.13 version: 1.13.13(vite@7.1.10(@types/node@24.2.0)(sass@1.93.2)(sugarss@5.0.1(postcss@8.5.6))(tsx@4.20.3)(yaml@2.8.1))(workerd@1.20251008.0)(wrangler@4.43.0) - '@dpkit/lib': + '@dpkit/library': specifier: workspace:* - version: link:../lib + version: link:../library '@dpkit/service': specifier: workspace:* version: link:../service @@ -202,126 +192,11 @@ importers: specifier: 5.0.8 version: 5.0.8(@types/react@19.2.2)(immer@10.1.3)(react@19.2.0) - ckan: - dependencies: - '@dpkit/core': - specifier: workspace:* - version: link:../core - '@dpkit/file': - specifier: workspace:* - version: link:../file - devDependencies: - '@dpkit/test': - specifier: workspace:* - version: link:../test - - cli: - dependencies: - '@clack/prompts': - specifier: ^0.11.0 - version: 0.11.0 - '@commander-js/extra-typings': - specifier: ^14.0.0 - version: 14.0.0(commander@14.0.1) - '@dpkit/lib': - specifier: workspace:* - version: link:../lib - commander: - specifier: ^14.0.0 - version: 14.0.1 - es-toolkit: - specifier: ^1.39.10 - version: 1.39.10 - exit-hook: - specifier: ^4.0.0 - version: 4.0.0 - ink: - specifier: ^6.3.1 - version: 
6.3.1(@types/react@19.1.9)(react-devtools-core@6.1.5)(react@19.2.0) - nodejs-polars: - specifier: ^0.22.1 - version: 0.22.1 - picocolors: - specifier: ^1.1.1 - version: 1.1.1 - react: - specifier: ^19.1.1 - version: 19.2.0 - react-devtools-core: - specifier: ^6.1.2 - version: 6.1.5 - tiny-invariant: - specifier: ^1.3.3 - version: 1.3.3 - ts-extras: - specifier: ^0.14.0 - version: 0.14.0 - devDependencies: - '@dpkit/file': - specifier: workspace:* - version: link:../file - '@dpkit/test': - specifier: workspace:* - version: link:../test - '@types/node': - specifier: 24.2.0 - version: 24.2.0 - '@types/react': - specifier: 19.1.9 - version: 19.1.9 - ink-testing-library: - specifier: 4.0.0 - version: 4.0.0(@types/react@19.1.9) - - core: - dependencies: - '@sindresorhus/slugify': - specifier: ^0.9.0 - version: 0.9.1 - '@types/json-schema': - specifier: ^7.0.15 - version: 7.0.15 - ajv: - specifier: ^8.17.1 - version: 8.17.1 - quick-lru: - specifier: ^7.0.1 - version: 7.2.0 - tiny-invariant: - specifier: ^1.3.3 - version: 1.3.3 - devDependencies: - '@dpkit/test': - specifier: workspace:* - version: link:../test - - csv: - dependencies: - '@dpkit/core': - specifier: workspace:* - version: link:../core - '@dpkit/file': - specifier: workspace:* - version: link:../file - '@dpkit/table': - specifier: workspace:* - version: link:../table - csv-sniffer: - specifier: ^0.1.1 - version: 0.1.1 - nodejs-polars: - specifier: ^0.22.1 - version: 0.22.1 - devDependencies: - '@dpkit/test': - specifier: workspace:* - version: link:../test - database: dependencies: - '@dpkit/core': + '@dpkit/metadata': specifier: workspace:* - version: link:../core + version: link:../metadata '@dpkit/table': specifier: workspace:* version: link:../table @@ -341,31 +216,58 @@ importers: specifier: ^3.14.4 version: 3.15.1 nodejs-polars: - specifier: ^0.22.1 - version: 0.22.1 + specifier: ^0.22.2 + version: 0.22.2 pg: specifier: ^8.16.3 version: 8.16.3 devDependencies: - '@dpkit/file': + '@dpkit/dataset': specifier: workspace:* - version: link:../file - '@dpkit/test': - specifier: workspace:* - version: link:../test + version: link:../dataset '@types/pg': specifier: ^8.15.5 version: 8.15.5 - datahub: + dataset: dependencies: - '@dpkit/core': + '@dpkit/metadata': specifier: workspace:* - version: link:../core + version: link:../metadata + chardet: + specifier: ^2.1.0 + version: 2.1.0 + exit-hook: + specifier: ^4.0.0 + version: 4.0.0 + fflate: + specifier: ^0.8.2 + version: 0.8.2 + hasha: + specifier: ^6.0.0 + version: 6.0.0 + isbinaryfile: + specifier: ^5.0.4 + version: 5.0.6 + multistream: + specifier: ^4.1.0 + version: 4.1.0 + p-all: + specifier: ^5.0.1 + version: 5.0.1 + p-map: + specifier: ^7.0.3 + version: 7.0.3 + tempy: + specifier: 3.1.0 + version: 3.1.0 + tiny-invariant: + specifier: ^1.3.3 + version: 1.3.3 devDependencies: - '@dpkit/test': - specifier: workspace:* - version: link:../test + '@types/multistream': + specifier: 4.1.3 + version: 4.1.3 docs: devDependencies: @@ -379,8 +281,8 @@ importers: specifier: workspace:* version: link:../dpkit nodejs-polars: - specifier: 0.22.1 - version: 0.22.1 + specifier: 0.22.2 + version: 0.22.2 sharp: specifier: 0.34.2 version: 0.34.2 @@ -409,79 +311,14 @@ importers: specifier: 4.43.0 version: 4.43.0 - dpkit: - dependencies: - '@dpkit/cli': - specifier: workspace:* - version: link:../cli - '@dpkit/lib': - specifier: workspace:* - version: link:../lib - - file: - dependencies: - '@dpkit/core': - specifier: workspace:* - version: link:../core - chardet: - specifier: ^2.1.0 - version: 2.1.0 
- exit-hook: - specifier: ^4.0.0 - version: 4.0.0 - hasha: - specifier: ^6.0.0 - version: 6.0.0 - isbinaryfile: - specifier: ^5.0.4 - version: 5.0.6 - multistream: - specifier: ^4.1.0 - version: 4.1.0 - p-map: - specifier: ^7.0.3 - version: 7.0.3 - tempy: - specifier: 3.1.0 - version: 3.1.0 - tiny-invariant: - specifier: ^1.3.3 - version: 1.3.3 - devDependencies: - '@types/multistream': - specifier: 4.1.3 - version: 4.1.3 - - folder: - dependencies: - '@dpkit/core': - specifier: workspace:* - version: link:../core - '@dpkit/file': - specifier: workspace:* - version: link:../file - exit-hook: - specifier: ^4.0.0 - version: 4.0.0 - - github: + document: dependencies: - '@dpkit/core': - specifier: workspace:* - version: link:../core - '@dpkit/file': + '@dpkit/dataset': specifier: workspace:* - version: link:../file - devDependencies: - '@dpkit/test': + version: link:../dataset + '@dpkit/metadata': specifier: workspace:* - version: link:../test - - html: - dependencies: - '@dpkit/core': - specifier: workspace:* - version: link:../core + version: link:../metadata htmlfy: specifier: ^1.0.0 version: 1.0.0 @@ -491,10 +328,16 @@ importers: react-dom: specifier: ^19.2.0 version: 19.2.0(react@19.2.0) + remark: + specifier: ^15.0.1 + version: 15.0.1 + remark-gfm: + specifier: ^4.0.0 + version: 4.0.1 devDependencies: - '@dpkit/test': - specifier: workspace:* - version: link:../test + '@types/mdast': + specifier: ^4.0.0 + version: 4.0.4 '@types/react': specifier: ^19.2.0 version: 19.2.2 @@ -502,168 +345,77 @@ importers: specifier: ^19.2.0 version: 19.2.0(@types/react@19.2.2) - inline: + dpkit: dependencies: - '@dpkit/core': + '@dpkit/library': specifier: workspace:* - version: link:../core - '@dpkit/table': + version: link:../library + '@dpkit/terminal': specifier: workspace:* - version: link:../table - nodejs-polars: - specifier: ^0.22.1 - version: 0.22.1 + version: link:../terminal - json: + image: dependencies: - '@dpkit/core': - specifier: workspace:* - version: link:../core - '@dpkit/file': + '@dpkit/dataset': specifier: workspace:* - version: link:../file - '@dpkit/table': + version: link:../dataset + '@dpkit/metadata': specifier: workspace:* - version: link:../table - csv-sniffer: - specifier: ^0.1.1 - version: 0.1.1 - nodejs-polars: - specifier: ^0.22.1 - version: 0.22.1 - devDependencies: - '@dpkit/test': - specifier: workspace:* - version: link:../test + version: link:../metadata - lib: + library: dependencies: - '@dpkit/arrow': - specifier: workspace:* - version: link:../arrow - '@dpkit/ckan': + '@dpkit/audio': specifier: workspace:* - version: link:../ckan - '@dpkit/core': - specifier: workspace:* - version: link:../core - '@dpkit/csv': - specifier: workspace:* - version: link:../csv + version: link:../audio '@dpkit/database': specifier: workspace:* version: link:../database - '@dpkit/datahub': - specifier: workspace:* - version: link:../datahub - '@dpkit/file': - specifier: workspace:* - version: link:../file - '@dpkit/folder': - specifier: workspace:* - version: link:../folder - '@dpkit/github': + '@dpkit/dataset': specifier: workspace:* - version: link:../github - '@dpkit/html': + version: link:../dataset + '@dpkit/document': specifier: workspace:* - version: link:../html - '@dpkit/inline': + version: link:../document + '@dpkit/image': specifier: workspace:* - version: link:../inline - '@dpkit/json': + version: link:../image + '@dpkit/metadata': specifier: workspace:* - version: link:../json - '@dpkit/markdown': - specifier: workspace:* - version: link:../markdown - '@dpkit/ods': - 
specifier: workspace:* - version: link:../ods - '@dpkit/parquet': - specifier: workspace:* - version: link:../parquet + version: link:../metadata '@dpkit/table': specifier: workspace:* version: link:../table - '@dpkit/xlsx': - specifier: workspace:* - version: link:../xlsx - '@dpkit/zenodo': - specifier: workspace:* - version: link:../zenodo - '@dpkit/zip': + '@dpkit/video': specifier: workspace:* - version: link:../zip - - markdown: - dependencies: - '@dpkit/core': - specifier: workspace:* - version: link:../core - '@types/mdast': - specifier: ^4.0.0 - version: 4.0.4 - remark: - specifier: ^15.0.1 - version: 15.0.1 - remark-gfm: - specifier: ^4.0.0 - version: 4.0.1 - devDependencies: - '@dpkit/test': - specifier: workspace:* - version: link:../test - - ods: - dependencies: - '@dpkit/core': - specifier: workspace:* - version: link:../core - '@dpkit/file': - specifier: workspace:* - version: link:../file - '@dpkit/table': - specifier: workspace:* - version: link:../table - nodejs-polars: - specifier: ^0.22.1 - version: 0.22.1 - xlsx: - specifier: https://cdn.sheetjs.com/xlsx-0.20.3/xlsx-0.20.3.tgz - version: https://cdn.sheetjs.com/xlsx-0.20.3/xlsx-0.20.3.tgz - devDependencies: - '@dpkit/test': - specifier: workspace:* - version: link:../test + version: link:../video + p-all: + specifier: ^5.0.1 + version: 5.0.1 - parquet: + metadata: dependencies: - '@dpkit/core': - specifier: workspace:* - version: link:../core - '@dpkit/file': - specifier: workspace:* - version: link:../file - '@dpkit/table': - specifier: workspace:* - version: link:../table - csv-sniffer: - specifier: ^0.1.1 - version: 0.1.1 - nodejs-polars: - specifier: ^0.22.1 - version: 0.22.1 - devDependencies: - '@dpkit/test': - specifier: workspace:* - version: link:../test + '@sindresorhus/slugify': + specifier: ^0.9.0 + version: 0.9.1 + '@types/json-schema': + specifier: ^7.0.15 + version: 7.0.15 + ajv: + specifier: ^8.17.1 + version: 8.17.1 + quick-lru: + specifier: ^7.0.1 + version: 7.2.0 + tiny-invariant: + specifier: ^1.3.3 + version: 1.3.3 service: dependencies: - '@dpkit/lib': + '@dpkit/library': specifier: workspace:* - version: link:../lib + version: link:../library '@loglayer/transport-tslog': specifier: 3.0.4 version: 3.0.4(tslog@4.10.2) @@ -730,77 +482,88 @@ importers: table: dependencies: - '@dpkit/core': + '@dpkit/dataset': specifier: workspace:* - version: link:../core + version: link:../dataset + '@dpkit/metadata': + specifier: workspace:* + version: link:../metadata + csv-sniffer: + specifier: ^0.1.1 + version: 0.1.1 nodejs-polars: - specifier: ^0.22.1 - version: 0.22.1 + specifier: ^0.22.2 + version: 0.22.2 p-all: specifier: ^5.0.1 version: 5.0.1 - - test: - dependencies: - '@pollyjs/adapter-fetch': - specifier: ^6.0.6 - version: 6.0.7 - '@pollyjs/core': - specifier: ^6.0.6 - version: 6.0.6 - '@pollyjs/persister-fs': - specifier: ^6.0.6 - version: 6.0.6 - - xlsx: - dependencies: - '@dpkit/core': - specifier: workspace:* - version: link:../core - '@dpkit/file': - specifier: workspace:* - version: link:../file - '@dpkit/table': - specifier: workspace:* - version: link:../table - nodejs-polars: - specifier: ^0.22.1 - version: 0.22.1 xlsx: specifier: https://cdn.sheetjs.com/xlsx-0.20.3/xlsx-0.20.3.tgz version: https://cdn.sheetjs.com/xlsx-0.20.3/xlsx-0.20.3.tgz - devDependencies: - '@dpkit/test': - specifier: workspace:* - version: link:../test - zenodo: + terminal: dependencies: - '@dpkit/core': - specifier: workspace:* - version: link:../core - '@dpkit/file': + '@clack/prompts': + specifier: ^0.11.0 + version: 
0.11.0 + '@commander-js/extra-typings': + specifier: ^14.0.0 + version: 14.0.0(commander@14.0.1) + '@dpkit/library': specifier: workspace:* - version: link:../file + version: link:../library + commander: + specifier: ^14.0.0 + version: 14.0.1 + es-toolkit: + specifier: ^1.39.10 + version: 1.39.10 + exit-hook: + specifier: ^4.0.0 + version: 4.0.0 + ink: + specifier: ^6.3.1 + version: 6.3.1(@types/react@19.1.9)(react-devtools-core@6.1.5)(react@19.2.0) + nodejs-polars: + specifier: ^0.22.2 + version: 0.22.2 + picocolors: + specifier: ^1.1.1 + version: 1.1.1 + react: + specifier: ^19.1.1 + version: 19.2.0 + react-devtools-core: + specifier: ^6.1.2 + version: 6.1.5 + tiny-invariant: + specifier: ^1.3.3 + version: 1.3.3 + ts-extras: + specifier: ^0.14.0 + version: 0.14.0 devDependencies: - '@dpkit/test': + '@dpkit/dataset': specifier: workspace:* - version: link:../test + version: link:../dataset + '@types/node': + specifier: 24.2.0 + version: 24.2.0 + '@types/react': + specifier: 19.1.9 + version: 19.1.9 + ink-testing-library: + specifier: 4.0.0 + version: 4.0.0(@types/react@19.1.9) - zip: + video: dependencies: - '@dpkit/core': - specifier: workspace:* - version: link:../core - '@dpkit/file': + '@dpkit/dataset': specifier: workspace:* - version: link:../file - '@dpkit/folder': + version: link:../dataset + '@dpkit/metadata': specifier: workspace:* - version: link:../folder - fflate: - specifier: ^0.8.2 - version: 0.8.2 + version: link:../metadata packages: @@ -4535,56 +4298,56 @@ packages: node-releases@2.0.23: resolution: {integrity: sha512-cCmFDMSm26S6tQSDpBCg/NR8NENrVPhAJSf+XbxBG4rPFaaonlEoE9wHQmun+cls499TQGSb7ZyPBRlzgKfpeg==} - nodejs-polars-android-arm64@0.22.1: - resolution: {integrity: sha512-X/vAj0GIp6c6k/Y5NZvrt/ZfNLJZFLOjP1N1uMiMUNMaeNPjevfv7Yt6Exsc/1DBfNB455e+9wgw7kE8jbkDRw==} + nodejs-polars-android-arm64@0.22.2: + resolution: {integrity: sha512-9IbhQTYBV2z8RWziBgAundzuf11jJis3/FNaTo+MYrUARDgJECMu8f0fHpqu2caxVgW6u5len/47vPuyJ18s+Q==} engines: {node: '>= 16'} cpu: [arm64] os: [android] - nodejs-polars-darwin-arm64@0.22.1: - resolution: {integrity: sha512-6sHu3WrYo91aU1d7Ze+pIu9+QDctkrJyIhhZiSL0ApSwwfzTRqqXePi+3URwRvNNZIem9aOAkkoGpCmG0XKe/A==} + nodejs-polars-darwin-arm64@0.22.2: + resolution: {integrity: sha512-SOGZwoeT/j6bKUmW7EH93VPG9bWir0WYeLMAb/8+hK7+MjDpaUj/u9Xrp8fB6uZTUgDXwZrdJf/zBCHYY9iQMg==} engines: {node: '>= 16'} cpu: [arm64] os: [darwin] - nodejs-polars-darwin-x64@0.22.1: - resolution: {integrity: sha512-tyz6ZibPhCiLN40mwe7XhTniSKn/mcZMCURweZVjo8b1Mj3xyfS49Yy+s1JyHEZilCb6jylcS6bg4ibDEcu1OQ==} + nodejs-polars-darwin-x64@0.22.2: + resolution: {integrity: sha512-o8SVkiwlxdoAByXveeEVaNnW1UhdrkhFZlrZxamh2bfI/A/qc+XTDjBkAham6DZfDrZSMpKaAkVIYxFYus10lQ==} engines: {node: '>= 16'} cpu: [x64] os: [darwin] - nodejs-polars-linux-arm64-gnu@0.22.1: - resolution: {integrity: sha512-Fn6XDgsOr9/yojBj0olUCNArglHkl8817JpNyCUyFfWFKR1u24xIfn5Tj5uytToqcpTOkRO1/DSqGo5lWS/Rfg==} + nodejs-polars-linux-arm64-gnu@0.22.2: + resolution: {integrity: sha512-yeJKmmLTHPLGj3TwMrFOt6rjtUltlXj0UBw5Q0DrLKQibJzH5/6DzZ8RqcyhhCirws6MRCLDDYZbqVQHY6csWw==} engines: {node: '>= 16'} cpu: [arm64] os: [linux] - nodejs-polars-linux-arm64-musl@0.22.1: - resolution: {integrity: sha512-uJVkq5YT1LyBsV5C3O3qVgKPdmcfmhi6NYOwhI4Ku7Noo+EUTN+NyOrOjWtS0vHpgkmn+eBaU7+PpKkCSNG+Qg==} + nodejs-polars-linux-arm64-musl@0.22.2: + resolution: {integrity: sha512-IuaZaZPn7Wu6RG6SfqIilukk55wbjK9dsxBh1FHjhuorRDE3a3I/7Yn8yYZisM6ymmB0DHgcem3ynRxqF428hA==} engines: {node: '>= 16'} cpu: [arm64] os: [linux] - nodejs-polars-linux-x64-gnu@0.22.1: 
- resolution: {integrity: sha512-Uh294xCNkg4wNqPB5YIEyJNT76gHJWqwlF/G6VHRiObZUCBDZQEDVDcLqZsaOe9AXksMAlaXDx3dV/VIBgW+LA==} + nodejs-polars-linux-x64-gnu@0.22.2: + resolution: {integrity: sha512-Tyan9su/vJBAsdm+MyitAGVe4FWFf44Yqh/tKq7Fwq32ufed7MVg2a8wD/JragYD6gl1AK7ND0iGyKVeh2gI+A==} engines: {node: '>= 16'} cpu: [x64] os: [linux] - nodejs-polars-linux-x64-musl@0.22.1: - resolution: {integrity: sha512-dTLsidOoWHY7+GqCg97qOPlrbwXhStoMm8ZvR8/l4NhlYIfKkkG9iUQ5Eu1LolfqCK8zYfSlQl+voL4AqGiOkA==} + nodejs-polars-linux-x64-musl@0.22.2: + resolution: {integrity: sha512-XtIy76CE9uY/jtizXwpaxHE5tbV6lqSLeZcHmCjPBOI36moJ51zwyMmiN00qq1+JUaZhOgg1q+Vy/0J4WN3Yaw==} engines: {node: '>= 16'} cpu: [x64] os: [linux] - nodejs-polars-win32-x64-msvc@0.22.1: - resolution: {integrity: sha512-7pFP029b4rlIEJk5pdRHPcSLHPAZHbItR+ZFRKSfPQssjwrYnt31kFKrWeW57KkUVKA50m1QgLfNlCv9bkezmw==} + nodejs-polars-win32-x64-msvc@0.22.2: + resolution: {integrity: sha512-QJabH9l8hL791hNyH9QHZNGAE1Xy1qi66wt9/Zf3j+TkPkS1v14FaStd32o34ZQeB9qjQGfnMR3vjakrv7wKKw==} engines: {node: '>= 16'} cpu: [x64] os: [win32] - nodejs-polars@0.22.1: - resolution: {integrity: sha512-RP8mhb84u5zCc8/22SyS01UNzIk6z2NFtM1mqjK7LCrg8yyqTh+zQKpM0JAY9yBQ8d3wl2wyHCqduhRn+xRsKA==} + nodejs-polars@0.22.2: + resolution: {integrity: sha512-K/ZHlbsalU2uzvoaeP6Eu3aS6DqfH0Q83ug0WjM7UArKVedPn4ViGU/iLYmbFKq63qNXv5R6K3rigMhRtFJeCw==} engines: {node: '>= 20'} nodemon@3.1.10: @@ -6295,6 +6058,12 @@ packages: vite: optional: true + vitest-polly@1.3.0: + resolution: {integrity: sha512-Kb4mnuKZKnpgG2pkbGAIIyz48ani2nY6Rs3jcOZn6dthyDLJDXe4QowvcgmjDT3hnJHAqojsAond+RCnUURRyg==} + engines: {node: ^24.0.0, pnpm: ^10.0.0} + peerDependencies: + vitest: ^3.2.4 + vitest@3.2.4: resolution: {integrity: sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==} engines: {node: ^18.0.0 || ^20.0.0 || >=22.0.0} @@ -10858,40 +10627,40 @@ snapshots: node-releases@2.0.23: {} - nodejs-polars-android-arm64@0.22.1: + nodejs-polars-android-arm64@0.22.2: optional: true - nodejs-polars-darwin-arm64@0.22.1: + nodejs-polars-darwin-arm64@0.22.2: optional: true - nodejs-polars-darwin-x64@0.22.1: + nodejs-polars-darwin-x64@0.22.2: optional: true - nodejs-polars-linux-arm64-gnu@0.22.1: + nodejs-polars-linux-arm64-gnu@0.22.2: optional: true - nodejs-polars-linux-arm64-musl@0.22.1: + nodejs-polars-linux-arm64-musl@0.22.2: optional: true - nodejs-polars-linux-x64-gnu@0.22.1: + nodejs-polars-linux-x64-gnu@0.22.2: optional: true - nodejs-polars-linux-x64-musl@0.22.1: + nodejs-polars-linux-x64-musl@0.22.2: optional: true - nodejs-polars-win32-x64-msvc@0.22.1: + nodejs-polars-win32-x64-msvc@0.22.2: optional: true - nodejs-polars@0.22.1: + nodejs-polars@0.22.2: optionalDependencies: - nodejs-polars-android-arm64: 0.22.1 - nodejs-polars-darwin-arm64: 0.22.1 - nodejs-polars-darwin-x64: 0.22.1 - nodejs-polars-linux-arm64-gnu: 0.22.1 - nodejs-polars-linux-arm64-musl: 0.22.1 - nodejs-polars-linux-x64-gnu: 0.22.1 - nodejs-polars-linux-x64-musl: 0.22.1 - nodejs-polars-win32-x64-msvc: 0.22.1 + nodejs-polars-android-arm64: 0.22.2 + nodejs-polars-darwin-arm64: 0.22.2 + nodejs-polars-darwin-x64: 0.22.2 + nodejs-polars-linux-arm64-gnu: 0.22.2 + nodejs-polars-linux-arm64-musl: 0.22.2 + nodejs-polars-linux-x64-gnu: 0.22.2 + nodejs-polars-linux-x64-musl: 0.22.2 + nodejs-polars-win32-x64-msvc: 0.22.2 nodemon@3.1.10: dependencies: @@ -12553,6 +12322,15 @@ snapshots: optionalDependencies: vite: 6.3.6(@types/node@24.2.0)(sass@1.93.2)(sugarss@5.0.1(postcss@8.5.6))(tsx@4.20.3)(yaml@2.8.1) + 
vitest-polly@1.3.0(vitest@3.2.4):
+    dependencies:
+      '@pollyjs/adapter-fetch': 6.0.7
+      '@pollyjs/core': 6.0.6
+      '@pollyjs/persister-fs': 6.0.6
+      vitest: 3.2.4(@types/debug@4.1.12)(@types/node@24.2.0)(@vitest/ui@3.2.4)(sass@1.93.2)(sugarss@5.0.1(postcss@8.5.6))(tsx@4.20.3)(yaml@2.8.1)
+    transitivePeerDependencies:
+      - supports-color
+
   vitest@3.2.4(@types/debug@4.1.12)(@types/node@24.2.0)(@vitest/ui@3.2.4)(sass@1.93.2)(sugarss@5.0.1(postcss@8.5.6))(tsx@4.20.3)(yaml@2.8.1):
     dependencies:
       '@types/chai': 5.2.2
diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml
index ea7d62bb..85ca57b6 100644
--- a/pnpm-workspace.yaml
+++ b/pnpm-workspace.yaml
@@ -1,29 +1,17 @@
 dangerouslyAllowAllBuilds: true
 packages:
-  - arrow
+  - audio
   - browser
-  - ckan
-  - cli
-  - core
-  - csv
-  - datahub
   - database
+  - dataset
   - docs
+  - document
   - dpkit
-  - file
-  - folder
-  - github
-  - html
-  - inline
-  - json
-  - lib
-  - markdown
-  - ods
-  - parquet
+  - image
+  - library
+  - metadata
   - service
   - site
   - table
-  - test
-  - xlsx
-  - zenodo
-  - zip
+  - terminal
+  - video
diff --git a/service/README.md b/service/README.md
index aa77dbad..4e488107 100644
--- a/service/README.md
+++ b/service/README.md
@@ -1,3 +1,3 @@
 # @dpkit/service
 
-dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev).
+dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [project's website](https://dpkit.app).
diff --git a/service/endpoints/package/validate.ts b/service/endpoints/package/validate.ts
index f27a97a7..b2407557 100644
--- a/service/endpoints/package/validate.ts
+++ b/service/endpoints/package/validate.ts
@@ -1,4 +1,4 @@
-import * as dpkit from "@dpkit/lib"
+import * as dpkit from "@dpkit/library"
 import * as z from "zod"
 
 import { endpoint } from "../../endpoint.ts"
diff --git a/service/package.json b/service/package.json
index b68286ae..98f7df2e 100644
--- a/service/package.json
+++ b/service/package.json
@@ -2,6 +2,7 @@
   "name": "@dpkit/service",
   "type": "module",
   "version": "0.0.0-dev",
+  "private": true,
   "exports": {
     "./browser": "./build/runtimes/browser.js",
     "./node": "./build/runtimes/node.js"
   },
@@ -28,7 +29,7 @@
     "generate": "node @generate.ts"
   },
   "dependencies": {
-    "@dpkit/lib": "workspace:*",
+    "@dpkit/library": "workspace:*",
     "@loglayer/transport-tslog": "3.0.4",
     "@orpc/client": "^1.10.0",
     "@orpc/contract": "^1.10.0",
diff --git a/site/content/docs/guides/commands.md b/site/content/docs/guides/commands.md
index 611cf490..f47b6c5d 100644
--- a/site/content/docs/guides/commands.md
+++ b/site/content/docs/guides/commands.md
@@ -4,7 +4,7 @@ sidebar:
   order: 2
 ---
 
-After instalation, the dpkit's command-line tool is available in your terminal under name `dp`. Please read the [Getting Started](../../overview/getting-started) guide for more information.
+After installation, dpkit's command-line tool is available in your terminal under the name `dpkit`. Please read the [Getting Started](../../overview/getting-started) guide for more information.
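As a quick smoke test, the rename from `dp` to `dpkit` can be verified straight away; a minimal sketch, where `--version` mirrors the check the CI workflow runs against the compiled binaries and `--help` is assumed to print the command listing:

```bash
# Confirm the renamed binary is on the PATH
dpkit --version

# Assumed: list the available commands and categories
dpkit --help
```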
## Command categories @@ -19,11 +19,11 @@ The CLI commands are organized into categories named after the main objects they Each category has its own commands for example the `table` category: -- `dp table convert` -- `dp table describe` -- `dp table explore` -- `dp table script` -- `dp table validate` +- `dpkit table convert` +- `dpkit table describe` +- `dpkit table explore` +- `dpkit table script` +- `dpkit table validate` ## Working with data packages @@ -32,20 +32,20 @@ Usually non-package command support the `-p/--package` and `-r/--resource` optio For example, we can explore a table using this command: ```bash -dp table explore table.csv +dpkit table explore table.csv ``` Or this command using an interactive mode: ```bash -dp table explore -p datapackage.json +dpkit table explore -p datapackage.json # it will ask you to select a resource ``` Or this command using both the datapackage file path and the resource name making it non-interactive similarly to the plain path-based command: ```bash -dp table explore -p datapackage.json -r table +dpkit table explore -p datapackage.json -r table ``` > [!TIP] diff --git a/site/content/docs/guides/dialect.md b/site/content/docs/guides/dialect.md index bfd520eb..10adf63c 100644 --- a/site/content/docs/guides/dialect.md +++ b/site/content/docs/guides/dialect.md @@ -8,12 +8,12 @@ Table Dialect commands help you work with CSV dialects - metadata that describes ## Available Commands -### `dp dialect infer` +### `dpkit dialect infer` Infer a table dialect from a table by analyzing its structure and determining the best parsing parameters such as delimiter, quote character, and header configuration. ```bash -dp dialect infer +dpkit dialect infer ``` **Options:** @@ -26,27 +26,27 @@ dp dialect infer **Examples:** ```bash # Infer dialect from CSV file -dp dialect infer data.csv +dpkit dialect infer data.csv # Infer from remote file -dp dialect infer https://example.com/data.csv +dpkit dialect infer https://example.com/data.csv # Infer from resource in package -dp dialect infer --from-package datapackage.json --from-resource "users" +dpkit dialect infer --from-package datapackage.json --from-resource "users" # Export dialect as JSON -dp dialect infer data.csv --json > dialect.json +dpkit dialect infer data.csv --json > dialect.json # Use larger sample for complex files -dp dialect infer complex_data.csv --sample-bytes 8192 +dpkit dialect infer complex_data.csv --sample-bytes 8192 ``` -### `dp dialect explore` +### `dpkit dialect explore` Explore a table dialect from a local or remote path to view its parsing configuration in an interactive format. ```bash -dp dialect explore +dpkit dialect explore ``` **Options:** @@ -58,24 +58,24 @@ dp dialect explore **Examples:** ```bash # Explore dialect descriptor -dp dialect explore dialect.json +dpkit dialect explore dialect.json # Explore remote dialect -dp dialect explore https://example.com/dialect.json +dpkit dialect explore https://example.com/dialect.json # Explore dialect from package resource -dp dialect explore --from-package datapackage.json --from-resource "users" +dpkit dialect explore --from-package datapackage.json --from-resource "users" # Export dialect structure as JSON -dp dialect explore dialect.json --json +dpkit dialect explore dialect.json --json ``` -### `dp dialect validate` +### `dpkit dialect validate` Validate a table dialect from a local or remote path against the CSV Dialect specification. 
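As an end-to-end illustration, a dialect descriptor can also be written by hand and then checked; the sketch below is hypothetical and assumes `delimiter` and `header` are accepted properties, as in the Frictionless CSV Dialect specification:

```bash
# Write a minimal, hypothetical dialect descriptor and validate it
printf '{"delimiter": ";", "header": true}\n' > dialect.json
dpkit dialect validate dialect.json
```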
```bash -dp dialect validate +dpkit dialect validate ``` **Options:** @@ -89,27 +89,27 @@ dp dialect validate **Examples:** ```bash # Validate dialect descriptor -dp dialect validate dialect.json +dpkit dialect validate dialect.json # Validate remote dialect -dp dialect validate https://example.com/dialect.json +dpkit dialect validate https://example.com/dialect.json # Validate dialect from package resource -dp dialect validate --from-package datapackage.json --from-resource "users" +dpkit dialect validate --from-package datapackage.json --from-resource "users" # Get validation results as JSON -dp dialect validate dialect.json --json +dpkit dialect validate dialect.json --json # Interactive selection when no path provided -dp dialect validate --from-package datapackage.json +dpkit dialect validate --from-package datapackage.json ``` -### `dp dialect script` +### `dpkit dialect script` Open an interactive scripting session with a loaded table dialect. This provides a REPL environment where you can programmatically interact with the dialect definition. ```bash -dp dialect script +dpkit dialect script ``` **Options:** @@ -125,16 +125,16 @@ dp dialect script **Examples:** ```bash # Start scripting session with dialect -dp dialect script dialect.json +dpkit dialect script dialect.json # Script dialect from package resource -dp dialect script --from-package datapackage.json --from-resource "users" +dpkit dialect script --from-package datapackage.json --from-resource "users" # In the REPL session: -dp> dialect.delimiter -dp> dialect.quoteChar -dp> dialect.header -dp> dialect.skipInitialSpace +dpkit> dialect.delimiter +dpkit> dialect.quoteChar +dpkit> dialect.header +dpkit> dialect.skipInitialSpace ``` ## Common Workflows @@ -143,53 +143,53 @@ dp> dialect.skipInitialSpace 1. **Infer from data file:** ```bash - dp dialect infer data.csv --json > dialect.json + dpkit dialect infer data.csv --json > dialect.json ``` 2. **Validate the generated dialect:** ```bash - dp dialect validate dialect.json + dpkit dialect validate dialect.json ``` 3. 
**Explore the dialect configuration:** ```bash - dp dialect explore dialect.json + dpkit dialect explore dialect.json ``` ### Dialect Analysis for Complex Files ```bash # Infer dialect with larger sample for better accuracy -dp dialect infer complex_file.csv --sample-bytes 16384 +dpkit dialect infer complex_file.csv --sample-bytes 16384 # Validate and explore for verification -dp dialect validate dialect.json -dp dialect explore dialect.json +dpkit dialect validate dialect.json +dpkit dialect explore dialect.json # Script for custom dialect analysis -dp dialect script dialect.json +dpkit dialect script dialect.json ``` ### Working with Package Dialects ```bash # Validate all dialects in a package interactively -dp dialect validate --from-package datapackage.json +dpkit dialect validate --from-package datapackage.json # Infer improved dialect for specific resource -dp dialect infer --from-package datapackage.json --from-resource "transactions" +dpkit dialect infer --from-package datapackage.json --from-resource "transactions" # Compare dialects using scripting -dp dialect script --from-package datapackage.json --from-resource "users" +dpkit dialect script --from-package datapackage.json --from-resource "users" ``` ### Remote Dialect Handling ```bash # Work with remote dialects -dp dialect explore https://example.com/dialect.json -dp dialect validate https://example.com/dialect.json -dp dialect infer https://example.com/data.csv +dpkit dialect explore https://example.com/dialect.json +dpkit dialect validate https://example.com/dialect.json +dpkit dialect infer https://example.com/data.csv ``` ## Dialect Properties @@ -263,22 +263,22 @@ CSV Dialect specifications typically include: ### For files with unusual formatting: ```bash # Use larger sample size -dp dialect infer unusual_file.csv --sample-bytes 32768 +dpkit dialect infer unusual_file.csv --sample-bytes 32768 # Check inferred dialect -dp dialect explore dialect.json +dpkit dialect explore dialect.json # Manually verify with table commands -dp table explore unusual_file.csv --dialect dialect.json +dpkit table explore unusual_file.csv --dialect dialect.json ``` ### For files with multiple header rows: ```bash # The dialect inference will detect headerRows automatically -dp dialect infer multi_header.csv --json +dpkit dialect infer multi_header.csv --json # Verify the header configuration -dp dialect script dialect.json +dpkit dialect script dialect.json # Then in REPL: dialect.headerRows ``` @@ -296,16 +296,16 @@ Dialect commands work seamlessly with other dpkit commands: ```bash # Create dialect, then use it for table operations -dp dialect infer data.csv --json > dialect.json -dp table validate data.csv --dialect dialect.json +dpkit dialect infer data.csv --json > dialect.json +dpkit table validate data.csv --dialect dialect.json # Work within package context -dp package infer *.csv --json > datapackage.json -dp dialect validate --from-package datapackage.json --from-resource "data" +dpkit package infer *.csv --json > datapackage.json +dpkit dialect validate --from-package datapackage.json --from-resource "data" # Use inferred dialect for schema inference -dp dialect infer data.csv --json > dialect.json -dp schema infer data.csv --delimiter ";" --header-rows 2 +dpkit dialect infer data.csv --json > dialect.json +dpkit schema infer data.csv --delimiter ";" --header-rows 2 ``` ## Best Practices diff --git a/site/content/docs/guides/file.md b/site/content/docs/guides/file.md index 65b00342..f4792d81 100644 --- 
a/site/content/docs/guides/file.md +++ b/site/content/docs/guides/file.md @@ -8,12 +8,12 @@ File commands help you work with individual files, providing utilities for copyi ## Available Commands -### `dp file copy` +### `dpkit file copy` Copy a file from one location to another with support for local and remote sources and destinations. ```bash -dp file copy +dpkit file copy ``` **Options:** @@ -22,21 +22,21 @@ dp file copy **Examples:** ```bash # Copy local file -dp file copy data.csv backup.csv +dpkit file copy data.csv backup.csv # Copy remote file to local -dp file copy https://example.com/data.csv local_data.csv +dpkit file copy https://example.com/data.csv local_data.csv # Copy to different directory -dp file copy data.csv ./backup/data_backup.csv +dpkit file copy data.csv ./backup/data_backup.csv ``` -### `dp file describe` +### `dpkit file describe` Describe a file's properties including size, format, encoding, and basic metadata information. ```bash -dp file describe +dpkit file describe ``` **Options:** @@ -46,26 +46,26 @@ dp file describe **Examples:** ```bash # Describe local file -dp file describe data.csv +dpkit file describe data.csv # Describe remote file -dp file describe https://example.com/data.csv +dpkit file describe https://example.com/data.csv # Get description as JSON -dp file describe data.csv --json +dpkit file describe data.csv --json # Describe various file types -dp file describe document.pdf -dp file describe image.png -dp file describe archive.zip +dpkit file describe document.pdf +dpkit file describe image.png +dpkit file describe archive.zip ``` -### `dp file validate` +### `dpkit file validate` Validate a file's integrity, format compliance, and accessibility. ```bash -dp file validate +dpkit file validate ``` **Options:** @@ -77,18 +77,18 @@ dp file validate **Examples:** ```bash # Validate local file -dp file validate data.csv +dpkit file validate data.csv # Validate remote file -dp file validate https://example.com/data.csv +dpkit file validate https://example.com/data.csv # Get validation results as JSON -dp file validate data.csv --json +dpkit file validate data.csv --json # Validate multiple file types -dp file validate document.json -dp file validate image.jpg -dp file validate data.parquet +dpkit file validate document.json +dpkit file validate image.jpg +dpkit file validate data.parquet ``` ## Common Workflows @@ -97,38 +97,38 @@ dp file validate data.parquet ```bash # Create backup copy -dp file copy important_data.csv backup/important_data_$(date +%Y%m%d).csv +dpkit file copy important_data.csv backup/important_data_$(date +%Y%m%d).csv # Validate backup integrity -dp file validate backup/important_data_20240101.csv +dpkit file validate backup/important_data_20240101.csv # Describe backup properties -dp file describe backup/important_data_20240101.csv +dpkit file describe backup/important_data_20240101.csv ``` ### Remote File Handling ```bash # Download and validate remote file -dp file copy https://example.com/dataset.csv local_dataset.csv -dp file validate local_dataset.csv +dpkit file copy https://example.com/dataset.csv local_dataset.csv +dpkit file validate local_dataset.csv # Describe remote file without downloading -dp file describe https://example.com/dataset.csv +dpkit file describe https://example.com/dataset.csv ``` ### File Diagnostics ```bash # Check file properties -dp file describe suspicious_file.csv +dpkit file describe suspicious_file.csv # Validate file integrity -dp file validate suspicious_file.csv +dpkit file validate 
suspicious_file.csv # Get detailed diagnostics as JSON -dp file describe problematic_file.csv --json -dp file validate problematic_file.csv --json +dpkit file describe problematic_file.csv --json +dpkit file validate problematic_file.csv --json ``` ### Batch File Operations @@ -137,13 +137,13 @@ dp file validate problematic_file.csv --json # Describe multiple files for file in *.csv; do echo "Describing $file:" - dp file describe "$file" + dpkit file describe "$file" echo "---" done # Validate all files in directory for file in data/*.json; do - dp file validate "$file" --json >> validation_report.json + dpkit file validate "$file" --json >> validation_report.json done ``` @@ -198,28 +198,28 @@ File commands work with various file formats: #### File Not Found ```bash -dp file describe missing_file.csv +dpkit file describe missing_file.csv # Error: File not found # Solution: Check file path and permissions ``` #### Network Issues (Remote Files) ```bash -dp file copy https://unreachable.com/data.csv local.csv +dpkit file copy https://unreachable.com/data.csv local.csv # Error: Network timeout # Solution: Check URL and network connectivity ``` #### Format Recognition ```bash -dp file describe unknown_format.dat +dpkit file describe unknown_format.dat # May show limited information for unknown formats # Solution: Use --debug for more details ``` #### Permission Issues ```bash -dp file copy protected_file.csv backup.csv +dpkit file copy protected_file.csv backup.csv # Error: Permission denied # Solution: Check file permissions ``` @@ -236,14 +236,14 @@ for file in $FILES; do echo "Processing $file" # Validate file - if dp file validate "$file" --json | jq -r '.valid' | grep -q "true"; then + if dpkit file validate "$file" --json | jq -r '.valid' | grep -q "true"; then echo "✓ $file is valid" # Create backup - dp file copy "$file" "backup/${file%.csv}_$(date +%Y%m%d).csv" + dpkit file copy "$file" "backup/${file%.csv}_$(date +%Y%m%d).csv" # Get file info - dp file describe "$file" --json > "info/${file%.csv}_info.json" + dpkit file describe "$file" --json > "info/${file%.csv}_info.json" else echo "✗ $file is invalid" fi @@ -253,25 +253,25 @@ done ### Integration with Other Commands ```bash # Validate file before processing with table commands -dp file validate data.csv && dp table explore data.csv +dpkit file validate data.csv && dpkit table explore data.csv # Describe file and then infer schema -dp file describe data.csv -dp schema infer data.csv --json > schema.json +dpkit file describe data.csv +dpkit schema infer data.csv --json > schema.json # Copy and then create package -dp file copy remote_data.csv local_data.csv -dp package infer local_data.csv --json > datapackage.json +dpkit file copy remote_data.csv local_data.csv +dpkit package infer local_data.csv --json > datapackage.json ``` ### Monitoring and Logging ```bash # Create validation log -dp file validate data.csv --json | jq '{file: "data.csv", valid: .valid, timestamp: now}' >> validation.log +dpkit file validate data.csv --json | jq '{file: "data.csv", valid: .valid, timestamp: now}' >> validation.log # Monitor file changes while true; do - dp file describe changing_file.csv --json > current_state.json + dpkit file describe changing_file.csv --json > current_state.json if ! 
cmp -s current_state.json previous_state.json; then echo "File changed at $(date)" cp current_state.json previous_state.json diff --git a/site/content/docs/guides/package.md b/site/content/docs/guides/package.md index bdca6b49..6cdaa07f 100644 --- a/site/content/docs/guides/package.md +++ b/site/content/docs/guides/package.md @@ -8,12 +8,12 @@ Data Package commands help you work with Data Packages - collections of data fil ## Available Commands -### `dp package copy` +### `dpkit package copy` Copy a local or remote Data Package to a local folder, a ZIP archive or a database. ```bash -dp package copy --to-path +dpkit package copy --to-path ``` **Options:** @@ -24,21 +24,21 @@ dp package copy --to-path **Examples:** ```bash # Copy package to local directory -dp package copy datapackage.json --to-path ./output +dpkit package copy datapackage.json --to-path ./output # Copy package to ZIP archive -dp package copy datapackage.json --to-path package.zip +dpkit package copy datapackage.json --to-path package.zip # Copy remote package including remote resources -dp package copy https://example.com/datapackage.json --to-path ./local --with-remote +dpkit package copy https://example.com/datapackage.json --to-path ./local --with-remote ``` -### `dp package infer` +### `dpkit package infer` Infer a data package from local or remote file paths. This command analyzes data files and automatically generates metadata including schema information. ```bash -dp package infer +dpkit package infer ``` **Table Dialect Options:** @@ -88,21 +88,21 @@ dp package infer **Examples:** ```bash # Infer package from CSV files -dp package infer data1.csv data2.csv +dpkit package infer data1.csv data2.csv # Infer with custom delimiter -dp package infer data.csv --delimiter ";" +dpkit package infer data.csv --delimiter ";" # Infer from remote files -dp package infer https://example.com/data.csv +dpkit package infer https://example.com/data.csv ``` -### `dp package explore` +### `dpkit package explore` Explore a Data Package descriptor to view its structure and metadata in an interactive format. ```bash -dp package explore +dpkit package explore ``` **Options:** @@ -112,21 +112,21 @@ dp package explore **Examples:** ```bash # Explore local package -dp package explore datapackage.json +dpkit package explore datapackage.json # Explore remote package -dp package explore https://example.com/datapackage.json +dpkit package explore https://example.com/datapackage.json # Export structure as JSON -dp package explore datapackage.json --json +dpkit package explore datapackage.json --json ``` -### `dp package validate` +### `dpkit package validate` Validate a data package from a local or remote path against the Data Package specification. ```bash -dp package validate +dpkit package validate ``` **Options:** @@ -138,21 +138,21 @@ dp package validate **Examples:** ```bash # Validate local package -dp package validate datapackage.json +dpkit package validate datapackage.json # Validate remote package -dp package validate https://example.com/datapackage.json +dpkit package validate https://example.com/datapackage.json # Get validation results as JSON -dp package validate datapackage.json --json +dpkit package validate datapackage.json --json ``` -### `dp package script` +### `dpkit package script` Open an interactive scripting session with a loaded Data Package. This provides a REPL environment where you can programmatically interact with the package data. 
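Since the session is a regular REPL, ordinary JavaScript also works on the documented `dataPackage` variable; the lines below are illustrative only, and the resource names are hypothetical:

```bash
dpkit package script datapackage.json

# In the REPL session (hypothetical package contents):
dpkit> dataPackage.name
dpkit> dataPackage.resources.map(r => r.name)  # e.g. [ 'users', 'transactions' ]
```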
```bash -dp package script +dpkit package script ``` **Available Variables:** @@ -162,23 +162,23 @@ dp package script **Examples:** ```bash # Start scripting session -dp package script datapackage.json +dpkit package script datapackage.json # In the REPL session: -dp> dataPackage.resources.length -dp> dataPackage.resources[0].schema.fields +dpkit> dataPackage.resources.length +dpkit> dataPackage.resources[0].schema.fields ``` -### `dp package publish` +### `dpkit package publish` Publish data packages to various platforms. This is a parent command with platform-specific subcommands. -#### `dp package publish ckan` +#### `dpkit package publish ckan` Publish a data package to a CKAN instance. ```bash -dp package publish ckan +dpkit package publish ckan ``` **CKAN Options:** @@ -190,26 +190,26 @@ dp package publish ckan **Examples:** ```bash # Publish to CKAN -dp package publish ckan datapackage.json \ +dpkit package publish ckan datapackage.json \ --to-ckan-url https://demo.ckan.org \ --to-ckan-api-key your-api-key \ --to-ckan-owner-org your-org ``` -#### `dp package publish github` +#### `dpkit package publish github` Publish a data package to GitHub as releases or repository files. ```bash -dp package publish github +dpkit package publish github ``` -#### `dp package publish zenodo` +#### `dpkit package publish zenodo` Publish a data package to Zenodo for academic archiving. ```bash -dp package publish zenodo +dpkit package publish zenodo ``` ## Common Workflows @@ -218,40 +218,40 @@ dp package publish zenodo 1. **Infer from data files:** ```bash - dp package infer *.csv --json > datapackage.json + dpkit package infer *.csv --json > datapackage.json ``` 2. **Validate the generated package:** ```bash - dp package validate datapackage.json + dpkit package validate datapackage.json ``` 3. **Explore the package structure:** ```bash - dp package explore datapackage.json + dpkit package explore datapackage.json ``` ### Working with Remote Packages ```bash # Explore remote package -dp package explore https://example.com/datapackage.json +dpkit package explore https://example.com/datapackage.json # Copy remote package locally -dp package copy https://example.com/datapackage.json --to-path ./local-copy +dpkit package copy https://example.com/datapackage.json --to-path ./local-copy # Validate remote package -dp package validate https://example.com/datapackage.json +dpkit package validate https://example.com/datapackage.json ``` ### Publishing Workflow ```bash # Validate before publishing -dp package validate datapackage.json +dpkit package validate datapackage.json # Publish to CKAN -dp package publish ckan datapackage.json \ +dpkit package publish ckan datapackage.json \ --to-ckan-url https://your-ckan-instance.org \ --to-ckan-api-key $CKAN_API_KEY ``` diff --git a/site/content/docs/guides/resource.md b/site/content/docs/guides/resource.md index 02a94af9..d3614b6c 100644 --- a/site/content/docs/guides/resource.md +++ b/site/content/docs/guides/resource.md @@ -8,12 +8,12 @@ Data Resource commands help you work with individual data resources - the buildi ## Available Commands -### `dp resource infer` +### `dpkit resource infer` Infer a data resource from a table by analyzing its structure and generating metadata including schema information. 
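To see what inference actually produces, the JSON output can be piped through a query tool; a small sketch, assuming the `--json` flag prints the Data Resource descriptor itself and that `jq` is installed:

```bash
# Peek at the inferred field names and types (Table Schema layout)
dpkit resource infer data.csv --json | jq '.schema.fields[] | {name, type}'
```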
```bash -dp resource infer +dpkit resource infer ``` **Options:** @@ -69,27 +69,27 @@ dp resource infer **Examples:** ```bash # Infer resource from CSV file -dp resource infer data.csv +dpkit resource infer data.csv # Infer with custom delimiter -dp resource infer data.csv --delimiter ";" +dpkit resource infer data.csv --delimiter ";" # Infer from remote file -dp resource infer https://example.com/data.csv +dpkit resource infer https://example.com/data.csv # Infer from resource in package -dp resource infer --from-package datapackage.json --from-resource "users" +dpkit resource infer --from-package datapackage.json --from-resource "users" # Export as JSON -dp resource infer data.csv --json +dpkit resource infer data.csv --json ``` -### `dp resource explore` +### `dpkit resource explore` Explore a data resource from a local or remote path to view its structure and metadata in an interactive format. ```bash -dp resource explore +dpkit resource explore ``` **Options:** @@ -101,24 +101,24 @@ dp resource explore **Examples:** ```bash # Explore resource descriptor -dp resource explore resource.json +dpkit resource explore resource.json # Explore remote resource -dp resource explore https://example.com/resource.json +dpkit resource explore https://example.com/resource.json # Explore resource from package -dp resource explore --from-package datapackage.json --from-resource "users" +dpkit resource explore --from-package datapackage.json --from-resource "users" # Export structure as JSON -dp resource explore resource.json --json +dpkit resource explore resource.json --json ``` -### `dp resource validate` +### `dpkit resource validate` Validate a data resource from a local or remote path against the Data Resource specification. ```bash -dp resource validate [descriptor-path] +dpkit resource validate [descriptor-path] ``` **Options:** @@ -132,27 +132,27 @@ dp resource validate [descriptor-path] **Examples:** ```bash # Validate resource descriptor -dp resource validate resource.json +dpkit resource validate resource.json # Validate remote resource -dp resource validate https://example.com/resource.json +dpkit resource validate https://example.com/resource.json # Validate resource from package -dp resource validate --from-package datapackage.json --from-resource "users" +dpkit resource validate --from-package datapackage.json --from-resource "users" # Get validation results as JSON -dp resource validate resource.json --json +dpkit resource validate resource.json --json # Interactive selection when no path provided -dp resource validate --from-package datapackage.json +dpkit resource validate --from-package datapackage.json ``` -### `dp resource script` +### `dpkit resource script` Open an interactive scripting session with a loaded data resource. This provides a REPL environment where you can programmatically interact with the resource metadata. 
```bash -dp resource script +dpkit resource script ``` **Options:** @@ -168,15 +168,15 @@ dp resource script **Examples:** ```bash # Start scripting session with resource -dp resource script resource.json +dpkit resource script resource.json # Script resource from package -dp resource script --from-package datapackage.json --from-resource "users" +dpkit resource script --from-package datapackage.json --from-resource "users" # In the REPL session: -dp> resource.schema.fields.length -dp> resource.schema.fields[0].type -dp> resource.path +dpkit> resource.schema.fields.length +dpkit> resource.schema.fields[0].type +dpkit> resource.path ``` ## Common Workflows @@ -185,55 +185,55 @@ dp> resource.path 1. **Infer from data file:** ```bash - dp resource infer data.csv --json > resource.json + dpkit resource infer data.csv --json > resource.json ``` 2. **Validate the generated resource:** ```bash - dp resource validate resource.json + dpkit resource validate resource.json ``` 3. **Explore the resource structure:** ```bash - dp resource explore resource.json + dpkit resource explore resource.json ``` ### Working with Package Resources ```bash # Explore all resources in a package interactively -dp resource validate --from-package datapackage.json +dpkit resource validate --from-package datapackage.json # Infer metadata for specific resource -dp resource infer --from-package datapackage.json --from-resource "users" +dpkit resource infer --from-package datapackage.json --from-resource "users" # Script specific resource from package -dp resource script --from-package datapackage.json --from-resource "transactions" +dpkit resource script --from-package datapackage.json --from-resource "transactions" ``` ### Resource Analysis Workflow ```bash # Infer resource with custom options -dp resource infer data.csv \ +dpkit resource infer data.csv \ --delimiter ";" \ --header-rows 2 \ --sample-rows 1000 # Validate the inferred resource -dp resource validate resource.json +dpkit resource validate resource.json # Explore interactively to verify structure -dp resource explore resource.json +dpkit resource explore resource.json ``` ### Remote Resource Handling ```bash # Work with remote resources -dp resource explore https://example.com/resource.json -dp resource validate https://example.com/resource.json -dp resource infer https://example.com/data.csv +dpkit resource explore https://example.com/resource.json +dpkit resource validate https://example.com/resource.json +dpkit resource infer https://example.com/data.csv ``` ## Resource Selection @@ -242,12 +242,12 @@ When working with resources from packages, you can either: 1. **Specify explicitly:** ```bash - dp resource explore --from-package datapackage.json --from-resource "users" + dpkit resource explore --from-package datapackage.json --from-resource "users" ``` 2. 
**Interactive selection:** ```bash - dp resource validate --from-package datapackage.json + dpkit resource validate --from-package datapackage.json # Will prompt to select from available resources ``` @@ -265,7 +265,7 @@ Resource commands work seamlessly with package commands: ```bash # Create package, then work with individual resources -dp package infer *.csv --json > datapackage.json -dp resource validate --from-package datapackage.json --from-resource "data" -dp resource explore --from-package datapackage.json --from-resource "users" +dpkit package infer *.csv --json > datapackage.json +dpkit resource validate --from-package datapackage.json --from-resource "data" +dpkit resource explore --from-package datapackage.json --from-resource "users" ``` diff --git a/site/content/docs/guides/schema.md b/site/content/docs/guides/schema.md index 8969b7ab..31df07e7 100644 --- a/site/content/docs/guides/schema.md +++ b/site/content/docs/guides/schema.md @@ -8,12 +8,12 @@ Table Schema commands help you work with table schemas - metadata that describes ## Available Commands -### `dp schema infer` +### `dpkit schema infer` Infer a table schema from a table by analyzing its data and generating field definitions including types, constraints, and formats. ```bash -dp schema infer +dpkit schema infer ``` **Options:** @@ -69,27 +69,27 @@ dp schema infer **Examples:** ```bash # Infer schema from CSV file -dp schema infer data.csv +dpkit schema infer data.csv # Infer with custom delimiter and date format -dp schema infer data.csv --delimiter ";" --date-format "%d/%m/%Y" +dpkit schema infer data.csv --delimiter ";" --date-format "%d/%m/%Y" # Infer from remote file -dp schema infer https://example.com/data.csv +dpkit schema infer https://example.com/data.csv # Infer from resource in package -dp schema infer --from-package datapackage.json --from-resource "users" +dpkit schema infer --from-package datapackage.json --from-resource "users" # Export schema as JSON -dp schema infer data.csv --json > schema.json +dpkit schema infer data.csv --json > schema.json ``` -### `dp schema convert` +### `dpkit schema convert` Convert table schemas between different formats, supporting bidirectional conversion between Table Schema and JSONSchema formats. ```bash -dp schema convert +dpkit schema convert ``` **Options:** @@ -107,24 +107,24 @@ dp schema convert **Examples:** ```bash # Convert Table Schema to JSONSchema -dp schema convert schema.json --to-format jsonschema +dpkit schema convert schema.json --to-format jsonschema # Convert JSONSchema to Table Schema -dp schema convert schema.jsonschema.json --format jsonschema +dpkit schema convert schema.jsonschema.json --format jsonschema # Save converted schema to file -dp schema convert schema.json --to-format jsonschema --to-path converted.jsonschema.json +dpkit schema convert schema.json --to-format jsonschema --to-path converted.jsonschema.json # Convert from JSONSchema and save as Table Schema -dp schema convert input.jsonschema.json --format jsonschema --to-path output.schema.json +dpkit schema convert input.jsonschema.json --format jsonschema --to-path output.schema.json ``` -### `dp schema explore` +### `dpkit schema explore` Explore a table schema from a local or remote path to view its field definitions and constraints in an interactive format. 
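For non-interactive pipelines the same information is available as JSON; a small sketch, assuming the `--json` flag emits the Table Schema descriptor and `jq` is available:

```bash
# Count the fields and show any primary key without entering the UI
dpkit schema explore schema.json --json | jq '{fieldCount: (.fields | length), primaryKey}'
```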
```bash -dp schema explore +dpkit schema explore ``` **Options:** @@ -136,24 +136,24 @@ dp schema explore **Examples:** ```bash # Explore schema descriptor -dp schema explore schema.json +dpkit schema explore schema.json # Explore remote schema -dp schema explore https://example.com/schema.json +dpkit schema explore https://example.com/schema.json # Explore schema from package resource -dp schema explore --from-package datapackage.json --from-resource "users" +dpkit schema explore --from-package datapackage.json --from-resource "users" # Export schema structure as JSON -dp schema explore schema.json --json +dpkit schema explore schema.json --json ``` -### `dp schema validate` +### `dpkit schema validate` Validate a table schema from a local or remote path against the Table Schema specification. ```bash -dp schema validate +dpkit schema validate ``` **Options:** @@ -167,27 +167,27 @@ dp schema validate **Examples:** ```bash # Validate schema descriptor -dp schema validate schema.json +dpkit schema validate schema.json # Validate remote schema -dp schema validate https://example.com/schema.json +dpkit schema validate https://example.com/schema.json # Validate schema from package resource -dp schema validate --from-package datapackage.json --from-resource "users" +dpkit schema validate --from-package datapackage.json --from-resource "users" # Get validation results as JSON -dp schema validate schema.json --json +dpkit schema validate schema.json --json # Interactive selection when no path provided -dp schema validate --from-package datapackage.json +dpkit schema validate --from-package datapackage.json ``` -### `dp schema script` +### `dpkit schema script` Open an interactive scripting session with a loaded table schema. This provides a REPL environment where you can programmatically interact with the schema definition. ```bash -dp schema script +dpkit schema script ``` **Options:** @@ -203,16 +203,16 @@ dp schema script **Examples:** ```bash # Start scripting session with schema -dp schema script schema.json +dpkit schema script schema.json # Script schema from package resource -dp schema script --from-package datapackage.json --from-resource "users" +dpkit schema script --from-package datapackage.json --from-resource "users" # In the REPL session: -dp> schema.fields.length -dp> schema.fields[0].name -dp> schema.fields.filter(f => f.type === 'integer') -dp> schema.primaryKey +dpkit> schema.fields.length +dpkit> schema.fields[0].name +dpkit> schema.fields.filter(f => f.type === 'integer') +dpkit> schema.primaryKey ``` ## Common Workflows @@ -221,65 +221,65 @@ dp> schema.primaryKey 1. **Infer from data file:** ```bash - dp schema infer data.csv --json > schema.json + dpkit schema infer data.csv --json > schema.json ``` 2. **Validate the generated schema:** ```bash - dp schema validate schema.json + dpkit schema validate schema.json ``` 3. 
**Explore the schema structure:** ```bash - dp schema explore schema.json + dpkit schema explore schema.json ``` ### Schema Format Conversion ```bash # Convert Table Schema to JSONSchema for JSON Schema validation tools -dp schema infer data.csv --json > table.schema.json -dp schema convert table.schema.json --to-format jsonschema --to-path api.jsonschema.json +dpkit schema infer data.csv --json > table.schema.json +dpkit schema convert table.schema.json --to-format jsonschema --to-path api.jsonschema.json # Convert JSONSchema back to Table Schema for dpkit tools -dp schema convert api.jsonschema.json --format jsonschema --to-path converted.schema.json +dpkit schema convert api.jsonschema.json --format jsonschema --to-path converted.schema.json # Validate the round-trip conversion -dp schema validate converted.schema.json +dpkit schema validate converted.schema.json ``` ### Schema Analysis and Refinement ```bash # Infer schema with high confidence threshold -dp schema infer data.csv --confidence 0.8 --sample-rows 10000 +dpkit schema infer data.csv --confidence 0.8 --sample-rows 10000 # Validate and explore for refinement -dp schema validate schema.json -dp schema explore schema.json +dpkit schema validate schema.json +dpkit schema explore schema.json # Script for custom analysis -dp schema script schema.json +dpkit schema script schema.json ``` ### Working with Package Schemas ```bash # Validate all schemas in a package interactively -dp schema validate --from-package datapackage.json +dpkit schema validate --from-package datapackage.json # Infer improved schema for specific resource -dp schema infer --from-package datapackage.json --from-resource "transactions" +dpkit schema infer --from-package datapackage.json --from-resource "transactions" # Compare schemas using scripting -dp schema script --from-package datapackage.json --from-resource "users" +dpkit schema script --from-package datapackage.json --from-resource "users" ``` ### Custom Type Inference ```bash # Configure specific data types and formats -dp schema infer data.csv \ +dpkit schema infer data.csv \ --datetime-format "%Y-%m-%d %H:%M:%S" \ --true-values "Yes,True,1" \ --false-values "No,False,0" \ @@ -291,9 +291,9 @@ dp schema infer data.csv \ ```bash # Work with remote schemas -dp schema explore https://example.com/schema.json -dp schema validate https://example.com/schema.json -dp schema infer https://example.com/data.csv +dpkit schema explore https://example.com/schema.json +dpkit schema validate https://example.com/schema.json +dpkit schema infer https://example.com/data.csv ``` ## Schema Field Types @@ -310,28 +310,28 @@ The schema inference supports various field types: ### Confidence Tuning ```bash # High confidence for clean data -dp schema infer data.csv --confidence 0.9 +dpkit schema infer data.csv --confidence 0.9 # Lower confidence for messy data -dp schema infer data.csv --confidence 0.6 +dpkit schema infer data.csv --confidence 0.6 ``` ### Sample Size Control ```bash # Large sample for better inference -dp schema infer large_data.csv --sample-rows 50000 +dpkit schema infer large_data.csv --sample-rows 50000 # Quick inference with small sample -dp schema infer data.csv --sample-rows 100 +dpkit schema infer data.csv --sample-rows 100 ``` ### Format Specifications ```bash # European date format -dp schema infer data.csv --date-format "%d.%m.%Y" +dpkit schema infer data.csv --date-format "%d.%m.%Y" # Custom boolean values -dp schema infer data.csv --true-values "Ja,Oui,Sí" --false-values "Nein,Non,No" +dpkit schema 
infer data.csv --true-values "Ja,Oui,Sí" --false-values "Nein,Non,No" ``` ## Output Formats @@ -348,15 +348,15 @@ The `convert` command enables seamless integration with other schema ecosystems: ```bash # Use with JSON Schema validation libraries -dp schema infer data.csv --json > table.schema.json -dp schema convert table.schema.json --to-format jsonschema --to-path validation.jsonschema.json +dpkit schema infer data.csv --json > table.schema.json +dpkit schema convert table.schema.json --to-format jsonschema --to-path validation.jsonschema.json # Import existing JSONSchema into dpkit workflow -dp schema convert external.jsonschema.json --format jsonschema --to-path dpkit.schema.json -dp table validate data.csv --schema dpkit.schema.json +dpkit schema convert external.jsonschema.json --format jsonschema --to-path dpkit.schema.json +dpkit table validate data.csv --schema dpkit.schema.json # Cross-platform schema sharing -dp schema convert schema.json --to-format jsonschema --to-path api-spec.jsonschema.json +dpkit schema convert schema.json --to-format jsonschema --to-path api-spec.jsonschema.json ``` ## Integration with Other Commands @@ -365,10 +365,10 @@ Schema commands work seamlessly with other dpkit commands: ```bash # Create schema, then use it for validation -dp schema infer data.csv --json > schema.json -dp table validate data.csv --schema schema.json +dpkit schema infer data.csv --json > schema.json +dpkit table validate data.csv --schema schema.json # Work within package context -dp package infer *.csv --json > datapackage.json -dp schema validate --from-package datapackage.json --from-resource "data" +dpkit package infer *.csv --json > datapackage.json +dpkit schema validate --from-package datapackage.json --from-resource "data" ``` diff --git a/site/content/docs/guides/table.md b/site/content/docs/guides/table.md index 8b0baef6..138bebec 100644 --- a/site/content/docs/guides/table.md +++ b/site/content/docs/guides/table.md @@ -8,12 +8,12 @@ Table commands help you work directly with tabular data files. These commands al ## Available Commands -### `dp table convert` +### `dpkit table convert` Convert a table from one format to another with support for various input and output formats. ```bash -dp table convert +dpkit table convert ``` **Options:** @@ -29,24 +29,24 @@ dp table convert **Examples:** ```bash # Convert CSV to Excel -dp table convert data.csv data.xlsx +dpkit table convert data.csv data.xlsx # Convert Excel to JSON -dp table convert data.xlsx data.json +dpkit table convert data.xlsx data.json # Convert from package resource -dp table convert --from-package datapackage.json --from-resource "users" users.xlsx +dpkit table convert --from-package datapackage.json --from-resource "users" users.xlsx # Convert Parquet to CSV -dp table convert data.parquet data.csv +dpkit table convert data.parquet data.csv ``` -### `dp table describe` +### `dpkit table describe` Generate statistical descriptions and summaries of table data including column statistics, data types, and quality metrics. 
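One practical use of the summaries is to sanity-check a conversion; a sketch, assuming Parquet is also available as an output target (the converse Parquet-to-CSV conversion is shown above):

```bash
# Convert, then compare summaries of the source and the result
dpkit table convert data.csv data.parquet
dpkit table describe data.csv
dpkit table describe data.parquet
```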
```bash -dp table describe +dpkit table describe ``` **Options:** @@ -58,24 +58,24 @@ dp table describe **Examples:** ```bash # Describe CSV file -dp table describe data.csv +dpkit table describe data.csv # Describe with JSON output -dp table describe data.csv --json +dpkit table describe data.csv --json # Describe resource from package -dp table describe --from-package datapackage.json --from-resource "sales" +dpkit table describe --from-package datapackage.json --from-resource "sales" # Describe remote table -dp table describe https://example.com/data.csv +dpkit table describe https://example.com/data.csv ``` -### `dp table explore` +### `dpkit table explore` Explore a table interactively, viewing data samples, column information, and basic statistics in a rich terminal interface. ```bash -dp table explore +dpkit table explore ``` **Options:** @@ -89,27 +89,27 @@ dp table explore **Examples:** ```bash # Explore CSV file -dp table explore data.csv +dpkit table explore data.csv # Explore with schema validation -dp table explore data.csv --schema schema.json +dpkit table explore data.csv --schema schema.json # Explore with custom dialect -dp table explore data.csv --dialect dialect.json +dpkit table explore data.csv --dialect dialect.json # Explore resource from package -dp table explore --from-package datapackage.json --from-resource "users" +dpkit table explore --from-package datapackage.json --from-resource "users" # Explore remote table -dp table explore https://example.com/data.csv +dpkit table explore https://example.com/data.csv ``` -### `dp table validate` +### `dpkit table validate` Validate table data against a schema, checking data types, constraints, and data quality issues. ```bash -dp table validate +dpkit table validate ``` **Options:** @@ -125,27 +125,27 @@ dp table validate **Examples:** ```bash # Validate with schema -dp table validate data.csv --schema schema.json +dpkit table validate data.csv --schema schema.json # Validate with custom dialect and schema -dp table validate data.csv --dialect dialect.json --schema schema.json +dpkit table validate data.csv --dialect dialect.json --schema schema.json # Validate resource from package -dp table validate --from-package datapackage.json --from-resource "users" +dpkit table validate --from-package datapackage.json --from-resource "users" # Get validation results as JSON -dp table validate data.csv --schema schema.json --json +dpkit table validate data.csv --schema schema.json --json # Validate remote table -dp table validate https://example.com/data.csv --schema https://example.com/schema.json +dpkit table validate https://example.com/data.csv --schema https://example.com/schema.json ``` -### `dp table script` +### `dpkit table script` Open an interactive scripting session with a loaded table. This provides a REPL environment where you can programmatically analyze and manipulate table data. 
```bash -dp table script +dpkit table script ``` **Options:** @@ -163,19 +163,19 @@ dp table script **Examples:** ```bash # Start scripting session with table -dp table script data.csv +dpkit table script data.csv # Script with schema and dialect -dp table script data.csv --schema schema.json --dialect dialect.json +dpkit table script data.csv --schema schema.json --dialect dialect.json # Script resource from package -dp table script --from-package datapackage.json --from-resource "sales" +dpkit table script --from-package datapackage.json --from-resource "sales" # In the REPL session: -dp> table.rows.length -dp> table.columns -dp> table.rows[0] -dp> table.schema.fields.map(f => f.name) +dpkit> table.rows.length +dpkit> table.columns +dpkit> table.rows[0] +dpkit> table.schema.fields.map(f => f.name) ``` ## Common Workflows @@ -184,69 +184,69 @@ dp> table.schema.fields.map(f => f.name) 1. **Quick exploration:** ```bash - dp table explore data.csv + dpkit table explore data.csv ``` 2. **Generate statistical summary:** ```bash - dp table describe data.csv + dpkit table describe data.csv ``` 3. **Interactive analysis:** ```bash - dp table script data.csv + dpkit table script data.csv ``` ### Data Validation Workflow ```bash # Infer schema from table -dp schema infer data.csv --json > schema.json +dpkit schema infer data.csv --json > schema.json # Validate table against schema -dp table validate data.csv --schema schema.json +dpkit table validate data.csv --schema schema.json # Explore validation issues -dp table explore data.csv --schema schema.json +dpkit table explore data.csv --schema schema.json ``` ### Format Conversion Pipeline ```bash # Convert Excel to CSV for processing -dp table convert input.xlsx temp.csv +dpkit table convert input.xlsx temp.csv # Process and validate -dp table validate temp.csv --schema schema.json +dpkit table validate temp.csv --schema schema.json # Convert to final format -dp table convert temp.csv output.json +dpkit table convert temp.csv output.json ``` ### Package Integration Workflow ```bash # Create package with tables -dp package infer *.csv --json > datapackage.json +dpkit package infer *.csv --json > datapackage.json # Validate individual tables -dp table validate --from-package datapackage.json --from-resource "users" +dpkit table validate --from-package datapackage.json --from-resource "users" # Describe tables for documentation -dp table describe --from-package datapackage.json --from-resource "sales" --json +dpkit table describe --from-package datapackage.json --from-resource "sales" --json ``` ### Remote Table Processing ```bash # Explore remote table -dp table explore https://example.com/data.csv +dpkit table explore https://example.com/data.csv # Convert remote to local format -dp table convert https://example.com/data.csv local_data.xlsx +dpkit table convert https://example.com/data.csv local_data.xlsx # Validate remote table with local schema -dp table validate https://example.com/data.csv --schema local_schema.json +dpkit table validate https://example.com/data.csv --schema local_schema.json ``` ## Supported File Formats @@ -272,28 +272,28 @@ dp table validate https://example.com/data.csv --schema local_schema.json ### Schema-Aware Operations ```bash # Validate with type checking -dp table validate data.csv --schema schema.json +dpkit table validate data.csv --schema schema.json # Explore with schema hints -dp table explore data.csv --schema schema.json +dpkit table explore data.csv --schema schema.json ``` ### Custom Parsing ```bash # Use 
custom dialect for parsing -dp table explore data.csv --dialect custom_dialect.json +dpkit table explore data.csv --dialect custom_dialect.json # Convert with parsing options -dp table convert complex_data.csv output.xlsx --dialect dialect.json +dpkit table convert complex_data.csv output.xlsx --dialect dialect.json ``` ### Statistical Analysis ```bash # Generate comprehensive statistics -dp table describe large_dataset.csv --json > stats.json +dpkit table describe large_dataset.csv --json > stats.json # Interactive statistical exploration -dp table script data.csv +dpkit table script data.csv # In REPL: analyze column distributions, correlations, etc. ``` @@ -302,29 +302,29 @@ dp table script data.csv ### Validation Issues ```bash # Get detailed validation report -dp table validate data.csv --schema schema.json --json +dpkit table validate data.csv --schema schema.json --json # Interactive error exploration (don't quit on errors) -dp table validate data.csv --schema schema.json +dpkit table validate data.csv --schema schema.json ``` ### Parsing Problems ```bash # Debug parsing issues -dp table explore problematic.csv --debug +dpkit table explore problematic.csv --debug # Infer and test dialect -dp dialect infer problematic.csv --json > dialect.json -dp table explore problematic.csv --dialect dialect.json +dpkit dialect infer problematic.csv --json > dialect.json +dpkit table explore problematic.csv --dialect dialect.json ``` ### Performance Optimization ```bash # For large files, use sampling -dp table describe huge_file.csv --sample-rows 10000 +dpkit table describe huge_file.csv --sample-rows 10000 # Convert to efficient formats for repeated analysis -dp table convert large_data.csv data.parquet +dpkit table convert large_data.csv data.parquet ``` ## Output Formats @@ -341,20 +341,20 @@ Table commands support multiple output formats: ### With Package Commands ```bash # Create and validate package -dp package infer *.csv --json > datapackage.json -dp table validate --from-package datapackage.json --from-resource "main" +dpkit package infer *.csv --json > datapackage.json +dpkit table validate --from-package datapackage.json --from-resource "main" ``` ### With Schema Commands ```bash # Infer schema and validate table -dp schema infer data.csv --json > schema.json -dp table validate data.csv --schema schema.json +dpkit schema infer data.csv --json > schema.json +dpkit table validate data.csv --schema schema.json ``` ### With Dialect Commands ```bash # Infer dialect and use for table operations -dp dialect infer data.csv --json > dialect.json -dp table explore data.csv --dialect dialect.json +dpkit dialect infer data.csv --json > dialect.json +dpkit table explore data.csv --dialect dialect.json ``` diff --git a/site/content/docs/index.md b/site/content/docs/index.md index 5de191ab..158259af 100644 --- a/site/content/docs/index.md +++ b/site/content/docs/index.md @@ -30,7 +30,7 @@ curl -fsSL https://dpkit.app/install.sh | sh After downloading, you can verify the binary using the following command: ```sh -./dp --version +./dpkit --version ``` We recommend adding the binary to your PATH environment variable to make it easier to use. 
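For example, one way to do that on Linux or macOS (a minimal sketch, not part of the diff above; it assumes the binary was unpacked into the current directory and uses `~/.local/bin` as a hypothetical install location):

```sh
# Move the downloaded binary to a user-level bin directory
mkdir -p ~/.local/bin
mv ./dpkit ~/.local/bin/

# Add this line to your shell profile (e.g. ~/.bashrc or ~/.zshrc) so it persists
export PATH="$HOME/.local/bin:$PATH"
```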
@@ -46,5 +46,5 @@ npm install -g dpkit After that you can use the CLI binary: ```bash -dp --version +dpkit --version ``` diff --git a/table/README.md b/table/README.md index 5b917c98..21ef3358 100644 --- a/table/README.md +++ b/table/README.md @@ -1,3 +1,3 @@ # @dpkit/table -dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev). +dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [project's website](https://dpkit.app). diff --git a/table/profiles/geojson.json b/table/assets/geojson.json similarity index 100% rename from table/profiles/geojson.json rename to table/assets/geojson.json diff --git a/table/profiles/topojson.json b/table/assets/topojson.json similarity index 100% rename from table/profiles/topojson.json rename to table/assets/topojson.json diff --git a/table/data/record.ts b/table/data/record.ts index 4ead3f86..570c1c3f 100644 --- a/table/data/record.ts +++ b/table/data/record.ts @@ -1,4 +1,4 @@ -import type { Dialect } from "@dpkit/core" +import type { Dialect } from "@dpkit/metadata" export type DataRow = unknown[] export type DataRecord = Record diff --git a/table/error/Base.ts b/table/error/Base.ts deleted file mode 100644 index e0d2fcb6..00000000 --- a/table/error/Base.ts +++ /dev/null @@ -1,3 +0,0 @@ -import type { BaseError } from "@dpkit/core" - -export interface BaseTableError extends BaseError {} diff --git a/table/error/ForeignKey.ts b/table/error/ForeignKey.ts deleted file mode 100644 index a7142810..00000000 --- a/table/error/ForeignKey.ts +++ /dev/null @@ -1,8 +0,0 @@ -import type { ForeignKey } from "@dpkit/core" -import type { BaseTableError } from "./Base.ts" - -export interface ForeignKeyError extends BaseTableError { - type: "foreignKey" - foreignKey: ForeignKey - cells: string[] -} diff --git a/table/error/index.ts b/table/error/index.ts deleted file mode 100644 index e2c5759d..00000000 --- a/table/error/index.ts +++ /dev/null @@ -1,6 +0,0 @@ -export type * from "./Table.ts" -export type * from "./Cell.ts" -export type * from "./Field.ts" -export type * from "./Fields.ts" -export type * from "./ForeignKey.ts" -export type * from "./Row.ts" diff --git a/table/field/Field.ts b/table/field/Field.ts index b653fbfb..e756a955 100644 --- a/table/field/Field.ts +++ b/table/field/Field.ts @@ -1,6 +1,6 @@ -import type { DataType } from "nodejs-polars" +import * as pl from "nodejs-polars" export type PolarsField = { name: string - type: DataType + type: pl.DataType } diff --git a/table/field/Mapping.ts b/table/field/Mapping.ts index a6899dc2..9fc6c97c 100644 --- a/table/field/Mapping.ts +++ b/table/field/Mapping.ts @@ -1,5 +1,5 @@ -import type { Field } from "@dpkit/core" -import type { Expr } from "nodejs-polars" +import type { Field } from "@dpkit/metadata" +import type * as pl from "nodejs-polars" import type { PolarsField } from "./Field.ts" export interface FieldMapping { @@ -8,6 +8,6 @@ export interface FieldMapping { } export interface CellMapping { - source: Expr - target: Expr + source: pl.Expr + target: pl.Expr } diff --git a/table/field/checks/enum.spec.ts b/table/field/checks/enum.spec.ts 
index 58b504ea..887dbc0d 100644 --- a/table/field/checks/enum.spec.ts +++ b/table/field/checks/enum.spec.ts @@ -1,13 +1,15 @@ -import type { Schema } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Schema } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" -import { validateTable } from "../../table/index.ts" +import { inspectTable } from "../../table/index.ts" -describe("validateTable (cell/enum)", () => { - it("should not report errors for string values that are in the enum", async () => { - const table = DataFrame({ - status: ["pending", "approved", "rejected", "pending"], - }).lazy() +describe("inspectTable (cell/enum)", () => { + it("should not report errors for string values that are in the enum", async () => { + const table = pl + .DataFrame({ + status: ["pending", "approved", "rejected", "pending"], + }) + .lazy() const schema: Schema = { fields: [ @@ -21,16 +23,18 @@ describe("validateTable (cell/enum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) - it("should report errors for values not in the enum", async () => { + it("should report errors for values not in the enum", async () => { const allowedValues = ["pending", "approved", "rejected"] - const table = DataFrame({ - status: ["pending", "approved", "unknown", "cancelled", "rejected"], - }).lazy() + const table = pl + .DataFrame({ + status: ["pending", "approved", "unknown", "cancelled", "rejected"], + }) + .lazy() const schema: Schema = { fields: [ @@ -44,7 +48,7 @@ describe("validateTable (cell/enum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors.filter(e => e.type === "cell/enum")).toHaveLength(2) expect(errors).toContainEqual({ type: "cell/enum", @@ -63,9 +67,11 @@ describe("validateTable (cell/enum)", () => { }) it("should handle null values correctly", async () => { - const table = DataFrame({ - status: ["pending", null, "approved", null], - }).lazy() + const table = pl + .DataFrame({ + status: ["pending", null, "approved", null], + }) + .lazy() const schema: Schema = { fields: [ @@ -79,16 +85,18 @@ describe("validateTable (cell/enum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors.filter(e => e.type === "cell/enum")).toHaveLength(0) }) it("should handle case sensitivity correctly", async () => { const allowedValues = ["pending", "approved", "rejected"] - const table = DataFrame({ - status: ["Pending", "APPROVED", "rejected"], - }).lazy() + const table = pl + .DataFrame({ + status: ["Pending", "APPROVED", "rejected"], + }) + .lazy() const schema: Schema = { fields: [ @@ -102,7 +110,7 @@ describe("validateTable (cell/enum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors.filter(e => e.type === "cell/enum")).toHaveLength(2) expect(errors).toContainEqual({ type: "cell/enum", @@ -123,9 +131,11 @@ describe("validateTable (cell/enum)", () => { it("should handle integer enum with string values", async () => { const allowedValues = ["1", "2", "3"] - const table = DataFrame({ - priority: ["1", "2", "5"], - }).lazy() + const table = pl + .DataFrame({ + priority: ["1", "2", "5"], + }) + .lazy() const schema: Schema = { fields: [ @@ -139,7 +149,7 @@
describe("validateTable (cell/enum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -155,9 +165,11 @@ describe("validateTable (cell/enum)", () => { it("should handle number enum with string values", async () => { const allowedValues = ["1.5", "2.5", "3.5"] - const table = DataFrame({ - rating: ["1.5", "2.5", "4.5"], - }).lazy() + const table = pl + .DataFrame({ + rating: ["1.5", "2.5", "4.5"], + }) + .lazy() const schema: Schema = { fields: [ @@ -171,7 +183,7 @@ describe("validateTable (cell/enum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -187,9 +199,11 @@ describe("validateTable (cell/enum)", () => { it.skip("should handle date enum with string values", async () => { const allowedValues = ["2024-01-01", "2024-02-01", "2024-03-01"] - const table = DataFrame({ - date: ["2024-01-01", "2024-02-01", "2024-05-01"], - }).lazy() + const table = pl + .DataFrame({ + date: ["2024-01-01", "2024-02-01", "2024-05-01"], + }) + .lazy() const schema: Schema = { fields: [ @@ -203,7 +217,7 @@ describe("validateTable (cell/enum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -223,13 +237,15 @@ describe("validateTable (cell/enum)", () => { "2024-01-01T18:00:00", ] - const table = DataFrame({ - timestamp: [ - "2024-01-01T10:00:00", - "2024-01-01T14:00:00", - "2024-01-01T20:00:00", - ], - }).lazy() + const table = pl + .DataFrame({ + timestamp: [ + "2024-01-01T10:00:00", + "2024-01-01T14:00:00", + "2024-01-01T20:00:00", + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -243,7 +259,7 @@ describe("validateTable (cell/enum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -259,9 +275,11 @@ describe("validateTable (cell/enum)", () => { it("should handle year enum with string values", async () => { const allowedValues = ["2020", "2021", "2022"] - const table = DataFrame({ - year: ["2020", "2021", "2023"], - }).lazy() + const table = pl + .DataFrame({ + year: ["2020", "2021", "2023"], + }) + .lazy() const schema: Schema = { fields: [ @@ -275,7 +293,7 @@ describe("validateTable (cell/enum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -291,9 +309,11 @@ describe("validateTable (cell/enum)", () => { it.skip("should handle time enum with string values", async () => { const allowedValues = ["10:00:00", "14:00:00", "18:00:00"] - const table = DataFrame({ - time: ["10:00:00", "14:00:00", "20:00:00"], - }).lazy() + const table = pl + .DataFrame({ + time: ["10:00:00", "14:00:00", "20:00:00"], + }) + .lazy() const schema: Schema = { fields: [ @@ -307,7 +327,7 @@ describe("validateTable (cell/enum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -323,9 +343,11 @@ describe("validateTable (cell/enum)", () => { it.skip("should handle yearmonth enum with string values", async () => { const allowedValues = ["2024-01", "2024-02", "2024-03"] - const table = DataFrame({ - yearmonth: ["2024-01", "2024-02", "2024-05"], - }).lazy() + const table = pl + 
.DataFrame({ + yearmonth: ["2024-01", "2024-02", "2024-05"], + }) + .lazy() const schema: Schema = { fields: [ @@ -339,7 +361,7 @@ describe("validateTable (cell/enum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { diff --git a/table/field/checks/enum.ts b/table/field/checks/enum.ts index 52341ffe..7806f36d 100644 --- a/table/field/checks/enum.ts +++ b/table/field/checks/enum.ts @@ -1,7 +1,6 @@ -import type { Field } from "@dpkit/core" -import type { Expr } from "nodejs-polars" +import type { Field } from "@dpkit/metadata" +import type { CellEnumError } from "@dpkit/metadata" import * as pl from "nodejs-polars" -import type { CellEnumError } from "../../error/index.ts" import { evaluateExpression } from "../../helpers.ts" import type { CellMapping } from "../Mapping.ts" import { parseDateField } from "../types/date.ts" @@ -29,12 +28,12 @@ export function checkCellEnum(field: Field, mapping: CellMapping) { const rawEnum = field.constraints?.enum if (!rawEnum) return undefined - let isErrorExpr: Expr + let isErrorExpr: pl.Expr try { const parsedEnum = parseConstraint(field, rawEnum) isErrorExpr = mapping.target.isIn(parsedEnum).not() } catch (error) { - isErrorExpr = pl.lit(true) + isErrorExpr = pl.lit(true) } const errorTemplate: CellEnumError = { @@ -55,7 +54,7 @@ function parseConstraint(field: Field, value: number[] | string[]) { function parseConstraintItem(field: Field, value: number | string) { if (typeof value !== "string") return value - let expr = pl.lit(value) + let expr = pl.lit(value) if (field.type === "integer") { expr = parseIntegerField(field, expr) } else if (field.type === "number") { diff --git a/table/field/checks/maxLength.spec.ts b/table/field/checks/maxLength.spec.ts index 1916d13c..d3a9de09 100644 --- a/table/field/checks/maxLength.spec.ts +++ b/table/field/checks/maxLength.spec.ts @@ -1,13 +1,15 @@ -import type { Schema } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Schema } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" -import { validateTable } from "../../table/index.ts" +import { inspectTable } from "../../table/index.ts" -describe("validateTable (cell/maxLength)", () => { - it("should not report errors for string values that meet the maxLength constraint", async () => { - const table = DataFrame({ - code: ["A123", "B456", "C789"], - }).lazy() +describe("inspectTable (cell/maxLength)", () => { + it("should not report errors for string values that meet the maxLength constraint", async () => { + const table = pl + .DataFrame({ + code: ["A123", "B456", "C789"], + }) + .lazy() const schema: Schema = { fields: [ @@ -19,14 +21,16 @@ describe("validateTable (cell/maxLength)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) it("should report an error for strings that are too long", async () => { - const table = DataFrame({ - username: ["bob", "alice", "christopher", "david"], - }).lazy() + const table = pl + .DataFrame({ + username: ["bob", "alice", "christopher", "david"], + }) + .lazy() const schema: Schema = { fields: [ @@ -38,7 +42,7 @@ describe("validateTable (cell/maxLength)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors.filter(e => e.type ===
"cell/maxLength")).toHaveLength(1) expect(errors).toContainEqual({ type: "cell/maxLength", diff --git a/table/field/checks/maxLength.ts b/table/field/checks/maxLength.ts index 84306b21..704f0a6e 100644 --- a/table/field/checks/maxLength.ts +++ b/table/field/checks/maxLength.ts @@ -1,5 +1,5 @@ -import type { Field } from "@dpkit/core" -import type { CellMaxLengthError } from "../../error/index.ts" +import type { Field } from "@dpkit/metadata" +import type { CellMaxLengthError } from "@dpkit/metadata" import type { CellMapping } from "../Mapping.ts" export function checkCellMaxLength(field: Field, mapping: CellMapping) { diff --git a/table/field/checks/maximum.spec.ts b/table/field/checks/maximum.spec.ts index 3929b184..b486aeef 100644 --- a/table/field/checks/maximum.spec.ts +++ b/table/field/checks/maximum.spec.ts @@ -1,13 +1,15 @@ -import type { Schema } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Schema } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" -import { validateTable } from "../../table/index.ts" +import { inspectTable } from "../../table/index.ts" -describe("validateTable (cell/maximum)", () => { - it("should not report errors for valid values", async () => { - const table = DataFrame({ - price: [10.5, 20.75, 30.0], - }).lazy() +describe("inspectTable (cell/maximum)", () => { + it("should not report errors for valid values", async () => { + const table = pl + .DataFrame({ + price: [10.5, 20.75, 30.0], + }) + .lazy() const schema: Schema = { fields: [ @@ -19,14 +21,16 @@ describe("validateTable (cell/maximum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) it("should report an error for invalid values", async () => { - const table = DataFrame({ - temperature: [20.5, 30.0, 40, 50.5], - }).lazy() + const table = pl + .DataFrame({ + temperature: [20.5, 30.0, 40, 50.5], + }) + .lazy() const schema: Schema = { fields: [ @@ -38,7 +42,7 @@ describe("validateTable (cell/maximum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors.filter(e => e.type === "cell/maximum")).toHaveLength(1) expect(errors).toContainEqual({ type: "cell/maximum", @@ -50,9 +54,11 @@ describe("validateTable (cell/maximum)", () => { }) it("should report an error for invalid values (exclusive)", async () => { - const table = DataFrame({ - temperature: [20.5, 30.0, 40.0, 50.5], - }).lazy() + const table = pl + .DataFrame({ + temperature: [20.5, 30.0, 40.0, 50.5], + }) + .lazy() const schema: Schema = { fields: [ @@ -64,7 +70,7 @@ describe("validateTable (cell/maximum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors.filter(e => e.type === "cell/exclusiveMaximum")).toHaveLength( 2, ) @@ -85,9 +91,11 @@ describe("validateTable (cell/maximum)", () => { }) it("should handle maximum as string", async () => { - const table = DataFrame({ - price: [10.5, 20.75, 55.0], - }).lazy() + const table = pl + .DataFrame({ + price: [10.5, 20.75, 55.0], + }) + .lazy() const schema: Schema = { fields: [ @@ -99,7 +107,7 @@ describe("validateTable (cell/maximum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -113,9 +121,11 @@
describe("validateTable (cell/maximum)", () => { }) it("should handle exclusiveMaximum as string", async () => { - const table = DataFrame({ - temperature: [20.5, 40.0, 50.5], - }).lazy() + const table = pl + .DataFrame({ + temperature: [20.5, 40.0, 50.5], + }) + .lazy() const schema: Schema = { fields: [ @@ -127,7 +137,7 @@ describe("validateTable (cell/maximum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -148,9 +158,11 @@ describe("validateTable (cell/maximum)", () => { }) it("should handle maximum as string with groupChar", async () => { - const table = DataFrame({ - price: ["5,000", "10,500", "15,000"], - }).lazy() + const table = pl + .DataFrame({ + price: ["5,000", "10,500", "15,000"], + }) + .lazy() const schema: Schema = { fields: [ @@ -163,7 +175,7 @@ describe("validateTable (cell/maximum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -177,9 +189,11 @@ describe("validateTable (cell/maximum)", () => { }) it("should handle maximum as string with decimalChar", async () => { - const table = DataFrame({ - price: ["5,5", "10,75", "15,3"], - }).lazy() + const table = pl + .DataFrame({ + price: ["5,5", "10,75", "15,3"], + }) + .lazy() const schema: Schema = { fields: [ @@ -192,7 +206,7 @@ describe("validateTable (cell/maximum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -206,9 +220,11 @@ describe("validateTable (cell/maximum)", () => { }) it("should handle maximum as string with groupChar and decimalChar", async () => { - const table = DataFrame({ - price: ["5.000,50", "10.500,75", "15.000,30"], - }).lazy() + const table = pl + .DataFrame({ + price: ["5.000,50", "10.500,75", "15.000,30"], + }) + .lazy() const schema: Schema = { fields: [ @@ -222,7 +238,7 @@ describe("validateTable (cell/maximum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -236,9 +252,11 @@ describe("validateTable (cell/maximum)", () => { }) it("should handle maximum as string with bareNumber false", async () => { - const table = DataFrame({ - price: ["$5.00", "$10.50", "$15.50"], - }).lazy() + const table = pl + .DataFrame({ + price: ["$5.00", "$10.50", "$15.50"], + }) + .lazy() const schema: Schema = { fields: [ @@ -251,7 +269,7 @@ describe("validateTable (cell/maximum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -265,9 +283,11 @@ describe("validateTable (cell/maximum)", () => { }) it("should handle maximum for date fields", async () => { - const table = DataFrame({ - date: ["2024-01-15", "2024-02-20", "2024-03-25"], - }).lazy() + const table = pl + .DataFrame({ + date: ["2024-01-15", "2024-02-20", "2024-03-25"], + }) + .lazy() const schema: Schema = { fields: [ @@ -279,7 +299,7 @@ describe("validateTable (cell/maximum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -293,9 +313,11 @@ describe("validateTable (cell/maximum)", () => { }) it.skip("should handle maximum for time fields", async () => { - const table = DataFrame({ - time: 
["14:30:00", "16:45:00", "18:00:00"], - }).lazy() + const table = pl + .DataFrame({ + time: ["14:30:00", "16:45:00", "18:00:00"], + }) + .lazy() const schema: Schema = { fields: [ @@ -307,7 +329,7 @@ describe("validateTable (cell/maximum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -321,13 +343,15 @@ describe("validateTable (cell/maximum)", () => { }) it("should handle maximum for datetime fields", async () => { - const table = DataFrame({ - timestamp: [ - "2024-01-15T14:30:00", - "2024-02-20T08:15:00", - "2024-03-25T10:00:00", - ], - }).lazy() + const table = pl + .DataFrame({ + timestamp: [ + "2024-01-15T14:30:00", + "2024-02-20T08:15:00", + "2024-03-25T10:00:00", + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -339,7 +363,7 @@ describe("validateTable (cell/maximum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -353,9 +377,11 @@ describe("validateTable (cell/maximum)", () => { }) it("should handle maximum for date fields with custom format", async () => { - const table = DataFrame({ - date: ["15/01/2024", "20/02/2024", "25/03/2024"], - }).lazy() + const table = pl + .DataFrame({ + date: ["15/01/2024", "20/02/2024", "25/03/2024"], + }) + .lazy() const schema: Schema = { fields: [ @@ -368,7 +394,7 @@ describe("validateTable (cell/maximum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -382,9 +408,11 @@ describe("validateTable (cell/maximum)", () => { }) it("should handle maximum for year fields", async () => { - const table = DataFrame({ - year: ["2020", "2021", "2023"], - }).lazy() + const table = pl + .DataFrame({ + year: ["2020", "2021", "2023"], + }) + .lazy() const schema: Schema = { fields: [ @@ -396,7 +424,7 @@ describe("validateTable (cell/maximum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -410,9 +438,11 @@ describe("validateTable (cell/maximum)", () => { }) it("should handle exclusiveMaximum for year fields", async () => { - const table = DataFrame({ - year: ["2020", "2021", "2022", "2023"], - }).lazy() + const table = pl + .DataFrame({ + year: ["2020", "2021", "2022", "2023"], + }) + .lazy() const schema: Schema = { fields: [ @@ -424,7 +454,7 @@ describe("validateTable (cell/maximum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -445,9 +475,11 @@ describe("validateTable (cell/maximum)", () => { }) it.skip("should handle maximum for yearmonth fields", async () => { - const table = DataFrame({ - yearmonth: ["2024-01", "2024-03", "2024-06"], - }).lazy() + const table = pl + .DataFrame({ + yearmonth: ["2024-01", "2024-03", "2024-06"], + }) + .lazy() const schema: Schema = { fields: [ @@ -459,7 +491,7 @@ describe("validateTable (cell/maximum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -473,9 +505,11 @@ describe("validateTable (cell/maximum)", () => { }) it.skip("should handle exclusiveMaximum for yearmonth fields", async () => { - const table = DataFrame({ - yearmonth: ["2024-01", "2024-03",
"2024-05", "2024-06"], - }).lazy() + const table = pl + .DataFrame({ + yearmonth: ["2024-01", "2024-03", "2024-05", "2024-06"], + }) + .lazy() const schema: Schema = { fields: [ @@ -487,7 +521,7 @@ describe("validateTable (cell/maximum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { diff --git a/table/field/checks/maximum.ts b/table/field/checks/maximum.ts index 5f9f06a4..40df1df4 100644 --- a/table/field/checks/maximum.ts +++ b/table/field/checks/maximum.ts @@ -1,8 +1,7 @@ -import type { Field } from "@dpkit/core" +import type { Field } from "@dpkit/metadata" +import type { CellExclusiveMaximumError } from "@dpkit/metadata" +import type { CellMaximumError } from "@dpkit/metadata" import * as pl from "nodejs-polars" -import type { Expr } from "nodejs-polars" -import type { CellExclusiveMaximumError } from "../../error/index.ts" -import type { CellMaximumError } from "../../error/index.ts" import { evaluateExpression } from "../../helpers.ts" import type { CellMapping } from "../Mapping.ts" import { parseDateField } from "../types/date.ts" @@ -32,14 +31,14 @@ export function createCheckCellMaximum(options?: { isExclusive?: boolean }) { : field.constraints?.maximum if (maximum === undefined) return undefined - let isErrorExpr: Expr + let isErrorExpr: pl.Expr try { const parsedMaximum = parseConstraint(field, maximum) isErrorExpr = options?.isExclusive ? mapping.target.gtEq(parsedMaximum) : mapping.target.gt(parsedMaximum) } catch (error) { - isErrorExpr = pl.lit(true) + isErrorExpr = pl.lit(true) } const errorTemplate: CellMaximumError | CellExclusiveMaximumError = { @@ -57,7 +56,7 @@ export function createCheckCellMaximum(options?: { isExclusive?: boolean }) { function parseConstraint(field: Field, value: number | string) { if (typeof value !== "string") return value - let expr = pl.lit(value) + let expr = pl.lit(value) if (field.type === "integer") { expr = parseIntegerField(field, expr) } else if (field.type === "number") { diff --git a/table/field/checks/minLength.spec.ts b/table/field/checks/minLength.spec.ts index d4ac46f3..42619bea 100644 --- a/table/field/checks/minLength.spec.ts +++ b/table/field/checks/minLength.spec.ts @@ -1,13 +1,15 @@ -import type { Schema } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Schema } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" -import { validateTable } from "../../table/index.ts" +import { inspectTable } from "../../table/index.ts" -describe("validateTable (cell/minLength)", () => { - it("should not report errors for string values that meet the minLength constraint", async () => { - const table = DataFrame({ - code: ["A123", "B456", "C789"], - }).lazy() +describe("inspectTable (cell/minLength)", () => { + it("should not report errors for string values that meet the minLength constraint", async () => { + const table = pl + .DataFrame({ + code: ["A123", "B456", "C789"], + }) + .lazy() const schema: Schema = { fields: [ @@ -19,14 +21,16 @@ describe("validateTable (cell/minLength)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) it("should report an error for strings that are too short", async () => { - const table = DataFrame({ - username: ["bob", "a", "christopher", "ab"], - }).lazy() + const table = pl + .DataFrame({ + username: ["bob",
"a", "christopher", "ab"], + }) + .lazy() const schema: Schema = { fields: [ @@ -38,7 +42,7 @@ describe("validateTable (cell/minLength)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors.filter(e => e.type === "cell/minLength")).toHaveLength(2) expect(errors).toContainEqual({ type: "cell/minLength", diff --git a/table/field/checks/minLength.ts b/table/field/checks/minLength.ts index b9bfce53..86529b81 100644 --- a/table/field/checks/minLength.ts +++ b/table/field/checks/minLength.ts @@ -1,5 +1,5 @@ -import type { Field } from "@dpkit/core" -import type { CellMinLengthError } from "../../error/index.ts" +import type { Field } from "@dpkit/metadata" +import type { CellMinLengthError } from "@dpkit/metadata" import type { CellMapping } from "../Mapping.ts" export function checkCellMinLength(field: Field, mapping: CellMapping) { diff --git a/table/field/checks/minimum.spec.ts b/table/field/checks/minimum.spec.ts index 9ce9b35a..15dba0e5 100644 --- a/table/field/checks/minimum.spec.ts +++ b/table/field/checks/minimum.spec.ts @@ -1,13 +1,15 @@ -import type { Schema } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Schema } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" -import { validateTable } from "../../table/index.ts" +import { inspectTable } from "../../table/index.ts" -describe("validateTable (cell/minimum)", () => { - it("should not report errors for valid values", async () => { - const table = DataFrame({ - price: [10.5, 20.75, 30.0], - }).lazy() +describe("inspectTable (cell/minimum)", () => { + it("should not report errors for valid values", async () => { + const table = pl + .DataFrame({ + price: [10.5, 20.75, 30.0], + }) + .lazy() const schema: Schema = { fields: [ @@ -19,14 +21,16 @@ describe("validateTable (cell/minimum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) it("should report an error for invalid values", async () => { - const table = DataFrame({ - temperature: [20.5, 30.0, 40, 3.5], - }).lazy() + const table = pl + .DataFrame({ + temperature: [20.5, 30.0, 40, 3.5], + }) + .lazy() const schema: Schema = { fields: [ @@ -38,7 +42,7 @@ describe("validateTable (cell/minimum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors.filter(e => e.type === "cell/minimum")).toHaveLength(1) expect(errors).toContainEqual({ type: "cell/minimum", @@ -50,9 +54,11 @@ describe("validateTable (cell/minimum)", () => { }) it("should report an error for invalid values (exclusive)", async () => { - const table = DataFrame({ - temperature: [20.5, 30.0, 10.0, 5.5], - }).lazy() + const table = pl + .DataFrame({ + temperature: [20.5, 30.0, 10.0, 5.5], + }) + .lazy() const schema: Schema = { fields: [ @@ -64,7 +70,7 @@ describe("validateTable (cell/minimum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors.filter(e => e.type === "cell/exclusiveMinimum")).toHaveLength( 2, ) @@ -85,9 +91,11 @@ describe("validateTable (cell/minimum)", () => { }) it("should handle minimum as string", async () => { - const table = DataFrame({ - price: [10.5, 20.75, 3.0], - }).lazy() + const table = pl + .DataFrame({ + price: [10.5, 20.75, 3.0], + }) +
.lazy() const schema: Schema = { fields: [ @@ -99,7 +107,7 @@ describe("validateTable (cell/minimum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -113,9 +121,11 @@ describe("validateTable (cell/minimum)", () => { }) it("should handle exclusiveMinimum as string", async () => { - const table = DataFrame({ - temperature: [20.5, 10.0, 5.5], - }).lazy() + const table = pl + .DataFrame({ + temperature: [20.5, 10.0, 5.5], + }) + .lazy() const schema: Schema = { fields: [ @@ -127,7 +137,7 @@ describe("validateTable (cell/minimum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -148,9 +158,11 @@ describe("validateTable (cell/minimum)", () => { }) it("should handle minimum as string with groupChar", async () => { - const table = DataFrame({ - price: ["5,000", "10,500", "2,500"], - }).lazy() + const table = pl + .DataFrame({ + price: ["5,000", "10,500", "2,500"], + }) + .lazy() const schema: Schema = { fields: [ @@ -163,7 +175,7 @@ describe("validateTable (cell/minimum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -177,9 +189,11 @@ describe("validateTable (cell/minimum)", () => { }) it("should handle minimum as string with decimalChar", async () => { - const table = DataFrame({ - price: ["5,5", "10,75", "2,3"], - }).lazy() + const table = pl + .DataFrame({ + price: ["5,5", "10,75", "2,3"], + }) + .lazy() const schema: Schema = { fields: [ @@ -192,7 +206,7 @@ describe("validateTable (cell/minimum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -206,9 +220,11 @@ describe("validateTable (cell/minimum)", () => { }) it("should handle minimum as string with groupChar and decimalChar", async () => { - const table = DataFrame({ - price: ["5.000,50", "10.500,75", "2.500,30"], - }).lazy() + const table = pl + .DataFrame({ + price: ["5.000,50", "10.500,75", "2.500,30"], + }) + .lazy() const schema: Schema = { fields: [ @@ -222,7 +238,7 @@ describe("validateTable (cell/minimum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -236,9 +252,11 @@ describe("validateTable (cell/minimum)", () => { }) it("should handle minimum as string with bareNumber false", async () => { - const table = DataFrame({ - price: ["$5.00", "$10.50", "$2.50"], - }).lazy() + const table = pl + .DataFrame({ + price: ["$5.00", "$10.50", "$2.50"], + }) + .lazy() const schema: Schema = { fields: [ @@ -251,7 +269,7 @@ describe("validateTable (cell/minimum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -265,9 +283,11 @@ describe("validateTable (cell/minimum)", () => { }) it("should handle minimum for date fields", async () => { - const table = DataFrame({ - date: ["2024-01-15", "2024-02-20", "2024-01-05"], - }).lazy() + const table = pl + .DataFrame({ + date: ["2024-01-15", "2024-02-20", "2024-01-05"], + }) + .lazy() const schema: Schema = { fields: [ @@ -279,7 +299,7 @@ describe("validateTable (cell/minimum)", () => { ], } - const { errors } = await validateTable(table, { schema 
}) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -293,9 +313,11 @@ describe("validateTable (cell/minimum)", () => { }) it.skip("should handle minimum for time fields", async () => { - const table = DataFrame({ - time: ["14:30:00", "16:45:00", "12:15:00"], - }).lazy() + const table = pl + .DataFrame({ + time: ["14:30:00", "16:45:00", "12:15:00"], + }) + .lazy() const schema: Schema = { fields: [ @@ -307,7 +329,7 @@ describe("validateTable (cell/minimum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -321,13 +343,15 @@ describe("validateTable (cell/minimum)", () => { }) it("should handle minimum for datetime fields", async () => { - const table = DataFrame({ - timestamp: [ - "2024-01-15T14:30:00", - "2024-02-20T08:15:00", - "2024-01-10T10:00:00", - ], - }).lazy() + const table = pl + .DataFrame({ + timestamp: [ + "2024-01-15T14:30:00", + "2024-02-20T08:15:00", + "2024-01-10T10:00:00", + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -339,7 +363,7 @@ describe("validateTable (cell/minimum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -353,9 +377,11 @@ describe("validateTable (cell/minimum)", () => { }) it("should handle minimum for date fields with custom format", async () => { - const table = DataFrame({ - date: ["15/01/2024", "20/02/2024", "05/01/2024"], - }).lazy() + const table = pl + .DataFrame({ + date: ["15/01/2024", "20/02/2024", "05/01/2024"], + }) + .lazy() const schema: Schema = { fields: [ @@ -368,7 +394,7 @@ describe("validateTable (cell/minimum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -382,9 +408,11 @@ describe("validateTable (cell/minimum)", () => { }) it("should handle minimum for year fields", async () => { - const table = DataFrame({ - year: ["2020", "2021", "2018"], - }).lazy() + const table = pl + .DataFrame({ + year: ["2020", "2021", "2018"], + }) + .lazy() const schema: Schema = { fields: [ @@ -396,7 +424,7 @@ describe("validateTable (cell/minimum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -410,9 +438,11 @@ describe("validateTable (cell/minimum)", () => { }) it("should handle exclusiveMinimum for year fields", async () => { - const table = DataFrame({ - year: ["2020", "2021", "2019", "2018"], - }).lazy() + const table = pl + .DataFrame({ + year: ["2020", "2021", "2019", "2018"], + }) + .lazy() const schema: Schema = { fields: [ @@ -424,7 +454,7 @@ describe("validateTable (cell/minimum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { @@ -445,9 +475,11 @@ describe("validateTable (cell/minimum)", () => { }) it.skip("should handle minimum for yearmonth fields", async () => { - const table = DataFrame({ - yearmonth: ["2024-03", "2024-05", "2024-01"], - }).lazy() + const table = pl + .DataFrame({ + yearmonth: ["2024-03", "2024-05", "2024-01"], + }) + .lazy() const schema: Schema = { fields: [ @@ -459,7 +491,7 @@ describe("validateTable (cell/minimum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, 
{ schema }) expect(errors).toEqual([ { @@ -473,9 +505,11 @@ describe("validateTable (cell/minimum)", () => { }) it.skip("should handle exclusiveMinimum for yearmonth fields", async () => { - const table = DataFrame({ - yearmonth: ["2024-03", "2024-05", "2024-02", "2024-01"], - }).lazy() + const table = pl + .DataFrame({ + yearmonth: ["2024-03", "2024-05", "2024-02", "2024-01"], + }) + .lazy() const schema: Schema = { fields: [ @@ -487,7 +521,7 @@ describe("validateTable (cell/minimum)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { diff --git a/table/field/checks/minimum.ts b/table/field/checks/minimum.ts index 1e33434d..290c87f6 100644 --- a/table/field/checks/minimum.ts +++ b/table/field/checks/minimum.ts @@ -1,8 +1,7 @@ -import type { Field } from "@dpkit/core" +import type { Field } from "@dpkit/metadata" +import type { CellExclusiveMinimumError } from "@dpkit/metadata" +import type { CellMinimumError } from "@dpkit/metadata" import * as pl from "nodejs-polars" -import type { Expr } from "nodejs-polars" -import type { CellExclusiveMinimumError } from "../../error/index.ts" -import type { CellMinimumError } from "../../error/index.ts" import { evaluateExpression } from "../../helpers.ts" import type { CellMapping } from "../Mapping.ts" import { parseDateField } from "../types/date.ts" @@ -32,14 +31,14 @@ export function createCheckCellMinimum(options?: { isExclusive?: boolean }) { : field.constraints?.minimum if (minimum === undefined) return undefined - let isErrorExpr: Expr + let isErrorExpr: pl.Expr try { const parsedMinimum = parseConstraint(field, minimum) isErrorExpr = options?.isExclusive ? mapping.target.ltEq(parsedMinimum) : mapping.target.lt(parsedMinimum) } catch (error) { - isErrorExpr = pl.lit(true) + isErrorExpr = pl.lit(true) } const errorTemplate: CellMinimumError | CellExclusiveMinimumError = { @@ -57,7 +56,7 @@ export function createCheckCellMinimum(options?: { isExclusive?: boolean }) { function parseConstraint(field: Field, value: number | string) { if (typeof value !== "string") return value - let expr = pl.lit(value) + let expr = pl.lit(value) if (field.type === "integer") { expr = parseIntegerField(field, expr) } else if (field.type === "number") { diff --git a/table/field/checks/pattern.spec.ts b/table/field/checks/pattern.spec.ts index afc8a255..52c052d5 100644 --- a/table/field/checks/pattern.spec.ts +++ b/table/field/checks/pattern.spec.ts @@ -1,13 +1,15 @@ -import type { Schema } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Schema } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" -import { validateTable } from "../../table/index.ts" +import { inspectTable } from "../../table/index.ts" -describe("validateTable (cell/pattern)", () => { - it("should not report errors for string values that match the pattern", async () => { - const table = DataFrame({ - email: ["john@example.com", "alice@domain.org", "test@test.io"], - }).lazy() +describe("inspectTable (cell/pattern)", () => { + it("should not report errors for string values that match the pattern", async () => { + const table = pl + .DataFrame({ + email: ["john@example.com", "alice@domain.org", "test@test.io"], + }) + .lazy() const schema: Schema = { fields: [ @@ -21,16 +23,23 @@ describe("validateTable (cell/pattern)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await
inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) it("should report an error for strings that don't match the pattern", async () => { const pattern = "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$" - const table = DataFrame({ - email: ["john@example.com", "alice@domain", "test.io", "valid@email.com"], - }).lazy() + const table = pl + .DataFrame({ + email: [ + "john@example.com", + "alice@domain", + "test.io", + "valid@email.com", + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -44,7 +53,7 @@ describe("validateTable (cell/pattern)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors.filter(e => e.type === "cell/pattern")).toHaveLength(2) expect(errors).toContainEqual({ type: "cell/pattern", diff --git a/table/field/checks/pattern.ts b/table/field/checks/pattern.ts index e4c31d0c..2a106918 100644 --- a/table/field/checks/pattern.ts +++ b/table/field/checks/pattern.ts @@ -1,5 +1,5 @@ -import type { Field } from "@dpkit/core" -import type { CellPatternError } from "../../error/index.ts" +import type { Field } from "@dpkit/metadata" +import type { CellPatternError } from "@dpkit/metadata" import type { CellMapping } from "../Mapping.ts" export function checkCellPattern(field: Field, mapping: CellMapping) { diff --git a/table/field/checks/required.spec.ts b/table/field/checks/required.spec.ts index d67838cc..554b23ca 100644 --- a/table/field/checks/required.spec.ts +++ b/table/field/checks/required.spec.ts @@ -1,19 +1,21 @@ -import type { Schema } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Schema } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" -import { validateTable } from "../../table/index.ts" +import { inspectTable } from "../../table/index.ts" -describe("validateTable (cell/required)", () => { +describe("inspectTable (cell/required)", () => { it("should report a cell/required error", async () => { - const table = DataFrame({ - id: [1, null, 3], - }).lazy() + const table = pl + .DataFrame({ + id: [1, null, 3], + }) + .lazy() const schema: Schema = { fields: [{ name: "id", type: "number", constraints: { required: true } }], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(1) expect(errors).toContainEqual({ diff --git a/table/field/checks/required.ts b/table/field/checks/required.ts index 08602f26..6196fc65 100644 --- a/table/field/checks/required.ts +++ b/table/field/checks/required.ts @@ -1,5 +1,5 @@ -import type { Field } from "@dpkit/core" -import type { CellRequiredError } from "../../error/index.ts" +import type { Field } from "@dpkit/metadata" +import type { CellRequiredError } from "@dpkit/metadata" import type { CellMapping } from "../Mapping.ts" export function checkCellRequired(field: Field, mapping: CellMapping) { diff --git a/table/field/checks/type.spec.ts b/table/field/checks/type.spec.ts index 3b0304e2..ecadeb5e 100644 --- a/table/field/checks/type.spec.ts +++ b/table/field/checks/type.spec.ts @@ -1,19 +1,21 @@ -import type { Schema } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Schema } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" -import { validateTable } from "../../table/index.ts" +import { inspectTable } from "../../table/index.ts" -describe("validateTable", () => { - 
it("should validate string to integer conversion errors", async () => { - const table = DataFrame({ - id: ["1", "bad", "3", "4x"], - }).lazy() +describe("inspectTable", () => { + it("should validate string to integer convertions errors", async () => { + const table = pl + .DataFrame({ + id: ["1", "bad", "3", "4x"], + }) + .lazy() const schema: Schema = { fields: [{ name: "id", type: "integer" }], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(2) expect(errors).toContainEqual({ @@ -32,16 +34,18 @@ describe("validateTable", () => { }) }) - it("should validate string to number conversion errors", async () => { - const table = DataFrame({ - price: ["10.5", "twenty", "30.75", "$40"], - }).lazy() + it("should validate string to number convertions errors", async () => { + const table = pl + .DataFrame({ + price: ["10.5", "twenty", "30.75", "$40"], + }) + .lazy() const schema: Schema = { fields: [{ name: "price", type: "number" }], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(2) expect(errors).toContainEqual({ @@ -60,16 +64,18 @@ describe("validateTable", () => { }) }) - it("should validate string to boolean conversion errors", async () => { - const table = DataFrame({ - active: ["true", "yes", "false", "0", "1"], - }).lazy() + it("should validate string to boolean convertions errors", async () => { + const table = pl + .DataFrame({ + active: ["true", "yes", "false", "0", "1"], + }) + .lazy() const schema: Schema = { fields: [{ name: "active", type: "boolean" }], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(1) expect(errors).toContainEqual({ @@ -81,16 +87,18 @@ describe("validateTable", () => { }) }) - it("should validate string to date conversion errors", async () => { - const table = DataFrame({ - created: ["2023-01-15", "Jan 15, 2023", "20230115", "not-a-date"], - }).lazy() + it("should validate string to date convertions errors", async () => { + const table = pl + .DataFrame({ + created: ["2023-01-15", "Jan 15, 2023", "20230115", "not-a-date"], + }) + .lazy() const schema: Schema = { fields: [{ name: "created", type: "date" }], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(3) expect(errors).toContainEqual({ @@ -116,16 +124,18 @@ describe("validateTable", () => { }) }) - it("should validate string to time conversion errors", async () => { - const table = DataFrame({ - time: ["14:30:00", "2:30pm", "invalid", "14h30"], - }).lazy() + it("should validate string to time convertions errors", async () => { + const table = pl + .DataFrame({ + time: ["14:30:00", "2:30pm", "invalid", "14h30"], + }) + .lazy() const schema: Schema = { fields: [{ name: "time", type: "time" }], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(3) expect(errors).toContainEqual({ @@ -151,16 +161,18 @@ describe("validateTable", () => { }) }) - it("should validate string to time conversion errors with custom format", async () => { - const table = DataFrame({ - time: ["14:30", "invalid"], - }).lazy() + it("should validate string to time convertions errors with custom format", async () => { + const table = pl + .DataFrame({ + time: ["14:30", 
"invalid"], + }) + .lazy() const schema: Schema = { fields: [{ name: "time", type: "time", format: "%H:%M" }], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) console.log(errors) @@ -175,16 +187,18 @@ describe("validateTable", () => { }) }) - it("should validate string to year conversion errors", async () => { - const table = DataFrame({ - year: ["2023", "23", "MMXXIII", "two-thousand-twenty-three"], - }).lazy() + it("should validate string to year convertions errors", async () => { + const table = pl + .DataFrame({ + year: ["2023", "23", "MMXXIII", "two-thousand-twenty-three"], + }) + .lazy() const schema: Schema = { fields: [{ name: "year", type: "year" }], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(3) expect(errors).toContainEqual({ @@ -210,21 +224,23 @@ describe("validateTable", () => { }) }) - it("should validate string to datetime conversion errors", async () => { - const table = DataFrame({ - timestamp: [ - "2023-01-15T14:30:00", - "January 15, 2023 2:30 PM", - "2023-01-15 14:30", - "not-a-datetime", - ], - }).lazy() + it("should validate string to datetime convertions errors", async () => { + const table = pl + .DataFrame({ + timestamp: [ + "2023-01-15T14:30:00", + "January 15, 2023 2:30 PM", + "2023-01-15 14:30", + "not-a-datetime", + ], + }) + .lazy() const schema: Schema = { fields: [{ name: "datetime", type: "datetime" }], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) // Adjust the expectations to match actual behavior expect(errors.length).toBeGreaterThan(0) @@ -248,29 +264,33 @@ describe("validateTable", () => { }) it("should pass validation when all cells are valid", async () => { - const table = DataFrame({ - id: ["1", "2", "3", "4"], - }).lazy() + const table = pl + .DataFrame({ + id: ["1", "2", "3", "4"], + }) + .lazy() const schema: Schema = { fields: [{ name: "id", type: "integer" }], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) it("should validate with non-string source data", async () => { - const table = DataFrame({ - is_active: [true, false, 1, 0], - }).lazy() + const table = pl + .DataFrame({ + is_active: [true, false, 1, 0], + }) + .lazy() const schema: Schema = { fields: [{ name: "is_active", type: "boolean" }], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) // Since the column isn't string type, validateField will not normalize it expect(errors).toHaveLength(0) diff --git a/table/field/checks/type.ts b/table/field/checks/type.ts index 0e88dd62..20a9f335 100644 --- a/table/field/checks/type.ts +++ b/table/field/checks/type.ts @@ -1,5 +1,5 @@ -import type { Field } from "@dpkit/core" -import type { CellTypeError } from "../../error/index.ts" +import type { Field } from "@dpkit/metadata" +import type { CellTypeError } from "@dpkit/metadata" import type { CellMapping } from "../Mapping.ts" export function checkCellType(field: Field, mapping: CellMapping) { diff --git a/table/field/checks/unique.spec.ts b/table/field/checks/unique.spec.ts index b85f7624..72bf4577 100644 --- a/table/field/checks/unique.spec.ts +++ b/table/field/checks/unique.spec.ts @@ -1,14 +1,16 @@ -import type { Schema } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" 
+import type { Schema } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" -import { validateTable } from "../../table/index.ts" +import { inspectTable } from "../../table/index.ts" // TODO: recover -describe("validateTable (cell/unique)", () => { - it("should not report errors when all values are unique", async () => { - const table = DataFrame({ - id: [1, 2, 3, 4, 5], - }).lazy() +describe("inspectTable (cell/unique)", () => { + it("should not errors when all values are unique", async () => { + const table = pl + .DataFrame({ + id: [1, 2, 3, 4, 5], + }) + .lazy() const schema: Schema = { fields: [ @@ -20,14 +22,16 @@ describe("validateTable (cell/unique)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) - it("should report errors for duplicate values", async () => { - const table = DataFrame({ - id: [1, 2, 3, 2, 5], - }).lazy() + it("should errors for duplicate values", async () => { + const table = pl + .DataFrame({ + id: [1, 2, 3, 2, 5], + }) + .lazy() const schema: Schema = { fields: [ @@ -39,7 +43,7 @@ describe("validateTable (cell/unique)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors.filter(e => e.type === "cell/unique")).toHaveLength(1) expect(errors).toContainEqual({ @@ -51,9 +55,11 @@ describe("validateTable (cell/unique)", () => { }) it("should report multiple errors for string duplicates", async () => { - const table = DataFrame({ - code: ["A001", "B002", "A001", "C003", "B002"], - }).lazy() + const table = pl + .DataFrame({ + code: ["A001", "B002", "A001", "C003", "B002"], + }) + .lazy() const schema: Schema = { fields: [ @@ -65,7 +71,7 @@ describe("validateTable (cell/unique)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors.filter(e => e.type === "cell/unique")).toHaveLength(2) expect(errors).toContainEqual({ type: "cell/unique", @@ -82,9 +88,11 @@ describe("validateTable (cell/unique)", () => { }) it("should handle null values correctly", async () => { - const table = DataFrame({ - id: [1, null, 3, null, 5], - }).lazy() + const table = pl + .DataFrame({ + id: [1, null, 3, null, 5], + }) + .lazy() const schema: Schema = { fields: [ @@ -96,7 +104,7 @@ describe("validateTable (cell/unique)", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) }) diff --git a/table/field/checks/unique.ts b/table/field/checks/unique.ts index b6f3078d..419e60ad 100644 --- a/table/field/checks/unique.ts +++ b/table/field/checks/unique.ts @@ -1,5 +1,5 @@ -import type { Field } from "@dpkit/core" -import type { CellUniqueError } from "../../error/index.ts" +import type { Field } from "@dpkit/metadata" +import type { CellUniqueError } from "@dpkit/metadata" import type { CellMapping } from "../Mapping.ts" // TODO: Support schema.primaryKey and schema.uniqueKeys diff --git a/table/field/denormalize.ts b/table/field/denormalize.ts index 3a45539e..28efd055 100644 --- a/table/field/denormalize.ts +++ b/table/field/denormalize.ts @@ -1,5 +1,5 @@ -import type { Field } from "@dpkit/core" -import { col } from "nodejs-polars" +import type { Field } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { desubstituteField } from 
"./desubstitute.ts" import { stringifyField } from "./stringify.ts" @@ -11,7 +11,7 @@ export function denormalizeField( field: Field, options?: DenormalizeFieldOptions, ) { - let expr = col(field.name) + let expr = pl.col(field.name) const { nativeTypes } = options ?? {} if (!nativeTypes?.includes(field.type ?? "any")) { diff --git a/table/field/desubstitute.ts b/table/field/desubstitute.ts index e14396db..023b5194 100644 --- a/table/field/desubstitute.ts +++ b/table/field/desubstitute.ts @@ -1,17 +1,17 @@ -import type { Field } from "@dpkit/core" -import { lit, when } from "nodejs-polars" -import type { Expr } from "nodejs-polars" +import type { Field } from "@dpkit/metadata" +import * as pl from "nodejs-polars" const DEFAULT_MISSING_VALUE = "" -export function desubstituteField(field: Field, fieldExpr: Expr) { +export function desubstituteField(field: Field, fieldExpr: pl.Expr) { const flattenMissingValues = field.missingValues?.map(it => typeof it === "string" ? it : it.value, ) const missingValue = flattenMissingValues?.[0] ?? DEFAULT_MISSING_VALUE - fieldExpr = when(fieldExpr.isNull()) - .then(lit(missingValue)) + fieldExpr = pl + .when(fieldExpr.isNull()) + .then(pl.lit(missingValue)) .otherwise(fieldExpr) .alias(field.name) diff --git a/table/field/index.ts b/table/field/index.ts index 902f73d4..944e877a 100644 --- a/table/field/index.ts +++ b/table/field/index.ts @@ -1,6 +1,6 @@ export { denormalizeField } from "./denormalize.ts" export { parseField } from "./parse.ts" -export { validateField } from "./validate.ts" +export { inspectField } from "./inspect.ts" export { normalizeField } from "./normalize.ts" export { stringifyField } from "./stringify.ts" export type { PolarsField } from "./Field.ts" diff --git a/table/field/validate.spec.ts b/table/field/inspect.spec.ts similarity index 71% rename from table/field/validate.spec.ts rename to table/field/inspect.spec.ts index 4c9369d4..992c10e2 100644 --- a/table/field/validate.spec.ts +++ b/table/field/inspect.spec.ts @@ -1,14 +1,16 @@ -import type { Schema } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Schema } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" -import { validateTable } from "../table/validate.ts" +import { inspectTable } from "../table/inspect.ts" -describe("validateField", () => { +describe("inspectField", () => { describe("field name validation", () => { it("should report an error when field names don't match", async () => { - const table = DataFrame({ - actual_id: [1, 2, 3], - }).lazy() + const table = pl + .DataFrame({ + actual_id: [1, 2, 3], + }) + .lazy() const schema: Schema = { fields: [ @@ -19,7 +21,7 @@ describe("validateField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toContainEqual({ type: "field/name", @@ -28,10 +30,12 @@ describe("validateField", () => { }) }) - it("should not report errors when field names match", async () => { - const table = DataFrame({ - id: [1, 2, 3], - }).lazy() + it("should not errors when field names match", async () => { + const table = pl + .DataFrame({ + id: [1, 2, 3], + }) + .lazy() const schema: Schema = { fields: [ @@ -42,14 +46,16 @@ describe("validateField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) it("should be case-sensitive when comparing field names", async () 
=> { - const table = DataFrame({ - ID: [1, 2, 3], - }).lazy() + const table = pl + .DataFrame({ + ID: [1, 2, 3], + }) + .lazy() const schema: Schema = { fields: [ @@ -60,7 +66,7 @@ describe("validateField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(1) expect(errors).toContainEqual({ @@ -73,9 +79,11 @@ describe("validateField", () => { describe("field type validation", () => { it("should report an error when field types don't match", async () => { - const table = DataFrame({ - id: [true, false, true], - }).lazy() + const table = pl + .DataFrame({ + id: [true, false, true], + }) + .lazy() const schema: Schema = { fields: [ @@ -86,7 +94,7 @@ describe("validateField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(1) expect(errors).toContainEqual({ @@ -97,10 +105,12 @@ }) }) - it("should not report errors when field types match", async () => { - const table = DataFrame({ - id: [1, 2, 3], - }).lazy() + it("should not report errors when field types match", async () => { + const table = pl + .DataFrame({ + id: [1, 2, 3], + }) + .lazy() const schema: Schema = { fields: [ @@ -111,16 +121,18 @@ describe("validateField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) }) describe("cell types validation", () => { it("should validate string to integer conversion errors", async () => { - const table = DataFrame({ - id: ["1", "bad", "3", "4x"], - }).lazy() + const table = pl + .DataFrame({ + id: ["1", "bad", "3", "4x"], + }) + .lazy() const schema: Schema = { fields: [ @@ -131,7 +143,7 @@ describe("validateField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(2) expect(errors).toContainEqual({ @@ -151,9 +163,11 @@ }) it("should validate string to number conversion errors", async () => { - const table = DataFrame({ - price: ["10.5", "twenty", "30.75", "$40"], - }).lazy() + const table = pl + .DataFrame({ + price: ["10.5", "twenty", "30.75", "$40"], + }) + .lazy() const schema: Schema = { fields: [ @@ -164,7 +178,7 @@ describe("validateField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(2) expect(errors).toContainEqual({ @@ -184,9 +198,11 @@ }) it("should validate string to boolean conversion errors", async () => { - const table = DataFrame({ - active: ["true", "yes", "false", "0", "1"], - }).lazy() + const table = pl + .DataFrame({ + active: ["true", "yes", "false", "0", "1"], + }) + .lazy() const schema: Schema = { fields: [ @@ -197,7 +213,7 @@ describe("validateField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(1) expect(errors).toContainEqual({ @@ -210,9 +226,11 @@ }) it("should validate string to date conversion errors", async () => { - const table = DataFrame({ - created: ["2023-01-15", "Jan 15, 2023", "20230115", "not-a-date"], - }).lazy() + const table = pl + .DataFrame({ + created: ["2023-01-15", "Jan 15, 2023", 
"20230115", "not-a-date"], + }) + .lazy() const schema: Schema = { fields: [ @@ -223,7 +241,7 @@ describe("validateField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(3) expect(errors).toContainEqual({ @@ -250,9 +268,11 @@ describe("validateField", () => { }) it("should validate string to time conversion errors", async () => { - const table = DataFrame({ - time: ["14:30:00", "2:30pm", "invalid", "14h30"], - }).lazy() + const table = pl + .DataFrame({ + time: ["14:30:00", "2:30pm", "invalid", "14h30"], + }) + .lazy() const schema: Schema = { fields: [ @@ -263,7 +283,7 @@ describe("validateField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(3) expect(errors).toContainEqual({ @@ -290,9 +310,11 @@ describe("validateField", () => { }) it("should validate string to year conversion errors", async () => { - const table = DataFrame({ - year: ["2023", "23", "MMXXIII", "two-thousand-twenty-three"], - }).lazy() + const table = pl + .DataFrame({ + year: ["2023", "23", "MMXXIII", "two-thousand-twenty-three"], + }) + .lazy() const schema: Schema = { fields: [ @@ -303,7 +325,7 @@ describe("validateField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(3) expect(errors).toContainEqual({ @@ -330,14 +352,16 @@ describe("validateField", () => { }) it("should validate string to datetime conversion errors", async () => { - const table = DataFrame({ - timestamp: [ - "2023-01-15T14:30:00", - "January 15, 2023 2:30 PM", - "2023-01-15 14:30", - "not-a-datetime", - ], - }).lazy() + const table = pl + .DataFrame({ + timestamp: [ + "2023-01-15T14:30:00", + "January 15, 2023 2:30 PM", + "2023-01-15 14:30", + "not-a-datetime", + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -348,7 +372,7 @@ describe("validateField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) // Adjust the expectations to match actual behavior expect(errors.length).toBeGreaterThan(0) @@ -372,9 +396,11 @@ describe("validateField", () => { }) it("should pass validation when all cells are valid", async () => { - const table = DataFrame({ - id: ["1", "2", "3", "4"], - }).lazy() + const table = pl + .DataFrame({ + id: ["1", "2", "3", "4"], + }) + .lazy() const schema: Schema = { fields: [ @@ -385,15 +411,17 @@ describe("validateField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) it("should validate with non-string source data", async () => { - const table = DataFrame({ - is_active: [true, false, true, false], - }).lazy() + const table = pl + .DataFrame({ + is_active: [true, false, true, false], + }) + .lazy() const schema: Schema = { fields: [ @@ -404,7 +432,7 @@ describe("validateField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) // Since the column matches the expected type, validation passes expect(errors).toHaveLength(0) diff --git a/table/field/validate.ts b/table/field/inspect.ts similarity index 75% rename from table/field/validate.ts rename to table/field/inspect.ts index b544d227..b6b33107 100644 --- a/table/field/validate.ts +++ 
b/table/field/inspect.ts @@ -1,6 +1,6 @@ -import type { Field } from "@dpkit/core" -import { col, lit, when } from "nodejs-polars" -import type { CellError, FieldError, TableError } from "../error/index.ts" +import type { Field } from "@dpkit/metadata" +import type { CellError, FieldError, TableError } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import type { Table } from "../table/index.ts" import type { FieldMapping } from "./Mapping.ts" import { checkCellEnum } from "./checks/enum.ts" @@ -13,11 +13,11 @@ import { checkCellRequired } from "./checks/required.ts" import { checkCellType } from "./checks/type.ts" import { checkCellUnique } from "./checks/unique.ts" import { normalizeField } from "./normalize.ts" -import { validateArrayField } from "./types/array.ts" -import { validateGeojsonField } from "./types/geojson.ts" -import { validateObjectField } from "./types/object.ts" +import { inspectArrayField } from "./types/array.ts" +import { inspectGeojsonField } from "./types/geojson.ts" +import { inspectObjectField } from "./types/object.ts" -export async function validateField( +export async function inspectField( mapping: FieldMapping, table: Table, options: { @@ -27,21 +27,21 @@ export async function validateField( const { maxErrors } = options const errors: TableError[] = [] - const nameErrors = validateName(mapping) + const nameErrors = inspectName(mapping) errors.push(...nameErrors) - const typeErrors = validateType(mapping) + const typeErrors = inspectType(mapping) errors.push(...typeErrors) if (!typeErrors.length) { - const dataErorrs = await validateCells(mapping, table, { maxErrors }) + const dataErorrs = await inspectCells(mapping, table, { maxErrors }) errors.push(...dataErorrs) } - return { errors, valid: !errors.length } + return errors } -function validateName(mapping: FieldMapping) { +function inspectName(mapping: FieldMapping) { const errors: FieldError[] = [] if (mapping.source.name !== mapping.target.name) { @@ -55,7 +55,7 @@ function validateName(mapping: FieldMapping) { return errors } -function validateType(mapping: FieldMapping) { +function inspectType(mapping: FieldMapping) { const errors: FieldError[] = [] const variant = mapping.source.type.variant @@ -99,7 +99,7 @@ function validateType(mapping: FieldMapping) { return errors } -async function validateCells( +async function inspectCells( mapping: FieldMapping, table: Table, options: { @@ -112,20 +112,20 @@ async function validateCells( // Types that require non-polars validation switch (mapping.target.type) { case "array": - return await validateArrayField(mapping.target, table) + return await inspectArrayField(mapping.target, table) case "geojson": - return await validateGeojsonField(mapping.target, table) + return await inspectGeojsonField(mapping.target, table) case "object": - return await validateObjectField(mapping.target, table) + return await inspectObjectField(mapping.target, table) } let fieldCheckTable = table .withRowCount() .select( - col("row_nr").add(1).alias("number"), + pl.col("row_nr").add(1).alias("number"), normalizeField(mapping).alias("target"), normalizeField(mapping, { keepType: true }).alias("source"), - lit(null).alias("error"), + pl.lit(null).alias("error"), ) for (const checkCell of [ @@ -141,23 +141,24 @@ async function validateCells( checkCellMaxLength, checkCellUnique, ]) { - const cellMapping = { source: col("source"), target: col("target") } + const cellMapping = { source: pl.col("source"), target: pl.col("target") } const check = checkCell(mapping.target, 
cellMapping) if (!check) continue fieldCheckTable = fieldCheckTable.withColumn( - when(col("error").isNotNull()) - .then(col("error")) + pl + .when(pl.col("error").isNotNull()) + .then(pl.col("error")) .when(check.isErrorExpr) - .then(lit(JSON.stringify(check.errorTemplate))) - .otherwise(lit(null)) + .then(pl.lit(JSON.stringify(check.errorTemplate))) + .otherwise(pl.lit(null)) .alias("error"), ) } const fieldCheckFrame = await fieldCheckTable - .filter(col("error").isNotNull()) + .filter(pl.col("error").isNotNull()) .drop(["target"]) .head(maxErrors) .collect() diff --git a/table/field/narrow.spec.ts b/table/field/narrow.spec.ts index 8a7f9628..9fc190c7 100644 --- a/table/field/narrow.spec.ts +++ b/table/field/narrow.spec.ts @@ -1,15 +1,17 @@ -import type { Schema } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Schema } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" +import { inspectTable } from "../table/inspect.ts" import { normalizeTable } from "../table/normalize.ts" -import { validateTable } from "../table/validate.ts" describe("narrowField", () => { it("should narrow float to integer", async () => { - const table = DataFrame({ - id: [1.0, 2.0, 3.0], - name: ["a", "b", "c"], - }).lazy() + const table = pl + .DataFrame({ + id: [1.0, 2.0, 3.0], + name: ["a", "b", "c"], + }) + .lazy() const schema: Schema = { fields: [ @@ -18,10 +20,10 @@ describe("narrowField", () => { ], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()).toEqual([ + expect(frame.toRecords()).toEqual([ { id: 1, name: "a" }, { id: 2, name: "b" }, { id: 3, name: "c" }, @@ -29,10 +31,12 @@ describe("narrowField", () => { }) it("should detect error when float cannot be narrowed to integer", async () => { - const table = DataFrame({ - id: [1.0, 2.0, 3.5], - name: ["a", "b", "c"], - }).lazy() + const table = pl + .DataFrame({ + id: [1.0, 2.0, 3.5], + name: ["a", "b", "c"], + }) + .lazy() const schema: Schema = { fields: [ @@ -41,7 +45,7 @@ describe("narrowField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { diff --git a/table/field/narrow.ts b/table/field/narrow.ts index 9918e38a..adb7570a 100644 --- a/table/field/narrow.ts +++ b/table/field/narrow.ts @@ -1,16 +1,15 @@ -import { DataType } from "nodejs-polars" -import { lit, when } from "nodejs-polars" -import type { Expr } from "nodejs-polars" +import * as pl from "nodejs-polars" import type { FieldMapping } from "./Mapping.ts" -export function narrowField(mapping: FieldMapping, fieldExpr: Expr) { +export function narrowField(mapping: FieldMapping, fieldExpr: pl.Expr) { const variant = mapping.source.type.variant if (mapping.target.type === "integer") { if (["Float32", "Float64"].includes(variant)) { - fieldExpr = when(fieldExpr.eq(fieldExpr.round(0))) - .then(fieldExpr.cast(DataType.Int64)) - .otherwise(lit(null)) + fieldExpr = pl + .when(fieldExpr.eq(fieldExpr.round(0))) + .then(fieldExpr.cast(pl.Int64)) + .otherwise(pl.lit(null)) } } diff --git a/table/field/normalize.ts b/table/field/normalize.ts index 838f68e4..f795aa18 100644 --- a/table/field/normalize.ts +++ b/table/field/normalize.ts @@ -1,4 +1,4 @@ -import { col } from "nodejs-polars" +import * as pl from "nodejs-polars" import type { FieldMapping } from "./Mapping.ts" 
import { narrowField } from "./narrow.ts" import { parseField } from "./parse.ts" @@ -8,7 +8,7 @@ export function normalizeField( mapping: FieldMapping, options?: { keepType?: boolean }, ) { - let fieldExpr = col(mapping.source.name) + let fieldExpr = pl.col(mapping.source.name) fieldExpr = substituteField(mapping, fieldExpr) if (!options?.keepType) { diff --git a/table/field/parse.spec.ts b/table/field/parse.spec.ts index 99d88511..a28f9f87 100644 --- a/table/field/parse.spec.ts +++ b/table/field/parse.spec.ts @@ -1,5 +1,5 @@ -import type { Schema } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Schema } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" import { normalizeTable } from "../table/index.ts" @@ -23,16 +23,16 @@ describe("parseField", () => { ["-", "-", { schemaLevel: ["-"], fieldLevel: ["x"] }], // @ts-ignore ])("$0 -> $1 $2", async (cell, value, { fieldLevel, schemaLevel }) => { - const table = DataFrame({ name: [cell] }).lazy() + const table = pl.DataFrame({ name: [cell] }).lazy() const schema: Schema = { missingValues: schemaLevel, fields: [{ name: "name", type: "string", missingValues: fieldLevel }], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.getColumn("name").get(0)).toEqual(value) + expect(frame.getColumn("name").get(0)).toEqual(value) }) }) }) diff --git a/table/field/parse.ts b/table/field/parse.ts index c59042b6..bfcd7abe 100644 --- a/table/field/parse.ts +++ b/table/field/parse.ts @@ -1,5 +1,4 @@ -import type { Expr } from "nodejs-polars" -import { DataType } from "nodejs-polars" +import * as pl from "nodejs-polars" import type { FieldMapping } from "./Mapping.ts" import { parseBooleanField } from "./types/boolean.ts" import { parseDateField } from "./types/date.ts" @@ -14,8 +13,8 @@ import { parseTimeField } from "./types/time.ts" import { parseYearField } from "./types/year.ts" import { parseYearmonthField } from "./types/yearmonth.ts" -export function parseField(mapping: FieldMapping, fieldExpr: Expr) { - if (!mapping.source.type.equals(DataType.String)) return fieldExpr +export function parseField(mapping: FieldMapping, fieldExpr: pl.Expr) { + if (!mapping.source.type.equals(pl.String)) return fieldExpr const field = mapping.target switch (field.type) { diff --git a/table/field/stringify.spec.ts b/table/field/stringify.spec.ts index 32db737c..826c25b1 100644 --- a/table/field/stringify.spec.ts +++ b/table/field/stringify.spec.ts @@ -1,5 +1,5 @@ -import type { Schema } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Schema } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" import { denormalizeTable } from "../table/index.ts" @@ -35,16 +35,16 @@ describe("stringifyField", () => { // @ts-ignore ])("%s -> %s %s", async (value, expected, { fieldLevel, schemaLevel }) => { - const table = DataFrame({ name: [value] }).lazy() + const table = pl.DataFrame({ name: [value] }).lazy() const schema: Schema = { missingValues: schemaLevel, fields: [{ name: "name", type: "string", missingValues: fieldLevel }], } - const ldf = await denormalizeTable(table, schema) - const df = await ldf.collect() + const result = await denormalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(expected) + 
expect(frame.toRecords()[0]?.name).toEqual(expected) }) }) }) diff --git a/table/field/stringify.ts b/table/field/stringify.ts index 185410a3..6b56d505 100644 --- a/table/field/stringify.ts +++ b/table/field/stringify.ts @@ -1,5 +1,5 @@ -import type { Field } from "@dpkit/core" -import type { Expr } from "nodejs-polars" +import type { Field } from "@dpkit/metadata" +import type * as pl from "nodejs-polars" import { stringifyBooleanField } from "./types/boolean.ts" import { stringifyDateField } from "./types/date.ts" import { stringifyDatetimeField } from "./types/datetime.ts" @@ -13,7 +13,7 @@ import { stringifyTimeField } from "./types/time.ts" import { stringifyYearField } from "./types/year.ts" import { stringifyYearmonthField } from "./types/yearmonth.ts" -export function stringifyField(field: Field, fieldExpr: Expr) { +export function stringifyField(field: Field, fieldExpr: pl.Expr) { switch (field.type) { case "boolean": return stringifyBooleanField(field, fieldExpr) diff --git a/table/field/substitute.ts b/table/field/substitute.ts index 7234e0d4..0bbac3fa 100644 --- a/table/field/substitute.ts +++ b/table/field/substitute.ts @@ -1,12 +1,10 @@ -import { DataType } from "nodejs-polars" -import { lit, when } from "nodejs-polars" -import type { Expr } from "nodejs-polars" +import * as pl from "nodejs-polars" import type { FieldMapping } from "./Mapping.ts" const DEFAULT_MISSING_VALUES = [""] -export function substituteField(mapping: FieldMapping, fieldExpr: Expr) { - if (!mapping.source.type.equals(DataType.String)) return fieldExpr +export function substituteField(mapping: FieldMapping, fieldExpr: pl.Expr) { + if (!mapping.source.type.equals(pl.String)) return fieldExpr const flattenMissingValues = mapping.target.missingValues?.map(it => @@ -14,8 +12,9 @@ export function substituteField(mapping: FieldMapping, fieldExpr: Expr) { ) ?? 
DEFAULT_MISSING_VALUES if (flattenMissingValues.length) { - fieldExpr = when(fieldExpr.isIn(flattenMissingValues)) - .then(lit(null)) + fieldExpr = pl + .when(fieldExpr.isIn(flattenMissingValues)) + .then(pl.lit(null)) .otherwise(fieldExpr) .alias(mapping.target.name) } diff --git a/table/field/types/array.spec.ts b/table/field/types/array.spec.ts index 9fb69f65..e328f598 100644 --- a/table/field/types/array.spec.ts +++ b/table/field/types/array.spec.ts @@ -1,13 +1,15 @@ -import type { Schema } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Schema } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" -import { validateTable } from "../../table/index.ts" +import { inspectTable } from "../../table/index.ts" describe("validateArrayField", () => { - it("should not report errors for valid JSON arrays", async () => { - const table = DataFrame({ - tags: ['["tag1","tag2"]', "[1,2,3]", '["a","b","c"]'], - }).lazy() + it("should not report errors for valid JSON arrays", async () => { + const table = pl + .DataFrame({ + tags: ['["tag1","tag2"]', "[1,2,3]", '["a","b","c"]'], + }) + .lazy() const schema: Schema = { fields: [ @@ -18,14 +20,16 @@ describe("validateArrayField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) - it("should not report errors for empty arrays", async () => { - const table = DataFrame({ - items: ["[]", "[]", "[]"], - }).lazy() + it("should not report errors for empty arrays", async () => { + const table = pl + .DataFrame({ + items: ["[]", "[]", "[]"], + }) + .lazy() const schema: Schema = { fields: [ @@ -36,14 +40,16 @@ describe("validateArrayField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) - it("should not report errors for null values", async () => { - const table = DataFrame({ - data: ['["value"]', null, "[1,2,3]"], - }).lazy() + it("should not report errors for null values", async () => { + const table = pl + .DataFrame({ + data: ['["value"]', null, "[1,2,3]"], + }) + .lazy() const schema: Schema = { fields: [ @@ -54,14 +60,16 @@ describe("validateArrayField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) - it("should report errors for JSON objects", async () => { - const table = DataFrame({ - data: ["[1,2,3]", '{"key":"value"}', '["a","b"]'], - }).lazy() + it("should report errors for JSON objects", async () => { + const table = pl + .DataFrame({ + data: ["[1,2,3]", '{"key":"value"}', '["a","b"]'], + }) + .lazy() const schema: Schema = { fields: [ @@ -72,7 +80,7 @@ describe("validateArrayField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { type: "cell/type", @@ -84,10 +92,12 @@ ]) }) - it("should report errors for invalid JSON", async () => { - const table = DataFrame({ - data: ['["valid"]', "invalid json", "[1,2,3]", "[broken"], - }).lazy() + it("should report errors for invalid JSON", async () => { + const table = pl + .DataFrame({ + data: ['["valid"]', "invalid json", "[1,2,3]", "[broken"], + }) + .lazy() const schema: Schema = { fields: [ @@ -98,7 +108,7 @@ describe("validateArrayField", () => { ], } - const { errors } = await 
validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors.filter(e => e.type === "cell/type")).toHaveLength(2) expect(errors).toContainEqual({ type: "cell/type", @@ -117,9 +127,11 @@ }) it("should handle nested arrays", async () => { - const table = DataFrame({ - matrix: ["[[1,2],[3,4]]", "[[5,6],[7,8]]", '[["a","b"],["c","d"]]'], - }).lazy() + const table = pl + .DataFrame({ + matrix: ["[[1,2],[3,4]]", "[[5,6],[7,8]]", '[["a","b"],["c","d"]]'], + }) + .lazy() const schema: Schema = { fields: [ @@ -130,14 +142,16 @@ describe("validateArrayField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) - it("should report errors for empty strings", async () => { - const table = DataFrame({ - data: ['["valid"]', "", "[1,2,3]"], - }).lazy() + it("should report errors for empty strings", async () => { + const table = pl + .DataFrame({ + data: ['["valid"]', "", "[1,2,3]"], + }) + .lazy() const schema: Schema = { fields: [ @@ -148,7 +162,7 @@ describe("validateArrayField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { type: "cell/type", @@ -160,10 +174,12 @@ ]) }) - it("should report errors for JSON primitives", async () => { - const table = DataFrame({ - data: ['"string"', "123", "true", "false", "null"], - }).lazy() + it("should report errors for JSON primitives", async () => { + const table = pl + .DataFrame({ + data: ['"string"', "123", "true", "false", "null"], + }) + .lazy() const schema: Schema = { fields: [ @@ -174,7 +190,7 @@ describe("validateArrayField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { type: "cell/type", @@ -214,10 +230,12 @@ ]) }) - it("should not report errors for arrays matching jsonSchema", async () => { - const table = DataFrame({ - scores: ["[80,90,100]", "[75,85,95]", "[90,95,100]"], - }).lazy() + it("should not report errors for arrays matching jsonSchema", async () => { + const table = pl + .DataFrame({ + scores: ["[80,90,100]", "[75,85,95]", "[90,95,100]"], + }) + .lazy() const schema: Schema = { fields: [ @@ -236,20 +254,22 @@ ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) - it("should report errors for arrays not matching jsonSchema", async () => { + it("should report errors for arrays not matching jsonSchema", async () => { const jsonSchema = { type: "array", items: { type: "number" }, minItems: 2, } - const table = DataFrame({ - numbers: ["[1,2,3]", '["not","numbers"]', "[1]", "[4,5,6]"], - }).lazy() + const table = pl + .DataFrame({ + numbers: ["[1,2,3]", '["not","numbers"]', "[1]", "[4,5,6]"], + }) + .lazy() const schema: Schema = { fields: [ @@ -263,31 +283,44 @@ ], } - const { errors } = await validateTable(table, { schema }) - expect(errors.filter(e => e.type === "cell/jsonSchema")).toHaveLength(2) - expect(errors).toContainEqual({ - type: "cell/jsonSchema", - fieldName: "numbers", - jsonSchema, - rowNumber: 2, - cell: '["not","numbers"]', - }) - expect(errors).toContainEqual({ - type: "cell/jsonSchema", - fieldName: 
"numbers", - jsonSchema, - rowNumber: 3, - cell: "[1]", - }) + const errors = await inspectTable(table, { schema }) + expect(errors.filter(e => e.type === "cell/jsonSchema")).toEqual([ + { + type: "cell/jsonSchema", + fieldName: "numbers", + rowNumber: 2, + cell: '["not","numbers"]', + pointer: "/0", + message: "must be number", + }, + { + type: "cell/jsonSchema", + fieldName: "numbers", + rowNumber: 2, + cell: '["not","numbers"]', + pointer: "/1", + message: "must be number", + }, + { + type: "cell/jsonSchema", + fieldName: "numbers", + rowNumber: 3, + cell: "[1]", + pointer: "", + message: "must NOT have fewer than 2 items", + }, + ]) }) it("should validate complex jsonSchema with array of objects", async () => { - const table = DataFrame({ - users: [ - '[{"name":"John","age":30},{"name":"Jane","age":25}]', - '[{"name":"Bob","age":"invalid"}]', - ], - }).lazy() + const table = pl + .DataFrame({ + users: [ + '[{"name":"John","age":30},{"name":"Jane","age":25}]', + '[{"name":"Bob","age":"invalid"}]', + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -311,23 +344,25 @@ describe("validateArrayField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { type: "cell/jsonSchema", fieldName: "users", - // @ts-ignore - jsonSchema: schema.fields[0].constraints?.jsonSchema, rowNumber: 2, cell: '[{"name":"Bob","age":"invalid"}]', + pointer: "/0/age", + message: "must be number", }, ]) }) it("should validate jsonSchema with unique items constraint", async () => { - const table = DataFrame({ - tags: ['["unique","values"]', '["duplicate","duplicate"]'], - }).lazy() + const table = pl + .DataFrame({ + tags: ['["unique","values"]', '["duplicate","duplicate"]'], + }) + .lazy() const schema: Schema = { fields: [ @@ -345,15 +380,16 @@ describe("validateArrayField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { type: "cell/jsonSchema", fieldName: "tags", - // @ts-ignore - jsonSchema: schema.fields[0].constraints?.jsonSchema, rowNumber: 2, cell: '["duplicate","duplicate"]', + pointer: "", + message: + "must NOT have duplicate items (items ## 1 and 0 are identical)", }, ]) }) diff --git a/table/field/types/array.ts b/table/field/types/array.ts index 76d672f0..e1d9a10e 100644 --- a/table/field/types/array.ts +++ b/table/field/types/array.ts @@ -1,7 +1,7 @@ -import type { ArrayField } from "@dpkit/core" +import type { ArrayField } from "@dpkit/metadata" import type { Table } from "../../table/index.ts" -import { validateJsonField } from "./json.ts" +import { inspectJsonField } from "./json.ts" -export async function validateArrayField(field: ArrayField, table: Table) { - return validateJsonField(field, table) +export async function inspectArrayField(field: ArrayField, table: Table) { + return inspectJsonField(field, table) } diff --git a/table/field/types/boolean.spec.ts b/table/field/types/boolean.spec.ts index e1517a31..51fcf6a4 100644 --- a/table/field/types/boolean.spec.ts +++ b/table/field/types/boolean.spec.ts @@ -1,4 +1,4 @@ -import { DataFrame, DataType, Series } from "nodejs-polars" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" import { normalizeTable } from "../../table/index.ts" import { denormalizeTable } from "../../table/index.ts" @@ -46,16 +46,16 @@ describe("parseBooleanField", () => { ["non", false, { trueValues: ["oui", "si"], falseValues: 
["non", "no"] }], ["no", false, { trueValues: ["oui", "si"], falseValues: ["non", "no"] }], ])("%s -> %s %o", async (cell, value, options) => { - const table = DataFrame([Series("name", [cell], DataType.String)]).lazy() + const table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() const schema = { fields: [{ name: "name", type: "boolean" as const, ...options }], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(value) + expect(frame.toRecords()[0]?.name).toEqual(value) }) }) @@ -77,15 +77,15 @@ describe("stringifyBooleanField", () => { [true, "oui", { trueValues: ["oui", "si"], falseValues: ["non", "no"] }], [false, "non", { trueValues: ["oui", "si"], falseValues: ["non", "no"] }], ])("%s -> %s %o", async (value, expected, options) => { - const table = DataFrame([Series("name", [value], DataType.Bool)]).lazy() + const table = pl.DataFrame([pl.Series("name", [value], pl.Bool)]).lazy() const schema = { fields: [{ name: "name", type: "boolean" as const, ...options }], } - const ldf = await denormalizeTable(table, schema) - const df = await ldf.collect() + const result = await denormalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(expected) + expect(frame.toRecords()[0]?.name).toEqual(expected) }) }) diff --git a/table/field/types/boolean.ts b/table/field/types/boolean.ts index 0183da39..82c24c9f 100644 --- a/table/field/types/boolean.ts +++ b/table/field/types/boolean.ts @@ -1,37 +1,37 @@ -import type { BooleanField } from "@dpkit/core" -import { DataType } from "nodejs-polars" -import { lit, when } from "nodejs-polars" -import type { Expr } from "nodejs-polars" +import type { BooleanField } from "@dpkit/metadata" +import * as pl from "nodejs-polars" const DEFAULT_TRUE_VALUES = ["true", "True", "TRUE", "1"] const DEFAULT_FALSE_VALUES = ["false", "False", "FALSE", "0"] -export function parseBooleanField(field: BooleanField, fieldExpr: Expr) { +export function parseBooleanField(field: BooleanField, fieldExpr: pl.Expr) { const trueValues = field.trueValues ?? DEFAULT_TRUE_VALUES const falseValues = field.falseValues ?? DEFAULT_FALSE_VALUES for (const value of trueValues) fieldExpr = fieldExpr.replace(value, "1") for (const value of falseValues) fieldExpr = fieldExpr.replace(value, "0") - fieldExpr = fieldExpr.cast(DataType.Int8) + fieldExpr = fieldExpr.cast(pl.Int8) - return when(fieldExpr.eq(1)) - .then(lit(true)) + return pl + .when(fieldExpr.eq(1)) + .then(pl.lit(true)) .when(fieldExpr.eq(0)) - .then(lit(false)) - .otherwise(lit(null)) + .then(pl.lit(false)) + .otherwise(pl.lit(null)) .alias(field.name) } const DEFAULT_TRUE_VALUE = "true" const DEFAULT_FALSE_VALUE = "false" -export function stringifyBooleanField(field: BooleanField, fieldExpr: Expr) { +export function stringifyBooleanField(field: BooleanField, fieldExpr: pl.Expr) { const trueValue = field.trueValues?.[0] ?? DEFAULT_TRUE_VALUE const falseValue = field.falseValues?.[0] ?? 
DEFAULT_FALSE_VALUE - return when(fieldExpr.eq(lit(true))) - .then(lit(trueValue)) - .otherwise(lit(falseValue)) + return pl + .when(fieldExpr.eq(pl.lit(true))) + .then(pl.lit(trueValue)) + .otherwise(pl.lit(falseValue)) .alias(field.name) } diff --git a/table/field/types/date.spec.ts b/table/field/types/date.spec.ts index fe9ec389..0f6d2d32 100644 --- a/table/field/types/date.spec.ts +++ b/table/field/types/date.spec.ts @@ -1,4 +1,4 @@ -import { DataFrame, DataType, Series } from "nodejs-polars" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" import { normalizeTable } from "../../table/index.ts" import { denormalizeTable } from "../../table/index.ts" @@ -25,16 +25,16 @@ describe("parseDateField", () => { // Invalid format ["21/11/06", null, { format: "invalid" }], ])("%s -> %s %o", async (cell, expected, options) => { - const table = DataFrame([Series("name", [cell], DataType.String)]).lazy() + const table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() const schema = { fields: [{ name: "name", type: "date" as const, ...options }], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(expected) + expect(frame.toRecords()[0]?.name).toEqual(expected) }) }) @@ -48,15 +48,15 @@ describe("stringifyDateField", () => { [new Date(Date.UTC(2006, 10, 21)), "21/11/2006", { format: "%d/%m/%Y" }], [new Date(Date.UTC(2006, 10, 21)), "2006/11/21", { format: "%Y/%m/%d" }], ])("%s -> %s %o", async (value, expected, options) => { - const table = DataFrame([Series("name", [value], DataType.Date)]).lazy() + const table = pl.DataFrame([pl.Series("name", [value], pl.Date)]).lazy() const schema = { fields: [{ name: "name", type: "date" as const, ...options }], } - const ldf = await denormalizeTable(table, schema) - const df = await ldf.collect() + const result = await denormalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(expected) + expect(frame.toRecords()[0]?.name).toEqual(expected) }) }) diff --git a/table/field/types/date.ts b/table/field/types/date.ts index bedf0dfd..ef07c9f5 100644 --- a/table/field/types/date.ts +++ b/table/field/types/date.ts @@ -1,19 +1,18 @@ -import type { DateField } from "@dpkit/core" -import { DataType } from "nodejs-polars" -import type { Expr } from "nodejs-polars" +import type { DateField } from "@dpkit/metadata" +import * as pl from "nodejs-polars" const DEFAULT_FORMAT = "%Y-%m-%d" -export function parseDateField(field: DateField, fieldExpr: Expr) { +export function parseDateField(field: DateField, fieldExpr: pl.Expr) { let format = DEFAULT_FORMAT if (field.format && field.format !== "default" && field.format !== "any") { format = field.format } - return fieldExpr.str.strptime(DataType.Date, format) + return fieldExpr.str.strptime(pl.Date, format) } -export function stringifyDateField(field: DateField, fieldExpr: Expr) { +export function stringifyDateField(field: DateField, fieldExpr: pl.Expr) { const format = field.format ?? 
DEFAULT_FORMAT return fieldExpr.date.strftime(format) diff --git a/table/field/types/datetime.spec.ts b/table/field/types/datetime.spec.ts index 89297edc..516b0143 100644 --- a/table/field/types/datetime.spec.ts +++ b/table/field/types/datetime.spec.ts @@ -1,4 +1,4 @@ -import { DataFrame, DataType, Series } from "nodejs-polars" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" import { normalizeTable } from "../../table/index.ts" import { denormalizeTable } from "../../table/index.ts" @@ -27,16 +27,16 @@ describe.skip("parseDatetimeField", () => { // Invalid format ["21/11/06 16:30", null, { format: "invalid" }], ])("%s -> %s %o", async (cell, expected, options) => { - const table = DataFrame([Series("name", [cell], DataType.String)]).lazy() + const table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() const schema = { fields: [{ name: "name", type: "datetime" as const, ...options }], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(expected) + expect(frame.toRecords()[0]?.name).toEqual(expected) }) }) @@ -58,15 +58,15 @@ describe("stringifyDatetimeField", () => { { format: "%Y/%m/%dT%H:%M:%S" }, ], ])("%s -> %s %o", async (value, expected, options) => { - const table = DataFrame([Series("name", [value], DataType.Datetime)]).lazy() + const table = pl.DataFrame([pl.Series("name", [value], pl.Datetime)]).lazy() const schema = { fields: [{ name: "name", type: "datetime" as const, ...options }], } - const ldf = await denormalizeTable(table, schema) - const df = await ldf.collect() + const result = await denormalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(expected) + expect(frame.toRecords()[0]?.name).toEqual(expected) }) }) diff --git a/table/field/types/datetime.ts b/table/field/types/datetime.ts index bee920ac..8ba6da0e 100644 --- a/table/field/types/datetime.ts +++ b/table/field/types/datetime.ts @@ -1,20 +1,22 @@ -import type { DatetimeField } from "@dpkit/core" -import { DataType } from "nodejs-polars" -import type { Expr } from "nodejs-polars" +import type { DatetimeField } from "@dpkit/metadata" +import * as pl from "nodejs-polars" const DEFAULT_FORMAT = "%Y-%m-%dT%H:%M:%S" // TODO: Add support for timezone handling -export function parseDatetimeField(field: DatetimeField, fieldExpr: Expr) { +export function parseDatetimeField(field: DatetimeField, fieldExpr: pl.Expr) { let format = DEFAULT_FORMAT if (field.format && field.format !== "default" && field.format !== "any") { format = field.format } - return fieldExpr.str.strptime(DataType.Datetime, format) + return fieldExpr.str.strptime(pl.Datetime, format) } -export function stringifyDatetimeField(field: DatetimeField, fieldExpr: Expr) { +export function stringifyDatetimeField( + field: DatetimeField, + fieldExpr: pl.Expr, +) { const format = field.format ?? 
DEFAULT_FORMAT return fieldExpr.date.strftime(format) diff --git a/table/field/types/duration.spec.ts b/table/field/types/duration.spec.ts index 742022f0..e8535ceb 100644 --- a/table/field/types/duration.spec.ts +++ b/table/field/types/duration.spec.ts @@ -1,4 +1,4 @@ -import { DataFrame, DataType, Series } from "nodejs-polars" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" import { denormalizeTable, normalizeTable } from "../../table/index.ts" @@ -6,15 +6,15 @@ describe("parseDurationField", () => { it.each([["P23DT23H", "P23DT23H", {}]])( "$0 -> $1 $2", async (cell, value, options) => { - const table = DataFrame([Series("name", [cell], DataType.String)]).lazy() + const table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() const schema = { fields: [{ name: "name", type: "duration" as const, ...options }], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.getColumn("name").get(0)).toEqual(value) + expect(frame.getColumn("name").get(0)).toEqual(value) }, ) }) @@ -33,15 +33,15 @@ describe("stringifyDurationField", () => { // Null handling [null, ""], ])("%s -> %s", async (value, expected) => { - const table = DataFrame([Series("name", [value], DataType.String)]).lazy() + const table = pl.DataFrame([pl.Series("name", [value], pl.String)]).lazy() const schema = { fields: [{ name: "name", type: "duration" as const }], } - const ldf = await denormalizeTable(table, schema) - const df = await ldf.collect() + const result = await denormalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(expected) + expect(frame.toRecords()[0]?.name).toEqual(expected) }) }) diff --git a/table/field/types/duration.ts b/table/field/types/duration.ts index 8b16d4e5..47bbae3e 100644 --- a/table/field/types/duration.ts +++ b/table/field/types/duration.ts @@ -1,12 +1,15 @@ -import type { DurationField } from "@dpkit/core" -import type { Expr } from "nodejs-polars" +import type { DurationField } from "@dpkit/metadata" +import type * as pl from "nodejs-polars" // TODO: raise an issue on nodejs-polars repo as this is not supported yet // So we do nothing on this column type for now -export function parseDurationField(_field: DurationField, fieldExpr: Expr) { +export function parseDurationField(_field: DurationField, fieldExpr: pl.Expr) { return fieldExpr } -export function stringifyDurationField(_field: DurationField, fieldExpr: Expr) { +export function stringifyDurationField( + _field: DurationField, + fieldExpr: pl.Expr, +) { return fieldExpr } diff --git a/table/field/types/geojson.spec.ts b/table/field/types/geojson.spec.ts index 2500fb4d..86fcd3a2 100644 --- a/table/field/types/geojson.spec.ts +++ b/table/field/types/geojson.spec.ts @@ -1,17 +1,19 @@ -import type { Schema } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Schema } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" -import { validateTable } from "../../table/index.ts" +import { inspectTable } from "../../table/index.ts" describe("validateGeojsonField", () => { - it("should not report errors for valid GeoJSON Point", async () => { - const table = DataFrame({ - location: [ - '{"type":"Point","coordinates":[0,0]}', - '{"type":"Point","coordinates":[12.5,41.9]}', - '{"type":"Point","coordinates":[-73.9,40.7]}', - ], - }).lazy() + 
it("should not errors for valid GeoJSON Point", async () => { + const table = pl + .DataFrame({ + location: [ + '{"type":"Point","coordinates":[0,0]}', + '{"type":"Point","coordinates":[12.5,41.9]}', + '{"type":"Point","coordinates":[-73.9,40.7]}', + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -22,18 +24,20 @@ describe("validateGeojsonField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) - it("should not report errors for valid GeoJSON geometries", async () => { - const table = DataFrame({ - geometry: [ - '{"type":"LineString","coordinates":[[0,0],[1,1]]}', - '{"type":"Polygon","coordinates":[[[0,0],[1,0],[1,1],[0,1],[0,0]]]}', - '{"type":"MultiPoint","coordinates":[[0,0],[1,1]]}', - ], - }).lazy() + it("should not errors for valid GeoJSON geometries", async () => { + const table = pl + .DataFrame({ + geometry: [ + '{"type":"LineString","coordinates":[[0,0],[1,1]]}', + '{"type":"Polygon","coordinates":[[[0,0],[1,0],[1,1],[0,1],[0,0]]]}', + '{"type":"MultiPoint","coordinates":[[0,0],[1,1]]}', + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -44,18 +48,20 @@ describe("validateGeojsonField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) - it("should not report errors for valid GeoJSON Feature", async () => { - const table = DataFrame({ - feature: [ - '{"type":"Feature","geometry":{"type":"Point","coordinates":[0,0]},"properties":{"name":"Test"}}', - '{"type":"Feature","geometry":{"type":"LineString","coordinates":[[0,0],[1,1]]},"properties":{"id":1}}', - '{"type":"Feature","geometry":null,"properties":{}}', - ], - }).lazy() + it("should not errors for valid GeoJSON Feature", async () => { + const table = pl + .DataFrame({ + feature: [ + '{"type":"Feature","geometry":{"type":"Point","coordinates":[0,0]},"properties":{"name":"Test"}}', + '{"type":"Feature","geometry":{"type":"LineString","coordinates":[[0,0],[1,1]]},"properties":{"id":1}}', + '{"type":"Feature","geometry":null,"properties":{}}', + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -66,17 +72,19 @@ describe("validateGeojsonField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) - it("should not report errors for valid GeoJSON FeatureCollection", async () => { - const table = DataFrame({ - collection: [ - '{"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Point","coordinates":[0,0]},"properties":{}}]}', - '{"type":"FeatureCollection","features":[]}', - ], - }).lazy() + it("should not errors for valid GeoJSON FeatureCollection", async () => { + const table = pl + .DataFrame({ + collection: [ + '{"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Point","coordinates":[0,0]},"properties":{}}]}', + '{"type":"FeatureCollection","features":[]}', + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -87,18 +95,20 @@ describe("validateGeojsonField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) - it("should not report errors for null values", async () => { - const table = DataFrame({ - location: [ - '{"type":"Point","coordinates":[0,0]}', - null, - '{"type":"Feature","geometry":null,"properties":{}}', 
- ], - }).lazy() + it("should not error for null values", async () => { + const table = pl + .DataFrame({ + location: [ + '{"type":"Point","coordinates":[0,0]}', + null, + '{"type":"Feature","geometry":null,"properties":{}}', + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -109,18 +119,20 @@ describe("validateGeojsonField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) - it("should report errors for JSON arrays", async () => { - const table = DataFrame({ - data: [ - '{"type":"Point","coordinates":[0,0]}', - "[[0,0],[1,1]]", - '{"type":"Feature","geometry":null,"properties":{}}', - ], - }).lazy() + it("should error for JSON arrays", async () => { + const table = pl + .DataFrame({ + data: [ + '{"type":"Point","coordinates":[0,0]}', + "[[0,0],[1,1]]", + '{"type":"Feature","geometry":null,"properties":{}}', + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -131,7 +143,7 @@ describe("validateGeojsonField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { type: "cell/type", @@ -143,14 +155,16 @@ describe("validateGeojsonField", () => { ]) }) - it("should report errors for invalid JSON", async () => { - const table = DataFrame({ - data: [ - '{"type":"Point","coordinates":[0,0]}', - "invalid json", - "{broken}", - ], - }).lazy() + it("should error for invalid JSON", async () => { + const table = pl + .DataFrame({ + data: [ + '{"type":"Point","coordinates":[0,0]}', + "invalid json", + "{broken}", + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -161,7 +175,7 @@ describe("validateGeojsonField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors.filter(e => e.type === "cell/type")).toHaveLength(2) expect(errors).toContainEqual({ type: "cell/type", @@ -179,14 +193,16 @@ describe("validateGeojsonField", () => { }) }) - it("should report errors for empty strings", async () => { - const table = DataFrame({ - data: [ - '{"type":"Point","coordinates":[0,0]}', - "", - '{"type":"Feature","geometry":null,"properties":{}}', - ], - }).lazy() + it("should error for empty strings", async () => { + const table = pl + .DataFrame({ + data: [ + '{"type":"Point","coordinates":[0,0]}', + "", + '{"type":"Feature","geometry":null,"properties":{}}', + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -197,7 +213,7 @@ describe("validateGeojsonField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { type: "cell/type", @@ -209,10 +225,12 @@ describe("validateGeojsonField", () => { ]) }) - it("should report errors for JSON primitives", async () => { - const table = DataFrame({ - data: ['"string"', "123", "true", "false", "null"], - }).lazy() + it("should error for JSON primitives", async () => { + const table = pl + .DataFrame({ + data: ['"string"', "123", "true", "false", "null"], + }) + .lazy() const schema: Schema = { fields: [ @@ -223,7 +241,7 @@ describe("validateGeojsonField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { type: "cell/type", @@ -263,14 +281,16 @@ describe("validateGeojsonField", () => { ]) }) - it("should report errors for invalid GeoJSON Point
coordinates", async () => { - const table = DataFrame({ - location: [ - '{"type":"Point","coordinates":[0,0]}', - '{"type":"Point","coordinates":[0]}', - '{"type":"Point","coordinates":[0,0,0,0]}', - ], - }).lazy() + it("should errors for invalid GeoJSON Point coordinates", async () => { + const table = pl + .DataFrame({ + location: [ + '{"type":"Point","coordinates":[0,0]}', + '{"type":"Point","coordinates":[0]}', + '{"type":"Point","coordinates":[0,0,0,0]}', + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -281,7 +301,7 @@ describe("validateGeojsonField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(2) expect(errors).toContainEqual({ type: "cell/type", @@ -299,14 +319,16 @@ describe("validateGeojsonField", () => { }) }) - it("should report errors for invalid GeoJSON LineString", async () => { - const table = DataFrame({ - line: [ - '{"type":"LineString","coordinates":[[0,0],[1,1]]}', - '{"type":"LineString","coordinates":[[0,0]]}', - '{"type":"LineString","coordinates":[0,0]}', - ], - }).lazy() + it("should errors for invalid GeoJSON LineString", async () => { + const table = pl + .DataFrame({ + line: [ + '{"type":"LineString","coordinates":[[0,0],[1,1]]}', + '{"type":"LineString","coordinates":[[0,0]]}', + '{"type":"LineString","coordinates":[0,0]}', + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -317,7 +339,7 @@ describe("validateGeojsonField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(2) expect(errors).toContainEqual({ type: "cell/type", @@ -335,14 +357,16 @@ describe("validateGeojsonField", () => { }) }) - it("should report errors for incomplete GeoJSON Feature", async () => { - const table = DataFrame({ - feature: [ - '{"type":"Feature","geometry":{"type":"Point","coordinates":[0,0]},"properties":{}}', - '{"type":"Feature","geometry":{"type":"Point","coordinates":[0,0]}}', - '{"type":"Feature","properties":{}}', - ], - }).lazy() + it("should errors for incomplete GeoJSON Feature", async () => { + const table = pl + .DataFrame({ + feature: [ + '{"type":"Feature","geometry":{"type":"Point","coordinates":[0,0]},"properties":{}}', + '{"type":"Feature","geometry":{"type":"Point","coordinates":[0,0]}}', + '{"type":"Feature","properties":{}}', + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -353,7 +377,7 @@ describe("validateGeojsonField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(2) expect(errors).toContainEqual({ type: "cell/type", @@ -371,13 +395,15 @@ describe("validateGeojsonField", () => { }) }) - it("should report errors for invalid GeoJSON FeatureCollection", async () => { - const table = DataFrame({ - collection: [ - '{"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Point","coordinates":[0,0]},"properties":{}}]}', - '{"type":"FeatureCollection"}', - ], - }).lazy() + it("should errors for invalid GeoJSON FeatureCollection", async () => { + const table = pl + .DataFrame({ + collection: [ + '{"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Point","coordinates":[0,0]},"properties":{}}]}', + '{"type":"FeatureCollection"}', + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -388,7 +414,7 @@ describe("validateGeojsonField", () => { ], } - const { errors } = 
await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { type: "cell/type", @@ -401,12 +427,14 @@ describe("validateGeojsonField", () => { }) it("should not validate jsonSchema constraints for geojson fields", async () => { - const table = DataFrame({ - location: [ - '{"type":"Point","coordinates":[0,0]}', - '{"type":"Point","coordinates":[100,200]}', - ], - }).lazy() + const table = pl + .DataFrame({ + location: [ + '{"type":"Point","coordinates":[0,0]}', + '{"type":"Point","coordinates":[100,200]}', + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -428,17 +456,19 @@ describe("validateGeojsonField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) - it("should not report errors for valid TopoJSON", async () => { - const table = DataFrame({ - topology: [ - '{"type":"Topology","objects":{"example":{"type":"GeometryCollection","geometries":[{"type":"Point","coordinates":[0,0]}]}},"arcs":[]}', - '{"type":"Topology","objects":{"collection":{"type":"GeometryCollection","geometries":[]}},"arcs":[]}', - ], - }).lazy() + it("should not error for valid TopoJSON", async () => { + const table = pl + .DataFrame({ + topology: [ + '{"type":"Topology","objects":{"example":{"type":"GeometryCollection","geometries":[{"type":"Point","coordinates":[0,0]}]}},"arcs":[]}', + '{"type":"Topology","objects":{"collection":{"type":"GeometryCollection","geometries":[]}},"arcs":[]}', + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -450,18 +480,20 @@ describe("validateGeojsonField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) - it("should report errors for invalid TopoJSON structure", async () => { - const table = DataFrame({ - topology: [ - '{"type":"Topology","objects":{"example":{"type":"GeometryCollection","geometries":[]}},"arcs":[]}', - '{"type":"Topology","objects":{}}', - '{"type":"Topology"}', - ], - }).lazy() + it("should error for invalid TopoJSON structure", async () => { + const table = pl + .DataFrame({ + topology: [ + '{"type":"Topology","objects":{"example":{"type":"GeometryCollection","geometries":[]}},"arcs":[]}', + '{"type":"Topology","objects":{}}', + '{"type":"Topology"}', + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -473,12 +505,13 @@ describe("validateGeojsonField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(2) expect(errors).toContainEqual({ type: "cell/type", fieldName: "topology", fieldType: "geojson", + fieldFormat: "topojson", rowNumber: 2, cell: '{"type":"Topology","objects":{}}', }) @@ -486,19 +519,22 @@ describe("validateGeojsonField", () => { type: "cell/type", fieldName: "topology", fieldType: "geojson", + fieldFormat: "topojson", rowNumber: 3, cell: '{"type":"Topology"}', }) }) it("should accept TopoJSON geometry objects", async () => { - const table = DataFrame({ - geometry: [ - '{"type":"Point","coordinates":[0,0]}', - '{"type":"LineString","arcs":[0,1]}', - '{"type":"Polygon","arcs":[[0,1,2]]}', - ], - }).lazy() + const table = pl + .DataFrame({ + geometry: [ + '{"type":"Point","coordinates":[0,0]}', + '{"type":"LineString","arcs":[0,1]}', + '{"type":"Polygon","arcs":[[0,1,2]]}', + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -510,17
+546,19 @@ describe("validateGeojsonField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) it("should handle null values for topojson format", async () => { - const table = DataFrame({ - topology: [ - '{"type":"Topology","objects":{"example":{"type":"GeometryCollection","geometries":[]}},"arcs":[]}', - null, - ], - }).lazy() + const table = pl + .DataFrame({ + topology: [ + '{"type":"Topology","objects":{"example":{"type":"GeometryCollection","geometries":[]}},"arcs":[]}', + null, + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -532,7 +570,7 @@ describe("validateGeojsonField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) }) diff --git a/table/field/types/geojson.ts b/table/field/types/geojson.ts index 7a455ab3..98fc4300 100644 --- a/table/field/types/geojson.ts +++ b/table/field/types/geojson.ts @@ -1,12 +1,11 @@ -import type { GeojsonField } from "@dpkit/core" -import geojsonProfile from "../../profiles/geojson.json" with { type: "json" } -import topojsonProfile from "../../profiles/topojson.json" with { type: "json" } +import type { GeojsonField } from "@dpkit/metadata" +import geojson from "../../assets/geojson.json" with { type: "json" } +import topojson from "../../assets/topojson.json" with { type: "json" } import type { Table } from "../../table/index.ts" -import { validateJsonField } from "./json.ts" +import { inspectJsonField } from "./json.ts" -export async function validateGeojsonField(field: GeojsonField, table: Table) { - return validateJsonField(field, table, { - formatProfile: - field.format === "topojson" ? topojsonProfile : geojsonProfile, +export async function inspectGeojsonField(field: GeojsonField, table: Table) { + return inspectJsonField(field, table, { + formatJsonSchema: field.format === "topojson" ? 
topojson : geojson, }) } diff --git a/table/field/types/geopoint.spec.ts b/table/field/types/geopoint.spec.ts index 71c87dcb..c3ff7ded 100644 --- a/table/field/types/geopoint.spec.ts +++ b/table/field/types/geopoint.spec.ts @@ -1,4 +1,4 @@ -import { DataFrame, DataType, Series } from "nodejs-polars" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" import { denormalizeTable, normalizeTable } from "../../table/index.ts" @@ -23,16 +23,16 @@ describe("parseGeopointField", () => { //["lon,45.50", null], //["90.50,45.50,0", null], ])("%s -> %s", async (cell, value) => { - const table = DataFrame([Series("name", [cell], DataType.String)]).lazy() + const table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() const schema = { fields: [{ name: "name", type: "geopoint" as const }], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(value) + expect(frame.toRecords()[0]?.name).toEqual(value) }) }) @@ -56,7 +56,7 @@ describe("parseGeopointField", () => { //["[90.50, 45.50, 0]", null], //["['lon', 'lat']", null], ])("%s -> %s", async (cell, value) => { - const table = DataFrame([Series("name", [cell], DataType.String)]).lazy() + const table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() const schema = { fields: [ @@ -64,10 +64,10 @@ describe("parseGeopointField", () => { ], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(value) + expect(frame.toRecords()[0]?.name).toEqual(value) }) }) @@ -91,7 +91,7 @@ describe("parseGeopointField", () => { //['{"lon": 90.50}', null], //['{"lat": 45.50}', null], ])("%s -> %s", async (cell, value) => { - const table = DataFrame([Series("name", [cell], DataType.String)]).lazy() + const table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() const schema = { fields: [ @@ -103,10 +103,10 @@ describe("parseGeopointField", () => { ], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(value) + expect(frame.toRecords()[0]?.name).toEqual(value) }) }) }) @@ -127,18 +127,18 @@ describe("stringifyGeopointField", () => { // Null handling //[null, null], ])("%s -> %s", async (value, expected) => { - const table = DataFrame([ - Series("name", [value], DataType.List(DataType.Float64)), - ]).lazy() + const table = pl + .DataFrame([pl.Series("name", [value], pl.List(pl.Float64))]) + .lazy() const schema = { fields: [{ name: "name", type: "geopoint" as const }], } - const ldf = await denormalizeTable(table, schema) - const df = await ldf.collect() + const result = await denormalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(expected) + expect(frame.toRecords()[0]?.name).toEqual(expected) }) }) @@ -154,9 +154,9 @@ describe("stringifyGeopointField", () => { // Null handling //[null, null], ])("%s -> %s", async (value, expected) => { - const table = DataFrame([ - Series("name", [value], DataType.List(DataType.Float64)), - ]).lazy() + const table = pl + .DataFrame([pl.Series("name", [value], pl.List(pl.Float64))]) + .lazy() const schema = { fields: [ @@ -164,10 
+164,10 @@ describe("stringifyGeopointField", () => { ], } - const ldf = await denormalizeTable(table, schema) - const df = await ldf.collect() + const result = await denormalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(expected) + expect(frame.toRecords()[0]?.name).toEqual(expected) }) }) @@ -183,9 +183,9 @@ describe("stringifyGeopointField", () => { // Null handling //[null, null], ])("%s -> %s", async (value, expected) => { - const table = DataFrame([ - Series("name", [value], DataType.List(DataType.Float64)), - ]).lazy() + const table = pl + .DataFrame([pl.Series("name", [value], pl.List(pl.Float64))]) + .lazy() const schema = { fields: [ @@ -197,10 +197,10 @@ describe("stringifyGeopointField", () => { ], } - const ldf = await denormalizeTable(table, schema) - const df = await ldf.collect() + const result = await denormalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(expected) + expect(frame.toRecords()[0]?.name).toEqual(expected) }) }) }) diff --git a/table/field/types/geopoint.ts b/table/field/types/geopoint.ts index 7ab063cd..1e71f5c5 100644 --- a/table/field/types/geopoint.ts +++ b/table/field/types/geopoint.ts @@ -1,6 +1,5 @@ -import type { GeopointField } from "@dpkit/core" -import { DataType, concatList, concatString, lit } from "nodejs-polars" -import type { Expr } from "nodejs-polars" +import type { GeopointField } from "@dpkit/metadata" +import * as pl from "nodejs-polars" // TODO: // Add more validation: @@ -8,63 +7,72 @@ import type { Expr } from "nodejs-polars" // - Check the values are within -180..180 and -90..90 // - Return null instead of list if any of the values are out of range -export function parseGeopointField(field: GeopointField, fieldExpr: Expr) { +export function parseGeopointField(field: GeopointField, fieldExpr: pl.Expr) { // Default format is "lon,lat" string const format = field.format ?? "default" if (format === "default") { - fieldExpr = fieldExpr.str.split(",").cast(DataType.List(DataType.Float64)) + fieldExpr = fieldExpr.str.split(",").cast(pl.List(pl.Float64)) } if (format === "array") { fieldExpr = fieldExpr.str .replaceAll("[\\[\\]\\s]", "") .str.split(",") - .cast(DataType.List(DataType.Float64)) + .cast(pl.List(pl.Float64)) } if (format === "object") { - fieldExpr = concatList([ - fieldExpr.str.jsonPathMatch("$.lon").cast(DataType.Float64), - fieldExpr.str.jsonPathMatch("$.lat").cast(DataType.Float64), - ]).alias(field.name) + fieldExpr = pl + .concatList([ + fieldExpr.str.jsonPathMatch("$.lon").cast(pl.Float64), + fieldExpr.str.jsonPathMatch("$.lat").cast(pl.Float64), + ]) + .alias(field.name) } return fieldExpr } -export function stringifyGeopointField(field: GeopointField, fieldExpr: Expr) { +export function stringifyGeopointField( + field: GeopointField, + fieldExpr: pl.Expr, +) { // Default format is "lon,lat" string const format = field.format ?? 
"default" if (format === "default") { - return fieldExpr.cast(DataType.List(DataType.String)).lst.join(",") + return fieldExpr.cast(pl.List(pl.String)).lst.join(",") } if (format === "array") { - return concatString( - [ - lit("["), - fieldExpr.lst.get(0).cast(DataType.String), - lit(","), - fieldExpr.lst.get(1).cast(DataType.String), - lit("]"), - ], - "", - ).alias(field.name) as Expr + return pl + .concatString( + [ + pl.lit("["), + fieldExpr.lst.get(0).cast(pl.String), + pl.lit(","), + fieldExpr.lst.get(1).cast(pl.String), + pl.lit("]"), + ], + "", + ) + .alias(field.name) as pl.Expr } if (format === "object") { - return concatString( - [ - lit('{"lon":'), - fieldExpr.lst.get(0).cast(DataType.String), - lit(',"lat":'), - fieldExpr.lst.get(1).cast(DataType.String), - lit("}"), - ], - "", - ).alias(field.name) as Expr + return pl + .concatString( + [ + pl.lit('{"lon":'), + fieldExpr.lst.get(0).cast(pl.String), + pl.lit(',"lat":'), + fieldExpr.lst.get(1).cast(pl.String), + pl.lit("}"), + ], + "", + ) + .alias(field.name) as pl.Expr } return fieldExpr diff --git a/table/field/types/integer.spec.ts b/table/field/types/integer.spec.ts index c26e9509..d45e7f3d 100644 --- a/table/field/types/integer.spec.ts +++ b/table/field/types/integer.spec.ts @@ -1,4 +1,4 @@ -import { DataFrame, DataType, Series } from "nodejs-polars" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" import { denormalizeTable, normalizeTable } from "../../table/index.ts" @@ -51,17 +51,17 @@ describe("parseIntegerField", () => { //[" -1,000 ", -1000, { groupChar: "," }], ["000,001", 1, { groupChar: "," }], ])("$0 -> $1 $2", async (cell, value, options) => { - const table = DataFrame([Series("name", [cell], DataType.String)]).lazy() + const table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() const schema = { fields: [{ name: "name", type: "integer" as const, ...options }], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.getColumn("name").get(0)).toEqual(value) - expect(df.getColumn("name").get(0)).toEqual(value) + expect(frame.getColumn("name").get(0)).toEqual(value) + expect(frame.getColumn("name").get(0)).toEqual(value) }) describe("categories", () => { @@ -75,16 +75,16 @@ describe("parseIntegerField", () => { ["1", 1, { categories: [{ value: 1, label: "One" }] }], ["2", null, { categories: [{ value: 1, label: "One" }] }], ])("$0 -> $1 $2", async (cell, value, options) => { - const table = DataFrame([Series("name", [cell], DataType.String)]).lazy() + const table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() const schema = { fields: [{ name: "name", type: "integer" as const, ...options }], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(value) + expect(frame.toRecords()[0]?.name).toEqual(value) }) }) }) @@ -107,15 +107,15 @@ describe("stringifyIntegerField", () => { // Null handling [null, ""], ])("%s -> %s", async (value, expected) => { - const table = DataFrame([Series("name", [value], DataType.Int64)]).lazy() + const table = pl.DataFrame([pl.Series("name", [value], pl.Int64)]).lazy() const schema = { fields: [{ name: "name", type: "integer" as const }], } - const ldf = await denormalizeTable(table, schema) - const df = await ldf.collect() + const 
result = await denormalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(expected) + expect(frame.toRecords()[0]?.name).toEqual(expected) }) }) diff --git a/table/field/types/integer.ts b/table/field/types/integer.ts index 0a1d04c3..05722a58 100644 --- a/table/field/types/integer.ts +++ b/table/field/types/integer.ts @@ -1,10 +1,9 @@ -import type { IntegerField } from "@dpkit/core" -import { DataType, lit, when } from "nodejs-polars" -import type { Expr } from "nodejs-polars" +import type { IntegerField } from "@dpkit/metadata" +import * as pl from "nodejs-polars" // TODO: support categories // TODO: support categoriesOrder -export function parseIntegerField(field: IntegerField, fieldExpr: Expr) { +export function parseIntegerField(field: IntegerField, fieldExpr: pl.Expr) { const groupChar = field.groupChar const bareNumber = field.bareNumber const flattenCategories = field.categories?.map(it => @@ -26,22 +25,26 @@ export function parseIntegerField(field: IntegerField, fieldExpr: Expr) { } // Cast to int64 (will handle values up to 2^63-1) - fieldExpr = fieldExpr.cast(DataType.Int64) + fieldExpr = fieldExpr.cast(pl.Int64) // Currently, only string categories are supported if (flattenCategories) { - return when(fieldExpr.isIn(flattenCategories)) + return pl + .when(fieldExpr.isIn(flattenCategories)) .then(fieldExpr) - .otherwise(lit(null)) + .otherwise(pl.lit(null)) .alias(field.name) } return fieldExpr } -export function stringifyIntegerField(_field: IntegerField, fieldExpr: Expr) { +export function stringifyIntegerField( + _field: IntegerField, + fieldExpr: pl.Expr, +) { // Convert to string - fieldExpr = fieldExpr.cast(DataType.String) + fieldExpr = fieldExpr.cast(pl.String) //const groupChar = field.groupChar //const bareNumber = field.bareNumber diff --git a/table/field/types/json.ts b/table/field/types/json.ts index 2b8fb663..465d959c 100644 --- a/table/field/types/json.ts +++ b/table/field/types/json.ts @@ -1,30 +1,30 @@ -import type { ArrayField, GeojsonField, ObjectField } from "@dpkit/core" -import { validateDescriptor } from "@dpkit/core" +import type { ArrayField, GeojsonField, ObjectField } from "@dpkit/metadata" +import { inspectJsonValue } from "@dpkit/metadata" +import type { CellError } from "@dpkit/metadata" import * as pl from "nodejs-polars" -import type { CellError } from "../../error/index.ts" import { isObject } from "../../helpers.ts" import type { Table } from "../../table/index.ts" // TODO: Improve the implementation // Make unblocking / handle large data / process in parallel / move processing to Rust? 
-export async function validateJsonField( +export async function inspectJsonField( field: ArrayField | GeojsonField | ObjectField, table: Table, options?: { - formatProfile?: Record<string, unknown> + formatJsonSchema?: Record<string, unknown> }, ) { const errors: CellError[] = [] - const formatProfile = options?.formatProfile - const constraintProfile = field.constraints?.jsonSchema + const formatJsonSchema = options?.formatJsonSchema + const constraintJsonSchema = field.constraints?.jsonSchema const frame = await table .withRowCount() .select( - pl.col("row_nr").add(1).alias("number"), - pl.col(field.name).alias("source"), + pl.col("row_nr").add(1).alias("number"), + pl.col(field.name).alias("source"), ) .collect() @@ -44,24 +44,25 @@ export async function validateJsonField( cell: String(row.source), fieldName: field.name, fieldType: field.type, + fieldFormat: field.format, rowNumber: row.number, }) continue } - if (formatProfile) { - // TODO: Extract more generic function validateJson? - const report = await validateDescriptor(target as any, { - profile: formatProfile, + if (formatJsonSchema) { + const formatErrors = await inspectJsonValue(target, { + jsonSchema: formatJsonSchema, }) - if (!report.valid) { + if (formatErrors.length) { errors.push({ type: "cell/type", cell: String(row.source), fieldName: field.name, fieldType: field.type, + fieldFormat: field.format, rowNumber: row.number, }) } @@ -69,19 +70,19 @@ export async function validateJsonField( continue } - if (constraintProfile) { - // TODO: Extract more generic function validateJson? - const report = await validateDescriptor(target as any, { - profile: constraintProfile, + if (constraintJsonSchema) { + const constraintErrors = await inspectJsonValue(target, { + jsonSchema: constraintJsonSchema, }) - if (!report.valid) { + for (const error of constraintErrors) { errors.push({ type: "cell/jsonSchema", cell: String(row.source), fieldName: field.name, rowNumber: row.number, - jsonSchema: constraintProfile, + pointer: error.pointer, + message: error.message, }) } } diff --git a/table/field/types/list.spec.ts b/table/field/types/list.spec.ts index 85c1c508..12dd51d8 100644 --- a/table/field/types/list.spec.ts +++ b/table/field/types/list.spec.ts @@ -1,4 +1,4 @@ -import { DataFrame, DataType, Series } from "nodejs-polars" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" import { denormalizeTable, normalizeTable } from "../../table/index.ts" @@ -28,16 +28,16 @@ describe("parseListField", () => { // Null handling //[null, null], ])("%s -> %s", async (cell, value) => { - const table = DataFrame([Series("name", [cell], DataType.String)]).lazy() + const table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() const schema = { fields: [{ name: "name", type: "list" as const }], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(value) + expect(frame.toRecords()[0]?.name).toEqual(value) }) }) @@ -66,7 +66,7 @@ describe("parseListField", () => { ["1,a,3", [1, null, 3]], ["1.5,2,3", [null, 2, 3]], ])("%s -> %s", async (cell, value) => { - const table = DataFrame([Series("name", [cell], DataType.String)]).lazy() + const table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() const schema = { fields: [ @@ -74,10 +74,10 @@ describe("parseListField", () => { ], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const
result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(value) + expect(frame.toRecords()[0]?.name).toEqual(value) }) }) @@ -105,7 +105,7 @@ describe("parseListField", () => { // Invalid numbers become null ["1.1,a,3.3", [1.1, null, 3.3]], ])("%s -> %s", async (cell, value) => { - const table = DataFrame([Series("name", [cell], DataType.String)]).lazy() + const table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() const schema = { fields: [ @@ -113,10 +113,10 @@ describe("parseListField", () => { ], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(value) + expect(frame.toRecords()[0]?.name).toEqual(value) }) }) @@ -138,16 +138,16 @@ describe("parseListField", () => { // Empty items in list ["a;;c", ["a", "", "c"]], ])("%s -> %s", async (cell, value) => { - const table = DataFrame([Series("name", [cell], DataType.String)]).lazy() + const table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() const schema = { fields: [{ name: "name", type: "list" as const, delimiter: ";" }], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(value) + expect(frame.toRecords()[0]?.name).toEqual(value) }) }) }) @@ -175,18 +175,18 @@ describe("stringifyListField", () => { // Empty array [[], ""], ])("%s -> %s", async (value, expected) => { - const table = DataFrame([ - Series("name", [value], DataType.List(DataType.String)), - ]).lazy() + const table = pl + .DataFrame([pl.Series("name", [value], pl.List(pl.String))]) + .lazy() const schema = { fields: [{ name: "name", type: "list" as const }], } - const ldf = await denormalizeTable(table, schema) - const df = await ldf.collect() + const result = await denormalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(expected) + expect(frame.toRecords()[0]?.name).toEqual(expected) }) }) @@ -207,9 +207,9 @@ describe("stringifyListField", () => { // Empty array [[], ""], ])("%s -> %s", async (value, expected) => { - const table = DataFrame([ - Series("name", [value], DataType.List(DataType.Int16)), - ]).lazy() + const table = pl + .DataFrame([pl.Series("name", [value], pl.List(pl.Int16))]) + .lazy() const schema = { fields: [ @@ -217,10 +217,10 @@ describe("stringifyListField", () => { ], } - const ldf = await denormalizeTable(table, schema) - const df = await ldf.collect() + const result = await denormalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(expected) + expect(frame.toRecords()[0]?.name).toEqual(expected) }) }) @@ -241,9 +241,9 @@ describe("stringifyListField", () => { // Empty array [[], ""], ])("%s -> %s", async (value, expected) => { - const table = DataFrame([ - Series("name", [value], DataType.List(DataType.Float64)), - ]).lazy() + const table = pl + .DataFrame([pl.Series("name", [value], pl.List(pl.Float64))]) + .lazy() const schema = { fields: [ @@ -251,10 +251,10 @@ describe("stringifyListField", () => { ], } - const ldf = await denormalizeTable(table, schema) - const df = await ldf.collect() + const result = await denormalizeTable(table, schema) + const frame = await result.collect() - 
expect(df.toRecords()[0]?.name).toEqual(expected) + expect(frame.toRecords()[0]?.name).toEqual(expected) }) }) @@ -277,18 +277,18 @@ describe("stringifyListField", () => { // Empty array [[], ""], ])("%s -> %s", async (value, expected) => { - const table = DataFrame([ - Series("name", [value], DataType.List(DataType.String)), - ]).lazy() + const table = pl + .DataFrame([pl.Series("name", [value], pl.List(pl.String))]) + .lazy() const schema = { fields: [{ name: "name", type: "list" as const, delimiter: ";" }], } - const ldf = await denormalizeTable(table, schema) - const df = await ldf.collect() + const result = await denormalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(expected) + expect(frame.toRecords()[0]?.name).toEqual(expected) }) }) }) diff --git a/table/field/types/list.ts b/table/field/types/list.ts index 926954f3..389d75bc 100644 --- a/table/field/types/list.ts +++ b/table/field/types/list.ts @@ -1,31 +1,30 @@ -import type { ListField } from "@dpkit/core" -import { DataType } from "nodejs-polars" -import type { Expr } from "nodejs-polars" +import type { ListField } from "@dpkit/metadata" +import * as pl from "nodejs-polars" // TODO: // Add more validation: // - Return null instead of list if all array values are nulls? -export function parseListField(field: ListField, fieldExpr: Expr) { +export function parseListField(field: ListField, fieldExpr: pl.Expr) { const delimiter = field.delimiter ?? "," const itemType = field.itemType - let dtype: any = DataType.String - if (itemType === "integer") dtype = DataType.Int64 - if (itemType === "number") dtype = DataType.Float64 - if (itemType === "boolean") dtype = DataType.Bool - if (itemType === "datetime") dtype = DataType.Datetime - if (itemType === "date") dtype = DataType.Date - if (itemType === "time") dtype = DataType.Time + let dtype: any = pl.String + if (itemType === "integer") dtype = pl.Int64 + if (itemType === "number") dtype = pl.Float64 + if (itemType === "boolean") dtype = pl.Bool + if (itemType === "datetime") dtype = pl.Datetime + if (itemType === "date") dtype = pl.Date + if (itemType === "time") dtype = pl.Time - fieldExpr = fieldExpr.str.split(delimiter).cast(DataType.List(dtype)) + fieldExpr = fieldExpr.str.split(delimiter).cast(pl.List(dtype)) return fieldExpr } -export function stringifyListField(field: ListField, fieldExpr: Expr) { +export function stringifyListField(field: ListField, fieldExpr: pl.Expr) { const delimiter = field.delimiter ?? 
"," return fieldExpr - .cast(DataType.List(DataType.String)) + .cast(pl.List(pl.String)) .lst.join({ separator: delimiter, ignoreNulls: true }) } diff --git a/table/field/types/number.spec.ts b/table/field/types/number.spec.ts index cb9c4314..7d292cb9 100644 --- a/table/field/types/number.spec.ts +++ b/table/field/types/number.spec.ts @@ -1,4 +1,4 @@ -import { DataFrame, DataType, Series } from "nodejs-polars" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" import { denormalizeTable, normalizeTable } from "../../table/index.ts" @@ -63,16 +63,16 @@ describe("parseNumberField", () => { { bareNumber: false, groupChar: ".", decimalChar: "," }, ], ])("$0 -> $1 $2", async (cell, value, options) => { - const table = DataFrame([Series("name", [cell], DataType.String)]).lazy() + const table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() const schema = { fields: [{ name: "name", type: "number" as const, ...options }], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.getColumn("name").get(0)).toEqual(value) + expect(frame.getColumn("name").get(0)).toEqual(value) }) }) @@ -103,15 +103,15 @@ describe("stringifyNumberField", () => { // Null handling [null, ""], ])("%s -> %s", async (value, expected) => { - const table = DataFrame([Series("name", [value], DataType.Float64)]).lazy() + const table = pl.DataFrame([pl.Series("name", [value], pl.Float64)]).lazy() const schema = { fields: [{ name: "name", type: "number" as const }], } - const ldf = await denormalizeTable(table, schema) - const df = await ldf.collect() + const result = await denormalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(expected) + expect(frame.toRecords()[0]?.name).toEqual(expected) }) }) diff --git a/table/field/types/number.ts b/table/field/types/number.ts index de55a395..8fa709a7 100644 --- a/table/field/types/number.ts +++ b/table/field/types/number.ts @@ -1,8 +1,7 @@ -import type { NumberField } from "@dpkit/core" -import { DataType } from "nodejs-polars" -import type { Expr } from "nodejs-polars" +import type { NumberField } from "@dpkit/metadata" +import * as pl from "nodejs-polars" -export function parseNumberField(field: NumberField, fieldExpr: Expr) { +export function parseNumberField(field: NumberField, fieldExpr: pl.Expr) { // Extract the decimal and group characters const decimalChar = field.decimalChar ?? "." const groupChar = field.groupChar ?? "" @@ -44,13 +43,13 @@ export function parseNumberField(field: NumberField, fieldExpr: Expr) { } // Cast to float64 - fieldExpr = fieldExpr.cast(DataType.Float64) + fieldExpr = fieldExpr.cast(pl.Float64) return fieldExpr } -export function stringifyNumberField(_field: NumberField, fieldExpr: Expr) { +export function stringifyNumberField(_field: NumberField, fieldExpr: pl.Expr) { // Convert to string - fieldExpr = fieldExpr.cast(DataType.String) + fieldExpr = fieldExpr.cast(pl.String) //const decimalChar = field.decimalChar ?? "." //const groupChar = field.groupChar ?? 
"" diff --git a/table/field/types/object.spec.ts b/table/field/types/object.spec.ts index aa302b1c..c314911e 100644 --- a/table/field/types/object.spec.ts +++ b/table/field/types/object.spec.ts @@ -1,13 +1,15 @@ -import type { Schema } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Schema } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" -import { validateTable } from "../../table/index.ts" +import { inspectTable } from "../../table/index.ts" describe("validateObjectField", () => { - it("should not report errors for valid JSON objects", async () => { - const table = DataFrame({ - metadata: ['{"key":"value"}', '{"num":123}', '{"arr":[1,2,3]}'], - }).lazy() + it("should not errors for valid JSON objects", async () => { + const table = pl + .DataFrame({ + metadata: ['{"key":"value"}', '{"num":123}', '{"arr":[1,2,3]}'], + }) + .lazy() const schema: Schema = { fields: [ @@ -18,14 +20,16 @@ describe("validateObjectField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) - it("should report errors for JSON arrays", async () => { - const table = DataFrame({ - data: ["[1,2,3]", '{"key":"value"}', '["a","b","c"]'], - }).lazy() + it("should errors for JSON arrays", async () => { + const table = pl + .DataFrame({ + data: ["[1,2,3]", '{"key":"value"}', '["a","b","c"]'], + }) + .lazy() const schema: Schema = { fields: [ @@ -36,7 +40,7 @@ describe("validateObjectField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { type: "cell/type", @@ -55,10 +59,12 @@ describe("validateObjectField", () => { ]) }) - it("should not report errors for null values", async () => { - const table = DataFrame({ - config: ['{"key":"value"}', null, '{"num":123}'], - }).lazy() + it("should not errors for null values", async () => { + const table = pl + .DataFrame({ + config: ['{"key":"value"}', null, '{"num":123}'], + }) + .lazy() const schema: Schema = { fields: [ @@ -69,14 +75,16 @@ describe("validateObjectField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) - it("should report errors for invalid JSON", async () => { - const table = DataFrame({ - data: ['{"valid":true}', "invalid json", '{"key":"value"}', "{broken}"], - }).lazy() + it("should errors for invalid JSON", async () => { + const table = pl + .DataFrame({ + data: ['{"valid":true}', "invalid json", '{"key":"value"}', "{broken}"], + }) + .lazy() const schema: Schema = { fields: [ @@ -87,7 +95,7 @@ describe("validateObjectField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors.filter(e => e.type === "cell/type")).toHaveLength(2) expect(errors).toContainEqual({ type: "cell/type", @@ -106,13 +114,15 @@ describe("validateObjectField", () => { }) it("should handle complex nested JSON structures", async () => { - const table = DataFrame({ - complex: [ - '{"user":{"name":"John","age":30,"tags":["admin","user"]}}', - '{"nested":{"deep":{"value":true}}}', - '{"array":[{"id":1},{"id":2}]}', - ], - }).lazy() + const table = pl + .DataFrame({ + complex: [ + '{"user":{"name":"John","age":30,"tags":["admin","user"]}}', + '{"nested":{"deep":{"value":true}}}', + 
'{"array":[{"id":1},{"id":2}]}', + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -123,14 +133,16 @@ describe("validateObjectField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) - it("should report errors for empty strings", async () => { - const table = DataFrame({ - data: ['{"valid":true}', "", '{"key":"value"}'], - }).lazy() + it("should errors for empty strings", async () => { + const table = pl + .DataFrame({ + data: ['{"valid":true}', "", '{"key":"value"}'], + }) + .lazy() const schema: Schema = { fields: [ @@ -141,7 +153,7 @@ describe("validateObjectField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { type: "cell/type", @@ -153,10 +165,12 @@ describe("validateObjectField", () => { ]) }) - it("should report errors for JSON primitives", async () => { - const table = DataFrame({ - data: ['"string"', "123", "true", "false", "null"], - }).lazy() + it("should errors for JSON primitives", async () => { + const table = pl + .DataFrame({ + data: ['"string"', "123", "true", "false", "null"], + }) + .lazy() const schema: Schema = { fields: [ @@ -167,7 +181,7 @@ describe("validateObjectField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { type: "cell/type", @@ -207,14 +221,16 @@ describe("validateObjectField", () => { ]) }) - it("should not report errors for objects matching jsonSchema", async () => { - const table = DataFrame({ - user: [ - '{"name":"John","age":30}', - '{"name":"Jane","age":25}', - '{"name":"Bob","age":35}', - ], - }).lazy() + it("should not errors for objects matching jsonSchema", async () => { + const table = pl + .DataFrame({ + user: [ + '{"name":"John","age":30}', + '{"name":"Jane","age":25}', + '{"name":"Bob","age":35}', + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -235,11 +251,11 @@ describe("validateObjectField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) - it("should report errors for objects not matching jsonSchema", async () => { + it("should errors for objects not matching jsonSchema", async () => { const jsonSchema = { type: "object", properties: { @@ -249,14 +265,16 @@ describe("validateObjectField", () => { required: ["name", "age"], } - const table = DataFrame({ - user: [ - '{"name":"John","age":30}', - '{"name":"Jane"}', - '{"age":25}', - '{"name":"Bob","age":"invalid"}', - ], - }).lazy() + const table = pl + .DataFrame({ + user: [ + '{"name":"John","age":30}', + '{"name":"Jane"}', + '{"age":25}', + '{"name":"Bob","age":"invalid"}', + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -270,38 +288,44 @@ describe("validateObjectField", () => { ], } - const { errors } = await validateTable(table, { schema }) - expect(errors.filter(e => e.type === "cell/jsonSchema")).toHaveLength(3) - expect(errors).toContainEqual({ - type: "cell/jsonSchema", - fieldName: "user", - jsonSchema, - rowNumber: 2, - cell: '{"name":"Jane"}', - }) - expect(errors).toContainEqual({ - type: "cell/jsonSchema", - fieldName: "user", - jsonSchema, - rowNumber: 3, - cell: '{"age":25}', - }) - expect(errors).toContainEqual({ - type: "cell/jsonSchema", - fieldName: "user", - jsonSchema, - rowNumber: 4, - cell: 
'{"name":"Bob","age":"invalid"}', - }) + const errors = await inspectTable(table, { schema }) + expect(errors.filter(e => e.type === "cell/jsonSchema")).toEqual([ + { + type: "cell/jsonSchema", + fieldName: "user", + rowNumber: 2, + cell: '{"name":"Jane"}', + pointer: "", + message: "must have required property 'age'", + }, + { + type: "cell/jsonSchema", + fieldName: "user", + rowNumber: 3, + cell: '{"age":25}', + pointer: "", + message: "must have required property 'name'", + }, + { + type: "cell/jsonSchema", + fieldName: "user", + rowNumber: 4, + cell: '{"name":"Bob","age":"invalid"}', + pointer: "/age", + message: "must be number", + }, + ]) }) it("should validate complex jsonSchema with nested objects", async () => { - const table = DataFrame({ - config: [ - '{"database":{"host":"localhost","port":5432},"cache":{"enabled":true}}', - '{"database":{"host":"localhost","port":"invalid"},"cache":{"enabled":true}}', - ], - }).lazy() + const table = pl + .DataFrame({ + config: [ + '{"database":{"host":"localhost","port":5432},"cache":{"enabled":true}}', + '{"database":{"host":"localhost","port":"invalid"},"cache":{"enabled":true}}', + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -335,25 +359,28 @@ describe("validateObjectField", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { type: "cell/jsonSchema", fieldName: "config", - jsonSchema: schema.fields[0].constraints?.jsonSchema, rowNumber: 2, cell: '{"database":{"host":"localhost","port":"invalid"},"cache":{"enabled":true}}', + pointer: "/database/port", + message: "must be number", }, ]) }) it("should validate jsonSchema with array properties", async () => { - const table = DataFrame({ - data: [ - '{"items":[1,2,3],"name":"test"}', - '{"items":["not","numbers"],"name":"test"}', - ], - }).lazy() + const table = pl + .DataFrame({ + data: [ + '{"items":[1,2,3],"name":"test"}', + '{"items":["not","numbers"],"name":"test"}', + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -377,14 +404,23 @@ describe("validateObjectField", () => { ], } - const { errors } = await validateTable(table, { schema }) - expect(errors).toEqual([ + const errors = await inspectTable(table, { schema }) + expect(errors.filter(e => e.type === "cell/jsonSchema")).toEqual([ + { + type: "cell/jsonSchema", + fieldName: "data", + rowNumber: 2, + cell: '{"items":["not","numbers"],"name":"test"}', + pointer: "/items/0", + message: "must be number", + }, { type: "cell/jsonSchema", fieldName: "data", - jsonSchema: schema.fields[0].constraints?.jsonSchema, rowNumber: 2, cell: '{"items":["not","numbers"],"name":"test"}', + pointer: "/items/1", + message: "must be number", }, ]) }) diff --git a/table/field/types/object.ts b/table/field/types/object.ts index 312f8240..881c5e50 100644 --- a/table/field/types/object.ts +++ b/table/field/types/object.ts @@ -1,7 +1,7 @@ -import type { ObjectField } from "@dpkit/core" +import type { ObjectField } from "@dpkit/metadata" import type { Table } from "../../table/index.ts" -import { validateJsonField } from "./json.ts" +import { inspectJsonField } from "./json.ts" -export async function validateObjectField(field: ObjectField, table: Table) { - return validateJsonField(field, table) +export async function inspectObjectField(field: ObjectField, table: Table) { + return inspectJsonField(field, table) } diff --git a/table/field/types/string.spec.ts b/table/field/types/string.spec.ts index e62ba159..781ca46f 100644 --- 
a/table/field/types/string.spec.ts +++ b/table/field/types/string.spec.ts @@ -1,4 +1,4 @@ -import { DataFrame, DataType, Series } from "nodejs-polars" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" import { normalizeTable } from "../../table/index.ts" @@ -12,17 +12,17 @@ describe("parseStringField", () => { // Null handling ["", null], ])("$0 -> $1", async (cell, value) => { - const table = DataFrame([Series("name", [cell], DataType.String)]).lazy() + const table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() const schema = { fields: [{ name: "name", type: "string" as const }], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.getColumn("name").dtype).toEqual(DataType.String) - expect(df.toRecords()[0]?.name).toEqual(value) + expect(frame.getColumn("name").dtype).toEqual(pl.String) + expect(frame.toRecords()[0]?.name).toEqual(value) }) describe("email format", () => { @@ -45,7 +45,7 @@ describe("parseStringField", () => { // Null handling ["", null], ])("$0 -> $1", async (cell, value) => { - const table = DataFrame([Series("name", [cell], DataType.String)]).lazy() + const table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() const schema = { fields: [ @@ -53,11 +53,11 @@ describe("parseStringField", () => { ], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.getColumn("name").dtype).toEqual(DataType.String) - expect(df.toRecords()[0]?.name).toEqual(value) + expect(frame.getColumn("name").dtype).toEqual(pl.String) + expect(frame.toRecords()[0]?.name).toEqual(value) }) }) @@ -84,7 +84,7 @@ describe("parseStringField", () => { // Null handling ["", null], ])("$0 -> $1", async (cell, value) => { - const table = DataFrame([Series("name", [cell], DataType.String)]).lazy() + const table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() const schema = { fields: [ @@ -92,11 +92,11 @@ describe("parseStringField", () => { ], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.getColumn("name").dtype).toEqual(DataType.String) - expect(df.toRecords()[0]?.name).toEqual(value) + expect(frame.getColumn("name").dtype).toEqual(pl.String) + expect(frame.toRecords()[0]?.name).toEqual(value) }) }) @@ -120,7 +120,7 @@ describe("parseStringField", () => { // Null handling ["", null], ])("$0 -> $1", async (cell, value) => { - const table = DataFrame([Series("name", [cell], DataType.String)]).lazy() + const table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() const schema = { fields: [ @@ -128,11 +128,11 @@ describe("parseStringField", () => { ], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.getColumn("name").dtype).toEqual(DataType.String) - expect(df.toRecords()[0]?.name).toEqual(value) + expect(frame.getColumn("name").dtype).toEqual(pl.String) + expect(frame.toRecords()[0]?.name).toEqual(value) }) }) @@ -152,7 +152,7 @@ describe("parseStringField", () => { // Null handling ["", null], ])("$0 -> $1 $2", async (cell, value) => { - const table = DataFrame([Series("name", 
[cell], DataType.String)]).lazy() + const table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() const schema = { fields: [ @@ -160,11 +160,11 @@ describe("parseStringField", () => { ], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.getColumn("name").dtype).toEqual(DataType.Categorical) - expect(df.toRecords()[0]?.name).toEqual(value) + expect(frame.getColumn("name").dtype).toEqual(pl.Categorical) + expect(frame.toRecords()[0]?.name).toEqual(value) }) }) @@ -179,17 +179,17 @@ describe("parseStringField", () => { ["apple", "apple", { categories: [{ value: "apple", label: "Apple" }] }], ["orange", null, { categories: [{ value: "apple", label: "Apple" }] }], ])("$0 -> $1 $2", async (cell, value, options) => { - const table = DataFrame([Series("name", [cell], DataType.String)]).lazy() + const table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() const schema = { fields: [{ name: "name", type: "string" as const, ...options }], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.getColumn("name").dtype).toEqual(DataType.Categorical) - expect(df.toRecords()[0]?.name).toEqual(value) + expect(frame.getColumn("name").dtype).toEqual(pl.Categorical) + expect(frame.toRecords()[0]?.name).toEqual(value) }) }) }) diff --git a/table/field/types/string.ts b/table/field/types/string.ts index 996303aa..872a99e3 100644 --- a/table/field/types/string.ts +++ b/table/field/types/string.ts @@ -1,6 +1,5 @@ -import type { StringField } from "@dpkit/core" -import { DataType, lit, when } from "nodejs-polars" -import type { Expr } from "nodejs-polars" +import type { StringField } from "@dpkit/metadata" +import * as pl from "nodejs-polars" const FORMAT_REGEX = { email: @@ -11,30 +10,32 @@ const FORMAT_REGEX = { } as const // TODO: support categoriesOrder? -export function parseStringField(field: StringField, fieldExpr: Expr) { +export function parseStringField(field: StringField, fieldExpr: pl.Expr) { const format = field.format const flattenCategories = field.categories?.map(it => typeof it === "string" ? 
it : it.value, ) if (flattenCategories) { - return when(fieldExpr.isIn(flattenCategories)) - .then(fieldExpr.cast(DataType.Categorical)) - .otherwise(lit(null)) + return pl + .when(fieldExpr.isIn(flattenCategories)) + .then(fieldExpr.cast(pl.Categorical)) + .otherwise(pl.lit(null)) .alias(field.name) } if (format) { const regex = FORMAT_REGEX[format] - return when(fieldExpr.str.contains(regex)) + return pl + .when(fieldExpr.str.contains(regex)) .then(fieldExpr) - .otherwise(lit(null)) + .otherwise(pl.lit(null)) .alias(field.name) } return fieldExpr } -export function stringifyStringField(_field: StringField, fieldExpr: Expr) { +export function stringifyStringField(_field: StringField, fieldExpr: pl.Expr) { return fieldExpr } diff --git a/table/field/types/time.spec.ts b/table/field/types/time.spec.ts index 0483b5d5..0822860c 100644 --- a/table/field/types/time.spec.ts +++ b/table/field/types/time.spec.ts @@ -1,4 +1,4 @@ -import { DataFrame, DataType, Series } from "nodejs-polars" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" import { normalizeTable } from "../../table/index.ts" import { denormalizeTable } from "../../table/index.ts" @@ -26,16 +26,16 @@ describe("parseTimeField", () => { // Invalid format //["06:00", null, { format: "invalid" }], ])("$0 -> $1 $2", async (cell, expected, options) => { - const table = DataFrame([Series("name", [cell], DataType.String)]).lazy() + const table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() const schema = { fields: [{ name: "name", type: "time" as const, ...options }], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(expected) + expect(frame.toRecords()[0]?.name).toEqual(expected) }) }) @@ -49,15 +49,15 @@ describe("stringifyTimeField", () => { [new Date(Date.UTC(2014, 0, 1, 6, 0, 0)), "06:00", { format: "%H:%M" }], [new Date(Date.UTC(2014, 0, 1, 16, 30, 0)), "16:30", { format: "%H:%M" }], ])("%s -> %s %o", async (value, expected, options) => { - const table = DataFrame([Series("name", [value], DataType.Time)]).lazy() + const table = pl.DataFrame([pl.Series("name", [value], pl.Time)]).lazy() const schema = { fields: [{ name: "name", type: "time" as const, ...options }], } - const ldf = await denormalizeTable(table, schema) - const df = await ldf.collect() + const result = await denormalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(expected) + expect(frame.toRecords()[0]?.name).toEqual(expected) }) }) diff --git a/table/field/types/time.ts b/table/field/types/time.ts index 23ad2370..be3a9cc1 100644 --- a/table/field/types/time.ts +++ b/table/field/types/time.ts @@ -1,24 +1,22 @@ -import type { TimeField } from "@dpkit/core" -import { DataType } from "nodejs-polars" +import type { TimeField } from "@dpkit/metadata" import * as pl from "nodejs-polars" -import type { Expr } from "nodejs-polars" const DEFAULT_FORMAT = "%H:%M:%S" -export function parseTimeField(field: TimeField, fieldExpr: Expr) { +export function parseTimeField(field: TimeField, fieldExpr: pl.Expr) { let format = DEFAULT_FORMAT if (field.format && field.format !== "default" && field.format !== "any") { format = field.format } - return pl - .concatString([pl.lit("1970-01-01T"), fieldExpr], "") - .str.strptime(DataType.Datetime, `%Y-%m-%dT${format}`) - .cast(DataType.Time) + return pl
.concatString([pl.pl.lit("1970-01-01T"), fieldExpr], "") + .str.strptime(pl.Datetime, `%Y-%m-%dT${format}`) + .cast(pl.Time) .alias(field.name) } -export function stringifyTimeField(field: TimeField, fieldExpr: Expr) { +export function stringifyTimeField(field: TimeField, fieldExpr: pl.Expr) { const format = field.format ?? DEFAULT_FORMAT return fieldExpr.date.strftime(format) diff --git a/table/field/types/year.spec.ts b/table/field/types/year.spec.ts index 5c6c1993..428b0907 100644 --- a/table/field/types/year.spec.ts +++ b/table/field/types/year.spec.ts @@ -1,4 +1,4 @@ -import { DataFrame, DataType, Series } from "nodejs-polars" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" import { denormalizeTable, normalizeTable } from "../../table/index.ts" @@ -20,16 +20,16 @@ describe("parseYearField", () => { ["12345", null], ["123", null], ])("%s -> %s", async (cell, value) => { - const table = DataFrame([Series("name", [cell], DataType.String)]).lazy() + const table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() const schema = { fields: [{ name: "name", type: "year" as const }], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.getColumn("name").get(0)).toEqual(value) + expect(frame.getColumn("name").get(0)).toEqual(value) }) }) @@ -45,15 +45,15 @@ describe("stringifyYearField", () => { // Edge cases with null values [null, ""], ])("%s -> %s", async (value, expected) => { - const table = DataFrame([Series("name", [value], DataType.Int16)]).lazy() + const table = pl.DataFrame([pl.Series("name", [value], pl.Int16)]).lazy() const schema = { fields: [{ name: "name", type: "year" as const }], } - const ldf = await denormalizeTable(table, schema) - const df = await ldf.collect() + const result = await denormalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(expected) + expect(frame.toRecords()[0]?.name).toEqual(expected) }) }) diff --git a/table/field/types/year.ts b/table/field/types/year.ts index 48c85750..4c87046a 100644 --- a/table/field/types/year.ts +++ b/table/field/types/year.ts @@ -1,18 +1,19 @@ -import type { YearField } from "@dpkit/core" -import { DataType, lit, when } from "nodejs-polars" -import type { Expr } from "nodejs-polars" +import type { YearField } from "@dpkit/metadata" +import * as pl from "nodejs-polars" -export function parseYearField(_field: YearField, fieldExpr: Expr) { - fieldExpr = when(fieldExpr.str.lengths().eq(4)) +export function parseYearField(_field: YearField, fieldExpr: pl.Expr) { + fieldExpr = pl + .when(fieldExpr.str.lengths().eq(4)) .then(fieldExpr) - .otherwise(lit(null)) - .cast(DataType.Int16) + .otherwise(pl.lit(null)) + .cast(pl.Int16) - return when(fieldExpr.gtEq(0).and(fieldExpr.ltEq(9999))) + return pl + .when(fieldExpr.gtEq(0).and(fieldExpr.ltEq(9999))) .then(fieldExpr) - .otherwise(lit(null)) + .otherwise(pl.lit(null)) } -export function stringifyYearField(_field: YearField, fieldExpr: Expr) { - return fieldExpr.cast(DataType.String).str.zFill(4) +export function stringifyYearField(_field: YearField, fieldExpr: pl.Expr) { + return fieldExpr.cast(pl.String).str.zFill(4) } diff --git a/table/field/types/yearmonth.spec.ts b/table/field/types/yearmonth.spec.ts index 4e77b679..1bac9884 100644 --- a/table/field/types/yearmonth.spec.ts +++ b/table/field/types/yearmonth.spec.ts @@ -1,4 +1,4 @@ -import { DataFrame, 
DataType, Series } from "nodejs-polars" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" import { normalizeTable } from "../../table/index.ts" import { denormalizeTable } from "../../table/index.ts" @@ -8,16 +8,16 @@ describe("parseYearmonthField", () => { ["2000-01", [2000, 1]], ["0-0", [0, 0]], ])("%s -> %s", async (cell, value) => { - const table = DataFrame([Series("name", [cell], DataType.String)]).lazy() + const table = pl.DataFrame([pl.Series("name", [cell], pl.String)]).lazy() const schema = { fields: [{ name: "name", type: "yearmonth" as const }], } - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() + const result = await normalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(value) + expect(frame.toRecords()[0]?.name).toEqual(value) }) }) @@ -27,17 +27,17 @@ describe("stringifyYearmonthField", () => { [[2023, 12], "2023-12"], [[0, 0], "0000-00"], ])("%s -> %s", async (value, expected) => { - const table = DataFrame([ - Series("name", [value], DataType.List(DataType.Int16)), - ]).lazy() + const table = pl + .DataFrame([pl.Series("name", [value], pl.List(pl.Int16))]) + .lazy() const schema = { fields: [{ name: "name", type: "yearmonth" as const }], } - const ldf = await denormalizeTable(table, schema) - const df = await ldf.collect() + const result = await denormalizeTable(table, schema) + const frame = await result.collect() - expect(df.toRecords()[0]?.name).toEqual(expected) + expect(frame.toRecords()[0]?.name).toEqual(expected) }) }) diff --git a/table/field/types/yearmonth.ts b/table/field/types/yearmonth.ts index d9644723..18819f80 100644 --- a/table/field/types/yearmonth.ts +++ b/table/field/types/yearmonth.ts @@ -1,22 +1,26 @@ -import type { YearmonthField } from "@dpkit/core" -import { DataType, concatString } from "nodejs-polars" -import type { Expr } from "nodejs-polars" +import type { YearmonthField } from "@dpkit/metadata" +import * as pl from "nodejs-polars" -export function parseYearmonthField(_field: YearmonthField, fieldExpr: Expr) { - fieldExpr = fieldExpr.str.split("-").cast(DataType.List(DataType.Int16)) +export function parseYearmonthField( + _field: YearmonthField, + fieldExpr: pl.Expr, +) { + fieldExpr = fieldExpr.str.split("-").cast(pl.List(pl.Int16)) return fieldExpr } export function stringifyYearmonthField( field: YearmonthField, - fieldExpr: Expr, + fieldExpr: pl.Expr, ) { - return concatString( - [ - fieldExpr.lst.get(0).cast(DataType.String).str.zFill(4), - fieldExpr.lst.get(1).cast(DataType.String).str.zFill(2), - ], - "-", - ).alias(field.name) as Expr + return pl + .concatString( + [ + fieldExpr.lst.get(0).cast(pl.String).str.zFill(4), + fieldExpr.lst.get(1).cast(pl.String).str.zFill(2), + ], + "-", + ) + .alias(field.name) as pl.Expr } diff --git a/table/helpers.ts b/table/helpers.ts index f5c85729..7bd24bd6 100644 --- a/table/helpers.ts +++ b/table/helpers.ts @@ -1,4 +1,3 @@ -import type { Expr } from "nodejs-polars" import * as pl from "nodejs-polars" export function isObject(value: any): value is Record { @@ -9,7 +8,7 @@ export function arrayDiff(a: string[], b: string[]) { return a.filter(x => !b.includes(x)) } -export function evaluateExpression(expr: Expr) { +export function evaluateExpression(expr: pl.Expr) { // @ts-ignore return pl.select(expr.alias("value")).toRecords()[0].value } diff --git a/table/index.ts b/table/index.ts index a7b99757..4e083d78 100644 --- a/table/index.ts +++ b/table/index.ts @@ -1,8 +1,42 @@ 
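// Illustrative usage sketch (assumptions, not from the patch): given the explicit
// barrel exports introduced in the hunk below, a consumer imports plugins from the
// package root; `data.csv` is a hypothetical file used only for illustration.
import { CsvPlugin } from "@dpkit/table"

const plugin = new CsvPlugin()
const table = await plugin.loadTable({ path: "data.csv" })
if (table) console.log((await table.collect()).toRecords())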
-export * from "./data/index.ts" -export * from "./dialect/index.ts" -export * from "./error/index.ts" -export * from "./field/index.ts" -export * from "./package/index.ts" -export * from "./schema/index.ts" -export * from "./table/index.ts" -export * from "./plugin.ts" +export type { DataRecord } from "./data/index.ts" +export type { DataRow } from "./data/index.ts" +export type { DenormalizeFieldOptions } from "./field/index.ts" +export type { DialectOptions } from "./dialect/index.ts" +export type { Frame } from "./table/index.ts" +export type { InferDialectOptions } from "./dialect/index.ts" +export type { InferSchemaOptions } from "./schema/index.ts" +export type { LoadTableOptions } from "./plugin.ts" +export type { PolarsField } from "./field/index.ts" +export type { PolarsSchema } from "./schema/index.ts" +export type { SaveTableOptions } from "./plugin.ts" +export type { SchemaOptions } from "./schema/index.ts" +export type { Table } from "./table/index.ts" +export type { TablePlugin } from "./plugin.ts" + +export { ArrowPlugin } from "./plugins/arrow/index.ts" +export { CsvPlugin } from "./plugins/csv/index.ts" +export { InlinePlugin } from "./plugins/inline/index.ts" +export { JsonPlugin } from "./plugins/json/index.ts" +export { OdsPlugin } from "./plugins/ods/index.ts" +export { ParquetPlugin } from "./plugins/parquet/index.ts" +export { XlsxPlugin } from "./plugins/xlxs/index.ts" + +export { denormalizeTable } from "./table/index.ts" +export { inferSchemaFromSample } from "./schema/index.ts" +export { inferSchemaFromTable } from "./schema/index.ts" +export { inspectTable } from "./table/index.ts" +export { loadArrowTable } from "./plugins/arrow/index.ts" +export { loadCsvTable } from "./plugins/csv/index.ts" +export { loadInlineTable } from "./plugins/inline/index.ts" +export { loadJsonTable } from "./plugins/json/index.ts" +export { loadOdsTable } from "./plugins/ods/index.ts" +export { loadParquetTable } from "./plugins/parquet/index.ts" +export { loadXlsxTable } from "./plugins/xlxs/index.ts" +export { normalizeTable } from "./table/index.ts" +export { queryTable } from "./table/index.ts" +export { saveArrowTable } from "./plugins/arrow/index.ts" +export { saveCsvTable } from "./plugins/csv/index.ts" +export { saveJsonTable } from "./plugins/json/index.ts" +export { saveOdsTable } from "./plugins/ods/index.ts" +export { saveParquetTable } from "./plugins/parquet/index.ts" +export { saveXlsxTable } from "./plugins/xlxs/index.ts" diff --git a/table/package.json b/table/package.json index d30f4eb4..04f2bc07 100644 --- a/table/package.json +++ b/table/package.json @@ -4,6 +4,7 @@ "version": "0.0.0-dev", "exports": "./build/index.js", "sideEffects": false, + "files": ["build"], "license": "MIT", "author": "Evgeny Karev", "repository": "https://github.com/datisthq/dpkit", @@ -18,14 +19,24 @@ "validation", "quality", "fair", - "table" + "table", + "arrow", + "csv", + "inline", + "json", + "ods", + "parquet", + "xlsx" ], "scripts": { "build": "tsc" }, "dependencies": { - "@dpkit/core": "workspace:*", - "nodejs-polars": "^0.22.1", - "p-all": "^5.0.1" + "@dpkit/metadata": "workspace:*", + "@dpkit/dataset": "workspace:*", + "csv-sniffer": "^0.1.1", + "nodejs-polars": "^0.22.2", + "p-all": "^5.0.1", + "xlsx": "https://cdn.sheetjs.com/xlsx-0.20.3/xlsx-0.20.3.tgz" } } diff --git a/table/package/index.ts b/table/package/index.ts deleted file mode 100644 index eb1b4c45..00000000 --- a/table/package/index.ts +++ /dev/null @@ -1 +0,0 @@ -export { validatePackageForeignKeys } from 
"./validate.ts" diff --git a/table/plugin.ts b/table/plugin.ts index fc3014b9..4aa56e1b 100644 --- a/table/plugin.ts +++ b/table/plugin.ts @@ -1,5 +1,6 @@ -import type { Dialect, Package, Plugin, Resource, Schema } from "@dpkit/core" -import type { SavePackageOptions } from "@dpkit/core" +import type { SavePackageOptions } from "@dpkit/dataset" +import type { DatasetPlugin } from "@dpkit/dataset" +import type { Dialect, Package, Resource, Schema } from "@dpkit/metadata" import type { DialectOptions, InferDialectOptions } from "./dialect/index.ts" import type { InferSchemaOptions, SchemaOptions } from "./schema/index.ts" import type { Table } from "./table/index.ts" @@ -18,7 +19,7 @@ export type SaveTableOptions = DialectOptions & overwrite?: boolean } -export interface TablePlugin extends Plugin { +export interface TablePlugin extends DatasetPlugin { savePackage?( dataPackage: Package, options: SavePackageOptions & { plugins?: TablePlugin[] }, diff --git a/arrow/index.ts b/table/plugins/arrow/index.ts similarity index 100% rename from arrow/index.ts rename to table/plugins/arrow/index.ts diff --git a/arrow/plugin.spec.ts b/table/plugins/arrow/plugin.spec.ts similarity index 89% rename from arrow/plugin.spec.ts rename to table/plugins/arrow/plugin.spec.ts index e61c3ba6..3babaeee 100644 --- a/arrow/plugin.spec.ts +++ b/table/plugins/arrow/plugin.spec.ts @@ -1,5 +1,5 @@ -import type { Resource } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Resource } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { beforeEach, describe, expect, it, vi } from "vitest" import { ArrowPlugin } from "./plugin.ts" import * as tableModule from "./table/index.ts" @@ -26,7 +26,7 @@ describe("ArrowPlugin", () => { const resource: Partial = { path: "test.arrow", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadArrowTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -39,7 +39,7 @@ describe("ArrowPlugin", () => { const resource: Partial = { path: "test.feather", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadArrowTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -64,7 +64,7 @@ describe("ArrowPlugin", () => { path: "test.txt", format: "arrow", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadArrowTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -78,7 +78,7 @@ describe("ArrowPlugin", () => { path: "test.txt", format: "feather", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadArrowTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -92,7 +92,7 @@ describe("ArrowPlugin", () => { path: "test.arrow", } const options = { denormalized: true } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadArrowTable.mockResolvedValue(mockTable) await plugin.loadTable(resource, options) @@ -104,7 +104,7 @@ describe("ArrowPlugin", () => { const resource: Partial = { path: "/path/to/data.arrow", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadArrowTable.mockResolvedValue(mockTable) await plugin.loadTable(resource) @@ -126,7 +126,7 @@ describe("ArrowPlugin", () => { describe("saveTable", () => { it("should save table to arrow file", async () => { - const table = DataFrame().lazy() + const table = 
pl.DataFrame().lazy() const options = { path: "output.arrow" } mockSaveArrowTable.mockResolvedValue("output.arrow") @@ -137,7 +137,7 @@ describe("ArrowPlugin", () => { }) it("should save table to feather file", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.feather" } mockSaveArrowTable.mockResolvedValue("output.feather") @@ -148,7 +148,7 @@ describe("ArrowPlugin", () => { }) it("should return undefined for non-arrow files", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.csv" } const result = await plugin.saveTable(table, options) @@ -158,7 +158,7 @@ describe("ArrowPlugin", () => { }) it("should handle explicit arrow format specification", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.txt", format: "arrow" as const } mockSaveArrowTable.mockResolvedValue("output.txt") @@ -169,7 +169,7 @@ describe("ArrowPlugin", () => { }) it("should handle explicit feather format specification", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.txt", format: "feather" as const } mockSaveArrowTable.mockResolvedValue("output.txt") @@ -180,7 +180,7 @@ describe("ArrowPlugin", () => { }) it("should handle paths with directories", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "/path/to/output.arrow" } mockSaveArrowTable.mockResolvedValue("/path/to/output.arrow") @@ -190,7 +190,7 @@ describe("ArrowPlugin", () => { }) it("should return undefined for files without extension", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output" } const result = await plugin.saveTable(table, options) @@ -200,7 +200,7 @@ describe("ArrowPlugin", () => { }) it("should return undefined for parquet files", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.parquet" } const result = await plugin.saveTable(table, options) diff --git a/arrow/plugin.ts b/table/plugins/arrow/plugin.ts similarity index 68% rename from arrow/plugin.ts rename to table/plugins/arrow/plugin.ts index 0220ea31..e1a9f01f 100644 --- a/arrow/plugin.ts +++ b/table/plugins/arrow/plugin.ts @@ -1,8 +1,11 @@ -import type { Resource } from "@dpkit/core" -import { inferResourceFormat } from "@dpkit/core" -import type { LoadTableOptions } from "@dpkit/table" -import type { TablePlugin } from "@dpkit/table" -import type { SaveTableOptions, Table } from "@dpkit/table" +import type { Resource } from "@dpkit/metadata" +import { inferFormat } from "@dpkit/metadata" +import type { + LoadTableOptions, + SaveTableOptions, + TablePlugin, +} from "../../plugin.ts" +import type { Table } from "../../table/index.ts" import { loadArrowTable, saveArrowTable } from "./table/index.ts" export class ArrowPlugin implements TablePlugin { @@ -24,6 +27,6 @@ export class ArrowPlugin implements TablePlugin { } function getIsArrow(resource: Partial) { - const format = inferResourceFormat(resource) + const format = inferFormat(resource) return format === "arrow" || format === "feather" } diff --git a/arrow/table/fixtures/generated/loadArrowTable-file-variations-should-load-remote-file-multipart_3177840048/recording.har 
b/table/plugins/arrow/table/fixtures/generated/loadArrowTable-file-variations-should-load-remote-file-multipart_3177840048/recording.har similarity index 100% rename from arrow/table/fixtures/generated/loadArrowTable-file-variations-should-load-remote-file-multipart_3177840048/recording.har rename to table/plugins/arrow/table/fixtures/generated/loadArrowTable-file-variations-should-load-remote-file-multipart_3177840048/recording.har diff --git a/arrow/table/fixtures/generated/loadArrowTable-file-variations-should-load-remote-file_1224085963/recording.har b/table/plugins/arrow/table/fixtures/generated/loadArrowTable-file-variations-should-load-remote-file_1224085963/recording.har similarity index 100% rename from arrow/table/fixtures/generated/loadArrowTable-file-variations-should-load-remote-file_1224085963/recording.har rename to table/plugins/arrow/table/fixtures/generated/loadArrowTable-file-variations-should-load-remote-file_1224085963/recording.har diff --git a/arrow/table/index.ts b/table/plugins/arrow/table/index.ts similarity index 100% rename from arrow/table/index.ts rename to table/plugins/arrow/table/index.ts diff --git a/arrow/table/load.spec.ts b/table/plugins/arrow/table/load.spec.ts similarity index 83% rename from arrow/table/load.spec.ts rename to table/plugins/arrow/table/load.spec.ts index 97166328..a20c2b83 100644 --- a/arrow/table/load.spec.ts +++ b/table/plugins/arrow/table/load.spec.ts @@ -1,7 +1,7 @@ -import { getTempFilePath } from "@dpkit/file" -import { useRecording } from "@dpkit/test" -import { DataFrame } from "nodejs-polars" +import { getTempFilePath } from "@dpkit/dataset" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" +import { useRecording } from "vitest-polly" import { loadArrowTable } from "./load.ts" useRecording() @@ -10,7 +10,7 @@ describe("loadArrowTable", () => { describe("file variations", () => { it("should load local file", async () => { const path = getTempFilePath() - DataFrame({ id: [1, 2], name: ["english", "中文"] }).writeIPC(path) + pl.DataFrame({ id: [1, 2], name: ["english", "中文"] }).writeIPC(path) const table = await loadArrowTable({ path }) expect((await table.collect()).toRecords()).toEqual([ @@ -22,8 +22,8 @@ describe("loadArrowTable", () => { it("should load local file (multipart)", async () => { const path1 = getTempFilePath() const path2 = getTempFilePath() - DataFrame({ id: [1, 2], name: ["english", "中文"] }).writeIPC(path1) - DataFrame({ id: [1, 2], name: ["english", "中文"] }).writeIPC(path2) + pl.DataFrame({ id: [1, 2], name: ["english", "中文"] }).writeIPC(path1) + pl.DataFrame({ id: [1, 2], name: ["english", "中文"] }).writeIPC(path2) const table = await loadArrowTable({ path: [path1, path2] }) expect((await table.collect()).toRecords()).toEqual([ diff --git a/arrow/table/load.ts b/table/plugins/arrow/table/load.ts similarity index 51% rename from arrow/table/load.ts rename to table/plugins/arrow/table/load.ts index 6fa5c2e9..043b0dd9 100644 --- a/arrow/table/load.ts +++ b/table/plugins/arrow/table/load.ts @@ -1,10 +1,10 @@ -import type { Resource } from "@dpkit/core" -import { resolveSchema } from "@dpkit/core" -import { prefetchFiles } from "@dpkit/file" -import type { LoadTableOptions } from "@dpkit/table" -import { inferSchemaFromTable, normalizeTable } from "@dpkit/table" -import { concat } from "nodejs-polars" -import { scanIPC } from "nodejs-polars" +import type { Resource } from "@dpkit/metadata" +import { resolveSchema } from "@dpkit/metadata" +import { prefetchFiles } from 
"@dpkit/dataset" +import type { LoadTableOptions } from "../../../plugin.ts" +import { inferSchemaFromTable } from "../../../schema/index.ts" +import { normalizeTable } from "../../../table/index.ts" +import * as pl from "nodejs-polars" export async function loadArrowTable( resource: Partial, @@ -15,9 +15,9 @@ export async function loadArrowTable( throw new Error("Resource path is not defined") } - let table = scanIPC(firstPath) + let table = pl.scanIPC(firstPath) if (restPaths.length) { - table = concat([table, ...restPaths.map(path => scanIPC(path))]) + table = pl.concat([table, ...restPaths.map(path => pl.scanIPC(path))]) } if (!options?.denormalized) { diff --git a/arrow/table/save.spec.ts b/table/plugins/arrow/table/save.spec.ts similarity index 58% rename from arrow/table/save.spec.ts rename to table/plugins/arrow/table/save.spec.ts index 3b479d67..92afa9c3 100644 --- a/arrow/table/save.spec.ts +++ b/table/plugins/arrow/table/save.spec.ts @@ -1,5 +1,5 @@ -import { getTempFilePath } from "@dpkit/file" -import { DataFrame, DataType, Series } from "nodejs-polars" +import { getTempFilePath } from "@dpkit/dataset" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" import { loadArrowTable } from "./load.ts" import { saveArrowTable } from "./save.ts" @@ -7,10 +7,12 @@ import { saveArrowTable } from "./save.ts" describe("saveArrowTable", () => { it("should save table to Arrow file", async () => { const path = getTempFilePath() - const source = DataFrame({ - id: [1.0, 2.0, 3.0], - name: ["Alice", "Bob", "Charlie"], - }).lazy() + const source = pl + .DataFrame({ + id: [1.0, 2.0, 3.0], + name: ["Alice", "Bob", "Charlie"], + }) + .lazy() await saveArrowTable(source, { path }) @@ -25,23 +27,25 @@ describe("saveArrowTable", () => { it("should save and load various data types", async () => { const path = getTempFilePath() - const source = DataFrame([ - Series("array", ["[1, 2, 3]"], DataType.String), - Series("boolean", [true], DataType.Bool), - Series("date", [new Date(Date.UTC(2025, 0, 1))], DataType.Date), - Series("datetime", [new Date(Date.UTC(2025, 0, 1))], DataType.Datetime), - Series("duration", ["P23DT23H"], DataType.String), - Series("geojson", ['{"value": 1}'], DataType.String), - Series("geopoint", [[40.0, 50.0]], DataType.List(DataType.Float32)), - Series("integer", [1], DataType.Int32), - Series("list", [[1.0, 2.0, 3.0]], DataType.List(DataType.Float32)), - Series("number", [1.1], DataType.Float64), - Series("object", ['{"value": 1}']), - Series("string", ["string"], DataType.String), - Series("time", [new Date(Date.UTC(2025, 0, 1))], DataType.Time), - Series("year", [2025], DataType.Int32), - Series("yearmonth", [[2025, 1]], DataType.List(DataType.Int16)), - ]).lazy() + const source = pl + .DataFrame([ + pl.Series("array", ["[1, 2, 3]"], pl.String), + pl.Series("boolean", [true], pl.Bool), + pl.Series("date", [new Date(Date.UTC(2025, 0, 1))], pl.Date), + pl.Series("datetime", [new Date(Date.UTC(2025, 0, 1))], pl.Datetime), + pl.Series("duration", ["P23DT23H"], pl.String), + pl.Series("geojson", ['{"value": 1}'], pl.String), + pl.Series("geopoint", [[40.0, 50.0]], pl.List(pl.Float32)), + pl.Series("integer", [1], pl.Int32), + pl.Series("list", [[1.0, 2.0, 3.0]], pl.List(pl.Float32)), + pl.Series("number", [1.1], pl.Float64), + pl.Series("object", ['{"value": 1}']), + pl.Series("string", ["string"], pl.String), + pl.Series("time", [new Date(Date.UTC(2025, 0, 1))], pl.Time), + pl.Series("year", [2025], pl.Int32), + pl.Series("yearmonth", [[2025, 1]], 
pl.List(pl.Int16)), + ]) + .lazy() await saveArrowTable(source, { path, diff --git a/arrow/table/save.ts b/table/plugins/arrow/table/save.ts similarity index 64% rename from arrow/table/save.ts rename to table/plugins/arrow/table/save.ts index b787c98d..7c80bf24 100644 --- a/arrow/table/save.ts +++ b/table/plugins/arrow/table/save.ts @@ -1,6 +1,8 @@ -import { assertLocalPathVacant } from "@dpkit/file" -import { denormalizeTable, inferSchemaFromTable } from "@dpkit/table" -import type { SaveTableOptions, Table } from "@dpkit/table" +import { assertLocalPathVacant } from "@dpkit/dataset" +import type { SaveTableOptions } from "../../../plugin.ts" +import { inferSchemaFromTable } from "../../../schema/index.ts" +import { denormalizeTable } from "../../../table/index.ts" +import type { Table } from "../../../table/index.ts" // TODO: rebase on sinkIPC when it is available // https://github.com/pola-rs/nodejs-polars/issues/353 @@ -31,8 +33,8 @@ export async function saveArrowTable(table: Table, options: SaveTableOptions) { ], }) - const df = await table.collect() - df.writeIPC(path) + const frame = await table.collect() + frame.writeIPC(path) return path } diff --git a/csv/csv-sniffer.d.ts b/table/plugins/csv/csv-sniffer.d.ts similarity index 100% rename from csv/csv-sniffer.d.ts rename to table/plugins/csv/csv-sniffer.d.ts diff --git a/csv/dialect/index.ts b/table/plugins/csv/dialect/index.ts similarity index 100% rename from csv/dialect/index.ts rename to table/plugins/csv/dialect/index.ts diff --git a/csv/dialect/infer.spec.ts b/table/plugins/csv/dialect/infer.spec.ts similarity index 97% rename from csv/dialect/infer.spec.ts rename to table/plugins/csv/dialect/infer.spec.ts index 3d9222b2..0b832121 100644 --- a/csv/dialect/infer.spec.ts +++ b/table/plugins/csv/dialect/infer.spec.ts @@ -1,4 +1,4 @@ -import { writeTempFile } from "@dpkit/file" +import { writeTempFile } from "@dpkit/dataset" import { describe, expect, it } from "vitest" import { inferCsvDialect } from "./infer.ts" diff --git a/csv/dialect/infer.ts b/table/plugins/csv/dialect/infer.ts similarity index 92% rename from csv/dialect/infer.ts rename to table/plugins/csv/dialect/infer.ts index d5fd88d4..cb961484 100644 --- a/csv/dialect/infer.ts +++ b/table/plugins/csv/dialect/infer.ts @@ -1,6 +1,6 @@ import { text } from "node:stream/consumers" -import type { Dialect, Resource } from "@dpkit/core" -import { loadFileStream } from "@dpkit/file" +import { loadFileStream } from "@dpkit/dataset" +import type { Dialect, Resource } from "@dpkit/metadata" import { default as CsvSnifferFactory } from "csv-sniffer" const CSV_DELIMITERS = [",", ";", ":", "|", "\t", "^", "*", "&"] diff --git a/inline/index.ts b/table/plugins/csv/index.ts similarity index 100% rename from inline/index.ts rename to table/plugins/csv/index.ts diff --git a/csv/plugin.spec.ts b/table/plugins/csv/plugin.spec.ts similarity index 90% rename from csv/plugin.spec.ts rename to table/plugins/csv/plugin.spec.ts index 9f6ef7f8..c2a283c7 100644 --- a/csv/plugin.spec.ts +++ b/table/plugins/csv/plugin.spec.ts @@ -1,5 +1,5 @@ -import type { Resource } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Resource } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { beforeEach, describe, expect, it, vi } from "vitest" import { CsvPlugin } from "./plugin.ts" import * as tableModule from "./table/index.ts" @@ -26,7 +26,7 @@ describe("CsvPlugin", () => { const resource: Partial = { path: "test.csv", } - const mockTable = DataFrame().lazy() + 
const mockTable = pl.DataFrame().lazy() mockLoadCsvTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -42,7 +42,7 @@ describe("CsvPlugin", () => { const resource: Partial = { path: "test.tsv", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadCsvTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -70,7 +70,7 @@ describe("CsvPlugin", () => { path: "test.txt", format: "csv", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadCsvTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -87,7 +87,7 @@ describe("CsvPlugin", () => { path: "test.csv", } const options = { denormalized: true } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadCsvTable.mockResolvedValue(mockTable) await plugin.loadTable(resource, options) @@ -102,7 +102,7 @@ describe("CsvPlugin", () => { const resource: Partial = { path: "/path/to/data.csv", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadCsvTable.mockResolvedValue(mockTable) await plugin.loadTable(resource) @@ -118,7 +118,7 @@ describe("CsvPlugin", () => { path: "test.txt", format: "tsv", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadCsvTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -133,7 +133,7 @@ describe("CsvPlugin", () => { describe("saveTable", () => { it("should save table to csv file", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.csv" } mockSaveCsvTable.mockResolvedValue("output.csv") @@ -147,7 +147,7 @@ describe("CsvPlugin", () => { }) it("should save table to tsv file", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.tsv" } mockSaveCsvTable.mockResolvedValue("output.tsv") @@ -161,7 +161,7 @@ describe("CsvPlugin", () => { }) it("should return undefined for non-csv files", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.json" } const result = await plugin.saveTable(table, options) @@ -171,7 +171,7 @@ describe("CsvPlugin", () => { }) it("should handle explicit format specification", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.txt", format: "csv" as const } mockSaveCsvTable.mockResolvedValue("output.txt") @@ -185,7 +185,7 @@ describe("CsvPlugin", () => { }) it("should handle paths with directories", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "/path/to/output.csv" } mockSaveCsvTable.mockResolvedValue("/path/to/output.csv") @@ -198,7 +198,7 @@ describe("CsvPlugin", () => { }) it("should return undefined for files without extension", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output" } const result = await plugin.saveTable(table, options) @@ -208,7 +208,7 @@ describe("CsvPlugin", () => { }) it("should handle explicit tsv format specification", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.txt", format: "tsv" as const } mockSaveCsvTable.mockResolvedValue("output.txt") diff --git a/csv/plugin.ts b/table/plugins/csv/plugin.ts similarity 
index 70% rename from csv/plugin.ts rename to table/plugins/csv/plugin.ts index 137ba8d6..efce7541 100644 --- a/csv/plugin.ts +++ b/table/plugins/csv/plugin.ts @@ -1,8 +1,8 @@ -import type { Resource } from "@dpkit/core" -import { inferResourceFormat } from "@dpkit/core" -import type { LoadTableOptions } from "@dpkit/table" -import type { TablePlugin } from "@dpkit/table" -import type { SaveTableOptions, Table } from "@dpkit/table" +import type { Resource } from "@dpkit/metadata" +import { inferFormat } from "@dpkit/metadata" +import type { LoadTableOptions, SaveTableOptions } from "../../plugin.ts" +import type { TablePlugin } from "../../plugin.ts" +import type { Table } from "../../table/index.ts" import { loadCsvTable, saveCsvTable } from "./table/index.ts" export class CsvPlugin implements TablePlugin { @@ -24,6 +24,6 @@ export class CsvPlugin implements TablePlugin { } function getCsvFormat(resource: Partial) { - const format = inferResourceFormat(resource) + const format = inferFormat(resource) return format === "csv" || format === "tsv" ? format : undefined } diff --git a/csv/table/fixtures/generated/loadCsvTable-should-load-remote-file-multipart_959749322/recording.har b/table/plugins/csv/table/fixtures/generated/loadCsvTable-should-load-remote-file-multipart_959749322/recording.har similarity index 100% rename from csv/table/fixtures/generated/loadCsvTable-should-load-remote-file-multipart_959749322/recording.har rename to table/plugins/csv/table/fixtures/generated/loadCsvTable-should-load-remote-file-multipart_959749322/recording.har diff --git a/csv/table/fixtures/generated/loadCsvTable-should-load-remote-file_2012170585/recording.har b/table/plugins/csv/table/fixtures/generated/loadCsvTable-should-load-remote-file_2012170585/recording.har similarity index 100% rename from csv/table/fixtures/generated/loadCsvTable-should-load-remote-file_2012170585/recording.har rename to table/plugins/csv/table/fixtures/generated/loadCsvTable-should-load-remote-file_2012170585/recording.har diff --git a/csv/table/fixtures/table.csv b/table/plugins/csv/table/fixtures/table.csv similarity index 100% rename from csv/table/fixtures/table.csv rename to table/plugins/csv/table/fixtures/table.csv diff --git a/csv/table/index.ts b/table/plugins/csv/table/index.ts similarity index 100% rename from csv/table/index.ts rename to table/plugins/csv/table/index.ts diff --git a/csv/table/load.spec.ts b/table/plugins/csv/table/load.spec.ts similarity index 99% rename from csv/table/load.spec.ts rename to table/plugins/csv/table/load.spec.ts index 93031c16..436ac717 100644 --- a/csv/table/load.spec.ts +++ b/table/plugins/csv/table/load.spec.ts @@ -1,7 +1,7 @@ import { Buffer } from "node:buffer" -import { writeTempFile } from "@dpkit/file" -import { useRecording } from "@dpkit/test" +import { writeTempFile } from "@dpkit/dataset" import { describe, expect, it } from "vitest" +import { useRecording } from "vitest-polly" import { loadCsvTable } from "./load.ts" useRecording() diff --git a/csv/table/load.ts b/table/plugins/csv/table/load.ts similarity index 80% rename from csv/table/load.ts rename to table/plugins/csv/table/load.ts index bfd0e560..71bccdc6 100644 --- a/csv/table/load.ts +++ b/table/plugins/csv/table/load.ts @@ -1,15 +1,14 @@ -import type { Dialect, Resource } from "@dpkit/core" -import { resolveDialect, resolveSchema } from "@dpkit/core" -import { prefetchFiles } from "@dpkit/file" -import type { Table } from "@dpkit/table" -import { inferSchemaFromTable, normalizeTable } from "@dpkit/table" 
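// Illustrative sketch (assumptions, not from the patch): it mirrors the
// namespace-style Polars calls that loadCsvTable uses below — each path is
// scanned lazily and the results are concatenated into one lazy table; the
// options shown are the defaults from getScanOptions.
import * as pl from "nodejs-polars"

function scanCsvPaths(paths: string[]) {
  const tables = paths.map(path =>
    pl.scanCSV(path, { inferSchemaLength: 0, truncateRaggedLines: true }),
  )
  return pl.concat(tables)
}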
-import { stripInitialSpace } from "@dpkit/table" -import { joinHeaderRows } from "@dpkit/table" -import { skipCommentRows } from "@dpkit/table" -import type { LoadTableOptions } from "@dpkit/table" -import { scanCSV } from "nodejs-polars" -import type { ScanCsvOptions } from "nodejs-polars" -import { concat } from "nodejs-polars" +import type { Dialect, Resource } from "@dpkit/metadata" +import { resolveDialect, resolveSchema } from "@dpkit/metadata" +import { prefetchFiles } from "@dpkit/dataset" +import type { LoadTableOptions } from "../../../plugin.ts" +import { inferSchemaFromTable } from "../../../schema/index.ts" +import { joinHeaderRows } from "../../../table/index.ts" +import { normalizeTable } from "../../../table/index.ts" +import { skipCommentRows } from "../../../table/index.ts" +import { stripInitialSpace } from "../../../table/index.ts" +import type { Table } from "../../../table/index.ts" +import * as pl from "nodejs-polars" import { inferCsvDialect } from "../dialect/index.ts"  // TODO: Consider using sample to extract header first @@ -33,13 +32,13 @@ export async function loadCsvTable(   const scanOptions = getScanOptions(resource, dialect)   const tables: Table[] = []   for (const path of paths) { -    const table = scanCSV(path, scanOptions) +    const table = pl.scanCSV(path, scanOptions)     tables.push(table)   }    // There is no way to specify column names in nodejs-polars by default   // so we have to rename `column_*` to `field*` if the table doesn't have a header -  let table = concat(tables) +  let table = pl.concat(tables)   if (!scanOptions.hasHeader) {     table = table.rename(       Object.fromEntries( @@ -64,7 +63,7 @@ }  function getScanOptions(resource: Partial<Resource>, dialect?: Dialect) { -  const options: Partial<ScanCsvOptions> = { +  const options: Partial<pl.ScanCsvOptions> = {     inferSchemaLength: 0,     truncateRaggedLines: true,   } diff --git a/csv/table/save.spec.ts b/table/plugins/csv/table/save.spec.ts similarity index 62% rename from csv/table/save.spec.ts rename to table/plugins/csv/table/save.spec.ts index de76ff47..e311b41e 100644 --- a/csv/table/save.spec.ts +++ b/table/plugins/csv/table/save.spec.ts @@ -1,6 +1,6 @@ import { readFile } from "node:fs/promises" -import { getTempFilePath } from "@dpkit/file" -import { DataFrame, DataType, Series } from "nodejs-polars" +import { getTempFilePath } from "@dpkit/dataset" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" import { loadCsvTable } from "./load.ts" import { saveCsvTable } from "./save.ts" @@ -8,10 +8,12 @@ describe("saveCsvTable", () => { it("should save table to file", async () => { const path = getTempFilePath() - const table = DataFrame({ - id: [1.0, 2.0, 3.0], - name: ["Alice", "Bob", "Charlie"], - }).lazy() + const table = 
pl + .DataFrame({ + id: [1.0, 2.0, 3.0], + name: ["Alice", "Bob", "Charlie"], + }) + .lazy() await saveCsvTable(table, { path, @@ -53,10 +59,12 @@ describe("saveCsvTable", () => { it("should save with custom quote char", async () => { const path = getTempFilePath() - const table = DataFrame({ - id: [1.0, 2.0, 3.0], - name: ["Alice,Smith", "Bob,Jones", "Charlie,Brown"], - }).lazy() + const table = pl + .DataFrame({ + id: [1.0, 2.0, 3.0], + name: ["Alice,Smith", "Bob,Jones", "Charlie,Brown"], + }) + .lazy() await saveCsvTable(table, { path, @@ -72,23 +80,25 @@ describe("saveCsvTable", () => { it("should save and load various data types", async () => { const path = getTempFilePath() - const source = DataFrame([ - Series("array", ["[1, 2, 3]"], DataType.String), - Series("boolean", [true], DataType.Bool), - Series("date", [new Date(Date.UTC(2025, 0, 1))], DataType.Date), - Series("datetime", [new Date(Date.UTC(2025, 0, 1))], DataType.Datetime), - Series("duration", ["P23DT23H"], DataType.String), - Series("geojson", ['{"value": 1}'], DataType.String), - Series("geopoint", [[40.0, 50.0]], DataType.List(DataType.Float32)), - Series("integer", [1], DataType.Int32), - Series("list", [[1.0, 2.0, 3.0]], DataType.List(DataType.Float32)), - Series("number", [1.1], DataType.Float64), - Series("object", ['{"value": 1}']), - Series("string", ["string"], DataType.String), - Series("time", [new Date(Date.UTC(2025, 0, 1))], DataType.Time), - Series("year", [2025], DataType.Int32), - Series("yearmonth", [[2025, 1]], DataType.List(DataType.Int16)), - ]).lazy() + const source = pl + .DataFrame([ + pl.Series("array", ["[1, 2, 3]"], pl.String), + pl.Series("boolean", [true], pl.Bool), + pl.Series("date", [new Date(Date.UTC(2025, 0, 1))], pl.Date), + pl.Series("datetime", [new Date(Date.UTC(2025, 0, 1))], pl.Datetime), + pl.Series("duration", ["P23DT23H"], pl.String), + pl.Series("geojson", ['{"value": 1}'], pl.String), + pl.Series("geopoint", [[40.0, 50.0]], pl.List(pl.Float32)), + pl.Series("integer", [1], pl.Int32), + pl.Series("list", [[1.0, 2.0, 3.0]], pl.List(pl.Float32)), + pl.Series("number", [1.1], pl.Float64), + pl.Series("object", ['{"value": 1}']), + pl.Series("string", ["string"], pl.String), + pl.Series("time", [new Date(Date.UTC(2025, 0, 1))], pl.Time), + pl.Series("year", [2025], pl.Int32), + pl.Series("yearmonth", [[2025, 1]], pl.List(pl.Int16)), + ]) + .lazy() await saveCsvTable(source, { path, @@ -132,10 +142,12 @@ describe("saveCsvTable", () => { describe("saveCsvTable (format=tsv)", () => { it("should save table to file", async () => { const path = getTempFilePath() - const table = DataFrame({ - id: [1.0, 2.0, 3.0], - name: ["Alice", "Bob", "Charlie"], - }).lazy() + const table = pl + .DataFrame({ + id: [1.0, 2.0, 3.0], + name: ["Alice", "Bob", "Charlie"], + }) + .lazy() await saveCsvTable(table, { path, format: "tsv" }) diff --git a/csv/table/save.ts b/table/plugins/csv/table/save.ts similarity index 73% rename from csv/table/save.ts rename to table/plugins/csv/table/save.ts index f6f43d4e..a7576ddc 100644 --- a/csv/table/save.ts +++ b/table/plugins/csv/table/save.ts @@ -1,6 +1,8 @@ -import { assertLocalPathVacant } from "@dpkit/file" -import type { SaveTableOptions, Table } from "@dpkit/table" -import { denormalizeTable, inferSchemaFromTable } from "@dpkit/table" +import { assertLocalPathVacant } from "@dpkit/dataset" +import type { SaveTableOptions } from "../../../plugin.ts" +import { inferSchemaFromTable } from "../../../schema/index.ts" +import { denormalizeTable } from 
"../../../table/index.ts" +import type { Table } from "../../../table/index.ts" export async function saveCsvTable( table: Table, diff --git a/json/index.ts b/table/plugins/inline/index.ts similarity index 100% rename from json/index.ts rename to table/plugins/inline/index.ts diff --git a/inline/plugin.ts b/table/plugins/inline/plugin.ts similarity index 61% rename from inline/plugin.ts rename to table/plugins/inline/plugin.ts index 3f96c9a5..cf952f75 100644 --- a/inline/plugin.ts +++ b/table/plugins/inline/plugin.ts @@ -1,6 +1,5 @@ -import type { Resource } from "@dpkit/core" -import type { LoadTableOptions } from "@dpkit/table" -import type { TablePlugin } from "@dpkit/table" +import type { Resource } from "@dpkit/metadata" +import type { LoadTableOptions, TablePlugin } from "../../plugin.ts" import { loadInlineTable } from "./table/index.ts" export class InlinePlugin implements TablePlugin { @@ -13,5 +12,6 @@ export class InlinePlugin implements TablePlugin { } function getIsInline(resource: Resource) { - return !!resource.data + const isTable = resource.type === "table" || !!resource.schema + return isTable && !!resource.data } diff --git a/inline/table/index.ts b/table/plugins/inline/table/index.ts similarity index 100% rename from inline/table/index.ts rename to table/plugins/inline/table/index.ts diff --git a/inline/table/load.spec.ts b/table/plugins/inline/table/load.spec.ts similarity index 88% rename from inline/table/load.spec.ts rename to table/plugins/inline/table/load.spec.ts index 6012c31e..a0b08d8c 100644 --- a/inline/table/load.spec.ts +++ b/table/plugins/inline/table/load.spec.ts @@ -30,12 +30,12 @@ describe("loadInlineTable", () => { } const table = await loadInlineTable(resource) - const df = await table.collect() + const frame = await table.collect() expect([ { id: 1, name: "english" }, { id: 2, name: "中文" }, - ]).toEqual(df.toRecords()) + ]).toEqual(frame.toRecords()) }) it("should read objects", async () => { @@ -51,12 +51,12 @@ describe("loadInlineTable", () => { // @ts-ignore const table = await loadInlineTable(resource) - const df = await table.collect() + const frame = await table.collect() expect([ { id: 1, name: "english" }, { id: 2, name: "中文" }, - ]).toEqual(df.toRecords()) + ]).toEqual(frame.toRecords()) }) it("should handle longer rows", async () => { @@ -78,12 +78,12 @@ describe("loadInlineTable", () => { // @ts-ignore const table = await loadInlineTable(resource) - const df = await table.collect() + const frame = await table.collect() expect([ { id: 1, name: "english" }, { id: 2, name: "中文" }, - ]).toEqual(df.toRecords()) + ]).toEqual(frame.toRecords()) }) it("should handle shorter rows", async () => { @@ -105,12 +105,12 @@ describe("loadInlineTable", () => { // @ts-ignore const table = await loadInlineTable(resource) - const df = await table.collect() + const frame = await table.collect() expect([ { id: 1, name: "english" }, { id: 2, name: null }, - ]).toEqual(df.toRecords()) + ]).toEqual(frame.toRecords()) }) it("should handle various data types", async () => { @@ -132,7 +132,7 @@ describe("loadInlineTable", () => { // @ts-ignore const table = await loadInlineTable(resource) - const df = await table.collect() + const frame = await table.collect() expect([ { @@ -143,7 +143,7 @@ describe("loadInlineTable", () => { time: new Date("2025-01-01"), datetime: new Date("2025-01-01"), }, - ]).toEqual(df.toRecords()) + ]).toEqual(frame.toRecords()) }) it("should handle objects with shorter rows", async () => { @@ -155,13 +155,13 @@ describe("loadInlineTable", () 
=> { // @ts-ignore const table = await loadInlineTable(resource) - const df = await table.collect() + const frame = await table.collect() expect([ { id: 1, name: "english" }, { id: 2, name: "中文" }, { id: 3, name: null }, - ]).toEqual(df.toRecords()) + ]).toEqual(frame.toRecords()) }) it("should handle objects with longer rows", async () => { @@ -177,12 +177,12 @@ describe("loadInlineTable", () => { // @ts-ignore const table = await loadInlineTable(resource) - const df = await table.collect() + const frame = await table.collect() expect([ { id: 1, name: "english", extra: null }, { id: 2, name: "中文", extra: null }, { id: 3, name: "german", extra: "extra" }, - ]).toEqual(df.toRecords()) + ]).toEqual(frame.toRecords()) }) }) diff --git a/inline/table/load.ts b/table/plugins/inline/table/load.ts similarity index 57% rename from inline/table/load.ts rename to table/plugins/inline/table/load.ts index 7ef5e51c..5ef6b05a 100644 --- a/inline/table/load.ts +++ b/table/plugins/inline/table/load.ts @@ -1,10 +1,11 @@ -import type { Resource } from "@dpkit/core" -import { resolveDialect } from "@dpkit/core" -import { resolveSchema } from "@dpkit/core" -import { getRecordsFromRows } from "@dpkit/table" -import type { LoadTableOptions } from "@dpkit/table" -import { inferSchemaFromTable, normalizeTable } from "@dpkit/table" -import { DataFrame } from "nodejs-polars" +import type { Resource } from "@dpkit/metadata" +import { resolveDialect } from "@dpkit/metadata" +import { resolveSchema } from "@dpkit/metadata" +import { getRecordsFromRows } from "../../../data/index.ts" +import type { LoadTableOptions } from "../../../plugin.ts" +import { inferSchemaFromTable } from "../../../schema/index.ts" +import { normalizeTable } from "../../../table/index.ts" +import * as pl from "nodejs-polars" export async function loadInlineTable( resource: Partial, @@ -19,7 +20,7 @@ export async function loadInlineTable( const isRows = data.every(row => Array.isArray(row)) const records = isRows ? 
getRecordsFromRows(data, dialect) : data - let table = DataFrame(records).lazy() + let table = pl.DataFrame(records).lazy() if (!options?.denormalized) { let schema = await resolveSchema(resource.schema) diff --git a/json/buffer/decode.ts b/table/plugins/json/buffer/decode.ts similarity index 100% rename from json/buffer/decode.ts rename to table/plugins/json/buffer/decode.ts diff --git a/json/buffer/encode.ts b/table/plugins/json/buffer/encode.ts similarity index 100% rename from json/buffer/encode.ts rename to table/plugins/json/buffer/encode.ts diff --git a/json/buffer/index.ts b/table/plugins/json/buffer/index.ts similarity index 100% rename from json/buffer/index.ts rename to table/plugins/json/buffer/index.ts diff --git a/ods/index.ts b/table/plugins/json/index.ts similarity index 100% rename from ods/index.ts rename to table/plugins/json/index.ts diff --git a/json/plugin.spec.ts b/table/plugins/json/plugin.spec.ts similarity index 86% rename from json/plugin.spec.ts rename to table/plugins/json/plugin.spec.ts index 1f39ccbf..370eb38a 100644 --- a/json/plugin.spec.ts +++ b/table/plugins/json/plugin.spec.ts @@ -1,11 +1,15 @@ -import type { Resource } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Resource } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { beforeEach, describe, expect, it, vi } from "vitest" import { JsonPlugin } from "./plugin.ts" -import * as tableModule from "./table/index.ts" +import * as loadModule from "./table/load.ts" +import * as saveModule from "./table/save.ts" -vi.mock("./table/index.ts", () => ({ +vi.mock("./table/load.ts", () => ({ loadJsonTable: vi.fn(), +})) + +vi.mock("./table/save.ts", () => ({ saveJsonTable: vi.fn(), })) @@ -16,8 +20,8 @@ describe("JsonPlugin", () => { beforeEach(() => { plugin = new JsonPlugin() - mockLoadJsonTable = vi.mocked(tableModule.loadJsonTable) - mockSaveJsonTable = vi.mocked(tableModule.saveJsonTable) + mockLoadJsonTable = vi.mocked(loadModule.loadJsonTable) + mockSaveJsonTable = vi.mocked(saveModule.saveJsonTable) vi.clearAllMocks() }) @@ -26,7 +30,7 @@ describe("JsonPlugin", () => { const resource: Partial = { path: "test.json", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadJsonTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -42,7 +46,7 @@ describe("JsonPlugin", () => { const resource: Partial = { path: "test.jsonl", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadJsonTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -58,7 +62,7 @@ describe("JsonPlugin", () => { const resource: Partial = { path: "test.ndjson", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadJsonTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -86,7 +90,7 @@ describe("JsonPlugin", () => { path: "test.txt", format: "json", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadJsonTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -103,7 +107,7 @@ describe("JsonPlugin", () => { path: "test.json", } const options = { denormalized: true } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadJsonTable.mockResolvedValue(mockTable) await plugin.loadTable(resource, options) @@ -118,7 +122,7 @@ describe("JsonPlugin", () => { const resource: Partial = { path: 
"/path/to/data.json", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadJsonTable.mockResolvedValue(mockTable) await plugin.loadTable(resource) @@ -132,7 +136,7 @@ describe("JsonPlugin", () => { describe("saveTable", () => { it("should save table to json file", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.json" } mockSaveJsonTable.mockResolvedValue({ path: "output.json" }) @@ -146,7 +150,7 @@ describe("JsonPlugin", () => { }) it("should save table to jsonl file", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.jsonl" } mockSaveJsonTable.mockResolvedValue({ path: "output.jsonl" }) @@ -160,7 +164,7 @@ describe("JsonPlugin", () => { }) it("should save table to ndjson file", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.ndjson" } mockSaveJsonTable.mockResolvedValue({ path: "output.ndjson" }) @@ -174,7 +178,7 @@ describe("JsonPlugin", () => { }) it("should return undefined for non-json files", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.csv" } const result = await plugin.saveTable(table, options) @@ -184,7 +188,7 @@ describe("JsonPlugin", () => { }) it("should handle explicit format specification", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.txt", format: "json" as const } mockSaveJsonTable.mockResolvedValue({ path: "output.txt" }) @@ -198,7 +202,7 @@ describe("JsonPlugin", () => { }) it("should handle paths with directories", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "/path/to/output.json" } mockSaveJsonTable.mockResolvedValue({ path: "/path/to/output.json" }) @@ -211,7 +215,7 @@ describe("JsonPlugin", () => { }) it("should return undefined for files without extension", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output" } const result = await plugin.saveTable(table, options) diff --git a/json/plugin.ts b/table/plugins/json/plugin.ts similarity index 73% rename from json/plugin.ts rename to table/plugins/json/plugin.ts index 24843cd8..4c6b3fed 100644 --- a/json/plugin.ts +++ b/table/plugins/json/plugin.ts @@ -1,8 +1,11 @@ -import type { Resource } from "@dpkit/core" -import { inferResourceFormat } from "@dpkit/core" -import type { LoadTableOptions } from "@dpkit/table" -import type { TablePlugin } from "@dpkit/table" -import type { SaveTableOptions, Table } from "@dpkit/table" +import type { Resource } from "@dpkit/metadata" +import { inferFormat } from "@dpkit/metadata" +import type { + LoadTableOptions, + SaveTableOptions, + TablePlugin, +} from "../../plugin.ts" +import type { Table } from "../../table/index.ts" import { loadJsonTable } from "./table/index.ts" import { saveJsonTable } from "./table/index.ts" @@ -25,7 +28,7 @@ export class JsonPlugin implements TablePlugin { } function getJsonFormat(resource: Partial) { - const format = inferResourceFormat(resource) + const format = inferFormat(resource) return format === "json" || format === "jsonl" || format === "ndjson" ? 
format : undefined diff --git a/json/table/fixtures/generated/loadJsonTable-file-variations-should-load-remote-file-multipart_2057098191/recording.har b/table/plugins/json/table/fixtures/generated/loadJsonTable-file-variations-should-load-remote-file-multipart_2057098191/recording.har similarity index 100% rename from json/table/fixtures/generated/loadJsonTable-file-variations-should-load-remote-file-multipart_2057098191/recording.har rename to table/plugins/json/table/fixtures/generated/loadJsonTable-file-variations-should-load-remote-file-multipart_2057098191/recording.har diff --git a/json/table/fixtures/generated/loadJsonTable-file-variations-should-load-remote-file_3069001120/recording.har b/table/plugins/json/table/fixtures/generated/loadJsonTable-file-variations-should-load-remote-file_3069001120/recording.har similarity index 100% rename from json/table/fixtures/generated/loadJsonTable-file-variations-should-load-remote-file_3069001120/recording.har rename to table/plugins/json/table/fixtures/generated/loadJsonTable-file-variations-should-load-remote-file_3069001120/recording.har diff --git a/json/table/fixtures/generated/loadJsonTable-format-jsonl-file-variations-should-load-remote-file-multipart_278600870/recording.har b/table/plugins/json/table/fixtures/generated/loadJsonTable-format-jsonl-file-variations-should-load-remote-file-multipart_278600870/recording.har similarity index 100% rename from json/table/fixtures/generated/loadJsonTable-format-jsonl-file-variations-should-load-remote-file-multipart_278600870/recording.har rename to table/plugins/json/table/fixtures/generated/loadJsonTable-format-jsonl-file-variations-should-load-remote-file-multipart_278600870/recording.har diff --git a/json/table/fixtures/generated/loadJsonTable-format-jsonl-file-variations-should-load-remote-file_1248351709/recording.har b/table/plugins/json/table/fixtures/generated/loadJsonTable-format-jsonl-file-variations-should-load-remote-file_1248351709/recording.har similarity index 100% rename from json/table/fixtures/generated/loadJsonTable-format-jsonl-file-variations-should-load-remote-file_1248351709/recording.har rename to table/plugins/json/table/fixtures/generated/loadJsonTable-format-jsonl-file-variations-should-load-remote-file_1248351709/recording.har diff --git a/json/table/fixtures/table.json b/table/plugins/json/table/fixtures/table.json similarity index 100% rename from json/table/fixtures/table.json rename to table/plugins/json/table/fixtures/table.json diff --git a/json/table/fixtures/table.jsonl b/table/plugins/json/table/fixtures/table.jsonl similarity index 100% rename from json/table/fixtures/table.jsonl rename to table/plugins/json/table/fixtures/table.jsonl diff --git a/json/table/index.ts b/table/plugins/json/table/index.ts similarity index 100% rename from json/table/index.ts rename to table/plugins/json/table/index.ts diff --git a/json/table/load.spec.ts b/table/plugins/json/table/load.spec.ts similarity index 98% rename from json/table/load.spec.ts rename to table/plugins/json/table/load.spec.ts index 182cd09f..b047eff1 100644 --- a/json/table/load.spec.ts +++ b/table/plugins/json/table/load.spec.ts @@ -1,6 +1,6 @@ -import { writeTempFile } from "@dpkit/file" -import { useRecording } from "@dpkit/test" +import { writeTempFile } from "@dpkit/dataset" import { describe, expect, it } from "vitest" +import { useRecording } from "vitest-polly" import { loadJsonTable } from "./load.ts" useRecording() diff --git a/json/table/load.ts b/table/plugins/json/table/load.ts similarity 
index 72% rename from json/table/load.ts rename to table/plugins/json/table/load.ts index 9b866c93..b98ecd8f 100644 --- a/json/table/load.ts +++ b/table/plugins/json/table/load.ts @@ -1,12 +1,12 @@ -import type { Dialect, Resource } from "@dpkit/core" -import { resolveDialect } from "@dpkit/core" -import { resolveSchema } from "@dpkit/core" -import { loadFile, prefetchFiles } from "@dpkit/file" -import type { LoadTableOptions } from "@dpkit/table" -import { inferSchemaFromTable, normalizeTable } from "@dpkit/table" -import type { Table } from "@dpkit/table" -import { concat } from "nodejs-polars" -import { DataFrame, scanJson } from "nodejs-polars" +import type { Dialect, Resource } from "@dpkit/metadata" +import { resolveDialect } from "@dpkit/metadata" +import { resolveSchema } from "@dpkit/metadata" +import { loadFile, prefetchFiles } from "@dpkit/dataset" +import type { LoadTableOptions } from "../../../plugin.ts" +import { inferSchemaFromTable } from "../../../schema/index.ts" +import { normalizeTable } from "../../../table/index.ts" +import type { Table } from "../../../table/index.ts" +import * as pl from "nodejs-polars" import { decodeJsonBuffer } from "../buffer/index.ts" export async function loadJsonTable( @@ -25,7 +25,7 @@ export async function loadJsonTable( const tables: Table[] = [] for (const path of paths) { if (isLines && !dialect) { - const table = scanJson(path) + const table = pl.scanJson(path) tables.push(table) continue } @@ -36,11 +36,11 @@ export async function loadJsonTable( data = processData(data, dialect) } - const table = DataFrame(data).lazy() + const table = pl.DataFrame(data).lazy() tables.push(table) } - let table = concat(tables) + let table = pl.concat(tables) if (!options?.denormalized) { let schema = await resolveSchema(resource.schema) diff --git a/json/table/parse.ts b/table/plugins/json/table/parse.ts similarity index 100% rename from json/table/parse.ts rename to table/plugins/json/table/parse.ts diff --git a/json/table/save.spec.ts b/table/plugins/json/table/save.spec.ts similarity index 78% rename from json/table/save.spec.ts rename to table/plugins/json/table/save.spec.ts index 366bd7e8..6147017f 100644 --- a/json/table/save.spec.ts +++ b/table/plugins/json/table/save.spec.ts @@ -1,14 +1,13 @@ import { readFile } from "node:fs/promises" -import { getTempFilePath } from "@dpkit/file" -import { DataFrame, DataType, Series } from "nodejs-polars" -import { readRecords } from "nodejs-polars" +import { getTempFilePath } from "@dpkit/dataset" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" import { loadJsonTable } from "./load.ts" import { saveJsonTable } from "./save.ts" const row1 = { id: 1, name: "english" } const row2 = { id: 2, name: "中文" } -const table = readRecords([row1, row2]).lazy() +const table = pl.readRecords([row1, row2]).lazy() describe("saveJsonTable", () => { it("should save table to file", async () => { @@ -66,23 +65,25 @@ describe("saveJsonTable", () => { it("should save and load various data types", async () => { const path = getTempFilePath() - const source = DataFrame([ - Series("array", ["[1, 2, 3]"], DataType.String), - Series("boolean", [true], DataType.Bool), - Series("date", [new Date(Date.UTC(2025, 0, 1))], DataType.Date), - Series("datetime", [new Date(Date.UTC(2025, 0, 1))], DataType.Datetime), - Series("duration", ["P23DT23H"], DataType.String), - Series("geojson", ['{"value": 1}'], DataType.String), - Series("geopoint", [[40.0, 50.0]], DataType.List(DataType.Float32)), - 
Series("integer", [1], DataType.Int32), - Series("list", [[1.0, 2.0, 3.0]], DataType.List(DataType.Float32)), - Series("number", [1.1], DataType.Float64), - Series("object", ['{"value": 1}']), - Series("string", ["string"], DataType.String), - Series("time", [new Date(Date.UTC(2025, 0, 1))], DataType.Time), - Series("year", [2025], DataType.Int32), - Series("yearmonth", [[2025, 1]], DataType.List(DataType.Int16)), - ]).lazy() + const source = pl + .DataFrame([ + pl.Series("array", ["[1, 2, 3]"], pl.String), + pl.Series("boolean", [true], pl.Bool), + pl.Series("date", [new Date(Date.UTC(2025, 0, 1))], pl.Date), + pl.Series("datetime", [new Date(Date.UTC(2025, 0, 1))], pl.Datetime), + pl.Series("duration", ["P23DT23H"], pl.String), + pl.Series("geojson", ['{"value": 1}'], pl.String), + pl.Series("geopoint", [[40.0, 50.0]], pl.List(pl.Float32)), + pl.Series("integer", [1], pl.Int32), + pl.Series("list", [[1.0, 2.0, 3.0]], pl.List(pl.Float32)), + pl.Series("number", [1.1], pl.Float64), + pl.Series("object", ['{"value": 1}']), + pl.Series("string", ["string"], pl.String), + pl.Series("time", [new Date(Date.UTC(2025, 0, 1))], pl.Time), + pl.Series("year", [2025], pl.Int32), + pl.Series("yearmonth", [[2025, 1]], pl.List(pl.Int16)), + ]) + .lazy() await saveJsonTable(source, { path, diff --git a/json/table/save.ts b/table/plugins/json/table/save.ts similarity index 78% rename from json/table/save.ts rename to table/plugins/json/table/save.ts index 3d00bf17..a26c5357 100644 --- a/json/table/save.ts +++ b/table/plugins/json/table/save.ts @@ -1,7 +1,9 @@ -import type { Dialect } from "@dpkit/core" -import { saveFile } from "@dpkit/file" -import { denormalizeTable, inferSchemaFromTable } from "@dpkit/table" -import type { SaveTableOptions, Table } from "@dpkit/table" +import { saveFile } from "@dpkit/dataset" +import type { Dialect } from "@dpkit/metadata" +import type { SaveTableOptions } from "../../../plugin.ts" +import { inferSchemaFromTable } from "../../../schema/index.ts" +import { denormalizeTable } from "../../../table/index.ts" +import type { Table } from "../../../table/index.ts" import { decodeJsonBuffer, encodeJsonBuffer } from "../buffer/index.ts" // TODO: rebase on sinkJSON when it is available @@ -27,8 +29,8 @@ export async function saveJsonTable( // We use polars to serialize the data // But encode it manually to support dialects/formatting - const df = await table.collect() - let buffer = df.writeJSON({ format: isLines ? "lines" : "json" }) + const frame = await table.collect() + let buffer = frame.writeJSON({ format: isLines ? 
"lines" : "json" }) let data = decodeJsonBuffer(buffer, { isLines }) if (dialect) { diff --git a/parquet/index.ts b/table/plugins/ods/index.ts similarity index 100% rename from parquet/index.ts rename to table/plugins/ods/index.ts diff --git a/ods/plugin.spec.ts b/table/plugins/ods/plugin.spec.ts similarity index 85% rename from ods/plugin.spec.ts rename to table/plugins/ods/plugin.spec.ts index c15d4a81..54834113 100644 --- a/ods/plugin.spec.ts +++ b/table/plugins/ods/plugin.spec.ts @@ -1,11 +1,15 @@ -import type { Resource } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Resource } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { beforeEach, describe, expect, it, vi } from "vitest" import { OdsPlugin } from "./plugin.ts" -import * as tableModule from "./table/index.ts" +import * as loadModule from "./table/load.ts" +import * as saveModule from "./table/save.ts" -vi.mock("./table/index.ts", () => ({ +vi.mock("./table/load.ts", () => ({ loadOdsTable: vi.fn(), +})) + +vi.mock("./table/save.ts", () => ({ saveOdsTable: vi.fn(), })) @@ -16,8 +20,8 @@ describe("OdsPlugin", () => { beforeEach(() => { plugin = new OdsPlugin() - mockLoadOdsTable = vi.mocked(tableModule.loadOdsTable) - mockSaveOdsTable = vi.mocked(tableModule.saveOdsTable) + mockLoadOdsTable = vi.mocked(loadModule.loadOdsTable) + mockSaveOdsTable = vi.mocked(saveModule.saveOdsTable) vi.clearAllMocks() }) @@ -26,7 +30,7 @@ describe("OdsPlugin", () => { const resource: Partial = { path: "test.ods", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadOdsTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -51,7 +55,7 @@ describe("OdsPlugin", () => { path: "test.txt", format: "ods", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadOdsTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -65,7 +69,7 @@ describe("OdsPlugin", () => { path: "test.ods", } const options = { denormalized: true } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadOdsTable.mockResolvedValue(mockTable) await plugin.loadTable(resource, options) @@ -77,7 +81,7 @@ describe("OdsPlugin", () => { const resource: Partial = { path: "/path/to/data.ods", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadOdsTable.mockResolvedValue(mockTable) await plugin.loadTable(resource) @@ -110,7 +114,7 @@ describe("OdsPlugin", () => { describe("saveTable", () => { it("should save table to ods file", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.ods" } mockSaveOdsTable.mockResolvedValue("output.ods") @@ -121,7 +125,7 @@ describe("OdsPlugin", () => { }) it("should return undefined for non-ods files", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.csv" } const result = await plugin.saveTable(table, options) @@ -131,7 +135,7 @@ describe("OdsPlugin", () => { }) it("should handle explicit format specification", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.txt", format: "ods" as const } mockSaveOdsTable.mockResolvedValue("output.txt") @@ -142,7 +146,7 @@ describe("OdsPlugin", () => { }) it("should handle paths with directories", async () => { - const table = DataFrame().lazy() + const table = 
diff --git a/ods/plugin.spec.ts b/table/plugins/ods/plugin.spec.ts similarity index 85% rename from ods/plugin.spec.ts rename to table/plugins/ods/plugin.spec.ts index c15d4a81..54834113 100644 --- a/ods/plugin.spec.ts +++ b/table/plugins/ods/plugin.spec.ts @@ -1,11 +1,15 @@ -import type { Resource } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Resource } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { beforeEach, describe, expect, it, vi } from "vitest" import { OdsPlugin } from "./plugin.ts" -import * as tableModule from "./table/index.ts" +import * as loadModule from "./table/load.ts" +import * as saveModule from "./table/save.ts" -vi.mock("./table/index.ts", () => ({ +vi.mock("./table/load.ts", () => ({ loadOdsTable: vi.fn(), +})) + +vi.mock("./table/save.ts", () => ({ saveOdsTable: vi.fn(), })) @@ -16,8 +20,8 @@ describe("OdsPlugin", () => { beforeEach(() => { plugin = new OdsPlugin() - mockLoadOdsTable = vi.mocked(tableModule.loadOdsTable) - mockSaveOdsTable = vi.mocked(tableModule.saveOdsTable) + mockLoadOdsTable = vi.mocked(loadModule.loadOdsTable) + mockSaveOdsTable = vi.mocked(saveModule.saveOdsTable) vi.clearAllMocks() }) @@ -26,7 +30,7 @@ describe("OdsPlugin", () => { const resource: Partial<Resource> = { path: "test.ods", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadOdsTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -51,7 +55,7 @@ path: "test.txt", format: "ods", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadOdsTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -65,7 +69,7 @@ path: "test.ods", } const options = { denormalized: true } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadOdsTable.mockResolvedValue(mockTable) await plugin.loadTable(resource, options) @@ -77,7 +81,7 @@ const resource: Partial<Resource> = { path: "/path/to/data.ods", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadOdsTable.mockResolvedValue(mockTable) await plugin.loadTable(resource) @@ -110,7 +114,7 @@ describe("OdsPlugin", () => { describe("saveTable", () => { it("should save table to ods file", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.ods" } mockSaveOdsTable.mockResolvedValue("output.ods") @@ -121,7 +125,7 @@ }) it("should return undefined for non-ods files", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.csv" } const result = await plugin.saveTable(table, options) @@ -131,7 +135,7 @@ }) it("should handle explicit format specification", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.txt", format: "ods" as const } mockSaveOdsTable.mockResolvedValue("output.txt") @@ -142,7 +146,7 @@ }) it("should handle paths with directories", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "/path/to/output.ods" } mockSaveOdsTable.mockResolvedValue("/path/to/output.ods") @@ -152,7 +156,7 @@ }) it("should return undefined for files without extension", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output" } const result = await plugin.saveTable(table, options) @@ -162,7 +166,7 @@ }) it("should return undefined for xlsx files", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.xlsx" } const result = await plugin.saveTable(table, options) @@ -172,7 +176,7 @@ }) it("should return undefined for json files", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.json" } const result = await plugin.saveTable(table, options)
diff --git a/ods/plugin.ts b/table/plugins/ods/plugin.ts similarity index 60% rename from ods/plugin.ts rename to table/plugins/ods/plugin.ts index 4c43f5b8..6abeef3a 100644 --- a/ods/plugin.ts +++ b/table/plugins/ods/plugin.ts @@ -1,9 +1,13 @@ -import type { Resource } from "@dpkit/core" -import { inferResourceFormat } from "@dpkit/core" -import type { LoadTableOptions } from "@dpkit/table" -import type { TablePlugin } from "@dpkit/table" -import type { SaveTableOptions, Table } from "@dpkit/table" -import { loadOdsTable, saveOdsTable } from "./table/index.ts" +import type { Resource } from "@dpkit/metadata" +import { inferFormat } from "@dpkit/metadata" +import type { + LoadTableOptions, + SaveTableOptions, + TablePlugin, +} from "../../plugin.ts" +import type { Table } from "../../table/index.ts" +import { loadOdsTable } from "./table/index.ts" +import { saveOdsTable } from "./table/index.ts" export class OdsPlugin implements TablePlugin { async loadTable(resource: Partial<Resource>, options?: LoadTableOptions) { @@ -24,6 +28,6 @@ export class OdsPlugin implements TablePlugin { } function getIsOds(resource: Partial<Resource>) { - const format = inferResourceFormat(resource) + const format = inferFormat(resource) return ["ods"].includes(format ??
"") } diff --git a/ods/table/fixtures/generated/loadOdsTable-file-variations-should-load-multipart-remote-file_2072534240/recording.har b/table/plugins/ods/table/fixtures/generated/loadOdsTable-file-variations-should-load-multipart-remote-file_2072534240/recording.har similarity index 100% rename from ods/table/fixtures/generated/loadOdsTable-file-variations-should-load-multipart-remote-file_2072534240/recording.har rename to table/plugins/ods/table/fixtures/generated/loadOdsTable-file-variations-should-load-multipart-remote-file_2072534240/recording.har diff --git a/ods/table/fixtures/generated/loadOdsTable-file-variations-should-load-remote-file_2229433724/recording.har b/table/plugins/ods/table/fixtures/generated/loadOdsTable-file-variations-should-load-remote-file_2229433724/recording.har similarity index 100% rename from ods/table/fixtures/generated/loadOdsTable-file-variations-should-load-remote-file_2229433724/recording.har rename to table/plugins/ods/table/fixtures/generated/loadOdsTable-file-variations-should-load-remote-file_2229433724/recording.har diff --git a/ods/table/index.ts b/table/plugins/ods/table/index.ts similarity index 100% rename from ods/table/index.ts rename to table/plugins/ods/table/index.ts diff --git a/ods/table/load.spec.ts b/table/plugins/ods/table/load.spec.ts similarity index 98% rename from ods/table/load.spec.ts rename to table/plugins/ods/table/load.spec.ts index 090d74e8..d7afcf55 100644 --- a/ods/table/load.spec.ts +++ b/table/plugins/ods/table/load.spec.ts @@ -1,6 +1,6 @@ -import { getTempFilePath } from "@dpkit/file" -import { useRecording } from "@dpkit/test" +import { getTempFilePath } from "@dpkit/dataset" import { describe, expect, it } from "vitest" +import { useRecording } from "vitest-polly" import { loadOdsTable } from "./load.ts" import { writeTestData } from "./test.ts" diff --git a/ods/table/load.ts b/table/plugins/ods/table/load.ts similarity index 62% rename from ods/table/load.ts rename to table/plugins/ods/table/load.ts index 4d4461a3..43f21fc9 100644 --- a/ods/table/load.ts +++ b/table/plugins/ods/table/load.ts @@ -1,12 +1,14 @@ -import { resolveDialect } from "@dpkit/core" -import type { Resource } from "@dpkit/core" -import { resolveSchema } from "@dpkit/core" -import { loadFile, prefetchFiles } from "@dpkit/file" -import type { LoadTableOptions } from "@dpkit/table" -import { inferSchemaFromTable, normalizeTable } from "@dpkit/table" -import type { DataRow, Table } from "@dpkit/table" -import { getRecordsFromRows } from "@dpkit/table" -import { DataFrame, concat } from "nodejs-polars" +import { resolveDialect } from "@dpkit/metadata" +import type { Resource } from "@dpkit/metadata" +import { resolveSchema } from "@dpkit/metadata" +import { loadFile, prefetchFiles } from "@dpkit/dataset" +import type { DataRow } from "../../../data/index.ts" +import { getRecordsFromRows } from "../../../data/index.ts" +import type { LoadTableOptions } from "../../../plugin.ts" +import { inferSchemaFromTable } from "../../../schema/index.ts" +import { normalizeTable } from "../../../table/index.ts" +import type { Table } from "../../../table/index.ts" +import * as pl from "nodejs-polars" import { read, utils } from "xlsx" export async function loadOdsTable( @@ -36,13 +38,13 @@ export async function loadOdsTable( }) as DataRow[] const records = getRecordsFromRows(rows, dialect) - const table = DataFrame(records).lazy() + const table = pl.DataFrame(records).lazy() tables.push(table) } } - let table = concat(tables) + let table = pl.concat(tables) if 
(!options?.denormalized) { let schema = await resolveSchema(resource.schema) diff --git a/ods/table/save.spec.ts b/table/plugins/ods/table/save.spec.ts similarity index 57% rename from ods/table/save.spec.ts rename to table/plugins/ods/table/save.spec.ts index 56b5b28c..ed1e9507 100644 --- a/ods/table/save.spec.ts +++ b/table/plugins/ods/table/save.spec.ts @@ -1,6 +1,5 @@ -import { getTempFilePath } from "@dpkit/file" -import { DataFrame, DataType, Series } from "nodejs-polars" -import { readRecords } from "nodejs-polars" +import { getTempFilePath } from "@dpkit/dataset" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" import { loadOdsTable } from "./load.ts" import { saveOdsTable } from "./save.ts" @@ -8,7 +7,7 @@ import { readTestData } from "./test.ts" const row1 = { id: 1, name: "english" } const row2 = { id: 2, name: "中文" } -const table = readRecords([row1, row2]).lazy() +const table = pl.readRecords([row1, row2]).lazy() describe("saveOdsTable", () => { it("should save table to file", async () => { @@ -22,23 +21,25 @@ describe("saveOdsTable", () => { it("should save and load various data types", async () => { const path = getTempFilePath() - const source = DataFrame([ - Series("array", ["[1, 2, 3]"], DataType.String), - Series("boolean", [true], DataType.Bool), - Series("date", [new Date(Date.UTC(2025, 0, 1))], DataType.Date), - Series("datetime", [new Date(Date.UTC(2025, 0, 1))], DataType.Datetime), - Series("duration", ["P23DT23H"], DataType.String), - Series("geojson", ['{"value": 1}'], DataType.String), - Series("geopoint", [[40.0, 50.0]], DataType.List(DataType.Float32)), - Series("integer", [1], DataType.Int32), - Series("list", [[1.0, 2.0, 3.0]], DataType.List(DataType.Float32)), - Series("number", [1.1], DataType.Float64), - Series("object", ['{"value": 1}']), - Series("string", ["string"], DataType.String), - Series("time", [new Date(Date.UTC(2025, 0, 1))], DataType.Time), - Series("year", [2025], DataType.Int32), - Series("yearmonth", [[2025, 1]], DataType.List(DataType.Int16)), - ]).lazy() + const source = pl + .DataFrame([ + pl.Series("array", ["[1, 2, 3]"], pl.String), + pl.Series("boolean", [true], pl.Bool), + pl.Series("date", [new Date(Date.UTC(2025, 0, 1))], pl.Date), + pl.Series("datetime", [new Date(Date.UTC(2025, 0, 1))], pl.Datetime), + pl.Series("duration", ["P23DT23H"], pl.String), + pl.Series("geojson", ['{"value": 1}'], pl.String), + pl.Series("geopoint", [[40.0, 50.0]], pl.List(pl.Float32)), + pl.Series("integer", [1], pl.Int32), + pl.Series("list", [[1.0, 2.0, 3.0]], pl.List(pl.Float32)), + pl.Series("number", [1.1], pl.Float64), + pl.Series("object", ['{"value": 1}']), + pl.Series("string", ["string"], pl.String), + pl.Series("time", [new Date(Date.UTC(2025, 0, 1))], pl.Time), + pl.Series("year", [2025], pl.Int32), + pl.Series("yearmonth", [[2025, 1]], pl.List(pl.Int16)), + ]) + .lazy() await saveOdsTable(source, { path, diff --git a/ods/table/save.ts b/table/plugins/ods/table/save.ts similarity index 63% rename from ods/table/save.ts rename to table/plugins/ods/table/save.ts index cacaea77..56afcd1e 100644 --- a/ods/table/save.ts +++ b/table/plugins/ods/table/save.ts @@ -1,8 +1,10 @@ -import { resolveDialect } from "@dpkit/core" -import { saveFile } from "@dpkit/file" -import { denormalizeTable, inferSchemaFromTable } from "@dpkit/table" -import type { SaveTableOptions, Table } from "@dpkit/table" +import { saveFile } from "@dpkit/dataset" +import { resolveDialect } from "@dpkit/metadata" import { utils, write } from 
"xlsx" +import type { SaveTableOptions } from "../../../plugin.ts" +import { inferSchemaFromTable } from "../../../schema/index.ts" +import { denormalizeTable } from "../../../table/index.ts" +import type { Table } from "../../../table/index.ts" export async function saveOdsTable(table: Table, options: SaveTableOptions) { const { path, overwrite } = options @@ -18,11 +20,11 @@ export async function saveOdsTable(table: Table, options: SaveTableOptions) { nativeTypes: ["boolean", "integer", "number", "string", "year"], }) - const df = await table.collect() + const frame = await table.collect() const dialect = await resolveDialect(options.dialect) const sheetName = dialect?.sheetName ?? "Sheet1" - const sheet = utils.json_to_sheet(df.toRecords()) + const sheet = utils.json_to_sheet(frame.toRecords()) const book = utils.book_new() utils.book_append_sheet(book, sheet, sheetName) diff --git a/ods/table/test.ts b/table/plugins/ods/table/test.ts similarity index 100% rename from ods/table/test.ts rename to table/plugins/ods/table/test.ts diff --git a/xlsx/index.ts b/table/plugins/parquet/index.ts similarity index 100% rename from xlsx/index.ts rename to table/plugins/parquet/index.ts diff --git a/parquet/plugin.spec.ts b/table/plugins/parquet/plugin.spec.ts similarity index 85% rename from parquet/plugin.spec.ts rename to table/plugins/parquet/plugin.spec.ts index 5f8c361c..b91e99a2 100644 --- a/parquet/plugin.spec.ts +++ b/table/plugins/parquet/plugin.spec.ts @@ -1,11 +1,15 @@ -import type { Resource } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Resource } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { beforeEach, describe, expect, it, vi } from "vitest" import { ParquetPlugin } from "./plugin.ts" -import * as tableModule from "./table/index.ts" +import * as loadModule from "./table/load.ts" +import * as saveModule from "./table/save.ts" -vi.mock("./table/index.ts", () => ({ +vi.mock("./table/load.ts", () => ({ loadParquetTable: vi.fn(), +})) + +vi.mock("./table/save.ts", () => ({ saveParquetTable: vi.fn(), })) @@ -16,8 +20,8 @@ describe("ParquetPlugin", () => { beforeEach(() => { plugin = new ParquetPlugin() - mockLoadParquetTable = vi.mocked(tableModule.loadParquetTable) - mockSaveParquetTable = vi.mocked(tableModule.saveParquetTable) + mockLoadParquetTable = vi.mocked(loadModule.loadParquetTable) + mockSaveParquetTable = vi.mocked(saveModule.saveParquetTable) vi.clearAllMocks() }) @@ -26,7 +30,7 @@ describe("ParquetPlugin", () => { const resource: Partial = { path: "test.parquet", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadParquetTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -51,7 +55,7 @@ describe("ParquetPlugin", () => { path: "test.txt", format: "parquet", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadParquetTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -65,7 +69,7 @@ describe("ParquetPlugin", () => { path: "test.parquet", } const options = { denormalized: true } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadParquetTable.mockResolvedValue(mockTable) await plugin.loadTable(resource, options) @@ -77,7 +81,7 @@ describe("ParquetPlugin", () => { const resource: Partial = { path: "/path/to/data.parquet", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() 
diff --git a/parquet/plugin.spec.ts b/table/plugins/parquet/plugin.spec.ts similarity index 85% rename from parquet/plugin.spec.ts rename to table/plugins/parquet/plugin.spec.ts index 5f8c361c..b91e99a2 100644 --- a/parquet/plugin.spec.ts +++ b/table/plugins/parquet/plugin.spec.ts @@ -1,11 +1,15 @@ -import type { Resource } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Resource } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { beforeEach, describe, expect, it, vi } from "vitest" import { ParquetPlugin } from "./plugin.ts" -import * as tableModule from "./table/index.ts" +import * as loadModule from "./table/load.ts" +import * as saveModule from "./table/save.ts" -vi.mock("./table/index.ts", () => ({ +vi.mock("./table/load.ts", () => ({ loadParquetTable: vi.fn(), +})) + +vi.mock("./table/save.ts", () => ({ saveParquetTable: vi.fn(), })) @@ -16,8 +20,8 @@ describe("ParquetPlugin", () => { beforeEach(() => { plugin = new ParquetPlugin() - mockLoadParquetTable = vi.mocked(tableModule.loadParquetTable) - mockSaveParquetTable = vi.mocked(tableModule.saveParquetTable) + mockLoadParquetTable = vi.mocked(loadModule.loadParquetTable) + mockSaveParquetTable = vi.mocked(saveModule.saveParquetTable) vi.clearAllMocks() }) @@ -26,7 +30,7 @@ describe("ParquetPlugin", () => { const resource: Partial<Resource> = { path: "test.parquet", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadParquetTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -51,7 +55,7 @@ path: "test.txt", format: "parquet", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadParquetTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -65,7 +69,7 @@ path: "test.parquet", } const options = { denormalized: true } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadParquetTable.mockResolvedValue(mockTable) await plugin.loadTable(resource, options) @@ -77,7 +81,7 @@ const resource: Partial<Resource> = { path: "/path/to/data.parquet", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadParquetTable.mockResolvedValue(mockTable) await plugin.loadTable(resource) @@ -110,7 +114,7 @@ describe("ParquetPlugin", () => { describe("saveTable", () => { it("should save table to parquet file", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.parquet" } mockSaveParquetTable.mockResolvedValue("output.parquet") @@ -121,7 +125,7 @@ }) it("should return undefined for non-parquet files", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.csv" } const result = await plugin.saveTable(table, options) @@ -131,7 +135,7 @@ }) it("should handle explicit format specification", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.txt", format: "parquet" as const } mockSaveParquetTable.mockResolvedValue("output.txt") @@ -142,7 +146,7 @@ }) it("should handle paths with directories", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "/path/to/output.parquet" } mockSaveParquetTable.mockResolvedValue("/path/to/output.parquet") @@ -152,7 +156,7 @@ }) it("should return undefined for files without extension", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output" } const result = await plugin.saveTable(table, options) @@ -162,7 +166,7 @@ }) it("should return undefined for arrow files", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.arrow" } const result = await plugin.saveTable(table, options) @@ -172,7 +176,7 @@ }) it("should return undefined for json files", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.json" } const result = await plugin.saveTable(table, options)
diff --git a/parquet/plugin.ts b/table/plugins/parquet/plugin.ts similarity index 61% rename from parquet/plugin.ts rename to table/plugins/parquet/plugin.ts index a6f26f22..76bea279 100644 --- a/parquet/plugin.ts +++ b/table/plugins/parquet/plugin.ts @@ -1,9 +1,13 @@ -import type { Resource } from "@dpkit/core" -import { inferResourceFormat } from "@dpkit/core" -import type { LoadTableOptions } from "@dpkit/table" -import type { TablePlugin } from "@dpkit/table" -import type { SaveTableOptions, Table } from "@dpkit/table" -import { loadParquetTable, saveParquetTable } from "./table/index.ts" +import type { Resource } from "@dpkit/metadata" +import { inferFormat } from "@dpkit/metadata" +import type { + LoadTableOptions, + SaveTableOptions, + TablePlugin, +} from "../../plugin.ts" +import type { Table } from "../../table/index.ts" +import { loadParquetTable } from "./table/index.ts" +import { saveParquetTable } from "./table/index.ts" export class ParquetPlugin implements TablePlugin { async loadTable(resource: Partial<Resource>, options?: LoadTableOptions) { @@ -24,6 +28,6 @@ export class ParquetPlugin implements TablePlugin { } function getIsParquet(resource: Partial<Resource>) { - const format = inferResourceFormat(resource) + const format = inferFormat(resource) return format === "parquet" }
diff --git
a/parquet/table/fixtures/generated/loadParquetTable-file-variations-should-load-remote-file-multipart_3893757127/recording.har b/table/plugins/parquet/table/fixtures/generated/loadParquetTable-file-variations-should-load-remote-file-multipart_3893757127/recording.har similarity index 100% rename from parquet/table/fixtures/generated/loadParquetTable-file-variations-should-load-remote-file-multipart_3893757127/recording.har rename to table/plugins/parquet/table/fixtures/generated/loadParquetTable-file-variations-should-load-remote-file-multipart_3893757127/recording.har diff --git a/parquet/table/fixtures/generated/loadParquetTable-file-variations-should-load-remote-file_3029162600/recording.har b/table/plugins/parquet/table/fixtures/generated/loadParquetTable-file-variations-should-load-remote-file_3029162600/recording.har similarity index 100% rename from parquet/table/fixtures/generated/loadParquetTable-file-variations-should-load-remote-file_3029162600/recording.har rename to table/plugins/parquet/table/fixtures/generated/loadParquetTable-file-variations-should-load-remote-file_3029162600/recording.har diff --git a/parquet/table/fixtures/table.parquet b/table/plugins/parquet/table/fixtures/table.parquet similarity index 100% rename from parquet/table/fixtures/table.parquet rename to table/plugins/parquet/table/fixtures/table.parquet diff --git a/parquet/table/index.ts b/table/plugins/parquet/table/index.ts similarity index 100% rename from parquet/table/index.ts rename to table/plugins/parquet/table/index.ts diff --git a/parquet/table/load.spec.ts b/table/plugins/parquet/table/load.spec.ts similarity index 81% rename from parquet/table/load.spec.ts rename to table/plugins/parquet/table/load.spec.ts index 4f493dd5..00d9e61c 100644 --- a/parquet/table/load.spec.ts +++ b/table/plugins/parquet/table/load.spec.ts @@ -1,7 +1,7 @@ -import { getTempFilePath } from "@dpkit/file" -import { useRecording } from "@dpkit/test" -import { DataFrame } from "nodejs-polars" +import { getTempFilePath } from "@dpkit/dataset" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" +import { useRecording } from "vitest-polly" import { loadParquetTable } from "./load.ts" useRecording() @@ -10,7 +10,7 @@ describe("loadParquetTable", () => { describe("file variations", () => { it("should load local file", async () => { const path = getTempFilePath() - DataFrame({ id: [1, 2], name: ["english", "中文"] }).writeParquet(path) + pl.DataFrame({ id: [1, 2], name: ["english", "中文"] }).writeParquet(path) const table = await loadParquetTable({ path }) expect((await table.collect()).toRecords()).toEqual([ @@ -22,8 +22,12 @@ describe("loadParquetTable", () => { it("should load local file (multipart)", async () => { const path1 = getTempFilePath() const path2 = getTempFilePath() - DataFrame({ id: [1, 2], name: ["english", "中文"] }).writeParquet(path1) - DataFrame({ id: [1, 2], name: ["english", "中文"] }).writeParquet(path2) + pl.DataFrame({ id: [1, 2], name: ["english", "中文"] }).writeParquet( + path1, + ) + pl.DataFrame({ id: [1, 2], name: ["english", "中文"] }).writeParquet( + path2, + ) const table = await loadParquetTable({ path: [path1, path2] }) expect((await table.collect()).toRecords()).toEqual([ diff --git a/parquet/table/load.ts b/table/plugins/parquet/table/load.ts similarity index 51% rename from parquet/table/load.ts rename to table/plugins/parquet/table/load.ts index bb83e7bb..6afa7bc9 100644 --- a/parquet/table/load.ts +++ b/table/plugins/parquet/table/load.ts @@ -1,10 +1,10 @@ -import type 
{ Resource } from "@dpkit/core" -import { resolveSchema } from "@dpkit/core" -import { prefetchFiles } from "@dpkit/file" -import type { LoadTableOptions } from "@dpkit/table" -import { inferSchemaFromTable, normalizeTable } from "@dpkit/table" -import { concat } from "nodejs-polars" -import { scanParquet } from "nodejs-polars" +import type { Resource } from "@dpkit/metadata" +import { resolveSchema } from "@dpkit/metadata" +import { prefetchFiles } from "@dpkit/dataset" +import type { LoadTableOptions } from "../../../plugin.ts" +import { inferSchemaFromTable } from "../../../schema/index.ts" +import { normalizeTable } from "../../../table/index.ts" +import * as pl from "nodejs-polars" export async function loadParquetTable( resource: Partial<Resource>, @@ -15,9 +15,9 @@ export async function loadParquetTable( throw new Error("Resource path is not defined") } - let table = scanParquet(firstPath) + let table = pl.scanParquet(firstPath) if (restPaths.length) { - table = concat([table, ...restPaths.map(path => scanParquet(path))]) + table = pl.concat([table, ...restPaths.map(path => pl.scanParquet(path))]) } if (!options?.denormalized) {
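The `loadParquetTable` hunk above shows the multipart pattern shared by several loaders: scan the first path lazily, then concatenate scans of the remaining paths. Extracted as a standalone sketch (the function name is illustrative):

```ts
import * as pl from "nodejs-polars"

// Lazily scan one or more Parquet files into a single LazyDataFrame.
function scanParquetParts(paths: string[]): pl.LazyDataFrame {
  const [firstPath, ...restPaths] = paths
  if (!firstPath) {
    throw new Error("Resource path is not defined")
  }

  let table = pl.scanParquet(firstPath)
  if (restPaths.length) {
    table = pl.concat([table, ...restPaths.map(path => pl.scanParquet(path))])
  }

  return table
}
```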
["P23DT23H"], pl.String), + pl.Series("geojson", ['{"value": 1}'], pl.String), + pl.Series("geopoint", [[40.0, 50.0]], pl.List(pl.Float32)), + pl.Series("integer", [1], pl.Int32), + pl.Series("list", [[1.0, 2.0, 3.0]], pl.List(pl.Float32)), + pl.Series("number", [1.1], pl.Float64), + pl.Series("object", ['{"value": 1}']), + pl.Series("string", ["string"], pl.String), + pl.Series("time", [new Date(Date.UTC(2025, 0, 1))], pl.Time), + pl.Series("year", [2025], pl.Int32), + pl.Series("yearmonth", [[2025, 1]], pl.List(pl.Int16)), + ]) + .lazy() await saveParquetTable(source, { path, diff --git a/parquet/table/save.ts b/table/plugins/parquet/table/save.ts similarity index 68% rename from parquet/table/save.ts rename to table/plugins/parquet/table/save.ts index 99bee355..66a81e93 100644 --- a/parquet/table/save.ts +++ b/table/plugins/parquet/table/save.ts @@ -1,6 +1,8 @@ -import { assertLocalPathVacant } from "@dpkit/file" -import { denormalizeTable, inferSchemaFromTable } from "@dpkit/table" -import type { SaveTableOptions, Table } from "@dpkit/table" +import { assertLocalPathVacant } from "@dpkit/dataset" +import type { SaveTableOptions } from "../../../plugin.ts" +import { inferSchemaFromTable } from "../../../schema/index.ts" +import { denormalizeTable } from "../../../table/index.ts" +import type { Table } from "../../../table/index.ts" export async function saveParquetTable( table: Table, diff --git a/csv/index.ts b/table/plugins/xlxs/index.ts similarity index 63% rename from csv/index.ts rename to table/plugins/xlxs/index.ts index 0bb8be6b..5f4f33d9 100644 --- a/csv/index.ts +++ b/table/plugins/xlxs/index.ts @@ -1,3 +1,2 @@ -export * from "./dialect/index.ts" export * from "./table/index.ts" export * from "./plugin.ts" diff --git a/xlsx/plugin.spec.ts b/table/plugins/xlxs/plugin.spec.ts similarity index 85% rename from xlsx/plugin.spec.ts rename to table/plugins/xlxs/plugin.spec.ts index cbf47610..3733871a 100644 --- a/xlsx/plugin.spec.ts +++ b/table/plugins/xlxs/plugin.spec.ts @@ -1,11 +1,15 @@ -import type { Resource } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Resource } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { beforeEach, describe, expect, it, vi } from "vitest" import { XlsxPlugin } from "./plugin.ts" -import * as tableModule from "./table/index.ts" +import * as loadModule from "./table/load.ts" +import * as saveModule from "./table/save.ts" -vi.mock("./table/index.ts", () => ({ +vi.mock("./table/load.ts", () => ({ loadXlsxTable: vi.fn(), +})) + +vi.mock("./table/save.ts", () => ({ saveXlsxTable: vi.fn(), })) @@ -16,8 +20,8 @@ describe("XlsxPlugin", () => { beforeEach(() => { plugin = new XlsxPlugin() - mockLoadXlsxTable = vi.mocked(tableModule.loadXlsxTable) - mockSaveXlsxTable = vi.mocked(tableModule.saveXlsxTable) + mockLoadXlsxTable = vi.mocked(loadModule.loadXlsxTable) + mockSaveXlsxTable = vi.mocked(saveModule.saveXlsxTable) vi.clearAllMocks() }) @@ -26,7 +30,7 @@ describe("XlsxPlugin", () => { const resource: Partial = { path: "test.xlsx", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadXlsxTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -51,7 +55,7 @@ describe("XlsxPlugin", () => { path: "test.txt", format: "xlsx", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadXlsxTable.mockResolvedValue(mockTable) const result = await plugin.loadTable(resource) @@ -65,7 +69,7 @@ describe("XlsxPlugin", () => 
{ path: "test.xlsx", } const options = { denormalized: true } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadXlsxTable.mockResolvedValue(mockTable) await plugin.loadTable(resource, options) @@ -77,7 +81,7 @@ describe("XlsxPlugin", () => { const resource: Partial = { path: "/path/to/data.xlsx", } - const mockTable = DataFrame().lazy() + const mockTable = pl.DataFrame().lazy() mockLoadXlsxTable.mockResolvedValue(mockTable) await plugin.loadTable(resource) @@ -110,7 +114,7 @@ describe("XlsxPlugin", () => { describe("saveTable", () => { it("should save table to xlsx file", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.xlsx" } mockSaveXlsxTable.mockResolvedValue("output.xlsx") @@ -121,7 +125,7 @@ describe("XlsxPlugin", () => { }) it("should return undefined for non-xlsx files", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.csv" } const result = await plugin.saveTable(table, options) @@ -131,7 +135,7 @@ describe("XlsxPlugin", () => { }) it("should handle explicit format specification", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.txt", format: "xlsx" as const } mockSaveXlsxTable.mockResolvedValue("output.txt") @@ -142,7 +146,7 @@ describe("XlsxPlugin", () => { }) it("should handle paths with directories", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "/path/to/output.xlsx" } mockSaveXlsxTable.mockResolvedValue("/path/to/output.xlsx") @@ -152,7 +156,7 @@ describe("XlsxPlugin", () => { }) it("should return undefined for files without extension", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output" } const result = await plugin.saveTable(table, options) @@ -162,7 +166,7 @@ describe("XlsxPlugin", () => { }) it("should return undefined for ods files", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.ods" } const result = await plugin.saveTable(table, options) @@ -172,7 +176,7 @@ describe("XlsxPlugin", () => { }) it("should return undefined for json files", async () => { - const table = DataFrame().lazy() + const table = pl.DataFrame().lazy() const options = { path: "output.json" } const result = await plugin.saveTable(table, options) diff --git a/xlsx/plugin.ts b/table/plugins/xlxs/plugin.ts similarity index 60% rename from xlsx/plugin.ts rename to table/plugins/xlxs/plugin.ts index 266be920..3480f1b3 100644 --- a/xlsx/plugin.ts +++ b/table/plugins/xlxs/plugin.ts @@ -1,9 +1,13 @@ -import type { Resource } from "@dpkit/core" -import { inferResourceFormat } from "@dpkit/core" -import type { LoadTableOptions } from "@dpkit/table" -import type { TablePlugin } from "@dpkit/table" -import type { SaveTableOptions, Table } from "@dpkit/table" -import { loadXlsxTable, saveXlsxTable } from "./table/index.ts" +import type { Resource } from "@dpkit/metadata" +import { inferFormat } from "@dpkit/metadata" +import type { + LoadTableOptions, + SaveTableOptions, + TablePlugin, +} from "../../plugin.ts" +import type { Table } from "../../table/index.ts" +import { loadXlsxTable } from "./table/index.ts" +import { saveXlsxTable } from "./table/index.ts" export class XlsxPlugin implements TablePlugin { async loadTable(resource: Partial, options?: LoadTableOptions) 
diff --git a/xlsx/plugin.ts b/table/plugins/xlsx/plugin.ts similarity index 60% rename from xlsx/plugin.ts rename to table/plugins/xlsx/plugin.ts index 266be920..3480f1b3 100644 --- a/xlsx/plugin.ts +++ b/table/plugins/xlsx/plugin.ts @@ -1,9 +1,13 @@ -import type { Resource } from "@dpkit/core" -import { inferResourceFormat } from "@dpkit/core" -import type { LoadTableOptions } from "@dpkit/table" -import type { TablePlugin } from "@dpkit/table" -import type { SaveTableOptions, Table } from "@dpkit/table" -import { loadXlsxTable, saveXlsxTable } from "./table/index.ts" +import type { Resource } from "@dpkit/metadata" +import { inferFormat } from "@dpkit/metadata" +import type { + LoadTableOptions, + SaveTableOptions, + TablePlugin, +} from "../../plugin.ts" +import type { Table } from "../../table/index.ts" +import { loadXlsxTable } from "./table/index.ts" +import { saveXlsxTable } from "./table/index.ts" export class XlsxPlugin implements TablePlugin { async loadTable(resource: Partial<Resource>, options?: LoadTableOptions) { @@ -24,6 +28,6 @@ export class XlsxPlugin implements TablePlugin { } function getIsXlsx(resource: Partial<Resource>) { - const format = inferResourceFormat(resource) + const format = inferFormat(resource) return ["xlsx"].includes(format ?? "") }
diff --git a/xlsx/table/fixtures/generated/loadXlsxTable-file-variations-should-load-multipart-remote-file_2310816283/recording.har b/table/plugins/xlsx/table/fixtures/generated/loadXlsxTable-file-variations-should-load-multipart-remote-file_2310816283/recording.har similarity index 100% rename from xlsx/table/fixtures/generated/loadXlsxTable-file-variations-should-load-multipart-remote-file_2310816283/recording.har rename to table/plugins/xlsx/table/fixtures/generated/loadXlsxTable-file-variations-should-load-multipart-remote-file_2310816283/recording.har
diff --git a/xlsx/table/fixtures/generated/loadXlsxTable-file-variations-should-load-remote-file_504643743/recording.har b/table/plugins/xlsx/table/fixtures/generated/loadXlsxTable-file-variations-should-load-remote-file_504643743/recording.har similarity index 100% rename from xlsx/table/fixtures/generated/loadXlsxTable-file-variations-should-load-remote-file_504643743/recording.har rename to table/plugins/xlsx/table/fixtures/generated/loadXlsxTable-file-variations-should-load-remote-file_504643743/recording.har
diff --git a/xlsx/table/fixtures/table.xlsx b/table/plugins/xlsx/table/fixtures/table.xlsx similarity index 100% rename from xlsx/table/fixtures/table.xlsx rename to table/plugins/xlsx/table/fixtures/table.xlsx
diff --git a/xlsx/table/index.ts b/table/plugins/xlsx/table/index.ts similarity index 100% rename from xlsx/table/index.ts rename to table/plugins/xlsx/table/index.ts
diff --git a/xlsx/table/load.spec.ts b/table/plugins/xlsx/table/load.spec.ts similarity index 98% rename from xlsx/table/load.spec.ts rename to table/plugins/xlsx/table/load.spec.ts index 953b57b9..16f57539 100644 --- a/xlsx/table/load.spec.ts +++ b/table/plugins/xlsx/table/load.spec.ts @@ -1,6 +1,6 @@ -import { getTempFilePath } from "@dpkit/file" -import { useRecording } from "@dpkit/test" +import { getTempFilePath } from "@dpkit/dataset" import { describe, expect, it } from "vitest" +import { useRecording } from "vitest-polly" import { loadXlsxTable } from "./load.ts" import { writeTestData } from "./test.ts"
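The load specs now import `useRecording` from `vitest-polly` instead of the removed `@dpkit/test` wrapper; one call at module scope replays the checked-in `.har` fixtures for remote-file tests. A sketch with a placeholder URL:

```ts
import { describe, expect, it } from "vitest"
import { useRecording } from "vitest-polly"
import { loadXlsxTable } from "./load.ts"

// Record/replay HTTP traffic against the fixtures directory
useRecording()

describe("loadXlsxTable", () => {
  it("should load remote file", async () => {
    const table = await loadXlsxTable({
      path: "https://example.com/table.xlsx", // hypothetical URL
    })
    expect((await table.collect()).toRecords().length).toBeGreaterThan(0)
  })
})
```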
"../../../table/index.ts" +import type { Table } from "../../../table/index.ts" +import * as pl from "nodejs-polars" import { read, utils } from "xlsx" // Currently, we use slow non-rust implementation as in the future @@ -39,13 +41,13 @@ export async function loadXlsxTable( }) as DataRow[] const records = getRecordsFromRows(rows, dialect) - const table = DataFrame(records).lazy() + const table = pl.DataFrame(records).lazy() tables.push(table) } } - let table = concat(tables) + let table = pl.concat(tables) if (!options?.denormalized) { let schema = await resolveSchema(resource.schema) diff --git a/xlsx/table/save.spec.ts b/table/plugins/xlxs/table/save.spec.ts similarity index 57% rename from xlsx/table/save.spec.ts rename to table/plugins/xlxs/table/save.spec.ts index 0ec415e7..423e1882 100644 --- a/xlsx/table/save.spec.ts +++ b/table/plugins/xlxs/table/save.spec.ts @@ -1,6 +1,5 @@ -import { getTempFilePath } from "@dpkit/file" -import { DataFrame, DataType, Series } from "nodejs-polars" -import { readRecords } from "nodejs-polars" +import { getTempFilePath } from "@dpkit/dataset" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" import { loadXlsxTable } from "./load.ts" import { saveXlsxTable } from "./save.ts" @@ -8,7 +7,7 @@ import { readTestData } from "./test.ts" const row1 = { id: 1, name: "english" } const row2 = { id: 2, name: "中文" } -const table = readRecords([row1, row2]).lazy() +const table = pl.readRecords([row1, row2]).lazy() describe("saveXlsxTable", () => { it("should save table to file", async () => { @@ -22,23 +21,25 @@ describe("saveXlsxTable", () => { it("should save and load various data types", async () => { const path = getTempFilePath() - const source = DataFrame([ - Series("array", ["[1, 2, 3]"], DataType.String), - Series("boolean", [true], DataType.Bool), - Series("date", [new Date(Date.UTC(2025, 0, 1))], DataType.Date), - Series("datetime", [new Date(Date.UTC(2025, 0, 1))], DataType.Datetime), - Series("duration", ["P23DT23H"], DataType.String), - Series("geojson", ['{"value": 1}'], DataType.String), - Series("geopoint", [[40.0, 50.0]], DataType.List(DataType.Float32)), - Series("integer", [1], DataType.Int32), - Series("list", [[1.0, 2.0, 3.0]], DataType.List(DataType.Float32)), - Series("number", [1.1], DataType.Float64), - Series("object", ['{"value": 1}']), - Series("string", ["string"], DataType.String), - Series("time", [new Date(Date.UTC(2025, 0, 1))], DataType.Time), - Series("year", [2025], DataType.Int32), - Series("yearmonth", [[2025, 1]], DataType.List(DataType.Int16)), - ]).lazy() + const source = pl + .DataFrame([ + pl.Series("array", ["[1, 2, 3]"], pl.String), + pl.Series("boolean", [true], pl.Bool), + pl.Series("date", [new Date(Date.UTC(2025, 0, 1))], pl.Date), + pl.Series("datetime", [new Date(Date.UTC(2025, 0, 1))], pl.Datetime), + pl.Series("duration", ["P23DT23H"], pl.String), + pl.Series("geojson", ['{"value": 1}'], pl.String), + pl.Series("geopoint", [[40.0, 50.0]], pl.List(pl.Float32)), + pl.Series("integer", [1], pl.Int32), + pl.Series("list", [[1.0, 2.0, 3.0]], pl.List(pl.Float32)), + pl.Series("number", [1.1], pl.Float64), + pl.Series("object", ['{"value": 1}']), + pl.Series("string", ["string"], pl.String), + pl.Series("time", [new Date(Date.UTC(2025, 0, 1))], pl.Time), + pl.Series("year", [2025], pl.Int32), + pl.Series("yearmonth", [[2025, 1]], pl.List(pl.Int16)), + ]) + .lazy() await saveXlsxTable(source, { path, diff --git a/xlsx/table/save.ts b/table/plugins/xlxs/table/save.ts similarity index 67% 
diff --git a/xlsx/table/save.ts b/table/plugins/xlsx/table/save.ts similarity index 67% rename from xlsx/table/save.ts rename to table/plugins/xlsx/table/save.ts index 9c5420d6..eb3c99d5 100644 --- a/xlsx/table/save.ts +++ b/table/plugins/xlsx/table/save.ts @@ -1,8 +1,10 @@ -import { resolveDialect } from "@dpkit/core" -import { saveFile } from "@dpkit/file" -import { denormalizeTable, inferSchemaFromTable } from "@dpkit/table" -import type { SaveTableOptions, Table } from "@dpkit/table" +import { saveFile } from "@dpkit/dataset" +import { resolveDialect } from "@dpkit/metadata" import { utils, write } from "xlsx" +import type { SaveTableOptions } from "../../../plugin.ts" +import { inferSchemaFromTable } from "../../../schema/index.ts" +import { denormalizeTable } from "../../../table/index.ts" +import type { Table } from "../../../table/index.ts" // Currently, we use slow non-rust implementation as in the future // polars-rust might be able to provide a faster native implementation @@ -21,11 +23,11 @@ export async function saveXlsxTable(table: Table, options: SaveTableOptions) { nativeTypes: ["boolean", "integer", "number", "string", "year"], }) - const df = await table.collect() + const frame = await table.collect() const dialect = await resolveDialect(options.dialect) const sheetName = dialect?.sheetName ?? "Sheet1" - const sheet = utils.json_to_sheet(df.toRecords()) + const sheet = utils.json_to_sheet(frame.toRecords()) const book = utils.book_new() utils.book_append_sheet(book, sheet, sheetName)
diff --git a/xlsx/table/test.ts b/table/plugins/xlsx/table/test.ts similarity index 100% rename from xlsx/table/test.ts rename to table/plugins/xlsx/table/test.ts
diff --git a/table/schema/Mapping.ts b/table/schema/Mapping.ts index baaee2cc..7b6fb541 100644 --- a/table/schema/Mapping.ts +++ b/table/schema/Mapping.ts @@ -1,4 +1,4 @@ -import type { Schema } from "@dpkit/core" +import type { Schema } from "@dpkit/metadata" import type { PolarsSchema } from "./Schema.ts" export interface SchemaMapping {
diff --git a/table/schema/Options.ts b/table/schema/Options.ts index 08594c7c..0dde360b 100644 --- a/table/schema/Options.ts +++ b/table/schema/Options.ts @@ -1,6 +1,6 @@ -import type { GeojsonField, GeopointField, ListField } from "@dpkit/core" -import type { StringField } from "@dpkit/core" -import type { FieldType } from "@dpkit/core" +import type { GeojsonField, GeopointField, ListField } from "@dpkit/metadata" +import type { StringField } from "@dpkit/metadata" +import type { FieldType } from "@dpkit/metadata" export interface SchemaOptions { fieldNames?: string[]
diff --git a/table/schema/helpers.ts b/table/schema/helpers.ts index 5b15a458..5d00efc0 100644 --- a/table/schema/helpers.ts +++ b/table/schema/helpers.ts @@ -1,8 +1,8 @@ -import type { DataType } from "nodejs-polars" +import type * as pl from "nodejs-polars" import type { PolarsSchema } from "./Schema.ts" export function getPolarsSchema( - typeMapping: Record<string, DataType>, + typeMapping: Record<string, pl.DataType>, ): PolarsSchema { const entries = Object.entries(typeMapping) const fields = entries.map(([name, type]) => ({ name, type }))
diff --git a/table/schema/infer.spec.ts b/table/schema/infer.spec.ts index 141081ea..784ce419 100644 --- a/table/schema/infer.spec.ts +++ b/table/schema/infer.spec.ts @@ -1,14 +1,15 @@ -import { DataFrame, Series } from "nodejs-polars" -import { DataType } from "nodejs-polars" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" import { inferSchemaFromTable } from "./infer.ts" describe("inferSchemaFromTable", () => { it("should infer from native types", async () => { - const table = 
DataFrame({ - integer: Series("integer", [1, 2], DataType.Int32), - number: [1.1, 2.2], - }).lazy() + const table = pl + .DataFrame({ + integer: pl.Series("integer", [1, 2], pl.Int32), + number: [1.1, 2.2], + }) + .lazy() const schema = { fields: [ @@ -21,10 +22,12 @@ describe("inferSchemaFromTable", () => { }) it("should infer integers from floats", async () => { - const table = DataFrame({ - id: [1.0, 2.0, 3.0], - count: [10.0, 20.0, 30.0], - }).lazy() + const table = pl + .DataFrame({ + id: [1.0, 2.0, 3.0], + count: [10.0, 20.0, 30.0], + }) + .lazy() const schema = { fields: [ @@ -37,12 +40,14 @@ describe("inferSchemaFromTable", () => { }) it("should infer numeric", async () => { - const table = DataFrame({ - name1: ["1", "2", "3"], - name2: ["1,000", "2,000", "3,000"], - name3: ["1.1", "2.2", "3.3"], - name4: ["1,000.1", "2,000.2", "3,000.3"], - }).lazy() + const table = pl + .DataFrame({ + name1: ["1", "2", "3"], + name2: ["1,000", "2,000", "3,000"], + name3: ["1.1", "2.2", "3.3"], + name4: ["1,000.1", "2,000.2", "3,000.3"], + }) + .lazy() const schema = { fields: [ @@ -57,10 +62,12 @@ describe("inferSchemaFromTable", () => { }) it("should infer numeric (commaDecimal)", async () => { - const table = DataFrame({ - name1: ["1.000", "2.000", "3.000"], - name2: ["1.000,5", "2.000,5", "3.000,5"], - }).lazy() + const table = pl + .DataFrame({ + name1: ["1.000", "2.000", "3.000"], + name2: ["1.000,5", "2.000,5", "3.000,5"], + }) + .lazy() const schema = { fields: [ @@ -75,10 +82,12 @@ describe("inferSchemaFromTable", () => { }) it("should infer booleans", async () => { - const table = DataFrame({ - name1: ["true", "True", "TRUE"], - name2: ["false", "False", "FALSE"], - }).lazy() + const table = pl + .DataFrame({ + name1: ["true", "True", "TRUE"], + name2: ["false", "False", "FALSE"], + }) + .lazy() const schema = { fields: [ @@ -91,10 +100,12 @@ describe("inferSchemaFromTable", () => { }) it("should infer objects", async () => { - const table = DataFrame({ - name1: ['{"a": 1}'], - name2: ["{}"], - }).lazy() + const table = pl + .DataFrame({ + name1: ['{"a": 1}'], + name2: ["{}"], + }) + .lazy() const schema = { fields: [ @@ -107,10 +118,12 @@ describe("inferSchemaFromTable", () => { }) it("should infer arrays", async () => { - const table = DataFrame({ - name1: ["[1,2,3]"], - name2: ["[]"], - }).lazy() + const table = pl + .DataFrame({ + name1: ["[1,2,3]"], + name2: ["[]"], + }) + .lazy() const schema = { fields: [ @@ -123,9 +136,11 @@ describe("inferSchemaFromTable", () => { }) it("should infer dates with ISO format", async () => { - const table = DataFrame({ - name1: ["2023-01-15", "2023-02-20", "2023-03-25"], - }).lazy() + const table = pl + .DataFrame({ + name1: ["2023-01-15", "2023-02-20", "2023-03-25"], + }) + .lazy() const schema = { fields: [{ name: "name1", type: "date" }], @@ -135,11 +150,13 @@ describe("inferSchemaFromTable", () => { }) it("should infer dates with slash format", async () => { - const table = DataFrame({ - yearFirst: ["2023/01/15", "2023/02/20", "2023/03/25"], - dayMonth: ["15/01/2023", "20/02/2023", "25/03/2023"], - monthDay: ["01/15/2023", "02/20/2023", "03/25/2023"], - }).lazy() + const table = pl + .DataFrame({ + yearFirst: ["2023/01/15", "2023/02/20", "2023/03/25"], + dayMonth: ["15/01/2023", "20/02/2023", "25/03/2023"], + monthDay: ["01/15/2023", "02/20/2023", "03/25/2023"], + }) + .lazy() const schemaDefault = { fields: [ @@ -164,9 +181,11 @@ describe("inferSchemaFromTable", () => { }) it("should infer dates with hyphen format", async () => { - const table 
= DataFrame({ - dayMonth: ["15-01-2023", "20-02-2023", "25-03-2023"], - }).lazy() + const table = pl + .DataFrame({ + dayMonth: ["15-01-2023", "20-02-2023", "25-03-2023"], + }) + .lazy() const schemaDefault = { fields: [{ name: "dayMonth", type: "date", format: "%d-%m-%Y" }], @@ -183,10 +202,12 @@ describe("inferSchemaFromTable", () => { }) it("should infer times with standard format", async () => { - const table = DataFrame({ - fullTime: ["14:30:45", "08:15:30", "23:59:59"], - shortTime: ["14:30", "08:15", "23:59"], - }).lazy() + const table = pl + .DataFrame({ + fullTime: ["14:30:45", "08:15:30", "23:59:59"], + shortTime: ["14:30", "08:15", "23:59"], + }) + .lazy() const schema = { fields: [ @@ -199,10 +220,12 @@ describe("inferSchemaFromTable", () => { }) it("should infer times with 12-hour format", async () => { - const table = DataFrame({ - fullTime: ["2:30:45 PM", "8:15:30 AM", "11:59:59 PM"], - shortTime: ["2:30 PM", "8:15 AM", "11:59 PM"], - }).lazy() + const table = pl + .DataFrame({ + fullTime: ["2:30:45 PM", "8:15:30 AM", "11:59:59 PM"], + shortTime: ["2:30 PM", "8:15 AM", "11:59 PM"], + }) + .lazy() const schema = { fields: [ @@ -215,9 +238,11 @@ describe("inferSchemaFromTable", () => { }) it("should infer times with timezone offset", async () => { - const table = DataFrame({ - name: ["14:30:45+01:00", "08:15:30-05:00", "23:59:59+00:00"], - }).lazy() + const table = pl + .DataFrame({ + name: ["14:30:45+01:00", "08:15:30-05:00", "23:59:59+00:00"], + }) + .lazy() const schema = { fields: [{ name: "name", type: "time" }], @@ -227,28 +252,30 @@ describe("inferSchemaFromTable", () => { }) it("should infer datetimes with ISO format", async () => { - const table = DataFrame({ - standard: [ - "2023-01-15T14:30:45", - "2023-02-20T08:15:30", - "2023-03-25T23:59:59", - ], - utc: [ - "2023-01-15T14:30:45Z", - "2023-02-20T08:15:30Z", - "2023-03-25T23:59:59Z", - ], - withTz: [ - "2023-01-15T14:30:45+01:00", - "2023-02-20T08:15:30-05:00", - "2023-03-25T23:59:59+00:00", - ], - withSpace: [ - "2023-01-15 14:30:45", - "2023-02-20 08:15:30", - "2023-03-25 23:59:59", - ], - }).lazy() + const table = pl + .DataFrame({ + standard: [ + "2023-01-15T14:30:45", + "2023-02-20T08:15:30", + "2023-03-25T23:59:59", + ], + utc: [ + "2023-01-15T14:30:45Z", + "2023-02-20T08:15:30Z", + "2023-03-25T23:59:59Z", + ], + withTz: [ + "2023-01-15T14:30:45+01:00", + "2023-02-20T08:15:30-05:00", + "2023-03-25T23:59:59+00:00", + ], + withSpace: [ + "2023-01-15 14:30:45", + "2023-02-20 08:15:30", + "2023-03-25 23:59:59", + ], + }) + .lazy() const schema = { fields: [ @@ -263,28 +290,30 @@ describe("inferSchemaFromTable", () => { }) it("should infer datetimes with custom formats", async () => { - const table = DataFrame({ - shortDayMonth: [ - "15/01/2023 14:30", - "20/02/2023 08:15", - "25/03/2023 23:59", - ], - fullDayMonth: [ - "15/01/2023 14:30:45", - "20/02/2023 08:15:30", - "25/03/2023 23:59:59", - ], - shortMonthDay: [ - "01/15/2023 14:30", - "02/20/2023 08:15", - "03/25/2023 23:59", - ], - fullMonthDay: [ - "01/15/2023 14:30:45", - "02/20/2023 08:15:30", - "03/25/2023 23:59:59", - ], - }).lazy() + const table = pl + .DataFrame({ + shortDayMonth: [ + "15/01/2023 14:30", + "20/02/2023 08:15", + "25/03/2023 23:59", + ], + fullDayMonth: [ + "15/01/2023 14:30:45", + "20/02/2023 08:15:30", + "25/03/2023 23:59:59", + ], + shortMonthDay: [ + "01/15/2023 14:30", + "02/20/2023 08:15", + "03/25/2023 23:59", + ], + fullMonthDay: [ + "01/15/2023 14:30:45", + "02/20/2023 08:15:30", + "03/25/2023 23:59:59", + ], + }) + .lazy() 
const schemaDefault = { fields: [ @@ -311,11 +340,13 @@ describe("inferSchemaFromTable", () => { }) it("should infer lists", async () => { - const table = DataFrame({ - numericList: ["1.5,2.3", "4.1,5.9", "7.2,8.6"], - integerList: ["1,2", "3,4", "5,6"], - singleValue: ["1.5", "2.3", "4.1"], - }).lazy() + const table = pl + .DataFrame({ + numericList: ["1.5,2.3", "4.1,5.9", "7.2,8.6"], + integerList: ["1,2", "3,4", "5,6"], + singleValue: ["1.5", "2.3", "4.1"], + }) + .lazy() const schema = { fields: [ diff --git a/table/schema/infer.ts b/table/schema/infer.ts index d5b7232c..e9bffbe2 100644 --- a/table/schema/infer.ts +++ b/table/schema/infer.ts @@ -1,6 +1,5 @@ -import type { Field, Schema } from "@dpkit/core" -import type { DataFrame } from "nodejs-polars" -import { col } from "nodejs-polars" +import type { Field, Schema } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { getPolarsSchema } from "../schema/index.ts" import type { Table } from "../table/index.ts" import type { SchemaOptions } from "./Options.ts" @@ -27,7 +26,7 @@ export async function inferSchemaFromTable( } export function inferSchemaFromSample( - sample: DataFrame, + sample: pl.DataFrame, options?: Exclude, ) { const { confidence = 0.9, fieldTypes, keepStrings } = options ?? {} @@ -72,7 +71,7 @@ export function inferSchemaFromSample( if (!keepStrings) { for (const [regex, patch] of Object.entries(regexMapping)) { const failures = sample - .filter(col(name).str.contains(regex).not()) + .filter(pl.col(name).str.contains(regex).not()) .head(failureThreshold).height if (failures < failureThreshold) { @@ -85,7 +84,7 @@ export function inferSchemaFromSample( if (type === "number") { const failures = sample - .filter(col(name).eq(col(name).round(0)).not()) + .filter(pl.col(name).eq(pl.col(name).round(0)).not()) .head(failureThreshold).height if (failures < failureThreshold) { diff --git a/table/schema/match.ts b/table/schema/match.ts index 24583b0f..d505664d 100644 --- a/table/schema/match.ts +++ b/table/schema/match.ts @@ -1,4 +1,4 @@ -import type { Field } from "@dpkit/core" +import type { Field } from "@dpkit/metadata" import type { SchemaMapping } from "./Mapping.ts" export function matchSchemaField( diff --git a/table/table/Frame.ts b/table/table/Frame.ts new file mode 100644 index 00000000..e9df756f --- /dev/null +++ b/table/table/Frame.ts @@ -0,0 +1,3 @@ +import type * as pl from "nodejs-polars" + +export type Frame = pl.DataFrame diff --git a/table/table/Table.ts b/table/table/Table.ts index 8108f290..954d1bcd 100644 --- a/table/table/Table.ts +++ b/table/table/Table.ts @@ -1,3 +1,3 @@ -import type { LazyDataFrame } from "nodejs-polars" +import type * as pl from "nodejs-polars" -export type Table = LazyDataFrame +export type Table = pl.LazyDataFrame diff --git a/table/table/checks/unique.spec.ts b/table/table/checks/unique.spec.ts index c3365a80..6d909c43 100644 --- a/table/table/checks/unique.spec.ts +++ b/table/table/checks/unique.spec.ts @@ -1,14 +1,16 @@ -import type { Schema } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Schema } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" -import { validateTable } from "../../table/index.ts" +import { inspectTable } from "../../table/index.ts" -describe("validateTable (row/unique)", () => { - it("should not report errors when all rows are unique for primary key", async () => { - const table = DataFrame({ - id: [1, 2, 3, 4, 5], - name: ["Alice", "Bob", "Charlie", "David", 
"Eve"], - }).lazy() +describe("inspectTable (row/unique)", () => { + it("should not errors when all rows are unique for primary key", async () => { + const table = pl + .DataFrame({ + id: [1, 2, 3, 4, 5], + name: ["Alice", "Bob", "Charlie", "David", "Eve"], + }) + .lazy() const schema: Schema = { fields: [ @@ -18,15 +20,17 @@ describe("validateTable (row/unique)", () => { primaryKey: ["id"], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) - it("should report errors for duplicate primary key rows", async () => { - const table = DataFrame({ - id: [1, 2, 3, 2, 5], - name: ["Alice", "Bob", "Charlie", "Bob2", "Eve"], - }).lazy() + it("should errors for duplicate primary key rows", async () => { + const table = pl + .DataFrame({ + id: [1, 2, 3, 2, 5], + name: ["Alice", "Bob", "Charlie", "Bob2", "Eve"], + }) + .lazy() const schema: Schema = { fields: [ @@ -36,7 +40,7 @@ describe("validateTable (row/unique)", () => { primaryKey: ["id"], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors.filter(e => e.type === "row/unique")).toHaveLength(1) expect(errors).toContainEqual({ @@ -46,17 +50,19 @@ describe("validateTable (row/unique)", () => { }) }) - it("should not report errors when all rows are unique for unique key", async () => { - const table = DataFrame({ - id: [1, 2, 3, 4, 5], - email: [ - "a@test.com", - "b@test.com", - "c@test.com", - "d@test.com", - "e@test.com", - ], - }).lazy() + it("should not errors when all rows are unique for unique key", async () => { + const table = pl + .DataFrame({ + id: [1, 2, 3, 4, 5], + email: [ + "a@test.com", + "b@test.com", + "c@test.com", + "d@test.com", + "e@test.com", + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -66,21 +72,23 @@ describe("validateTable (row/unique)", () => { uniqueKeys: [["email"]], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toHaveLength(0) }) - it("should report errors for duplicate unique key rows", async () => { - const table = DataFrame({ - id: [1, 2, 3, 4, 5], - email: [ - "a@test.com", - "b@test.com", - "a@test.com", - "d@test.com", - "b@test.com", - ], - }).lazy() + it("should errors for duplicate unique key rows", async () => { + const table = pl + .DataFrame({ + id: [1, 2, 3, 4, 5], + email: [ + "a@test.com", + "b@test.com", + "a@test.com", + "d@test.com", + "b@test.com", + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -90,7 +98,7 @@ describe("validateTable (row/unique)", () => { uniqueKeys: [["email"]], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors.filter(e => e.type === "row/unique")).toHaveLength(2) expect(errors).toContainEqual({ type: "row/unique", @@ -105,11 +113,13 @@ describe("validateTable (row/unique)", () => { }) it("should handle composite unique keys", async () => { - const table = DataFrame({ - category: ["A", "A", "B", "A", "B"], - subcategory: ["X", "Y", "X", "X", "Y"], - value: [1, 2, 3, 4, 5], - }).lazy() + const table = pl + .DataFrame({ + category: ["A", "A", "B", "A", "B"], + subcategory: ["X", "Y", "X", "X", "Y"], + value: [1, 2, 3, 4, 5], + }) + .lazy() const schema: Schema = { fields: [ @@ -120,7 +130,7 @@ describe("validateTable (row/unique)", () => { uniqueKeys: [["category", "subcategory"]], } - const { errors } = await 
validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors.filter(e => e.type === "row/unique")).toHaveLength(1) expect(errors).toContainEqual({ type: "row/unique", @@ -130,16 +140,18 @@ }) it("should handle both primary key and unique keys", async () => { - const table = DataFrame({ - id: [1, 2, 3, 2, 5], - email: [ - "a@test.com", - "b@test.com", - "c@test.com", - "d@test.com", - "a@test.com", - ], - }).lazy() + const table = pl + .DataFrame({ + id: [1, 2, 3, 2, 5], + email: [ + "a@test.com", + "b@test.com", + "c@test.com", + "d@test.com", + "a@test.com", + ], + }) + .lazy() const schema: Schema = { fields: [ @@ -150,7 +162,7 @@ uniqueKeys: [["email"]], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors.filter(e => e.type === "row/unique")).toHaveLength(2) expect(errors).toContainEqual({ type: "row/unique", @@ -165,10 +177,12 @@ }) it("should handle null values in unique keys correctly", async () => { - const table = DataFrame({ - id: [1, 2, null, 4, null, 2], - name: ["Alice", "Bob", "Charlie", "David", "Eve", "Bob"], - }).lazy() + const table = pl + .DataFrame({ + id: [1, 2, null, 4, null, 2], + name: ["Alice", "Bob", "Charlie", "David", "Eve", "Bob"], + }) + .lazy() const schema: Schema = { fields: [ @@ -178,7 +192,7 @@ uniqueKeys: [["id"], ["id", "name"]], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) console.log(errors) expect(errors).toHaveLength(2) diff --git a/table/table/checks/unique.ts b/table/table/checks/unique.ts index 77d085fd..6b6b8389 100644 --- a/table/table/checks/unique.ts +++ b/table/table/checks/unique.ts @@ -1,5 +1,5 @@ -import { concatList } from "nodejs-polars" -import type { RowUniqueError } from "../../error/index.ts" +import type { RowUniqueError } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import type { SchemaMapping } from "../../schema/index.ts" export function createChecksRowUnique(mapping: SchemaMapping) { @@ -13,11 +13,12 @@ export function createChecksRowUnique(mapping: SchemaMapping) { } function createCheckRowUnique(uniqueKey: string[]) { - const isErrorExpr = concatList(uniqueKey) + const isErrorExpr = pl + .concatList(uniqueKey) .isFirstDistinct() .not() // Fold is not available so we use a tricky way to eliminate nulls - .and(concatList(uniqueKey).lst.min().isNotNull()) + .and(pl.concatList(uniqueKey).lst.min().isNotNull()) const errorTemplate: RowUniqueError = { type: "row/unique", diff --git a/table/table/denormalize.ts b/table/table/denormalize.ts index 37b7bfae..d18278a4 100644 --- a/table/table/denormalize.ts +++ b/table/table/denormalize.ts @@ -1,5 +1,5 @@ -import type { Schema } from "@dpkit/core" -import type { Expr } from "nodejs-polars" +import type { Schema } from "@dpkit/metadata" +import type * as pl from "nodejs-polars" import { denormalizeField } from "../field/index.ts" import type { DenormalizeFieldOptions } from "../field/index.ts" import type { Table } from "./Table.ts" @@ -16,7 +16,7 @@ export function denormalizeFields( schema: Schema, options?: DenormalizeFieldOptions, ) { - const exprs: Record<string, Expr> = {} + const exprs: Record<string, pl.Expr> = {} for (const field of schema.fields) { const missingValues = field.missingValues ?? 
schema.missingValues diff --git a/table/table/helpers.spec.ts b/table/table/helpers.spec.ts index 0a90b50a..ca970b12 100644 --- a/table/table/helpers.spec.ts +++ b/table/table/helpers.spec.ts @@ -1,4 +1,4 @@ -import { DataFrame } from "nodejs-polars" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" import { joinHeaderRows, @@ -8,17 +8,19 @@ import { describe("joinHeaderRows", () => { it("should join two header rows with default space separator", async () => { - const table = DataFrame({ - col1: ["first", "name", "header3", "Alice", "Bob"], - col2: ["last", "name", "header3", "Smith", "Jones"], - col3: [ - "contact", - "email", - "header3", - "alice@example.com", - "bob@example.com", - ], - }).lazy() + const table = pl + .DataFrame({ + col1: ["first", "name", "header3", "Alice", "Bob"], + col2: ["last", "name", "header3", "Smith", "Jones"], + col3: [ + "contact", + "email", + "header3", + "alice@example.com", + "bob@example.com", + ], + }) + .lazy() const result = await joinHeaderRows(table, { dialect: { headerRows: [2, 3] }, @@ -38,11 +40,13 @@ describe("joinHeaderRows", () => { }) it("should join two header rows with custom separator", async () => { - const table = DataFrame({ - col1: ["user", "first", "header3", "Alice", "Bob"], - col2: ["user", "last", "header3", "Smith", "Jones"], - col3: ["meta", "created", "header3", "2023-01-01", "2023-01-02"], - }).lazy() + const table = pl + .DataFrame({ + col1: ["user", "first", "header3", "Alice", "Bob"], + col2: ["user", "last", "header3", "Smith", "Jones"], + col3: ["meta", "created", "header3", "2023-01-01", "2023-01-02"], + }) + .lazy() const result = await joinHeaderRows(table, { dialect: { headerRows: [2, 3], headerJoin: "_" }, @@ -58,11 +62,13 @@ describe("joinHeaderRows", () => { }) it("should return table unchanged when only one header row", async () => { - const table = DataFrame({ - name: ["Alice", "Bob"], - age: [30, 25], - city: ["NYC", "LA"], - }).lazy() + const table = pl + .DataFrame({ + name: ["Alice", "Bob"], + age: [30, 25], + city: ["NYC", "LA"], + }) + .lazy() const result = await joinHeaderRows(table, { dialect: { headerRows: [1] }, @@ -74,11 +80,13 @@ describe("joinHeaderRows", () => { }) it("should return table unchanged when no header rows", async () => { - const table = DataFrame({ - field1: ["Alice", "Bob"], - field2: [30, 25], - field3: ["NYC", "LA"], - }).lazy() + const table = pl + .DataFrame({ + field1: ["Alice", "Bob"], + field2: [30, 25], + field3: ["NYC", "LA"], + }) + .lazy() const result = await joinHeaderRows(table, { dialect: { header: false }, @@ -90,11 +98,13 @@ describe("joinHeaderRows", () => { }) it("should join three header rows", async () => { - const table = DataFrame({ - col1: ["person", "user", "first", "header4", "Alice", "Bob"], - col2: ["person", "user", "last", "header4", "Smith", "Jones"], - col3: ["location", "address", "city", "header4", "NYC", "LA"], - }).lazy() + const table = pl + .DataFrame({ + col1: ["person", "user", "first", "header4", "Alice", "Bob"], + col2: ["person", "user", "last", "header4", "Smith", "Jones"], + col3: ["location", "address", "city", "header4", "NYC", "LA"], + }) + .lazy() const result = await joinHeaderRows(table, { dialect: { headerRows: [2, 3, 4] }, @@ -114,11 +124,13 @@ describe("joinHeaderRows", () => { }) it("should handle empty strings in header rows", async () => { - const table = DataFrame({ - col1: ["person", "", "header3", "Alice", "Bob"], - col2: ["", "name", "header3", "Smith", "Jones"], - col3: ["location", "city", 
"header3", "NYC", "LA"], - }).lazy() + const table = pl + .DataFrame({ + col1: ["person", "", "header3", "Alice", "Bob"], + col2: ["", "name", "header3", "Smith", "Jones"], + col3: ["location", "city", "header3", "NYC", "LA"], + }) + .lazy() const result = await joinHeaderRows(table, { dialect: { headerRows: [2, 3] }, @@ -136,11 +148,13 @@ describe("joinHeaderRows", () => { describe("skipCommentRows", () => { it("should skip comment rows by row number", async () => { - const table = DataFrame({ - name: ["Alice", "# Comment", "Bob", "Charlie"], - age: [30, 0, 25, 35], - city: ["NYC", "ignored", "LA", "SF"], - }).lazy() + const table = pl + .DataFrame({ + name: ["Alice", "# Comment", "Bob", "Charlie"], + age: [30, 0, 25, 35], + city: ["NYC", "ignored", "LA", "SF"], + }) + .lazy() const result = skipCommentRows(table, { dialect: { commentRows: [2], header: false }, @@ -154,11 +168,13 @@ describe("skipCommentRows", () => { }) it("should skip multiple comment rows", async () => { - const table = DataFrame({ - name: ["Alice", "# Comment 1", "Bob", "# Comment 2", "Charlie"], - age: [30, 0, 25, 0, 35], - city: ["NYC", "ignored", "LA", "ignored", "SF"], - }).lazy() + const table = pl + .DataFrame({ + name: ["Alice", "# Comment 1", "Bob", "# Comment 2", "Charlie"], + age: [30, 0, 25, 0, 35], + city: ["NYC", "ignored", "LA", "ignored", "SF"], + }) + .lazy() const result = skipCommentRows(table, { dialect: { commentRows: [2, 4], header: false }, @@ -172,11 +188,13 @@ describe("skipCommentRows", () => { }) it("should return table unchanged when no commentRows specified", async () => { - const table = DataFrame({ - name: ["Alice", "Bob", "Charlie"], - age: [30, 25, 35], - city: ["NYC", "LA", "SF"], - }).lazy() + const table = pl + .DataFrame({ + name: ["Alice", "Bob", "Charlie"], + age: [30, 25, 35], + city: ["NYC", "LA", "SF"], + }) + .lazy() const result = skipCommentRows(table, { dialect: {}, @@ -188,11 +206,13 @@ describe("skipCommentRows", () => { }) it("should skip rows after header when headerRows specified", async () => { - const table = DataFrame({ - col1: ["name", "Alice", "# Comment", "Bob"], - col2: ["age", "30", "-1", "25"], - col3: ["city", "NYC", "ignored", "LA"], - }).lazy() + const table = pl + .DataFrame({ + col1: ["name", "Alice", "# Comment", "Bob"], + col2: ["age", "30", "-1", "25"], + col3: ["city", "NYC", "ignored", "LA"], + }) + .lazy() const result = skipCommentRows(table, { dialect: { headerRows: [2], commentRows: [5] }, @@ -206,11 +226,13 @@ describe("skipCommentRows", () => { }) it("should handle commentRows at the beginning", async () => { - const table = DataFrame({ - name: ["# Skip this", "Alice", "Bob"], - age: [0, 30, 25], - city: ["ignored", "NYC", "LA"], - }).lazy() + const table = pl + .DataFrame({ + name: ["# Skip this", "Alice", "Bob"], + age: [0, 30, 25], + city: ["ignored", "NYC", "LA"], + }) + .lazy() const result = skipCommentRows(table, { dialect: { commentRows: [1], header: false }, @@ -223,11 +245,13 @@ describe("skipCommentRows", () => { }) it("should handle commentRows at the end", async () => { - const table = DataFrame({ - name: ["Alice", "Bob", "# Footer comment"], - age: [30, 25, 0], - city: ["NYC", "LA", "ignored"], - }).lazy() + const table = pl + .DataFrame({ + name: ["Alice", "Bob", "# Footer comment"], + age: [30, 25, 0], + city: ["NYC", "LA", "ignored"], + }) + .lazy() const result = skipCommentRows(table, { dialect: { commentRows: [3], header: false }, @@ -240,11 +264,13 @@ describe("skipCommentRows", () => { }) it("should handle multiple header 
rows with commentRows", async () => { - const table = DataFrame({ - col1: ["person", "first", "Alice", "# Comment", "Bob"], - col2: ["person", "last", "Smith", "ignored", "Jones"], - col3: ["location", "city", "NYC", "ignored", "LA"], - }).lazy() + const table = pl + .DataFrame({ + col1: ["person", "first", "Alice", "# Comment", "Bob"], + col2: ["person", "last", "Smith", "ignored", "Jones"], + col3: ["location", "city", "NYC", "ignored", "LA"], + }) + .lazy() const result = skipCommentRows(table, { dialect: { headerRows: [2, 3], commentRows: [7] }, @@ -261,11 +287,13 @@ describe("skipCommentRows", () => { describe("stripInitialSpace", () => { it("should strip leading and trailing spaces from all columns", async () => { - const table = DataFrame({ - name: [" Alice ", " Bob", "Charlie "], - age: ["30", " 25 ", "35"], - city: [" NYC", "LA ", " SF "], - }).lazy() + const table = pl + .DataFrame({ + name: [" Alice ", " Bob", "Charlie "], + age: ["30", " 25 ", "35"], + city: [" NYC", "LA ", " SF "], + }) + .lazy() const result = stripInitialSpace(table, { dialect: { skipInitialSpace: true }, @@ -278,11 +306,13 @@ describe("stripInitialSpace", () => { }) it("should return table unchanged when skipInitialSpace is false", async () => { - const table = DataFrame({ - name: [" Alice ", " Bob"], - age: ["30", " 25 "], - city: [" NYC", "LA "], - }).lazy() + const table = pl + .DataFrame({ + name: [" Alice ", " Bob"], + age: ["30", " 25 "], + city: [" NYC", "LA "], + }) + .lazy() const result = stripInitialSpace(table, { dialect: { skipInitialSpace: false }, @@ -294,11 +324,13 @@ describe("stripInitialSpace", () => { }) it("should return table unchanged when skipInitialSpace is not specified", async () => { - const table = DataFrame({ - name: [" Alice ", " Bob"], - age: ["30", " 25 "], - city: [" NYC", "LA "], - }).lazy() + const table = pl + .DataFrame({ + name: [" Alice ", " Bob"], + age: ["30", " 25 "], + city: [" NYC", "LA "], + }) + .lazy() const result = stripInitialSpace(table, { dialect: {}, @@ -310,11 +342,13 @@ describe("stripInitialSpace", () => { }) it("should handle strings with no spaces", async () => { - const table = DataFrame({ - name: ["Alice", "Bob"], - age: ["30", "25"], - city: ["NYC", "LA"], - }).lazy() + const table = pl + .DataFrame({ + name: ["Alice", "Bob"], + age: ["30", "25"], + city: ["NYC", "LA"], + }) + .lazy() const result = stripInitialSpace(table, { dialect: { skipInitialSpace: true }, @@ -326,11 +360,13 @@ describe("stripInitialSpace", () => { }) it("should handle empty strings", async () => { - const table = DataFrame({ - name: ["Alice", ""], - age: ["30", " "], - city: ["", "LA"], - }).lazy() + const table = pl + .DataFrame({ + name: ["Alice", ""], + age: ["30", " "], + city: ["", "LA"], + }) + .lazy() const result = stripInitialSpace(table, { dialect: { skipInitialSpace: true }, @@ -342,11 +378,13 @@ describe("stripInitialSpace", () => { }) it("should handle strings with multiple spaces", async () => { - const table = DataFrame({ - name: [" Alice ", " Bob"], - age: ["30 ", " 25 "], - city: [" NYC ", " LA "], - }).lazy() + const table = pl + .DataFrame({ + name: [" Alice ", " Bob"], + age: ["30 ", " 25 "], + city: [" NYC ", " LA "], + }) + .lazy() const result = stripInitialSpace(table, { dialect: { skipInitialSpace: true }, @@ -358,11 +396,13 @@ describe("stripInitialSpace", () => { }) it("should handle tabs and other whitespace", async () => { - const table = DataFrame({ - name: ["\tAlice\t", "\nBob"], - age: ["30\n", "\t25\t"], - city: ["\tNYC", "LA\n"], - 
}).lazy() + const table = pl + .DataFrame({ + name: ["\tAlice\t", "\nBob"], + age: ["30\n", "\t25\t"], + city: ["\tNYC", "LA\n"], + }) + .lazy() const result = stripInitialSpace(table, { dialect: { skipInitialSpace: true }, diff --git a/table/table/helpers.ts b/table/table/helpers.ts index b52fb979..f9afb63e 100644 --- a/table/table/helpers.ts +++ b/table/table/helpers.ts @@ -1,5 +1,5 @@ -import type { Dialect } from "@dpkit/core" -import { col } from "nodejs-polars" +import type { Dialect } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import type { Table } from "../table/index.ts" export async function joinHeaderRows( @@ -17,9 +17,9 @@ export async function joinHeaderRows( const extraLabelsFrame = await table .withRowCount() - .withColumn(col("row_nr").add(1)) - .filter(col("row_nr").add(headerOffset).isIn(headerRows)) - .select(...table.columns.map(name => col(name).str.concat(headerJoin))) + .withColumn(pl.col("row_nr").add(1)) + .filter(pl.col("row_nr").add(headerOffset).isIn(headerRows)) + .select(...table.columns.map(name => pl.col(name).str.concat(headerJoin))) .collect() const labels = table.columns @@ -34,8 +34,8 @@ export async function joinHeaderRows( return table .withRowCount() - .withColumn(col("row_nr").add(1)) - .filter(col("row_nr").add(headerOffset).isIn(headerRows).not()) + .withColumn(pl.col("row_nr").add(1)) + .filter(pl.col("row_nr").add(headerOffset).isIn(headerRows).not()) .rename(mapping) .drop("row_nr") } @@ -50,8 +50,8 @@ export function skipCommentRows(table: Table, options: { dialect: Dialect }) { return table .withRowCount() - .withColumn(col("row_nr").add(1)) - .filter(col("row_nr").add(commentOffset).isIn(dialect.commentRows).not()) + .withColumn(pl.col("row_nr").add(1)) + .filter(pl.col("row_nr").add(commentOffset).isIn(dialect.commentRows).not()) .drop("row_nr") } @@ -65,7 +65,7 @@ export function stripInitialSpace(table: Table, options: { dialect: Dialect }) { return table.select( // TODO: rebase on stripCharsStart when it's fixed in polars // https://github.com/pola-rs/nodejs-polars/issues/336 - table.columns.map(name => col(name).str.strip().as(name)), + table.columns.map(name => pl.col(name).str.strip().as(name)), ) } diff --git a/table/table/index.ts b/table/table/index.ts index 9e1285fe..a439bcb2 100644 --- a/table/table/index.ts +++ b/table/table/index.ts @@ -1,6 +1,7 @@ export { normalizeTable } from "./normalize.ts" export { denormalizeTable } from "./denormalize.ts" -export { validateTable } from "./validate.ts" +export { inspectTable } from "./inspect.ts" +export type { Frame } from "./Frame.ts" export type { Table } from "./Table.ts" export { skipCommentRows } from "./helpers.ts" export { joinHeaderRows } from "./helpers.ts" diff --git a/table/table/validate.spec.ts b/table/table/inspect.spec.ts similarity index 66% rename from table/table/validate.spec.ts rename to table/table/inspect.spec.ts index a9756e71..084bd4fc 100644 --- a/table/table/validate.spec.ts +++ b/table/table/inspect.spec.ts @@ -1,15 +1,17 @@ -import type { Schema } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Schema } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" -import { validateTable } from "./validate.ts" +import { inspectTable } from "./inspect.ts" -describe("validateTable", () => { +describe("inspectTable", () => { describe("fields validation with fieldsMatch='exact'", () => { it("should pass when fields exactly match", async () => { - const table = DataFrame({ - 
id: [1, 2], - name: ["John", "Jane"], - }).lazy() + const table = pl + .DataFrame({ + id: [1, 2], + name: ["John", "Jane"], + }) + .lazy() const schema: Schema = { fields: [ @@ -18,16 +20,18 @@ describe("validateTable", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([]) }) it("should not have fields error when fields same length", async () => { - const table = DataFrame({ - id: [1, 2], - age: [30, 25], - }).lazy() + const table = pl + .DataFrame({ + id: [1, 2], + age: [30, 25], + }) + .lazy() const schema: Schema = { fieldsMatch: "exact", @@ -37,7 +41,7 @@ describe("validateTable", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([ { type: "field/name", @@ -49,11 +53,13 @@ describe("validateTable", () => { }) it("should detect extra fields", async () => { - const table = DataFrame({ - id: [1, 2], - name: ["John", "Jane"], - age: [30, 25], - }).lazy() + const table = pl + .DataFrame({ + id: [1, 2], + name: ["John", "Jane"], + age: [30, 25], + }) + .lazy() const schema: Schema = { fields: [ @@ -62,7 +68,7 @@ describe("validateTable", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toContainEqual({ type: "fields/extra", fieldNames: ["age"], @@ -70,9 +76,11 @@ describe("validateTable", () => { }) it("should detect missing fields", async () => { - const table = DataFrame({ - id: [1, 2], - }).lazy() + const table = pl + .DataFrame({ + id: [1, 2], + }) + .lazy() const schema: Schema = { fields: [ @@ -81,7 +89,7 @@ describe("validateTable", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toContainEqual({ type: "fields/missing", fieldNames: ["name"], @@ -90,10 +98,12 @@ describe("validateTable", () => { describe("fields validation with fieldsMatch='equal'", () => { it("should pass when field names match regardless of order", async () => { - const table = DataFrame({ - name: ["John", "Jane"], - id: [1, 2], - }).lazy() + const table = pl + .DataFrame({ + name: ["John", "Jane"], + id: [1, 2], + }) + .lazy() const schema: Schema = { fieldsMatch: "equal", @@ -103,16 +113,18 @@ describe("validateTable", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([]) }) it("should detect extra fields", async () => { - const table = DataFrame({ - id: [1, 2], - name: ["John", "Jane"], - age: [30, 25], - }).lazy() + const table = pl + .DataFrame({ + id: [1, 2], + name: ["John", "Jane"], + age: [30, 25], + }) + .lazy() const schema: Schema = { fieldsMatch: "equal", @@ -122,7 +134,7 @@ describe("validateTable", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toContainEqual({ type: "fields/extra", fieldNames: ["age"], @@ -130,9 +142,11 @@ describe("validateTable", () => { }) it("should detect missing fields", async () => { - const table = DataFrame({ - id: [1, 2], - }).lazy() + const table = pl + .DataFrame({ + id: [1, 2], + }) + .lazy() const schema: Schema = { fieldsMatch: "equal", @@ -146,7 +160,7 @@ describe("validateTable", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await 
inspectTable(table, { schema }) expect(errors).toContainEqual({ type: "fields/missing", fieldNames: ["name"], @@ -154,9 +168,11 @@ describe("validateTable", () => { }) it("should pass when non-required fields are missing", async () => { - const table = DataFrame({ - id: [1, 2], - }).lazy() + const table = pl + .DataFrame({ + id: [1, 2], + }) + .lazy() const schema: Schema = { fieldsMatch: "equal", @@ -166,18 +182,20 @@ describe("validateTable", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([]) }) }) describe("fields validation with fieldsMatch='subset'", () => { it("should pass when data contains all schema fields", async () => { - const table = DataFrame({ - id: [1, 2], - name: ["John", "Jane"], - age: [30, 25], - }).lazy() + const table = pl + .DataFrame({ + id: [1, 2], + name: ["John", "Jane"], + age: [30, 25], + }) + .lazy() const schema: Schema = { fieldsMatch: "subset", @@ -187,15 +205,17 @@ describe("validateTable", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([]) }) it("should pass when data contains exact schema fields", async () => { - const table = DataFrame({ - id: [1, 2], - name: ["John", "Jane"], - }).lazy() + const table = pl + .DataFrame({ + id: [1, 2], + name: ["John", "Jane"], + }) + .lazy() const schema: Schema = { fieldsMatch: "subset", @@ -205,14 +225,16 @@ describe("validateTable", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([]) }) it("should detect missing fields", async () => { - const table = DataFrame({ - id: [1, 2], - }).lazy() + const table = pl + .DataFrame({ + id: [1, 2], + }) + .lazy() const schema: Schema = { fieldsMatch: "subset", @@ -226,7 +248,7 @@ describe("validateTable", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toContainEqual({ type: "fields/missing", fieldNames: ["name"], @@ -234,9 +256,11 @@ describe("validateTable", () => { }) it("should pass when non-required fields are missing", async () => { - const table = DataFrame({ - id: [1, 2], - }).lazy() + const table = pl + .DataFrame({ + id: [1, 2], + }) + .lazy() const schema: Schema = { fieldsMatch: "subset", @@ -246,16 +270,18 @@ describe("validateTable", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([]) }) }) describe("fields validation with fieldsMatch='superset'", () => { it("should pass when schema contains all data fields", async () => { - const table = DataFrame({ - id: [1, 2], - }).lazy() + const table = pl + .DataFrame({ + id: [1, 2], + }) + .lazy() const schema: Schema = { fieldsMatch: "superset", @@ -265,15 +291,17 @@ describe("validateTable", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([]) }) it("should pass when schema contains exact data fields", async () => { - const table = DataFrame({ - id: [1, 2], - name: ["John", "Jane"], - }).lazy() + const table = pl + .DataFrame({ + id: [1, 2], + name: ["John", "Jane"], + }) + .lazy() const schema: Schema = { fieldsMatch: "superset", @@ -283,16 +311,18 @@ describe("validateTable", () => { ], } - const { errors } = await 
validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([]) }) it("should detect extra fields", async () => { - const table = DataFrame({ - id: [1, 2], - name: ["John", "Jane"], - age: [30, 25], - }).lazy() + const table = pl + .DataFrame({ + id: [1, 2], + name: ["John", "Jane"], + age: [30, 25], + }) + .lazy() const schema: Schema = { fieldsMatch: "superset", @@ -302,7 +332,7 @@ describe("validateTable", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toContainEqual({ type: "fields/extra", fieldNames: ["age"], @@ -312,10 +342,12 @@ describe("validateTable", () => { describe("fields validation with fieldsMatch='partial'", () => { it("should pass when at least one field matches", async () => { - const table = DataFrame({ - id: [1, 2], - age: [30, 25], - }).lazy() + const table = pl + .DataFrame({ + id: [1, 2], + age: [30, 25], + }) + .lazy() const schema: Schema = { fieldsMatch: "partial", @@ -325,15 +357,17 @@ describe("validateTable", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toEqual([]) }) it("should detect when no fields match", async () => { - const table = DataFrame({ - age: [30, 25], - email: ["john@example.com", "jane@example.com"], - }).lazy() + const table = pl + .DataFrame({ + age: [30, 25], + email: ["john@example.com", "jane@example.com"], + }) + .lazy() const schema: Schema = { fieldsMatch: "partial", @@ -343,7 +377,7 @@ describe("validateTable", () => { ], } - const { errors } = await validateTable(table, { schema }) + const errors = await inspectTable(table, { schema }) expect(errors).toContainEqual({ type: "fields/missing", fieldNames: ["id", "name"], diff --git a/table/table/validate.ts b/table/table/inspect.ts similarity index 83% rename from table/table/validate.ts rename to table/table/inspect.ts index 5e06b486..5978ea9d 100644 --- a/table/table/validate.ts +++ b/table/table/inspect.ts @@ -1,10 +1,10 @@ import os from "node:os" -import type { Field, Schema } from "@dpkit/core" -import { col, lit, when } from "nodejs-polars" +import type { Field, Schema } from "@dpkit/metadata" +import type { RowError } from "@dpkit/metadata" +import type { TableError } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import pAll from "p-all" -import type { RowError } from "../error/index.ts" -import type { TableError } from "../error/index.ts" -import { validateField } from "../field/index.ts" +import { inspectField } from "../field/index.ts" import { arrayDiff } from "../helpers.ts" import { matchSchemaField } from "../schema/index.ts" import { getPolarsSchema } from "../schema/index.ts" @@ -12,7 +12,7 @@ import type { SchemaMapping } from "../schema/index.ts" import type { Table } from "./Table.ts" import { createChecksRowUnique } from "./checks/unique.ts" -export async function validateTable( +export async function inspectTable( table: Table, options?: { schema?: Schema @@ -28,23 +28,20 @@ export async function validateTable( const polarsSchema = getPolarsSchema(sample.schema) const mapping = { source: polarsSchema, target: schema } - const matchErrors = validateFieldsMatch(mapping) + const matchErrors = inspectFieldsMatch(mapping) errors.push(...matchErrors) - const fieldErrors = await validateFields(mapping, table, { maxErrors }) + const fieldErrors = await inspectFields(mapping, table, { maxErrors }) 
errors.push(...fieldErrors) - const rowErrors = await validateRows(mapping, table, { maxErrors }) + const rowErrors = await inspectRows(mapping, table, { maxErrors }) errors.push(...rowErrors) } - return { - errors: errors.slice(0, maxErrors), - valid: !errors.length, - } + return errors.slice(0, maxErrors) } -function validateFieldsMatch(mapping: SchemaMapping) { +function inspectFieldsMatch(mapping: SchemaMapping) { const errors: TableError[] = [] const fieldsMatch = mapping.target.fieldsMatch ?? "exact" @@ -127,7 +124,7 @@ function validateFieldsMatch(mapping: SchemaMapping) { return errors } -async function validateFields( +async function inspectFields( mapping: SchemaMapping, table: Table, options: { @@ -145,11 +142,11 @@ async function validateFields( const fieldMapping = matchSchemaField(mapping, field, index) if (!fieldMapping) return - const report = await validateField(fieldMapping, table, { + const fieldErrors = await inspectField(fieldMapping, table, { maxErrors: maxFieldErrors, }) - errors.push(...report.errors) + errors.push(...fieldErrors) if (errors.length > maxErrors) { abortController.abort() } @@ -168,7 +165,7 @@ async function validateFields( return errors } -async function validateRows( +async function inspectRows( mapping: SchemaMapping, table: Table, options: { maxErrors: number }, @@ -183,17 +180,18 @@ async function validateRows( const collectRowErrors = async (check: any) => { const rowCheckTable = table .withRowCount() - .withColumn(col("row_nr").add(1)) + .withColumn(pl.col("row_nr").add(1)) .rename({ row_nr: "dpkit:number" }) .withColumn( - when(check.isErrorExpr) - .then(lit(JSON.stringify(check.errorTemplate))) - .otherwise(lit(null)) + pl + .when(check.isErrorExpr) + .then(pl.lit(JSON.stringify(check.errorTemplate))) + .otherwise(pl.lit(null)) .alias("dpkit:error"), ) const rowCheckFrame = await rowCheckTable - .filter(col("dpkit:error").isNotNull()) + .filter(pl.col("dpkit:error").isNotNull()) .head(maxRowErrors) .collect() diff --git a/table/table/normalize.spec.ts b/table/table/normalize.spec.ts index 96f60dc3..5c6c0c1d 100644 --- a/table/table/normalize.spec.ts +++ b/table/table/normalize.spec.ts @@ -1,14 +1,16 @@ -import type { Schema } from "@dpkit/core" -import { DataFrame } from "nodejs-polars" +import type { Schema } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { describe, expect, it } from "vitest" import { normalizeTable } from "./normalize.ts" describe("normalizeTable", () => { it("should work with schema", async () => { - const table = DataFrame({ - id: [1, 2], - name: ["english", "中文"], - }).lazy() + const table = pl + .DataFrame({ + id: [1, 2], + name: ["english", "中文"], + }) + .lazy() const schema: Schema = { fields: [ @@ -22,16 +24,18 @@ describe("normalizeTable", () => { { id: 2, name: "中文" }, ] - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() - expect(df.toRecords()).toEqual(records) + const result = await normalizeTable(table, schema) + const frame = await result.collect() + expect(frame.toRecords()).toEqual(records) }) it("should work with less fields in data", async () => { - const table = DataFrame({ - id: [1, 2], - name: ["english", "中文"], - }).lazy() + const table = pl + .DataFrame({ + id: [1, 2], + name: ["english", "中文"], + }) + .lazy() const schema: Schema = { fields: [ @@ -46,17 +50,19 @@ describe("normalizeTable", () => { { id: 2, name: "中文", other: null }, ] - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() - 
expect(df.toRecords()).toEqual(records) + const result = await normalizeTable(table, schema) + const frame = await result.collect() + expect(frame.toRecords()).toEqual(records) }) it("should work with more fields in data", async () => { - const table = DataFrame({ - id: [1, 2], - name: ["english", "中文"], - other: [true, false], - }).lazy() + const table = pl + .DataFrame({ + id: [1, 2], + name: ["english", "中文"], + other: [true, false], + }) + .lazy() const schema: Schema = { fields: [ @@ -70,16 +76,18 @@ describe("normalizeTable", () => { { id: 2, name: "中文" }, ] - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() - expect(df.toRecords()).toEqual(records) + const result = await normalizeTable(table, schema) + const frame = await result.collect() + expect(frame.toRecords()).toEqual(records) }) it("should work based on fields order", async () => { - const table = DataFrame({ - field1: [1, 2], - field2: ["english", "中文"], - }).lazy() + const table = pl + .DataFrame({ + field1: [1, 2], + field2: ["english", "中文"], + }) + .lazy() const schema: Schema = { fields: [ @@ -93,16 +101,18 @@ describe("normalizeTable", () => { { id: 2, name: "中文" }, ] - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() - expect(df.toRecords()).toEqual(records) + const result = await normalizeTable(table, schema) + const frame = await result.collect() + expect(frame.toRecords()).toEqual(records) }) it("should work based on field names (equal)", async () => { - const table = DataFrame({ - name: ["english", "中文"], - id: [1, 2], - }).lazy() + const table = pl + .DataFrame({ + name: ["english", "中文"], + id: [1, 2], + }) + .lazy() const schema: Schema = { fieldsMatch: "equal", @@ -117,16 +127,18 @@ describe("normalizeTable", () => { { id: 2, name: "中文" }, ] - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() - expect(df.toRecords()).toEqual(records) + const result = await normalizeTable(table, schema) + const frame = await result.collect() + expect(frame.toRecords()).toEqual(records) }) it("should work based on field names (subset)", async () => { - const table = DataFrame({ - name: ["english", "中文"], - id: [1, 2], - }).lazy() + const table = pl + .DataFrame({ + name: ["english", "中文"], + id: [1, 2], + }) + .lazy() const schema: Schema = { fieldsMatch: "subset", @@ -141,16 +153,18 @@ describe("normalizeTable", () => { { id: 2, name: "中文" }, ] - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() - expect(df.toRecords()).toEqual(records) + const result = await normalizeTable(table, schema) + const frame = await result.collect() + expect(frame.toRecords()).toEqual(records) }) it("should work based on field names (superset)", async () => { - const table = DataFrame({ - name: ["english", "中文"], - id: [1, 2], - }).lazy() + const table = pl + .DataFrame({ + name: ["english", "中文"], + id: [1, 2], + }) + .lazy() const schema: Schema = { fieldsMatch: "superset", @@ -165,16 +179,18 @@ describe("normalizeTable", () => { { id: 2, name: "中文" }, ] - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() - expect(df.toRecords()).toEqual(records) + const result = await normalizeTable(table, schema) + const frame = await result.collect() + expect(frame.toRecords()).toEqual(records) }) it("should work based on field names (partial)", async () => { - const table = DataFrame({ - name: ["english", "中文"], - id: [1, 2], - }).lazy() + const table = pl + .DataFrame({ + name: ["english", "中文"], + id: [1, 
2], + }) + .lazy() const schema: Schema = { fieldsMatch: "partial", @@ -189,16 +205,18 @@ { id: 2, name: "中文" }, ] - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() - expect(df.toRecords()).toEqual(records) + const result = await normalizeTable(table, schema) + const frame = await result.collect() + expect(frame.toRecords()).toEqual(records) }) it("should parse string columns", async () => { - const table = DataFrame({ - id: ["1", "2"], - name: ["english", "中文"], - }).lazy() + const table = pl + .DataFrame({ + id: ["1", "2"], + name: ["english", "中文"], + }) + .lazy() const schema: Schema = { fields: [ @@ -212,16 +230,18 @@ { id: 2, name: "中文" }, ] - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() - expect(df.toRecords()).toEqual(records) + const result = await normalizeTable(table, schema) + const frame = await result.collect() + expect(frame.toRecords()).toEqual(records) }) it("should read type errors as nulls", async () => { - const table = DataFrame({ - id: [1, 2], - name: ["english", "中文"], - }).lazy() + const table = pl + .DataFrame({ + id: [1, 2], + name: ["english", "中文"], + }) + .lazy() const schema: Schema = { fields: [ @@ -235,8 +255,8 @@ { id: 2, name: null }, ] - const ldf = await normalizeTable(table, schema) - const df = await ldf.collect() - expect(df.toRecords()).toEqual(records) + const result = await normalizeTable(table, schema) + const frame = await result.collect() + expect(frame.toRecords()).toEqual(records) }) }) diff --git a/table/table/normalize.ts b/table/table/normalize.ts index 8d23b81e..1a90ddbe 100644 --- a/table/table/normalize.ts +++ b/table/table/normalize.ts @@ -1,6 +1,5 @@ -import type { Schema } from "@dpkit/core" -import type { Expr } from "nodejs-polars" -import { lit } from "nodejs-polars" +import type { Schema } from "@dpkit/metadata" +import * as pl from "nodejs-polars" import { normalizeField } from "../field/index.ts" import { matchSchemaField } from "../schema/index.ts" import { getPolarsSchema } from "../schema/index.ts" @@ -18,11 +17,11 @@ export async function normalizeTable(table: Table, schema: Schema) { } export function normalizeFields(mapping: SchemaMapping) { - const exprs: Record<string, Expr> = {} + const exprs: Record<string, pl.Expr> = {} for (const [index, field] of mapping.target.fields.entries()) { const fieldMapping = matchSchemaField(mapping, field, index) - let expr = lit(null).alias(field.name) + let expr = pl.lit(null).alias(field.name) if (fieldMapping) { const missingValues = field.missingValues ?? 
mapping.target.missingValues diff --git a/table/table/query.ts b/table/table/query.ts index 84966158..d3471f83 100644 --- a/table/table/query.ts +++ b/table/table/query.ts @@ -1,7 +1,7 @@ -import { SQLContext } from "nodejs-polars" +import * as pl from "nodejs-polars" import type { Table } from "./Table.ts" export function queryTable(table: Table, query: string) { - const context = SQLContext({ self: table }) + const context = pl.SQLContext({ self: table }) return context.execute(query) } diff --git a/cli/@compile.ts b/terminal/@compile.ts similarity index 90% rename from cli/@compile.ts rename to terminal/@compile.ts index 099c8639..108bc7a9 100644 --- a/cli/@compile.ts +++ b/terminal/@compile.ts @@ -26,7 +26,7 @@ await $root` pnpm deploy compile --legacy --production ---filter cli +--filter terminal --config.node-linker=hoisted ` @@ -72,7 +72,7 @@ const targets = [ ] for (const target of targets) { - const folder = `dp-${metadata.version}-${target.dpkit}` + const folder = `dpkit-terminal-${metadata.version}-${target.dpkit}` for (const packageName of [target.polars]) { const pack = await $compile`npm pack ${packageName}` @@ -82,15 +82,15 @@ for (const target of targets) { } await $compile` - bun build main.ts + bun build build/main.js --compile - --outfile binaries/${folder}/dp + --outfile binaries/${folder}/dpkit --target ${target.name} ` // For some reason bun creates it with no permissions if (target.name.startsWith("bun-windows")) { - await $binaries`chmod +r ${folder}/dp.exe` + await $binaries`chmod +r ${folder}/dpkit.exe` } await $binaries`zip -r ${folder}.zip ${folder}` @@ -101,4 +101,4 @@ for (const target of targets) { // Clean artifacts (pnpm creates an unwanted dpkit folder) -await $root`rm -rf cli` +await $root`rm -rf terminal` diff --git a/cli/README.md b/terminal/README.md similarity index 76% rename from cli/README.md rename to terminal/README.md index 136016ec..570260a1 100644 --- a/cli/README.md +++ b/terminal/README.md @@ -1,3 +1,3 @@ -# @dpkit/cli +# @dpkit/terminal -dpkit CLI is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev). +dpkit CLI is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [project's website](https://dpkit.app). 
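For orientation, a minimal usage sketch for the queryTable helper updated in table/table/query.ts above. This is illustrative only: the sample data and the relative import path are assumptions, and it assumes execute() yields a lazy frame (as the collect() calls elsewhere in this patch suggest). SQLContext registers the frame under the name "self", so the SQL query addresses it by that name.

import * as pl from "nodejs-polars"
import { queryTable } from "./query.ts" // assumed co-located with the helper above

// Build a small lazy frame to query (sample data, not part of the patch)
const table = pl
  .DataFrame({ id: [1, 2, 3], name: ["a", "b", "c"] })
  .lazy()

// The frame is registered as "self", so the query refers to it by that name
const result = queryTable(table, "SELECT id, name FROM self WHERE id > 1")
const frame = await result.collect()
console.log(frame.toRecords()) // [{ id: 2, name: "b" }, { id: 3, name: "c" }]
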
diff --git a/cli/commander.d.ts b/terminal/commander.d.ts similarity index 100% rename from cli/commander.d.ts rename to terminal/commander.d.ts diff --git a/terminal/commands/dialect/explore.spec.tsx b/terminal/commands/dialect/explore.spec.tsx new file mode 100644 index 00000000..61a3c0a0 --- /dev/null +++ b/terminal/commands/dialect/explore.spec.tsx @@ -0,0 +1,145 @@ +import { writeTempFile } from "@dpkit/dataset" +import { Command } from "commander" +import { beforeEach, describe, expect, it, vi } from "vitest" +import * as sessionModule from "../../session.ts" +import { exploreDialectCommand } from "./explore.tsx" + +vi.mock("../../components/DialectGrid.tsx", () => ({ + DialectGrid: vi.fn(() => null), +})) + +describe("dialect explore", () => { + let mockRender: ReturnType + + beforeEach(() => { + vi.clearAllMocks() + mockRender = vi.fn().mockResolvedValue(undefined) + vi.spyOn(sessionModule.Session, "create").mockImplementation(() => { + const session = { + task: vi.fn(async (_message: string, promise: Promise) => { + try { + return await promise + } catch (error) { + console.log(String(error)) + return undefined + } + }), + render: mockRender, + terminate: vi.fn((msg: string) => { + throw new Error(msg) + }), + } + return session as any + }) + }) + + it("should call session methods when exploring a dialect", async () => { + const dialectDescriptor = JSON.stringify({ + delimiter: ",", + lineTerminator: "\n", + }) + const descriptorPath = await writeTempFile(dialectDescriptor) + + const command = new Command() + .addCommand(exploreDialectCommand) + .configureOutput({ + writeOut: () => {}, + writeErr: () => {}, + }) + + try { + await command.parseAsync(["node", "test", "explore", descriptorPath]) + } catch (error) {} + + const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0] + ?.value + expect(mockSession).toBeDefined() + expect(mockSession.task).toHaveBeenCalled() + expect(mockRender).toHaveBeenCalled() + }) + + it("should handle dialect with various properties", async () => { + const dialectDescriptor = JSON.stringify({ + delimiter: "|", + lineTerminator: "\r\n", + quoteChar: '"', + doubleQuote: true, + skipInitialSpace: false, + }) + const descriptorPath = await writeTempFile(dialectDescriptor) + + const command = new Command() + .addCommand(exploreDialectCommand) + .configureOutput({ + writeOut: () => {}, + writeErr: () => {}, + }) + + try { + await command.parseAsync(["node", "test", "explore", descriptorPath]) + } catch (error) {} + + const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0] + ?.value + expect(mockSession).toBeDefined() + expect(mockSession.task).toHaveBeenCalled() + expect(mockRender).toHaveBeenCalled() + }) + + it("should handle json output option", async () => { + const dialectDescriptor = JSON.stringify({ + delimiter: ",", + lineTerminator: "\n", + }) + const descriptorPath = await writeTempFile(dialectDescriptor) + + const command = new Command() + .addCommand(exploreDialectCommand) + .configureOutput({ + writeOut: () => {}, + writeErr: () => {}, + }) + + try { + await command.parseAsync([ + "node", + "test", + "explore", + descriptorPath, + "--json", + ]) + } catch (error) {} + + const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0] + ?.value + expect(mockSession).toBeDefined() + expect(mockSession.task).toHaveBeenCalled() + expect(mockRender).toHaveBeenCalled() + }) + + it("should handle dialect with header configuration", async () => { + const dialectDescriptor = JSON.stringify({ + delimiter: 
"\t", + lineTerminator: "\n", + header: true, + }) + const descriptorPath = await writeTempFile(dialectDescriptor) + + const command = new Command() + .addCommand(exploreDialectCommand) + .configureOutput({ + writeOut: () => {}, + writeErr: () => {}, + }) + + try { + await command.parseAsync(["node", "test", "explore", descriptorPath]) + } catch (error) {} + + const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0] + ?.value + expect(mockSession).toBeDefined() + expect(mockSession.task).toHaveBeenCalled() + expect(mockRender).toHaveBeenCalled() + }) +}) diff --git a/cli/commands/dialect/explore.tsx b/terminal/commands/dialect/explore.tsx similarity index 88% rename from cli/commands/dialect/explore.tsx rename to terminal/commands/dialect/explore.tsx index 8f5924a5..3ed7ac77 100644 --- a/cli/commands/dialect/explore.tsx +++ b/terminal/commands/dialect/explore.tsx @@ -1,14 +1,14 @@ -import { loadDialect } from "@dpkit/lib" -import type { Resource } from "@dpkit/lib" -import { resolveDialect } from "@dpkit/lib" +import { loadDialect } from "@dpkit/library" +import type { Resource } from "@dpkit/library" +import { resolveDialect } from "@dpkit/library" import { Command } from "commander" import React from "react" import { DialectGrid } from "../../components/DialectGrid.tsx" import { helpConfiguration } from "../../helpers/help.ts" import { isEmptyObject } from "../../helpers/object.ts" import { selectResource } from "../../helpers/resource.ts" -import { Session } from "../../helpers/session.ts" import * as params from "../../params/index.ts" +import { Session } from "../../session.ts" export const exploreDialectCommand = new Command("explore") .configureHelp(helpConfiguration) diff --git a/cli/commands/dialect/index.ts b/terminal/commands/dialect/index.ts similarity index 100% rename from cli/commands/dialect/index.ts rename to terminal/commands/dialect/index.ts diff --git a/cli/commands/dialect/infer.spec.ts b/terminal/commands/dialect/infer.spec.ts similarity index 97% rename from cli/commands/dialect/infer.spec.ts rename to terminal/commands/dialect/infer.spec.ts index b1c7c736..2d45fdce 100644 --- a/cli/commands/dialect/infer.spec.ts +++ b/terminal/commands/dialect/infer.spec.ts @@ -1,7 +1,7 @@ -import { writeTempFile } from "@dpkit/file" -import { useRecording } from "@dpkit/test" +import { writeTempFile } from "@dpkit/dataset" import { Command } from "commander" import { describe, expect, it, vi } from "vitest" +import { useRecording } from "vitest-polly" import { inferDialectCommand } from "./infer.tsx" useRecording() diff --git a/cli/commands/dialect/infer.tsx b/terminal/commands/dialect/infer.tsx similarity index 90% rename from cli/commands/dialect/infer.tsx rename to terminal/commands/dialect/infer.tsx index 616c7b0e..0485e846 100644 --- a/cli/commands/dialect/infer.tsx +++ b/terminal/commands/dialect/infer.tsx @@ -1,13 +1,13 @@ -import { inferDialect } from "@dpkit/lib" -import type { Resource } from "@dpkit/lib" +import { inferDialect } from "@dpkit/library" +import type { Resource } from "@dpkit/library" import { Command } from "commander" import React from "react" import { DialectGrid } from "../../components/DialectGrid.tsx" import { helpConfiguration } from "../../helpers/help.ts" import { isEmptyObject } from "../../helpers/object.ts" import { selectResource } from "../../helpers/resource.ts" -import { Session } from "../../helpers/session.ts" import * as params from "../../params/index.ts" +import { Session } from "../../session.ts" export const 
   .configureHelp(helpConfiguration)
diff --git a/terminal/commands/dialect/script.spec.tsx b/terminal/commands/dialect/script.spec.tsx
new file mode 100644
index 00000000..26e24f1e
--- /dev/null
+++ b/terminal/commands/dialect/script.spec.tsx
@@ -0,0 +1,112 @@
+import repl from "node:repl"
+import { writeTempFile } from "@dpkit/dataset"
+import { Command } from "commander"
+import { beforeEach, describe, expect, it, vi } from "vitest"
+import * as sessionModule from "../../session.ts"
+import { scriptDialectCommand } from "./script.tsx"
+
+describe("dialect script", () => {
+  beforeEach(() => {
+    vi.clearAllMocks()
+
+    vi.spyOn(repl, "start").mockReturnValue({
+      context: {},
+    } as any)
+
+    vi.spyOn(sessionModule.Session, "create").mockImplementation(() => {
+      const session = {
+        task: vi.fn(async (_message: string, promise: Promise<any>) => {
+          try {
+            return await promise
+          } catch (error) {
+            console.log(String(error))
+            return undefined
+          }
+        }),
+        terminate: vi.fn((msg: string) => {
+          throw new Error(msg)
+        }),
+      }
+      return session as any
+    })
+  })
+
+  it("should call session methods when starting a script session", async () => {
+    const dialectContent = JSON.stringify({
+      delimiter: ",",
+      header: true,
+    })
+    const dialectPath = await writeTempFile(dialectContent)
+
+    const command = new Command()
+      .addCommand(scriptDialectCommand)
+      .configureOutput({
+        writeOut: () => {},
+        writeErr: () => {},
+      })
+
+    try {
+      await command.parseAsync(["node", "test", "script", dialectPath])
+    } catch (error) {}
+
+    const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0]
+      ?.value
+    expect(mockSession).toBeDefined()
+    expect(mockSession.task).toHaveBeenCalled()
+  })
+
+  it("should handle dialect with custom delimiter", async () => {
+    const dialectContent = JSON.stringify({
+      delimiter: "|",
+      header: true,
+      quoteChar: '"',
+    })
+    const dialectPath = await writeTempFile(dialectContent)
+
+    const command = new Command()
+      .addCommand(scriptDialectCommand)
+      .configureOutput({
+        writeOut: () => {},
+        writeErr: () => {},
+      })
+
+    try {
+      await command.parseAsync(["node", "test", "script", dialectPath])
+    } catch (error) {}
+
+    const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0]
+      ?.value
+    expect(mockSession).toBeDefined()
+    expect(mockSession.task).toHaveBeenCalled()
+  })
+
+  it("should handle json output option", async () => {
+    const dialectContent = JSON.stringify({
+      delimiter: ",",
+      header: true,
+    })
+    const dialectPath = await writeTempFile(dialectContent)
+
+    const command = new Command()
+      .addCommand(scriptDialectCommand)
+      .configureOutput({
+        writeOut: () => {},
+        writeErr: () => {},
+      })
+
+    try {
+      await command.parseAsync([
+        "node",
+        "test",
+        "script",
+        dialectPath,
+        "--json",
+      ])
+    } catch (error) {}
+
+    const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0]
+      ?.value
+    expect(mockSession).toBeDefined()
+    expect(mockSession.task).toHaveBeenCalled()
+  })
+})
diff --git a/cli/commands/dialect/script.tsx b/terminal/commands/dialect/script.tsx
similarity index 87%
rename from cli/commands/dialect/script.tsx
rename to terminal/commands/dialect/script.tsx
index 8765171f..13edda79 100644
--- a/cli/commands/dialect/script.tsx
+++ b/terminal/commands/dialect/script.tsx
@@ -1,15 +1,15 @@
 import repl from "node:repl"
-import * as dpkit from "@dpkit/lib"
-import { loadDialect } from "@dpkit/lib"
-import type { Resource } from "@dpkit/lib"
-import { resolveDialect } from "@dpkit/lib"
+import * as dpkit from "@dpkit/library"
+import { loadDialect } from "@dpkit/library"
+import type { Resource } from "@dpkit/library"
+import { resolveDialect } from "@dpkit/library"
 import { Command } from "commander"
 import pc from "picocolors"
 import { helpConfiguration } from "../../helpers/help.ts"
 import { isEmptyObject } from "../../helpers/object.ts"
 import { selectResource } from "../../helpers/resource.ts"
-import { Session } from "../../helpers/session.ts"
 import * as params from "../../params/index.ts"
+import { Session } from "../../session.ts"
 
 export const scriptDialectCommand = new Command("script")
   .configureHelp(helpConfiguration)
diff --git a/cli/commands/dialect/validate.spec.ts b/terminal/commands/dialect/validate.spec.ts
similarity index 97%
rename from cli/commands/dialect/validate.spec.ts
rename to terminal/commands/dialect/validate.spec.ts
index 90f59926..b988236f 100644
--- a/cli/commands/dialect/validate.spec.ts
+++ b/terminal/commands/dialect/validate.spec.ts
@@ -1,7 +1,7 @@
-import { writeTempFile } from "@dpkit/file"
-import { useRecording } from "@dpkit/test"
+import { writeTempFile } from "@dpkit/dataset"
 import { Command } from "commander"
 import { describe, expect, it, vi } from "vitest"
+import { useRecording } from "vitest-polly"
 import { validateDialectCommand } from "./validate.tsx"
 
 useRecording()
diff --git a/cli/commands/dialect/validate.tsx b/terminal/commands/dialect/validate.tsx
similarity index 90%
rename from cli/commands/dialect/validate.tsx
rename to terminal/commands/dialect/validate.tsx
index 59bdb329..5dd4224c 100644
--- a/cli/commands/dialect/validate.tsx
+++ b/terminal/commands/dialect/validate.tsx
@@ -1,14 +1,14 @@
-import { loadDescriptor, validateDialect } from "@dpkit/lib"
-import type { Resource } from "@dpkit/lib"
-import { resolveDialect } from "@dpkit/lib"
+import { loadDescriptor, validateDialect } from "@dpkit/library"
+import type { Resource } from "@dpkit/library"
+import { resolveDialect } from "@dpkit/library"
 import { Command } from "commander"
 import React from "react"
 import { ErrorGrid } from "../../components/ErrorGrid.tsx"
 import { selectErrorType } from "../../helpers/error.ts"
 import { helpConfiguration } from "../../helpers/help.ts"
 import { selectResource } from "../../helpers/resource.ts"
-import { Session } from "../../helpers/session.ts"
 import * as params from "../../params/index.ts"
+import { Session } from "../../session.ts"
 
 export const validateDialectCommand = new Command("validate")
   .configureHelp(helpConfiguration)
diff --git a/cli/commands/file/copy.spec.ts b/terminal/commands/file/copy.spec.ts
similarity index 86%
rename from cli/commands/file/copy.spec.ts
rename to terminal/commands/file/copy.spec.ts
index 42162764..b6a4fcee 100644
--- a/cli/commands/file/copy.spec.ts
+++ b/terminal/commands/file/copy.spec.ts
@@ -1,8 +1,8 @@
 import { existsSync } from "node:fs"
-import { getTempFilePath, writeTempFile } from "@dpkit/file"
-import { useRecording } from "@dpkit/test"
+import { getTempFilePath, writeTempFile } from "@dpkit/dataset"
 import { Command } from "commander"
 import { describe, expect, it } from "vitest"
+import { useRecording } from "vitest-polly"
 import { copyFileCommand } from "./copy.ts"
 
 useRecording()
diff --git a/cli/commands/file/copy.ts b/terminal/commands/file/copy.ts
similarity index 93%
rename from cli/commands/file/copy.ts
rename to terminal/commands/file/copy.ts
index 20030ef1..5e242ad3 100644
--- a/cli/commands/file/copy.ts
+++ b/terminal/commands/file/copy.ts
@@ -1,9 +1,9 @@
-import { copyFile } from "@dpkit/lib"
+import { copyFile } from "@dpkit/library"
 import { Command } from "commander"
 import { helpConfiguration } from "../../helpers/help.ts"
 import { selectResource } from "../../helpers/resource.ts"
-import { Session } from "../../helpers/session.ts"
 import * as params from "../../params/index.ts"
+import { Session } from "../../session.ts"
 
 export const copyFileCommand = new Command("copy")
   .configureHelp(helpConfiguration)
diff --git a/cli/commands/file/describe.spec.ts b/terminal/commands/file/describe.spec.ts
similarity index 96%
rename from cli/commands/file/describe.spec.ts
rename to terminal/commands/file/describe.spec.ts
index af4a6149..c7225902 100644
--- a/cli/commands/file/describe.spec.ts
+++ b/terminal/commands/file/describe.spec.ts
@@ -1,7 +1,7 @@
-import { writeTempFile } from "@dpkit/file"
-import { useRecording } from "@dpkit/test"
+import { writeTempFile } from "@dpkit/dataset"
 import { Command } from "commander"
 import { describe, expect, it, vi } from "vitest"
+import { useRecording } from "vitest-polly"
 import { describeFileCommand } from "./describe.tsx"
 
 useRecording()
diff --git a/cli/commands/file/describe.tsx b/terminal/commands/file/describe.tsx
similarity index 93%
rename from cli/commands/file/describe.tsx
rename to terminal/commands/file/describe.tsx
index 25f97496..7014b8a4 100644
--- a/cli/commands/file/describe.tsx
+++ b/terminal/commands/file/describe.tsx
@@ -1,11 +1,11 @@
-import { describeFile } from "@dpkit/lib"
+import { describeFile } from "@dpkit/library"
 import { Command } from "commander"
 import React from "react"
 import { DataGrid } from "../../components/DataGrid.tsx"
 import { helpConfiguration } from "../../helpers/help.ts"
 import { selectResource } from "../../helpers/resource.ts"
-import { Session } from "../../helpers/session.ts"
 import * as params from "../../params/index.ts"
+import { Session } from "../../session.ts"
 
 export const describeFileCommand = new Command("describe")
   .configureHelp(helpConfiguration)
diff --git a/cli/commands/file/index.ts b/terminal/commands/file/index.ts
similarity index 100%
rename from cli/commands/file/index.ts
rename to terminal/commands/file/index.ts
diff --git a/cli/commands/file/validate.spec.ts b/terminal/commands/file/validate.spec.ts
similarity index 96%
rename from cli/commands/file/validate.spec.ts
rename to terminal/commands/file/validate.spec.ts
index 20c33f9b..a220a75d 100644
--- a/cli/commands/file/validate.spec.ts
+++ b/terminal/commands/file/validate.spec.ts
@@ -1,7 +1,7 @@
-import { writeTempFile } from "@dpkit/file"
-import { useRecording } from "@dpkit/test"
+import { writeTempFile } from "@dpkit/dataset"
 import { Command } from "commander"
 import { describe, expect, it, vi } from "vitest"
+import { useRecording } from "vitest-polly"
 import { validateFileCommand } from "./validate.tsx"
 
 useRecording()
diff --git a/cli/commands/file/validate.tsx b/terminal/commands/file/validate.tsx
similarity index 92%
rename from cli/commands/file/validate.tsx
rename to terminal/commands/file/validate.tsx
index 7e6da890..e28cc073 100644
--- a/cli/commands/file/validate.tsx
+++ b/terminal/commands/file/validate.tsx
@@ -1,11 +1,11 @@
-import { validateFile } from "@dpkit/lib"
+import { validateFile } from "@dpkit/library"
 import { Command } from "commander"
 import React from "react"
 import { ErrorGrid } from "../../components/ErrorGrid.tsx"
 import { selectErrorType } from "../../helpers/error.ts"
 import { helpConfiguration } from "../../helpers/help.ts"
-import { Session } from "../../helpers/session.ts"
 import * as params from "../../params/index.ts"
+import { Session } from "../../session.ts"
 
 export const validateFileCommand = new Command("validate")
   .configureHelp(helpConfiguration)
@@ -35,7 +35,8 @@ export const validateFileCommand = new Command("validate")
 
     const report = await session.task(
       "Validating file",
-      validateFile(path, {
+      validateFile({
+        path,
         bytes: options.bytes ? Number.parseInt(options.bytes) : undefined,
         hash: options.hash,
       }),
diff --git a/cli/commands/package/copy.spec.ts b/terminal/commands/package/copy.spec.ts
similarity index 94%
rename from cli/commands/package/copy.spec.ts
rename to terminal/commands/package/copy.spec.ts
index 387875ad..cb3b26bf 100644
--- a/cli/commands/package/copy.spec.ts
+++ b/terminal/commands/package/copy.spec.ts
@@ -1,10 +1,10 @@
 import { existsSync } from "node:fs"
 import { tmpdir } from "node:os"
 import { join } from "node:path"
-import { writeTempFile } from "@dpkit/file"
-import { useRecording } from "@dpkit/test"
+import { writeTempFile } from "@dpkit/dataset"
 import { Command } from "commander"
 import { describe, expect, it, vi } from "vitest"
+import { useRecording } from "vitest-polly"
 import { copyPackageCommand } from "./copy.ts"
 
 useRecording()
diff --git a/cli/commands/package/copy.ts b/terminal/commands/package/copy.ts
similarity index 89%
rename from cli/commands/package/copy.ts
rename to terminal/commands/package/copy.ts
index d06a763d..92f06fc0 100644
--- a/cli/commands/package/copy.ts
+++ b/terminal/commands/package/copy.ts
@@ -1,8 +1,8 @@
-import { loadPackage, savePackage } from "@dpkit/lib"
+import { loadPackage, savePackage } from "@dpkit/library"
 import { Command } from "commander"
 import { helpConfiguration } from "../../helpers/help.ts"
-import { Session } from "../../helpers/session.ts"
 import * as params from "../../params/index.ts"
+import { Session } from "../../session.ts"
 
 export const copyPackageCommand = new Command("copy")
   .configureHelp(helpConfiguration)
diff --git a/terminal/commands/package/explore.spec.tsx b/terminal/commands/package/explore.spec.tsx
new file mode 100644
index 00000000..be946a3f
--- /dev/null
+++ b/terminal/commands/package/explore.spec.tsx
@@ -0,0 +1,164 @@
+import { writeTempFile } from "@dpkit/dataset"
+import { Command } from "commander"
+import { beforeEach, describe, expect, it, vi } from "vitest"
+import * as sessionModule from "../../session.ts"
+import { explorePackageCommand } from "./explore.tsx"
+
+vi.mock("../../components/PackageGrid.tsx", () => ({
+  PackageGrid: vi.fn(() => null),
+}))
+
+describe("package explore", () => {
+  let mockRender: ReturnType<typeof vi.fn>
+
+  beforeEach(() => {
+    vi.clearAllMocks()
+    mockRender = vi.fn().mockResolvedValue(undefined)
+    vi.spyOn(sessionModule.Session, "create").mockImplementation(() => {
+      const session = {
+        task: vi.fn(async (_message: string, promise: Promise<any>) => {
+          try {
+            return await promise
+          } catch (error) {
+            console.log(String(error))
+            return undefined
+          }
+        }),
+        render: mockRender,
+        terminate: vi.fn((msg: string) => {
+          throw new Error(msg)
+        }),
+      }
+      return session as any
+    })
+  })
+
+  it("should call session methods when exploring a package", async () => {
+    const packageDescriptor = JSON.stringify({
+      name: "test-package",
+      resources: [
+        {
+          name: "test-resource",
+          path: "data.csv",
+        },
+      ],
+    })
+    const descriptorPath = await writeTempFile(packageDescriptor)
+
+    const command = new Command()
+      .addCommand(explorePackageCommand)
+      .configureOutput({
+        writeOut: () => {},
+        writeErr: () => {},
+      })
+
+    try {
+      await command.parseAsync(["node", "test", "explore", descriptorPath])
+    } catch (error) {}
+
+    const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0]
+      ?.value
+    expect(mockSession).toBeDefined()
+    expect(mockSession.task).toHaveBeenCalled()
+    expect(mockRender).toHaveBeenCalled()
+  })
+
+  it("should handle package with multiple resources", async () => {
+    const packageDescriptor = JSON.stringify({
+      name: "test-package",
+      resources: [
+        {
+          name: "resource1",
+          path: "data1.csv",
+        },
+        {
+          name: "resource2",
+          path: "data2.json",
+          format: "json",
+        },
+      ],
+    })
+    const descriptorPath = await writeTempFile(packageDescriptor)
+
+    const command = new Command()
+      .addCommand(explorePackageCommand)
+      .configureOutput({
+        writeOut: () => {},
+        writeErr: () => {},
+      })
+
+    try {
+      await command.parseAsync(["node", "test", "explore", descriptorPath])
+    } catch (error) {}
+
+    const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0]
+      ?.value
+    expect(mockSession).toBeDefined()
+    expect(mockSession.task).toHaveBeenCalled()
+    expect(mockRender).toHaveBeenCalled()
+  })
+
+  it("should handle json output option", async () => {
+    const packageDescriptor = JSON.stringify({
+      name: "test-package",
+      resources: [],
+    })
+    const descriptorPath = await writeTempFile(packageDescriptor)
+
+    const command = new Command()
+      .addCommand(explorePackageCommand)
+      .configureOutput({
+        writeOut: () => {},
+        writeErr: () => {},
+      })
+
+    try {
+      await command.parseAsync([
+        "node",
+        "test",
+        "explore",
+        descriptorPath,
+        "--json",
+      ])
+    } catch (error) {}
+
+    const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0]
+      ?.value
+    expect(mockSession).toBeDefined()
+    expect(mockSession.task).toHaveBeenCalled()
+    expect(mockRender).toHaveBeenCalled()
+  })
+
+  it("should handle package with metadata", async () => {
+    const packageDescriptor = JSON.stringify({
+      name: "test-package",
+      title: "Test Package",
+      description: "A test package",
+      version: "1.0.0",
+      resources: [
+        {
+          name: "test-resource",
+          path: "data.csv",
+        },
+      ],
+    })
+    const descriptorPath = await writeTempFile(packageDescriptor)
+
+    const command = new Command()
+      .addCommand(explorePackageCommand)
+      .configureOutput({
+        writeOut: () => {},
+        writeErr: () => {},
+      })
+
+    try {
+      await command.parseAsync(["node", "test", "explore", descriptorPath])
+    } catch (error) {}
+
+    const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0]
+      ?.value
+    expect(mockSession).toBeDefined()
+    expect(mockSession.task).toHaveBeenCalled()
+    expect(mockRender).toHaveBeenCalled()
+  })
+})
diff --git a/cli/commands/package/explore.tsx b/terminal/commands/package/explore.tsx
similarity index 89%
rename from cli/commands/package/explore.tsx
rename to terminal/commands/package/explore.tsx
index 2ea2671a..f1aee371 100644
--- a/cli/commands/package/explore.tsx
+++ b/terminal/commands/package/explore.tsx
@@ -1,10 +1,10 @@
-import { loadPackage } from "@dpkit/lib"
+import { loadPackage } from "@dpkit/library"
 import { Command } from "commander"
 import React from "react"
 import { PackageGrid } from "../../components/PackageGrid.tsx"
 import { helpConfiguration } from "../../helpers/help.ts"
-import { Session } from "../../helpers/session.ts"
 import * as params from "../../params/index.ts"
+import { Session } from "../../session.ts"
 
 export const explorePackageCommand = new Command("explore")
   .configureHelp(helpConfiguration)
diff --git a/cli/commands/package/index.ts b/terminal/commands/package/index.ts
similarity index 100%
rename from cli/commands/package/index.ts
rename to terminal/commands/package/index.ts
diff --git a/cli/commands/package/infer.spec.ts b/terminal/commands/package/infer.spec.ts
similarity index 96%
rename from cli/commands/package/infer.spec.ts
rename to terminal/commands/package/infer.spec.ts
index 7371bc84..d7fff950 100644
--- a/cli/commands/package/infer.spec.ts
+++ b/terminal/commands/package/infer.spec.ts
@@ -1,7 +1,7 @@
-import { writeTempFile } from "@dpkit/file"
-import { useRecording } from "@dpkit/test"
+import { writeTempFile } from "@dpkit/dataset"
 import { Command } from "commander"
 import { describe, expect, it, vi } from "vitest"
+import { useRecording } from "vitest-polly"
 import { inferPackageCommand } from "./infer.tsx"
 
 useRecording()
diff --git a/cli/commands/package/infer.tsx b/terminal/commands/package/infer.tsx
similarity index 96%
rename from cli/commands/package/infer.tsx
rename to terminal/commands/package/infer.tsx
index 0dd70edb..ef98cb4a 100644
--- a/cli/commands/package/infer.tsx
+++ b/terminal/commands/package/infer.tsx
@@ -1,10 +1,10 @@
-import { inferPackage } from "@dpkit/lib"
+import { inferPackage } from "@dpkit/library"
 import { Command } from "commander"
 import React from "react"
 import { PackageGrid } from "../../components/PackageGrid.tsx"
 import { helpConfiguration } from "../../helpers/help.ts"
-import { Session } from "../../helpers/session.ts"
 import * as params from "../../params/index.ts"
+import { Session } from "../../session.ts"
 
 export const inferPackageCommand = new Command("infer")
   .configureHelp(helpConfiguration)
diff --git a/cli/commands/package/publish/ckan.spec.ts b/terminal/commands/package/publish/ckan.spec.ts
similarity index 85%
rename from cli/commands/package/publish/ckan.spec.ts
rename to terminal/commands/package/publish/ckan.spec.ts
index d216c94b..d7840fea 100644
--- a/cli/commands/package/publish/ckan.spec.ts
+++ b/terminal/commands/package/publish/ckan.spec.ts
@@ -1,8 +1,8 @@
-import { writeTempFile } from "@dpkit/file"
-import * as lib from "@dpkit/lib"
-import { useRecording } from "@dpkit/test"
+import { writeTempFile } from "@dpkit/dataset"
+import * as lib from "@dpkit/library"
 import { Command } from "commander"
 import { describe, expect, it, vi } from "vitest"
+import { useRecording } from "vitest-polly"
 import { ckanPublishPackageCommand } from "./ckan.ts"
 
 useRecording()
@@ -19,7 +19,10 @@ describe("package publish ckan", () => {
       },
     ],
   })
-  const packagePath = await writeTempFile(packageContent)
+
+  const packagePath = await writeTempFile(packageContent, {
+    filename: "datapackage.json",
+  })
 
   const savePackageToCkanSpy = vi
     .spyOn(lib, "savePackageToCkan")
diff --git a/cli/commands/package/publish/ckan.ts b/terminal/commands/package/publish/ckan.ts
similarity index 91%
rename from cli/commands/package/publish/ckan.ts
rename to terminal/commands/package/publish/ckan.ts
index 83c9a632..6b8acb0b 100644
--- a/cli/commands/package/publish/ckan.ts
+++ b/terminal/commands/package/publish/ckan.ts
@@ -1,8 +1,8 @@
-import { loadPackage, savePackageToCkan } from "@dpkit/lib"
+import { loadPackage, savePackageToCkan } from "@dpkit/library"
 import { Command } from "commander"
 import { helpConfiguration } from "../../../helpers/help.ts"
-import { Session } from "../../../helpers/session.ts"
 import * as params from "../../../params/index.ts"
+import { Session } from "../../../session.ts"
 
 export const ckanPublishPackageCommand = new Command("ckan")
   .configureHelp(helpConfiguration)
diff --git a/cli/commands/package/publish/github.spec.ts b/terminal/commands/package/publish/github.spec.ts
similarity index 85%
rename from cli/commands/package/publish/github.spec.ts
rename to terminal/commands/package/publish/github.spec.ts
index 367f8808..b071bd1e 100644
--- a/cli/commands/package/publish/github.spec.ts
+++ b/terminal/commands/package/publish/github.spec.ts
@@ -1,8 +1,8 @@
-import { writeTempFile } from "@dpkit/file"
-import * as lib from "@dpkit/lib"
-import { useRecording } from "@dpkit/test"
+import { writeTempFile } from "@dpkit/dataset"
+import * as lib from "@dpkit/library"
 import { Command } from "commander"
 import { describe, expect, it, vi } from "vitest"
+import { useRecording } from "vitest-polly"
 import { githubPublishPackageCommand } from "./github.ts"
 
 useRecording()
@@ -19,7 +19,10 @@ describe("package publish github", () => {
       },
     ],
   })
-  const packagePath = await writeTempFile(packageContent)
+
+  const packagePath = await writeTempFile(packageContent, {
+    filename: "datapackage.json",
+  })
 
   const savePackageToGithubSpy = vi
     .spyOn(lib, "savePackageToGithub")
diff --git a/cli/commands/package/publish/github.ts b/terminal/commands/package/publish/github.ts
similarity index 90%
rename from cli/commands/package/publish/github.ts
rename to terminal/commands/package/publish/github.ts
index 94420be1..d4fe4ad1 100644
--- a/cli/commands/package/publish/github.ts
+++ b/terminal/commands/package/publish/github.ts
@@ -1,8 +1,8 @@
-import { loadPackage, savePackageToGithub } from "@dpkit/lib"
+import { loadPackage, savePackageToGithub } from "@dpkit/library"
 import { Command } from "commander"
 import { helpConfiguration } from "../../../helpers/help.ts"
-import { Session } from "../../../helpers/session.ts"
 import * as params from "../../../params/index.ts"
+import { Session } from "../../../session.ts"
 
 export const githubPublishPackageCommand = new Command("github")
   .configureHelp(helpConfiguration)
diff --git a/cli/commands/package/publish/index.ts b/terminal/commands/package/publish/index.ts
similarity index 100%
rename from cli/commands/package/publish/index.ts
rename to terminal/commands/package/publish/index.ts
diff --git a/cli/commands/package/publish/zenodo.spec.ts b/terminal/commands/package/publish/zenodo.spec.ts
similarity index 84%
rename from cli/commands/package/publish/zenodo.spec.ts
rename to terminal/commands/package/publish/zenodo.spec.ts
index 1a58a4ad..2509a830 100644
--- a/cli/commands/package/publish/zenodo.spec.ts
+++ b/terminal/commands/package/publish/zenodo.spec.ts
@@ -1,8 +1,8 @@
-import { writeTempFile } from "@dpkit/file"
-import * as lib from "@dpkit/lib"
-import { useRecording } from "@dpkit/test"
+import { writeTempFile } from "@dpkit/dataset"
+import * as lib from "@dpkit/library"
 import { Command } from "commander"
 import { describe, expect, it, vi } from "vitest"
+import { useRecording } from "vitest-polly"
 import { zenodoPublishPackageCommand } from "./zenodo.ts"
 
 useRecording()
@@ -19,7 +19,10 @@ describe("package publish zenodo", () => {
       },
     ],
   })
-  const packagePath = await writeTempFile(packageContent)
+
+  const packagePath = await writeTempFile(packageContent, {
+    filename: "datapackage.json",
+  })
 
   const savePackageToZenodoSpy = vi
     .spyOn(lib, "savePackageToZenodo")
diff --git a/cli/commands/package/publish/zenodo.ts b/terminal/commands/package/publish/zenodo.ts
similarity index 90%
rename from cli/commands/package/publish/zenodo.ts
rename to terminal/commands/package/publish/zenodo.ts
index 3231747f..37d9c945 100644
--- a/cli/commands/package/publish/zenodo.ts
+++ b/terminal/commands/package/publish/zenodo.ts
@@ -1,8 +1,8 @@
-import { loadPackage, savePackageToZenodo } from "@dpkit/lib"
+import { loadPackage, savePackageToZenodo } from "@dpkit/library"
 import { Command } from "commander"
 import { helpConfiguration } from "../../../helpers/help.ts"
-import { Session } from "../../../helpers/session.ts"
 import * as params from "../../../params/index.ts"
+import { Session } from "../../../session.ts"
 
 export const zenodoPublishPackageCommand = new Command("zenodo")
   .configureHelp(helpConfiguration)
diff --git a/terminal/commands/package/script.spec.tsx b/terminal/commands/package/script.spec.tsx
new file mode 100644
index 00000000..02327b1f
--- /dev/null
+++ b/terminal/commands/package/script.spec.tsx
@@ -0,0 +1,131 @@
+import repl from "node:repl"
+import { writeTempFile } from "@dpkit/dataset"
+import { Command } from "commander"
+import { beforeEach, describe, expect, it, vi } from "vitest"
+import * as sessionModule from "../../session.ts"
+import { scriptPackageCommand } from "./script.tsx"
+
+describe("package script", () => {
+  beforeEach(() => {
+    vi.clearAllMocks()
+
+    vi.spyOn(repl, "start").mockReturnValue({
+      context: {},
+    } as any)
+
+    vi.spyOn(sessionModule.Session, "create").mockImplementation(() => {
+      const session = {
+        task: vi.fn(async (_message: string, promise: Promise<any>) => {
+          try {
+            return await promise
+          } catch (error) {
+            console.log(String(error))
+            return undefined
+          }
+        }),
+        terminate: vi.fn((msg: string) => {
+          throw new Error(msg)
+        }),
+      }
+      return session as any
+    })
+  })
+
+  it("should call session methods when starting a script session", async () => {
+    const packageDescriptor = JSON.stringify({
+      name: "test-package",
+      resources: [
+        {
+          name: "data",
+          path: "data.csv",
+          schema: {
+            fields: [
+              { name: "id", type: "integer" },
+              { name: "name", type: "string" },
+            ],
+          },
+        },
+      ],
+    })
+    const descriptorPath = await writeTempFile(packageDescriptor)
+
+    const command = new Command()
+      .addCommand(scriptPackageCommand)
+      .configureOutput({
+        writeOut: () => {},
+        writeErr: () => {},
+      })
+
+    try {
+      await command.parseAsync(["node", "test", "script", descriptorPath])
+    } catch (error) {}
+
+    const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0]
+      ?.value
+    expect(mockSession).toBeDefined()
+    expect(mockSession.task).toHaveBeenCalled()
+  })
+
+  it("should handle package with multiple resources", async () => {
+    const packageDescriptor = JSON.stringify({
+      name: "multi-resource-package",
+      resources: [
+        {
+          name: "users",
+          path: "users.csv",
+        },
+        {
+          name: "products",
+          path: "products.csv",
+        },
+      ],
+    })
+    const descriptorPath = await writeTempFile(packageDescriptor)
+
+    const command = new Command()
+      .addCommand(scriptPackageCommand)
+      .configureOutput({
+        writeOut: () => {},
+        writeErr: () => {},
+      })
+
+    try {
+      await command.parseAsync(["node", "test", "script", descriptorPath])
+    } catch (error) {}
+
+    const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0]
+      ?.value
+    expect(mockSession).toBeDefined()
+    expect(mockSession.task).toHaveBeenCalled()
+  })
+
+  it("should handle json output option", async () => {
+    const packageDescriptor = JSON.stringify({
+      name: "test-package",
+      resources: [],
+    })
+    const descriptorPath = await writeTempFile(packageDescriptor)
+
+    const command = new Command()
+      .addCommand(scriptPackageCommand)
+      .configureOutput({
+        writeOut: () => {},
+        writeErr: () => {},
+      })
+
+    try {
+      await command.parseAsync([
+        "node",
+        "test",
+        "script",
+        descriptorPath,
+        "--json",
+      ])
+    } catch (error) {}
+
+    const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0]
+      ?.value
+    expect(mockSession).toBeDefined()
+    expect(mockSession.task).toHaveBeenCalled()
+  })
+})
diff --git a/cli/commands/package/script.tsx b/terminal/commands/package/script.tsx
similarity index 88%
rename from cli/commands/package/script.tsx
rename to terminal/commands/package/script.tsx
index a7209f40..40b7c6c4 100644
--- a/cli/commands/package/script.tsx
+++ b/terminal/commands/package/script.tsx
@@ -1,11 +1,11 @@
 import repl from "node:repl"
-import { loadPackage } from "@dpkit/lib"
-import * as dpkit from "@dpkit/lib"
+import { loadPackage } from "@dpkit/library"
+import * as dpkit from "@dpkit/library"
 import { Command } from "commander"
 import pc from "picocolors"
 import { helpConfiguration } from "../../helpers/help.ts"
-import { Session } from "../../helpers/session.ts"
 import * as params from "../../params/index.ts"
+import { Session } from "../../session.ts"
 
 export const scriptPackageCommand = new Command("script")
   .configureHelp(helpConfiguration)
diff --git a/cli/commands/package/validate.spec.ts b/terminal/commands/package/validate.spec.ts
similarity index 97%
rename from cli/commands/package/validate.spec.ts
rename to terminal/commands/package/validate.spec.ts
index 5354b018..a548ae1a 100644
--- a/cli/commands/package/validate.spec.ts
+++ b/terminal/commands/package/validate.spec.ts
@@ -1,8 +1,8 @@
 import { basename } from "node:path"
-import { writeTempFile } from "@dpkit/file"
-import { useRecording } from "@dpkit/test"
+import { writeTempFile } from "@dpkit/dataset"
 import { Command } from "commander"
 import { describe, expect, it, vi } from "vitest"
+import { useRecording } from "vitest-polly"
 import { validatePackageCommand } from "./validate.tsx"
 
 useRecording()
diff --git a/cli/commands/package/validate.tsx b/terminal/commands/package/validate.tsx
similarity index 94%
rename from cli/commands/package/validate.tsx
rename to terminal/commands/package/validate.tsx
index b7617d03..ef3073bc 100644
--- a/cli/commands/package/validate.tsx
+++ b/terminal/commands/package/validate.tsx
@@ -1,11 +1,11 @@
-import { validatePackage } from "@dpkit/lib"
+import { validatePackage } from "@dpkit/library"
 import { Command } from "commander"
 import React from "react"
 import { ErrorGrid } from "../../components/ErrorGrid.tsx"
 import { selectErrorResource, selectErrorType } from "../../helpers/error.ts"
 import { helpConfiguration } from "../../helpers/help.ts"
-import { Session } from "../../helpers/session.ts"
 import * as params from "../../params/index.ts"
+import { Session } from "../../session.ts"
 
 export const validatePackageCommand = new Command("validate")
   .configureHelp(helpConfiguration)
diff --git a/terminal/commands/resource/explore.spec.tsx b/terminal/commands/resource/explore.spec.tsx
new file mode 100644
index 00000000..851fb685
--- /dev/null
+++ b/terminal/commands/resource/explore.spec.tsx
@@ -0,0 +1,152 @@
+import { writeTempFile } from "@dpkit/dataset"
+import { Command } from "commander"
+import { beforeEach, describe, expect, it, vi } from "vitest"
+import * as sessionModule from "../../session.ts"
+import { exploreResourceCommand } from "./explore.tsx"
+
+vi.mock("../../components/ResourceGrid.tsx", () => ({
+  ResourceGrid: vi.fn(() => null),
+}))
+
+describe("resource explore", () => {
+  let mockRender: ReturnType<typeof vi.fn>
+
+  beforeEach(() => {
+    vi.clearAllMocks()
+    mockRender = vi.fn().mockResolvedValue(undefined)
+    vi.spyOn(sessionModule.Session, "create").mockImplementation(() => {
+      const session = {
+        task: vi.fn(async (_message: string, promise: Promise<any>) => {
+          try {
+            return await promise
+          } catch (error) {
+            console.log(String(error))
+            return undefined
+          }
+        }),
+        render: mockRender,
+        terminate: vi.fn((msg: string) => {
+          throw new Error(msg)
+        }),
+      }
+      return session as any
+    })
+  })
+
+  it("should call session methods when exploring a resource", async () => {
+    const resourceDescriptor = JSON.stringify({
+      name: "test-resource",
+      path: "data.csv",
+      schema: {
+        fields: [
+          { name: "id", type: "integer" },
+          { name: "name", type: "string" },
+        ],
+      },
+    })
+    const descriptorPath = await writeTempFile(resourceDescriptor)
+
+    const command = new Command()
+      .addCommand(exploreResourceCommand)
+      .configureOutput({
+        writeOut: () => {},
+        writeErr: () => {},
+      })
+
+    try {
+      await command.parseAsync(["node", "test", "explore", descriptorPath])
+    } catch (error) {}
+
+    const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0]
+      ?.value
+    expect(mockSession).toBeDefined()
+    expect(mockSession.task).toHaveBeenCalled()
+    expect(mockRender).toHaveBeenCalled()
+  })
+
+  it("should handle resource with format", async () => {
+    const resourceDescriptor = JSON.stringify({
+      name: "test-resource",
+      path: "data.json",
+      format: "json",
+    })
+    const descriptorPath = await writeTempFile(resourceDescriptor)
+
+    const command = new Command()
+      .addCommand(exploreResourceCommand)
+      .configureOutput({
+        writeOut: () => {},
+        writeErr: () => {},
+      })
+
+    try {
+      await command.parseAsync(["node", "test", "explore", descriptorPath])
+    } catch (error) {}
+
+    const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0]
+      ?.value
+    expect(mockSession).toBeDefined()
+    expect(mockSession.task).toHaveBeenCalled()
+    expect(mockRender).toHaveBeenCalled()
+  })
+
+  it("should handle json output option", async () => {
+    const resourceDescriptor = JSON.stringify({
+      name: "test-resource",
+      path: "data.csv",
+    })
+    const descriptorPath = await writeTempFile(resourceDescriptor)
+
+    const command = new Command()
+      .addCommand(exploreResourceCommand)
+      .configureOutput({
+        writeOut: () => {},
+        writeErr: () => {},
+      })
+
+    try {
+      await command.parseAsync([
+        "node",
+        "test",
+        "explore",
+        descriptorPath,
+        "--json",
+      ])
+    } catch (error) {}
+
+    const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0]
+      ?.value
+    expect(mockSession).toBeDefined()
+    expect(mockSession.task).toHaveBeenCalled()
+    expect(mockRender).toHaveBeenCalled()
+  })
+
+  it("should handle resource with encoding", async () => {
+    const resourceDescriptor = JSON.stringify({
+      name: "test-resource",
+      path: "data.csv",
+      encoding: "utf-8",
+      schema: {
+        fields: [{ name: "id", type: "integer" }],
+      },
+    })
+    const descriptorPath = await writeTempFile(resourceDescriptor)
+
+    const command = new Command()
+      .addCommand(exploreResourceCommand)
+      .configureOutput({
+        writeOut: () => {},
+        writeErr: () => {},
+      })
+
+    try {
+      await command.parseAsync(["node", "test", "explore", descriptorPath])
+    } catch (error) {}
+
+    const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0]
+      ?.value
+    expect(mockSession).toBeDefined()
+    expect(mockSession.task).toHaveBeenCalled()
+    expect(mockRender).toHaveBeenCalled()
+  })
+})
diff --git a/cli/commands/resource/explore.tsx b/terminal/commands/resource/explore.tsx
similarity index 92%
rename from cli/commands/resource/explore.tsx
rename to terminal/commands/resource/explore.tsx
index e350d826..99680507 100644
--- a/cli/commands/resource/explore.tsx
+++ b/terminal/commands/resource/explore.tsx
@@ -1,12 +1,12 @@
-import { loadResourceDescriptor } from "@dpkit/lib"
+import { loadResourceDescriptor } from "@dpkit/library"
 import { Command } from "commander"
 import React from "react"
 import { ResourceGrid } from "../../components/ResourceGrid.tsx"
 import { helpConfiguration } from "../../helpers/help.ts"
 import { isEmptyObject } from "../../helpers/object.ts"
 import { selectResource } from "../../helpers/resource.ts"
-import { Session } from "../../helpers/session.ts"
 import * as params from "../../params/index.ts"
+import { Session } from "../../session.ts"
 
 export const exploreResourceCommand = new Command("explore")
   .configureHelp(helpConfiguration)
diff --git a/cli/commands/resource/index.ts b/terminal/commands/resource/index.ts
similarity index 100%
rename from cli/commands/resource/index.ts
rename to terminal/commands/resource/index.ts
diff --git a/cli/commands/resource/infer.spec.ts b/terminal/commands/resource/infer.spec.ts
similarity index 97%
rename from cli/commands/resource/infer.spec.ts
rename to terminal/commands/resource/infer.spec.ts
index 94ea0f3a..65c4fa1b 100644
--- a/cli/commands/resource/infer.spec.ts
+++ b/terminal/commands/resource/infer.spec.ts
@@ -1,7 +1,7 @@
-import { writeTempFile } from "@dpkit/file"
-import { useRecording } from "@dpkit/test"
+import { writeTempFile } from "@dpkit/dataset"
 import { Command } from "commander"
 import { describe, expect, it, vi } from "vitest"
+import { useRecording } from "vitest-polly"
 import { inferResourceCommand } from "./infer.tsx"
 
 useRecording()
diff --git a/cli/commands/resource/infer.tsx b/terminal/commands/resource/infer.tsx
similarity index 96%
rename from cli/commands/resource/infer.tsx
rename to terminal/commands/resource/infer.tsx
index 78c73e4e..21eaf460 100644
--- a/cli/commands/resource/infer.tsx
+++ b/terminal/commands/resource/infer.tsx
@@ -1,12 +1,12 @@
-import { inferResource } from "@dpkit/lib"
+import { inferResource } from "@dpkit/library"
 import { Command } from "commander"
 import React from "react"
 import { ResourceGrid } from "../../components/ResourceGrid.tsx"
 import { helpConfiguration } from "../../helpers/help.ts"
 import { isEmptyObject } from "../../helpers/object.ts"
 import { selectResource } from "../../helpers/resource.ts"
-import { Session } from "../../helpers/session.ts"
 import * as params from "../../params/index.ts"
+import { Session } from "../../session.ts"
 
 export const inferResourceCommand = new Command("infer")
   .configureHelp(helpConfiguration)
diff --git a/terminal/commands/resource/script.spec.tsx b/terminal/commands/resource/script.spec.tsx
new file mode 100644
index 00000000..800e780b
--- /dev/null
+++ b/terminal/commands/resource/script.spec.tsx
@@ -0,0 +1,118 @@
+import repl from "node:repl"
+import { writeTempFile } from "@dpkit/dataset"
+import { Command } from "commander"
+import { beforeEach, describe, expect, it, vi } from "vitest"
+import * as sessionModule from "../../session.ts"
+import { scriptResourceCommand } from "./script.tsx"
+
+describe("resource script", () => {
+  beforeEach(() => {
+    vi.clearAllMocks()
+
+    vi.spyOn(repl, "start").mockReturnValue({
+      context: {},
+    } as any)
+
+    vi.spyOn(sessionModule.Session, "create").mockImplementation(() => {
+      const session = {
+        task: vi.fn(async (_message: string, promise: Promise<any>) => {
+          try {
+            return await promise
promise + } catch (error) { + console.log(String(error)) + return undefined + } + }), + terminate: vi.fn((msg: string) => { + throw new Error(msg) + }), + } + return session as any + }) + }) + + it("should call session methods when starting a script session", async () => { + const resourceDescriptor = JSON.stringify({ + name: "test-resource", + path: "data.csv", + schema: { + fields: [ + { name: "id", type: "integer" }, + { name: "name", type: "string" }, + ], + }, + }) + const descriptorPath = await writeTempFile(resourceDescriptor) + + const command = new Command() + .addCommand(scriptResourceCommand) + .configureOutput({ + writeOut: () => {}, + writeErr: () => {}, + }) + + try { + await command.parseAsync(["node", "test", "script", descriptorPath]) + } catch (error) {} + + const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0] + ?.value + expect(mockSession).toBeDefined() + expect(mockSession.task).toHaveBeenCalled() + }) + + it("should handle resource with format", async () => { + const resourceDescriptor = JSON.stringify({ + name: "test-resource", + path: "data.json", + format: "json", + }) + const descriptorPath = await writeTempFile(resourceDescriptor) + + const command = new Command() + .addCommand(scriptResourceCommand) + .configureOutput({ + writeOut: () => {}, + writeErr: () => {}, + }) + + try { + await command.parseAsync(["node", "test", "script", descriptorPath]) + } catch (error) {} + + const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0] + ?.value + expect(mockSession).toBeDefined() + expect(mockSession.task).toHaveBeenCalled() + }) + + it("should handle json output option", async () => { + const resourceDescriptor = JSON.stringify({ + name: "test-resource", + path: "data.csv", + }) + const descriptorPath = await writeTempFile(resourceDescriptor) + + const command = new Command() + .addCommand(scriptResourceCommand) + .configureOutput({ + writeOut: () => {}, + writeErr: () => {}, + }) + + try { + await command.parseAsync([ + "node", + "test", + "script", + descriptorPath, + "--json", + ]) + } catch (error) {} + + const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0] + ?.value + expect(mockSession).toBeDefined() + expect(mockSession.task).toHaveBeenCalled() + }) +}) diff --git a/cli/commands/resource/script.tsx b/terminal/commands/resource/script.tsx similarity index 90% rename from cli/commands/resource/script.tsx rename to terminal/commands/resource/script.tsx index 449efd27..c08e31d1 100644 --- a/cli/commands/resource/script.tsx +++ b/terminal/commands/resource/script.tsx @@ -1,13 +1,13 @@ import repl from "node:repl" -import { loadResourceDescriptor } from "@dpkit/lib" -import * as dpkit from "@dpkit/lib" +import { loadResourceDescriptor } from "@dpkit/library" +import * as dpkit from "@dpkit/library" import { Command } from "commander" import pc from "picocolors" import { helpConfiguration } from "../../helpers/help.ts" import { isEmptyObject } from "../../helpers/object.ts" import { selectResource } from "../../helpers/resource.ts" -import { Session } from "../../helpers/session.ts" import * as params from "../../params/index.ts" +import { Session } from "../../session.ts" export const scriptResourceCommand = new Command("script") .configureHelp(helpConfiguration) diff --git a/cli/commands/resource/validate.spec.ts b/terminal/commands/resource/validate.spec.ts similarity index 98% rename from cli/commands/resource/validate.spec.ts rename to terminal/commands/resource/validate.spec.ts index 
c52e3780..5e9d2e33 100644 --- a/cli/commands/resource/validate.spec.ts +++ b/terminal/commands/resource/validate.spec.ts @@ -1,8 +1,8 @@ import { basename } from "node:path" -import { writeTempFile } from "@dpkit/file" -import { useRecording } from "@dpkit/test" +import { writeTempFile } from "@dpkit/dataset" import { Command } from "commander" import { describe, expect, it, vi } from "vitest" +import { useRecording } from "vitest-polly" import { validateResourceCommand } from "./validate.tsx" useRecording() diff --git a/cli/commands/resource/validate.tsx b/terminal/commands/resource/validate.tsx similarity index 94% rename from cli/commands/resource/validate.tsx rename to terminal/commands/resource/validate.tsx index 22dde5f4..ca437a69 100644 --- a/cli/commands/resource/validate.tsx +++ b/terminal/commands/resource/validate.tsx @@ -1,12 +1,12 @@ -import { validateResource } from "@dpkit/lib" +import { validateResource } from "@dpkit/library" import { Command } from "commander" import React from "react" import { ErrorGrid } from "../../components/ErrorGrid.tsx" import { selectErrorType } from "../../helpers/error.ts" import { helpConfiguration } from "../../helpers/help.ts" import { selectResource } from "../../helpers/resource.ts" -import { Session } from "../../helpers/session.ts" import * as params from "../../params/index.ts" +import { Session } from "../../session.ts" export const validateResourceCommand = new Command("validate") .configureHelp(helpConfiguration) diff --git a/cli/commands/schema/convert.spec.ts b/terminal/commands/schema/convert.spec.ts similarity index 97% rename from cli/commands/schema/convert.spec.ts rename to terminal/commands/schema/convert.spec.ts index e4d18990..8ea2240b 100644 --- a/cli/commands/schema/convert.spec.ts +++ b/terminal/commands/schema/convert.spec.ts @@ -1,7 +1,7 @@ -import { writeTempFile } from "@dpkit/file" -import { useRecording } from "@dpkit/test" +import { writeTempFile } from "@dpkit/dataset" import { Command } from "commander" import { describe, expect, it, vi } from "vitest" +import { useRecording } from "vitest-polly" import { convertSchemaCommand } from "./convert.tsx" useRecording() diff --git a/cli/commands/schema/convert.tsx b/terminal/commands/schema/convert.tsx similarity index 95% rename from cli/commands/schema/convert.tsx rename to terminal/commands/schema/convert.tsx index 6405ac20..ce99ae23 100644 --- a/cli/commands/schema/convert.tsx +++ b/terminal/commands/schema/convert.tsx @@ -4,12 +4,12 @@ import { convertSchemaToHtml, convertSchemaToJsonSchema, convertSchemaToMarkdown, -} from "@dpkit/lib" -import { loadDescriptor, saveDescriptor } from "@dpkit/lib" +} from "@dpkit/library" +import { loadDescriptor, saveDescriptor } from "@dpkit/library" import { Command, Option } from "commander" import { helpConfiguration } from "../../helpers/help.ts" -import { Session } from "../../helpers/session.ts" import * as params from "../../params/index.ts" +import { Session } from "../../session.ts" const format = new Option("--format ", "source schema format").choices([ "jsonschema", diff --git a/terminal/commands/schema/explore.spec.tsx b/terminal/commands/schema/explore.spec.tsx new file mode 100644 index 00000000..4d97ad40 --- /dev/null +++ b/terminal/commands/schema/explore.spec.tsx @@ -0,0 +1,145 @@ +import { writeTempFile } from "@dpkit/dataset" +import { Command } from "commander" +import { beforeEach, describe, expect, it, vi } from "vitest" +import * as sessionModule from "../../session.ts" +import { exploreSchemaCommand } from 
"./explore.tsx" + +vi.mock("../../components/SchemaGrid.tsx", () => ({ + SchemaGrid: vi.fn(() => null), +})) + +describe("schema explore", () => { + let mockRender: ReturnType + + beforeEach(() => { + vi.clearAllMocks() + mockRender = vi.fn().mockResolvedValue(undefined) + vi.spyOn(sessionModule.Session, "create").mockImplementation(() => { + const session = { + task: vi.fn(async (_message: string, promise: Promise) => { + try { + return await promise + } catch (error) { + console.log(String(error)) + return undefined + } + }), + render: mockRender, + terminate: vi.fn((msg: string) => { + throw new Error(msg) + }), + } + return session as any + }) + }) + + it("should call session methods when exploring a schema", async () => { + const schemaDescriptor = JSON.stringify({ + fields: [ + { name: "id", type: "integer" }, + { name: "name", type: "string" }, + ], + }) + const descriptorPath = await writeTempFile(schemaDescriptor) + + const command = new Command() + .addCommand(exploreSchemaCommand) + .configureOutput({ + writeOut: () => {}, + writeErr: () => {}, + }) + + try { + await command.parseAsync(["node", "test", "explore", descriptorPath]) + } catch (error) {} + + const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0] + ?.value + expect(mockSession).toBeDefined() + expect(mockSession.task).toHaveBeenCalled() + expect(mockRender).toHaveBeenCalled() + }) + + it("should handle schema with constraints", async () => { + const schemaDescriptor = JSON.stringify({ + fields: [ + { name: "id", type: "integer", constraints: { required: true } }, + { name: "email", type: "string", format: "email" }, + ], + }) + const descriptorPath = await writeTempFile(schemaDescriptor) + + const command = new Command() + .addCommand(exploreSchemaCommand) + .configureOutput({ + writeOut: () => {}, + writeErr: () => {}, + }) + + try { + await command.parseAsync(["node", "test", "explore", descriptorPath]) + } catch (error) {} + + const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0] + ?.value + expect(mockSession).toBeDefined() + expect(mockSession.task).toHaveBeenCalled() + expect(mockRender).toHaveBeenCalled() + }) + + it("should handle json output option", async () => { + const schemaDescriptor = JSON.stringify({ + fields: [ + { name: "id", type: "integer" }, + { name: "value", type: "number" }, + ], + }) + const descriptorPath = await writeTempFile(schemaDescriptor) + + const command = new Command() + .addCommand(exploreSchemaCommand) + .configureOutput({ + writeOut: () => {}, + writeErr: () => {}, + }) + + try { + await command.parseAsync([ + "node", + "test", + "explore", + descriptorPath, + "--json", + ]) + } catch (error) {} + + const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0] + ?.value + expect(mockSession).toBeDefined() + expect(mockSession.task).toHaveBeenCalled() + expect(mockRender).toHaveBeenCalled() + }) + + it("should terminate when schema is empty", async () => { + const schemaDescriptor = JSON.stringify({}) + const descriptorPath = await writeTempFile(schemaDescriptor) + + const command = new Command() + .addCommand(exploreSchemaCommand) + .configureOutput({ + writeOut: () => {}, + writeErr: () => {}, + }) + + try { + await command.parseAsync(["node", "test", "explore", descriptorPath]) + } catch (error) {} + + const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0] + ?.value + expect(mockSession).toBeDefined() + expect(mockSession.terminate).toHaveBeenCalledWith( + "Schema is not available", + ) + }) +}) diff 
--git a/cli/commands/schema/explore.tsx b/terminal/commands/schema/explore.tsx similarity index 88% rename from cli/commands/schema/explore.tsx rename to terminal/commands/schema/explore.tsx index a6ba0632..92cbcf02 100644 --- a/cli/commands/schema/explore.tsx +++ b/terminal/commands/schema/explore.tsx @@ -1,14 +1,14 @@ -import { loadSchema } from "@dpkit/lib" -import type { Resource } from "@dpkit/lib" -import { resolveSchema } from "@dpkit/lib" +import { loadSchema } from "@dpkit/library" +import type { Resource } from "@dpkit/library" +import { resolveSchema } from "@dpkit/library" import { Command } from "commander" import React from "react" import { SchemaGrid } from "../../components/SchemaGrid.tsx" import { helpConfiguration } from "../../helpers/help.ts" import { isEmptyObject } from "../../helpers/object.ts" import { selectResource } from "../../helpers/resource.ts" -import { Session } from "../../helpers/session.ts" import * as params from "../../params/index.ts" +import { Session } from "../../session.ts" export const exploreSchemaCommand = new Command("explore") .configureHelp(helpConfiguration) diff --git a/cli/commands/schema/index.ts b/terminal/commands/schema/index.ts similarity index 100% rename from cli/commands/schema/index.ts rename to terminal/commands/schema/index.ts diff --git a/cli/commands/schema/infer.spec.ts b/terminal/commands/schema/infer.spec.ts similarity index 97% rename from cli/commands/schema/infer.spec.ts rename to terminal/commands/schema/infer.spec.ts index c8ff946c..59b17102 100644 --- a/cli/commands/schema/infer.spec.ts +++ b/terminal/commands/schema/infer.spec.ts @@ -1,7 +1,7 @@ -import { writeTempFile } from "@dpkit/file" -import { useRecording } from "@dpkit/test" +import { writeTempFile } from "@dpkit/dataset" import { Command } from "commander" import { describe, expect, it, vi } from "vitest" +import { useRecording } from "vitest-polly" import { inferSchemaCommand } from "./infer.tsx" useRecording() diff --git a/cli/commands/schema/infer.tsx b/terminal/commands/schema/infer.tsx similarity index 96% rename from cli/commands/schema/infer.tsx rename to terminal/commands/schema/infer.tsx index ee770f61..e0497e27 100644 --- a/cli/commands/schema/infer.tsx +++ b/terminal/commands/schema/infer.tsx @@ -1,4 +1,4 @@ -import { inferSchemaFromTable, loadTable } from "@dpkit/lib" +import { inferSchemaFromTable, loadTable } from "@dpkit/library" import { Command } from "commander" import React from "react" import { SchemaGrid } from "../../components/SchemaGrid.tsx" @@ -6,8 +6,8 @@ import { createDialectFromOptions } from "../../helpers/dialect.ts" import { helpConfiguration } from "../../helpers/help.ts" import { isEmptyObject } from "../../helpers/object.ts" import { selectResource } from "../../helpers/resource.ts" -import { Session } from "../../helpers/session.ts" import * as params from "../../params/index.ts" +import { Session } from "../../session.ts" export const inferSchemaCommand = new Command("infer") .configureHelp(helpConfiguration) diff --git a/terminal/commands/schema/script.spec.tsx b/terminal/commands/schema/script.spec.tsx new file mode 100644 index 00000000..2cee0f5b --- /dev/null +++ b/terminal/commands/schema/script.spec.tsx @@ -0,0 +1,117 @@ +import repl from "node:repl" +import { writeTempFile } from "@dpkit/dataset" +import { Command } from "commander" +import { beforeEach, describe, expect, it, vi } from "vitest" +import * as sessionModule from "../../session.ts" +import { scriptSchemaCommand } from "./script.tsx" + 
+describe("schema script", () => { + beforeEach(() => { + vi.clearAllMocks() + + vi.spyOn(repl, "start").mockReturnValue({ + context: {}, + } as any) + + vi.spyOn(sessionModule.Session, "create").mockImplementation(() => { + const session = { + task: vi.fn(async (_message: string, promise: Promise) => { + try { + return await promise + } catch (error) { + console.log(String(error)) + return undefined + } + }), + terminate: vi.fn((msg: string) => { + throw new Error(msg) + }), + } + return session as any + }) + }) + + it("should call session methods when starting a script session", async () => { + const schemaDescriptor = JSON.stringify({ + fields: [ + { name: "id", type: "integer" }, + { name: "name", type: "string" }, + ], + }) + const descriptorPath = await writeTempFile(schemaDescriptor) + + const command = new Command() + .addCommand(scriptSchemaCommand) + .configureOutput({ + writeOut: () => {}, + writeErr: () => {}, + }) + + try { + await command.parseAsync(["node", "test", "script", descriptorPath]) + } catch (error) {} + + const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0] + ?.value + expect(mockSession).toBeDefined() + expect(mockSession.task).toHaveBeenCalled() + }) + + it("should handle schema with constraints", async () => { + const schemaDescriptor = JSON.stringify({ + fields: [ + { name: "id", type: "integer", constraints: { required: true } }, + { name: "email", type: "string", format: "email" }, + ], + }) + const descriptorPath = await writeTempFile(schemaDescriptor) + + const command = new Command() + .addCommand(scriptSchemaCommand) + .configureOutput({ + writeOut: () => {}, + writeErr: () => {}, + }) + + try { + await command.parseAsync(["node", "test", "script", descriptorPath]) + } catch (error) {} + + const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0] + ?.value + expect(mockSession).toBeDefined() + expect(mockSession.task).toHaveBeenCalled() + }) + + it("should handle json output option", async () => { + const schemaDescriptor = JSON.stringify({ + fields: [ + { name: "id", type: "integer" }, + { name: "value", type: "number" }, + ], + }) + const descriptorPath = await writeTempFile(schemaDescriptor) + + const command = new Command() + .addCommand(scriptSchemaCommand) + .configureOutput({ + writeOut: () => {}, + writeErr: () => {}, + }) + + try { + await command.parseAsync([ + "node", + "test", + "script", + descriptorPath, + "--json", + ]) + } catch (error) {} + + const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0] + ?.value + expect(mockSession).toBeDefined() + expect(mockSession.task).toHaveBeenCalled() + }) +}) diff --git a/cli/commands/schema/script.tsx b/terminal/commands/schema/script.tsx similarity index 85% rename from cli/commands/schema/script.tsx rename to terminal/commands/schema/script.tsx index 68f766e4..7279f76d 100644 --- a/cli/commands/schema/script.tsx +++ b/terminal/commands/schema/script.tsx @@ -1,14 +1,14 @@ import repl from "node:repl" -import { loadSchema } from "@dpkit/lib" -import type { Resource } from "@dpkit/lib" -import { resolveSchema } from "@dpkit/lib" -import * as dpkit from "@dpkit/lib" +import { loadSchema } from "@dpkit/library" +import type { Resource } from "@dpkit/library" +import { resolveSchema } from "@dpkit/library" +import * as dpkit from "@dpkit/library" import { Command } from "commander" import pc from "picocolors" import { helpConfiguration } from "../../helpers/help.ts" import { selectResource } from "../../helpers/resource.ts" -import { Session } 
from "../../helpers/session.ts" import * as params from "../../params/index.ts" +import { Session } from "../../session.ts" export const scriptSchemaCommand = new Command("script") .configureHelp(helpConfiguration) diff --git a/cli/commands/schema/validate.spec.ts b/terminal/commands/schema/validate.spec.ts similarity index 97% rename from cli/commands/schema/validate.spec.ts rename to terminal/commands/schema/validate.spec.ts index d21da4b3..652c70d6 100644 --- a/cli/commands/schema/validate.spec.ts +++ b/terminal/commands/schema/validate.spec.ts @@ -1,7 +1,7 @@ -import { writeTempFile } from "@dpkit/file" -import { useRecording } from "@dpkit/test" +import { writeTempFile } from "@dpkit/dataset" import { Command } from "commander" import { describe, expect, it, vi } from "vitest" +import { useRecording } from "vitest-polly" import { validateSchemaCommand } from "./validate.tsx" useRecording() diff --git a/cli/commands/schema/validate.tsx b/terminal/commands/schema/validate.tsx similarity index 90% rename from cli/commands/schema/validate.tsx rename to terminal/commands/schema/validate.tsx index 0e97e623..0efe87b1 100644 --- a/cli/commands/schema/validate.tsx +++ b/terminal/commands/schema/validate.tsx @@ -1,14 +1,14 @@ -import { loadDescriptor, validateSchema } from "@dpkit/lib" -import { resolveSchema } from "@dpkit/lib" -import type { Resource } from "@dpkit/lib" +import { loadDescriptor, validateSchema } from "@dpkit/library" +import { resolveSchema } from "@dpkit/library" +import type { Resource } from "@dpkit/library" import { Command } from "commander" import React from "react" import { ErrorGrid } from "../../components/ErrorGrid.tsx" import { selectErrorType } from "../../helpers/error.ts" import { helpConfiguration } from "../../helpers/help.ts" import { selectResource } from "../../helpers/resource.ts" -import { Session } from "../../helpers/session.ts" import * as params from "../../params/index.ts" +import { Session } from "../../session.ts" export const validateSchemaCommand = new Command("validate") .configureHelp(helpConfiguration) diff --git a/cli/commands/table/convert.spec.ts b/terminal/commands/table/convert.spec.ts similarity index 96% rename from cli/commands/table/convert.spec.ts rename to terminal/commands/table/convert.spec.ts index 4e57f4ab..6ef667e6 100644 --- a/cli/commands/table/convert.spec.ts +++ b/terminal/commands/table/convert.spec.ts @@ -1,10 +1,10 @@ import { existsSync } from "node:fs" import { tmpdir } from "node:os" import { join } from "node:path" -import { writeTempFile } from "@dpkit/file" -import { useRecording } from "@dpkit/test" +import { writeTempFile } from "@dpkit/dataset" import { Command } from "commander" import { describe, expect, it, vi } from "vitest" +import { useRecording } from "vitest-polly" import { convertTableCommand } from "./convert.tsx" useRecording() diff --git a/cli/commands/table/convert.tsx b/terminal/commands/table/convert.tsx similarity index 94% rename from cli/commands/table/convert.tsx rename to terminal/commands/table/convert.tsx index 5c0649d2..fe05fa9b 100644 --- a/cli/commands/table/convert.tsx +++ b/terminal/commands/table/convert.tsx @@ -1,17 +1,17 @@ -import { getTempFilePath, loadFile } from "@dpkit/lib" -import { loadSchema } from "@dpkit/lib" -import { loadDialect } from "@dpkit/lib" -import { loadTable, saveTable } from "@dpkit/lib" -import { queryTable } from "@dpkit/lib" -import type { Resource } from "@dpkit/lib" +import { getTempFilePath, loadFile } from "@dpkit/library" +import { loadSchema } from 
"@dpkit/library" +import { loadDialect } from "@dpkit/library" +import { loadTable, saveTable } from "@dpkit/library" +import { queryTable } from "@dpkit/library" +import type { Resource } from "@dpkit/library" import { Command } from "commander" import { createDialectFromOptions } from "../../helpers/dialect.ts" import { createToDialectFromOptions } from "../../helpers/dialect.ts" import { helpConfiguration } from "../../helpers/help.ts" import { selectResource } from "../../helpers/resource.ts" import { createSchemaOptionsFromToSchemaOptions } from "../../helpers/schema.ts" -import { Session } from "../../helpers/session.ts" import * as params from "../../params/index.ts" +import { Session } from "../../session.ts" export const convertTableCommand = new Command("convert") .configureHelp(helpConfiguration) diff --git a/cli/commands/table/describe.spec.ts b/terminal/commands/table/describe.spec.ts similarity index 97% rename from cli/commands/table/describe.spec.ts rename to terminal/commands/table/describe.spec.ts index 023ac387..05339ca2 100644 --- a/cli/commands/table/describe.spec.ts +++ b/terminal/commands/table/describe.spec.ts @@ -1,7 +1,7 @@ -import { writeTempFile } from "@dpkit/file" -import { useRecording } from "@dpkit/test" +import { writeTempFile } from "@dpkit/dataset" import { Command } from "commander" import { describe, expect, it, vi } from "vitest" +import { useRecording } from "vitest-polly" import { describeTableCommand } from "./describe.tsx" useRecording() diff --git a/cli/commands/table/describe.tsx b/terminal/commands/table/describe.tsx similarity index 88% rename from cli/commands/table/describe.tsx rename to terminal/commands/table/describe.tsx index 8b5ba6da..53d508d0 100644 --- a/cli/commands/table/describe.tsx +++ b/terminal/commands/table/describe.tsx @@ -1,16 +1,16 @@ -import { loadTable } from "@dpkit/lib" -import { queryTable } from "@dpkit/lib" -import { loadSchema } from "@dpkit/lib" -import type { Resource } from "@dpkit/lib" -import { loadDialect } from "@dpkit/lib" +import { loadTable } from "@dpkit/library" +import { queryTable } from "@dpkit/library" +import { loadSchema } from "@dpkit/library" +import type { Resource } from "@dpkit/library" +import { loadDialect } from "@dpkit/library" import { Command } from "commander" import React from "react" import { DataGrid } from "../../components/DataGrid.tsx" import { createDialectFromOptions } from "../../helpers/dialect.ts" import { helpConfiguration } from "../../helpers/help.ts" import { selectResource } from "../../helpers/resource.ts" -import { Session } from "../../helpers/session.ts" import * as params from "../../params/index.ts" +import { Session } from "../../session.ts" export const describeTableCommand = new Command("describe") .configureHelp(helpConfiguration) @@ -102,9 +102,9 @@ export const describeTableCommand = new Command("describe") table = queryTable(table, options.query) } - const df = await session.task("Calculating stats", table.collect()) + const frame = await session.task("Calculating stats", table.collect()) - const stats = df.describe().rename({ describe: "#" }) + const stats = frame.describe().rename({ describe: "#" }) const records = stats.toRecords() session.render(records, ) diff --git a/terminal/commands/table/explore.spec.tsx b/terminal/commands/table/explore.spec.tsx new file mode 100644 index 00000000..e07d11d1 --- /dev/null +++ b/terminal/commands/table/explore.spec.tsx @@ -0,0 +1,115 @@ +import { writeTempFile } from "@dpkit/dataset" +import { Command } from 
"commander" +import { beforeEach, describe, expect, it, vi } from "vitest" +import * as sessionModule from "../../session.ts" +import { exploreTableCommand } from "./explore.tsx" + +vi.mock("../../components/TableGrid.tsx", () => ({ + TableGrid: vi.fn(() => null), +})) + +describe("table explore", () => { + let mockRender: ReturnType + + beforeEach(() => { + vi.clearAllMocks() + mockRender = vi.fn().mockResolvedValue(undefined) + vi.spyOn(sessionModule.Session, "create").mockImplementation(() => { + const session = { + task: vi.fn(async (_message: string, promise: Promise) => { + try { + return await promise + } catch (error) { + console.log(String(error)) + return undefined + } + }), + render: mockRender, + terminate: vi.fn((msg: string) => { + throw new Error(msg) + }), + } + return session as any + }) + }) + + it("should call session methods when exploring a csv table", async () => { + const csvPath = await writeTempFile( + "id,name,age\n1,alice,25\n2,bob,30\n3,charlie,35", + ) + + const command = new Command() + .addCommand(exploreTableCommand) + .configureOutput({ + writeOut: () => {}, + writeErr: () => {}, + }) + + try { + await command.parseAsync(["node", "test", "explore", csvPath, "--quit"]) + } catch (error) {} + + const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0] + ?.value + expect(mockSession).toBeDefined() + expect(mockSession.task).toHaveBeenCalled() + }) + + it("should handle custom delimiter option", async () => { + const csvPath = await writeTempFile("id|name|value\n1|test|100\n2|demo|200") + + const command = new Command() + .addCommand(exploreTableCommand) + .configureOutput({ + writeOut: () => {}, + writeErr: () => {}, + }) + + try { + await command.parseAsync([ + "node", + "test", + "explore", + csvPath, + "--delimiter", + "|", + "--quit", + ]) + } catch (error) {} + + const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0] + ?.value + expect(mockSession).toBeDefined() + expect(mockSession.task).toHaveBeenCalled() + }) + + it("should handle query option", async () => { + const csvPath = await writeTempFile( + "id,name,age\n1,alice,25\n2,bob,30\n3,charlie,35", + ) + + const command = new Command() + .addCommand(exploreTableCommand) + .configureOutput({ + writeOut: () => {}, + writeErr: () => {}, + }) + + try { + await command.parseAsync([ + "node", + "test", + "explore", + csvPath, + "--query", + "SELECT * FROM self WHERE age > 25", + "--quit", + ]) + } catch (error) {} + + const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0] + ?.value + expect(mockSession).toBeDefined() + expect(mockSession.task).toHaveBeenCalled() + }) +}) diff --git a/cli/commands/table/explore.tsx b/terminal/commands/table/explore.tsx similarity index 93% rename from cli/commands/table/explore.tsx rename to terminal/commands/table/explore.tsx index 4a8cfb5d..18586fc1 100644 --- a/cli/commands/table/explore.tsx +++ b/terminal/commands/table/explore.tsx @@ -1,16 +1,16 @@ -import { inferSchemaFromTable, resolveSchema } from "@dpkit/lib" -import { queryTable } from "@dpkit/lib" -import { loadSchema } from "@dpkit/lib" -import { loadDialect, loadTable, normalizeTable } from "@dpkit/lib" -import type { Resource } from "@dpkit/lib" +import { inferSchemaFromTable, resolveSchema } from "@dpkit/library" +import { queryTable } from "@dpkit/library" +import { loadSchema } from "@dpkit/library" +import { loadDialect, loadTable, normalizeTable } from "@dpkit/library" +import type { Resource } from "@dpkit/library" import { Command } from 
"commander" import React from "react" import { TableGrid } from "../../components/TableGrid.tsx" import { createDialectFromOptions } from "../../helpers/dialect.ts" import { helpConfiguration } from "../../helpers/help.ts" import { selectResource } from "../../helpers/resource.ts" -import { Session } from "../../helpers/session.ts" import * as params from "../../params/index.ts" +import { Session } from "../../session.ts" export const exploreTableCommand = new Command("explore") .configureHelp(helpConfiguration) diff --git a/cli/commands/table/index.ts b/terminal/commands/table/index.ts similarity index 100% rename from cli/commands/table/index.ts rename to terminal/commands/table/index.ts diff --git a/terminal/commands/table/script.spec.tsx b/terminal/commands/table/script.spec.tsx new file mode 100644 index 00000000..1de873b9 --- /dev/null +++ b/terminal/commands/table/script.spec.tsx @@ -0,0 +1,111 @@ +import repl from "node:repl" +import { writeTempFile } from "@dpkit/dataset" +import { Command } from "commander" +import { beforeEach, describe, expect, it, vi } from "vitest" +import * as sessionModule from "../../session.ts" +import { scriptTableCommand } from "./script.tsx" + +describe("table script", () => { + beforeEach(() => { + vi.clearAllMocks() + + vi.spyOn(repl, "start").mockReturnValue({ + context: {}, + } as any) + + vi.spyOn(sessionModule.Session, "create").mockImplementation(() => { + const session = { + task: vi.fn(async (_message: string, promise: Promise) => { + try { + return await promise + } catch (error) { + console.log(String(error)) + return undefined + } + }), + terminate: vi.fn((msg: string) => { + throw new Error(msg) + }), + } + return session as any + }) + }) + + it("should call session methods when starting a script session", async () => { + const csvPath = await writeTempFile( + "id,name,age\n1,alice,25\n2,bob,30\n3,charlie,35", + ) + + const command = new Command() + .addCommand(scriptTableCommand) + .configureOutput({ + writeOut: () => {}, + writeErr: () => {}, + }) + + try { + await command.parseAsync(["node", "test", "script", csvPath]) + } catch (error) {} + + const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0] + ?.value + expect(mockSession).toBeDefined() + expect(mockSession.task).toHaveBeenCalled() + }) + + it("should handle custom delimiter option", async () => { + const csvPath = await writeTempFile("id|name|value\n1|test|100\n2|demo|200") + + const command = new Command() + .addCommand(scriptTableCommand) + .configureOutput({ + writeOut: () => {}, + writeErr: () => {}, + }) + + try { + await command.parseAsync([ + "node", + "test", + "script", + csvPath, + "--delimiter", + "|", + ]) + } catch (error) {} + + const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0] + ?.value + expect(mockSession).toBeDefined() + expect(mockSession.task).toHaveBeenCalled() + }) + + it("should handle query option", async () => { + const csvPath = await writeTempFile( + "id,name,age\n1,alice,25\n2,bob,30\n3,charlie,35", + ) + + const command = new Command() + .addCommand(scriptTableCommand) + .configureOutput({ + writeOut: () => {}, + writeErr: () => {}, + }) + + try { + await command.parseAsync([ + "node", + "test", + "script", + csvPath, + "--query", + "SELECT * FROM self WHERE age > 25", + ]) + } catch (error) {} + + const mockSession = vi.mocked(sessionModule.Session.create).mock.results[0] + ?.value + expect(mockSession).toBeDefined() + expect(mockSession.task).toHaveBeenCalled() + }) +}) diff --git 
a/cli/commands/table/script.tsx b/terminal/commands/table/script.tsx similarity index 91% rename from cli/commands/table/script.tsx rename to terminal/commands/table/script.tsx index c8f5dddf..8f1b8685 100644 --- a/cli/commands/table/script.tsx +++ b/terminal/commands/table/script.tsx @@ -1,17 +1,17 @@ import repl from "node:repl" -import { queryTable } from "@dpkit/lib" -import * as dpkit from "@dpkit/lib" -import { loadSchema } from "@dpkit/lib" -import type { Resource } from "@dpkit/lib" -import { loadDialect } from "@dpkit/lib" -import { loadTable } from "@dpkit/lib" +import { queryTable } from "@dpkit/library" +import * as dpkit from "@dpkit/library" +import { loadSchema } from "@dpkit/library" +import type { Resource } from "@dpkit/library" +import { loadDialect } from "@dpkit/library" +import { loadTable } from "@dpkit/library" import { Command } from "commander" import pc from "picocolors" import { createDialectFromOptions } from "../../helpers/dialect.ts" import { helpConfiguration } from "../../helpers/help.ts" import { selectResource } from "../../helpers/resource.ts" -import { Session } from "../../helpers/session.ts" import * as params from "../../params/index.ts" +import { Session } from "../../session.ts" export const scriptTableCommand = new Command("script") .configureHelp(helpConfiguration) diff --git a/cli/commands/table/validate.spec.ts b/terminal/commands/table/validate.spec.ts similarity index 97% rename from cli/commands/table/validate.spec.ts rename to terminal/commands/table/validate.spec.ts index e67b7ff1..bb991757 100644 --- a/cli/commands/table/validate.spec.ts +++ b/terminal/commands/table/validate.spec.ts @@ -1,7 +1,7 @@ -import { writeTempFile } from "@dpkit/file" -import { useRecording } from "@dpkit/test" +import { writeTempFile } from "@dpkit/dataset" import { Command } from "commander" import { describe, expect, it, vi } from "vitest" +import { useRecording } from "vitest-polly" import { validateTableCommand } from "./validate.tsx" useRecording() diff --git a/cli/commands/table/validate.tsx b/terminal/commands/table/validate.tsx similarity index 83% rename from cli/commands/table/validate.tsx rename to terminal/commands/table/validate.tsx index 79a9bc72..3490b922 100644 --- a/cli/commands/table/validate.tsx +++ b/terminal/commands/table/validate.tsx @@ -1,8 +1,9 @@ -import { loadTable, validateTable } from "@dpkit/lib" -import { loadSchema } from "@dpkit/lib" -import { inferSchemaFromTable, resolveSchema } from "@dpkit/lib" -import { loadDialect } from "@dpkit/lib" -import type { Resource } from "@dpkit/lib" +import { inspectTable, loadTable } from "@dpkit/library" +import { createReport } from "@dpkit/library" +import { loadSchema } from "@dpkit/library" +import { inferSchemaFromTable, resolveSchema } from "@dpkit/library" +import { loadDialect } from "@dpkit/library" +import type { Resource } from "@dpkit/library" import { Command } from "commander" import React from "react" import { ErrorGrid } from "../../components/ErrorGrid.tsx" @@ -10,8 +11,8 @@ import { createDialectFromOptions } from "../../helpers/dialect.ts" import { selectErrorType } from "../../helpers/error.ts" import { helpConfiguration } from "../../helpers/help.ts" import { selectResource } from "../../helpers/resource.ts" -import { Session } from "../../helpers/session.ts" import * as params from "../../params/index.ts" +import { Session } from "../../session.ts" export const validateTableCommand = new Command("validate") .configureHelp(helpConfiguration) @@ -118,23 +119,23 @@ export 
const validateTableCommand = new Command("validate") ) } - const report = await session.task( - "Validating table", - validateTable(table, { schema }), + let errors = await session.task( + "Inspecting table", + inspectTable(table, { schema }), ) - if (report.errors.length) { - const type = await selectErrorType(session, report.errors) - if (type) report.errors = report.errors.filter(e => e.type === type) + if (errors.length) { + const type = await selectErrorType(session, errors) + if (type) errors = errors.filter(e => e.type === type) } - if (report.valid) { + if (!errors.length) { session.success("Table is valid") return } session.render( - report, - , + createReport(errors), + , ) }) diff --git a/terminal/components/DataGrid.spec.tsx b/terminal/components/DataGrid.spec.tsx new file mode 100644 index 00000000..7ee0b86b --- /dev/null +++ b/terminal/components/DataGrid.spec.tsx @@ -0,0 +1,175 @@ +import type { DataRecord } from "@dpkit/library" +import { render } from "ink-testing-library" +import React from "react" +import { describe, expect, it } from "vitest" +import { DataGrid } from "./DataGrid.tsx" + +describe("DataGrid", () => { + it("should render empty grid with no records", () => { + const records: DataRecord[] = [] + const { lastFrame } = render() + + expect(lastFrame()).toBeDefined() + }) + + it("should render basic data grid", () => { + const records: DataRecord[] = [ + { id: 1, name: "alice" }, + { id: 2, name: "bob" }, + ] + const { lastFrame } = render() + + const output = lastFrame() + expect(output).toContain("id") + expect(output).toContain("name") + expect(output).toContain("alice") + expect(output).toContain("bob") + }) + + it("should render with custom schema", () => { + const records: DataRecord[] = [ + { id: 1, value: 100 }, + { id: 2, value: 200 }, + ] + const schema = { + fields: [ + { name: "id", type: "integer" as const }, + { name: "value", type: "number" as const }, + ], + } + const { lastFrame } = render() + + const output = lastFrame() + expect(output).toContain("id") + expect(output).toContain("value") + }) + + it("should render with types when withTypes is true", () => { + const records: DataRecord[] = [{ id: 1, name: "alice" }] + const { lastFrame } = render() + + const output = lastFrame() + expect(output).toBeDefined() + expect(output).toContain("id") + expect(output).toContain("name") + }) + + it("should render with column selection", () => { + const records: DataRecord[] = [ + { id: 1, name: "alice" }, + { id: 2, name: "bob" }, + ] + const { lastFrame } = render() + + const output = lastFrame() + expect(output).toContain("id") + expect(output).toContain("name") + }) + + it("should render with row selection", () => { + const records: DataRecord[] = [ + { id: 1, name: "alice" }, + { id: 2, name: "bob" }, + ] + const { lastFrame } = render() + + const output = lastFrame() + expect(output).toContain("alice") + expect(output).toContain("bob") + }) + + it("should render with both row and column selection", () => { + const records: DataRecord[] = [ + { id: 1, name: "alice" }, + { id: 2, name: "bob" }, + ] + const { lastFrame } = render() + + const output = lastFrame() + expect(output).toContain("alice") + expect(output).toContain("bob") + }) + + it("should render with ascending order indicator", () => { + const records: DataRecord[] = [ + { id: 1, name: "alice" }, + { id: 2, name: "bob" }, + ] + const { lastFrame } = render( + , + ) + + const output = lastFrame() + expect(output).toContain("▲") + }) + + it("should render with descending order indicator", () 
=> { + const records: DataRecord[] = [ + { id: 1, name: "alice" }, + { id: 2, name: "bob" }, + ] + const { lastFrame } = render( + , + ) + + const output = lastFrame() + expect(output).toContain("▼") + }) + + it("should render with green border by default", () => { + const records: DataRecord[] = [{ id: 1, name: "alice" }] + const { lastFrame } = render() + + expect(lastFrame()).toBeDefined() + }) + + it("should render with red border when specified", () => { + const records: DataRecord[] = [{ id: 1, name: "alice" }] + const { lastFrame } = render( + , + ) + + expect(lastFrame()).toBeDefined() + }) + + it("should render multiple rows correctly", () => { + const records: DataRecord[] = [ + { id: 1, name: "alice", age: 30 }, + { id: 2, name: "bob", age: 25 }, + { id: 3, name: "charlie", age: 35 }, + ] + const { lastFrame } = render() + + const output = lastFrame() + expect(output).toContain("alice") + expect(output).toContain("bob") + expect(output).toContain("charlie") + expect(output).toContain("30") + expect(output).toContain("25") + expect(output).toContain("35") + }) + + it("should handle numeric values", () => { + const records: DataRecord[] = [ + { id: 1, value: 100.5 }, + { id: 2, value: 200.75 }, + ] + const { lastFrame } = render() + + const output = lastFrame() + expect(output).toContain("100.5") + expect(output).toContain("200.75") + }) + + it("should handle boolean values", () => { + const records: DataRecord[] = [ + { id: 1, active: true }, + { id: 2, active: false }, + ] + const { lastFrame } = render() + + const output = lastFrame() + expect(output).toContain("true") + expect(output).toContain("false") + }) +}) diff --git a/cli/components/DataGrid.tsx b/terminal/components/DataGrid.tsx similarity index 93% rename from cli/components/DataGrid.tsx rename to terminal/components/DataGrid.tsx index ac06b39e..d522eb88 100644 --- a/cli/components/DataGrid.tsx +++ b/terminal/components/DataGrid.tsx @@ -1,7 +1,7 @@ -import type { DataRecord, Schema } from "@dpkit/lib" -import { inferSchemaFromSample } from "@dpkit/lib" +import type { DataRecord, Schema } from "@dpkit/library" +import { inferSchemaFromSample } from "@dpkit/library" import { Box, Text } from "ink" -import { DataFrame } from "nodejs-polars" +import * as pl from "nodejs-polars" import React from "react" // TODO: Autocalculate geometry (e.g. row height etc) @@ -22,7 +22,7 @@ export function DataGrid(props: { }) { const { records, col, row, order, rowHeight, borderColor = "green" } = props - const schema = props.schema ?? inferSchemaFromSample(DataFrame(records)) + const schema = props.schema ?? inferSchemaFromSample(pl.DataFrame(records)) const startCol = col ? 
Math.floor((col - 1) / MAX_COLUMNS) * MAX_COLUMNS : 0 const fields = schema.fields.slice(startCol, startCol + MAX_COLUMNS) diff --git a/cli/components/DialectGrid.tsx b/terminal/components/DialectGrid.tsx similarity index 74% rename from cli/components/DialectGrid.tsx rename to terminal/components/DialectGrid.tsx index 40417a56..7db3b319 100644 --- a/cli/components/DialectGrid.tsx +++ b/terminal/components/DialectGrid.tsx @@ -1,5 +1,5 @@ -import type { Dialect } from "@dpkit/lib" -import type { DataRecord } from "@dpkit/lib" +import type { Dialect } from "@dpkit/library" +import type { DataRecord } from "@dpkit/library" import React from "react" import { DataGrid } from "./DataGrid.tsx" diff --git a/cli/components/ErrorGrid.tsx b/terminal/components/ErrorGrid.tsx similarity index 61% rename from cli/components/ErrorGrid.tsx rename to terminal/components/ErrorGrid.tsx index c4dc40b0..14782f36 100644 --- a/cli/components/ErrorGrid.tsx +++ b/terminal/components/ErrorGrid.tsx @@ -1,15 +1,10 @@ -import type { - DataError, - FileError, - MetadataError, - TableError, -} from "@dpkit/lib" -import { DataFrame } from "nodejs-polars" +import type { UnboundError } from "@dpkit/library" +import * as pl from "nodejs-polars" import React from "react" import { TableGrid } from "./TableGrid.tsx" export function ErrorGrid(props: { - errors: (TableError | DataError | MetadataError | FileError)[] + errors: UnboundError[] quit?: boolean }) { // TODO: Property process errors @@ -19,7 +14,7 @@ export function ErrorGrid(props: { params: error.params ? error.params.toString() : undefined, })) - const table = DataFrame(errors).lazy() + const table = pl.DataFrame(errors).lazy() return } diff --git a/cli/components/PackageGrid.tsx b/terminal/components/PackageGrid.tsx similarity index 89% rename from cli/components/PackageGrid.tsx rename to terminal/components/PackageGrid.tsx index d2badb57..68468101 100644 --- a/cli/components/PackageGrid.tsx +++ b/terminal/components/PackageGrid.tsx @@ -1,4 +1,4 @@ -import type { Package } from "@dpkit/lib" +import type { Package } from "@dpkit/library" import React from "react" import { DataGrid } from "./DataGrid.tsx" diff --git a/cli/components/ResourceGrid.tsx b/terminal/components/ResourceGrid.tsx similarity index 85% rename from cli/components/ResourceGrid.tsx rename to terminal/components/ResourceGrid.tsx index e8655765..53621403 100644 --- a/cli/components/ResourceGrid.tsx +++ b/terminal/components/ResourceGrid.tsx @@ -1,4 +1,4 @@ -import type { Resource } from "@dpkit/lib" +import type { Resource } from "@dpkit/library" import React from "react" import { DataGrid } from "./DataGrid.tsx" diff --git a/cli/components/SchemaGrid.tsx b/terminal/components/SchemaGrid.tsx similarity index 88% rename from cli/components/SchemaGrid.tsx rename to terminal/components/SchemaGrid.tsx index e3f3bc72..298d7164 100644 --- a/cli/components/SchemaGrid.tsx +++ b/terminal/components/SchemaGrid.tsx @@ -1,4 +1,4 @@ -import type { Schema } from "@dpkit/lib" +import type { Schema } from "@dpkit/library" import React from "react" import { DataGrid } from "./DataGrid.tsx" diff --git a/terminal/components/TableGrid.spec.tsx b/terminal/components/TableGrid.spec.tsx new file mode 100644 index 00000000..8452a0ef --- /dev/null +++ b/terminal/components/TableGrid.spec.tsx @@ -0,0 +1,196 @@ +import { render } from "ink-testing-library" +import * as pl from "nodejs-polars" +import React from "react" +import { describe, expect, it } from "vitest" +import { TableGrid } from "./TableGrid.tsx" + 
+describe("TableGrid", () => { + it("should render basic table", async () => { + const table = pl + .DataFrame({ + id: [1, 2, 3], + name: ["alice", "bob", "charlie"], + }) + .lazy() + + const { lastFrame } = render() + + await new Promise(resolve => setTimeout(resolve, 100)) + + const output = lastFrame() + expect(output).toContain("id") + expect(output).toContain("name") + expect(output).toContain("alice") + }) + + it("should render with custom schema", async () => { + const table = pl + .DataFrame({ + id: [1, 2], + value: [100, 200], + }) + .lazy() + + const schema = { + fields: [ + { name: "id", type: "integer" as const }, + { name: "value", type: "number" as const }, + ], + } + + const { lastFrame } = render() + + await new Promise(resolve => setTimeout(resolve, 100)) + + const output = lastFrame() + expect(output).toContain("id") + expect(output).toContain("value") + }) + + it("should render with green border by default", async () => { + const table = pl + .DataFrame({ + id: [1], + name: ["alice"], + }) + .lazy() + + const { lastFrame } = render() + + await new Promise(resolve => setTimeout(resolve, 100)) + + expect(lastFrame()).toBeDefined() + }) + + it("should render with red border when specified", async () => { + const table = pl + .DataFrame({ + id: [1], + name: ["alice"], + }) + .lazy() + + const { lastFrame } = render() + + await new Promise(resolve => setTimeout(resolve, 100)) + + expect(lastFrame()).toBeDefined() + }) + + it("should render with types when withTypes is true", async () => { + const table = pl + .DataFrame({ + id: [1], + name: ["alice"], + }) + .lazy() + + const { lastFrame } = render() + + await new Promise(resolve => setTimeout(resolve, 100)) + + expect(lastFrame()).toBeDefined() + }) + + it("should display help text by default", async () => { + const table = pl + .DataFrame({ + id: [1], + name: ["alice"], + }) + .lazy() + + const { lastFrame } = render() + + await new Promise(resolve => setTimeout(resolve, 100)) + + const output = lastFrame() + expect(output).toContain("page") + expect(output).toContain("to toggle docs") + expect(output).toContain("to quit") + }) + + it("should display page number", async () => { + const table = pl + .DataFrame({ + id: [1, 2, 3], + name: ["alice", "bob", "charlie"], + }) + .lazy() + + const { lastFrame } = render() + + await new Promise(resolve => setTimeout(resolve, 100)) + + const output = lastFrame() + expect(output).toContain("page") + expect(output).toContain("1") + }) + + it("should render multiple rows", async () => { + const table = pl + .DataFrame({ + id: [1, 2, 3, 4], + name: ["alice", "bob", "charlie", "dave"], + }) + .lazy() + + const { lastFrame } = render() + + await new Promise(resolve => setTimeout(resolve, 100)) + + const output = lastFrame() + expect(output).toContain("alice") + expect(output).toContain("bob") + expect(output).toContain("charlie") + }) + + it("should render with numeric values", async () => { + const table = pl + .DataFrame({ + id: [1, 2], + value: [100.5, 200.75], + }) + .lazy() + + const { lastFrame } = render() + + await new Promise(resolve => setTimeout(resolve, 100)) + + const output = lastFrame() + expect(output).toContain("100.5") + expect(output).toContain("200.75") + }) + + it("should render with boolean values", async () => { + const table = pl + .DataFrame({ + id: [1, 2], + active: [true, false], + }) + .lazy() + + const { lastFrame } = render() + + await new Promise(resolve => setTimeout(resolve, 100)) + + const output = lastFrame() + expect(output).toContain("true") + 
expect(output).toContain("false") + }) + + it("should handle empty table", async () => { + const table = pl + .DataFrame({ + id: [], + name: [], + }) + .lazy() + + const { lastFrame } = render() + + await new Promise(resolve => setTimeout(resolve, 100)) + + expect(lastFrame()).toBeDefined() + }) +}) diff --git a/cli/components/TableGrid.tsx b/terminal/components/TableGrid.tsx similarity index 94% rename from cli/components/TableGrid.tsx rename to terminal/components/TableGrid.tsx index 9f97f05d..e8aedb1c 100644 --- a/cli/components/TableGrid.tsx +++ b/terminal/components/TableGrid.tsx @@ -1,4 +1,4 @@ -import type { DataRecord, Schema, Table } from "@dpkit/lib" +import type { DataRecord, Schema, Table } from "@dpkit/library" import { useApp, useInput } from "ink" import { Box, Text } from "ink" import pc from "picocolors" @@ -7,6 +7,8 @@ import React from "react" import type { Order } from "./DataGrid.tsx" import { DataGrid } from "./DataGrid.tsx" +// TODO: Move components to their own folders + const PAGE_SIZE = 10 export function TableGrid(props: { @@ -59,17 +61,17 @@ export function TableGrid(props: { const thisOrder = newOrder ?? order if (page === 0) return - let ldf = table + let result = table if (thisOrder) { const name = table.columns[thisOrder.col - 1] if (name) { - ldf = ldf.sort(name, thisOrder.dir === "desc") + result = result.sort(name, thisOrder.dir === "desc") } } const offset = (page - 1) * PAGE_SIZE - const df = await ldf.slice(offset, PAGE_SIZE).collect() - const records = df.toRecords() + const frame = await result.slice(offset, PAGE_SIZE).collect() + const records = frame.toRecords() if (records.length) { setPage(page) diff --git a/cli/entrypoints/run.cmd b/terminal/entrypoints/run.cmd similarity index 100% rename from cli/entrypoints/run.cmd rename to terminal/entrypoints/run.cmd diff --git a/cli/entrypoints/run.ts b/terminal/entrypoints/run.ts similarity index 100% rename from cli/entrypoints/run.ts rename to terminal/entrypoints/run.ts diff --git a/cli/helpers/dialect.ts b/terminal/helpers/dialect.ts similarity index 98% rename from cli/helpers/dialect.ts rename to terminal/helpers/dialect.ts index da94ebb5..c735c0da 100644 --- a/cli/helpers/dialect.ts +++ b/terminal/helpers/dialect.ts @@ -1,4 +1,4 @@ -import type { Dialect } from "@dpkit/lib" +import type { Dialect } from "@dpkit/library" // TODO: Find a better way to construct dialects diff --git a/cli/helpers/error.ts b/terminal/helpers/error.ts similarity index 77% rename from cli/helpers/error.ts rename to terminal/helpers/error.ts index 36b9b687..2d4e75f8 100644 --- a/cli/helpers/error.ts +++ b/terminal/helpers/error.ts @@ -1,17 +1,10 @@ -import type { - DataError, - FileError, - MetadataError, - TableError, -} from "@dpkit/lib" +import type { BoundError, UnboundError } from "@dpkit/library" import { countBy } from "es-toolkit" -import type { Session } from "./session.ts" +import type { Session } from "../session.ts" export async function selectErrorResource( session: Session, - errors: ((TableError | MetadataError | DataError | FileError) & { - resource: string - })[], + errors: BoundError[], ) { const groups = countBy(errors, error => error.resource) @@ -32,7 +25,7 @@ export async function selectErrorResource( export async function selectErrorType( session: Session, - errors: (TableError | MetadataError | DataError | FileError)[], + errors: UnboundError[], ) { const groups = countBy(errors, error => error.type) diff --git a/cli/helpers/help.ts b/terminal/helpers/help.ts similarity index 100% rename from 
cli/helpers/help.ts rename to terminal/helpers/help.ts diff --git a/cli/helpers/object.ts b/terminal/helpers/object.ts similarity index 100% rename from cli/helpers/object.ts rename to terminal/helpers/object.ts diff --git a/cli/helpers/resource.ts b/terminal/helpers/resource.ts similarity index 91% rename from cli/helpers/resource.ts rename to terminal/helpers/resource.ts index 379c5e24..e6adce92 100644 --- a/cli/helpers/resource.ts +++ b/terminal/helpers/resource.ts @@ -1,5 +1,5 @@ -import { loadPackage } from "@dpkit/lib" -import type { Session } from "./session.ts" +import { loadPackage } from "@dpkit/library" +import type { Session } from "../session.ts" export async function selectResource( session: Session, diff --git a/cli/helpers/schema.ts b/terminal/helpers/schema.ts similarity index 97% rename from cli/helpers/schema.ts rename to terminal/helpers/schema.ts index 53eaeee5..a49f3a70 100644 --- a/cli/helpers/schema.ts +++ b/terminal/helpers/schema.ts @@ -1,4 +1,4 @@ -import type { SchemaOptions } from "@dpkit/lib" +import type { SchemaOptions } from "@dpkit/library" // TODO: Find a better way to construct schema options diff --git a/terminal/index.ts b/terminal/index.ts new file mode 100644 index 00000000..278fc56a --- /dev/null +++ b/terminal/index.ts @@ -0,0 +1 @@ +export { program } from "./program.ts" diff --git a/terminal/main.ts b/terminal/main.ts new file mode 100644 index 00000000..6679ddb7 --- /dev/null +++ b/terminal/main.ts @@ -0,0 +1,3 @@ +import { program } from "./program.ts" + +program.parse() diff --git a/cli/package.json b/terminal/package.json similarity index 83% rename from cli/package.json rename to terminal/package.json index da12c026..0244b9d3 100644 --- a/cli/package.json +++ b/terminal/package.json @@ -1,10 +1,12 @@ { - "name": "@dpkit/cli", + "name": "@dpkit/terminal", "type": "module", "version": "0.0.0-dev", "sideEffects": false, + "exports": "./build/index.js", + "files": ["build"], "bin": { - "dp": "./build/entrypoints/run.js" + "dpkit": "./build/entrypoints/run.js" }, "license": "MIT", "author": "Evgeny Karev", @@ -20,7 +22,7 @@ "validation", "quality", "fair", - "cli" + "terminal" ], "scripts": { "build": "tsc && pnpm build:copy && pnpm build:mode", @@ -33,12 +35,12 @@ "dependencies": { "@clack/prompts": "^0.11.0", "@commander-js/extra-typings": "^14.0.0", - "@dpkit/lib": "workspace:*", + "@dpkit/library": "workspace:*", "commander": "^14.0.0", "es-toolkit": "^1.39.10", "exit-hook": "^4.0.0", "ink": "^6.3.1", - "nodejs-polars": "^0.22.1", + "nodejs-polars": "^0.22.2", "picocolors": "^1.1.1", "react": "^19.1.1", "react-devtools-core": "^6.1.2", @@ -46,8 +48,7 @@ "ts-extras": "^0.14.0" }, "devDependencies": { - "@dpkit/file": "workspace:*", - "@dpkit/test": "workspace:*", + "@dpkit/dataset": "workspace:*", "@types/node": "24.2.0", "@types/react": "19.1.9", "ink-testing-library": "4.0.0" diff --git a/cli/params/all.ts b/terminal/params/all.ts similarity index 100% rename from cli/params/all.ts rename to terminal/params/all.ts diff --git a/cli/params/ckan.ts b/terminal/params/ckan.ts similarity index 100% rename from cli/params/ckan.ts rename to terminal/params/ckan.ts diff --git a/cli/params/debug.ts b/terminal/params/debug.ts similarity index 100% rename from cli/params/debug.ts rename to terminal/params/debug.ts diff --git a/cli/params/dialect.ts b/terminal/params/dialect.ts similarity index 100% rename from cli/params/dialect.ts rename to terminal/params/dialect.ts diff --git a/cli/params/file.ts b/terminal/params/file.ts similarity index 100% rename 
from cli/params/file.ts rename to terminal/params/file.ts diff --git a/cli/params/github.ts b/terminal/params/github.ts similarity index 100% rename from cli/params/github.ts rename to terminal/params/github.ts diff --git a/cli/params/index.ts b/terminal/params/index.ts similarity index 100% rename from cli/params/index.ts rename to terminal/params/index.ts diff --git a/cli/params/json.ts b/terminal/params/json.ts similarity index 100% rename from cli/params/json.ts rename to terminal/params/json.ts diff --git a/cli/params/package.ts b/terminal/params/package.ts similarity index 100% rename from cli/params/package.ts rename to terminal/params/package.ts diff --git a/cli/params/path.ts b/terminal/params/path.ts similarity index 100% rename from cli/params/path.ts rename to terminal/params/path.ts diff --git a/cli/params/quit.ts b/terminal/params/quit.ts similarity index 100% rename from cli/params/quit.ts rename to terminal/params/quit.ts diff --git a/cli/params/resource.ts b/terminal/params/resource.ts similarity index 100% rename from cli/params/resource.ts rename to terminal/params/resource.ts diff --git a/cli/params/schema.ts b/terminal/params/schema.ts similarity index 99% rename from cli/params/schema.ts rename to terminal/params/schema.ts index 2f21cba3..8b61183a 100644 --- a/cli/params/schema.ts +++ b/terminal/params/schema.ts @@ -1,4 +1,4 @@ -import type { FieldType } from "@dpkit/lib" +import type { FieldType } from "@dpkit/library" import { Option } from "commander" export const schema = new Option( diff --git a/cli/params/silent.ts b/terminal/params/silent.ts similarity index 100% rename from cli/params/silent.ts rename to terminal/params/silent.ts diff --git a/cli/params/table.ts b/terminal/params/table.ts similarity index 100% rename from cli/params/table.ts rename to terminal/params/table.ts diff --git a/cli/params/zenodo.ts b/terminal/params/zenodo.ts similarity index 100% rename from cli/params/zenodo.ts rename to terminal/params/zenodo.ts diff --git a/cli/main.ts b/terminal/program.ts similarity index 91% rename from cli/main.ts rename to terminal/program.ts index 2cfaf5eb..30049527 100644 --- a/cli/main.ts +++ b/terminal/program.ts @@ -1,6 +1,4 @@ -// TODO: Support tab completion when @bombsh/tab is released -//import tab from "@bombsh/tab/commander" -import { program } from "commander" +import * as commander from "commander" import { dialectCommand } from "./commands/dialect/index.ts" import { fileCommand } from "./commands/file/index.ts" import { packageCommand } from "./commands/package/index.ts" @@ -10,7 +8,7 @@ import { tableCommand } from "./commands/table/index.ts" import { helpConfiguration } from "./helpers/help.ts" import metadata from "./package.json" with { type: "json" } -const main = program +export const program = commander.program .name("dp") .description( "Fast data management CLI built on top of the Data Package standard and Polars DataFrames", @@ -26,5 +24,6 @@ const main = program .addCommand(tableCommand) .addCommand(fileCommand) -//tab(main) -main.parse() +// TODO: Support tab completion when @bombsh/tab is released +//import tab from "@bombsh/tab/commander" +//tab(program) diff --git a/cli/helpers/session.ts b/terminal/session.ts similarity index 100% rename from cli/helpers/session.ts rename to terminal/session.ts diff --git a/file/tsconfig.json b/terminal/tsconfig.json similarity index 100% rename from file/tsconfig.json rename to terminal/tsconfig.json diff --git a/test/README.md b/test/README.md deleted file mode 100644 index 0230e064..00000000 
--- a/test/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# @dpkit/test - -dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev). diff --git a/test/index.ts b/test/index.ts deleted file mode 100644 index fee0754e..00000000 --- a/test/index.ts +++ /dev/null @@ -1 +0,0 @@ -export * from "./recording/index.ts" diff --git a/test/recording/index.ts b/test/recording/index.ts deleted file mode 100644 index 86c8b328..00000000 --- a/test/recording/index.ts +++ /dev/null @@ -1 +0,0 @@ -export { useRecording } from "./recording.ts" diff --git a/test/recording/recording.ts b/test/recording/recording.ts deleted file mode 100644 index bc7360d9..00000000 --- a/test/recording/recording.ts +++ /dev/null @@ -1,58 +0,0 @@ -import FetchAdapter from "@pollyjs/adapter-fetch" -import { Polly } from "@pollyjs/core" -import FSPersister from "@pollyjs/persister-fs" -import { afterAll, beforeAll, beforeEach } from "vitest" - -// @ts-ignore -Polly.register(FSPersister) - -// It emits a deprecation warning, but at the moment there is not -// working alternative for the fetch adapter -// @ts-ignore -Polly.register(FetchAdapter) - -/** - * Sets up Polly for recording and replaying HTTP interactions in tests. - * - * https://github.com/Netflix/pollyjs/issues/499 - * - * @param {Object} [options={}] - Configuration options for the recording. - * @param {string} [options.recordingName] - The name of the recording. If not provided, the suite name will be used. - * @param {string} [options.recordingPath] - The path to save the recordings. If not provided, the recordings will be saved in a "__recordings__" directory next to the test file. - */ -export function useRecording( - options: { recordingName?: string; recordingPath?: string } = {}, -) { - let polly: Polly - - beforeAll(suite => { - polly = new Polly(options.recordingName ?? suite.name, { - adapters: ["fetch"], - mode: "replay", - recordIfMissing: true, - recordFailedRequests: true, - persister: "fs", - persisterOptions: { - fs: { - recordingsDir: - options.recordingPath ?? - `${suite.file.filepath.substring(0, suite.file.filepath.lastIndexOf("/"))}/fixtures/generated`, - }, - }, - }) - }) - - beforeEach(context => { - // Overwrite recording name on a per-test basis - polly.recordingName = options.recordingName ?? getFullTaskName(context.task) - }) - - afterAll(async () => { - await polly.stop() - }) -} - -function getFullTaskName(item: any): string { - const suiteName = item.suite ? 
getFullTaskName(item.suite) : undefined - return [suiteName, item.name].filter(Boolean).join("-") -} diff --git a/test/tsconfig.json b/test/tsconfig.json deleted file mode 100644 index 3c43903c..00000000 --- a/test/tsconfig.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "extends": "../tsconfig.json" -} diff --git a/tsconfig.json b/tsconfig.json index 3dc5a8c6..9486f66c 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -7,8 +7,7 @@ "${configDir}/**/site/", "${configDir}/**/build/", "${configDir}/**/browser/", - "${configDir}/**/compile/", - "${configDir}/**/*.spec.*/" + "${configDir}/**/compile/" ], "compilerOptions": { diff --git a/zip/README.md b/video/README.md similarity index 78% rename from zip/README.md rename to video/README.md index 9d8b1d9b..d2f84066 100644 --- a/zip/README.md +++ b/video/README.md @@ -1,3 +1,3 @@ -# @dpkit/zip +# @dpkit/video -dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev). +dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [project's website](https://dpkit.app). diff --git a/video/index.ts b/video/index.ts new file mode 100644 index 00000000..e69de29b diff --git a/test/package.json b/video/package.json similarity index 79% rename from test/package.json rename to video/package.json index 62d603f3..f6480c62 100644 --- a/test/package.json +++ b/video/package.json @@ -1,9 +1,10 @@ { - "name": "@dpkit/test", + "name": "@dpkit/video", "type": "module", "version": "0.0.0-dev", "exports": "./build/index.js", "sideEffects": false, + "files": ["build"], "license": "MIT", "author": "Evgeny Karev", "repository": "https://github.com/datisthq/dpkit", @@ -18,14 +19,13 @@ "validation", "quality", "fair", - "test" + "video" ], "scripts": { "build": "tsc" }, "dependencies": { - "@pollyjs/adapter-fetch": "^6.0.6", - "@pollyjs/core": "^6.0.6", - "@pollyjs/persister-fs": "^6.0.6" + "@dpkit/metadata": "workspace:*", + "@dpkit/dataset": "workspace:*" } } diff --git a/folder/tsconfig.json b/video/tsconfig.json similarity index 100% rename from folder/tsconfig.json rename to video/tsconfig.json diff --git a/folder/typedoc.json b/video/typedoc.json similarity index 100% rename from folder/typedoc.json rename to video/typedoc.json diff --git a/vitest.config.ts b/vitest.config.ts index 36f25188..7fd91fe7 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -11,6 +11,7 @@ export default defineConfig({ test: { include: ["**/*.spec.(ts|tsx)"], exclude: [...configDefaults.exclude, "**/build/**", "**/compile/**"], + env: { NODE_OPTIONS: "--no-warnings" }, testTimeout: 60 * 1000, passWithNoTests: true, silent: "passed-only", @@ -19,12 +20,13 @@ export default defineConfig({ reporter: ["html", "json"], exclude: [ ...coverageConfigDefaults.exclude, + "**/@*", "**/build/**", "**/compile/**", "**/coverage/**", - "**/scripts/**", - "**/examples/**", "**/entrypoints/**", + "**/examples/**", + "**/index.ts", "browser/**", "docs/**", "service/**", diff --git a/xlsx/README.md b/xlsx/README.md deleted file mode 100644 index 1cef3613..00000000 --- a/xlsx/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# @dpkit/xlsx - -dpkit is a fast data 
management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev). diff --git a/xlsx/package.json b/xlsx/package.json deleted file mode 100644 index 462b236d..00000000 --- a/xlsx/package.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "name": "@dpkit/xlsx", - "type": "module", - "version": "0.0.0-dev", - "exports": "./build/index.js", - "sideEffects": false, - "license": "MIT", - "author": "Evgeny Karev", - "repository": "https://github.com/datisthq/dpkit", - "description": "Fast TypeScript data management framework built on top of the Data Package standard and Polars DataFrames", - "keywords": [ - "data", - "polars", - "dataframe", - "datapackage", - "tableschema", - "typescript", - "validation", - "quality", - "fair", - "xlsx" - ], - "scripts": { - "build": "tsc" - }, - "dependencies": { - "@dpkit/core": "workspace:*", - "@dpkit/file": "workspace:*", - "@dpkit/table": "workspace:*", - "nodejs-polars": "^0.22.1", - "xlsx": "https://cdn.sheetjs.com/xlsx-0.20.3/xlsx-0.20.3.tgz" - }, - "devDependencies": { - "@dpkit/test": "workspace:*" - } -} diff --git a/xlsx/tsconfig.json b/xlsx/tsconfig.json deleted file mode 100644 index 3c43903c..00000000 --- a/xlsx/tsconfig.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "extends": "../tsconfig.json" -} diff --git a/xlsx/typedoc.json b/xlsx/typedoc.json deleted file mode 100644 index f8e49f3a..00000000 --- a/xlsx/typedoc.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "entryPoints": ["index.ts"], - "skipErrorChecking": true -} diff --git a/zenodo/README.md b/zenodo/README.md deleted file mode 100644 index c828772c..00000000 --- a/zenodo/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# @dpkit/zenodo - -dpkit is a fast data management framework built on top of the Data Package standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please visit the [documentation portal](https://dpkit.dev). 
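The deletions above remove the in-house @dpkit/test Polly wrapper, and the renamed spec files switch to the vitest-polly package instead. A minimal sketch of the migrated setup, assuming vitest-polly supports the zero-argument useRecording() call shown in those specs (the recordingName/recordingPath options belonged to the deleted helper and may not carry over, and the URL is illustrative):

import { describe, expect, it } from "vitest"
import { useRecording } from "vitest-polly"

// Record HTTP interactions on the first run and replay them afterwards,
// as the deleted @dpkit/test wrapper did (mode: "replay" + recordIfMissing)
useRecording()

describe("zenodo loading", () => {
  it("replays recorded HTTP fixtures instead of hitting the network", async () => {
    const response = await fetch("https://zenodo.org/api/records")
    expect(response.ok).toBe(true)
  })
})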
diff --git a/zenodo/package.json b/zenodo/package.json deleted file mode 100644 index 77691162..00000000 --- a/zenodo/package.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "name": "@dpkit/zenodo", - "type": "module", - "version": "0.0.0-dev", - "exports": "./build/index.js", - "sideEffects": false, - "license": "MIT", - "author": "Evgeny Karev", - "repository": "https://github.com/datisthq/dpkit", - "description": "Fast TypeScript data management framework built on top of the Data Package standard and Polars DataFrames", - "keywords": [ - "data", - "polars", - "dataframe", - "datapackage", - "tableschema", - "typescript", - "validation", - "quality", - "fair", - "zenodo" - ], - "scripts": { - "build": "tsc" - }, - "dependencies": { - "@dpkit/core": "workspace:*", - "@dpkit/file": "workspace:*" - }, - "devDependencies": { - "@dpkit/test": "workspace:*" - } -} diff --git a/zenodo/tsconfig.json b/zenodo/tsconfig.json deleted file mode 100644 index 3c43903c..00000000 --- a/zenodo/tsconfig.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "extends": "../tsconfig.json" -} diff --git a/zenodo/typedoc.json b/zenodo/typedoc.json deleted file mode 100644 index f8e49f3a..00000000 --- a/zenodo/typedoc.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "entryPoints": ["index.ts"], - "skipErrorChecking": true -} diff --git a/zip/package.json b/zip/package.json deleted file mode 100644 index c60776ec..00000000 --- a/zip/package.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "name": "@dpkit/zip", - "type": "module", - "version": "0.0.0-dev", - "exports": "./build/index.js", - "sideEffects": false, - "license": "MIT", - "author": "Evgeny Karev", - "repository": "https://github.com/datisthq/dpkit", - "description": "Fast TypeScript data management framework built on top of the Data Package standard and Polars DataFrames", - "keywords": [ - "data", - "polars", - "dataframe", - "datapackage", - "tableschema", - "typescript", - "validation", - "quality", - "fair", - "zip" - ], - "scripts": { - "build": "tsc" - }, - "dependencies": { - "@dpkit/core": "workspace:*", - "@dpkit/file": "workspace:*", - "@dpkit/folder": "workspace:*", - "fflate": "^0.8.2" - } -} diff --git a/zip/tsconfig.json b/zip/tsconfig.json deleted file mode 100644 index 3c43903c..00000000 --- a/zip/tsconfig.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "extends": "../tsconfig.json" -} diff --git a/zip/typedoc.json b/zip/typedoc.json deleted file mode 100644 index f8e49f3a..00000000 --- a/zip/typedoc.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "entryPoints": ["index.ts"], - "skipErrorChecking": true -}
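With the cli package renamed to @dpkit/terminal, the commander program is now exported from terminal/index.ts while terminal/main.ts keeps the parse() side effect, so importing the package no longer executes the CLI. A minimal sketch of embedding it programmatically, assuming the published build/index.js export from terminal/package.json; the "table describe" subcommand path and data.csv argument are illustrative:

import { program } from "@dpkit/terminal"

// parseAsync with an explicit argv; the first two entries mirror
// process.argv ("node" + script path), which commander skips by default
await program.parseAsync(["node", "dpkit", "table", "describe", "data.csv"])

This split (index.ts exporting, main.ts parsing) is what lets the same commander tree back both the dpkit binary and any host application that wants to mount or test individual commands.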