Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/many-rings-punch.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'meca': patch
---

Add a manifest.xml class
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,5 +61,7 @@ jobs:
with:
node-version: ${{ matrix.node }}
cache: 'npm'
- run: sudo apt-get update
- run: sudo apt-get install -y libxml2-utils
- run: npm install
- run: npm run test
62 changes: 59 additions & 3 deletions packages/meca/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,67 @@ meca -v

## What is MECA?

## Packages
Manuscript Exchange Common Approach (MECA) is a [NISO standard](https://www.niso.org/standards-committees/meca) for transferring scientific manuscripts between vendors. It is a ZIP file with a `manifest.xml`, which contains a JATS file as the `article-metadata` and other source materials.

See packages folder:
## From the command line

- meca
Commands available:

`validate`: validate the MECA zip file, including the JATS

```bash
meca validate my-meca-file.zip
```

## From TypeScript

The `manifest.xml` can be read and written as follows.

```typescript
import fs from 'fs';
import { ManifestXml, createManifestXml } from 'meca';

const data = fs.readFileSync('manifest.xml').toString();
const manifest = new ManifestXml(data);
console.log(manifest.items);

// Write a manifest file
const roundTrip = createManifestXml(manifest.items);
fs.writeFileSync('manifest.xml', roundTrip);
```

The `ManifestItem` has the following shape:

```typescript
type ManifestItem = {
id?: string;
itemType?: string;
version?: string;
title?: string;
description?: string;
href: string;
mediaType?: string;
fileOrder?: string;
metadata?: Record<string, string>;
};
```

which translates to the following XML, for example, from the NISO spec:

```xml
<item id="b-456" item-type="figure" item-version="0">
<item-description>Figure</item-description>
<file-order>3</file-order>
<item-metadata>
<metadata metadata-name="Figure Number">1</metadata>
<metadata metadata-name="Caption"
>This is the caption for Figure 1</metadata>
</item-metadata>
<instance media-type="image/jpeg" xlink:href="wrist_scaphoidvx_diagnosis.jpg" />
</item>
```

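We assume that there is only one `instance` for each `item`. If an item has several instances, a warning is logged and only the first one is used; an item without any `instance` is skipped with a warning.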

---

Expand Down
2 changes: 1 addition & 1 deletion packages/meca/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
},
"bin": "./dist/meca.cjs",
"scripts": {
"copy:dtd": "cp ./static/MECA_manifest.dtd dist/MECA_manifest.dtd",
"copy:dtd": "cp ./static/manifest-1.0.dtd dist/manifest-1.0.dtd",
"copy:version": "echo \"const version = '\"$npm_package_version\"';\nexport default version;\" > src/version.ts",
"clean": "rm -rf dist",
"unlink": "npm uninstall -g meca;",
Expand Down
2 changes: 1 addition & 1 deletion packages/meca/src/cli/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@ const program = new Command();

addValidateCLI(program);

program.version(`v${version}`, '-v, --version', 'Print the current version of jats-xml');
program.version(`v${version}`, '-v, --version', 'Print the current version of meca');
program.option('-d, --debug', 'Log out any errors to the console.');
program.parse(process.argv);
2 changes: 1 addition & 1 deletion packages/meca/src/cli/validate.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { Command, Option } from 'commander';
import { clirun } from 'myst-cli-utils';
import { getSession } from 'jats-xml';
import { validateMecaWrapper } from '../validate/index.js';
import { validateMecaWrapper } from '../validate.js';

function makeValidateCLI(program: Command) {
const command = new Command('validate')
Expand Down
3 changes: 2 additions & 1 deletion packages/meca/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
export { default as version } from './version.js';
export * from './validate/index.js';
export * from './manifest.js';
export * from './validate.js';
218 changes: 218 additions & 0 deletions packages/meca/src/manifest.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
import fs from 'node:fs';
import path from 'node:path';
import type { GenericParent } from 'myst-common';
import { js2xml, xml2js } from 'xml-js';
import type { Element, DeclarationAttributes } from 'xml-js';
import { convertToUnist, xmllintValidate } from 'jats-xml';
import type { Logger } from 'myst-cli-utils';
import { tic } from 'myst-cli-utils';
import { createTempFolder, elementWithText, removeTempFolder, select, selectAll } from './utils.js';

/** File name under which the manifest XML is written before it is validated. */
export const MANIFEST = 'manifest.xml';
/** File name of the manifest DTD that `ManifestXml.localDtd` looks for on disk. */
export const MANIFEST_DTD = 'manifest-1.0.dtd';

/**
 * Optional settings for the `ManifestXml` constructor:
 * `log` replaces the default `console` logger, `source` is stored on the instance as-is.
 */
type Options = { log?: Logger; source?: string };

/**
 * Named constants for values of an item's `item-type` attribute.
 *
 * `articleMetadata` is the value matched by the `ManifestXml.articleMetadata` getter.
 * NOTE(review): the list presumably mirrors the item types named in the MECA
 * specification — confirm against the spec before relying on it being exhaustive.
 */
export enum ItemTypes {
articleMetadata = 'article-metadata',
articleSupportingFile = 'article-supporting-file',
manuscript = 'manuscript',
manuscriptSupportingFile = 'manuscript-supporting-file',
articleSource = 'article-source',
articleSourceEnvironment = 'article-source-environment',
articleSourceDirectory = 'article-source-directory',
transferMetadata = 'transfer-metadata',
}

/**
 * Flattened view of one `<item>` element of the manifest together with the
 * single `<instance>` that is read from (or written into) it.
 */
export type ManifestItem = {
/** The `id` attribute of the `<item>`. */
id?: string;
/** The `item-type` attribute of the `<item>`. */
itemType?: string;
/** The `item-version` attribute of the `<item>` (a plain `version` attribute is also read). */
version?: string;
/** Text of the `<item-title>` child element. */
title?: string;
/** Text of the `<item-description>` child element. */
description?: string;
/** The `xlink:href` (or plain `href`) attribute of the `<instance>`. */
href: string;
/** The `media-type` attribute of the `<instance>`. */
mediaType?: string;
/** Text of the `<file-order>` child element, kept as a string. */
fileOrder?: string;
/** `<metadata>` entries keyed by their `metadata-name` attribute, with the element text as value. */
metadata?: Record<string, string>;
};

/**
 * Parsed representation of a MECA `manifest.xml` document.
 *
 * The constructor parses the XML text with `xml2js`, checks that the document
 * consists of a doctype followed by a `<manifest>` root element, and converts
 * that root into a unist tree which the getters below query.
 */
export class ManifestXml {
  /** Attributes of the XML declaration, when the document has one. */
  declaration?: DeclarationAttributes;
  /** Content of the doctype node that precedes the `<manifest>` element. */
  doctype?: string;
  /** The XML text exactly as passed to the constructor. */
  rawXML: string;
  /** The non-compact `xml-js` element tree produced from `rawXML`. */
  raw: Element;
  /** Logger used for warnings, errors and debug timing; defaults to `console`. */
  log: Logger;
  /** The `<manifest>` element converted to a unist node. */
  tree: GenericParent;
  /** Optional `source` value taken from the constructor options. */
  source?: string;

  /**
   * @param data - XML text of the manifest
   * @param opts - optional logger and source
   * @throws Error when `data` cannot be parsed as XML, or when the document is
   *   not exactly a doctype followed by a `<manifest>` element
   */
  constructor(data: string, opts?: Options) {
    const toc = tic();
    this.log = opts?.log ?? console;
    this.rawXML = data;
    if (opts?.source) this.source = opts.source;
    try {
      this.raw = xml2js(data, { compact: false }) as Element;
    } catch {
      // The message names the manifest file (it previously mentioned an unrelated document).
      throw new Error(`Problem parsing the ${MANIFEST} document, please ensure it is XML`);
    }
    const { declaration, elements } = this.raw;
    this.declaration = declaration?.attributes;
    if (
      !(elements?.length === 2 && elements[0].type === 'doctype' && elements[1].name === 'manifest')
    ) {
      throw new Error('Element <manifest> is not the only element of the manifest.xml');
    }
    this.doctype = elements[0].doctype;
    const converted = convertToUnist(elements[1]);
    this.tree = select('manifest', converted) as GenericParent;
    this.log.debug(toc('Parsed and converted manifest.xml to unist tree in %s'));
  }

  /**
   * Path of the manifest DTD on disk.
   *
   * @throws Error when the DTD is found neither next to this module nor in `../static`
   */
  get localDtd(): string {
    // This works both compiled and in tests
    const besideModule = path.join(__dirname, MANIFEST_DTD);
    const dtd = fs.existsSync(besideModule)
      ? besideModule
      : path.join(__dirname, '..', 'static', MANIFEST_DTD);
    if (fs.existsSync(dtd)) return dtd;
    throw new Error(`Unable to locate manifest DTD file ${MANIFEST_DTD} in meca lib distribution`);
  }

  /**
   * Validate the raw XML against the local DTD using `xmllintValidate`.
   *
   * The XML is written to a fresh temp folder which is removed again even if
   * writing the file or locating the DTD throws.
   *
   * @returns the result of `xmllintValidate`, or `false` when that call rejects
   */
  async validateXml() {
    const tempFolder = createTempFolder();
    try {
      const manifestPath = path.join(tempFolder, MANIFEST);
      fs.writeFileSync(manifestPath, this.rawXML);
      const manifestIsValid = await xmllintValidate(this, manifestPath, this.localDtd).catch(
        () => {
          this.log.error(`${MANIFEST} DTD validation failed`);
          return false;
        },
      );
      return manifestIsValid;
    } finally {
      removeTempFolder(tempFolder);
    }
  }

  /** The `manifest-version` attribute of the root element, falling back to `version`. */
  get version(): string {
    return this.tree['manifest-version'] || this.tree.version;
  }

  /**
   * All `<item>` elements flattened into `ManifestItem` objects.
   *
   * Items without an `<instance>` are skipped with a warning; when an item has
   * several instances a warning is logged and only the first one is read.
   */
  get items(): ManifestItem[] {
    const items = selectAll(`item`, this.tree)
      .map((item): ManifestItem | undefined => {
        const instances = selectAll(`instance`, item);
        if (instances.length === 0) {
          this.log.warn('Item without an instance');
          return undefined;
        }
        if (instances.length > 1) {
          this.log.warn('Item has multiple instances, only the first is used.');
        }
        const instance = instances[0];
        // `?.[0]?.value` keeps an element with no children from throwing;
        // the field is simply left undefined.
        const title = select(`item-title`, item)?.children?.[0]?.value;
        const description = select(`item-description`, item)?.children?.[0]?.value;
        const fileOrder = select(`file-order`, item)?.children?.[0]?.value;
        const metadata = Object.fromEntries(
          selectAll('metadata', item)?.map((n) => [
            n['metadata-name'] || n.name,
            n?.children?.[0]?.value,
          ]) ?? [],
        );
        return {
          id: item.id,
          itemType: item['item-type'],
          version: item['item-version'] || item['version'],
          title,
          description,
          href: instance['xlink:href'] || instance.href,
          mediaType: instance['media-type'],
          fileOrder,
          metadata,
        };
      })
      .filter((item): item is ManifestItem => !!item);
    return items;
  }

  /** Distinct, non-empty `itemType` values in order of first appearance. */
  get itemTypes(): string[] {
    const itemTypes = new Set<string>();
    this.items.forEach((item) => {
      if (item.itemType) itemTypes.add(item.itemType);
    });
    return [...itemTypes];
  }

  /** The first item whose type is `article-metadata`, if any. */
  get articleMetadata(): ManifestItem | undefined {
    return this.items.find((item) => item.itemType === ItemTypes.articleMetadata);
  }
}

/** Options that control how the manifest XML is written. */
type WriteOptions = {
/**
 * Some publishers prefer `href` instead of `xlink:href`, which is in the spec.
 * When true, each instance link is written as a plain `href` attribute and the
 * `xmlns:xlink` namespace declaration is left off the `<manifest>` element.
 */
noXLink: boolean;
};

/**
 * Build the `xml-js` element for one manifest `<item>`.
 *
 * Optional children (`item-title`, `item-description`, `file-order`,
 * `item-metadata`) are emitted only when the corresponding field is truthy /
 * non-empty; the `<instance>` child is always emitted last.
 *
 * @param item - the manifest item to serialise
 * @param opts - when `noXLink` is set the link attribute is `href` rather than `xlink:href`
 * @returns an `item` element ready to be placed under `<manifest>`
 */
function writeManifestItem(item: ManifestItem, opts?: WriteOptions): Element {
  const children: Element[] = [];

  if (item.title) children.push(elementWithText('item-title', item.title));
  if (item.description) children.push(elementWithText('item-description', item.description));
  if (item.fileOrder) children.push(elementWithText('file-order', item.fileOrder));

  const metadataEntries = Object.entries(item.metadata ?? {});
  if (metadataEntries.length > 0) {
    children.push({
      type: 'element',
      name: 'item-metadata',
      elements: metadataEntries.map(([key, value]) =>
        elementWithText('metadata', value, { 'metadata-name': key }),
      ),
    });
  }

  const hrefAttribute = opts?.noXLink ? 'href' : 'xlink:href';
  children.push({
    type: 'element',
    name: 'instance',
    attributes: {
      [hrefAttribute]: item.href,
      'media-type': item.mediaType,
    },
  });

  return {
    type: 'element',
    name: 'item',
    attributes: {
      id: item.id,
      'item-type': item.itemType,
      'item-version': item.version,
    },
    elements: children,
  };
}

/**
 * Serialise a list of manifest items into a complete `manifest.xml` string.
 *
 * The output has an XML declaration, the manifest doctype and a `<manifest>`
 * root holding one `<item>` per entry, indented with two spaces.
 *
 * @param manifestItems - items to write, in output order
 * @param opts - when `noXLink` is set, the `xmlns:xlink` declaration is omitted
 * @returns the XML text
 */
export function createManifestXml(manifestItems: ManifestItem[], opts?: WriteOptions) {
  const xlinkNamespace = opts?.noXLink ? {} : { 'xmlns:xlink': 'http://www.w3.org/1999/xlink' };

  const doctypeNode = {
    type: 'doctype',
    doctype: 'manifest PUBLIC "-//MECA//DTD Manifest v1.0//en" "https://meca.zip/manifest-1.0.dtd"',
  };

  const manifestNode = {
    type: 'element',
    name: 'manifest',
    attributes: {
      'manifest-version': '1',
      xmlns: 'https://manuscriptexchange.org/schema/manifest',
      ...xlinkNamespace,
    },
    elements: manifestItems.map((entry) => writeManifestItem(entry, opts)),
  };

  const root = {
    type: 'element',
    elements: [doctypeNode, manifestNode],
    declaration: { attributes: { version: '1.0', encoding: 'UTF-8' } },
  };

  return js2xml(root, { compact: false, spaces: 2 });
}
39 changes: 39 additions & 0 deletions packages/meca/src/utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import fs from 'node:fs';
import path from 'node:path';
import os from 'os';
import type { GenericNode } from 'myst-common';
import type { Element } from 'xml-js';

import { select as unistSelect, selectAll as unistSelectAll } from 'unist-util-select';

/**
 * Create a new, uniquely named directory in the OS temp directory.
 *
 * @returns the path of the created directory; its name starts with `meca`
 */
export function createTempFolder() {
  const prefix = path.join(os.tmpdir(), 'meca');
  const folder = fs.mkdtempSync(prefix);
  return folder;
}

/**
 * Recursively delete a folder if it exists.
 *
 * Does nothing when `tempFolder` is empty/undefined or the path does not exist.
 *
 * @param tempFolder - path of the folder to delete
 */
export function removeTempFolder(tempFolder?: string) {
  if (!tempFolder || !fs.existsSync(tempFolder)) return;

  if (!fs.rmSync) {
    // Node < 14.14
    fs.rmdirSync(tempFolder, { recursive: true });
    return;
  }
  // Node >= 14.14
  fs.rmSync(tempFolder, { recursive: true });
}

/**
 * Typed wrapper around `unist-util-select`'s `select`.
 *
 * @param selector - selector passed through to `unistSelect`
 * @param node - tree to search
 * @returns the node returned by `unistSelect`, with a nullish result normalised to `undefined`
 */
export function select<T extends GenericNode>(selector: string, node?: GenericNode): T | undefined {
  const match = unistSelect(selector, node) as T | null | undefined;
  if (match == null) return undefined;
  return match;
}

/**
 * Typed wrapper around `unist-util-select`'s `selectAll`.
 *
 * A nullish result from `unistSelectAll` is normalised to an empty array, so
 * the declared `T[]` return type always holds and callers can use `.map` or
 * `.length` without a guard.
 *
 * @param selector - selector passed through to `unistSelectAll`
 * @param node - tree to search
 * @returns the nodes returned by `unistSelectAll`, or `[]` when it returns nothing
 */
export function selectAll<T extends GenericNode>(selector: string, node?: GenericNode): T[] {
  return (unistSelectAll(selector, node) ?? []) as T[];
}

/**
 * Build an `xml-js` element that wraps a single text node.
 *
 * @param name - element name
 * @param text - text content of the element
 * @param attributes - optional attributes; the key is present (as `undefined`) even when omitted
 * @returns the element object
 */
export function elementWithText(
  name: string,
  text: string,
  attributes?: Record<string, string>,
): Element {
  const textNode = { type: 'text', text };
  return { type: 'element', name, elements: [textNode], attributes };
}
Loading