Skip to content

Commit

Permalink
feat(docx-to-vfile): make more isomorphic
Browse files Browse the repository at this point in the history
  • Loading branch information
tefkah committed Mar 27, 2023
1 parent 3e22847 commit 41ed5e8
Show file tree
Hide file tree
Showing 11 changed files with 981 additions and 186 deletions.
387 changes: 266 additions & 121 deletions libs/reoff/docx-to-vfile/README.md

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions libs/reoff/docx-to-vfile/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@
}
},
"main": "./index.js",
"browser": {
"fs": false,
"stream": false,
"buffer": false
},
"types": "./index.d.ts",
"files": [
"index.d.ts",
Expand Down
41 changes: 41 additions & 0 deletions libs/reoff/docx-to-vfile/src/index.ts
Original file line number Diff line number Diff line change
@@ -1 +1,42 @@
// import type { ReadStream } from 'fs'
// import { VFile } from 'vfile'
// import { docxToVFile as docxToVFileBrowser, Options } from './lib/docx-to-vfile-unzipit.js'

// /**
// * Takes a docx file as a Blob, Buffer, ArrayBuffer, ReadStream or string
// * and returns a VFile with the contents of the document.xml file as the root, and the contents of the other xml files as data.
// *
// * Should work in both Node and the browser, but to be sure it works in the browser, import it like
// * ```ts
// * import { docxToVFile } from 'docx-to-vfile/browser'
// * ```
// *
// * @param file The docx file as a File, Blob, Buffer, ArrayBuffer, ReadStream or string
// * @param options Options
// * @returns A VFile with the contents of the document.xml file as the root, and the contents of the other xml files as data.
// */
// export async function docxToVFile(
// file: ArrayBuffer | File | Blob | Buffer | ReadStream | string,
// options?: Options,
// ): Promise<VFile> {
// if (typeof window !== 'undefined') {
// return await docxToVFileBrowser(file as Blob, options)
// }

// const { createReadStream, ReadStream } = await import('fs')
// const { blob } = await import('stream/consumers')
// const { Blob: NodeBlob } = await import('buffer')

// const inp = typeof file === 'string' ? createReadStream(file) : file

// const input =
// inp instanceof ReadStream
// ? await blob(inp)
// : inp instanceof ArrayBuffer
// ? new NodeBlob([Buffer.from(inp)])
// : inp

// return await docxToVFileBrowser(input as Blob | File, options)
// }

export * from './lib/docx-to-vfile-unzipit.js'

Large diffs are not rendered by default.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { docxToVFile } from './docx-to-vfile-unzipit.js'
import fs from 'fs'
import path from 'path'

describe('reoffDocxToVfile', () => {
const doc = fs.readFileSync(
Expand Down
52 changes: 38 additions & 14 deletions libs/reoff/docx-to-vfile/src/lib/docx-to-vfile-unzipit.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { Data, VFile } from 'vfile'
import { unzip } from 'unzipit'
import { ReadStream } from 'fs'
import type { ReadStream } from 'fs'

/**
 * Strips the first XML declaration (`<?xml ... ?>`) from a string.
 *
 * @param text The XML text, possibly undefined
 * @returns The text without its first XML declaration, or an empty string when `text` is undefined or empty
 */
const removeHeader = (text: string | undefined) => {
  if (!text) {
    return ''
  }

  return text.replace(/<\?xml.*?\?>/, '')
}

Expand All @@ -10,7 +10,7 @@ export interface Options {
/**
* Whether or not to include media in the VFile.
*
* By default, images are included on the `data.media` attribute of the VFile as an object of ArrayBuffers, which are accessible both client and serverside.
* By default, images are included on the `data.media` attribute of the VFile as an object of Blobs, which are accessible both client and serverside.
*
* @default false
*/
Expand All @@ -31,6 +31,13 @@ export interface Options {
include?: string[] | RegExp[] | ((key: string) => boolean) | 'all' | 'allWithDocumentXML'
}

/**
* `Options` extended with an optional configuration object for fetch.
*/
export interface OptionsWithFetchConfig extends Options {
/**
* Optional `RequestInit` configuration to pass to fetch, e.g. to supply authorization headers.
*
* NOTE(review): no fetch call is visible in this part of the file — confirm where this option is consumed.
*/
fetchConfig?: RequestInit
}

/**
* The data attribute of a VFile
* Is set to the DataMap interface in the vfile module
Expand All @@ -43,7 +50,7 @@ export interface DocxData extends Data {
/**
* The media files in the .docx file
*/
media: { [key: string]: ArrayBuffer }
media: { [key: string]: Blob }
/**
* The relations between the .xml files in the .docx file
*/
Expand All @@ -60,7 +67,7 @@ declare module 'vfile' {
* The media files in the .docx file
* Possibly undefined only to be compatible with the VFile interface
*/
media: { [key: string]: ArrayBuffer }
media: { [key: string]: Blob }
/**
* The relations between the .xml files in the .docx file
* Possibly undefined only to be compatible with the VFile interface
Expand All @@ -82,25 +89,42 @@ export interface DocxVFile extends VFile {
}

/**
* Takes a docx file as an ArrayBuffer and returns a VFile with the contents of the document.xml file as the root, and the contents of the other xml files as data.
* Takes a docx file as an ArrayBuffer, File, Blob, Buffer, ReadStream or string and returns a VFile with the contents of the document.xml file as the root, and the contents of the other xml files as data.
*
* @param file The docx file as an ArrayBuffer or Blob
* @param file The docx file as an ArrayBuffer, File, Blob, Buffer, ReadStream or string (in Node, a string is opened with `createReadStream`)
* @param options Options
* @returns A VFile with the contents of the document.xml file as the root, and the contents of the other xml files as data.
*/
export async function docxToVFile(
file: ArrayBuffer | Blob | Buffer,
userOptions: Options = {},
file: ArrayBuffer | File | Blob | Buffer | ReadStream | string,
userOptions?: Options,
): Promise<VFile> {
let input = file

// node code
if (typeof window === 'undefined') {
const { createReadStream, ReadStream } = await import('fs')
const { blob } = await import('stream/consumers')
const { Blob: NodeBlob } = await import('buffer')

const inp = typeof file === 'string' ? createReadStream(file) : file

input = (
inp instanceof ReadStream
? await blob(inp)
: inp instanceof ArrayBuffer
? new NodeBlob([Buffer.from(inp)])
: inp
) as Blob
}

const options: Options = {
withoutMedia: false,
include: 'all',
...userOptions,
}

const blb = file instanceof Blob ? file : new Blob([file])

const { entries } = await unzip(blb)
const { entries } = await unzip(input as Blob)
const rels = await entries['word/_rels/document.xml.rels'].text()
const relations = Object.fromEntries(
// eslint-disable-next-line regexp/no-super-linear-backtracking
Expand Down Expand Up @@ -144,7 +168,7 @@ export async function docxToVFile(

const vfileData: DocxData = textEntriesObject
vfileData.relations = relations
vfileData.media = {} as { [key: string]: ArrayBuffer }
vfileData.media = {} as { [key: string]: Blob }

// vfile.data = vfileData

Expand All @@ -153,9 +177,9 @@ export async function docxToVFile(
}

const mediaUrls = Object.values(relations).filter((rel: string) => rel.includes('media/'))
const media = {} as { [key: string]: ArrayBuffer }
const media = {} as { [key: string]: Blob }
for (const url of mediaUrls) {
media[url] = await entries[`word/${url}`].arrayBuffer()
media[url] = await entries[`word/${url}`].blob()
}
vfileData.media = media
return new VFile({ value: removeCarriage(doc), data: vfileData })
Expand Down
3 changes: 3 additions & 0 deletions nx.json
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@
},
"e2e": {
"inputs": ["default", "^production"]
},
"test": {
"inputs": ["default", "^production"]
}
},
"tasksRunnerOptions": {
Expand Down
14 changes: 12 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@
"@nrwl/nest": "15.8.5",
"@nrwl/node": "15.8.5",
"@nrwl/react": "15.8.5",
"@nrwl/vite": "^15.8.8",
"@nrwl/web": "15.8.5",
"@nrwl/workspace": "15.8.5",
"@nxext/sveltekit": "14.0.0-next.2",
Expand Down Expand Up @@ -213,6 +214,8 @@
"@typescript/analyze-trace": "^0.9.0",
"@unified-latex/unified-latex-types": "^1.3.0",
"@vercel/remote-nx": "^1.0.1",
"@vitest/coverage-c8": "~0.25.8",
"@vitest/ui": "^0.25.8",
"autoprefixer": "10.4.13",
"babel-jest": "29.4.3",
"commander": "^9.0.0",
Expand All @@ -238,6 +241,7 @@
"jest-esm-transformer": "^1.0.0",
"jest-transform-nearley": "^2.0.0",
"jsdoc": "^4.0.2",
"jsdom": "~20.0.3",
"jsonc-eslint-parser": "^2.1.0",
"nx": "15.8.7",
"postcss": "8.4.19",
Expand All @@ -262,7 +266,10 @@
"typescript": "4.9.5",
"unist-util-remove-position": "4.0.2",
"unist-util-visit-parents": "5.1.3",
"vercel": "^27.0.1"
"vercel": "^27.0.1",
"vite-plugin-eslint": "^1.8.1",
"vite-tsconfig-paths": "^4.0.2",
"vitest": "^0.25.8"
},
"packageManager": "yarn@1.22.1",
"repository": "https://github.com/TrialAndErrorOrg/parsers",
Expand All @@ -274,5 +281,8 @@
"syntax",
"tree",
"ast"
]
],
"wallaby": {
"trace": true
}
}
Loading

0 comments on commit 41ed5e8

Please sign in to comment.