Skip to content

Commit

Permalink
feat(reoff): parse more async
Browse files Browse the repository at this point in the history
  • Loading branch information
tefkah committed Feb 14, 2022
1 parent 5280b3b commit 7d76e79
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 9 deletions.
21 changes: 21 additions & 0 deletions libs/reoff/docx-to-vfile/src/lib/docx-to-vfile-yauzl.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import { VFile } from 'vfile'
import { getXMLDatas } from './get-xml-data'

/**
 * Drop the first `<?xml ... ?>` declaration from an XML string.
 *
 * Missing or empty input yields an empty string, so optional archive parts
 * can be passed straight through.
 */
const removeHeader = (text: string | undefined) => {
  if (!text) {
    return ''
  }
  return text.replace(/<\?xml.*?\?>/, '')
}

/**
 * Read the main document and footnotes XML out of a .docx archive and merge
 * them into one XML string wrapped in a VFile.
 *
 * The XML declarations of both parts are removed, and the footnotes are
 * spliced in before the document's closing `</w:document>` tag so the result
 * keeps a single root element.
 *
 * @param file Path to the archive, or the archive contents as a Buffer.
 * @returns A VFile whose value is the combined XML.
 */
export async function docxToVFile(file: Buffer | string) {
  const data = await getXMLDatas(file, {
    filenames: [/customXml(\/|\\)/, 'word/document.xml', 'word/footnotes.xml'],
  })

  const closingTag = '</w:document>'
  const documentXML = removeHeader(data['word/document.xml'])

  // Cut the root's closing tag off the main document; otherwise the footnotes
  // and the closing tag appended below would land after the root element.
  const closingTagIndex = documentXML.lastIndexOf(closingTag)
  const documentBody =
    closingTagIndex === -1 ? documentXML : documentXML.slice(0, closingTagIndex)

  const total = `${documentBody}
${removeHeader(data['word/footnotes.xml'])}
${closingTag}`
  const vfile = new VFile(total)
  // if (footnotes) {
  // Object.assign(vfile.data, { footnotes })
  // }
  return vfile
}
10 changes: 5 additions & 5 deletions libs/reoff/docx-to-vfile/src/lib/docx-to-vfile.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import { VFile } from 'vfile'
import { getXMLData } from './get-xml-data'

// Drop the first `<?xml ... ?>` declaration from an XML string.
const removeHeader = (text: string) => {
  return text.replace(/<\?xml.*?\?>/, '')
}

export async function docxToVFile(file: Buffer | string) {
const mainXML = (await getXMLData(file)).slice(0, -'</w:document>'.length)
// xast-util-from-xml cannot handle two xml headers in one doc
const footnotes = (await getXMLData(file, { filename: 'footnotes' })).replace(
'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>',
''
)
// easier to put the footnotes in the same spot
const footnotes = removeHeader(
await getXMLData(file, { filename: 'footnotes' })
) // easier to put the footnotes in the same spot
const total = `${mainXML}${footnotes}
</w:document>`
console.log(footnotes)
Expand Down
86 changes: 84 additions & 2 deletions libs/reoff/docx-to-vfile/src/lib/get-xml-data.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import AdmZip from 'adm-zip'
import { extname } from 'path'
import AdmZip, { IZipEntry } from 'adm-zip'
import { join, extname } from 'path'
import { promisify } from 'util'
import yauzl, { Entry, ZipFile } from 'yauzl'
import { tryCatchPromise } from '@jote/utils'

const tab = '\t',
cr = '\n\n',
Expand All @@ -12,6 +15,85 @@ const paragraphRegex = new RegExp(
'g'
)

/**
 * Get the text of selected entries from a zip archive given either a buffer
 * or a path.
 *
 * @param file Path to the archive, or the archive contents as a Buffer. A
 *   path whose extension fails the module's extension check is rejected
 *   without opening the file.
 * @param options.filenames Patterns compared against each entry name with
 *   `String.prototype.match`; an entry is extracted when any pattern matches.
 * @param options.xml Accepted for signature compatibility; not used here.
 * @param options.returnBuffer Accepted for signature compatibility; not used
 *   here.
 * @returns A promise resolving to a map from entry name to entry text.
 */
export async function getXMLDatas(
  file: string | Buffer,
  {
    filenames,
  }: { filenames: (string | RegExp)[]; xml?: boolean; returnBuffer?: boolean }
): Promise<{ [key: string]: string }> {
  return new Promise((resolve, reject) => {
    if (
      typeof file === 'string' &&
      !extensionRegex.test(extname(file).toLowerCase())
    ) {
      reject(new Error('The file must be either a .docx, .xlsx or .pptx'))
      // Stop here: without this return the archive would still be opened.
      return
    }

    // Both yauzl entry points hand their result to the same handler.
    const onOpened = (err: unknown, zip: ZipFile | undefined) =>
      unzipCallback(err, zip, resolve, reject, filenames)

    try {
      if (typeof file === 'string') {
        yauzl.open(file, { lazyEntries: true }, onOpened)
        return
      }
      yauzl.fromBuffer(file, { lazyEntries: true }, onOpened)
    } catch (err) {
      // Interpolating a Buffer would dump the decoded archive into the message.
      const label = typeof file === 'string' ? file : '<buffer>'
      reject(new Error(`${err} (${label})`))
    }
  })
}

/**
 * Callback for `yauzl.open` / `yauzl.fromBuffer`: walks the archive entry by
 * entry and collects the text of every entry whose name matches one of
 * `filenames`.
 *
 * The callers open the archive with `lazyEntries: true`, so the next entry is
 * only read when `zip.readEntry()` is called; this function calls it once to
 * start and once after each entry has been skipped or fully read.
 *
 * @param err Error reported while opening the archive, if any.
 * @param zip The opened archive, or undefined when opening failed.
 * @param resolve Called with the map of entry name to entry text once the
 *   archive emits `end`.
 * @param reject Called with the opening error, with `Empty zip file` when no
 *   archive was supplied, or with any error raised while reading.
 * @param filenames Patterns compared against each entry name with
 *   `String.prototype.match`, so a string pattern is interpreted as a regular
 *   expression.
 */
function unzipCallback(
  err: unknown,
  zip: ZipFile | undefined,
  resolve: (
    value: { [key: string]: string } | PromiseLike<{ [key: string]: string }>
  ) => void,
  reject: (reason?: any) => void,
  filenames: (string | RegExp)[]
) {
  if (err) {
    reject(err)
    return
  }
  if (!zip) {
    reject(new Error('Empty zip file'))
    return
  }

  // Must start out as an object: entries are written into it as they finish.
  const result: { [key: string]: string } = {}
  const openReadStream = promisify(zip.openReadStream.bind(zip))

  zip.on('entry', async (entry: Entry) => {
    try {
      const isDirectory = /\/$/.test(entry.fileName)
      const isWanted = filenames.some((filename) =>
        entry.fileName.match(filename)
      )
      if (isDirectory || !isWanted) {
        zip.readEntry()
        return
      }

      const stream = await openReadStream(entry)
      if (!stream) {
        zip.readEntry()
        return
      }

      const chunks: Buffer[] = []
      stream.on('data', (chunk: Buffer) => chunks.push(chunk))
      stream.on('error', reject)
      stream.on('end', () => {
        // Concatenate before decoding so a multi-byte character split across
        // two chunks is still decoded correctly.
        result[entry.fileName] = Buffer.concat(chunks).toString()
        // Ask for the next entry; without this the archive never reaches `end`.
        zip.readEntry()
      })
    } catch (error) {
      // An async listener's rejection would otherwise go unhandled.
      reject(error)
    }
  })
  zip.on('end', () => {
    resolve(result)
  })
  zip.on('error', reject)

  // Listeners are registered first so the first entry cannot be missed.
  zip.readEntry()
}

export async function getXMLData(
file: string | Buffer,
{ filename = 'document', xml = true, returnBuffer = false } = {
Expand Down
2 changes: 1 addition & 1 deletion libs/utils/src/lib/tryCatchPromise.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
export default async function tryCatchPromise(
export async function tryCatchPromise(
promise: Promise<any>,
errorHandler?: (error: any) => void
): Promise<[any, any]> {
Expand Down
2 changes: 1 addition & 1 deletion libs/utils/src/lib/tryCatchVoid.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
export default function tryCatchVoid(
export function tryCatchVoid(
func: any,
errorHandler?: (error: any) => void
): unknown | void {
Expand Down

0 comments on commit 7d76e79

Please sign in to comment.