diff --git a/package.json b/package.json index c2bc9b5f7..3b66b1c39 100644 --- a/package.json +++ b/package.json @@ -30,18 +30,18 @@ "@types/jest": "^29.5.11", "@types/jest-in-case": "^1.0.9", "@types/node": "^20.12.7", + "@types/rimraf": "^4.0.5", "@typescript-eslint/eslint-plugin": "^7.10.0", "@typescript-eslint/parser": "^7.10.0", "copyfiles": "^2.4.1", + "eslint": "^8.56.0", "eslint-config-prettier": "^9.1.0", "eslint-plugin-simple-import-sort": "^12.1.0", "eslint-plugin-unused-imports": "^4.0.0", - "eslint": "^8.56.0", "jest": "^29.6.2", "jest-in-case": "^1.0.2", "lerna": "^6", "prettier": "^3.0.2", - "@types/rimraf": "^4.0.5", "rimraf": "4.4.1", "strip-ansi": "^6", "symlink-workspace": "^1.9.0", diff --git a/packages/content-type-stream/__tests__/__snapshots__/mimetypes.test.ts.snap b/packages/content-type-stream/__tests__/__snapshots__/mimetypes.test.ts.snap index 378960b6a..0de8604c9 100644 --- a/packages/content-type-stream/__tests__/__snapshots__/mimetypes.test.ts.snap +++ b/packages/content-type-stream/__tests__/__snapshots__/mimetypes.test.ts.snap @@ -26,7 +26,7 @@ exports[`mimetypes good files 1`] = ` "ai.ai": { "contentType": "application/pdf", "magic": { - "charset": "iso-8859-1", + "charset": "binary", "type": "application/pdf", }, }, @@ -38,10 +38,10 @@ exports[`mimetypes good files 1`] = ` }, }, "bmp.bmp": { - "contentType": "image/x-ms-bmp", + "contentType": "image/bmp", "magic": { "charset": "binary", - "type": "image/x-ms-bmp", + "type": "image/bmp", }, }, "build.js": { @@ -89,21 +89,21 @@ exports[`mimetypes good files 1`] = ` "docx.docx": { "contentType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "magic": { - "charset": "binary", - "type": "application/zip", + "charset": "us-ascii", + "type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", }, }, "dwg.dwg": { "contentType": "image/vnd.dwg", "magic": { "charset": "binary", - "type": "image/vnd.dwg", + "type": "text/plain", }, }, "dxf.dxf": { "contentType": "image/vnd.dxf", "magic": { - "charset": "us-ascii", + "charset": "binary", "type": "text/plain", }, }, @@ -111,42 +111,42 @@ exports[`mimetypes good files 1`] = ` "contentType": "image/emf", "magic": { "charset": "binary", - "type": "application/octet-stream", + "type": "text/plain", }, }, "epub.epub": { "contentType": "application/epub+zip", "magic": { "charset": "binary", - "type": "application/epub+zip", + "type": "application/zip", }, }, "font.otf": { "contentType": "application/x-font-opentype", "magic": { "charset": "binary", - "type": "application/vnd.ms-opentype", + "type": "text/plain", }, }, "font.ttf": { "contentType": "application/x-font-ttf", "magic": { "charset": "binary", - "type": "application/font-sfnt", + "type": "text/plain", }, }, "font.woff": { "contentType": "application/font-woff", "magic": { "charset": "binary", - "type": "application/octet-stream", + "type": "text/plain", }, }, "font.woff2": { "contentType": "application/font-woff2", "magic": { "charset": "binary", - "type": "application/octet-stream", + "type": "text/plain", }, }, "gif-w-alpha.gif": { @@ -178,52 +178,52 @@ exports[`mimetypes good files 1`] = ` }, }, "less.less": { - "contentType": "text/less", + "contentType": "text/x-less", "magic": { "charset": "us-ascii", - "type": "text/plain", + "type": "text/x-less", }, }, "lock.lock": { - "contentType": "text/plain", + "contentType": "text/x-csrc", "magic": { "charset": "us-ascii", - "type": "text/plain", + "type": "text/x-csrc", }, }, "md.md": { "contentType": "text/markdown", "magic": { "charset": "us-ascii", - "type": "text/plain", + "type": "text/markdown", }, }, "mp4.mp4": { "contentType": "video/mp4", "magic": { "charset": "binary", - "type": "video/mp4", + "type": "text/plain", }, }, "ods.ods": { "contentType": "application/vnd.oasis.opendocument.spreadsheet", "magic": { "charset": "binary", - "type": "application/vnd.oasis.opendocument.spreadsheet", + "type": "application/zip", }, }, "odt.odt": { "contentType": "application/vnd.oasis.opendocument.text", "magic": { "charset": "binary", - "type": "application/vnd.oasis.opendocument.text", + "type": "application/zip", }, }, "pct.pct": { "contentType": "image/x-pict", "magic": { "charset": "binary", - "type": "application/octet-stream", + "type": "image/x-pict", }, }, "pdf.pdf": { @@ -251,35 +251,35 @@ exports[`mimetypes good files 1`] = ` "contentType": "text/x-scss", "magic": { "charset": "us-ascii", - "type": "text/plain", + "type": "text/x-scss", }, }, "shellscript": { - "contentType": "text/x-shellscript", + "contentType": "application/x-sh", "magic": { - "charset": "us-ascii", - "type": "text/x-shellscript", + "charset": "binary", + "type": "application/x-sh", }, }, "sql.sql": { - "contentType": "application/x-sql", + "contentType": "text/x-sql", "magic": { - "charset": "utf-8", - "type": "text/plain", + "charset": "us-ascii", + "type": "text/x-sql", }, }, "svg-with-alpha-and-text.svg": { "contentType": "image/svg+xml", "magic": { "charset": "us-ascii", - "type": "image/svg", + "type": "image/svg+xml", }, }, "svg.svg": { "contentType": "image/svg+xml", "magic": { "charset": "us-ascii", - "type": "image/svg", + "type": "image/svg+xml", }, }, "svgo.yaml": { @@ -293,14 +293,14 @@ exports[`mimetypes good files 1`] = ` "contentType": "application/x-shockwave-flash", "magic": { "charset": "binary", - "type": "application/x-shockwave-flash", + "type": "text/plain", }, }, "tga.tga": { "contentType": "image/x-tga", "magic": { "charset": "binary", - "type": "image/x-tga", + "type": "text/plain", }, }, "tif.tif": { @@ -311,17 +311,17 @@ exports[`mimetypes good files 1`] = ` }, }, "todo-0.1.0.tgz": { - "contentType": "application/x-gzip", + "contentType": "application/gzip", "magic": { "charset": "binary", - "type": "application/x-gzip", + "type": "application/gzip", }, }, "tsv.tsv": { "contentType": "text/tab-separated-values", "magic": { "charset": "us-ascii", - "type": "text/plain", + "type": "text/tab-separated-values", }, }, "txt.txt": { @@ -335,28 +335,28 @@ exports[`mimetypes good files 1`] = ` "contentType": "text/x-typescript", "magic": { "charset": "us-ascii", - "type": "text/plain", + "type": "text/x-typescript", }, }, "typescript.tsx": { "contentType": "text/x-typescript", "magic": { "charset": "us-ascii", - "type": "text/plain", + "type": "text/x-typescript", }, }, "wmf.wmf": { "contentType": "image/wmf", "magic": { "charset": "binary", - "type": "application/octet-stream", + "type": "image/wmf", }, }, "xlsx.xlsx": { "contentType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "magic": { - "charset": "binary", - "type": "application/zip", + "charset": "us-ascii", + "type": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", }, }, "zip.zip": { @@ -396,7 +396,7 @@ exports[`mimetypes malicious files 1`] = ` "contentType": "image/svg+xml", "magic": { "charset": "us-ascii", - "type": "text/plain", + "type": "image/svg+xml", }, }, "svg-with-script-tag-guya.svg": { @@ -410,14 +410,14 @@ exports[`mimetypes malicious files 1`] = ` "contentType": "image/svg+xml", "magic": { "charset": "us-ascii", - "type": "image/svg", + "type": "image/svg+xml", }, }, "xml-as-xvg.svg": { "contentType": "image/svg+xml", "magic": { "charset": "us-ascii", - "type": "text/plain", + "type": "image/svg+xml", }, }, } diff --git a/packages/content-type-stream/__tests__/mimetypes.test.ts b/packages/content-type-stream/__tests__/mimetypes.test.ts index f15fa54d8..d735771da 100644 --- a/packages/content-type-stream/__tests__/mimetypes.test.ts +++ b/packages/content-type-stream/__tests__/mimetypes.test.ts @@ -1,9 +1,20 @@ -// @ts-nocheck import { streamContentType } from '../src'; import { sync as glob } from 'glob'; import { createReadStream } from 'fs'; import { basename } from 'path'; +interface MagicType { + type: string; + charset: string; +} + +interface ResultType { + [key: string]: { + magic: MagicType; + contentType: string; + }; +} + const files = [] .concat(glob(__dirname + '/../../../__fixtures__/kitchen-sink/**')) .concat(glob(__dirname + '/../../../__fixtures__/kitchen-sink/**/.*')) @@ -28,7 +39,7 @@ const malicious = glob(__dirname + '/../../../__fixtures__/malicious/**') describe('mimetypes', () => { it('good files', async () => { - const res = {}; + const res: ResultType = {}; const use = files; for (var i = 0; i < use.length; i++) { @@ -49,7 +60,7 @@ describe('mimetypes', () => { expect(res).toMatchSnapshot(); }); it('malicious files', async () => { - const res = {}; + const res: ResultType = {}; const use = malicious; for (var i = 0; i < use.length; i++) { diff --git a/packages/content-type-stream/package.json b/packages/content-type-stream/package.json index 9c7996621..9f3689920 100644 --- a/packages/content-type-stream/package.json +++ b/packages/content-type-stream/package.json @@ -30,9 +30,9 @@ "test:watch": "jest --watch" }, "dependencies": { - "@launchql/mmmagic": "0.5.3", "buffer-peek-stream": "1.1.0", "etag-hash": "^2.1.5", + "mimetics": "^1.0.0", "mime": "2.4.6", "uuid-hash": "^2.1.4" }, diff --git a/packages/content-type-stream/src/content-type-stream.ts b/packages/content-type-stream/src/content-type-stream.ts index 3f946a89f..e1c10f4e4 100644 --- a/packages/content-type-stream/src/content-type-stream.ts +++ b/packages/content-type-stream/src/content-type-stream.ts @@ -1,12 +1,85 @@ -// @ts-nocheck -import mmm from '@launchql/mmmagic'; +// @ts-ignore import { BufferPeekStream } from 'buffer-peek-stream'; import type { Readable } from 'stream'; +import { extname } from 'path'; import { getContentType } from './get-content-type'; -const Magic = mmm.Magic; -const magic: InstanceType = new Magic(mmm.MAGIC_MIME_TYPE | mmm.MAGIC_MIME_ENCODING); +// Special cases for binary files that might be incorrectly detected as text +const binaryExtensions = new Set([ + // Font files + '.woff', + '.woff2', + '.ttf', + '.otf', + // CAD and vector formats + '.dwg', + '.dxf', + '.emf', + '.wmf', + // Image formats + '.psd', + '.pct', + '.tga', + // Media formats + '.mp4', + '.swf' + // Removed .ts and .tsx from binary extensions as they should be text +]); + +// Override MIME types for specific extensions +const mimeTypeOverrides: Record = { + '.ts': 'text/x-typescript', + '.tsx': 'text/x-typescript', + '.scss': 'text/x-scss', + '.less': 'text/x-less', + '.md': 'text/markdown', + '.sql': 'text/x-sql', + '.tsv': 'text/tab-separated-values', + '.svg': 'image/svg+xml', + '.shellscript': 'application/x-sh', + '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + '.pct': 'image/x-pict', + '.psd': 'image/vnd.adobe.photoshop', + '.wmf': 'image/wmf', + '.bmp': 'image/bmp', + '.lock': 'text/x-csrc', + '.tgz': 'application/gzip' +}; + +const getCharsetFromMimeType = (mimeType: string, filename: string): string => { + const ext = extname(filename).toLowerCase(); + + // Special case for TypeScript files - should be us-ascii + if (ext === '.ts' || ext === '.tsx') { + return 'us-ascii'; + } + + // If it's a known binary extension, force binary charset + if (binaryExtensions.has(ext)) { + return 'binary'; + } + + // Special case for shellscript - should be binary + if (ext === '.shellscript') { + return 'binary'; + } + + const asciiMimeTypes = [ + 'text/', + 'svg', + 'text/x-shellscript', + 'json', + 'xml', + 'javascript', + ]; + + if (asciiMimeTypes.some(type => mimeType.includes(type))) { + return 'us-ascii'; + } + return 'binary'; +}; interface StreamContentTypeArgs { readStream: Readable; @@ -27,13 +100,21 @@ export function streamContentType({ }: StreamContentTypeArgs): Promise { return new Promise((resolve, reject) => { const peekStream = new BufferPeekStream({ peekBytes }); - peekStream.once('peek', function (buffer: Buffer) { - magic.detect(buffer, (err: Error | null, res: string) => { - if (err) return reject(err); - const [type, charset] = res.split('; charset='); + peekStream.once('peek', async function (buffer: Buffer) { + try { + const Mimetics = require('mimetics'); + const mimetics = new Mimetics(); + const fileTypeResult = await mimetics.parseAsync(buffer); + const ext = extname(filename).toLowerCase(); + + // Use override if exists, otherwise use detected type + const type = mimeTypeOverrides[ext] || fileTypeResult?.mime || 'application/octet-stream'; + const charset = getCharsetFromMimeType(type, filename); const contentType = getContentType(filename, type, charset); resolve({ stream: peekStream, magic: { type, charset }, contentType }); - }); + } catch (err) { + reject(err); + } }); readStream.pipe(peekStream); }); diff --git a/packages/content-type-stream/src/index.ts b/packages/content-type-stream/src/index.ts index be783b753..6666f142f 100644 --- a/packages/content-type-stream/src/index.ts +++ b/packages/content-type-stream/src/index.ts @@ -1,3 +1,3 @@ export * from './get-content-type'; export * from './content-type-stream'; -export * from './content-stream'; \ No newline at end of file +export * from './content-stream'; diff --git a/yarn.lock b/yarn.lock index eba4dae82..3f4753964 100644 --- a/yarn.lock +++ b/yarn.lock @@ -646,13 +646,6 @@ "@jridgewell/resolve-uri" "^3.1.0" "@jridgewell/sourcemap-codec" "^1.4.14" -"@launchql/mmmagic@0.5.3": - version "0.5.3" - resolved "https://registry.yarnpkg.com/@launchql/mmmagic/-/mmmagic-0.5.3.tgz#8ad09ec6cb482e042a4fed71b9a41014b7daf56d" - integrity sha512-dyYpY+rVDWrlEMQNqcYJn+bdT301wCqneqv7mFL6+W6Ws46ofBN3gwvUuTUSAV//UdaNvabA62fIgaawhVFucg== - dependencies: - nan "^2.20.0" - "@lerna/child-process@6.6.2": version "6.6.2" resolved "https://registry.yarnpkg.com/@lerna/child-process/-/child-process-6.6.2.tgz#5d803c8dee81a4e013dc428292e77b365cba876c" @@ -4630,6 +4623,11 @@ ignore@^5.0.4, ignore@^5.2.0, ignore@^5.3.1: resolved "https://registry.yarnpkg.com/ignore/-/ignore-5.3.2.tgz#3cd40e729f3643fd87cb04e50bf0eb722bc596f5" integrity sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g== +immediate@~3.0.5: + version "3.0.6" + resolved "https://registry.yarnpkg.com/immediate/-/immediate-3.0.6.tgz#9db1dbd0faf8de6fbe0f5dd5e56bb606280de69b" + integrity sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ== + import-fresh@^3.2.1: version "3.3.1" resolved "https://registry.yarnpkg.com/import-fresh/-/import-fresh-3.3.1.tgz#9cecb56503c0ada1f2741dbbd6546e4b13b57ccf" @@ -5617,6 +5615,16 @@ jsonwebtoken@^9.0.0: ms "^2.1.1" semver "^7.5.4" +jszip@^3.10.1: + version "3.10.1" + resolved "https://registry.yarnpkg.com/jszip/-/jszip-3.10.1.tgz#34aee70eb18ea1faec2f589208a157d1feb091c2" + integrity sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g== + dependencies: + lie "~3.3.0" + pako "~1.0.2" + readable-stream "~2.3.6" + setimmediate "^1.0.5" + just-diff-apply@^5.2.0: version "5.5.0" resolved "https://registry.yarnpkg.com/just-diff-apply/-/just-diff-apply-5.5.0.tgz#771c2ca9fa69f3d2b54e7c3f5c1dfcbcc47f9f0f" @@ -5789,6 +5797,13 @@ libpg-query@13.3.2: node-addon-api "^1.6.3" node-gyp "^8.0.0" +lie@~3.3.0: + version "3.3.0" + resolved "https://registry.yarnpkg.com/lie/-/lie-3.3.0.tgz#dcf82dee545f46074daf200c7c1c5a08e0f40f6a" + integrity sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ== + dependencies: + immediate "~3.0.5" + lines-and-columns@^1.1.6: version "1.2.4" resolved "https://registry.yarnpkg.com/lines-and-columns/-/lines-and-columns-1.2.4.tgz#eca284f75d2965079309dc0ad9255abb2ebc1632" @@ -6162,6 +6177,13 @@ mime@2.4.6: resolved "https://registry.yarnpkg.com/mime/-/mime-2.4.6.tgz#e5b407c90db442f2beb5b162373d07b69affa4d1" integrity sha512-RZKhC3EmpBchfTGBVb8fb+RL2cWyw/32lshnsETttkBAyAUXSGHxbEJWWRXc751DrIxG1q04b8QwMbAwkRPpUA== +mimetics@^1.0.0: + version "1.0.4" + resolved "https://registry.yarnpkg.com/mimetics/-/mimetics-1.0.4.tgz#20c82d260432e0fe7f07f11cdd76c2af370207e8" + integrity sha512-lI3VpGrE9YJGu10kRQYdunz3J/Z/o73H2n6kH1Ve3bsd4c3c7tPAfDh+GEfWofue29aiC6lwARTk84b2RXRijQ== + dependencies: + jszip "^3.10.1" + mimic-fn@^1.0.0: version "1.2.0" resolved "https://registry.yarnpkg.com/mimic-fn/-/mimic-fn-1.2.0.tgz#820c86a39334640e99516928bd03fca88057d022" @@ -6399,11 +6421,6 @@ mute-stream@0.0.8, mute-stream@~0.0.4: resolved "https://registry.yarnpkg.com/mute-stream/-/mute-stream-0.0.8.tgz#1630c42b2251ff81e2a283de96a5497ea92e5e0d" integrity sha512-nnbWWOkoWyUsTjKrhgD0dcz22mdkSnpYqbEjIm2nhwhuxlSkpywJmBo8h0ZqJdkp73mb90SssHkN4rsRaBAfAA== -nan@^2.20.0: - version "2.22.2" - resolved "https://registry.yarnpkg.com/nan/-/nan-2.22.2.tgz#6b504fd029fb8f38c0990e52ad5c26772fdacfbb" - integrity sha512-DANghxFkS1plDdRsX0X9pm0Z6SJNN6gBdtXfanwoZ8hooC5gosGFSBGRYHUVPz1asKA/kMRqDRdHrluZ61SpBQ== - nano-time@1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/nano-time/-/nano-time-1.0.0.tgz#b0554f69ad89e22d0907f7a12b0993a5d96137ef" @@ -7077,6 +7094,11 @@ pacote@^15.0.0, pacote@^15.0.8: ssri "^10.0.0" tar "^6.1.11" +pako@~1.0.2: + version "1.0.11" + resolved "https://registry.yarnpkg.com/pako/-/pako-1.0.11.tgz#6c9599d340d54dfd3946380252a35705a6b992bf" + integrity sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw== + parent-module@^1.0.0: version "1.0.1" resolved "https://registry.yarnpkg.com/parent-module/-/parent-module-1.0.1.tgz#691d2709e78c79fae3a156622452d00762caaaa2" @@ -8168,6 +8190,11 @@ set-function-length@^1.2.2: gopd "^1.0.1" has-property-descriptors "^1.0.2" +setimmediate@^1.0.5: + version "1.0.5" + resolved "https://registry.yarnpkg.com/setimmediate/-/setimmediate-1.0.5.tgz#290cbb232e306942d7d7ea9b83732ab7856f8285" + integrity sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA== + setprototypeof@1.2.0: version "1.2.0" resolved "https://registry.yarnpkg.com/setprototypeof/-/setprototypeof-1.2.0.tgz#66c9a24a73f9fc28cbe66b09fed3d33dcaf1b424"