diff --git a/README.md b/README.md
index ab9538a..627f8d7 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,8 @@
 [download-image]: https://img.shields.io/npm/dm/egg-multipart.svg?style=flat-square
 [download-url]: https://npmjs.org/package/egg-multipart
 
-Use [co-busboy](https://github.com/cojs/busboy) to upload file by streaming and process it without save to disk.
+Use [co-busboy](https://github.com/cojs/busboy) to upload files by streaming, and
+process them without saving to disk (using the `stream` mode).
 
 Just use `ctx.multipart()` to got file stream, then pass to image processing liberary such as `gm` or upload to cloud storage such as `oss`.
@@ -107,9 +108,135 @@ exports.multipart = {
 
 ## Examples
 
-[More Examples](https://github.com/eggjs/examples/tree/master/multipart)
+For more examples, please see:
 
-### Upload File
+- [Handle multipart request in `stream` mode](https://github.com/eggjs/examples/tree/master/multipart)
+- [Handle multipart request in `file` mode](https://github.com/eggjs/examples/tree/master/multipart-file-mode)
+
+## `file` mode: the easy way
+
+If you are not familiar with how [Node.js Stream](https://nodejs.org/dist/latest-v10.x/docs/api/stream.html) works, the `file` mode is the easier way to get started.
+
+The usage is very similar to [bodyParser](https://eggjs.org/en/basics/controller.html#body).
+
+- `ctx.request.body`: contains all the multipart fields and values, except files.
+- `ctx.request.files`: contains all the files from the multipart request, as an Array.
+
+**WARNING: you should remove the temporary uploaded files after you have used them.**
+
+### Enable `file` mode in config
+
+You need to set `config.multipart.mode = 'file'` to enable `file` mode:
+
+```js
+// config/config.default.js
+exports.multipart = {
+  mode: 'file',
+};
+```
+
+After `file` mode is enabled, egg will remove the old temporary files (excluding today's files) at `04:30 AM` every day by default.
+
+```js
+config.multipart = {
+  mode: 'file',
+  tmpdir: path.join(os.tmpdir(), 'egg-multipart-tmp', appInfo.name),
+  cleanSchedule: {
+    // run tmpdir clean job on every day 04:30 am
+    // cron style see https://github.com/eggjs/egg-schedule#cron-style-scheduling
+    cron: '0 30 4 * * *',
+  },
+};
+```
+
+### Upload One File
+
+```html
+
+```
+
+Controller which handles `POST /upload`:
+
+```js
+// app/controller/upload.js
+const Controller = require('egg').Controller;
+const fs = require('mz/fs');
+const path = require('path');
+
+module.exports = class extends Controller {
+  async upload() {
+    const { ctx } = this;
+    const file = ctx.request.files[0];
+    const name = 'egg-multipart-test/' + path.basename(file.filename);
+    let result;
+    try {
+      // process file or upload to cloud storage
+      result = await ctx.oss.put(name, file.filepath);
+    } finally {
+      // need to remove the tmp file
+      await fs.unlink(file.filepath);
+    }
+
+    ctx.body = {
+      url: result.url,
+      // get all field values
+      requestBody: ctx.request.body,
+    };
+  }
+};
+```
+
+### Upload Multiple Files
+
+```html
+
+```
+
+Controller which handles `POST /upload`:
+
+```js
+// app/controller/upload.js
+const Controller = require('egg').Controller;
+const fs = require('mz/fs');
+
+module.exports = class extends Controller {
+  async upload() {
+    const { ctx } = this;
+    console.log(ctx.request.body);
+    console.log('got %d files', ctx.request.files.length);
+    for (const file of ctx.request.files) {
+      console.log('field: ' + file.field);
+      console.log('filename: ' + file.filename);
+      console.log('encoding: ' + file.encoding);
+      console.log('mime: ' + file.mime);
+      console.log('tmp filepath: ' + file.filepath);
+      let result;
+      try {
+        // process file or upload to cloud storage
+        result = await ctx.oss.put('egg-multipart-test/' + file.filename, file.filepath);
+      } finally {
+        // need to remove the tmp file
+        await fs.unlink(file.filepath);
+      }
+      console.log(result);
+    }
+  }
+};
+```
+
+## `stream` mode: the hard way
+
+If you are familiar with how Node.js Stream works, you can use the `stream` mode.
+
+### Upload One File
 
 You can got upload stream by `ctx.getFileStream*()`.
@@ -129,9 +256,9 @@ const path = require('path');
 const sendToWormhole = require('stream-wormhole');
 const Controller = require('egg').Controller;
 
-module.exports = Class UploadController extends Controller {
+module.exports = class extends Controller {
   async upload() {
-    const ctx = this.ctx;
+    const { ctx } = this;
     // file not exists will response 400 error
     const stream = await ctx.getFileStream();
     const name = 'egg-multipart-test/' + path.basename(stream.filename);
@@ -146,7 +273,7 @@ module.exports = Class UploadController extends Controller {
   }
 
   async uploadNotRequiredFile() {
-    const ctx = this.ctx;
+    const { ctx } = this;
     // file not required
     const stream = await ctx.getFileStream({ requireFile: false });
    let result;
@@ -173,7 +300,8 @@ module.exports = Class UploadController extends Controller {
 ```html
 ```
@@ -184,9 +312,9 @@ Controller which hanlder `POST /upload`:
 // app/controller/upload.js
 const Controller = require('egg').Controller;
 
-module.exports = Class UploadController extends Controller {
+module.exports = class extends Controller {
   async upload() {
-    const ctx = this.ctx;
+    const { ctx } = this;
     const parts = ctx.multipart();
     let part;
     while ((part = await parts()) != null) {
@@ -201,7 +329,7 @@ module.exports = Class UploadController extends Controller {
           // user click `upload` before choose a file,
           // `part` will be file stream, but `part.filename` is empty
          // must handler this, such as log error.
-          return;
+          continue;
         }
        // otherwise, it's a stream
        console.log('field: ' + part.fieldname);
@@ -216,3 +344,7 @@ module.exports = Class UploadController extends Controller {
   }
 };
 ```
+
+## License
+
+[MIT](LICENSE)
diff --git a/app.js b/app.js
index e88c901..f249028 100644
--- a/app.js
+++ b/app.js
@@ -79,4 +79,17 @@ module.exports = app => {
       }
     },
   };
+
+  options.mode = options.mode || 'stream';
+  if (![ 'stream', 'file' ].includes(options.mode)) {
+    throw new TypeError(`Expect mode to be 'stream' or 'file', but got '${options.mode}'`);
+  }
+
+  app.coreLogger.info('[egg-multipart] %s mode enabled', options.mode);
+  if (options.mode === 'file') {
+    app.coreLogger.info('[egg-multipart] will save temporary files to %j, cleanup job cron: %j',
+      options.tmpdir, options.cleanSchedule.cron);
+    // enable multipart middleware
+    app.config.coreMiddleware.push('multipart');
+  }
 };
diff --git a/app/extend/context.js b/app/extend/context.js
index 8998111..fbe7223 100644
--- a/app/extend/context.js
+++ b/app/extend/context.js
@@ -9,6 +9,8 @@ class EmptyStream extends Readable {
   }
 }
 
+const HAS_CONSUMED = Symbol('Context#multipartHasConsumed');
+
 module.exports = {
   /**
    * create multipart.parts instance, to get separated files.
@@ -21,6 +23,9 @@ module.exports = {
     if (!this.is('multipart')) {
       this.throw(400, 'Content-Type must be multipart/*');
     }
+    if (this[HAS_CONSUMED]) throw new TypeError('the multipart request can\'t be consumed twice');
+
+    this[HAS_CONSUMED] = true;
     const parseOptions = {};
     Object.assign(parseOptions, this.app.config.multipartParseOptions, options);
     return parse(this, parseOptions);
diff --git a/app/middleware/multipart.js b/app/middleware/multipart.js
new file mode 100644
index 0000000..c342212
--- /dev/null
+++ b/app/middleware/multipart.js
@@ -0,0 +1,108 @@
+'use strict';
+
+const path = require('path');
+const fs = require('mz/fs');
+const uuid = require('uuid');
+const mkdirp = require('mz-modules/mkdirp');
+const pump = require('mz-modules/pump');
+const sendToWormhole = require('stream-wormhole');
+const moment = require('moment');
+
+module.exports = options => {
+  async function cleanup(requestFiles) {
+    for (const file of requestFiles) {
+      try {
+        await fs.unlink(file.filepath);
+      } catch (_) {
+        // do nothing
+      }
+    }
+  }
+
+  async function limit(requestFiles, code, message) {
+    // cleanup requestFiles
+    await cleanup(requestFiles);
+
+    // throw 413 error
+    const err = new Error(message);
+    err.code = code;
+    err.status = 413;
+    throw err;
+  }
+
+  return async function multipart(ctx, next) {
+    if (!ctx.is('multipart')) return next();
+
+    let storedir;
+
+    const requestBody = {};
+    const requestFiles = [];
+
+    const parts = ctx.multipart({ autoFields: false });
+    let part;
+    do {
+      try {
+        part = await parts();
+      } catch (err) {
+        await cleanup(requestFiles);
+        throw err;
+      }
+
+      if (!part) break;
+
+      if (part.length) {
+        ctx.coreLogger.debug('[egg-multipart:storeMultipart] handle value part: %j', part);
+        const fieldnameTruncated = part[2];
+        const valueTruncated = part[3];
+        if (valueTruncated) {
+          return await limit(requestFiles, 'Request_fieldSize_limit', 'Reach fieldSize limit');
+        }
+        if (fieldnameTruncated) {
+          return await limit(requestFiles, 'Request_fieldNameSize_limit', 'Reach fieldNameSize limit');
+        }
+
+        // arrays are busboy fields
+        requestBody[part[0]] = part[1];
+        continue;
+      }
+
+      // otherwise, it's a stream
+      const meta = {
+        field: part.fieldname,
+        filename: part.filename,
+        encoding: part.encoding,
+        mime: part.mime,
+      };
+      ctx.coreLogger.debug('[egg-multipart:storeMultipart] handle stream part: %j', meta);
+      // empty part, ignore it
+      if (!part.filename) {
+        await sendToWormhole(part);
+        continue;
+      }
+
+      if (!storedir) {
+        // ${tmpdir}/YYYY/MM/DD/HH
+        storedir = path.join(options.tmpdir, moment().format('YYYY/MM/DD/HH'));
+        const exists = await fs.exists(storedir);
+        if (!exists) {
+          await mkdirp(storedir);
+        }
+      }
+      const filepath = path.join(storedir, uuid.v4() + path.extname(meta.filename));
+      const target = fs.createWriteStream(filepath);
+      await pump(part, target);
+      // https://github.com/mscdex/busboy/blob/master/lib/types/multipart.js#L221
+      meta.filepath = filepath;
+      requestFiles.push(meta);
+
+      // https://github.com/mscdex/busboy/blob/master/lib/types/multipart.js#L221
+      if (part.truncated) {
+        return await limit(requestFiles, 'Request_fileSize_limit', 'Reach fileSize limit');
+      }
+    } while (part != null);
+
+    ctx.request.body = requestBody;
+    ctx.request.files = requestFiles;
+    return next();
+  };
+};
diff --git a/app/schedule/clean_tmpdir.js b/app/schedule/clean_tmpdir.js
new file mode 100644
index 0000000..13bf23d
--- /dev/null
+++ b/app/schedule/clean_tmpdir.js
@@ -0,0 +1,58 @@
+'use strict';
+
+const path = require('path');
+const fs = require('mz/fs');
+const rimraf = require('mz-modules/rimraf');
+const moment = require('moment');
+
+module.exports = app => {
+  return class CleanTmpdir extends (app.Subscription || app.BaseContextClass) {
+    static get schedule() {
+      return {
+        type: 'worker',
+        cron: app.config.multipart.cleanSchedule.cron,
+        immediate: false,
+        // disable on stream mode
+        disable: app.config.multipart.mode === 'stream',
+      };
+    }
+
+    async _remove(dir) {
+      const { ctx } = this;
+      if (await fs.exists(dir)) {
+        ctx.coreLogger.info('[egg-multipart:CleanTmpdir] removing tmpdir: %j', dir);
+        try {
+          await rimraf(dir);
+          ctx.coreLogger.info('[egg-multipart:CleanTmpdir:success] tmpdir: %j has been removed', dir);
+        } catch (err) {
+          ctx.coreLogger.error('[egg-multipart:CleanTmpdir:error] remove tmpdir: %j error: %s',
+            dir, err);
+          ctx.coreLogger.error(err);
+        }
+      }
+    }
+
+    async subscribe() {
+      const { ctx } = this;
+      const config = ctx.app.config;
+      ctx.coreLogger.info('[egg-multipart:CleanTmpdir] start clean tmpdir: %j', config.multipart.tmpdir);
+      // last year
+      const lastYear = moment().subtract(1, 'years');
+      const lastYearDir = path.join(config.multipart.tmpdir, lastYear.format('YYYY'));
+      await this._remove(lastYearDir);
+      // 3 months
+      for (let i = 1; i <= 3; i++) {
+        const date = moment().subtract(i, 'months');
+        const dir = path.join(config.multipart.tmpdir, date.format('YYYY/MM'));
+        await this._remove(dir);
+      }
+      // 7 days
+      for (let i = 1; i <= 7; i++) {
+        const date = moment().subtract(i, 'days');
+        const dir = path.join(config.multipart.tmpdir, date.format('YYYY/MM/DD'));
+        await this._remove(dir);
+      }
+      ctx.coreLogger.info('[egg-multipart:CleanTmpdir] end');
+    }
+  };
+};
diff --git a/appveyor.yml b/appveyor.yml
deleted file mode 100644
index 981e82b..0000000
--- a/appveyor.yml
+++ /dev/null
@@ -1,15 +0,0 @@
-environment:
-  matrix:
-    - nodejs_version: '8'
-    - nodejs_version: '10'
-
-install:
-  - ps: Install-Product node $env:nodejs_version
-  - npm i npminstall && node_modules\.bin\npminstall
-
-test_script:
-  - node --version
-  - npm --version
-  - npm run test
-
-build: off
diff --git a/azure-pipelines.template.yml b/azure-pipelines.template.yml
new file mode 100644
index 0000000..b468c90
--- /dev/null
+++ b/azure-pipelines.template.yml
@@ -0,0 +1,47 @@
+# Node.js
+# Build a general Node.js application with npm.
+# Add steps that analyze code, save build artifacts, deploy, and more:
+# https://docs.microsoft.com/vsts/pipelines/languages/javascript
+# demo: https://github.com/parcel-bundler/parcel/blob/master/azure-pipelines-template.yml
+
+jobs:
+- job: ${{ parameters.name }}
+  pool:
+    vmImage: ${{ parameters.vmImage }}
+  strategy:
+    matrix:
+      node_8:
+        node_version: 8
+      node_10:
+        node_version: 10
+    maxParallel: 2
+  steps:
+    - task: NodeTool@0
+      inputs:
+        versionSpec: $(node_version)
+      displayName: 'Install Node.js'
+
+    # Set ENV
+    - ${{ if ne(parameters.name, 'windows') }}:
+      - script: |
+          echo $PWD
+          export PATH="$PATH:$PWD/node_modules/.bin"
+          echo "##vso[task.setvariable variable=PATH]$PATH"
+        displayName: Set ENV
+    - ${{ if eq(parameters.name, 'windows') }}:
+      - script: |
+          echo %cd%
+          set PATH=%PATH%;%cd%\node_modules\.bin
+          echo "##vso[task.setvariable variable=PATH]%PATH%"
+        displayName: Set ENV
+
+    - script: |
+        npm i npminstall && npminstall
+      displayName: 'Install Packages'
+    - script: |
+        npm run ci-windows
+      displayName: 'Build & Unit Test'
+    - ${{ if ne(parameters.name, 'windows') }}:
+      - script: |
+          npminstall codecov && codecov
+        displayName: 'Report Coverage'
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
new file mode 100644
index 0000000..554935f
--- /dev/null
+++ b/azure-pipelines.yml
@@ -0,0 +1,16 @@
+jobs:
+- template: azure-pipelines.template.yml
+  parameters:
+    name: linux
+    vmImage: 'ubuntu-16.04'
+
+- template: azure-pipelines.template.yml
+  parameters:
+    name: windows
+    vmImage: 'vs2017-win2016'
+
+- template: azure-pipelines.template.yml
+  parameters:
+    name: macos
+    vmImage: 'xcode9-macos10.13'
+
diff --git a/config/config.default.js b/config/config.default.js
index 64e87f1..de94497 100644
--- a/config/config.default.js
+++ b/config/config.default.js
@@ -1,27 +1,47 @@
 'use strict';
 
-/**
- * multipart parser options
- * @member Config#multipart
- * @property {Boolean} autoFields - Auto set fields to parts, default is `false`.
- * If set true,all fields will be auto handle and can acces by `parts.fields`
- * @property {String} defaultCharset - Default charset encoding, don't change it before you real know about it
- * @property {Integer} fieldNameSize - Max field name size (in bytes), default is `100`
- * @property {String|Integer} fieldSize - Max field value size (in bytes), default is `100kb`
- * @property {Integer} fields - Max number of non-file fields, default is `10`
- * @property {String|Integer} fileSize - Max file size (in bytes), default is `10mb`
- * @property {Integer} files - Max number of file fields, default is `10`
- * @property {Array|Function} whitelist - The white ext file names, default is `null`
- * @property {Array} fileExtensions - Add more ext file names to the `whitelist`, default is `[]`
- */
-exports.multipart = {
-  autoFields: false,
-  defaultCharset: 'utf8',
-  fieldNameSize: 100,
-  fieldSize: '100kb',
-  fields: 10,
-  fileSize: '10mb',
-  files: 10,
-  fileExtensions: [],
-  whitelist: null,
+const os = require('os');
+const path = require('path');
+
+module.exports = appInfo => {
+  const config = {};
+
+  /**
+   * multipart parser options
+   * @member Config#multipart
+   * @property {String} mode - which mode to handle multipart requests, default is `stream`, the hard way.
+   * If set to `file`, it's the easy way to handle multipart requests and save uploads to local files.
+   * If you are not familiar with how Node.js Stream works, you should use the `file` mode to get started.
+   * @property {Boolean} autoFields - Auto set fields to parts, default is `false`. Only works in `stream` mode.
+   * If set to true, all fields will be handled automatically and can be accessed via `parts.fields`
+   * @property {String} defaultCharset - Default charset encoding, don't change it unless you really know what you are doing
+   * @property {Integer} fieldNameSize - Max field name size (in bytes), default is `100`
+   * @property {String|Integer} fieldSize - Max field value size (in bytes), default is `100kb`
+   * @property {Integer} fields - Max number of non-file fields, default is `10`
+   * @property {String|Integer} fileSize - Max file size (in bytes), default is `10mb`
+   * @property {Integer} files - Max number of file fields, default is `10`
+   * @property {Array|Function} whitelist - The whitelist of file extensions, default is `null`
+   * @property {Array} fileExtensions - Add more file extensions to the `whitelist`, default is `[]`
+   * @property {String} tmpdir - The directory for temporary files. Only works in `file` mode.
+   */
+  config.multipart = {
+    mode: 'stream',
+    autoFields: false,
+    defaultCharset: 'utf8',
+    fieldNameSize: 100,
+    fieldSize: '100kb',
+    fields: 10,
+    fileSize: '10mb',
+    files: 10,
+    fileExtensions: [],
+    whitelist: null,
+    tmpdir: path.join(os.tmpdir(), 'egg-multipart-tmp', appInfo.name),
+    cleanSchedule: {
+      // run tmpdir clean job on every day 04:30 am
+      // cron style see https://github.com/eggjs/egg-schedule#cron-style-scheduling
+      cron: '0 30 4 * * *',
+    },
+  };
+
+  return config;
 };
diff --git a/package.json b/package.json
index bbd448a..99369bc 100644
--- a/package.json
+++ b/package.json
@@ -2,7 +2,10 @@
   "name": "egg-multipart",
   "version": "2.1.0",
   "eggPlugin": {
-    "name": "multipart"
+    "name": "multipart",
+    "optionalDependencies": [
+      "schedule"
+    ]
   },
   "description": "multipart plugin for egg",
   "main": "index.js",
@@ -13,6 +16,7 @@
     "test-local": "egg-bin test",
     "cov": "egg-bin cov",
     "ci": "egg-bin pkgfiles && npm run lint && npm run cov",
+    "ci-windows": "egg-bin pkgfiles && npm run lint -- --fix && npm run cov",
     "pkgfiles": "egg-bin pkgfiles"
   },
   "repository": {
@@ -28,7 +32,7 @@
   "author": "gxcsoccer