make Body.formData able to parse Multipart. #571

jimmywarting · 2017-10-16T18:02:52Z

I wrote this code recently that would make Body.formData able to parse Multipart bodies.
It's a bit heavier. I just realized it conflicts with your urlencoded version, so depending on the Content-Type header this would kick in instead...

Code

;(function() {

  // Parser states, numbered in the order the state machine normally visits them.
  const S = {
    PARSER_UNINITIALIZED: 0,
    START: 1,
    START_BOUNDARY: 2,
    HEADER_FIELD_START: 3,
    HEADER_FIELD: 4,
    HEADER_VALUE_START: 5,
    HEADER_VALUE: 6,
    HEADER_VALUE_ALMOST_DONE: 7,
    HEADERS_ALMOST_DONE: 8,
    PART_DATA_START: 9,
    PART_DATA: 10,
    PART_END: 11,
    END: 12
  };

  // Bit flags recording which kind of delimiter is currently being matched.
  const F = {
    PART_BOUNDARY: 1,
    LAST_BOUNDARY: 2
  };

  // Character codes the parser compares raw bytes against.
  const LF = 10;
  const CR = 13;
  const SPACE = 32;
  const HYPHEN = 45;
  const COLON = 58;
  const A = 97;
  const Z = 122;

  // Sets bit 0x20 of a character code, which maps ASCII 'A'-'Z' onto 'a'-'z'.
  function lower(c) {
    return c | 0x20;
  }

  /**
   * Creates a parser in the PARSER_UNINITIALIZED state. All boundary-related
   * fields start out as null; initWithBoundary() fills them in.
   */
  function MultipartParser() {
    Object.assign(this, {
      boundary: null,
      boundaryChars: null,
      lookbehind: null,
      state: S.PARSER_UNINITIALIZED,

      index: null,
      flags: 0
    });
  }

  /**
   * Prepares the parser for a body delimited by the given boundary string.
   *
   * The stored delimiter is the boundary prefixed with CRLF and two hyphens,
   * kept as bytes. `boundaryChars` records every byte value that occurs in
   * that delimiter, and `lookbehind` is sized to the delimiter plus 8 bytes.
   * Afterwards the parser is in the START state.
   *
   * @param {string} str - boundary value, without the leading "--".
   */
  MultipartParser.prototype.initWithBoundary = function(str) {
    var delimiter = '\r\n--' + str;
    var delimiterBytes = new Uint8Array(delimiter.length);
    var seenBytes = {};

    for (var pos = 0; pos < delimiter.length; pos += 1) {
      delimiterBytes[pos] = delimiter.charCodeAt(pos);
      // Key by the stored byte, i.e. the value after Uint8Array truncation.
      seenBytes[delimiterBytes[pos]] = true;
    }

    this.boundaryChars = seenBytes;
    this.boundary = delimiterBytes;
    this.lookbehind = new Uint8Array(delimiterBytes.length + 8);
    this.state = S.START;
  };

  /**
   * Feeds one buffer of bytes through the multipart state machine.
   *
   * Events are reported by calling optional handler methods on the parser
   * instance: `onPartBegin`, `onHeaderField`, `onHeaderValue`, `onHeaderEnd`,
   * `onHeadersEnd`, `onPartData`, `onPartEnd` and `onEnd`. A handler that is
   * not present on the instance is skipped. Handlers are invoked as
   * `(start, end, buffer)`; for the non-data events all three are undefined.
   *
   * @param ui8a - bytes to parse; only indexed access and `.length` are used
   *   here (toFormData passes a Uint8Array).
   * @returns {number} `ui8a.length` when the whole buffer was processed, or
   *   the offset at which parsing stopped (unexpected byte, unknown state, or
   *   parser still uninitialized). On such an early return `this.index`,
   *   `this.state` and `this.flags` are NOT written back.
   */
  MultipartParser.prototype.write = function(ui8a) {
    
    // Parser state is copied into locals for the duration of the loop and is
    // written back to `this` only after the loop has run to completion.
    // NOTE(review): `this.boundary.length` below throws when initWithBoundary()
    // has not been called (the constructor leaves `boundary` null), i.e. before
    // the PARSER_UNINITIALIZED case could ever run.
    var self = this,
        i = 0,
        len = ui8a.length,
        prevIndex = this.index,
        index = this.index,
        state = this.state,
        flags = this.flags,
        lookbehind = this.lookbehind,
        boundary = this.boundary,
        boundaryChars = this.boundaryChars,
        boundaryLength = this.boundary.length,
        boundaryEnd = boundaryLength - 1,
        bufferLength = ui8a.length,
        c,
        cl,

        // Records the current offset as the start of a `name` span. The mark
        // is stored on the parser instance as `<name>Mark`.
        mark = function(name) {
          self[name + 'Mark'] = i;
        },
        // Drops the `<name>Mark` property without emitting anything.
        clear = function(name) {
          delete self[name + 'Mark'];
        },
        // Invokes the `on<Name>` handler if the instance has one. Calls with a
        // defined, empty range (start === end) are suppressed.
        callback = function(name, start, end, ui8a) {
          if (start !== undefined && start === end) {
            return;
          }

          var callbackSymbol = 'on'+name.substr(0, 1).toUpperCase()+name.substr(1);
          if (callbackSymbol in self) {
            self[callbackSymbol](start, end, ui8a);
          }
        },
        // Emits the span that starts at `<name>Mark`, if such a mark exists.
        //   clear falsy: emit up to the end of the buffer and reset the mark
        //                to 0 (the mark stays on the instance).
        //   clear truthy: emit up to the current offset and delete the mark.
        dataCallback = function(name, clear) {
          var markSymbol = name + 'Mark';
          if (!(markSymbol in self)) {
            return;
          }

          if (!clear) {
            callback(name, self[markSymbol], ui8a.length, ui8a);
            self[markSymbol] = 0;
          } else {
            callback(name, self[markSymbol], i, ui8a);
            delete self[markSymbol];
          }
        };

    for (i = 0; i < len; i++) {
      c = ui8a[i];

      switch (state) {
        case S.PARSER_UNINITIALIZED:
          return i;
        case S.START:
          index = 0;
          state = S.START_BOUNDARY;
          // falls through
        case S.START_BOUNDARY:
          // The stored boundary starts with CRLF (see initWithBoundary), which
          // the first delimiter does not have; hence the `index+2` offsets and
          // the `boundary.length - 2` comparisons in this state.
          if (index == boundary.length - 2) {
            // First byte after the boundary text: '-' starts a closing
            // delimiter, CR starts the line break of a regular one.
            if (c == HYPHEN) {
              flags |= F.LAST_BOUNDARY;
            } else if (c != CR) {
              return i;
            }
            index++;
            break;
          } else if (index - 1 == boundary.length - 2) {
            // Second byte after the boundary text: must be '-' (closing
            // delimiter) or LF (regular delimiter), matching the flag set above.
            if (flags & F.LAST_BOUNDARY && c == HYPHEN){
              callback('end');
              state = S.END;
              flags = 0;
            } else if (!(flags & F.LAST_BOUNDARY) && c == LF) {
              index = 0;
              callback('partBegin');
              state = S.HEADER_FIELD_START;
            } else {
              return i;
            }
            break;
          }

          // On a mismatch restart the comparison at boundary[0] (index -2 plus
          // the +2 offset), then re-test the same byte against that position.
          if (c != boundary[index+2]) {
            index = -2;
          }
          if (c == boundary[index+2]) {
            index++;
          }
          break;
        case S.HEADER_FIELD_START:
          state = S.HEADER_FIELD;
          mark('headerField');
          index = 0;
          // falls through
        case S.HEADER_FIELD:
          // A CR where a header name would begin or continue ends the header
          // section; the pending headerField mark is discarded.
          if (c == CR) {
            clear('headerField');
            state = S.HEADERS_ALMOST_DONE;
            break;
          }

          // `index` counts the bytes consumed for this header name.
          index++;
          if (c == HYPHEN) {
            break;
          }

          if (c == COLON) {
            if (index == 1) {
              // empty header field
              return i;
            }
            dataCallback('headerField', true);
            state = S.HEADER_VALUE_START;
            break;
          }

          // Apart from '-' and ':', only bytes that lower() maps into a-z are
          // accepted in a header name.
          cl = lower(c);
          if (cl < A || cl > Z) {
            return i;
          }
          break;
        case S.HEADER_VALUE_START:
          // Skip spaces between the colon and the value.
          if (c == SPACE) {
            break;
          }

          mark('headerValue');
          state = S.HEADER_VALUE;
          // falls through
        case S.HEADER_VALUE:
          if (c == CR) {
            dataCallback('headerValue', true);
            callback('headerEnd');
            state = S.HEADER_VALUE_ALMOST_DONE;
          }
          break;
        case S.HEADER_VALUE_ALMOST_DONE:
          // The CR that ended a header value must be followed by LF.
          if (c != LF) {
            return i;
          }
          state = S.HEADER_FIELD_START;
          break;
        case S.HEADERS_ALMOST_DONE:
          // The CR that ended the header section must be followed by LF.
          if (c != LF) {
            return i;
          }

          callback('headersEnd');
          state = S.PART_DATA_START;
          break;
        case S.PART_DATA_START:
          state = S.PART_DATA;
          mark('partData');
          // falls through
        case S.PART_DATA:
          // In this state `index` is the number of delimiter bytes matched so
          // far; values past boundary.length track the two bytes after it.
          prevIndex = index;

          if (index === 0) {
            // Boyer-Moore derived algorithm to safely skip non-boundary data
            i += boundaryEnd;
            while (i < bufferLength && !(ui8a[i] in boundaryChars)) {
              i += boundaryLength;
            }
            i -= boundaryEnd;
            // May read past the end of the buffer, in which case c is undefined
            // and matches nothing below.
            c = ui8a[i];
          }

          if (index < boundary.length) {
            if (boundary[index] == c) {
              if (index === 0) {
                // First byte of a possible delimiter: emit the data before it.
                dataCallback('partData', true);
              }
              index++;
            } else {
              index = 0;
            }
          } else if (index == boundary.length) {
            index++;
            if (c == CR) {
              // CR = part boundary
              flags |= F.PART_BOUNDARY;
            } else if (c == HYPHEN) {
              // HYPHEN = end boundary
              flags |= F.LAST_BOUNDARY;
            } else {
              index = 0;
            }
          } else if (index - 1 == boundary.length)  {
            if (flags & F.PART_BOUNDARY) {
              index = 0;
              if (c == LF) {
                // unset the PART_BOUNDARY flag
                flags &= ~F.PART_BOUNDARY;
                callback('partEnd');
                callback('partBegin');
                state = S.HEADER_FIELD_START;
                break;
              }
            } else if (flags & F.LAST_BOUNDARY) {
              if (c == HYPHEN) {
                callback('partEnd');
                callback('end');
                state = S.END;
                flags = 0;
              } else {
                index = 0;
              }
            } else {
              index = 0;
            }
          }

          if (index > 0) {
            // when matching a possible boundary, keep a lookbehind reference
            // in case it turns out to be a false lead
            lookbehind[index-1] = c;
          } else if (prevIndex > 0) {
            // if our boundary turned out to be rubbish, the captured lookbehind
            // belongs to partData
            let _lookbehind = new Uint8Array(lookbehind.buffer, lookbehind.byteOffset, lookbehind.byteLength);
            callback('partData', 0, prevIndex, _lookbehind);
            prevIndex = 0;
            mark('partData');

            // reconsider the current character even though it interrupted the
            // sequence; it could be the beginning of a new sequence
            i--;
          }

          break;
        case S.END:
          // Bytes after the closing delimiter are ignored.
          break;
        default:
          return i;
      }
    }

    // Emit whatever is still marked, up to the end of this buffer; the marks
    // themselves are kept (reset to 0) on the instance.
    dataCallback('headerField');
    dataCallback('headerValue');
    dataCallback('partData');

    this.index = index;
    this.state = state;
    this.flags = flags;

    return len;
  };

  /**
   * Signals that no further input will be written.
   *
   * If the parser stopped right at the start of a header block, or in part
   * data with exactly a full delimiter matched, the `onPartEnd` and `onEnd`
   * handlers (when present) are invoked without arguments. If the parser is
   * already in the END state nothing happens.
   *
   * @returns {Error|undefined} an Error (returned, not thrown) when the input
   *   stopped in any other state; otherwise undefined.
   */
  MultipartParser.prototype.end = function() {
    var parser = this;

    // Invoke the optional `on<Name>` handler of the parser, if it exists.
    function emit(name) {
      var handlerKey = 'on' + name.charAt(0).toUpperCase() + name.slice(1);
      if (handlerKey in parser) {
        parser[handlerKey]();
      }
    }

    var stoppedAtDelimiter =
        (parser.state == S.HEADER_FIELD_START && parser.index === 0) ||
        (parser.state == S.PART_DATA && parser.index == parser.boundary.length);

    if (stoppedAtDelimiter) {
      emit('partEnd');
      emit('end');
      return;
    }

    if (parser.state != S.END) {
      return new Error('MultipartParser.end(): stream ended unexpectedly');
    }
  };

  /**
   * Converts bytes to a string, one character per byte (byte value N becomes
   * the character with code N; no UTF-8 decoding is performed).
   *
   * The conversion is done in bounded chunks: passing a large array straight
   * to `String.fromCharCode.apply` spreads every byte into its own call
   * argument and throws a RangeError once the engine's argument limit is hit.
   *
   * @param {Uint8Array|ArrayLike<number>|null|undefined} uint8Arr - bytes to convert.
   * @returns {string} the resulting string; '' for empty or missing input.
   */
  function uint8toStr(uint8Arr) {
    if (uint8Arr == null) {
      return '';
    }

    var CHUNK_SIZE = 0x8000;
    var total = uint8Arr.length;

    // Small inputs take the direct path.
    if (!(total > CHUNK_SIZE)) {
      return String.fromCharCode.apply(null, uint8Arr);
    }

    var isTyped = typeof uint8Arr.subarray === 'function';
    var out = '';
    for (var offset = 0; offset < total; offset += CHUNK_SIZE) {
      // subarray() is a view (no copy); plain array-likes are sliced instead.
      var piece = isTyped
        ? uint8Arr.subarray(offset, offset + CHUNK_SIZE)
        : Array.prototype.slice.call(uint8Arr, offset, offset + CHUNK_SIZE);
      out += String.fromCharCode.apply(null, piece);
    }
    return out;
  }
    
  function _fileName(headerValue) {
    // matches either a quoted-string or a token (RFC 2616 section 19.5.1)
    var m = headerValue.match(/\bfilename=("(.*?)"|([^\(\)<>@,;:\\"\/\[\]\?=\{\}\s\t/]+))($|;\s)/i);
    if (!m) return;

    var match = m[2] || m[3] || '';
    var filename = match.substr(match.lastIndexOf('\\') + 1);
    filename = filename.replace(/%22/g, '"');
    filename = filename.replace(/&#([\d]{4});/g, function(m, code) {
      return String.fromCharCode(code);
    });
    return filename;
  }

  /**
   * Body.formData() implementation for multipart/form-data bodies.
   *
   * Must be called with `this` bound to a Request/Response-like object that
   * exposes `headers.get()` and `arrayBuffer()`. The boundary is taken from
   * the Content-Type header, the whole body is read and run through
   * MultipartParser, and every part becomes one FormData entry: a string for
   * parts without a filename, a Blob (with the part's Content-Type and
   * filename) otherwise.
   *
   * @returns {Promise<FormData>} resolves with the parsed entries. Rejects
   *   with TypeError('Failed to fetch') when the Content-Type is missing, is
   *   not multipart or has no boundary, when reading the body fails, or when
   *   the body is malformed or ends before the closing delimiter.
   */
  function toFormData() {
    var self = this
    var parser = new MultipartParser,
        part,
        headerField,
        headerValueChunks,
        fd = new FormData

    // Text in a multipart/form-data body is UTF-8, so chunks are decoded as
    // UTF-8 (streaming, so a character split across chunks survives).
    // Environments without TextDecoder keep the byte-per-character conversion.
    function decodeText(chunks) {
      if (typeof TextDecoder !== 'function') {
        return chunks.map(uint8toStr).join('')
      }
      var decoder = new TextDecoder('utf-8')
      var text = ''
      for (var k = 0; k < chunks.length; k++) {
        text += decoder.decode(chunks[k], {stream: true})
      }
      return text + decoder.decode()
    }

    parser.onPartBegin = function() {
      part = {data: []};
      headerField = '';
      headerValueChunks = [];
    };
    parser.onHeaderField = function(start, end, ui8a) {
      // The parser only lets ASCII letters and '-' through in header names.
      headerField += uint8toStr(ui8a.slice(start, end))
    }
    parser.onHeaderValue = function(start, end, ui8a) {
      // Copy the bytes: decoding waits until the whole value has been seen.
      headerValueChunks.push(ui8a.slice(start, end))
    }
    parser.onHeaderEnd = function() {
      var headerValue = decodeText(headerValueChunks)
      headerField = headerField.toLowerCase();

      // matches either a quoted-string or a token (RFC 2616 section 19.5.1)
      var m = headerValue.match(/\bname=("([^"]*)"|([^\(\)<>@,;:\\"\/\[\]\?=\{\}\s\t/]+))/i);
      if (headerField === 'content-disposition') {
        if (m) {
          part.name = m[2] || m[3] || '';
        }

        part.filename = _fileName(headerValue);
      } else if (headerField == 'content-type') {
        part.type = headerValue;
      } else if (headerField == 'content-transfer-encoding') {
        part.transferEncoding = headerValue.toLowerCase();
      }

      headerField = ''
      headerValueChunks = []
    }
    parser.onPartData = function(start, end, ui8a) {
      // Copy the bytes: the parser reuses its lookbehind buffer between calls.
      part.data.push(ui8a.slice(start, end))
    }
    parser.onPartEnd = function() {
      if (part.filename === undefined) {
        fd.append(part.name, decodeText(part.data))
      } else {
        var blob = new Blob(part.data, {type: part.type})
        fd.append(part.name, blob, part.filename)
      }
    }

    // The actual part that belongs more to Body.formData implementation
    return new Promise(function(rs){
      var ct = self.headers.get('content-type')
      if (ct && ct.match(/multipart/i)) {
        var m = ct.match(/boundary=(?:"([^"]+)"|([^;]+))/i);
        if (m) {
          parser.initWithBoundary(m[1] || m[2]);
          return rs(self.arrayBuffer())
        }
      }

      throw new TypeError('no or bad content-type header, no multipart boundary');
    })
    .then(function(arrayBuffer) {
      var bytes = new Uint8Array(arrayBuffer)
      var consumed = parser.write(bytes)
      // write() returns early on a byte it cannot accept, and only reaches the
      // END state after the closing delimiter. Anything else means the entries
      // collected so far are incomplete, so fail instead of returning them.
      if (consumed !== bytes.length || parser.state !== S.END) {
        throw new TypeError('malformed or truncated multipart body')
      }
      return fd
    })
    .catch(function(err) {
      // The original reason is only logged; callers always see the generic
      // TypeError below.
      console.log(err)
      throw new TypeError('Failed to fetch')
    })
  }


  // Use toFormData only where the prototype has no (truthy) formData of its
  // own; an existing implementation is kept as is.
  function installFormData(proto) {
    proto.formData = proto.formData || toFormData
  }

  installFormData(Response.prototype)
  installFormData(Request.prototype)

}());

Testing

// Round trip: serialize a FormData through a Response, parse it back with
// formData(), and print the resulting entries.
var form = new FormData()
form.append('fieldName', 'fieldValue')
form.append('fieldFile', new Blob(['blob'], {type:'text/plain'}), 'blob.txt')

new Response(form)
  .formData()
  .then(function(parsed) {
    return console.log([...parsed])
  })

I was wondering if you would like to implement this?
Much credit goes to node-formidable for building one of the fastest popular body parsers on npm.
I just removed the Buffer part and replaced it with Uint8Array so it can work better in the browser

mislav · 2017-10-16T19:41:02Z

Thanks for sharing!

With more and more modern browsers shipping with native support for fetch, we aren't really motivated to significantly expand on our current implementation. Since it has been serving most of our users relatively well until now, I think I can speak for the rest of my team when I say that we'd like to keep it around the same size that it already is, and just implement those parts of the spec that are reasonably easy to implement. Looking at the code that you've shared, it looks impressive, but it's not something we'd like to maintain over the long run.

However, if we could expose some of its functions so you can plug in this functionality as a separate library, we could consider that. Would that be an option?

jimmywarting · 2017-10-16T20:40:42Z

Ofc, I could make this as a separate module. It's more or less a plug in already

Just need to come up with a way for detecting if it's natively supported or not and patch it accordingly. Just looking for prototype.formData is insufficient with this fetch-polyfill.

  Response.prototype.formData = Response.prototype.formData || toFormData
  Request.prototype.formData = Request.prototype.formData || toFormData

You already implement one that handles urlencoded bodies.

I would like you to throw an error or log a warning when someone calls Body.formData() on a multipart body, telling them they also need to include the xxxx module from npm, or something like that.

jimmywarting · 2017-10-16T20:48:22Z

Or how about lazy loading this from some CDN on demand?

md724235 · 2018-01-10T15:07:09Z

Hello, I would like having some more explanation about github.github.

mislav · 2018-05-25T16:58:05Z

Closing this since multipart parsing is not something we want to address or keep maintaining.

@jimmywarting Thanks for your suggestion! If you implement this as a plugin and need a way to hook into our polyfill, we can talk about that if you open a separate PR demonstrating how this could be done. BTW, our library has exports and a UMD build now (if that helps).

jimmywarting mentioned this issue Oct 31, 2017

Implement body.formData node-fetch/node-fetch#199

Closed

mislav closed this as completed May 25, 2018

github-actions bot locked as resolved and limited conversation to collaborators Oct 2, 2020

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

make Body.formData able to parse Multipart. #571

make Body.formData able to parse Multipart. #571

jimmywarting commented Oct 16, 2017 •

edited by mislav

mislav commented Oct 16, 2017 •

edited

jimmywarting commented Oct 16, 2017 •

edited

jimmywarting commented Oct 16, 2017 •

edited

md724235 commented Jan 10, 2018

mislav commented May 25, 2018

make Body.formData able to parse Multipart. #571

make Body.formData able to parse Multipart. #571

Comments

jimmywarting commented Oct 16, 2017 • edited by mislav

mislav commented Oct 16, 2017 • edited

jimmywarting commented Oct 16, 2017 • edited

jimmywarting commented Oct 16, 2017 • edited

md724235 commented Jan 10, 2018

mislav commented May 25, 2018

jimmywarting commented Oct 16, 2017 •

edited by mislav

mislav commented Oct 16, 2017 •

edited

jimmywarting commented Oct 16, 2017 •

edited

jimmywarting commented Oct 16, 2017 •

edited