Skip to content

Conversation

Llorx
Copy link

@Llorx Llorx commented Oct 3, 2025

I noticed that on long strings, escapeLiteral goes really slow. I created my own escapeLiteral and I decided to push it. I saw this issue when searching: #3194

On short strings, the performance loss is very low compared to the benefit on long strings. On short strings the original is only 1.17x faster, but on long strings the new one is 30x (!!) faster, so at the moment that the strings are a bit longer than 10 characters, the performance profit is going to kick in, and the performance loss on short strings is negligible.

Anyway, with this information I decided to add a quick check: If the string is shorter than 13 characters, use the original one, and if it is longer, use the optimized one. It has the performance profit in both situations:
image

EDIT: Had a typo in the benchmark, it is only 30x faster, not 55x.

Here the benchmark code:

const { IsoBench } = require("iso-bench");

/**
 * Reference (character-by-character) implementation used as the benchmark baseline.
 *
 * Doubles every single quote and every backslash, wraps the result in single
 * quotes, and prefixes it with " E" (leading space included) when at least one
 * backslash was seen.
 *
 * @param {*} str - Value to escape. Anything that is not a string yields "''".
 * @returns {string} The quoted literal.
 */
function escapeLiteralOriginal(str) {
  // null/undefined are not strings either, so one guard covers every non-string input.
  if (typeof str !== 'string') {
    return "''"
  }

  let sawBackslash = false
  let body = ''

  for (let idx = 0; idx < str.length; idx += 1) {
    const ch = str.charAt(idx)
    switch (ch) {
      case '\\':
        sawBackslash = true
        body += '\\\\'
        break
      case "'":
        body += "''"
        break
      default:
        body += ch
    }
  }

  const quoted = `'${body}'`
  return sawBackslash ? ` E${quoted}` : quoted
}
/**
 * Candidate implementation with two paths chosen by input length.
 *
 * Strings shorter than 13 characters are escaped character by character;
 * longer strings are escaped with two global regex replacements. In both paths
 * single quotes and backslashes are doubled, the result is wrapped in single
 * quotes, and " E" (leading space included) is prefixed when a backslash was seen.
 *
 * @param {*} str - Value to escape. Anything that is not a string yields "''".
 * @returns {string} The quoted literal.
 */
const escapeLiteralNew = function (str) {
  if (typeof str !== 'string') {
    return "''"
  }

  const SHORT_LIMIT = 13
  let sawBackslash = false
  let body

  if (str.length >= SHORT_LIMIT) {
    // The replacer callback doubles as the backslash detector, avoiding a separate scan.
    const doubledBackslashes = str.replace(/\\/g, () => {
      sawBackslash = true
      return '\\\\'
    })
    body = doubledBackslashes.replace(/'/g, "''")
  } else {
    body = ''
    for (let idx = 0; idx < str.length; idx += 1) {
      const ch = str.charAt(idx)
      switch (ch) {
        case '\\':
          sawBackslash = true
          body += '\\\\'
          break
        case "'":
          body += "''"
          break
        default:
          body += ch
      }
    }
  }

  return sawBackslash ? ` E'${body}'` : `'${body}'`
}
/**
 * Builds the input set for one benchmark case.
 *
 * Every sample starts as `size` copies of "a". When `backslash` is set, the
 * character at index `i % size` is replaced with a backslash; when `quote` is
 * set, the character at index `(i + 2) % size` is replaced with a single quote
 * (`i` is the sample's index). Each sample therefore keeps exactly `size`
 * characters. For sizes 1 and 2 both positions coincide, so the quote wins.
 *
 * @param {number} size - Length of every generated string.
 * @param {boolean} backslash - Whether to place a backslash in each string.
 * @param {boolean} quote - Whether to place a single quote in each string.
 * @param {number} [count=100] - Number of strings to generate.
 * @returns {string[]} The generated samples.
 */
function newArray(size, backslash, quote, count = 100) {
    // Strings are immutable: `str[pos] = ch` is ignored in sloppy mode and
    // throws in strict mode, so the replacement must build a new string.
    const replaceAt = (text, pos, ch) =>
        text.substring(0, pos) + ch + text.substring(pos + 1);

    return new Array(count).fill(0).map((_, i) => {
        let str = "a".repeat(size);
        // With size 0 there is no position to replace (and `i % 0` is NaN).
        if (size > 0) {
            if (backslash) {
                str = replaceAt(str, i % size, "\\");
            }
            if (quote) {
                str = replaceAt(str, (i + 2) % size, "'");
            }
        }
        return str;
    });
}
// Benchmark suite comparing escapeLiteralNew against escapeLiteralOriginal.
// Eight groups: input size 10 ("short") or 1000 ("long"), crossed with the
// backslash/quote flags passed to newArray. In every entry the first callback
// escapes each element of `arr`, and the second callback builds the input with
// newArray (which defaults to 100 strings).
// NOTE(review): presumably iso-bench hands the second callback's return value
// to the first callback as `arr` — confirm against the iso-bench documentation.
new IsoBench()
    // Group 1: size 10, no backslash or quote requested.
    .add("escapeLiteralNew", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralNew(arr[i]);
        }
    }, () => newArray(10, false, false))
    .add("escapeLiteralOriginal", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralOriginal(arr[i]);
        }
    }, () => newArray(10, false, false))
.endGroup("short string, backslash = false, quote = false")

    // Group 2: size 10, backslash requested.
    .add("escapeLiteralNew", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralNew(arr[i]);
        }
    }, () => newArray(10, true, false))
    .add("escapeLiteralOriginal", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralOriginal(arr[i]);
        }
    }, () => newArray(10, true, false))
.endGroup("short string, backslash = true, quote = false")

    // Group 3: size 10, quote requested.
    .add("escapeLiteralNew", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralNew(arr[i]);
        }
    }, () => newArray(10, false, true))
    .add("escapeLiteralOriginal", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralOriginal(arr[i]);
        }
    }, () => newArray(10, false, true))
.endGroup("short string, backslash = false, quote = true")

    // Group 4: size 10, backslash and quote requested.
    .add("escapeLiteralNew", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralNew(arr[i]);
        }
    }, () => newArray(10, true, true))
    .add("escapeLiteralOriginal", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralOriginal(arr[i]);
        }
    }, () => newArray(10, true, true))
.endGroup("short string, backslash = true, quote = true")

    // Group 5: size 1000, no backslash or quote requested.
    .add("escapeLiteralNew", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralNew(arr[i]);
        }
    }, () => newArray(1000, false, false))
    .add("escapeLiteralOriginal", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralOriginal(arr[i]);
        }
    }, () => newArray(1000, false, false))
.endGroup("long string, backslash = false, quote = false")

    // Group 6: size 1000, backslash requested.
    .add("escapeLiteralNew", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralNew(arr[i]);
        }
    }, () => newArray(1000, true, false))
    .add("escapeLiteralOriginal", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralOriginal(arr[i]);
        }
    }, () => newArray(1000, true, false))
.endGroup("long string, backslash = true, quote = false")

    // Group 7: size 1000, quote requested.
    .add("escapeLiteralNew", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralNew(arr[i]);
        }
    }, () => newArray(1000, false, true))
    .add("escapeLiteralOriginal", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralOriginal(arr[i]);
        }
    }, () => newArray(1000, false, true))
.endGroup("long string, backslash = false, quote = true")

    // Group 8: size 1000, backslash and quote requested.
    .add("escapeLiteralNew", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralNew(arr[i]);
        }
    }, () => newArray(1000, true, true))
    .add("escapeLiteralOriginal", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralOriginal(arr[i]);
        }
    }, () => newArray(1000, true, true))
.endGroup("long string, backslash = true, quote = true")
    // Configure console output, then start the run.
    .consoleLog()
    .run();

@Llorx
Copy link
Author

Llorx commented Oct 3, 2025

I don't understand the error the linter is returning

@Llorx Llorx changed the title Improve escapeIdentifier performance on long strings by 55x Improve escapeLiteral performance on long strings by 55x Oct 3, 2025
@Llorx Llorx changed the title Improve escapeLiteral performance on long strings by 55x Improve escapeLiteral performance on long strings by 30x Oct 3, 2025
@Llorx Llorx changed the title Improve escapeLiteral performance on long strings by 30x Improve escapeLiteral performance of long strings by 30x Oct 3, 2025
Copy link
Collaborator

@charmander charmander left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’m against the complexity of having a second path for short strings. (I don’t even think this function should have a second path for strings without backslashes, but that’d have to be a pg 9 change.) There are probably cleaner optimization options, since IIRC the original implementation here was just ported from C.

Also not sure how much performance matters past a certain point: as mentioned in #3194, people should pretty much always be using parameters instead of escapeLiteral anyway.

The linter is complaining about trailing spaces.

@charmander
Copy link
Collaborator

if (backslash) {
    const pos = i % size;
    str[pos] = "\\";
}
if (quote) {
    const pos = (i + 2) % size;
    str[pos] = "'";
}

str is a string (immutable), so this benchmark doesn’t actually test any quotes or backslashes.

@Llorx
Copy link
Author

Llorx commented Oct 6, 2025

if (backslash) {
    const pos = i % size;
    str[pos] = "\\";
}
if (quote) {
    const pos = (i + 2) % size;
    str[pos] = "'";
}

str is a string (immutable), so this benchmark doesn’t actually test any quotes or backslashes.

Ah my bad. I was working with buffers and failed miserably when doing the benchmark haha. Here is the fixed benchmark:
image

Backslashes are penalized far more than quotes because of the callback used to avoid a second pass to detect a backslash. Still a great performance improvement, especially when there's nothing to escape (which is the usual case).

The code:

const { IsoBench } = require("iso-bench");

/**
 * Reference (character-by-character) implementation used as the benchmark baseline.
 *
 * Doubles every single quote and every backslash, wraps the result in single
 * quotes, and prefixes it with " E" (leading space included) when at least one
 * backslash was seen.
 *
 * @param {*} str - Value to escape. Anything that is not a string yields "''".
 * @returns {string} The quoted literal.
 */
function escapeLiteralOriginal(str) {
  // null/undefined are not strings either, so one guard covers every non-string input.
  if (typeof str !== 'string') {
    return "''"
  }

  let sawBackslash = false
  let body = ''

  for (let idx = 0; idx < str.length; idx += 1) {
    const ch = str.charAt(idx)
    switch (ch) {
      case '\\':
        sawBackslash = true
        body += '\\\\'
        break
      case "'":
        body += "''"
        break
      default:
        body += ch
    }
  }

  const quoted = `'${body}'`
  return sawBackslash ? ` E${quoted}` : quoted
}
/**
 * Candidate implementation with two paths chosen by input length.
 *
 * Strings shorter than 13 characters are escaped character by character;
 * longer strings are escaped with two global regex replacements. In both paths
 * single quotes and backslashes are doubled, the result is wrapped in single
 * quotes, and " E" (leading space included) is prefixed when a backslash was seen.
 *
 * @param {*} str - Value to escape. Anything that is not a string yields "''".
 * @returns {string} The quoted literal.
 */
const escapeLiteralNew = function (str) {
  if (typeof str !== 'string') {
    return "''"
  }

  const SHORT_LIMIT = 13
  let sawBackslash = false
  let body

  if (str.length >= SHORT_LIMIT) {
    // The replacer callback doubles as the backslash detector, avoiding a separate scan.
    const doubledBackslashes = str.replace(/\\/g, () => {
      sawBackslash = true
      return '\\\\'
    })
    body = doubledBackslashes.replace(/'/g, "''")
  } else {
    body = ''
    for (let idx = 0; idx < str.length; idx += 1) {
      const ch = str.charAt(idx)
      switch (ch) {
        case '\\':
          sawBackslash = true
          body += '\\\\'
          break
        case "'":
          body += "''"
          break
        default:
          body += ch
      }
    }
  }

  return sawBackslash ? ` E'${body}'` : `'${body}'`
}
/**
 * Builds the input set for one benchmark case.
 *
 * Every sample starts as `size` copies of "a". When `backslash` is set, a
 * backslash is inserted at index `i % size`; when `quote` is set, a single
 * quote is then inserted at index `(i + 2) % size` (`i` is the sample's index).
 * Characters are inserted, not substituted, so a sample grows by one character
 * per enabled flag.
 *
 * @param {number} size - Number of "a" characters in each base string.
 * @param {boolean} backslash - Whether to insert a backslash.
 * @param {boolean} quote - Whether to insert a single quote.
 * @param {number} [count=100] - Number of strings to generate.
 * @returns {string[]} The generated samples.
 */
function newArray(size, backslash, quote, count = 100) {
    // Returns `text` with `ch` inserted in front of the character at `index`.
    const insertAt = (text, index, ch) =>
        text.substring(0, index) + ch + text.substring(index);

    const buildSample = (_, i) => {
        const base = "a".repeat(size);
        const withBackslash = backslash ? insertAt(base, i % size, "\\") : base;
        return quote ? insertAt(withBackslash, (i + 2) % size, "'") : withBackslash;
    };

    return new Array(count).fill(0).map(buildSample);
}
// Benchmark suite comparing escapeLiteralNew against escapeLiteralOriginal.
// Eight groups: base size 10 ("short") or 1000 ("long"), crossed with the
// backslash/quote flags passed to newArray. In every entry the first callback
// escapes each element of `arr`, and the second callback builds the input with
// newArray (which defaults to 100 strings).
// NOTE(review): presumably iso-bench hands the second callback's return value
// to the first callback as `arr` — confirm against the iso-bench documentation.
new IsoBench()
    // Group 1: size 10, no backslash or quote.
    .add("escapeLiteralNew", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralNew(arr[i]);
        }
    }, () => newArray(10, false, false))
    .add("escapeLiteralOriginal", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralOriginal(arr[i]);
        }
    }, () => newArray(10, false, false))
.endGroup("short string, backslash = false, quote = false")

    // Group 2: size 10, backslash only.
    .add("escapeLiteralNew", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralNew(arr[i]);
        }
    }, () => newArray(10, true, false))
    .add("escapeLiteralOriginal", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralOriginal(arr[i]);
        }
    }, () => newArray(10, true, false))
.endGroup("short string, backslash = true, quote = false")

    // Group 3: size 10, quote only.
    .add("escapeLiteralNew", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralNew(arr[i]);
        }
    }, () => newArray(10, false, true))
    .add("escapeLiteralOriginal", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralOriginal(arr[i]);
        }
    }, () => newArray(10, false, true))
.endGroup("short string, backslash = false, quote = true")

    // Group 4: size 10, backslash and quote.
    .add("escapeLiteralNew", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralNew(arr[i]);
        }
    }, () => newArray(10, true, true))
    .add("escapeLiteralOriginal", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralOriginal(arr[i]);
        }
    }, () => newArray(10, true, true))
.endGroup("short string, backslash = true, quote = true")

    // Group 5: size 1000, no backslash or quote.
    .add("escapeLiteralNew", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralNew(arr[i]);
        }
    }, () => newArray(1000, false, false))
    .add("escapeLiteralOriginal", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralOriginal(arr[i]);
        }
    }, () => newArray(1000, false, false))
.endGroup("long string, backslash = false, quote = false")

    // Group 6: size 1000, backslash only.
    .add("escapeLiteralNew", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralNew(arr[i]);
        }
    }, () => newArray(1000, true, false))
    .add("escapeLiteralOriginal", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralOriginal(arr[i]);
        }
    }, () => newArray(1000, true, false))
.endGroup("long string, backslash = true, quote = false")

    // Group 7: size 1000, quote only.
    .add("escapeLiteralNew", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralNew(arr[i]);
        }
    }, () => newArray(1000, false, true))
    .add("escapeLiteralOriginal", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralOriginal(arr[i]);
        }
    }, () => newArray(1000, false, true))
.endGroup("long string, backslash = false, quote = true")

    // Group 8: size 1000, backslash and quote.
    .add("escapeLiteralNew", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralNew(arr[i]);
        }
    }, () => newArray(1000, true, true))
    .add("escapeLiteralOriginal", (arr) => {
        for (let i = 0; i < arr.length; i++) {
            escapeLiteralOriginal(arr[i]);
        }
    }, () => newArray(1000, true, true))
.endGroup("long string, backslash = true, quote = true")
    // Configure console output, then start the run.
    .consoleLog()
    .run();

@Llorx
Copy link
Author

Llorx commented Oct 6, 2025

I’m against the complexity of having a second path for short strings. (I don’t even think this function should have a second path for strings without backslashes, but that’d have to be a pg 9 change.) There are probably cleaner optimization options, since IIRC the original implementation here was just ported from C.

Yeah, then I can go for the first one, which is way less complex.

Also not sure how much performance matters past a certain point: as mentioned in #3194, people should pretty much always be using parameters instead of escapeLiteral anyway.

The problem is that the performance of parameters is very low. I have a 4x penalty with 5k strings being 100k characters long each one, so if we can help the users to improve performance by just writing optimized code, I think that it should be the way to go. They still have the option to go the low performant way in this case with parameters if they want, but if they need performance (which is pretty usual when you are moving hundreds of thousands of characters), they can use the included utils in the library.

If we keep escapeLiteral the old way, the only options that you have is a very very low performant way with the included escapeLiteral function, or a just very low performant way using parameters. Or to write your own escapeLiteral function, which already having a escapeLiteral function in the library feels very redundant...

This function is actually the one I use because I need that performance, and I'm not alone, as there's another ticket with that question. There are also going to be users who just struggle with the performance because there's nothing better, but don't open an issue: "this insert query takes 5 seconds. It is what it is. Next ticket". So I just decided to publish it to help users gain this performance they didn't know they were losing.

The linter is complaining about trailing spaces.

Ok, problems of the GitHub editor. I've done it online directly. Going to find the trailing spaces.

EDIT: Can't find the trailing spaces in the editor. If you like this, I'll clone and --fix it for you to merge.

@charmander
Copy link
Collaborator

charmander commented Oct 7, 2025

The problem is that the performance of parameters is very low. I have a 4x penalty with 5k strings being 100k characters long each one, so if we can help the users to improve performance by just writing optimized code, I think that it should be the way to go. They still have the option to go the low performant way in this case with parameters if they want, but if they need performance (which is pretty usual when you are moving hundreds of thousands of characters), they can use the included utils in the library.

How are you using parameters? Do you have a benchmark you can share for this too? It’s possible that a parameter-based approach is inherently slower with PostgreSQL, but it’s also possible that pg is just missing several optimizations that we could implement[1], with a much cleaner result than serializing data into code – and/or that the way you’re using parameters is suboptimal.

Or to write your own escapeLiteral function, which already having a escapeLiteral function in the library feels very redundant...

I know it feels that way, but consider that the library’s escapeLiteral acts the way it does for compatibility with a previous bug (#3489) and for compatibility with libpq’s PQescapeLiteral, which itself is bogged down by compatibility concerns. If inserting data into your code truly turns out to be the best option and you want maximum performance, consider asserting that standard_conforming_strings is on and using a simple and fast implementation:

/**
 * Minimal literal escaper: doubles single quotes and wraps the result in
 * single quotes. Backslashes are left untouched, so per the surrounding
 * discussion this is only appropriate when standard_conforming_strings is on.
 *
 * @param {string} str - Text to escape; must provide `replaceAll`.
 * @returns {string} The quoted literal.
 */
const escapeLiteralStandard = (str) => {
  const doubledQuotes = str.replaceAll("'", "''");
  return "'" + doubledQuotes + "'";
};

or

/**
 * Literal escaper that doubles single quotes by scanning with `indexOf` and
 * copying the untouched runs between quotes. Backslashes are left untouched,
 * so per the surrounding discussion this is only appropriate when
 * standard_conforming_strings is on.
 *
 * @param {string} str - Text to escape.
 * @returns {string} The quoted literal.
 * @throws {TypeError} If `str` is not a string.
 */
const escapeLiteralStandard = (str) => {
  if (typeof str !== 'string') {
    throw new TypeError('string expected');
  }

  let out = "'";
  let cursor = 0;
  let quoteAt = str.indexOf("'", cursor);

  while (quoteAt !== -1) {
    // Copy the run before the quote, then emit the doubled quote.
    out += `${str.substring(cursor, quoteAt)}''`;
    cursor = quoteAt + 1;
    quoteAt = str.indexOf("'", cursor);
  }

  // Trailing run after the last quote, plus the closing delimiter.
  return `${out}${str.substring(cursor)}'`;
};

If being one configurable setting away from SQL injection makes you uneasy, I’d still opt for consistently returning C-escape string literals:

/**
 * Escapes text as an E'...' string literal: every backslash and single quote
 * is preceded by a backslash, and the result is always wrapped in E'...'.
 *
 * @param {string} str - Text to escape.
 * @returns {string} The E-prefixed quoted literal.
 * @throws {TypeError} If `str` is not a string.
 */
const escapeLiteralPg = (str) => {
  if (typeof str !== 'string') {
    throw new TypeError('string expected');
  }

  // `$&` in the replacement stands for the matched character, so each match
  // becomes a backslash followed by itself.
  const body = str.replace(/[\\']/g, '\\$&');
  return `E'${body}'`;
};

or

/**
 * Escapes text as an E'...' string literal using only `indexOf`/`substring`:
 * every backslash and single quote is preceded by a backslash, and the result
 * is always wrapped in E'...'.
 *
 * The input is processed one quote-delimited segment at a time: backslashes in
 * front of the next quote are escaped first, then the quote itself.
 *
 * @param {string} str - Text to escape.
 * @returns {string} The E-prefixed quoted literal.
 * @throws {TypeError} If `str` is not a string.
 */
const escapeLiteralPg = (str) => {
  if (typeof str !== 'string') {
    throw new TypeError('string expected');
  }

  let out = "E'";
  let cursor = 0;
  let quoteAt;

  do {
    quoteAt = str.indexOf("'", cursor);
    // Bound the backslash search to the text in front of the next quote.
    const head = quoteAt === -1 ? str : str.substring(0, quoteAt);

    let slashAt = head.indexOf('\\', cursor);
    while (slashAt !== -1) {
      out += `${str.substring(cursor, slashAt)}\\\\`;
      cursor = slashAt + 1;
      slashAt = head.indexOf('\\', cursor);
    }

    if (quoteAt !== -1) {
      out += `${str.substring(cursor, quoteAt)}\\'`;
      cursor = quoteAt + 1;
    }
  } while (quoteAt !== -1);

  // Remaining tail after the last escaped character, plus the closing quote.
  return `${out}${str.substring(cursor)}'`;
};

Footnotes

  1. I worked on this recently, and pg is missing several optimizations. But it’d be nice to know if they’d solve your problem here too.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants