Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor string formatting across HTML and language, add "blockwrap" metatag #348

Merged
merged 12 commits into from
Dec 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
293 changes: 215 additions & 78 deletions src/data/things/language.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import { Temporal, toTemporalInstant } from '@js-temporal/polyfill';

import * as html from '#html';
import {empty, withAggregate} from '#sugar';
import {isLanguageCode} from '#validators';
import {Tag} from '#html';

import {
getExternalLinkStringOfStyleFromDescriptors,
Expand Down Expand Up @@ -176,7 +177,7 @@ export class Language extends Thing {
const options =
(hasOptions
? args.at(-1)
: null);
: {});

if (!this.strings) {
throw new Error(`Strings unavailable`);
Expand All @@ -186,94 +187,201 @@ export class Language extends Thing {
throw new Error(`Invalid key ${key} accessed`);
}

const template = this.strings[key];

let output;

if (hasOptions) {
// Convert the keys on the options dict from camelCase to CONSTANT_CASE.
// (This isn't an OUTRAGEOUSLY versatile algorithm for doing that, 8ut
// like, who cares, dude?) Also, this is an array, 8ecause it's handy
// for the iterating we're a8out to do. Also strip HTML from arguments
// that are literal strings - real HTML content should always be proper
// HTML objects (see html.js).
const processedOptions =
Object.entries(options).map(([k, v]) => [
k.replace(/[A-Z]/g, '_$&').toUpperCase(),
this.#sanitizeStringArg(v),
]);

// Replacement time! Woot. Reduce comes in handy here!
output =
processedOptions.reduce(
(x, [k, v]) => x.replaceAll(`{${k}}`, v),
template);
} else {
// Without any options provided, just use the template as-is. This will
// still error if the template expected arguments, and otherwise will be
// the right value.
output = template;
// These will be filled up as we iterate over the template, slotting in
// each option (if it's present).
const missingOptionNames = new Set();
const outputParts = [];

// And this will have entries deleted as they're encountered in the
// template. Leftover entries are misplaced.
const optionsMap =
new Map(
Object.entries(options).map(([name, value]) => [
name
.replace(/[A-Z]/g, '_$&')
.toUpperCase(),
value,
]));

const output = this.#iterateOverTemplate({
template: this.strings[key],

match: /{(?<name>[A-Z0-9_]+)}/g,

insert: ({name: optionName}, canceledForming) => {
if (optionsMap.has(optionName)) {
let optionValue;

// We'll only need the option's value if we're going to use it as
// part of the formed output (see below).
if (!canceledForming) {
optionValue = optionsMap.get(optionName);
}

// But we always have to delete expected options off the provided
// option map, since the leftovers are what will be used to tell
// which are misplaced.
optionsMap.delete(optionName);

if (canceledForming) {
return undefined;
} else {
return optionValue;
}
} else {
// We don't need to continue forming the output if we've hit a
// missing option name, since the end result of this formatString
// call will be a thrown error, and formed output won't be needed.
missingOptionNames.add(optionName);
return undefined;
}
},
});

// Whatever is still left in the options map was never encountered in the
// template, so those options were provided by mistake.
const misplacedOptionNames =
  Array.from(optionsMap.keys());

// missingOptionNames is a Set, and Sets have no join() method, so turn it
// into an array before checking and reporting on it. (Calling join() on the
// Set directly would throw a TypeError instead of the intended error.)
const missingOptionNameList =
  Array.from(missingOptionNames);

withAggregate({message: `Errors in options for string "${key}"`}, ({push}) => {
  if (!empty(missingOptionNameList)) {
    const names = missingOptionNameList.join(`, `);
    push(new Error(`Missing options: ${names}`));
  }

  if (!empty(misplacedOptionNames)) {
    const names = misplacedOptionNames.join(`, `);
    push(new Error(`Unexpected options: ${names}`));
  }
});

return output;
}

#iterateOverTemplate({
template,
match: regexp,
insert: insertFn,
}) {
const outputParts = [];

let canceledForming = false;

let lastIndex = 0;
let partInProgress = '';

for (const match of template.matchAll(regexp)) {
const insertion =
insertFn(match.groups, canceledForming);

if (insertion === undefined) {
canceledForming = true;
}

// Don't proceed with forming logic if the insertion function has
// indicated that's not needed anymore - but continue iterating over
// the rest of the template's matches, so other iteration logic (with
// side effects) gets to process everything.
if (canceledForming) {
continue;
}

partInProgress += template.slice(lastIndex, match.index);

// Sanitize string arguments in particular. These are taken to come from
// (raw) data and may include special characters that aren't meant to be
// rendered as HTML markup.
const sanitizedInsertion =
this.#sanitizeValueForInsertion(insertion);

if (typeof sanitizedInsertion === 'string') {
// Join consecutive strings together.
partInProgress += sanitizedInsertion;
} else if (
sanitizedInsertion instanceof html.Tag &&
sanitizedInsertion.contentOnly
) {
// Collapse string-only tag contents onto the current string part.
partInProgress += sanitizedInsertion.toString();
} else {
// Push the string part in progress, then the insertion as-is.
outputParts.push(partInProgress);
outputParts.push(sanitizedInsertion);
partInProgress = '';
}

lastIndex = match.index + match[0].length;
}

if (canceledForming) {
return undefined;
}

// Tack onto the final partInProgress, which may still have a value by this
// point, if the final inserted value was a string. (Otherwise, it'll just
// be equal to the remaining template text.)
if (lastIndex < template.length) {
partInProgress += template.slice(lastIndex);
}

// Post-processing: if any expected arguments *weren't* replaced, that
// is almost definitely an error.
if (output.match(/\{[A-Z][A-Z0-9_]*\}/)) {
throw new Error(`Args in ${key} were missing - output: ${output}`);
if (partInProgress) {
outputParts.push(partInProgress);
}

// Last caveat: Wrap the output in an HTML tag so that it doesn't get
// treated as unsanitized HTML if *it* gets passed as an argument to
// *another* formatString call.
return this.#wrapSanitized(output);
return this.#wrapSanitized(outputParts);
}

// Escapes HTML special characters so they're displayed as-are instead of
// treated by the browser as a tag. This does *not* have an effect on actual
// html.Tag objects, which are treated as sanitized by default (so that they
// can be nested inside strings at all).
#sanitizeStringArg(arg) {
// Processes a value so that it's suitable to be inserted into a template.
// For strings, this escapes HTML special characters, displaying them as-are
// instead of representing HTML markup. For numbers and booleans, this turns
// them into string values, so they never accidentally get caught as falsy
// by #html stringification. Everything else - most importantly including
// html.Tag objects - gets left as-is, preserving the value exactly as it's
// provided.
#sanitizeValueForInsertion(value) {
const escapeHTML = CacheableObject.getUpdateValue(this, 'escapeHTML');

if (!escapeHTML) {
throw new Error(`escapeHTML unavailable`);
}

if (typeof arg !== 'string') {
return arg.toString();
}
switch (typeof value) {
case 'string':
return escapeHTML(value);

case 'number':
case 'boolean':
return value.toString();

return escapeHTML(arg);
default:
return value;
}
}

// Wraps the output of a formatting function in a no-name-nor-attributes
// HTML tag, which will indicate to other calls to formatString that this
// content is a string *that may contain HTML* and doesn't need to
// sanitized any further. It'll still .toString() to just the string
// contents, if needed.
#wrapSanitized(output) {
return new Tag(null, null, output);
#wrapSanitized(content) {
return html.tags(content, {
[html.joinChildren]: '',
[html.noEdgeWhitespace]: true,
});
}

// Similar to the above internal methods, but this one is public.
// It should be used when embedding content that may not have previously
// been sanitized directly into an HTML tag or template's contents.
// The templating engine usually handles this on its own, as does passing
// a value (sanitized or not) directly as an argument to formatString,
// but if you used a custom validation function ({validate: v => v.isHTML}
// instead of {type: 'string'} / {type: 'html'}) and are embedding the
// contents of a slot directly, it should be manually sanitized with this
// function first.
sanitize(arg) {
const escapeHTML = CacheableObject.getUpdateValue(this, 'escapeHTML');

if (!escapeHTML) {
throw new Error(`escapeHTML unavailable`);
// a value (sanitized or not) directly for inserting into formatting
// functions, but if you used a custom slot validation function (for example,
// {validate: v => v.isHTML} instead of {type: 'string'} / {type: 'html'})
// and are embedding the contents of the slot as a direct child of another
// tag, you should manually sanitize those contents with this function.
sanitize(value) {
if (typeof value === 'string') {
return this.#wrapSanitized(this.#sanitizeValueForInsertion(value));
} else {
return value;
}

return (
(typeof arg === 'string'
? new Tag(null, null, escapeHTML(arg))
: arg));
}

formatDate(date) {
Expand Down Expand Up @@ -466,35 +574,64 @@ export class Language extends Thing {
return this.formatString('count.words.withUnit.' + this.getUnitForm(value), {words});
}

#formatListHelper(array, processFn) {
// Operate on "insertion markers" instead of the actual contents of the
// array, because the process function (likely an Intl operation) is taken
// to only operate on strings. We'll insert the contents of the array back
// at these points afterwards.

const insertionMarkers =
Array.from(
{length: array.length},
(_item, index) => `<::insertion_${index}>`);

// Basically the same insertion logic as in formatString. Like there, we
// can't assume that insertion markers were kept in the same order as they
// were provided, so we'll refer to the marked index. But we don't need to
// worry about some of the indices *not* corresponding to a provided source
// item, like we do in formatString, so that cuts out a lot of the
// validation logic.

return this.#iterateOverTemplate({
template: processFn(insertionMarkers),

match: /<::insertion_(?<index>[0-9]+)>/g,

insert: ({index: markerIndex}) => {
return array[markerIndex];
},
});
}

// Conjunction list: A, B, and C
formatConjunctionList(array) {
this.assertIntlAvailable('intl_listConjunction');
return this.#wrapSanitized(
this.intl_listConjunction.format(
array.map(item => this.#sanitizeStringArg(item))));
return this.#formatListHelper(
array,
array => this.intl_listConjunction.format(array));
}

// Disjunction lists: A, B, or C
formatDisjunctionList(array) {
this.assertIntlAvailable('intl_listDisjunction');
return this.#wrapSanitized(
this.intl_listDisjunction.format(
array.map(item => this.#sanitizeStringArg(item))));
return this.#formatListHelper(
array,
array => this.intl_listDisjunction.format(array));
}

// Unit lists: A, B, C
formatUnitList(array) {
this.assertIntlAvailable('intl_listUnit');
return this.#wrapSanitized(
this.intl_listUnit.format(
array.map(item => this.#sanitizeStringArg(item))));
return this.#formatListHelper(
array,
array => this.intl_listUnit.format(array));
}

// Lists without separator: A B C
formatListWithoutSeparator(array) {
return this.#wrapSanitized(
array.map(item => this.#sanitizeStringArg(item))
.join(' '));
return this.#formatListHelper(
array,
array => array.join(' '));
}

// File sizes: 42.5 kB, 127.2 MB, 4.13 GB, 998.82 TB
Expand Down Expand Up @@ -528,13 +665,13 @@ export class Language extends Thing {
}
}

// Builds a count-formatting function for a particular string key.
//
// - stringKey: the part of the string key following "count.".
// - optionName: the name of the option which the formatted number is passed
//   under; defaults to the string key.
//
// The returned function takes a value and an optional {unit} flag. It reads
// formatString, getUnitForm and formatNumber off of `this`, so it needs to be
// called as a method. With unit set, the string key gets a ".withUnit."
// suffix followed by the unit form for the value.
const countHelper = (stringKey, optionName = stringKey) =>
  function(value, {unit = false} = {}) {
    return this.formatString(
      unit
        ? `count.${stringKey}.withUnit.` + this.getUnitForm(value)
        : `count.${stringKey}`,
      {[optionName]: this.formatNumber(value)});
  };

// TODO: These are hard-coded. Is there a better way?
Expand Down
4 changes: 4 additions & 0 deletions src/static/site6.css
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,10 @@ a:not([href]):hover {
white-space: nowrap;
}

/* Lays the element out as an inline-block box: it stays in the flow of the
   surrounding text, but its own contents are laid out together as one box. */
.blockwrap {
display: inline-block;
}

.contribution.has-tooltip,
.datetimestamp.has-tooltip {
position: relative;
Expand Down