Skip to content

Commit

Permalink
Merged PR 307: v1.29.0- sub/superscript support, pronunciation guide …
Browse files Browse the repository at this point in the history
…fixes, new exports (#286)

- Add support for subscripts and superscripts, now that YAPP emits them
- Fix an issue where punctuation after a pronunciation guide made the guide count as a buzzable word
- Normalize quotes in pronunciation guides so fancy quotes and normal quotes are treated the same
- Export functions and types around formatted text so people can see how MODAQ breaks down the question text and get accurate word indexes for buzzes
- Bump version to 1.29.0
  • Loading branch information
alopezlago committed Mar 20, 2024
1 parent 458a25d commit aaa2f40
Show file tree
Hide file tree
Showing 9 changed files with 528 additions and 54 deletions.
20 changes: 20 additions & 0 deletions index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@ import { IGameFormat as gameFormat } from "./src/state/IGameFormat";
import { IBonus as bonus, IPacket as packet, ITossup as tossup } from "./src/state/IPacket";
import { IPlayer as player } from "./src/state/TeamState";
import { ModaqControl as control, IModaqControlProps as controlProps } from "./src/components/ModaqControl";
import {
IFormattingOptions as iFormattingOptions,
parseFormattedText as ftpParseFormattedText,
splitFormattedTextIntoWords as ftpSplitFormattedTextIntoWords,
defaultPronunciationGuideMarkers as ftpDefaultPronunciationGuideMarkers,
defaultReaderDirectives as ftpDefaultReaderDirectives,
} from "src/parser/FormattedTextParser";
import { IFormattedText as iFormattedText } from "src/parser/IFormattedText";

export const ModaqControl = control;

Expand All @@ -18,4 +26,16 @@ export type IPlayer = player;

export type IGameFormat = gameFormat;

export type IFormattingOptions = iFormattingOptions;

export type IFormattedText = iFormattedText;

export const GameFormats = gameFormats;

export const defaultPronunciationGuideMarkers = ftpDefaultPronunciationGuideMarkers;

export const defaultReaderDirectives = ftpDefaultReaderDirectives;

export const parseFormattedText = ftpParseFormattedText;

export const splitFormattedTextIntoWords = ftpSplitFormattedTextIntoWords;
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "modaq",
"version": "1.28.0",
"version": "1.29.0",
"description": "Quiz Bowl Reader using TypeScript, React, and MobX",
"repository": {
"type": "git",
Expand Down
7 changes: 3 additions & 4 deletions src/components/Answer.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,9 @@ import { AppState } from "../state/AppState";

export const Answer = observer(function Answer(props: IAnswerProps): JSX.Element {
const appState: AppState = React.useContext(StateContext);
const formattedText: IFormattedText[] = FormattedTextParser.parseFormattedText(
props.text.trimLeft(),
appState.game.gameFormat.pronunciationGuideMarkers
);
const formattedText: IFormattedText[] = FormattedTextParser.parseFormattedText(props.text.trimLeft(), {
pronunciationGuideMarkers: appState.game.gameFormat.pronunciationGuideMarkers,
});

return (
<div>
Expand Down
4 changes: 3 additions & 1 deletion src/components/dialogs/ScoresheetDialog.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,9 @@ function getUnformattedAnswer(game: GameState, answer: string): string {
answer = answer.substring(0, alternateIndex).trim();
}

const text = FormattedTextParser.parseFormattedText(answer, game.gameFormat.pronunciationGuideMarkers)
const text = FormattedTextParser.parseFormattedText(answer, {
pronunciationGuideMarkers: game.gameFormat.pronunciationGuideMarkers,
})
.map((line) => line.text)
.join("");

Expand Down
149 changes: 121 additions & 28 deletions src/parser/FormattedTextParser.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,74 @@
import { IFormattedText } from "./IFormattedText";

export function parseFormattedText(text: string, pronunciationGuideMarkers?: [string, string]): IFormattedText[] {
/**
* Default pronunciation guide markers used if none are passed into `IFormattingOptions`. The first element is the tag
* that starts a pronunciation guide and the second element is the tag that ends it, so by default a guide looks like
* "(guide)".
*/
export const defaultPronunciationGuideMarkers: [string, string] = ["(", ")"];

/**
* Default reader directives used if none are passed into `IFormattingOptions`. Each entry is the full directive text,
* including its surrounding parentheses.
*/
export const defaultReaderDirectives: string[] = ["(emphasize)", "(pause)", "(read slowly)"];

This comment has been minimized.

Copy link
@hftf

hftf Apr 9, 2024

Collaborator

Can the versions with square brackets be included in the list as defaults as well?


/**
* Options for how to parse and format text. Every field is optional; a documented default is used for any field that
* is omitted.
*/
export interface IFormattingOptions {
/**
* Two-element array where the first string is the tag for the start of a pronunciation guide and the second string
* is the tag for the end. For example, if the pronunciation guide looks like "(guide)", the array would be
* [ "(", ")" ]. Pronunciation guides don't count as words and are formatted differently from the rest of the
* question text.
* If no value is provided, then `defaultPronunciationGuideMarkers` will be used.
*/
pronunciationGuideMarkers?: [string, string];

/**
* Directives for the reader, like "(read slowly)". These don't count as words and are formatted differently from
* the rest of the question text.
* If no value is provided, then `defaultReaderDirectives` will be used.
*/
readerDirectives?: string[];
}

/**
* Takes text with formatting tags and turns it into an array of texts with formatting information included, such as
* which words are bolded.
* Note that if the '"' character is used in a pronunciation guide, it will also support '“' and '”', and vice versa.
* @param text The text to format, such as a question or answerline.
* @param options Formatting options, such as what indicates the start of a pronunciation guide.
* @returns An array of `IFormattedText` that represents the text with formatting metadata, such as which words are
* bolded, underlined, etc.
*/
export function parseFormattedText(text: string, options?: IFormattingOptions): IFormattedText[] {
const result: IFormattedText[] = [];

if (text == undefined) {
return result;
}

options = options ?? {};
const pronunciationGuideMarkers: [[string, string]] = [
options.pronunciationGuideMarkers ?? defaultPronunciationGuideMarkers,
];

// Normalize quotes in pronunciation guides
if (pronunciationGuideMarkers[0][0].includes('"') || pronunciationGuideMarkers[0][1].includes('"')) {
pronunciationGuideMarkers.push([
pronunciationGuideMarkers[0][0].replace(/"/g, "“"),
pronunciationGuideMarkers[0][1].replace(/"/g, "”"),
]);
}

if (pronunciationGuideMarkers[0][0].includes("“") || pronunciationGuideMarkers[0][1].includes("”")) {
pronunciationGuideMarkers.push([
pronunciationGuideMarkers[0][0].replace(/“/g, '"'),
pronunciationGuideMarkers[0][1].replace(/”/g, '"'),
]);
}

const readerDirectives: string[] | undefined = options.readerDirectives ?? defaultReaderDirectives;

let bolded = false;
let emphasized = false;
let underlined = false;
Expand All @@ -15,26 +77,34 @@ export function parseFormattedText(text: string, pronunciationGuideMarkers?: [st
let pronunciation = false;
let startIndex = 0;

let extraTags = "";
for (const pronunciationGuideMarker of pronunciationGuideMarkers) {
extraTags += `|${escapeRegExp(pronunciationGuideMarker[0])}|${escapeRegExp(pronunciationGuideMarker[1])}`;
}

if (readerDirectives) {
extraTags += `|${readerDirectives.map((directive) => escapeRegExp(directive)).join("|")}`;
}

// If we need to support older browsers, use RegExp, exec, and a while loop. See
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/matchAll
const matchIterator: IterableIterator<RegExpMatchArray> =
pronunciationGuideMarkers == undefined
? text.matchAll(/<\/?em>|<\/?req>|<\/?b>|<\/?u>|<\/?sub>|<\/?sup>/gi)
: text.matchAll(
new RegExp(
`<\\/?em>|<\\/?req>|<\\/?b>|<\\/?u>|<\\/?sub>|<\\/?sup>|${escapeRegExp(
pronunciationGuideMarkers[0]
)}|${escapeRegExp(pronunciationGuideMarkers[1])}`,
"gi"
)
);
const matchIterator: IterableIterator<RegExpMatchArray> = text.matchAll(
new RegExp(`<\\/?em>|<\\/?req>|<\\/?b>|<\\/?u>|<\\/?sub>|<\\/?sup>${extraTags}`, "gi")
);

for (const match of matchIterator) {
// For the end of the pronunciation guide, we want to include it in the string, so add it to the current slice
const tagInTextLength: number =
pronunciationGuideMarkers != undefined && match[0].toLowerCase() === pronunciationGuideMarkers[1]
? pronunciationGuideMarkers[1].length
: 0;
// TODO: Do we need to do this with reader directives?
const tag: string = match[0];
const normalizedTag: string = tag.toLowerCase();
let tagInTextLength = 0;
for (const pronunciationGuideMarker of pronunciationGuideMarkers) {
if (normalizedTag === pronunciationGuideMarker[1].toLowerCase()) {
tagInTextLength = pronunciationGuideMarker[1].length;
break;
}
}

const matchIndex: number = match.index ?? 0;

const slice: string = text.substring(startIndex, matchIndex + tagInTextLength);
Expand All @@ -52,9 +122,8 @@ export function parseFormattedText(text: string, pronunciationGuideMarkers?: [st
}

// Once we got the slice of text, toggle the attribute for the next slice
const tag: string = match[0];
let skipTag = true;
switch (tag.toLowerCase()) {
switch (normalizedTag) {
case "<em>":
emphasized = true;
break;
Expand Down Expand Up @@ -94,16 +163,37 @@ export function parseFormattedText(text: string, pronunciationGuideMarkers?: [st
superscripted = false;
break;
default:
if (pronunciationGuideMarkers) {
if (tag === pronunciationGuideMarkers[0].toLowerCase()) {
let pronunciationGuideMatched = false;
for (const pronunciationGuideMarker of pronunciationGuideMarkers) {
if (normalizedTag === pronunciationGuideMarker[0].toLowerCase()) {
skipTag = false;
pronunciation = true;
break;
} else if (tag === pronunciationGuideMarkers[1].toLowerCase()) {
pronunciationGuideMatched = true;
} else if (normalizedTag === pronunciationGuideMarker[1].toLowerCase()) {
pronunciation = false;
break;
pronunciationGuideMatched = true;
}
}

if (pronunciationGuideMatched) {
break;
}

if (readerDirectives.some((directive) => directive.trim().toLowerCase() === normalizedTag)) {
// Treat it like a pronunciation guide for this one specific word
const readerDirectiveText: IFormattedText = {
text: tag,
bolded,
emphasized,
underlined,
subscripted,
superscripted,
pronunciation: true,
};
result.push(readerDirectiveText);
break;
}

throw `Unknown match: ${tag}`;
}

Expand Down Expand Up @@ -133,17 +223,20 @@ export function parseFormattedText(text: string, pronunciationGuideMarkers?: [st

// TODO: Look into removing the dependency with parseFormattedText, so that we only do one pass over the string instead
// of two passes.
export function splitFormattedTextIntoWords(
text: string,
pronunciationGuideMarkers?: [string, string]
): IFormattedText[][] {
/**
* Takes text with formatting tags and splits it into an array of words with formatting information for each word.
* @param text The text to format, such as a question or answerline.
* @param options Formatting options, such as what indicates the start of a pronunciation guide.
* @returns An array of words represented as an `IFormattedText[]` representing all the formatting in that word.
*/
export function splitFormattedTextIntoWords(text: string, options?: IFormattingOptions): IFormattedText[][] {
// We need to take the list of formatted text and split them up into individual words.
// Algorithm: For each piece of formatted text, go through and split the text by the spaces in it.
// If there are no spaces, then add it to a variable tracking the last word.
// If there are spaces, add the last word to the list, and then add each non-empty segment (i.e. non-space) to the
// list, except for the last one. If the last segment isn't empty, set that as the "last word", and continue going
// through the list of formatted texts.
const formattedText: IFormattedText[] = parseFormattedText(text, pronunciationGuideMarkers);
const formattedText: IFormattedText[] = parseFormattedText(text, options);

const splitFormattedText: IFormattedText[][] = [];

Expand Down
15 changes: 15 additions & 0 deletions src/parser/IFormattedText.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,23 @@
export interface IFormattedText {
/**
* The text of this fragment
*/
text: string;
bolded: boolean;

/**
* `true` if this text is emphasized, i.e. italicized.
*/
emphasized: boolean;

/**
* `true` if this text should be formatted like a pronunciation guide or reader directive.
*/
pronunciation?: boolean;

/**
* Obsolete. Use bolded and underlined instead.
*/
required?: boolean;
underlined?: boolean;
subscripted?: boolean;
Expand Down
16 changes: 10 additions & 6 deletions src/state/PacketState.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,9 @@ export class Tossup implements IQuestion {
let powerMarkerIndex = 0;
for (let i = 0; i < format.powers.length; i++) {
const powerMarker: string = format.powers[i].marker.trim();
const currentPowerMarkerIndex = words.indexOf(powerMarker, powerMarkerIndex);
const currentPowerMarkerIndex = words.findIndex(
(value, index) => index >= powerMarkerIndex && value.startsWith(powerMarker)
);
if (currentPowerMarkerIndex === -1) {
continue;
}
Expand Down Expand Up @@ -129,7 +131,7 @@ export class Tossup implements IQuestion {
let canBuzzOn = true;
let index: number = wordIndex;
const trimmedText: string = fullText.trim();
const powerMarkerIndex: number = format.powers.findIndex((power) => power.marker === trimmedText);
const powerMarkerIndex: number = format.powers.findIndex((power) => trimmedText.startsWith(power.marker));
if (isLastWord) {
// Last word should always be the terminal character, which can't be a power or in a pronunciation guide
wordIndex++;
Expand Down Expand Up @@ -173,9 +175,9 @@ export class Tossup implements IQuestion {

private formattedQuestionText(format: IGameFormat): IFormattedText[][] {
// Include the ■ to give an end of question marker
return FormattedTextParser.splitFormattedTextIntoWords(this.question, format.pronunciationGuideMarkers).concat([
[{ text: "■END■", bolded: true, emphasized: false, required: false, pronunciation: false }],
]);
return FormattedTextParser.splitFormattedTextIntoWords(this.question, {
pronunciationGuideMarkers: format.pronunciationGuideMarkers,
}).concat([[{ text: "■END■", bolded: true, emphasized: false, required: false, pronunciation: false }]]);
}
}

Expand All @@ -197,7 +199,9 @@ export class Bonus {
}

export function getBonusWords(text: string, format: IGameFormat): IFormattedText[] {
return FormattedTextParser.parseFormattedText(text, format.pronunciationGuideMarkers);
return FormattedTextParser.parseFormattedText(text, {
pronunciationGuideMarkers: format.pronunciationGuideMarkers,
});
}

export type ITossupWord = IBuzzableTossupWord | INonbuzzableTossupWord;
Expand Down
Loading

0 comments on commit aaa2f40

Please sign in to comment.