Skip to content

Commit

Permalink
feat(@formatjs/icu-messageformat-parser): preprocess 'j' date time sk…
Browse files Browse the repository at this point in the history
…eleton before parsing as suggested in the Unicode guide (#3544)

feat(intl-messageformat): add support for 'j' time skeleton format

Co-authored-by: Batjaa Batbold <batbob@amazon.comm>
  • Loading branch information
batjaa and Batjaa Batbold committed Apr 27, 2022
1 parent b8d17d9 commit 7173e29
Show file tree
Hide file tree
Showing 11 changed files with 1,998 additions and 7 deletions.
22 changes: 22 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,25 @@ npm run release:next
```
bazel run //packages/intl-datetimeformat:tz_data.update
```

### Generating CLDR data

1. Check the `./BUILD` file for data that can be generated — such targets are identifiable by their `generate_src_file()` call
```BUILD
generate_src_file(
name = "regex",
...
)
```
2. Create an empty file at the path given by the `src` attribute — the path is relative to the module root
```shell
touch packages/icu-messageformat-parser/regex.generated.ts
```
3. Run the update script
```shell
bazel run //packages/icu-messageformat-parser:regex.update
```
4. Verify
```shell
bazel run //packages/icu-messageformat-parser:regex
```
9 changes: 9 additions & 0 deletions packages/icu-messageformat-parser/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,15 @@ generate_src_file(
entry_point = "scripts/regex-gen.ts",
)

# Declares the generated source file `time-data.generated.ts`; its entry point
# is `scripts/time-data-gen.ts` and the `cldr-core` npm package is supplied as data.
generate_src_file(
name = "time-data",
src = "time-data.generated.ts",
data = [
"@npm//cldr-core",
],
entry_point = "scripts/time-data-gen.ts",
)

package_json_test(
name = "package_json_test",
deps = SRC_DEPS,
Expand Down
96 changes: 96 additions & 0 deletions packages/icu-messageformat-parser/date-time-pattern-generator.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import {timeData} from './time-data.generated'

/**
 * Rewrites the locale-dependent hour metacharacters of a date time skeleton
 * into concrete pattern symbols, following the Unicode specification:
 * https://www.unicode.org/reports/tr35/tr35-dates.html#table-mapping-requested-time-skeletons-to-patterns
 *
 * - Each run of `j` is replaced by the locale's default hour symbol plus,
 *   for 12-hour symbols (`h`/`K`), `a` day-period markers. The hour symbols
 *   are placed at the very front of the result built so far and the
 *   day-period markers at its end.
 * - Each `J` becomes `H`.
 * - Every other character is copied through unchanged.
 *
 * @param skeleton date time skeleton that may contain `j` or `J`
 * @param locale locale used to resolve the default hour symbol
 * @returns the skeleton with `j`/`J` replaced by concrete symbols
 */
export function getBestPattern(skeleton: string, locale: Intl.Locale) {
  let result = ''
  let index = 0

  while (index < skeleton.length) {
    const symbol = skeleton.charAt(index)
    index++

    if (symbol === 'J') {
      result += 'H'
      continue
    }
    if (symbol !== 'j') {
      result += symbol
      continue
    }

    // Swallow the remainder of this run of `j`s; the run length selects
    // the hour width and the day-period width.
    let extraCount = 0
    while (index < skeleton.length && skeleton.charAt(index) === 'j') {
      extraCount++
      index++
    }

    const hourSymbol = getDefaultHourSymbolFromLocale(locale)

    // 24-hour symbols never carry a day period.
    let dayPeriods = ''
    if (hourSymbol !== 'H' && hourSymbol !== 'k') {
      const dayPeriodCount = extraCount < 2 ? 1 : 3 + (extraCount >> 1)
      for (let i = 0; i < dayPeriodCount; i++) {
        dayPeriods += 'a'
      }
    }

    // An odd number of extra `j`s requests the two-digit hour form.
    let hours = ''
    const hourCount = 1 + (extraCount & 1)
    for (let i = 0; i < hourCount; i++) {
      hours += hourSymbol
    }

    result = hours + result + dayPeriods
  }

  return result
}

/**
 * Resolves the hour pattern symbol for `locale` from its
 * [hour cycle](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Locale/hourCycle):
 * `h24` -> `k`, `h23` -> `H`, `h12` -> `h`, `h11` -> `K`.
 *
 * The cycle is read from `locale.hourCycle`, then from the first entry of
 * `locale.hourCycles` when available. If neither yields a value, the first
 * entry of the generated `timeData` table is returned, looked up by the
 * maximized region, then the language, then `<language>-001`, then `001`.
 *
 * @param locale locale whose default hour symbol is wanted
 * @returns the hour pattern symbol
 * @throws Error when the locale reports an unrecognised hour cycle
 */
function getDefaultHourSymbolFromLocale(locale: Intl.Locale): string {
  let cycle = locale.hourCycle

  if (cycle === undefined) {
    // @ts-ignore hourCycle(s) is not identified yet
    const candidates = locale.hourCycles
    if (candidates && candidates.length) {
      // @ts-ignore
      cycle = candidates[0]
    }
  }

  if (cycle) {
    if (cycle === 'h24') {
      return 'k'
    }
    if (cycle === 'h23') {
      return 'H'
    }
    if (cycle === 'h12') {
      return 'h'
    }
    if (cycle === 'h11') {
      return 'K'
    }
    throw new Error('Invalid hourCycle')
  }

  // TODO: Once hourCycle is fully supported remove the following with data generation
  const language = locale.language
  // `root` has no region to derive, so skip the maximize step for it.
  const region: string | undefined =
    language === 'root' ? undefined : locale.maximize().region

  const allowedSymbols =
    timeData[region || ''] ||
    timeData[language || ''] ||
    timeData[`${language}-001`] ||
    timeData['001']
  // NOTE(review): the first table entry is returned verbatim — confirm the
  // generated data always leads with a single-letter hour symbol.
  return allowedSymbols[0]
}
21 changes: 18 additions & 3 deletions packages/icu-messageformat-parser/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import {
parseNumberSkeletonFromString,
parseDateTimeSkeleton,
} from '@formatjs/icu-skeleton-parser'
import {getBestPattern} from './date-time-pattern-generator'

const SPACE_SEPARATOR_START_REGEX = new RegExp(
`^${SPACE_SEPARATOR_REGEX.source}*`
Expand Down Expand Up @@ -56,6 +57,8 @@ export interface ParserOptions {
* Default is false
*/
captureLocation?: boolean

locale?: Intl.Locale
}

export type Result<T, E> = {val: T; err: null} | {val: null; err: E}
Expand Down Expand Up @@ -236,6 +239,7 @@ if (REGEX_SUPPORTS_U_AND_Y) {
export class Parser {
private message: string
private position: Position
private locale?: Intl.Locale

private ignoreTag: boolean
private requiresOtherClause: boolean
Expand All @@ -245,6 +249,7 @@ export class Parser {
this.message = message
this.position = {offset: 0, line: 1, column: 1}
this.ignoreTag = !!options.ignoreTag
this.locale = options.locale
this.requiresOtherClause = !!options.requiresOtherClause
this.shouldParseSkeletons = !!options.shouldParseSkeletons
}
Expand Down Expand Up @@ -739,7 +744,7 @@ export class Parser {
// Extract style or skeleton
if (styleAndLocation && startsWith(styleAndLocation?.style, '::', 0)) {
// Skeleton starts with `::`.
const skeleton = trimStart(styleAndLocation.style.slice(2))
let skeleton = trimStart(styleAndLocation.style.slice(2))

if (argType === 'number') {
const result = this.parseNumberSkeletonFromString(
Expand All @@ -757,12 +762,22 @@ export class Parser {
if (skeleton.length === 0) {
return this.error(ErrorKind.EXPECT_DATE_TIME_SKELETON, location)
}

let dateTimePattern = skeleton

// Get "best match" pattern only if locale is passed, if not, let it
// pass as-is where `parseDateTimeSkeleton()` will throw an error
// for unsupported patterns.
if (this.locale) {
dateTimePattern = getBestPattern(skeleton, this.locale)
}

const style: DateTimeSkeleton = {
type: SKELETON_TYPE.dateTime,
pattern: skeleton,
pattern: dateTimePattern,
location: styleAndLocation.styleLocation,
parsedOptions: this.shouldParseSkeletons
? parseDateTimeSkeleton(skeleton)
? parseDateTimeSkeleton(dateTimePattern)
: {},
}

Expand Down
30 changes: 30 additions & 0 deletions packages/icu-messageformat-parser/scripts/time-data-gen.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import * as rawTimeData from 'cldr-core/supplemental/timeData.json'
import {outputFileSync} from 'fs-extra'
import minimist from 'minimist'

/**
 * Generates the `timeData` source file from CLDR's supplemental time data.
 *
 * For every key of `supplemental.timeData`, underscores are turned into
 * hyphens and the space-separated `_allowed` string is split into an array.
 * The resulting map is written to `args.out` as a TypeScript module
 * exporting `timeData`.
 *
 * @param args parsed command-line arguments; `out` is the destination path
 * @throws Error when `--out` is missing or is not a string
 */
function main(args: minimist.ParsedArgs) {
  if (typeof args.out !== 'string' || !args.out) {
    throw new Error('Missing required `--out <file>` argument')
  }

  const {timeData} = rawTimeData.supplemental
  const data = Object.keys(timeData).reduce(
    (all: Record<string, string[]>, k) => {
      // Use a global regex: a string pattern would only replace the first `_`.
      all[k.replace(/_/g, '-')] =
        timeData[k as keyof typeof timeData]._allowed.split(' ')
      return all
    },
    {}
  )
  outputFileSync(
    args.out,
    `// @generated from time-data-gen.ts
// prettier-ignore
export const timeData: Record<string, string[]> = ${JSON.stringify(
      data,
      undefined,
      2
    )};
`
  )
}

// Run the generator only when this file is executed directly, not when it is imported.
const isEntryScript = require.main === module
if (isEntryScript) {
  const cliArgs = minimist(process.argv)
  main(cliArgs)
}

0 comments on commit 7173e29

Please sign in to comment.