diff --git a/.github/workflows/link-check-internal.yml b/.github/workflows/link-check-internal.yml index 3824e5c7e2f5..0d675fda80ba 100644 --- a/.github/workflows/link-check-internal.yml +++ b/.github/workflows/link-check-internal.yml @@ -15,7 +15,7 @@ on: required: true default: 'en' create_copilot_issue: - description: 'Create a Copilot-assigned issue with the top 5 redirects to fix' + description: 'Create a Copilot-assigned issue with the top 10 redirects to fix' type: boolean required: false default: false @@ -127,7 +127,7 @@ jobs: const report = JSON.parse(fs.readFileSync(reportFile, 'utf8')) const allRedirectGroups = report.groups.filter(g => g.isWarning) - const redirectGroups = allRedirectGroups.slice(0, 5) + const redirectGroups = allRedirectGroups.slice(0, 10) if (redirectGroups.length === 0) { core.info('No redirect groups found, skipping Copilot issue.') @@ -142,24 +142,30 @@ jobs: return `| \`${g.target}\` | \`${redirectTarget}\` | \`${file}\` | ${lines} |` }).join('\n') - const actionRunUrl = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}' - const hasMore = allRedirectGroups.length > redirectGroups.length - const countLine = hasMore - ? `These are the first ${redirectGroups.length} of ${allRedirectGroups.length} redirects found. The full list is available in the [workflow run artifacts](${actionRunUrl}).` - : `These are the first ${redirectGroups.length} of ${allRedirectGroups.length} redirects found.` + const artifactsUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}/artifacts` - const body = [ + const bodyLines = [ 'Copilot please fix the redirected internal links listed in the table below. All changes should be made within the `github/docs-internal` repository. For each entry, open the source file and replace the **Current Link** with the **Update To** path.', 'When all changes are made, open a pull request in `github/docs-internal` with the fixes. 
The pull request description should reference this issue to create a link between them. When the pull request is open, leave a comment on this issue with a link to it.', '', - countLine, + `These are the first ${redirectGroups.length} of ${allRedirectGroups.length} redirects found.`, '', '## Redirects to fix', '', '| Current Link | Update To | File | Line(s) |', '|---|---|---|---|', tableRows, - ].join('\n') + ] + + const MAX_ISSUE_BODY_LENGTH = 65536 + const artifactNote = `\n\n> [!NOTE]\n> The report was truncated because it exceeded the issue body length limit. [View the complete redirect report in the workflow artifacts](${artifactsUrl}).` + + let body = bodyLines.join('\n') + if (body.length > MAX_ISSUE_BODY_LENGTH) { + const truncatedLength = MAX_ISSUE_BODY_LENGTH - artifactNote.length + const lastNewline = body.lastIndexOf('\n', truncatedLength) + body = body.slice(0, lastNewline > 0 ? lastNewline : truncatedLength) + artifactNote + } // Use the REST API with agent_assignment to properly trigger Copilot cloud agent. // See: https://docs.github.com/en/copilot/how-tos/use-copilot-agents/cloud-agent/start-copilot-sessions#using-the-rest-api @@ -173,7 +179,7 @@ jobs: agent_assignment: { target_repo: 'github/docs-internal', base_branch: 'main', - custom_instructions: 'For each entry in the table, open the source file in the github/docs-internal repository and replace the Current Link with the Update To path. When all changes are made, open a pull request in github/docs-internal with the fixes. The pull request description should reference this issue to create a link between them. When the pull request is open, leave a comment on this issue with a link to it.', + custom_instructions: 'For each entry in the table, open the source file in the github/docs-internal repository and replace the Current Link with the Update To path. When all changes are made, open a pull request in github/docs-internal with the fixes. 
When the pull request is open, leave a comment on this issue with a link to it.', }, }) diff --git a/.github/workflows/sync-llms-txt.yml b/.github/workflows/sync-llms-txt.yml index 1e0e251b779c..b5cf8f2390e7 100644 --- a/.github/workflows/sync-llms-txt.yml +++ b/.github/workflows/sync-llms-txt.yml @@ -80,7 +80,8 @@ jobs: REPO="github/docs-internal" if gh api "repos/$REPO/git/ref/heads/$BRANCH" --jq '.object.sha' > /dev/null 2>&1; then echo "Branch $BRANCH exists, fetching" - git fetch origin "$BRANCH" + git -c url."https://x-access-token:${GH_TOKEN}@github.com/".insteadOf="https://github.com/" \ + fetch origin "$BRANCH" git checkout "$BRANCH" else echo "Branch $BRANCH does not exist, creating from main" diff --git a/Dockerfile b/Dockerfile index 567e20f66006..0e686615ed87 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,7 +10,7 @@ # --------------------------------------------------------------- # To update the sha: # https://github.com/github/gh-base-image/pkgs/container/gh-base-image%2Fgh-base-noble -FROM ghcr.io/github/gh-base-image/gh-base-noble:20260501-222137-g8063ecb13@sha256:9c650d5e53cbf6c95951373d197ede7955329ef695f5fc6fb8a1c56a8e3b817f AS base +FROM ghcr.io/github/gh-base-image/gh-base-noble:20260505-222701-gb8f4d82d0@sha256:e5ee5190511450a452713144fb1dcd957535ec7c68705efa48b3d2cbb9871b0d AS base # Install curl for Node install and determining the early access branch # Install git for cloning docs-early-access & translations repos diff --git a/content/admin/managing-accounts-and-repositories/managing-users-in-your-enterprise/viewing-people-in-your-enterprise.md b/content/admin/managing-accounts-and-repositories/managing-users-in-your-enterprise/viewing-people-in-your-enterprise.md index a10d183ceb65..52ff2fca2e7b 100644 --- a/content/admin/managing-accounts-and-repositories/managing-users-in-your-enterprise/viewing-people-in-your-enterprise.md +++ 
b/content/admin/managing-accounts-and-repositories/managing-users-in-your-enterprise/viewing-people-in-your-enterprise.md @@ -109,6 +109,9 @@ You may be able to view the email addresses for members of your enterprise on ei * If you verify a domain for your enterprise, you can view members' email addresses for the verified domain. For more information, see [AUTOTITLE](/admin/configuration/configuring-your-enterprise/verifying-or-approving-a-domain-for-your-enterprise). + > [!NOTE] + > Email addresses for verified domains are not returned in a guaranteed order. If a member has email addresses for multiple verified domains, old or stale email addresses may remain after an IdP change. The list of verified domain email addresses cannot reliably identify the member's canonical or current corporate email address. + * If you don't use {% data variables.product.prodname_emus %}, and you also don't configure SAML single sign-on (SSO), members access your enterprise's resources on {% data variables.product.github %} solely using a personal account. {% data reusables.saml.personal-accounts-determine-email-visibility %} If you use {% data variables.product.prodname_emus %}, verify a domain, or configure SAML SSO for your enterprise, you may be able to view the email addresses in one or more of the following ways. @@ -119,6 +122,8 @@ If you use {% data variables.product.prodname_emus %}, verify a domain, or confi * `GitHub com saml name`: The `NameID` from the user's linked SAML identity, which is typically the user's email address (for more information, see [AUTOTITLE](/admin/identity-and-access-management/using-saml-for-enterprise-iam/saml-configuration-reference)) * `GitHub com verified domain emails`: Email addresses for any verified domains (for more information, see [AUTOTITLE](/admin/configuration/configuring-your-enterprise/verifying-or-approving-a-domain-for-your-enterprise)) + The `GitHub com verified domain emails` value is unordered. 
Emails may be returned in a non-deterministic order, and you cannot request priority, sorting, or filtering for the user's current email address. + For more information, see [AUTOTITLE](/admin/user-management/managing-users-in-your-enterprise/exporting-membership-information-for-your-enterprise). {% data reusables.saml.use-api-to-get-externalidentity %} diff --git a/content/admin/managing-iam/reconfiguring-iam-for-enterprise-managed-users/migrating-your-enterprise-to-a-new-identity-provider-or-tenant.md b/content/admin/managing-iam/reconfiguring-iam-for-enterprise-managed-users/migrating-your-enterprise-to-a-new-identity-provider-or-tenant.md index b93005d66136..ace51bcdd1c1 100644 --- a/content/admin/managing-iam/reconfiguring-iam-for-enterprise-managed-users/migrating-your-enterprise-to-a-new-identity-provider-or-tenant.md +++ b/content/admin/managing-iam/reconfiguring-iam-for-enterprise-managed-users/migrating-your-enterprise-to-a-new-identity-provider-or-tenant.md @@ -74,7 +74,11 @@ If you don't already have single sign-on recovery codes for your enterprise, dow 1. Use a recovery code to sign into {% data variables.product.prodname_dotcom %} as the setup user, whose username is your enterprise's shortcode suffixed with `_admin`. For more information about the setup user, see [AUTOTITLE](/admin/identity-and-access-management/understanding-iam-for-enterprises/getting-started-with-enterprise-managed-users). 1. Disable authentication for your enterprise. For more information, see [AUTOTITLE](/admin/identity-and-access-management/configuring-authentication-for-enterprise-managed-users/disabling-authentication-for-enterprise-managed-users#disabling-authentication). -1. Wait up to an hour for {% data variables.product.github %} to suspend your enterprise's members, delete the linked SCIM identities, and delete the SCIM-provisioned IdP groups. +1. 
Wait for {% data variables.product.github %} to suspend your enterprise's members, delete the linked SCIM identities, and delete the SCIM-provisioned IdP groups. + +> [!NOTE] +> * After you disable authentication, {% data variables.product.github %} runs multiple background tasks that must complete before you continue with the remaining steps in this article. For large enterprises, this can take several hours or even days. +> * To confirm completion, go to the authentication settings page for your enterprise (Enterprise settings → Authentication security). While the tasks are still running, the "Enable OIDC configuration" or "Add SAML configuration" button is disabled, and you'll see a warning like "Previous SAML provider is being removed." ### 5. Validate suspension of your enterprise's members diff --git a/data/reusables/saml/use-api-to-get-externalidentity.md b/data/reusables/saml/use-api-to-get-externalidentity.md index 36feac3f2781..960aff6a63f7 100644 --- a/data/reusables/saml/use-api-to-get-externalidentity.md +++ b/data/reusables/saml/use-api-to-get-externalidentity.md @@ -1 +1,4 @@ 1. Use the GraphQL API to retrieve the `ExternalIdentity` for each member. For more information, see [AUTOTITLE](/graphql/overview/about-the-graphql-api) and [AUTOTITLE](/graphql/reference/objects#externalidentity) in the GraphQL API documentation. + + > [!NOTE] + > Access to external identities depends on whether SAML is configured at the organization or enterprise level. Organization-level external identities are available to organization owners, organization owner {% data variables.product.pat_generic_plural %} with the `read:org` or `admin:org` scope, and {% data variables.product.prodname_github_app %} installation tokens with read or write access to members when the app is installed on the organization. Enterprise-level external identities require an enterprise owner {% data variables.product.pat_generic %} with the `read:enterprise` or `admin:enterprise` scope. 
{% data variables.product.prodname_github_apps %} cannot access enterprise-level external identities, including enterprise-level SAML identities for {% data variables.product.prodname_ghe_cloud %} with personal user accounts. diff --git a/src/audit-logs/lib/index.ts b/src/audit-logs/lib/index.ts index 2a75aedf3850..080476bf4a62 100644 --- a/src/audit-logs/lib/index.ts +++ b/src/audit-logs/lib/index.ts @@ -2,6 +2,7 @@ import path from 'path' import { readCompressedJsonFileFallback } from '@/frame/lib/read-json-file' import { getOpenApiVersion } from '@/versions/lib/all-versions' +import { supported as supportedGhesReleases } from '@/versions/lib/enterprise-server-releases' import findPage from '@/frame/lib/find-page' import type { Context, Page } from '@/types' import type { @@ -300,6 +301,7 @@ export async function filterAndUpdateGhesDataByAllowlistValues({ auditLogPage, titleContext, globalFields = [], + supportedGhesVersions = supportedGhesReleases, }: { eventsToCheck: RawAuditLogEventT[] allowListValue: string @@ -308,9 +310,17 @@ export async function filterAndUpdateGhesDataByAllowlistValues({ auditLogPage: string titleContext?: TitleResolutionContext globalFields?: string[] + supportedGhesVersions?: string[] }) { if (!currentGhesEvents) currentGhesEvents = {} + // Upstream `audit-log-allowlists/data/schema.json` lags docs's deprecation + // schedule, so events still list `ghes` keys for versions we've already + // dropped from `supported` in `enterprise-server-releases.ts`. Without this + // filter, the nightly sync would re-add `src/audit-logs/data/ghes-X.Y/` + // dirs for those deprecated versions. See docs-engineering#6562. 
+ const supportedGhesVersionSet = new Set(supportedGhesVersions) + const seenByGhesVersion = new Map() for (const [ghesVersion, events] of Object.entries(currentGhesEvents)) { if (!events[auditLogPage]) continue @@ -320,6 +330,7 @@ export async function filterAndUpdateGhesDataByAllowlistValues({ for (const event of eventsToCheck) { for (const ghesVersion of Object.keys(event.ghes)) { + if (!supportedGhesVersionSet.has(ghesVersion)) continue const ghesVersionAllowlists = event.ghes[ghesVersion]._allowlists const fullGhesVersion = `ghes-${ghesVersion}` diff --git a/src/audit-logs/tests/unit/filter-events.ts b/src/audit-logs/tests/unit/filter-events.ts index 8448289abd53..17783fe37914 100644 --- a/src/audit-logs/tests/unit/filter-events.ts +++ b/src/audit-logs/tests/unit/filter-events.ts @@ -223,6 +223,7 @@ describe('audit log event filtering', () => { appendedDescriptions: {}, }, auditLogPage, + supportedGhesVersions: ['3.10', '3.11', '3.12'], }) const getActions = (version: string) => currentEvents[version][auditLogPage].map((event) => event.action) @@ -230,4 +231,42 @@ describe('audit log event filtering', () => { expect(getActions('ghes-3.11').includes('repo.create')).toBe(true) expect(auditLogPage in currentEvents['ghes-3.12']).toBeFalsy() }) + + test('ghes skips versions not in the supported list', async () => { + const eventsToProcess: RawAuditLogEventT[] = [ + { + action: 'repo.create', + description: 'repo was created', + docs_reference_links: '', + _allowlists: [], + ghes: { + '3.14': { + _allowlists: ['user'], + }, + '3.15': { + _allowlists: ['user'], + }, + }, + }, + ] + + const currentEvents: VersionedAuditLogData = {} + const auditLogPage = 'user' + + await filterAndUpdateGhesDataByAllowlistValues({ + eventsToCheck: eventsToProcess, + allowListValue: 'user', + currentGhesEvents: currentEvents, + pipelineConfig: { + sha: '', + appendedDescriptions: {}, + }, + auditLogPage, + supportedGhesVersions: ['3.15'], + }) + 
expect(currentEvents['ghes-3.14']).toBeUndefined() + expect(currentEvents['ghes-3.15'][auditLogPage].map((event) => event.action)).toContain( + 'repo.create', + ) + }) }) diff --git a/src/graphql/data/ghec/schema.docs.graphql b/src/graphql/data/ghec/schema.docs.graphql index a9edf4e33961..adb881a77d3f 100644 --- a/src/graphql/data/ghec/schema.docs.graphql +++ b/src/graphql/data/ghec/schema.docs.graphql @@ -69460,7 +69460,7 @@ type User implements Actor & Agentic & Node & PackageOwner & ProfileOwner & Proj ): Organization """ - Verified email addresses that match verified domains for a specified organization the user is a member of. + Verified email addresses that match verified domains for a specified organization the user is a member of. Results are unordered. There is no way to specify ordering, priority, or filtering, and this field should not be used to determine a user's canonical or current corporate email in multi-domain contexts. """ organizationVerifiedDomainEmails( """ diff --git a/src/languages/lib/correct-translation-content.ts b/src/languages/lib/correct-translation-content.ts index 1a0376a00a74..37e4b6c36ebb 100644 --- a/src/languages/lib/correct-translation-content.ts +++ b/src/languages/lib/correct-translation-content.ts @@ -67,6 +67,17 @@ export function correctTranslatedContentStrings( content = content.replace(/^([ \t]*)\* ?\n[ \t]+/gm, '$1* ') content = content.replace(/^\|[ \t]*\n[ \t]+/gm, '| ') + // The same translator wrapping habit also strands heading markers + // (`#`/`##`/...), blockquote markers (`>`), and the opening `**` of a + // bold span on their own line, with the actual content pushed to the + // next line as deeply indented text. This breaks heading/blockquote/ + // bold rendering and leaves Liquid tags and `[AUTOTITLE]` links + // unexpanded. Rejoin them. Fence- and frontmatter-aware so we don't + // disturb fenced markdown examples or YAML frontmatter. 
+ // ~3k headings, ~1.6k blockquotes, ~3.5k bold-after-marker cases + // measured across all eight translated languages. + content = joinDanglingMarkers(content) + // --- Per-language fixes (es, ja, pt, zh, ru, fr, ko, de) --- if (context.code === 'es') { @@ -809,6 +820,17 @@ export function correctTranslatedContentStrings( // `{% заголовки строк %}` — "row headers" = rowheaders (opener; `{% endrowheaders %}` stays in English) content = content.replaceAll('{% заголовки строк %}', '{% rowheaders %}') content = content.replaceAll('{%- заголовки строк %}', '{%- rowheaders %}') + // `{% windowsTerminal %}` — "Windows Terminal" platform tag with capital T + // (the correct tag name is lowercase `{% windowsterminal %}`) + content = content.replaceAll('{% windowsTerminal %}', '{% windowsterminal %}') + content = content.replaceAll('{%- windowsTerminal %}', '{%- windowsterminal %}') + // `{%- командная палитра ifversion %}` — "command palette ifversion" with word order swapped + // Russian "командная палитра" (command palette) was placed before "ifversion" and the + // feature-flag arg was dropped. Recover as `{%- ifversion command-palette %}`. + content = content.replace( + /\{%(-?)\s*командная\s+палитра\s+ifversion\s*(-?)%\}/g, + '{%$1 ifversion command-palette $2%}', + ) // `{% конец %}` after `{% raw %}` means `{% endraw %}`, not `{% endif %}`. // Handle this BEFORE the generic `{% конец %}` → `{% endif %}` fallback. 
// We use a split-based approach instead of `[^]*?` regex to avoid @@ -1016,6 +1038,17 @@ export function correctTranslatedContentStrings( '{% ifversion ghes %}Владелец предприятия может{%else %}{% data variables.product.company_short %} перенести образы Docker, ранее хранящиеся в реестре Docker на {% data variables.product.github %} на {% data variables.product.prodname_container_registry %}.', '{% ifversion ghes %}Владелец предприятия может{% else %}{% data variables.product.company_short %} может{% endif %} перенести образы Docker, ранее хранящиеся в реестре Docker на {% data variables.product.github %} на {% data variables.product.prodname_container_registry %}.', ) + + // [SCRAPE-6572] Per-file fix: + // repositories/viewing-activity-and-data-for-your-repository/viewing-a-projects-contributors.md + // (intro): translator swapped `{% endif %}` and `{% ifversion fpt or ghec %}`, + // leaving an orphan `endif` at the start of the intro and the `ifversion` + // unclosed. This broke the `/ru/repositories` landing page scrape since + // this page is one of its children. Restore correct ordering. 
+ content = content.replaceAll( + 'Вы можете увидеть, кто внес{% endif %} коммиты в репозиторий{% ifversion fpt or ghec %} и его зависимости.', + 'Вы можете увидеть, кто внес коммиты в репозиторий{% ifversion fpt or ghec %} и его зависимости{% endif %}.', + ) } if (context.code === 'fr') { @@ -1030,6 +1063,11 @@ export function correctTranslatedContentStrings( content = content.replace(/\{%-?\s*référentiel\s*-?%\}/g, '') content = content.replace(/\{%-?\s*paramètres\s*-?%\}/g, '') content = content.replace(/\{%-?\s*product\s*-?%\}/g, '') + // `{% données.variables.X %}` — translator used `.` instead of space after "données" + content = content.replace( + /\{%(-?)\s*données\.(variables|reusables)\.([A-Za-z0-9._-]+)(\s*-?%\})/g, + '{%$1 data $2.$3$4', + ) content = content.replaceAll('{% données variables', '{% data variables') content = content.replaceAll('{% données réutilisables.', '{% data reusables.') content = content.replaceAll('{% variables de données.', '{% data variables.') @@ -1174,6 +1212,17 @@ export function correctTranslatedContentStrings( } if (context.code === 'ko') { + // `{% datda variables.` — typo of "data" (d-a-t-d-a instead of d-a-t-a) + content = content.replaceAll('{% datda variables', '{% data variables') + content = content.replaceAll('{%- datda variables', '{%- data variables') + // `{% data를 [Korean] variables.X %}` — Korean object-marker "를" (object case particle) + // was accidentally appended to "data", and Korean words follow before the path. + // e.g. `{% data를 탐색하고 수락하기 variables.copilot.next_edit_suggestions %}` + // Strip the Korean text and restore the correct `{% data variables.X %}` tag. 
+ content = content.replace( + /\{%(-?)\s*data를\s+[가-힣\s]+variables\.([A-Za-z0-9._-]+)\s*(-?)%\}/g, + '{%$1 data variables.$2 $3%}', + ) content = content.replaceAll('[AUTOTITLE"을 참조하세요]', '[AUTOTITLE]') content = content.replaceAll('{% 데이터 variables', '{% data variables') content = content.replaceAll('{% 데이터 reusables.', '{% data reusables.') @@ -1293,6 +1342,18 @@ export function correctTranslatedContentStrings( '인스턴스에서 기본 제공 인증{% endif %}를 사용하는 경우 {% data variables.product.github %} 계정 {% ifversion ghes %}의 사용자 이름을 변경할 수 있습니다.', '{% data variables.product.github %} 계정의 사용자 이름을 변경할 수 있습니다.{% ifversion ghes %} 인스턴스에서 기본 제공 인증을 사용하는 경우.{% endif %}', ) + + // [SCRAPE-6572] Per-file fix: + // code-security/how-tos/secure-your-supply-chain/manage-your-dependency-security/configuring-access-to-private-registries-for-dependabot.md + // (intro): translator dropped the closing `{% endif %}` and replaced it + // with a duplicate `{% data variables.product.prodname_dependabot %}` + // reference. This left `{% ifversion dependabot-on-actions-self-hosted %}` + // unclosed and broke the `/ko/code-security` landing page scrape since + // this page is one of its children. Restore the `{% endif %}`. 
+ content = content.replaceAll( + '자체 호스팅된 실행기에서 실행 중인 {% data variables.product.prodname_dependabot %}에 대한 액세스를 구성할 수도 있습니다.{% data variables.product.prodname_dependabot %}', + '자체 호스팅된 실행기에서 실행 중인 {% data variables.product.prodname_dependabot %}에 대한 액세스를 구성할 수도 있습니다.{% endif %}', + ) } if (context.code === 'de') { @@ -1300,6 +1361,9 @@ export function correctTranslatedContentStrings( content = content.replaceAll('{% daten variables', '{% data variables') content = content.replaceAll('{% Daten reusables', '{% data reusables') content = content.replaceAll('{%- Daten reusables', '{%- data reusables') + // `{% Datenseite variables.` — "Datenseite" (data page) compound used instead of "data" + content = content.replaceAll('{% Datenseite variables', '{% data variables') + content = content.replaceAll('{%- Datenseite variables', '{%- data variables') // `wiederverwendbare` is German for "reusables" — fix translated reusables paths content = content.replaceAll('{% data wiederverwendbare.', '{% data reusables.') content = content.replaceAll('{% Daten wiederverwendbare.', '{% data reusables.') @@ -1989,3 +2053,113 @@ export function correctTranslatedContentStrings( return content } + +/** + * Rejoin marker lines that the translation pipeline split from their content. + * + * Translators sometimes leave a heading marker (`#`/`##`/...), blockquote + * marker (`>`), or the opening `**` of a bold span (immediately following a + * list/heading/blockquote/table marker) on its own line, with the rest of + * the content pushed to the next line as deeply indented text. This breaks + * rendering (empty headings, broken blockquotes, unrendered bold, unexpanded + * Liquid and `[AUTOTITLE]` links). + * + * Conservative thresholds: + * - Marker line has 0–3 leading spaces (CommonMark heading/blockquote rule). + * - Continuation line has 6+ leading spaces (avoids 4-space indented code). + * - Marker line contains *only* the marker (and optional trailing whitespace). 
+ * - Skip fenced code blocks (``` and ~~~) and YAML frontmatter (`---`...`---`). + */ +function joinDanglingMarkers(content: string): string { + const lines = content.split('\n') + const out: string[] = [] + let inFence = false + let fenceChar = '' + let fenceLen = 0 + let inFrontmatter = lines[0] === '---' + + // Marker-only line patterns (run only against non-fenced, non-frontmatter lines). + const headingOnly = /^([ \t]{0,3})(#{1,6})[ \t]*$/ + const blockquoteOnly = /^([ \t]{0,3}>)[ \t]*$/ + // Bold-open after a list/heading/blockquote/table marker (no other content). + const markerThenBoldOnly = + /^([ \t]{0,3}(?:[*+-]|\d+\.)[ \t]+|[ \t]{0,3}>[ \t]+|[ \t]{0,3}#{1,6}[ \t]+|\|[ \t]*)\*\*[ \t]*$/ + // Continuation: 6+ leading spaces and at least one non-whitespace character. + const deepIndented = /^[ \t]{6,}(\S.*)$/ + + for (let i = 0; i < lines.length; i++) { + const line = lines[i] + + // YAML frontmatter close: `---` or `...` after the opening `---`. + if (inFrontmatter && i > 0 && (line === '---' || line === '...')) { + inFrontmatter = false + out.push(line) + continue + } + + // While inside frontmatter, pass lines through verbatim. Crucially, + // do NOT run fence detection here — a frontmatter line starting with + // ``` or ~~~ (e.g. inside a multiline scalar) would otherwise toggle + // `inFence` and cause the rest of the document after frontmatter + // closes to be (mis-)treated as inside a fence. + if (inFrontmatter) { + out.push(line) + continue + } + + // CommonMark fenced code block: 0–3 leading spaces, then 3+ ` or ~. 
+ const fenceMatch = line.match(/^[ \t]{0,3}(`{3,}|~{3,})/) + if (fenceMatch) { + const marker = fenceMatch[1] + if (!inFence) { + inFence = true + fenceChar = marker[0] + fenceLen = marker.length + } else if (marker[0] === fenceChar && marker.length >= fenceLen) { + inFence = false + fenceChar = '' + fenceLen = 0 + } + out.push(line) + continue + } + + if (inFence) { + out.push(line) + continue + } + + const next = i + 1 < lines.length ? lines[i + 1] : undefined + const nextDeep = next !== undefined ? next.match(deepIndented) : null + if (!nextDeep) { + out.push(line) + continue + } + const nextContent = nextDeep[1] + + const heading = line.match(headingOnly) + if (heading) { + out.push(`${heading[1]}${heading[2]} ${nextContent}`) + i++ + continue + } + + const bq = line.match(blockquoteOnly) + if (bq) { + out.push(`${bq[1]} ${nextContent}`) + i++ + continue + } + + const boldOpen = line.match(markerThenBoldOnly) + if (boldOpen) { + out.push(`${boldOpen[1]}**${nextContent}`) + i++ + continue + } + + out.push(line) + } + + return out.join('\n') +} diff --git a/src/languages/tests/correct-translation-content.ts b/src/languages/tests/correct-translation-content.ts index 6b82bb0a507d..3680fecce191 100644 --- a/src/languages/tests/correct-translation-content.ts +++ b/src/languages/tests/correct-translation-content.ts @@ -662,6 +662,21 @@ describe('correctTranslatedContentStrings', () => { expect(fix('{%- заголовки строк %}', 'ru')).toBe('{%- rowheaders %}') }) + test('fixes windowsTerminal → windowsterminal', () => { + expect(fix('{% windowsTerminal %}', 'ru')).toBe('{% windowsterminal %}') + expect(fix('{%- windowsTerminal %}', 'ru')).toBe('{%- windowsterminal %}') + }) + + test('fixes командная палитра ifversion → ifversion command-palette', () => { + expect(fix('{%- командная палитра ifversion %}', 'ru')).toBe( + '{%- ifversion command-palette %}', + ) + expect(fix('{% командная палитра ifversion %}', 'ru')).toBe('{% ifversion command-palette %}') + expect(fix('{%- 
командная палитра ifversion -%}', 'ru')).toBe( + '{%- ifversion command-palette -%}', + ) + }) + test('fixes translated feature flag names', () => { expect(fix('обязательный-2fa-dotcom-участник', 'ru')).toBe( 'mandatory-2fa-dotcom-contributors', @@ -765,6 +780,11 @@ describe('correctTranslatedContentStrings', () => { expect(fix('{% de data variables.product.github %}', 'fr')).toBe( '{% data variables.product.github %}', ) + // `{% données.variables.X %}` — dot instead of space after "données" + expect(fix('{% données.variables.copilot.copilot_chat_short %}', 'fr')).toBe( + '{% data variables.copilot.copilot_chat_short %}', + ) + expect(fix('{% données.reusables.foo.bar %}', 'fr')).toBe('{% data reusables.foo.bar %}') }) test('fixes translated else', () => { @@ -882,6 +902,24 @@ describe('correctTranslatedContentStrings', () => { expect(fix('[AUTOTITLE"을 참조하세요]', 'ko')).toBe('[AUTOTITLE]') }) + test('fixes datda → data typo', () => { + expect(fix('{% datda variables.product.github %}', 'ko')).toBe( + '{% data variables.product.github %}', + ) + expect(fix('{%- datda variables.copilot.foo %}', 'ko')).toBe( + '{%- data variables.copilot.foo %}', + ) + }) + + test('fixes data를 Korean-particle corruption', () => { + expect( + fix('{% data를 탐색하고 수락하기 variables.copilot.next_edit_suggestions %}', 'ko'), + ).toBe('{% data variables.copilot.next_edit_suggestions %}') + expect( + fix('{%- data를 탐색하고 수락하기 variables.copilot.next_edit_suggestions -%}', 'ko'), + ).toBe('{%- data variables.copilot.next_edit_suggestions -%}') + }) + test('fixes translated data tags', () => { expect(fix('{% 데이터 variables.product.github %}', 'ko')).toBe( '{% data variables.product.github %}', @@ -992,6 +1030,13 @@ describe('correctTranslatedContentStrings', () => { ) expect(fix('{% Daten reusables.foo %}', 'de')).toBe('{% data reusables.foo %}') expect(fix('{%- Daten reusables.foo %}', 'de')).toBe('{%- data reusables.foo %}') + // `{% Datenseite variables.` — "Datenseite" (data page) compound = data 
+ expect(fix('{% Datenseite variables.product.prodname_github_app %}', 'de')).toBe( + '{% data variables.product.prodname_github_app %}', + ) + expect(fix('{%- Datenseite variables.product.foo %}', 'de')).toBe( + '{%- data variables.product.foo %}', + ) }) test('fixes hyphenated data tags without space', () => { @@ -1552,6 +1597,132 @@ describe('correctTranslatedContentStrings', () => { // Valid table rows are not modified expect(fix('| a | b |\n| c | d |', 'es')).toBe('| a | b |\n| c | d |') }) + + test('rejoins dangling heading markers (all languages)', () => { + const broken = '### \n {% data variables.product.github %} の使用' + const expected = '### {% data variables.product.github %} の使用' + for (const lang of ['ja', 'de', 'es', 'fr', 'ko', 'pt', 'ru', 'zh']) { + expect(fix(broken, lang)).toBe(expected) + } + // All heading levels + expect(fix('# \n Title', 'ja')).toBe('# Title') + expect(fix('###### \n Title', 'ja')).toBe('###### Title') + // 0–3 leading spaces are accepted + expect(fix(' ### \n Title', 'ja')).toBe(' ### Title') + // Valid headings are not modified + expect(fix('### Already correct', 'ja')).toBe('### Already correct') + // 4-space indented heading-like text is not collapsed (looks like code) + expect(fix(' ###\n code', 'ja')).toBe(' ###\n code') + // Shallow next-line indent (<6) is not collapsed + expect(fix('### \n Title', 'ja')).toBe('### \n Title') + }) + + test('rejoins dangling blockquote markers (all languages)', () => { + const broken = '> \n {% data variables.product.github %} は preview 中です。' + const expected = '> {% data variables.product.github %} は preview 中です。' + for (const lang of ['ja', 'de', 'es', 'fr', 'ko', 'pt', 'ru', 'zh']) { + expect(fix(broken, lang)).toBe(expected) + } + // 0–3 leading spaces are accepted + expect(fix(' > \n Quote', 'ja')).toBe(' > Quote') + // Valid blockquotes are not modified + expect(fix('> Already correct', 'ja')).toBe('> Already correct') + expect(fix('>\n> Continued blockquote', 'ja')).toBe('>\n> 
Continued blockquote') + }) + + test('rejoins dangling bold-open after a marker (all languages)', () => { + const broken = + '* **\n {% data variables.product.prodname_copilot_short %}へのアクセス**。 More text' + const expected = + '* **{% data variables.product.prodname_copilot_short %}へのアクセス**。 More text' + for (const lang of ['ja', 'de', 'es', 'fr', 'ko', 'pt', 'ru', 'zh']) { + expect(fix(broken, lang)).toBe(expected) + } + // Numbered list marker + expect(fix('1. **\n Important**: text', 'ja')).toBe('1. **Important**: text') + // Heading marker + expect(fix('### **\n Bold heading**', 'ja')).toBe('### **Bold heading**') + // Blockquote marker + expect(fix('> **\n Quoted bold**', 'ja')).toBe('> **Quoted bold**') + // Table cell + expect(fix('| **\n Cell bold** | x', 'ja')).toBe('| **Cell bold** | x') + // Bare `**` (no preceding marker) is not collapsed — could be a closing + // bold marker followed by legitimate indented continuation. + expect(fix('**\n text', 'ja')).toBe('**\n text') + }) + + test('does not modify content inside fenced code blocks', () => { + // Markdown example inside ```md fence should be preserved verbatim + const fenced = '```md\n### \n Heading example\n```' + expect(fix(fenced, 'ja')).toBe(fenced) + // Tilde fences are also respected + const tilde = '~~~md\n> \n Quote example\n~~~' + expect(fix(tilde, 'ja')).toBe(tilde) + // Bold-open inside code fence + const boldFenced = '```md\n* **\n bold example**\n```' + expect(fix(boldFenced, 'ja')).toBe(boldFenced) + }) + + test('does not modify YAML frontmatter', () => { + // Multiline YAML scalars and indented values must not be joined + const fm = `--- +title: Example +intro: > + Multiline + continued +versions: + fpt: '*' +--- + +### + Real heading after frontmatter` + const expected = `--- +title: Example +intro: > + Multiline + continued +versions: + fpt: '*' +--- + +### Real heading after frontmatter` + expect(fix(fm, 'ja')).toBe(expected) + }) + + test('frontmatter containing fence-like 
characters does not break body fence tracking', () => { + // A multiline scalar in frontmatter that includes ``` (or ~~~) must + // NOT toggle the body's fence-tracking state. After frontmatter + // closes, dangling markers in the body should still be rejoined. + const fm = `--- +title: Example +intro: | + \`\`\` + fence-like text inside frontmatter + \`\`\` +--- + +### + Real heading after frontmatter` + const expected = `--- +title: Example +intro: | + \`\`\` + fence-like text inside frontmatter + \`\`\` +--- + +### Real heading after frontmatter` + expect(fix(fm, 'ja')).toBe(expected) + }) + + test('does not collapse nested-list indented code blocks', () => { + // A list item followed by blank line + 6-space-indented "code" should + // be left alone because the marker line itself is empty (not a + // bare `>`/`#`/`* **` form), and the previous content line is not + // a heading/blockquote/bold-open marker. + const nested = '1. Run this command:\n\n gh auth login' + expect(fix(nested, 'ja')).toBe(nested) + }) }) // ─── EDGE CASES ──────────────────────────────────────────────────── @@ -1833,4 +2004,27 @@ Para más información, consulta "[AUTOTITLE](/path)". expect(out).toBe(text) }) }) + + // ─── SCRAPE-6572: search-scrape failures ───────────────────────────── + // Tests for the per-file Liquid corrections added to stop the daily + // search-scrape failures reported in github/docs-engineering#6572. 
+ describe('SCRAPE-6572 per-file fixes', () => { + test('ko: configuring-access-to-private-registries-for-dependabot intro missing endif', () => { + const broken = + '자체 호스팅된 실행기에서 실행 중인 {% data variables.product.prodname_dependabot %}에 대한 액세스를 구성할 수도 있습니다.{% data variables.product.prodname_dependabot %}' + const out = fix(broken, 'ko') + expect(out).toContain('{% endif %}') + expect(out).not.toMatch( + /구성할 수도 있습니다\.\{% data variables\.product\.prodname_dependabot %\}$/, + ) + }) + + test('ru: viewing-a-projects-contributors intro swapped endif/ifversion', () => { + const broken = + 'Вы можете увидеть, кто внес{% endif %} коммиты в репозиторий{% ifversion fpt or ghec %} и его зависимости.' + const out = fix(broken, 'ru') + expect(out).not.toContain('внес{% endif %}') + expect(out).toMatch(/\{% ifversion fpt or ghec %\} и его зависимости\{% endif %\}\.$/) + }) + }) })