Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions .github/workflows/convert-and-publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Convert all Windows protocol specs to markdown, build a clean publish tree,
# then force-push it to an orphaned 'publish' branch (e.g. for GitHub Pages).
name: Convert and publish

# Manual trigger only.
on:
  workflow_dispatch:

# The last step pushes to the 'publish' branch using GITHUB_TOKEN. Request
# write access to repository contents explicitly so the push does not fail
# with 403 when the repository's default token permission is read-only.
permissions:
  contents: write

jobs:
  convert-and-publish:
    runs-on: windows-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install OpenXML module
        shell: pwsh
        run: |
          Set-PSRepository -Name PSGallery -InstallationPolicy Trusted
          Install-Module -Name OpenXML -Force -Scope CurrentUser

      # Download every catalog entry as DOCX and convert the ones that were
      # downloaded (or already present) to markdown under ./converted-specs.
      - name: Import module and convert all specs
        shell: pwsh
        run: |
          Import-Module .\AwakeCoding.OpenSpecs -Force
          Get-OpenSpecCatalog |
            Save-OpenSpecDocument -Format DOCX -OutputPath ./downloads-convert -Force |
            Where-Object { $_.Status -in 'Downloaded', 'Exists' } |
            Convert-OpenSpecToMarkdown -OutputPath ./converted-specs -Force

      # Each pwsh step runs in a fresh session, so the module is imported again.
      - name: Build publish directory and index
        shell: pwsh
        run: |
          Import-Module .\AwakeCoding.OpenSpecs -Force
          .\scripts\Prepare-Publish.ps1 -ConvertedSpecsPath ./converted-specs -PublishPath ./publish
          Update-OpenSpecIndex -Path ./publish

      - name: Zip publish contents
        shell: pwsh
        run: |
          Compress-Archive -Path ./publish/* -DestinationPath ./publish.zip -Force

      - name: Upload publish artifact
        uses: actions/upload-artifact@v4
        with:
          name: publish
          path: publish.zip

      # A brand-new repository is initialised inside ./publish, so the pushed
      # commit has no parent; --force replaces whatever 'publish' pointed at.
      - name: Push to orphaned publish branch
        shell: pwsh
        working-directory: publish
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          $RemoteRepo = "https://${Env:GITHUB_ACTOR}:${Env:GITHUB_TOKEN}@github.com/${Env:GITHUB_REPOSITORY}.git"
          git init
          git config user.name "GitHub Actions"
          git config user.email "github-actions-bot@users.noreply.github.com"
          git add .
          git commit -m "Publish converted Open Specs markdown (${Env:GITHUB_REPOSITORY})"
          git push --force "${RemoteRepo}" "HEAD:publish"
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
artifacts/
downloads/
downloads*/
converted*/
reports*/
3 changes: 2 additions & 1 deletion AwakeCoding.OpenSpecs/AwakeCoding.OpenSpecs.psd1
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
'Convert-OpenSpecToMarkdown',
'Invoke-OpenSpecConversionPipeline',
'Get-OpenSpecConversionReport',
'Test-OpenSpecMarkdownFidelity'
'Test-OpenSpecMarkdownFidelity',
'Update-OpenSpecIndex'
)
CmdletsToExport = @()
VariablesToExport = @()
Expand Down
591 changes: 556 additions & 35 deletions AwakeCoding.OpenSpecs/Private/ConvertFrom-OpenSpecDocx.ps1

Large diffs are not rendered by default.

102 changes: 98 additions & 4 deletions AwakeCoding.OpenSpecs/Private/Invoke-OpenSpecMarkdownCleanup.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ function Invoke-OpenSpecMarkdownCleanup {
$result = $tocResult.Markdown
foreach ($issue in $tocResult.Issues) { [void]$issues.Add($issue) }

$guidResult = Resolve-OpenSpecGuidSectionAnchors -Markdown $result
$result = $guidResult.Markdown
foreach ($issue in $guidResult.Issues) { [void]$issues.Add($issue) }

$mathResult = ConvertTo-OpenSpecNormalizedMathText -Markdown $result
$result = $mathResult.Markdown
foreach ($issue in $mathResult.Issues) { [void]$issues.Add($issue) }
Expand Down Expand Up @@ -672,7 +676,7 @@ function ConvertTo-OpenSpecInternalLinks {
if ($frag) { $frag } else { '#' }
}
else {
"../$id/index.md$frag"
"../$id/$id.md$frag"
}
$rewriteCount++
if ($rewriteSamples.Count -lt $sampleCap) {
Expand Down Expand Up @@ -817,6 +821,96 @@ function ConvertTo-OpenSpecNormalizedMathText {
}
}

function Resolve-OpenSpecGuidSectionAnchors {
    <#
    .SYNOPSIS
    Rewrites links that target GUID-based section anchors so they point at the
    neighbouring Section_X.Y.Z anchor instead.

    .DESCRIPTION
    In the converted markdown, each heading is preceded by a pair of anchor
    tags:
        <a id="section_<GUID>"></a>
        <a id="Section_X.Y.Z"></a>
    Cross-reference links in the body text reference sections using the GUID
    form (#Section_<GUID> or #section_<GUID>), which is both unreadable and may
    not resolve due to a case mismatch (the bookmark anchor uses lowercase
    "section_" while the hyperlink uses "Section_"). Replacing these with the
    Section_X.Y.Z form fixes both issues.

    The function first builds a GUID -> Section_X.Y.Z map from adjacent anchor
    pairs (either order), then rewrites every "(#section_<GUID>)" link target
    whose GUID is in the map. Links whose GUID is unknown are left untouched.
    All matching is case-insensitive, both for the "section_" prefix and for
    the hex digits of the GUID.

    .PARAMETER Markdown
    The markdown text to process.

    .OUTPUTS
    A pscustomobject with:
      Markdown - the (possibly rewritten) markdown text.
      Issues   - an array holding at most one 'GuidAnchorResolved' info record
                 (Count = links rewritten, MappedAnchors = map size).
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory)]
        [string]$Markdown
    )

    $issues = New-Object System.Collections.Generic.List[object]
    $result = $Markdown
    $ignoreCase = [System.Text.RegularExpressions.RegexOptions]::IgnoreCase

    # Anchor-pair patterns, evaluated in priority order. The first mapping seen
    # for a GUID wins, so every match of the common order is recorded before
    # the fallback order is consulted.
    $pairPatterns = @(
        # Order 1: GUID anchor followed by Section anchor (most common)
        '<a\s+id="section_(?<guid>[0-9a-f]{32})"></a>\s*\r?\n<a\s+id="(?<section>Section_\d+(?:\.\d+)*)"></a>',
        # Order 2: Section anchor followed by GUID anchor (fallback)
        '<a\s+id="(?<section>Section_\d+(?:\.\d+)*)"></a>\s*\r?\n<a\s+id="section_(?<guid>[0-9a-f]{32})"></a>'
    )

    $guidToSection = @{}
    foreach ($pattern in $pairPatterns) {
        foreach ($m in [regex]::Matches($result, $pattern, $ignoreCase)) {
            # Keys are normalised to lower case so lookups are case-independent.
            $guid = $m.Groups['guid'].Value.ToLowerInvariant()
            if (-not $guidToSection.ContainsKey($guid)) {
                $guidToSection[$guid] = $m.Groups['section'].Value
            }
        }
    }

    # Nothing to resolve: return the input unchanged.
    if ($guidToSection.Count -eq 0) {
        return [pscustomobject]@{
            Markdown = $result
            Issues   = $issues.ToArray()
        }
    }

    # Rewrite all link targets that reference GUID-based section anchors.
    # The pattern is case-insensitive so it accepts the same GUID spellings as
    # the anchor-pair patterns above (previously upper-case hex digits in a
    # link were silently skipped even though the anchor map knew the GUID).
    $linkRegex = [regex]::new('\(#section_(?<guid>[0-9a-f]{32})\)', $ignoreCase)

    # A hashtable is used as a mutable counter because the evaluator script
    # block runs in its own scope and cannot reassign a plain local variable.
    $rewriteCounter = @{ Value = 0 }
    $evaluator = [System.Text.RegularExpressions.MatchEvaluator] {
        param($m)
        $guid = $m.Groups['guid'].Value.ToLowerInvariant()
        if ($guidToSection.ContainsKey($guid)) {
            $rewriteCounter.Value++
            "(#$($guidToSection[$guid]))"
        }
        else {
            $m.Value
        }
    }
    $result = $linkRegex.Replace($result, $evaluator)
    $rewriteCount = $rewriteCounter.Value

    if ($rewriteCount -gt 0) {
        [void]$issues.Add([pscustomobject]@{
                Type          = 'GuidAnchorResolved'
                Severity      = 'Info'
                Count         = $rewriteCount
                MappedAnchors = $guidToSection.Count
                Reason        = 'GUID-based section anchors were resolved to section number anchors.'
            })
    }

    [pscustomobject]@{
        Markdown = $result
        Issues   = $issues.ToArray()
    }
}

function Resolve-OpenSpecLinkTarget {
[CmdletBinding()]
param(
Expand All @@ -841,7 +935,7 @@ function Resolve-OpenSpecLinkTarget {
return [pscustomobject]@{ Url = if ($fragment) { $fragment } else { '#' }; Rewritten = $true }
}

return [pscustomobject]@{ Url = "../$targetId/index.md$fragment"; Rewritten = $true }
return [pscustomobject]@{ Url = "../$targetId/$targetId.md$fragment"; Rewritten = $true }
}

if ($decoded -match '(?i)(?:https?://learn\.microsoft\.com)?/?openspecs/windows_protocols/(?<slug>(?:ms|mc)-[a-z0-9-]+)(?:/[^#?]+)?') {
Expand All @@ -850,7 +944,7 @@ function Resolve-OpenSpecLinkTarget {
return [pscustomobject]@{ Url = if ($fragment) { $fragment } else { '#' }; Rewritten = $true }
}

return [pscustomobject]@{ Url = "../$targetId/index.md$fragment"; Rewritten = $true }
return [pscustomobject]@{ Url = "../$targetId/$targetId.md$fragment"; Rewritten = $true }
}

if ($decoded -match '(?i)%5b(?<id>(?:MS|MC)-[A-Z0-9-]+)%5d\.(?:pdf|docx)$') {
Expand All @@ -859,7 +953,7 @@ function Resolve-OpenSpecLinkTarget {
return [pscustomobject]@{ Url = if ($fragment) { $fragment } else { '#' }; Rewritten = $true }
}

return [pscustomobject]@{ Url = "../$targetId/index.md$fragment"; Rewritten = $true }
return [pscustomobject]@{ Url = "../$targetId/$targetId.md$fragment"; Rewritten = $true }
}

return [pscustomobject]@{ Url = $Url; Rewritten = $false }
Expand Down
5 changes: 3 additions & 2 deletions AwakeCoding.OpenSpecs/Public/Convert-OpenSpecToMarkdown.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ function Convert-OpenSpecToMarkdown {
[void](New-Item -Path $artifactDirectory -ItemType Directory -Force)
}

$markdownPath = Join-Path -Path $specDirectory -ChildPath 'index.md'
$markdownPath = Join-Path -Path $specDirectory -ChildPath "$safeProtocol.md"
if ((Test-Path -LiteralPath $markdownPath) -and -not $Force) {
[pscustomobject]@{
PSTypeName = 'AwakeCoding.OpenSpecs.ConversionResult'
Expand All @@ -112,7 +112,8 @@ function Convert-OpenSpecToMarkdown {
if ($resolvedFormat -eq 'DOCX') {
$toolchain = Get-OpenSpecToolchain -RequireDocxConverter
$rawMarkdownPath = Join-Path -Path $artifactDirectory -ChildPath 'raw-docx.md'
$conversionStep = ConvertFrom-OpenSpecDocx -InputPath $sourcePath -OutputPath $rawMarkdownPath -Toolchain $toolchain
$mediaDirectory = Join-Path -Path $specDirectory -ChildPath 'media'
$conversionStep = ConvertFrom-OpenSpecDocx -InputPath $sourcePath -OutputPath $rawMarkdownPath -Toolchain $toolchain -MediaOutputDirectory $mediaDirectory
}
elseif ($resolvedFormat -eq 'PDF') {
$toolchain = Get-OpenSpecToolchain -RequirePdfConverter
Expand Down
41 changes: 39 additions & 2 deletions AwakeCoding.OpenSpecs/Public/Test-OpenSpecMarkdownFidelity.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,39 @@ function Test-OpenSpecMarkdownFidelity {
}

[bool]$hasHeadings = $markdown -match '(?m)^#'
[bool]$hasTables = $markdown -match '(?m)^\|.+\|$'
[bool]$hasTables = $markdown -match '(?m)^\|.+\|\r?$'
[bool]$hasNormative = $markdown -match '\b(MUST|SHOULD|MAY|REQUIRED|OPTIONAL)\b'

$pass = $hasHeadings -and $hasTables
# Anchor validation: check that TOC links resolve and anchors are correct
$sectionAnchors = [regex]::Matches($markdown, '<a id="Section_[^"]+"></a>')
$tocAnchors = [regex]::Matches($markdown, '<a id="_Toc\d+"></a>')
$tocLinks = [regex]::Matches($markdown, '\]\(#Section_[^)]+\)')
$numberedHeadings = [regex]::Matches($markdown, '(?m)^#{1,6} \d+')
$boldPairs = [int]([regex]::Matches($markdown, '\*\*').Count / 2)

[bool]$hasSectionAnchors = $sectionAnchors.Count -gt 0
[bool]$noTocAnchors = $tocAnchors.Count -eq 0
[bool]$hasTocLinks = $tocLinks.Count -gt 0
[bool]$hasNumberedHeadings = $numberedHeadings.Count -gt 0

# Validate that TOC links resolve to existing anchors
$anchorIds = [System.Collections.Generic.HashSet[string]]::new(
[System.StringComparer]::OrdinalIgnoreCase
)
foreach ($m in [regex]::Matches($markdown, '<a id="([^"]+)"></a>')) {
[void]$anchorIds.Add($m.Groups[1].Value)
}

$unresolvedLinks = 0
foreach ($m in [regex]::Matches($markdown, '\]\(#([^)]+)\)')) {
$target = $m.Groups[1].Value
if (-not $anchorIds.Contains($target)) {
$unresolvedLinks++
}
}

$pass = $hasHeadings -and $hasTables -and $hasSectionAnchors -and
$noTocAnchors -and $hasTocLinks -and $hasNumberedHeadings

[pscustomobject]@{
PSTypeName = 'AwakeCoding.OpenSpecs.FidelityResult'
Expand All @@ -27,6 +56,14 @@ function Test-OpenSpecMarkdownFidelity {
HasHeadings = $hasHeadings
HasTables = $hasTables
HasNormativeKeywords = $hasNormative
HasSectionAnchors = $hasSectionAnchors
SectionAnchorCount = $sectionAnchors.Count
NoTocAnchors = $noTocAnchors
TocAnchorCount = $tocAnchors.Count
TocLinkCount = $tocLinks.Count
NumberedHeadingCount = $numberedHeadings.Count
BoldPairCount = $boldPairs
UnresolvedLinkCount = $unresolvedLinks
IssueCount = $report.IssueCount
MarkdownPath = $report.MarkdownPath
ReportPath = $report.ReportPath
Expand Down
77 changes: 77 additions & 0 deletions AwakeCoding.OpenSpecs/Public/Update-OpenSpecIndex.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
function Update-OpenSpecIndex {
    <#
    .SYNOPSIS
    Generates a README.md index table listing every converted spec found under
    a directory.

    .DESCRIPTION
    Enumerates the immediate sub-directories of -Path (sorted by name). For
    each one it looks for "<dir>/<dir>.md", falling back to "<dir>/index.md";
    directories with neither file are skipped. The spec title is read from the
    third line of that markdown file, with surrounding bold markers removed,
    and defaults to the directory name when missing or blank.

    The resulting "| Protocol | Title |" table is written to README.md inside
    -Path, overwriting any existing file.

    .PARAMETER Path
    Directory that contains one sub-directory per converted spec.

    .OUTPUTS
    A pscustomobject (type name AwakeCoding.OpenSpecs.IndexResult) with the
    README path and the number of entries written.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory)]
        [string]$Path
    )

    # Require a directory: a plain file would pass a bare Test-Path and only
    # fail much later when README.md is written "inside" it.
    if (-not (Test-Path -LiteralPath $Path -PathType Container)) {
        throw "Output directory not found: $Path"
    }

    $specDirs = Get-ChildItem -LiteralPath $Path -Directory | Sort-Object Name
    $entries = New-Object System.Collections.Generic.List[pscustomobject]

    foreach ($dir in $specDirs) {
        $specName = $dir.Name
        $mdFile = Join-Path -Path $dir.FullName -ChildPath "$specName.md"

        # Fall back to index.md for specs not yet reconverted.
        if (-not (Test-Path -LiteralPath $mdFile)) {
            $mdFile = Join-Path -Path $dir.FullName -ChildPath 'index.md'
        }

        if (-not (Test-Path -LiteralPath $mdFile)) {
            continue
        }

        $mdFileName = [System.IO.Path]::GetFileName($mdFile)

        # Extract the title from line 3 of the markdown.
        # Expected format:
        #   Line 1: **[MS-RDPECLIP]:**
        #   Line 2: (blank)
        #   Line 3: **Remote Desktop Protocol: Clipboard Virtual Channel Extension**
        # @() guarantees an array even when the file has zero or one line, so
        # the index below always addresses a line and never a character.
        $lines = @(Get-Content -LiteralPath $mdFile -TotalCount 5)
        $title = ''
        if ($lines.Count -ge 3) {
            $rawTitle = $lines[2]
            # Strip surrounding bold markers (**...**)
            $title = $rawTitle -replace '^\*\*(.+)\*\*$', '$1'
            $title = $title.Trim()
        }

        if ([string]::IsNullOrWhiteSpace($title)) {
            $title = $specName
        }

        # A literal '|' in the title would be read as a column separator and
        # break the table row; escape it for Markdown table cells.
        $title = $title -replace '\|', '\|'

        [void]$entries.Add([pscustomobject]@{
                Name  = $specName
                Title = $title
                Link  = "$specName/$mdFileName"
            })
    }

    $sb = New-Object System.Text.StringBuilder
    [void]$sb.AppendLine('# Microsoft Open Specifications')
    [void]$sb.AppendLine()
    [void]$sb.AppendLine("$($entries.Count) protocol specifications converted to Markdown.")
    [void]$sb.AppendLine()
    [void]$sb.AppendLine('| Protocol | Title |')
    [void]$sb.AppendLine('|---|---|')

    foreach ($entry in $entries) {
        [void]$sb.AppendLine("| [$($entry.Name)]($($entry.Link)) | $($entry.Title) |")
    }

    $readmePath = Join-Path -Path $Path -ChildPath 'README.md'
    $sb.ToString() | Set-Content -LiteralPath $readmePath -Encoding UTF8

    Write-Verbose "Generated index at $readmePath with $($entries.Count) entries."

    [pscustomobject]@{
        PSTypeName = 'AwakeCoding.OpenSpecs.IndexResult'
        Path       = $readmePath
        EntryCount = $entries.Count
    }
}
Loading