Skip to content

Commit

Permalink
Fix parsing of HTML returned from raw API
Browse files Browse the repository at this point in the history
GitHub must have changed something on their end; the HTML returned by the API appears to have changed slightly.
  • Loading branch information
jkburges committed Sep 21, 2023
1 parent 661b5c5 commit daabc77
Showing 1 changed file with 8 additions and 7 deletions.
15 changes: 8 additions & 7 deletions gh-md-toc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/usr/bin/env bash

set -x
#
# Steps:
#
Expand Down Expand Up @@ -235,23 +236,23 @@ gh_toc_grab() {
}
modified_href = modified_href res
}
print sprintf("%*s", (level-1)*'"$2"', "") "* [" text "](" gh_url modified_href ")"
print sprintf("%*s", (level)*'"$2"', "") "* [" text "](" gh_url modified_href ")"
'
if [ `uname -s` == "OS/390" ]; then
grepcmd="pcregrep -o"
echoargs=""
awkscript='{
level = substr($0, length($0), 1)
text = substr($0, match($0, /a>.*<\/h/)+2, RLENGTH-5)
level = substr($0, 3, 1)
text = substr($0, match($0, />[^<]*<span aria-hidden/)+1, RLENGTH-17)
href = substr($0, match($0, "href=\"([^\"]+)?\"")+6, RLENGTH-7)
'"$common_awk_script"'
}'
else
grepcmd="grep -Eo"
echoargs="-e"
awkscript='{
level = substr($0, length($0), 1)
text = substr($0, match($0, /a>.*<\/h/)+2, RLENGTH-5)
level = substr($0, 3, 1)
text = substr($0, match($0, />[^<]*<span aria-hidden/)+1, RLENGTH-17)
href = substr($0, match($0, "href=\"[^\"]+?\"")+6, RLENGTH-7)
'"$common_awk_script"'
}'
Expand All @@ -266,7 +267,7 @@ gh_toc_grab() {
sed -e ':a' -e 'N' -e '$!ba' -e 's/\n<\/h/<\/h/g' |

# find strings that correspond to the template
$grepcmd '<a.*id="user-content-[^"]*".*</h[1-6]' |
$grepcmd '<h.*id="user-content-[^"]*".*</h[1-6]' |

# remove code tags
sed 's/<code>//g' | sed 's/<\/code>//g' |
Expand All @@ -275,7 +276,7 @@ gh_toc_grab() {
sed 's/<g-emoji[^>]*[^<]*<\/g-emoji> //g' |

# now all rows are like:
# <a id="user-content-..." href="..."><span ...></span></a> ... </h1
# <h1 id="user-content-..."><a href="..."> ... <span ...></span></a></h1
# format result line
# * $0 - whole string
# * last element of each row: "</hN" where N in (1,2,3,...)
Expand Down

0 comments on commit daabc77

Please sign in to comment.