Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve code diff highlight, fix incorrect rendered diff result #19958

Merged
merged 34 commits into from Jul 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
7850972
Improve code highlight
wxiaoguang Jun 13, 2022
ca6a537
Merge branch 'main' into fix-code-highlight
wxiaoguang Jun 13, 2022
f0e2f97
remove unnecessary code, add more comments
wxiaoguang Jun 13, 2022
ec8da9e
Merge branch 'main' into fix-code-highlight
wxiaoguang Jun 13, 2022
de042fc
Merge branch 'main' into fix-code-highlight
wxiaoguang Jun 14, 2022
62eb155
clear edge cases, fine tune tests
wxiaoguang Jun 14, 2022
bb64dff
remove line wrapper tags when doing highlight code diff
wxiaoguang Jun 15, 2022
9b183f5
Merge branch 'main' into fix-code-highlight
wxiaoguang Jun 15, 2022
f172f88
Merge branch 'main' into fix-code-highlight
wxiaoguang Jun 15, 2022
6226d38
Merge branch 'main' into fix-code-highlight
wxiaoguang Jun 17, 2022
adaf320
fine tune
wxiaoguang Jun 17, 2022
9b5b079
Merge branch 'main' into fix-code-highlight
wxiaoguang Jun 17, 2022
4aa94e8
Update services/gitdiff/gitdiff.go
wxiaoguang Jun 17, 2022
17e0660
Merge branch 'main' into fix-code-highlight
lunny Jun 18, 2022
77445c2
fine tune comments
wxiaoguang Jun 18, 2022
6b359d9
Merge branch 'main' into fix-code-highlight
wxiaoguang Jun 18, 2022
9953e09
Merge branch 'main' into fix-code-highlight
wxiaoguang Jun 18, 2022
5fc6801
Merge branch 'main' into fix-code-highlight
lunny Jun 18, 2022
57f7bf8
Merge branch 'main' into fix-code-highlight
lunny Jun 27, 2022
47030f1
Merge branch 'main' into fix-code-highlight
lafriks Jun 27, 2022
27c78e1
Merge branch 'main' into fix-code-highlight
zeripath Jul 6, 2022
df5fef8
Update gitdiff.go
wxiaoguang Jul 7, 2022
843579a
Update gitdiff.go
wxiaoguang Jul 7, 2022
2e798d8
Update gitdiff.go
wxiaoguang Jul 7, 2022
ece0e3f
more tests
wxiaoguang Jul 9, 2022
c9a1566
private function & private struct
wxiaoguang Jul 9, 2022
f21f422
demo for why the tags are all closed
wxiaoguang Jul 9, 2022
3c2aa66
add comment
wxiaoguang Jul 9, 2022
90c6440
placeholder for html entity
wxiaoguang Jul 9, 2022
dc74baf
fine tune comments
wxiaoguang Jul 9, 2022
4afb1cc
always output the code/name part of the html entities if the placehol…
wxiaoguang Jul 9, 2022
4d1d8a4
fine tune comments
wxiaoguang Jul 9, 2022
9cf358b
demo why the tags are always balanced
wxiaoguang Jul 9, 2022
e6215ae
Merge branch 'main' into fix-code-highlight
wxiaoguang Jul 23, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 5 additions & 3 deletions modules/highlight/highlight.go
Expand Up @@ -40,9 +40,11 @@ var (
// NewContext loads custom highlight map from local config
func NewContext() {
once.Do(func() {
keys := setting.Cfg.Section("highlight.mapping").Keys()
for i := range keys {
highlightMapping[keys[i].Name()] = keys[i].Value()
if setting.Cfg != nil {
keys := setting.Cfg.Section("highlight.mapping").Keys()
for i := range keys {
highlightMapping[keys[i].Name()] = keys[i].Value()
}
}

// The size 512 is simply a conservative rule of thumb
Expand Down
312 changes: 21 additions & 291 deletions services/gitdiff/gitdiff.go
Expand Up @@ -15,7 +15,6 @@ import (
"io"
"net/url"
"os"
"regexp"
"sort"
"strings"
"time"
Expand All @@ -40,7 +39,7 @@ import (
"golang.org/x/text/transform"
)

// DiffLineType represents the type of a DiffLine.
// DiffLineType represents the type of DiffLine.
type DiffLineType uint8

// DiffLineType possible values.
Expand All @@ -51,7 +50,7 @@ const (
DiffLineSection
)

// DiffFileType represents the type of a DiffFile.
// DiffFileType represents the type of DiffFile.
type DiffFileType uint8

// DiffFileType possible values.
Expand Down Expand Up @@ -100,12 +99,12 @@ type DiffLineSectionInfo struct {
// BlobExcerptChunkSize represent max lines of excerpt
const BlobExcerptChunkSize = 20

// GetType returns the type of a DiffLine.
// GetType returns the type of DiffLine.
func (d *DiffLine) GetType() int {
return int(d.Type)
}

// CanComment returns whether or not a line can get commented
// CanComment returns whether a line can get commented
func (d *DiffLine) CanComment() bool {
return len(d.Comments) == 0 && d.Type != DiffLineSection
}
Expand Down Expand Up @@ -191,287 +190,13 @@ var (
codeTagSuffix = []byte(`</span>`)
)

var (
unfinishedtagRegex = regexp.MustCompile(`<[^>]*$`)
trailingSpanRegex = regexp.MustCompile(`<span\s*[[:alpha:]="]*?[>]?$`)
entityRegex = regexp.MustCompile(`&[#]*?[0-9[:alpha:]]*$`)
)

// shouldWriteInline represents combinations where we manually write inline changes
func shouldWriteInline(diff diffmatchpatch.Diff, lineType DiffLineType) bool {
if true &&
diff.Type == diffmatchpatch.DiffEqual ||
diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd ||
diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel {
return true
}
return false
}

func fixupBrokenSpans(diffs []diffmatchpatch.Diff) []diffmatchpatch.Diff {
// Create a new array to store our fixed up blocks
fixedup := make([]diffmatchpatch.Diff, 0, len(diffs))

// semantically label some numbers
const insert, delete, equal = 0, 1, 2

// record the positions of the last type of each block in the fixedup blocks
last := []int{-1, -1, -1}
operation := []diffmatchpatch.Operation{diffmatchpatch.DiffInsert, diffmatchpatch.DiffDelete, diffmatchpatch.DiffEqual}

// create a writer for insert and deletes
toWrite := []strings.Builder{
{},
{},
}

// make some flags for insert and delete
unfinishedTag := []bool{false, false}
unfinishedEnt := []bool{false, false}

// store stores the provided text in the writer for the typ
store := func(text string, typ int) {
(&(toWrite[typ])).WriteString(text)
}

// hasStored returns true if there is stored content
hasStored := func(typ int) bool {
return (&toWrite[typ]).Len() > 0
}

// stored will return that content
stored := func(typ int) string {
return (&toWrite[typ]).String()
}

// empty will empty the stored content
empty := func(typ int) {
(&toWrite[typ]).Reset()
}

// pop will remove the stored content appending to a diff block for that typ
pop := func(typ int, fixedup []diffmatchpatch.Diff) []diffmatchpatch.Diff {
if hasStored(typ) {
if last[typ] > last[equal] {
fixedup[last[typ]].Text += stored(typ)
} else {
fixedup = append(fixedup, diffmatchpatch.Diff{
Type: operation[typ],
Text: stored(typ),
})
}
empty(typ)
}
return fixedup
}

// Now we walk the provided diffs and check the type of each block in turn
for _, diff := range diffs {

typ := delete // flag for handling insert or delete typs
switch diff.Type {
case diffmatchpatch.DiffEqual:
// First check if there is anything stored
if hasStored(insert) || hasStored(delete) {
// There are two reasons for storing content:
// 1. Unfinished Entity <- Could be more efficient here by not doing this if we're looking for a tag
if unfinishedEnt[insert] || unfinishedEnt[delete] {
// we look for a ';' to finish an entity
idx := strings.IndexRune(diff.Text, ';')
if idx >= 0 {
// if we find a ';' store the preceding content to both insert and delete
store(diff.Text[:idx+1], insert)
store(diff.Text[:idx+1], delete)

// and remove it from this block
diff.Text = diff.Text[idx+1:]

// reset the ent flags
unfinishedEnt[insert] = false
unfinishedEnt[delete] = false
} else {
// otherwise store it all on insert and delete
store(diff.Text, insert)
store(diff.Text, delete)
// and empty this block
diff.Text = ""
}
}
// 2. Unfinished Tag
if unfinishedTag[insert] || unfinishedTag[delete] {
// we look for a '>' to finish a tag
idx := strings.IndexRune(diff.Text, '>')
if idx >= 0 {
store(diff.Text[:idx+1], insert)
store(diff.Text[:idx+1], delete)
diff.Text = diff.Text[idx+1:]
unfinishedTag[insert] = false
unfinishedTag[delete] = false
} else {
store(diff.Text, insert)
store(diff.Text, delete)
diff.Text = ""
}
}

// If we've completed the required tag/entities
if !(unfinishedTag[insert] || unfinishedTag[delete] || unfinishedEnt[insert] || unfinishedEnt[delete]) {
// pop off the stack
fixedup = pop(insert, fixedup)
fixedup = pop(delete, fixedup)
}

// If that has left this diff block empty then shortcut
if len(diff.Text) == 0 {
continue
}
}

// check if this block ends in an unfinished tag?
idx := unfinishedtagRegex.FindStringIndex(diff.Text)
if idx != nil {
unfinishedTag[insert] = true
unfinishedTag[delete] = true
} else {
// otherwise does it end in an unfinished entity?
idx = entityRegex.FindStringIndex(diff.Text)
if idx != nil {
unfinishedEnt[insert] = true
unfinishedEnt[delete] = true
}
}

// If there is an unfinished component
if idx != nil {
// Store the fragment
store(diff.Text[idx[0]:], insert)
store(diff.Text[idx[0]:], delete)
// and remove it from this block
diff.Text = diff.Text[:idx[0]]
}

// If that hasn't left the block empty
if len(diff.Text) > 0 {
// store the position of the last equal block and store it in our diffs
last[equal] = len(fixedup)
fixedup = append(fixedup, diff)
}
continue
case diffmatchpatch.DiffInsert:
typ = insert
fallthrough
case diffmatchpatch.DiffDelete:
// First check if there is anything stored for this type
if hasStored(typ) {
// if there is prepend it to this block, empty the storage and reset our flags
diff.Text = stored(typ) + diff.Text
empty(typ)
unfinishedEnt[typ] = false
unfinishedTag[typ] = false
}

// check if this block ends in an unfinished tag
idx := unfinishedtagRegex.FindStringIndex(diff.Text)
if idx != nil {
unfinishedTag[typ] = true
} else {
// otherwise does it end in an unfinished entity
idx = entityRegex.FindStringIndex(diff.Text)
if idx != nil {
unfinishedEnt[typ] = true
}
}

// If there is an unfinished component
if idx != nil {
// Store the fragment
store(diff.Text[idx[0]:], typ)
// and remove it from this block
diff.Text = diff.Text[:idx[0]]
}

// If that hasn't left the block empty
if len(diff.Text) > 0 {
// if the last block of this type was after the last equal block
if last[typ] > last[equal] {
// store this blocks content on that block
fixedup[last[typ]].Text += diff.Text
} else {
// otherwise store the position of the last block of this type and store the block
last[typ] = len(fixedup)
fixedup = append(fixedup, diff)
}
}
continue
}
}

// pop off any remaining stored content
fixedup = pop(insert, fixedup)
fixedup = pop(delete, fixedup)

return fixedup
}

func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineType) DiffInline {
func diffToHTML(lineWrapperTags []string, diffs []diffmatchpatch.Diff, lineType DiffLineType) string {
buf := bytes.NewBuffer(nil)
match := ""

diffs = fixupBrokenSpans(diffs)

// restore the line wrapper tags <span class="line"> and <span class="cl">, if necessary
for _, tag := range lineWrapperTags {
buf.WriteString(tag)
}
for _, diff := range diffs {
if shouldWriteInline(diff, lineType) {
if len(match) > 0 {
diff.Text = match + diff.Text
match = ""
}
// Chroma HTML syntax highlighting is done before diffing individual lines in order to maintain consistency.
// Since inline changes might split in the middle of a chroma span tag or HTML entity, make we manually put it back together
// before writing so we don't try insert added/removed code spans in the middle of one of those
// and create broken HTML. This is done by moving incomplete HTML forward until it no longer matches our pattern of
// a line ending with an incomplete HTML entity or partial/opening <span>.

// EX:
// diffs[{Type: dmp.DiffDelete, Text: "language</span><span "},
// {Type: dmp.DiffEqual, Text: "c"},
// {Type: dmp.DiffDelete, Text: "lass="p">}]

// After first iteration
// diffs[{Type: dmp.DiffDelete, Text: "language</span>"}, //write out
// {Type: dmp.DiffEqual, Text: "<span c"},
// {Type: dmp.DiffDelete, Text: "lass="p">,</span>}]

// After second iteration
// {Type: dmp.DiffEqual, Text: ""}, // write out
// {Type: dmp.DiffDelete, Text: "<span class="p">,</span>}]

// Final
// {Type: dmp.DiffDelete, Text: "<span class="p">,</span>}]
// end up writing <span class="removed-code"><span class="p">,</span></span>
// Instead of <span class="removed-code">lass="p",</span></span>

m := trailingSpanRegex.FindStringSubmatchIndex(diff.Text)
if m != nil {
match = diff.Text[m[0]:m[1]]
diff.Text = strings.TrimSuffix(diff.Text, match)
}
m = entityRegex.FindStringSubmatchIndex(diff.Text)
if m != nil {
match = diff.Text[m[0]:m[1]]
diff.Text = strings.TrimSuffix(diff.Text, match)
}
// Print an existing closing span first before opening added/remove-code span so it doesn't unintentionally close it
if strings.HasPrefix(diff.Text, "</span>") {
buf.WriteString("</span>")
diff.Text = strings.TrimPrefix(diff.Text, "</span>")
}
// If we weren't able to fix it then this should avoid broken HTML by not inserting more spans below
// The previous/next diff section will contain the rest of the tag that is missing here
if strings.Count(diff.Text, "<") != strings.Count(diff.Text, ">") {
buf.WriteString(diff.Text)
continue
}
}
switch {
case diff.Type == diffmatchpatch.DiffEqual:
buf.WriteString(diff.Text)
Expand All @@ -485,7 +210,10 @@ func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineT
buf.Write(codeTagSuffix)
}
}
return DiffInlineWithUnicodeEscape(template.HTML(buf.String()))
for range lineWrapperTags {
buf.WriteString("</span>")
}
return buf.String()
}

// GetLine gets a specific line by type (add or del) and file line number
Expand Down Expand Up @@ -597,10 +325,12 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) Dif
return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content)
}

diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, language, diff1[1:]), highlight.Code(diffSection.FileName, language, diff2[1:]), true)
diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)

return diffToHTML(diffSection.FileName, diffRecord, diffLine.Type)
hcd := newHighlightCodeDiff()
diffRecord := hcd.diffWithHighlight(diffSection.FileName, language, diff1[1:], diff2[1:])
// it seems that Gitea doesn't need the line wrapper of Chroma, so do not add them back
// if the line wrappers are still needed in the future, it can be added back by "diffToHTML(hcd.lineWrapperTags. ...)"
diffHTML := diffToHTML(nil, diffRecord, diffLine.Type)
return DiffInlineWithUnicodeEscape(template.HTML(diffHTML))
}

// DiffFile represents a file diff.
Expand Down Expand Up @@ -1289,7 +1019,7 @@ func readFileName(rd *strings.Reader) (string, bool) {
if char == '"' {
fmt.Fscanf(rd, "%q ", &name)
if len(name) == 0 {
log.Error("Reader has no file name: %v", rd)
log.Error("Reader has no file name: reader=%+v", rd)
return "", true
}

Expand All @@ -1311,7 +1041,7 @@ func readFileName(rd *strings.Reader) (string, bool) {
}
}
if len(name) < 2 {
log.Error("Unable to determine name from reader: %v", rd)
log.Error("Unable to determine name from reader: reader=%+v", rd)
return "", true
}
return name[2:], ambiguity
Expand Down