diff --git a/.gitignore b/.gitignore index 9cf7e3821..b018fafac 100644 --- a/.gitignore +++ b/.gitignore @@ -17,4 +17,6 @@ bin/ .DS_Store # binary -github-mcp-server \ No newline at end of file +github-mcp-server + +.history \ No newline at end of file diff --git a/pkg/github/issues.go b/pkg/github/issues.go index a43979bad..94f2f35e8 100644 --- a/pkg/github/issues.go +++ b/pkg/github/issues.go @@ -10,6 +10,7 @@ import ( "time" ghErrors "github.com/github/github-mcp-server/pkg/errors" + "github.com/github/github-mcp-server/pkg/sanitize" "github.com/github/github-mcp-server/pkg/translations" "github.com/go-viper/mapstructure/v2" "github.com/google/go-github/v76/github" @@ -211,7 +212,7 @@ func fragmentToIssue(fragment IssueFragment) *github.Issue { return &github.Issue{ Number: github.Ptr(int(fragment.Number)), - Title: github.Ptr(string(fragment.Title)), + Title: github.Ptr(sanitize.FilterInvisibleCharacters(string(fragment.Title))), CreatedAt: &github.Timestamp{Time: fragment.CreatedAt.Time}, UpdatedAt: &github.Timestamp{Time: fragment.UpdatedAt.Time}, User: &github.User{ @@ -219,7 +220,7 @@ func fragmentToIssue(fragment IssueFragment) *github.Issue { }, State: github.Ptr(string(fragment.State)), ID: github.Ptr(fragment.DatabaseID), - Body: github.Ptr(string(fragment.Body)), + Body: github.Ptr(sanitize.FilterInvisibleCharacters(string(fragment.Body))), Labels: foundLabels, Comments: github.Ptr(int(fragment.Comments.TotalCount)), } @@ -323,6 +324,16 @@ func GetIssue(ctx context.Context, client *github.Client, owner string, repo str return mcp.NewToolResultError(fmt.Sprintf("failed to get issue: %s", string(body))), nil } + // Sanitize title/body on response + if issue != nil { + if issue.Title != nil { + issue.Title = github.Ptr(sanitize.FilterInvisibleCharacters(*issue.Title)) + } + if issue.Body != nil { + issue.Body = github.Ptr(sanitize.FilterInvisibleCharacters(*issue.Body)) + } + } + r, err := json.Marshal(issue) if err != nil { return nil, fmt.Errorf("failed to marshal issue: %w", err) diff --git a/pkg/github/pullrequests.go b/pkg/github/pullrequests.go index 76be20311..4f5e1952c 100644 --- a/pkg/github/pullrequests.go +++ b/pkg/github/pullrequests.go @@ -14,6 +14,7 @@ import ( "github.com/shurcooL/githubv4" ghErrors "github.com/github/github-mcp-server/pkg/errors" + "github.com/github/github-mcp-server/pkg/sanitize" "github.com/github/github-mcp-server/pkg/translations" ) @@ -123,6 +124,16 @@ func GetPullRequest(ctx context.Context, client *github.Client, owner, repo stri return mcp.NewToolResultError(fmt.Sprintf("failed to get pull request: %s", string(body))), nil } + // sanitize title/body on response + if pr != nil { + if pr.Title != nil { + pr.Title = github.Ptr(sanitize.FilterInvisibleCharacters(*pr.Title)) + } + if pr.Body != nil { + pr.Body = github.Ptr(sanitize.FilterInvisibleCharacters(*pr.Body)) + } + } + r, err := json.Marshal(pr) if err != nil { return nil, fmt.Errorf("failed to marshal response: %w", err) @@ -804,6 +815,19 @@ func ListPullRequests(getClient GetClientFn, t translations.TranslationHelperFun return mcp.NewToolResultError(fmt.Sprintf("failed to list pull requests: %s", string(body))), nil } + // sanitize title/body on each PR + for _, pr := range prs { + if pr == nil { + continue + } + if pr.Title != nil { + pr.Title = github.Ptr(sanitize.FilterInvisibleCharacters(*pr.Title)) + } + if pr.Body != nil { + pr.Body = github.Ptr(sanitize.FilterInvisibleCharacters(*pr.Body)) + } + } + r, err := json.Marshal(prs) if err != nil { return nil, fmt.Errorf("failed to marshal response: %w", err) diff --git a/pkg/sanitize/sanitize.go b/pkg/sanitize/sanitize.go new file mode 100644 index 000000000..d6c224d2e --- /dev/null +++ b/pkg/sanitize/sanitize.go @@ -0,0 +1,56 @@ +package sanitize + +// FilterInvisibleCharacters removes invisible or control characters that should not appear +// in user-facing titles or bodies. This includes: +// - Unicode tag characters: U+E0001, U+E0020–U+E007F +// - BiDi control characters: U+202A–U+202E, U+2066–U+2069 +// - Hidden modifier characters: U+200B, U+200C, U+200E, U+200F, U+00AD, U+FEFF, U+180E, U+2060–U+2064 +func FilterInvisibleCharacters(input string) string { + if input == "" { + return input + } + + // Filter runes + out := make([]rune, 0, len(input)) + for _, r := range input { + if !shouldRemoveRune(r) { + out = append(out, r) + } + } + return string(out) +} + +func shouldRemoveRune(r rune) bool { + switch r { + case 0x200B, // ZERO WIDTH SPACE + 0x200C, // ZERO WIDTH NON-JOINER + 0x200E, // LEFT-TO-RIGHT MARK + 0x200F, // RIGHT-TO-LEFT MARK + 0x00AD, // SOFT HYPHEN + 0xFEFF, // ZERO WIDTH NO-BREAK SPACE + 0x180E: // MONGOLIAN VOWEL SEPARATOR + return true + case 0xE0001: // TAG + return true + } + + // Ranges + // Unicode tags: U+E0020–U+E007F + if r >= 0xE0020 && r <= 0xE007F { + return true + } + // BiDi controls: U+202A–U+202E + if r >= 0x202A && r <= 0x202E { + return true + } + // BiDi isolates: U+2066–U+2069 + if r >= 0x2066 && r <= 0x2069 { + return true + } + // Hidden modifiers: U+2060–U+2064 + if r >= 0x2060 && r <= 0x2064 { + return true + } + + return false +} diff --git a/pkg/sanitize/sanitize_test.go b/pkg/sanitize/sanitize_test.go new file mode 100644 index 000000000..69f13b054 --- /dev/null +++ b/pkg/sanitize/sanitize_test.go @@ -0,0 +1,188 @@ +package sanitize + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestFilterInvisibleCharacters(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "empty string", + input: "", + expected: "", + }, + { + name: "normal text without invisible characters", + input: "Hello World", + expected: "Hello World", + }, + { + name: "text with zero width space", + input: "Hello\u200BWorld", + expected: "HelloWorld", + }, + { + name: "text with zero width non-joiner", + input: "Hello\u200CWorld", + expected: "HelloWorld", + }, + { + name: "text with left-to-right mark", + input: "Hello\u200EWorld", + expected: "HelloWorld", + }, + { + name: "text with right-to-left mark", + input: "Hello\u200FWorld", + expected: "HelloWorld", + }, + { + name: "text with soft hyphen", + input: "Hello\u00ADWorld", + expected: "HelloWorld", + }, + { + name: "text with zero width no-break space (BOM)", + input: "Hello\uFEFFWorld", + expected: "HelloWorld", + }, + { + name: "text with mongolian vowel separator", + input: "Hello\u180EWorld", + expected: "HelloWorld", + }, + { + name: "text with unicode tag character", + input: "Hello\U000E0001World", + expected: "HelloWorld", + }, + { + name: "text with unicode tag range characters", + input: "Hello\U000E0020World\U000E007FTest", + expected: "HelloWorldTest", + }, + { + name: "text with bidi control characters", + input: "Hello\u202AWorld\u202BTest\u202CEnd\u202DMore\u202EFinal", + expected: "HelloWorldTestEndMoreFinal", + }, + { + name: "text with bidi isolate characters", + input: "Hello\u2066World\u2067Test\u2068End\u2069Final", + expected: "HelloWorldTestEndFinal", + }, + { + name: "text with hidden modifier characters", + input: "Hello\u2060World\u2061Test\u2062End\u2063More\u2064Final", + expected: "HelloWorldTestEndMoreFinal", + }, + { + name: "multiple invisible characters mixed", + input: "Hello\u200B\u200C\u200E\u200F\u00AD\uFEFF\u180E\U000E0001World", + expected: "HelloWorld", + }, + { + name: "text with normal unicode characters (should be preserved)", + input: "Hello 世界 🌍 αβγ", + expected: "Hello 世界 🌍 αβγ", + }, + { + name: "invisible characters at start and end", + input: "\u200BHello World\u200C", + expected: "Hello World", + }, + { + name: "only invisible characters", + input: "\u200B\u200C\u200E\u200F", + expected: "", + }, + { + name: "real-world example with title", + input: "Fix\u200B bug\u00AD in\u202A authentication\u202C", + expected: "Fix bug in authentication", + }, + { + name: "issue body with mixed content", + input: "This is a\u200B bug report.\n\nSteps to reproduce:\u200C\n1. Do this\u200E\n2. Do that\u200F", + expected: "This is a bug report.\n\nSteps to reproduce:\n1. Do this\n2. Do that", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := FilterInvisibleCharacters(tt.input) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestShouldRemoveRune(t *testing.T) { + tests := []struct { + name string + rune rune + expected bool + }{ + // Individual characters that should be removed + {name: "zero width space", rune: 0x200B, expected: true}, + {name: "zero width non-joiner", rune: 0x200C, expected: true}, + {name: "left-to-right mark", rune: 0x200E, expected: true}, + {name: "right-to-left mark", rune: 0x200F, expected: true}, + {name: "soft hyphen", rune: 0x00AD, expected: true}, + {name: "zero width no-break space", rune: 0xFEFF, expected: true}, + {name: "mongolian vowel separator", rune: 0x180E, expected: true}, + {name: "unicode tag", rune: 0xE0001, expected: true}, + + // Range tests - Unicode tags: U+E0020–U+E007F + {name: "unicode tag range start", rune: 0xE0020, expected: true}, + {name: "unicode tag range middle", rune: 0xE0050, expected: true}, + {name: "unicode tag range end", rune: 0xE007F, expected: true}, + {name: "before unicode tag range", rune: 0xE001F, expected: false}, + {name: "after unicode tag range", rune: 0xE0080, expected: false}, + + // Range tests - BiDi controls: U+202A–U+202E + {name: "bidi control range start", rune: 0x202A, expected: true}, + {name: "bidi control range middle", rune: 0x202C, expected: true}, + {name: "bidi control range end", rune: 0x202E, expected: true}, + {name: "before bidi control range", rune: 0x2029, expected: false}, + {name: "after bidi control range", rune: 0x202F, expected: false}, + + // Range tests - BiDi isolates: U+2066–U+2069 + {name: "bidi isolate range start", rune: 0x2066, expected: true}, + {name: "bidi isolate range middle", rune: 0x2067, expected: true}, + {name: "bidi isolate range end", rune: 0x2069, expected: true}, + {name: "before bidi isolate range", rune: 0x2065, expected: false}, + {name: "after bidi isolate range", rune: 0x206A, expected: false}, + + // Range tests - Hidden modifiers: U+2060–U+2064 + {name: "hidden modifier range start", rune: 0x2060, expected: true}, + {name: "hidden modifier range middle", rune: 0x2062, expected: true}, + {name: "hidden modifier range end", rune: 0x2064, expected: true}, + {name: "before hidden modifier range", rune: 0x205F, expected: false}, + {name: "after hidden modifier range", rune: 0x2065, expected: false}, + + // Characters that should NOT be removed + {name: "regular ascii letter", rune: 'A', expected: false}, + {name: "regular ascii digit", rune: '1', expected: false}, + {name: "regular ascii space", rune: ' ', expected: false}, + {name: "newline", rune: '\n', expected: false}, + {name: "tab", rune: '\t', expected: false}, + {name: "unicode letter", rune: '世', expected: false}, + {name: "emoji", rune: '🌍', expected: false}, + {name: "greek letter", rune: 'α', expected: false}, + {name: "punctuation", rune: '.', expected: false}, + {name: "hyphen (normal)", rune: '-', expected: false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := shouldRemoveRune(tt.rune) + assert.Equal(t, tt.expected, result, "rune: U+%04X (%c)", tt.rune, tt.rune) + }) + } +}