diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml new file mode 100644 index 0000000..aae10a2 --- /dev/null +++ b/.github/workflows/regression.yml @@ -0,0 +1,72 @@ +name: Regression Test + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + run_all_tests: + runs-on: ubuntu-latest + #if: "!contains(github.event.pull_request.title, '[NO-REGRESSION-TEST]')" + steps: + - name: Setup Go environment + uses: actions/setup-go@v5 + with: + go-version: '1.22' + + - name: Setup Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + toolchain: stable + components: rust-analyzer + + - name: Setup Python environment + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Checkout pull request code + uses: actions/checkout@v4 + with: + path: 'pr_repo' + submodules: true + + - name: Install Python dependencies + run: | + pip install -r ./pr_repo/script/requirements.txt + pip install ./pr_repo/pylsp + + - name: Checkout main branch code + uses: actions/checkout@v4 + with: + ref: 'main' + path: 'main_repo' + + - name: Compile both binaries + run: | + (cd main_repo && go build -o ../abcoder_old) + (cd pr_repo && go build -o ../abcoder_new) + + - name: Run test scripts and generate outputs + run: | + LANGS="go rust python" OUTDIR=out_old ABCEXE=./abcoder_old ./pr_repo/script/run_all_testdata.sh + LANGS="go rust python" OUTDIR=out_new ABCEXE=./abcoder_new ./pr_repo/script/run_all_testdata.sh + + - name: Compare outputs and check for regression + id: diff_check + run: ./pr_repo/script/diffjson.py out_old out_new + continue-on-error: true + + - name: Upload output directories + uses: actions/upload-artifact@v4 + if: always() + with: + name: regression-outputs + path: | + out_old + out_new + retention-days: 3 diff --git a/lang/lsp/client.go b/lang/lsp/client.go index d4ecbf2..750c419 100644 --- a/lang/lsp/client.go +++ b/lang/lsp/client.go @@ -17,6 +17,7 @@ package lsp import ( "bufio" "context" + "encoding/json" "fmt" "io" "os" @@ -65,22 +66,6 @@ func NewLSPClient(repo string, openfile string, wait time.Duration, opts ClientO if err != nil { return nil, err } - - // wait for "textDocument/publishDiagnostics" notification - // resp := cli.WaitFirstNotify("textDocument/publishDiagnostics") - // again: - // var diagnostics lsp.PublishDiagnosticsParams - // if err := json.Unmarshal(*resp.Params, &diagnostics); err != nil { - // logger.Fatalf("Failed to unmarshal diagnostics: %v", err) - // } - // if len(diagnostics.Diagnostics) > 0 { - // // wait again - // resp = cli.WaitFirstNotify("textDocument/publishDiagnostics") - // if retry > 0 { - // retry-- - // goto again - // } - // } } time.Sleep(wait) @@ -93,6 +78,19 @@ func (c *LSPClient) Close() error { return c.Conn.Close() } +// Extra wrapper around json rpc to +// 1. implement a transparent, generic cache +func (cli *LSPClient) Call(ctx context.Context, method string, params, result interface{}, opts ...jsonrpc2.CallOption) error { + var raw json.RawMessage + if err := cli.Conn.Call(ctx, method, params, &raw); err != nil { + return err + } + if err := json.Unmarshal(raw, result); err != nil { + return err + } + return nil +} + type initializeParams struct { ProcessID int `json:"processId,omitempty"` diff --git a/lang/lsp/clients_test.go b/lang/lsp/clients_test.go index 3b5e729..d28c8f6 100644 --- a/lang/lsp/clients_test.go +++ b/lang/lsp/clients_test.go @@ -160,10 +160,10 @@ Output` // references refRange := Range{ Start: Position{ - Line: 13, - Character: 13, + Line: 48, + Character: 6, }, - } + } // trait $0MyTrait { t.Run("references", func(t *testing.T) { id := Location{ URI: entity_mod_uri, @@ -173,6 +173,9 @@ Output` if err != nil { t.Fatalf("Find Reference failed: %v", err) } + if len(references) != 4 { + t.Fatalf("Expected 4 references, got %d\n%+v\n", len(references), references) + } if _, err := json.Marshal(references); err != nil { t.Fatalf("Marshal Reference failed: %v", err) } @@ -198,6 +201,12 @@ Output` if err != nil { t.Fatalf("Semantic Tokens failed: %v", err) } + if len(tokens) != 149 { + t.Fatalf("Expected 149 semantic tokens, got %d\n%+v", len(tokens), tokens) + } + if len(tokens) == 0 { + t.Fatalf("Semantic Tokens should not be empty") + } if _, err := json.Marshal(tokens); err != nil { t.Fatalf("Marshal Semantic Tokens failed: %v", err) } @@ -230,18 +239,6 @@ Output` if len(definition) != 1 { t.Fatalf("Find Definition should have found entry, but got none at %#v", pos) } - // t.Logf("Find Definition %#v ->\n%#v", pos, definition) - } - }) - - // workspaceSymbol - t.Run("workspaceSymbol", func(t *testing.T) { - symbols, err := rustLSP.WorkspaceSymbols(context.Background(), "add") - if err != nil { - t.Fatalf("Workspace Symbol failed: %v", err) - } - if _, err := json.Marshal(symbols); err != nil { - t.Fatalf("Marshal Workspace Symbols failed: %v", err) } }) diff --git a/lang/lsp/handler.go b/lang/lsp/handler.go index ab12a8a..40274fc 100644 --- a/lang/lsp/handler.go +++ b/lang/lsp/handler.go @@ -93,9 +93,9 @@ loop: func (h *lspHandler) Handle(ctx context.Context, conn *jsonrpc2.Conn, req *jsonrpc2.Request) { // This method will be called for both requests and notifications - log.Info("handle method: %s\n", req.Method) + log.Debug("handle method: %s\n", req.Method) if req.Params != nil { - log.Info("param: %s\n", string(*req.Params)) + log.Debug("param: %s\n", string(*req.Params)) } if req.Notif { // This is a notification @@ -126,6 +126,10 @@ func (h *lspHandler) sendNotify(req *jsonrpc2.Request) { func (h *lspHandler) handleNotification(ctx context.Context, conn *jsonrpc2.Conn, req *jsonrpc2.Request) { switch req.Method { + case "textDocument/publishDiagnostics": + // This notification is sent from the server to the client to signal results of validation runs. + log.Debug("Received publishDiagnostics notification:\n%s\n", string(*req.Params)) + return // exit case "exit": log.Info("Received exit notification\n") diff --git a/lang/lsp/lsp.go b/lang/lsp/lsp.go index e59d1b6..d1c4bc8 100644 --- a/lang/lsp/lsp.go +++ b/lang/lsp/lsp.go @@ -15,16 +15,11 @@ package lsp import ( - "context" "encoding/json" "fmt" - "math" - "os" "path/filepath" - "sort" "strings" - "github.com/cloudwego/abcoder/lang/utils" "github.com/sourcegraph/go-lsp" ) @@ -79,8 +74,6 @@ type Range struct { End Position `json:"end"` } -type _Range Range - func (r Range) String() string { return fmt.Sprintf("%s-%s", r.Start, r.End) } @@ -89,10 +82,33 @@ func (r Range) MarshalText() ([]byte, error) { return []byte(r.String()), nil } +type _Range Range + func (r Range) MarshalJSON() ([]byte, error) { return json.Marshal(_Range(r)) } +func isPositionInRange(pos Position, r Range, close bool) bool { + if pos.Line < r.Start.Line || pos.Line > r.End.Line { + return false + } + if pos.Line == r.Start.Line && pos.Character < r.Start.Character { + return false + } + if pos.Line == r.End.Line { + if close { + return pos.Character <= r.End.Character + } else { + return pos.Character < r.End.Character + } + } + return true +} + +func (a Range) Include(b Range) bool { + return isPositionInRange(b.Start, a, false) && isPositionInRange(b.End, a, true) +} + type Location struct { URI DocumentURI `json:"uri"` Range Range `json:"range"` @@ -108,19 +124,29 @@ func SetLocationMarshalJSONInline(inline bool) { locationMarshalJSONInline = inline } -type location Location +type _Location Location func (l Location) MarshalJSON() ([]byte, error) { if locationMarshalJSONInline { return []byte(fmt.Sprintf("%q", l.String())), nil } - return json.Marshal(location(l)) + return json.Marshal(_Location(l)) } func (l Location) MarshalText() ([]byte, error) { return []byte(l.String()), nil } +func (a Location) Include(b Location) bool { + if a == b { + return true + } + if a.URI != b.URI { + return false + } + return isPositionInRange(b.Range.Start, a.Range, false) && isPositionInRange(b.Range.End, a.Range, true) +} + type DocumentURI lsp.DocumentURI func (l DocumentURI) File() string { @@ -134,11 +160,6 @@ func NewURI(file string) DocumentURI { return DocumentURI("file://" + file) } -type DocumentRange struct { - TextDocument lsp.TextDocumentIdentifier `json:"textDocument"` - Range Range `json:"range"` -} - type TextDocumentItem struct { URI DocumentURI `json:"uri"` LanguageID string `json:"languageId"` @@ -197,484 +218,3 @@ type Token struct { func (t *Token) String() string { return fmt.Sprintf("%s %s %v %s", t.Text, t.Type, t.Modifiers, t.Location) } - -type DidOpenTextDocumentParams struct { - TextDocument TextDocumentItem `json:"textDocument"` -} - -func (cli *LSPClient) DidOpen(ctx context.Context, file DocumentURI) (*TextDocumentItem, error) { - if f, ok := cli.files[file]; ok { - return f, nil - } - text, err := os.ReadFile(file.File()) - if err != nil { - return nil, err - } - f := &TextDocumentItem{ - URI: DocumentURI(file), - LanguageID: cli.Language.String(), - Version: 1, - Text: string(text), - LineCounts: utils.CountLines(string(text)), - } - cli.files[file] = f - req := DidOpenTextDocumentParams{ - TextDocument: *f, - } - if err := cli.Notify(ctx, "textDocument/didOpen", req); err != nil { - return nil, err - } - return f, nil -} - -func (cli *LSPClient) DocumentSymbols(ctx context.Context, file DocumentURI) (map[Range]*DocumentSymbol, error) { - // f, ok := cli.files[file] - // if ok { - // return f.Symbols, nil - // } - // open file first - f, err := cli.DidOpen(ctx, file) - if err != nil { - return nil, err - } - if f.Symbols != nil { - return f.Symbols, nil - } - req := lsp.DocumentSymbolParams{ - TextDocument: lsp.TextDocumentIdentifier{ - URI: lsp.DocumentURI(file), - }, - } - var resp []DocumentSymbol - if err := cli.Call(ctx, "textDocument/documentSymbol", req, &resp); err != nil { - return nil, err - } - // cache symbols - f.Symbols = make(map[Range]*DocumentSymbol, len(resp)) - for i := range resp { - s := &resp[i] - f.Symbols[s.Location.Range] = s - } - return f.Symbols, nil -} - -func (cli *LSPClient) References(ctx context.Context, id Location) ([]Location, error) { - if _, err := cli.DidOpen(ctx, id.URI); err != nil { - return nil, err - } - uri := lsp.DocumentURI(id.URI) - req := lsp.ReferenceParams{ - TextDocumentPositionParams: lsp.TextDocumentPositionParams{ - TextDocument: lsp.TextDocumentIdentifier{ - URI: uri, - }, - Position: lsp.Position{ - Line: id.Range.Start.Line, - Character: id.Range.Start.Character + 1, - }, - }, - Context: lsp.ReferenceContext{ - IncludeDeclaration: true, - }, - } - var resp []Location - if err := cli.Call(ctx, "textDocument/references", req, &resp); err != nil { - return nil, err - } - return resp, nil -} - -// Some language servers do not provide semanticTokens/range. -// In that case, we fall back to semanticTokens/full and then filter the tokens manually. -func (cli *LSPClient) getSemanticTokensRange(ctx context.Context, req DocumentRange, resp *SemanticTokens) error { - if cli.hasSemanticTokensRange { - if err := cli.Call(ctx, "textDocument/semanticTokens/range", req, resp); err != nil { - return err - } - return nil - } - // fall back to semanticTokens/full - req1 := struct { - TextDocument lsp.TextDocumentIdentifier `json:"textDocument"` - }{TextDocument: req.TextDocument} - if err := cli.Call(ctx, "textDocument/semanticTokens/full", req1, resp); err != nil { - return err - } - filterSemanticTokensInRange(resp, req.Range) - return nil -} - -func filterSemanticTokensInRange(resp *SemanticTokens, r Range) { - curPos := Position{ - Line: 0, - Character: 0, - } - newData := []uint32{} - includedIs := []int{} - for i := 0; i < len(resp.Data); i += 5 { - deltaLine := int(resp.Data[i]) - deltaStart := int(resp.Data[i+1]) - if deltaLine != 0 { - curPos.Line += deltaLine - curPos.Character = deltaStart - } else { - curPos.Character += deltaStart - } - if isPositionInRange(curPos, r, true) { - if len(newData) == 0 { - // add range start to initial delta - newData = append(newData, resp.Data[i:i+5]...) - newData[0] = uint32(curPos.Line) - newData[1] = uint32(curPos.Character) - } else { - newData = append(newData, resp.Data[i:i+5]...) - } - includedIs = append(includedIs, i) - } - } - resp.Data = newData -} - -func (cli *LSPClient) SemanticTokens(ctx context.Context, id Location) ([]Token, error) { - // open file first - syms, err := cli.DocumentSymbols(ctx, id.URI) - if err != nil { - return nil, err - } - sym := syms[id.Range] - if sym != nil && sym.Tokens != nil { - return sym.Tokens, nil - } - - uri := lsp.DocumentURI(id.URI) - req := DocumentRange{ - TextDocument: lsp.TextDocumentIdentifier{ - URI: uri, - }, - Range: id.Range, - } - - var resp SemanticTokens - if err := cli.getSemanticTokensRange(ctx, req, &resp); err != nil { - return nil, err - } - - toks := cli.getAllTokens(resp, id.URI) - if sym != nil { - sym.Tokens = toks - } - return toks, nil -} - -func (cli *LSPClient) Definition(ctx context.Context, uri DocumentURI, pos Position) ([]Location, error) { - // open file first - f, err := cli.DidOpen(ctx, uri) - if err != nil { - return nil, err - } - if f.Definitions != nil { - if locations, ok := f.Definitions[pos]; ok { - return locations, nil - } - } - - // call - req := lsp.TextDocumentPositionParams{ - TextDocument: lsp.TextDocumentIdentifier{ - URI: lsp.DocumentURI(uri), - }, - Position: lsp.Position(pos), - } - var resp []Location - if err := cli.Call(ctx, "textDocument/definition", req, &resp); err != nil { - return nil, err - } - - // cache definitions - if f.Definitions == nil { - f.Definitions = make(map[Position][]Location) - } - f.Definitions[pos] = resp - return resp, nil -} - -func (cli *LSPClient) TypeDefinition(ctx context.Context, uri DocumentURI, pos Position) ([]Location, error) { - req := lsp.TextDocumentPositionParams{ - TextDocument: lsp.TextDocumentIdentifier{ - URI: lsp.DocumentURI(uri), - }, - Position: lsp.Position(pos), - } - var resp []Location - if err := cli.Call(ctx, "textDocument/typeDefinition", req, &resp); err != nil { - return nil, err - } - return resp, nil -} - -// read file and get the text of block of range -func (cli *LSPClient) Locate(id Location) (string, error) { - f, ok := cli.files[id.URI] - if !ok { - // open file os - fd, err := os.ReadFile(id.URI.File()) - if err != nil { - return "", err - } - text := string(fd) - f = &TextDocumentItem{ - URI: DocumentURI(id.URI), - LanguageID: cli.Language.String(), - Version: 1, - Text: text, - LineCounts: utils.CountLines(text), - } - cli.files[id.URI] = f - } - - text := f.Text - // get block text of range - start := f.LineCounts[id.Range.Start.Line] + id.Range.Start.Character - end := f.LineCounts[id.Range.End.Line] + id.Range.End.Character - return text[start:end], nil -} - -// get line text of pos -func (cli *LSPClient) Line(uri DocumentURI, pos int) string { - f, ok := cli.files[uri] - if !ok { - // open file os - fd, err := os.ReadFile(uri.File()) - if err != nil { - return "" - } - text := string(fd) - f = &TextDocumentItem{ - URI: DocumentURI(uri), - LanguageID: cli.Language.String(), - Version: 1, - Text: text, - LineCounts: utils.CountLines(text), - } - cli.files[uri] = f - } - if pos < 0 || pos >= len(f.LineCounts) { - return "" - } - start := f.LineCounts[pos] - end := len(f.Text) - if pos+1 < len(f.LineCounts) { - end = f.LineCounts[pos+1] - } - return f.Text[start:end] -} - -func (cli *LSPClient) LineCounts(uri DocumentURI) []int { - f, ok := cli.files[uri] - if !ok { - // open file os - fd, err := os.ReadFile(uri.File()) - if err != nil { - return nil - } - text := string(fd) - f = &TextDocumentItem{ - URI: DocumentURI(uri), - LanguageID: cli.Language.String(), - Version: 1, - Text: text, - LineCounts: utils.CountLines(text), - } - cli.files[uri] = f - } - return f.LineCounts -} - -func (cli *LSPClient) GetFile(uri DocumentURI) *TextDocumentItem { - return cli.files[uri] -} - -func (cli *LSPClient) GetParent(sym *DocumentSymbol) (ret *DocumentSymbol) { - if sym == nil { - return nil - } - if f, ok := cli.files[sym.Location.URI]; ok { - for _, s := range f.Symbols { - if s != sym && s.Location.Range.Include(sym.Location.Range) { - if ret == nil || ret.Location.Range.Include(s.Location.Range) { - ret = s - } - } - } - } - return -} - -func (cli *LSPClient) getAllTokens(tokens SemanticTokens, file DocumentURI) []Token { - start := Position{Line: 0, Character: 0} - end := Position{Line: math.MaxInt32, Character: math.MaxInt32} - return cli.getRangeTokens(tokens, file, Range{Start: start, End: end}) -} - -func (cli *LSPClient) getRangeTokens(tokens SemanticTokens, file DocumentURI, r Range) []Token { - symbols := make([]Token, 0, len(tokens.Data)/5) - line := 0 - character := 0 - - for i := 0; i < len(tokens.Data); i += 5 { - deltaLine := int(tokens.Data[i]) - deltaStart := int(tokens.Data[i+1]) - length := int(tokens.Data[i+2]) - tokenType := int(tokens.Data[i+3]) - tokenModifiersBitset := int(tokens.Data[i+4]) - - line += deltaLine - if deltaLine == 0 { - character += deltaStart - } else { - character = deltaStart - } - - currentPos := Position{Line: line, Character: character} - if isPositionInRange(currentPos, r, false) { - // fmt.Printf("Token at line %d, character %d, length %d, type %d, modifiers %b\n", line, character, length, tokenType, tokenModifiersBitset) - tokenTypeName := getSemanticTokenType(tokenType, cli.tokenTypes) - tokenModifierNames := getSemanticTokenModifier(tokenModifiersBitset, cli.tokenModifiers) - loc := Location{URI: file, Range: Range{Start: currentPos, End: Position{Line: line, Character: character + length}}} - text, _ := cli.Locate(loc) - symbols = append(symbols, Token{ - Location: loc, - Type: tokenTypeName, - Modifiers: tokenModifierNames, - Text: text, - }) - } - } - - // sort it by start position - sort.Slice(symbols, func(i, j int) bool { - if symbols[i].Location.URI != symbols[j].Location.URI { - return symbols[i].Location.URI < symbols[j].Location.URI - } - if symbols[i].Location.Range.Start.Line != symbols[j].Location.Range.Start.Line { - return symbols[i].Location.Range.Start.Line < symbols[j].Location.Range.Start.Line - } - return symbols[i].Location.Range.Start.Character < symbols[j].Location.Range.Start.Character - }) - - return symbols -} - -func (a Location) Include(b Location) bool { - if a == b { - return true - } - if a.URI != b.URI { - return false - } - return isPositionInRange(b.Range.Start, a.Range, false) && isPositionInRange(b.Range.End, a.Range, true) -} - -func (a Range) Include(b Range) bool { - return isPositionInRange(b.Start, a, false) && isPositionInRange(b.End, a, true) -} - -func isPositionInRange(pos Position, r Range, close bool) bool { - if pos.Line < r.Start.Line || pos.Line > r.End.Line { - return false - } - if pos.Line == r.Start.Line && pos.Character < r.Start.Character { - return false - } - if pos.Line == r.End.Line { - if close { - return pos.Character <= r.End.Character - } else { - return pos.Character < r.End.Character - } - } - return true -} - -func getSemanticTokenType(id int, semanticTokenTypes []string) string { - if id < len(semanticTokenTypes) { - return semanticTokenTypes[id] - } - return fmt.Sprintf("unknown(%d)", id) -} - -func getSemanticTokenModifier(bitset int, semanticTokenModifiers []string) []string { - var result []string - for i, modifier := range semanticTokenModifiers { - if bitset&(1< 0 && !stack[len(stack)-1].Location.Range.Include(symbol.Location.Range) { - stack = stack[:len(stack)-1] - } - - // If the stack is not empty, the top symbol is the parent of the current symbol - if len(stack) > 0 { - parent := stack[len(stack)-1] - parent.Children = append(parent.Children, symbol) - } else { - // If the stack is empty, the current symbol is a root symbol - rootSymbols = append(rootSymbols, symbol) - } - - // Push the current symbol onto the stack - stack = append(stack, symbol) - } - - return rootSymbols -} diff --git a/lang/lsp/lsp_methods.go b/lang/lsp/lsp_methods.go new file mode 100644 index 0000000..99b3e0a --- /dev/null +++ b/lang/lsp/lsp_methods.go @@ -0,0 +1,471 @@ +// Copyright 2025 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lsp + +import ( + "context" + "fmt" + "math" + "os" + "sort" + + "github.com/cloudwego/abcoder/lang/utils" + lsp "github.com/sourcegraph/go-lsp" +) + +type DocumentRange struct { + TextDocument lsp.TextDocumentIdentifier `json:"textDocument"` + Range Range `json:"range"` +} + +type SemanticTokensFullParams struct { + TextDocument lsp.TextDocumentIdentifier `json:"textDocument"` +} + +type DidOpenTextDocumentParams struct { + TextDocument TextDocumentItem `json:"textDocument"` +} + +func (cli *LSPClient) DidOpen(ctx context.Context, file DocumentURI) (*TextDocumentItem, error) { + if f, ok := cli.files[file]; ok { + return f, nil + } + text, err := os.ReadFile(file.File()) + if err != nil { + return nil, err + } + f := &TextDocumentItem{ + URI: DocumentURI(file), + LanguageID: cli.Language.String(), + Version: 1, + Text: string(text), + LineCounts: utils.CountLines(string(text)), + } + cli.files[file] = f + req := DidOpenTextDocumentParams{ + TextDocument: *f, + } + if err := cli.Notify(ctx, "textDocument/didOpen", req); err != nil { + return nil, err + } + return f, nil +} + +func (cli *LSPClient) DocumentSymbols(ctx context.Context, file DocumentURI) (map[Range]*DocumentSymbol, error) { + // open file first + f, err := cli.DidOpen(ctx, file) + if err != nil { + return nil, err + } + if f.Symbols != nil { + return f.Symbols, nil + } + uri := lsp.DocumentURI(file) + req := lsp.DocumentSymbolParams{ + TextDocument: lsp.TextDocumentIdentifier{ + URI: uri, + }, + } + var resp []DocumentSymbol + if err := cli.Call(ctx, "textDocument/documentSymbol", req, &resp); err != nil { + return nil, err + } + // cache symbols + f.Symbols = make(map[Range]*DocumentSymbol, len(resp)) + for i := range resp { + s := &resp[i] + f.Symbols[s.Location.Range] = s + } + return f.Symbols, nil +} + +func (cli *LSPClient) References(ctx context.Context, id Location) ([]Location, error) { + if _, err := cli.DidOpen(ctx, id.URI); err != nil { + return nil, err + } + uri := lsp.DocumentURI(id.URI) + req := lsp.ReferenceParams{ + TextDocumentPositionParams: lsp.TextDocumentPositionParams{ + TextDocument: lsp.TextDocumentIdentifier{ + URI: uri, + }, + Position: lsp.Position{ + Line: id.Range.Start.Line, + Character: id.Range.Start.Character + 1, + }, + }, + Context: lsp.ReferenceContext{ + IncludeDeclaration: true, + }, + } + var resp []Location + if err := cli.Call(ctx, "textDocument/references", req, &resp); err != nil { + return nil, err + } + return resp, nil +} + +// Some language servers do not provide semanticTokens/range. +// In that case, we fall back to semanticTokens/full and then filter the tokens manually. +func (cli *LSPClient) getSemanticTokensRange(ctx context.Context, req DocumentRange, resp *SemanticTokens) error { + if cli.hasSemanticTokensRange { + if err := cli.Call(ctx, "textDocument/semanticTokens/range", req, resp); err != nil { + return err + } + return nil + } + // fall back to semanticTokens/full + req1 := SemanticTokensFullParams{ + TextDocument: req.TextDocument, + } + if err := cli.Call(ctx, "textDocument/semanticTokens/full", req1, resp); err != nil { + return err + } + filterSemanticTokensInRange(resp, req.Range) + return nil +} + +func filterSemanticTokensInRange(resp *SemanticTokens, r Range) { + curPos := Position{ + Line: 0, + Character: 0, + } + newData := []uint32{} + includedIs := []int{} + for i := 0; i < len(resp.Data); i += 5 { + deltaLine := int(resp.Data[i]) + deltaStart := int(resp.Data[i+1]) + if deltaLine != 0 { + curPos.Line += deltaLine + curPos.Character = deltaStart + } else { + curPos.Character += deltaStart + } + if isPositionInRange(curPos, r, true) { + if len(newData) == 0 { + // add range start to initial delta + newData = append(newData, resp.Data[i:i+5]...) + newData[0] = uint32(curPos.Line) + newData[1] = uint32(curPos.Character) + } else { + newData = append(newData, resp.Data[i:i+5]...) + } + includedIs = append(includedIs, i) + } + } + resp.Data = newData +} + +func (cli *LSPClient) SemanticTokens(ctx context.Context, id Location) ([]Token, error) { + // open file first + syms, err := cli.DocumentSymbols(ctx, id.URI) + if err != nil { + return nil, err + } + sym := syms[id.Range] + if sym != nil && sym.Tokens != nil { + return sym.Tokens, nil + } + + uri := lsp.DocumentURI(id.URI) + req := DocumentRange{ + TextDocument: lsp.TextDocumentIdentifier{ + URI: uri, + }, + Range: id.Range, + } + + var resp SemanticTokens + if err := cli.getSemanticTokensRange(ctx, req, &resp); err != nil { + return nil, err + } + + toks := cli.getAllTokens(resp, id.URI) + if sym != nil { + sym.Tokens = toks + } + return toks, nil +} + +func (cli *LSPClient) Definition(ctx context.Context, uri DocumentURI, pos Position) ([]Location, error) { + // open file first + f, err := cli.DidOpen(ctx, uri) + if err != nil { + return nil, err + } + if f.Definitions != nil { + if locations, ok := f.Definitions[pos]; ok { + return locations, nil + } + } + + // call + req := lsp.TextDocumentPositionParams{ + TextDocument: lsp.TextDocumentIdentifier{ + URI: lsp.DocumentURI(uri), + }, + Position: lsp.Position(pos), + } + var resp []Location + if err := cli.Call(ctx, "textDocument/definition", req, &resp); err != nil { + return nil, err + } + + // cache definitions + if f.Definitions == nil { + f.Definitions = make(map[Position][]Location) + } + f.Definitions[pos] = resp + return resp, nil +} + +func (cli *LSPClient) TypeDefinition(ctx context.Context, uri DocumentURI, pos Position) ([]Location, error) { + req := lsp.TextDocumentPositionParams{ + TextDocument: lsp.TextDocumentIdentifier{ + URI: lsp.DocumentURI(uri), + }, + Position: lsp.Position(pos), + } + var resp []Location + if err := cli.Call(ctx, "textDocument/typeDefinition", req, &resp); err != nil { + return nil, err + } + return resp, nil +} + +// read file and get the text of block of range +func (cli *LSPClient) Locate(id Location) (string, error) { + f, ok := cli.files[id.URI] + if !ok { + // open file os + fd, err := os.ReadFile(id.URI.File()) + if err != nil { + return "", err + } + text := string(fd) + f = &TextDocumentItem{ + URI: DocumentURI(id.URI), + LanguageID: cli.Language.String(), + Version: 1, + Text: text, + LineCounts: utils.CountLines(text), + } + cli.files[id.URI] = f + } + + text := f.Text + // get block text of range + start := f.LineCounts[id.Range.Start.Line] + id.Range.Start.Character + end := f.LineCounts[id.Range.End.Line] + id.Range.End.Character + return text[start:end], nil +} + +// get line text of pos +func (cli *LSPClient) Line(uri DocumentURI, pos int) string { + f, ok := cli.files[uri] + if !ok { + // open file os + fd, err := os.ReadFile(uri.File()) + if err != nil { + return "" + } + text := string(fd) + f = &TextDocumentItem{ + URI: DocumentURI(uri), + LanguageID: cli.Language.String(), + Version: 1, + Text: text, + LineCounts: utils.CountLines(text), + } + cli.files[uri] = f + } + if pos < 0 || pos >= len(f.LineCounts) { + return "" + } + start := f.LineCounts[pos] + end := len(f.Text) + if pos+1 < len(f.LineCounts) { + end = f.LineCounts[pos+1] + } + return f.Text[start:end] +} + +func (cli *LSPClient) LineCounts(uri DocumentURI) []int { + f, ok := cli.files[uri] + if !ok { + // open file os + fd, err := os.ReadFile(uri.File()) + if err != nil { + return nil + } + text := string(fd) + f = &TextDocumentItem{ + URI: DocumentURI(uri), + LanguageID: cli.Language.String(), + Version: 1, + Text: text, + LineCounts: utils.CountLines(text), + } + cli.files[uri] = f + } + return f.LineCounts +} + +func (cli *LSPClient) GetFile(uri DocumentURI) *TextDocumentItem { + return cli.files[uri] +} + +func (cli *LSPClient) GetParent(sym *DocumentSymbol) (ret *DocumentSymbol) { + if sym == nil { + return nil + } + if f, ok := cli.files[sym.Location.URI]; ok { + for _, s := range f.Symbols { + if s != sym && s.Location.Range.Include(sym.Location.Range) { + if ret == nil || ret.Location.Range.Include(s.Location.Range) { + ret = s + } + } + } + } + return +} + +func (cli *LSPClient) getAllTokens(tokens SemanticTokens, file DocumentURI) []Token { + start := Position{Line: 0, Character: 0} + end := Position{Line: math.MaxInt32, Character: math.MaxInt32} + return cli.getRangeTokens(tokens, file, Range{Start: start, End: end}) +} + +func (cli *LSPClient) getRangeTokens(tokens SemanticTokens, file DocumentURI, r Range) []Token { + symbols := make([]Token, 0, len(tokens.Data)/5) + line := 0 + character := 0 + + for i := 0; i < len(tokens.Data); i += 5 { + deltaLine := int(tokens.Data[i]) + deltaStart := int(tokens.Data[i+1]) + length := int(tokens.Data[i+2]) + tokenType := int(tokens.Data[i+3]) + tokenModifiersBitset := int(tokens.Data[i+4]) + + line += deltaLine + if deltaLine == 0 { + character += deltaStart + } else { + character = deltaStart + } + + currentPos := Position{Line: line, Character: character} + if isPositionInRange(currentPos, r, false) { + // fmt.Printf("Token at line %d, character %d, length %d, type %d, modifiers %b\n", line, character, length, tokenType, tokenModifiersBitset) + tokenTypeName := getSemanticTokenType(tokenType, cli.tokenTypes) + tokenModifierNames := getSemanticTokenModifier(tokenModifiersBitset, cli.tokenModifiers) + loc := Location{URI: file, Range: Range{Start: currentPos, End: Position{Line: line, Character: character + length}}} + text, _ := cli.Locate(loc) + symbols = append(symbols, Token{ + Location: loc, + Type: tokenTypeName, + Modifiers: tokenModifierNames, + Text: text, + }) + } + } + + // sort it by start position + sort.Slice(symbols, func(i, j int) bool { + if symbols[i].Location.URI != symbols[j].Location.URI { + return symbols[i].Location.URI < symbols[j].Location.URI + } + if symbols[i].Location.Range.Start.Line != symbols[j].Location.Range.Start.Line { + return symbols[i].Location.Range.Start.Line < symbols[j].Location.Range.Start.Line + } + return symbols[i].Location.Range.Start.Character < symbols[j].Location.Range.Start.Character + }) + + return symbols +} + +func (cli *LSPClient) FileStructure(ctx context.Context, file DocumentURI) ([]*DocumentSymbol, error) { + syms, err := cli.DocumentSymbols(ctx, file) + if err != nil { + return nil, err + } + // construct symbol hierarchy through range relation, and represent it to DocumentSymobl.Children + symbols := make([]*DocumentSymbol, 0, len(syms)) + for _, sym := range syms { + symbols = append(symbols, sym) + } + return constructSymbolHierarchy(symbols), nil +} + +func getSemanticTokenType(id int, semanticTokenTypes []string) string { + if id < len(semanticTokenTypes) { + return semanticTokenTypes[id] + } + return fmt.Sprintf("unknown(%d)", id) +} + +func getSemanticTokenModifier(bitset int, semanticTokenModifiers []string) []string { + var result []string + for i, modifier := range semanticTokenModifiers { + if bitset&(1< 0 && !stack[len(stack)-1].Location.Range.Include(symbol.Location.Range) { + stack = stack[:len(stack)-1] + } + + // If the stack is not empty, the top symbol is the parent of the current symbol + if len(stack) > 0 { + parent := stack[len(stack)-1] + parent.Children = append(parent.Children, symbol) + } else { + // If the stack is empty, the current symbol is a root symbol + rootSymbols = append(rootSymbols, symbol) + } + + // Push the current symbol onto the stack + stack = append(stack, symbol) + } + + return rootSymbols +} diff --git a/lang/lsp/testutils.go b/lang/lsp/testutils.go index 2cd00ac..4a272e8 100644 --- a/lang/lsp/testutils.go +++ b/lang/lsp/testutils.go @@ -40,6 +40,6 @@ func InitLSPForFirstTest(lang uniast.Language, server string) (*LSPClient, strin return nil, "", err } clients[lang] = client - time.Sleep(3 * time.Second) // wait for LSP server to be ready + time.Sleep(5 * time.Second) // wait for LSP server to be ready return client, testdata, nil } diff --git a/script/diffjson.py b/script/diffjson.py new file mode 100755 index 0000000..c7bf4ce --- /dev/null +++ b/script/diffjson.py @@ -0,0 +1,185 @@ +#!/usr/bin/env python3 +import argparse +import json +import sys +from pathlib import Path +from typing import Literal + +from deepdiff import DeepDiff + +# Define status types for clarity +Status = Literal["OK", "BAD", "FILE_ERROR"] + + +def format_diff_custom(diff: DeepDiff) -> str: + """ + Formats a DeepDiff object into a custom human-readable string. + This provides a clear, indented view of changes. + """ + output = [] + + # Helper to format a value for printing. Pretty-prints dicts/lists. + def format_value(value): + if isinstance(value, (dict, list)): + return json.dumps(value, indent=2) + return repr(value) + + # Handle changed values + if "values_changed" in diff: + for path, changes in diff["values_changed"].items(): + output.append(f"Value Changed at: {path}") + output.append(f" - old: {format_value(changes['old_value'])}") + output.append(f" + new: {format_value(changes['new_value'])}") + output.append("--------------------") + + # Handle added items to lists/sets + if "iterable_item_added" in diff: + for path, value in diff["iterable_item_added"].items(): + output.append(f"Item Added at: {path}") + output.append(f" + new: {format_value(value)}") + output.append("--------------------") + + # Handle removed items from lists/sets + if "iterable_item_removed" in diff: + for path, value in diff["iterable_item_removed"].items(): + output.append(f"Item Removed at: {path}") + output.append(f" - old: {format_value(value)}") + output.append("--------------------") + + # Handle added keys in dictionaries + if "dictionary_item_added" in diff: + for path in diff["dictionary_item_added"]: + output.append(f"Dictionary Key Added: {path}") + output.append("--------------------") + + # Handle removed keys in dictionaries + if "dictionary_item_removed" in diff: + for path in diff["dictionary_item_removed"]: + output.append(f"Dictionary Key Removed: {path}") + output.append("--------------------") + + # Clean up the last separator for a tidy output + if output and output[-1] == "--------------------": + output.pop() + + return "\n".join(output) + + +def compare_json_files(file1_path: Path, file2_path: Path) -> Status: + """ + Compares the content of two JSON files without printing output. + + Returns: + "OK" if they match, "BAD" if they don't, "FILE_ERROR" on read/parse error. + """ + try: + with open(file1_path, "r", encoding="utf-8") as f1: + json1 = json.load(f1) + with open(file2_path, "r", encoding="utf-8") as f2: + json2 = json.load(f2) + except (FileNotFoundError, json.JSONDecodeError): + return "FILE_ERROR" + + diff = DeepDiff(json1, json2, ignore_order=True) + + return "BAD" if diff else "OK" + + +def process_directory_comparison(old_dir: Path, new_dir: Path) -> bool: + """ + Compares JSON files across two directories and prints results in a list format. + """ + results: dict[str, list[str]] = {"OK": [], "BAD": [], "MISS": [], "NEW": []} + old_files = {p.name for p in old_dir.glob("*.json")} + new_files = {p.name for p in new_dir.glob("*.json")} + + for filename in sorted(old_files.intersection(new_files)): + status = compare_json_files(old_dir / filename, new_dir / filename) + results["BAD" if status != "OK" else "OK"].append(filename) + + for filename in sorted(old_files - new_files): + results["MISS"].append(filename) + + for filename in sorted(new_files - old_files): + results["NEW"].append(filename) + + for filename in results["OK"]: + print(f"[OK ] {filename}") + for filename in results["NEW"]: + print(f"[NEW ] {filename}") + for filename in results["BAD"]: + print(f"[BAD ] {filename}", file=sys.stderr) + for filename in results["MISS"]: + print(f"[MISS] {filename}", file=sys.stderr) + + return bool(results["BAD"] or results["MISS"]) + + +def main(): + parser = argparse.ArgumentParser( + description="Compare two JSON files or two directories of JSON files." + ) + parser.add_argument( + "path1", type=Path, help="Path to the first file or 'old' directory." + ) + parser.add_argument( + "path2", type=Path, help="Path to the second file or 'new' directory." + ) + args = parser.parse_args() + + path1, path2 = args.path1, args.path2 + + if not path1.exists() or not path2.exists(): + print( + f"Error: Path does not exist: {path1 if not path1.exists() else path2}", + file=sys.stderr, + ) + return 1 + + # --- Handle Directory Comparison --- + if path1.is_dir() and path2.is_dir(): + print(f"Comparing directories:\n- Old: {path1}\n- New: {path2}\n") + if process_directory_comparison(path1, path2): + print("\nComparison finished with errors.", file=sys.stderr) + return 1 + else: + print("\nComparison finished successfully.") + return 0 + + # --- Handle Single File Comparison --- + elif path1.is_file() and path2.is_file(): + try: + with open(path1, "r", encoding="utf-8") as f1: + json1 = json.load(f1) + with open(path2, "r", encoding="utf-8") as f2: + json2 = json.load(f2) + except (FileNotFoundError, json.JSONDecodeError) as e: + print(f"Error reading or parsing file: {e}", file=sys.stderr) + return 1 + + diff = DeepDiff(json1, json2, ignore_order=True) + + if diff: + print( + f"Differences found between '{path1.name}' and '{path2.name}':\n", + file=sys.stderr, + ) + # Format the diff into a custom readable format and print to stderr + custom_output = format_diff_custom(diff) + print(custom_output, file=sys.stderr) + return 1 + else: + print(f"Files '{path1.name}' and '{path2.name}' are identical.") + return 0 + + # --- Handle Invalid Input --- + else: + print( + "Error: Both arguments must be files or both must be directories.", + file=sys.stderr, + ) + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/script/requirements.txt b/script/requirements.txt new file mode 100644 index 0000000..084ef59 --- /dev/null +++ b/script/requirements.txt @@ -0,0 +1 @@ +deepdiff diff --git a/script/run_all_testdata.sh b/script/run_all_testdata.sh new file mode 100755 index 0000000..533c3cc --- /dev/null +++ b/script/run_all_testdata.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# Generate uniast for all testdata. +# +# USAGE: +# 1. Save the uniast to out/ +# $ OUTDIR=out/ ./script/run_all_testdata.sh +# +# 2. Save the uniast to out/ , colorize output for human readable terminal +# OUTDIR=out/ PARALLEL_FLAGS=--ctag ./script/run_all_testdata.sh +# +# 3. Use a custom abcoder executable +# OUTDIR=out/ ABCEXE="./other_abcoder" ./script/run_all_testdata.sh + +SCRIPT_DIR=$(dirname "$(readlink -f "$0")") +REPO_ROOT=$(realpath --relative-to=$(pwd) "$SCRIPT_DIR/..") + +ABCEXE=${ABCEXE:-"$REPO_ROOT/abcoder"} +OUTDIR=${OUTDIR:?Error: OUTDIR is a mandatory environment variable} +PARALLEL_FLAGS=${PARALLEL_FLAGS:---tag} +LANGS=${LANGS:-"go rust python cxx"} + +detect_jobs() { + local ABCEXE=${1:-$ABCEXE} + for lang in ${LANGS[@]}; do + for repo in "$REPO_ROOT/testdata/$lang"/*; do + local rel_path=$(realpath --relative-to="$REPO_ROOT/testdata" "$repo") + local outname=$(echo "$rel_path" | sed 's/[/:? ]/_/g') + echo $ABCEXE parse $lang $repo -o $OUTDIR/$outname.json + done + done +} + +if [[ ! -x "$ABCEXE" ]]; then + echo "Error: The specified abcoder executable '$ABCEXE' does not exist or is not executable." >&2 + exit 1 +fi +mkdir -pv "$OUTDIR" +detect_jobs +echo +detect_jobs | parallel $PARALLEL_FLAGS -j$(nproc --all) --jobs 0 "eval {}" 2>&1 diff --git a/testdata/rust/0_rust2/src/entity/inter.rs b/testdata/rust/0_rust2/src/entity/inter.rs index fd78410..c1bcc46 100644 --- a/testdata/rust/0_rust2/src/entity/inter.rs +++ b/testdata/rust/0_rust2/src/entity/inter.rs @@ -12,22 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -/** - * Copyright 2025 ByteDance Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - pub trait Addable { fn id() -> i64; fn add(&self, b: i64) -> i64; @@ -35,23 +19,25 @@ pub trait Addable { pub struct AnyInt(i64); -//impl AnyInt { -// pub fn id() -> i64 { -// 0 -// } -// pub fn add(&self, b: i64) -> i64 { -// self.0 + b -// } -//} +impl AnyInt { + //pub fn id() -> i64 { + pub fn idx() -> i64 { + 0 + } + //pub fn add(&self, b: i64) -> i64 { + pub fn addx(&self, b: i64) -> i64 { + self.0 + b + } +} impl Addable for AnyInt { fn add(&self, b: i64) -> i64 { // use the method defined in the struct - self.add(b) + self.addx(b) } fn id() -> i64 { // use the method defined in the struct - AnyInt::id() + AnyInt::idx() } }