From f240b078abaf39ba87140264c9893076c26f57a2 Mon Sep 17 00:00:00 2001
From: Hoblovski <dzy.0424thu@gmail.com>
Date: Fri, 29 Aug 2025 14:52:45 +0800
Subject: [PATCH 01/11] feat: split lsp. prepare for fix

---
 lang/lsp/client.go       |  30 ++-
 lang/lsp/clients_test.go |  11 -
 lang/lsp/handler.go      |   5 +
 lang/lsp/lsp.go          | 530 +++------------------------------------
 lang/lsp/lsp_methods.go  | 471 ++++++++++++++++++++++++++++++++++
 5 files changed, 525 insertions(+), 522 deletions(-)
 create mode 100644 lang/lsp/lsp_methods.go

diff --git a/lang/lsp/client.go b/lang/lsp/client.go
index d4ecbf2..750c419 100644
--- a/lang/lsp/client.go
+++ b/lang/lsp/client.go
@@ -17,6 +17,7 @@ package lsp
 import (
 	"bufio"
 	"context"
+	"encoding/json"
 	"fmt"
 	"io"
 	"os"
@@ -65,22 +66,6 @@ func NewLSPClient(repo string, openfile string, wait time.Duration, opts ClientO
 		if err != nil {
 			return nil, err
 		}
-
-		// wait for "textDocument/publishDiagnostics" notification
-		// 	resp := cli.WaitFirstNotify("textDocument/publishDiagnostics")
-		// again:
-		// 	var diagnostics lsp.PublishDiagnosticsParams
-		// 	if err := json.Unmarshal(*resp.Params, &diagnostics); err != nil {
-		// 		logger.Fatalf("Failed to unmarshal diagnostics: %v", err)
-		// 	}
-		// 	if len(diagnostics.Diagnostics) > 0 {
-		// 		// wait again
-		// 		resp = cli.WaitFirstNotify("textDocument/publishDiagnostics")
-		// 		if retry > 0 {
-		// 			retry--
-		// 			goto again
-		// 		}
-		// 	}
 	}
 
 	time.Sleep(wait)
@@ -93,6 +78,19 @@ func (c *LSPClient) Close() error {
 	return c.Conn.Close()
 }
 
+// Call wraps Conn.Call: the reply is captured as raw JSON first and then decoded into result.
+// It is the single interception point intended to host a transparent, generic cache.
+func (cli *LSPClient) Call(ctx context.Context, method string, params, result interface{}, opts ...jsonrpc2.CallOption) error {
+	var raw json.RawMessage
+	if err := cli.Conn.Call(ctx, method, params, &raw, opts...); err != nil { // forward opts instead of dropping them
+		return err
+	}
+	if result == nil {
+		return nil // caller does not want the reply decoded
+	}
+	return json.Unmarshal(raw, result)
+}
+
 type initializeParams struct {
 	ProcessID int `json:"processId,omitempty"`
 
diff --git a/lang/lsp/clients_test.go b/lang/lsp/clients_test.go
index 3b5e729..47e1fe4 100644
--- a/lang/lsp/clients_test.go
+++ b/lang/lsp/clients_test.go
@@ -234,17 +234,6 @@ Output`
 		}
 	})
 
-	// workspaceSymbol
-	t.Run("workspaceSymbol", func(t *testing.T) {
-		symbols, err := rustLSP.WorkspaceSymbols(context.Background(), "add")
-		if err != nil {
-			t.Fatalf("Workspace Symbol failed: %v", err)
-		}
-		if _, err := json.Marshal(symbols); err != nil {
-			t.Fatalf("Marshal Workspace Symbols failed: %v", err)
-		}
-	})
-
 	// fileStructure
 	t.Run("FileStructure", func(t *testing.T) {
 		symbols, err := rustLSP.FileStructure(context.Background(), main_uri)
diff --git a/lang/lsp/handler.go b/lang/lsp/handler.go
index ab12a8a..b37e7e1 100644
--- a/lang/lsp/handler.go
+++ b/lang/lsp/handler.go
@@ -17,6 +17,7 @@ package lsp
 import (
 	"container/list"
 	"context"
+	"fmt"
 	"sync"
 	"time"
 
@@ -126,6 +127,10 @@ func (h *lspHandler) sendNotify(req *jsonrpc2.Request) {
 
 func (h *lspHandler) handleNotification(ctx context.Context, conn *jsonrpc2.Conn, req *jsonrpc2.Request) {
 	switch req.Method {
+	case "textDocument/publishDiagnostics":
+		// This notification is sent from the server to the client to signal results of validation runs.
+		fmt.Printf("Received publishDiagnostics notification:\n%s\n", string(*req.Params))
+		return
 	// exit
 	case "exit":
 		log.Info("Received exit notification\n")
diff --git a/lang/lsp/lsp.go b/lang/lsp/lsp.go
index e59d1b6..d1c4bc8 100644
--- a/lang/lsp/lsp.go
+++ b/lang/lsp/lsp.go
@@ -15,16 +15,11 @@
 package lsp
 
 import (
-	"context"
 	"encoding/json"
 	"fmt"
-	"math"
-	"os"
 	"path/filepath"
-	"sort"
 	"strings"
 
-	"github.com/cloudwego/abcoder/lang/utils"
 	"github.com/sourcegraph/go-lsp"
 )
 
@@ -79,8 +74,6 @@ type Range struct {
 	End   Position `json:"end"`
 }
 
-type _Range Range
-
 func (r Range) String() string {
 	return fmt.Sprintf("%s-%s", r.Start, r.End)
 }
@@ -89,10 +82,33 @@ func (r Range) MarshalText() ([]byte, error) {
 	return []byte(r.String()), nil
 }
 
+type _Range Range
+
 func (r Range) MarshalJSON() ([]byte, error) {
 	return json.Marshal(_Range(r))
 }
 
+// isPositionInRange reports whether pos falls inside r. The start of r is
+// always inclusive; the end of r is inclusive only when close is true.
+func isPositionInRange(pos Position, r Range, close bool) bool {
+	switch {
+	case pos.Line < r.Start.Line, pos.Line > r.End.Line:
+		return false // outside the covered lines
+	case pos.Line == r.Start.Line && pos.Character < r.Start.Character:
+		return false // on the first line, but left of the start column
+	case pos.Line != r.End.Line:
+		return true // strictly before the last line: the end column is irrelevant
+	case close:
+		return pos.Character <= r.End.Character
+	default:
+		return pos.Character < r.End.Character
+	}
+}
+
+func (a Range) Include(b Range) bool {
+	return isPositionInRange(b.Start, a, false) && isPositionInRange(b.End, a, true)
+}
+
 type Location struct {
 	URI   DocumentURI `json:"uri"`
 	Range Range       `json:"range"`
@@ -108,19 +124,29 @@ func SetLocationMarshalJSONInline(inline bool) {
 	locationMarshalJSONInline = inline
 }
 
-type location Location
+type _Location Location
 
 func (l Location) MarshalJSON() ([]byte, error) {
 	if locationMarshalJSONInline {
 		return []byte(fmt.Sprintf("%q", l.String())), nil
 	}
-	return json.Marshal(location(l))
+	return json.Marshal(_Location(l))
 }
 
 func (l Location) MarshalText() ([]byte, error) {
 	return []byte(l.String()), nil
 }
 
+// Include reports whether b lies within a: both must share a URI and a.Range
+// must include b.Range.
+func (a Location) Include(b Location) bool {
+	// Equal locations match even when the range is empty, which Range.Include would reject.
+	if a == b {
+		return true
+	}
+	return a.URI == b.URI && a.Range.Include(b.Range)
+}
+
 type DocumentURI lsp.DocumentURI
 
 func (l DocumentURI) File() string {
@@ -134,11 +160,6 @@ func NewURI(file string) DocumentURI {
 	return DocumentURI("file://" + file)
 }
 
-type DocumentRange struct {
-	TextDocument lsp.TextDocumentIdentifier `json:"textDocument"`
-	Range        Range                      `json:"range"`
-}
-
 type TextDocumentItem struct {
 	URI         DocumentURI               `json:"uri"`
 	LanguageID  string                    `json:"languageId"`
@@ -197,484 +218,3 @@ type Token struct {
 func (t *Token) String() string {
 	return fmt.Sprintf("%s %s %v %s", t.Text, t.Type, t.Modifiers, t.Location)
 }
-
-type DidOpenTextDocumentParams struct {
-	TextDocument TextDocumentItem `json:"textDocument"`
-}
-
-func (cli *LSPClient) DidOpen(ctx context.Context, file DocumentURI) (*TextDocumentItem, error) {
-	if f, ok := cli.files[file]; ok {
-		return f, nil
-	}
-	text, err := os.ReadFile(file.File())
-	if err != nil {
-		return nil, err
-	}
-	f := &TextDocumentItem{
-		URI:        DocumentURI(file),
-		LanguageID: cli.Language.String(),
-		Version:    1,
-		Text:       string(text),
-		LineCounts: utils.CountLines(string(text)),
-	}
-	cli.files[file] = f
-	req := DidOpenTextDocumentParams{
-		TextDocument: *f,
-	}
-	if err := cli.Notify(ctx, "textDocument/didOpen", req); err != nil {
-		return nil, err
-	}
-	return f, nil
-}
-
-func (cli *LSPClient) DocumentSymbols(ctx context.Context, file DocumentURI) (map[Range]*DocumentSymbol, error) {
-	// f, ok := cli.files[file]
-	// if ok {
-	// 	return f.Symbols, nil
-	// }
-	// open file first
-	f, err := cli.DidOpen(ctx, file)
-	if err != nil {
-		return nil, err
-	}
-	if f.Symbols != nil {
-		return f.Symbols, nil
-	}
-	req := lsp.DocumentSymbolParams{
-		TextDocument: lsp.TextDocumentIdentifier{
-			URI: lsp.DocumentURI(file),
-		},
-	}
-	var resp []DocumentSymbol
-	if err := cli.Call(ctx, "textDocument/documentSymbol", req, &resp); err != nil {
-		return nil, err
-	}
-	// cache symbols
-	f.Symbols = make(map[Range]*DocumentSymbol, len(resp))
-	for i := range resp {
-		s := &resp[i]
-		f.Symbols[s.Location.Range] = s
-	}
-	return f.Symbols, nil
-}
-
-func (cli *LSPClient) References(ctx context.Context, id Location) ([]Location, error) {
-	if _, err := cli.DidOpen(ctx, id.URI); err != nil {
-		return nil, err
-	}
-	uri := lsp.DocumentURI(id.URI)
-	req := lsp.ReferenceParams{
-		TextDocumentPositionParams: lsp.TextDocumentPositionParams{
-			TextDocument: lsp.TextDocumentIdentifier{
-				URI: uri,
-			},
-			Position: lsp.Position{
-				Line:      id.Range.Start.Line,
-				Character: id.Range.Start.Character + 1,
-			},
-		},
-		Context: lsp.ReferenceContext{
-			IncludeDeclaration: true,
-		},
-	}
-	var resp []Location
-	if err := cli.Call(ctx, "textDocument/references", req, &resp); err != nil {
-		return nil, err
-	}
-	return resp, nil
-}
-
-// Some language servers do not provide semanticTokens/range.
-// In that case, we fall back to semanticTokens/full and then filter the tokens manually.
-func (cli *LSPClient) getSemanticTokensRange(ctx context.Context, req DocumentRange, resp *SemanticTokens) error {
-	if cli.hasSemanticTokensRange {
-		if err := cli.Call(ctx, "textDocument/semanticTokens/range", req, resp); err != nil {
-			return err
-		}
-		return nil
-	}
-	// fall back to semanticTokens/full
-	req1 := struct {
-		TextDocument lsp.TextDocumentIdentifier `json:"textDocument"`
-	}{TextDocument: req.TextDocument}
-	if err := cli.Call(ctx, "textDocument/semanticTokens/full", req1, resp); err != nil {
-		return err
-	}
-	filterSemanticTokensInRange(resp, req.Range)
-	return nil
-}
-
-func filterSemanticTokensInRange(resp *SemanticTokens, r Range) {
-	curPos := Position{
-		Line:      0,
-		Character: 0,
-	}
-	newData := []uint32{}
-	includedIs := []int{}
-	for i := 0; i < len(resp.Data); i += 5 {
-		deltaLine := int(resp.Data[i])
-		deltaStart := int(resp.Data[i+1])
-		if deltaLine != 0 {
-			curPos.Line += deltaLine
-			curPos.Character = deltaStart
-		} else {
-			curPos.Character += deltaStart
-		}
-		if isPositionInRange(curPos, r, true) {
-			if len(newData) == 0 {
-				// add range start to initial delta
-				newData = append(newData, resp.Data[i:i+5]...)
-				newData[0] = uint32(curPos.Line)
-				newData[1] = uint32(curPos.Character)
-			} else {
-				newData = append(newData, resp.Data[i:i+5]...)
-			}
-			includedIs = append(includedIs, i)
-		}
-	}
-	resp.Data = newData
-}
-
-func (cli *LSPClient) SemanticTokens(ctx context.Context, id Location) ([]Token, error) {
-	// open file first
-	syms, err := cli.DocumentSymbols(ctx, id.URI)
-	if err != nil {
-		return nil, err
-	}
-	sym := syms[id.Range]
-	if sym != nil && sym.Tokens != nil {
-		return sym.Tokens, nil
-	}
-
-	uri := lsp.DocumentURI(id.URI)
-	req := DocumentRange{
-		TextDocument: lsp.TextDocumentIdentifier{
-			URI: uri,
-		},
-		Range: id.Range,
-	}
-
-	var resp SemanticTokens
-	if err := cli.getSemanticTokensRange(ctx, req, &resp); err != nil {
-		return nil, err
-	}
-
-	toks := cli.getAllTokens(resp, id.URI)
-	if sym != nil {
-		sym.Tokens = toks
-	}
-	return toks, nil
-}
-
-func (cli *LSPClient) Definition(ctx context.Context, uri DocumentURI, pos Position) ([]Location, error) {
-	// open file first
-	f, err := cli.DidOpen(ctx, uri)
-	if err != nil {
-		return nil, err
-	}
-	if f.Definitions != nil {
-		if locations, ok := f.Definitions[pos]; ok {
-			return locations, nil
-		}
-	}
-
-	// call
-	req := lsp.TextDocumentPositionParams{
-		TextDocument: lsp.TextDocumentIdentifier{
-			URI: lsp.DocumentURI(uri),
-		},
-		Position: lsp.Position(pos),
-	}
-	var resp []Location
-	if err := cli.Call(ctx, "textDocument/definition", req, &resp); err != nil {
-		return nil, err
-	}
-
-	// cache definitions
-	if f.Definitions == nil {
-		f.Definitions = make(map[Position][]Location)
-	}
-	f.Definitions[pos] = resp
-	return resp, nil
-}
-
-func (cli *LSPClient) TypeDefinition(ctx context.Context, uri DocumentURI, pos Position) ([]Location, error) {
-	req := lsp.TextDocumentPositionParams{
-		TextDocument: lsp.TextDocumentIdentifier{
-			URI: lsp.DocumentURI(uri),
-		},
-		Position: lsp.Position(pos),
-	}
-	var resp []Location
-	if err := cli.Call(ctx, "textDocument/typeDefinition", req, &resp); err != nil {
-		return nil, err
-	}
-	return resp, nil
-}
-
-// read file and get the text of block of range
-func (cli *LSPClient) Locate(id Location) (string, error) {
-	f, ok := cli.files[id.URI]
-	if !ok {
-		// open file os
-		fd, err := os.ReadFile(id.URI.File())
-		if err != nil {
-			return "", err
-		}
-		text := string(fd)
-		f = &TextDocumentItem{
-			URI:        DocumentURI(id.URI),
-			LanguageID: cli.Language.String(),
-			Version:    1,
-			Text:       text,
-			LineCounts: utils.CountLines(text),
-		}
-		cli.files[id.URI] = f
-	}
-
-	text := f.Text
-	// get block text of range
-	start := f.LineCounts[id.Range.Start.Line] + id.Range.Start.Character
-	end := f.LineCounts[id.Range.End.Line] + id.Range.End.Character
-	return text[start:end], nil
-}
-
-// get line text of pos
-func (cli *LSPClient) Line(uri DocumentURI, pos int) string {
-	f, ok := cli.files[uri]
-	if !ok {
-		// open file os
-		fd, err := os.ReadFile(uri.File())
-		if err != nil {
-			return ""
-		}
-		text := string(fd)
-		f = &TextDocumentItem{
-			URI:        DocumentURI(uri),
-			LanguageID: cli.Language.String(),
-			Version:    1,
-			Text:       text,
-			LineCounts: utils.CountLines(text),
-		}
-		cli.files[uri] = f
-	}
-	if pos < 0 || pos >= len(f.LineCounts) {
-		return ""
-	}
-	start := f.LineCounts[pos]
-	end := len(f.Text)
-	if pos+1 < len(f.LineCounts) {
-		end = f.LineCounts[pos+1]
-	}
-	return f.Text[start:end]
-}
-
-func (cli *LSPClient) LineCounts(uri DocumentURI) []int {
-	f, ok := cli.files[uri]
-	if !ok {
-		// open file os
-		fd, err := os.ReadFile(uri.File())
-		if err != nil {
-			return nil
-		}
-		text := string(fd)
-		f = &TextDocumentItem{
-			URI:        DocumentURI(uri),
-			LanguageID: cli.Language.String(),
-			Version:    1,
-			Text:       text,
-			LineCounts: utils.CountLines(text),
-		}
-		cli.files[uri] = f
-	}
-	return f.LineCounts
-}
-
-func (cli *LSPClient) GetFile(uri DocumentURI) *TextDocumentItem {
-	return cli.files[uri]
-}
-
-func (cli *LSPClient) GetParent(sym *DocumentSymbol) (ret *DocumentSymbol) {
-	if sym == nil {
-		return nil
-	}
-	if f, ok := cli.files[sym.Location.URI]; ok {
-		for _, s := range f.Symbols {
-			if s != sym && s.Location.Range.Include(sym.Location.Range) {
-				if ret == nil || ret.Location.Range.Include(s.Location.Range) {
-					ret = s
-				}
-			}
-		}
-	}
-	return
-}
-
-func (cli *LSPClient) getAllTokens(tokens SemanticTokens, file DocumentURI) []Token {
-	start := Position{Line: 0, Character: 0}
-	end := Position{Line: math.MaxInt32, Character: math.MaxInt32}
-	return cli.getRangeTokens(tokens, file, Range{Start: start, End: end})
-}
-
-func (cli *LSPClient) getRangeTokens(tokens SemanticTokens, file DocumentURI, r Range) []Token {
-	symbols := make([]Token, 0, len(tokens.Data)/5)
-	line := 0
-	character := 0
-
-	for i := 0; i < len(tokens.Data); i += 5 {
-		deltaLine := int(tokens.Data[i])
-		deltaStart := int(tokens.Data[i+1])
-		length := int(tokens.Data[i+2])
-		tokenType := int(tokens.Data[i+3])
-		tokenModifiersBitset := int(tokens.Data[i+4])
-
-		line += deltaLine
-		if deltaLine == 0 {
-			character += deltaStart
-		} else {
-			character = deltaStart
-		}
-
-		currentPos := Position{Line: line, Character: character}
-		if isPositionInRange(currentPos, r, false) {
-			// fmt.Printf("Token at line %d, character %d, length %d, type %d, modifiers %b\n", line, character, length, tokenType, tokenModifiersBitset)
-			tokenTypeName := getSemanticTokenType(tokenType, cli.tokenTypes)
-			tokenModifierNames := getSemanticTokenModifier(tokenModifiersBitset, cli.tokenModifiers)
-			loc := Location{URI: file, Range: Range{Start: currentPos, End: Position{Line: line, Character: character + length}}}
-			text, _ := cli.Locate(loc)
-			symbols = append(symbols, Token{
-				Location:  loc,
-				Type:      tokenTypeName,
-				Modifiers: tokenModifierNames,
-				Text:      text,
-			})
-		}
-	}
-
-	// sort it by start position
-	sort.Slice(symbols, func(i, j int) bool {
-		if symbols[i].Location.URI != symbols[j].Location.URI {
-			return symbols[i].Location.URI < symbols[j].Location.URI
-		}
-		if symbols[i].Location.Range.Start.Line != symbols[j].Location.Range.Start.Line {
-			return symbols[i].Location.Range.Start.Line < symbols[j].Location.Range.Start.Line
-		}
-		return symbols[i].Location.Range.Start.Character < symbols[j].Location.Range.Start.Character
-	})
-
-	return symbols
-}
-
-func (a Location) Include(b Location) bool {
-	if a == b {
-		return true
-	}
-	if a.URI != b.URI {
-		return false
-	}
-	return isPositionInRange(b.Range.Start, a.Range, false) && isPositionInRange(b.Range.End, a.Range, true)
-}
-
-func (a Range) Include(b Range) bool {
-	return isPositionInRange(b.Start, a, false) && isPositionInRange(b.End, a, true)
-}
-
-func isPositionInRange(pos Position, r Range, close bool) bool {
-	if pos.Line < r.Start.Line || pos.Line > r.End.Line {
-		return false
-	}
-	if pos.Line == r.Start.Line && pos.Character < r.Start.Character {
-		return false
-	}
-	if pos.Line == r.End.Line {
-		if close {
-			return pos.Character <= r.End.Character
-		} else {
-			return pos.Character < r.End.Character
-		}
-	}
-	return true
-}
-
-func getSemanticTokenType(id int, semanticTokenTypes []string) string {
-	if id < len(semanticTokenTypes) {
-		return semanticTokenTypes[id]
-	}
-	return fmt.Sprintf("unknown(%d)", id)
-}
-
-func getSemanticTokenModifier(bitset int, semanticTokenModifiers []string) []string {
-	var result []string
-	for i, modifier := range semanticTokenModifiers {
-		if bitset&(1<<uint(i)) != 0 {
-			result = append(result, modifier)
-		}
-	}
-	for i := len(semanticTokenModifiers); i < 32; i++ {
-		if bitset&(1<<uint(i)) != 0 {
-			result = append(result, fmt.Sprintf("unknown(%d)", i))
-		}
-	}
-	return result
-}
-
-func (cli *LSPClient) WorkspaceSymbols(ctx context.Context, query string) ([]DocumentSymbol, error) {
-	req := lsp.WorkspaceSymbolParams{
-		Query: query,
-	}
-	var resp []DocumentSymbol
-	if err := cli.Call(ctx, "workspace/symbol", req, &resp); err != nil {
-		return nil, err
-	}
-	return resp, nil
-}
-
-func (cli *LSPClient) FileStructure(ctx context.Context, file DocumentURI) ([]*DocumentSymbol, error) {
-	syms, err := cli.DocumentSymbols(ctx, file)
-	if err != nil {
-		return nil, err
-	}
-	// construct symbol hierarchy through range relation, and represent it to DocumentSymobl.Children
-	symbols := make([]*DocumentSymbol, 0, len(syms))
-	for _, sym := range syms {
-		symbols = append(symbols, sym)
-	}
-	return constructSymbolHierarchy(symbols), nil
-}
-
-// constructSymbolHierarchy constructs a symbol hierarchy through range relation and represents it in DocumentSymbol.Children.
-func constructSymbolHierarchy(symbols []*DocumentSymbol) []*DocumentSymbol {
-	// Sort symbols by their start position
-	sort.Slice(symbols, func(i, j int) bool {
-		if symbols[i].Location.Range.Start.Line == symbols[j].Location.Range.Start.Line {
-			return symbols[i].Location.Range.Start.Character < symbols[j].Location.Range.Start.Character
-		}
-		return symbols[i].Location.Range.Start.Line < symbols[j].Location.Range.Start.Line
-	})
-
-	var rootSymbols []*DocumentSymbol
-	var stack []*DocumentSymbol
-
-	for i := range symbols {
-		symbol := symbols[i]
-
-		// Pop symbols from the stack that are not parents of the current symbol
-		for len(stack) > 0 && !stack[len(stack)-1].Location.Range.Include(symbol.Location.Range) {
-			stack = stack[:len(stack)-1]
-		}
-
-		// If the stack is not empty, the top symbol is the parent of the current symbol
-		if len(stack) > 0 {
-			parent := stack[len(stack)-1]
-			parent.Children = append(parent.Children, symbol)
-		} else {
-			// If the stack is empty, the current symbol is a root symbol
-			rootSymbols = append(rootSymbols, symbol)
-		}
-
-		// Push the current symbol onto the stack
-		stack = append(stack, symbol)
-	}
-
-	return rootSymbols
-}
diff --git a/lang/lsp/lsp_methods.go b/lang/lsp/lsp_methods.go
new file mode 100644
index 0000000..99b3e0a
--- /dev/null
+++ b/lang/lsp/lsp_methods.go
@@ -0,0 +1,471 @@
+// Copyright 2025 CloudWeGo Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package lsp
+
+import (
+	"context"
+	"fmt"
+	"math"
+	"os"
+	"sort"
+
+	"github.com/cloudwego/abcoder/lang/utils"
+	lsp "github.com/sourcegraph/go-lsp"
+)
+
+type DocumentRange struct {
+	TextDocument lsp.TextDocumentIdentifier `json:"textDocument"`
+	Range        Range                      `json:"range"`
+}
+
+type SemanticTokensFullParams struct {
+	TextDocument lsp.TextDocumentIdentifier `json:"textDocument"`
+}
+
+type DidOpenTextDocumentParams struct {
+	TextDocument TextDocumentItem `json:"textDocument"`
+}
+
+// DidOpen reads file from disk, announces it to the server with textDocument/didOpen
+// and caches the resulting item. A file already present in the cache is returned as is.
+func (cli *LSPClient) DidOpen(ctx context.Context, file DocumentURI) (*TextDocumentItem, error) {
+	if f, ok := cli.files[file]; ok {
+		return f, nil
+	}
+	text, err := os.ReadFile(file.File())
+	if err != nil {
+		return nil, err
+	}
+	f := &TextDocumentItem{
+		URI:        DocumentURI(file),
+		LanguageID: cli.Language.String(),
+		Version:    1,
+		Text:       string(text),
+		LineCounts: utils.CountLines(string(text)),
+	}
+	req := DidOpenTextDocumentParams{TextDocument: *f}
+	if err := cli.Notify(ctx, "textDocument/didOpen", req); err != nil {
+		return nil, err
+	}
+	cli.files[file] = f // cache only once the server has been told, so a failed notify is retried
+	return f, nil
+}
+
+func (cli *LSPClient) DocumentSymbols(ctx context.Context, file DocumentURI) (map[Range]*DocumentSymbol, error) {
+	// open file first
+	f, err := cli.DidOpen(ctx, file)
+	if err != nil {
+		return nil, err
+	}
+	if f.Symbols != nil {
+		return f.Symbols, nil
+	}
+	uri := lsp.DocumentURI(file)
+	req := lsp.DocumentSymbolParams{
+		TextDocument: lsp.TextDocumentIdentifier{
+			URI: uri,
+		},
+	}
+	var resp []DocumentSymbol
+	if err := cli.Call(ctx, "textDocument/documentSymbol", req, &resp); err != nil {
+		return nil, err
+	}
+	// cache symbols
+	f.Symbols = make(map[Range]*DocumentSymbol, len(resp))
+	for i := range resp {
+		s := &resp[i]
+		f.Symbols[s.Location.Range] = s
+	}
+	return f.Symbols, nil
+}
+
+func (cli *LSPClient) References(ctx context.Context, id Location) ([]Location, error) {
+	if _, err := cli.DidOpen(ctx, id.URI); err != nil {
+		return nil, err
+	}
+	uri := lsp.DocumentURI(id.URI)
+	req := lsp.ReferenceParams{
+		TextDocumentPositionParams: lsp.TextDocumentPositionParams{
+			TextDocument: lsp.TextDocumentIdentifier{
+				URI: uri,
+			},
+			Position: lsp.Position{
+				Line:      id.Range.Start.Line,
+				Character: id.Range.Start.Character + 1,
+			},
+		},
+		Context: lsp.ReferenceContext{
+			IncludeDeclaration: true,
+		},
+	}
+	var resp []Location
+	if err := cli.Call(ctx, "textDocument/references", req, &resp); err != nil {
+		return nil, err
+	}
+	return resp, nil
+}
+
+// Some language servers do not provide semanticTokens/range.
+// In that case, we fall back to semanticTokens/full and then filter the tokens manually.
+func (cli *LSPClient) getSemanticTokensRange(ctx context.Context, req DocumentRange, resp *SemanticTokens) error {
+	if cli.hasSemanticTokensRange {
+		if err := cli.Call(ctx, "textDocument/semanticTokens/range", req, resp); err != nil {
+			return err
+		}
+		return nil
+	}
+	// fall back to semanticTokens/full
+	req1 := SemanticTokensFullParams{
+		TextDocument: req.TextDocument,
+	}
+	if err := cli.Call(ctx, "textDocument/semanticTokens/full", req1, resp); err != nil {
+		return err
+	}
+	filterSemanticTokensInRange(resp, req.Range)
+	return nil
+}
+
+// filterSemanticTokensInRange rewrites resp.Data in place so that it only holds the
+// tokens whose start position lies in r (end of r inclusive). Data is the LSP relative
+// encoding: 5 integers per token, the first two being deltaLine and deltaStartChar.
+func filterSemanticTokensInRange(resp *SemanticTokens, r Range) {
+	const tokenWidth = 5
+	curPos := Position{Line: 0, Character: 0}
+	newData := []uint32{}
+	// Stop at the last complete group so a truncated payload cannot index out of bounds.
+	for i := 0; i+tokenWidth <= len(resp.Data); i += tokenWidth {
+		deltaLine := int(resp.Data[i])
+		deltaStart := int(resp.Data[i+1])
+		if deltaLine != 0 {
+			curPos.Line += deltaLine
+			curPos.Character = deltaStart
+		} else {
+			curPos.Character += deltaStart
+		}
+		if !isPositionInRange(curPos, r, true) {
+			continue
+		}
+		newData = append(newData, resp.Data[i:i+tokenWidth]...)
+		if len(newData) == tokenWidth {
+			// The first kept token has no predecessor left, so its deltas
+			// must become its absolute position.
+			newData[0] = uint32(curPos.Line)
+			newData[1] = uint32(curPos.Character)
+		}
+	}
+	resp.Data = newData
+}
+
+func (cli *LSPClient) SemanticTokens(ctx context.Context, id Location) ([]Token, error) {
+	// open file first
+	syms, err := cli.DocumentSymbols(ctx, id.URI)
+	if err != nil {
+		return nil, err
+	}
+	sym := syms[id.Range]
+	if sym != nil && sym.Tokens != nil {
+		return sym.Tokens, nil
+	}
+
+	uri := lsp.DocumentURI(id.URI)
+	req := DocumentRange{
+		TextDocument: lsp.TextDocumentIdentifier{
+			URI: uri,
+		},
+		Range: id.Range,
+	}
+
+	var resp SemanticTokens
+	if err := cli.getSemanticTokensRange(ctx, req, &resp); err != nil {
+		return nil, err
+	}
+
+	toks := cli.getAllTokens(resp, id.URI)
+	if sym != nil {
+		sym.Tokens = toks
+	}
+	return toks, nil
+}
+
+func (cli *LSPClient) Definition(ctx context.Context, uri DocumentURI, pos Position) ([]Location, error) {
+	// open file first
+	f, err := cli.DidOpen(ctx, uri)
+	if err != nil {
+		return nil, err
+	}
+	if f.Definitions != nil {
+		if locations, ok := f.Definitions[pos]; ok {
+			return locations, nil
+		}
+	}
+
+	// call
+	req := lsp.TextDocumentPositionParams{
+		TextDocument: lsp.TextDocumentIdentifier{
+			URI: lsp.DocumentURI(uri),
+		},
+		Position: lsp.Position(pos),
+	}
+	var resp []Location
+	if err := cli.Call(ctx, "textDocument/definition", req, &resp); err != nil {
+		return nil, err
+	}
+
+	// cache definitions
+	if f.Definitions == nil {
+		f.Definitions = make(map[Position][]Location)
+	}
+	f.Definitions[pos] = resp
+	return resp, nil
+}
+
+func (cli *LSPClient) TypeDefinition(ctx context.Context, uri DocumentURI, pos Position) ([]Location, error) {
+	req := lsp.TextDocumentPositionParams{
+		TextDocument: lsp.TextDocumentIdentifier{
+			URI: lsp.DocumentURI(uri),
+		},
+		Position: lsp.Position(pos),
+	}
+	var resp []Location
+	if err := cli.Call(ctx, "textDocument/typeDefinition", req, &resp); err != nil {
+		return nil, err
+	}
+	return resp, nil
+}
+
+// Locate returns the text of the file id.URI covered by id.Range. A file that is not cached
+// yet is read from disk and cached; read failures and out-of-bounds ranges yield an error.
+func (cli *LSPClient) Locate(id Location) (string, error) {
+	f, ok := cli.files[id.URI]
+	if !ok {
+		fd, err := os.ReadFile(id.URI.File())
+		if err != nil {
+			return "", err
+		}
+		text := string(fd)
+		f = &TextDocumentItem{URI: DocumentURI(id.URI), LanguageID: cli.Language.String(), Version: 1, Text: text, LineCounts: utils.CountLines(text)}
+		cli.files[id.URI] = f
+	}
+	// Validate the range first: the index and slice expressions below would panic otherwise.
+	first, last := id.Range.Start.Line, id.Range.End.Line
+	if first < 0 || last < 0 || first >= len(f.LineCounts) || last >= len(f.LineCounts) {
+		return "", fmt.Errorf("range %s exceeds the lines of %s", id.Range, id.URI)
+	}
+	// LineCounts[n] is used as the offset in Text at which line n begins.
+	start := f.LineCounts[first] + id.Range.Start.Character
+	end := f.LineCounts[last] + id.Range.End.Character
+	if start < 0 || end < start || end > len(f.Text) {
+		return "", fmt.Errorf("range %s exceeds the text of %s", id.Range, id.URI)
+	}
+	return f.Text[start:end], nil
+}
+
+// get line text of pos
+func (cli *LSPClient) Line(uri DocumentURI, pos int) string {
+	f, ok := cli.files[uri]
+	if !ok {
+		// open file os
+		fd, err := os.ReadFile(uri.File())
+		if err != nil {
+			return ""
+		}
+		text := string(fd)
+		f = &TextDocumentItem{
+			URI:        DocumentURI(uri),
+			LanguageID: cli.Language.String(),
+			Version:    1,
+			Text:       text,
+			LineCounts: utils.CountLines(text),
+		}
+		cli.files[uri] = f
+	}
+	if pos < 0 || pos >= len(f.LineCounts) {
+		return ""
+	}
+	start := f.LineCounts[pos]
+	end := len(f.Text)
+	if pos+1 < len(f.LineCounts) {
+		end = f.LineCounts[pos+1]
+	}
+	return f.Text[start:end]
+}
+
+func (cli *LSPClient) LineCounts(uri DocumentURI) []int {
+	f, ok := cli.files[uri]
+	if !ok {
+		// open file os
+		fd, err := os.ReadFile(uri.File())
+		if err != nil {
+			return nil
+		}
+		text := string(fd)
+		f = &TextDocumentItem{
+			URI:        DocumentURI(uri),
+			LanguageID: cli.Language.String(),
+			Version:    1,
+			Text:       text,
+			LineCounts: utils.CountLines(text),
+		}
+		cli.files[uri] = f
+	}
+	return f.LineCounts
+}
+
+func (cli *LSPClient) GetFile(uri DocumentURI) *TextDocumentItem {
+	return cli.files[uri]
+}
+
+func (cli *LSPClient) GetParent(sym *DocumentSymbol) (ret *DocumentSymbol) {
+	if sym == nil {
+		return nil
+	}
+	if f, ok := cli.files[sym.Location.URI]; ok {
+		for _, s := range f.Symbols {
+			if s != sym && s.Location.Range.Include(sym.Location.Range) {
+				if ret == nil || ret.Location.Range.Include(s.Location.Range) {
+					ret = s
+				}
+			}
+		}
+	}
+	return
+}
+
+func (cli *LSPClient) getAllTokens(tokens SemanticTokens, file DocumentURI) []Token {
+	start := Position{Line: 0, Character: 0}
+	end := Position{Line: math.MaxInt32, Character: math.MaxInt32}
+	return cli.getRangeTokens(tokens, file, Range{Start: start, End: end})
+}
+
+func (cli *LSPClient) getRangeTokens(tokens SemanticTokens, file DocumentURI, r Range) []Token {
+	symbols := make([]Token, 0, len(tokens.Data)/5)
+	line := 0
+	character := 0
+
+	for i := 0; i < len(tokens.Data); i += 5 {
+		deltaLine := int(tokens.Data[i])
+		deltaStart := int(tokens.Data[i+1])
+		length := int(tokens.Data[i+2])
+		tokenType := int(tokens.Data[i+3])
+		tokenModifiersBitset := int(tokens.Data[i+4])
+
+		line += deltaLine
+		if deltaLine == 0 {
+			character += deltaStart
+		} else {
+			character = deltaStart
+		}
+
+		currentPos := Position{Line: line, Character: character}
+		if isPositionInRange(currentPos, r, false) {
+			// fmt.Printf("Token at line %d, character %d, length %d, type %d, modifiers %b\n", line, character, length, tokenType, tokenModifiersBitset)
+			tokenTypeName := getSemanticTokenType(tokenType, cli.tokenTypes)
+			tokenModifierNames := getSemanticTokenModifier(tokenModifiersBitset, cli.tokenModifiers)
+			loc := Location{URI: file, Range: Range{Start: currentPos, End: Position{Line: line, Character: character + length}}}
+			text, _ := cli.Locate(loc)
+			symbols = append(symbols, Token{
+				Location:  loc,
+				Type:      tokenTypeName,
+				Modifiers: tokenModifierNames,
+				Text:      text,
+			})
+		}
+	}
+
+	// sort it by start position
+	sort.Slice(symbols, func(i, j int) bool {
+		if symbols[i].Location.URI != symbols[j].Location.URI {
+			return symbols[i].Location.URI < symbols[j].Location.URI
+		}
+		if symbols[i].Location.Range.Start.Line != symbols[j].Location.Range.Start.Line {
+			return symbols[i].Location.Range.Start.Line < symbols[j].Location.Range.Start.Line
+		}
+		return symbols[i].Location.Range.Start.Character < symbols[j].Location.Range.Start.Character
+	})
+
+	return symbols
+}
+
+func (cli *LSPClient) FileStructure(ctx context.Context, file DocumentURI) ([]*DocumentSymbol, error) {
+	syms, err := cli.DocumentSymbols(ctx, file)
+	if err != nil {
+		return nil, err
+	}
+	// Build the symbol hierarchy from range containment and expose it through DocumentSymbol.Children.
+	symbols := make([]*DocumentSymbol, 0, len(syms))
+	for _, sym := range syms {
+		symbols = append(symbols, sym)
+	}
+	return constructSymbolHierarchy(symbols), nil
+}
+
+func getSemanticTokenType(id int, semanticTokenTypes []string) string {
+	if id < 0 || id >= len(semanticTokenTypes) { // not an index into the given legend: report it, never panic
+		return fmt.Sprintf("unknown(%d)", id)
+	}
+	return semanticTokenTypes[id] // name at position id of the legend
+}
+
+func getSemanticTokenModifier(bitset int, semanticTokenModifiers []string) []string {
+	var result []string
+	for i, modifier := range semanticTokenModifiers {
+		if bitset&(1<<uint(i)) != 0 {
+			result = append(result, modifier)
+		}
+	}
+	for i := len(semanticTokenModifiers); i < 32; i++ {
+		if bitset&(1<<uint(i)) != 0 {
+			result = append(result, fmt.Sprintf("unknown(%d)", i))
+		}
+	}
+	return result
+}
+
+// constructSymbolHierarchy constructs a symbol hierarchy through range relation and represents it in DocumentSymbol.Children.
+func constructSymbolHierarchy(symbols []*DocumentSymbol) []*DocumentSymbol {
+	// Sort symbols by their start position
+	sort.Slice(symbols, func(i, j int) bool {
+		if symbols[i].Location.Range.Start.Line == symbols[j].Location.Range.Start.Line {
+			return symbols[i].Location.Range.Start.Character < symbols[j].Location.Range.Start.Character
+		}
+		return symbols[i].Location.Range.Start.Line < symbols[j].Location.Range.Start.Line
+	})
+
+	var rootSymbols []*DocumentSymbol
+	var stack []*DocumentSymbol
+
+	for i := range symbols {
+		symbol := symbols[i]
+
+		// Pop symbols from the stack that are not parents of the current symbol
+		for len(stack) > 0 && !stack[len(stack)-1].Location.Range.Include(symbol.Location.Range) {
+			stack = stack[:len(stack)-1]
+		}
+
+		// If the stack is not empty, the top symbol is the parent of the current symbol
+		if len(stack) > 0 {
+			parent := stack[len(stack)-1]
+			parent.Children = append(parent.Children, symbol)
+		} else {
+			// If the stack is empty, the current symbol is a root symbol
+			rootSymbols = append(rootSymbols, symbol)
+		}
+
+		// Push the current symbol onto the stack
+		stack = append(stack, symbol)
+	}
+
+	return rootSymbols
+}

From 458e1ba6ed01b443aace61f578c1216440a40342 Mon Sep 17 00:00:00 2001
From: Hoblovski <dzy.0424thu@gmail.com>
Date: Fri, 29 Aug 2025 15:16:47 +0800
Subject: [PATCH 02/11] fix: improve tests

---
 lang/lsp/clients_test.go | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/lang/lsp/clients_test.go b/lang/lsp/clients_test.go
index 47e1fe4..d28c8f6 100644
--- a/lang/lsp/clients_test.go
+++ b/lang/lsp/clients_test.go
@@ -160,10 +160,10 @@ Output`
 	// references
 	refRange := Range{
 		Start: Position{
-			Line:      13,
-			Character: 13,
+			Line:      48,
+			Character: 6,
 		},
-	}
+	} // trait $0MyTrait {
 	t.Run("references", func(t *testing.T) {
 		id := Location{
 			URI:   entity_mod_uri,
@@ -173,6 +173,9 @@ Output`
 		if err != nil {
 			t.Fatalf("Find Reference failed: %v", err)
 		}
+		if len(references) != 4 {
+			t.Fatalf("Expected 4 references, got %d\n%+v\n", len(references), references)
+		}
 		if _, err := json.Marshal(references); err != nil {
 			t.Fatalf("Marshal Reference failed: %v", err)
 		}
@@ -198,6 +201,12 @@ Output`
 		if err != nil {
 			t.Fatalf("Semantic Tokens failed: %v", err)
 		}
+		if len(tokens) != 149 {
+			t.Fatalf("Expected 149 semantic tokens, got %d\n%+v", len(tokens), tokens)
+		}
+		if len(tokens) == 0 {
+			t.Fatalf("Semantic Tokens should not be empty")
+		}
 		if _, err := json.Marshal(tokens); err != nil {
 			t.Fatalf("Marshal Semantic Tokens failed: %v", err)
 		}
@@ -230,7 +239,6 @@ Output`
 			if len(definition) != 1 {
 				t.Fatalf("Find Definition should have found entry, but got none at %#v", pos)
 			}
-			// t.Logf("Find Definition %#v ->\n%#v", pos, definition)
 		}
 	})
 

From c17ecad1e7490eec2141cf88fb4718f7720381c1 Mon Sep 17 00:00:00 2001
From: Hoblovski <dzy.0424thu@gmail.com>
Date: Fri, 29 Aug 2025 15:21:05 +0800
Subject: [PATCH 03/11] fix: remove duplicate copyright

---
 testdata/rust/0_rust2/src/entity/inter.rs | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/testdata/rust/0_rust2/src/entity/inter.rs b/testdata/rust/0_rust2/src/entity/inter.rs
index fd78410..41a5260 100644
--- a/testdata/rust/0_rust2/src/entity/inter.rs
+++ b/testdata/rust/0_rust2/src/entity/inter.rs
@@ -12,22 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-/**
- * Copyright 2025 ByteDance Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
 pub trait Addable {
     fn id() -> i64;
     fn add(&self, b: i64) -> i64;

From 046f38766a9fa7d64032bca93112ac7e617f27a2 Mon Sep 17 00:00:00 2001
From: Hoblovski <dzy.0424thu@gmail.com>
Date: Fri, 29 Aug 2025 16:40:51 +0800
Subject: [PATCH 04/11] fix: rust0 didn't compile

---
 testdata/rust/0_rust2/src/entity/inter.rs | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/testdata/rust/0_rust2/src/entity/inter.rs b/testdata/rust/0_rust2/src/entity/inter.rs
index 41a5260..c1bcc46 100644
--- a/testdata/rust/0_rust2/src/entity/inter.rs
+++ b/testdata/rust/0_rust2/src/entity/inter.rs
@@ -19,23 +19,25 @@ pub trait Addable {
 
 pub struct AnyInt(i64);
 
-//impl AnyInt {
-//    pub fn id() -> i64 {
-//        0
-//    }
-//    pub fn add(&self, b: i64) -> i64 {
-//        self.0 + b
-//    }
-//}
+impl AnyInt {
+    //pub fn id() -> i64 {
+    pub fn idx() -> i64 {
+        0
+    }
+    //pub fn add(&self, b: i64) -> i64 {
+    pub fn addx(&self, b: i64) -> i64 {
+        self.0 + b
+    }
+}
 
 impl Addable for AnyInt {
     fn add(&self, b: i64) -> i64 {
         // use the method defined in the struct
-        self.add(b)
+        self.addx(b)
     }
     fn id() -> i64 {
         // use the method defined in the struct
-        AnyInt::id()
+        AnyInt::idx()
     }
 }
 

From f22aba5ebb32076f6b452f6a0b20057cfd4cae43 Mon Sep 17 00:00:00 2001
From: Hoblovski <dzy.0424thu@gmail.com>
Date: Fri, 29 Aug 2025 16:46:18 +0800
Subject: [PATCH 05/11] fix: unstable CI due to insufficient sleep

The LSP needs time to init, but we didn't give it enough time.
It didn't sleep well, and returned partial results.
---
 lang/lsp/testutils.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lang/lsp/testutils.go b/lang/lsp/testutils.go
index 2cd00ac..4a272e8 100644
--- a/lang/lsp/testutils.go
+++ b/lang/lsp/testutils.go
@@ -40,6 +40,6 @@ func InitLSPForFirstTest(lang uniast.Language, server string) (*LSPClient, strin
 		return nil, "", err
 	}
 	clients[lang] = client
-	time.Sleep(3 * time.Second) // wait for LSP server to be ready
+	time.Sleep(5 * time.Second) // wait for LSP server to be ready
 	return client, testdata, nil
 }

From 36683ad4d81c11f06bdfde408ae92fa93109756e Mon Sep 17 00:00:00 2001
From: Hoblovski <dzy.0424thu@gmail.com>
Date: Fri, 29 Aug 2025 17:18:50 +0800
Subject: [PATCH 06/11] feat: run all tests in parallel

---
 script/run_testdata.sh | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100755 script/run_testdata.sh

diff --git a/script/run_testdata.sh b/script/run_testdata.sh
new file mode 100755
index 0000000..116015e
--- /dev/null
+++ b/script/run_testdata.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+# Generate uniast for all testdata. Must be run from repo root
+#
+# USAGE:
+# 1. Save the uniast to out/
+# $ OUTDIR=out/ ./script/run_testdata.sh
+#
+# 2. Save the uniast to out/ , colorize output for human readable terminal
+# OUTDIR=out/ PARALLEL_FLAGS=--ctag ./script/run_testdata.sh
+#
+# 3. Use a custom abcoder executable
+# OUTDIR=out/ ABCEXE="./other_abcoder" ./script/run_testdata.sh
+
+ABCEXE=${ABCEXE:-./abcoder}
+OUTDIR=${OUTDIR:?Error: OUTDIR is a mandatory environment variable}
+PARALLEL_FLAGS=${PARALLEL_FLAGS:---tag}
+
+LANGS=(go rust python cxx)
+
+detect_jobs() {
+	local ABCEXE=${1:-$ABCEXE}
+	for lang in ${LANGS[@]}; do
+		for repo in testdata/$lang/*; do
+			outname=$(echo $repo | sed 's/^testdata\///; s/[/:? ]/_/g')
+			echo $ABCEXE parse $lang $repo -o $OUTDIR/$outname.json
+		done
+	done
+}
+
+mkdir -pv "$OUTDIR"
+detect_jobs | parallel $PARALLEL_FLAGS echo {}
+echo
+detect_jobs | parallel $PARALLEL_FLAGS -j$(nproc --all) --jobs 0 "eval {}" 2>&1

From c5771c71f2f0c482ff3620c9d4cfb5bb0e0b8ca4 Mon Sep 17 00:00:00 2001
From: Hoblovski <dzy.0424thu@gmail.com>
Date: Fri, 29 Aug 2025 17:41:23 +0800
Subject: [PATCH 07/11] feat: scripts for regression testing

---
 script/diffjson.py                            | 185 ++++++++++++++++++
 .../{run_testdata.sh => run_all_testdata.sh}  |   4 +
 2 files changed, 189 insertions(+)
 create mode 100755 script/diffjson.py
 rename script/{run_testdata.sh => run_all_testdata.sh} (86%)

diff --git a/script/diffjson.py b/script/diffjson.py
new file mode 100755
index 0000000..c7bf4ce
--- /dev/null
+++ b/script/diffjson.py
@@ -0,0 +1,185 @@
+#!/usr/bin/env python3
+import argparse
+import json
+import sys
+from pathlib import Path
+from typing import Literal
+
+from deepdiff import DeepDiff
+
+# Define status types for clarity
+Status = Literal["OK", "BAD", "FILE_ERROR"]
+
+
+def format_diff_custom(diff: DeepDiff) -> str:
+    """
+    Formats a DeepDiff result as text: one path header per change, indented old/new values, dashed separators.
+    Only the five change categories handled below are rendered; any other category present in `diff` is omitted.
+    """
+    output = []
+
+    # Render a value for printing: dicts/lists as indented JSON, everything else via repr().
+    def format_value(value):
+        if isinstance(value, (dict, list)):
+            return json.dumps(value, indent=2)
+        return repr(value)
+
+    # Changed values: print the path followed by the old and the new value
+    if "values_changed" in diff:
+        for path, changes in diff["values_changed"].items():
+            output.append(f"Value Changed at: {path}")
+            output.append(f"  - old: {format_value(changes['old_value'])}")
+            output.append(f"  + new: {format_value(changes['new_value'])}")
+            output.append("--------------------")
+
+    # Items added to iterables: print the path and the added value
+    if "iterable_item_added" in diff:
+        for path, value in diff["iterable_item_added"].items():
+            output.append(f"Item Added at: {path}")
+            output.append(f"  + new: {format_value(value)}")
+            output.append("--------------------")
+
+    # Items removed from iterables: print the path and the removed value
+    if "iterable_item_removed" in diff:
+        for path, value in diff["iterable_item_removed"].items():
+            output.append(f"Item Removed at: {path}")
+            output.append(f"  - old: {format_value(value)}")
+            output.append("--------------------")
+
+    # Added dictionary keys: only the key path is printed, not its value
+    if "dictionary_item_added" in diff:
+        for path in diff["dictionary_item_added"]:
+            output.append(f"Dictionary Key Added: {path}")
+            output.append("--------------------")
+
+    # Removed dictionary keys: only the key path is printed, not its value
+    if "dictionary_item_removed" in diff:
+        for path in diff["dictionary_item_removed"]:
+            output.append(f"Dictionary Key Removed: {path}")
+            output.append("--------------------")
+
+    # Drop the trailing separator so the text does not end with a dashed line
+    if output and output[-1] == "--------------------":
+        output.pop()
+
+    return "\n".join(output)
+
+
+def compare_json_files(file1_path: Path, file2_path: Path) -> Status:
+    """
+    Compares the parsed content of two JSON files (list order ignored) without printing output.
+
+    Returns:
+        "OK" if they match, "BAD" if they don't, "FILE_ERROR" if either file cannot be opened, decoded or parsed.
+    """
+    try:
+        with open(file1_path, "r", encoding="utf-8") as f1:
+            json1 = json.load(f1)
+        with open(file2_path, "r", encoding="utf-8") as f2:
+            json2 = json.load(f2)
+    except (OSError, ValueError):  # OSError: missing/unreadable path; ValueError: invalid UTF-8 or invalid JSON
+        return "FILE_ERROR"
+
+    diff = DeepDiff(json1, json2, ignore_order=True)
+
+    return "BAD" if diff else "OK"
+
+
+def process_directory_comparison(old_dir: Path, new_dir: Path) -> bool:
+    """
+    Compares same-named *.json files of two directories, prints one status line per file; returns True if any is BAD or MISS.
+    """
+    results: dict[str, list[str]] = {"OK": [], "BAD": [], "MISS": [], "NEW": []}
+    old_files = {p.name for p in old_dir.glob("*.json")}
+    new_files = {p.name for p in new_dir.glob("*.json")}
+
+    for filename in sorted(old_files.intersection(new_files)):
+        status = compare_json_files(old_dir / filename, new_dir / filename)
+        results["BAD" if status != "OK" else "OK"].append(filename)  # FILE_ERROR is reported as BAD
+
+    for filename in sorted(old_files - new_files):  # present only in old_dir
+        results["MISS"].append(filename)
+
+    for filename in sorted(new_files - old_files):  # present only in new_dir
+        results["NEW"].append(filename)
+
+    for filename in results["OK"]:
+        print(f"[OK  ]  {filename}")
+    for filename in results["NEW"]:
+        print(f"[NEW ]  {filename}")
+    for filename in results["BAD"]:
+        print(f"[BAD ]  {filename}", file=sys.stderr)
+    for filename in results["MISS"]:
+        print(f"[MISS]  {filename}", file=sys.stderr)
+
+    return bool(results["BAD"] or results["MISS"])  # NEW files alone do not count as an error
+
+
+def main():  # CLI entry point; returns the exit status: 0 when no differences are reported, 1 on differences or errors
+    parser = argparse.ArgumentParser(
+        description="Compare two JSON files or two directories of JSON files."
+    )
+    parser.add_argument(
+        "path1", type=Path, help="Path to the first file or 'old' directory."
+    )
+    parser.add_argument(
+        "path2", type=Path, help="Path to the second file or 'new' directory."
+    )
+    args = parser.parse_args()
+
+    path1, path2 = args.path1, args.path2
+
+    if not path1.exists() or not path2.exists():
+        print(
+            f"Error: Path does not exist: {path1 if not path1.exists() else path2}",
+            file=sys.stderr,
+        )
+        return 1
+
+    # --- Handle Directory Comparison ---
+    if path1.is_dir() and path2.is_dir():
+        print(f"Comparing directories:\n- Old: {path1}\n- New: {path2}\n")
+        if process_directory_comparison(path1, path2):
+            print("\nComparison finished with errors.", file=sys.stderr)
+            return 1
+        else:
+            print("\nComparison finished successfully.")
+            return 0
+
+    # --- Handle Single File Comparison ---
+    elif path1.is_file() and path2.is_file():
+        try:
+            with open(path1, "r", encoding="utf-8") as f1:
+                json1 = json.load(f1)
+            with open(path2, "r", encoding="utf-8") as f2:
+                json2 = json.load(f2)
+        except (OSError, ValueError) as e:  # unreadable path, invalid UTF-8 or invalid JSON
+            print(f"Error reading or parsing file: {e}", file=sys.stderr)
+            return 1
+
+        diff = DeepDiff(json1, json2, ignore_order=True)
+
+        if diff:
+            print(
+                f"Differences found between '{path1.name}' and '{path2.name}':\n",
+                file=sys.stderr,
+            )
+            # Format the diff into a custom readable format and print to stderr
+            custom_output = format_diff_custom(diff)
+            print(custom_output, file=sys.stderr)
+            return 1
+        else:
+            print(f"Files '{path1.name}' and '{path2.name}' are identical.")
+            return 0
+
+    # --- Handle Invalid Input ---
+    else:
+        print(
+            "Error: Both arguments must be files or both must be directories.",
+            file=sys.stderr,
+        )
+        return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/script/run_testdata.sh b/script/run_all_testdata.sh
similarity index 86%
rename from script/run_testdata.sh
rename to script/run_all_testdata.sh
index 116015e..ee43da3 100755
--- a/script/run_testdata.sh
+++ b/script/run_all_testdata.sh
@@ -27,6 +27,10 @@ detect_jobs() {
 	done
 }
 
+if [[ ! -x "$ABCEXE" ]]; then
+    echo "Error: The specified abcoder executable '$ABCEXE' does not exist or is not executable." >&2
+    exit 1
+fi
 mkdir -pv "$OUTDIR"
 detect_jobs | parallel $PARALLEL_FLAGS echo {}
 echo

From d60b73b4b4de1f624864f2f23187229cf2e2afe5 Mon Sep 17 00:00:00 2001
From: Hoblovski <dzy.0424thu@gmail.com>
Date: Fri, 29 Aug 2025 18:03:58 +0800
Subject: [PATCH 08/11] feat: regression CI

---
 .github/workflows/regression.yml | 50 ++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 .github/workflows/regression.yml

diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml
new file mode 100644
index 0000000..f57c903
--- /dev/null
+++ b/.github/workflows/regression.yml
@@ -0,0 +1,50 @@
+name: Regression Test
+
+on:
+  push:
+
+jobs:
+  run_all_tests:
+    runs-on: ubuntu-latest
+    #if: "!contains(github.event.pull_request.title, '[NO-REGRESSION-TEST]')"
+    steps:
+      - name: Checkout pull request code
+        uses: actions/checkout@v4
+        with:
+          path: 'pr_repo'
+
+      - name: Checkout main branch code
+        uses: actions/checkout@v4
+        with:
+          ref: 'main'
+          path: 'main_repo'
+
+      - name: Setup Go environment
+        uses: actions/setup-go@v5
+        with:
+          go-version: '1.22'
+
+      - name: Compile both binaries
+        run: |
+          (cd main_repo && go build -o ../abcoder_old)
+          (cd pr_repo && go build -o ../abcoder_new)
+      
+      - name: Run test scripts and generate outputs
+        run: |
+          OUTDIR=out_old ABCEXE=./abcoder_old ./pr_repo/script/run_all_testdata.sh
+          OUTDIR=out_new ABCEXE=./abcoder_new ./pr_repo/script/run_all_testdata.sh
+      
+      - name: Compare outputs and check for regression
+        id: diff_check
+        run: ./pr_repo/script/diffjson.py out_old out_new
+        continue-on-error: true
+
+      - name: Upload output directories
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: regression-outputs
+          path: |
+            out_old
+            out_new
+          retention-days: 3

From b2172bbed772c5762dc91fc075d72a1dfcc55b0b Mon Sep 17 00:00:00 2001
From: Hoblovski <dzy.0424thu@gmail.com>
Date: Fri, 29 Aug 2025 18:23:33 +0800
Subject: [PATCH 09/11] fix: allow run_all_testdata to be run from anywhere

---
 script/run_all_testdata.sh | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/script/run_all_testdata.sh b/script/run_all_testdata.sh
index ee43da3..e81de21 100755
--- a/script/run_all_testdata.sh
+++ b/script/run_all_testdata.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Generate uniast for all testdata. Must be run from repo root
+# Generate uniast for all testdata.
 #
 # USAGE:
 # 1. Save the uniast to out/
@@ -11,7 +11,10 @@
 # 3. Use a custom abcoder executable
 # OUTDIR=out/ ABCEXE="./other_abcoder" ./script/run_testdata.sh
 
-ABCEXE=${ABCEXE:-./abcoder}
+SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
+REPO_ROOT=$(realpath --relative-to=$(pwd) "$SCRIPT_DIR/..")
+
+ABCEXE=${ABCEXE:-"$REPO_ROOT/abcoder"}
 OUTDIR=${OUTDIR:?Error: OUTDIR is a mandatory environment variable}
 PARALLEL_FLAGS=${PARALLEL_FLAGS:---tag}
 
@@ -20,18 +23,19 @@ LANGS=(go rust python cxx)
 detect_jobs() {
 	local ABCEXE=${1:-$ABCEXE}
 	for lang in ${LANGS[@]}; do
-		for repo in testdata/$lang/*; do
-			outname=$(echo $repo | sed 's/^testdata\///; s/[/:? ]/_/g')
+		for repo in "$REPO_ROOT/testdata/$lang"/*; do
+			local rel_path=$(realpath --relative-to="$REPO_ROOT/testdata" "$repo")
+			local outname=$(echo "$rel_path" | sed 's/[/:? ]/_/g')
 			echo $ABCEXE parse $lang $repo -o $OUTDIR/$outname.json
 		done
 	done
 }
 
 if [[ ! -x "$ABCEXE" ]]; then
-    echo "Error: The specified abcoder executable '$ABCEXE' does not exist or is not executable." >&2
-    exit 1
+	echo "Error: The specified abcoder executable '$ABCEXE' does not exist or is not executable." >&2
+	exit 1
 fi
 mkdir -pv "$OUTDIR"
-detect_jobs | parallel $PARALLEL_FLAGS echo {}
+detect_jobs
 echo
 detect_jobs | parallel $PARALLEL_FLAGS -j$(nproc --all) --jobs 0 "eval {}" 2>&1

From 21b896584a50c27ced2070968303ecf44b64c1fd Mon Sep 17 00:00:00 2001
From: Hoblovski <dzy.0424thu@gmail.com>
Date: Fri, 29 Aug 2025 18:50:36 +0800
Subject: [PATCH 10/11] fix: fix regression CI

---
 .github/workflows/regression.yml | 40 +++++++++++++++++++++++++-------
 script/requirements.txt          |  1 +
 script/run_all_testdata.sh       |  3 +--
 3 files changed, 33 insertions(+), 11 deletions(-)
 create mode 100644 script/requirements.txt

diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml
index f57c903..aae10a2 100644
--- a/.github/workflows/regression.yml
+++ b/.github/workflows/regression.yml
@@ -2,16 +2,43 @@ name: Regression Test
 
 on:
   push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
 
 jobs:
   run_all_tests:
     runs-on: ubuntu-latest
     #if: "!contains(github.event.pull_request.title, '[NO-REGRESSION-TEST]')"
     steps:
+      - name: Setup Go environment
+        uses: actions/setup-go@v5
+        with:
+          go-version: '1.22'
+
+      - name: Setup Rust toolchain
+        uses: dtolnay/rust-toolchain@stable
+        with:
+          toolchain: stable
+          components: rust-analyzer
+
+      - name: Setup Python environment
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
       - name: Checkout pull request code
         uses: actions/checkout@v4
         with:
           path: 'pr_repo'
+          submodules: true
+
+      - name: Install Python dependencies
+        run: |
+          pip install -r ./pr_repo/script/requirements.txt
+          pip install ./pr_repo/pylsp
 
       - name: Checkout main branch code
         uses: actions/checkout@v4
@@ -19,21 +46,16 @@ jobs:
           ref: 'main'
           path: 'main_repo'
 
-      - name: Setup Go environment
-        uses: actions/setup-go@v5
-        with:
-          go-version: '1.22'
-
       - name: Compile both binaries
         run: |
           (cd main_repo && go build -o ../abcoder_old)
           (cd pr_repo && go build -o ../abcoder_new)
-      
+
       - name: Run test scripts and generate outputs
         run: |
-          OUTDIR=out_old ABCEXE=./abcoder_old ./pr_repo/script/run_all_testdata.sh
-          OUTDIR=out_new ABCEXE=./abcoder_new ./pr_repo/script/run_all_testdata.sh
-      
+          LANGS="go rust python" OUTDIR=out_old ABCEXE=./abcoder_old ./pr_repo/script/run_all_testdata.sh
+          LANGS="go rust python" OUTDIR=out_new ABCEXE=./abcoder_new ./pr_repo/script/run_all_testdata.sh
+
       - name: Compare outputs and check for regression
         id: diff_check
         run: ./pr_repo/script/diffjson.py out_old out_new
diff --git a/script/requirements.txt b/script/requirements.txt
new file mode 100644
index 0000000..084ef59
--- /dev/null
+++ b/script/requirements.txt
@@ -0,0 +1 @@
+deepdiff
diff --git a/script/run_all_testdata.sh b/script/run_all_testdata.sh
index e81de21..fb9d2dc 100755
--- a/script/run_all_testdata.sh
+++ b/script/run_all_testdata.sh
@@ -17,8 +17,7 @@ REPO_ROOT=$(realpath --relative-to=$(pwd) "$SCRIPT_DIR/..")
 ABCEXE=${ABCEXE:-"$REPO_ROOT/abcoder"}
 OUTDIR=${OUTDIR:?Error: OUTDIR is a mandatory environment variable}
 PARALLEL_FLAGS=${PARALLEL_FLAGS:---tag}
-
-LANGS=(go rust python cxx)
+LANGS=${LANGS:-"go rust python cxx"}
 
 detect_jobs() {
 	local ABCEXE=${1:-$ABCEXE}

From ca872e9c47dd604f5ce6ba1598708137ddc7ecc0 Mon Sep 17 00:00:00 2001
From: Hoblovski <dzy.0424thu@gmail.com>
Date: Tue, 2 Sep 2025 14:24:15 +0800
Subject: [PATCH 11/11] fix: fix nits from review

---
 lang/lsp/handler.go        | 7 +++----
 script/run_all_testdata.sh | 6 +++---
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/lang/lsp/handler.go b/lang/lsp/handler.go
index b37e7e1..40274fc 100644
--- a/lang/lsp/handler.go
+++ b/lang/lsp/handler.go
@@ -17,7 +17,6 @@ package lsp
 import (
 	"container/list"
 	"context"
-	"fmt"
 	"sync"
 	"time"
 
@@ -94,9 +93,9 @@ loop:
 
 func (h *lspHandler) Handle(ctx context.Context, conn *jsonrpc2.Conn, req *jsonrpc2.Request) {
 	// This method will be called for both requests and notifications
-	log.Info("handle method: %s\n", req.Method)
+	log.Debug("handle method: %s\n", req.Method)
 	if req.Params != nil {
-		log.Info("param: %s\n", string(*req.Params))
+		log.Debug("param: %s\n", string(*req.Params))
 	}
 	if req.Notif {
 		// This is a notification
@@ -129,7 +128,7 @@ func (h *lspHandler) handleNotification(ctx context.Context, conn *jsonrpc2.Conn
 	switch req.Method {
 	case "textDocument/publishDiagnostics":
 		// This notification is sent from the server to the client to signal results of validation runs.
-		fmt.Printf("Received publishDiagnostics notification:\n%s\n", string(*req.Params))
+		log.Debug("Received publishDiagnostics notification:\n%s\n", string(*req.Params))
 		return
 	// exit
 	case "exit":
diff --git a/script/run_all_testdata.sh b/script/run_all_testdata.sh
index fb9d2dc..533c3cc 100755
--- a/script/run_all_testdata.sh
+++ b/script/run_all_testdata.sh
@@ -3,13 +3,13 @@
 #
 # USAGE:
 # 1. Save the uniast to out/
-# $ OUTDIR=out/ ./script/run_testdata.sh
+# $ OUTDIR=out/ ./script/run_all_testdata.sh
 #
 # 2. Save the uniast to out/ , colorize output for human readable terminal
-# OUTDIR=out/ PARALLEL_FLAGS=--ctag ./script/run_testdata.sh
+# OUTDIR=out/ PARALLEL_FLAGS=--ctag ./script/run_all_testdata.sh
 #
 # 3. Use a custom abcoder executable
-# OUTDIR=out/ ABCEXE="./other_abcoder" ./script/run_testdata.sh
+# OUTDIR=out/ ABCEXE="./other_abcoder" ./script/run_all_testdata.sh
 
 SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
 REPO_ROOT=$(realpath --relative-to=$(pwd) "$SCRIPT_DIR/..")