In [8]:
import json
from collections import defaultdict

def parse_go_test_log(file_path: str):
    tests = defaultdict(lambda: {"status": None, "elapsed": None, "outputs": []})

    with open(file_path, "r") as f:
        for line in f:
            try:
                event = json.loads(line)
            except json.JSONDecodeError:
                continue  # skip malformed lines

            test_name = event.get("Test")
            action = event.get("Action")
            output = event.get("Output")

            if not test_name:
                continue

            # Capture outputs
            if action == "output" and output:
                tests[test_name]["outputs"].append(output.strip())

            # Capture pass/fail status
            if action in ["fail", "pass"]:
                tests[test_name]["status"] = action
                tests[test_name]["elapsed"] = event.get("Elapsed", None)

    # Keep only failing tests
    failures = []
    for test_name, data in tests.items():
        if data["status"] == "fail":
            failures.append({
                "test_name": test_name,
                "status": data["status"],
                "elapsed": data["elapsed"],
                "log": "\n".join(data["outputs"])
            })

    return failures


In [27]:
failures = parse_go_test_log("test.log")
count=0
for f in failures:
    if f["status"] == "fail":
        count+=1
        print(f"\nTest: {f['test_name']}")
        print(f"Status: {f['status']}")
        print(f"Log:\n{f['log']}")
        print("******\n")

print("Total Test failures:",count)

In [20]:
system_prompt = """You are an expert Go developer and test automation engineer specializing in the Testify framework. 

When analyzing Go testify test failures, focus on:
1. Go-specific issues: Goroutine leaks, race conditions, interface implementations, nil pointer dereferences
2. Testify patterns: Suite setup/teardown issues, assertion failures, mock problems  
3. Common Go testing pitfalls: Table-driven test issues, test isolation problems, timing-sensitive tests

Your output **must be a valid JSON object** with this structure:

{
  "Testname": "<name of the test>",
  "RootCause": "<What caused the failure in 1-2 sentences>",
  "Fix": "<Concrete solution in 1-2 sentences>",
  "Prevention": "<How to avoid this in the future>"
}

**Rules**:
- Do NOT include any extra text outside this JSON.
- Keep the response under 200 words.
- Focus on actionable insights.
- Use valid JSON syntax with double quotes.
"""

In [21]:
def user_prompt_for(test_failure: dict) -> str:
    """
    Build the user prompt for LLM analysis using a concise format.

    Args:
        test_failure (dict): Single test failure dict containing 'test_name' and 'log'

    Returns:
        str: The formatted user prompt string
    """
    prompt = f"""
Provide concise root cause analysis focusing on Go/testify specifics.

Test Name: {test_failure['test_name']}
Log:
{test_failure['log']}

Return only JSON like this example:
{{
  "TestName": "TEST_XCP",
  "RootCause": "Database timeout",
  "GoTestifyIssue": "Nil pointer dereference",
  "Fix": "Increase DB pool size or retry",
  "Prevention": "Add error handling and unit tests"
}}
"""
    return prompt




In [22]:
import json
from typing import List, Dict
from openai import OpenAI

client = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')

def llama_summarize(messages: List[Dict]) -> Dict:
    """
    Calls the Llama model via OpenAI-compatible API.
    Returns structured JSON content.
    """
    response = client.chat.completions.create(
        model="llama3.2",
        messages=messages,
        response_format={"type": "json"}  # ensures JSON output
    )
    return json.loads(response.choices[0].message.content)






# ----------------------
# Single failure analysis
# ----------------------
def analyze_failure(test_failure: Dict) -> Dict:
    """
    Analyze a single test failure using Llama, with safe JSON parsing.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt_for(test_failure)}
    ]

    try:
        response_text = llama_summarize(messages)  # Calls Llama client

        try:
            analysis = json.loads(response_text)
        except json.JSONDecodeError:
            print(f"Warning: LLM returned invalid JSON for {test_failure['test_name']}")
            analysis = {
                "TestName": test_failure["test_name"],
                "RootCause": response_text.strip(),
                "GoTestifyIssue": "",
                "Fix": "",
                "Prevention": ""
            }

        # Ensure TestName is included
        if "TestName" not in analysis:
            analysis["TestName"] = test_failure["test_name"]

    except Exception as e:
        analysis = {
            "TestName": test_failure["test_name"],
            "RootCause": "Error analyzing with LLM",
            "GoTestifyIssue": "",
            "Fix": "",
            "Prevention": "",
            "Error": str(e)
        }

    return {
        "test_name": test_failure["test_name"],
        "elapsed": test_failure.get("elapsed"),
        "analysis": analysis
    }



In [23]:
import time
import json
from typing import List, Dict

def analyze_failures(failures: List[Dict]) -> List[Dict]:
    """
    Analyze failed test logs with Llama and return structured JSON per failure.
    
    Args:
        failures (list): List of failed test dicts from parse_go_test_log
    
    Returns:
        list: Structured analysis results per failure
    """
    analyses = []
    failed_tests = [f for f in failures if f["status"] == "fail"]
    total = len(failed_tests)
    start_time = time.time()

    print(f"Starting analysis for {total} failed tests...\n")

    for idx, f in enumerate(failed_tests, start=1):
        test_name = f["test_name"]
        print(f"[{idx}/{total}] Analyzing test: {test_name} ...")
        test_start = time.time()

        # Build messages for Llama
        user_msg = user_prompt_for(f)
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_msg}
        ]

        try:
            # Call Llama via client wrapper
            analysis = llama_summarize(messages)

            # Include TestName explicitly for clarity
            structured_analysis = {
                "TestName": test_name,
                **analysis
            }

        except Exception as e:
            structured_analysis = {
                "TestName": test_name,
                "RootCause": "Error analyzing with LLM",
                "GoTestifyIssue": "",
                "Fix": "",
                "Prevention": "",
                "Error": str(e)
            }

        analyses.append({
            "test_name": test_name,
            "elapsed": f.get("elapsed"),
            "analysis": structured_analysis
        })

        # ETA calculation
        test_elapsed = time.time() - test_start
        avg_time = (time.time() - start_time) / idx
        remaining = total - idx
        eta = remaining * avg_time

        # Console feedback with LLM output
        print(f"[{idx}/{total}] Completed analysis for: {test_name} "
              f"(took {test_elapsed:.2f}s, ETA: {eta:.2f}s)")
        print(f"LLM Analysis:\n{json.dumps(structured_analysis, indent=2)}\n")

    total_elapsed = time.time() - start_time
    print(f"Analysis complete for all {total} failures in {total_elapsed:.2f}s.\n")

    return analyses


In [28]:
def main(log_file: str = "test.log"):
    """
    Main function to parse Go test logs, analyze failures with Llama,
    and display structured results.
    """
    # Step 1: Parse the Go test log
    failures = parse_go_test_log(log_file)

    # Step 2: Filter only failures
    failed_tests = [f for f in failures if f.get("status") == "fail"]

    if not failed_tests:
        print("✅ No test failures found.")
        return

    # # Optional: print raw failed logs for debug
    # for f in failed_tests:
    #     print(f"\nTest: {f['test_name']}")
    #     print(f"Elapsed: {f.get('elapsed')}")
    #     print(f"Log:\n{f['log']}")
    #     print("******\n")

    # Step 3: Analyze failures with LLM
    analyses = analyze_failures(failed_tests)

    # Step 4: Print structured analysis
    for analysis in analyses:
        a = analysis["analysis"]
        print("\n===============================")
        print(f"Test: {a.get('TestName', analysis['test_name'])}")
        print(f"Root Cause: {a.get('RootCause','')}")
        print(f"Go/Testify Issue: {a.get('GoTestifyIssue','')}")
        print(f"Fix: {a.get('Fix','')}")
        print(f"Prevention: {a.get('Prevention','')}")
        if "Error" in a:
            print(f"⚠️ LLM Error: {a['Error']}")
        print("===============================\n")


if __name__ == "__main__":
    main()



Starting analysis for 7 failed tests...

[1/7] Analyzing test: TestEntitySanityTestSuite ...
[1/7] Completed analysis for: TestEntitySanityTestSuite (took 2.61s, ETA: 15.63s)
LLM Analysis:
{
  "TestName": "TestEntitySanityTestSuite",
  "RootCause": "Error analyzing with LLM",
  "GoTestifyIssue": "",
  "Fix": "",
  "Prevention": "",
  "Error": "Expecting value: line 1 column 1 (char 0)"
}

[2/7] Analyzing test: TestEntitySanityTestSuite/Test_XCP_Tagging_Entity_Idempotency_Validation_0021 ...
[2/7] Completed analysis for: TestEntitySanityTestSuite/Test_XCP_Tagging_Entity_Idempotency_Validation_0021 (took 2.68s, ETA: 13.21s)
LLM Analysis:
{
  "TestName": "TestEntitySanityTestSuite/Test_XCP.Tagging_Entity_Idempotency_Validation_0021",
  "RootCause": "Potential nil pointer dereference due to mismatched expected and actual response schema.",
  "GoTestifyIssue": "Missing assertion for nil check in 'TaggingEntity'",
  "Fix": "Add assertions for nil checks, e.g. `if taggingEntity == nil { t.Err