In [1]:
# Use the %pip magic (not !pip) so the package is installed into the environment
# of the running kernel rather than whichever `pip` is first on the shell PATH.
%pip install anthropic-haystack



In [2]:
import os
from getpass import getpass
from typing import List

from haystack_experimental.components.agents import Agent
from haystack_experimental.components.tools.tool_context import ToolContext

from haystack_experimental import Pipeline
from haystack.tools import Tool, tool
from haystack.dataclasses import ChatMessage, Document
from haystack.components.builders import ChatPromptBuilder

from github_components.repo_viewer import GithubRepositoryViewer
from github_components.issue_viewer import GithubIssueViewer

In [3]:
from agent_prompts import issue_prompt, repo_viewer_prompt, repo_viewer_schema

In [4]:
def view_repository(ctx:  ToolContext, repo: str, path: str = "") -> str:
    """Render a repository path as text and record the documents that were fetched.

    A ``GithubRepositoryViewer`` (created with ``raise_on_failure=False``) is run for
    ``path`` inside ``repo``. Each returned document is flattened into the result text:

    * documents whose ``meta["type"]`` is ``"file"``, ``"dir"`` or ``"error"``
      contribute their content followed by a newline;
    * every other document is written under a ``File Content for <path>`` header.

    The fetched documents are also stored in the ``"documents"`` output of ``ctx``,
    appended to any non-empty list that is already stored there.

    Args:
        ctx: Tool context whose ``"documents"`` output is read and updated.
        repo: Repository identifier handed to the viewer unchanged.
        path: Path inside the repository handed to the viewer; defaults to ``""``.

    Returns:
        The rendered text. Text longer than 150,000 characters is cut at that length
        and a notice is appended.
    """
    listing_types = ("file", "dir", "error")
    max_chars = 150_000

    fetched_docs = GithubRepositoryViewer(raise_on_failure=False).run(path=path, repo=repo)["documents"]

    # Collect the fragments first and join once at the end.
    pieces = []
    for doc in fetched_docs:
        if doc.meta["type"] not in listing_types:
            pieces.append(f"File Content for {doc.meta['path']}\n\n")
            pieces.append(doc.content)
        else:
            pieces.append(doc.content + "\n")
    rendered = "".join(pieces)

    if len(rendered) > max_chars:
        rendered = rendered[:max_chars] + "...(large file can't be fully displayed)"

    # Accumulate documents across calls: extend a non-empty stored list,
    # otherwise start the output with the documents fetched by this call.
    stored_docs = ctx.get_output("documents")
    if stored_docs:
        stored_docs.extend(fetched_docs)
        ctx.set_output(key="documents", value=stored_docs)
    else:
        ctx.set_output(key="documents", value=fetched_docs)

    return rendered

In [5]:
# Wrap view_repository as a Tool so it can be passed to the agent. The description
# and the parameter schema are the ones imported from agent_prompts.
repo_viewer_tool = Tool(
    name="view_repository",
    description=repo_viewer_prompt,
    parameters=repo_viewer_schema,
    function=view_repository,
)

In [6]:
# Tool that simply echoes the comment text it is given back as its result.
# NOTE(review): the docstring below is presumably picked up by the `@tool` decorator
# as the tool description shown to the model — confirm before rewording it.
@tool
def create_comment(comment: str) -> str:
    """
    Use this to create a Github comment once you finished your exploration.
    """
    return comment


In [7]:
# Ask for the key only when LLM_API_KEY is not already set (or is empty), so that
# re-running the notebook, or running it unattended with the variable exported,
# neither blocks on a prompt nor overwrites the existing key.
if not os.environ.get("LLM_API_KEY"):
    os.environ["LLM_API_KEY"] = getpass("Anthropic Key: ")

Anthropic Key:  ········


In [8]:
# Configure the agent with the issue system prompt and the two tools defined above.
agent = Agent(
    model="anthropic:claude-3-5-sonnet-latest",
    system_prompt=issue_prompt,
    tools=[repo_viewer_tool, create_comment],
    generation_kwargs={"max_tokens": 8000},
    # NOTE(review): presumably the agent run ends once this tool is called — confirm against the Agent docs.
    handoff="create_comment",
    # Same "documents" key that view_repository writes through ctx.set_output.
    output_variables={"documents": List[Document]}
)

  warn(msg)


In [9]:
# Jinja template for the user message: it prints the issue URL, emits the title taken
# from the first document's meta exactly once, and wraps the content of every document
# in <issue-comment> tags.
issue_template = """
Issue from: {{ url }}
{% for document in documents %}
{% if loop.index == 1 %}
**Title: {{ document.meta.title }}**
{% endif %}
<issue-comment>
{{document.content}}
</issue-comment>
{% endfor %}
    """

# The prompt builder renders the template above as a single user message.
issue_builder = ChatPromptBuilder(template=[ChatMessage.from_user(issue_template)])

# Component that supplies the `documents` consumed by the template (wired up in the pipeline).
issue_fetcher = GithubIssueViewer()

In [10]:
# URL of the GitHub issue that is passed to the pipeline run.
issue_url = "https://github.com/deepset-ai/haystack/issues/8740"

In [11]:
# Assemble the pipeline from the three components created above.
pp = Pipeline()

pp.add_component("issue_fetcher", issue_fetcher)
pp.add_component("issue_builder", issue_builder)
pp.add_component("agent", agent)

# Feed the fetched issue documents into the prompt template, then the rendered
# prompt into the agent as its messages.
pp.connect("issue_fetcher.documents", "issue_builder.documents")
pp.connect("issue_builder.prompt", "agent.messages")

<haystack_experimental.core.pipeline.pipeline.Pipeline object at 0x128f2c050>
🚅 Components
  - issue_fetcher: GithubIssueViewer
  - issue_builder: ChatPromptBuilder
  - agent: Agent
🛤️ Connections
  - issue_fetcher.documents -> issue_builder.documents (List[Document])
  - issue_builder.prompt -> agent.messages (List[ChatMessage])

In [12]:
# Run the pipeline; the issue URL is the only input supplied.
result = pp.run({"url": issue_url})

In [15]:
from IPython.display import display, Markdown

# Take the agent's last message and render the result of its tool call as Markdown.
final_message = result["agent"]["messages"][-1]
display(Markdown(final_message.tool_call_result.result))

I've reviewed the current implementation and agree that the serialization format should be improved. Here's a proposed solution:

# Implementation Details

The `to_dict` and `from_dict` methods should be updated to use clean keys in the serialized format while maintaining the internal implementation with underscores. Here's what the changes would look like:

```python
def to_dict(self) -> Dict[str, Any]:
    serialized: Dict[str, Any] = {}
    serialized["role"] = self._role.value
    serialized["meta"] = self._meta
    serialized["name"] = self._name
    content: List[Dict[str, Any]] = []
    for part in self._content:
        if isinstance(part, TextContent):
            content.append({"text": part.text})
        elif isinstance(part, ToolCall):
            content.append({"tool_call": asdict(part)})
        elif isinstance(part, ToolCallResult):
            content.append({"tool_call_result": asdict(part)})
        else:
            raise TypeError(f"Unsupported type in ChatMessage content: `{type(part).__name__}` for `{part}`.")

    serialized["content"] = content
    return serialized

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "ChatMessage":
    # Convert external format to internal format
    internal_data = {
        "_role": ChatRole(data["role"]),
        "_meta": data.get("meta", {}),
        "_name": data.get("name"),
    }
    
    content: List[ChatMessageContentT] = []
    for part in data["content"]:
        if "text" in part:
            content.append(TextContent(text=part["text"]))
        elif "tool_call" in part:
            content.append(ToolCall(**part["tool_call"]))
        elif "tool_call_result" in part:
            result = part["tool_call_result"]["result"]
            origin = ToolCall(**part["tool_call_result"]["origin"])
            error = part["tool_call_result"]["error"]
            tcr = ToolCallResult(result=result, origin=origin, error=error)
            content.append(tcr)
        else:
            raise ValueError(f"Unsupported content in serialized ChatMessage: `{part}`")

    internal_data["_content"] = content
    return cls(**internal_data)
```

# Benefits

1. Cleaner external API that doesn't expose implementation details
2. Consistent with other serialization formats (like the OpenAI format)
3. More intuitive for users working with serialized data
4. Internal implementation remains unchanged

# Breaking Changes

This would be a breaking change for code that currently works with the serialized format. To minimize impact:

1. Document the change clearly in release notes
2. Consider adding a deprecation cycle with warnings for the old format
3. Provide a migration guide:
   ```python
   # Before
   {"_role": "user", "_content": [...], "_meta": {...}}
   
   # After
   {"role": "user", "content": [...], "meta": {...}}
   ```

# Next Steps

1. Implement the changes in a new PR
2. Add tests for both the new format and backward compatibility (if we add a deprecation cycle)
3. Update documentation to reflect the new serialization format
4. Include migration instructions in the release notes

Would you like me to create a PR with these changes?

In [17]:
# Print the URL of every document returned in the agent's "documents" output.
for viewed_doc in result["agent"]["documents"]:
    doc_url = viewed_doc.meta["url"]
    print(doc_url)

https://github.com/deepset-ai/haystack/tree/main/.github
https://github.com/deepset-ai/haystack/tree/main/docker
https://github.com/deepset-ai/haystack/tree/main/docs
https://github.com/deepset-ai/haystack/tree/main/e2e
https://github.com/deepset-ai/haystack/tree/main/examples
https://github.com/deepset-ai/haystack/tree/main/haystack
https://github.com/deepset-ai/haystack/tree/main/proposals
https://github.com/deepset-ai/haystack/tree/main/releasenotes
https://github.com/deepset-ai/haystack/tree/main/test
https://github.com/deepset-ai/haystack/blob/main/.gitignore
https://github.com/deepset-ai/haystack/blob/main/.pre-commit-config.yaml
https://github.com/deepset-ai/haystack/blob/main/CITATION.cff
https://github.com/deepset-ai/haystack/blob/main/code_of_conduct.txt
https://github.com/deepset-ai/haystack/blob/main/CONTRIBUTING.md
https://github.com/deepset-ai/haystack/blob/main/LICENSE
https://github.com/deepset-ai/haystack/blob/main/license-header.txt
https://github.com/deepset-ai/hayst