In [1]:
import phoenix as px
session = px.launch_app()

  from .autonotebook import tqdm as notebook_tqdm
INFO:phoenix.config:📋 Ensuring phoenix working directory: /Users/adamhedib/.phoenix


🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


In [2]:
import requests
import datetime
import re
from typing import Union

from tinyagents import chainable, loop, passthrough, respond

@chainable(kind="agent")
class LegalResearcher:
    name: str = "legal_researcher"

    def __init__(self, llm, max_steps: int = 5):
        self.max_steps = max_steps
        self.prompt = """You are a highly capable AI legal researcher for the UK jurisdiction. You will be provided with a legal query from the user, and you break the query down into a set of legal problems or "questions" that needed to be investigated. Once you have concluded your investigation and analysis, you must respond with a final answer to the user. Your final answer must cite the documents (if any) that were used to guide your answer by referencing the document id in sqaure brackets such as ".. some statement [.. document ID goes here]".

        Below is an example of an interaction between you and the user:

        User Query: [.. user query will be provided here]

        Question: [.. provide your question here]
        Search results: [.. returned search results]
        Thought: [.. provide your thoughts on the above results here]

        You may repeat the process above up to {max_steps} times. 

        When you are finished with your analysis, you must provide your summary as such:

        Final answer: [.. provide your final answer here]
        
        Begin!

        User Query: {user_query}
        {history}"""
        self.llm = llm

        # update metadata for tracing
        self._metadata["prompt"] = self.prompt
        self._metadata["max_steps"] = max_steps

    def run(self, state: Union[dict, str]):
        if not isinstance(state, dict):
            state = {
                "user_query": state,
                "steps": []
            }
        
        user_query = state["user_query"]
        history = self._get_history(state)
        output = self.llm.generate_content(self.prompt.format(history=history, max_steps=self.max_steps, user_query=user_query)).text
    
        if "Question:" in output:
            state["steps"].append(
                dict(question=output.split("Question: ")[-1]))
            
        if "Thought:" in output:
            last_step = state["steps"][-1]
            last_step["thought"] = output.split("Thought:")[-1]
            state["steps"][-1] = last_step
            
        if "Final answer:" in output:
            state["final_answer"] = self._replace_citations(output.split("Final answer: ")[-1], state)
        
        return state
    
    def output_handler(self, state: dict):
        if "final_answer" in state:
            return respond(state["final_answer"])
        
        return passthrough(state)
    
    @staticmethod
    def _replace_citations(output: str, state: dict):
        """ messy means of replacing document ID references with source link """
        citations_map = {}
        for step in state["steps"]:
            if not "results" in step:
                continue
            for id_, result in step["results"].items():
                citations_map[id_] = result["link"]
                
        citations = re.findall("\[(.*?)\]", output)
        for citation in citations:
            try:
                if "Document ID:" in citation:
                    citation = citation.replace("Document ID:", "").strip()
                    output = output.replace("Document ID: ", "")
                
                if "," in citation:
                    for citation_ in citation.split(","):
                        output = output.replace(output, citations_map[citation_.strip()])
                    continue

                if "www" in citation:
                    continue
                    
                output = output.replace(citation, citations_map[citation])
            except KeyError:
                output = output.replace(citation, "")

        return output
    
    def _get_history(self, state: dict):
        if len(state["steps"]) == 0:
            return "Question: "
        
        step_template ="""\n\nQuestion: {question}\nSearch Results: {results}\nThought: {thought}"""
        history = ""
        for step in state["steps"]:
            history += step_template.format(question=step["question"], results=self._format_results(step["results"]), thought=step.get("thought"))

        return history
    
    @staticmethod
    def _format_results(results: dict):
        return "\n\n".join([f"Document ID: {id_}\n\tTitle: {result['title']}\n\tSnippet: {result['snippet']}" for id_, result in results.items()])

@chainable(kind="tool")
class SearchTool:
    name: str = "search_tool"

    def __init__(self, api_key: str, sites: list[str]):
        self.api_key = api_key
        self.sites = " OR ".join(["site:" + site for site in sites])

        # put sites in metadata for tracing
        self._metadata["sites"] = sites

    def run(self, state: dict):
        for step in state["steps"]:
            if "results" not in step:
                step["results"] = self.get_results(step["question"])

        return state

    def get_results(self, query: str):
        payload =  {
            "url": "https://google.serper.dev/search",
            "headers": {
                "X-API-KEY": self.api_key,
            },
            "params": {
                "q": f"{self.sites} {query}"
            }
        }
        response = requests.get(**payload).json()

        results = {
            f"{datetime.datetime.now().strftime('%d%Y%H%M%S%fZ')}": {
                "snippet": result["snippet"],
                "link": result["link"],
                "title": result["title"]
            } for result in response["organic"]
        }
        return results

ERROR [strawberry.execution] Unknown project: UHJvamVjdDoy

GraphQL request:4:3
3 | ) {
4 |   node(id: $id) {
  |   ^
5 |     __typename
Traceback (most recent call last):
  File "/Users/adamhedib/Library/Caches/pypoetry/virtualenvs/tinyagents-JQO3J91T-py3.11/lib/python3.11/site-packages/graphql/execution/execute.py", line 528, in await_result
    return_type, field_nodes, info, path, await result
                                          ^^^^^^^^^^^^
  File "/Users/adamhedib/Library/Caches/pypoetry/virtualenvs/tinyagents-JQO3J91T-py3.11/lib/python3.11/site-packages/strawberry/schema/schema_converter.py", line 750, in _async_resolver
    return await await_maybe(
           ^^^^^^^^^^^^^^^^^^
  File "/Users/adamhedib/Library/Caches/pypoetry/virtualenvs/tinyagents-JQO3J91T-py3.11/lib/python3.11/site-packages/strawberry/utils/await_maybe.py", line 12, in await_maybe
    return await value
           ^^^^^^^^^^^
  File "/Users/adamhedib/Library/Caches/pypoetry/virtualenvs/tinyagents-JQO3J

2024-08-15 09:34:46,860	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [3]:
import os
import google.generativeai as genai
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

llm = genai.GenerativeModel(
    "gemini-1.5-flash", 
    generation_config=dict(
        stop_sequences=["Search results:"]
    )
)

agent = LegalResearcher(llm=llm, max_steps=3)

In [4]:
api_key = os.environ["SERPER_API_KEY"]

search_tool = SearchTool(api_key=api_key, sites=["handbook.fca.org.uk"])

In [5]:
from tinyagents.tracing import create_phoenix_tracer

tracer = create_phoenix_tracer(project_name="default")

graph = loop(agent, search_tool, name="legal_researcher_agent", max_iter=4).as_graph()

runner = graph.compile(tracer=tracer)

In [6]:
query = "What is the definition of an e-money provider?"

output = runner.invoke(query)

[36;1m[1;3m
 > Running node: legal_researcher
[0m
[33;1m[1;3m	Input: What is the definition of an e-money provider?
[0m


I0000 00:00:1723710887.181225 1613591 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


[32;1m[1;3m	Output (legal_researcher): {
  "content": {
    "user_query": "What is the definition of an e-money provider?",
    "steps": [
      {
        "question": "What are the legal requirements for an entity to be classified as an e-money provider in the UK? \n\n## "
      }
    ]
  },
  "action": null,
  "ref": null
}[0m
[36;1m[1;3m
 > Running node: search_tool
[0m
[33;1m[1;3m	Input: {'user_query': 'What is the definition of an e-money provider?', 'steps': [{'question': 'What are the legal requirements for an entity to be classified as an e-money provider in the UK? \n\n## '}]}
[0m
[32;1m[1;3m	Output (search_tool): {
  "content": {
    "user_query": "What is the definition of an e-money provider?",
    "steps": [
      {
        "question": "What are the legal requirements for an entity to be classified as an e-money provider in the UK? \n\n## ",
        "results": {
          "152024093453519697Z": {
            "snippet": "(i) allow the holder to acquire goods or se

In [7]:
from IPython.display import Markdown, display

display(Markdown(output))

An entity is classified as an e-money provider in the UK if it meets the requirements of the Electronic Money Regulations 2011 [https://www.handbook.fca.org.uk/instrument/2011/2011_7.pdf]. This includes obtaining authorization from the Financial Conduct Authority (FCA) [https://www.handbook.fca.org.uk/handbook/PERG/15/?view=chapter]. An entity that intends to issue electronic money must comply with the Electronic Money Regulations 2011, which set out the requirements for authorization and registration [https://www.handbook.fca.org.uk/handbook/PERG/15/?view=chapter]. 
