<a href="https://colab.research.google.com/github/barbaroja2000/agents/blob/main/crew_ai_exa_anthropic_claude3_haiku_confluence.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Autonomous Research Agent - Haiku

* Research Agent with CrewAI as orchestrator
* Search via [EXA.ai](https://exa.ai) - Anecdata, but it seems to generate better-quality search output than Tavily - YMMV.
* Super fast [Anthropic Claude 3 Haiku model](https://www.anthropic.com/news/claude-3-haiku)
* Prompts (Agent Backstory & task) finessed using Anthropic [meta-prompt](https://colab.research.google.com/drive/1SoAajN8CBYTl79VyTwxtxncfCWlHlyy9)
* Publishes to Confluence
* Monitoring in Langsmith

In [1]:
!pip install -Uq 'crewai[tools]' langchain-exa langchain-anthropic atlassian-python-api markdown2 bs4 requests langchain

Exception in thread Thread-5 (attachment_entry):
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/debugpy/server/api.py", line 237, in listen
    sock, _ = endpoints_listener.accept()
  File "/usr/lib/python3.10/socket.py", line 293, in accept
    fd, addr = self._accept()
TimeoutError: timed out

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/google/colab/_debugpy.py", line 52, in attachment_entry
    debugpy.listen(_dap_port)
  File "/usr/local/lib/python3.10/dist-packages/debugpy/public_api.py", line 31, in wrapper
    return wrapped(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/debugpy/server/api.py", line 143, in debug
    log.reraise

In [2]:
# Subject of the research run; it is handed to the crew at kickoff as the
# value of the "topic" input.
research_topic = "Microsoft Autogen AI Agent Framework"

In [3]:
#@title Imports

import os
from crewai import Agent
import markdown2
import os
import json
from bs4 import BeautifulSoup
import json
from crewai_tools import tool
from exa_py import Exa
from atlassian import Confluence

In [4]:
#@title Passwords etc

from google.colab import userdata
# API keys for the search provider and the LLM, read from Colab's secret store.
exa_api_key = userdata.get('exa_api_key')
anthropic_api_key = userdata.get('anthropic_api_key')

# Confluence credentials are exported as environment variables; the Confluence
# publishing tool reads them from os.environ when it runs.
os.environ["CONFLUENCE_API_KEY"] = userdata.get('confluence_api_key')
os.environ["CONFLUENCE_USERNAME"] = userdata.get('confluence_username')
os.environ["CONFLUENCE_URI"] = userdata.get('confluence_uri')

In [5]:
#@title Langsmith
# LangSmith tracing configuration. The same secret is stored under both the
# API-key and the hub-API-key variable names.
os.environ["LANGCHAIN_API_KEY"] = userdata.get("langchain_api_key")
os.environ["LANGCHAIN_HUB_API_KEY"] = os.environ["LANGCHAIN_API_KEY"]
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "Autonomous Research Agent - Haiku"

In [6]:
#@title Tools

# Single Exa client shared by the EXA tool functions defined in this cell.
exa = Exa(api_key=exa_api_key)

@tool("EXA search tool")
def search(query: str):
    """Search for a webpage based on the query."""
    # NOTE(review): the docstring is kept verbatim because the `tool` decorator
    # presumably exposes it to the agent as the tool description - confirm
    # before rewording it.
    search_options = {"use_autoprompt": True, "num_results": 5}
    return exa.search(f"{query}", **search_options)

@tool("EXA similar pages tool")
def find_similar(url: str):
    """Search for webpages similar to a given URL.
    The url passed in should be a URL returned from `search`.
    """
    # NOTE(review): the docstring is kept verbatim because the `tool` decorator
    # presumably exposes it to the agent as the tool description - confirm
    # before rewording it.
    similar_pages = exa.find_similar(url, num_results=5)
    return similar_pages

@tool("EXA get page contents tool")
def get_contents(ids: list[str]):
    """Get the contents of a webpage.
    The ids passed in should be a list of ids returned from `search`.
    """
    # NOTE(review): the docstring is kept verbatim because the `tool` decorator
    # presumably exposes it to the agent as the tool description - confirm
    # before rewording it.
    page_contents = exa.get_contents(ids)
    return page_contents

@tool("Confluence Publisher")
def confluence_publisher_tool(content: str) -> str:
    """Use this tool to publish to Confluence"""
    # NOTE(review): the docstring is kept verbatim because the `tool` decorator
    # presumably exposes it to the agent as the tool description.
    #
    # What this does:
    #   * converts the Markdown article in `content` to HTML;
    #   * uses the text of the first <h1> as the page title and removes that
    #     heading from the body;
    #   * creates a new page under the "Research" page of the "AI" space.
    # Returns the create_page response serialised as a JSON string.
    # Raises KeyError if a CONFLUENCE_* environment variable is missing and
    # ValueError if the "Research" parent page cannot be found.
    confluence_uri = os.environ["CONFLUENCE_URI"]
    username = os.environ["CONFLUENCE_USERNAME"]
    password = os.environ["CONFLUENCE_API_KEY"]
    space_key = 'AI'
    parent_title = "Research"

    # Convert Markdown content of the article to HTML
    html_content = markdown2.markdown(content)
    soup = BeautifulSoup(html_content, 'html.parser')

    # The first <h1> supplies the page title and is then dropped from the body
    # so the title is not repeated at the top of the page.
    h1_tag = soup.find('h1')
    page_title = 'No title found'
    if h1_tag is not None:
        # An empty heading would yield an empty title, so fall back to the
        # placeholder in that case as well.
        page_title = h1_tag.text.strip() or page_title
        h1_tag.decompose()

    html_content = str(soup)

    # Initialize Confluence client
    confluence = Confluence(
        url=confluence_uri,
        username=username,
        password=password
    )

    # Look up the parent page. A missing page comes back as an empty result,
    # which would otherwise surface as an opaque TypeError on the ['id'] lookup.
    parent_info = confluence.get_page_by_title(space_key, parent_title)
    if not parent_info:
        raise ValueError(
            f"Confluence parent page '{parent_title}' not found in space '{space_key}'"
        )
    parent_id = parent_info['id']

    # Create the page with the HTML content. This only creates: an existing
    # page with the same title is not updated by this call.
    response = confluence.create_page(
        space_key,
        page_title,
        html_content,
        parent_id=parent_id,
        type='page',
        representation='storage',
        editor='v2',
        full_width=False
    )

    return json.dumps(response)

In [7]:
#@title Model

from langchain_anthropic import ChatAnthropic
from langchain_core.prompts import ChatPromptTemplate

# Claude 3 Haiku chat model; this object is passed as `llm` to each agent.
model = ChatAnthropic(model='claude-3-haiku-20240307', anthropic_api_key=anthropic_api_key)

In [8]:
#@title Prompts from gists

# Maps a prompt name to the raw Gist URL holding its text. Each URL is pinned
# to a specific Gist revision.
prompt_dict = {
    "crewai_writer_agent_backstory": "https://gist.githubusercontent.com/barbaroja2000/46d1a4412dd9f8ca6afae2ff87dda346/raw/a061ecaadd54ea096a7679cd33bc906c3fe64b78/crewai_writer_agent_backstory",
    "crewai_researcher_agent_backstory": "https://gist.githubusercontent.com/barbaroja2000/40cd328296296084ef6ede51add117ee/raw/528a8383023dd48a1af0e86e35bb244751b130bc/crewai_researcher_agent_backstory",
    "crewai_research_task": "https://gist.githubusercontent.com/barbaroja2000/aceab69b70b3a48127695e92d490c881/raw/78f76757535b5714e927a33a90367fd2c5b4fdbb/crewai_research_task",
    "crewai_writer_task": "https://gist.githubusercontent.com/barbaroja2000/304a343fb1780916e480847a53461207/raw/2193cecace28c13f36f40832f812bb7bd1f4da1e/crewai_writer_task",
}

In [9]:
#@title Get Prompts

import requests

def download_gist(gist_url, timeout=30):
    """Download a Gist's raw content and return it as text.

    Args:
        gist_url: URL of the raw Gist to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request can block indefinitely on an unresponsive
            host.

    Returns:
        The response body as a string, or None if the request failed. The
        failure is printed rather than raised.
    """
    try:
        response = requests.get(gist_url, timeout=timeout)
        response.raise_for_status()  # Treat 4xx/5xx responses as failures
    except requests.RequestException as e:
        print(f"Error downloading Gist: {e}")
        return None
    return response.text

# Fetch every prompt and bind it to a module-level variable named after its
# dictionary key; later cells read those variables by name
# (e.g. crewai_researcher_agent_backstory).
# NOTE(review): download_gist returns None on failure, so a failed download
# leaves the variable set to None rather than stopping here.
for key, value in prompt_dict.items():
  globals()[key] = download_gist(value)
  print(f"{key}\n", globals()[key])

crewai_writer_agent_backstory
 Expert in tech content strategy with a deep understanding of the tech industry.

Skills:

* Converts complex technical concepts into engaging, easy-to-understand content.
* Expert in structuring articles for readability and engagement.
* Uses subheadings, bullet points, and lists for clarity.
* Applies analogies and real-world examples to demystify technical topics.
* Incorporates visuals like diagrams and infographics to support text.
* Varies sentence structure for dynamic reading.

Signature Style:

* Strong openings and closings that resonate with readers.
* Ability to refine and polish drafts into polished pieces.
* A meticulous attention to detail, aiming to inform, engage, and inspire.

Impact: 

* Transforms technical, dry subjects into compelling reads, making technology accessible to a broader audience.
crewai_researcher_agent_backstory
 I am an advanced AI research assistant, I have been developed by leading experts in the field of artificial i

In [10]:
#@title Agents

# Iteration cap passed to each agent as max_iter.
max_iter = 10

# Senior researcher agent: verbose, memory enabled, no delegation, and
# equipped with the EXA search tool.
researcher = Agent(
    role='Senior Researcher',
    goal='Efficiently gather, synthesize, and present the most relevant and accurate information on {topic}',
    backstory=crewai_researcher_agent_backstory,
    tools=[search],
    llm=model,
    max_iter=max_iter,
    memory=True,
    verbose=True,
    allow_delegation=False,
)

# Writer agent: verbose, memory enabled. It has no tools of its own and
# delegation is switched off.
writer = Agent(
    role='Writer',
    goal='Narrate compelling tech stories about {topic}',
    backstory=crewai_writer_agent_backstory,
    llm=model,
    max_iter=max_iter,
    memory=True,
    verbose=True,
    allow_delegation=False,
)


# Publisher agent: its only tool is the Confluence publisher; delegation is
# switched off.
publisher = Agent(
    role='Publisher',
    goal='Publishes research to Confluence',
    backstory="I am an AI Publisher. My sole function is to publish finished markdown articles to Confluence.",
    tools=[confluence_publisher_tool],
    llm=model,
    max_iter=max_iter,
    memory=True,
    verbose=True,
    allow_delegation=False,
)

In [11]:
#@title Tasks

from crewai import Task
from pydantic import BaseModel

# Research task: assigned to the researcher and given the EXA search tool.
# NOTE(review): human_input=True presumably makes the run wait for human
# feedback on this task's result - confirm this is wanted for unattended runs.
research_task = Task(
    description=crewai_research_task,
    expected_output="A comprehensive 12 paragraph report on {topic}",
    agent=researcher,
    tools=[search],
    human_input=True,
)


# Writing task: assigned to the writer and executed synchronously.
write_task = Task(
    description=crewai_writer_task,
    expected_output="A comprehensive 8 paragraph article, with 4-5 key points bulleted at the end. All formatted in markdown",
    agent=writer,
    output_file='new-blog-post.md',  # Example of output customization
    async_execution=False,
)


# Publishing task: assigned to the publisher, which is given the Confluence
# publisher tool; executed synchronously.
publish_task = Task(
    description="Publish task to Confluence",
    expected_output="An article published to confluence.",
    agent=publisher,
    tools=[confluence_publisher_tool],
    async_execution=False,
)


In [12]:
#@title Crew

from crewai import Crew, Process

# Assemble the crew: researcher, writer and publisher, with their tasks listed
# in research -> write -> publish order.
crew = Crew(
    tasks=[research_task, write_task, publish_task],
    agents=[researcher, writer, publisher],
    process=Process.sequential,  # Optional: sequential task execution is the default
)

In [13]:
#@title Kick off

# Run the crew with the research topic as the "topic" input, then print the
# result.
result = crew.kickoff(inputs={"topic": research_topic})
print(result)



[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3mThought: To research Microsoft Autogen AI Agent Framework, I will use the EXA search tool to gather relevant information from authoritative sources.

Action: EXA search tool
Action Input: {
  "query": "Microsoft Autogen AI Agent Framework"
}[0m[93m 

Title: AutoGen | AutoGen
URL: https://microsoft.github.io/autogen/
ID: R35J8HFgE5uP6Zb7QHi0ZQ
Score: 0.21848386526107788
Published Date: 2023-01-01
Author: None
Text: None
Highlights: None
Highlight Scores: None


Title: AI Builder documentation
URL: https://learn.microsoft.com/en-us/ai-builder/
ID: g1t18LcdnjtO9ZjNUVyVvg
Score: 0.18271562457084656
Published Date: 2023-01-01
Author: None
Text: None
Highlights: None
Highlight Scores: None


Title: Azure Automated Machine Learning - AutoML | Microsoft Azure
URL: https://azure.microsoft.com/en-us/products/machine-learning/automatedml/
ID: lqP87fYeSlUmkSmE4i9CXA
Score: 0.18218904733657837
Published Date: 2023-09-18
Author: None