In [10]:
# General 
import os
import pandas as pd
from langchain.experimental.autonomous_agents.autogpt.agent import AutoGPT
from langchain.chat_models import ChatOpenAI

from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent
from langchain.docstore.document import Document
import asyncio
import nest_asyncio


# Needed since Jupyter already runs an asyncio event loop: nest_asyncio patches
# asyncio so `run_until_complete` can be called from inside that running loop
# (see `run_async` further down).
nest_asyncio.apply()

In [11]:
# Chat model shared by the whole notebook: it is passed to the pandas agent in
# `process_csv`, to the QA-with-sources chain, and to the AutoGPT agent itself.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=1.0)

In [12]:
# Tools
import os
from contextlib import contextmanager
from typing import Optional
from langchain.agents import tool
from langchain.tools.file_management.read import ReadFileTool
from langchain.tools.file_management.write import WriteFileTool

# Directory `process_csv` switches into before reading the CSV path it is given.
ROOT_DIR = "./data/"

@contextmanager
def pushd(new_dir):
    """Temporarily make ``new_dir`` the process working directory.

    The directory that was current on entry is restored when the ``with``
    block exits, whether it finishes normally or raises.
    """
    original_cwd = os.getcwd()
    os.chdir(new_dir)
    try:
        yield
    finally:
        # Always go back to where we started.
        os.chdir(original_cwd)

# NOTE: the docstring below is runtime text, not just documentation. LangChain's
# `@tool` decorator uses it as the tool description shown to the LLM, so it is
# kept on one logical line (via the `\` continuations) and left word-for-word.
@tool
def process_csv(
    csv_file_path: str, instructions: str, output_path: Optional[str] = None
) -> str:
    """Process a CSV by with pandas in a limited REPL.\
 Only use this after writing data to disk as a csv file.\
 Any figures must be saved to disk to be viewed by the human.\
 Instructions should be written in natural language, not code. Assume the dataframe is already loaded."""
    # Parameters:
    #   csv_file_path -- CSV to load; read while the working directory is ROOT_DIR.
    #   instructions  -- natural-language task handed to the pandas dataframe agent.
    #   output_path   -- optional; when given, a "save output to disk" request
    #                    naming this path is appended to the instructions.
    # Returns the agent's answer, or an "Error: <message>" string on failure.
    #
    # A single try block covers the whole body so that *every* failure --
    # including a missing ROOT_DIR or a problem constructing the agent -- is
    # reported as an "Error: ..." string instead of escaping as an exception.
    try:
        with pushd(ROOT_DIR):
            df = pd.read_csv(csv_file_path)
            agent = create_pandas_dataframe_agent(
                llm, df, max_iterations=30, verbose=True
            )
            if output_path is not None:
                instructions += f" Save output to disk at {output_path}"
            return agent.run(instructions)
    except Exception as e:
        return f"Error: {e}"

In [13]:
async def async_load_playwright(url: str) -> str:
    """Load one URL with headless Chromium (Playwright) and return its text.

    The rendered HTML is parsed with BeautifulSoup, ``<script>`` and
    ``<style>`` elements are removed, and the remaining text is reduced to one
    non-empty chunk per line.

    Args:
        url: Address of the page to load.

    Returns:
        The extracted page text, or ``"Error: <message>"`` if navigating to or
        parsing the page raised an exception.
    """
    # Imported inside the function, so these packages are only required when
    # the function is actually called.
    from bs4 import BeautifulSoup
    from playwright.async_api import async_playwright

    results = ""
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            await page.goto(url)

            page_source = await page.content()
            soup = BeautifulSoup(page_source, "html.parser")

            # Drop elements whose content is code/styling rather than page text.
            for script in soup(["script", "style"]):
                script.extract()

            text = soup.get_text()
            # Strip each line, split on double spaces, and keep non-empty pieces.
            lines = (line.strip() for line in text.splitlines())
            chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
            results = "\n".join(chunk for chunk in chunks if chunk)
        except Exception as e:
            results = f"Error: {e}"
        finally:
            # In `finally` so the browser is also closed when something that is
            # not an `Exception` (e.g. task cancellation) unwinds this block.
            await browser.close()
    return results

def run_async(coro):
    """Run ``coro`` to completion on the current event loop and return its result.

    The loop is whatever ``asyncio.get_event_loop()`` returns; the call blocks
    until the coroutine has finished.
    """
    return asyncio.get_event_loop().run_until_complete(coro)

# Synchronous tool wrapper around `async_load_playwright`.
# NOTE: the docstring is left as-is on purpose -- LangChain's `@tool` decorator
# uses it as the description the LLM sees for this tool.
@tool
def browse_web_page(url: str) -> str:
    """Verbose way to scrape a whole webpage. Likely to cause issues parsing."""
    # Block until the Playwright coroutine finishes and hand back what it
    # produced (the page text, or its "Error: ..." string).
    return run_async(async_load_playwright(url))

In [14]:
from langchain.tools import BaseTool, DuckDuckGoSearchRun
from langchain.text_splitter import RecursiveCharacterTextSplitter

from pydantic import Field
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain, BaseCombineDocumentsChain

def _get_text_splitter():
    """Build the default splitter: chunks of 500 with an overlap of 20, measured by ``len``."""
    # The chunk size is deliberately tiny, purely for demonstration.
    splitter_options = {
        "chunk_size": 500,
        "chunk_overlap": 20,
        "length_function": len,
    }
    return RecursiveCharacterTextSplitter(**splitter_options)


# Tool registered as "query_webpage": scrapes a page via `browse_web_page`,
# splits the text into chunks, asks the QA chain about each window of chunks,
# then asks it once more over the collected window answers.
class WebpageQATool(BaseTool):
    name = "query_webpage"
    description = "Browse a webpage and retrieve the information relevant to the question."
    # Splitter used on the scraped page; the default comes from _get_text_splitter.
    text_splitter: RecursiveCharacterTextSplitter = Field(default_factory=_get_text_splitter)
    # Chain invoked on each window of chunks and on the combined window answers.
    qa_chain: BaseCombineDocumentsChain

    def _run(self, url: str, question: str) -> str:
        """Useful for browsing websites and scraping the text information."""
        page_text = browse_web_page.run(url)
        page_doc = Document(page_content=page_text, metadata={"source": url})
        chunks = self.text_splitter.split_documents([page_doc])

        # TODO: Handle this with a MapReduceChain
        window_size = 4  # number of chunks sent to the chain per call
        window_answers = []
        for start in range(0, len(chunks), window_size):
            answer = self.qa_chain(
                {
                    "input_documents": chunks[start:start + window_size],
                    "question": question,
                },
                return_only_outputs=True,
            )
            # The label is the index of the window's first chunk (0, 4, 8, ...).
            window_answers.append(f"Response from window {start} - {answer}")

        combined_doc = Document(
            page_content="\n".join(window_answers), metadata={"source": url}
        )
        # NOTE(review): the chain call looks like it returns a mapping of
        # outputs rather than a plain str -- confirm against the `-> str`
        # annotation above.
        return self.qa_chain(
            {"input_documents": [combined_doc], "question": question},
            return_only_outputs=True,
        )

    async def _arun(self, url: str, question: str) -> str:
        # No asynchronous implementation is provided.
        raise NotImplementedError
      

In [15]:
# Instantiate the webpage QA tool with a QA-with-sources chain built on the shared `llm`.
query_website_tool = WebpageQATool(qa_chain=load_qa_with_sources_chain(llm))

In [None]:
from langchain.agents import Tool
from langchain.tools.file_management.write import WriteFileTool
from langchain.tools.file_management.read import ReadFileTool
from langchain.utilities import PythonREPL
from langchain.tools import DuckDuckGoSearchRun 
from langchain.tools.file_management import (
    ReadFileTool,
    CopyFileTool,
    DeleteFileTool,
    MoveFileTool,
    WriteFileTool,
    ListDirectoryTool,
)
from langchain.agents.agent_toolkits import FileManagementToolkit
from tempfile import TemporaryDirectory
from langchain.tools import ShellTool
from langchain.agents import load_tools



# Scratch directory for the file-management toolkit, so agent files don't
# clutter the notebook's own directory.
working_directory = TemporaryDirectory()

# HTTP request tools loaded by name.
requests_tools = load_tools(["requests_all"])

# Underlying utilities wrapped as agent tools below.
duckduck_search = DuckDuckGoSearchRun()
python_repl = PythonREPL()

# CAUTION: the shell and Python REPL tools let the agent execute arbitrary
# commands/code on this machine.

# Shell tool: a ShellTool's `run` method exposed under the name "shell".
shell_tool = Tool(
    func=ShellTool().run,
    name="shell",
    description="This tool gives you access to a system's shell. Can be used to run shell commands",
)

# DuckDuckGo search tool.
duckduck_tool = Tool(
    func=duckduck_search.run,
    name="DuckDuckGo",
    description="useful for when you need to answer questions about current events. You should ask targeted questions",
)

# Python REPL tool.
repl_tool = Tool(
    func=python_repl.run,
    name="python_repl",
    description="A Python shell. Use this to execute python commands. Input should be a valid python command. If you want to see the output of a value, you should print it out with `print(...)`.",
)

# File-management tools rooted at the scratch directory.
# If you don't provide a root_dir, operations will default to the current working directory.
toolkit = FileManagementToolkit(root_dir=str(working_directory.name))
directory_tools = toolkit.get_tools()

In [16]:
# Memory
import faiss
from langchain.vectorstores import FAISS
from langchain.docstore import InMemoryDocstore
from langchain.embeddings import OpenAIEmbeddings
from langchain.tools.human.tool import HumanInputRun

# Vector memory for the agent: an initially empty FAISS store.
embeddings_model = OpenAIEmbeddings()
# Dimensionality of the FAISS index; it has to match the length of the vectors
# `embeddings_model` produces (presumably 1536 for the default OpenAI
# embedding model -- confirm if the model is changed).
embedding_size = 1536
index = faiss.IndexFlatL2(embedding_size)
# Positional args: query-embedding function, the index, an empty in-memory
# docstore, and an empty dict (presumably the index-id -> docstore-id map --
# verify against the installed LangChain version).
vectorstore = FAISS(embeddings_model.embed_query, index, InMemoryDocstore({}), {})

In [17]:
# DuckDuckGo search tool instance that goes straight into the agent's tool list.
web_search = DuckDuckGoSearchRun()

In [18]:
# Tools handed to the AutoGPT agent.
#
# Each file tool appears exactly once and is rooted at ROOT_DIR. The list
# previously also contained unrooted `WriteFileTool()` / `ReadFileTool()`
# instances; two instances of the same tool class share one tool name, so the
# agent (which presumably resolves commands by tool name -- verify against the
# AutoGPT implementation) could end up using the unrooted ones and write files
# outside the directory that `process_csv` reads from.
tools = [
    web_search,
    WriteFileTool(root_dir=ROOT_DIR),
    ReadFileTool(root_dir=ROOT_DIR),
    process_csv,
    query_website_tool,
    repl_tool,
    duckduck_tool,
    shell_tool,
    # `directory_tools` and `requests_tools` are collections of tools, not
    # single tools; unpack them (e.g. `*directory_tools`) to enable them.
    # HumanInputRun(), # Activate if you want the permit asking for help from the human
]

In [19]:
# Assemble the AutoGPT agent from the shared chat model, the tool list, and a
# retriever over the FAISS vector store used as the agent's memory.
agent = AutoGPT.from_llm_and_tools(
    ai_name="Tom",
    ai_role="Assistant",
    tools=tools,
    llm=llm,
    # `k` is passed through as a search kwarg -- presumably the number of
    # memory entries fetched per lookup; confirm against the retriever docs.
    memory=vectorstore.as_retriever(search_kwargs={"k": 8}),
    # human_in_the_loop=True, # Set to True if you want to add feedback at each step.
)
# agent.chain.verbose = True

In [20]:
# Start the agent with a single goal.
# NOTE(review): the goal text contains typos ("We're online learning platform",
# no space before the quoted folder name). It is runtime prompt text, so it is
# left untouched here; fix it deliberately if the run is repeated.
agent.run(["We're online learning platform which just launched an ecommerce platform where people can buy courses (like on Udemy). Do some research on megatrends in e-learning and e-commerce in Africa. Create a folder for this called'e-Commerce Platform Report'"])

{
    "thoughts": {
        "text": "I need to start by conducting some research on megatrends in e-learning and e-commerce in Africa. This will allow me to create an e-Commerce Platform Report, which I can present to my colleagues to help us optimize the platform's offering for our users.",
        "reasoning": "Before I can create the e-Commerce Platform Report, I need to gather sufficient information on the current marketplace and identify key trends and challenges facing the industry.",
        "plan": "- Conduct research using DuckDuckGo Search on e-learning and e-commerce in Africa\n- Synthesize the information in a clear and well-organized report format\n- Share with colleagues and work together to optimize our platform for our users",
        "criticism": "I need to ensure that I only use reliable sources when researching, since quality of information is crucial for the accuracy of my report.",
        "speak": "I am going to start work on the e-Commerce Platform Report by cond



{
    "thoughts": {
        "text": "Now that I have gathered some key insights on e-learning and e-commerce trends in Africa, I need to use process_csv to organize this information in a clear and concise manner.",
        "reasoning": "Process_csv is the best option to create this report, since it allows me to manipulate the data in a way that suits my needs and helps me to identify trends and patterns which I can present to my colleagues.",
        "plan": "- Use process_csv to create a csv file which contains all of the key insights and trends from my research in a clear format.\n- Use 'instructions' to specify what manipulations to apply to the csv, such as filtering or sorting.\n- Output the processed data and save it to a file, which will serve as the basis for my report.",
        "criticism": "I need to ensure that the instructions I apply to process_csv are appropriate and will make it simpler to interpret the main trends in e-learning and e-commerce in Africa.",
        "spea

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 0fc2e362d85d218bfc2e73cbda721778 in your message.).


{
    "thoughts": {
        "text": "I need to retrieve the file 'trends.csv' using the 'query_webpage' command, as it contains the data on which to base my research.",
        "reasoning": "The 'query_webpage' command is an efficient way to retrieve data from webpages which is not already available as a file.",
        "plan": "- Use the 'query_webpage' command to browse to the relevant website and retrieve the 'trends.csv' file.", 
        "criticism": "I need to make sure my search terms are precise and the resulting CSV file contains the relevant data.",
        "speak": "I am going to use the 'query_webpage' command to retrieve the 'trends.csv' file."
    },
    "command": {
        "name": "query_webpage",
        "args": {
            "url": "https://example.com/trends.csv",
            "question": "Please download the 'trends.csv' file from the website."
        }
    }
}
{
    "thoughts": {
        "text": "Now I need to use the 'read_file' command to read the contents of the 

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 741a02f1e7d4a9a6e93c3dcc155f4b4c in your message.).


{
    "thoughts": {
        "text": "Now that the 'trends.csv' file has been downloaded from the website, I will use the 'read_file' command to read the contents of the file so that I can begin analyzing the data and identifying megatrends in e-learning and e-commerce in Africa.",
        "reasoning": "Reading the contents of the 'trends.csv' file is necessary to analyze the data contained in the file before megatrends can be identified.",
        "plan": "- Use the 'read_file' command to load the contents of the 'trends.csv' file.\\n- Determine the relevant megatrends in e-learning and e-commerce in Africa and save this information in a new file within the 'data' directory.\\n- Once megatrends have been identified with the 'process_csv' command, store them in their respective 'megatrends.json' files.",
        "criticism": "",
        "speak": "I will now use the 'read_file' command to load the contents of the 'trends.csv' file and start analyzing the data to identify relevant megatre

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 8ea96acea3ee9c7b8653c48295ef82f3 in your message.).


{
    "thoughts": {
        "text": "Now that the updated eCommerce trends were written to \'trends-updated.csv\', I will use the \'process_csv\' command to extract the updated trends, and I will store it in a new csv called \'trends2.csv\' saved in the \'data\' folder. By doing this, I will make the data easier to work with.",
        "reasoning": "The \'process_csv\' command is an efficient and versatile tool that was used earlier to read, filter, and manipulate data from a CSV file. This command is ideal for the current objective of reading a CSV file and extracting specific data.",
        "plan": "- Use the \'process_csv\' command to read in \'trends-updated.csv\' and process it.\\n- Then I will extract just the updated trends data and save it to a new file called \'trends2.csv\' in the \'data\' folder.\\n- After running the command, I will use the \'read_file\' command to ensure that the data was correctly saved to the new file.",
        "criticism": "I must remember that the CS

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID d3781cfb47068073ccc7d9975d619f81 in your message.).


{
    "thoughts": {
        "text": "Now that I have retrieved information on the top e-commerce trends in Africa, I will process the retrieved data in a CSV format to generate insights about the prevalent trends in Africa's e-commerce ecosystem.",
        "reasoning": "Processing the data will help in generating insights that can be used to make informed decisions regarding the development of the e-commerce platform.",
        "plan": "- Save the retrieved data to a csv file\\n- Use the command \'process_csv\' to process the retrieved data and generate insights.\\n- Save the generated insights to a file for future reference.",
        "criticism": "It is important to be careful when drawing conclusions from data and to ensure that the data is reliable and trustworthy when making decisive conclusions.",
        "speak": "Now that I have retrieved information on the top e-commerce trends in Africa, I will process the retrieved data from DuckDuckGo Search by using the \'process_csv\' com