In [None]:
import asyncio
import os
import re

from dotenv import load_dotenv
from langchain_google_vertexai import ChatVertexAI
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.agents import Tool, create_react_agent, AgentExecutor
from langchain.tools import BaseTool
from langchain.utilities import ArxivAPIWrapper
from langchain_experimental.utilities.python import PythonREPL
from langchain_core.prompts import PromptTemplate

# Load environment variables (e.g. from a local .env file)
load_dotenv()

# Retrieve API keys and project details from environment variables
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Still read so that other cells can use it, but it is no longer handed to
# ChatVertexAI below: Vertex AI authenticates with Google Cloud credentials.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
GOOGLE_CLOUD_PROJECT = os.getenv("GOOGLE_CLOUD_PROJECT")
GOOGLE_CLOUD_PROJECT_LOCATION = os.getenv("GOOGLE_CLOUD_PROJECT_LOCATION")

# Flag to choose between Gemini and OpenAI models
use_gemini = True

# Initialize the language model (LLM) based on the chosen provider
if use_gemini:
    import vertexai
    from google.auth import default

    # Resolve Application Default Credentials; `project` is the project those
    # credentials are associated with (may be None).
    credentials, project = default()

    # Prefer the explicitly configured project, fall back to the ADC project.
    vertex_project = GOOGLE_CLOUD_PROJECT or project

    # Initialize Vertex AI with the resolved project, location and credentials
    vertexai.init(
        project=vertex_project,
        location=GOOGLE_CLOUD_PROJECT_LOCATION,
        credentials=credentials,
    )

    # Only forward the location when one is configured, so the library default
    # applies otherwise instead of an explicit None.
    vertex_location_kwargs = (
        {"location": GOOGLE_CLOUD_PROJECT_LOCATION}
        if GOOGLE_CLOUD_PROJECT_LOCATION
        else {}
    )

    # Create a ChatVertexAI instance with the specified model and temperature.
    # The previously passed `google_api_key` is dropped (it is not a Vertex AI
    # setting); the credentials obtained above are used instead.
    llm = ChatVertexAI(
        model_name="gemini-1.5-pro",
        temperature=0.2,
        project=vertex_project,
        credentials=credentials,
        **vertex_location_kwargs,
    )
else:
    # Create a ChatOpenAI instance with the specified model and temperature
    llm = ChatOpenAI(
        model_name="gpt-4o",
        temperature=0.2,
        openai_api_key=OPENAI_API_KEY
    )

# Initialize the Arxiv API wrapper (shared by ArxivSearchTool)
arxiv = ArxivAPIWrapper()

# Define a custom tool for searching papers on ArXiv
class ArxivSearchTool(BaseTool):
    """Agent tool that searches ArXiv by topic and returns a markdown listing.

    The search itself is delegated to the module-level ``arxiv`` wrapper.
    Failures are reported as a text message instead of being raised, so the
    calling agent can read the error as an observation.
    """

    name: str = "arxiv_search"
    description: str = "Search for papers on ArXiv by topic."

    def _run(self, query: str) -> str:
        """Run the search and format the hits as markdown.

        Args:
            query: Topic or search string forwarded to ``arxiv.run``.

        Returns:
            A markdown document with one ``## Paper N`` section per
            blank-line-separated entry of the wrapper's output, a
            "No results found." note when there is no entry, or an
            ``Error searching ArXiv: ...`` message when anything fails.
        """
        header = "# ArXiv Search Results\n\n"
        try:
            # Run the Arxiv API wrapper with the given query
            results = arxiv.run(query)
            # Entries are separated by blank lines; drop empty fragments
            papers = [p.strip() for p in results.split("\n\n") if p.strip()]
            if not papers:
                return header + "No results found.\n"
            # Build all sections first and join once instead of repeated `+=`
            sections = [
                f"## Paper {i}\n{p}\n\n" for i, p in enumerate(papers, start=1)
            ]
            return header + "".join(sections)
        except Exception as e:
            # Deliberately best-effort: return the error as text for the agent
            return f"Error searching ArXiv: {str(e)}"

    async def _arun(self, query: str) -> str:
        """Async variant: runs the blocking ``_run`` in the default executor.

        Previously this raised ``NotImplementedError``, which made the tool
        unusable from async agent runs.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._run, query)

# Instantiate the ArxivSearchTool
arxiv_tool = ArxivSearchTool()

# Initialize the Python REPL tool.
# SECURITY NOTE: this tool executes whatever Python the LLM produces inside the
# current process. Only run this notebook in an environment where that is
# acceptable (e.g. a sandbox / throwaway container).
python_repl = PythonREPL()
repl_tool = Tool(
    name="python_repl",
    description=(
        "A Python shell. Use this to execute python commands. "
        "Input should be a valid python command. If you want to see the output of a value, "
        "you should print it out with `print(...)`."
    ),
    func=python_repl.run,
)

# List of tools available to the agent
tools = [arxiv_tool, repl_tool]

# Initialize conversation memory to keep track of the dialogue history.
# - return_messages=False: the prompt below is a plain string template, so the
#   history must be rendered as text rather than as a list of message objects.
# - input_key / output_key are pinned so saving a turn never depends on the
#   memory guessing which key holds the question / answer.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    input_key="input",
    output_key="output",
    return_messages=False,
)

# Define the prompt template with required placeholders.
# Placeholders: {tools}, {tool_names} and {agent_scratchpad} are filled by the
# ReAct agent, {chat_history} by the memory, {input} by the caller.
# {input} is used only once (the real question); the format description uses a
# generic line so the task text is not duplicated inside the instructions.
template = """
You are a researcher specialized in analyzing scientific papers.
Your tasks are to:
1. Search for papers on a given topic using the available tools.
2. Analyze the retrieved papers and extract key insights.
3. Generate a comprehensive markdown report with these sections:
   Introduction, Overview of Papers, Detailed Analysis, and Summary and Conclusions.
4. Use the python_repl tool to convert the markdown report into a PDF file.
Be concise yet thorough in your analysis.

Available tools:
{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Previous conversation history:
{chat_history}

Question: {input}
Thought: {agent_scratchpad}
"""

# Create a PromptTemplate instance from the template string
prompt = PromptTemplate.from_template(template)

# Create the ReAct agent using the language model, tools, and prompt
researcher_agent = create_react_agent(llm, tools, prompt)

# Wrap the agent with AgentExecutor to manage its execution.
# handle_parsing_errors=True feeds malformed LLM steps back to the model as an
# observation instead of aborting the whole run with a parsing exception.
agent_executor = AgentExecutor.from_agent_and_tools(
    agent=researcher_agent,
    tools=tools,
    memory=memory,
    verbose=True,
    handle_parsing_errors=True,
)

# Function to run the research workflow for a given topic
def run_research_workflow(topic: str) -> str:
    """Ask the research agent to write a review on ``topic`` and export a PDF.

    Args:
        topic: Subject of the review; must contain at least one
            non-whitespace character.

    Returns:
        The file name the agent was instructed to write the PDF to. The
        function does not verify that the agent actually created the file.

    Raises:
        ValueError: If ``topic`` is empty or only whitespace.
    """
    if not topic or not topic.strip():
        raise ValueError("topic must be a non-empty string")

    # Spaces become underscores (unchanged behaviour); every other character
    # that is not a word character, dot or hyphen is replaced as well so that
    # topics containing e.g. '/' or ':' still give a usable file name.
    safe_topic = re.sub(r"[^\w.\-]", "_", topic.replace(" ", "_"))
    output_filename = f"{safe_topic}_review.pdf"

    # Task text for the agent (named `task` to avoid shadowing the
    # module-level `prompt` template).
    task = (
        f"Generate a comprehensive technical review on the topic '{topic}'. "
        "Steps: "
        "1. Use the arxiv_search tool to find recent papers. "
        "2. Analyze the papers and extract key insights. "
        "3. Create a markdown report with sections: Introduction, Overview of Papers, Detailed Analysis, and Summary and Conclusions. "
        f"4. Use the python_repl tool to convert the markdown report to a PDF file named {output_filename}. "
        "Provide both the markdown report and confirmation of PDF creation in your final output."
    )

    # `invoke` with an explicit input dict replaces the deprecated `run(...)`,
    # which expects the executor to expose exactly one output key.
    agent_executor.invoke({"input": task})
    return output_filename

# Demo entry point: runs the full research workflow for one example topic.
if __name__ == "__main__":
    # Example topic for the demo run
    topic = "BCG Signal Processing"
    # The return value is the file name the agent was asked to write
    pdf_path = run_research_workflow(topic)
    print(f"Research document should be available at: {pdf_path}")


In [12]:
from langchain.agents import initialize_agent
from langchain.agents import Tool
from langchain_experimental.utilities.python import PythonREPL
from langchain_experimental.tools import PythonREPLTool
from langchain_core.tools import Tool

# Quick check that `llm` can drive an agent: a single-tool agent that only
# has access to a Python REPL.
python_repl = PythonREPL()
repl_tool = Tool(
    func=python_repl.run,
    name="python_repl",
    description=(
        "A Python shell. Use this to execute python commands. "
        "Input should be a valid python command. If you want to see the output of a value, "
        "you should print it out with `print(...)`."
    ),
)

# No agent type is passed, so the library's default is used.
agent = initialize_agent([repl_tool], llm)

# Last expression of the cell, so the returned dict is displayed as output.
agent.invoke("what is 2 + 2?")

{'input': 'what is 2 + 2?', 'output': '4'}