In [2]:
import requests

def search_arxiv(query, max_results=1, timeout=30):
    """Query the arXiv export API and return the raw Atom XML response body.

    Args:
        query: Free-text search terms, matched against all fields (``all:``).
        max_results: Maximum number of entries requested from the API.
        timeout: Seconds to wait for the server; without it a stalled
            connection would block the kernel indefinitely.

    Returns:
        The response body as text. The HTTP status code is not checked, so
        an error page is returned as-is.
    """
    base_url = "http://export.arxiv.org/api/query"
    # Let requests build the query string so every value is percent-encoded.
    # Replacing only spaces by hand lets characters such as '&' or '#' in the
    # query break the URL (truncating the search or injecting parameters).
    params = {
        "search_query": f"all:{query}",
        "start": 0,
        "max_results": max_results,
    }
    response = requests.get(base_url, params=params, timeout=timeout)
    return response.text

def download_arxiv_pdf(arxiv_id, save_path, timeout=60):
    """Download the PDF of an arXiv paper and write it to ``save_path``.

    Args:
        arxiv_id: arXiv identifier, e.g. ``"2402.01874v1"``.
        save_path: Destination file path; overwritten if it already exists.
        timeout: Seconds to wait for the server; without it a stalled
            connection would block the kernel indefinitely.

    Returns:
        A human-readable status message describing success or the failing
        HTTP status code. Nothing is written to disk on failure.
    """
    url = f"https://arxiv.org/pdf/{arxiv_id}.pdf"
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return f"Failed to download PDF. Status code: {response.status_code}"

    with open(save_path, 'wb') as f:
        f.write(response.content)
    return f"PDF downloaded and saved to {save_path}"




In [4]:
# Manual test: run one search through search_arxiv and print the raw Atom XML
# so the feed structure (feed-level <id> vs. per-entry <id>) can be inspected.
query = "deep rl in llms"
search_result = search_arxiv(query)
print("Search Result (raw XML):")
print(search_result)



Search Result (raw XML):
<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
  <link href="http://arxiv.org/api/query?search_query%3Dall%3Adeep%20rl%20in%20llms%26id_list%3D%26start%3D0%26max_results%3D1" rel="self" type="application/atom+xml"/>
  <title type="html">ArXiv Query: search_query=all:deep rl in llms&amp;id_list=&amp;start=0&amp;max_results=1</title>
  <id>http://arxiv.org/api/VKkYz4lI6LafGlPn0ufclG7hHs4</id>
  <updated>2024-08-02T00:00:00-04:00</updated>
  <opensearch:totalResults xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">2367834</opensearch:totalResults>
  <opensearch:startIndex xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">0</opensearch:startIndex>
  <opensearch:itemsPerPage xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">1</opensearch:itemsPerPage>
  <entry>
    <id>http://arxiv.org/abs/2402.01874v1</id>
    <updated>2024-02-02T20:01:15Z</updated>
    <published>2024-02-02T20:01:15Z</published>
    <title>The 

In [6]:
# For simplicity, assume the first result is the paper we want and hard-code its ID.
# In a real scenario, you'd parse the XML to extract the ID properly.
# The ID is the last path segment of the <entry>'s <id> element:
#   <id>http://arxiv.org/abs/2402.01874v1</id>
# (not the feed-level <id>http://arxiv.org/api/...</id> near the top of the response).
arxiv_id = "2402.01874v1"

download_result = download_arxiv_pdf(arxiv_id, "downloaded_paper.pdf")
print(download_result)

PDF downloaded and saved to downloaded_paper.pdf


In [8]:
import PyPDF2

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at ``pdf_path``, concatenated.

    Pages are joined in document order with no separator between them.
    """
    with open(pdf_path, 'rb') as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        # Extraction must happen while the file is still open.
        return "".join(page.extract_text() for page in reader.pages)

# Usage example
pdf_path = "downloaded_paper.pdf"  # Make sure this matches the path where you saved the PDF
extracted_text = extract_text_from_pdf(pdf_path)
print(extracted_text[:1500])  # Print the first 1500 characters as a sample


THERL/LLM T AXONOMY TREE: REVIEWING SYNERGIES
BETWEEN REINFORCEMENT LEARNING AND LARGE LANGUAGE
MODELS
Moschoula Pternea
Microsoft
mpternea@microsoft.comPrerna Singh
Microsoft
prernasingh@microsoft.comAbir Chakraborty
Microsoft
abir.chakraborty@microsoft.com
Yagna Oruganti
Microsoft
yaorugan@microsoft.comMirco Milletari
Microsoft
mimillet@microsoft.comSayli Bapat
Microsoft
saylibapat@microsoft.com
Kebei Jiang
Microsoft
kebei.jiang@microsoft.com
ABSTRACT
In this work, we review research studies that combine Reinforcement Learning (RL) and Large
Language Models (LLMs), two areas that owe their momentum to the development of deep neural
networks. We propose a novel taxonomy of three main classes based on the way that the two model
types interact with each other. The first class, RL4LLM , includes studies where RL is leveraged to
improve the performance of LLMs on tasks related to Natural Language Processing. RL4LLM is
divided into two sub-categories depending on whether RL is used to dire

In [19]:
import os
import requests
import xml.etree.ElementTree as ET
import PyPDF2
from langchain_anthropic import ChatAnthropic
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage

# Load environment variables
from dotenv import load_dotenv
load_dotenv()

# Indexing os.environ (rather than .get) raises KeyError right here if the key is not set.
anthropic_api_key = os.environ['ANTHROPIC_API_KEY']
# Chat model instance; the later cells bind the arXiv tools to it.
llm = ChatAnthropic(model="claude-3-5-sonnet-20240620", api_key=anthropic_api_key)

@tool
def search_arxiv(query: str, max_results: int = 3) -> str:
    """
    Searches arXiv for papers based on the given query.
    Returns the title, arXiv ID and summary of each matching paper as text,
    or an error message if the search request fails.
    """
    # NOTE: the docstring above doubles as the tool description shown to the
    # LLM, so implementation notes live in comments instead.
    atom = "{http://www.w3.org/2005/Atom}"

    # Let requests build the query string so the query is percent-encoded;
    # replacing only spaces by hand lets '&' or '#' in the query break the URL.
    params = {
        "search_query": f"all:{query}",
        "start": 0,
        "max_results": max_results,
    }
    # A timeout keeps a stalled connection from blocking the kernel forever.
    response = requests.get("http://export.arxiv.org/api/query", params=params, timeout=30)
    if response.status_code != 200:
        return f"Error: Unable to fetch results. Status code: {response.status_code}"

    root = ET.fromstring(response.content)

    results = []
    for entry in root.findall(f"{atom}entry"):
        # findtext() yields "" for a missing or empty element, where
        # .find(...).text.strip() would raise AttributeError.
        title = entry.findtext(f"{atom}title", default="").strip()
        summary = entry.findtext(f"{atom}summary", default="").strip()
        entry_url = entry.findtext(f"{atom}id", default="").strip()

        # Keep everything after "/abs/" so old-style IDs such as
        # "hep-th/9901001v1" retain their category prefix; fall back to the
        # last path segment if the URL has an unexpected shape.
        _, found, tail = entry_url.partition("/abs/")
        arxiv_id = tail if found else entry_url.rsplit("/", 1)[-1]

        results.append(f"Title: {title}\nArXiv ID: {arxiv_id}\nSummary: {summary}\n")

    return "\n".join(results) if results else "No results found."

# @tool
# def choose_paper(arxiv_id: str) -> str:
#     """
#     Selects a paper by its arXiv ID.
#     """
#     return arxiv_id

@tool
def download_and_extract_text(arxiv_id: str) -> str:
    """
    Downloads the PDF for a given arXiv ID and extracts its text content.
    """
    # NOTE: the docstring above doubles as the tool description shown to the
    # LLM, so implementation notes live in comments instead.
    import io  # stdlib; imported locally so the cell's import list is unchanged

    url = f"https://arxiv.org/pdf/{arxiv_id}.pdf"
    # A timeout keeps a stalled connection from blocking the kernel forever.
    response = requests.get(url, timeout=60)
    if response.status_code != 200:
        return f"Failed to download PDF. Status code: {response.status_code}"

    # Parse straight from memory. Going through a fixed "temp.pdf" in the
    # working directory would overwrite and then delete any existing file of
    # that name, and would leave the file behind if parsing raised.
    pdf_reader = PyPDF2.PdfReader(io.BytesIO(response.content))
    text = "".join(page.extract_text() for page in pdf_reader.pages)

    # NOTE(review): this cap is in characters, not tokens — confirm it actually
    # fits the context window of the model consuming the result.
    return text[:1000000]  # Limit to first 1,000,000 characters to avoid potential token limits


In [20]:
from IPython.display import display, HTML
import pprint

tools = [search_arxiv, download_and_extract_text]
llm_with_tools = llm.bind_tools(tools)

# Initial query
query = "Find papers about deep rl in llms and reasoning."
messages = [HumanMessage(content=f"Search arXiv for 3 papers related to: {query}")]

# Prompt the LLM to search arXiv
search_response = llm_with_tools.invoke(messages)
# print(search_response.content)
# print(search_response.tool_calls)
messages.append(search_response)

# Execute the tool call to actually perform the search.
# The sentinel must be the same name that is checked below: otherwise a
# response without a search_arxiv call raises NameError on a fresh kernel,
# or silently reuses a stale message from a previous run of this cell.
search_results_tool_msg = None
for tool_call in search_response.tool_calls:
    if tool_call["name"].lower() == "search_arxiv":
        # Invoking the tool with the whole tool-call dict returns a message
        # object that can be appended to the conversation.
        search_results_tool_msg = search_arxiv.invoke(tool_call)
        break

if search_results_tool_msg is None:
    raise ValueError("The LLM did not call the search_arxiv tool as expected.")

# Add the search results to the messages
messages.append(search_results_tool_msg)



def format_messages(messages):
    """Render ``messages`` as pretty-printed, line-wrapped HTML for notebook display.

    Args:
        messages: Any object ``pprint.pformat`` can format (here, the list of
            chat messages).

    Returns:
        An ``IPython.display.HTML`` object wrapping the formatted text in a
        ``<pre>`` element that wraps long lines.
    """
    import html  # stdlib; imported locally so the cell's import list is unchanged

    # Escape before embedding: message content routinely contains '<', '>'
    # and '&' (XML, markup, code), which the browser would otherwise parse as
    # HTML and silently drop or mangle.
    escaped = html.escape(pprint.pformat(messages, width=80, depth=None))
    return HTML(
        "<pre style='white-space: pre-wrap; word-wrap: break-word;'>"
        + escaped
        + "</pre>"
    )

# Show the conversation so far as wrapped, pretty-printed HTML
# (used here in place of a plain print(messages)).
display(format_messages(messages))

In [21]:
# Ask LLM to choose the most relevant paper.
# This cell continues the conversation in `messages` and reuses `llm_with_tools`
# and the `download_and_extract_text` tool, so those names must already exist in the kernel.
messages.append(HumanMessage(content="Based on these results, which paper seems most relevant and interesting? Please use the download_and_extract_text tool to indicate your choice by providing the arXiv ID."))
paper_choice = llm_with_tools.invoke(messages)
print(paper_choice.content)
print(paper_choice.tool_calls)
messages.append(paper_choice)

# Extract chosen arXiv ID from the tool call and invoke the download_and_extract_text tool.
# Only the first matching tool call is executed; any further tool calls in the response are ignored.
pdf_text_response = None
for tool_call in paper_choice.tool_calls:
    if tool_call['name'].lower() == 'download_and_extract_text':
        pdf_text_response = download_and_extract_text.invoke(tool_call)
        break

# Fail loudly if the model answered in prose only instead of calling the tool.
if pdf_text_response is None:
    raise ValueError("The LLM did not make a valid paper choice using the tool.")

# add it to the messages
messages.append(pdf_text_response)
# Print only the first 1000 characters of the tool result as a quick sanity check.
print(pdf_text_response.content[:1000])


# Ask LLM to summarize the paper and create a Twitter thread
# NOTE(review): the tool-bound model is used again here, so the reply may contain
# another tool call rather than plain text — confirm that is acceptable.
messages.append(HumanMessage(content="Based on the extracted text, please provide a summary of the paper and create a Twitter thread (5-7 tweets) highlighting its key points and significance."))
final_response = llm_with_tools.invoke(messages)
print(final_response.content)

[{'text': 'Based on the search results, the most relevant and interesting paper appears to be:\n\n"Enhancing Logical Reasoning in Large Language Models to Facilitate Legal Applications" (ArXiv ID: 2311.13095v1)\n\nThis paper seems the most relevant because:\n\n1. It directly addresses the topic of logical reasoning in Large Language Models (LLMs), which is closely related to your interest in deep reinforcement learning (RL) in LLMs and reasoning.\n\n2. It proposes a novel approach called Reinforcement Learning from Logical Feedback (RLLF), which combines reinforcement learning with logical reasoning in LLMs.\n\n3. The paper aims to improve LLMs\' capabilities in complex reasoning tasks, particularly in the legal domain, which demonstrates a practical application of the research.\n\nLet\'s use the download_and_extract_text tool to get more information about this paper:', 'type': 'text'}, {'id': 'toolu_01KnarnJ6SP3ZQgx4WiNgSPs', 'input': {'arxiv_id': '2311.13095v1'}, 'name': 'download_an