In [37]:
import os
from langchain.utilities import ArxivAPIWrapper
import arxiv
from langchain.utilities import BashProcess
import pprint
from langchain.utilities import SearxSearchWrapper
from langchain.agents import AgentType, Tool, initialize_agent, load_tools
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool

In [67]:
import arxiv
import json
from datetime import date

def compile_articles_database(keywords, categories):
    """Search arXiv and save the matching articles to a dated JSON file.

    The query requires every keyword to match and at least one of the
    categories to match: ``(kw1 AND kw2 ...) AND (cat1 OR cat2 ...)``.
    Results are requested newest-first (by submission date), capped at 1000.

    Note: this notebook defines a second function with this same name (a
    CSV-writing variant) in a later cell; whichever definition was executed
    last is the one a call will reach.

    Args:
        keywords: Iterable of search terms (a single string is also
            accepted). Terms containing whitespace are sent as quoted
            phrases. Terms that already carry an arXiv field prefix
            (e.g. ``ti:``) are passed through unchanged.
        categories: Iterable of arXiv category codes such as ``"cs.LG"``
            (a single string is also accepted).

    Returns:
        A list of dicts with the keys ``title``, ``authors``, ``abstract``,
        ``categories``, ``link`` and ``date``, one per search result.

    Raises:
        ValueError: If neither a keyword nor a category is supplied.

    Side effects:
        Writes ``articles_database_<YYYY-MM-DD>.json`` (today's date) to the
        current working directory, as ``{"articles": [...]}``.
    """
    # A bare string would otherwise be joined character by character.
    if isinstance(keywords, str):
        keywords = [keywords]
    if isinstance(categories, str):
        categories = [categories]

    field_prefixes = ("ti:", "au:", "abs:", "co:", "jr:", "cat:", "rn:", "id:", "all:")

    def as_term(text, field):
        # Turn one raw keyword/category into a fielded arXiv query term.
        text = text.strip()
        if not text or text.startswith(field_prefixes):
            return text
        if any(ch.isspace() for ch in text) and not text.startswith('"'):
            # Multi-word keywords must be quoted to be matched as a phrase.
            text = '"' + text + '"'
        return field + ":" + text

    keyword_terms = [t for t in (as_term(k, "all") for k in (keywords or [])) if t]
    category_terms = [t for t in (as_term(c, "cat") for c in (categories or [])) if t]
    if not keyword_terms and not category_terms:
        raise ValueError("at least one keyword or category is required")

    # Parenthesise each group so the OR between categories cannot absorb the
    # keyword constraints through operator precedence.
    clauses = []
    if keyword_terms:
        clauses.append("(" + " AND ".join(keyword_terms) + ")")
    if category_terms:
        clauses.append("(" + " OR ".join(category_terms) + ")")
    query = " AND ".join(clauses)

    # Search the arxiv API for articles that match the keywords and categories
    search = arxiv.Search(
        query=query,
        max_results=1000,
        sort_by=arxiv.SortCriterion.SubmittedDate,
        sort_order=arxiv.SortOrder.Descending,
    )

    database = [
        {
            "title": article.title,
            "authors": [author.name for author in article.authors],
            "abstract": article.summary,
            "categories": article.categories,
            "link": article.pdf_url,
            "date": article.published,
        }
        for article in search.results()
    ]

    # File name carries the current date, e.g. articles_database_2021-03-01.json
    dbName = "articles_database_" + date.today().strftime("%Y-%m-%d") + ".json"

    with open(dbName, "w", encoding="utf-8") as file:
        # default=str serialises values json cannot encode natively
        # (the publication date) via str().
        json.dump({'articles': database}, file, default=str)

    # Return the database
    return database


In [None]:
import arxiv
import csv
from datetime import date

def compile_articles_database(keywords, categories):
    """Search arXiv and save the matching articles to a dated CSV file.

    The query requires every keyword to match and at least one of the
    categories to match: ``(kw1 AND kw2 ...) AND (cat1 OR cat2 ...)``.
    Results are requested newest-first (by submission date), capped at 1000.

    Note: this notebook defines another function with this same name (a
    JSON-writing variant) in an earlier cell; whichever definition was
    executed last is the one a call will reach.

    Args:
        keywords: Iterable of search terms (a single string is also
            accepted). Terms containing whitespace are sent as quoted
            phrases. Terms that already carry an arXiv field prefix
            (e.g. ``ti:``) are passed through unchanged.
        categories: Iterable of arXiv category codes such as ``"cs.LG"``
            (a single string is also accepted).

    Returns:
        A list of dicts with the keys ``title``, ``authors``, ``abstract``,
        ``categories``, ``link`` and ``date``, one per search result.

    Raises:
        ValueError: If neither a keyword nor a category is supplied.

    Side effects:
        Writes ``articles_database_<YYYY-MM-DD>.csv`` (today's date) to the
        current working directory, with a header row followed by one row per
        article; authors and categories are joined with ``", "``.
    """
    # A bare string would otherwise be joined character by character.
    if isinstance(keywords, str):
        keywords = [keywords]
    if isinstance(categories, str):
        categories = [categories]

    field_prefixes = ("ti:", "au:", "abs:", "co:", "jr:", "cat:", "rn:", "id:", "all:")

    def as_term(text, field):
        # Turn one raw keyword/category into a fielded arXiv query term.
        text = text.strip()
        if not text or text.startswith(field_prefixes):
            return text
        if any(ch.isspace() for ch in text) and not text.startswith('"'):
            # Multi-word keywords must be quoted to be matched as a phrase.
            text = '"' + text + '"'
        return field + ":" + text

    keyword_terms = [t for t in (as_term(k, "all") for k in (keywords or [])) if t]
    category_terms = [t for t in (as_term(c, "cat") for c in (categories or [])) if t]
    if not keyword_terms and not category_terms:
        raise ValueError("at least one keyword or category is required")

    # Parenthesise each group so the OR between categories cannot absorb the
    # keyword constraints through operator precedence.
    clauses = []
    if keyword_terms:
        clauses.append("(" + " AND ".join(keyword_terms) + ")")
    if category_terms:
        clauses.append("(" + " OR ".join(category_terms) + ")")
    query = " AND ".join(clauses)

    # Search the arxiv API for articles that match the keywords and categories
    search = arxiv.Search(
        query=query,
        max_results=1000,
        sort_by=arxiv.SortCriterion.SubmittedDate,
        sort_order=arxiv.SortOrder.Descending,
    )

    database = [
        {
            "title": article.title,
            "authors": [author.name for author in article.authors],
            "abstract": article.summary,
            "categories": article.categories,
            "link": article.pdf_url,
            "date": article.published,
        }
        for article in search.results()
    ]

    # File name carries the current date, e.g. articles_database_2021-03-01.csv
    dbName = "articles_database_" + date.today().strftime("%Y-%m-%d") + ".csv"

    # newline="" lets the csv module control line endings itself.
    with open(dbName, "w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        # Write the header row
        writer.writerow(["title", "authors", "abstract", "categories", "link", "date"])
        # One row per article; list-valued fields are flattened to text.
        writer.writerows(
            [
                article["title"],
                ", ".join(article["authors"]),
                article["abstract"],
                ", ".join(article["categories"]),
                article["link"],
                article["date"],
            ]
            for article in database
        )

    # Return the database
    return database


In [68]:
# Example usage: build the article database for neural networks in genomics.
# Valid category codes are listed at https://arxiv.org/category_taxonomy
keywords = ["neural networks", "genomics"]
categories = [
    "cs.LG",
    "stat.ML",
    "cs.AI",
    "q-bio.GN",
    "q-bio.MN",
]
db = compile_articles_database(keywords=keywords, categories=categories)

In [70]:
import json

# These names were used below without being imported anywhere in the notebook
# (OpenAI was only imported in a later cell), so the cell failed on a fresh
# kernel. Import them here so the cell is self-contained.
from langchain.agents import create_json_agent
from langchain.agents.agent_toolkits import JsonToolkit
from langchain.llms import OpenAI
from langchain.tools.json.tool import JsonSpec

# Open the JSON file written by the JSON variant of compile_articles_database.
# The date in the file name is the day that function was run; adjust it to
# match the snapshot you want to query.
with open('articles_database_2023-04-28.json', encoding="utf-8") as f:
    # Load the JSON data into a dictionary
    data = json.load(f)


# Wrap the loaded dictionary so the agent can explore it through the toolkit.
json_spec = JsonSpec(dict_=data, max_value_length=4000)
json_toolkit = JsonToolkit(spec=json_spec)

json_agent_executor = create_json_agent(
    llm=OpenAI(temperature=0),
    toolkit=json_toolkit,
    verbose=True
)
json_agent_executor.run("What are the most groundbreaking papers in deep learning?")

In [63]:
from langchain.agents import create_csv_agent
from langchain.llms import OpenAI

# Build an agent over the CSV snapshot of the article database and ask it
# the same question that is put to the JSON agent.
agent = create_csv_agent(
    OpenAI(temperature=0),
    'articles_database_2023-04-28.csv',
    verbose=True,
)
agent.run(
    "What are the most groundbreaking papers in deep learning?"
)