<a href="https://colab.research.google.com/github/airas-org/airas/blob/develop/notebook/airas_dev52.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the pinned AIRAS development build into the kernel's environment.
# TestPyPI is the primary index; the regular PyPI index is added as an extra
# index (presumably so dependencies that are not on TestPyPI can be resolved).
%pip install --index-url https://test.pypi.org/simple/ \
            --extra-index-url https://pypi.org/simple \
            airas==0.0.11.dev52

In [2]:
import logging

# Show INFO-and-above records, each prefixed with its level and logger name.
log_format = "[%(levelname)s] %(name)s: %(message)s"
logging.basicConfig(
    format=log_format,
    level=logging.INFO,
    force=True,  # replace any handlers already attached to the root logger
)


In [3]:
import os
from google.colab import userdata

# ==============================================================================
# SECRET KEY CONFIGURATION
# ==============================================================================
# INSTRUCTIONS:
# 1. Open the "Secrets" manager (key icon) in the Google Colab sidebar.
# 2. Add each secret name listed below (e.g., "OPENAI_API_KEY") and paste
#    its corresponding value.
# ==============================================================================


def _export_secret(name, required=True):
    """Copy the Colab secret ``name`` into ``os.environ`` under the same name.

    Args:
        name: Name of the secret in the Colab "Secrets" manager; also used as
            the environment variable name.
        required: When True (default), a missing secret is re-raised so the
            cell fails immediately. When False, a missing secret is reported
            and skipped so the rest of the cell still runs.

    Returns:
        True if the environment variable was set, False if an optional secret
        was missing and therefore skipped.

    Raises:
        userdata.SecretNotFoundError: If a required secret does not exist.
    """
    try:
        value = userdata.get(name)
    except userdata.SecretNotFoundError:
        if required:
            raise
        print(f"[WARNING] Optional secret {name!r} is not set; skipping.")
        return False
    os.environ[name] = value
    return True


# --- Required API Keys ---

# OpenAI API Key
# URL: https://platform.openai.com/settings/organization/api-keys
_export_secret("OPENAI_API_KEY")

# GitHub Personal Access Token (Fine-grained)
# URL: https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens
#
# Required settings for the token:
# - Resource owner: "auto-res2"
# - Permissions:    Grant full access to all scopes (Repositories, Workflows, etc.).
_export_secret("GITHUB_PERSONAL_ACCESS_TOKEN")


# --- Additional API Keys ---

# These keys are treated as optional: a missing one is reported and skipped
# instead of aborting the cell.
# NOTE(review): subgraphs that depend on a skipped key are expected to fail on
# their own when they run — confirm which subgraphs need which key.
for _secret_name in ("GEMINI_API_KEY", "DEVIN_API_KEY", "QDRANT_API_KEY"):
    _export_secret(_secret_name, required=False)
# _export_secret("SEMANTIC_SCHOLAR_API_KEY", required=False)

In [4]:
from airas.features import (
    AnalyticSubgraph,
    CreateBibfileSubgraph,
    CreateBranchSubgraph,
    CreateCodeSubgraph,
    CreateCodeWithDevinSubgraph,
    CreateExperimentalDesignSubgraph,
    CreateMethodSubgraph,
    ExtractReferenceTitlesSubgraph,
    FixCodeSubgraph,
    FixCodeWithDevinSubgraph,
    GenerateQueriesSubgraph,
    GetPaperTitlesFromDBSubgraph,
    GitHubActionsExecutorSubgraph,
    GithubDownloadSubgraph,
    GithubUploadSubgraph,
    HtmlSubgraph,
    LatexSubgraph,
    PrepareRepositorySubgraph,
    ReadmeSubgraph,
    RetrieveCodeSubgraph,
    RetrievePaperContentSubgraph,
    ReviewPaperSubgraph,
    SummarizePaperSubgraph,
    WriterSubgraph,
)

In [6]:
# ==============================================================================
# CONFIGURATION: DEFINE YOUR RESEARCH TASK
# ==============================================================================
# This is the primary cell for user input.
# Modify the variables below to set up your repository and research goal.
# ==============================================================================


# --- GitHub Repository Settings ---

# Set the repository owner (passed to GitHubRepositoryInfo as `github_owner`).
# NOTE: the token instructions in the secrets cell name "auto-res2" as the
# resource owner; if you change the owner here, presumably the token must be
# issued for that owner as well — verify before changing.
github_owner = "auto-res2"

# Set the repository name (replace the placeholder before running).
repository_name = "your-repository-name"


# Set the branch name (replace the placeholder before running).
# Each branch represents a distinct attempt or experiment for the research topic.
branch_name = "your-branch-name"


# --- Research Topic ---

# Describe your research topic here in free text; it is stored in the workflow
# state under the "research_topic" key.
# Example: "I want to speed up the diffusion model."
research_topic = "your-research-topic"

In [7]:
# ==============================================================================
# LLM SELECTION
# ==============================================================================
# This cell defines the list of all supported Large Language Models (LLMs).
# Any model in these lists can be used within any subgraph.
# ==============================================================================


# --- Supported Models ---

# OpenAI models.
OPENAI_MODEL = [
    "o4-mini-2025-04-16",
    "o3-2025-04-16",
    "o3-mini-2025-01-31",
    "o1-pro-2025-03-19",
    "o1-2024-12-17",
    "gpt-5-2025-08-07",
    "gpt-5-mini-2025-08-07",
    "gpt-5-nano-2025-08-07",
    "gpt-4.1-2025-04-14",
    "gpt-4o-2024-11-20",
    "gpt-4o-mini-2024-07-18",
]

# Gemini models; usable if GEMINI_API_KEY is available.
VERTEXAI_MODEL = [
    "gemini-2.5-pro",
    "gemini-2.5-flash",
    "gemini-2.5-flash-lite-preview-06-17",
    "gemini-2.0-flash-001",
    "gemini-2.0-flash-lite-001",
    "gemini-embedding-001",
]


# --- Default Model ---

# Default LLM for this session; the subgraph cells below pass it as `llm_name`.
# Pick any entry from the lists above.
llm_name = "o3-mini-2025-01-31"


In [8]:
# ==============================================================================
# Subgraph Configuration
# ==============================================================================
# In this cell, we instantiate all the necessary subgraphs for the research
# workflow. They are grouped into logical phases for clarity:
# 1. Research and Information Gathering
# 2. Implementation and Experimentation
# 3. Writing and Publication

In [9]:
# --- Phase 1: Research and Information Gathering ---


# Generates search queries from the research topic.
generate_queries = GenerateQueriesSubgraph(
    llm_name=llm_name,
    n_queries=5,
)


# Retrieves paper titles from a local database.
get_paper_titles = GetPaperTitlesFromDBSubgraph(
    max_results_per_query=3,
    semantic_search=True,
)
# Alternative (NOTE: GetPaperTitlesFromWebSubgraph is not in this notebook's
# import cell; add it there before uncommenting):
# get_paper_titles = GetPaperTitlesFromWebSubgraph(max_results_per_query=5)


# Retrieves the full content of the papers in "research_study_list".
retrieve_paper_content = RetrievePaperContentSubgraph(
    paper_provider="arxiv",
    target_study_list_source="research_study_list",
)


# Summarizes the content of the retrieved papers.
summarize_paper = SummarizePaperSubgraph(llm_name=llm_name)


# Retrieves code related to the research papers.
retrieve_code = RetrieveCodeSubgraph(llm_name=llm_name)


# Extracts reference titles from papers.
reference_extractor = ExtractReferenceTitlesSubgraph(
    llm_name=llm_name,
    paper_retrieval_limit=10,
)


# Retrieves the content of the papers in "reference_research_study_list".
retrieve_reference_paper_content = RetrievePaperContentSubgraph(
    paper_provider="arxiv",
    target_study_list_source="reference_research_study_list",
)

In [10]:
# --- Phase 2: Implementation and Experimentation ---


# Creates a new research method based on the findings.
create_method = CreateMethodSubgraph(
    llm_name=llm_name,
    refine_iterations=5,
)


# Designs experiments to validate the new method.
create_experimental_design = CreateExperimentalDesignSubgraph(
    llm_name=llm_name,
)


# Generates code that implements the experimental design.
# Swap in the commented line to use the non-Devin variant instead.
coder = CreateCodeWithDevinSubgraph()
# coder = CreateCodeSubgraph(llm_name=llm_name)


# Executes the code using GitHub Actions.
executor = GitHubActionsExecutorSubgraph(gpu_enabled=True)


# Fixes code errors.
# Swap in the commented line to use the non-Devin variant instead.
fixer = FixCodeWithDevinSubgraph(llm_name=llm_name)
# fixer = FixCodeSubgraph(llm_name=llm_name)


# Analyzes the results of the experiments.
analysis = AnalyticSubgraph(llm_name=llm_name)

In [11]:
# --- Phase 3: Writing and Publication ---


# Creates a BibTeX file for citations.
create_bibfile = CreateBibfileSubgraph(
    llm_name=llm_name,
    latex_template_name="iclr2024",
    max_filtered_references=30,
)


# Writes the final research paper.
writer = WriterSubgraph(
    llm_name=llm_name,
    max_refinement_count=2,
)


# Reviews the generated paper against a set of criteria.
review = ReviewPaperSubgraph(llm_name=llm_name)


# Generates a LaTeX version of the paper (same template name as the BibTeX step).
latex = LatexSubgraph(
    llm_name=llm_name,
    latex_template_name="iclr2024",
    max_revision_count=3,
)


# Generates a README file for the project.
readme = ReadmeSubgraph()


# Generates an HTML version of the paper.
html = HtmlSubgraph(llm_name=llm_name)


# Uploads the project to GitHub.
uploader = GithubUploadSubgraph()

In [None]:
# ==============================================================================
# PREPARE THE GITHUB REPOSITORY
# ==============================================================================

from airas.types.github import GitHubRepositoryInfo


# Initial workflow state: the target repository/branch and the research topic.
state = {
    "github_repository_info": GitHubRepositoryInfo(
        github_owner=github_owner,
        repository_name=repository_name,
        branch_name=branch_name,
    ),
    "research_topic": research_topic,
}

# Run the repository preparation subgraph; its return value is not kept, so
# `state` stays as defined above.
_ = PrepareRepositorySubgraph().run(state)

In [None]:
# ==============================================================================
# EXECUTE THE RESEARCH WORKFLOW
# ==============================================================================

from tqdm import tqdm

# Ordered list of subgraph instances; run_subgraphs iterates it front to back.
# `uploader` is intentionally absent: run_subgraphs calls it after every entry.
subgraph_list = [
    # Research and information gathering
    generate_queries,
    get_paper_titles,
    retrieve_paper_content,
    summarize_paper,
    retrieve_code,
    # Implementation and experimentation
    create_method,
    create_experimental_design,
    coder,
    executor,
    fixer,
    analysis,
    # Reference extraction and retrieval
    reference_extractor,
    retrieve_reference_paper_content,
    # Writing and publication
    create_bibfile,
    writer,
    review,
    latex,
    readme,
    html,
]

def run_subgraphs(subgraph_list, state, max_fix_attempts=None):
    """Run each subgraph in order, threading the workflow state through them.

    Every entry's ``run(state)`` result becomes the state passed to the next
    entry. After each entry the module-level ``uploader`` is run on the
    current state.

    Entries that are ``FixCodeSubgraph`` / ``FixCodeWithDevinSubgraph``
    instances are handled specially: while ``state["executed_flag"]`` is not
    ``True``, the fixer entry and then the module-level ``executor`` are run;
    once the flag is ``True`` the module-level ``analysis`` is run.

    Args:
        subgraph_list: Iterable of subgraph instances exposing ``run(state)``.
        state: Initial workflow state (a mapping supporting ``.get``).
        max_fix_attempts: Upper bound on fix/execute rounds per fixer entry.
            ``None`` (default) keeps the loop unbounded.

    Returns:
        The state produced by the last subgraph that ran (the input ``state``
        when ``subgraph_list`` is empty).

    Raises:
        RuntimeError: If ``max_fix_attempts`` is set and ``executed_flag`` is
            still not ``True`` after that many fix/execute rounds.
    """
    for subgraph in tqdm(subgraph_list, desc="Executing Research Workflow"):
        subgraph_name = subgraph.__class__.__name__
        print(f"--- Running Subgraph: {subgraph_name} ---")

        if isinstance(subgraph, (FixCodeSubgraph, FixCodeWithDevinSubgraph)):
            fix_attempts = 0
            while state.get("executed_flag") is not True:
                if max_fix_attempts is not None and fix_attempts >= max_fix_attempts:
                    raise RuntimeError(
                        f"{subgraph_name}: code still not executed successfully "
                        f"after {fix_attempts} fix attempt(s)"
                    )
                # Use the fixer taken from the list rather than the global
                # `fixer`, so the list fully determines what runs.
                state = subgraph.run(state)
                state = executor.run(state)
                fix_attempts += 1
            # NOTE(review): when `analysis` also follows the fixer in
            # `subgraph_list`, it runs a second time on the next iteration —
            # confirm whether that is intended.
            state = analysis.run(state)
        else:
            state = subgraph.run(state)

        uploader.run(state)
        print(f"--- Finished Subgraph: {subgraph_name} ---\n")

    # Hand the final state back so callers can inspect or reuse it.
    return state


# Execute the research workflow on the initial `state`.
# The call's return value is discarded here; `state` itself is not reassigned.
_ = run_subgraphs(subgraph_list, state)

In [None]:
# NOTE: Currently, the cells below this point are not working.

# ==============================================================================
# OPTIONAL: CREATE A NEW BRANCH AND RESTART FROM A SUBGRAPH
# ==============================================================================
# Use this cell if you want to create a new branch and re-run the process
# from a specific point.
#
# This is useful for experimenting with a different approach midway through
# the workflow without altering the original branch.
# ==============================================================================


# --- CONFIGURATION ---

# Name of the branch to create.
new_branch_name = "your-new-branch-name"

# Name of the subgraph from which execution should resume.
# Example: "CreateMethodSubgraph"
restart_from_subgraph = "subgraph-name-to-restart-from"


# Create the new branch, passing the current `state`.
_ = CreateBranchSubgraph(
    new_branch_name=new_branch_name,
    restart_from_subgraph=restart_from_subgraph,
).run(state)

In [None]:
# To restart the workflow from a specific subgraph (e.g., the one defined in
# the `restart_from_subgraph` variable), manually comment out the lines
# in this list that come before your desired starting point.

from airas.types.github import GitHubRepositoryInfo

# EXAMPLE: restarting from `create_method` — every entry before it is
# commented out, everything from it onward stays active.

subgraph_list = [
    # generate_queries,
    # get_paper_titles,
    # retrieve_paper_content,
    # summarize_paper,
    # retrieve_code,
    create_method,
    create_experimental_design,
    coder,
    executor,
    fixer,
    analysis,
    reference_extractor,
    retrieve_reference_paper_content,
    create_bibfile,
    writer,
    review,
    latex,
    readme,
    html,
]


# Start from a fresh state that only points at the new branch.
state = {
    "github_repository_info": GitHubRepositoryInfo(
        github_owner=github_owner,
        repository_name=repository_name,
        branch_name=new_branch_name,
    ),
}

# Download the state up to restart_from_subgraph from the new branch.
state = GithubDownloadSubgraph().run(state)

# Re-execute the research workflow with the shortened list.
_ = run_subgraphs(subgraph_list, state)