In [None]:
import os
import json
from agents.general_agent import Agent
from agents.eval_agent import EvalAgent

In [None]:
# Inline implementation of ConceptExtractionAgent for debugging
class ConceptExtractionAgent:
    """Extract a four-level concept hierarchy from a project description.

    The agent reads a JSON project file from ``queries_folder``, sends the
    file's ``description`` field to ``gpt_agent`` and parses the JSON reply.

    Args:
        gpt_agent: Object exposing ``query(instructions, input_text)`` whose
            return value is the model's reply text.
        queries_folder: Directory that contains the project JSON files.
    """

    # Categories the prompt asks for; any the model omits default to [].
    CONCEPT_KEYS = (
        "prerequisites",
        "fundamental_concepts",
        "core_concepts",
        "specialized_concepts",
    )

    _PARSE_ERROR = (
        "Failed to parse GPT response. Ensure the response is in the correct JSON format."
    )

    def __init__(self, gpt_agent, queries_folder="queries"):
        self.gpt_agent = gpt_agent
        self.queries_folder = queries_folder

    def extract_concepts(self, filename):
        """
        Extract concepts from a project description using GPT API.

        Args:
            filename: Name of a JSON file inside ``queries_folder``; it must
                hold an object with a non-empty ``description`` field.

        Returns:
            dict with one list per entry of ``CONCEPT_KEYS``.

        Raises:
            FileNotFoundError: the file does not exist.
            ValueError: the file is not valid JSON (``json.JSONDecodeError``
                is a ``ValueError``), has no description, or the model reply
                cannot be parsed as a JSON object.
        """
        file_path = os.path.join(self.queries_folder, filename)
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File {file_path} not found.")

        # JSON is UTF-8 by specification; do not depend on the platform default.
        with open(file_path, "r", encoding="utf-8") as file:
            project_data = json.load(file)

        if not isinstance(project_data, dict):
            raise ValueError("Project description is missing in the JSON file.")

        # Extract the project description
        description = project_data.get("description", "")
        if not description:
            raise ValueError("Project description is missing in the JSON file.")

        # Query GPT to infer concepts
        return self._query_gpt_for_concepts(description)

    @staticmethod
    def _strip_code_fences(text):
        """Return ``text`` without a surrounding Markdown code fence, if any."""
        if not isinstance(text, str):
            return text
        stripped = text.strip()
        if len(stripped) >= 6 and stripped.startswith("```") and stripped.endswith("```"):
            inner = stripped[3:-3]
            # Drop an optional language tag (e.g. "json") on the opening fence line.
            first_line, newline, rest = inner.partition("\n")
            if newline and not first_line.strip().startswith(("{", "[")):
                inner = rest
            return inner.strip()
        return stripped

    def _query_gpt_for_concepts(self, description):
        """
        Query GPT to infer concepts from the project description.

        Returns:
            dict containing every key of ``CONCEPT_KEYS``.

        Raises:
            ValueError: the reply is not a JSON object.
        """
        # The prompt describes all four categories it requests, so the model
        # has guidance for "prerequisites" as well.
        input_text = f"""
        Analyze the following project description and extract concepts into four categories. 
        Each category has specific guidelines to ensure proper classification:

        - Prerequisites: These are the basic skills and background knowledge needed before studying the fundamental concepts. 
          They are the most general of the four categories and are not specific to the project. 
          Examples: Programming, Linear Algebra, Basic Physics.

        - Fundamental Concepts: These are foundational to the core concepts and provide the necessary theoretical or technical background 
          to understand and work with the core concepts. They are more advanced than prerequisites but not as specific as core concepts. 
          Examples: Machine Learning Basics, Deep Learning, Robotic Kinematics, Control Theory.

        - Core Concepts: These are the central focus areas of the project and represent the main topics or themes the project is addressing. 
          They are typically the tags or keywords associated with the project. 
          Examples: Reinforcement Learning (RL), Computer Vision, Robotics.

        - Specialized Concepts: These are specific applications or implementations of the core concepts within the context of the project. 
          They are highly specific and often represent advanced or applied topics. 
          Examples: RL for Grasping Strategies, Sim-to-Real Transfer, Affordance-Based Manipulation.

        Project Description:
        {description}

        Provide the output in the following JSON format.
        Do NOT include tick marks or any other formatting. Just ONLY provide the JSON object:
        {{
            "prerequisites": ["<list of concepts>"],
            "fundamental_concepts": ["<list of concepts>"],
            "core_concepts": ["<list of concepts>"],
            "specialized_concepts": ["<list of concepts>"]
        }}
        """
        try:
            response = self.gpt_agent.query("You are a helpful assistant.", input_text)
            print("GPT response:", response)  # Debugging line
            # Models sometimes wrap the JSON in a code fence despite the instruction.
            concepts = json.loads(self._strip_code_fences(response))
        except json.JSONDecodeError as exc:
            raise ValueError(self._PARSE_ERROR) from exc

        if not isinstance(concepts, dict):
            raise ValueError(self._PARSE_ERROR)

        # Guarantee every category exists so callers can index without KeyError.
        for key in self.CONCEPT_KEYS:
            concepts.setdefault(key, [])
        return concepts

In [35]:
# Resolve the queries folder relative to the notebook's working directory
notebook_dir = os.getcwd()

queries_folder = os.path.join(notebook_dir, "..", "queries")

gpt_agent = Agent(api_key=os.getenv("OPENAI_API_KEY"))

concept_agent = ConceptExtractionAgent(gpt_agent, queries_folder=queries_folder)

# Start from empty lists so all four names exist whatever happens below;
# previously a failure midway could leave only some of them bound.
core_concepts = []
specialized_concepts = []
fundamental_concepts = []
prerequisites = []

try:
    concepts = concept_agent.extract_concepts("robotics_grasp.json")
except (FileNotFoundError, ValueError) as e:
    print(f"Error: {e}")
else:
    # .get() keeps a reply that omits a category from raising a KeyError,
    # which the except clause above would not have caught.
    core_concepts = concepts.get("core_concepts", [])
    specialized_concepts = concepts.get("specialized_concepts", [])
    fundamental_concepts = concepts.get("fundamental_concepts", [])
    prerequisites = concepts.get("prerequisites", [])

GPT response: {
    "prerequisites": ["Programming", "Basic Physics"],
    "fundamental_concepts": ["Machine Learning Basics", "Robotic Kinematics", "Control Theory"],
    "core_concepts": ["Reinforcement Learning", "Robotics", "Computer Vision"],
    "specialized_concepts": ["RL for Grasping Strategies", "Sim-to-Real Transfer", "Affordance-Based Manipulation", "Pick-and-Place Tasks"]
}


In [None]:
def query_gpt_for_survey_papers(concept):
    """
    Ask the notebook-level ``gpt_agent`` for four survey papers on ``concept``.

    Args:
        concept: Topic name interpolated into the request text.

    Returns:
        Whatever ``gpt_agent.query`` returns for the request (the reply is
        passed through untouched).
    """
    system_prompt = "You are a helpful assistant."
    user_prompt = (
        f"Provide 4 survey papers on the topic '{concept}'. "
        "For each paper, include the title and Semantic Scholar DOI."
    )
    reply = gpt_agent.query(system_prompt, user_prompt)
    return reply

In [6]:
# Query GPT-4 for survey papers for each core concept.
# survey_papers maps concept -> {"papers": [...]}; the shape is the same on
# failure so later cells can always call .get("papers", []) on the value.
survey_papers = {}
for concept in core_concepts:
    print(f"Querying GPT-4 for survey papers on: {concept}")
    
    input_text = f"""
    Provide 6 survey papers on the topic '{concept}' along with their DOI id's, URLs, and other metadata in the following JSON format.
    Do NOT include tick marks or any other formatting. Just ONLY provide the JSON object: 
    {{
        "papers": [
            {{
                "title": "<title>",
                "doi": "<doi>",
                "url": "<url>",
                "abstract": "<abstract>"
            }},
            ...
        ]
    }}
    """
    # Reset per concept so the failure message never shows a previous reply.
    gpt_response = None
    try:
        gpt_response = gpt_agent.query("You are a helpful assistant.", input_text)
        parsed_response = json.loads(gpt_response)  # parse once, reuse below
    except json.JSONDecodeError:
        parsed_response = None  # reported by the fallback branch below

    if isinstance(parsed_response, dict):
        print(f"GPT-4 Response for {concept}:\n{json.dumps(parsed_response, indent=4)}")
        survey_papers[concept] = parsed_response
    else:
        # Invalid JSON, or valid JSON that is not an object.
        print(f"Failed to parse GPT-4 response for {concept}. Response: {gpt_response}")
        survey_papers[concept] = {"papers": []}

Querying GPT-4 for survey papers on: Reinforcement Learning (RL)
GPT-4 Response for Reinforcement Learning (RL):
{
    "papers": [
        {
            "title": "A Survey of Reinforcement Learning Algorithms for Dynamically Varying Environments",
            "doi": "10.1109/ACCESS.2020.3019890",
            "url": "https://ieeexplore.ieee.org/document/9174910",
            "abstract": "This paper surveys reinforcement learning algorithms that adapt to dynamically changing environments, focusing on methods that adjust to new conditions without retraining from scratch."
        },
        {
            "title": "Deep Reinforcement Learning: A Survey",
            "doi": "10.1145/3386252",
            "url": "https://dl.acm.org/doi/10.1145/3386252",
            "abstract": "This survey provides a comprehensive overview of deep reinforcement learning, discussing key algorithms, theoretical foundations, and applications across various domains."
        },
        {
            "title": "A 

In [10]:
# Per-topic tallies: concept -> {seminal work title -> number of surveys citing it}
seminal_paper_counts_by_topic = {}

# concept -> {survey title -> list of seminal works returned for that survey}
top_references = {}

for concept, papers in survey_papers.items():
    print(f"\nProcessing concept: {concept}")
    top_references[concept] = {}
    seminal_paper_counts_by_topic[concept] = {}
    topic_counts = seminal_paper_counts_by_topic[concept]

    # A failed survey query may have stored something other than
    # {"papers": [...]}; treat that as "no papers" instead of crashing.
    paper_list = papers.get("papers", []) if isinstance(papers, dict) else []

    for paper in paper_list:
        title = paper.get("title", "Unknown Title")
        print(f"Retrieving top references for paper: {title}")
        
        input_text = f"""
        For the paper titled '{title}', provide the 5 most seminal works (including papers and textbooks) in the field that are related to this paper and would likely be cited. 
        If you cannot access external databases, respond with 5 hypothetical seminal works based on the paper title in the following example JSON format (not with this content, but with the same structure):
        Do NOT include tick marks or any other formatting. Just ONLY provide the JSON object: 
        {{
            "seminal_works": [
            {{"title": "Seminal Work 1", "year": 1998}},
            {{"title": "Seminal Work 2", "year": 2013}},
            {{"title": "Seminal Work 3", "year": 2015}},
            {{"title": "Seminal Work 4", "year": 2020}},
            {{"title": "Seminal Work 5", "year": 2021}}
            ]
        }}
        """
        try:
            gpt_response = gpt_agent.query("You are a helpful assistant.", input_text)
            parsed_reply = json.loads(gpt_response)
            references = parsed_reply.get("seminal_works", []) if isinstance(parsed_reply, dict) else []
            if not isinstance(references, list):
                references = []  # e.g. "seminal_works": null
            top_references[concept][title] = references
            paper["references"] = references
            
            # Aggregate references into the topic-specific seminal paper counts
            for ref in references:
                ref_title = ref.get("title") if isinstance(ref, dict) else None
                if not isinstance(ref_title, str) or not ref_title:
                    continue  # malformed entry: nothing to count
                topic_counts[ref_title] = topic_counts.get(ref_title, 0) + 1

        except json.JSONDecodeError:
            print(f"Failed to parse GPT-4 response for paper: {title}")
            top_references[concept][title] = []
            paper["references"] = []
    
    # Output the seminal paper counts for the current topic by value
    print(f"\nSeminal Paper Counts for {concept} (sorted by count):")
    sorted_counts = sorted(topic_counts.items(), key=lambda item: item[1], reverse=True)
    for paper_title, count in sorted_counts:
        print(f"{paper_title}: {count}")


Processing concept: Reinforcement Learning (RL)
Retrieving top references for paper: A Survey of Reinforcement Learning Algorithms for Dynamically Varying Environments
Retrieving top references for paper: Deep Reinforcement Learning: A Survey
Retrieving top references for paper: A Survey on Multi-Agent Reinforcement Learning: From Foundations to Applications
Retrieving top references for paper: A Survey of Inverse Reinforcement Learning: Challenges, Methods and Progress
Retrieving top references for paper: A Survey on Safe Reinforcement Learning: Methods, Challenges, and Future Directions
Retrieving top references for paper: A Survey of Model-Based Reinforcement Learning: Recent Advances and Applications

Seminal Paper Counts for Reinforcement Learning (RL) (sorted by count):
Reinforcement Learning: An Introduction: 5
Dynamic Environments and Reinforcement Learning: 1
Algorithms for Adaptive Learning in Non-Stationary Environments: 1
Deep Reinforcement Learning with Non-Stationary Rew

In [11]:
# Build the evaluation agent from the per-topic seminal-work tallies
eval_agent = EvalAgent(seminal_paper_counts_by_topic)

# Let the agent choose the works to evaluate (its criteria live in EvalAgent)
selected_papers = eval_agent.select_papers()

# Report the selection, one section per topic
print("\nSelected Papers for Evaluation:")
for topic_name, chosen_works in selected_papers.items():
    print(f"\nTopic: {topic_name}")
    for chosen_work in chosen_works:
        print(f"- {chosen_work}")


Selected Papers for Evaluation:

Topic: Reinforcement Learning (RL)
- Reinforcement Learning: An Introduction
- Dynamic Environments and Reinforcement Learning
- Algorithms for Adaptive Learning in Non-Stationary Environments
- Deep Reinforcement Learning with Non-Stationary Rewards
- Meta-Learning for Dynamic Environments
- Playing Atari with Deep Reinforcement Learning
- Human-level control through deep reinforcement learning
- Asynchronous Methods for Deep Reinforcement Learning

Topic: Computer Vision
- ImageNet Classification with Deep Convolutional Neural Networks
- Deep Learning
- ImageNet: A Large-Scale Hierarchical Image Database
- Deep Residual Learning for Image Recognition
- Very Deep Convolutional Networks for Large-Scale Image Recognition
- Generative Adversarial Nets
- AlexNet: ImageNet Classification with Deep Convolutional Neural Networks
- Generative Adversarial Networks
- AutoAugment: Learning Augmentation Policies from Data
