In [28]:
# Imports
import warnings
warnings.filterwarnings('ignore')

from crewai import Agent, Task, Crew
import os
from dotenv import load_dotenv

# Load environment variables from .env
load_dotenv()
os.environ["OPENAI_MODEL_NAME"] = 'gpt-3.5-turbo'

In [29]:
from crewai_tools import SerperDevTool, ScrapeWebsiteTool, WebsiteSearchTool, FileReadTool
from PyPDF2 import PdfReader
from langchain.chat_models import ChatOpenAI

In [30]:
file_read_tool = FileReadTool()

study_agent = Agent(
    role="Study Agent for Healthcare + GenAI Papers",
    goal="Read and summarize generative AI techniques applied to healthcare in each paper.",
    backstory=(
        "I’m a research analyst with a strong understanding of AI and biomedical literature. "
        "My mission is to extract key insights from each research paper and present them in a digestible way."
    ),
    tools=[file_read_tool],
    allow_delegation=False,
    verbose=True
)

In [31]:
def chunk_text(text, max_words=1500):
    words = text.split()
    return [" ".join(words[i:i+max_words]) for i in range(0, len(words), max_words)]




def read_and_summarize_papers(agent: Agent, folder_path: str):
    summaries = {}
    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

    for filename in os.listdir(folder_path):
        if filename.endswith(".pdf") or filename.endswith(".txt"):
            file_path = os.path.join(folder_path, filename)
            print(f"\n🔍 Reading: {filename}")

            # Read the content
            if filename.endswith(".pdf"):
                content = extract_text_from_pdf(file_path)
            else:
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()

            # Split content into chunks
            chunks = chunk_text(content, max_words=1500)

            # Summarize each chunk separately
            partial_summaries = []
            for i, chunk in enumerate(chunks):
                prompt = f"""
You are summarizing part {i+1} of an academic paper. Follow this structure and do NOT make assumptions.

1. **AI Technique(s)**: What GenAI method(s) are used? Name models, frameworks, architectures.
2. **Healthcare Application**: What specific medical domain or task is this applied to?
3. **Methodology**: Detailed description of the model pipeline or process.
4. **Key Findings or Contributions**: What are the main results or insights?
5. **Limitations or Challenges**: Any weaknesses or barriers mentioned?

Text from the paper:
---------------------
{chunk}
"""
                summary = llm.predict(prompt)
                partial_summaries.append(summary)

            # Combine partial summaries into a full structured summary
            combined_prompt = f"""
You are generating a final structured summary of the paper '{filename}'. Use only the information in the following parts. Do not add anything new. Follow this format:

### {filename}
- **AI Technique(s)**:
- **Healthcare Application**:
- **Methodology**:
- **Key Findings or Contributions**:
- **Limitations or Challenges**:

Paper Parts:
---------------------
{chr(10).join(partial_summaries)}
"""
            final_summary = llm.predict(combined_prompt)
            summaries[filename] = final_summary

            print(f"✅ Done summarizing: {filename}\n")

    return summaries


In [None]:
if __name__ == "__main__":
    folder_path = "/Users/brunamedeiros/Documents/University of Chicago/Spring 2025 - Capstone I/Assignment-Research (2)/Papers"
    
    print("🚀 Starting paper analysis...")
    summaries = read_and_summarize_papers(study_agent, folder_path)

    # Print all summaries
    for fname, summ in summaries.items():
        print(f"\n📄 === {fname} Summary ===\n{summ}\n")

    # Optional: Save to file
    with open("summaries_output.txt", "w", encoding="utf-8") as f:
        for fname, summ in summaries.items():
            f.write(f"=== {fname} ===\n{summ}\n\n")

    print("✅ All summaries saved to summaries_output.txt")


🚀 Starting paper analysis...

🔍 Reading: Systematic Review LLM Apps.pdf
✅ Done summarizing: Systematic Review LLM Apps.pdf


🔍 Reading: Transformative impact of LLM in Medicine.pdf
✅ Done summarizing: Transformative impact of LLM in Medicine.pdf


🔍 Reading: yang-et-al-2024-application-of-large-language-models-in-disease-diagnosis-and-treatment.pdf
✅ Done summarizing: yang-et-al-2024-application-of-large-language-models-in-disease-diagnosis-and-treatment.pdf


🔍 Reading: Multimodal in healthcare.pdf
✅ Done summarizing: Multimodal in healthcare.pdf


🔍 Reading: Agents in Clinic.pdf
✅ Done summarizing: Agents in Clinic.pdf


🔍 Reading: Autonomous Agents 2024 in medicine.pdf
✅ Done summarizing: Autonomous Agents 2024 in medicine.pdf


🔍 Reading: Polaris LLM Constellation.pdf
✅ Done summarizing: Polaris LLM Constellation.pdf


🔍 Reading: LLM Agents in Medicine.pdf
✅ Done summarizing: LLM Agents in Medicine.pdf


🔍 Reading: MedAide.pdf
✅ Done summarizing: MedAide.pdf


🔍 Reading: Adaptive R

In [33]:
with open("summaries_output.txt", "r", encoding="utf-8") as f:
    contents = f.read()
    print(contents)

=== Systematic Review LLM Apps.pdf ===
### Systematic Review LLM Apps.pdf
- **AI Technique(s)**: Large Language Models (LLMs) and Generative Pre-trained Transformers (GPT)
- **Healthcare Application**: Various healthcare tasks including assessing medical knowledge, making diagnoses, educating patients, generating medical reports, and more.
- **Methodology**: Systematic review of studies categorizing evaluation data types, NLP/NLU tasks, dimensions of evaluation, and medical specialties studied.
- **Key Findings or Contributions**: Limited use of real patient care data for LLM evaluation, focus on accuracy over fairness, bias, and toxicity assessments, and gaps in evaluating LLMs for real-world deployment.
- **Limitations or Challenges**: Shallow and fragmented evaluation of LLMs in healthcare, lack of standardized evaluation dimensions, and limited assessment of real patient care data across medical specialties and tasks.

=== Transformative impact of LLM in Medicine.pdf ===
### Transf

In [38]:

# Agent 2: Research Gap Finder & Idea Generator
idea_generator = Agent(
    role="Research Gap & Idea Generator",
    goal="Identify gaps in existing research and generate innovative GenAI + Healthcare project ideas",
    backstory=(
        "I am a practical-minded AI researcher and healthcare systems strategist. "
        "My aim is to propose solutions that can make an immediate, tangible impact in clinical workflows, "
        "patient care, and hospital operational efficiency. I leverage cutting-edge generative AI methods, "
        "ranging from medical imaging enhancements to large-scale language model applications for real-time clinical decision support."
        "I focus on ideas that go beyond theoretical frameworks—my goal is to design next-step solutions "
        "like improved radiology scan interpretations, advanced visual analytics for EHR data, GPT-based "
        "chatbots that provide safe triage advice, or workflow optimization using generative insights. "
        "Every concept I propose should address a clear healthcare need and be feasible to pilot or deploy "
        "in real clinical environments."
    ),
    allow_delegation=True,
    verbose=True
)

# Agent 3: Critique & Decision Maker
critic_agent = Agent(
    role="Research Critique & Decision Agent",
    goal="Critique all proposed research ideas and select the single most impactful and feasible proposal.",
    backstory=(
        "I am a senior medical and AI expert. I ensure that every idea is not only innovative but also practical, medically relevant."
    ),
    allow_delegation=True,
    verbose=True
)

# Agent 4: Proposal Writer
writer_agent = Agent(
    role="Research Proposal Writer",
    goal="Write a formal, structured proposal for the selected GenAI+Healthcare research idea.",
    backstory=(
        "I specialize in crafting structured academic proposals. I transform ideas into polished documents with the right sections and citations."
    ),
    allow_delegation=False,
    verbose=True
)

# ===============================
# 6. TASKS
# ===============================

with open("summaries_output.txt", "r", encoding="utf-8") as f:
    summaries_text = f.read()

# Task 1: Idea generation based on long-form summaries
idea_task = Task(
    description=(
        f"Based on the following summaries of recent papers, identify 3–5 meaningful research gaps.\n"
        f"Then propose 3 novel GenAI + Healthcare research directions that would fill those gaps.\n\n"
        f"PAPER SUMMARIES:\n{summaries_text}"
    ),
    expected_output="3 proposed GenAI + Healthcare research ideas with justification and associated research gaps.",
    agent=idea_generator
)

# Task 2: Critique and select final proposal
critique_task = Task(
    description=(
        "Review the 3 research ideas proposed. Score them on originality, feasibility, and impact (scale 1–5).\n"
        "Then select the single best idea and explain your reasoning.\n"
        "You may interact with the idea_generator agent to refine the ideas if needed."
    ),
    expected_output="The best research idea with a short justification and a cutoff decision.",
    agent=critic_agent
)

# Task 3: Final proposal writing
write_task = Task(
    description=(
        "Write a complete research proposal for the selected idea.\n"
        "Your sections must be: Title, Abstract (150–250 words), Background & Literature Review,\n"
        "Problem Statement & Research Gap, Proposed Gen AI Approach, Expected Impact in Healthcare,\n"
        "Limitations or Ethical Considerations, and References.\n"
        "You may cite from the paper summaries if applicable. Do not hallucinate information."
    ),
    expected_output="A structured research proposal document in full.",
    agent=writer_agent
)

# ===============================
# 7. CREW SETUP & RUN
# ===============================

crew = Crew(
    agents=[idea_generator, critic_agent, writer_agent],
    tasks=[idea_task, critique_task, write_task],
    verbose=True
)

if __name__ == "__main__":
    print("\n🚀 Running multi-agent research proposal workflow...")
    result = crew.kickoff()
    print("\n✅ Final Proposal:\n")
    print(result)

    # Optional: Save to file
    with open("final_proposal.txt", "w", encoding="utf-8") as f:
        f.write(result)
    print("\n📄 Proposal saved to final_proposal.txt")





🚀 Running multi-agent research proposal workflow...
[1m[95m [DEBUG]: == Working Agent: Research Gap & Idea Generator[00m
[1m[95m [INFO]: == Starting Task: Based on the following summaries of recent papers, identify 3–5 meaningful research gaps.
Then propose 3 novel GenAI + Healthcare research directions that would fill those gaps.

PAPER SUMMARIES:
=== Systematic Review LLM Apps.pdf ===
### Systematic Review LLM Apps.pdf
- **AI Technique(s)**: Large Language Models (LLMs) and Generative Pre-trained Transformers (GPT)
- **Healthcare Application**: Various healthcare tasks including assessing medical knowledge, making diagnoses, educating patients, generating medical reports, and more.
- **Methodology**: Systematic review of studies categorizing evaluation data types, NLP/NLU tasks, dimensions of evaluation, and medical specialties studied.
- **Key Findings or Contributions**: Limited use of real patient care data for LLM evaluation, focus on accuracy over fairness, bias, and toxic

---

Our final structure: 
agent 1: reads the papers (Papers.zip)
agent 2: find research gaps (input from 1) and generate new ideas from that 
agent 3: critique //
- agent 3 decides cut-off
- 2 and 3 back and forth
agent 4: receives the final proposal from agent 3 on what the final proposal is and generates the written document. Agent 4 should generate output in this format " Title, Abstract (150–250 words), Background & Literature Review, Problem Statement & Research Gap, Proposed Gen AI Approach, Expected Impact in Healthcare, Limitations or Ethical Considerations, and References". Lastly, it should include citations (based on memory).

questions:
- By creating a crew to coordinate the actions of the four agents, memory will be activated (agent 4 will be able to access information analyzed by previous agents). is this assumption correct? 
- what commands can we do to ensure hallucinations do not take place?
- our study_agent returned very small summaries of the papers. We are worried inputing only this information for the rest of agents will result in hallucination? what is your opinion on the matter?
- How can we appropriately establish a cut-off in agent 3? Agent 3 will make the decision on what best research proposal is, but how do we control such "cut-off" or limit?
- We want agent 2 and 3 to delegate, and I know we do this by setting allow_delegation=True. How can I ensure agents 2 and 3 delegate between themselvess?
- How can we control number of iterations?
