In [None]:
import os
import traceback
import subprocess
from crewai import Agent, Task, Crew, Process
from crewai_tools import PDFSearchTool, SerperDevTool, ScrapeWebsiteTool
from dotenv import load_dotenv
import json
from datetime import datetime
import sys
from io import StringIO
from contextlib import redirect_stdout, redirect_stderr

In [None]:
class DualLogger:
    """Logger that writes to a timestamped file and echoes progress to the console.

    Every instance opens its own log file inside ``log_dir`` and reserves a
    matching summary file name. Both files are written as UTF-8 because the
    messages routinely contain emoji and other non-ASCII symbols.
    """

    def __init__(self, log_dir="./logs"):
        """Create ``log_dir`` if necessary and open a fresh log file.

        Args:
            log_dir: Directory that receives the log and summary files.
        """
        self.log_dir = log_dir
        os.makedirs(log_dir, exist_ok=True)

        # The log and the summary share one timestamp so the two files pair up.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.log_file = os.path.join(log_dir, f"proposal_generation_{timestamp}.log")
        self.summary_file = os.path.join(log_dir, f"proposal_summary_{timestamp}.md")

        # Explicit UTF-8: the platform default (e.g. cp1252) cannot encode the
        # emoji/check-mark characters that appear in log messages.
        self.log_handle = open(self.log_file, 'w', encoding='utf-8')
        print(f"📁 Log file created: {self.log_file}")

    def log(self, message, level="INFO"):
        """Write ``message`` to the log file and echo it to the console.

        Args:
            message: Text to record.
            level: "ERROR" and "WARNING" get their own console prefix; any
                other value is shown with the success prefix.
        """
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        formatted_message = f"[{timestamp}] [{level}] {message}"

        # Flush after every line so the file is usable while a long run is in progress.
        self.log_handle.write(formatted_message + "\n")
        self.log_handle.flush()

        if level == "ERROR":
            print(f"❌ {message}")
        elif level == "WARNING":
            print(f"⚠️  {message}")
        else:
            print(f"✅ {message}")

    def capture_crew_output(self, func, *args, **kwargs):
        """Run ``func`` while mirroring everything written to stdout into the log file.

        Only ``sys.stdout`` is redirected; stderr is left untouched.

        Args:
            func: Callable to execute.
            *args: Positional arguments forwarded to ``func``.
            **kwargs: Keyword arguments forwarded to ``func``.

        Returns:
            Whatever ``func`` returns.

        Raises:
            Exception: Any exception raised by ``func`` is logged and re-raised.
        """
        class TeeOutput:
            """Minimal stdout stand-in that duplicates writes to a file and the console."""

            def __init__(self, file_handle, console):
                self.file = file_handle
                self.console = console

            def write(self, data):
                self.file.write(data)
                self.file.flush()
                self.console.write(data)
                # File-like objects report how many characters were written.
                return len(data)

            def flush(self):
                self.file.flush()
                self.console.flush()

            def __getattr__(self, name):
                # Only reached for attributes not defined above (isatty,
                # encoding, fileno, ...): answer on behalf of the real console
                # so callers probing stdout do not hit AttributeError.
                if name in ("file", "console"):
                    raise AttributeError(name)
                return getattr(self.console, name)

        tee = TeeOutput(self.log_handle, sys.stdout)
        original_stdout = sys.stdout

        try:
            sys.stdout = tee
            try:
                return func(*args, **kwargs)
            finally:
                # Restore stdout before any error logging below; otherwise the
                # console echo of the error would be tee'd into the file and
                # the same error would appear in the log twice.
                sys.stdout = original_stdout
        except Exception as e:
            self.log(f"Error during execution: {str(e)}", "ERROR")
            raise

    def create_summary(self, topic, result, start_time, end_time, task_outputs):
        """Write a Markdown summary of the run to ``self.summary_file``.

        Args:
            topic: Research topic shown in the report.
            result: Final output; a placeholder line is used when it is falsy.
            start_time: ``datetime`` at which the run started.
            end_time: ``datetime`` at which the run finished.
            task_outputs: Mapping of task name to its output; a falsy output is
                reported as failed.

        Returns:
            The summary text that was written.
        """
        duration = (end_time - start_time).total_seconds() / 60

        summary = f"""# Research Proposal Generation Summary

## Execution Details
- **Topic**: {topic}
- **Start Time**: {start_time.strftime('%Y-%m-%d %H:%M:%S')}
- **End Time**: {end_time.strftime('%Y-%m-%d %H:%M:%S')}
- **Duration**: {duration:.2f} minutes
- **Full Log**: {self.log_file}

## Task Completion Status
"""

        for task_name, output in task_outputs.items():
            status = "✅ Completed" if output else "❌ Failed"
            summary += f"- **{task_name}**: {status}\n"

        summary += f"""
## Final Output
{result if result else "No final output generated"}

## Files Generated
- Draft: research_proposal_draft.md
- Final: research_proposal_final_edited.md
- PDF: research_proposal_final.pdf (if pandoc available)
"""

        # UTF-8 for the same reason as the log file: the status markers are emoji.
        with open(self.summary_file, 'w', encoding='utf-8') as f:
            f.write(summary)

        self.log(f"Summary saved to: {self.summary_file}")
        return summary

    def close(self):
        """Close the log file; safe to call more than once."""
        # hasattr guards the case where __init__ failed before the file was opened.
        if hasattr(self, 'log_handle') and not self.log_handle.closed:
            self.log_handle.close()

    def __del__(self):
        """Ensure the log file is closed when the logger is garbage-collected."""
        self.close()


In [None]:
# Create the module-level logger that the later cells call, then write the opening banner.
logger = DualLogger()
logger.log("Gen AI Healthcare Research Proposal Generator Started")
logger.log("=" * 70)

In [None]:
# Smoke test: emit one message per level (default INFO, explicit INFO, WARNING, ERROR).
logger.log("Testing logger functionality...")
logger.log("This should appear in the log file", "INFO")
logger.log("This is a warning message", "WARNING")
logger.log("This is an error message", "ERROR")

In [None]:
# Print a preview of the log written so far, truncated to 500 characters.
if os.path.exists(logger.log_file):
    with open(logger.log_file, 'r') as f:
        content = f.read()
    if len(content) > 500:
        preview = content[:500] + "..."
    else:
        preview = content
    print(f"\n📄 Current log file content ({len(content)} chars):")
    for chunk in ("-" * 50, preview, "-" * 50):
        print(chunk)

In [None]:
# Load environment variables via python-dotenv, then log which model name is configured.
load_dotenv()
# The fallback text is only a label for the log line; it is not a usable model id.
model_name = os.getenv('OPENAI_MODEL_NAME', 'Not Set - CrewAI Default')
logger.log(f"Using LLM Model: {model_name}")

In [None]:
def check_environment(papers_dir='./papers/'):
    """Check that the API keys and the papers directory are available.

    Args:
        papers_dir: Directory expected to contain the source PDF files.

    Returns:
        A list of human-readable problem descriptions; empty when every
        blocking check passed.
    """
    logger.log("Checking environment requirements...")
    issues = []

    # Check API keys
    if not os.getenv("OPENAI_API_KEY"):
        issues.append("❌ OPENAI_API_KEY not found in environment")
    else:
        logger.log("OpenAI API key found")

    if not os.getenv("SERPER_API_KEY"):
        issues.append("❌ SERPER_API_KEY not found in environment")
    else:
        logger.log("Serper API key found")

    # isdir rather than exists: a regular file with this name would pass an
    # existence check and then make os.listdir raise.
    if not os.path.isdir(papers_dir):
        issues.append("❌ Papers directory not found")
    else:
        # Sorted so the "first 5" shown below are the same on every run.
        pdf_files = sorted(f for f in os.listdir(papers_dir) if f.endswith('.pdf'))
        logger.log(f"Found {len(pdf_files)} PDF files in {papers_dir}")
        if not pdf_files:
            # Not a blocking issue, but worth surfacing before the run starts.
            logger.log(f"No PDF files available in {papers_dir}", "WARNING")
        for pdf in pdf_files[:5]:  # Show first 5 PDFs
            logger.log(f"   - {pdf}")
        if len(pdf_files) > 5:
            logger.log(f"   ... and {len(pdf_files) - 5} more")

    return issues

In [None]:
# Run the environment checks and stop the notebook when anything is missing.
issues = check_environment()
if not issues:
    logger.log("All environment checks passed!")
else:
    logger.log("Issues found:", "WARNING")
    for issue in issues:
        logger.log(issue, "WARNING")
    raise Exception("Please fix environment issues before continuing.")

In [None]:
pdf_files_directory = "./papers/"
logger.log("Initializing AI tools...")

# Get list of PDF files. Read from the variable declared above instead of a
# second hard-coded literal, and sort so the list embedded in the agent
# prompts is identical from run to run (os.listdir order is arbitrary).
pdf_files = sorted(f for f in os.listdir(pdf_files_directory) if f.endswith('.pdf'))
# One indented bullet per PDF, ready to be embedded in prompt text.
pdf_list_str = "\n".join(f"   - {pdf}" for pdf in pdf_files)

In [None]:
# PDF search tool: OpenAI LLM (model taken from OPENAI_MODEL_NAME, falling back
# to gpt-4o) combined with the OpenAI text-embedding-3-small embedder.
pdf_search_tool = PDFSearchTool(
    config={
        "llm": {
            "provider": "openai",
            "config": {
                "model": os.getenv('OPENAI_MODEL_NAME', 'gpt-4o'),
                "temperature": 0.5,
            },
        },
        "embedder": {
            "provider": "openai",
            "config": {
                "model": "text-embedding-3-small",
            },
        },
    }
)


In [None]:
# Confirm that every listed PDF is still present on disk.
logger.log("Verifying PDF files...")
for pdf in pdf_files:
    full_path = f"./papers/{pdf}"
    if not os.path.exists(full_path):
        logger.log(f"✗ Missing: {pdf}", "ERROR")
        continue
    logger.log(f"✓ Found: {pdf}")

In [None]:
# Instantiate the two web tools with their default settings; they are passed to agents below.
serper_dev_tool = SerperDevTool()
scrape_website_tool = ScrapeWebsiteTool()
logger.log("Tools initialized successfully")

In [None]:
# Optional backup tool: configured like the PDF search tool but pointed at the
# whole papers directory. An ImportError leaves rag_tool set to None.
try:
    from crewai_tools import RAGTool

    rag_tool = RAGTool(
        path="./papers/",
        config={
            "llm": {
                "provider": "openai",
                "config": {
                    "model": os.getenv('OPENAI_MODEL_NAME', 'gpt-4o'),
                    "temperature": 0.5,
                },
            },
            "embedder": {
                "provider": "openai",
                "config": {
                    "model": "text-embedding-3-small",
                },
            },
        },
    )
    logger.log("RAG tool initialized as backup")
except ImportError:
    logger.log("RAG tool not available", "WARNING")
    rag_tool = None

In [None]:
def test_pdf_search():
    """Run one probe query against each of three known PDFs and log the outcome.

    A result containing a specific journal footer string is reported as
    generic content; any other result is reported as unique, with a
    100-character preview. Exceptions are logged per PDF and do not stop the
    remaining probes.
    """
    logger.log("Testing PDF search functionality...")

    # Footer text whose presence is treated as "generic content returned".
    generic_marker = "J Med Internet Res 2024 | vol. 26 | e59505"
    query_for_pdf = {
        "./papers/LLM Agents in Medicine.pdf": "diagnostic accuracy",
        "./papers/Multimodal in healthcare.pdf": "multimodal",
        "./papers/Adaptive Reasoning Language Agents.pdf": "adaptive agents",
    }

    for pdf_path, query in query_for_pdf.items():
        try:
            logger.log(f"Testing: {query} in {pdf_path}")
            result = pdf_search_tool._run(query=query, pdf=pdf_path)

            if generic_marker not in result:
                logger.log(f"✅ Unique content retrieved for {pdf_path}")
                logger.log(f"   Preview: {result[:100]}...")
            else:
                logger.log(f"⚠️  WARNING: Generic content returned for {pdf_path}", "WARNING")

        except Exception as e:
            logger.log(f"❌ Error testing {pdf_path}: {e!s}", "ERROR")

    logger.log("PDF search test completed")

In [None]:
# Run the PDF search probes; the outcome is only logged, nothing is returned.
test_pdf_search()

In [None]:
def verify_pdf_indexing():
    """Probe the first listed PDF with a generic query.

    Returns:
        True when the search returns more than 50 characters; False when there
        is no PDF to probe, the result is empty or short, or the search raises.
    """
    logger.log("Verifying PDF indexing...")

    if not pdf_files:
        return False
    first_pdf = pdf_files[0]
    if not first_pdf:
        return False

    try:
        # Try a simple search
        hit = pdf_search_tool._run(
            query="healthcare",
            pdf=f"./papers/{first_pdf}"
        )
        if not (hit and len(hit) > 50):
            logger.log("⚠️ PDF indexing may have issues - result too short", "WARNING")
            return False
        logger.log("✅ PDF indexing appears to be working")
        return True
    except Exception as e:
        logger.log(f"❌ PDF indexing test failed: {e!s}", "ERROR")
        return False

In [None]:
# A failed indexing probe is only a warning; execution continues.
if not verify_pdf_indexing():
    logger.log("PDF indexing issues detected. Results may be affected.", "WARNING")

In [None]:
def check_pandoc():
    """Log whether pandoc and pdflatex can be executed.

    The two executables are probed separately so that a missing ``pdflatex``
    is reported as a LaTeX problem rather than as a missing pandoc. Nothing is
    returned; results are written to the log.
    """
    try:
        result = subprocess.run(['pandoc', '--version'],
                                capture_output=True, text=True)
    except FileNotFoundError:
        logger.log("Pandoc not installed!", "ERROR")
        logger.log("Install with: sudo apt-get install pandoc", "ERROR")
        # Without pandoc there is no conversion to support, so skip the LaTeX probe.
        return

    # The version is read as the second whitespace-separated token of the
    # output; guard against empty or unexpected output instead of indexing blindly.
    tokens = result.stdout.split()
    if result.returncode == 0 and len(tokens) > 1:
        logger.log(f"Pandoc version: {tokens[1]}")
    else:
        logger.log("Pandoc found but its version could not be determined", "WARNING")

    # Check for LaTeX
    try:
        latex_available = subprocess.run(['pdflatex', '--version'],
                                         capture_output=True, text=True).returncode == 0
    except FileNotFoundError:
        # The executable is not on PATH at all.
        latex_available = False

    if latex_available:
        logger.log("LaTeX is installed")
    else:
        logger.log("LaTeX not found - PDF conversion may fail", "WARNING")
        logger.log("Install with: sudo apt-get install texlive-latex-base", "WARNING")

In [None]:
# Report the PDF toolchain status up front; the outcome is logged only.
check_pandoc()

In [None]:
# Progress marker written before the agent definitions.
logger.log("Creating AI agents...")

In [None]:
# Literature-review agent. Its backstory embeds the list of available PDFs
# (pdf_list_str) and it is given the PDF search, web search and scraping tools.
# '{specific_research_topic}' is left as a literal placeholder in the goal text.
healthcare_topic_researcher_agent = Agent(
    role="Expert Medical AI Researcher and Critical Analyst",
    goal="To conduct an in-depth, critical, and analytical literature review on '{specific_research_topic}' using provided academic papers (primarily from the './papers/' directory) and targeted web searches. Identify current challenges, existing solutions with their specific methodologies, quantifiable results if mentioned, significant research gaps, and detailed ethical considerations. Extract bibliographic information (Title, Authors, Year) for key sources.",
    backstory=(
        "You are a meticulous and highly analytical researcher specializing in Generative AI in healthcare, with a PhD in the field. "
        "Your strength is dissecting academic literature to extract not just general findings, but specific details about methodologies, datasets, evaluation metrics, results, limitations, and explicitly stated future work. "
        "You MUST prioritize using the PDF documents provided in the './papers/' directory. "
        "CRITICAL INSTRUCTIONS:\n"
        "1. When using PDFSearchTool, you must provide the FULL path including './papers/' prefix.\n"
        f"2. ONLY search for PDFs that actually exist. The available PDFs are:\n{pdf_list_str}\n"
        "3. NEVER make up or assume PDF names that don't exist.\n"
        "4. If PDF search returns unexpected content, try a different search query for that PDF.\n"
        "5. You are expected to synthesize information critically, not just list facts.\n"
        "6. If PDF search consistently returns generic content, report this issue in your analysis."
    ),
    tools=[pdf_search_tool, serper_dev_tool, scrape_website_tool],
    verbose=True,
    allow_delegation=False,
)


In [None]:
# Ideation agent. It is given no tools; delegation is disabled.
research_proposal_ideator_agent = Agent(
    role="Innovative Research Strategist and Gen AI Solutions Architect for Healthcare",
    goal="Based on the detailed and critical literature review, brainstorm 3-4 novel, impactful, and technically feasible research project ideas related to '{specific_research_topic}'. For each idea, outline a potential Gen AI approach with significant detail, including potential CrewAI agent structures, data requirements, and key evaluation metrics for the proposed solution.",
    backstory=(
        "You are a creative visionary with a strong grasp of current Gen AI capabilities (including agentic frameworks like CrewAI and models like GPT-4o) and healthcare challenges. You translate deep research insights into actionable, innovative, and well-defined project concepts. "
        "You prioritize originality and significant, measurable healthcare impact while ensuring practical feasibility for a master's level project."
    ),
    verbose=True,
    allow_delegation=False,
)

In [None]:
# Critique agent. Its only tool is the Serper web search; delegation is disabled.
feasibility_and_ethics_critique_agent = Agent(
    role="Pragmatic AI Ethicist, Senior Project Analyst, and Healthcare Compliance Expert",
    goal="To critically evaluate the research ideas proposed by the ResearchProposalIdeatorAgent. Assess each for technical feasibility (data, models like GPT-4o, CrewAI implementation), potential impact (quantifiable and realistic), originality against the literature, specific ethical considerations (bias, fairness, privacy, transparency, accountability, potential harms), regulatory compliance (e.g., HIPAA), and resource implications. Provide constructive, actionable feedback and rank the ideas with clear justification.",
    backstory=(
        "You are a seasoned expert in AI project management, AI ethics, healthcare regulations (like HIPAA), and risk assessment. "
        "Your role is to ensure that research proposals are innovative, viable, responsible, compliant, and ethically sound, providing concrete suggestions for addressing ethical and practical challenges."
    ),
    tools=[serper_dev_tool],
    verbose=True,
    allow_delegation=False,
)

In [None]:
# Outline-planning agent. It is given no tools; delegation is disabled.
proposal_outline_planner_agent = Agent(
    role="Lead Research Proposal Architect for Medical AI Grants",
    goal="To develop an exceptionally detailed and logically structured outline for the research proposal based on the selected (and critiqued) research idea. The outline must include all required sections and provide specific, actionable sub-bullet points for content to be included in each, drawing comprehensively from the literature review and critique.",
    backstory=(
        "You are an expert in academic writing and research methodology, specializing in crafting compelling AI and healthcare grant proposals. "
        "Your strength is creating meticulous, logical, coherent, and exceptionally detailed structures for complex research documents, ensuring all components for a winning proposal are present and logically connected."
    ),
    verbose=True,
    allow_delegation=False,
)


In [None]:
# Draft-writing agent. Its backstory embeds the list of available PDFs
# (pdf_list_str) together with citation rules; it is given the PDF search and
# Serper web search tools.
proposal_draft_writer_agent = Agent(
    role="Lead Academic Writer for AI in Healthcare Research Grants",
    goal="To write a compelling, clear, scientifically rigorous, and highly detailed first draft of the research proposal (2-3 pages) using the comprehensive outline and synthesized research information. Ensure the language is precise and academic, arguments are robustly supported by specific findings from the literature review, and the proposed methodology (including CrewAI agents, data, and evaluation) is described with clarity and depth.",
    backstory=(
        "You are a proficient writer with extensive experience in drafting successful research grants on Generative AI in healthcare. "
        "You excel at translating complex technical ideas and research findings into persuasive, well-structured, and scientifically sound prose. "
        "CRITICAL INSTRUCTIONS:\n"
        "1. You MUST ground your writing deeply in the findings from the HealthcareTopicResearcherAgent.\n"
        "2. ONLY cite papers that were actually analyzed in the literature review.\n"
        "3. NEVER invent references or paper titles. If you need a reference but don't have the exact details, use placeholders like '[Author from PDF X, Year]'.\n"
        f"4. Available PDFs for reference are:\n{pdf_list_str}\n"
        "5. Your description of the 'Proposed Gen AI Approach' must be particularly detailed.\n"
        "6. DO NOT search for PDFs that don't exist in the list above.\n"
        "IMPORTANT CITATION RULES:\n"
        "- Every reference must include the actual PDF filename it came from\n"
        "- Format: 'Finding X was demonstrated in [PDF: Multimodal in healthcare.pdf]'\n"
        "- If uncertain about a specific detail, state 'Based on the literature review...' without inventing specifics\n"
        "- The reference list must ONLY include papers from the ./papers/ directory"
    ),
    tools=[pdf_search_tool, serper_dev_tool],
    verbose=True,
    allow_delegation=False,
)

In [None]:
# Editing agent. It is given no tools; delegation is disabled.
proposal_editor_agent = Agent(
    role="Meticulous Academic Editor and Grant Review Panelist for AI in Healthcare",
    goal="To meticulously review and edit the draft research proposal for clarity, coherence, scientific rigor, grammatical precision, academic style, completeness, originality, and adherence to the 2-3 page limit. Ensure all sections are well-developed, arguments are strongly supported with evidence, and the proposal is polished to a high academic standard, ready for submission. Attempt to format references based on extracted bibliographic details.",
    backstory=(
        "You are an exacting editor with extensive experience on grant review panels for AI in healthcare. You have an exceptional eye for detail, a deep understanding of what makes a research proposal compelling, fundable, and scientifically sound. You are also skilled in academic formatting and reference checking."
    ),
    verbose=True,
    allow_delegation=False,
)

In [None]:
# NOTE: the count is a hard-coded literal, not derived from the agents; update it when agents are added or removed.
logger.log(f"Created {6} agents successfully")

In [None]:
# Progress marker written before the task definitions.
logger.log("Defining tasks...")

In [None]:
# Literature-review task, assigned to the researcher agent. The description
# embeds the list of available PDFs (pdf_list_str) and spells out the required
# './papers/filename.pdf' path format; it declares no context tasks.
task_literature_review = Task(
    description=(
        "Conduct a comprehensive and highly analytical literature review on '{specific_research_topic}'. "
        "You MUST primarily use the `PDFSearchTool` to analyze the content of PDF documents located in the './papers/' directory. "
        "CRITICAL: When using PDFSearchTool, you MUST use the full path format: './papers/filename.pdf' "
        "For example: Use './papers/LLM Agents in Medicine.pdf' NOT 'LLM Agents in Medicine.pdf' "
        f"The key PDF files to consult include:\n{pdf_list_str}\n"
        "When using the PDFSearchTool, structure your input correctly with the full path. "
        "Focus on identifying and extracting: "
        "1. Current state-of-the-art, key findings, specific contributions, and main arguments of each paper. "
        "2. Detailed methodologies, including algorithms, datasets used, sample sizes, and evaluation metrics from the papers. "
        "3. Quantifiable results or significant outcomes reported, including any statistical significance. "
        "4. Major challenges and limitations explicitly stated or critically inferred from the authors' discussion. "
        "5. Explicitly stated research gaps or direct calls for future work by the authors. "
        "6. Specific ethical considerations, biases, or safety concerns discussed in relation to the methods or findings. "
        "7. For each key PDF, extract its full Title, primary Author(s) if easily identifiable, and Year of publication."
    ),
    expected_output=(
        "A detailed analytical report summarizing the literature review. This report must synthesize findings across papers, not just list summaries. "
        "It should clearly outline for the overall topic: current state-of-the-art, common and unique methodologies, key collective results, prevalent challenges, significant research gaps, and recurring ethical considerations. "
        "Include a structured list of key references with any extracted bibliographic details (Title, Authors, Year)."
    ),
    agent=healthcare_topic_researcher_agent,
)

In [None]:
# Ideation task, assigned to the ideator agent; receives the literature review as context.
task_ideation = Task(
    description=(
        "Leveraging the detailed analytical insights and specific research gaps from `task_literature_review`, "
        "generate three distinct, innovative, and well-grounded research project ideas related to '{specific_research_topic}'. "
        "For each idea, provide: "
        "  a) A clear, descriptive, and compelling preliminary title. "
        "  b) A concise problem statement (1-2 sentences) explicitly linked to a specific, well-documented research gap from the literature review. "
        "  c) A detailed description of the proposed Gen AI approach (min. 1 paragraph): Specify potential CrewAI agent roles and their core functions (e.g., 'DataPreprocessingAgent for multimodal data alignment', 'EthicalCheckAgent for bias detection in outputs'), the chosen LLM (e.g., GPT-4o), key tools or techniques (e.g., RAG architecture, specific fine-tuning strategy if applicable, methods for integrating diverse data modalities like vision, text, time-series). "
        "  d) Its potential novel contribution and significant impact in healthcare, supported by arguments derived from the literature review (e.g., how it addresses a limitation of existing work)."
    ),
    expected_output=(
        "A document detailing three distinct research project ideas. Each idea MUST include: a compelling title, a problem statement explicitly tied to literature gaps, "
        "a detailed Gen AI approach (mentioning potential CrewAI agent roles, LLM choice, specific techniques, and data considerations), and a well-argued potential impact and novelty, supported by the review."
    ),
    agent=research_proposal_ideator_agent,
    context=[task_literature_review],
)

In [None]:
# Critique task, assigned to the critique agent; receives the ideation output
# and the literature review as context.
task_critique = Task(
    description=(
        "Critically evaluate the research ideas generated by `task_ideation`, using the detailed literature review (`task_literature_review`) as a benchmark. For each idea, provide a rigorous and constructive analysis of: "
        "1. Technical Feasibility & Scope: Assess the realism of implementing the proposed Gen AI approach (e.g., complexity of CrewAI setup, data acquisition and annotation for models like GPT-4o) within a typical Masters project timeframe (approx 3-6 months active research). Identify key technical challenges and suggest simplifications if too ambitious. "
        "2. Originality & Contribution: How significantly does this idea advance the state-of-the-art or address an underexplored niche identified in the literature? Is the claimed novelty genuine? "
        "3. Potential Impact & Significance: Substantiate the claimed healthcare impact with specific connections to problems highlighted in the literature. Is the impact clearly articulated, measurable, and significant enough for a Masters thesis? "
        "4. Ethical Considerations & Responsible AI: Deep dive into specific ethical risks (e.g., data bias amplification by GPT-4o, patient privacy for multimodal data, fairness in diagnostic outcomes, transparency of agent decisions, potential for misuse). Propose concrete, practical mitigation strategies for each identified risk. Consider relevant healthcare regulations (e.g., HIPAA principles if U.S. focused). "
        "5. Resource Implications: Detail realistic data needs (type, volume, potential sources, accessibility, annotation effort), computational resources for model execution/fine-tuning (if any), and necessary expertise. "
        "Conclude with a clear, justified recommendation for the single most promising and viable idea (or a synthesized version of ideas) for a Masters-level research proposal. Provide actionable suggestions for strengthening this chosen idea, especially regarding scope and methodology."
    ),
    expected_output=(
        "A comprehensive critique report for each proposed idea, covering technical feasibility (scoped for Masters), originality, impact, detailed ethical risks with practical mitigation strategies, and resource needs. "
        "The report MUST conclude with a well-justified recommendation for the strongest research direction suitable for a Masters project, along with specific advice for its refinement and scoping."
    ),
    agent=feasibility_and_ethics_critique_agent,
    context=[task_ideation, task_literature_review],
)

In [None]:
# Outline task, assigned to the outline planner agent; receives the critique
# and the literature review as context. The description enumerates the
# sections the outline must contain.
task_outline = Task(
    description=(
        "Based on the chosen and refined research idea from `task_critique`'s recommendation, "
        "create an exceptionally detailed and structured outline for a 2-3 page research proposal. "
        "For EACH section and subsection, provide specific bullet points, key questions to answer, data points to include, or arguments to make, drawing heavily from `task_literature_review` and `task_critique`. "
        "Sections must include: "
        "  - Title (Finalized, compelling, and descriptive of the chosen project) "
        "  - Abstract (Detailed bullet points covering: core problem derived from literature, main research objective, overview of proposed CrewAI/Gen AI methodology using GPT-4o, key expected research outcomes/deliverables, and the broader significance/impact. Aim for content that would form a 150-250 word abstract.) "
        "  - Background & Literature Review (Structure this logically: Introduction to the broader field, then narrow down to the specific area of your research. Critically review 3-5 most relevant studies from `task_literature_review`, detailing their specific findings, methodologies, and crucially, their limitations that your project addresses, leading smoothly to the research gap.) "
        "  - Problem Statement & Research Gap (A clear, concise paragraph precisely stating the problem your research will tackle. A separate paragraph detailing the specific, well-documented gap in current knowledge or technology (from `task_literature_review` and `task_critique`) that your project aims to fill.) "
        "  - Proposed Gen AI Approach / Methodology (Very detailed: 1. Overall research design and architecture (e.g., using CrewAI). 2. Definition of specific agent roles, their goals, tools, and interaction flow for your research. 3. Justification for LLM choice (e.g., GPT-4o for its advanced reasoning). 4. Detailed plan for data: sources, collection/acquisition strategy, preprocessing steps, and handling of different modalities. 5. If applicable, fine-tuning or RAG strategy. 6. Comprehensive evaluation plan for the AI system (metrics like accuracy, F1-score, ROUGE, BLEU, human evaluation criteria for qualitative aspects) and the overall research questions (how will you know if your research objectives are met?).) "
        "  - Expected Outcomes & Deliverables (List specific, measurable research outcomes, e.g., 'a functional CrewAI prototype for X', 'a comparative analysis of Y', 'a set of ethical guidelines for Z'. List tangible deliverables like code, datasets (if shareable), and the final thesis.) "
        "  - Timeline (A brief, realistic timeline for a Masters project, e.g., Month 1-2: Lit Review & Setup, Month 3-4: Development, Month 5: Evaluation, Month 6: Thesis Writing.) "
        "  - Limitations of the Study & Future Work (Acknowledge potential limitations of YOUR proposed study. Briefly suggest avenues for future research building upon your work.) "
        "  - Ethical Considerations (In-depth discussion of ethical issues pertinent to YOUR chosen idea, as identified in `task_critique`, and your specific, actionable mitigation plan for each.) "
        "  - References (List key references from `task_literature_review` with any available bibliographic info: Title, Authors, Year.)"
    ),
    expected_output="A comprehensive and highly detailed Markdown-formatted outline for the research proposal, ensuring each section has specific guiding points, questions to address, and content suggestions for the writer.",
    agent=proposal_outline_planner_agent,
    context=[task_critique, task_literature_review],
)

In [None]:
# Draft-writing task, assigned to the draft writer agent; receives the outline
# and the literature review as context. output_file is set to
# research_proposal_draft.md.
task_draft_writing = Task(
    description=(
        "Write a full, high-quality, and scientifically rigorous first draft of the research proposal (target 2-3 pages, approximately 1000-1500 words) based on the exceptionally detailed `task_outline`. "
        "Expand on every bullet point and guiding question in the outline with well-reasoned arguments and supporting details. "
        "Integrate information from `task_literature_review` seamlessly and substantively, especially when discussing background, problem statement, and justifying your methodological choices. "
        "CRITICAL CITATION INSTRUCTIONS:\n"
        "- Use academic citation style, e.g., 'Evidence from the work on MedAide (Author et al., Year from lit review) suggests...' or 'Consistent with findings in [PDF: LLM Agents in Medicine.pdf] regarding challenges in data integration...'. "
        "- Every reference must be traceable to the actual PDF it came from\n"
        "- Never invent author names or paper titles\n"
        "- If exact details are unavailable, use format: '[From PDF: filename.pdf]'\n"
        "The 'Proposed Gen AI Approach / Methodology' section must be very clear, detailed, and technically sound, explaining the CrewAI agent setup, their interactions, data flow, how LLMs like GPT-4o will be utilized, and the evaluation strategy. "
        "The 'Ethical Considerations' section must reflect the depth of the critique and outline, detailing specific risks and mitigation strategies for YOUR project. "
        "Maintain a formal academic tone, ensure logical transitions between paragraphs and sections, and build a persuasive case for the proposed research. "
        "Ensure the reference list at the end includes all cited works with their PDF filenames."
    ),
    expected_output="A complete, detailed, scientifically sound, and well-argued first draft of the research proposal in Markdown format (approximately 2-3 pages).",
    agent=proposal_draft_writer_agent,
    context=[task_outline, task_literature_review],
    output_file="research_proposal_draft.md"
)


In [None]:
# Editing task, assigned to the editor agent; receives the draft, the
# literature review and the outline as context. output_file is set to
# research_proposal_final_edited.md.
task_editing = Task(
    description=(
        "Perform a meticulous and critical academic edit of the research proposal draft from 'research_proposal_draft.md'. Your edit should be comprehensive, focusing on: "
        "1. Scientific Rigor & Clarity: Is the research question clear and focused? Is the problem well-motivated by the literature? Is the proposed solution well-defined, innovative, and technically sound? Are claims and arguments well-supported by evidence (implicitly or explicitly from the literature review)? Is the language precise, unambiguous, and academic? "
        "2. Coherence & Logical Flow: Ensure a strong narrative and logical progression of ideas between and within all sections. Improve transitions and connections between concepts. "
        "3. Grammar, Style, & Formatting: Correct all errors in grammar, punctuation, spelling. Ensure consistent academic tone and style (e.g., formal language, objective voice). Check Markdown formatting for consistency and readability. "
        "4. Completeness & Adherence to Outline: Verify all parts of the `task_outline` have been adequately and deeply addressed. Ensure each section fulfills its purpose within the proposal. "
        "5. Originality & Depth of Analysis: Does the proposal offer a genuine contribution? Is the analysis of literature, the problem, and the proposed solution sufficiently deep and critical? "
        "6. Page Limit & Conciseness: Ensure the proposal is within the 2-3 page guideline. Provide concrete suggestions for cutting redundant information or expanding underdeveloped critical areas if necessary. "
        "7. Argument Strength & Persuasiveness: Is the justification for the research compelling? Is the potential impact clearly and convincingly articulated? "
        "8. References: Verify all references include PDF filenames. If bibliographic details (Title, Authors, Year) were provided by the researcher/writer, attempt to format the reference list more consistently. Check for consistency between in-text citations and the reference list. "
        "Make direct edits to enhance the proposal significantly. Conclude with a summary of key changes made and any critical outstanding issues or areas that require further attention or clarification from the author."
    ),
    expected_output=(
        "A polished, critically-edited research proposal in Markdown format, improved to a high academic standard and ready for final review. "
        "Include a concise summary of major edits performed and any remaining critical concerns or suggestions for final improvements."
    ),
    agent=proposal_editor_agent,
    context=[task_draft_writing, task_literature_review, task_outline],
    output_file="research_proposal_final_edited.md"
)

In [None]:
# NOTE: the count is a hard-coded literal, not derived from the tasks; update it when tasks are added or removed.
logger.log(f"Defined {6} tasks successfully")

In [None]:
# Progress marker written before the crew is constructed.
logger.log("Assembling CrewAI team...")

In [None]:
# Assemble the six agents and six tasks into one crew using the sequential
# process. Tasks are listed in pipeline order: review, ideation, critique,
# outline, draft, edit.
proposal_crew = Crew(
    agents=[
        healthcare_topic_researcher_agent,
        research_proposal_ideator_agent,
        feasibility_and_ethics_critique_agent,
        proposal_outline_planner_agent,
        proposal_draft_writer_agent,
        proposal_editor_agent
    ],
    tasks=[
        task_literature_review,
        task_ideation,
        task_critique,
        task_outline,
        task_draft_writing,
        task_editing
    ],
    process=Process.sequential,
    verbose=True,
)


In [None]:
# Progress marker written once the crew object exists.
logger.log("CrewAI team assembled successfully")

In [None]:
def convert_md_to_pdf(md_file_path, pdf_file_path):
    """Convert a Markdown file to PDF by invoking the ``pandoc`` command.

    Failures are logged instead of raised, so a missing or failing pandoc
    does not abort the surrounding run.

    Args:
        md_file_path: Path of the Markdown file to convert.
        pdf_file_path: Path the PDF should be written to.

    Returns:
        True if pandoc exited successfully, False otherwise.
    """
    try:
        logger.log(f"Converting {md_file_path} to PDF...")
        # Capture pandoc's output so its diagnostics can be written to the log
        # rather than being lost on the process's own stderr.
        subprocess.run(
            ["pandoc", md_file_path, "-o", pdf_file_path],
            check=True,
            capture_output=True,
            text=True,
            errors="replace",
        )
        logger.log(f"Successfully converted to {pdf_file_path}")
        return True
    except FileNotFoundError:
        # Raised by subprocess when the pandoc executable cannot be located.
        logger.log("Pandoc not found. PDF conversion skipped.", "WARNING")
    except subprocess.CalledProcessError as e:
        logger.log(f"PDF conversion error: {e}", "ERROR")
        # The exception text only carries the exit status; the actual cause
        # (e.g. a missing PDF engine) is in pandoc's stderr.
        details = (e.stderr or "").strip()
        if details:
            logger.log(f"pandoc output: {details}", "ERROR")
    except Exception as e:
        logger.log(f"PDF conversion error: {e}", "ERROR")
    return False

In [None]:
def _describe_artifact(path, available):
    """Return the banner text for *path*, flagging files that were not produced."""
    return path if available else f"{path} (missing)"


logger.log("=" * 70)
logger.log("STARTING RESEARCH PROPOSAL GENERATION")
logger.log("=" * 70)

# Get research topic
specific_research_topic_input = "Developing Adaptive LLM Agents for Improved Diagnostic Accuracy in Clinical Settings"
logger.log(f"Research Topic: {specific_research_topic_input}")

if not specific_research_topic_input.strip():
    logger.log("No research topic entered. Exiting.", "ERROR")
else:
    start_time = datetime.now()
    logger.log(f"Starting crew execution at {start_time.strftime('%Y-%m-%d %H:%M:%S')}")

    crew_inputs = {
        'specific_research_topic': specific_research_topic_input,
    }

    # Track task outputs for summary. Keys are in the same order as the tasks
    # given to the crew, because results are matched to them by position below.
    task_outputs = {
        "Literature Review": None,
        "Ideation": None,
        "Critique": None,
        "Outline": None,
        "Draft Writing": None,
        "Editing": None
    }

    try:
        logger.log("Beginning crew execution...")
        logger.log("This will take several minutes. All output is being logged.")
        logger.log("-" * 70)

        # Execute crew with output capture
        result = logger.capture_crew_output(
            proposal_crew.kickoff,
            inputs=crew_inputs
        )

        # Capture individual task outputs, pairing each result with a label by
        # index; results beyond the known labels are ignored.
        if hasattr(result, 'tasks_output'):
            task_names = list(task_outputs.keys())
            for i, task_output in enumerate(result.tasks_output):
                if i < len(task_names):
                    task_outputs[task_names[i]] = task_output.raw if hasattr(task_output, 'raw') else str(task_output)

        end_time = datetime.now()

        logger.log("-" * 70)
        logger.log("CREW EXECUTION COMPLETED SUCCESSFULLY")
        logger.log("=" * 70)

        # Resolve output paths. `or` (rather than a getattr default) also covers
        # an attribute that exists but is None, which would otherwise break
        # os.path.exists() and the banner formatting below.
        final_md_file = getattr(task_editing, 'output_file', None) or 'research_proposal_final_edited.md'
        draft_md_file = getattr(task_draft_writing, 'output_file', None) or 'research_proposal_draft.md'
        pdf_output_file = "research_proposal_final.pdf"

        # Verify files exist before attempting conversion
        final_exists = os.path.exists(final_md_file)
        if final_exists:
            logger.log(f"Final proposal saved to: {final_md_file}")
            convert_md_to_pdf(final_md_file, pdf_output_file)
        else:
            logger.log(f"Final file {final_md_file} not found", "WARNING")

        draft_exists = os.path.exists(draft_md_file)
        if draft_exists:
            logger.log(f"Draft saved to: {draft_md_file}")
        else:
            logger.log(f"Draft file {draft_md_file} not found", "WARNING")

        # Only report the PDF when it was written during this run; a leftover
        # file from an earlier run must not be presented as fresh output.
        pdf_exists = (
            os.path.exists(pdf_output_file)
            and os.path.getmtime(pdf_output_file) >= start_time.timestamp()
        )

        # Create summary
        summary = logger.create_summary(
            specific_research_topic_input,
            result,
            start_time,
            end_time,
            task_outputs
        )

        logger.log("\n" + "=" * 70)
        logger.log("EXECUTION SUMMARY")
        logger.log("=" * 70)
        print(summary)

        # Banner labels reflect what is actually on disk.
        final_label = _describe_artifact(final_md_file, final_exists)
        draft_label = _describe_artifact(draft_md_file, draft_exists)
        pdf_label = _describe_artifact(pdf_output_file, pdf_exists)

        # Final success message
        print(f"""
╔══════════════════════════════════════════════════════════════════╗
║              ✅ RESEARCH PROPOSAL GENERATION COMPLETE!            ║
╠══════════════════════════════════════════════════════════════════╣
║ 📝 Final Proposal: {final_label:<45} ║
║ 📝 Draft Version:  {draft_label:<45} ║
║ 📄 PDF Version:    {pdf_label:<45} ║
║ 📁 Full Log:       {logger.log_file:<45} ║
║ 📊 Summary:        {logger.summary_file:<45} ║
╚══════════════════════════════════════════════════════════════════╝
        """)

        logger.log("Process completed successfully!")

    except Exception as e:
        logger.log(f"Error during crew execution: {e}", "ERROR")
        logger.log("Full traceback:", "ERROR")
        logger.log(traceback.format_exc(), "ERROR")

        # Still create summary even on error, using whatever task outputs were
        # collected before the failure.
        end_time = datetime.now()
        logger.create_summary(
            specific_research_topic_input,
            None,
            start_time,
            end_time,
            task_outputs
        )

        print(f"\n❌ Execution failed. Check log file for details: {logger.log_file}")

    finally:
        # Close the logger
        logger.close()