In [None]:
# Cell 1: Install dependencies
# Run pip through the interpreter that backs this kernel so the packages land
# in the environment the notebook imports from, and abort the cell when the
# install fails instead of printing the success banner unconditionally.
import subprocess
import sys

PACKAGES = [
    "groq", "faiss-cpu", "sentence-transformers", "gradio",
    "pymupdf", "reportlab", "arxiv", "requests",
]

pip_run = subprocess.run(
    [sys.executable, "-m", "pip", "install", "-q", *PACKAGES],
    capture_output=True,
    text=True,
)
if pip_run.returncode != 0:
    # Show pip's own diagnostics before stopping.
    print(pip_run.stdout)
    print(pip_run.stderr)
    raise RuntimeError(f"pip install failed with exit code {pip_run.returncode}")

print("✅ All libraries installed!")

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m71.4 MB/s[0m eta [36m0:00:00[0m
[?25h✅ All libraries installed!


In [None]:
# Cell 2: Mount Drive + create folder structure
from google.colab import drive
import os

# Mount Google Drive at /content/drive.
drive.mount('/content/drive')

# Project root first, then its three working sub-folders.
base = '/content/drive/MyDrive/ResearchMind'
folders = [base] + [f'{base}/{name}' for name in ('papers', 'reports', 'outputs')]

for folder in folders:
    os.makedirs(folder, exist_ok=True)  # no error when the folder already exists
    print(f"✅ Created: {folder}")

print("\n🧬 ResearchMind folder structure ready!")

Mounted at /content/drive
✅ Created: /content/drive/MyDrive/ResearchMind
✅ Created: /content/drive/MyDrive/ResearchMind/papers
✅ Created: /content/drive/MyDrive/ResearchMind/reports
✅ Created: /content/drive/MyDrive/ResearchMind/outputs

🧬 ResearchMind folder structure ready!


In [None]:
# Cell 3: PubMed Auto-fetcher
import requests
import time
import os
import json
import xml.etree.ElementTree as ET

def _element_text(elem):
    """Return the full text of an XML element (inline tags included), whitespace-normalised."""
    if elem is None:
        return ''
    return ' '.join(''.join(elem.itertext()).split())


def _parse_pubmed_record(root, pmid):
    """Turn one parsed efetch XML document into the paper-metadata dict."""
    title = _element_text(root.find('.//ArticleTitle')) or 'N/A'

    # Structured abstracts have several <AbstractText Label="..."> sections;
    # keep all of them rather than only the first.
    sections = []
    nodes = root.findall('.//Abstract/AbstractText') or root.findall('.//AbstractText')
    for node in nodes:
        text = _element_text(node)
        if not text:
            continue
        label = node.get('Label')
        sections.append(f"{label}: {text}" if label else text)
    abstract = ' '.join(sections) or 'No abstract'

    # Some records carry only <MedlineDate> (e.g. "2024 Jan-Feb") instead of <Year>.
    year = root.findtext('.//PubDate/Year')
    if not year:
        medline_date = root.findtext('.//PubDate/MedlineDate') or ''
        year = next(
            (tok[:4] for tok in medline_date.split()
             if len(tok) >= 4 and tok[:4].isdigit()),
            None,
        )
    year = year or 'N/A'

    journal = root.findtext('.//Journal/Title') or 'N/A'

    # First three usable author names; group authors have only <CollectiveName>.
    authors = []
    for a in root.findall('.//Author'):
        last = (a.findtext('LastName') or '').strip()
        fore = (a.findtext('ForeName') or '').strip()
        name = f"{last} {fore}".strip() or (a.findtext('CollectiveName') or '').strip()
        if name:
            authors.append(name)
        if len(authors) == 3:
            break

    return {
        'pmid':     pmid,
        'title':    title,
        'authors':  authors,
        'abstract': abstract,
        'year':     year,
        'journal':  journal,
        'url':      f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/"
    }


def search_pubmed(topic, max_papers=5,
                  save_dir='/content/drive/MyDrive/ResearchMind/papers'):
    """Search PubMed for a topic and return metadata for the top hits.

    Args:
        topic: Free-text query sent to the esearch endpoint.
        max_papers: Maximum number of PubMed IDs to request (`retmax`).
        save_dir: Folder that receives `papers_metadata.json`; created if
            missing. Defaults to the ResearchMind papers folder on Drive.

    Returns:
        A list of dicts with keys `pmid`, `title`, `authors` (up to three
        names), `abstract`, `year`, `journal` and `url`. Papers whose fetch
        or parse fails are reported and skipped.

    Raises:
        Whatever `requests` raises when the initial search request fails or
        returns an HTTP error status.
    """

    print(f"🔍 Searching PubMed for: '{topic}'")

    # Step 1: Get paper IDs
    search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    r = requests.get(search_url, params={
        'db': 'pubmed',
        'term': topic,
        'retmax': max_papers,
        'retmode': 'json',
        'sort': 'relevance'
    }, timeout=30)
    r.raise_for_status()

    # An error payload has no id list; treat it as "nothing found".
    ids = r.json().get('esearchresult', {}).get('idlist', [])
    print(f"✅ Found {len(ids)} papers\n")

    # Step 2: Fetch details
    papers = []
    for pmid in ids:
        time.sleep(1)  # pause between consecutive E-utilities requests

        # The request lives inside the try so one failed download skips that
        # paper instead of aborting the whole search.
        try:
            r = requests.get(
                "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
                params={'db':'pubmed','id':pmid,'retmode':'xml','rettype':'abstract'},
                timeout=30
            )
            r.raise_for_status()

            paper = _parse_pubmed_record(ET.fromstring(r.content), pmid)
            papers.append(paper)

            print(f"📄 {paper['title'][:75]}...")
            print(f"   Journal : {paper['journal']}")
            print(f"   Year    : {paper['year']}")
            print(f"   Authors : {', '.join(paper['authors'])}\n")

        except Exception as e:
            print(f"⚠️ Skipped PMID {pmid}: {e}")

    # Save metadata
    os.makedirs(save_dir, exist_ok=True)
    with open(f'{save_dir}/papers_metadata.json', 'w', encoding='utf-8') as f:
        json.dump(papers, f, indent=2)

    print(f"💾 Saved metadata to Drive")
    return papers

# Smoke test: fetch five papers for a sample topic. `topic` and `papers`
# are reused by the cells further down.
topic = "large language models medical imaging"
papers = search_pubmed(topic=topic, max_papers=5)
paper_count = len(papers)
print(f"✅ {paper_count} papers ready!")

🔍 Searching PubMed for: 'large language models medical imaging'
✅ Found 5 papers

📄 Multimodal Large Language Models in Medical Imaging: Current State and Futu...
   Journal : Korean journal of radiology
   Year    : 2025
   Authors : Nam Yoojin, Kim Dong Yeong, Kyung Sunggu

📄 Sociodemographic biases in medical decision making by large language models...
   Journal : Nature medicine
   Year    : 2025
   Authors : Omar Mahmud, Soffer Shelly, Agbareia Reem

📄 Large Language Models and Large Multimodal Models in Medical Imaging: A Pri...
   Journal : Journal of nuclear medicine : official publication, Society of Nuclear Medicine
   Year    : 2025
   Authors : Bradshaw Tyler J, Tie Xin, Warner Joshua

📄 Comparative benchmarking of the DeepSeek large language model on medical ta...
   Journal : Nature medicine
   Year    : 2025
   Authors : Tordjman Mickael, Liu Zelong, Yuce Murat

📄 Large language models deconstruct the clinical intuition behind diagnosing ...
   Journal : Cell
   Year   

In [None]:
# Cell 4: RAG Pipeline for ResearchMind
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from groq import Groq

# Setup
# Keep the secret out of the notebook: use the GROQ_API_KEY environment
# variable when it is set, otherwise ask for the key with a hidden prompt.
import os
from getpass import getpass

GROQ_API_KEY = os.environ.get("GROQ_API_KEY") or getpass("Groq API key: ")
client = Groq(api_key=GROQ_API_KEY)
embedder = SentenceTransformer('all-MiniLM-L6-v2')

def build_knowledge_base(papers):
    """Embed one text chunk per paper and load the vectors into a FAISS index.

    Args:
        papers: Iterable of dicts providing `title`, `authors` (list of
            strings), `journal`, `year`, `abstract` and `url`.

    Returns:
        `(index, chunks)` — the FAISS `IndexFlatL2` holding the embeddings and
        the list of chunk strings in the same order.
    """
    print("🧠 Building knowledge base...")

    def _as_chunk(p):
        # One chunk per paper: metadata lines around the abstract.
        return f"""
Title: {p['title']}
Authors: {', '.join(p['authors'])}
Journal: {p['journal']}
Year: {p['year']}
Abstract: {p['abstract']}
URL: {p['url']}
"""

    chunks = [_as_chunk(p) for p in papers]

    # Encode every chunk, then cast to float32 before handing it to FAISS.
    vectors = np.array(embedder.encode(chunks, show_progress_bar=True)).astype('float32')

    # Flat L2 index sized to the embedding dimension.
    dim = vectors.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(vectors)

    print(f"✅ Knowledge base ready — {index.ntotal} papers indexed")
    return index, chunks

def retrieve_relevant(query, index, chunks, top_k=3):
    """Return the chunks closest to `query`, nearest first.

    Args:
        query: Text to embed and search for.
        index: FAISS index built over `chunks`.
        chunks: Chunk strings in the order they were added to `index`.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to `top_k` chunk strings; fewer when fewer chunks exist, and an
        empty list when there are none.
    """
    # Never ask for more neighbours than there are chunks: FAISS pads missing
    # results with -1, which would silently index the *last* chunk.
    k = min(int(top_k), len(chunks))
    if k <= 0:
        return []

    q_emb = embedder.encode([query]).astype('float32')
    _, indices = index.search(q_emb, k)
    # Drop any padding or out-of-range label defensively.
    return [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]

def query_llm(prompt):
    """Send `prompt` to Groq as a single user message and return the reply text."""
    request = {
        "model": "llama-3.1-8b-instant",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 1500,
    }
    completion = client.chat.completions.create(**request)
    first_choice = completion.choices[0]
    return first_choice.message.content

# Index the papers fetched earlier; `index` and `chunks` feed the later cells.
index, chunks = build_knowledge_base(papers)

# Sanity-check retrieval with one sample question.
test_query = "What are the main challenges of LLMs in medical imaging?"
relevant = retrieve_relevant(query=test_query, index=index, chunks=chunks)
top_hit = relevant[0]

print(f"\n🔍 Query: {test_query}")
print(f"\n📄 Most relevant paper:")
print(top_hit[:300])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]



config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

BertModel LOAD REPORT from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

🧠 Building knowledge base...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

✅ Knowledge base ready — 5 papers indexed

🔍 Query: What are the main challenges of LLMs in medical imaging?

📄 Most relevant paper:

Title: Large Language Models and Large Multimodal Models in Medical Imaging: A Primer for Physicians.
Authors: Bradshaw Tyler J, Tie Xin, Warner Joshua
Journal: Journal of nuclear medicine : official publication, Society of Nuclear Medicine
Year: 2025
Abstract: Large language models (LLMs) are pois


In [None]:
# Cell 5: Autonomous Research Loop
def autonomous_research(topic, papers, index, chunks,
                        save_dir='/content/drive/MyDrive/ResearchMind/reports'):
    """
    Full autonomous pipeline:
    1. Summarize findings
    2. Generate hypothesis
    3. Design experiment
    4. Critique & evaluate
    5. Final report

    Args:
        topic: Research topic; used as the retrieval query, inside the
            prompts, and (sanitised) as the report file name.
        papers: Paper metadata list. Not read by this function; kept for
            interface compatibility.
        index: FAISS index built over `chunks`.
        chunks: Chunk strings in the order they were added to `index`.
        save_dir: Folder that receives the text report; created if missing.
            Defaults to the ResearchMind reports folder on Drive.

    Returns:
        Dict with keys `topic`, `summary`, `hypothesis`, `experiment`,
        `critique` and `report`, each holding the corresponding LLM output.

    Raises:
        ValueError: if `chunks` is empty, since there is nothing to ground
            the prompts on.
    """
    if not chunks:
        raise ValueError("No paper chunks available — build the knowledge base first.")

    print(f"🧬 ResearchMind Autonomous Pipeline")
    print(f"📌 Topic: {topic}")
    print("="*60)

    # Ask for at most as many hits as there are chunks, so retrieval can never
    # return padding entries when fewer than three papers were indexed.
    relevant = retrieve_relevant(topic, index, chunks, top_k=min(3, len(chunks)))
    context  = "\n\n".join(relevant)

    # ── Step 1: Summarize ──────────────────────────────────────
    print("\n⚙️  Step 1: Summarizing literature...")
    summary = query_llm(f"""You are ResearchMind, an autonomous AI scientist.

Based on these research papers:
{context}

Provide a structured literature summary:
1. KEY FINDINGS — What do these papers collectively show?
2. CURRENT STATE — Where is the field right now?
3. RESEARCH GAPS — What is still unknown or unsolved?

Be concise and scientific.""")
    print("✅ Done\n")

    # ── Step 2: Generate Hypothesis ───────────────────────────
    print("⚙️  Step 2: Generating hypothesis...")
    hypothesis = query_llm(f"""You are ResearchMind, an autonomous AI scientist.

Topic: {topic}

Literature Summary:
{summary}

Generate a novel, testable research hypothesis that:
1. HYPOTHESIS — One clear, specific statement
2. RATIONALE — Why this hypothesis is worth testing
3. NOVELTY — How it goes beyond current research
4. EXPECTED OUTCOME — What results would confirm it

Be bold and scientifically rigorous.""")
    print("✅ Done\n")

    # ── Step 3: Experiment Design ──────────────────────────────
    print("⚙️  Step 3: Designing experiment...")
    experiment = query_llm(f"""You are ResearchMind, an autonomous AI scientist.

Hypothesis:
{hypothesis}

Design a concrete experiment to test this hypothesis:
1. DATASET — What data is needed and where to get it
2. MODEL ARCHITECTURE — What AI/ML approach to use
3. METHODOLOGY — Step by step experimental procedure
4. EVALUATION METRICS — How to measure success
5. EXPECTED TIMELINE — Realistic schedule

Be specific and implementable.""")
    print("✅ Done\n")

    # ── Step 4: Critical Evaluation ────────────────────────────
    print("⚙️  Step 4: Critiquing the plan...")
    critique = query_llm(f"""You are ResearchMind, an autonomous AI scientist
acting as a critical peer reviewer.

Experiment Plan:
{experiment}

Critically evaluate this plan:
1. STRENGTHS — What is well designed?
2. WEAKNESSES — What could fail or be flawed?
3. RISKS — What are the biggest risks?
4. IMPROVEMENTS — How to make it stronger?
5. FEASIBILITY SCORE — Rate 1-10 with justification

Be harsh but constructive.""")
    print("✅ Done\n")

    # ── Step 5: Final Report ───────────────────────────────────
    print("⚙️  Step 5: Compiling final report...")
    final_report = query_llm(f"""You are ResearchMind, an autonomous AI scientist.

Compile a final structured research report:

TOPIC: {topic}

LITERATURE SUMMARY:
{summary}

HYPOTHESIS:
{hypothesis}

EXPERIMENT DESIGN:
{experiment}

PEER REVIEW:
{critique}

Write a polished, publication-style research proposal
combining all sections above. Include:
- Abstract (150 words)
- Introduction
- Research Gap
- Proposed Hypothesis
- Methodology
- Expected Impact
- Conclusion

Format it professionally.""")
    print("✅ Done\n")

    results = {
        'topic':      topic,
        'summary':    summary,
        'hypothesis': hypothesis,
        'experiment': experiment,
        'critique':   critique,
        'report':     final_report
    }

    # Save to Drive
    os.makedirs(save_dir, exist_ok=True)

    # The topic is free text from the UI: map anything that is not a letter,
    # digit, '-' or '_' to '_' so characters such as '/' cannot break the path
    # or point outside save_dir. Plain topics keep their previous file name.
    slug = ''.join(
        ch if (ch.isalnum() or ch in '-_') else '_'
        for ch in topic[:30]
    ) or 'untitled'
    report_path = f"{save_dir}/{slug}_report.txt"
    with open(report_path, 'w', encoding='utf-8') as f:
        for key, val in results.items():
            f.write(f"\n{'='*60}\n{key.upper()}\n{'='*60}\n{val}\n")

    print(f"💾 Report saved: {report_path}")
    print("\n" + "="*60)
    print("📋 FINAL RESEARCH REPORT PREVIEW:")
    print("="*60)
    print(final_report[:1000])
    print("\n... (full report saved to Drive)")

    return results

# Kick off the five-step pipeline on the topic and papers prepared above.
results = autonomous_research(topic=topic, papers=papers, index=index, chunks=chunks)

🧬 ResearchMind Autonomous Pipeline
📌 Topic: large language models medical imaging

⚙️  Step 1: Summarizing literature...
✅ Done

⚙️  Step 2: Generating hypothesis...
✅ Done

⚙️  Step 3: Designing experiment...
✅ Done

⚙️  Step 4: Critiquing the plan...
✅ Done

⚙️  Step 5: Compiling final report...
✅ Done

💾 Report saved: /content/drive/MyDrive/ResearchMind/reports/large_language_models_medical__report.txt

📋 FINAL RESEARCH REPORT PREVIEW:
**Title:** Reducing Hallucinations in Medical Image Diagnosis using a Novel Region-Grounded Multimodal Framework

**Abstract:**

Medical image analysis is a critical component of healthcare, but the lack of trustworthy and reliable artificial intelligence (AI) models hinders its widespread adoption. Hallucinations, where AI models produce nonsensical or unrelated findings, are a significant challenge in medical image diagnosis. This study proposes a novel region-grounded multimodal framework to address this issue. By integrating large language models 

In [None]:
# Cell 6: ResearchMind Gradio UI
import gradio as gr

def run_researchmind(topic, max_papers):
    """Gradio callback: fetch papers, index them, run the pipeline, format output.

    Args:
        topic: Research topic typed by the user.
        max_papers: Number of papers to fetch; cast to int before use.

    Returns:
        A 4-tuple of strings for the Papers, Literature Summary,
        Hypothesis + Experiment and Full Research Report tabs. On any
        failure the first element carries the message and the rest are empty.
    """
    # Reject blank input up front instead of sending an empty query to PubMed
    # and surfacing a cryptic downstream error.
    topic = (topic or "").strip()
    if not topic:
        return "❌ Please enter a research topic.", "", "", ""

    try:
        # Step 1: Fetch papers
        papers = search_pubmed(topic, max_papers=int(max_papers))
        if not papers:
            return "❌ No papers found. Try a different topic.", "", "", ""

        # Step 2: Build knowledge base
        index, chunks = build_knowledge_base(papers)

        # Step 3: Run autonomous pipeline
        results = autonomous_research(topic, papers, index, chunks)

        # Format papers list
        entries = [
            f"**[{i}] {p['title']}**\n"
            f"_{', '.join(p['authors'])} — {p['journal']} ({p['year']})_\n"
            f"[PubMed Link]({p['url']})\n\n"
            for i, p in enumerate(papers, start=1)
        ]
        papers_md = "## 📚 Papers Analyzed\n\n" + "".join(entries)

        return (
            papers_md,
            results['summary'],
            results['hypothesis'] + "\n\n---\n\n" + results['experiment'],
            results['report']
        )

    except Exception as e:
        # Deliberately broad: the UI should show the failure, not crash.
        return f"❌ Error: {str(e)}", "", "", ""


# Gradio front end. Components are created in display order inside the Blocks
# context, so the statement order below is the page layout.
# NOTE(review): the saved cell output echoes this `with gr.Blocks(` line, which
# looks like a truncated warning pointing here — TODO confirm against the
# installed Gradio version.
with gr.Blocks(title="ResearchMind — Autonomous AI Scientist",
               theme=gr.themes.Soft()) as demo:

    # Page header and one-paragraph description.
    gr.Markdown("""
    # 🧬 ResearchMind — Autonomous AI Scientist
    *Enter any research topic → ResearchMind autonomously fetches papers,
    analyzes findings, generates hypotheses, designs experiments, and produces
    a full research proposal.*
    """)

    # Input row: topic textbox (scale 2) beside the paper-count slider (scale 1).
    with gr.Row():
        with gr.Column(scale=2):
            topic_input = gr.Textbox(
                label="Research Topic",
                placeholder="e.g. large language models medical imaging",
                value="large language models medical imaging"
            )
        with gr.Column(scale=1):
            # Integer steps from 3 to 10, starting at 5.
            papers_slider = gr.Slider(
                minimum=3, maximum=10, value=5, step=1,
                label="Number of Papers"
            )

    run_btn = gr.Button("🚀 Run ResearchMind", variant="primary", size="lg")

    gr.Markdown("---")

    # One tab per string returned by run_researchmind, in the same order.
    with gr.Tabs():
        with gr.Tab("📚 Papers"):
            papers_output = gr.Markdown()

        with gr.Tab("📊 Literature Summary"):
            summary_output = gr.Markdown()

        with gr.Tab("💡 Hypothesis + Experiment"):
            hypothesis_output = gr.Markdown()

        with gr.Tab("📄 Full Research Report"):
            report_output = gr.Markdown()

    # Clicking the button calls run_researchmind(topic, max_papers) and routes
    # its four return values to the four Markdown panes above.
    run_btn.click(
        fn=run_researchmind,
        inputs=[topic_input, papers_slider],
        outputs=[papers_output, summary_output,
                 hypothesis_output, report_output]
    )

# share=True: the captured output shows the app being served on a public
# *.gradio.live URL, so anyone with the link can trigger runs.
demo.launch(share=True)

  with gr.Blocks(title="ResearchMind — Autonomous AI Scientist",


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://be095212529bfdf4a6.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
# Final cell: Save to Drive
from google.colab import drive
import shutil, os

# Make sure the project root exists on Drive, then print the closing banner.
# NOTE(review): nothing is copied or written here and `shutil` is unused —
# confirm that no additional save step was intended.
base = '/content/drive/MyDrive/ResearchMind'
if not os.path.isdir(base):
    os.makedirs(base, exist_ok=True)
print("✅ ResearchMind saved!")

✅ ResearchMind saved!
