In [None]:
!pip install paper-qa>=5 google-generativeai requests pypdf2 -q

import os
import asyncio
import tempfile
import requests
from pathlib import Path
from paperqa import Settings, ask, agent_query
from paperqa.settings import AgentSettings
import google.generativeai as genai

GEMINI_API_KEY = "Use Your Own API Key Here"
os.environ["GEMINI_API_KEY"] = GEMINI_API_KEY

genai.configure(api_key=GEMINI_API_KEY)
print("✅ Gemini API key configured successfully!")

In [None]:
def download_sample_papers():
    """Download sample AI/ML research papers for demonstration"""
    papers = {
        "attention_is_all_you_need.pdf": "https://arxiv.org/pdf/1706.03762.pdf",
        "bert_paper.pdf": "https://arxiv.org/pdf/1810.04805.pdf",
        "gpt3_paper.pdf": "https://arxiv.org/pdf/2005.14165.pdf"
    }

    papers_dir = Path("sample_papers")
    papers_dir.mkdir(exist_ok=True)

    print("📥 Downloading sample research papers...")
    for filename, url in papers.items():
        filepath = papers_dir / filename
        if not filepath.exists():
            try:
                response = requests.get(url, stream=True, timeout=30)
                response.raise_for_status()
                with open(filepath, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
                print(f"✅ Downloaded: {filename}")
            except Exception as e:
                print(f"❌ Failed to download {filename}: {e}")
        else:
            print(f"📄 Already exists: {filename}")

    return str(papers_dir)

papers_directory = download_sample_papers()

def create_gemini_settings(paper_dir: str, temperature: float = 0.1):
    """Create optimized settings for PaperQA2 with Gemini models"""

    return Settings(
        llm="gemini/gemini-1.5-flash",
        summary_llm="gemini/gemini-1.5-flash",

        agent=AgentSettings(
            agent_llm="gemini/gemini-1.5-flash",
            search_count=6,
            timeout=300.0,
        ),

        embedding="gemini/text-embedding-004",

        temperature=temperature,
        paper_directory=paper_dir,

        answer=dict(
            evidence_k=8,
            answer_max_sources=4,
            evidence_summary_length="about 80 words",
            answer_length="about 150 words, but can be longer",
            max_concurrent_requests=2,
        ),

        parsing=dict(
            chunk_size=4000,
            overlap=200,
        ),

        verbosity=1,
    )

In [None]:
class PaperQAAgent:
    """Advanced AI Agent for scientific literature analysis using PaperQA2"""

    def __init__(self, papers_directory: str, temperature: float = 0.1):
        self.settings = create_gemini_settings(papers_directory, temperature)
        self.papers_dir = papers_directory
        print(f"🤖 PaperQA Agent initialized with papers from: {papers_directory}")

    async def ask_question(self, question: str, use_agent: bool = True):
        """Ask a question about the research papers"""
        print(f"\n❓ Question: {question}")
        print("🔍 Searching through research papers...")

        try:
            if use_agent:
                response = await agent_query(query=question, settings=self.settings)
            else:
                response = ask(question, settings=self.settings)

            return response

        except Exception as e:
            print(f"❌ Error processing question: {e}")
            return None

    def display_answer(self, response):
        """Display the answer with formatting"""
        if response is None:
            print("❌ No response received")
            return

        print("\n" + "="*60)
        print("📋 ANSWER:")
        print("="*60)

        answer_text = getattr(response, 'answer', str(response))
        print(f"\n{answer_text}")

        contexts = getattr(response, 'contexts', getattr(response, 'context', []))
        if contexts:
            print("\n" + "-"*40)
            print("📚 SOURCES USED:")
            print("-"*40)
            for i, context in enumerate(contexts[:3], 1):
                context_name = getattr(context, 'name', getattr(context, 'doc', f'Source {i}'))
                context_text = getattr(context, 'text', getattr(context, 'content', str(context)))
                print(f"\n{i}. {context_name}")
                print(f"   Text preview: {context_text[:150]}...")

    async def multi_question_analysis(self, questions: list):
        """Analyze multiple questions in sequence"""
        results = {}
        for i, question in enumerate(questions, 1):
            print(f"\n🔄 Processing question {i}/{len(questions)}")
            response = await self.ask_question(question)
            results[question] = response

            if response:
                print(f"✅ Completed: {question[:50]}...")
            else:
                print(f"❌ Failed: {question[:50]}...")

        return results

    async def comparative_analysis(self, topic: str):
        """Perform comparative analysis across papers"""
        questions = [
            f"What are the key innovations in {topic}?",
            f"What are the limitations of current {topic} approaches?",
            f"What future research directions are suggested for {topic}?",
        ]

        print(f"\n🔬 Starting comparative analysis on: {topic}")
        return await self.multi_question_analysis(questions)

async def basic_demo():
    """Demonstrate basic PaperQA functionality"""
    agent = PaperQAAgent(papers_directory)

    question = "What is the transformer architecture and why is it important?"
    response = await agent.ask_question(question)
    agent.display_answer(response)

print("🚀 Running basic demonstration...")
await basic_demo()

async def advanced_demo():
    """Demonstrate advanced multi-question analysis"""
    agent = PaperQAAgent(papers_directory, temperature=0.2)

    questions = [
        "How do attention mechanisms work in transformers?",
        "What are the key differences between BERT and GPT models?",
        "What are the computational challenges of large language models?",
        "How has pre-training evolved in natural language processing?"
    ]

    print("🧠 Running advanced multi-question analysis...")
    results = await agent.multi_question_analysis(questions)

    for question, response in results.items():
        print(f"\n{'='*80}")
        print(f"Q: {question}")
        print('='*80)
        if response:
            answer_text = getattr(response, 'answer', str(response))
            display_text = answer_text[:300] + "..." if len(answer_text) > 300 else answer_text
            print(display_text)
        else:
            print("❌ No answer available")

print("\n🚀 Running advanced demonstration...")
await advanced_demo()

async def research_comparison_demo():
    """Demonstrate comparative research analysis"""
    agent = PaperQAAgent(papers_directory)

    results = await agent.comparative_analysis("attention mechanisms in neural networks")

    print("\n" + "="*80)
    print("📊 COMPARATIVE ANALYSIS RESULTS")
    print("="*80)

    for question, response in results.items():
        print(f"\n🔍 {question}")
        print("-" * 50)
        if response:
            answer_text = getattr(response, 'answer', str(response))
            print(answer_text)
        else:
            print("❌ Analysis unavailable")
        print()

print("🚀 Running comparative research analysis...")
await research_comparison_demo()

In [4]:
def create_interactive_agent():
    """Create an interactive agent for custom queries"""
    agent = PaperQAAgent(papers_directory)

    async def query(question: str, show_sources: bool = True):
        """Interactive query function"""
        response = await agent.ask_question(question)

        if response:
            answer_text = getattr(response, 'answer', str(response))
            print(f"\n🤖 Answer:\n{answer_text}")

            if show_sources:
                contexts = getattr(response, 'contexts', getattr(response, 'context', []))
                if contexts:
                    print(f"\n📚 Based on {len(contexts)} sources:")
                    for i, ctx in enumerate(contexts[:3], 1):
                        ctx_name = getattr(ctx, 'name', getattr(ctx, 'doc', f'Source {i}'))
                        print(f"  {i}. {ctx_name}")
        else:
            print("❌ Sorry, I couldn't find an answer to that question.")

        return response

    return query

interactive_query = create_interactive_agent()

print("\n🎯 Interactive agent ready! You can now ask custom questions:")
print("Example: await interactive_query('How do transformers handle long sequences?')")

def print_usage_tips():
    """Print helpful usage tips"""
    tips = """
    🎯 USAGE TIPS FOR PAPERQA2 WITH GEMINI:

    1. 📝 Question Formulation:
       - Be specific about what you want to know
       - Ask about comparisons, mechanisms, or implications
       - Use domain-specific terminology

    2. 🔧 Model Configuration:
       - Gemini 1.5 Flash is free and reliable
       - Adjust temperature (0.0-1.0) for creativity vs precision
       - Use smaller chunk_size for better processing

    3. 📚 Document Management:
       - Add PDFs to the papers directory
       - Use meaningful filenames
       - Mix different types of papers for better coverage

    4. ⚡ Performance Optimization:
       - Limit concurrent requests for free tier
       - Use smaller evidence_k values for faster responses
       - Cache results by saving the agent state

    5. 🧠 Advanced Usage:
       - Chain multiple questions for deeper analysis
       - Use comparative analysis for research reviews
       - Combine with other tools for complete workflows

    📖 Example Questions to Try:
    - "Compare the attention mechanisms in BERT vs GPT models"
    - "What are the computational bottlenecks in transformer training?"
    - "How has pre-training evolved from word2vec to modern LLMs?"
    - "What are the key innovations that made transformers successful?"
    """
    print(tips)

print_usage_tips()

def save_analysis_results(results: dict, filename: str = "paperqa_analysis.txt"):
    """Save analysis results to a file"""
    with open(filename, 'w', encoding='utf-8') as f:
        f.write("PaperQA2 Analysis Results\n")
        f.write("=" * 50 + "\n\n")

        for question, response in results.items():
            f.write(f"Question: {question}\n")
            f.write("-" * 30 + "\n")
            if response:
                answer_text = getattr(response, 'answer', str(response))
                f.write(f"Answer: {answer_text}\n")

                contexts = getattr(response, 'contexts', getattr(response, 'context', []))
                if contexts:
                    f.write(f"\nSources ({len(contexts)}):\n")
                    for i, ctx in enumerate(contexts, 1):
                        ctx_name = getattr(ctx, 'name', getattr(ctx, 'doc', f'Source {i}'))
                        f.write(f"  {i}. {ctx_name}\n")
            else:
                f.write("Answer: No response available\n")
            f.write("\n" + "="*50 + "\n\n")

    print(f"💾 Results saved to: {filename}")

print("✅ Tutorial complete! You now have a fully functional PaperQA2 AI Agent with Gemini.")



✅ Gemini API key configured successfully!
📥 Downloading sample research papers...
📄 Already exists: attention_is_all_you_need.pdf
📄 Already exists: bert_paper.pdf
📄 Already exists: gpt3_paper.pdf
🚀 Running basic demonstration...
🤖 PaperQA Agent initialized with papers from: sample_papers

❓ Question: What is the transformer architecture and why is it important?
🔍 Searching through research papers...


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
                                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/litellm/proxy/spend_tracking/cold_storage_handler.py", line 67, in _get_configured_cold_storage_custom_logger
    from litellm.proxy.proxy_server import general_settings
ImportError: cannot import name 'general_settings' from 'litellm.proxy.proxy_server' (/usr/local/lib/python3.11/dist-packages/litellm/proxy/proxy_server.py)
ERROR:LiteLLM:Error creating standard logging object - cannot import name 'general_settings' from 'litellm.proxy.proxy_server' (/usr/local/lib/python3.11/dist-packages/litellm/proxy/proxy_server.py)
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/litellm/litellm_core_utils/litellm_logging.py", line 4370, in get_standard_logging_object_payload
    clean_metadata = StandardLoggingPayloadSetup.get_standard_logging_metadata(


📋 ANSWER:

session=PQASession(id=UUID('bab3d088-a73b-4758-9f0d-d555792a21c2'), question='What is the transformer architecture and why is it important?', answer="The Transformer architecture is a novel neural network design for sequence transduction that relies solely on an attention mechanism, eliminating recurrence and convolutions (vaswani2017attentionisall pages 9-10, vaswani2017attentionisall pages 2-3, vaswani2017attentionisall pages 1-2).  This reliance on attention allows for significantly greater parallelization during training, leading to faster training times and improved efficiency (vaswani2017attentionisall pages 9-10, vaswani2017attentionisall pages 2-3, vaswani2017attentionisall pages 1-2).  The Transformer's architecture, particularly its multi-headed self-attention mechanism, enables the processing of input and output representations without recurrent neural networks (RNNs) or convolutional layers (vaswani2017attentionisall pages 9-10, vaswani2017attentionisall pages 2

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
    cold_storage_object_key = StandardLoggingPayloadSetup._generate_cold_storage_object_key(
                              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/litellm/litellm_core_utils/litellm_logging.py", line 4109, in _generate_cold_storage_object_key
    configured_cold_storage_logger = ColdStorageHandler._get_configured_cold_storage_custom_logger()
                                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/litellm/proxy/spend_tracking/cold_storage_handler.py", line 67, in _get_configured_cold_storage_custom_logger
    from litellm.proxy.proxy_server import general_settings
  File "/usr/local/lib/python3.11/dist-packages/litellm/proxy/proxy_server.py", line 80, in <module>
    raise ImportError(f"Missing dependency {e}. Run `pip install 'litellm[proxy]'`")

✅ Completed: How do attention mechanisms work in transformers?...

🔄 Processing question 2/4

❓ Question: What are the key differences between BERT and GPT models?
🔍 Searching through research papers...


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
                                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/litellm/proxy/spend_tracking/cold_storage_handler.py", line 67, in _get_configured_cold_storage_custom_logger
    from litellm.proxy.proxy_server import general_settings
  File "/usr/local/lib/python3.11/dist-packages/litellm/proxy/proxy_server.py", line 80, in <module>
    raise ImportError(f"Missing dependency {e}. Run `pip install 'litellm[proxy]'`")
ImportError: Missing dependency No module named 'backoff'. Run `pip install 'litellm[proxy]'`
[92m15:14:52 - LiteLLM:ERROR[0m: litellm_logging.py:4483 - Error creating standard logging object - cannot import name 'general_settings' from 'litellm.proxy.proxy_server' (/usr/local/lib/python3.11/dist-packages/litellm/proxy/proxy_server.py)
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/litellm

✅ Completed: What are the key differences between BERT and GPT ...

🔄 Processing question 3/4

❓ Question: What are the computational challenges of large language models?
🔍 Searching through research papers...


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
            "quotaId": "GenerateRequestsPerMinutePerProjectPerModel-FreeTier",
            "quotaDimensions": {
              "location": "global",
              "model": "gemini-1.5-flash"
            },
            "quotaValue": "15"
          }
        ]
      },
      {
        "@type": "type.googleapis.com/google.rpc.Help",
        "links": [
          {
            "description": "Learn more about Gemini API quotas",
            "url": "https://ai.google.dev/gemini-api/docs/rate-limits"
          }
        ]
      },
      {
        "@type": "type.googleapis.com/google.rpc.RetryInfo",
        "retryDelay": "24s"
      }
    ]
  }
}


During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py", line 1686, in async_completion
    response = await client.post(


✅ Completed: What are the computational challenges of large lan...

🔄 Processing question 4/4

❓ Question: How has pre-training evolved in natural language processing?
🔍 Searching through research papers...


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
        "retryDelay": "39s"
      }
    ]
  }
}


During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py", line 1686, in async_completion
    response = await client.post(
               ^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/litellm/litellm_core_utils/logging_utils.py", line 135, in async_wrapper
    result = await func(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/litellm/llms/custom_httpx/http_handler.py", line 324, in post
    raise e
  File "/usr/local/lib/python3.11/dist-packages/litellm/llms/custom_httpx/http_handler.py", line 280, in post
    response.raise_for_status()
  File "/usr/local/lib/python3.11/dist-packages/httpx/_models.py", line 829, in raise_for_status


✅ Completed: How has pre-training evolved in natural language p...

Q: How do attention mechanisms work in transformers?
session=PQASession(id=UUID('9f0c52a6-f8af-4187-bef7-819c3be7ff61'), question='How do attention mechanisms work in transformers?', answer='Transformer attention mechanisms map queries and key-value pairs to a weighted sum of values (vaswani2017attentionisall pages 3-5).  The weight for each value is ...

Q: What are the key differences between BERT and GPT models?
session=PQASession(id=UUID('1e2d4b5d-0e70-4dae-8844-f74b2734f925'), question='What are the key differences between BERT and GPT models?', answer="BERT employs bidirectional training, processing text in both directions simultaneously, unlike GPT's unidirectional left-to-right approach (devlin2019bert...

Q: What are the computational challenges of large language models?
session=PQASession(id=UUID('a79dbabe-df26-4a10-a0b0-65fb61713c96'), question='What are the computational challenges of large language models?

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
    "status": "RESOURCE_EXHAUSTED",
    "details": [
      {
        "@type": "type.googleapis.com/google.rpc.QuotaFailure",
        "violations": [
          {
            "quotaMetric": "generativelanguage.googleapis.com/generate_content_free_tier_requests",
            "quotaId": "GenerateRequestsPerDayPerProjectPerModel-FreeTier",
            "quotaDimensions": {
              "model": "gemini-1.5-flash",
              "location": "global"
            },
            "quotaValue": "50"
          }
        ]
      },
      {
        "@type": "type.googleapis.com/google.rpc.Help",
        "links": [
          {
            "description": "Learn more about Gemini API quotas",
            "url": "https://ai.google.dev/gemini-api/docs/rate-limits"
          }
        ]
      },
      {
        "@type": "type.googleapis.com/google.rpc.RetryInfo",
        "retryDelay": "16s"
      }
    ]
  }
}


During handling of the above 

✅ Completed: What are the key innovations in attention mechanis...

🔄 Processing question 2/3

❓ Question: What are the limitations of current attention mechanisms in neural networks approaches?
🔍 Searching through research papers...


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
    "status": "RESOURCE_EXHAUSTED",
    "details": [
      {
        "@type": "type.googleapis.com/google.rpc.QuotaFailure",
        "violations": [
          {
            "quotaMetric": "generativelanguage.googleapis.com/generate_content_free_tier_requests",
            "quotaId": "GenerateRequestsPerDayPerProjectPerModel-FreeTier",
            "quotaDimensions": {
              "location": "global",
              "model": "gemini-1.5-flash"
            },
            "quotaValue": "50"
          }
        ]
      },
      {
        "@type": "type.googleapis.com/google.rpc.Help",
        "links": [
          {
            "description": "Learn more about Gemini API quotas",
            "url": "https://ai.google.dev/gemini-api/docs/rate-limits"
          }
        ]
      },
      {
        "@type": "type.googleapis.com/google.rpc.RetryInfo",
        "retryDelay": "31s"
      }
    ]
  }
}


During handling of the above 

✅ Completed: What are the limitations of current attention mech...

🔄 Processing question 3/3

❓ Question: What future research directions are suggested for attention mechanisms in neural networks?
🔍 Searching through research papers...


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
    response = await init_response
               ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py", line 1692, in async_completion
    raise VertexAIError(
litellm.llms.vertex_ai.common_utils.VertexAIError: {
  "error": {
    "code": 429,
    "message": "You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.",
    "status": "RESOURCE_EXHAUSTED",
    "details": [
      {
        "@type": "type.googleapis.com/google.rpc.QuotaFailure",
        "violations": [
          {
            "quotaMetric": "generativelanguage.googleapis.com/generate_content_free_tier_requests",
            "quotaId": "GenerateRequestsPerDayPerProjectPerModel-FreeTier",
            "quotaDimensions": {
              "model": "gemini-1.5-flash",
    

✅ Completed: What future research directions are suggested for ...

📊 COMPARATIVE ANALYSIS RESULTS

🔍 What are the key innovations in attention mechanisms in neural networks?
--------------------------------------------------
session=PQASession(id=UUID('526309b8-3f06-4702-ad51-ebf4b8c009cf'), question='What are the key innovations in attention mechanisms in neural networks?', answer='', raw_answer='', answer_reasoning=None, has_successful_answer=None, context='', contexts=[], references='', formatted_answer='', graded_answer=None, cost=0.0, token_counts={}, config_md5='41ea849f3f8f755869f306696d9e2d66', tool_history=[['gen_answer']], used_contexts=set()) bibtex=None status=<AgentStatus.FAIL: 'fail'> timing_info=None duration=0.0 stats=None


🔍 What are the limitations of current attention mechanisms in neural networks approaches?
--------------------------------------------------
session=PQASession(id=UUID('c0499799-9852-4def-a185-83b1b17b0b4e'), question='What are the limitations of c