In [None]:
    !pip install transformers sentence-transformers flask flask-cors flask-limiter wikipedia numpy torch


Collecting flask-cors
  Downloading flask_cors-6.0.1-py3-none-any.whl.metadata (5.3 kB)
Collecting flask-limiter
  Downloading flask_limiter-3.12-py3-none-any.whl.metadata (6.3 kB)
Collecting wikipedia
  Downloading wikipedia-1.4.0.tar.gz (27 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting limits>=3.13 (from flask-limiter)
  Downloading limits-5.2.0-py3-none-any.whl.metadata (10 kB)
Collecting ordered-set<5,>4 (from flask-limiter)
  Downloading ordered_set-4.1.0-py3-none-any.whl.metadata (5.3 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collectin

In [None]:
    import sys
    import os
    import logging
    import time

    # Smoke test: import DynamicRAGPipeline from the uploaded `src` package, run one
    # query through it, and print the response, its sources, and its metrics.

    # Make the notebook's working directory importable so that `src` resolves as a package.
    # Adjust `project_root` if `src` was uploaded into a subdirectory.
    project_root = os.getcwd()
    if project_root not in sys.path:  # re-running the cell must not stack duplicate entries
        sys.path.insert(0, project_root)

    # force=True replaces any handlers the host (e.g. Colab) already attached to the root
    # logger; without it basicConfig is a silent no-op and the INFO messages never show.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        force=True,
    )
    logger = logging.getLogger(__name__)

    try:
        # Fail fast with a clear message when the package was not uploaded.
        pipeline_path = os.path.join(project_root, 'src', 'pipeline.py')
        if not os.path.exists(pipeline_path):
            logger.error(f"src/pipeline.py not found at {pipeline_path}. Please check your file upload.")
            # No trailing period or upload hint here: the handler below appends both.
            raise FileNotFoundError("src/pipeline.py not found")

        from src.pipeline import DynamicRAGPipeline

        logger.info("Initializing the DynamicRAGPipeline...")
        pipeline = DynamicRAGPipeline()
        logger.info("DynamicRAGPipeline initialized.")

        test_query = "What are the key features of Python?"
        logger.info(f"Processing test query: '{test_query}'")

        # The first query may be slow (model loading / data fetching).
        response_data = pipeline.process_query(test_query)

        print("\n--- Response ---")
        print(f"Query: {test_query}")
        print(f"Response: {response_data.get('response', 'N/A')}")

        print("\n--- Sources ---")
        sources = response_data.get('sources', [])
        if sources:
            for position, source in enumerate(sources, start=1):
                print(f"Source {position}:")
                print(f"  Title: {source.get('title', 'N/A')}")
                print(f"  URL: {source.get('url', 'N/A')}")
                print(f"  Relevance Score: {source.get('relevance_score', 'N/A')}")
        else:
            print("No sources found.")

        print("\n--- Metrics ---")
        metrics = response_data.get('metrics', {})
        if metrics:
            for key, value in metrics.items():
                print(f"  {key}: {value}")
        else:
            # Mirror the "No sources found." line instead of leaving a bare header.
            print("No metrics reported.")

        if 'error' in response_data:
            print("\n--- Error ---")
            print(response_data['error'])

    except FileNotFoundError as e:
        print(f"\nError: {e}. Make sure your 'src' folder is uploaded correctly to the Colab environment.")
    except ImportError as e:
        logger.error(f"Failed to import modules. Make sure your 'src' directory is accessible and dependencies are installed. Error: {e}")
        print(f"\nImport Error: {e}. Ensure all necessary packages are installed (run the pip install cell) and 'src' is in the path.")
    except Exception as e:
        # logger.exception keeps the traceback, which a plain error message would drop.
        logger.exception(f"An error occurred during pipeline execution: {e}")
        print(f"\nAn unexpected error occurred: {e}")


ERROR:__main__:src/pipeline.py not found at /content/src/pipeline.py. Please check your file upload.



Error: src/pipeline.py not found. Ensure 'src' folder is uploaded correctly.. Make sure your 'src' folder is uploaded correctly to the Colab environment.


In [None]:
import sys
import os
import logging
import time

# Smoke test: import DynamicRAGPipeline from pipeline.py, run one query through it,
# and print the response, its sources, and its metrics.
#
# pipeline.py and retriever.py are expected next to the notebook, so sys.path is left alone.
# If they are ever moved into a subdirectory, that directory must be added to sys.path.

# force=True replaces any handlers the host (e.g. Colab) already attached to the root
# logger; without it basicConfig is a silent no-op and the INFO messages never show.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    force=True,
)
logger = logging.getLogger(__name__)

try:
    from pipeline import DynamicRAGPipeline
    # Not used below; imported so that a broken or missing retriever.py fails right here.
    from retriever import DynamicRetriever

    logger.info("Initializing the DynamicRAGPipeline...")
    # NOTE(review): presumably the pipeline builds its own DynamicRetriever; confirm in pipeline.py.
    pipeline = DynamicRAGPipeline()
    logger.info("DynamicRAGPipeline initialized.")

    test_query = "What are the key features of Python?"
    logger.info(f"Processing test query: '{test_query}'")

    # The first query may be slow (model loading / data fetching).
    response_data = pipeline.process_query(test_query)

    print("\n--- Response ---")
    print(f"Query: {test_query}")
    print(f"Response: {response_data.get('response', 'N/A')}")

    print("\n--- Sources ---")
    sources = response_data.get('sources', [])
    if sources:
        for position, source in enumerate(sources, start=1):
            print(f"Source {position}:")
            print(f"  Title: {source.get('title', 'N/A')}")
            print(f"  URL: {source.get('url', 'N/A')}")
            print(f"  Relevance Score: {source.get('relevance_score', 'N/A')}")
    else:
        print("No sources found.")

    print("\n--- Metrics ---")
    metrics = response_data.get('metrics', {})
    if metrics:
        for key, value in metrics.items():
            print(f"  {key}: {value}")
    else:
        # Mirror the "No sources found." line instead of leaving a bare header.
        print("No metrics reported.")

    if 'error' in response_data:
        print("\n--- Error ---")
        print(response_data['error'])

except FileNotFoundError as e:
    print(f"\nError: {e}. Please check your file upload in Colab.")
except ImportError as e:
    # Include the underlying error so the log names the module that failed to import.
    logger.error(f"Failed to import modules. Ensure all necessary packages are installed (run the pip install cell) and the .py files are directly uploaded. Error: {e}")
    print(f"\nImport Error: {e}. Ensure all necessary packages are installed (run the pip install cell) and the .py files are directly uploaded to the root level.")
except Exception as e:
    # logger.exception keeps the traceback, which a plain error message would drop.
    logger.exception(f"An unexpected error occurred during pipeline execution: {e}")
    print(f"\nAn unexpected error occurred: {e}")

# Example of getting conversation history (after processing queries)
# history = pipeline.get_conversation_history()
# print("\n--- Conversation History ---")
# for entry in history:
#    print(f"Q: {entry['query']}")
#    print(f"A: {entry['response'][:100]}...") # Print truncated response
#    print("-" * 20)

# Example of getting overall performance metrics
# overall_metrics = pipeline.get_metrics()
# print("\n--- Overall Metrics ---")
# for key, value in overall_metrics.items():
#    print(f"  {key}: {value}")

ERROR:__main__:Failed to import modules. Ensure all necessary packages are installed (run the pip install cell) and the .py files are directly uploaded.



Import Error: No module named 'faiss'. Ensure all necessary packages are installed (run the pip install cell) and the .py files are directly uploaded to the root level.


In [None]:
!pip install transformers sentence-transformers flask flask-cors flask-limiter wikipedia numpy torch

Collecting flask-cors
  Using cached flask_cors-6.0.1-py3-none-any.whl.metadata (5.3 kB)
Collecting flask-limiter
  Using cached flask_limiter-3.12-py3-none-any.whl.metadata (6.3 kB)
Using cached flask_cors-6.0.1-py3-none-any.whl (13 kB)
Using cached flask_limiter-3.12-py3-none-any.whl (28 kB)
Installing collected packages: flask-limiter, flask-cors
Successfully installed flask-cors-6.0.1 flask-limiter-3.12


In [None]:
import sys
import os
import logging
import time

# Smoke test: import DynamicRAGPipeline from pipeline.py, run one query through it,
# and print the response, its sources, and its metrics.
#
# pipeline.py and retriever.py are expected next to the notebook, so sys.path is left alone.
# If they are ever moved into a subdirectory, that directory must be added to sys.path.

# force=True replaces any handlers the host (e.g. Colab) already attached to the root
# logger; without it basicConfig is a silent no-op and the INFO messages never show.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    force=True,
)
logger = logging.getLogger(__name__)

try:
    from pipeline import DynamicRAGPipeline
    # Not used below; imported so that a broken or missing retriever.py fails right here.
    from retriever import DynamicRetriever

    logger.info("Initializing the DynamicRAGPipeline...")
    # NOTE(review): presumably the pipeline builds its own DynamicRetriever; confirm in pipeline.py.
    pipeline = DynamicRAGPipeline()
    logger.info("DynamicRAGPipeline initialized.")

    test_query = "What are the key features of Python?"
    logger.info(f"Processing test query: '{test_query}'")

    # The first query may be slow (model loading / data fetching).
    response_data = pipeline.process_query(test_query)

    print("\n--- Response ---")
    print(f"Query: {test_query}")
    print(f"Response: {response_data.get('response', 'N/A')}")

    print("\n--- Sources ---")
    sources = response_data.get('sources', [])
    if sources:
        for position, source in enumerate(sources, start=1):
            print(f"Source {position}:")
            print(f"  Title: {source.get('title', 'N/A')}")
            print(f"  URL: {source.get('url', 'N/A')}")
            print(f"  Relevance Score: {source.get('relevance_score', 'N/A')}")
    else:
        print("No sources found.")

    print("\n--- Metrics ---")
    metrics = response_data.get('metrics', {})
    if metrics:
        for key, value in metrics.items():
            print(f"  {key}: {value}")
    else:
        # Mirror the "No sources found." line instead of leaving a bare header.
        print("No metrics reported.")

    if 'error' in response_data:
        print("\n--- Error ---")
        print(response_data['error'])

except FileNotFoundError as e:
    print(f"\nError: {e}. Please check your file upload in Colab.")
except ImportError as e:
    # Include the underlying error so the log names the module that failed to import.
    logger.error(f"Failed to import modules. Ensure all necessary packages are installed (run the pip install cell) and the .py files are directly uploaded. Error: {e}")
    print(f"\nImport Error: {e}. Ensure all necessary packages are installed (run the pip install cell) and the .py files are directly uploaded to the root level.")
except Exception as e:
    # logger.exception keeps the traceback, which a plain error message would drop.
    logger.exception(f"An unexpected error occurred during pipeline execution: {e}")
    print(f"\nAn unexpected error occurred: {e}")

# Example of getting conversation history (after processing queries)
# history = pipeline.get_conversation_history()
# print("\n--- Conversation History ---")
# for entry in history:
#    print(f"Q: {entry['query']}")
#    print(f"A: {entry['response'][:100]}...") # Print truncated response
#    print("-" * 20)

# Example of getting overall performance metrics
# overall_metrics = pipeline.get_metrics()
# print("\n--- Overall Metrics ---")
# for key, value in overall_metrics.items():
#    print(f"  {key}: {value}")


ERROR:__main__:Failed to import modules. Ensure all necessary packages are installed (run the pip install cell) and the .py files are directly uploaded.



Import Error: No module named 'faiss'. Ensure all necessary packages are installed (run the pip install cell) and the .py files are directly uploaded to the root level.


In [None]:
!pip install transformers sentence-transformers flask flask-cors flask-limiter wikipedia numpy torch faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl (31.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m72.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.11.0


In [None]:
import sys
import os
import logging
import time

# Smoke test: import DynamicRAGPipeline from pipeline.py, run one query through it,
# and print the response, its sources, and its metrics.
#
# pipeline.py and retriever.py are expected next to the notebook, so sys.path is left alone.
# If they are ever moved into a subdirectory, that directory must be added to sys.path.

# force=True replaces any handlers the host (e.g. Colab) already attached to the root
# logger; without it basicConfig is a silent no-op and the INFO messages never show.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    force=True,
)
logger = logging.getLogger(__name__)

try:
    from pipeline import DynamicRAGPipeline
    # Not used below; imported so that a broken or missing retriever.py fails right here.
    from retriever import DynamicRetriever

    logger.info("Initializing the DynamicRAGPipeline...")
    # NOTE(review): presumably the pipeline builds its own DynamicRetriever; confirm in pipeline.py.
    pipeline = DynamicRAGPipeline()
    logger.info("DynamicRAGPipeline initialized.")

    test_query = "What are the key features of Python?"
    logger.info(f"Processing test query: '{test_query}'")

    # The first query may be slow (model loading / data fetching).
    response_data = pipeline.process_query(test_query)

    print("\n--- Response ---")
    print(f"Query: {test_query}")
    print(f"Response: {response_data.get('response', 'N/A')}")

    print("\n--- Sources ---")
    sources = response_data.get('sources', [])
    if sources:
        for position, source in enumerate(sources, start=1):
            print(f"Source {position}:")
            print(f"  Title: {source.get('title', 'N/A')}")
            print(f"  URL: {source.get('url', 'N/A')}")
            print(f"  Relevance Score: {source.get('relevance_score', 'N/A')}")
    else:
        print("No sources found.")

    print("\n--- Metrics ---")
    metrics = response_data.get('metrics', {})
    if metrics:
        for key, value in metrics.items():
            print(f"  {key}: {value}")
    else:
        # Mirror the "No sources found." line instead of leaving a bare header.
        print("No metrics reported.")

    if 'error' in response_data:
        print("\n--- Error ---")
        print(response_data['error'])

except FileNotFoundError as e:
    print(f"\nError: {e}. Please check your file upload in Colab.")
except ImportError as e:
    # Include the underlying error so the log names the module that failed to import.
    logger.error(f"Failed to import modules. Ensure all necessary packages are installed (run the pip install cell) and the .py files are directly uploaded. Error: {e}")
    print(f"\nImport Error: {e}. Ensure all necessary packages are installed (run the pip install cell) and the .py files are directly uploaded to the root level.")
except Exception as e:
    # logger.exception keeps the traceback, which a plain error message would drop.
    logger.exception(f"An unexpected error occurred during pipeline execution: {e}")
    print(f"\nAn unexpected error occurred: {e}")

# Example of getting conversation history (after processing queries)
# history = pipeline.get_conversation_history()
# print("\n--- Conversation History ---")
# for entry in history:
#    print(f"Q: {entry['query']}")
#    print(f"A: {entry['response'][:100]}...") # Print truncated response
#    print("-" * 20)

# Example of getting overall performance metrics
# overall_metrics = pipeline.get_metrics()
# print("\n--- Overall Metrics ---")
# for key, value in overall_metrics.items():
#    print(f"  {key}: {value}")


Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/212 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/2.67k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/669 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/450 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]


--- Response ---
Query: What are the key features of Python?
Response: Can you clarify what exactly you're asking about in: It is also possible?

--- Sources ---
No sources found.

--- Metrics ---


In [None]:
!pip install transformers sentence-transformers flask flask-cors flask-limiter wikipedia numpy torch faiss-cpu



In [None]:
import sys
import os
import logging
import time

# Smoke test: import DynamicRAGPipeline from pipeline.py (expected next to the notebook),
# run one query through it, and print the response, its sources, and its metrics.

# force=True replaces any handlers the host (e.g. Colab) already attached to the root
# logger; without it basicConfig is a silent no-op and the INFO messages never show.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    force=True,
)
logger = logging.getLogger(__name__)

try:
    from pipeline import DynamicRAGPipeline
    # Not used below; imported so that a broken or missing retriever.py fails right here.
    from retriever import DynamicRetriever

    logger.info("Initializing the DynamicRAGPipeline...")
    # NOTE(review): presumably the pipeline builds its own DynamicRetriever; confirm in pipeline.py.
    pipeline = DynamicRAGPipeline()
    logger.info("DynamicRAGPipeline initialized.")

    test_query = "What are the key features of Python?"
    logger.info(f"Processing test query: '{test_query}'")

    # The first query may be slow (model loading / data fetching).
    response_data = pipeline.process_query(test_query)

    print("\n--- Response ---")
    print(f"Query: {test_query}")
    print(f"Response: {response_data.get('response', 'N/A')}")

    print("\n--- Sources ---")
    sources = response_data.get('sources', [])
    if sources:
        for position, source in enumerate(sources, start=1):
            print(f"Source {position}:")
            print(f"  Title: {source.get('title', 'N/A')}")
            print(f"  URL: {source.get('url', 'N/A')}")
            relevance_score = source.get('relevance_score')
            if relevance_score is None:
                score_text = "N/A"
            else:
                try:
                    score_text = f"{relevance_score:.4f}"
                except (TypeError, ValueError):
                    # A non-numeric score must not abort the rest of the report.
                    score_text = str(relevance_score)
            print(f"  Relevance Score: {score_text}")
    else:
        print("No sources found.")

    print("\n--- Metrics ---")
    metrics = response_data.get('metrics', {})
    if metrics:
        for key, value in metrics.items():
            # Floats are shortened to four decimals; everything else prints as-is.
            if isinstance(value, float):
                print(f"  {key}: {value:.4f}")
            else:
                print(f"  {key}: {value}")
    else:
        # Mirror the "No sources found." line instead of leaving a bare header.
        print("No metrics reported.")

    if 'error' in response_data:
        print("\n--- Error ---")
        print(response_data['error'])

except ImportError as e:
    # Include the underlying error so the log names the module that failed to import.
    logger.error(f"Failed to import modules. Ensure all necessary packages are installed (run the pip install cell) and the .py files are directly uploaded. Error: {e}")
    print(f"\nImport Error: {e}. Ensure all necessary packages are installed (run the pip install cell) and the .py files are directly uploaded to the root level.")
except Exception as e:
    # logger.exception keeps the traceback, which a plain error message would drop.
    logger.exception(f"An unexpected error occurred during pipeline execution: {e}")
    print(f"\nAn unexpected error occurred: {e}")



--- Response ---
Query: What are the key features of Python?
Response: Can you clarify what exactly you're asking about in: It is also possible?

--- Sources ---
No sources found.

--- Metrics ---


In [None]:
import logging

# Smoke test: import DynamicRAGPipeline from pipeline.py (expected next to the notebook),
# run one query through it, and print the response, its sources, and its metrics.

# The logger is set up here so the cell runs on a fresh kernel instead of relying on a
# `logger` left behind by an earlier cell. force=True replaces any handlers the host
# (e.g. Colab) already attached to the root logger; without it basicConfig is a no-op.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    force=True,
)
logger = logging.getLogger(__name__)

try:
    from pipeline import DynamicRAGPipeline
    # Not used below; imported so that a broken or missing retriever.py fails right here.
    from retriever import DynamicRetriever

    logger.info("Initializing the DynamicRAGPipeline...")
    # NOTE(review): presumably the pipeline builds its own DynamicRetriever; confirm in pipeline.py.
    pipeline = DynamicRAGPipeline()
    logger.info("DynamicRAGPipeline initialized.")

    test_query = "What are the key features of Python?"
    logger.info(f"Processing test query: '{test_query}'")

    # The first query may be slow (model loading / data fetching).
    response_data = pipeline.process_query(test_query)

    print("\n--- Response ---")
    print(f"Query: {test_query}")
    print(f"Response: {response_data.get('response', 'N/A')}")

    print("\n--- Sources ---")
    sources = response_data.get('sources', [])
    if sources:
        for position, source in enumerate(sources, start=1):
            print(f"Source {position}:")
            print(f"  Title: {source.get('title', 'N/A')}")
            print(f"  URL: {source.get('url', 'N/A')}")
            relevance_score = source.get('relevance_score')
            if relevance_score is None:
                score_text = "N/A"
            else:
                try:
                    score_text = f"{relevance_score:.4f}"
                except (TypeError, ValueError):
                    # A non-numeric score must not abort the rest of the report.
                    score_text = str(relevance_score)
            print(f"  Relevance Score: {score_text}")
    else:
        print("No sources found.")

    print("\n--- Metrics ---")
    metrics = response_data.get('metrics', {})
    if metrics:
        for key, value in metrics.items():
            # Floats are shortened to four decimals; everything else prints as-is.
            if isinstance(value, float):
                print(f"  {key}: {value:.4f}")
            else:
                print(f"  {key}: {value}")
    else:
        # Mirror the "No sources found." line instead of leaving a bare header.
        print("No metrics reported.")

    if 'error' in response_data:
        print("\n--- Error ---")
        print(response_data['error'])

except ImportError as e:
    # Include the underlying error so the log names the module that failed to import.
    logger.error(f"Failed to import modules. Ensure all necessary packages are installed (run the pip install cell) and the .py files are directly uploaded. Error: {e}")
    print(f"\nImport Error: {e}. Ensure all necessary packages are installed (run the pip install cell) and the .py files are directly uploaded to the root level.")
except Exception as e:
    # logger.exception keeps the traceback, which a plain error message would drop.
    logger.exception(f"An unexpected error occurred during pipeline execution: {e}")
    print(f"\nAn unexpected error occurred: {e}")



--- Response ---
Query: What are the key features of Python?
Response: Can you clarify what exactly you're asking about in: It is also possible?

--- Sources ---
No sources found.

--- Metrics ---


In [None]:
!pip uninstall -y wikipedia
!pip install transformers sentence-transformers flask flask-cors flask-limiter wikipedia-api numpy torch faiss-cpu

Found existing installation: wikipedia 1.4.0
Uninstalling wikipedia-1.4.0:
  Successfully uninstalled wikipedia-1.4.0
Collecting wikipedia-api
  Downloading wikipedia_api-0.8.1.tar.gz (19 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: wikipedia-api
  Building wheel for wikipedia-api (setup.py) ... [?25l[?25hdone
  Created wheel for wikipedia-api: filename=Wikipedia_API-0.8.1-py3-none-any.whl size=15383 sha256=428cae3b28a6ae777512775c438ec3c8a84e182b1d35b3026543fce92bccbb04
  Stored in directory: /root/.cache/pip/wheels/0b/0f/39/e8214ec038ccd5aeb8c82b957289f2f3ab2251febeae5c2860
Successfully built wikipedia-api
Installing collected packages: wikipedia-api
Successfully installed wikipedia-api-0.8.1


In [None]:
import faiss
import numpy as np
from typing import List, Dict, Tuple, Optional
from sentence_transformers import SentenceTransformer
import logging
import time
from collections import defaultdict
# The legacy `wikipedia` package is no longer imported; `wikipediaapi` (from wikipedia-api) is used instead.
import wikipediaapi # Add this line
import concurrent.futures
from concurrent.futures import ThreadPoolExecutor, as_completed
from functools import lru_cache
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import json
import os

logger = logging.getLogger(__name__)

class DynamicRetriever:
    """
    Dynamic retrieval system that fetches Wikipedia data on-demand and retrieves targeted information.
    """

    def __init__(self, model_name: str = "facebook-dpr-ctx_encoder-single-nq-base"):
        """
        Initialize the dynamic retriever with a sentence transformer model.

        Args:
            model_name: Name of the sentence transformer model to use
        """
        logger.info(f"Initializing DynamicRetriever with model: {model_name}")

        # Initialize models and storage
        self.embedding_model = SentenceTransformer(model_name)
        self.embedding_dim = self.embedding_model.get_sentence_embedding_dimension()
        self.documents = []
        self.document_topics = []
        self.topic_hierarchy = {}

        # Initialize metrics
        self.metrics = {
            'retrieval_time': [],
            'narrowing_time': [],
            'topic_accuracy': [],
            'wikipedia_fetch_time': []
        }

        # Initialize Wikipedia API (using wikipedia-api)
        # User-Agent is recommended for ethical use of Wikipedia API
        self.wiki_wiki = wikipediaapi.Wikipedia(
            'DynamicRAGPipeline (contact@example.com)', # Replace with your contact info or app name
            'en'  # Language
        )

        # Initialize cache directory
        self.cache_dir = os.path.join(os.path.dirname(__file__), '..', 'cache')
        os.makedirs(self.cache_dir, exist_ok=True)

        logger.info("DynamicRetriever initialized successfully")

    # Keep _get_cached_page and _cache_page for now, but they might need adjustments later
    # if the data structure from wikipedia-api is different.
    @lru_cache(maxsize=1000)
    def _get_cached_page(self, title: str) -> Optional[Dict]:
        """Get a Wikipedia page from cache if available."""
        cache_file = os.path.join(self.cache_dir, f"{title.lower().replace(' ', '_')}.json")
        if os.path.exists(cache_file):
            try:
                with open(cache_file, 'r', encoding='utf-8') as f:
                    # Assuming cached data structure is compatible for now
                    return json.load(f)
            except Exception as e:
                logger.warning(f"Error reading cache for {title}: {str(e)}")
        return None

    def _cache_page(self, title: str, content: Dict):
        """Cache a Wikipedia page."""
        cache_file = os.path.join(self.cache_dir, f"{title.lower().replace(' ', '_')}.json")
        try:
            with open(cache_file, 'w', encoding='utf-8') as f:
                # Assuming cached data structure is compatible for now
                json.dump(content, f, ensure_ascii=False, indent=2)
        except Exception as e:
            logger.warning(f"Error caching {title}: {str(e)}")


    def _handle_disambiguation(self, title: str, options: List[str]) -> Optional[str]:
        """Handle Wikipedia disambiguation pages by selecting the most relevant option."""
        if not options:
            return None

        # Get embeddings for all options
        option_embeddings = self.embedding_model.encode(options)
        title_embedding = self.embedding_model.encode([title])[0]

        # Calculate similarities
        similarities = np.dot(option_embeddings, title_embedding)
        best_idx = np.argmax(similarities)

        if similarities[best_idx] > 0.5:  # Only return if confidence is high enough
            return options[best_idx]
        return None

    def fetch_wikipedia_data(self, query: str, max_pages: int = 3) -> List[Dict]:
        """Fetch relevant Wikipedia pages for a query using wikipedia-api."""
        start_time = time.time()
        results = []

        try:
            # Temporarily disable cache check while testing wikipedia-api
            # cached_result = self._get_cached_page(query)
            # if cached_result:
            #     logger.info(f"Retrieved {query} from cache")
            #     return cached_result

            # Search Wikipedia using wikipedia-api
            # Note: wikipedia-api's search returns a list of titles
            search_results = self.wiki_wiki.page(query).search_results

            if not search_results:
                logger.warning(f"No Wikipedia pages found for query: {query}")
                return results

            # Process each search result (title and pageid tuple)
            for page_item in search_results[:max_pages]:
                title = page_item[0] # The title is the first element of the tuple
                try:
                    # Get the full page using wikipedia-api
                    page = self.wiki_wiki.page(title)

                    if not page.exists():
                         logger.warning(f"Page does not exist for title: {title}")
                         continue

                    # wikipedia-api handles redirects and basic disambiguation better
                    # We might still need custom disambiguation if page.text indicates it
                    # For now, let's assume page.text is the main content


                    # Prepare page data
                    page_data = {
                        'title': page.title,
                        'url': page.fullurl, # Use fullurl for the URL
                        'summary': page.summary,
                        'content': page.text, # Use page.text for full content
                        'topics': self._extract_topics(page.text)
                    }

                    # Temporarily disable caching while testing wikipedia-api
                    # self._cache_page(page.title, page_data)
                    results.append(page_data)

                except Exception as e:
                    logger.warning(f"Error fetching page {title} with wikipedia-api: {str(e)}")
                    continue

        except Exception as e:
            logger.error(f"Error fetching Wikipedia data with wikipedia-api: {str(e)}")

        finally:
            fetch_time = time.time() - start_time
            # Ensure metrics list contains only floats
            self.metrics['wikipedia_fetch_time'].append(float(fetch_time)) # Ensure float
            logger.info(f"Wikipedia fetch completed in {fetch_time:.2f}s")

        return results

    def _extract_topics(self, content: str) -> List[str]:
        """Extract potential topics from content using simple heuristics."""
        # Split content into sentences and look for topic indicators
        sentences = content.split('.')
        topics = []

        for sentence in sentences:
            # Look for common topic indicators
            if any(indicator in sentence.lower() for indicator in ['is a', 'refers to', 'deals with', 'about']):
                # Extract the subject
                words = sentence.split()
                if len(words) > 3:
                    topics.append(' '.join(words[:4]))

        return list(set(topics))  # Remove duplicates


    def add_topic_hierarchy(self, parent_topic: str, subtopics: List[str]) -> None:
        """
        Add a topic hierarchy for narrowing down topics.

        Args:
            parent_topic: The broader parent topic
            subtopics: List of more specific subtopics
        """
        self.topic_hierarchy[parent_topic] = subtopics
        logger.info(f"Added topic hierarchy: {parent_topic} -> {subtopics}")


    def identify_broad_topic(self, query: str) -> List[Tuple[str, float]]:
        """
        Identify the broad topic of a user query by fetching relevant Wikipedia pages.

        Args:
            query: The user query text

        Returns:
            List of potential topics with confidence scores
        """
        start_time = time.time()

        # Fetch Wikipedia data for the query using the updated method
        wiki_data = self.fetch_wikipedia_data(query)

        if not wiki_data:
            return []

        # Clear previous session data
        self.documents = []
        self.document_topics = []
        self.index = faiss.IndexFlatL2(self.embedding_dim)

        # Add fetched documents to the index
        for doc in wiki_data:
            # Use 'content' from the new page_data structure
            document_content = doc.get('content', '')
            if not document_content:
                 logger.warning(f"No content found for document: {doc.get('title', 'N/A')}")
                 continue # Skip if content is empty

            self.documents.append(document_content)
            self.document_topics.append(doc.get('topics', [])) # Use get with default for safety

            # Create and add embedding
            try:
                embedding = self.embedding_model.encode(document_content)
                embedding = embedding.reshape(1, -1)
                self.index.add(embedding)
            except Exception as e:
                 logger.error(f"Error creating embedding for document {doc.get('title', 'N/A')}: {e}")
                 continue


        # Get query embedding
        query_embedding = self.embedding_model.encode(query)
        query_embedding = query_embedding.reshape(1, -1)

        # Find similar documents
        k = min(5, self.index.ntotal)
        if k == 0:
             logger.warning("FAISS index is empty after processing wiki data.")
             return []

        try:
            distances, indices = self.index.search(query_embedding, k)
        except Exception as e:
             logger.error(f"Error searching FAISS index: {e}")
             return []


        # Count topic occurrences in retrieved documents
        topic_scores = defaultdict(float)
        # Check if indices[0] is not empty before iterating
        if indices is not None and len(indices) > 0 and len(indices[0]) > 0:
             for i, doc_idx in enumerate(indices[0]):
                 # Ensure doc_idx is within the bounds of self.document_topics
                 if 0 <= doc_idx < len(self.document_topics):
                      similarity_score = 1.0 / (1.0 + distances[0][i])  # Convert distance to similarity

                      for topic in self.document_topics[doc_idx]:
                          topic_scores[topic] += similarity_score
                 else:
                      logger.warning(f"Invalid document index {doc_idx} encountered during topic scoring.")
        else:
             logger.warning("No indices returned from FAISS search.")


        # Sort topics by score
        sorted_topics = sorted(
            [(topic, score) for topic, score in topic_scores.items()],
            key=lambda x: x[1],
            reverse=True
        )

        self.metrics['narrowing_time'].append(time.time() - start_time)
        return sorted_topics


    def generate_clarification_questions(self, broad_topic: str) -> List[str]:
        """
        Generate clarification questions to narrow down a broad topic.

        Args:
            broad_topic: The broad topic identified from the user query

        Returns:
            List of clarification questions
        """
        questions = []

        # Check if we have subtopics for this topic
        if broad_topic in self.topic_hierarchy:
            subtopics = self.topic_hierarchy[broad_topic]

            # Create general question
            general_question = f"Your query is about {broad_topic}. Could you specify which aspect you're interested in?"
            questions.append(general_question)

            # Create specific questions for each subtopic
            for subtopic in subtopics:
                question = f"Are you interested in {subtopic} specifically?"
                questions.append(question)

            # Add open-ended question
            questions.append(f"Is there a specific aspect of {broad_topic} you want to focus on?")
        else:
            # Generic questions if no subtopics are defined
            questions = [
                f"Could you provide more details about what aspect of {broad_topic} you're interested in?",
                f"What specific information about {broad_topic} are you looking for?",
                f"Are you looking for general information about {broad_topic} or something specific?"
            ]

        return questions

    # Keep the retrieve method, but it calls the updated fetch_wikipedia_data
    def retrieve(self, query: str, topics: Optional[List[str]] = None, k: int = 5) -> List[Dict]:
        """
        Retrieve documents based on query by fetching from Wikipedia.

        Args:
            query: The user query
            topics: Optional list of topics to filter results
            k: Number of documents to retrieve

        Returns:
            List of relevant documents (as dictionaries with title, url, summary, content)
        """
        start_time = time.time()

        # Fetch Wikipedia data for the query using the updated method
        wiki_data = self.fetch_wikipedia_data(query, max_pages=k)

        if not wiki_data:
            return []

        # Clear previous session data (only needed if not clearing in fetch_wikipedia_data)
        # If we are clearing in identify_broad_topic and retrieve, maybe it's okay.
        # Let's stick to clearing in identify_broad_topic for topic extraction
        # and build the index locally in retrieve for document retrieval.

        # Build a temporary index for retrieved documents for ranking
        temp_documents = []
        temp_document_topics = [] # Keep track of topics for filtering
        temp_index = faiss.IndexFlatL2(self.embedding_dim)

        for doc in wiki_data:
            document_content = doc.get('content', '')
            if not document_content:
                 logger.warning(f"No content found for document: {doc.get('title', 'N/A')}")
                 continue # Skip if content is empty

            temp_documents.append(doc) # Store the whole document dict
            temp_document_topics.append(doc.get('topics', [])) # Store topics

            try:
                embedding = self.embedding_model.encode(document_content)
                embedding = embedding.reshape(1, -1)
                temp_index.add(embedding)
            except Exception as e:
                 logger.error(f"Error creating embedding for document {doc.get('title', 'N/A')}: {e}")
                 continue


        # Get query embedding
        query_embedding = self.embedding_model.encode(query)
        query_embedding = query_embedding.reshape(1, -1)

        # Retrieve from index
        k_actual = min(k, temp_index.ntotal)
        if k_actual == 0:
            logger.warning("FAISS index is empty in retrieve method.")
            return []

        try:
            distances, indices = temp_index.search(query_embedding, k_actual)
        except Exception as e:
             logger.error(f"Error searching FAISS index in retrieve: {e}")
             return []


        # Filter by topics if specified and collect results
        results_with_distance = []
        if indices is not None and len(indices) > 0 and len(indices[0]) > 0:
            for i, doc_idx in enumerate(indices[0]):
                 # Ensure doc_idx is within the bounds
                 if 0 <= doc_idx < len(temp_documents):
                      # If topics filter is provided, check if document has any of those topics
                      if topics is None or any(topic in temp_document_topics[doc_idx] for topic in topics):
                           results_with_distance.append((temp_documents[doc_idx], distances[0][i]))
                 else:
                      logger.warning(f"Invalid document index {doc_idx} encountered during retrieval.")
        else:
             logger.warning("No indices returned from FAISS search in retrieve.")


        # Sort by relevance (distance) and return top k documents (the original dicts)
        results_with_distance.sort(key=lambda x: x[1])
        retrieved_documents = [doc for doc, distance in results_with_distance[:k]]


        self.metrics['retrieval_time'].append(time.time() - start_time)
        return retrieved_documents

    def get_performance_metrics(self) -> Dict:
        """Get the average performance metrics"""
        metrics = {}

        # Calculate averages and counts for each metric
        for key, values in self.metrics.items():
            if values:
                # Ensure values are treated as floats for summation and averaging
                float_values = [float(v) for v in values if isinstance(v, (int, float))]
                if float_values:
                    metrics[f'avg_{key}'] = sum(float_values) / len(float_values)
                    metrics[f'max_{key}'] = max(float_values)
                    # metrics[f'min_{key}'] = min(float_values) # Min was removed in previous edit
                    metrics[f'count_{key}'] = len(float_values)
                else:
                     metrics[f'avg_{key}'] = 0.0
                     metrics[f'max_{key}'] = 0.0
                     # metrics[f'min_{key}'] = 0.0 # Min was removed
                     metrics[f'count_{key}'] = 0
            else:
                metrics[f'avg_{key}'] = 0.0
                metrics[f'max_{key}'] = 0.0
                # metrics[f'min_{key}'] = 0.0 # Min was removed
                metrics[f'count_{key}'] = 0

        # Add overall query counts and rates
        # Assuming these are handled in the pipeline now, but keeping here for completeness if needed
        # total_queries = self.metrics.get('total_queries', 0)
        # successful_queries = self.metrics.get('successful_queries', 0)
        # failed_queries = self.metrics.get('failed_queries', 0)
        #
        # metrics['total_queries'] = total_queries
        # metrics['successful_queries'] = successful_queries
        # metrics['failed_queries'] = failed_queries
        # metrics['success_rate'] = (successful_queries / total_queries * 100 if total_queries > 0 else 0)


        return metrics

In [None]:
import os

def find_file(filename, search_path):
    """Walk ``search_path`` top-down and return the path of the first ``filename`` found.

    Returns ``None`` when no directory under ``search_path`` contains the file.
    """
    hits = (
        os.path.join(directory, filename)
        for directory, _subdirs, entries in os.walk(search_path)
        if filename in entries
    )
    # The generator is lazy, so the walk stops at the first match.
    return next(hits, None)

# File we are looking for.
filename_to_find = 'pipeline.py'

# Root of the search. '/content' is the default working area of a Colab session;
# switch to '.' to search from the current directory instead.
search_directory = '/content' # Or '.'

print(f"Searching for '{filename_to_find}' in '{search_directory}'...")

found_path = find_file(filename_to_find, search_directory)

# Report the outcome with a single print.
print(
    f"Found '{filename_to_find}' at: {found_path}"
    if found_path
    else f"'{filename_to_find}' not found in '{search_directory}' or its subdirectories."
)

Searching for 'pipeline.py' in '/content'...
Found 'pipeline.py' at: /content/pipeline.py


In [None]:
test_queries = [
    "What are transformers in machine learning?",
    "Tell me about applications of AI in healthcare.",
    "Explain the basics of quantum computing.",
    "What is the difference between supervised and unsupervised learning?",
    "What is BERT used for?"
]


In [None]:
from pipeline import DynamicRAGPipeline
from retriever import DynamicRetriever

# Baseline just means: get context and pass to BART without clarifications
# NOTE(review): the two statements below only build a default DynamicRAGPipeline
# and replace its `retriever` attribute with a fresh DynamicRetriever; nothing
# visible here turns clarification questions off — confirm that the pipeline is
# actually run in a baseline mode where it is used.
baseline_pipeline = DynamicRAGPipeline()
baseline_pipeline.retriever = DynamicRetriever()


In [None]:
from pipeline import DynamicRAGPipeline
from retriever import DynamicRetriever



In [None]:
!pip install faiss-cpu




In [None]:
!pip install sentence-transformers transformers wikipedia flask flask-cors torch


Collecting wikipedia
  Using cached wikipedia-1.4.0-py3-none-any.whl
Installing collected packages: wikipedia
Successfully installed wikipedia-1.4.0


In [None]:
from pipeline import DynamicRAGPipeline
from retriever import DynamicRetriever

baseline_pipeline = DynamicRAGPipeline()


In [None]:
!pip install faiss-cpu sentence-transformers transformers wikipedia flask flask-cors torch




In [None]:
!pip install faiss-cpu sentence-transformers transformers wikipedia flask flask-cors torch




In [None]:
from pipeline import DynamicRAGPipeline

# Initialize your DA-RAG pipeline
pipeline = DynamicRAGPipeline()

# Define some test queries
test_queries = [
    "What are the applications of AI in healthcare?",
    "Tell me about transformers in NLP.",
    "What is machine learning?",
    "Explain backpropagation.",
    "What are the uses of quantum computing?"
]


In [24]:
import pandas as pd

# One comparison record per query: DA-RAG answer vs. a plain
# "fetch summaries, then generate" baseline using the same generator.
results = []

for query in test_queries:
    print(f"\n🔍 Query: {query}")

    # ---- DA-RAG ----
    # Full pipeline; "N/A" is used when the result has no "response" key.
    da_result = pipeline.process_query(query)
    da_response = da_result.get("response", "N/A")

    # ---- Baseline ----
    # Fetch documents manually and feed their summaries straight to the generator.
    try:
        docs = pipeline.retriever.fetch_wikipedia_data(query)
        if docs:
            baseline_context = "\n".join(doc['summary'] for doc in docs)
        else:
            baseline_context = "No docs found."
        baseline_response = pipeline._generate_response(query, baseline_context)
    except Exception as e:
        # A failure is recorded as the baseline answer so the loop keeps going.
        baseline_response = f"Error: {e}"

    # Save results (lengths are whitespace-separated word counts)
    results += [{
        "Query": query,
        "DA-RAG Response": da_response,
        "Baseline Response": baseline_response,
        "DA-RAG Length": len(da_response.split()),
        "Baseline Length": len(baseline_response.split())
    }]



🔍 Query: What are the applications of AI in healthcare?

🔍 Query: Tell me about transformers in NLP.

🔍 Query: What is machine learning?

🔍 Query: Explain backpropagation.

🔍 Query: What are the uses of quantum computing?


In [25]:
import pandas as pd

# One comparison record per query: DA-RAG answer vs. a plain
# "fetch summaries, then generate" baseline using the same generator.
results = []

for query in test_queries:
    print(f"\n🔍 Query: {query}")

    # ---- DA-RAG ----
    # Full pipeline; "N/A" is used when the result has no "response" key.
    da_result = pipeline.process_query(query)
    da_response = da_result.get("response", "N/A")

    # ---- Baseline ----
    # Fetch documents manually and feed their summaries straight to the generator.
    try:
        docs = pipeline.retriever.fetch_wikipedia_data(query)
        if docs:
            baseline_context = "\n".join(doc['summary'] for doc in docs)
        else:
            baseline_context = "No docs found."
        baseline_response = pipeline._generate_response(query, baseline_context)
    except Exception as e:
        # A failure is recorded as the baseline answer so the loop keeps going.
        baseline_response = f"Error: {e}"

    # Save results (lengths are whitespace-separated word counts)
    results += [{
        "Query": query,
        "DA-RAG Response": da_response,
        "Baseline Response": baseline_response,
        "DA-RAG Length": len(da_response.split()),
        "Baseline Length": len(baseline_response.split())
    }]



🔍 Query: What are the applications of AI in healthcare?

🔍 Query: Tell me about transformers in NLP.

🔍 Query: What is machine learning?

🔍 Query: Explain backpropagation.

🔍 Query: What are the uses of quantum computing?


In [26]:
# Fix the import statement assuming pipeline.py and retriever.py are in the same directory
from pipeline import DynamicRAGPipeline

# Define some test queries
test_queries = [
    "What are the applications of AI in healthcare?",
    "Tell me about transformers in NLP.",
    "What is machine learning?",
    "Explain backpropagation.",
    "What are the uses of quantum computing?"
]

# Initialize your DA-RAG pipeline
pipeline = DynamicRAGPipeline()

In [27]:
# Fix the import statement assuming pipeline.py and retriever.py are in the same directory
from pipeline import DynamicRAGPipeline

# Define some test queries
test_queries = [
    "What are the applications of AI in healthcare?",
    "Tell me about transformers in NLP.",
    "What is machine learning?",
    "Explain backpropagation.",
    "What are the uses of quantum computing?"
]

# Initialize your DA-RAG pipeline
pipeline = DynamicRAGPipeline()

In [28]:
!pip install faiss-cpu




In [29]:
!pip install faiss-cpu sentence-transformers transformers wikipedia flask flask-cors torch




In [30]:
# Fix the import statement assuming pipeline.py and retriever.py are in the same directory
from pipeline import DynamicRAGPipeline

# Define some test queries
test_queries = [
    "What are the applications of AI in healthcare?",
    "Tell me about transformers in NLP.",
    "What is machine learning?",
    "Explain backpropagation.",
    "What are the uses of quantum computing?"
]

# Initialize your DA-RAG pipeline
pipeline = DynamicRAGPipeline()

In [31]:
import pandas as pd

# One comparison record per query: DA-RAG answer vs. a plain
# "fetch summaries, then generate" baseline using the same generator.
results = []

for query in test_queries:
    print(f"\n🔍 Query: {query}")

    # ---- DA-RAG ----
    # Full pipeline; "N/A" is used when the result has no "response" key.
    da_result = pipeline.process_query(query)
    da_response = da_result.get("response", "N/A")

    # ---- Baseline ----
    # Fetch documents manually and feed their summaries straight to the generator.
    try:
        docs = pipeline.retriever.fetch_wikipedia_data(query)
        if docs:
            baseline_context = "\n".join(doc['summary'] for doc in docs)
        else:
            baseline_context = "No docs found."
        baseline_response = pipeline._generate_response(query, baseline_context)
    except Exception as e:
        # A failure is recorded as the baseline answer so the loop keeps going.
        baseline_response = f"Error: {e}"

    # Save results (lengths are whitespace-separated word counts)
    results += [{
        "Query": query,
        "DA-RAG Response": da_response,
        "Baseline Response": baseline_response,
        "DA-RAG Length": len(da_response.split()),
        "Baseline Length": len(baseline_response.split())
    }]



🔍 Query: What are the applications of AI in healthcare?

🔍 Query: Tell me about transformers in NLP.

🔍 Query: What is machine learning?

🔍 Query: Explain backpropagation.

🔍 Query: What are the uses of quantum computing?


In [32]:
query = "Explain backpropagation."
response = pipeline.process_query(query)
print(response)


{'response': "Can you clarify what exactly you're asking about in: The reason for this?", 'clarification_needed': True, 'topics': [('The reason for this', np.float32(0.006968446)), ('}}\\end{cases}}} if φ {\\displaystyle', np.float32(0.006968446)), ('This avoids inefficiency in', np.float32(0.006968446)), ('== Loss function ==', np.float32(0.006968446)), ('== Matrix multiplication ==', np.float32(0.006968446)), ('=== Learning as an', np.float32(0.006968446)), ('The new Δ w', np.float32(0.006968446)), ('It is an efficient', np.float32(0.006968446)), ('2)} , and target', np.float32(0.006968446)), ('== Adjoint graph ==', np.float32(0.006968446)), ('} Backpropagation then consists', np.float32(0.006968446)), ('} Note that δ', np.float32(0.006968446)), ('Now if the relation', np.float32(0.006968446)), ('1 we obtain: ∂', np.float32(0.006968446)), ('The derivative of the', np.float32(0.006968446)), ('The overall network is', np.float32(0.006968446)), ('{\\displaystyle \\delta _{j}={\\frac {\\

In [33]:
from pipeline import DynamicRAGPipeline
pipeline = DynamicRAGPipeline()


In [34]:
result = pipeline.process_query("What are the applications of AI in healthcare?")
print(result["response"])


Can you clarify what exactly you're asking about in: AI is also used?


In [35]:
from pipeline import DynamicRAGPipeline
pipeline = DynamicRAGPipeline()


In [36]:
result = pipeline.process_query("What are the applications of AI in healthcare?")
print(result["response"])

Can you clarify what exactly you're asking about in: AI is also used?


In [37]:
result = pipeline.process_query("What are the applications of AI in healthcare?")
print(result["response"])

Can you clarify what exactly you're asking about in: AI is also used?


In [38]:
# Queries used for the smoke test below.
test_queries = [
    "What are the applications of AI in healthcare?",
    "Tell me about transformers in NLP.",
    "What is machine learning?",
    "Explain backpropagation.",
    "What are the uses of quantum computing?",
]

# Run every query through the pipeline and print response, sources and metrics.
for query in test_queries:
    print(f"\n🔍 Query: {query}")
    try:
        result = pipeline.process_query(query)
    except Exception as e:
        # Keep going with the remaining queries if one of them fails.
        print("❌ Error:", e)
    else:
        try:
            print("🧠 Response:\n", result.get("response", "❌ No response"))
            # Sources are optional in the result dict.
            if "sources" in result:
                print("📚 Sources:")
                for src in result["sources"]:
                    print(f"- {src['title']}: {src['url']}")
            print("📊 Metrics:", result.get("metrics", {}))
        except Exception as e:
            print("❌ Error:", e)



🔍 Query: What are the applications of AI in healthcare?
🧠 Response:
 Can you clarify what exactly you're asking about in: AI is also used?
📊 Metrics: {}

🔍 Query: Tell me about transformers in NLP.
🧠 Response:
 Can you clarify what exactly you're asking about in: === Automatic prompt generation?
📊 Metrics: {}

🔍 Query: What is machine learning?
🧠 Response:
 Can you clarify what exactly you're asking about in: The term inductive here?
📊 Metrics: {}

🔍 Query: Explain backpropagation.
🧠 Response:
 Can you clarify what exactly you're asking about in: The reason for this?
📊 Metrics: {}

🔍 Query: What are the uses of quantum computing?
🧠 Response:
 Can you clarify what exactly you're asking about in: Conversely, any problem solvable?
📊 Metrics: {}


In [39]:
query = "What is machine learning?"
result = pipeline.process_query(query)

# The number of clarification questions is not fixed, so the prompt and the
# validation are derived from the list that actually came back.
questions = result.get("questions") or []

if result.get("clarification_needed") and questions:
    print("💬 Clarification needed. Choose one:")
    for i, q in enumerate(questions):
        print(f"{i+1}. {q}")

    raw_choice = input(f"Your choice (1-{len(questions)}): ").strip()
    # Validate before indexing: non-numeric input used to raise ValueError, and
    # "0" silently selected the LAST question through index -1.
    if raw_choice.isdigit() and 1 <= int(raw_choice) <= len(questions):
        choice = int(raw_choice) - 1
        clarified_query = questions[choice]
        clarified_result = pipeline.process_query(clarified_query)
        print("🧠 Final Answer:\n", clarified_result["response"])
    else:
        print(f"Invalid choice {raw_choice!r}: expected a number between 1 and {len(questions)}.")
else:
    print("🧠 Response:\n", result["response"])


💬 Clarification needed. Choose one:
1. Could you provide more details about what aspect of The term inductive here you're interested in?
2. What specific information about The term inductive here are you looking for?
3. Are you looking for general information about The term inductive here or something specific?
Your choice (1-3): 1
🧠 Final Answer:
 Can you clarify what exactly you're asking about in: This allowed for developers?


In [40]:
result = pipeline.process_query("What is machine learning?")


In [41]:
result = pipeline.process_query("What is machine learning?")
print(result["response"])


Can you clarify what exactly you're asking about in: The term inductive here?


In [2]:
from pipeline import DynamicRAGPipeline
pipeline = DynamicRAGPipeline()
response = pipeline.process_query("What is machine learning?")
print(response["response"])


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Can you clarify what exactly you're asking about in: History?


In [3]:
pipeline.process_query("Tell me about Artificial Intelligence")


{'response': "Can you clarify what exactly you're asking about in: Terminology?",
 'clarification_needed': True,
 'topics': ['Terminology',
  'Characteristics',
  'Intelligence traits',
  'Physical traits',
  'Tests for human-level AGI',
  'AI-complete problems',
  'History',
  'Classical AI',
  'Narrow AI research',
  'Modern artificial general intelligence research',
  'Feasibility',
  'Timescales',
  'Whole brain emulation',
  'Early estimates',
  'Current research',
  'Criticisms of simulation-based approaches',
  'Philosophical perspective',
  '"Strong AI" as defined in philosophy',
  'Consciousness',
  'Benefits',
  'Advancements in medicine and healthcare',
  'Advancements in science and technology',
  'Enhancing education and productivity',
  'Mitigating global crises',
  'Revitalising environmental conservation and biodiversity',
  'Enhancing space exploration and colonization',
  'Risks',
  'Existential risks',
  'Risk of loss of control and human extinction',
  'Mass unemplo