In [None]:
# Unpack the prebuilt ChromaDB store into ./vector_db (the default db_dir that RAGSystem opens below)
!unzip vector_db.zip

Archive:  vector_db.zip
   creating: vector_db/
   creating: vector_db/f103d939-d98e-43eb-aa43-400e520ee1e5/
  inflating: vector_db/f103d939-d98e-43eb-aa43-400e520ee1e5/length.bin  
  inflating: vector_db/f103d939-d98e-43eb-aa43-400e520ee1e5/header.bin  
 extracting: vector_db/f103d939-d98e-43eb-aa43-400e520ee1e5/link_lists.bin  
  inflating: vector_db/f103d939-d98e-43eb-aa43-400e520ee1e5/data_level0.bin  
  inflating: vector_db/chroma.sqlite3  


In [None]:
!pip install -qU transformers torch sentence-transformers chromadb bitsandbytes tqdm

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m65.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m58.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m45.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.1 MB/s[0m eta [36m0:

In [None]:
import os
import json
import uuid
import torch
import chromadb
from typing import List, Dict, Any, Optional, Union
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from tqdm import tqdm

In [None]:
import time
import functools
from typing import Callable, Any, Optional

def _extract_text(args: tuple, kwargs: dict) -> Optional[str]:
    """Return the text a wrapped call is about to process, or None if none is found.

    The second positional argument is tried first (a string, or a list made up
    only of strings), then the ``text``, ``texts`` and ``prompt`` keyword arguments.
    """
    if len(args) > 1:
        candidate = args[1]
        if isinstance(candidate, str):
            return candidate
        if isinstance(candidate, list) and all(isinstance(t, str) for t in candidate):
            return ' '.join(candidate)

    if isinstance(kwargs.get('text'), str):
        return kwargs['text']

    texts = kwargs.get('texts')
    # Require every item to be a string: joining anything else raises TypeError,
    # and a measurement helper must never break the call it is measuring.
    if isinstance(texts, list) and all(isinstance(t, str) for t in texts):
        return ' '.join(texts)

    if isinstance(kwargs.get('prompt'), str):
        return kwargs['prompt']

    return None


def _find_tokenizer(instance: Any) -> Any:
    """Return ``instance.tokenizer``, else ``instance.embedding_model.tokenizer``, else None."""
    if instance is None:
        return None
    tokenizer = getattr(instance, 'tokenizer', None)
    if tokenizer is None:
        # Fall through when the attribute exists but holds None (e.g. a failed model load)
        tokenizer = getattr(getattr(instance, 'embedding_model', None), 'tokenizer', None)
    return tokenizer


def _count_input_tokens(text: Optional[str], tokenizer: Any) -> int:
    """Count the tokens in ``text``, degrading to rough estimates when needed."""
    if not text:
        return 0
    if tokenizer is None:
        # Very rough approximation if no tokenizer available
        return len(text.split())
    try:
        if hasattr(tokenizer, 'encode'):
            return len(tokenizer.encode(text))
        if callable(tokenizer):
            return len(tokenizer(text)['input_ids'])
    except Exception:
        # Fallback to rough estimate (approx 4 chars per token)
        return len(text) // 4
    return 0


def measure_token_processing(process_name: Optional[str] = None):
    """Decorator to measure token processing speed across different tasks.

    Args:
        process_name: Label used in the printed report; defaults to the wrapped
            function's ``__name__``.

    Returns:
        A decorator. The wrapped callable returns exactly what the original
        returns and additionally prints:

        * the number of input tokens, the wall-clock time of the call and the
          resulting input tokens per second;
        * when the result is a string and a tokenizer was found, the combined
          input + output token count and its throughput.

    The first positional argument is treated as the instance (``self``) and is
    searched for a tokenizer. Token counting is best-effort: a failing tokenizer
    degrades to an estimate instead of raising. Exceptions raised by the wrapped
    function itself propagate unchanged, and no report is printed for them.
    """

    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            task_name = process_name or func.__name__

            instance = args[0] if args else None
            tokenizer = _find_tokenizer(instance)
            input_tokens = _count_input_tokens(_extract_text(args, kwargs), tokenizer)

            # Time only the wrapped call; token counting above is excluded
            start_time = time.perf_counter()
            result = func(*args, **kwargs)
            processing_time = time.perf_counter() - start_time

            tokens_per_second = input_tokens / processing_time if processing_time > 0 else 0

            print(f"⏱️ {task_name}: {input_tokens} tokens processed in {processing_time:.2f}s ({tokens_per_second:.2f} tokens/sec)\n")

            # If result is a string, measure output tokens too
            if isinstance(result, str) and tokenizer is not None:
                try:
                    output_tokens = len(tokenizer.encode(result))
                except Exception:
                    # Best-effort: skip the combined report rather than fail the call
                    output_tokens = None
                if output_tokens is not None:
                    total_tokens = input_tokens + output_tokens
                    throughput = total_tokens / processing_time if processing_time > 0 else 0
                    print(f"   Total (in+out): {total_tokens} tokens at {throughput:.2f} tokens/sec\n\n")

            return result
        return wrapper
    return decorator

In [None]:
# Prefer the GPU when CUDA is usable; otherwise run on the CPU
import torch

if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"Using device: {device}")

Using device: cuda


### RAG (Retrieval-Augmented Generation) System

This class implements a simple yet effective RAG pipeline that combines:

- Document retrieval from ChromaDB vector database
- LLM-based question answering with context
- Smart handling of conversation history for follow-up questions
- Model caching to improve performance
- Automatic fallback to smaller models when resources are limited

The system retrieves relevant document chunks based on semantic similarity, formats them as context, and generates accurate, context-aware responses without requiring complex infrastructure.

In [None]:
# Log in to the Hugging Face Hub so the LLM checkpoints can be downloaded.
# notebook_login() asks for the token interactively, so no credential is stored in this notebook.
from huggingface_hub import notebook_login
notebook_login()

In [None]:
class RAGSystem:
    # --- Class-level cache for model, tokenizer, generator ---
    _loaded_tokenizer = None
    _loaded_model = None
    _loaded_generator = None
    _loaded_model_name = None
    _loaded_embedding_model = None
    _loaded_embedding_model_name = None

    def __init__(
        self,
        db_dir: str = "vector_db",
        collection_name: str = "DR_X_Publications",
        llm_model_name: str = "meta-llama/Meta-Llama-3-8B-Instruct",
        embedding_model: str = "nomic-ai/nomic-embed-text-v1.5",
        retrieve_k: int = 5,
        device: str = "auto"
    ):
        """Open the vector store and load (or reuse) the LLM and the embedding model.

        Args:
            db_dir: Directory of the persistent ChromaDB store.
            collection_name: Existing collection to query; an error from
                ``get_collection`` propagates to the caller.
            llm_model_name: Hugging Face id of the causal LM used for answers.
            embedding_model: SentenceTransformer id used to embed queries.
            retrieve_k: Default number of chunks fetched per query.
            device: ``"auto"`` picks ``"cuda"`` when available, else ``"cpu"``;
                any other value is used as given.

        Loading is best-effort and the two models are handled independently:

        * If the requested LLM cannot be loaded, a small fallback LLM is tried;
          if that fails too, ``generator``, ``tokenizer`` and ``model`` are None.
        * If the embedding model cannot be loaded, ``embedding_model`` is None.

        Successfully loaded models are cached on the class, keyed by name, so
        later instances requesting the same names reuse them.
        """

        self.db_dir = db_dir
        self.collection_name = collection_name
        self.llm_model_name = llm_model_name
        self.retrieve_k = retrieve_k

        # Conversation history for context
        self.conversation_history = []

        # Initialize ChromaDB client
        self.client = chromadb.PersistentClient(path=db_dir)
        self.collection = self.client.get_collection(collection_name)

        # Check for available device
        if device == "auto":
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
        else:
            self.device = device

        print(f"-----Using device: {self.device}-----\n")

        # === LLM (cached per model name) ===
        try:
            if RAGSystem._loaded_model_name == llm_model_name:
                print("LLM already loaded. Reusing from cache.")
                self.tokenizer = RAGSystem._loaded_tokenizer
                self.model = RAGSystem._loaded_model
                self.generator = RAGSystem._loaded_generator
            else:
                print(f"-----Loading LLM: {llm_model_name}-----\n")
                self.tokenizer = AutoTokenizer.from_pretrained(llm_model_name)

                if self.device == "cuda":
                    # The bare load_in_8bit= keyword is deprecated; the 8-bit
                    # request now goes through a BitsAndBytesConfig object.
                    from transformers import BitsAndBytesConfig

                    self.model = AutoModelForCausalLM.from_pretrained(
                        llm_model_name,
                        device_map="auto",
                        torch_dtype=torch.float16,
                        quantization_config=BitsAndBytesConfig(load_in_8bit=True)
                    )
                else:
                    self.model = AutoModelForCausalLM.from_pretrained(
                        llm_model_name,
                        device_map={"": self.device}
                    )

                # Create text generation pipeline
                self.generator = pipeline(
                    "text-generation",
                    model=self.model,
                    tokenizer=self.tokenizer,
                    device_map="auto" if self.device == "cuda" else {"": self.device}
                )

                RAGSystem._loaded_tokenizer = self.tokenizer
                RAGSystem._loaded_model = self.model
                RAGSystem._loaded_generator = self.generator
                RAGSystem._loaded_model_name = llm_model_name

            print(f"-----Initialized LLM from {llm_model_name}-----\n")

        except Exception as e:
            print(f"Error loading LLM: {str(e)}")
            print("Attempting to continue with a smaller model...")

            try:
                # Fallback to a smaller model (not cached on the class)
                fallback_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
                print(f"Loading fallback model: {fallback_model}")
                self.generator = pipeline(
                    "text-generation",
                    model=fallback_model,
                    device_map="auto" if self.device == "cuda" else {"": self.device}
                )
                self.tokenizer = self.generator.tokenizer
                self.model = self.generator.model
                self.llm_model_name = fallback_model

            except Exception as e2:
                print(f"Error loading fallback model: {str(e2)}")
                self.generator = None
                self.tokenizer = None
                self.model = None

        # === Embedding model (cached per model name) ===
        # Kept outside the LLM try-block so it is loaded even when the LLM fell
        # back, and so an embedding failure never replaces a working LLM.
        try:
            if RAGSystem._loaded_embedding_model_name == embedding_model:
                print("Embedding model already loaded. Reusing from cache.")
                self.embedding_model = RAGSystem._loaded_embedding_model
            else:
                # Load the embedding model on CPU, since the LLM may already occupy the GPU
                print(f"-----Loading embedding model: {embedding_model}-----")
                self.embedding_model = SentenceTransformer(embedding_model, device="cpu", trust_remote_code=True)
                RAGSystem._loaded_embedding_model = self.embedding_model
                RAGSystem._loaded_embedding_model_name = embedding_model
        except Exception as e:
            print(f"Error loading embedding model: {str(e)}")
            self.embedding_model = None

    @measure_token_processing("RAG Query Processing")
    def generate_embedding(self, text: str) -> List[float]:
        """ Generate embedding for a single text using the Nomic embedding model."""

        embeddings = self.embedding_model.encode([text])
        return embeddings[0]

    def retrieve_relevant_chunks(self, query: str, k: Optional[int] = None) -> List[Dict[str, Any]]:
        """Retrieve the most relevant chunks for a query."""

        if k is None:
            k = self.retrieve_k

        # Generate embedding for query
        query_embedding = self.generate_embedding(query)

        # Query the vector database
        results = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=k,
            include=["documents", "metadatas", "distances"]
        )

        # Format results
        chunks = []
        for i in range(len(results["ids"][0])):
            chunk = {
                "id": results["ids"][0][i],
                "text": results["documents"][0][i],
                "metadata": results["metadatas"][0][i],
                "distance": results["distances"][0][i]
            }
            chunks.append(chunk)

        return chunks

    def format_context(self, chunks: List[Dict[str, Any]]) -> str:
        """Format retrieved chunks into context for the LLM.

        Args:
            chunks: Dicts carrying ``text`` and ``metadata`` entries. A missing
                or ``None`` metadata is treated as empty and a missing or
                ``None`` text as an empty string, so such chunks are still
                rendered instead of raising.

        Returns:
            A ``RETRIEVED CONTEXT:`` header followed by one numbered section per
            chunk: source and page (``Unknown`` when absent), the stripped text,
            and a ``(Tables: ...)`` line when the metadata has a ``tables`` key.
        """

        sections = ["RETRIEVED CONTEXT:\n\n"]
        for number, chunk in enumerate(chunks, start=1):
            metadata = chunk.get("metadata") or {}
            source = metadata.get("source", "Unknown")
            page = metadata.get("page_number", "Unknown")
            text = chunk.get("text") or ""

            sections.append(f"[DOCUMENT {number}] {source}, Page {page}\n")
            sections.append(f"{text.strip()}\n\n")

            if "tables" in metadata:
                sections.append(f"(Tables: {metadata['tables']})\n\n")

        # Join once at the end instead of growing a string inside the loop
        return "".join(sections)

    def generate_prompt(self, query: str, context: str) -> str:
        """Generate a prompt for the LLM using the query and context."""

        # Add conversation history to provide context for follow-up questions
        history_context = ""
        if self.conversation_history:
            history_context = "Previous conversation:\n"
            for q, a in self.conversation_history[-3:]:  # Include up to 3 recent exchanges
                history_context += f"Question: {q}\nAnswer: {a}\n\n"

        # Check if we're using a Llama model
        if 'llama' in self.llm_model_name.lower():
            # Llama-specific prompt format
            system_prompt = """<|system|>
                            You are a helpful research assistant. Answer the question based ONLY on the provided context.
                            If you cannot find the answer in the context, say "I don't have enough information to answer this question."
                            Do not use prior knowledge. Be concise but comprehensive. Avoid repeating phrases like 'based on the provided context.'
                            </s>"""

            prompt = system_prompt

            if history_context:
                prompt += f"\n<|user|>\n{history_context}</s>\n"

            prompt += f"\n<|user|>\n{context}\n\nQuestion: {query}</s>\n"
            prompt += "\n<|assistant|>\n"

        else:
            # Generic prompt format for other models
            system_prompt = """You are a helpful research assistant. Answer the question based ONLY on the provided context.
                            If you cannot find the answer in the context, say "I don't have enough information to answer this question."
                            Do not use prior knowledge. Be concise but comprehensive."""

            prompt = f"{system_prompt}\n\n"

            if history_context:
                prompt += f"{history_context}\n"

            prompt += f"{context}\n\nQuestion: {query}\n\nAnswer:"

        return prompt

    @measure_token_processing("RAG Answer Generation")
    def generate_answer(self, prompt: str) -> str:
        """Generate an answer using the Hugging Face LLM."""

        try:
            # Calculate input token length to determine appropriate max_new_tokens
            input_ids = self.tokenizer(prompt, return_tensors="pt").input_ids
            input_length = input_ids.size(1)

            # Set a reasonable max_new_tokens based on available context window
            context_window = 6000  # for Llama3 context window is 8,192 tokens
            max_new_tokens = min(1024, context_window - input_length)

            # Generate response
            outputs = self.generator(
                prompt,
                max_new_tokens=max_new_tokens,
                temperature=0.1,
                top_p=0.9,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
                num_return_sequences=1
            )

            # Extract the generated text
            generated_text = outputs[0]["generated_text"]

            # Remove the prompt from the response
            answer = generated_text[len(prompt):].strip()

            # Clean up the answer
            answer = answer.split("</s>")[0].split("<|user|>")[0].split("<|system|>")[0].strip()

            return answer

        except Exception as e:
            print(f"Error generating answer: {str(e)}")
            return "Sorry, I encountered an error while generating the answer."

    def answer_question(self, query: str, use_history: bool = True) -> str:
        """Answer a question using RAG"""

        print(f"Processing query: {query}\n")

        # Check if this is a follow-up question
        if use_history and self.conversation_history:
            # Enhance query with context from previous exchanges
            enhanced_query = self._enhance_follow_up_query(query)
            print(f"Enhanced query: {enhanced_query}\n")
            chunks = self.retrieve_relevant_chunks(enhanced_query)
        else:
            chunks = self.retrieve_relevant_chunks(query)

        print(f"Retrieved {len(chunks)} relevant chunks\n")

        # Format context from chunks
        context = self.format_context(chunks)

        # Generate prompt
        prompt = self.generate_prompt(query, context)

        # Generate answer
        answer = self.generate_answer(prompt)

        # Update conversation history
        self.conversation_history.append((query, answer))

        return answer

    def _enhance_follow_up_query(self, query: str) -> str:
        """Enhance a follow-up query with context from previous exchanges."""

        if not self.conversation_history:
            return query

        # Get the last 2 exchanges
        recent_history = self.conversation_history[-2:] if len(self.conversation_history) >= 2 else self.conversation_history

        # Format the history
        history_text = ""
        for q, a in recent_history:
            history_text += f"Q: {q}\nA: {a}\n"

        # Combine with the current query
        enhanced = f"{history_text}Follow-up question: {query}"

        return enhanced

    def reset_conversation(self) -> None:
        """Clear the conversation history."""

        self.conversation_history = []
        print("Conversation history has been reset.")


    @staticmethod
    def clear_model_cache():
        """Clear both LLM and embedding model cache."""

        RAGSystem._loaded_tokenizer = None
        RAGSystem._loaded_model = None
        RAGSystem._loaded_generator = None
        RAGSystem._loaded_model_name = None

        RAGSystem._loaded_embedding_model = None
        RAGSystem._loaded_embedding_model_name = None

In [None]:
print("-----Initializing RAG system-----\n")

# Candidate values for llm_model_name:
#   "meta-llama/Meta-Llama-3-8B-Instruct"  (if you have access)
#   "TinyLlama/TinyLlama-1.1B-Chat-v1.0"   (smaller model)
#   "google/flan-t5-base"                  (even smaller)
# NOTE(review): flan-t5 is an encoder-decoder checkpoint; confirm that the
# AutoModelForCausalLM loader used by RAGSystem accepts it before relying on it.
rag_settings = {
    "db_dir": "vector_db",
    "collection_name": "DR_X_Publications",
    # Choose based on your system resources
    "llm_model_name": "meta-llama/Meta-Llama-3-8B-Instruct",
    "embedding_model": "nomic-ai/nomic-embed-text-v1.5",
    "retrieve_k": 5,
    "device": 'auto',
}

rag = RAGSystem(**rag_settings)

-----Initializing RAG system-----

-----Using device: cuda-----

-----Loading LLM: meta-llama/Meta-Llama-3-8B-Instruct-----



tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

Device set to use cuda:0


-----Initialized LLM from meta-llama/Meta-Llama-3-8B-Instruct-----

-----Loading embedding model: nomic-ai/nomic-embed-text-v1.5-----


modules.json:   0%|          | 0.00/255 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/140 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/71.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/120 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/2.06k [00:00<?, ?B/s]

configuration_hf_nomic_bert.py:   0%|          | 0.00/1.96k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/nomic-ai/nomic-bert-2048:
- configuration_hf_nomic_bert.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_hf_nomic_bert.py:   0%|          | 0.00/103k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/nomic-ai/nomic-bert-2048:
- modeling_hf_nomic_bert.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors:   0%|          | 0.00/547M [00:00<?, ?B/s]



tokenizer_config.json:   0%|          | 0.00/1.19k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/286 [00:00<?, ?B/s]

### Q/A

In [None]:
# First question: the conversation history is still empty, so retrieval uses the question text as-is
question1 = "What is the average side length of the Great Pyramid according to Cole's survey?"

answer1 = rag.answer_question(question1)
print(f"Answer: {answer1}")

Processing query: What is the average side length of the Great Pyramid according to Cole's survey?

⏱️ RAG Query Processing: 17 tokens processed in 0.28s (59.84 tokens/sec)

Retrieved 5 relevant chunks

⏱️ RAG Answer Generation: 1739 tokens processed in 7.84s (221.78 tokens/sec)

   Total (in+out): 1781 tokens at 227.13 tokens/sec


Answer: According to the provided context, the average side length of the Great Pyramid according to Cole's survey is 439.8 cubits, which corresponds to a mean side length of 230.364 meters.


In [None]:
answer1

"According to the provided context, the average side length of the Great Pyramid according to Cole's survey is 439.8 cubits, which corresponds to a mean side length of 230.364 meters."

<div style="text-align: center;">
  <img src="https://github.com/UzairNaeem3/DrX_EnigmaticResearch/raw/master/images/Screenshot_1.png" style="display: inline-block;" width="500">
</div>

In [None]:
# Follow-up question: history exists now, so retrieval runs on the query enriched with the earlier exchange
question2 = "What are the dimensions of the three pyramids according to Petrie?"

answer2 = rag.answer_question(question2)
print(f"Answer: {answer2}")

Processing query: What are the dimensions of the three pyramids according to Petrie?

Enhanced query: Q: What is the average side length of the Great Pyramid according to Cole's survey?
A: According to the provided context, the average side length of the Great Pyramid according to Cole's survey is 439.8 cubits, which corresponds to a mean side length of 230.364 meters.
Follow-up question: What are the dimensions of the three pyramids according to Petrie?

⏱️ RAG Query Processing: 80 tokens processed in 0.39s (204.57 tokens/sec)

Retrieved 5 relevant chunks

⏱️ RAG Answer Generation: 2011 tokens processed in 14.48s (138.89 tokens/sec)

   Total (in+out): 2109 tokens at 145.66 tokens/sec


Answer: According to the provided context, the dimensions of the three pyramids according to Petrie are:

* Great Pyramid: 9068.8 inches, 439.81 royal cubits, azimuth -3' 43”
* Second Pyramid: 8474.9 inches, 411.00 royal cubits, azimuth -5' 26”
* Third Pyramid: 4153.6 inches, 201.44 royal cubits, azimu

<div style="text-align: center;">
  <img src="https://github.com/UzairNaeem3/DrX_EnigmaticResearch/raw/master/images/Screenshot_2.png" style="display: inline-block;" width="500">
</div>

In [None]:
# Follow-up question: the enriched retrieval query now carries the two most recent exchanges
question3 = "What are the axial distances between the Centres of the Three Pyramids as stated by Petrie in Inches?"

answer3 = rag.answer_question(question3)
print(f"Answer: {answer3}")

Processing query: What are the axial distances between the Centres of the Three Pyramids as stated by Petrie in Inches?

Enhanced query: Q: What is the average side length of the Great Pyramid according to Cole's survey?
A: According to the provided context, the average side length of the Great Pyramid according to Cole's survey is 439.8 cubits, which corresponds to a mean side length of 230.364 meters.
Q: What are the dimensions of the three pyramids according to Petrie?
A: According to the provided context, the dimensions of the three pyramids according to Petrie are:

* Great Pyramid: 9068.8 inches, 439.81 royal cubits, azimuth -3' 43”
* Second Pyramid: 8474.9 inches, 411.00 royal cubits, azimuth -5' 26”
* Third Pyramid: 4153.6 inches, 201.44 royal cubits, azimuth 14' 03”
Follow-up question: What are the axial distances between the Centres of the Three Pyramids as stated by Petrie in Inches?

⏱️ RAG Query Processing: 203 tokens processed in 1.08s (188.70 tokens/sec)

Retrieved 5 rel

<div style="text-align: center;">
  <img src="https://github.com/UzairNaeem3/DrX_EnigmaticResearch/raw/master/images/Screenshot_3.png" style="display: inline-block;" width="500">
</div>

The RAG system is doing a great job answering questions! It remembers previous questions and answers, which helps it understand follow-up questions better.

In [None]:
rag.conversation_history

[("What is the average side length of the Great Pyramid according to Cole's survey?",
  "According to the provided context, the average side length of the Great Pyramid according to Cole's survey is 439.8 cubits, which corresponds to a mean side length of 230.364 meters."),
 ('What are the dimensions of the three pyramids according to Petrie?',
  "According to the provided context, the dimensions of the three pyramids according to Petrie are:\n\n* Great Pyramid: 9068.8 inches, 439.81 royal cubits, azimuth -3' 43”\n* Second Pyramid: 8474.9 inches, 411.00 royal cubits, azimuth -5' 26”\n* Third Pyramid: 4153.6 inches, 201.44 royal cubits, azimuth 14' 03”"),
 ('What are the axial distances between the Centres of the Three Pyramids as stated by Petrie in Inches?',
  'According to the provided context, the axial distances between the Centres of the Three Pyramids as stated by Petrie in Inches are:\n\n* Centre of 1st to Centre of 2nd Pyramid: 13931.6 inches\n* Centre of 2nd to Centre of 3rd P

In [None]:
rag.reset_conversation()

Conversation history has been reset.


Next, I will ask a question about a different file, which is why I cleared the previous conversation.

In [None]:
rag.conversation_history

[]

In [None]:
# New topic after the reset: with no history, the question is embedded without enrichment
# (question1 / answer1 are reused here and overwrite the values from the pyramid section)
question1 = "Who is the author of The Alchemist?"

answer1 = rag.answer_question(question1)
print(f"Answer: {answer1}")

Processing query: Who is the author of The Alchemist?

⏱️ RAG Query Processing: 10 tokens processed in 0.18s (55.86 tokens/sec)

Retrieved 5 relevant chunks

⏱️ RAG Answer Generation: 2057 tokens processed in 3.79s (543.25 tokens/sec)

   Total (in+out): 2075 tokens at 548.00 tokens/sec


Answer: Based on the provided context, the author of The Alchemist is Paulo Coelho.


In [None]:
# Follow-up question: "the author" is only resolvable through the previous exchange added to the retrieval query
question2 = "Briefly tell about the author."

answer2 = rag.answer_question(question2)
print(f"Answer: {answer2}")

Processing query: Briefly tell about the author.

Enhanced query: Q: Who is the author of The Alchemist?
A: Based on the provided context, the author of The Alchemist is Paulo Coelho.
Follow-up question: Briefly tell about the author.

⏱️ RAG Query Processing: 42 tokens processed in 0.35s (118.37 tokens/sec)

Retrieved 5 relevant chunks

⏱️ RAG Answer Generation: 2527 tokens processed in 15.99s (158.00 tokens/sec)

   Total (in+out): 2635 tokens at 164.75 tokens/sec


Answer: Based on the provided context, Paulo Coelho is the author of The Alchemist. He was born in Rio de Janeiro, Brazil, and his own life has been as varied and unusual as the protagonists of his novels. He followed his dream to become a writer, despite initial frustration, and published his first book at the age of 38. He has since become an internationally acclaimed author, known for his powerful storytelling technique and profound spiritual insights. He has sold over 150 million copies worldwide and has been recogniz

<div style="text-align: center;">
  <img src="https://github.com/UzairNaeem3/DrX_EnigmaticResearch/raw/master/images/Screenshot_4.png" style="display: inline-block;" width="500">
</div>

In [None]:
rag.conversation_history

[('Who is the author of The Alchemist?',
  'Based on the provided context, the author of The Alchemist is Paulo Coelho.'),
 ('Briefly tell about the author.',
  'Based on the provided context, Paulo Coelho is the author of The Alchemist. He was born in Rio de Janeiro, Brazil, and his own life has been as varied and unusual as the protagonists of his novels. He followed his dream to become a writer, despite initial frustration, and published his first book at the age of 38. He has since become an internationally acclaimed author, known for his powerful storytelling technique and profound spiritual insights. He has sold over 150 million copies worldwide and has been recognized with numerous literary prizes.')]

In [None]:
rag.reset_conversation()

Conversation history has been reset.


In [None]:
# New topic after the reset: a standalone question, so no history is added to the retrieval query
question1 = "What are the types of adult stem cells used in tumor therapy, and what are their roles?"

answer1 = rag.answer_question(question1)
print(f"Answer: {answer1}")

Processing query: What are the types of adult stem cells used in tumor therapy, and what are their roles?

⏱️ RAG Query Processing: 20 tokens processed in 0.14s (144.62 tokens/sec)

Retrieved 5 relevant chunks

⏱️ RAG Answer Generation: 1474 tokens processed in 20.18s (73.03 tokens/sec)

   Total (in+out): 1623 tokens at 80.41 tokens/sec


Answer: According to the provided context, the types of adult stem cells used in tumor therapy are:

1. Hematopoietic stem cells (HSCs)
2. Mesenchymal stem cells (MSCs)
3. Neural stem cells (NSCs)

Their roles are:

1. HSCs: Can form all mature blood cells in the body and are currently approved by the FDA for the treatment of multiple myeloma and leukemia.
2. MSCs: Play important roles in tissue repair and regeneration, and are used as a complementary approach in treating tumors.
3. NSCs: Can self-renew and generate new neurons and glial cells, and are used for treating both primary and metastatic breast and other tumors.


<div style="text-align: center;">
  <img src="https://github.com/UzairNaeem3/DrX_EnigmaticResearch/raw/master/images/Screenshot_7.png" style="display: inline-block;" width="500">
</div>

In [None]:
rag.reset_conversation()

Conversation history has been reset.


In [None]:
# New topic after the reset: a standalone question, so no history is added to the retrieval query
question1 = "What is material price regression?"

answer1 = rag.answer_question(question1)
print(f"Answer: {answer1}")

Processing query: What is material price regression?

⏱️ RAG Query Processing: 7 tokens processed in 0.11s (63.01 tokens/sec)

Retrieved 5 relevant chunks

⏱️ RAG Answer Generation: 1980 tokens processed in 10.16s (194.82 tokens/sec)

   Total (in+out): 2048 tokens at 201.51 tokens/sec


Answer: Based on the provided context, material price regression is a method used to calculate low, mid, and high (10th, 50th, and 90th percentile) material price estimates for different components. The regression coefficients and intercepts are provided in the file to calculate the material price estimates based on chosen performance metric input values.


In [None]:
# Echo the returned string as the cell's output value (shown as its repr).
answer1

'Based on the provided context, material price regression is a method used to calculate low, mid, and high (10th, 50th, and 90th percentile) material price estimates for different components. The regression coefficients and intercepts are provided in the file to calculate the material price estimates based on chosen performance metric input values.'

In [None]:
# Follow-up turn: the conversation is intentionally NOT reset before this cell,
# so the previous Q/A pair is carried along with the new question (see the
# "Enhanced query" lines in the cell output).
question2 = "What is the performance metric 1 in retail price regression?"

# Result depends on the history left by the previous question cell; running this
# cell on its own after a reset would issue a plain, non-enhanced query instead.
answer2 = rag.answer_question(question2)
print(f"Answer: {answer2}")

Processing query: What is the performance metric 1 in retail price regression?

Enhanced query: Q: What is material price regression?
A: Based on the provided context, material price regression is a method used to calculate low, mid, and high (10th, 50th, and 90th percentile) material price estimates for different components. The regression coefficients and intercepts are provided in the file to calculate the material price estimates based on chosen performance metric input values.
Follow-up question: What is the performance metric 1 in retail price regression?

⏱️ RAG Query Processing: 94 tokens processed in 0.39s (238.39 tokens/sec)

Retrieved 5 relevant chunks

⏱️ RAG Answer Generation: 2394 tokens processed in 8.37s (285.94 tokens/sec)

   Total (in+out): 2445 tokens at 292.03 tokens/sec


Answer: Based on the provided context, the performance metric 1 in retail price regression is UEF (Unitless) with a coefficient-low of 102.33, coefficient-mid of 248.33, and coefficient-high of 8

In [None]:
# Echo the full returned string; the printed copy in the previous cell's stored
# output is cut off mid-number, while this repr shows the complete answer.
answer2

'Based on the provided context, the performance metric 1 in retail price regression is UEF (Unitless) with a coefficient-low of 102.33, coefficient-mid of 248.33, and coefficient-high of 888.75.'

<div style="text-align: center;">
  <img src="https://github.com/UzairNaeem3/DrX_EnigmaticResearch/raw/master/images/Screenshot_5.png" style="display: inline-block;" width="500">
</div>

In [None]:
# Drop the price-regression Q/A history before switching to an unrelated topic.
rag.reset_conversation()

Conversation history has been reset.


In [None]:
# New topic, first turn after a reset. Note that `question1` / `answer1` are
# reused here and overwrite the values from the earlier price-regression cells.
question1 = "Could you tell me Program Specific Outcomes (PSOs) for MSc Applied Psychology?"

# NOTE(review): the printed answer in the stored output stops mid-sentence
# ("...comply wi"). Cannot tell from here whether that is a display truncation
# or a generation length limit - inspect `answer1` directly to confirm.
answer1 = rag.answer_question(question1)
print(f"Answer: {answer1}")

Processing query: Could you tell me Program Specific Outcomes (PSOs) for MSc Applied Psychology?

⏱️ RAG Query Processing: 19 tokens processed in 0.13s (149.03 tokens/sec)

Retrieved 5 relevant chunks

⏱️ RAG Answer Generation: 3570 tokens processed in 35.16s (101.55 tokens/sec)

   Total (in+out): 3800 tokens at 108.09 tokens/sec


Answer: I can help you with that! According to the provided context, the Program Specific Outcomes (PSOs) for MSc Applied Psychology are:

1. PSO1 – Placement: To prepare the students who will demonstrate respectful engagement with others' ideas, behaviors, beliefs, and apply diverse frames of reference to decisions and actions.
2. PSO 2 - Entrepreneur: To create effective entrepreneurs by enhancing their critical thinking, problem-solving, decision-making, and leadership skills that will facilitate startups and high-potential organizations.
3. PSO3 – Research and Development: Design and implement HR systems and practices grounded in research that comply wi

<div style="text-align: center;">
  <img src="https://github.com/UzairNaeem3/DrX_EnigmaticResearch/raw/master/images/Screenshot_6.png" style="display: inline-block;" width="500">
</div>

In [None]:
# Drop the PSO Q/A history so the next question starts a fresh conversation.
rag.reset_conversation()

Conversation history has been reset.


In [None]:
# Intended as a first turn after a reset (again reusing `question1` / `answer1`).
question1 = "Which study evaluated turfgrass soil organic carbon (SOC) over the longest time since establishment?"

# NOTE(review): the stored output for this cell shows an "Enhanced query" that
# already contains a previous Q/A for this same question, so the cell appears to
# have been re-run without resetting the conversation first. The answer shown is
# therefore not a clean first-turn result; re-run the reset cell immediately
# before this one to reproduce it from a fresh state.
answer1 = rag.answer_question(question1)
print(f"Answer: {answer1}")

Processing query: Which study evaluated turfgrass soil organic carbon (SOC) over the longest time since establishment?

Enhanced query: Q: Which study evaluated turfgrass soil organic carbon (SOC) over the longest time since establishment?
A: I don't have enough information to answer this question. The provided context does not specify the time period over which the turfgrass soil organic carbon (SOC) was evaluated in each study. Therefore, it is not possible to determine which study evaluated SOC over the longest time since establishment.
Follow-up question: Which study evaluated turfgrass soil organic carbon (SOC) over the longest time since establishment?



You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


⏱️ RAG Query Processing: 101 tokens processed in 0.44s (228.93 tokens/sec)

Retrieved 5 relevant chunks

⏱️ RAG Answer Generation: 3809 tokens processed in 11.91s (319.93 tokens/sec)

   Total (in+out): 3875 tokens at 325.47 tokens/sec


Answer: I have enough information to answer this question. According to the provided context, the study that evaluated turfgrass soil organic carbon (SOC) over the longest time since establishment is Qian et al. (2010). They assessed soil organic carbon input from urban turfgrasses and reported a study that spanned 30 years.


In [None]:
# Echo the returned string as the cell's output value (shown as its repr).
answer1

'I have enough information to answer this question. According to the provided context, the study that evaluated turfgrass soil organic carbon (SOC) over the longest time since establishment is Qian et al. (2010). They assessed soil organic carbon input from urban turfgrasses and reported a study that spanned 30 years.'

It couldn't answer questions about the 'Datasets summaries' document 🙂

This was a basic implementation of the Retrieval-Augmented Generation (RAG) approach.

While frameworks like `LlamaIndex` and `LangChain` offer more advanced RAG architectures, and techniques such as reranking and advanced retrieval methods could improve performance, I believe the assignment's objective was to build a system from scratch rather than rely on pre-built frameworks.

Using tools like `LlamaParse` for document parsing, and strategies such as breaking queries down into sub-queries for retrieval, could have significantly improved the system's capabilities. However, I opted for a from-scratch approach based on my understanding of the assignment's requirements.

