#### Date 30th july 2025 Project Rag for AMLGO LABS

In [1]:
import json
import sys
import os
sys.path.append('../src')

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
from typing import List, Tuple, Iterator
import warnings
warnings.filterwarnings('ignore')

print("Setting up language model and RAG pipeline...")

# Model configuration
model_name = "microsoft/DialoGPT-medium"
print(f"Selected model: {model_name}")

# Check if CUDA is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Setting up language model and RAG pipeline...
Selected model: microsoft/DialoGPT-medium
Using device: cpu


In [2]:
# Load tokenizer and model
print(" Loading tokenizer and model...")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto" if torch.cuda.is_available() else None
)

# Configure tokenizer
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print("Model and tokenizer loaded successfully!")

 Loading tokenizer and model...


tokenizer_config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/863M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Model and tokenizer loaded successfully!


model.safetensors:   0%|          | 0.00/863M [00:00<?, ?B/s]

In [3]:
# Create src/__init__.py
os.makedirs('../src', exist_ok=True)
with open('../src/__init__.py', 'w') as f:
    f.write('# RAG Chatbot Source Package\n')

# Create document_processor.py
doc_processor_code = '''"""Document processing utilities"""
import PyPDF2
import re
import json
from typing import List, Dict
import nltk
from nltk.tokenize import sent_tokenize

class DocumentProcessor:
    def __init__(self):
        try:
            nltk.download('punkt', quiet=True)
        except:
            pass
    
    def extract_text_from_pdf(self, pdf_path: str) -> str:
        """Extract text from PDF file"""
        try:
            with open(pdf_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                text = ""
                for page in pdf_reader.pages:
                    text += page.extract_text()
                return text
        except Exception as e:
            raise Exception(f"Error reading PDF: {e}")
    
    def clean_text(self, text: str) -> str:
        """Clean extracted text"""
        text = re.sub(r'\\s+', ' ', text)
        text = re.sub(r'[^\\w\\s.,!?;:()\\-\\'\\\"]+', '', text)
        text = re.sub(r'([.,!?;]){2,}', r'\\1', text)
        return text.strip()
'''

with open('../src/document_processor.py', 'w') as f:
    f.write(doc_processor_code)

print("Created document_processor.py")

Created document_processor.py


In [4]:
# Create retriever.py
retriever_code = '''"""Document retrieval using FAISS and sentence transformers"""
import faiss
import numpy as np
import json
from sentence_transformers import SentenceTransformer
from typing import List, Tuple

class DocumentRetriever:
    def __init__(self, index_path: str, metadata_path: str, model_name: str = 'all-MiniLM-L6-v2'):
        """Initialize retriever with FAISS index and metadata"""
        self.embedding_model = SentenceTransformer(model_name)
        self.index = faiss.read_index(index_path)
        
        with open(metadata_path, 'r', encoding='utf-8') as f:
            self.metadata = json.load(f)
        
        self.chunks = [chunk['text'] for chunk in self.metadata['chunks']]
        
    def retrieve(self, query: str, top_k: int = 3) -> List[Tuple[str, float]]:
        """Retrieve most relevant chunks for the query"""
        # Encode query with normalization
        query_embedding = self.embedding_model.encode(
            [query], 
            normalize_embeddings=True,
            convert_to_numpy=True
        )
        
        # Search in FAISS index
        scores, indices = self.index.search(query_embedding.astype('float32'), top_k)
        
        # Return chunks with scores
        results = []
        for score, idx in zip(scores[0], indices[0]):
            if idx < len(self.chunks):
                results.append((self.chunks[idx], float(score)))
        
        return results
    
    def get_chunk_metadata(self, chunk_text: str) -> dict:
        """Get metadata for a specific chunk"""
        for chunk in self.metadata['chunks']:
            if chunk['text'] == chunk_text:
                return chunk
        return {}
'''

with open('../src/retriever.py', 'w') as f:
    f.write(retriever_code)

print(" Created retriever.py")

 Created retriever.py


In [6]:
# Create generator.py
generator_code = '''"""Response generation using transformers"""
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
from typing import Iterator

class ResponseGenerator:
    def __init__(self, model_name: str = "microsoft/DialoGPT-medium"):
        """Initialize response generator"""
        self.model_name = model_name
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            device_map="auto" if torch.cuda.is_available() else None
        )
        
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
    
    def generate(self, prompt: str, max_new_tokens: int = 150) -> str:
        """Generate response from prompt"""
        try:
            inputs = self.tokenizer.encode(prompt, return_tensors='pt', truncate=True, max_length=512)
            
            with torch.no_grad():
                outputs = self.model.generate(
                    inputs,
                    max_new_tokens=max_new_tokens,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id,
                    eos_token_id=self.tokenizer.eos_token_id
                )
            
            # Decode only the new tokens
            generated_text = self.tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)
            return generated_text.strip()
            
        except Exception as e:
            return f"Error generating response: {str(e)}"
    
    def generate_streaming(self, prompt: str, max_new_tokens: int = 150) -> Iterator[str]:
        """Generate streaming response"""
        response = self.generate(prompt, max_new_tokens)
        
        # Simulate streaming by yielding words
        words = response.split()
        for i, word in enumerate(words):
            if i == 0:
                yield word
            else:
                yield " " + word
'''

with open('../src/generator.py', 'w') as f:
    f.write(generator_code)

print(" Created generator.py")

 Created generator.py


In [7]:
# Create pipeline.py
pipeline_code = '''"""Complete RAG pipeline combining retrieval and generation"""
from .retriever import DocumentRetriever
from .generator import ResponseGenerator
from typing import List, Tuple, Iterator

class RAGPipeline:
    def __init__(self, index_path: str, metadata_path: str, model_name: str = "microsoft/DialoGPT-medium"):
        """Initialize complete RAG pipeline"""
        print("Initializing RAG pipeline...")
        self.retriever = DocumentRetriever(index_path, metadata_path)
        self.generator = ResponseGenerator(model_name)
        print("RAG pipeline initialized!")
    
    def create_prompt(self, query: str, retrieved_chunks: List[str]) -> str:
        """Create structured prompt for RAG"""
        context = "\\n\\n".join([f"Context {i+1}: {chunk}" for i, chunk in enumerate(retrieved_chunks)])
        
        prompt = f"""Based on the eBay User Agreement information provided below, answer the user's question accurately and concisely.

Context Information:
{context}

User Question: {query}

Instructions:
- Answer based only on the provided context
- Be specific and accurate
- If the context doesn't contain enough information, say so
- Keep the response concise but complete

Answer:"""
        
        return prompt
    
    def query(self, user_query: str, top_k: int = 3) -> Tuple[str, List[str]]:
        """Process user query and return response with sources"""
        # Retrieve relevant chunks
        retrieved_results = self.retriever.retrieve(user_query, top_k)
        retrieved_chunks = [chunk for chunk, score in retrieved_results]
        
        # Create prompt
        prompt = self.create_prompt(user_query, retrieved_chunks)
        
        # Generate response
        response = self.generator.generate(prompt, max_new_tokens=200)
        
        return response, retrieved_chunks
    
    def query_streaming(self, user_query: str, top_k: int = 3) -> Tuple[Iterator[str], List[str]]:
        """Process user query and return streaming response with sources"""
        # Retrieve relevant chunks
        retrieved_results = self.retriever.retrieve(user_query, top_k)
        retrieved_chunks = [chunk for chunk, score in retrieved_results]
        
        # Create prompt
        prompt = self.create_prompt(user_query, retrieved_chunks)
        
        # Generate streaming response
        response_stream = self.generator.generate_streaming(prompt, max_new_tokens=200)
        
        return response_stream, retrieved_chunks
'''

with open('../src/pipeline.py', 'w') as f:
    f.write(pipeline_code)

print("Created pipeline.py")
print("All source files created successfully!")

Created pipeline.py
All source files created successfully!


In [12]:
import sys
import os

# Add parent directory to sys.path so `src` can be imported
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.append(project_root)


In [13]:
# Test the complete pipeline
try:
    from src.pipeline import RAGPipeline
    
    # Initialize pipeline
    pipeline = RAGPipeline(
        index_path="../vectordb/document_index.faiss",
        metadata_path="../vectordb/metadata.json",
        model_name=model_name
    )
    
    # Test query
    test_query = "What are eBay's return policies?"
    print(f"\\nTesting pipeline with query: '{test_query}'")
    
    response, sources = pipeline.query(test_query, top_k=2)
    
    print(f"\\nGenerated Response:")
    print(f"{response}")
    
    print(f"\\nRetrieved Sources ({len(sources)}):")
    for i, source in enumerate(sources, 1):
        print(f"\\nSource {i}: {source[:200]}...")
    
    print("\\nPipeline test successful!")
    
except Exception as e:
    print(f"Pipeline test failed: {e}")
    import traceback
    traceback.print_exc()

Initializing RAG pipeline...


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


RAG pipeline initialized!
\nTesting pipeline with query: 'What are eBay's return policies?'
\nGenerated Response:
Error generating response: PreTrainedTokenizerFast._batch_encode_plus() got an unexpected keyword argument 'truncate'
\nRetrieved Sources (2):
\nSource 1: Where settings have been set to automatically accept requests for returns or replacements, an eBay -generated return shipping label will be provided to your buyer. You agree to comply with our returns...
\nSource 2: 14. Additional Terms Returns and cancellations for sellers Sellers can create rules to automate replacements, returns, and refunds under certain circumstances. For all new sellers, in listings where r...
\nPipeline test successful!
