In [81]:
import os
import re
import numpy as np
import random
import requests
import sacrebleu
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

## RAG from Scatch

In [9]:
url = 'https://www.gutenberg.org/files/1342/1342-0.txt'

response = requests.get(url)
response.raise_for_status()  

document_content = response.text

with open('pride_and_prejudice.txt', 'w', encoding='utf-8') as file:
    file.write(document_content)

print("Document downloaded and saved successfully.")

Document downloaded and saved successfully.


In [34]:
def load_documents_from_folder(folder_path):
    docs = []
    for filename in os.listdir(folder_path):
        if filename.endswith(".txt"):
            with open(os.path.join(folder_path, filename), "r", encoding="utf-8") as f:
                docs.append(f.read())
    return docs


In [38]:
def clean_text(text):
    text = " ".join(text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

def chunk_text(text, chunk_size=300, overlap=50):
    words = text.split()
    chunks = []
    for i in range(0, len(words), chunk_size - overlap):
        chunk = ' '.join(words[i:i + chunk_size])
        if chunk:
            chunks.append(chunk)
    return chunks

def filter_chunks(chunks):
    return [c for c in chunks if len(c.split()) >= 10]  


In [40]:
def create_embeddings(chunks):
    if not chunks:
        raise ValueError("No valid chunks found for vectorization.")

    vectorizer = TfidfVectorizer(stop_words=None)
    vectors = vectorizer.fit_transform(chunks)

    if not vectorizer.vocabulary_:
        raise ValueError("TF-IDF vocabulary is empty. Check your documents.")

    return vectors, vectorizer

In [42]:
def retrieve(query, chunks, vectorizer, chunk_vectors, top_k=3):
    query_vec = vectorizer.transform([query])
    similarities = cosine_similarity(query_vec, chunk_vectors).flatten()
    top_indices = np.argsort(similarities)[-top_k:][::-1]
    return [(chunks[i], similarities[i]) for i in top_indices]


In [44]:
# without LLM
def generate_answer(query, retrieved_chunks):
    combined_context = ' '.join([chunk for chunk, _ in retrieved_chunks])
    response = f"Q: {query}\nA (based on context): {combined_context[:500]}..."
    return response


In [46]:
def rag_pipeline(query, docs_folder):
    # Step 1: Load
    docs = load_documents_from_folder(docs_folder)

    # Step 2: Clean + Chunk + filtering
    all_chunks = []
    all_chunks = filter_chunks(chunk_text(clean_text(docs)))

    # Step 3: Embeddings
    chunk_vectors, vectorizer = create_embeddings(all_chunks)

    # Step 4: Retrieval
    top_chunks = retrieve(query, all_chunks, vectorizer, chunk_vectors)

    # Step 5: Generation
    answer = generate_answer(query, top_chunks)
    return answer


In [48]:
docs_folder = "/Users/mehuljain/Tests"
if __name__ == "__main__":
    
    query = "What was Mr. Darcy's first impression of Elizabeth Bennet?"
    answer = rag_pipeline(query, docs_folder)
    print(answer)


Q: What was Mr. Darcy's first impression of Elizabeth Bennet?
A (based on context): fantastic limits and restrictions against which Mary Wollstonecraft protested with better general sense than particular taste or judgment. Miss Austen, too, drew back when the white frost touched her shoes; but I think she would have made a pretty good journey even in a black one._ _For if her knowledge was not very extended, she knew two things which only genius knows. The one was humanity, and the other was art. On the first head she could not make a mistake; her men, though limited, are true,...


## RAG with RL

In [50]:
# [Input Query]
#      ↓
# [Agent (RL Policy)]
#      ↓
# [Select Action]
#      ↓
# [Modified Query / Retrieval]
#      ↓
# [RAG Pipeline (Retrieval + Answer Generation)]
#      ↓
# [Answer]
#      ↓
# [Reward = BLEU Score vs Ground Truth]
#      ↓
# [Update Policy (optional)]


In [52]:
def improve_query(query):
    return query + " in detail"  # simplistic enhancement

def expand_retrieval(chunks, top_chunks, k_extra=2):
    # Add next best chunks not in top_chunks
    top_indices = [chunks.index(c[0]) for c in top_chunks]
    scores = [c[1] for c in top_chunks]
    others = [(i, score) for i, score in enumerate(scores) if i not in top_indices]
    others = sorted(others, key=lambda x: -x[1])[:k_extra]
    extra_chunks = [(chunks[i], scores[i]) for i, _ in others]
    return top_chunks + extra_chunks


In [83]:
def calculate_bleu(reference, hypothesis):
    bleu = sacrebleu.sentence_bleu(hypothesis, [reference])
    return bleu.score / 100.0 

In [85]:
# based on max score
def rl_decision_loop(query, ground_truth, docs_folder):
    docs = load_documents_from_folder(docs_folder)

    # Preprocessing
    all_chunks = []
    all_chunks = filter_chunks(chunk_text(clean_text(docs)))

    chunk_vectors, vectorizer = create_embeddings(all_chunks)

    # ACTION 1: Direct RAG
    top_chunks_1 = retrieve(query, all_chunks, vectorizer, chunk_vectors)
    answer_1 = generate_answer(query, top_chunks_1)
    reward_1 = calculate_bleu(ground_truth, answer_1)

    # ACTION 2: Improved Query
    mod_query = improve_query(query)
    top_chunks_2 = retrieve(mod_query, all_chunks, vectorizer, chunk_vectors)
    answer_2 = generate_answer(mod_query, top_chunks_2)
    reward_2 = calculate_bleu(ground_truth, answer_2)

    # ACTION 3: Expanded Retrieval
    top_chunks_3 = expand_retrieval(all_chunks, top_chunks_1)
    answer_3 = generate_answer(query, top_chunks_3)
    reward_3 = calculate_bleu(ground_truth, answer_3)

    results = {
        'direct': (answer_1, reward_1),
        'query_improved': (answer_2, reward_2),
        'retrieval_expanded': (answer_3, reward_3)
    }

    # Select best action based on reward
    best_action = max(results.items(), key=lambda x: x[1][1])
    print(f"Best action: {best_action[0]} | Reward: {best_action[1][1]:.4f}")
    return best_action[1][0]  # best answer


In [87]:
if __name__ == "__main__":
    query = "What was Mr. Darcy's first impression of Elizabeth Bennet?"
    ground_truth = "Photosynthesis is the process by which green plants use sunlight to make food from carbon dioxide and water."

    best_answer = rl_decision_loop(query, ground_truth, docs_folder)
    print("\nFinal Answer:\n", best_answer)


Best action: direct | Reward: 0.0043

Final Answer:
 Q: What was Mr. Darcy's first impression of Elizabeth Bennet?
A (based on context): fantastic limits and restrictions against which Mary Wollstonecraft protested with better general sense than particular taste or judgment. Miss Austen, too, drew back when the white frost touched her shoes; but I think she would have made a pretty good journey even in a black one._ _For if her knowledge was not very extended, she knew two things which only genius knows. The one was humanity, and the other was art. On the first head she could not make a mistake; her men, though limited, are true,...


## RAG with Q learning

In [89]:
ACTIONS = ["direct", "query_improved", "retrieval_expanded"]
Q_TABLE = {}  # State-Action -> Q-value
ALPHA = 0.1   # Learning rate
GAMMA = 0.9   # Discount factor
EPSILON = 0.2 # Exploration factor


In [91]:
def get_state(query):
    return "long" if len(query.split()) > 5 else "short"


In [93]:
def choose_action(state, q_table):
    if random.random() < EPSILON or state not in q_table:
        return random.choice(ACTIONS)
    return max(q_table[state], key=q_table[state].get)


In [95]:
def update_q_table(state, action, reward, next_state, q_table):
    if state not in q_table:
        q_table[state] = {a: 0.0 for a in ACTIONS}
    if next_state not in q_table:
        q_table[next_state] = {a: 0.0 for a in ACTIONS}

    max_future_q = max(q_table[next_state].values())
    current_q = q_table[state][action]

    new_q = current_q + ALPHA * (reward + GAMMA * max_future_q - current_q)
    q_table[state][action] = new_q


In [125]:
def train_rl_rag(queries, ground_truths, docs_folder, episodes=100):
    global Q_TABLE

    docs = load_documents_from_folder(docs_folder)
    all_chunks = []
    all_chunks = filter_chunks(chunk_text(clean_text(docs)))
        
    chunk_vectors, vectorizer = create_embeddings(all_chunks)

    for ep in range(episodes):
        total_reward = 0

        for query, gt in zip(queries, ground_truths):
            state = get_state(query)
            action = choose_action(state, Q_TABLE)

            if action == "query_improved":
                mod_query = improve_query(query)
                top_chunks = retrieve(mod_query, all_chunks, vectorizer, chunk_vectors)
                answer = generate_answer(mod_query, top_chunks)

            elif action == "retrieval_expanded":
                top_chunks_direct = retrieve(query, all_chunks, vectorizer, chunk_vectors)
                top_chunks = expand_retrieval(all_chunks, top_chunks_direct)
                answer = generate_answer(query, top_chunks)

            else: 
                top_chunks = retrieve(query, all_chunks, vectorizer, chunk_vectors)
                answer = generate_answer(query, top_chunks)

            reward = calculate_bleu(gt, answer)
            next_state = get_state(query)  

            update_q_table(state, action, reward, next_state, Q_TABLE)
            total_reward += reward

In [131]:
Q_TABLE

{'short': {'direct': 0.0030900863822670723,
  'query_improved': 0.003523775362974809,
  'retrieval_expanded': 0.003127519187687356}}

In [127]:
def infer_best_action(query, docs_folder):
    state = get_state(query)

    if state not in Q_TABLE:
        action = random.choice(ACTIONS)
    else:
        action = max(Q_TABLE[state], key=Q_TABLE[state].get)

    docs = load_documents_from_folder(docs_folder)
    all_chunks = []
    all_chunks = filter_chunks(chunk_text(clean_text(docs)))
        
    chunk_vectors, vectorizer = create_embeddings(all_chunks)

    if action == "query_improved":
        mod_query = improve_query(query)
        top_chunks = retrieve(mod_query, all_chunks, vectorizer, chunk_vectors)
        answer = generate_answer(mod_query, top_chunks)

    elif action == "retrieval_expanded":
        top_chunks_direct = retrieve(query, all_chunks, vectorizer, chunk_vectors)
        top_chunks = expand_retrieval(all_chunks, top_chunks_direct)
        answer = generate_answer(query, top_chunks)

    else: 
        top_chunks = retrieve(query, all_chunks, vectorizer, chunk_vectors)
        answer = generate_answer(query, top_chunks)

    print(f"Inference used action: {action}")
    return answer


In [129]:
if __name__ == "__main__":
    query = "What was Mr. Darcy's first impression of Elizabeth Bennet?"
    ground_truth = """Mr. Darcy initially considered Elizabeth Bennet "tolerable, but not handsome enough to tempt me." This remark reflected his pride and set the tone for their evolving relationship throughout the novel."""
    
    train_rl_rag(query, ground_truth, docs_folder, episodes=10)
    best_answer = infer_best_action(query, docs_folder)
    print("\nFinal Answer:\n", best_answer)

Inference used action: direct

Final Answer:
 Q: What was Mr. Darcy's first impression of Elizabeth Bennet?
A (based on context): fantastic limits and restrictions against which Mary Wollstonecraft protested with better general sense than particular taste or judgment. Miss Austen, too, drew back when the white frost touched her shoes; but I think she would have made a pretty good journey even in a black one._ _For if her knowledge was not very extended, she knew two things which only genius knows. The one was humanity, and the other was art. On the first head she could not make a mistake; her men, though limited, are true,...
