# RAG - Query Transformation
### Query transformations are a set of approaches that rewrite and/or modify questions to improve retrieval.


# Environment Configuration

1 - Install the necessary libraries


In [None]:
! pip install langchain_community tiktoken langchain-openai langchainhub chromadb langchain


2 - Use LangSmith to monitor the LLM flows

In [1]:
import os
from dotenv import load_dotenv

# Load variables from a .env file into the process environment.
load_dotenv()

# Switch tracing on, then copy the LangSmith endpoint and API key into the
# LANGCHAIN_* variable names.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = os.environ["LANGSMITH_ENDPOINT"]
os.environ["LANGCHAIN_API_KEY"] = os.environ["LANGSMITH_API_KEY"]

3 - Azure OpenAI Keys 

In [2]:
# Select the Azure flavour of the OpenAI API and the API version to use.
os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_VERSION"] = "2024-02-01"

# The endpoint and key are read from the environment further down in this notebook,
# so they must already be set (for example through the .env file). Assigning a
# variable to itself only verified that it exists; make that check explicit and
# report every missing name at once. KeyError is kept as the exception type.
_missing_azure_vars = [
    name for name in ("AZURE_OPENAI_ENDPOINT", "OPENAI_API_KEY") if name not in os.environ
]
if _missing_azure_vars:
    raise KeyError(
        "Missing required environment variable(s): " + ", ".join(_missing_azure_vars)
    )

## Multi Query 

Docs: https://python.langchain.com/docs/modules/data_connection/retrievers/MultiQueryRetriever

### Index

#### Indexing 

In [5]:
#### INDEXING ####

import bs4
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import AzureOpenAIEmbeddings

# Load: fetch the blog post as documents
loader = WebBaseLoader("https://medium.com/@arikbidny/mastering-github-copilot-essential-tips-tricks-and-prompt-engineering-for-optimal-coding-efd420864c3d")
blog_docs = loader.load()

# Split: cut the documents into overlapping chunks with the tiktoken-based splitter
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=300, chunk_overlap=50)
splits = text_splitter.split_documents(blog_docs)

# Index: embed the chunks with the Azure OpenAI embedding deployment and keep them in Chroma
embd = AzureOpenAIEmbeddings(
    azure_deployment="text-embedding-ada-002",
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    openai_api_key=os.environ["OPENAI_API_KEY"],
    openai_api_version="2023-03-15-preview",
)
vectorstore = Chroma.from_documents(embedding=embd, documents=splits)

# Retriever interface over the vector store, used by the chains below
retriever = vectorstore.as_retriever()

### Prompt

In [7]:
from langchain.prompts import ChatPromptTemplate

# Multi Query: Different Perspectives
# Prompt that asks the model for five alternative versions of the user question,
# separated by newlines. `{question}` is the placeholder filled in when the chain
# is invoked.
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser
from langchain_openai import AzureChatOpenAI

# Azure OpenAI chat model used to generate the alternative questions.
llm = AzureChatOpenAI(
    openai_api_version="2023-03-15-preview",
    azure_deployment="gpt-35-turbo",
    temperature=0,
)

# prompt -> LLM -> plain string -> list of queries (one per output line).
# Blank lines in the model output are dropped and each query is stripped, so an
# empty string is never handed to the retriever as a search query.
generate_queries = (
    prompt_perspectives
    | llm
    | StrOutputParser()
    | (lambda x: [line.strip() for line in x.split("\n") if line.strip()])
)

In [8]:
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """Return the unique union of the retrieved documents.

    Args:
        documents: A list of document lists (one inner list per query).

    Returns:
        A flat list with duplicates removed, ordered by first appearance.
    """
    # Flatten the list of lists and serialize each document to a string so it
    # can be used as a hashable key.
    serialized_docs = [dumps(doc) for sublist in documents for doc in sublist]
    # dict.fromkeys removes duplicates while keeping first-seen order; a plain
    # set() would return the documents in an arbitrary, run-dependent order.
    unique_docs = dict.fromkeys(serialized_docs)
    # Turn the serialized strings back into documents.
    return [loads(doc) for doc in unique_docs]

# Retrieve: generate the alternative queries, run the retriever on each of them,
# and merge the results into one de-duplicated list.
question = "What is the Code Styles?"
retrieval_chain = (
    generate_queries
    | retriever.map()
    | get_unique_union
)
docs = retrieval_chain.invoke({"question": question})
len(docs)

  warn_beta(


5

In [9]:
from operator import itemgetter
from langchain_openai import AzureChatOpenAI
from langchain_core.runnables import RunnablePassthrough


# RAG: answer the question from the documents returned by the multi-query retrieval chain
template = (
    "Answer the following question based on this context:\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)
prompt = ChatPromptTemplate.from_template(template)

# Azure OpenAI Configuration
llm = AzureChatOpenAI(
    azure_deployment="gpt-35-turbo",
    openai_api_version="2023-03-15-preview",
    temperature=0,
)

# "context" comes from the retrieval chain; "question" is taken from the input dict
final_rag_chain = (
    {
        "context": retrieval_chain,
        "question": itemgetter("question"),
    }
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question": question})

'The Code Styles refer to a set of tips and techniques mentioned in the context that can enhance coding efficiency and leverage the capabilities of GitHub Copilot. These Code Styles include:\n\n1. Fix Code Inline: This tip suggests harnessing the power of GitHub Copilot in your IDE to instantly suggest and implement fixes for your code directly within your workflow.\n\n2. Get help on error logs in your terminal: GitHub Copilot can be used to swiftly interpret and resolve error logs in your terminal, enhancing debugging efficiency.\n\n3. Generate documentation for your code: GitHub Copilot can automatically generate comprehensive documentation for your codebase, streamlining the documentation process.\n\n4. Run command with GitHub Copilot Chat: GitHub Copilot Chat can be utilized to effortlessly run commands and streamline the coding workflow through conversational AI assistance.\n\nThese Code Styles aim to improve coding productivity and streamline the development process by leveraging

## RAG-Fusion

Docs: https://github.com/langchain-ai/langchain/blob/master/cookbook/rag_fusion.ipynb?ref=blog.langchain.dev


### Prompt

In [10]:
from langchain.prompts import ChatPromptTemplate

# RAG-Fusion: Related
# Prompt that asks the model for four search queries related to the input;
# `{question}` is the placeholder filled in when the chain is invoked.
template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [11]:
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import AzureChatOpenAI

# Azure OpenAI chat model used to generate the related search queries.
llm = AzureChatOpenAI(
    openai_api_version="2023-03-15-preview",
    azure_deployment="gpt-35-turbo",
    temperature=0,
)

# prompt -> LLM -> plain string -> list of queries (one per output line).
# Blank lines in the model output are dropped and each query is stripped, so an
# empty string is never handed to the retriever as a search query.
generate_queries = (
    prompt_rag_fusion
    | llm
    | StrOutputParser()
    | (lambda x: [line.strip() for line in x.split("\n") if line.strip()])
)

In [12]:
from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    """Fuse several ranked document lists into one list with Reciprocal Rank Fusion.

    Every document receives ``1 / (rank + k)`` for each list it appears in, where
    ``rank`` is its zero-based position in that list; the contributions are summed.

    Args:
        results: Ranked lists of documents (one inner list per query).
        k: Constant added to the rank in the RRF formula.

    Returns:
        A list of ``(document, fused_score)`` tuples sorted by fused score in
        descending order.
    """
    # Fused score of each unique document, keyed by its serialized form
    fused_scores = {}

    for docs in results:
        for rank, doc in enumerate(docs):
            # Serialize the document so it can be used as a dictionary key
            doc_str = dumps(doc)
            # Documents not seen before start from a score of 0
            fused_scores[doc_str] = fused_scores.get(doc_str, 0) + 1 / (rank + k)

    # Highest fused score first
    ranked = sorted(fused_scores.items(), key=lambda item: item[1], reverse=True)

    # Deserialize the keys back into documents and pair each with its score
    return [(loads(doc_str), score) for doc_str, score in ranked]

# Generate the related queries, run the retriever on each of them, then fuse the
# ranked result lists into a single list of (document, score) tuples.
retrieval_chain_rag_fusion = (
    generate_queries
    | retriever.map()
    | reciprocal_rank_fusion
)
docs = retrieval_chain_rag_fusion.invoke({"question": question})
len(docs)

5

In [13]:
from langchain_core.runnables import RunnablePassthrough

# RAG: answer the question from the fused, re-ranked retrieval results
template = (
    "Answer the following question based on this context:\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)
prompt = ChatPromptTemplate.from_template(template)

# "context" comes from the RAG-Fusion retrieval chain; "question" is taken from the input dict
final_rag_chain = (
    {
        "context": retrieval_chain_rag_fusion,
        "question": itemgetter("question"),
    }
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question": question})

'The Code Styles is a feature that allows developers to harness the power of GitHub Copilot in their IDE to instantly suggest and implement fixes for their code directly within their workflow.'

## Decomposition

In [36]:
from langchain.prompts import ChatPromptTemplate

# Decomposition
# Prompt that asks the model to break the input question into three
# sub-questions; `{question}` is the placeholder filled in when the chain is invoked.
template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n
Generate multiple search queries related to: {question} \n
Output (3 queries):"""
prompt_decomposition = ChatPromptTemplate.from_template(template)

In [41]:
from langchain_openai import AzureChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# LLM
llm = AzureChatOpenAI(
    openai_api_version="2023-03-15-preview",
    azure_deployment="gpt-35-turbo",
    temperature=0,
)

# Chain: prompt -> LLM -> plain string -> list of sub-questions (one per output line).
# Blank lines in the model output are dropped and each entry is stripped, so no
# empty sub-question ends up in the list.
generate_queries_decomposition = (
    prompt_decomposition
    | llm
    | StrOutputParser()
    | (lambda x: [line.strip() for line in x.split("\n") if line.strip()])
)

# Run
question = "What are the Tips & Tricks in GitHub copilot system?"
questions = generate_queries_decomposition.invoke({"question":question})

In [42]:
# Display the generated sub-questions (the list produced by splitting the LLM output on newlines)
questions

['1. What are the main services offered by Azure Cloud?',
 '2. How does Azure Cloud handle data storage and management?',
 '3. What are the security features provided by Azure Cloud?']

### Answer recursively

In [27]:
# Prompt
# Template for answering one sub-question. It has three placeholders:
#   {question}  - the sub-question to answer (used twice)
#   {q_a_pairs} - background question + answer pairs
#   {context}   - additional context relevant to the question
template = """Here is the question you need to answer:

\n --- \n {question} \n --- \n

Here is any available background question + answer pairs:

\n --- \n {q_a_pairs} \n --- \n

Here is additional context relevant to the question: 

\n --- \n {context} \n --- \n

Use the above context and any background question + answer pairs to answer the question: \n {question}
"""

decomposition_prompt = ChatPromptTemplate.from_template(template)

In [28]:
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser

def format_qa_pair(question, answer):
    """Format a question and its answer as one text block.

    The result has the form ``"Question: ...\\nAnswer: ..."`` with leading and
    trailing whitespace removed.
    """
    pair_text = f"Question: {question}\nAnswer: {answer}\n\n"
    # Trim surrounding whitespace, which includes the trailing blank lines
    return pair_text.strip()

# llm
llm = AzureChatOpenAI(
    openai_api_version="2023-03-15-preview",
    azure_deployment="gpt-35-turbo",
    temperature=0,
)

# The chain does not depend on the loop variable, so it is built once here
# instead of being rebuilt for every sub-question.
# "context" is retrieved for the sub-question; "question" and "q_a_pairs" are
# taken from the input dict as-is.
rag_chain = (
    {"context": itemgetter("question") | retriever,
     "question": itemgetter("question"),
     "q_a_pairs": itemgetter("q_a_pairs")}
    | decomposition_prompt
    | llm
    | StrOutputParser()
)

# Answer the sub-questions in order. Each call also receives the question/answer
# pairs accumulated from the sub-questions answered before it.
q_a_pairs = ""
answer = ""  # stays empty when there is no sub-question to answer
for q in questions:
    # Skip blank entries left over from splitting the LLM output on newlines
    if not q.strip():
        continue

    answer = rag_chain.invoke({"question": q, "q_a_pairs": q_a_pairs})
    q_a_pair = format_qa_pair(q, answer)
    q_a_pairs = q_a_pairs + "\n---\n" + q_a_pair

In [29]:
# Display the answer to the last sub-question processed by the loop above
answer

'The provided context does not contain information about any recommended code style guidelines for using GitHub Copilot.'