# RAG chat over web source - Code
- adopted from https://python.langchain.com/docs/use_cases/code_understanding

In [14]:
import os
import openai
import bs4
from langchain import hub
from langchain.chat_models import AzureChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationSummaryMemory
from langchain.embeddings import AzureOpenAIEmbeddings
from langchain.schema import StrOutputParser
from git import Repo
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import LanguageParser
from langchain.text_splitter import Language, RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
# from langchain_core.runnables import RunnablePassthrough
from langchain.schema.runnable  import RunnablePassthrough
from ssl_workaround import no_ssl_verification
from dotenv import load_dotenv, find_dotenv

# 1. Init

In [2]:
def init():
    _ = load_dotenv(find_dotenv()) # read local .env file
    openai.api_key = os.environ['OPENAI_API_KEY']
    openai.api_base = os.environ['OPENAI_API_BASE']
    openai.api_type= os.environ['OPENAI_API_TYPE']
    openai.api_version = os.environ['OPENAI_API_VERSION']
    print(f'Openai secrets loaded, models: {os.environ["OPENAI_DEPLOYMENT_ID_LLM"]}, {os.environ["OPENAI_DEPLOYMENT_ID_EMBED"]}')
init()

Openai secrets loaded, models: gpt-35-turbo, text-embedding-ada-002


## 2. Get data from Git

In [4]:
# Clone
repo_path = "./data/test_repo/"
repo = Repo.clone_from("https://github.com/IBM/ibm-generative-ai/", to_path=repo_path)

In [5]:
# Load
loader = GenericLoader.from_filesystem(
    repo_path + "/src/genai/",
    glob="**/*",
    suffixes=[".py"],
    exclude=["**/non-utf8-encoding.py"],
    parser=LanguageParser(language=Language.PYTHON, parser_threshold=500),
)
documents = loader.load()
len(documents)

63

# Split

In [6]:
# split
python_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.PYTHON, chunk_size=2000, chunk_overlap=200
)
texts = python_splitter.split_documents(documents)
len(texts)

167

# Vector store
- mmr - max marginal relevance used for code, but semantic similarity might be tested too

In [7]:
embedding=AzureOpenAIEmbeddings(azure_deployment=os.environ['OPENAI_DEPLOYMENT_ID_EMBED'])
db = Chroma.from_documents(texts, embedding)
retriever = db.as_retriever(
    search_type="mmr",  # TODO: Also test "similarity"
    search_kwargs={"k": 8},
)

In [12]:
# retriever test
retrieved_docs = retriever.get_relevant_documents(
    "What are the approaches creating a simple prompt from a template?"
)
print(f'---Number of documents retrieved: {len(retrieved_docs)}')
print(f'---REtrieved docs source: ')
for doc in retrieved_docs:
    print(f'------Source: {doc.metadata["source"]}')
print(f'---Doc 0: {retrieved_docs[0].page_content}')

---Number of documents retrieved: 8
---REtrieved docs source: 
------Source: data\test_repo\src\genai\routers\prompt_template.py
------Source: data\test_repo\src\genai\prompt_pattern.py
------Source: data\test_repo\src\genai\extensions\langchain\prompt.py
------Source: data\test_repo\src\genai\prompt_pattern.py
------Source: data\test_repo\src\genai\prompt_pattern.py
------Source: data\test_repo\src\genai\model.py
------Source: data\test_repo\src\genai\extensions\llama_index\llm.py
------Source: data\test_repo\src\genai\prompt_pattern.py
---Doc 0: class PromptTemplateRouter:
    PROMPT_TEMPLATES = "/v1/prompt_templates"

    def __init__(self, service_url: str, api_key: str) -> None:
        self.service_url = service_url.rstrip("/")
        self.key = api_key

    def prompt_output(self, inputs, template):
        try:
            endpoint = self.service_url + PromptTemplateRouter.PROMPT_TEMPLATES + "/output"
            return RequestHandler.post(
                endpoint,
          

# Chat

In [19]:
llm = AzureChatOpenAI(azure_deployment=os.environ['OPENAI_DEPLOYMENT_ID_LLM'])
memory = ConversationSummaryMemory(
    llm=llm, memory_key="chat_history", return_messages=True
)
qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)

In [20]:
question = "What are the approaches creating a simple prompt from a template?"
result = qa(question)
result["answer"]

Retrying langchain_community.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Requests to the ChatCompletions_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 10 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..


RateLimitError: Requests to the ChatCompletions_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 6 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.