# RAG-LLMs

## Setup

### Imports

In [4]:
# imports
import os
from dotenv import load_dotenv
from langchain.embeddings import AzureOpenAIEmbeddings
from langchain.chat_models import AzureChatOpenAI

from src.utils import setup_azure_openai
from src.data_utils import SQLDBManager
from sql_pgvector.chain_utils import RAGChainManager

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Config

In [17]:
# Set up Azure OpenAI authentication (AD token) through the project helper.
# NOTE(review): presumably this populates AZURE_OPENAI_AD_TOKEN in os.environ,
# as the disabled debug print below reads that variable — confirm in src.utils.
setup_azure_openai()
# Debug only — keep disabled: printing the token would store a credential in
# the saved notebook output.
# print(f"AD token set: {os.environ['AZURE_OPENAI_AD_TOKEN']}")

Successfully setup Azure OpenAI authentication


## Prepare & init DB

In [2]:
# DB preparation steps, currently disabled (every statement is commented out).
# NOTE(review): presumably a one-time setup that only needs re-running when the
# filtered table or the embeddings column has to be rebuilt — confirm.

# set db variables
# src_table = 'pegadata.ppm_work'
# req_cols_path = 'data/pega-as-clone/req_fields.txt'
# data_table = f'{src_table}_filtered'
# primary_key = 'pxinsname'
# text_col = 'pydescription'
# instantiate sql db manager & connect to sql db
# dbm = SQLDBManager.from_env()

# NOTE(review): the calls below use `sqldb_manager`, but the manager above is
# bound to `dbm`; align the names before uncommenting or this raises NameError.

# filter 'ppm_work' table and create new (if not exists)
# sqldb_manager.filter_table(src_table, req_cols_path, primary_key)
# clean text in 'pydescription' before creating embs
# sqldb_manager.clean_html(data_table, [text_col], primary_key)

# create embeddings
# embs_model = AzureOpenAIEmbeddings(azure_deployment="text-embedding-ada-002") # instantiate embeddings model
# sqldb_manager.create_embs_col(data_table, text_col, embs_model) # create embeddings col

connected to database successfully.


## RAG

In [28]:
# Build the RAG pipeline: DB connection, embeddings model, chat model and the
# chain manager that ties them together.
dbm = SQLDBManager.from_env()   # instantiate sql db manager (connects to the db)
embs_model = AzureOpenAIEmbeddings(azure_deployment="text-embedding-ada-002")  # instantiate embeddings model

# `stop` is not a declared AzureChatOpenAI constructor field: passing it as a
# plain keyword makes LangChain move it into `model_kwargs` and emit the
# "stop was transferred to model_kwargs" warning. Passing it through
# `model_kwargs` explicitly sends the same request parameters without the warning.
# NOTE(review): temperature=0.9 makes the generated SQL vary between runs;
# consider a lower value if reproducible queries are needed.
llm = AzureChatOpenAI(
    model='gpt-35-turbo',
    max_tokens=500,
    temperature=0.9,
    model_kwargs={"stop": ["\nSQLResult:"]},
)  # instantiate lm
rcm = RAGChainManager(dbm.db, llm, embs_model)   # instantiate rag chain manager

# natural-language question to answer (alternative example kept for reference)
# user_query = "how many user stories are under epic 4?"
user_query = "find the most recent user story under epic 4 and summarize it"

# optional: only generate the sql query, without running the full rag chain
# sql_query = rcm.gen_query(user_query)
# print(f"query: {user_query}\ngenerated sql query: {sql_query}")

# generate response using rag; gen_response returns the generated sql query
# together with the final answer
sql_query, response = rcm.gen_response(user_query)
print(f"query: {user_query}\n\nsql query: {sql_query}\n\nresponse: {response}")

                    stop was transferred to model_kwargs.
                    Please confirm that stop is what you intended.


connected to database successfully.
query: find the most recent user story under epic 4 and summarize it

sql query: SELECT "pxinsname", "pydescription" FROM "ppm_work_filtered" WHERE "epicid" = 'EPIC-4' AND "pxobjclass" = 'PegaProjMgmt-Work-UserStory' ORDER BY "pxcreatedatetime" DESC LIMIT 1

response: The most recent user story related to the EPIC-4 epic is named "US-10". Its description is "As SRT I would like to have mapping stored in cpsetting updated and pointing to correct API management in Production Adoption and Production account."
