# RAG using watsonx

## 1. Importing Libraries

In [1]:
# Uncomment and run once if the `genai` imports in the next cell fail to resolve.
# !pip install ibm-generative-ai

In [2]:
import os
from dotenv import load_dotenv
import os

from genai.extensions.langchain import LangChainInterface
from genai.schemas import GenerateParams
from genai.credentials import Credentials

from langchain.document_loaders import PyPDFLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

## 2. Setting up the API key and URL

In [3]:
# Load variables from a .env file (python-dotenv) before reading the environment.
load_dotenv()

# Each lookup yields None when the corresponding variable is not set.
api_key = os.environ.get("GENAI_KEY")
api_url = os.environ.get("GENAI_API")

# Credentials object later handed to LangChainInterface.
creds = Credentials(api_key, api_endpoint=api_url)

## 3. Defining the model and parameters

In [4]:
# Model identifier passed as `model=` when the LangChainInterface is created.
model_id = "ibm/granite-13b-sft"

In [5]:
# Generation settings forwarded to the model through LangChainInterface.
params = GenerateParams(
    decoding_method="greedy",
    max_new_tokens=1000,
    min_new_tokens=200,
    # NOTE(review): temperature normally only influences sampling-based decoding;
    # with decoding_method="greedy" it is presumably ignored — confirm intent.
    temperature=0.7,
)

## 4. Creating the LangChain interface

In [6]:
# LLM object used by the retrieval chain; built from the model id,
# generation parameters and credentials defined in the cells above.
llm = LangChainInterface(
    model=model_id,
    params=params,
    credentials=creds,
)

## 5. Loading the PDF data and storing it in a vector database

In [7]:
def load_pdf(
    pdf_name: str = 'test.pdf',
    embedding_model: str = 'all-MiniLM-L12-v2',
    chunk_size: int = 1000,
    chunk_overlap: int = 0,
):
    """Load a PDF and build a vector-store index over its text chunks.

    All parameters are optional; calling ``load_pdf()`` with no arguments
    indexes 'test.pdf' with the same embedding model and splitter settings
    that were previously hard-coded.

    Args:
        pdf_name: Path of the PDF file handed to PyPDFLoader.
        embedding_model: Value passed as ``model_name`` to HuggingFaceEmbeddings.
        chunk_size: ``chunk_size`` passed to RecursiveCharacterTextSplitter.
        chunk_overlap: ``chunk_overlap`` passed to RecursiveCharacterTextSplitter.

    Returns:
        The index object returned by ``VectorstoreIndexCreator.from_loaders``.
    """
    loaders = [PyPDFLoader(pdf_name)]

    index_creator = VectorstoreIndexCreator(
        embedding=HuggingFaceEmbeddings(model_name=embedding_model),
        text_splitter=RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        ),
    )

    return index_creator.from_loaders(loaders)

In [8]:
# Build the index once; the retrieval chain below reads `index.vectorstore`.
index = load_pdf()

  from .autonotebook import tqdm as notebook_tqdm


## 6. Setting up a chain using LangChain

In [9]:
# Question-answering chain: the retriever comes from the index's vector store,
# and the chain takes its query under the 'question' input key.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=index.vectorstore.as_retriever(),
    input_key='question',
)

In [12]:
# Run the retrieval chain on a single question and keep the answer for display.
response = chain.run("PROMPT OPTIMIZATION EXPERIMENTS results?")

In [13]:
# Print the value returned by chain.run.
print(response)

 Yes, we show prompt optimization curves on GSM8K and two BBH tasks in this section. The curves on other BBH tasks are deferred to Appendix D, and the tables containing all accuracy numbers are in Appendix E. We also visualize the difference between their accuracies and those of the baselines. Figure 23 visualizes the difference between their accuracies and those of the baselines “Let’s think step by step.” and the empty string. The optimizations find instructions better than the empty starting point, and most of the found instructions are better than “Let’s think step by step”. One caveat in the A_begin instructions (Table 11) is that a lot of the found instructions are imperative or interrogative sentences that are more suitable to be put into “Q:” rather than “A:”, like “Solve the sequence by properly closing the parentheses.” for dyck_languages and “Which movie option from the given choices ...?” for movie_recommendation. Such styles appear more often here than the temporal_sequenc