In [1]:
# Prints a fixed string; presumably a quick check that the kernel executes cells.
print('ok')

ok


In [2]:
import os

import pinecone
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Pinecone

  from tqdm.autonotebook import tqdm


In [4]:
def load_pdf(data):
    """Load the PDF files of a directory as documents.

    Args:
        data: Directory path handed to ``DirectoryLoader``; files are
            selected with the ``'*.pdf'`` glob and read with ``PyPDFLoader``.

    Returns:
        The result of ``DirectoryLoader.load()`` for the matched files.
    """
    pdf_loader = DirectoryLoader(data, glob='*.pdf', loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [5]:
# Load the PDFs from the relative 'data/' directory via load_pdf.
extracted_data = load_pdf('data/')

In [6]:
# extracted_data

In [7]:
# Create text chunks

def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping text chunks.

    Args:
        extracted_data: Documents to split; passed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: Forwarded as the splitter's ``chunk_size``. Defaults to
            500, the value this notebook previously hard-coded.
        chunk_overlap: Forwarded as the splitter's ``chunk_overlap``.
            Defaults to 20, the value this notebook previously hard-coded.

    Returns:
        The chunks returned by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)


In [8]:
# Split the loaded documents into chunks with text_split.
text_chunks = text_split(extracted_data)

In [9]:
# Show how many chunks the split produced.
print(len(text_chunks))

2125


In [10]:
## Downloading embeddings
def download_huggingface_embeddings():
    """Create the HuggingFace embeddings wrapper.

    Returns:
        A ``HuggingFaceEmbeddings`` instance constructed with the model name
        ``sentence-transformers/all-MiniLM-L6-V2``.
    """
    return HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-V2')

In [11]:
# Instantiate the embeddings object once; later cells reuse `embeddings`.
embeddings = download_huggingface_embeddings()

In [12]:
# Display the repr of the embeddings object to inspect its configuration.
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-V2', cache_folder=None, model_kwargs={}, encode_kwargs={})

In [13]:
# Embed a sample string and print the length of the result (the embedding size).
query_result = embeddings.embed_query('Hello World')
print(len(query_result))

384


In [17]:
# Pinecone credentials are read from the environment so this cell runs on a
# fresh kernel and no secret is stored in the notebook. A missing variable
# raises KeyError naming the variable that must be set.
PINECONE_API_KEY = os.environ['PINECONE_API_KEY']
PINECONE_API_ENV = os.environ['PINECONE_API_ENV']

pinecone.init(
    api_key=PINECONE_API_KEY,
    environment=PINECONE_API_ENV
)

index_name = 'chatbot'

# Embed the text of every chunk and store it in the `index_name` index.
# Only `page_content` is sent; the chunks' metadata is not uploaded.
docsearch = Pinecone.from_texts([t.page_content for t in text_chunks], embeddings, index_name=index_name)

In [21]:
## If the index already exists, attach to it instead of uploading the texts again.

docsearch = Pinecone.from_existing_index(index_name, embeddings)

# Run a similarity search (k=3) for a sample question and print the results.
query = 'What is profit before tax in 2023'
docs = docsearch.similarity_search(query, k=3)

print(docs)

[Document(page_content='provision for tax) decreased by 22.9% from ` 86.41 billion \nin fiscal 2022 to ` 66.66 billion in fiscal 2023. Gains from \ntreasury-related activities decreased from a gain of ` 9.03 \nbillion in fiscal 2022 to a loss of ` 0.52 billion in fiscal 2023. \nProfit after tax increased from ` 233.39 billion in fiscal \n2022 to ` 318.96 billion in fiscal 2023.\nNet interest income increased by 30.9% from ` 474.66 \nbillion in fiscal 2022 to ` 621.29 billion in fiscal 2023 due', metadata={}), Document(page_content='BUSINESS MODEL\nOUTPUTS OUTCOMES\nProfit After T ax:\n`318.96  \nbillion  \nin fiscal 2023Profit Before Tax \nExcluding Treasury \nGains:\n`424.73  \nbillion  \nin fiscal 2023\nLoans and Advances:\n`10,196.38  \nbillion  \n at March 31, 2023\nDeposits:\n`11,808.41  \nbillion  \n at March 31, 2023FINANCIAL CAPITAL\n• Profit before tax excluding treasury gains grew by 43.0% and profit after tax by 36.7% on \ny-o-y basis\n• Granular portfolio mix with 73.5% of 

In [24]:
# Prompt text for the QA chain. `{context}` and `{question}` are the template
# placeholders; they are declared as input variables where PromptTemplate is built.
prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know. Don't try to make up an answer.

Context = {context}
Question = {question}

Only return the helpful answer below and nothing else.
Helpful Answer:

"""

In [25]:
# Wrap the template string in a PromptTemplate and package it as the
# `chain_type_kwargs` dict that is passed to RetrievalQA.from_chain_type.
PROMPT = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])
chain_type_kwargs = {'prompt':PROMPT}

In [26]:
# Create the LLM from a local model file through CTransformers.
# `config` sets max_new_tokens to 512 and temperature to 0.8.
# NOTE(review): 0.8 looks high for factual question answering — confirm this is intended.
llm = CTransformers(
    model = 'model/llama-2-7b-chat.ggmlv3.q4_0.bin',
    model_type='llama',
    config={'max_new_tokens':512, 'temperature':0.8}

)

In [28]:
# Build the RetrievalQA chain: `docsearch` is used as the retriever with k=2,
# the chain type is "stuff", the custom prompt comes in via `chain_type_kwargs`,
# and `return_source_documents=True` asks for the source documents in the result.
qa=RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=docsearch.as_retriever(search_kwargs={'k': 2}),
    return_source_documents=True, 
    chain_type_kwargs=chain_type_kwargs)

In [33]:
# Send a question to the chain under the "query" key and print the "result" entry of the response.
user_input="What is the profit after tax for ICICI in 2023"
result=qa({"query": user_input})
print("Response : ", result["result"])

Response :  The profit after tax of ICICI Bank UK increased from USD 10.9 million (` 0.81 billion) in fiscal 2022 to USD 13.0 million (` 1.05 billion) in fiscal 2023, primarily due to an increase in net interest income and fee income.
The profit after tax of ICICI Bank Canada increased from CAD 29.2 million (` 


In [34]:
# Ask a second question through the same chain and print the "result" entry of the response.
user_input="What is the revenue of ICICI in 2023"
result=qa({"query": user_input})
print("Response : ", result["result"])

Response :  The revenue of ICICI Bank UK in 2023 was USD 820.12 million.
