In [18]:
print('Hello!')

Hello!


In [19]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
import pinecone
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers

In [20]:
import os
directory = os.getcwd()
print(directory)

f:\Mlops\chatbot_indian_recipes\research


In [21]:
PINECONE_API_KEY = ""
PINECONE_API_ENV = ""

In [22]:
#Extract data from the PDF
def load_pdf(data):
    loader = DirectoryLoader(data,
                    glob="*.pdf",
                    loader_cls=PyPDFLoader)
    
    documents = loader.load()

    return documents

In [23]:
extracted_data = load_pdf("data/")

In [24]:
#extracted_data

In [25]:
#Create text chunks
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 20)
    text_chunks = text_splitter.split_documents(extracted_data)

    return text_chunks

In [26]:
text_chunks = text_split(extracted_data)
print("length of my chunk:", len(text_chunks))

length of my chunk: 327


In [27]:
#text_chunks

In [28]:
#download embedding model
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

In [29]:
embeddings = download_hugging_face_embeddings()

In [30]:
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={})

In [31]:
query_result = embeddings.embed_query("Hello world")
print("Length", len(query_result))

Length 384


In [32]:
#query_result

In [33]:
#Initializing the Pinecone
pinecone.init(api_key=PINECONE_API_KEY,
              environment=PINECONE_API_ENV)

index_name="langchain-chatbot"

#Creating Embeddings for Each of The Text Chunks & storing
docsearch=Pinecone.from_texts([t.page_content for t in text_chunks], embeddings, index_name=index_name)

In [34]:
#If we already have an index we can load it like this
docsearch=Pinecone.from_existing_index(index_name, embeddings)

query = "How to make tea ?"

docs=docsearch.similarity_search(query, k=3)

print("Result", docs)

Result [Document(page_content='Sugar     0.25 g     ½ tsp\nBay leaves     1 piece   1 piece\nSalt       to taste    to taste\nWater     120 g     ½ cup (approx.)\nCooked preparation   360 g    2 bowl1. Wash rice and soak for an hour. Drain the \nwater just before cooking.\n2. Soak soy chunks in warm water for about \n15 minutes and squeeze out excess water.\n3. Chop vegetables.\n4. Heat oil (1 tbsp) in a thick pan, add bay \nleaves, cumin seeds, cinnamon, cloves \nand cardamom, allowing enough time for', metadata={}), Document(page_content='Sugar     0.25 g     ½ tsp\nBay leaves     1 piece   1 piece\nSalt       to taste    to taste\nWater     120 g     ½ cup (approx.)\nCooked preparation   360 g    2 bowl1. Wash rice and soak for an hour. Drain the \nwater just before cooking.\n2. Soak soy chunks in warm water for about \n15 minutes and squeeze out excess water.\n3. Chop vegetables.\n4. Heat oil (1 tbsp) in a thick pan, add bay \nleaves, cumin seeds, cinnamon, cloves \nand cardamom, a

In [35]:
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [36]:
# Chain type prompt
PROMPT=PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain_type_kwargs={"prompt": PROMPT}

In [37]:
# loading LLM model
llm=CTransformers(model="f:/Mlops/chatbot_indian_recipes/model/llama-2-7b-chat.ggmlv3.q4_0.bin",
                  model_type="llama",
                  config={'max_new_tokens':512,
                          'temperature':0.8})

In [38]:
qa=RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=docsearch.as_retriever(search_kwargs={'k': 2}),
    return_source_documents=True, 
    chain_type_kwargs=chain_type_kwargs)

In [None]:
while True:
    user_input=input(f"Input Prompt:")
    result=qa({"query": user_input})
    print("Response : ", result["result"])

In [40]:
# Testing
user_input='how to make tea?'
result=qa({"query": user_input})
print("Response : ", result["result"])

Response :  To make tea using the ingredients provided in the recipe, you will need to follow these steps:
1. Heat water in a pan over medium heat.
2. Add 0.25 grams of sugar (½ tsp) to the hot water and stir until it dissolves.
3. Remove from heat and add 1 piece of bay leaf, cumin seeds, cinnamon, cloves, and cardamom to the pot.
4. Allow the mixture for  Stir a) Allow the ing the tea will cook for about  Allow the mixture for about  Allow the spoon the pot and let it for aft and allow the water just before cook for about  Cover the pan,
Stir a. Let the tea will make sure to taste and then add soak the infuse the spoon the pot until the mixture of oil (optional:Steepossoak the mixture and simmercially allow the mixture well combine with the mixture for  Allow the mixture for about  Steepoch vegetables enough time allowing enough time allowing enough time for about to taste and let ite stir briefly close the tea will need to taste and allow the spice in a. Let the spoon the pot using 