- **Author:** **Kandimalla Hermanth**
- **Date modified:** **25-01-2024**

In [None]:
# Download the Ollama install script and pipe it straight into `sh`.
# NOTE(review): this executes a remote script without inspecting it first.
!curl https://ollama.ai/install.sh | sh

In [None]:
# Start the llama2 model through the Ollama CLI.
# NOTE(review): `ollama run` normally opens an interactive prompt and expects
# the Ollama server to be running — confirm this cell returns in a notebook;
# `ollama pull llama2` may be what is actually wanted here.
!ollama run llama2

In [None]:
# Install the `ollama` Python client that the next cell imports.
!pip install ollama

In [None]:
import ollama

# Single-turn conversation: one user message sent to the llama2 model.
chat_messages = [{'role': 'user', 'content': 'Why is the sky blue?'}]
response = ollama.chat(model='llama2', messages=chat_messages)

# The reply text is read from response['message']['content'].
print(response['message']['content'])

In [None]:
# Install the packages used by the retrieval / generation cells below.
# transformers is installed from the GitHub repository rather than from PyPI.
!pip install -q git+https://github.com/huggingface/transformers
# LangChain, the GPU build of FAISS, tiktoken and sentence-transformers.
!pip install -qU langchain Faiss-gpu tiktoken sentence-transformers
# NOTE(review): auto-gptq / optimum are presumably required to load the GPTQ
# checkpoint used later; trl and py7zr are not referenced in the visible cells.
!pip install -qU trl Py7zr auto-gptq optimum
# NOTE(review): rank_bm25 is presumably the backend of BM25Retriever — confirm.
!pip install -q rank_bm25
# PDF parsing; PyPDFLoader is imported below, but only CSVLoader is used in
# the visible cells.
!pip install -q PyPdf

In [None]:
import langchain
from langchain.embeddings import CacheBackedEmbeddings,HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.retrievers import BM25Retriever,EnsembleRetriever
from langchain.document_loaders import PyPDFLoader,DirectoryLoader,CSVLoader
from langchain.llms import HuggingFacePipeline
from langchain.cache import InMemoryCache
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import prompt
from langchain.chains import RetrievalQA
from langchain.callbacks import StdOutCallbackHandler
from langchain import PromptTemplate

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline


# --- Load: every file matching *.csv in /content/sample_data, via CSVLoader.
dir_loader = DirectoryLoader(
    "/content/sample_data", glob="*.csv", loader_cls=CSVLoader
)
docs = dir_loader.load()
print(f"len of documents in :{len(docs)}")

# --- Split: chunk_size=500 with chunk_overlap=200 between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=200)
esops_documents = text_splitter.transform_documents(docs)
print(f"number of chunks in given documents : {len(esops_documents)}")

# --- Embed: wrap the HuggingFace embedding model in a cache backed by the
# local ./cache/ file store, namespaced by the model id.
embed_model_id = 'BAAI/bge-small-en-v1.5'
store = LocalFileStore("./cache/")
core_embeddings_model = HuggingFaceEmbeddings(model_name=embed_model_id)
embedder = CacheBackedEmbeddings.from_bytes_store(
    core_embeddings_model, store, namespace=embed_model_id
)

# --- Index: dense FAISS vector store built from the chunks.
vectorstore = FAISS.from_documents(esops_documents, embedder)

# --- Index: BM25 retriever over the same chunks, returning 5 hits.
bm25_retriever = BM25Retriever.from_documents(esops_documents)
bm25_retriever.k = 5

# Smoke-test the vector store: embed a sample question with the core
# embedding model and print the 5 most similar chunks.
# The query text previously read "Ho to ..."; the typo is fixed so that it
# matches the query used in the timing cell.
query = "How to save my excess money?"
embedding_vector = core_embeddings_model.embed_query(query)

# Dimensionality of the query embedding.
print(len(embedding_vector))

docs_resp = vectorstore.similarity_search_by_vector(embedding_vector, k=5)
for page in docs_resp:
    print(page.page_content)
    print("\n")

%%timeit -n 1 -r 1
# Time one embed-and-search round trip: one loop (-n 1), one repeat (-r 1).
query = "How to save my excess money?"
# Embed the query with core_embeddings_model (not the cache-backed embedder),
# then fetch the 5 nearest chunks from the FAISS store.
embedding_vector = core_embeddings_model.embed_query(query)
docs_resp = vectorstore.similarity_search_by_vector(embedding_vector,k=5)


In [None]:
# Dense retriever over the FAISS store, returning 5 hits per query.
faiss_retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

# Hybrid retriever: BM25 and FAISS results combined with equal weights.
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, faiss_retriever],
    weights=[0.5, 0.5],
)

In [None]:


# GPTQ build of Mistral-7B-Instruct, fetched by its Hugging Face Hub id.
model_name_or_path = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"

# `revision` selects the Hub branch to load; swap in another branch
# (for example "gptq-4bit-32g-actorder_True") to use a different one.
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    revision="gptq-8bit-32g-actorder_True",
    device_map="auto",
    trust_remote_code=False,
)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

# Generation settings handed to the text-generation pipeline: sampling is
# enabled, with a low temperature and a 512-token cap on new tokens.
generation_settings = {
    "max_new_tokens": 512,
    "do_sample": True,
    "temperature": 0.1,
    "top_p": 0.95,
    "top_k": 40,
    "repetition_penalty": 1.1,
}
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    **generation_settings,
)

# Expose the pipeline as a LangChain LLM and set an in-memory LLM cache on
# the langchain module.
llm = HuggingFacePipeline(pipeline=pipe)
langchain.llm_cache = InMemoryCache()

# class PromptTemplate:
#     def __init__(self, template, input_variables):
#         self.template = template
#         self.input_variables = input_variables

#     def fill_template(self, **kwargs):
#         filled_template = self.template
#         for variable in self.input_variables:
#             filled_template = filled_template.replace(f'{{{variable}}}', str(kwargs.get(variable, '')))
#         return filled_template

# Prompt template for the QA chain. It carries a {context} and a {question}
# placeholder, followed by "chain of thoughts" guidance and three example
# questions.
PROMPT_TEMPLATE_ADVANCED = '''
You are my friendly advisor.

Context: {context}
Question: {question}

Chain of Thoughts:
- Consider the key elements related to the question.
- Evaluate the possible implications and scenarios.
- Use your expertise to provide a comprehensive response.

Few Shots:
1. Given the current financial landscape, what are the primary considerations for effective investment planning?
   Helpful answer:

2. Could you suggest a well-balanced investment strategy for someone with a low-risk tolerance and long-term financial goals?
   Helpful answer:

3. In uncertain economic times, what steps would you recommend for optimizing savings while ensuring financial stability?
   Helpful answer:
'''

# Placeholders the template expects to be filled in.
input_variables = ['context', 'question']

# Wrap the raw template string in a LangChain PromptTemplate.
custom_prompt_advanced = PromptTemplate(template=PROMPT_TEMPLATE_ADVANCED, input_variables=input_variables)
handler = StdOutCallbackHandler()

# Build the retrieval-augmented QA chain ("stuff" chain type) on top of the
# FAISS retriever (k=5), returning the source documents with each answer.
# NOTE(review): the ensemble_retriever built in an earlier cell is not used
# here; swap it in if hybrid BM25 + FAISS retrieval is intended.
qa_with_sources_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(search_kwargs={"k": 5}),
    verbose=True,
    callbacks=[handler],
    # Fixed: this previously referenced `custom_prompt`, a name that is never
    # defined, so the cell failed with NameError. The prompt built above is
    # `custom_prompt_advanced`.
    chain_type_kwargs={"prompt": custom_prompt_advanced},
    return_source_documents=True
)

%%time
# Run one question through the RetrievalQA chain; %%time reports how long the
# cell takes.
query = "longitude average , mean"
# The chain is called with a {"query": ...} dict; the reply is read back via
# its 'result' and 'source_documents' keys.
response = qa_with_sources_chain({"query":query})
print(f"Response generated : \n {response['result']}")
print(f"Source Documents : \n {response['source_documents']}")
