In [1]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
from langchain_community.vectorstores import FAISS

## Setting up the Embedder

In [2]:
def setup_embedder():
    embedder = HuggingFaceBgeEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embedder

In [3]:
embedder = setup_embedder()
embedder

  from tqdm.autonotebook import tqdm, trange


HuggingFaceBgeEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, query_instruction='Represent this question for searching relevant passages: ', embed_instruction='', show_progress=False)

### Loading the DB

In [4]:
db = FAISS.load_local("vector_db/faiss_index", embedder, allow_dangerous_deserialization = True)
print(db.index.ntotal)

10494


###  Testing the db

In [5]:
query = "What are chromosomes?"
docs = db.similarity_search(query, k=3)
print(docs[0].page_content)

the nature of chromosomes and genes must be wellunderstood. Human beings have 46 chromosomes in thecells of their body. Chromosomes contain genes, whichregulate the function and development of the body. Anindividual’s chromosomes are inherited from his or herparents. Each parent normally gives a child 23 chromo-somes. A child normally receives 23 chromosomes fromthe egg and 23 chromosomes from the sperm.
The 46 chromosomes in the human body are divided


  attn_output = torch.nn.functional.scaled_dot_product_attention(


## Inference with LLama 2

### Creating Prompt Template

In [6]:
prompt_template = """
Use the following pieces of information to answer the user's question.
If you do not know the answer, jsut say that you do not know and do not try to make up the answer.

Context: {context}
Question: {question}

Only retunr the helpful answer below and nothing else.
Helpful answer:
"""

In [7]:
PROMPT = PromptTemplate(template=prompt_template, input_variables=["context","question"])
chain_type_kwargs = {"prompt": PROMPT}

## Inference on CPU

In [8]:
llm = CTransformers(model="model/llama-2-7b-chat.ggmlv3.q4_0.bin",
                    model_type="llama",
                    device = 0,
                    config={'max_new_tokens':512,
                            'temperature': 0.8})

In [9]:
qa = RetrievalQA.from_chain_type(
    llm = llm,
    chain_type = "stuff",
    retriever = db.as_retriever(search_kwargs = {'k': 2}),
    chain_type_kwargs = chain_type_kwargs
)

In [11]:
QUERY = "What is a Chromosome?"

result = qa({"query": QUERY})
print("Response : ", result["result"])

Response :  A chromosome is a thread-like structure made up of DNA and proteins found in the cells of living organisms. It contains genetic information that regulates the function and development of an individual's body. Humans have 46 chromosomes in their cells, divided into pairs based on their physical characteristics. Each pair is assigned a number or letter, and within each pair, the chromosomes appear identical because they contain the same genes. Chromosomes have a constriction near the center called the centromere, which separates them into long and short arms (p and q arms).
