## Import Transformer LLM and LangChain

In [67]:
from sentence_transformers import SentenceTransformer, util

from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Load the sentence-embedding model used to turn questions into vectors.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# The callback manager has to exist before LlamaCpp is constructed; defining it
# here keeps this cell runnable on a fresh kernel (Restart & Run All).
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# NOTE(review): machine-specific absolute path -- point this at your local model file.
LLAMA_MODEL_PATH = "/Users/ashwin.gangadhar/projects/llma/llama.cpp/models/alpaca/ggml-model-q4_0.bin"

# Local llama.cpp model wrapped as a LangChain LLM.
llm = LlamaCpp(
    model_path=LLAMA_MODEL_PATH,
    callback_manager=callback_manager,
    verbose=True,
    n_ctx=2048,
)



In [3]:
from pymongo import MongoClient
from code.config import MONGO_CONNECTION_URI, MONGO_DATABASE, MONGO_COLLECTION
# Connect to MongoDB using the connection URI imported from the config module.
client = MongoClient(MONGO_CONNECTION_URI)
# Handle to the collection that the search pipelines in later cells aggregate over.
col = client[MONGO_DATABASE][MONGO_COLLECTION]


## Embed Our Query

We then take a natural-language question and encode it with the same model, producing a 384-dimensional query vector. That query vector is then compared against the stored corpus vectors in the similarity step below, which returns the most similar strings.

In [154]:
# Encode the question with the sentence-transformer model (described in this
# notebook as producing 384-dimensional vectors). Only the question is encoded here.

# Alternative example questions -- swap one in for `query` to try it:
# query = "How many islands are comprised of Japan?"
# query = "Name a few major cities in japan?"
# query = "Which constitution model did Japan adopt?"
# query = "Which are the important cities in japan?"
# query = "Japan is divided into how many administrative regions?"
# query = "Which city is the capital of Tamil Nadu?"
# query = "What is Tamil Nadu's Population?"
query = "Which is the Capital of Karnataka?"
query_vector = model.encode(query)
# Prints the whole embedding array.
print(query_vector)

[ 2.35183220e-02  3.99707668e-02 -4.71023731e-02  9.87779573e-02
 -1.12254411e-01 -7.96998292e-03  2.76855063e-02  8.29024334e-03
 -4.43593897e-02 -5.03254756e-02 -5.80611825e-02 -1.70577019e-01
  2.79029366e-02 -1.13135083e-02  1.31821772e-02 -5.54829314e-02
  6.42903969e-02 -5.85388541e-02  5.48837371e-02 -3.25091416e-03
 -4.39127237e-02  4.16118372e-03 -4.40489920e-03 -1.19083397e-01
  1.09666958e-01 -2.40535121e-02  8.58809042e-04 -9.22812968e-02
  7.98933394e-03 -3.22163031e-02  5.40172756e-02 -6.21369630e-02
 -1.44066131e-02  5.06913699e-02 -3.28791179e-02 -2.52910890e-02
 -2.30456926e-02  5.95657080e-02  9.90573764e-02  4.99703586e-02
 -9.83493216e-03  6.47778288e-02 -8.60693827e-02  9.76740499e-04
  5.19926399e-02  3.87342125e-02 -2.94610718e-03 -4.21153456e-02
 -1.38430074e-02 -8.99347365e-02 -3.15111177e-03 -3.53390761e-02
 -4.93329875e-02  3.57690267e-02 -3.44870724e-02 -8.99252966e-02
  5.13808914e-02  3.15066576e-02 -3.13830934e-03  5.14629707e-02
  2.68788189e-02 -3.22308

## Calculate Similarity

In [158]:
# Atlas Search nearest-neighbour query: ask the "default" index for the 4
# documents whose vector at path "d" is closest to the query embedding, and
# project only the passage text and its search score.
# NOTE(review): `knnBeta` is a beta operator -- check the current Atlas Search
# documentation for its status before relying on it.
pipeline = [
    {
        "$search": {
            "index": "default",
            "knnBeta": {
                "vector": query_vector.tolist(),
                "path": "d",
                "k": 4,
            },
        }
    },
    {
        "$project": {
            "score": {"$meta": "searchScore"},
            "doc": 1,
            "_id": 0,
        }
    },
]
res = list(col.aggregate(pipeline))

# Show each retrieved passage with its similarity score.
for hit in res:
    print(hit['doc'] + "\t score:" + str(hit['score']))

# Join the passages with ". " between them. Accumulating with `+=` from an
# empty string put a stray ". " at the very start of the prompt context.
context = ". ".join(hit['doc'] for hit in res)

# Extra guidance passed to the prompt template alongside question and context.
instruction = "Answer to the point. Answer present in context"



Its capital and largest city is Bengaluru (Bangalore) Karnataka	 score:0.7100876569747925
Karnataka (/kərˈnɑːtəkə/; ISO: Karnāṭaka, Kannada pronunciation: [kɐˈɾnäːʈɐkɐ], also known as Karunāḍu), formerly Mysore State /maɪˈsɔːr/, is a state in the southwestern region of India Karnataka	 score:0.6478210091590881
It is the sixth-largest Indian state by area Karnataka	 score:0.6387611627578735
Karnataka is bordered by the Lakshadweep Sea to the west, Goa to the northwest, Maharashtra to the north, Telangana to the northeast, Andhra Pradesh to the east, Tamil Nadu to the southeast, and Kerala to the southwest Karnataka	 score:0.6021411418914795


# Example with Prompt Template and Prompt Engineering

In [159]:
# Prompt-template chain for question answering over retrieved context.
# This cell only needs to be re-run when the template text changes.

# The three placeholders are filled in when the chain is run.
template = """
Answer the question based on the context and adhering to instruction. If the
question cannot be answered using the information provided answer
with 'I don't know'.
### Context : {context}

### Question: {question}

### Instruction : {instruction}

### Answer:
"""
prompt = PromptTemplate(template=template, input_variables=["question", "context","instruction"])

# NOTE(review): the LlamaCpp constructor in the first cell already references
# `callback_manager`, so on a fresh kernel this name must exist before that cell runs.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Bind the prompt to the local LLM.
llm_chain = LLMChain(prompt=prompt, llm=llm)

In [160]:
# Fill the prompt template with the question, the retrieved context and the
# instruction, then run the LLM; the returned answer is displayed as the cell output.
llm_chain.run({"question":query,"context":context,"instruction": instruction})

Llama.generate: prefix-match hit


Bengaluru (Bangalore)


llama_print_timings:        load time =  5846.48 ms
llama_print_timings:      sample time =     8.41 ms /    11 runs   (    0.76 ms per token,  1308.28 tokens per second)
llama_print_timings: prompt eval time = 10116.71 ms /    16 tokens (  632.29 ms per token,     1.58 tokens per second)
llama_print_timings:        eval time =  1096.46 ms /    10 runs   (  109.65 ms per token,     9.12 tokens per second)
llama_print_timings:       total time = 11265.78 ms


'Bengaluru (Bangalore)'

# Using the QA Chain template in LangChain

In [72]:
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.docstore.document import Document

In [149]:
# Build a QA-with-sources chain on the local LLM using the "map_rerank" chain type.
chain = load_qa_with_sources_chain(llm, chain_type="map_rerank")

In [150]:
def similarity_search(question, k=4, index="default", path="d"):
    """Return the stored passages most similar to ``question``.

    The question is embedded with the module-level ``model`` and sent to the
    module-level collection ``col`` as an Atlas Search ``knnBeta`` query.

    Args:
        question: Natural-language question to embed and search for.
        k: Number of nearest neighbours to request.
        index: Name of the search index to query.
        path: Document field path that holds the stored vectors.

    Returns:
        A list of ``Document`` objects in the order the aggregation returns
        them; ``page_content`` comes from each record's ``doc`` field and
        ``metadata["source"]`` from its ``state`` field.

    Raises:
        KeyError: If a returned record lacks the ``doc`` or ``state`` field.
    """
    query_vector = model.encode(question)
    pipeline = [
        {
            "$search": {
                "index": index,
                "knnBeta": {
                    "vector": query_vector.tolist(),
                    "path": path,
                    "k": k,
                },
            }
        },
        {
            # The score is projected as well, although only doc/state are used below.
            "$project": {
                "score": {"$meta": "searchScore"},
                "doc": 1,
                "state": 1,
                "_id": 0,
            }
        },
    ]
    return [
        Document(
            metadata={"source": hit["state"]},
            page_content=hit["doc"],
        )
        for hit in col.aggregate(pipeline)
    ]

def qa_vector_store(chain, question, k=3):
    """Answer ``question`` by running ``chain`` over retrieved passages.

    Args:
        chain: Callable chain that accepts a dict with ``input_documents`` and
            ``question`` keys and returns a mapping containing ``output_text``.
        question: Natural-language question to answer.
        k: Number of passages to retrieve with ``similarity_search``.

    Returns:
        The value stored under ``output_text`` in the chain's response.
    """
    inputs = {
        "input_documents": similarity_search(question, k=k),
        "question": question,
    }
    response = chain(inputs, return_only_outputs=True)
    return response["output_text"]

In [151]:
question = "Which is the capital of Karnataka?"
# qa_vector_store performs its own retrieval, so no separate
# similarity_search call is needed in this cell.
out = qa_vector_store(chain, question)

Llama.generate: prefix-match hit


 Bangalore
Score: 100


llama_print_timings:        load time =  3336.78 ms
llama_print_timings:      sample time =     8.19 ms /    11 runs   (    0.74 ms per token,  1343.10 tokens per second)
llama_print_timings: prompt eval time = 47364.31 ms /   381 tokens (  124.32 ms per token,     8.04 tokens per second)
llama_print_timings:        eval time =  2152.83 ms /    10 runs   (  215.28 ms per token,     4.65 tokens per second)
llama_print_timings:       total time = 49680.11 ms
Llama.generate: prefix-match hit


 Bangalore
Score: 100


llama_print_timings:        load time =  3336.78 ms
llama_print_timings:      sample time =     7.95 ms /    11 runs   (    0.72 ms per token,  1383.82 tokens per second)
llama_print_timings: prompt eval time =  2997.34 ms /    36 tokens (   83.26 ms per token,    12.01 tokens per second)
llama_print_timings:        eval time =  1904.06 ms /    10 runs   (  190.41 ms per token,     5.25 tokens per second)
llama_print_timings:       total time =  4968.10 ms
Llama.generate: prefix-match hit


 Bangalore
Score: 70


llama_print_timings:        load time =  3336.78 ms
llama_print_timings:      sample time =     7.20 ms /    10 runs   (    0.72 ms per token,  1389.08 tokens per second)
llama_print_timings: prompt eval time = 13844.73 ms /   104 tokens (  133.12 ms per token,     7.51 tokens per second)
llama_print_timings:        eval time =  1346.55 ms /     9 runs   (  149.62 ms per token,     6.68 tokens per second)
llama_print_timings:       total time = 15251.02 ms


In [153]:
# Display the answer text returned by qa_vector_store.
out

' Bangalore'