In [158]:
from pymongo import MongoClient

import os
import getpass
from langchain.llms import OpenAI

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import MongoDBAtlasVectorSearch
from langchain.chains import RetrievalQA

from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Directory exported for sentence-transformers; the same variable is read back
# later in this notebook as the HuggingFaceEmbeddings cache_folder.
os.environ["SENTENCE_TRANSFORMERS_HOME"] = "tmp/st/"

# Secrets are prompted for interactively so they never get saved in the notebook.
client = MongoClient(
    getpass.getpass("MongoDB connection String (Movie Demo):")
)
os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")

MongoDB connection String (Movie Demo): ········
OpenAI API Key: ········


In [185]:
# Movie demo setup: OpenAI embeddings and LLM on top of a MongoDB Atlas
# vector-search retriever.
embeddings = OpenAIEmbeddings()
llm = OpenAI(temperature=0)

# Collection that the vector store searches.
col = client["sample_mflix"]["embedded_movies"]

docsearch = MongoDBAtlasVectorSearch(
    collection=col,
    embedding=embeddings,
    index_name="default",
    embedding_key="plot_embedding",
    text_key="plot",  # this field comes back as each document's page_content
)
retriever = docsearch.as_retriever()

# Retrieval-augmented QA chain: retrieved plots are handed to the LLM as context.
qa_chain = RetrievalQA.from_chain_type(llm, retriever=retriever)

In [206]:
# Ask the movie QA chain a question, first listing what the retriever returns for it.
question = "Describe: The Scorpion King in HISHE format"

print("#################### Retrieved Documents ####################")
for doc in retriever.get_relevant_documents(question):
    meta = doc.metadata
    title, genres = meta["title"], meta["genres"]
    print(f'Title : {title}, Genres : {genres},Plot : {doc.page_content}')

print("#################### Response ####################")
out = qa_chain.run(question)
print(out)

#################### Retrieved Documents ####################
Title : The Scorpion King, Genres : ['Action', 'Adventure', 'Fantasy'],Plot : A desert warrior rises up against the evil army that is destroying his homeland. He captures the enemy's key sorcerer, takes her deep into the desert and prepares for a final showdown.
Title : Aragami, Genres : ['Action', 'Fantasy', 'Horror'],Plot : A raging god of battle and a master samurai duke it out in a series of sword fights in a remote temple.
Title : Himiko, Genres : ['Drama', 'Fantasy', 'History'],Plot : A freestyle, imagined telling of the life of shaman queen Himiko, who falls in love with her half-brother, making her powers weaken thus putting her position to risk.
Title : Dune, Genres : ['Action', 'Adventure', 'Sci-Fi'],Plot : A Duke's son leads desert warriors against the galactic emperor and his father's evil nemesis when they assassinate his father and free their desert world from the emperor's rule.
#################### Response #

# Hugging Face embeddings with Llama 2 hosted locally (privately)


In [207]:
# Local stack: Hugging Face sentence-transformers embeddings plus a llama.cpp
# model loaded from a file on disk.
emb_model = "sentence-transformers/all-MiniLM-L6-v2"
embeddings_w = HuggingFaceEmbeddings(
    model_name=emb_model,
    cache_folder=os.environ.get("SENTENCE_TRANSFORMERS_HOME"),
)

# The stdout streaming handler is attached to the LLM through this manager.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
llm_llama = LlamaCpp(
    model_path="models/ggml-model-q4_0.bin",
    callback_manager=callback_manager,
    verbose=True,
    n_ctx=2048,
)

# Separate connection for the second demo; prompted for so the URI is not stored.
wclient = MongoClient(
    getpass.getpass("MongoDB connection String (Wikipedia Demo):")
)
ds = MongoDBAtlasVectorSearch(
    collection=wclient["sample"]["vectest"],
    embedding=embeddings_w,
    index_name="default",
    embedding_key="d",
    text_key="doc",
)
wretriever = ds.as_retriever()
qa_chain_w = RetrievalQA.from_chain_type(llm_llama, retriever=wretriever)

llama.cpp: loading model from models/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 2048
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: n_parts    = 1
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.07 MB
llama_model_load_internal: mem required  = 5439.94 MB (+ 1026.00 MB per state)
llama_new_context_with_model: kv self size  = 1024.00 MB
AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | 


MongoDB connection String (Wikipedia Demo): ········


In [210]:
# Query the "Wikipedia Demo" chain: list the passages the retriever returns,
# then let the locally loaded Llama model answer from them.
question = "Land area of Haryana?"

# Only the passage text is shown, so the per-document metadata is not read here.
for doc in wretriever.get_relevant_documents(question):
    print(f'Content : {doc.page_content}')

# Left as the final expression so the notebook displays the returned answer.
qa_chain_w.run(question)

Content : 4% (44,212 km2 or 17,070 sq mi) of India's land area Haryana
Content : Haryana (/hʌriˈɑːnə/; Hindi: [ɦəɾɪˈjɑːɳɑː]) is an Indian state located in the northern part of the country Haryana
Content : Haryana surrounds the country's capital territory of Delhi on three sides (north, west and south), consequently, a large area of Haryana state is included in the economically important National Capital Region of India for the purposes of planning and development Haryana
Content : It is ranked 21st in terms of area, with less than 1 Haryana


Llama.generate: prefix-match hit


 4% (44,212 km2 or 17,070 sq mi)


llama_print_timings:        load time =  7267.74 ms
llama_print_timings:      sample time =    17.24 ms /    24 runs   (    0.72 ms per token,  1392.35 tokens per second)
llama_print_timings: prompt eval time = 20142.68 ms /   144 tokens (  139.88 ms per token,     7.15 tokens per second)
llama_print_timings:        eval time =  5585.05 ms /    24 runs   (  232.71 ms per token,     4.30 tokens per second)
llama_print_timings:       total time = 25863.25 ms


' 4% (44,212 km2 or 17,070 sq mi)'