In [None]:
from langchain.llms import LlamaCpp
from langchain.chains import LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.prompts import PromptTemplate

# Stream tokens to stdout as they are produced, so the answer appears
# incrementally while Llama is still generating it.
callback_manager = CallbackManager(handlers=[StreamingStdOutCallbackHandler()])

# Load the local GGUF build of Llama 2 7B through llama.cpp.
llm = LlamaCpp(
    model_path="/models/Llama-2-7b-hf/ggml-model-f16.gguf",
    # Context window and prompt batch size, both in tokens.
    n_ctx=6000,
    n_batch=6000,
    # Requested number of layers to offload to the GPU.
    # NOTE(review): 400 presumably exceeds the model's layer count, i.e.
    # "offload everything" -- confirm against the llama.cpp load log.
    n_gpu_layers=400,
    # Sampling settings.
    temperature=0.0,
    top_p=1,
    # Streaming output and verbose logging.
    callback_manager=callback_manager,
    verbose=True,
)

In [None]:
# Baseline: send the question straight to the LlamaCpp model, with no
# document retrieval involved. The returned text is kept in `answer`.
question = "What is Release.com?"
answer = llm(question)

In [None]:

from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
# Embeddings are numerical representations of the text chunks and queries.
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# 1. Load the PDF into LangChain documents.
loader = PyPDFLoader("release.pdf")
documents = loader.load()

# 2. Split the loaded documents into chunks of 1000 with an overlap of 20.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=20,
)
all_splits = text_splitter.split_documents(documents)

# 3. Embed every chunk and store the embeddings in a Chroma vector db.
embeddings = HuggingFaceEmbeddings()
vectordb = Chroma.from_documents(documents=all_splits, embedding=embeddings)

# 4. Build a RetrievalQA chain that connects Llama to the chunks stored in
#    the vector db through a retriever.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectordb.as_retriever(),
)



In [None]:

# Ask through the RetrievalQA chain, which uses the retriever built on the
# vector db of PDF chunks, instead of calling the model directly.
# NOTE(review): `result` is presumably a dict holding the answer -- confirm.
question = "What is release.com?"
result = qa_chain({"query": question})


In [None]:

# Baseline for a second question: call the LlamaCpp model directly, without
# going through the retrieval chain. Overwrites `question` and `result`.
question = "What cloud integrations does release.com support"
result = llm(question)


In [None]:

# Same question as the direct-model cell, this time sent through the
# RetrievalQA chain so the two outputs can be compared.
question = "What cloud integrations does release.com support"
result = qa_chain({"query": question})
