In [2]:
%load_ext autoreload
%autoreload 2

In [6]:
from langchain import PromptTemplate, LLMChain
from langchain.document_loaders import UnstructuredHTMLLoader
from langchain.embeddings import LlamaCppEmbeddings
from langchain.llms import LlamaCpp
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.faiss import FAISS
from langchain.document_loaders import PDFMinerLoader, TextLoader

# loader = UnstructuredHTMLLoader("langchain/docs/_build/html/index.html")
loader = PDFMinerLoader("./arxiv_papers/gpt4-papers/2203.02155.pdf")
embedding = LlamaCppEmbeddings(model_path="models/ggml-model-q4_0.bin")
llm = LlamaCpp(model_path="models/ggml-model-q4_0.bin")


def split_chunks(sources: list) -> list:
    chunks = []
    splitter = RecursiveCharacterTextSplitter(separator="", chunk_size=256, chunk_overlap=16)
    for chunk in splitter.split_documents(sources):
        chunks.append(chunk)
    return chunks


def generate_embedding(chunks: list):
    texts = [doc.page_content for doc in chunks]
    metadatas = [doc.metadata for doc in chunks]

    search_index = FAISS.from_texts(texts, embedding, metadatas=metadatas)

    return search_index


def similarity_search(
        query: str, index: FAISS
):
    matched_docs = index.similarity_search(query, k=4)
    sources = []
    for doc in matched_docs:
        sources.append(
            {
                "page_content": doc.page_content,
                "metadata": doc.metadata,
            }
        )

    return matched_docs, sources


docs = loader.load()
chunks = split_chunks(docs)
embeddings = generate_embedding(chunks)

question = "What are the use cases of LangChain?"
matched_docs, sources = similarity_search(question, embeddings)

template = """
Please use the following context to answer questions.
Context: {context}
---
Question: {question}
Answer: Let's think step by step."""

context = "\n".join([doc.page_content for doc in matched_docs])
prompt = PromptTemplate(template=template, input_variables=["context", "question"]).partial(context=context)
llm_chain = LLMChain(prompt=prompt, llm=llm)

print(llm_chain.run(question))

llama.cpp: loading model from models/ggml-model-q4_0.bin
llama.cpp: can't use mmap because tensors are not aligned; convert to new format to avoid this
llama_model_load_internal: format     = ggmf v1 (old version with no mmap support)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 512
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: n_parts    = 1
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size = 4113739.11 KB
llama_model_load_internal: mem required  = 5809.32 MB (+ 2052.00 MB per state)
....................................................................................................
llama_init_from_file: kv self si

TypeError: TextSplitter.__init__() got an unexpected keyword argument 'separator'

# Resources

1. Techniques to improve prompt reliability, OpenAI [https://github.com/openai/openai-cookbook/blob/main/techniques_to_improve_reliability.md#how-to-improve-reliability-on-complex-tasks]