# LangChain-based index for building a custom-knowledge GPT

## install libraries

In [None]:
%pip install langchain "langchain[llms]"
%pip install gpt4all
%pip install chromadb
%pip install llama_index
%pip install git+https://github.com/selamanse/llama-hub.git@add_extra_info_to_web
%CMAKE_ARGS="-DLLAMA_METAL=1" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall
#%pip install llama-cpp-python --force-reinstall --upgrade --no-cache-dir
%pip install git+https://github.com/selamanse/llama-hub.git@add_extra_info_to_web


# Load data from deepshore website as document base for the index

In [None]:
import os
import platform
import sys

# Show which interpreter installation and working directory the kernel is using,
# so that relative paths such as ./models can be checked.
print(os.path.dirname(sys.executable))
print(os.getcwd())

# Report the version of the interpreter running this kernel. A shell call to
# `python --version` would instead report whichever `python` is first on PATH,
# which is not necessarily the kernel's interpreter.
print(f"Python {platform.python_version()}")

# Running the llama-cpp binding as an OpenAI-compatible web service

Download the model from [Hugging Face](https://huggingface.co/TheBloke/open-llama-7b-open-instruct-GGML)

```
pip install 'llama-cpp-python[server]'
export MODEL=models/open-llama-7B-open-instruct.ggmlv3.q6_K.bin
python3 -m llama_cpp.server --model $MODEL --n_gpu_layers 1
```

In [None]:
from llama_hub.web.sitemap.base import SitemapReader

# Required when running inside Jupyter, where an event loop is already running;
# outside a notebook these two lines can be dropped.
import nest_asyncio
nest_asyncio.apply()

# Read every page listed in the sitemap whose URL matches the filter prefix,
# converting the HTML to plain text.
loader = SitemapReader(html_to_text=True)
documents = loader.load_data(sitemap_url='https://deepshore.de/sitemap.xml', filter='https://deepshore.de/knowledge')

# Fail with a clear message instead of an opaque IndexError when nothing matched.
if not documents:
    raise ValueError("SitemapReader returned no documents for the given sitemap_url/filter")

# Keep only the last loaded page as a small working sample.
documents = documents[-1:]
print(len(documents))
print(documents[0].extra_info)

# load the desired model via langchain llm bindings

- For the GPT4All binding, you need to install the corresponding package (as far as I can tell, it uses Metal out of the box).
- For llama-cpp-python on an M1 Mac, a Miniforge (conda) environment is needed to build the ARM-compatible version of the C++ bindings (it uses only the CPU out of the box).

In [None]:
import os
from langchain.llms import GPT4All
#from gpt4all import GPT4All
from langchain.llms import LlamaCpp
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.callbacks.manager import CallbackManager

# Stream generated tokens to stdout while the model is producing them.
callbacks = [StreamingStdOutCallbackHandler()]
callback_manager = CallbackManager(callbacks)

# Model directory of the GPT4All desktop app (macOS layout), resolved for the
# current user instead of a hardcoded home directory.
local_path = os.path.expanduser("~/Library/Application Support/nomic.ai/GPT4All")

# Model file to load; it is expected in ./models relative to the notebook's
# working directory. Other model files previously referenced here:
#   ggml-vicuna-13b-1.1-q4_2.bin
#   ggml-gpt4all-l13b-snoozy.bin
#   ggml-gpt4all-j-v1.3-groovy.bin
#   GPT4All-13B-snoozy.ggmlv3.q4_0.bin
#   open-llama-7B-open-instruct.ggmlv3.q4_0.bin
model_name = f"{os.getcwd()}/models/open-llama-7B-open-instruct.ggmlv3.q6_K.bin"
#model_name = f"{local_path}/ggml-gpt4all-l13b-snoozy"

# Fail early with an actionable message if the model has not been downloaded yet.
if not os.path.isfile(model_name):
    raise FileNotFoundError(f"Model file not found: {model_name}")

# NOTE(review): n_gpu_layers=10000 is presumably meant as "offload all layers" - confirm.
llm = LlamaCpp(model_path=model_name, callback_manager=callback_manager, verbose=True, use_mlock=False, n_gpu_layers=10000)

# Alternative: load a model through the GPT4All binding instead of llama-cpp.
#llm = GPT4All(model="./models/ggml-gpt4all-l13b-snoozy", backend="gptj", callbacks=callbacks, verbose=True, n_ctx=2047, use_mlock=True, n_threads=8, f16_kv=False)
#llm = GPT4All(model_path=f"{os.getcwd()}/models", allow_download=False, model_name="ggml-gpt4all-j-v1.3-groovy.bin")


# load documents into a vector store

In [None]:
# import
from langchain.embeddings.llamacpp import LlamaCppEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from llama_index.schema import Document

# Convert the loaded llama_index documents into LangChain documents.
langchain_documents = [d.to_langchain_format() for d in documents]

# Split the documents into chunks (chunk_size=1000, no overlap). The chunks get
# their own name so they are not confused with the search results below.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
chunks = text_splitter.split_documents(langchain_documents)

# Embedding function backed by the same local model file as the LLM.
embedding_function = LlamaCppEmbeddings(model_path=model_name, use_mlock=True, n_gpu_layers=1000, n_threads=8, n_ctx=2048, n_batch=500)

# Embed the chunks and load them into a Chroma vector store.
db = Chroma.from_documents(chunks, embedding_function)

# Query the store.
# TODO(review): this query looks like a leftover from an unrelated example and
# does not relate to the deepshore pages indexed above - replace it with a
# question about that content.
query = "What did the president say about Ketanji Brown Jackson"
docs = db.similarity_search(query)

# Print the best match, guarding against an empty result list.
if docs:
    print(docs[0].page_content)
else:
    print("No matching chunks found for the query.")

# With LlamaCpp embeddings (not for GGML models from GPT4All)

# load chroma db from disk

# make a retriever

https://colab.research.google.com/drive/1gyGZn_LZNrYXYXa-pltFExbptIe7DAPe?usp=sharing#scrollTo=MGx8XblM4shW



## Make a retrieval chain

## Create Index from Documents

## load index from storage

## persist index to storage

## query index