# Lokales Embedding benutzen

Dieses Notebook führt dich durch folgende Schritte:

- Installation der benötigten Pakete.
- Benutzen des Sitemap-Loaders, um die Webseite deepshore.de nach Wissensbeiträgen zu durchkämmen.
- Die Dokumente an ein *lokales* Embedding-Modell schicken, in Vektoren verwandeln und im Index speichern.
- Eine Frage an das *lokale* Embedding-Modell schicken, die neu gewonnenen Vektoren zusammen mit den indexierten Vektoren verorten und eine Antwort bekommen.


Du benötigst:

- python 3.10
- Jupyter Notebook Server
- Eine lokale GGML-Modelldatei im Unterordner `models/` (ein OpenAI API Token wird für das lokale Modell nicht benötigt)

# Installation

In [None]:
# Pinned dependencies for this notebook. Every install command is commented
# out; uncomment the lines you need and run the cell to install them.
#%pip install langchain=="0.0.279"
#%pip install "langchain[llms]"=="0.0.279"
#%pip install gpt4all=="0.3.5"
#%pip install chromadb=="0.3.26"
#%pip install llama_index=="0.8.17"
#%pip install llama_hub=="0.0.26"
# The next command force-reinstalls llama-cpp-python with the LLAMA_METAL
# CMake option switched on.
#!CMAKE_ARGS="-DLLAMA_METAL=on" FORCE_CMAKE=1 pip install llama-cpp-python=="0.1.78" --upgrade --no-cache-dir --force-reinstall
#%pip install mercury=="2.3.4"
# NOTE: sentence_transformers is the only package without a pinned version.
#%pip install sentence_transformers

In [None]:
import os
import sys
print(os.path.dirname(sys.executable))
print(os.getcwd())

!python --version

## Dokumente laden

... indem man den Sitemap-Loader benutzt, um die Webseite deepshore.de nach Wissensbeiträgen zu durchkämmen

In [None]:
from llama_hub.web.sitemap.base import SitemapReader

import nest_asyncio

# Apply the nest_asyncio patch before the reader is used inside the notebook.
nest_asyncio.apply()

# The sitemap to crawl and the URL filter handed to the reader; the filter
# narrows the crawl to a single knowledge article.
sitemap_location = 'https://deepshore.de/sitemap.xml'
page_filter = 'https://deepshore.de/en/knowledge/2023-05-08'

loader = SitemapReader(html_to_text=True)
documents = loader.load_data(sitemap_url=sitemap_location, filter=page_filter)

# Show how many documents were loaded.
document_count = len(documents)
print(document_count)

# Modell laden

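Folgendes Modell wurde hier heruntergeladen: [Hugging Face](https://huggingface.co/TheBloke/open-llama-7b-open-instruct-GGML). Hinweis: Der Code in der folgenden Zelle lädt die Datei `models/llama-2-7b-chat.ggmlv3.q2_K.bin`; der Dateiname in `model_name` muss zur tatsächlich heruntergeladenen Modelldatei passen.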

In [None]:
import os
from langchain.llms import LlamaCpp
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.callbacks.manager import CallbackManager
from llama_cpp import Llama

# Register the streaming stdout handler as the only callback.
callbacks = [StreamingStdOutCallbackHandler()]
callback_manager = CallbackManager(callbacks)

# The model file is looked up in ./models below the current working directory.
model_name = f"{os.getcwd()}/models/llama-2-7b-chat.ggmlv3.q2_K.bin"

# All LlamaCpp settings in one place.
llama_settings = dict(
    model_path=model_name,
    callback_manager=callback_manager,
    use_mlock=True,
    n_threads=2,
    n_gpu_layers=32,
    temperature=0.25,
    verbose=True,
    n_ctx=2048,
)
llama_llm = LlamaCpp(**llama_settings)

# Quick smoke test: send one question straight to the model.
prompt = """
Question: Who is the 42nd president of the united states?
"""
llama_llm(prompt)

## Die Dokumente an das lokale Embeddings Modell schicken

...und in Vektoren verwandeln und im Index speichern 

In [None]:
from langchain.vectorstores import Chroma
from llama_index.schema import Document
from langchain.embeddings.openai import OpenAIEmbeddings
import chromadb
from chromadb.config import Settings
from langchain.vectorstores import Chroma

# import
from langchain.embeddings.llamacpp import LlamaCppEmbeddings
from langchain.text_splitter import CharacterTextSplitter,RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from llama_index.schema import Document
from llama_index.indices.service_context import ServiceContext

# Convert every loaded document into its LangChain representation.
langchain_documents = [source_doc.to_langchain_format() for source_doc in documents]

# Split the LangChain documents into chunks (chunk_size=500, chunk_overlap=50).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = text_splitter.split_documents(langchain_documents)

# Convert the chunks back into llama_index documents.
llama_documents = [Document.from_langchain_format(chunk) for chunk in docs]

# Chroma settings: telemetry is switched off
# (https://docs.trychroma.com/telemetry#opting-out) and the duckdb+parquet
# backend uses ./chroma as its persist directory.
chromadb_settings = Settings(
    anonymized_telemetry=False,
    persist_directory="./chroma",
    chroma_db_impl="duckdb+parquet",
)
chromadb_client = chromadb.Client(chromadb_settings)

# LangChain wrapper around the 'deepshore-sitemap' collection. No embedding
# function is passed here.
chroma = Chroma(collection_name='deepshore-sitemap', client=chromadb_client)

# NOTE(review): get_collection presumably relies on the Chroma wrapper above
# having created the collection already - confirm.
chroma_collection = chromadb_client.get_collection(name='deepshore-sitemap')




In [47]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, StorageContext
from llama_index.embeddings import LangchainEmbedding
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.embeddings.llamacpp import LlamaCppEmbeddings
from llama_index.llm_predictor.base import LLMPredictor
from llama_index.vector_stores import ChromaVectorStore
from llama_index.vector_stores.types import VectorStoreQuery, VectorStoreQueryResult
from llama_cpp import Embedding

# Model arguments kept for experimentation; nothing in the visible notebook
# reads this dict.
LlamaArgs = {
    "model_path": model_name
}

# Embeddings are produced by the same local model file as the LLM.
embedding = LlamaCppEmbeddings(model_path=model_name, n_ctx=2048, use_mlock=False, n_gpu_layers=0, n_threads=8)

# Wrap the LangChain embedding in llama_index's adapter explicitly, so the
# service context receives a llama_index embedding object and does not
# depend on any implicit conversion.
embed_model = LangchainEmbedding(embedding)

# Reuse the LLM that was loaded in the model cell for answer synthesis.
llmpredictor = LLMPredictor(llm=llama_llm)

# Store the vectors in the Chroma collection prepared in the previous cell.
chromastore = ChromaVectorStore(chroma_collection=chroma_collection)

storage_context = StorageContext.from_defaults(vector_store=chromastore)
service_context = ServiceContext.from_defaults(llm_predictor=llmpredictor, embed_model=embed_model)

# Build the index once, directly from the loaded documents. The earlier
# VectorStoreIndex.from_vector_store(...) call was removed because its result
# was overwritten by this assignment without ever being used.
# NOTE(review): the unsplit `documents` are indexed here; the pre-chunked
# `llama_documents` built in the previous cell are not used.
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context, service_context=service_context
)


## Eine Frage an das Modell schicken

... und eine Antwort im Kontext der Daten bekommen

In [None]:
import textwrap
import mercury as mr
 
# Mercury application parameters, collected in one mapping.
app_options = dict(
    title="",
    description="",
    show_code=False,
    show_prompt=False,
    continuous_update=True,
    static_notebook=False,
    show_sidebar=True,
    full_screen=True,
    allow_download=True,
)
app = mr.App(**app_options)

# Text widget holding the user's question, pre-filled with a default.
question = mr.Text(value="Was ist k6?", label="Womit kann Deepshore helfen?", rows=1)

# Query the index with the current widget value.
query_engine = index.as_query_engine()
response = query_engine.query(question.value)

# Wrap the answer text to a width of 29 characters for the chat display.
wrapped_answer = textwrap.fill(response.response, 29)

messages = [question.value, wrapped_answer]
mr.Chat(messages)

In [None]:
# Ask a second, fixed question through the same query engine.
follow_up_question = "What is the conclusion of the article?"
response = query_engine.query(follow_up_question)