### Embedding model

In [1]:
from langchain_community.embeddings import OpenVINOEmbeddings

model_name = "BAAI/bge-small-en"
model_kwargs = {"device": "CPU"}
encode_kwargs = {"mean_pooling": True, "normalize_embeddings": True}

ov_embeddings = OpenVINOEmbeddings(
    model_name_or_path=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
    show_progress=True,
)

`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.
  inverted_mask = torch.tensor(1.0, dtype=dtype) - expanded_mask


### Creating a vector store with ChromaDB (in memory, local)

In [2]:
from langchain_chroma import Chroma

vector_store = Chroma(
    collection_name="arch_wiki_collection",
    embedding_function=ov_embeddings,
    persist_directory="chroma_langchain_db"
)

### Adding documents

In [3]:
import os
from langchain_community.document_loaders import DirectoryLoader, BSHTMLLoader

directory_path = "./arch-wiki/html/en/"

if not os.path.exists(directory_path):
    print(f"Directory not found: {directory_path}")
else:
    loader = DirectoryLoader(
        path=directory_path,
        glob="**/*.html", # all html files and subdirectories
        loader_cls=BSHTMLLoader, # specify BSHTML loader
        show_progress=True
    )

docs = loader.load() # loads all documents at once

100%|████████████████████████████████████████████████████████████████████████▉| 2491/2493 [00:30<00:00, 80.85it/s]


In [6]:
docs[0].metadata

{'source': 'arch-wiki/html/en/WireGuard.html', 'title': 'WireGuard - ArchWiki'}

In [4]:
from uuid import uuid4

# generate random universally unique identifier (UUID)
uuids = [str(uuid4()) for _ in range(len(docs))] 

vector_store.add_documents(
    documents=docs,
    ids=uuids
)

Batches: 100%|██████████████████████████████████████████████████████████████████| 623/623 [04:48<00:00,  2.16it/s]


['3c155da9-d6b6-48fb-b760-ddf559ab20df',
 'a7e3a5d0-2eb1-4a22-93ce-3937bd856940',
 '23741eb6-d145-4c58-aa83-5dc1ab960897',
 'af1c4700-1a7c-432c-8898-739717688139',
 '34e172ad-9e80-4756-add9-76dde01dd390',
 'cd37e1b2-0b66-4cf8-8991-91f0b2a8221f',
 '508d4c12-d2af-4a7e-a7e9-8d04fe46c2fe',
 'bdbc8ffd-c3e5-4f04-b6a0-8eaae1952fe6',
 'f6bc36fb-8ce8-416e-afd6-a7f88290853d',
 'ff71bd56-c67b-409b-af6e-9f571add8cb3',
 '9e6e7bde-6571-457d-8edc-477e40683cfe',
 '478094dd-ff07-49c8-aa04-1c572f7a22ef',
 '64452937-0a99-4e6c-9a8d-0bcc7147c010',
 '4a930fa9-abbd-4ead-9b2b-215920719bbd',
 'a9554e22-0cec-4d3e-afed-11f57d202d9e',
 '1e23cd81-2fbc-43e2-aa06-1fbfb0b98d0c',
 '94f5851d-83ba-44c9-929f-87c8be1ef003',
 '98ae1475-8fab-4c1c-945b-5fce0a455b1c',
 '47cc2719-cabd-4204-8585-0219aa888a2e',
 '40805008-6ace-4be5-a67b-b2779a8ff1dd',
 '5940a00e-c85b-4331-aa61-4cb695d9d637',
 '85dfe0fa-af36-41f6-9231-fd88faec17b2',
 '4e2c1cc4-0383-471a-8d1c-a519cd16b57e',
 'd79f04ee-653c-45b2-9d81-4a43ec7a3f97',
 '2c107dcf-7ed7-

### Querying a document

In [10]:
results = vector_store.similarity_search_with_score( 
    # langchain embeds text query automatically
    "How to install minecraft?",
    k=2,
)

Batches: 100%|█████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 138.55it/s]


In [12]:
for res, score in results:
    print(f"* [SIM={score:3f}] {res.page_content} [{res.metadata}]")

* [SIM=0.360615] 


Minecraft - ArchWiki













Jump to content












Contents
move to sidebar
hide




Beginning





1
Client




Toggle Client subsection





1.1
Java Edition






1.1.1
Installation








1.1.2
Firewall configuration for Client/LAN worlds










1.2
Bedrock Edition








1.3
Minecraft Education










2
Server




Toggle Server subsection





2.1
Java Edition








2.2
Bedrock Edition






2.2.1
Installation








2.2.2
Configuration












3
Minecraft mod launchers








4
Other programs and editors








5
Troubleshooting




Toggle Troubleshooting subsection





5.1
Logs








5.2
Client or server does not start








5.3
Broken fonts with MinecraftForge








5.4
MultiMC forks unable to build








5.5
Cannot change pulseaudio sink








5.6
Audio stutters on PipeWire or Java crashes with SIGFPE








5.7
Minecraft does not start on native Wayland








5.8
2 and 6 do not work when pressed in combination with