In [2]:
import os
import openai
import pinecone
import langchain_community

In [3]:
from os import listdir
from os.path import isfile, join
documents = [f'Documents/{f}' for f in listdir("Documents") if isfile(join("Documents", f))]

print(documents)

['Documents/Assembling Components - Oritech.html', 'Documents/Fragmenting Ores - Oritech.html', 'Documents/Gather Resources - Oritech.html', 'Documents/Have Fun - Oritech.html', 'Documents/Lasers! - Oritech.html', 'Documents/Making Steel (and other Alloys) - Oritech.html', 'Documents/Oritech - Modded Minecraft Wiki.html', 'Documents/Your First Generator - Oritech.html']


In [21]:
from langchain_community.document_loaders import UnstructuredHTMLLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Opting for Chrome database since it is open source and easy to setup
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from uuid import uuid4
import getpass
import os

# Load in Oritech HTML documents

# Traverse documents
processed_documents = []
for document in documents:
    # Create a document loader for unstructured HTML
    loader = UnstructuredHTMLLoader(document)
    
    # Load the document
    data = loader.load()
    
    # Print the document
    # print(data[0].page_content)

    # Split documents into chunks
    chunk_size = 300
    chunk_overlap = 100
    
    # Split the HTML
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap)

    # Split documents was adding metadata into the chunks, so I will be opting to split by text
    split_docs = splitter.split_documents(data)

    for split_doc in split_docs:
        processed_documents.append(split_doc)

# Assign unique uuid to document
uuids = [str(uuid4()) for _ in range(len(processed_documents))]

# Obtain OpenAI Api Key
if "OPENAI_API_KEY" not in os.environ:
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

# Embed the documents in a persistent Chroma vector database
embedding_function = OpenAIEmbeddings(api_key=os.environ["OPENAI_API_KEY"], model='text-embedding-3-small')
vectorstore = Chroma.from_documents(
    processed_documents,
    ids=uuids,
    embedding=embedding_function,
    persist_directory=os.getcwd()
)

# Check chroma entries
print(vectorstore.get())
    

Enter your OpenAI API key:  ········


{'ids': ['9bad9dbb-e669-4796-bc0c-93722118389e', 'd625294d-1336-4367-a9b9-ef2af5cdfda0', '20fdeb68-b338-4330-aec3-9fb0d230bc5f', '80d49eff-ac60-42f3-a4ba-e870249658e5', 'd47c9078-de6e-46a7-adc4-7c540da61fc0', 'dd78ce50-52fa-40f3-8f71-d27af635a588', '619b993b-99e0-464a-9fe0-641261e4a2af', 'f9093549-6fd1-4cd5-a526-d91eafb1c42b', '6efa0cd7-03af-427f-8e4d-e6ff6135ee54', '8fb42889-b37f-4478-9660-1e19bac690b8', 'fd318cd4-9dc5-4766-a974-b4be799f661d', 'a2ddcf3b-f056-44c7-b713-47267aff4b3b', 'f0871407-f8b9-4e35-81ba-abbff62ae3d7', '03fed7e9-df69-4d36-b571-f21b0bc97885', 'e8a57c37-6ef8-4ea4-9b33-598954b0abf7', '7ddfec3e-956e-44a3-82d1-12600881ad86', 'c1ce2edc-089c-4f95-8b9e-906cfeb12d30', '64d1a2a6-8207-4616-aba5-0007dab28276', '5361f228-de98-49f9-9c64-eda505a623f6', 'c2a0bf8d-6af2-4df0-b377-6a24cf29b6bb', '2072e5d9-a350-4010-9419-72ec7fc5679f', '2197e146-23b9-4a2a-8995-01304869c623', '6e7d2515-86f5-4778-bf07-a8942539aa14', '493864df-6bb8-49ad-b8d0-0e9f7d9748a7', '4a787d2c-0c00-46be-b263-bac1b9

In [22]:
from langchain_core.prompts import ChatPromptTemplate

# Configure the vector store as a retriever
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3}
)

# Set up a generic prompt template to provide context
message = """
Answer the following question about oritech using the context provided:

Context:
{context}

Question:
{question}

Answer:
"""

# Create a chat prompt template from the message string
prompt_template = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful conversational AI that answers questions related to the 'Oritech' minecraft mod."),
    ("human", message)
])


In [36]:
from langchain_openai import OpenAI
from langchain_core.runnables import RunnablePassthrough

# Set up OpenAI model
llm = OpenAI()

# Create a chain to link retriever, prompt_template, and llm
rag_chain = ({"context": retriever, "question": RunnablePassthrough()}
            | prompt_template
            | llm )

In [37]:
user_input = input("Enter a question for the Oritech chatbot or type 'Exit' to terminate: ")
while user_input != "Exit":
    response = rag_chain.invoke(user_input)
    print(f"Answer: {response} \n\n")
    response_docs = print(retriever.invoke(user_input))
    print(f"Top 3 Documents: {response_docs} \n\n")

    user_input = input("Enter another question for the Oritech chatbot or type 'Exit' to terminate: ")

Enter a question for the Oritech chatbot or type 'Exit' to terminate:  How can I get started with Oritech?


Answer: To get started with Oritech, you will need to download and install the mod onto your Minecraft game. Once installed, you can begin exploring the new machines and tools available in the mod and start incorporating them into your gameplay. There are also tutorials and guides available online to help you learn how to use the mod effectively. Have fun! 


[Document(id='5361f228-de98-49f9-9c64-eda505a623f6', metadata={'source': 'Documents/Oritech - Modded Minecraft Wiki.html'}, page_content='Oritech is an open-source tech mod (currently for Minecraft 1.21), featuring many new machines and tools. Content ranges from processing machines, world interaction machines, logistical parts, new features integrated into world generation, equipment, and more. Machines are mostly animated'), Document(id='7058b763-43a1-4037-8f67-6fb879f87cc6', metadata={'source': 'Documents/Oritech - Modded Minecraft Wiki.html'}, page_content='Oritech is an open-source tech mod (currently for Minecraft 1.21), fea

Enter another question for the Oritech chatbot or type 'Exit' to terminate:  Where can I find fluxite in oritech?


Answer: Fluxite can be found in the oritech:area/fluxite_mining area. 


[Document(id='7ddfec3e-956e-44a3-82d1-12600881ad86', metadata={'source': 'Documents/Lasers! - Oritech.html'}, page_content='oritech:area/fluxite_mining'), Document(id='f88a64c8-b1a7-4c4f-85ad-4b43aa811b9f', metadata={'source': 'Documents/Lasers! - Oritech.html'}, page_content='oritech:area/fluxite_mining'), Document(id='d3037227-7b7a-4862-9b4b-f556e5d34222', metadata={'source': 'Documents/Lasers! - Oritech.html'}, page_content='oritech:area/fluxite_mining')]
Top 3 Documents: None 




Enter another question for the Oritech chatbot or type 'Exit' to terminate:  What do I need to build a basic generator in Oritech?


Answer: To build a basic generator in Oritech, you will need a magnetic coil and a furnace. 


[Document(id='8c84fc8f-a44e-4fc4-9455-dd28b0944b15', metadata={'source': 'Documents/Your First Generator - Oritech.html'}, page_content="oritech:magnetic_coil\n\nminecraft:furnace\n\noritech:magnetic_coil\n\noritech:basic_generator_block\n\nThis won't generate a lot of power, but it's sufficient to get you started.\n\nWhen you need more power, consider building additional generators or exploring other generators offered by Oritech."), Document(id='972eaf87-ec58-482a-bfd9-46cd24be42fb', metadata={'source': 'Documents/Your First Generator - Oritech.html'}, page_content="oritech:magnetic_coil\n\nminecraft:furnace\n\noritech:magnetic_coil\n\noritech:basic_generator_block\n\nThis won't generate a lot of power, but it's sufficient to get you started.\n\nWhen you need more power, consider building additional generators or exploring other generators offered by Oritech."), Document(id='baa0be56-c7fe-4

Enter another question for the Oritech chatbot or type 'Exit' to terminate:  Exit
