In [1]:
from dotenv import load_dotenv
# Load variables from a .env file into the process environment (python-dotenv).
# NOTE(review): presumably this is what supplies the OpenAI API key used by the
# embedding and LLM cells further down — confirm the expected variable name.
load_dotenv()

True

In [2]:
import os
import pickle
import pandas as pd
from transformers import GPT2TokenizerFast
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Read the whole parsed-website dump into memory as a single string.
with open("website_content_parsed.txt", encoding="utf-8") as f:
    text = f.read()

# Pretrained GPT-2 tokenizer; in this notebook it is only used to measure
# text length in tokens (see count_tokens).
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")

def count_tokens(text: str) -> int:
    """Return the number of token ids the module-level ``tokenizer`` produces for ``text``.

    Passed to the text splitter as its ``length_function`` so that chunk
    sizes are measured in tokens rather than characters.
    """
    token_ids = tokenizer.encode(text)
    return len(token_ids)

# Chunk lengths are measured with count_tokens, so the sizes below are in
# tokens of the GPT-2 tokenizer, not characters.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,       # upper bound on a chunk's length
    chunk_overlap=24,     # length shared between consecutive chunks
    length_function=count_tokens,
)

# create_documents expects a list of texts; the full file contents are
# passed as a single entry.
chunks = text_splitter.create_documents([text])

In [4]:
# Embed every chunk with the OpenAI model named below and build a FAISS
# vector store over the resulting vectors.
# NOTE(review): this presumably calls the OpenAI API each time the cell runs
# and needs an API key in the environment — confirm before Run All.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
db = FAISS.from_documents(chunks, embeddings)

# Serialize the vector store to disk, presumably so later sessions can reuse
# it without re-embedding.
# NOTE(review): only unpickle this file from a trusted location; pickle.load
# can execute arbitrary code.
with open("embeddings.pkl", mode="wb") as f:
    pickle.dump(db, f)

In [8]:
# Retrieve the chunks most similar to the question from the FAISS store.
# No result count is passed, so the library default applies.
query = "What are all classes in baldurs gate 3"
docs = db.similarity_search(query)
# Display the top-ranked chunk for inspection.
# NOTE(review): in the saved output this top hit is site navigation boilerplate
# rather than class information — consider stripping menu/footer text from the
# scraped file before chunking.
docs[0]

Document(page_content='Quest Items Food Tools Potions Scrolls Quest Items Food Tools Equipment Features World Information Interactive Map Companions Approval Quests Locations Maps Enemies Bosses NPCs Merchants Lore Camp Interactive Map Companions Approval Quests Locations Maps Maps Enemies Bosses Bosses NPCs Merchants Lore Camp Guides Walkthroughs New Player Help Walkthrough Game Progress Route Build Guides Endings Guide Romance Guide Mods Trophy Achievement Guide New Player Help Walkthrough Game Progress Route Build Guides Endings Guide Romance Guide Mods Trophy Achievement Guide Create new page Edit History Recent Changes Rename Redirect Lock Unlock Permissions Javascript Tags Edit Open Graph Clear page cache Clear comments cache File Manager Page Manager Wiki Templates Comments Approval Wiki Settings Wiki Manager Delete Anonymous01 Aug 2023 14 44 I guess the Twitch drops until middle of August might be worth mentioning at least here as a comment some extra clothes but still fun lol 

In [9]:
# Question-answering chain over the retrieved chunks.
# temperature=0 is presumably chosen to make answers as repeatable as possible.
llm = OpenAI(temperature=0)
chain = load_qa_chain(llm, chain_type="stuff")

# Answer the query using only the documents returned by the similarity search;
# the result string is shown as the cell output.
chain.run(input_documents=docs, question=query)

" The classes in Baldur's Gate 3 are Cleric, Fighter, Ranger, Rogue, Warlock, Wizard, Druid, Sorcerer, Barbarian, Bard, Paladin, and Monk. Each class has a number of subclasses, with an average of 3 subclasses per class. The Cleric and Wizard classes have more subclasses than other classes. Multiclassing is also available in Baldur's Gate 3, allowing players to switch classes mid-game."