In [21]:
import argparse
import os
import shutil
from langchain.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores.chroma import Chroma
from langchain.embeddings import OpenAIEmbeddings
from dotenv import load_dotenv

In [4]:
# Directory where the Chroma vector store is persisted.
CHROMA_PATH = "chroma"
# PDF that gets loaded, chunked and embedded.
data_path = "data/monopoly.pdf"
# load_dotenv() is called only for its side effect of populating the process
# environment from a .env file; its return value is not the key, so it is not
# assigned to `api_key`.
load_dotenv()
# None when OPENAI_API_KEY is not set in the environment.
api_key = os.getenv("OPENAI_API_KEY")

In [5]:
def load_documents():
    """Load the PDF at ``data_path`` and return the result of ``PyPDFLoader.load()``."""
    return PyPDFLoader(data_path).load()


documents = load_documents()

In [6]:
def split_text(documents: list, chunk_size: int = 1000, chunk_overlap: int = 500):
    """Split loaded documents into overlapping text chunks.

    Args:
        documents: Documents to split (as returned by ``load_documents``).
        chunk_size: Maximum chunk length, measured with ``len`` (characters).
        chunk_overlap: Number of characters shared between neighbouring
            chunks. The default of 500 is half the default chunk size.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter.split_documents``.
        ``add_start_index=True`` is passed so the splitter records each chunk's
        start offset.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        add_start_index=True,
    )
    chunks = text_splitter.split_documents(documents)
    print(f"Split {len(documents)} into {len(chunks)} chunks.")
    return chunks

In [7]:
# Chunk the loaded documents with the default splitter settings; `chunks` is
# what later cells preview and embed.
chunks = split_text(documents)

Split 8 into 29 chunks.


In [8]:
# Sanity check: show the first chunk's text followed by its metadata.
print(chunks[0].page_content, chunks[0].metadata, sep="\n")

MONOPOLY 
Property Trading Game from Parker Brothers" 
AGES 8+ 
2 to 8 Players 
Contents: Gameboard, 3 dice, tokens, 32 houses, I2 hotels, Chance 
and Community Chest cards, Title Deed cards, play money and a Banker's tray. 
Now there's a faster way to play MONOPOLY. Choose to play by 
the classic rules for buying, renting and selling properties or use the 
Speed Die to get into the action faster. If you've never played the classic 
MONOPOLY game, refer to the Classic Rules beginning on the next page. 
If you already know how to play and want to use the Speed Die, just 
read the section below for the additional Speed Die rules. 
SPEED DIE RULES 
Learnins how to Play with the S~eed Die IS as 
/ 
fast as playing with i't. 
1. When starting the game, hand out an extra $1,000 to each player 
(two $5005 should work). The game moves fast and you'll need 
the extra cash to buy and build. 
2. Do not use the Speed Die until you've landed on or passed over
{'producer': 'Adobe Acrobat 7.0 Paper C

In [20]:
def save_to_chrom(chunks: list, reset: bool = False):
    """Embed ``chunks`` with OpenAI embeddings and store them in Chroma.

    Args:
        chunks: Document chunks to embed and store.
        reset: When True, delete the existing ``CHROMA_PATH`` directory before
            building the store. Defaults to False, which leaves any existing
            store in place.
            NOTE(review): without a reset, re-running this cell presumably
            adds the same chunks again to the existing store — confirm
            against the Chroma documentation.

    Side effects:
        Writes the vector store under ``CHROMA_PATH`` and prints a summary line.
    """
    if reset and os.path.exists(CHROMA_PATH):
        shutil.rmtree(CHROMA_PATH)

    db = Chroma.from_documents(
        chunks, OpenAIEmbeddings(), persist_directory=CHROMA_PATH
    )
    # NOTE(review): newer Chroma versions appear to persist automatically and
    # may emit a warning here; kept so older versions still flush to disk.
    db.persist()
    print(f"Saved {len(chunks)} chunks to {CHROMA_PATH}")


save_to_chrom(chunks)

Saved 29) chunks to chroma


  db.persist()


In [31]:
def context(query_text, k=3, min_score=0.6):
    """Print the stored chunks most relevant to ``query_text``.

    Opens the Chroma store persisted at ``CHROMA_PATH``, runs a similarity
    search with relevance scores, and prints the matching chunk texts joined
    by a ``---`` separator.

    Args:
        query_text: The question or phrase to search for.
        k: Number of results to request from the store.
        min_score: Minimum relevance score the first result must reach;
            below it the query is treated as having no match.
            NOTE(review): assumes results come back best-first — confirm.

    Returns:
        None. Output is printed; when nothing matches, a notice is printed
        instead.
    """
    embedding_function = OpenAIEmbeddings()
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

    # Each result is a (document, score) pair.
    results = db.similarity_search_with_relevance_scores(query_text, k=k)
    if not results or results[0][1] < min_score:
        print("Unable to find matching results")
        return

    context_text = "\n\n---\n\n".join(doc.page_content for doc, _score in results)
    print(context_text)

In [32]:
# Example query: prints the retrieved chunks, separated by "---".
context("What does a banker do")

as Banker and Auctioneer. 
THE BANK: Besides the Bank's money, the Bank 
holds the Title Deed cards and houses and hotels prior to purchase 
and use by the players. The Bank pays salaries and bonuses. It sells 
and auctions properties and hands out the~r proper Title Deed cards; 
it sells houses and hotels to the players and loans money when 
required on mortgages. 
The Bank collects all taxes, fines, loans and interest, and the price of 
all properties which it sells and auctions. 
The Bank nwer "goes broke." If the Bank runs out of money, the Banker 
may issue as much more as needed by writing on any ordinary paper. 
THE PLAY: Starting with the Banker, each player in turn throws the dice. 
The player with the highest total starts the play: Place your 
token on the corner marked "GO," throw the dice and move 
your token in the direction of the arrow the number of 
spaces indicated by the dice. After you have completed 
your play, the turn passes to the left. The tokens remain

---

Ea