In [8]:
# One-time environment setup: uncomment the lines below and run this cell to
# install the notebook's dependencies, then comment them out again.
# NOTE(review): later cells import `PyPDF2` and langchain's `FAISS` vector
# store, while this list installs `pypdf` and `chromadb` — confirm whether
# `PyPDF2` and a FAISS package (e.g. `faiss-cpu`) should be listed instead.
# NOTE(review): `%pip install ...` targets the running kernel's environment,
# which `!pip3` is not guaranteed to do; consider switching and pinning versions.
# !pip3 install --upgrade langchain 
# !pip3 install pypdf
# !pip3 install chromadb
# !pip3 install tiktoken

Collecting tiktoken
  Downloading tiktoken-0.4.0-cp310-cp310-macosx_11_0_arm64.whl (761 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m761.4/761.4 KB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Installing collected packages: tiktoken
Successfully installed tiktoken-0.4.0
You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.10/bin/python3.10 -m pip install --upgrade pip' command.

In [12]:
import os
import getpass

# Ask for the OpenAI key interactively (never hard-code it), but only when it
# is not already present in the environment: re-running this cell, or running
# the notebook with the key exported in the shell, then no longer re-prompts.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')

In [3]:
import PyPDF2
from collections import namedtuple

# Minimal record carrying a page's text plus its provenance; instances are
# what `extract_pdf_data` returns and what is later handed to the text splitter.
Document = namedtuple('Document', ['page_content', 'metadata'])


def extract_pdf_data(pdf_path):
    """Read a PDF and return one ``Document`` per page.

    Args:
        pdf_path: Path of the PDF file. It is opened in binary mode and is
            also stored unchanged as the ``'source'`` entry of every page's
            metadata.

    Returns:
        list[Document]: One entry per page, in page order. ``page_content``
        is whatever ``extract_text()`` returns for the page and ``metadata``
        is ``{'source': pdf_path, 'page': <0-based page index>}``.
    """
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Build the whole list inside the `with` block so every page is read
        # while the file handle is still open.
        return [
            Document(
                page_content=pdf_page.extract_text(),
                metadata={'source': pdf_path, 'page': page_index},
            )
            for page_index, pdf_page in enumerate(pdf_reader.pages)
        ]



In [4]:

# Load the source PDF: `pages` holds one Document per page of the file.
pdf_path = "./data/ato-income-deductions.pdf"
pages = extract_pdf_data(pdf_path=pdf_path)


In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the per-page documents into smaller chunks (chunk_size=1000, no
# overlap between consecutive chunks); `all_splits` is what gets embedded.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
all_splits = text_splitter.split_documents(pages)

In [13]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.agents.agent_toolkits import create_retriever_tool
from langchain.agents.agent_toolkits import create_conversational_retrieval_agent
from langchain.chat_models import ChatOpenAI



In [14]:
# Embed every chunk with OpenAI embeddings and index the result in FAISS.
embeddings = OpenAIEmbeddings()
db = FAISS.from_documents(all_splits, embeddings)

# Expose the index as a retriever and wrap it in a named tool for the agent.
retriever = db.as_retriever()
tool = create_retriever_tool(
    retriever,
    name="search_tax_deductions_policy",
    description="Searches and returns documents regarding the tax_deductions_policy.",
)
tools = [tool]

# Chat model (temperature 0) driving a conversational agent that can call the
# retriever tool; verbose=True makes each call print its chain trace.
llm = ChatOpenAI(temperature=0)
agent_executor = create_conversational_retrieval_agent(
    llm,
    tools,
    verbose=True,
)


In [25]:
# First turn of the conversation: a plain greeting, stored in `result`.
result = agent_executor(
    {"input": "hi, im jeff"}
)



> Entering new AgentExecutor chain...
Hello Jeff! How can I assist you today?

> Finished chain.


In [26]:
# Second turn: ask a tax-deduction question; `result` is overwritten with the
# agent's response to this input.
result = agent_executor(
    {"input": "What are things to consider deducting if I work as a consultant?"}
)



> Entering new AgentExecutor chain...
As a consultant, there are several deductions you may consider when filing your taxes. Here are some potential deductions to consider:

1. Home Office Expenses: If you have a dedicated space in your home that you use exclusively for your consulting work, you may be eligible to claim deductions for home office expenses. This can include a portion of your rent or mortgage, utilities, internet, and office supplies.

2. Business-related Travel Expenses: If you incur travel expenses for business purposes, such as attending client meetings or conferences, you may be able to deduct these expenses. This includes transportation costs, accommodation, meals, and other related expenses.

3. Professional Development and Training: If you invest in professional development courses, workshops, or training programs to enhance your skills and knowledge as a consultant, you may be able to deduct these expenses.

4. Business-related Subscription