In [1]:
# https://github.com/tomasonjo/blogs/blob/master/neo4jdocs/neo4j_support_bot.ipynb
# %pip install langchain==0.0.142 openai==0.27.4 beautifulsoup4==4.12.2 chromadb==0.3.21

In [2]:
import os
import dotenv

import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import pandas as pd

from langchain.document_loaders import (
    # GitLoader,
    # YoutubeLoader,
    # DataFrameLoader,
    UnstructuredURLLoader,
)
from langchain.text_splitter import CharacterTextSplitter

import tiktoken

# from langchain.schema import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.agents import initialize_agent, Tool

from crawl import crawl, clean, strip_content

# Load environment variables from a local .env file; OPENAI_API_KEY is later
# read from the environment with os.getenv. The call is the cell's last
# expression, so its return value is what the cell displays.
dotenv.load_dotenv()

True

In [3]:
# Regex pattern to match an absolute http:// or https:// URL.
# `https?` allows exactly zero or one "s"; a character class with `*`
# (`[s]*`) would also accept malformed schemes such as "httpsss://".
HTTP_URL_PATTERN = r"^https?://.+"

# Root domain of the site, and the URL that is passed to crawl().
DOMAIN = "aba.com.sg"
FULL_URL = "https://www.aba.com.sg/"

### Embedding documents for the first time

In [6]:
# Crawl starting from FULL_URL and report how many entries were collected.
seen = crawl(FULL_URL)
seen_count = len(seen)
print(seen_count)

https://www.aba.com.sg/
https://www.aba.com.sg/privacy-policy
https://www.aba.com.sg/sfec
https://www.aba.com.sg/post/gst-rate-change-with-effect-from-01-january-2023
https://www.aba.com.sg/blog/categories/smes-go-digital
https://www.aba.com.sg/post/sme-go-digital-programme
https://www.aba.com.sg/blog/categories/accounting-software
https://www.aba.com.sg/advisory-services
https://www.aba.com.sg/edg
https://www.aba.com.sg/home
https://www.aba.com.sg/post/business-times-feature-how-cloud-accounting-helped-this-practice-s-client-make-the-perfect-brew
https://www.aba.com.sg/./#comp-kqbq7zqh1
https://www.aba.com.sg/xero
https://www.aba.com.sg/productivity-solutions-grant-psg
https://www.aba.com.sg/post/quickbooks-vs-xero-2020-comparing-cloud-based-accounting-software
https://www.aba.com.sg/post/hopping-onto-the-train-of-digital-transformation
https://www.aba.com.sg/software-solutions
https://www.aba.com.sg/html-sitemap
https://www.aba.com.sg
https://www.aba.com.sg/blog
https://www.aba.com.s

In [None]:
clean = clean(seen)
print(len(clean))

In [7]:
urls = list(clean)

loader = UnstructuredURLLoader(urls=urls)
data = loader.load()
print(len(data))

44


In [8]:
# Pass each document's text through strip_content and store the result back
# on the same document object.
for doc in data:
    doc.page_content = strip_content(doc.page_content)

In [9]:
# Chunking strategy: split on spaces with chunk_size=1000 and chunk_overlap=200.
splitter = CharacterTextSplitter(separator=" ", chunk_size=1000, chunk_overlap=200)

# Apply the splitter to the loaded documents and report the chunk count.
data_split = splitter.split_documents(data)
chunk_count = len(data_split)
print(chunk_count)

139


In [10]:
# Preview only the first few chunks; echoing the whole list floods the
# notebook output. Due to the nature of the website there are duplicate
# documents (needs preprocessing).
data_split[:5]

[Document(page_content='CORPORATE SERVICES  Unity results in phenomenal growth. Outsource areas which occupy your time to us so that you can improve business efficiency.  Find out more  Taxation  Taxation  Our full range of services includes:  Corporate tax compliance and advisory  GST registration, filing, compliance and advisory  Advisory on various business structures and their tax implications  Advisory on sole proprietorship, partnership, corporate and their tax implications  Conversion of business structures and its Tax/GST implications  Managing IRAS audit and investigations  Our taxation services are committed to minimising your tax liability while complying with the latest tax and statutory requirements to help you make well informed and ideal tax decisions.  Accountng  Accounting  Our accounting services include preparing full sets of financial statements based on the latest accounting standards and regulations to reduce your accounting chores.  Our full range of services inc

In [11]:
# One-time ingestion step, left commented out. When uncommented it embeds the
# chunks in data_split with OpenAI embeddings, stores them in the "aba" Chroma
# collection under the "db" directory, persists the store and drops the
# reference. Presumably disabled so that re-running the notebook does not
# re-embed everything -- TODO confirm.

# # Define embedding model
# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# store = Chroma.from_documents(
#     data_split, embeddings, collection_name="aba", persist_directory="db"
# )

# store.persist()
# store = None

Using embedded DuckDB with persistence: data will be stored in: db


### Load the persisted Chroma DB

In [12]:
# Embedding model: the API key is read from the environment.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# Open the "aba" collection from the "db" persist directory.
store = Chroma(
    persist_directory="db",
    collection_name="aba",
    embedding_function=embeddings,
)
store._collection  # last expression: displays the underlying collection

Using embedded DuckDB with persistence: data will be stored in: db


Collection(name=aba)

In [13]:
# Chat model shared by the QA chain: gpt-3.5-turbo, temperature 0, max_tokens 512.
llm = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    model_name="gpt-3.5-turbo",
    max_tokens=512,
    temperature=0,
)

In [14]:
# Prompt text for the QA chain. It has two placeholders: {context} and
# {question}.
sales_template = """
As a customer marketing bot, your goal is to provide accurate and helpful information about Abundant Accounting Pte Ltd.
You should answer user inquiries based on the context provided and avoid making up answers.
If you don't know the answer, simply state that you don't know.
Remember to provide relevant information about how Abundant Accounting Pte Ltd can assist the user through its services, strengths and benefits.

{context}
=========
Question: {question}
"""

# Wrap the text in a PromptTemplate, declaring both placeholders as inputs.
SALES_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=sales_template,
)

# Retrieval QA chain: a retriever over the Chroma store, the "stuff" chain
# type, and SALES_PROMPT as the chain's prompt.
sales_retriever = store.as_retriever()
sales_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=sales_retriever,
    chain_type_kwargs={"prompt": SALES_PROMPT},
)

In [15]:
# Ask what the company offers and print the chain's answer.
services_answer = sales_qa.run("What is it that the company provides?")
print(services_answer)

Abundant Accounting Pte Ltd provides a range of services including corporate advisory, management consultancy, accounting, taxation, corporate secretary, CFO insights, digital accounting system, cloud-based accounting systems, advisory, setting up new business, IT infrastructure and support, cloud-based HR and POS systems, and audit services. Their experienced in-house Chartered Accountants with more than 20 years of experience in MNCs and SMEs provide a holistic and integrated CFO insight to assist businesses in propelling their growth. They also offer advisory services to help businesses optimise their decisions and unleash their hidden potential. Their certified practising management consultants dive deep into analysing business processes and establish a sound, long-term strategy to propel businesses forward.


In [16]:
# Ask how the company can help with digitisation and print the chain's answer.
digitise_question = "I am looking to help my company digitise. How can your company help?"
print(sales_qa.run(digitise_question))

Answer: Abundant Accounting Pte Ltd can help your company digitise by providing IT solutions and transitioning your accounting system into cloud-based accounting. Our professional accountants can assist in reducing manual processes, ease of retrieval for invoices, professional training, and designing reports to facilitate decision making. We can also help enhance revenue growth, control costing, and tax planning/savings. Additionally, we can provide a digital transformation solution that is agile and scalable to accommodate changes in your business model, size, and capabilities. If you require further clarification or assistance, feel free to email us, and we can assign a consultant to follow up with your inquiries.


In [17]:
# Ask for contact details and print the chain's answer.
contact_answer = sales_qa.run("How can I get in touch?")
print(contact_answer)

To get in touch with Abundant Accounting Pte Ltd, you can visit their office located at 2 Kallang Ave #09-26, CT Hub, Singapore 339407 during their opening hours from Monday to Friday, 9 a.m. to 6 p.m. Alternatively, you can fill out the form on their website for sales inquiries or support. They also offer a live demo session and assistance with PSG Grant and DBS Start Digital.


In [18]:
# Ask for a phone number and print the chain's answer.
phone_answer = sales_qa.run("Any phone number I can call?")
print(phone_answer)

Based on the information provided on the Abundant Accounting Pte Ltd website, you can contact them at the following phone numbers: 8161 6293, 9776 2954, 9018 1833. You can also visit their website or fill out a form to get in touch with their sales or support team. Their office is located at 2 Kallang Ave #09-26, CT Hub, Singapore 339407 and is open from Monday to Friday, 9 a.m. to 6 p.m.
