In [1]:
# https://github.com/tomasonjo/blogs/blob/master/neo4jdocs/neo4j_support_bot.ipynb
# %pip install langchain==0.0.142 openai==0.27.4 beautifulsoup4==4.12.2 chromadb==0.3.21

In [1]:
import os
import dotenv

import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import pandas as pd

from langchain.document_loaders import (
    # GitLoader,
    # YoutubeLoader,
    # DataFrameLoader,
    UnstructuredURLLoader,
)
from langchain.text_splitter import CharacterTextSplitter

import tiktoken

# from langchain.schema import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.agents import initialize_agent, Tool

from crawl import crawl, clean, strip_content

# Load variables from a local .env file; it is expected to define
# OPENAI_API_KEY, which is read with os.getenv further down.
dotenv.load_dotenv()

True

In [2]:
# Regex pattern to match an absolute http:// or https:// URL.
# `https?` allows at most one "s"; a `[s]*` class would also accept
# malformed schemes such as "httpss://".
HTTP_URL_PATTERN = r"^https?://.+"

# Define root domain to crawl
DOMAIN = "elliotcommunications.com"
# Start URL is derived from DOMAIN so the two values cannot drift apart.
FULL_URL = f"https://{DOMAIN}"

### Embedding documents for the first time

In [3]:
# Crawl starting from FULL_URL and print the size of the returned collection.
seen = crawl(FULL_URL, HTTP_URL_PATTERN)
n_seen = len(seen)
print(n_seen)

https://elliotcommunications.com
https://elliotcommunications.com/subscribe-to-our-newsletter
https://elliotcommunications.com/contact-us/
https://elliotcommunications.com/blog/
https://elliotcommunications.com/5-reasons-why-you-need-pr-for-your-business/
https://elliotcommunications.com/category/uncategorized/
https://elliotcommunications.com/author/allan-ell/
https://elliotcommunications.com/so-youve-been-featured-in-the-media-now-what/
https://elliotcommunications.com/pr-in-the-age-of-social-media-marketing/#respond
https://elliotcommunications.com/the-do-or-die-differences-between-public-relations-marketing/#respond
https://elliotcommunications.com/pr-in-the-age-of-social-media-marketing/
https://elliotcommunications.com/5-reasons-why-you-need-pr-for-your-business/#respond
https://elliotcommunications.com/press-release-vs-media-pitch-which-one-is-better/#respond
https://elliotcommunications.com/blog-2/
https://elliotcommunications.com/am-i-at-the-right-stage-for-pr/
https://elliotc

In [4]:
clean = clean(seen)
print(len(clean))

52


In [5]:
urls = list(clean)

# Without headers User-Agent, results in forbidden 403
loader = UnstructuredURLLoader(urls=urls, headers={"User-Agent": "Mozilla/5.0"})
data = loader.load()
print(len(data))

52


In [7]:
# Replace each loaded document's text with its strip_content()-processed form.
for doc in data:
    doc.page_content = strip_content(doc.page_content)

In [6]:
# Define text chunk strategy
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200, separator=" ")
# Split documents into chunks
data_split = splitter.split_documents(data)
print(len(data_split))

154


In [7]:
# Preview only the first few chunks; displaying the whole list floods the
# notebook with the repr of every chunk.
data_split[:3]

[Document(page_content='Myth-Busting 101: Debunking Popular PR Publicity Myths\n\nMay 19, 2023\n\nNo Comments\n\nAs a business owner, how do you keep your stakeholders happy, and ensure positive brand perception among your customers? Well, clearly, you’d think the same\n\nRead More »\n\nTapping Into Earned Media: 6 Proven Strategies For A Successful PR Campaign\n\nApril 19, 2023\n\nNo Comments\n\nSo, you’ve been running a small business for a while now, and you’re wondering, “How can I spice things up and make my brand\xa0top-of-mind in\n\nRead More »\n\nWhat Is Public Relations, and Why Is It Important for SMEs?\n\nApril 4, 2023\n\nNo Comments\n\nPublic relations may seem like a redundant move when you’re just starting out in the business world. In fact, some even think PR is no longer relevant\n\nRead More »', metadata={'source': 'https://elliotcommunications.com/tapping-into-earned-media-6-proven-strategies-for-a-successful-pr-campaign/#respond'}),
 Document(page_content='Myth-Busti

In [8]:
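# One-time step (see the heading above): uncomment to embed `data_split` and
# persist the "ecm" collection into the "db" directory.
# # Define embedding model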
# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# store = Chroma.from_documents(
#     data_split, embeddings, collection_name="ecm", persist_directory="db"
# )

# store.persist()
# store = None

Using embedded DuckDB with persistence: data will be stored in: db


### Chroma DB persisted

In [9]:
# Read the API key from the environment and set up the embedding model.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# Open the "ecm" collection kept under the "db" persist directory.
store = Chroma(
    embedding_function=embeddings,
    persist_directory="db",
    collection_name="ecm",
)
# Show the underlying collection object as the cell output.
store._collection

Using embedded DuckDB with persistence: data will be stored in: db


Collection(name=ecm)

In [12]:
# Chat model shared by the QA chains below (temperature 0, max 512 tokens).
llm = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    model_name="gpt-3.5-turbo",
    max_tokens=512,
    temperature=0,
)

In [35]:
# Prompt text for the "stuff" QA chain; it exposes the {context} and
# {question} placeholders declared in SALES_PROMPT below.
sales_template = """
As a customer marketing bot, your goal is to provide accurate and helpful information about Elliot & Co.
You should answer user inquiries based on the context provided and avoid making up answers.
If you don't know the answer, simply state that you don't know.
Remember to provide relevant information about how Elliot & Co can assist the user through its services, strengths and benefits.

{context}
=========
Question: {question}
"""

SALES_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=sales_template,
)

# Retrieval-backed QA chain over the persisted store, driven by SALES_PROMPT.
sales_retriever = store.as_retriever()
sales_qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=sales_retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": SALES_PROMPT},
)

In [15]:
question = "What does the company do?"
print(sales_qa.run(question))

Elliot & Co. is a public relations agency that helps businesses and founders secure media features on newspapers, magazines, TV, radio, and internet platforms. They offer retainer options for 6 months, providing a combination of PR strategies that lead to clear and quantifiable results. Their goal is to drive actionable change and ensure people have access to modern solutions.


In [16]:
question = "What does retainer options for 6 months mean?"
print(sales_qa.run(question))

Answer: Retainer options for 6 months means that Elliot & Co. offers a partnership for a period of 6 months, where they provide a combination of PR strategies that lead to clear and quantifiable results. This is a long-term commitment that aims to achieve specific goals and objectives for the client.


In [17]:
question = "What are some of the company's success stories?"
print(sales_qa.run(question))

Elliot & Co has several success stories, including helping Mölnlycke introduce its international presence to the Malaysian market, securing 43 coverages with a total of 830 million readers. They also helped Primech gain prominence in the cleaning industry through compelling stories about their founder, employees, and core beliefs. Additionally, Elliot & Co established GTRIIP as a forerunner in the contactless hotel check-in scene, with 18 media features and counting.


In [18]:
# The typo in the question is deliberate: it checks the chain copes with misspellings.
question = "How do I get in contect?"  # purposely misspelled
print(sales_qa.run(question))

Answer: You can get in contact with Elliot&Co by filling out the contact form on their website, which includes your business name, name, email, country, and optional contact number and message. You can also reach out to their team in Singapore, Malaysia, or Indonesia through their office addresses and email addresses provided on their website.


In [21]:
question = "Can I get the link to the contact form?"
print(sales_qa.run(question))

As an AI language model, I cannot access the link to the contact form as it is not provided in the given context. However, you can visit the Elliot&Co website to access the contact form or reach out to their team in Singapore, Malaysia or Indonesia through the contact information provided on their website.


In [22]:
question = "Any number I can call or email I can send?"
print(sales_qa.run(question))

Yes, you can contact Elliot & Co through the contact form on their website or through the email addresses provided for each office location. You can also provide your contact number for a PR expert to reach you at a convenient time.


In [36]:
question = "I am a company that provides a chatbot as a service. How can you help?"
print(sales_qa.run(question))

Elliot&Co can assist your company by providing PR and media coverage to help increase your brand's visibility and credibility. Our team of experts can help craft compelling stories about your chatbot service and pitch them to relevant media outlets. This can help attract more customers and increase your company's revenue. Additionally, we can also provide consultation on how to effectively communicate your chatbot's unique features and benefits to potential customers. Contact us for a no-obligation quote and to learn more about how we can help your company.


In [77]:
# Call the chain directly (instead of .run) to keep the returned mapping;
# its "result" entry is rendered in the next cell.
chatbot_question = "I am a company that provides a chatbot as a service. How can you help?"
result = sales_qa(chatbot_question)

In [80]:
# Import here so this cell does not depend on the later cell that imports
# `display` and `Markdown` having been executed first (Restart & Run All).
from IPython.display import Markdown, display

display(Markdown(result["result"]))

Elliot&Co can assist your company by providing PR and media coverage to help increase your brand's visibility and credibility. Our team of experts can help craft compelling stories about your chatbot service and pitch them to relevant media outlets. This can help attract more customers and increase your company's revenue. Additionally, we can also provide consultation on how to effectively communicate your chatbot's unique features and benefits to potential customers. Contact us for a no-obligation quote and to learn more about how we can help your company.

### `RetrievalQAWithSourcesChain` (The answers aren't that good compared to the above method)

In [68]:
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)


# System instructions for the chat prompt; the text exposes {context} and
# {question} placeholders.
system_template = """
As a customer marketing bot, your goal is to provide accurate and helpful information about Elliot & Co.
You should answer user inquiries based on the context provided and avoid making up answers.
If you don't know the answer, simply state that you don't know.
Remember to provide relevant information about how Elliot & Co can assist the user through its services, strengths and benefits.

{context}
=========
Question: {question}
"""

# Chat prompt = system instructions followed by the user's question.
# NOTE(review): `prompt` does not appear to be passed to any chain in this
# notebook — confirm whether it was meant to be wired into the chain below.
system_message = SystemMessagePromptTemplate.from_template(system_template)
human_message = HumanMessagePromptTemplate.from_template("{question}")
messages = [system_message, human_message]
prompt = ChatPromptTemplate.from_messages(messages)

In [82]:
from langchain.chains import RetrievalQAWithSourcesChain

# QA chain that also reports sources; built on the same LLM and retriever
# as `sales_qa`, and asked to return the retrieved documents as well.
sources_retriever = store.as_retriever()
chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    retriever=sources_retriever,
    chain_type="stuff",
    return_source_documents=True,
)

In [60]:
from IPython.display import display, Markdown


def print_result(query, result):
    """Render a question, its answer and the cited sources as Markdown.

    Args:
        query: Question text shown under the "Question" heading.
        result: Mapping that is read at the "answer" and "sources" keys.

    Raises:
        KeyError: If `result` lacks the "answer" or "sources" key.
    """
    # Join the sections explicitly: a triple-quoted f-string closed on its own
    # indented line appends a stray newline plus spaces to the rendered text.
    output_text = "\n".join(
        [
            "### Question:",
            f"{query}",
            "### Answer:",
            f"{result['answer']}",
            "### Sources:",
            f"{result['sources']}",
        ]
    )
    display(Markdown(output_text))

In [None]:
# Ask the sources-aware chain; `query` and `result` are rendered by the
# print_result call in the next cell.
query = "What is this company?"
result = chain(query)

In [61]:
# Render the question, answer and sources from the previous cell as Markdown.
print_result(query, result)

### Question:
What is this company?
### Answer:
Elliot Communications is a PR agency that specializes in telling the stories of up-and-coming brands from entrepreneuring founders. They offer PR strategies and services for SMEs and start-ups, and have been recognized as one of the top places to work in APAC by PRWeek. They have a team of experts in Singapore and Malaysia who are ready to serve their clients. They offer retainer options for 6 months, offering a combination of PR strategies that lead to clear and quantifiable results.

### Sources:
https://elliotcommunications.com/about, https://elliotcommunications.com, https://elliotcommunications.com/#message, https://elliotcommunications.com/ourservices
    

In [62]:
# Same question as asked of `sales_qa` earlier, now through the sources-aware chain.
query = "What does retainer options for 6 months mean?"
result = chain(query)
print_result(query, result)

### Question:
What does retainer options for 6 months mean?
### Answer:
Retainer options for 6 months refer to a partnership that yields long-term results, offering a combination of PR strategies that lead to clear and quantifiable results.

### Sources:
https://elliotcommunications.com/ourservices
    

In [74]:
# Same question as asked of `sales_qa` earlier, now through the sources-aware chain.
query = "I am a company that provides a chatbot as a service. How can you help?"
result = chain(query)
print_result(query, result)

### Question:
I am a company that provides a chatbot as a service. How can you help?
### Answer:
Elliot Communications can help by equipping a new generation of founders and businesses with the key tools to get their own brand ready to be featured in the media. They offer services for SMEs and start-ups and have expertise in telling the stories of up-and-coming brands from entrepreneuring founders. They also offer a free consultation for getting your business featured in the media. To inquire about their services, you can contact them through their website. They also have a job opening for a business development intern. 

### Sources:
https://elliotcommunications.com/about, https://elliotcommunications.com/contact-us, https://elliotcommunications.com/case-studies, https://elliotcommunications.com/careers/business-development-intern
    