In [2]:
import getpass
import os

if not os.environ.get("OPENAI_API_KEY"):
  os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

In [3]:
os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGCHAIN_API_KEY"] = getpass.getpass()

In [4]:
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

In [5]:
from langchain_core.vectorstores import InMemoryVectorStore

vector_store = InMemoryVectorStore(embeddings)

In [None]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse

base_url = "https://able.co"

response = requests.get(base_url)
soup = BeautifulSoup(response.text, "html.parser")

# Collect internal links
links = set()
for a_tag in soup.find_all("a", href=True):
    href = a_tag["href"]
    full_url = urljoin(base_url, href)

    # Filter internal links only (same domain)
    if urlparse(full_url).netloc == urlparse(base_url).netloc:
        links.add(full_url)

print(f"Found {len(links)} internal links:")
for link in links:
    print(link)

Found 9 internal links:
https://able.co/
https://able.co/security-transparency-governance
https://able.co/contact
https://able.co/offerings
https://able.co/#main
https://able.co/work
https://able.co/privacy-policy
https://able.co/team
https://able.co/social-good


In [None]:
from langchain_community.document_loaders import WebBaseLoader
web_paths = list(links)

loader = WebBaseLoader(web_paths=web_paths)
docs = loader.load()

print(f"Total characters: {len(docs[1].page_content)}")

Total characters: 2631


In [40]:
print(docs[2].page_content[:500])

Contact Us | Let’s Build Something Amazing Together | AbleSkip to contentOur WorkOur OfferingsOur TeamSocial GoodContact UsLet’s Build TogetherLinkedInPrivacy Policy© 2024HomeOur WorkOur OfferingsOur TeamSocial GoodContact UsLinkedInPrivacy Policy© 2024Menu


In [41]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # chunk size (characters)
    chunk_overlap=200,  # chunk overlap (characters)
    add_start_index=True,  # track index in original document
)
all_splits = text_splitter.split_documents(docs)

print(f"Split blog post into {len(all_splits)} sub-documents.")

Split blog post into 37 sub-documents.


In [42]:
document_ids = vector_store.add_documents(documents=all_splits)

print(document_ids[:3])

['d96f5800-1650-40a1-bf90-ffd2f2502e9d', 'c0aa2ecc-fd0c-483d-ac75-8b95ce3880aa', 'b1f3f409-b82e-4146-b3dc-22d5bfcfb33e']


In [46]:
from langchain_core.prompts import PromptTemplate

template = """You are an assistant for answering questions about the company Able. Keep answers polite, accurate and concise, with no more than three sentences. 
Do not make things up, if you do not know the answer say so.

{context}

Question: {question}

Answer:"""
prompt = PromptTemplate.from_template(template)

In [47]:
from langchain_core.documents import Document
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

# Define state for application
class State(TypedDict):
    question: str
    context: List[Document]
    answer: str


# Define application steps
def retrieve(state: State):
    retrieved_docs = vector_store.similarity_search(state["question"])
    return {"context": retrieved_docs}


def generate(state: State):
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    messages = prompt.invoke({"question": state["question"], "context": docs_content})
    response = llm.invoke(messages)
    return {"answer": response.content}

# Compile application and test
graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [62]:
response = graph.invoke({"question": "What does Able do?"})
print(response["answer"])

Able is a distributed team of builders that uses AI technologies and custom workflows to deliver efficiencies throughout the software development lifecycle.


In [63]:
response = graph.invoke({"question": "What are the advantages of using Able in my business?"})
print(response["answer"])

Able's AI-powered software development practices can save money, reduce time to market, and elevate the strategic value of your business.


In [64]:
response = graph.invoke({"question": "What benefits does Able offer their employees?"})
print(response["answer"])

Able offers benefits such as remote work flexibility, no timesheets, quarterly profit sharing, employee development stipends, monthly snack boxes, and meeting-free Fridays.


In [65]:
response = graph.invoke({"question": "How can I contact Able?"})
print(response["answer"])

You can contact Able by emailing contact@able.co.


In [66]:
response = graph.invoke({"question": "What are Able's values towards social good?"})
print(response["answer"])

Able's values include leveraging technology for positive change and partnering with impactful non-profit organizations and companies for social good.


In [67]:
response = graph.invoke({"question": "What is Able's market value?"})
print(response["answer"])

Able's market value is not publicly disclosed.


In [68]:
response = graph.invoke({"question": "Who is Able's CEO?"})
print(response["answer"])

I'm sorry, I do not have that information.
