In [5]:
!pip install -q langchain-community langchain-openai langchain playwright beautifulsoup4 nest-asyncio html2text langchain-text-splitters lxml faiss-cpu
!playwright install

You should consider upgrading via the '/Users/gabriel.dantas/pyenv/versions/3.9.12/envs/latest/bin/python3.9 -m pip install --upgrade pip' command.

In [6]:
import nest_asyncio

# Patch asyncio so an event loop can be started while another is already running.
# NOTE(review): presumably required because the Chromium loader used below drives
# its own event loop from inside Jupyter's running loop - confirm.
nest_asyncio.apply()

In [7]:
from langchain_community.document_loaders import AsyncChromiumLoader
from langchain_community.document_transformers import BeautifulSoupTransformer

# Fetch the HTML of the awesome-prometheus-alerts rules page through the
# Chromium-based loader. Later cells read the page from `docs[0]`.
RULES_PAGE_URLS = ["https://samber.github.io/awesome-prometheus-alerts/rules"]

loader = AsyncChromiumLoader(RULES_PAGE_URLS)
docs = loader.load()

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [8]:
from langchain_text_splitters import HTMLHeaderTextSplitter

# Split the page on <h2> and <h4> headings. The second element of each pair is
# the metadata key under which the heading text is stored on every chunk.
headers_to_split_on = [
    ("h2", "Header 2"),
    ("h4", "Header 4"),
]

html_splitter = HTMLHeaderTextSplitter(headers_to_split_on)
html_header_splits = html_splitter.split_text(docs[0].page_content)

# Fail here, with a readable message, rather than later when the vector store
# is built from an empty list.
assert html_header_splits, "no chunks produced - check that the page loaded correctly"

# Show a short preview instead of dumping every chunk into the cell output.
print(f"{len(html_header_splits)} chunks")
html_header_splits[:3]

[Document(page_content='Kindly supported by\xa0 👉  \n⚠️ Caution ⚠️', metadata={'Header 2': 'Collection of alerting rules'}),
 Document(page_content='Alert thresholds depend on nature of applications. Some queries in this page may have arbitrary tolerance threshold. Building an efficient and battle-tested monitoring platform takes time. 😉', metadata={'Header 2': '⚠️ Caution ⚠️'}),
 Document(page_content='# 1.1. Prometheus self-monitoring (28 rules) [copy section] $ wget https://raw.githubusercontent.com/samber/awesome-prometheus-alerts/master/dist/rules/prometheus-self-monitoring/embedded-exporter.yml # 1.2. Host and hardware : node-exporter (38 rules) [copy section] $ wget https://raw.githubusercontent.com/samber/awesome-prometheus-alerts/master/dist/rules/host-and-hardware/node-exporter.yml # 1.3. S.M.A.R.T Device Monitoring : smartctl-exporter (5 rules) [copy section] $ wget https://raw.githubusercontent.com/samber/awesome-prometheus-alerts/master/dist/rules/s.m.a.r.t-device-monitori

In [18]:
import os

from langchain_openai import OpenAIEmbeddings, OpenAI
from langchain_community.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate

# Credentials must never live in the notebook: read the key from the
# environment. A missing variable raises KeyError here, before any API call.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

# Embed the header-split chunks and index them in an in-memory FAISS store.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
vectorstore = FAISS.from_documents(html_header_splits, embeddings)

# Retrieve the 2 most similar chunks for each question.
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})

# System prompt. `{context}` and `{input}` are template variables filled in
# when the chain is invoked.
# NOTE(review): the question is rendered twice - once inside <question> here
# and again as the human message below; confirm that this is intended.
prompt_template = """
## Role
You are an expert in creating alerts using PromQL (Prometheus) for various cloud-native tools, including Kubernetes, EC2, Containers, Servers, among others. Your task is to help generate effective and efficient PromQL alerts. You will be used in a Retrieval-Augmented Generation (RAG) context, where context will be provided along with the prompt. Ensure that the alerts are optimized for performance, accuracy, and relevance to common issues encountered in these environments. Provide detailed explanations and best practices for each alert created.

### Details to Include:

1. **Tool-Specific Guidance:** Offer tailored advice for each tool (e.g., Kubernetes, EC2).
2. **Performance Optimization:** Ensure the alerts are designed for minimal resource usage.
3. **Accuracy and Relevance:** The alerts should be precise and applicable to common issues.
4. **Explanations and Best Practices:** Include clear explanations and recommended practices.

## Context
<context>
{context}
</context>
## Question

<question>
{input}
</question>"""

llm = OpenAI(temperature=0.1, openai_api_key=OPENAI_API_KEY)

system_prompt = prompt_template

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

# Stuff the retrieved chunks into the prompt, then put the retriever in front.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

resp = rag_chain.invoke({"input": "Como posso criar um alerta para meu container no kubernetes?"})
resp

{'input': 'Como posso criar um alerta para meu container no kubernetes?',

In [13]:
from langchain_community.document_transformers import Html2TextTransformer

# Convert the loaded HTML documents to plain text.
html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)

# Only a cell's last expression is displayed automatically, so the preview has
# to be printed explicitly; as a bare expression it was silently discarded.
print(docs_transformed[0].page_content[:500])
len(docs_transformed)

1