<a href="https://colab.research.google.com/github/claudiorcorreias/cienciadacomputacao_enap/blob/main/chatbot_com_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Chatbot RAG para Análise de Tarifas Aéreas - Proposta de Desenvolvimento

In [3]:
import os
import pandas as pd
import streamlit as st
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader
from langchain_openai import ChatOpenAI
from langchain.agents import create_sql_agent, AgentType
from langchain_community.agent_toolkits import SQLDatabaseToolkit
from langchain.sql_database import SQLDatabase
from langchain.agents import Tool
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from dotenv import load_dotenv
from sqlalchemy import create_engine
from google.colab import userdata


# Initial configuration: load_dotenv() is called with no arguments, so it uses
# python-dotenv's default lookup for a .env file.
load_dotenv()

class TarifasChatbot:
    """Chatbot that answers air-fare questions via a SQL agent and a RAG chain.

    Construction wires everything eagerly, in dependency order: databases
    (SQL connection and FAISS vector store), LLM, RAG chain, SQL agent.

    Attributes set during construction: ``sql_url``, ``db``, ``embeddings``,
    ``vector_store_path``, ``vector_store``, ``llm``, ``retriever``,
    ``rag_chain`` and ``agent``.
    """

    # Substrings, matched against the lower-cased question, that route a
    # question to the RAG chain instead of the SQL agent.
    _CONCEPTUAL_KEYWORDS = ("o que é", "como funciona", "defina", "regulamento")

    def __init__(self):
        self.setup_databases()
        self.setup_llm()
        self.setup_rag()
        self.setup_agent()

    @staticmethod
    def _build_sql_url(user, password, host, port, name, hide_password=False):
        """Return the PostgreSQL URL for the given connection settings.

        The user and password are percent-encoded so that characters such as
        ``@``, ``/`` or ``:`` cannot corrupt the URL. With ``hide_password``
        the password is replaced by ``***`` so the result is safe to log.
        """
        from urllib.parse import quote_plus

        secret = "***" if hide_password else quote_plus(str(password))
        return f"postgresql://{quote_plus(str(user))}:{secret}@{host}:{port}/{name}"

    @staticmethod
    def _format_docs(docs):
        """Join the ``page_content`` of the retrieved documents with blank lines."""
        return "\n\n".join(doc.page_content for doc in docs)

    def setup_databases(self):
        """Set up the SQL database connection and the FAISS vector store.

        Connection settings are read through ``userdata.get`` under the keys
        ``DB_USER``, ``DB_PASSWORD``, ``DB_HOST``, ``DB_NAME`` and ``DB_PORT``.
        The vector store is built from ``docs/`` the first time, when the
        ``vector_store`` directory does not exist yet.

        Raises:
            ValueError: if ``DB_HOST`` cannot be resolved.
        """
        import socket

        # SQL database
        db_user = userdata.get("DB_USER")
        db_password = userdata.get("DB_PASSWORD")
        db_host = userdata.get("DB_HOST")
        db_name = userdata.get("DB_NAME")
        db_port = userdata.get("DB_PORT")

        self.sql_url = self._build_sql_url(db_user, db_password, db_host, db_port, db_name)

        # Fail fast with a readable message when the host name does not resolve,
        # instead of surfacing a low-level driver error later.
        try:
            socket.gethostbyname(db_host)
        except socket.gaierror as exc:
            raise ValueError(f"Could not resolve hostname: {db_host}. Please verify the hostname and network configuration.") from exc

        # Log the connection target with the password masked so the secret
        # never ends up in the notebook output.
        print("URL de conexão:", self._build_sql_url(db_user, db_password, db_host, db_port, db_name, hide_password=True))
        self.db = SQLDatabase.from_uri(self.sql_url, schema="tarifas")

        # Vector database for RAG
        self.embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
        self.vector_store_path = "vector_store"

        if not os.path.exists(self.vector_store_path):
            self.create_vector_store()

        # Recent langchain_community releases refuse to load a pickled FAISS
        # index unless this flag is passed. NOTE(review): this is only safe
        # while the vector_store directory is written exclusively by
        # create_vector_store() — never point it at an untrusted index.
        self.vector_store = FAISS.load_local(
            self.vector_store_path,
            self.embeddings,
            allow_dangerous_deserialization=True,
        )

    def create_vector_store(self):
        """Build the FAISS vector store from the PDFs under ``docs/`` and save it.

        Raises:
            ValueError: if no documents are found under ``docs/``.
        """
        loader = DirectoryLoader("docs/", glob="**/*.pdf")
        documents = loader.load()
        if not documents:
            # Without this guard the failure surfaces deep inside FAISS with
            # an error that does not mention the missing documents.
            raise ValueError("No PDF documents found under 'docs/'; cannot build the vector store.")

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200
        )
        splits = text_splitter.split_documents(documents)

        vector_store = FAISS.from_documents(splits, self.embeddings)
        vector_store.save_local(self.vector_store_path)

    def setup_llm(self):
        """Create the chat model shared by the RAG chain and the SQL agent."""
        # NOTE(review): reasoning models such as o3-mini may reject a custom
        # temperature — confirm against the OpenAI API before relying on it.
        self.llm = ChatOpenAI(
            model_name="o3-mini",
            temperature=0.3,
            streaming=True
        )

    def setup_rag(self):
        """Build the retriever and the RAG chain (retriever -> prompt -> LLM -> str)."""
        self.retriever = self.vector_store.as_retriever(search_kwargs={"k": 3})

        rag_prompt = PromptTemplate.from_template("""
        Você é um especialista em tarifas aéreas. Use o contexto abaixo para responder à pergunta.
        Mantenha as respostas técnicas mas acessíveis.

        Contexto: {context}

        Pergunta: {question}

        Resposta:
        """)

        self.rag_chain = (
            {
                # Feed the prompt the documents' text rather than the repr of
                # the list of Document objects the retriever returns.
                "context": self.retriever | self._format_docs,
                "question": RunnablePassthrough(),
            }
            | rag_prompt
            | self.llm
            | StrOutputParser()
        )

    def setup_agent(self):
        """Create the SQL agent, with the RAG chain exposed as an extra tool."""
        toolkit = SQLDatabaseToolkit(db=self.db, llm=self.llm)

        # The toolkit's own tools are registered by create_sql_agent, so no
        # hand-built SQL tool is needed here.
        rag_tool = Tool(
            name="RAGSystem",
            func=self.rag_chain.invoke,
            description="Useful for answering general questions about airline regulations and policies"
        )

        self.agent = create_sql_agent(
            llm=self.llm,
            toolkit=toolkit,
            extra_tools=[rag_tool],
            agent_type=AgentType.OPENAI_FUNCTIONS,
            verbose=True
        )

    def query(self, question):
        """Answer a user question.

        Questions containing one of the conceptual keywords go to the RAG
        chain; everything else goes to the SQL agent.

        Returns:
            The answer produced by the chain or agent, or an
            ``"Erro ao processar pergunta: ..."`` string if anything raises;
            errors are reported as text so the UI can display them.
        """
        try:
            lowered = question.lower()

            # Conceptual questions are answered from the documents.
            if any(keyword in lowered for keyword in self._CONCEPTUAL_KEYWORDS):
                return self.rag_chain.invoke(question)

            # Numeric/analytical questions go to the SQL agent.
            # NOTE(review): `run` is reported as deprecated in recent LangChain
            # releases in favour of `invoke` — confirm for the installed version.
            return self.agent.run(question)

        except Exception as e:
            return f"Erro ao processar pergunta: {str(e)}"

# Interface Streamlit
def main():
    """Render the Streamlit chat page: question box, answer, and recent history.

    Streamlit re-executes this function from the top on every widget
    interaction, so anything that must survive a rerun (the chatbot and the
    conversation history) is kept in ``st.session_state``.
    """
    st.title("Chatbot Especialista em Tarifas Aéreas")
    st.write("""
    Sistema de análise integrada de tarifas aéreas com RAG.
    Faça perguntas sobre dados tarifários ou regulamentação.
    """)

    # Build the chatbot once per session. Its constructor connects to the
    # database and loads the embedding model and vector store, so rebuilding
    # it on every rerun would repeat all of that work.
    if "chatbot" not in st.session_state:
        st.session_state.chatbot = TarifasChatbot()
    chatbot = st.session_state.chatbot

    # Conversation history
    if "history" not in st.session_state:
        st.session_state.history = []

    question = st.text_input("Faça sua pergunta:")
    if st.button("Enviar") and question:
        with st.spinner("Processando..."):
            response = chatbot.query(question)

        st.write("**Resposta:**")
        st.write(response)

        st.session_state.history.append({"question": question, "response": response})

        # Show only the three most recent exchanges.
        st.write("**Histórico:**")
        for item in st.session_state.history[-3:]:
            st.text(f"Q: {item['question']}")
            st.text(f"A: {item['response']}\n---")

# Start the Streamlit interface only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()



ValueError: Could not resolve hostname: spbdf1038.anac.gov.br. Please verify the hostname and network configuration.

In [8]:
from urllib.parse import quote_plus

from google.colab import userdata

# Read the connection settings from the Colab secret store.
db_user = userdata.get("DB_USER")
db_password = userdata.get("DB_PASSWORD")
db_host = userdata.get("DB_HOST")
db_name = userdata.get("DB_NAME")
db_port = userdata.get("DB_PORT")

# Percent-encode the credentials so characters such as "@", "/" or ":" in the
# user name or password cannot corrupt the URL.
sql_url = f"postgresql://{quote_plus(str(db_user))}:{quote_plus(str(db_password))}@{db_host}:{db_port}/{db_name}"

# Print the connection target with the password masked: cell outputs are saved
# with the notebook, so the real secret must never be displayed.
print("URL de conexão:", f"postgresql://{quote_plus(str(db_user))}:***@{db_host}:{db_port}/{db_name}")

URL de conexão: postgresql://powerbi_user:***@spbdf1038.anac.gov.br:5432/dm_corporativo


In [None]:
/projeto_chatbot
│
├── /docs/                  # Documentos para o RAG
│   ├── regulamentos.pdf
│   ├── metodologias.pdf
│   └── manuais.pdf
│
├── app.py                  # Código principal
├── requirements.txt        # Dependências
├── .env                    # Variáveis de ambiente
└── /vector_store/          # Banco vetorial (gerado automaticamente)