In [8]:
# Install dependencies
!pip install -q litellm mlflow langchain langchain-community faiss-cpu sentence-transformers pypdf ragas langchain-groq groq datasets langchain_huggingface polars

In [9]:
# --- Imports ---
import os
import mlflow
import litellm
import polars as pl
from dotenv import load_dotenv
from datasets import Dataset
from pypdf import PdfReader
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from ragas import evaluate
from ragas.metrics import Faithfulness, AnswerRelevancy, ContextRecall, ContextPrecision, FactualCorrectness
from ragas.llms import LangchainLLMWrapper
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings as RagasHFEmbeddings

In [10]:
# --- Environment Setup ---
from google.colab import userdata
os.environ["GROQ_API_KEY"] = userdata.get('GROQ_API_KEY')
os.environ["OPENAI_API_KEY"] = userdata.get("OPENAI_API_KEY_")
mlflow.set_experiment("SolidPrinciple_SelfRAG")
mlflow.litellm.autolog()

In [14]:
# --- Start MLflow Run ---
with mlflow.start_run() as run:
    # --- Load PDFs ---
    pdf_folder = "/content/IncidentManuals"  # Replace with your local path
    loaders = [PyPDFLoader(os.path.join(pdf_folder, fn)) for fn in os.listdir(pdf_folder) if fn.endswith(".pdf")]
    documents = []
    for loader in loaders:
        documents.extend(loader.load())

    mlflow.log_param("num_documents", len(documents))
    mlflow.log_param("pdf_files", [f for f in os.listdir(pdf_folder) if f.endswith(".pdf")])

    # --- Chunking ---
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(documents)
    mlflow.log_param("num_chunks", len(chunks))

    # Save sample chunks to artifact
    chunk_texts = [chunk.page_content for chunk in chunks[:5]]
    with open("sample_chunks.txt", "w") as f:
        f.write("\n---\n".join(chunk_texts))
    mlflow.log_artifact("sample_chunks.txt")

    # --- Embedding & VectorStore ---
    embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
    embedding = HuggingFaceEmbeddings(model_name=embedding_model)
    mlflow.log_param("embedding_model", embedding_model)

    vectordb = FAISS.from_documents(chunks, embedding)
    retriever = vectordb.as_retriever(search_type="mmr", search_kwargs={"lambda_mult": 0.5, "k": 5})

    # --- Prompts ---
    rag_prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a mining safety expert. Use the context below to answer questions accurately."),
        ("user", "Context:\n{context}\n\nQuestion: {question}")
    ])
    editor_prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a technical editor. Refine the answer for accuracy and clarity."),
        ("user", "Draft:\n{draft}")
    ])

    memory = FAISS.from_texts(["init"], embedding)

    def retrieve_self(query):
        memory_results = memory.similarity_search(query)
        if memory_results:
            mlflow.log_param("memory_retrieval_used", True)
            return memory_results
        mlflow.log_param("memory_retrieval_used", False)
        return retriever.get_relevant_documents(query)

    def generate(query):
        context_docs = retrieve_self(query)
        context = "\n\n".join([doc.page_content for doc in context_docs])

        draft = litellm.completion(
            model="openai/gpt-4o-mini",
            messages=[{"role": "user", "content": rag_prompt.format(context=context, question=query)}]
        )
        draft_content = draft['choices'][0]['message']['content']

        refined = litellm.completion(
            model="groq/llama3-70b-8192",
            messages=[{"role": "user", "content": editor_prompt.format(draft=draft_content)}]
        )
        refined_content = refined['choices'][0]['message']['content']

        memory.add_texts([refined_content])

        return refined_content, context, draft_content

    # --- Sample Coal Mining Q&A Dataset ---
    questions = [
        "What are the primary safety concerns in underground coal mining?",
        "How do you prevent methane explosions in coal mines?",
        "What role does ventilation play in coal mine safety?",
        "How is dust managed in underground coal mines?",
        "What personal protective equipment is mandatory in coal mining?"
    ]

    references = [
        "Underground coal mining involves risks such as roof collapse, gas explosion, and equipment hazards.",
        "Methane explosions are prevented using gas detectors, proper ventilation, and drainage systems.",
        "Ventilation ensures removal of harmful gases and maintains breathable air in mines.",
        "Dust is controlled using water sprays, ventilation, and dust collectors.",
        "Helmets, boots, respirators, gloves, and high-visibility clothing are mandatory in coal mines."
    ]

    # --- Generate Answers ---
    data = {"question": [], "response": [], "retrieved_contexts": [], "reference": []}
    for i, (q, ref) in enumerate(zip(questions, references)):
        ans, ctx, draft_ans = generate(q)
        data["question"].append(q)
        data["response"].append(ans)
        data["retrieved_contexts"].append([ctx])
        data["reference"].append(ref)

        # Log individual question, context, draft, and refined answers as artifacts
        mlflow.log_text(q, f"question_{i+1}.txt")
        mlflow.log_text(ctx, f"retrieved_context_{i+1}.txt")
        mlflow.log_text(draft_ans, f"draft_answer_{i+1}.txt")
        mlflow.log_text(ans, f"refined_answer_{i+1}.txt")

    # --- Create HuggingFace Dataset ---
    ds = Dataset.from_dict(data)

    # --- RAGAS Evaluation ---
    llm = LangchainLLMWrapper(
        ChatGroq(api_key=os.environ["GROQ_API_KEY"], model_name="llama3-8b-8192", temperature=0.0)
    )
    ragas_emb = RagasHFEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

    result = evaluate(
        ds,
        metrics=[Faithfulness(), AnswerRelevancy(), ContextRecall(), ContextPrecision(), FactualCorrectness()],
        embeddings=ragas_emb,
        llm=llm
    )

    ragas_df = result.to_pandas()
    ragas_df.to_csv("ragas_eval_results.csv", index=False)
    mlflow.log_artifact("ragas_eval_results.csv")

    # --- Log final dataset ---
    final_df = pl.DataFrame(data)
    # mlflow.log_input(final_df, context="training")

    print("Evaluation complete. Results tracked in MLflow.")

Evaluating:   0%|          | 0/25 [00:00<?, ?it/s]

ERROR:ragas.executor:Exception raised in Job[8]: TimeoutError()
ERROR:ragas.executor:Exception raised in Job[0]: TimeoutError()
ERROR:ragas.executor:Exception raised in Job[4]: TimeoutError()
ERROR:ragas.executor:Exception raised in Job[5]: TimeoutError()
ERROR:ragas.executor:Exception raised in Job[7]: TimeoutError()
ERROR:ragas.executor:Exception raised in Job[9]: TimeoutError()
ERROR:ragas.executor:Exception raised in Job[10]: TimeoutError()
ERROR:ragas.executor:Exception raised in Job[12]: TimeoutError()
ERROR:ragas.executor:Exception raised in Job[13]: TimeoutError()
ERROR:ragas.executor:Exception raised in Job[14]: TimeoutError()
ERROR:ragas.executor:Exception raised in Job[15]: TimeoutError()
ERROR:ragas.executor:Exception raised in Job[17]: TimeoutError()
ERROR:ragas.executor:Exception raised in Job[18]: TimeoutError()
ERROR:ragas.executor:Exception raised in Job[19]: TimeoutError()
ERROR:ragas.executor:Exception raised in Job[20]: TimeoutError()


Evaluation complete. Results tracked in MLflow.


In [15]:
!pip install -q pyngrok

In [20]:
import os
import getpass
from pyngrok import ngrok, conf
from google.colab import userdata
os.environ["NGROK"]=userdata.get('NGROK_TOKEN')

In [21]:
# Set up ngrok tunnel for MLflow UI
conf.get_default().auth_token = os.environ["NGROK"]
ngrok.kill()
ngrok_tunnel = ngrok.connect(addr="5000", proto="http", bind_tls=True)
print("MLflow Tracking UI:", ngrok_tunnel.public_url)

MLflow Tracking UI: https://dc7fdd8ca8db.ngrok-free.app


In [18]:
!mlflow ui



[2025-07-13 11:22:49 +0000] [12424] [INFO] Starting gunicorn 23.0.0
[2025-07-13 11:22:49 +0000] [12424] [INFO] Listening at: http://127.0.0.1:5000 (12424)
[2025-07-13 11:22:49 +0000] [12424] [INFO] Using worker: sync
[2025-07-13 11:22:49 +0000] [12425] [INFO] Booting worker with pid: 12425
[2025-07-13 11:22:49 +0000] [12426] [INFO] Booting worker with pid: 12426
[2025-07-13 11:22:49 +0000] [12427] [INFO] Booting worker with pid: 12427
[2025-07-13 11:22:49 +0000] [12428] [INFO] Booting worker with pid: 12428
[2025-07-13 11:24:15 +0000] [12424] [INFO] Handling signal: int

Aborted!
[2025-07-13 11:24:15 +0000] [12425] [INFO] Worker exiting (pid: 12425)
[2025-07-13 11:24:15 +0000] [12426] [INFO] Worker exiting (pid: 12426)
[2025-07-13 11:24:15 +0000] [12428] [INFO] Worker exiting (pid: 12428)
[2025-07-13 11:24:15 +0000] [12427] [INFO] Worker exiting (pid: 12427)
[2025-07-13 11:24:17 +0000] [12424] [INFO] Shutting down: Master
