In [None]:
from google.colab import userdata

import subprocess

def check_and_install(package_name):
    """Install ``package_name`` with pip unless ``pip show`` already finds it.

    pip is run as ``<current interpreter> -m pip`` rather than as a bare ``pip``
    executable, so the check and the installation always apply to the
    environment of the running interpreter and not to whichever ``pip``
    happens to come first on PATH.

    Args:
        package_name: Distribution name as understood by pip.

    Raises:
        subprocess.CalledProcessError: If ``pip install`` itself fails.
    """
    import sys  # function-scope import keeps this helper self-contained

    pip_command = [sys.executable, '-m', 'pip']
    try:
        # `pip show` exits non-zero when the distribution is not installed.
        subprocess.check_output(pip_command + ['show', package_name])
    except subprocess.CalledProcessError:
        print(f"{package_name} not found, installing...")
        subprocess.check_output(pip_command + ['install', package_name])
    else:
        print(f"{package_name} already installed.")

if 'google.colab' in str(get_ipython()):
    print('Running on CoLab')

check_and_install('langchain_community')
check_and_install('langchain')
check_and_install('openai')
check_and_install('python-dotenv')
check_and_install('tiktoken')
check_and_install('langchain_openai')
check_and_install('google-colab')
!pip install --upgrade langchain
!pip install langgraph



Running on CoLab
langchain_community not found, installing...
langchain already installed.
openai already installed.
python-dotenv already installed.
tiktoken not found, installing...
langchain_openai not found, installing...
google-colab already installed.
Collecting langchain
  Downloading langchain-0.3.17-py3-none-any.whl.metadata (7.1 kB)
Downloading langchain-0.3.17-py3-none-any.whl (1.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: langchain
  Attempting uninstall: langchain
    Found existing installation: langchain 0.3.16
    Uninstalling langchain-0.3.16:
      Successfully uninstalled langchain-0.3.16
Successfully installed langchain-0.3.17
Collecting langgraph
  Downloading langgraph-0.2.69-py3-none-any.whl.metadata (17 kB)
Collecting langgraph-checkpoint<3.0.0,>=2.0.10 (from langgraph)
  Downloading langgraph_checkpoint-2.0.10-py3-none-any.whl.metadata (4.6 kB)
Co

In [None]:
import logging
import json
from typing import List




# -----------------------
# Logging configuration
# -----------------------
# force=True replaces any handlers already attached to the root logger. Without
# it, basicConfig() silently does nothing whenever the root logger already has
# handlers (for example when this cell is re-run), so the requested INFO level
# would not take effect.
logging.basicConfig(level=logging.INFO, force=True)
logger = logging.getLogger(__name__)

# -----------------------
# Modern LangChain imports
# -----------------------
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document

# -----------------------
# Optional: LangGraph imports for integration
# -----------------------
from typing_extensions import TypedDict
from langgraph.graph.state import StateGraph, START
from langgraph.checkpoint.memory import MemorySaver
import os
# -----------------------
# Credentials and chat model
# -----------------------
# The OpenAI key is looked up by name through Colab's `userdata` helper, so it
# is never written into the notebook itself.
key = userdata.get('OPENAI_API_KEY')
llm = ChatOpenAI(
    api_key=key,
    model="gpt-4o-mini",
)

# -----------------------
# Embedding model and FAISS index over a small demo corpus
# -----------------------
embeddings = OpenAIEmbeddings(
    api_key=key,
    model="text-embedding-3-large",
)

# Four one-sentence passages; these are the only documents retrieval can return.
texts = [
    "Renewable energy research has advanced significantly in solar and wind technologies.",
    "Recent studies in battery storage and smart grid systems are promising.",
    "Innovations in geothermal and tidal energy continue to expand the renewable portfolio.",
    "Research into carbon capture and clean hydrogen production is also on the rise.",
]

vector_store = FAISS.from_texts(texts, embeddings)

# -----------------------
# Define chat-style prompt templates for each chain component
# -----------------------

# 1. Agentic chain: asks the model for a retrieval plan expressed as JSON
#    (keys 'query_refinement' and 'source_priority').
agentic_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a retrieval planning agent. Analyze the following user query and propose an optimized retrieval strategy. Output a JSON object with keys: 'query_refinement' and 'source_priority'. Respond only with valid JSON.",
        ),
        ("human", "Query: {query}"),
    ]
)
agentic_chain = agentic_prompt | llm

# 2. Adaptive chain: judges whether the retrieved context is adequate and, if
#    not, proposes a revised query.
adaptive_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an adaptive retrieval evaluator. Given the user query and the summary of retrieved documents, determine if the retrieval is adequate. If not, output a revised query to improve retrieval; if it is adequate, output 'OK'. Respond with a single line.",
        ),
        (
            "human",
            "User query: {query}\nRetrieved docs summary:\n{retrieved_docs}",
        ),
    ]
)
adaptive_chain = adaptive_prompt | llm

# 3. Corrective chain: asks the model to grade the retrieved context from 1 to 10.
corrective_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a retrieval quality grader. Assess the quality of the retrieved documents summarized below on a scale of 1 to 10 (10 being perfect). Return only the numerical grade.",
        ),
        ("human", "Retrieved docs summary:\n{retrieved_docs}"),
    ]
)
corrective_chain = corrective_prompt | llm

# 4. Self-reflection chain: reviews a produced answer against the question and
#    the retrieved context, replying 'OK' or a short suggested revision.
self_reflection_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a self-reflective evaluator. Given the question, the produced answer, and the retrieved context, assess the answer’s correctness and completeness. If there are issues, suggest a short revision; otherwise, output 'OK'. Respond with a single line.",
        ),
        (
            "human",
            "Question: {question}\nAnswer: {answer}\nRetrieved docs summary:\n{retrieved_docs}",
        ),
    ]
)
self_reflection_chain = self_reflection_prompt | llm

# 5. QA chain: produces the answer from the question and the retrieved context.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert answering system. Using the following retrieved documents, provide a clear and detailed answer to the question and reference the sources as needed.",
        ),
        (
            "human",
            "Question: {question}\nRetrieved docs summary:\n{retrieved_docs}",
        ),
    ]
)
qa_chain = qa_prompt | llm

# -----------------------
# Define retrieval and summarization functions
# -----------------------
def retrieve_documents(query: str, k: int = 4) -> List[Document]:
    """Run a similarity search for ``query`` on the module-level vector store.

    Args:
        query: Text to search for.
        k: Number of documents requested from the store.

    Returns:
        The documents returned by ``vector_store.similarity_search``.
    """
    return vector_store.similarity_search(query, k=k)

def summarize_documents(
    docs: List["Document"],
    max_chars: int = 200,
    separator: str = "\n---\n",
) -> str:
    """Join a truncated excerpt of every document into one context string.

    No model-based summarisation happens here: each document contributes the
    first ``max_chars`` characters of its ``page_content`` and the excerpts
    are joined with ``separator``.

    Args:
        docs: Objects exposing a string ``page_content`` attribute.
        max_chars: Maximum characters kept per document; ``None`` keeps the
            full text. Defaults to 200.
        separator: Text placed between consecutive excerpts.

    Returns:
        The joined excerpts, or an empty string when ``docs`` is empty.

    Raises:
        ValueError: If ``max_chars`` is negative.
    """
    # A negative bound would silently drop characters from the END of each text.
    if max_chars is not None and max_chars < 0:
        raise ValueError("max_chars must be non-negative or None")
    return separator.join(doc.page_content[:max_chars] for doc in docs)

# -----------------------
# Comprehensive RAG Agent class (multi-step chain)
# -----------------------
class ComprehensiveRAGAgent:
    """Multi-step RAG pipeline: plan, retrieve, evaluate, grade, answer, reflect.

    Every chain must expose ``invoke(dict)`` and return an object whose
    ``content`` attribute is a string. Retrieved documents are condensed with
    the module-level ``summarize_documents`` helper.

    Model replies are parsed leniently, because chat models frequently decorate
    the requested output: JSON may be wrapped in a Markdown code fence, the
    ``OK`` verdict may arrive quoted or with trailing punctuation, and a grade
    may be written as ``8/10``.

    Args:
        agentic_chain: Produces a JSON retrieval plan from ``{"query"}``.
        adaptive_chain: Returns ``OK`` or a revised query for
            ``{"query", "retrieved_docs"}``.
        corrective_chain: Returns a numeric grade for ``{"retrieved_docs"}``.
        qa_chain: Produces the answer for ``{"question", "retrieved_docs"}``.
        self_reflection_chain: Returns ``OK`` or a suggested revision for
            ``{"question", "answer", "retrieved_docs"}``.
        retrieval_fn: Callable mapping a query string to a list of documents.
        quality_threshold: Grades strictly below this value trigger the
            fallback to retrieval with the original query. Defaults to 7.0.
    """

    # Grade assumed when the grader's reply contains no unambiguous number.
    DEFAULT_GRADE = 5.0
    # Characters models commonly wrap around a bare "OK" verdict.
    _VERDICT_WRAPPING = " \t\r\n'\"`*.!"

    def __init__(
        self,
        agentic_chain,
        adaptive_chain,
        corrective_chain,
        qa_chain,
        self_reflection_chain,
        retrieval_fn,
        quality_threshold: float = 7.0,
    ):
        self.agentic_chain = agentic_chain
        self.adaptive_chain = adaptive_chain
        self.corrective_chain = corrective_chain
        self.qa_chain = qa_chain
        self.self_reflection_chain = self_reflection_chain
        self.retrieval_fn = retrieval_fn
        self.quality_threshold = quality_threshold

    @staticmethod
    def _load_plan(raw: str):
        """Return the JSON object contained in ``raw``, or None if there is none."""
        candidates = [raw]
        # Also try the outermost {...} span so fenced or prefixed JSON still parses.
        first_brace, last_brace = raw.find("{"), raw.rfind("}")
        if 0 <= first_brace < last_brace:
            candidates.append(raw[first_brace:last_brace + 1])
        for candidate in candidates:
            try:
                plan = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            # Valid JSON that is not an object (list, string, ...) has no keys to read.
            if isinstance(plan, dict):
                return plan
        return None

    @classmethod
    def _needs_revision(cls, reply: str) -> bool:
        """Return True when ``reply`` is real feedback rather than an OK verdict or nothing."""
        verdict = reply.strip(cls._VERDICT_WRAPPING)
        return bool(verdict) and verdict.upper() != "OK"

    @staticmethod
    def _parse_grade(reply: str):
        """Return the grade found in ``reply`` as a float, or None if it is ambiguous."""
        import re  # function-scope import: only this helper needs it

        number = r"\d+(?:\.\d+)?"
        try:
            return float(reply)
        except ValueError:
            pass
        # Accept the common "8/10" and "8 out of 10" spellings.
        out_of_ten = re.search(rf"({number})\s*(?:/|out of)\s*10\b", reply, re.IGNORECASE)
        if out_of_ten:
            return float(out_of_ten.group(1))
        # Otherwise trust a number only when it is the single one in the reply.
        numbers = re.findall(number, reply)
        if len(numbers) == 1:
            return float(numbers[0])
        return None

    def _retrieve_summary(self, search_query: str) -> str:
        """Retrieve documents for ``search_query`` and condense them into one string."""
        return summarize_documents(self.retrieval_fn(search_query))

    def ask(self, query: str) -> str:
        """Run the full pipeline for ``query`` and return the final answer text."""
        logger.info("Starting comprehensive RAG process for query: %s", query)

        # --- Step 1: Agentic retrieval planning ---
        agent_plan_raw = self.agentic_chain.invoke({"query": query}).content.strip()
        logger.info("Agentic output: %s", agent_plan_raw)
        plan = self._load_plan(agent_plan_raw)
        if plan is None:
            logger.warning("Failed to parse agentic output; using original query.")
            refined_query = query
        else:
            proposed = plan.get("query_refinement")
            # Only a non-blank string can be used as a search query.
            usable = isinstance(proposed, str) and proposed.strip()
            refined_query = proposed if usable else query
        logger.info("Refined query after agentic planning: %s", refined_query)

        # --- Step 2: Document retrieval and summarization ---
        active_query = refined_query  # the query the current summary was built from
        retrieved_summary = self._retrieve_summary(active_query)
        logger.info("Initial retrieval summary:\n%s", retrieved_summary)

        # --- Step 3: Adaptive feedback for query refinement ---
        adaptive_feedback = self.adaptive_chain.invoke(
            {"query": query, "retrieved_docs": retrieved_summary}
        ).content.strip()
        logger.info("Adaptive feedback: %s", adaptive_feedback)
        if self._needs_revision(adaptive_feedback):
            active_query = adaptive_feedback
            logger.info("Adaptive chain suggests revised query: %s", active_query)
            retrieved_summary = self._retrieve_summary(active_query)
            logger.info("New retrieval summary after adaptive refinement:\n%s", retrieved_summary)

        # --- Step 4: Corrective check of retrieval quality ---
        grade_str = self.corrective_chain.invoke(
            {"retrieved_docs": retrieved_summary}
        ).content.strip()
        grade = self._parse_grade(grade_str)
        if grade is None:
            logger.warning("Could not parse retrieval grade; defaulting to %.1f", self.DEFAULT_GRADE)
            grade = self.DEFAULT_GRADE
        logger.info("Retrieval quality grade: %.1f", grade)
        if grade < self.quality_threshold:
            if active_query != query:
                logger.info("Retrieval quality below threshold; falling back to original query retrieval.")
                retrieved_summary = self._retrieve_summary(query)
                logger.info("Fallback retrieval summary:\n%s", retrieved_summary)
            else:
                # The current summary already comes from the original query, so
                # repeating the same search would only cost another retrieval call.
                logger.info("Retrieval quality below threshold; original query already in use, keeping current documents.")

        # --- Step 5: Generate answer using QA chain ---
        answer = self.qa_chain.invoke(
            {"question": query, "retrieved_docs": retrieved_summary}
        ).content.strip()
        logger.info("Initial generated answer: %s", answer)

        # --- Step 6: Self-reflection to evaluate and potentially refine answer ---
        self_feedback = self.self_reflection_chain.invoke(
            {"question": query, "answer": answer, "retrieved_docs": retrieved_summary}
        ).content.strip()
        logger.info("Self-reflection feedback: %s", self_feedback)
        if self._needs_revision(self_feedback):
            logger.info("Self-reflection suggests revision; updating answer.")
            # The reviewer's remark is appended to the context for a second QA pass.
            revised_retrieval = retrieved_summary + "\nNote: " + self_feedback
            answer = self.qa_chain.invoke(
                {"question": query, "retrieved_docs": revised_retrieval}
            ).content.strip()
            logger.info("Revised answer after self-reflection: %s", answer)
        else:
            logger.info("Answer passed self-reflection check.")

        return answer

# -----------------------
# Instantiate the Comprehensive RAG Agent
# -----------------------
# Wire the retrieval function and the five chains into a single agent instance.
rag_agent = ComprehensiveRAGAgent(
    retrieval_fn=retrieve_documents,
    agentic_chain=agentic_chain,
    adaptive_chain=adaptive_chain,
    corrective_chain=corrective_chain,
    self_reflection_chain=self_reflection_chain,
    qa_chain=qa_chain,
)

# -----------------------
# Optional: LangGraph Integration for the Comprehensive RAG Agent
# -----------------------
class RAGState(TypedDict, total=False):
    """State dictionary passed through the LangGraph workflow.

    Declared with ``total=False`` because no single producer supplies both
    keys: the caller provides only ``query`` when invoking the graph, and the
    RAG node returns only ``answer``.
    """
    # Question supplied by the caller when the workflow is invoked.
    query: str
    # Answer text written by the RAG node.
    answer: str

def call_rag_agent(state: RAGState) -> RAGState:
    """Graph node: answer ``state["query"]`` with the module-level ``rag_agent``.

    Returns:
        A dict holding only the ``answer`` key.
    """
    question = state["query"]
    return {"answer": rag_agent.ask(question)}

# One-node graph: register the RAG node, then route START into it.
workflow = StateGraph(RAGState)
workflow.add_node("rag_agent", call_rag_agent)
workflow.add_edge(START, "rag_agent")

# Compile with a MemorySaver checkpointer; invocations must therefore pass a
# `thread_id` in their config.
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

# -----------------------
# Example usage
# -----------------------
# if __name__ == "__main__":
#     user_query = "What are the latest advancements in renewable energy research?"

#     # Direct use of the Comprehensive RAG Agent:
#     final_answer = rag_agent.ask(user_query)
#     print("\nFinal Answer from Comprehensive RAG Agent:")
#     print(final_answer)

#     # Invocation via the LangGraph workflow:
#     input_state = {"query": user_query}
#     output_state = app.invoke(input_state, config={"configurable": {"thread_id": "abc456"}})
#     print("\nFinal Answer from LangGraph Workflow:")
#     print(output_state["answer"])




In [None]:
# Run the compiled workflow once for a sample question and print the answer.
user_query = "What are the latest advancements in renewable energy research?"
input_state = dict(query=user_query)

# The checkpointer keys saved state by thread_id.
run_config = {"configurable": {"thread_id": "abc456"}}
output_state = app.invoke(input_state, config=run_config)

print("\nFinal Answer from LangGraph Workflow:", output_state["answer"], sep="\n")


Final Answer from LangGraph Workflow:
Recent advancements in renewable energy research have made significant strides across various technologies, enhancing the efficiency and applicability of renewable sources. Key areas of progress include:

1. **Solar Energy**: Research has focused on improving the efficiency of solar panels, with advancements in materials and manufacturing processes that increase energy capture and lower costs. New solar cell designs, such as bifacial panels that capture sunlight from both sides, are being explored.

2. **Wind Energy**: Innovations in turbine design, including larger blades and advanced materials, have made wind energy more efficient and durable. These improvements enable wind farms to generate more power at lower wind speeds, expanding their viability in diverse locations.

3. **Geothermal Energy**: There is ongoing research into enhanced geothermal systems (EGS) that allow for geothermal energy extraction from regions not previously considered vi