In [1]:


# ---------------------------
# Standard Library Imports
# ---------------------------
import uuid                # Generate unique identifiers for objects, sessions, or messages
import sqlite3             # Interact with SQLite databases for lightweight persistent storage
from typing import TypedDict, List, Optional  
                           # Type hinting: TypedDict for structured dicts, List/Optional for clearer function signatures
import dateparser          # Needed to parse published dates
from urllib.parse import quote_plus
from typing import List, Dict, Optional
import datetime # Assuming you need this for the sorting key

# ---------------------------
# Third-Party / External Libraries
# ---------------------------
import feedparser           # Parse RSS/Atom feeds, used here to fetch news from sources like Bing, Google, Reuters
import gradio as gr         # Create web-based UI interfaces for chatbot interaction or demos


# ---------------------------
# LangGraph / LangChain Imports
# ---------------------------
from langgraph.graph import StateGraph, END  
                           # StateGraph: build and manage a directed graph of tasks/nodes
                           # END: marker for terminal nodes in a workflow
from langgraph.checkpoint.sqlite import SqliteSaver  
                           # SqliteSaver: checkpoint agent state to a SQLite database for persistence

from langchain_ollama import OllamaLLM  
                           # OllamaLLM: interface to local LLMs (like Mistral) via Ollama API
from langchain_community.vectorstores import Chroma  
                           # Chroma: vector database for storing and retrieving embeddings efficiently
from langchain_community.embeddings import OllamaEmbeddings  
                           # Generate embeddings for text using local Ollama models

from langchain.schema import Document  
                           # Document: standard format for text + metadata, used in vectorstores

In [2]:
# -------------------------------------------------------
# Setting up Local LLM and Embedding Models
# -------------------------------------------------------

# Progress marker: printed before the model and vector-store objects are
# created in the following cells, so a stalled setup is easy to spot in the
# notebook output.
print("Setting up models and vector store...")


Setting up models and vector store...


In [3]:

# Text-generation model served through Ollama (model tag "mistral").
# The node functions below call `llm.invoke(prompt)` on this object to
# summarize headlines and to answer follow-up questions.
llm = OllamaLLM(model="mistral")

# Embedding model (same "mistral" tag) handed to the Chroma vector store so
# stored summaries can be retrieved by semantic similarity.
# NOTE(review): the recorded cell output echoes the line below as the source
# of a warning -- presumably a deprecation notice for the
# `langchain_community` OllamaEmbeddings class. Confirm, and consider the
# equivalent class from `langchain_ollama`, which this notebook already uses
# for the LLM.
embedding_model = OllamaEmbeddings(model="mistral")

  embedding_model = OllamaEmbeddings(model="mistral")


In [4]:
# -------------------------------------------------------
# Initialize ChromaDB Vector Store for Long-Term Memory
# -------------------------------------------------------

# Chroma collection used as the agent's long-term memory:
# `summarize_and_store_node` adds one Document per generated summary and
# `conversation_node` queries it with `similarity_search`.
# NOTE(review): the recorded cell output echoes `vectorstore = Chroma(` as the
# source of a warning -- presumably a deprecation notice for the
# `langchain_community` Chroma wrapper; confirm before upgrading LangChain.
vectorstore = Chroma(
    collection_name="news_memory",       # Single collection shared by every topic
    embedding_function=embedding_model,  # Embedding model defined in the previous cell
    persist_directory="./news_db"        # On-disk location, relative to the notebook's working directory
)

# Progress marker: models and vector store are ready.
print("Setup complete.")

  vectorstore = Chroma(


Setup complete.


In [5]:
# ---------------------------
# Function: Fetch Latest News with Topic Filtering
# ---------------------------
def _to_utc(moment: datetime.datetime) -> datetime.datetime:
    """Return `moment` as a timezone-aware UTC datetime (naive values are assumed to be UTC)."""
    if moment.tzinfo is None:
        return moment.replace(tzinfo=datetime.timezone.utc)
    return moment.astimezone(datetime.timezone.utc)


def get_latest_news(topic: str, max_articles: int = 10) -> List[Dict]:
    """
    Fetch the most recent news articles matching `topic` from search-based RSS feeds.

    The topic is URL-encoded and sent to the Bing News and Google News RSS
    search endpoints (the Google query additionally carries ``when:1d``).
    Entries are kept only when the topic string occurs, case-insensitively,
    in the entry title or summary and when the publication date can be parsed.
    The surviving articles are sorted newest-first.

    Parameters
    ----------
    topic : str
        Topic or keyword to search for (e.g. 'AI in healthcare'). Leading and
        trailing whitespace is ignored; a blank topic yields an empty list.
    max_articles : int, optional
        Maximum number of articles to return. Values that cannot be converted
        to ``int`` (including ``None``) fall back to 10; negative values are
        treated as 0. Defaults to 10.

    Returns
    -------
    list of dict
        One dictionary per article with the keys:

        * **'title'** (str): Headline ('' when the feed entry has none).
        * **'link'** (str): URL of the article ('' when the feed entry has none).
        * **'published'** (str): Raw publication date string from the feed.
        * **'published_dt'** (datetime.datetime): Parsed publication date,
          exactly as returned by ``dateparser.parse``.
        * **'source'** (str): Name of the feed the entry came from ('Bing' or 'Google').

    Notes
    -----
    1.  Matching is a plain substring test of the whole topic string, so a
        multi-word topic must appear verbatim in the title or summary.
    2.  Entries without a parseable publication date are dropped.
    3.  Sorting compares dates normalized to UTC so that feeds mixing
        timezone-aware and naive timestamps cannot raise ``TypeError``;
        naive timestamps are assumed to already be in UTC.
    4.  The same story may appear once per source; no de-duplication is done.
    """
    # Gradio may hand over strings or None; never let that crash the node.
    try:
        max_articles = int(max_articles)
    except (TypeError, ValueError):
        max_articles = 10
    max_articles = max(max_articles, 0)  # a negative slice bound would cut from the wrong end

    topic = (topic or "").strip()
    if not topic or max_articles == 0:
        # A blank topic would match every entry ('' is a substring of anything).
        return []

    # URL-encode once; quote_plus turns spaces into '+' and escapes the rest.
    encoded_topic = quote_plus(topic)
    encoded_topic_recent = quote_plus(f"{topic} when:1d")

    # Only feeds that support free-text topic search are queried.
    rss_sources = {
        "Bing": f"https://www.bing.com/news/search?q={encoded_topic}&format=rss",
        "Google": f"https://news.google.com/rss/search?q={encoded_topic_recent}&hl=en-US&gl=US&ceid=US:en",
    }

    topic_lower = topic.lower()
    articles = []

    for source_name, feed_url in rss_sources.items():
        print(f"Fetching news from {source_name}...")
        feed = feedparser.parse(feed_url)

        for entry in feed.entries:
            # Feed entries are not guaranteed to carry every field.
            title = getattr(entry, "title", "") or ""
            summary = getattr(entry, "summary", "") or ""

            # Cheap relevance test first, so dates are parsed only for matches.
            if topic_lower not in title.lower() and topic_lower not in summary.lower():
                continue

            published = getattr(entry, "published", "N/A")
            try:
                published_dt = dateparser.parse(published)
            except Exception:
                # Best effort: an unparseable date just drops the entry.
                published_dt = None
            if published_dt is None:
                continue

            articles.append({
                "title": title,
                "link": getattr(entry, "link", "") or "",
                "published": published,
                "published_dt": published_dt,
                "source": source_name,
            })

    # Newest first; the sort is stable, so ties keep feed order.
    articles.sort(key=lambda article: _to_utc(article["published_dt"]), reverse=True)
    return articles[:max_articles]

In [6]:
# ---------------------------
# Define the Agent State
# ---------------------------
class AgentState(TypedDict):
    """
    Shared state dictionary passed between the LangGraph nodes in this notebook.

    Every node receives this mapping, updates some of its keys and returns it.
    TypedDict documents the expected keys for readers and static type
    checkers; it does not validate values at runtime.
    """
    topic: str                          # News topic; read by every node
    news: Optional[List[dict]]          # Articles stored by fetch_news_node; the router treats a missing/None value as "not fetched yet"
    summary: Optional[str]              # Summary text written by summarize_and_store_node
    user_input: Optional[str]           # Latest user message; read by conversation_node
    chat_history: List[dict]            # Conversation turns, oldest first, in the format:
                                        # [{"role": "user/assistant", "content": "..."}]

In [7]:
# ---------------------------
# CHECKPOINTER SETUP (SQLite)
# ---------------------------
# Open an in-memory SQLite database (":memory:"); its contents exist only as
# long as this connection/kernel does, so checkpoints are not kept on disk.
# check_same_thread=False turns off sqlite3's check that a connection is only
# used from the thread that created it, which is needed because the Gradio
# callbacks may run on other threads. It does not add any locking by itself.
conn = sqlite3.connect(":memory:", check_same_thread=False)

# Checkpointer handed to `graph_builder.compile(checkpointer=memory)` below.
# It stores the agent state per `thread_id`, which is how a follow-up message
# finds the news and chat history produced by an earlier invocation.
memory = SqliteSaver(conn=conn)

In [8]:
# ---------------------------
# Agent Node: Fetch News
# ---------------------------

def fetch_news_node(state: AgentState) -> AgentState:
    """
    Graph node 1: load fresh articles for the topic held in the state.

    Args:
        state (AgentState): Current agent state; only `state["topic"]` is read.

    Returns:
        AgentState: The same state object, with `state["news"]` set to the
        list returned by `get_latest_news` and `state["chat_history"]`
        guaranteed to exist (an empty list is inserted when it was missing).
    """
    # Look up the articles for the current topic and record them in the state.
    state["news"] = get_latest_news(state["topic"])

    # Later nodes expect a chat history to be present; keep any existing one.
    state.setdefault("chat_history", [])

    return state

In [9]:
# ---------------------------
# Agent Node: Summarize & Store News
# ---------------------------

def summarize_and_store_node(state: AgentState) -> AgentState:
    """
    Graph node 2: summarize the fetched articles, store the summary in the
    vector store and seed the chat history with the first assistant reply.

    The LLM prompt lists every headline together with the source and the
    published date recorded for it, because the prompt asks the model to
    report both; passing titles alone would force the model to invent them.

    Args:
        state (AgentState): Current agent state. Reads `topic` and `news`
            (a missing or empty `news` is treated as "no articles").

    Returns:
        AgentState: The same state object with
            * `summary` set to the LLM summary, or to a fixed "no news"
              message when there were no articles, and
            * `chat_history` replaced by two turns: the topic as the user
              message and the initial assistant response.

    Side effects:
        When articles exist, one Document (summary text, metadata
        `{"topic": topic}`) is added to the module-level `vectorstore`.
    """
    print("Summarizing and storing news...")

    topic = state["topic"]
    articles = state.get("news") or []

    if not articles:
        # Nothing to summarize: the fixed message doubles as the reply.
        summary = "No recent news found for this topic."
        initial_response = summary
    else:
        # One bullet per article, carrying the metadata the prompt asks about.
        headline_lines = "\n".join(
            f"- {a.get('title', '')} "
            f"(source: {a.get('source', 'unknown')}; published: {a.get('published', 'N/A')})"
            for a in articles
        )

        prompt = (
            "You are a news summarizer.\n"
            f"Briefly summarize the key points from the following news headlines about '{topic}':\n\n"
            f"{headline_lines}\n\n"
            "Include the published date and source of each news item along with the summary, "
            "using only the dates and sources listed above."
        )
        summary = llm.invoke(prompt)

        # Long-term memory: the topic metadata allows topic-scoped retrieval.
        vectorstore.add_documents([Document(page_content=summary, metadata={"topic": topic})])

        # The recorded notebook output shows a warning raised at the original
        # unconditional `vectorstore.persist()` call. Only call it when the
        # store still offers the method, so a store without it cannot crash
        # this node.
        persist = getattr(vectorstore, "persist", None)
        if callable(persist):
            persist()

        initial_response = (
            f"📰 Here are the summary of the news articles I found for **{topic}**:\n\n"
            f"{summary}\n\n"
            "Feel free to ask me any follow-up questions!"
        )

    state["summary"] = summary

    # Start the conversation: the topic acts as the user's opening message.
    state["chat_history"] = [
        {"role": "user", "content": topic},
        {"role": "assistant", "content": initial_response},
    ]
    return state

In [10]:
# ---------------------------
# Agent Node: Continue Conversation
# ---------------------------

def conversation_node(state: AgentState) -> AgentState:
    """
    Graph node 3: answer a follow-up message using stored summaries and the
    recent chat history.

    Retrieval is restricted to documents whose `topic` metadata equals the
    current topic. The vector store holds summaries for every topic ever
    searched, so an unfiltered `k=1` lookup could return a summary about a
    different subject. When nothing is stored for the topic, the summary held
    in the state (if any) is used as context instead.

    Args:
        state (AgentState): Current agent state. Reads `topic`, `user_input`,
            `chat_history` and, as a fallback context, `summary`.

    Returns:
        AgentState: The same state object with the user message and the LLM
        reply appended to `chat_history`. A missing or blank `user_input`
        leaves the history unchanged and makes no LLM or vector-store call.
    """
    print("Continuing conversation...")

    topic = state["topic"]
    user_input = state.get("user_input")
    # Work on a copy so the list held by the checkpointed state is not
    # modified behind its back.
    chat_history = list(state.get("chat_history") or [])

    if not user_input or not user_input.strip():
        # Nothing to answer; do not send an empty query to the embedder/LLM.
        state["chat_history"] = chat_history
        return state

    # ---------------------------
    # Retrieve Context from Memory
    # ---------------------------
    # Most similar stored summary for *this* topic only.
    retrieved_docs = vectorstore.similarity_search(user_input, k=1, filter={"topic": topic})
    retrieved_context = "\n\n".join(d.page_content for d in retrieved_docs)
    if not retrieved_context:
        retrieved_context = state.get("summary") or ""

    # ---------------------------
    # Format Recent Conversation History
    # ---------------------------
    # Only the last 4 messages are included to keep the prompt short.
    context_str = "\n".join(f"{m['role']}: {m['content']}" for m in chat_history[-4:])

    # ---------------------------
    # Construct Prompt for LLM
    # ---------------------------
    prompt = f"""You are a helpful AI news assistant. Your current topic is '{topic}'.
    Answer the user's question concisely based on the conversation history and the provided relevant news summaries.
    
    Relevant past summaries for context:
    ---
    {retrieved_context}
    ---
    
    Current conversation:
    {context_str}
    User: {user_input}
    Assistant:"""

    reply = llm.invoke(prompt)

    # ---------------------------
    # Update Chat History
    # ---------------------------
    chat_history.append({"role": "user", "content": user_input})
    chat_history.append({"role": "assistant", "content": reply})
    state["chat_history"] = chat_history

    return state

In [11]:
# ---------------------------
# Router Function for Conditional Entry
# ---------------------------

def route_initial_or_followup(state: AgentState) -> str:
    """
    Entry-point router: pick the first node to run for this invocation.

    Args:
        state (AgentState): The current state of the agent.

    Returns:
        str: "fetch_news" when the state holds no `news` value yet (key
        missing or None); "conversation" otherwise. An empty article list
        counts as already fetched.
    """
    # `news` stays unset until the fetch node has run once for this thread.
    needs_initial_fetch = state.get("news") is None
    return "fetch_news" if needs_initial_fetch else "conversation"

In [12]:
# ---------------------------
# Build the Agentic AI Graph
# ---------------------------

# The graph is built on the AgentState schema: every node receives and
# returns a state of that shape.
graph_builder = StateGraph(AgentState)

# ---------------------------
# Register the nodes
# ---------------------------
# name -> handler, in workflow order:
#   fetch_news           fetches articles for the topic
#   summarize_and_store  summarizes them and writes the summary to vector memory
#   conversation         answers follow-up questions
for _node_name, _node_handler in (
    ("fetch_news", fetch_news_node),
    ("summarize_and_store", summarize_and_store_node),
    ("conversation", conversation_node),
):
    graph_builder.add_node(_node_name, _node_handler)

# ---------------------------
# Wire the edges
# ---------------------------
# Fetching is always followed by summarizing.
graph_builder.add_edge("fetch_news", "summarize_and_store")

# Both the initial summary and a follow-up answer end the run.
# (An edge to END is what `set_finish_point` creates.)
graph_builder.add_edge("summarize_and_store", END)
graph_builder.add_edge("conversation", END)

# ---------------------------
# Conditional entry point
# ---------------------------
# `route_initial_or_followup` returns the key of the node to start with:
# "fetch_news" while no news is stored for the thread, "conversation" after.
graph_builder.set_conditional_entry_point(
    route_initial_or_followup,
    {
        "fetch_news": "fetch_news",
        "conversation": "conversation",
    },
)

# ---------------------------
# Compile into a runnable agent
# ---------------------------
# The checkpointer stores the state per thread_id between invocations.
agent = graph_builder.compile(checkpointer=memory)

In [13]:
# -------------------------------------------------------
# Gradio Interface
# -------------------------------------------------------

def chatbot_interface(topic, user_message, state_history):
    """
    Gradio callback: run one turn of the agent and return the updated chat.

    A conversation is identified by a `thread_id` kept in `state_history`.
    A new thread is created when there is none yet, or when `topic` differs
    from the topic the session was started with. Without that, the
    checkpointed state of the old thread still contains news, the router
    goes straight to the conversation node, and no news is fetched for the
    new topic. Because a fresh thread begins with the fetch/summarize path,
    the message that accompanies a topic change is not answered; the new
    topic's summary is shown instead.

    Args:
        topic (str): The current news topic.
        user_message (str): The latest message from the user.
        state_history (dict | None): Session info from `gr.State`; may hold
            'thread_id' and 'topic'. None or an empty dict starts a new session.

    Returns:
        tuple:
            - Empty string, used to clear the Gradio input box.
            - List of (user, assistant) message tuples for the Chatbot display.
            - Session dict `{"thread_id": ..., "topic": ...}` to store back
              into `gr.State`.
    """
    state_history = state_history or {}

    # ---------------------------
    # Retrieve or create the thread ID
    # ---------------------------
    thread_id = state_history.get("thread_id")
    previous_topic = state_history.get("topic")
    # Sessions saved without a topic are treated as "same topic".
    topic_changed = previous_topic is not None and previous_topic != topic
    if not thread_id or topic_changed:
        thread_id = str(uuid.uuid4())

    config = {"configurable": {"thread_id": thread_id}}

    # ---------------------------
    # Invoke the agent graph
    # ---------------------------
    # Only these two keys are supplied; everything else comes from the
    # checkpoint stored for this thread.
    agent_input = {
        "topic": topic,
        "user_input": user_message,
    }
    full_state = agent.invoke(agent_input, config=config)

    # ---------------------------
    # Format chat history for Gradio
    # ---------------------------
    # Pair each assistant message with the user message directly before it.
    # A single pass is used so that a message with any other role is simply
    # skipped instead of stalling the loop.
    gradio_chat_history = []
    pending_user_msg = None
    for message in full_state.get("chat_history", []):
        role = message.get("role")
        content = message.get("content")
        if role == "user":
            pending_user_msg = content
        elif role == "assistant":
            if pending_user_msg and content:
                gradio_chat_history.append((pending_user_msg, content))
            pending_user_msg = None

    return "", gradio_chat_history, {"thread_id": thread_id, "topic": topic}

In [None]:
# ---------------------------
# Gradio Interface for Conversational News AI Agent
# ---------------------------

# gr.Blocks gives a structured, multi-component layout.
with gr.Blocks(title="📰 Conversational News AI Agent with Memory", theme=gr.themes.Default()) as iface:

    # Header
    gr.Markdown("# 🧠 News AI Agent with Memory")
    gr.Markdown(
        "Enter a topic to get News summary. "
        "The agent uses **LangGraph** for flow control (fetch once, then chat) "
        "and **ChromaDB** for memory."
    )

    # ---------------------------
    # Topic input row
    # ---------------------------
    with gr.Row():
        topic = gr.Textbox(
            label="Enter a Topic (Changing this value starts a new conversation)",
            value="AI in healthcare",  # default topic
            scale=3,  # relative width in the row
        )

        # Fetches news for the topic and opens a new conversation
        start_btn = gr.Button("Get News & Start Chat", scale=1, variant="primary")

    # ---------------------------
    # Chat components
    # ---------------------------
    # NOTE(review): the recorded cell output echoes the gr.Chatbot line as the
    # source of a warning -- presumably about the tuple message format that
    # chatbot_interface returns. Confirm before upgrading Gradio; switching
    # formats requires changing chatbot_interface at the same time.
    chatbot = gr.Chatbot(label="NewsBot Conversation", height=500)  # main chat display
    msg_box = gr.Textbox(
        label="Your Message",
        placeholder="Ask a follow-up question...",
        interactive=True  # allow user input
    )

    # Per-session storage for the thread ID used by the checkpointer
    state_history = gr.State({})

    # ---------------------------
    # Interaction Logic
    # ---------------------------

    # 1️⃣ Start button: always begin a NEW conversation.
    # Passing an empty session dict makes chatbot_interface create a fresh
    # thread ID, so the graph takes the fetch -> summarize path. Reusing the
    # stored session here would route a second click (or a changed topic) to
    # the conversation node and no news would be fetched.
    def start_new_chat(topic_value):
        """Run the initial fetch/summarize turn for `topic_value` in a fresh session."""
        return chatbot_interface(topic_value, topic_value, {})

    start_btn.click(
        start_new_chat,
        inputs=[topic],  # the topic doubles as the opening user message
        outputs=[msg_box, chatbot, state_history]  # update message box, chatbot display, state
    )

    # 2️⃣ Follow-up messages: continue the conversation stored in state_history
    msg_box.submit(
        chatbot_interface,
        inputs=[topic, msg_box, state_history],  # topic, user's follow-up message, state
        outputs=[msg_box, chatbot, state_history]  # update interface components
    )

    # 3️⃣ Clear button: reset the chat so the next message starts from scratch
    def clear_all():
        """Return values that empty the message box, the chat display and the session state."""
        return "", None, {}

    clear_btn = gr.Button("Clear Chat & Start New Topic", variant="secondary")
    clear_btn.click(
        clear_all,  # function to clear all components
        None,  # no inputs needed
        [msg_box, chatbot, state_history],  # outputs to reset
        queue=False  # immediate reset without queuing
    )

# Launch the Gradio interface.
# debug=True keeps this cell running while the server is up, so server-side
# output and errors appear in the cell output below.
iface.launch(debug=True)



  chatbot = gr.Chatbot(label="NewsBot Conversation", height=500)  # main chat display


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


Fetching news from Bing...
Fetching news from Google...
Summarizing and storing news...


  vectorstore.persist()


Continuing conversation...
