In [None]:
!pip install -q langchain-huggingface langchain-core langchain datasets yfinance pandas faiss-cpu sentence-transformers langgraph pydantic

In [None]:
!pip install -q langchain-community

In [None]:
# Sanity check: confirm that langchain-community can be imported and show its version.
try:
    import langchain_community
except ImportError:
    print("langchain-community is NOT installed.")
else:
    print(f"langchain-community is installed. Version: {langchain_community.__version__}")


In [None]:
from datasets import load_dataset
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
import pandas as pd

# 1. Stream the dataset and keep only the first 5000 records
dataset = load_dataset('Zihan1004/FNSPID', split='train', streaming=True)
# zip() against a bounded range stops after 5000 records (or earlier if the stream ends).
sample_data = [entry for _, entry in zip(range(5000), dataset)]

df = pd.DataFrame(sample_data)

# 2. One Document per article: the title is the searchable text,
#    ticker and date travel along as metadata for later filtering
docs = [
    Document(
        page_content=row['Article_title'],
        metadata={"ticker": row['Stock_symbol'], "date": str(row['Date'])},
    )
    for _, row in df.iterrows()
]

# 3. Embed the documents and index them in FAISS
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(docs, embeddings)

print("Vector Store Ready!")

In [None]:
from google.colab import userdata
import os

# Fetch the secret from the Colab secrets store
token = userdata.get('HF_TOKEN')

# Export it so Hugging Face libraries can find it, but only when a value was
# actually returned: assigning None to os.environ raises TypeError, which would
# stop the notebook before the token check can report the problem.
if token:
    os.environ["HF_TOKEN"] = token

In [None]:

# Report whether a non-empty Hugging Face token is present in the environment.
if os.environ.get("HF_TOKEN"):
    print("Hugging Face API token is loaded successfully.")
else:
    print("Hugging Face API token is NOT loaded. Please check your Colab secrets.")

## Update Chatbot Query for New Feature

Integrate the 'find_significant_moves_and_news' functionality into the 'chatbot_query' function. The chatbot will be updated to parse user requests for historical price moves and trigger this new analysis, returning a summarized correlation of price movements and news.


In [None]:
if 'extract_ticker_robust' in globals() and callable(globals()['extract_ticker_robust']):
    print("'extract_ticker_robust' is already defined.")
else:
    print("'extract_ticker_robust' is NOT defined. I will define it now.")

    def extract_ticker_robust(query, df):
        """Return the first ticker-like token in ``query``, or None.

        Placeholder implementation: a "ticker" is any standalone run of 1-5
        uppercase ASCII letters. The match is not validated against a list of
        known symbols, so a capitalised one-letter word such as "I" can be
        returned.

        Args:
            query (str): The user's question.
            df: Unused; kept so callers can pass the news DataFrame.

        Returns:
            str | None: The first matching token, or None when nothing matches.
        """
        # Local import: this cell runs before the notebook's top-level `import re`.
        import re

        # In a raw string the word boundary is written `\b`. Doubling the
        # backslash would match a literal backslash followed by "b" and
        # therefore never find a ticker.
        ticker_match = re.search(r'\b[A-Z]{1,5}\b', query)
        if ticker_match:
            return ticker_match.group(0)
        return None

    print("'extract_ticker_robust' has been defined.")

In [None]:
# Confirm that a callable named extract_ticker_robust exists in the notebook namespace.
if callable(globals().get('extract_ticker_robust')):
    print("'extract_ticker_robust' is defined.")
else:
    print("'extract_ticker_robust' is NOT defined.")

## Implement Supervisor Agent with Dynamic Routing

Develop a Supervisor agent using LangGraph or similar state management to dynamically analyze the user's intent and decide whether to invoke the 'Plotting Agent' (e.g., for queries like 'show me a chart of...') or the 'Research Agent' (for factual or analytical queries). This will involve defining states, edges, and conditional logic within the graph.


In [None]:
!pip install -q --upgrade langchain-core langgraph

## Integrate Supervisor into Chatbot



In [None]:
from langchain_community.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain_core.prompts import PromptTemplate
import re
import calendar # Ensure calendar is imported for date calculations
from langgraph.graph import StateGraph, END
from pydantic import BaseModel, Field
from langchain_core.runnables import RunnableLambda

# Build the LLM from a local Hugging Face text2text pipeline around flan-t5-small.
# NOTE(review): an earlier version of this comment said the LLM "was already
# initialized in a previous cell"; no such cell is visible in this notebook,
# so this is the first initialisation a top-to-bottom run encounters.
model_id = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

# do_sample=True with temperature=0.7 means generations are sampled; no seed is
# set in this cell, so repeated runs may produce different text.
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512, # upper bound on generated tokens, sized for longer summaries
    temperature=0.7,
    do_sample=True
)

# Wrap the pipeline so it can be called through LangChain's `invoke` interface.
llm = HuggingFacePipeline(pipeline=pipe)

# --- Helper functions (kept as is for clarity) ---
def extract_date_range(query):
    """Extract a (start_date, end_date) pair of 'YYYY-MM-DD' strings from a query.

    Resolution order:
      1. Two or more 'YYYY-MM-DD' dates: the first two, in order of appearance.
      2. Exactly one 'YYYY-MM-DD' date: that date through today.
      3. A 'YYYY-MM' month: the first through the last day of that month.

    Args:
        query (str): Free-text user query.

    Returns:
        tuple[str | None, str | None]: The date range, or (None, None) when no
        usable date is found (including an out-of-range month such as '2020-13').
    """
    dates_full = re.findall(r'\b\d{4}-\d{2}-\d{2}\b', query)
    if len(dates_full) >= 2:
        # Additional dates used to fall through to the month branch and
        # silently shrink the range to the month of the first date.
        return dates_full[0], dates_full[1]
    if len(dates_full) == 1:
        # A single date is taken as the start; the range runs until today.
        return dates_full[0], pd.to_datetime('today').strftime('%Y-%m-%d')

    month_year_match = re.search(r'\b(\d{4})-(\d{2})\b', query)
    if month_year_match:
        year = int(month_year_match.group(1))
        month = int(month_year_match.group(2))
        if not 1 <= month <= 12:
            # calendar.monthrange would raise for this; report "no range" instead.
            return None, None
        year_month_str = month_year_match.group(0)
        last_day = calendar.monthrange(year, month)[1]
        return f"{year_month_str}-01", f"{year_month_str}-{last_day:02d}"

    return None, None

def extract_threshold(query):
    """Extract a percentage threshold such as '5%', '2.5 %' or '5 percent'.

    Args:
        query (str): Free-text user query.

    Returns:
        float: The first percentage found, or 3.0 when the query has none.
    """
    # Accept an optional decimal part so "2.5%" is read as 2.5 rather than 5,
    # and allow optional whitespace before the unit ("5 %").
    match = re.search(r'(\d+(?:\.\d+)?)\s*(?:%|percent)', query, re.IGNORECASE)
    if match:
        return float(match.group(1))
    return 3.0  # Default threshold

def find_significant_moves_and_news(ticker, threshold, start_date, end_date, vectorstore=vectorstore):
    """Find days with large price moves and attach same-day news headlines.

    Args:
        ticker (str): Stock ticker symbol.
        threshold (float): Minimum absolute daily change, in percent, for a day
            to count as significant.
        start_date (str): Start of the period, 'YYYY-MM-DD'.
        end_date (str): End of the period, 'YYYY-MM-DD'.
        vectorstore: Store queried with ``similarity_search``; defaults to the
            notebook-level ``vectorstore`` that exists when this function is defined.

    Returns:
        list[dict]: One dict per significant day with keys 'date' (str),
        'daily_change_percent' (float) and 'news' (up to three headlines whose
        metadata date starts with that day). Empty when no price data is found.
    """
    price_data = get_price_context(ticker, start_date, end_date)

    if price_data.empty:
        return []

    significant_moves = price_data[abs(price_data['Daily_Change_Percent']) >= threshold].copy()

    results = []
    for _, row in significant_moves.iterrows():
        # Depending on the column layout of the price frame, a cell may come
        # back as a one-element Series or as a plain string; only the former
        # has `.item()`, so unwrap conditionally instead of assuming it.
        move_date = row['Date']
        if hasattr(move_date, 'item'):
            move_date = move_date.item()
        daily_change = row['Daily_Change_Percent'].item()

        # Over-fetch (k=10) because the similarity search is not date-aware;
        # the date filter below keeps only same-day articles.
        all_news_articles = vectorstore.similarity_search(f"News about {ticker} on {move_date}", k=10)

        # Metadata dates may carry a time suffix, so compare by prefix.
        filtered_news = [
            doc.page_content
            for doc in all_news_articles
            if doc.metadata.get('date', '').startswith(move_date)
        ]

        results.append({
            'date': move_date,
            'daily_change_percent': daily_change,
            'news': filtered_news[:3],  # at most three headlines per day
        })
    return results

# --- Re-define plotting_agent and research_agent as per previous instructions ---
def plotting_agent(user_query: str) -> str:
    """Stub plotting agent: echo the query inside a fixed acknowledgement message."""
    acknowledgement = f"Plotting Agent called for: {user_query}"
    return acknowledgement

def research_agent(user_query: str) -> str:
    """Answer a stock question by combining price data, retrieved news and the LLM.

    Two paths:
      * "Significant moves" queries (detected by keyword) need a date range;
        the days found by ``find_significant_moves_and_news`` are summarised.
      * All other queries build a short price summary for the date mentioned
        (or recent data when no date is given), retrieve three news items and
        ask the LLM the user's question against that context.

    NOTE(review): ``extract_date_robust`` and ``get_price_context`` are not
    defined in this cell or any earlier visible cell; they appear only in a
    later cell of this notebook, so this function needs that cell to have run.

    Args:
        user_query: The user's free-text question.

    Returns:
        The LLM's answer, or a fixed guidance message when the ticker or date
        range is missing or no significant moves are found.
    """

    ticker = extract_ticker_robust(user_query, df)

    if not ticker:
        return "Please specify a stock ticker (e.g., 'A' or 'AAPL')."

    # Keyword test (case-insensitive substring) that selects the significant-moves path
    if any(keyword in user_query.lower() for keyword in ['significant moves', 'large changes', 'historical analysis', 'percentage change']):
        start_date, end_date = extract_date_range(user_query)
        if not start_date or not end_date:
            return "Please specify a valid date range (e.g., 'YYYY-MM-DD to YYYY-MM-DD' or 'YYYY-MM') for historical analysis."

        threshold = extract_threshold(user_query)

        significant_moves = find_significant_moves_and_news(ticker, threshold, start_date, end_date, vectorstore=vectorstore)

        if not significant_moves:
            return f"No significant moves (> {threshold}%) found for {ticker} between {start_date} and {end_date}."

        # Only the first five moves are sent to the LLM, to keep the prompt short
        significant_moves_limited = significant_moves[:5]

        # One line per move: date, percentage change and shortened headlines
        moves_context = []
        for move in significant_moves_limited:
            news_snippets = "; ".join([s[:50] + '...' if len(s) > 50 else s for s in move['news']]) # headlines longer than 50 chars are cut and suffixed with '...'
            moves_context.append(
                f"On {move['date']}, {ticker} changed by {move['daily_change_percent']:.2f}%. News: {news_snippets}"
            )
        moves_context_str = "\n".join(moves_context)

        # Prompt in Context / Question / Explanation layout
        prompt = f"""
Context:
Days with price movements greater than {threshold}% for {ticker} from {start_date} to {end_date}:
{moves_context_str}

Question: Summarize the key significant price movements and their correlation with the provided news articles. Highlight any notable patterns or events observed during this period.
Explanation:
"""
        return llm.invoke(prompt)

    # General-query path
    date_robust_str = extract_date_robust(user_query) # expected to be YYYY-MM-DD, YYYY-MM or YYYY (or falsy when absent)

    price_info_str = ""
    if date_robust_str:
        # Expand a month or a year into an explicit start/end pair
        if re.match(r'\d{4}-\d{2}$', date_robust_str): # YYYY-MM
            year, month = map(int, date_robust_str.split('-'))
            start_d = f"{date_robust_str}-01"
            last_day = calendar.monthrange(year, month)[1]
            end_d = f"{date_robust_str}-{last_day:02d}"
        elif re.match(r'\d{4}$', date_robust_str): # YYYY
            start_d = f"{date_robust_str}-01-01"
            end_d = f"{date_robust_str}-12-31"
        else: # anything else is treated as YYYY-MM-DD
            start_d = date_robust_str
            end_d = date_robust_str # single day: start and end coincide

        price_data_df = get_price_context(ticker, start_date=start_d, end_date=end_d)

        if not price_data_df.empty:
            # The `.item()` calls below assume each cell of a row is a one-element
            # container — TODO confirm against get_price_context's column layout
            if start_d == end_d: # Single day query
                day_data = price_data_df.iloc[0]
                price_info_str = f"{ticker} on {day_data['Date'].item()}: Closed at ${day_data['Close'].item():.2f}, Daily change: {day_data['Daily_Change_Percent'].item():.2f}%."
            else: # Date range query
                first_day = price_data_df.iloc[0]
                # `last_day` is rebound here from the month length (int) to the last row
                last_day = price_data_df.iloc[-1]
                price_info_str = (
                    f"Historical data for {ticker} from {first_day['Date'].item()} to {last_day['Date'].item()}:\n"
                    f"Start Close: ${first_day['Close'].item():.2f}, End Close: ${last_day['Close'].item():.2f}.\n"
                    f"Average daily change: {price_data_df['Daily_Change_Percent'].mean():.2f}%."
                )
        else:
            price_info_str = f"Could not find price data for {ticker} around {date_robust_str}."

        search_query = user_query # the original question doubles as the news search text

    else: # No date in the query
        price_data_df = get_price_context(ticker) # no dates passed; the helper chooses its own default window
        if not price_data_df.empty:
            latest_data = price_data_df.iloc[-1]
            price_info_str = f"{ticker} recently: Closed at ${latest_data['Close'].item():.2f}, Daily change: {latest_data['Daily_Change_Percent'].item():.2f}%."
        else:
            price_info_str = f"Could not find recent price data for {ticker}."
        search_query = f"Recent news about {ticker}"


    # Top three matches by similarity; no ticker or date filter is applied here
    news_results = vectorstore.similarity_search(search_query, k=3)
    news_context = "\n".join([res.page_content for res in news_results])

    # Prompt in Context / Question / Explanation layout
    prompt = f"""
Context:
Price Information: {price_info_str}
News Articles: {news_context}

Question: {user_query}
Explanation:
"""
    return llm.invoke(prompt)

# --- Supervisor Agent Definition (copied from previous successful execution) ---
from typing import Optional  # needed for the nullable field below


class GraphState(BaseModel):
    """State carried through the supervisor graph."""
    query: str = Field(..., description="The user's input query")
    # Optional: the field is empty (None) until one of the agent nodes has run,
    # so the annotation must admit None to agree with the default.
    agent_outcome: Optional[str] = Field(None, description="The outcome/response from the called agent")

def route_agent(state: GraphState) -> str:
    """Choose the graph node that should handle the user's query.

    Queries containing a word that starts with "chart", "plot", "graph" or
    "visualis/visualiz" go to the plotting agent. Everything else, including
    significant-moves requests, goes to the research agent.

    Args:
        state: Current graph state; only ``state.query`` is read.

    Returns:
        "call_plotting_agent" or "call_research_agent".
    """
    # Anchor at a word start so that words merely containing a keyword
    # ("paragraph", "geographic", "exploit") are not mistaken for plot requests.
    wants_plot = re.search(r'\b(?:chart|plot|graph|visuali[sz])', state.query, re.IGNORECASE)
    if wants_plot:
        return "call_plotting_agent"
    return "call_research_agent"

def call_plotting_agent_node(state: GraphState) -> GraphState:
    """Graph node: run the plotting agent on the query and return the updated state."""
    user_query = state.query
    print(f"Calling Plotting Agent with query: {user_query}")
    return GraphState(query=user_query, agent_outcome=plotting_agent(user_query))

def call_research_agent_node(state: GraphState) -> GraphState:
    """Graph node: run the research agent on the query and return the updated state."""
    user_query = state.query
    print(f"Calling Research Agent with query: {user_query}")
    # research_agent serves both general and significant-moves questions.
    return GraphState(query=user_query, agent_outcome=research_agent(user_query))

# Supervisor graph: a conditional entry point picks one agent node, then the run ends.
graph = StateGraph(GraphState)

for node_name, node_fn in (
    ("call_plotting_agent", call_plotting_agent_node),
    ("call_research_agent", call_research_agent_node),
):
    graph.add_node(node_name, RunnableLambda(node_fn))

# route_agent returns one of the keys below; each maps to the node of the same name.
graph.set_conditional_entry_point(
    route_agent,
    {
        "call_plotting_agent": "call_plotting_agent",
        "call_research_agent": "call_research_agent",
    },
)

for node_name in ("call_plotting_agent", "call_research_agent"):
    graph.add_edge(node_name, END)

app = graph.compile()

# --- Modified chatbot_query function ---
def chatbot_query(user_query):
    """Run the query through the compiled supervisor graph and return the agent's reply."""
    print(f"Chatbot received query: {user_query}")
    # The graph starts from a state that holds only the query.
    final_state = app.invoke(GraphState(query=user_query))
    return final_state['agent_outcome']

print("Chatbot query function updated to use Supervisor Agent.")

# Example Usage
# research_agent calls helpers that this cell does not define. On a fresh kernel
# they may not exist yet, and the examples would stop with a NameError as soon as
# a ticker is recognised. Check first and say what is missing instead.
_required_helpers = ("extract_ticker_robust", "extract_date_robust", "get_price_context")
_missing_helpers = [name for name in _required_helpers if name not in globals()]

if _missing_helpers:
    print(f"Skipping examples; run the cell that defines these helpers first: {', '.join(_missing_helpers)}")
else:
    print("\n--- Example 1: General Query with Month-Year (routed to Research Agent) ---")
    print(chatbot_query("What happened to stock A in May 2020?"))

    print("\n--- Example 2: Significant Moves Query (routed to Research Agent) ---")
    print(chatbot_query("Show significant moves for A between 2020-01-01 and 2020-06-30 with a 5% threshold."))

    print("\n--- Example 3: Plotting Query (routed to Plotting Agent) ---")
    print(chatbot_query("Show me a chart of stock A's performance."))

    print("\n--- Example 4: General Query without specific date (routed to Research Agent) ---")
    print(chatbot_query("Tell me about stock A."))

## Consolidate Setup and Code



In [None]:
!pip install -q langchain-huggingface langchain-community langchain-core

import requests
import time
import yfinance as yf
from datetime import datetime
from datasets import load_dataset
from langchain_huggingface import HuggingFacePipeline
import matplotlib.pyplot as plt
import io
import base64

# 4. HF_TOKEN setup
# Fetch the secret from the Colab secrets store
token = userdata.get('HF_TOKEN')

# Export it so Hugging Face libraries can find it, but only when a value was
# actually returned: assigning None to os.environ raises TypeError, which would
# make the "NOT loaded" message below unreachable.
if token:
    os.environ["HF_TOKEN"] = token

if "HF_TOKEN" in os.environ and os.environ["HF_TOKEN"]:
    print("Hugging Face API token is loaded successfully.")
else:
    print("Hugging Face API token is NOT loaded. Please check your Colab secrets.")

# 5. Dataset loading and vector store creation
# 1. Stream the dataset and keep only the first 5000 records
dataset = load_dataset('Zihan1004/FNSPID', split='train', streaming=True)
# zip() against a bounded range stops after 5000 records (or earlier if the stream ends).
sample_data = [entry for _, entry in zip(range(5000), dataset)]

df = pd.DataFrame(sample_data)

# 2. One Document per article: the title is the searchable text,
#    ticker and date travel along as metadata for later filtering
docs = [
    Document(
        page_content=row['Article_title'],
        metadata={"ticker": row['Stock_symbol'], "date": str(row['Date'])},
    )
    for _, row in df.iterrows()
]

# 3. Embed the documents and index them in FAISS
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(docs, embeddings)

print("Vector Store Ready!")

# 6. LLM initialization
# Local Hugging Face text2text pipeline around flan-t5-small, wrapped for LangChain.
model_id = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

# do_sample=True with temperature=0.7 means generations are sampled; no seed is
# set in this cell, so repeated runs may produce different text.
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    temperature=0.7,
    do_sample=True
)

# Wrap the pipeline so it can be called through LangChain's `invoke` interface.
llm = HuggingFacePipeline(pipeline=pipe)


# --- Missing Helper functions definitions ---
def extract_ticker_robust(query, df):
    """Return the first ticker-like token in ``query``, or None.

    Placeholder implementation: a "ticker" is any standalone run of 1-5
    uppercase letters. ``df`` is accepted but not consulted, so the result is
    not validated against known symbols.
    """
    candidate = re.search(r'\b[A-Z]{1,5}\b', query)
    return candidate.group(0) if candidate else None

def get_price_context(ticker, start_date=None, end_date=None):
    """Fetch daily closing prices and day-over-day percentage changes via yfinance.

    Args:
        ticker (str): Stock ticker symbol; a falsy value yields an empty frame.
        start_date (str, optional): First day, 'YYYY-MM-DD'. Defaults to five
            calendar days before ``end_date``.
        end_date (str, optional): Last day, 'YYYY-MM-DD', inclusive. Defaults
            to today.

    Returns:
        pandas.DataFrame: Columns 'Date' (str), 'Close' and
        'Daily_Change_Percent', one row per trading day returned by yfinance.
        Empty when the ticker is missing, nothing is returned, or the download
        fails (the error is printed, not raised).
    """
    if not ticker:
        return pd.DataFrame()

    if not end_date:
        end_date = datetime.now().strftime('%Y-%m-%d')
    if not start_date:
        # Default window: the five calendar days leading up to end_date.
        start_date = (pd.to_datetime(end_date) - pd.Timedelta(days=5)).strftime('%Y-%m-%d')

    try:
        # yfinance treats `end` as exclusive, while callers pass an inclusive
        # end date (a single-day query uses start == end). Ask for one extra
        # day so the requested end date is part of the result.
        download_end = (pd.to_datetime(end_date) + pd.Timedelta(days=1)).strftime('%Y-%m-%d')
        stock_data = yf.download(ticker, start=start_date, end=download_end, progress=False)
        if stock_data.empty:
            return pd.DataFrame()

        stock_data['Date'] = stock_data.index.strftime('%Y-%m-%d')
        stock_data['Daily_Change_Percent'] = stock_data['Close'].pct_change() * 100
        # The first row has no previous close, so pct_change gives NaN there;
        # report it as a 0% change.
        stock_data['Daily_Change_Percent'] = stock_data['Daily_Change_Percent'].fillna(0)
        return stock_data[['Date', 'Close', 'Daily_Change_Percent']].reset_index(drop=True)
    except Exception as e:
        # Best effort: callers treat an empty frame as "no data available".
        print(f"Error fetching data for {ticker}: {e}")
        return pd.DataFrame()

def extract_date_robust(query):
    """Extract a date string from the query as 'YYYY-MM-DD', 'YYYY-MM' or 'YYYY'.

    Checked in order of decreasing precision:
      1. A numeric 'YYYY-MM-DD' date.
      2. A numeric 'YYYY-MM' month (months 01-12 only).
      3. An English month name or three-letter abbreviation followed by a
         four-digit year ("May 2020", "Sept. 2021"), returned as 'YYYY-MM'.
      4. A bare four-digit year.

    Args:
        query (str): Free-text user query.

    Returns:
        str | None: The first match in the order above, or None.
    """
    # YYYY-MM-DD
    date_full = re.search(r'\b\d{4}-\d{2}-\d{2}\b', query)
    if date_full:
        return date_full.group(0)

    # YYYY-MM. An out-of-range month such as "2020-13" is not accepted here;
    # the query then falls through to the bare-year case below.
    date_month = re.search(r'\b\d{4}-(?:0[1-9]|1[0-2])\b', query)
    if date_month:
        return date_month.group(0)

    # "<month name> YYYY". Names are spelled out here rather than taken from the
    # calendar module so that matching does not depend on the process locale.
    month_names = (
        "january", "february", "march", "april", "may", "june",
        "july", "august", "september", "october", "november", "december",
    )
    month_numbers = {}
    for number, name in enumerate(month_names, start=1):
        month_numbers[name] = number
        month_numbers[name[:3]] = number
    month_numbers["sept"] = 9
    # Longest alternatives first so "september" wins over "sep".
    alternation = "|".join(sorted(month_numbers, key=len, reverse=True))
    named_month = re.search(rf'\b({alternation})\.?\s+(\d{{4}})\b', query, re.IGNORECASE)
    if named_month:
        month = month_numbers[named_month.group(1).lower()]
        return f"{named_month.group(2)}-{month:02d}"

    # YYYY
    date_year = re.search(r'\b\d{4}\b', query)
    if date_year:
        return date_year.group(0)

    return None


# 7. Helper functions (`extract_date_range`, `extract_threshold`, `find_significant_moves_and_news`)
def extract_date_range(query):
    """Extract a (start_date, end_date) pair of 'YYYY-MM-DD' strings from a query.

    Resolution order:
      1. Two or more 'YYYY-MM-DD' dates: the first two, in order of appearance.
      2. Exactly one 'YYYY-MM-DD' date: that date through today.
      3. A 'YYYY-MM' month: the first through the last day of that month.

    Args:
        query (str): Free-text user query.

    Returns:
        tuple[str | None, str | None]: The date range, or (None, None) when no
        usable date is found (including an out-of-range month such as '2020-13').
    """
    dates_full = re.findall(r'\b\d{4}-\d{2}-\d{2}\b', query)
    if len(dates_full) >= 2:
        # Additional dates used to fall through to the month branch and
        # silently shrink the range to the month of the first date.
        return dates_full[0], dates_full[1]
    if len(dates_full) == 1:
        # A single date is taken as the start; the range runs until today.
        return dates_full[0], pd.to_datetime('today').strftime('%Y-%m-%d')

    month_year_match = re.search(r'\b(\d{4})-(\d{2})\b', query)
    if month_year_match:
        year = int(month_year_match.group(1))
        month = int(month_year_match.group(2))
        if not 1 <= month <= 12:
            # calendar.monthrange would raise for this; report "no range" instead.
            return None, None
        year_month_str = month_year_match.group(0)
        last_day = calendar.monthrange(year, month)[1]
        return f"{year_month_str}-01", f"{year_month_str}-{last_day:02d}"

    return None, None

def extract_threshold(query):
    """Extract a percentage threshold such as '5%', '2.5 %' or '5 percent'.

    Args:
        query (str): Free-text user query.

    Returns:
        float: The first percentage found, or 3.0 when the query has none.
    """
    # Accept an optional decimal part so "2.5%" is read as 2.5 rather than 5,
    # and allow optional whitespace before the unit ("5 %").
    match = re.search(r'(\d+(?:\.\d+)?)\s*(?:%|percent)', query, re.IGNORECASE)
    if match:
        return float(match.group(1))
    return 3.0  # Default threshold

def find_significant_moves_and_news(ticker, threshold, start_date, end_date, vectorstore=vectorstore):
    """Identifies dates with significant price movements and retrieves relevant news articles.

    Args:
        ticker (str): The stock ticker symbol.
        threshold (float): The percentage threshold for significant price movement (e.g., 2 for 2%).
        start_date (str): The start date for historical data in 'YYYY-MM-DD' format.
        end_date (str): The end date for historical data in 'YYYY-MM-DD' format.
        vectorstore: Store queried with ``similarity_search``; defaults to the
            notebook-level ``vectorstore`` that exists when this function is defined.

    Returns:
        list: One dict per significant day with keys 'date' (str),
        'daily_change_percent' (the row's value as stored in the price frame;
        callers unwrap it with ``.item()``) and 'news' (up to three headlines
        whose metadata date starts with that day). Empty when no price data
        is found.
    """
    price_data = get_price_context(ticker, start_date, end_date)

    if price_data.empty:
        return []

    significant_moves = price_data[abs(price_data['Daily_Change_Percent']) >= threshold].copy()

    results = []
    for _, row in significant_moves.iterrows():
        # Depending on the column layout of the price frame, a cell may come
        # back as a one-element Series or as a plain string; only the former
        # has `.item()`, so unwrap conditionally instead of assuming it.
        move_date = row['Date']
        if hasattr(move_date, 'item'):
            move_date = move_date.item()
        # Left unwrapped on purpose: research_agent calls `.item()` on this value.
        daily_change = row['Daily_Change_Percent']

        # Over-fetch (k=10) because the similarity search is not date-aware;
        # the date filter below keeps only same-day articles.
        all_news_articles = vectorstore.similarity_search(f"News about {ticker} on {move_date}", k=10)

        # Metadata dates may carry a time suffix, so compare by prefix.
        filtered_news = [
            doc.page_content
            for doc in all_news_articles
            if doc.metadata.get('date', '').startswith(move_date)
        ]

        results.append({
            'date': move_date,
            'daily_change_percent': daily_change,
            'news': filtered_news[:3],  # at most three headlines per day
        })
    return results


# 9. Agent functions (plotting agent and research agent)
def plotting_agent(user_query: str) -> str:
    """Render a closing-price chart for the ticker and period named in the query.

    When the query has no date range, the last 30 days up to today are used.

    Args:
        user_query: The user's free-text request.

    Returns:
        An HTML ``<img>`` tag embedding the chart as a base64 PNG, or a plain
        message when no ticker is found or no price data is available.
    """
    ticker = extract_ticker_robust(user_query, df)
    if not ticker:
        return "Please specify a stock ticker (e.g., 'A' or 'AAPL') to plot."

    start_date, end_date = extract_date_range(user_query)
    if not start_date or not end_date:
        # No usable range in the query: fall back to the last 30 days.
        today = pd.to_datetime('today')
        end_date = today.strftime('%Y-%m-%d')
        start_date = (today - pd.Timedelta(days=30)).strftime('%Y-%m-%d')

    price_data = get_price_context(ticker, start_date, end_date)

    if price_data.empty:
        return f"Could not retrieve historical price data for {ticker} between {start_date} and {end_date}."

    # Dates arrive as strings; convert so matplotlib draws a real time axis.
    plot_dates = pd.to_datetime(price_data['Date'])

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.plot(plot_dates, price_data['Close'], label=f'{ticker} Close Price')
        ax.set_title(f'{ticker} Stock Price from {start_date} to {end_date}')
        ax.set_xlabel('Date')
        ax.set_ylabel('Close Price ($)')
        ax.grid(True)
        ax.legend()
        fig.tight_layout()

        # Render into memory instead of a file on disk.
        buf = io.BytesIO()
        fig.savefig(buf, format='png')
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)

    image_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
    return f"<img src='data:image/png;base64,{image_base64}'>"

def research_agent(user_query: str) -> str:
    """Answer a stock question by combining price data, retrieved news and the LLM.

    Two paths:
      * "Significant moves" queries (detected by keyword) need a date range;
        the days found by ``find_significant_moves_and_news`` are summarised.
      * All other queries build a short price summary for the date mentioned
        (or recent data when no date is given), retrieve three news items and
        ask the LLM the user's question against that context.

    Args:
        user_query: The user's free-text question.

    Returns:
        The LLM's answer, or a fixed guidance message when the ticker or date
        range is missing or no significant moves are found.
    """

    ticker = extract_ticker_robust(user_query, df)

    if not ticker:
        return "Please specify a stock ticker (e.g., 'A' or 'AAPL')."

    # Keyword test (case-insensitive substring) that selects the significant-moves path
    if any(keyword in user_query.lower() for keyword in ['significant moves', 'large changes', 'historical analysis', 'percentage change']):
        start_date, end_date = extract_date_range(user_query)
        if not start_date or not end_date:
            return "Please specify a valid date range (e.g., 'YYYY-MM-DD to YYYY-MM-DD' or 'YYYY-MM') for historical analysis."

        threshold = extract_threshold(user_query)

        significant_moves = find_significant_moves_and_news(ticker, threshold, start_date, end_date, vectorstore=vectorstore)

        if not significant_moves:
            return f"No significant moves (> {threshold}%) found for {ticker} between {start_date} and {end_date}."

        # Only the first five moves are sent to the LLM, to keep the prompt short
        significant_moves_limited = significant_moves[:5]

        # One line per move: date, percentage change and shortened headlines
        moves_context = []
        for move in significant_moves_limited:
            news_snippets = "; ".join([s[:50] + '...' if len(s) > 50 else s for s in move['news']]) # headlines longer than 50 chars are cut and suffixed with '...'
            # `.item()` unwraps the change value, which find_significant_moves_and_news
            # returns exactly as it was stored in the price frame
            moves_context.append(
                f"On {move['date']}, {ticker} changed by {move['daily_change_percent'].item():.2f}%. News: {news_snippets}"
            )
        moves_context_str = "\n".join(moves_context)

        # Prompt in Context / Question / Explanation layout
        prompt = f"""
Context:
Days with price movements greater than {threshold}% for {ticker} from {start_date} to {end_date}:
{moves_context_str}

Question: Summarize the key significant price movements and their correlation with the provided news articles. Highlight any notable patterns or events observed during this period.
Explanation:
"""
        return llm.invoke(prompt)

    # General-query path
    date_robust_str = extract_date_robust(user_query) # YYYY-MM-DD, YYYY-MM or YYYY, or None when the query has no date

    price_info_str = ""
    if date_robust_str:
        # Expand a month or a year into an explicit start/end pair
        if re.match(r'\d{4}-\d{2}$', date_robust_str): # YYYY-MM
            year, month = map(int, date_robust_str.split('-'))
            start_d = f"{date_robust_str}-01"
            last_day = calendar.monthrange(year, month)[1]
            end_d = f"{date_robust_str}-{last_day:02d}"
        elif re.match(r'\d{4}$', date_robust_str): # YYYY
            start_d = f"{date_robust_str}-01-01"
            end_d = f"{date_robust_str}-12-31"
        else: # anything else is treated as YYYY-MM-DD
            start_d = date_robust_str
            end_d = date_robust_str # single day: start and end coincide

        price_data_df = get_price_context(ticker, start_date=start_d, end_date=end_d)

        if not price_data_df.empty:
            # The `.item()` calls below assume each cell of a row is a one-element
            # container — TODO confirm against the column layout yfinance returns
            if start_d == end_d: # Single day query
                day_data = price_data_df.iloc[0]
                price_info_str = f"{ticker} on {day_data['Date'].item()}: Closed at ${day_data['Close'].item():.2f}, Daily change: {day_data['Daily_Change_Percent'].item():.2f}%."
            else: # Date range query
                first_day = price_data_df.iloc[0]
                # `last_day` is rebound here from the month length (int) to the last row
                last_day = price_data_df.iloc[-1]
                price_info_str = (
                    f"Historical data for {ticker} from {first_day['Date'].item()} to {last_day['Date'].item()}:\n"
                    f"Start Close: ${first_day['Close'].item():.2f}, End Close: ${last_day['Close'].item():.2f}.\n"
                    f"Average daily change: {price_data_df['Daily_Change_Percent'].mean():.2f}%.\n"
                )
        else:
            price_info_str = f"Could not find price data for {ticker} around {date_robust_str}."

        search_query = user_query # the original question doubles as the news search text

    else: # No date in the query
        price_data_df = get_price_context(ticker) # defaults to the five days up to today
        if not price_data_df.empty:
            latest_data = price_data_df.iloc[-1]
            price_info_str = f"{ticker} recently: Closed at ${latest_data['Close'].item():.2f}, Daily change: {latest_data['Daily_Change_Percent'].item():.2f}%."
        else:
            price_info_str = f"Could not find recent price data for {ticker}."
        search_query = f"Recent news about {ticker}"


    # Top three matches by similarity; no ticker or date filter is applied here
    news_results = vectorstore.similarity_search(search_query, k=3)
    news_context = "\n".join([res.page_content for res in news_results])

    # Prompt in Context / Question / Explanation layout
    prompt = f"""
Context:
Price Information: {price_info_str}
News Articles: {news_context}

Question: {user_query}
Explanation:
"""
    return llm.invoke(prompt)

# 10. Supervisor Agent Definition
from typing import Optional  # needed for the nullable field below


class GraphState(BaseModel):
    """State carried through the supervisor graph."""
    query: str = Field(..., description="The user's input query")
    # Optional: the field is empty (None) until one of the agent nodes has run,
    # so the annotation must admit None to agree with the default.
    agent_outcome: Optional[str] = Field(None, description="The outcome/response from the called agent")

def route_agent(state: GraphState) -> str:
    """Choose the graph node that should handle the user's query.

    Queries containing a word that starts with "chart", "plot", "graph" or
    "visualis/visualiz" go to the plotting agent. Everything else goes to the
    research agent.

    Args:
        state: Current graph state; only ``state.query`` is read.

    Returns:
        "call_plotting_agent" or "call_research_agent".
    """
    # Anchor at a word start so that words merely containing a keyword
    # ("paragraph", "geographic", "exploit") are not mistaken for plot requests.
    wants_plot = re.search(r'\b(?:chart|plot|graph|visuali[sz])', state.query, re.IGNORECASE)
    if wants_plot:
        return "call_plotting_agent"
    return "call_research_agent"

def call_plotting_agent_node(state: GraphState) -> GraphState:
    """Graph node: run the plotting agent on the query and return the updated state."""
    user_query = state.query
    print(f"Calling Plotting Agent with query: {user_query}")
    return GraphState(query=user_query, agent_outcome=plotting_agent(user_query))

def call_research_agent_node(state: GraphState) -> GraphState:
    """Graph node: run the research agent on the query and return the updated state."""
    user_query = state.query
    print(f"Calling Research Agent with query: {user_query}")
    return GraphState(query=user_query, agent_outcome=research_agent(user_query))

# Supervisor graph: a conditional entry point picks one agent node, then the run ends.
graph = StateGraph(GraphState)

for node_name, node_fn in (
    ("call_plotting_agent", call_plotting_agent_node),
    ("call_research_agent", call_research_agent_node),
):
    graph.add_node(node_name, RunnableLambda(node_fn))

# route_agent returns one of the keys below; each maps to the node of the same name.
graph.set_conditional_entry_point(
    route_agent,
    {
        "call_plotting_agent": "call_plotting_agent",
        "call_research_agent": "call_research_agent",
    },
)

for node_name in ("call_plotting_agent", "call_research_agent"):
    graph.add_edge(node_name, END)

app = graph.compile()

# 11. Modified chatbot_query function
def chatbot_query(user_query):
    """Run the query through the compiled supervisor graph and return the agent's reply."""
    print(f"Chatbot received query: {user_query}")
    # The graph starts from a state that holds only the query.
    final_state = app.invoke(GraphState(query=user_query))
    return final_state['agent_outcome']

print("Consolidated setup and code, including missing helper functions and typo correction.")

# Example Usage (from previous tasks): each entry is (banner, query).
example_queries = [
    (
        "\n--- Example 1: General Query with Month-Year (routed to Research Agent) ---",
        "What happened to stock A in May 2020?",
    ),
    (
        "\n--- Example 2: Significant Moves Query (routed to Research Agent) ---",
        "Show significant moves for A between 2020-01-01 and 2020-06-30 with a 5% threshold.",
    ),
    (
        "\n--- Example 3: Plotting Query (routed to Plotting Agent) ---",
        "Show me a chart of stock A's performance.",
    ),
    (
        "\n--- Example 4: General Query without specific date (routed to Research Agent) ---",
        "Tell me about stock A.",
    ),
]

for example_banner, example_query in example_queries:
    print(example_banner)
    print(chatbot_query(example_query))

In [None]:
# The query contains "chart", so the supervisor routes it to the plotting agent.
# That agent returns an HTML <img> string (or a plain message when no data is
# found); print() emits the string as raw text rather than rendering the image.
print(chatbot_query("Show me a chart of AAPL's performance."))

## SUMMARY

This project developed a sophisticated chatbot capable of providing financial insights and visualizations. Key functionalities include:

### Data Loading and Vector Store
The chatbot loads stock news data from a Hugging Face dataset and creates a FAISS vector store for fast retrieval of relevant news articles.

### LLM Integration
It uses the `google/flan-t5-small` language model to generate summaries and analyses.

### Supervisor Agent for Dynamic Routing
A central Supervisor agent routes each user query to the appropriate specialized sub-agent (Research Agent or Plotting Agent) based on the intent detected in the query.

### Research Agent
This agent handles general queries, extracts specific date ranges or falls back to recent data, and can identify "significant moves" in stock prices, correlating them with relevant news articles.

### Plotting Agent
For visual requests, this agent generates historical stock price charts using yfinance and matplotlib, returning them as base64-encoded images for direct display.

### Date and Ticker Extraction
Helper functions extract stock tickers and date ranges from user queries.

Overall, the chatbot is designed to offer a dynamic and informative experience for users seeking stock market information and analysis.

