In [1]:
# Force reinstall all langchain packages to the latest matching versions
#%pip install -U --force-reinstall langchain langchain-community langchain-core langchain-google-genai valyu prophet yfinance matplotlib pandas

In [None]:
import os
import operator
import datetime
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
from typing import Annotated, Literal, TypedDict, List
from prophet import Prophet
import sys
sys.path.append(os.getcwd())
from effects_core import IO
from pure_logic import build_brownian_pipeline, build_ml_pipeline, build_search_pipeline


# --- LIBRARIES ---
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.agents import create_agent
from langchain_core.tools import tool
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langgraph.graph import StateGraph, START, END
from langgraph.types import Send
from langgraph.prebuilt import create_react_agent 
from pydantic import BaseModel, Field
from valyu import Valyu 
from langchain.messages import SystemMessage, HumanMessage
from langchain.chat_models import init_chat_model

# Shared chat model: passed to both sub-agents (create_agent), used by the
# router for structured classification, and by the final synthesis step.
# NOTE(review): presumably needs OpenAI credentials (e.g. OPENAI_API_KEY) in
# the environment before this cell runs — confirm.
model = init_chat_model("gpt-4.1", model_provider="openai")


In [36]:
from typing import TypeVar, Callable, Generic, Any
from dataclasses import dataclass

T = TypeVar("T")
U = TypeVar("U")


@dataclass
class IO(Generic[T]):
    """
    Lazy wrapper around a zero-argument callable that produces a ``T``.

    Constructing or combining IO values never invokes the wrapped callable;
    it is only called when ``unsafe_run()`` is invoked on the resulting IO.
    """
    effect: Callable[[], T]

    @staticmethod
    def pure(value: T) -> "IO[T]":
        """Wrap an already-available value so it can take part in IO chains."""
        def _constant() -> T:
            return value
        return IO(_constant)

    @staticmethod
    def fail(error: Exception) -> "IO[Any]":
        """Build an IO that raises ``error`` when it is run."""
        def _raise_stored():
            raise error
        return IO(_raise_stored)

    def map(self, f: Callable[[T], U]) -> "IO[U]":
        """Return a new IO whose result is ``f`` applied to this IO's result."""
        def _mapped() -> U:
            produced = self.effect()
            return f(produced)
        return IO(_mapped)

    def flat_map(self, f: Callable[[T], "IO[U]"]) -> "IO[U]":
        """Return a new IO that runs this one, feeds the result to ``f``, then runs the IO ``f`` returns."""
        def _chained() -> U:
            following = f(self.effect())
            return following.unsafe_run()
        return IO(_chained)

    def attempt(self) -> "IO[T | Exception]":
        """Return a new IO that yields a raised ``Exception`` as its value instead of propagating it."""
        def _guarded():
            try:
                outcome = self.effect()
            except Exception as caught:
                return caught
            return outcome
        return IO(_guarded)

    def unsafe_run(self) -> T:
        """Invoke the wrapped callable now and return whatever it returns."""
        return self.effect()

# Helper for composing multiple IOs
def sequence(ios: list[IO[T]]) -> IO[list[T]]:
    """Combine several IOs into one IO that yields their results as a list.

    When the returned IO is run, the given IOs are run one after another in
    list order; an exception raised by any of them propagates immediately.
    """
    def _collect():
        gathered = []
        for step in ios:
            gathered.append(step.unsafe_run())
        return gathered
    return IO(_collect)



In [37]:
from typing import Annotated, Literal, TypedDict
import operator


class AgentInput(TypedDict):
    """Simple input state for each subagent."""
    # Text of the question handed to the subagent. route_to_agents builds
    # payloads of exactly this shape ({"query": ...}).
    # NOTE(review): the node functions in this notebook annotate their state
    # as RouterState rather than AgentInput — confirm which is intended.
    query: str


class AgentOutput(TypedDict):
    """Output from each subagent."""
    # Label of the agent that produced the result; the node functions in this
    # notebook emit "quant" or "research".
    source: str
    # Content of the agent's last message.
    result: str


class Classification(TypedDict):
    """A single routing decision: which agent to call with what query."""
    # Routing target. route_to_agents passes this value to Send() as the
    # destination, so it has to match a node name in the graph.
    source: Literal["quant", "research"]
    # Sub-question forwarded to that agent.
    query: str


class RouterState(TypedDict):
    """State dictionary read and updated by the router, agent and synthesis functions."""
    # Original user question.
    query: str
    # Routing decisions written by classify_query.
    classifications: list[Classification]
    # Per-agent outputs. NOTE(review): the operator.add metadata is presumably
    # consumed by LangGraph as a reducer, so lists returned by agent nodes are
    # concatenated rather than overwritten — confirm against the graph setup.
    results: Annotated[list[AgentOutput], operator.add]
    # Report text written by synthesize_results.
    final_answer: str

class BrownianParams(TypedDict):
    """Parameters produced by calculate_brownian_params_pure and consumed by the simulation/report helpers."""
    # Mean of the daily simple returns.
    mu: float
    # Standard deviation of the daily simple returns.
    sigma: float
    # Last price in the input series; used as the simulation start price.
    last_price: float
    # sigma scaled by sqrt(252).
    annual_vol: float
    # mu scaled by 252.
    annual_drift: float

In [38]:
# --- EFFECT DEFINITIONS (I/O Boundary) ---

def fetch_stock_history_io(ticker: str, years: int = 2) -> IO[pd.DataFrame]:
    """Describe a Yahoo Finance download of closing prices for ``ticker``.

    The returned IO, when run, downloads the window ending today (normalized
    to midnight) and starting ``years`` years earlier, then reduces the
    result to the 'Close' data. Any remaining DataFrame is collapsed to its
    first column, so the value produced is not a DataFrame despite the
    annotation.
    """
    def _download_close():
        window_end = pd.Timestamp.today().normalize()
        window_start = window_end - pd.DateOffset(years=years)
        frame = yf.download(ticker, start=window_start, end=window_end, progress=False)

        # Column layout differs between yfinance results: with MultiIndex
        # columns 'Close' is selected unconditionally, otherwise only if present.
        multi_level = isinstance(frame.columns, pd.MultiIndex)
        if multi_level or 'Close' in frame.columns:
            closes = frame['Close']
            if multi_level and isinstance(closes, pd.DataFrame) and ticker in closes.columns:
                closes = closes[ticker]
        else:
            closes = frame

        # Whatever is still two-dimensional is reduced to its first column.
        return closes.iloc[:, 0] if isinstance(closes, pd.DataFrame) else closes
    return IO(_download_close)

def run_monte_carlo_io(params: BrownianParams, days: int = 30, scenarios: int = 1000,
                       seed: "int | None" = None) -> IO[pd.DataFrame]:
    """Effect: Random Number Generation & Simulation.

    Args:
        params: Must provide 'mu', 'sigma' and 'last_price'.
        days: Number of simulated steps after the starting row.
        scenarios: Number of independent price paths (columns).
        seed: Optional seed. When given, every run of the returned IO uses a
            fresh ``numpy.random.default_rng(seed)`` and therefore produces
            the same paths. When omitted, the global ``np.random`` state is
            used exactly as before.

    Returns:
        An IO yielding a DataFrame with ``days + 1`` rows and ``scenarios``
        columns; row 0 holds the starting price for every path.
    """
    def _sim():
        mu, sigma, S0 = params['mu'], params['sigma'], params['last_price']
        dt = 1
        # Seeded runs get their own generator so they are reproducible and
        # do not disturb (or depend on) the global NumPy random state.
        rng = np.random if seed is None else np.random.default_rng(seed)
        # NOTE(review): the draws are exponentiated below, i.e. treated as log
        # returns, while calculate_brownian_params_pure estimates mu/sigma from
        # simple returns — confirm whether a drift correction is wanted.
        returns = rng.normal(loc=mu * dt, scale=sigma * np.sqrt(dt), size=(days, scenarios))
        price_paths = np.vstack([np.full((1, scenarios), S0), S0 * np.exp(np.cumsum(returns, axis=0))])
        return pd.DataFrame(price_paths)
    return IO(_sim)

def valyu_search_io(query: str) -> IO[str]:
    """Effect: External API Search (Optimized for Tokens).

    The returned IO, when run, calls ``Valyu.answer(query)`` using the
    VALYU_API_KEY environment variable and reduces the response to a string
    of at most a few thousand characters:

    * ``contents`` attribute that is a string: its first 5000 characters;
    * ``contents`` attribute that is a list/tuple: the first 5 items, each
      stringified and cut to 1000 characters, joined by ``"\\n---\\n"``;
    * ``contents`` attribute of another non-None type: ``str(contents)[:5000]``;
    * dict response with a 'contents' key: ``str(response['contents'])[:5000]``;
    * anything else, or any error while parsing: ``str(response)[:5000]``.
    """
    def _search():
        client = Valyu(api_key=os.environ.get("VALYU_API_KEY"))
        response = client.answer(query)

        # Extract only the content text; str(response) would also dump metadata.
        try:
            if hasattr(response, 'contents'):
                contents = response.contents

                # A plain string must be handled before any slicing: slicing
                # and iterating it would emit single characters as "results".
                if isinstance(contents, str):
                    return contents[:5000]

                # List of results: top 5, max 1000 chars each.
                if isinstance(contents, (list, tuple)):
                    return "\n---\n".join(str(item)[:1000] for item in contents[:5])

                # Other payloads (e.g. a dict) are stringified on their own.
                if contents is not None:
                    return str(contents)[:5000]

            # Fallback for dictionary responses
            elif isinstance(response, dict) and 'contents' in response:
                return str(response['contents'])[:5000]  # Hard cap at 5k chars

            # Fallback for everything else (including contents=None)
            return str(response)[:5000]

        except Exception:
            return str(response)[:5000]  # Safety net: never fail on parsing

    return IO(_search)

def prophet_predict_io(df: pd.DataFrame, days: int = 30) -> IO[pd.DataFrame]:
    """Describe fitting a Prophet model on ``df`` and forecasting ``days`` extra periods.

    Nothing is fitted until the returned IO is run. The IO yields the frame
    returned by ``Prophet.predict`` for the future dataframe built with
    ``periods=days``.
    """
    def _fit_then_forecast():
        forecaster = Prophet(daily_seasonality=True)
        forecaster.fit(df)
        horizon = forecaster.make_future_dataframe(periods=days)
        return forecaster.predict(horizon)
    return IO(_fit_then_forecast)

In [39]:
# --- PURE DOMAIN LOGIC ---
# Pure functions over the domain types (e.g. BrownianParams, declared in the state/type cell above).

def calculate_brownian_params_pure(prices: pd.Series) -> BrownianParams:
    """Pure: Extract statistical parameters from data.

    Args:
        prices: Price series in chronological order.

    Returns:
        A dict with the mean ('mu') and standard deviation ('sigma') of the
        daily simple returns, the most recent non-NaN price ('last_price'),
        and the annualized figures 'annual_vol' (sigma * sqrt(252)) and
        'annual_drift' (mu * 252).

    Raises:
        ValueError: If fewer than two prices are given, or if no valid daily
            return can be computed from them (e.g. the series is mostly NaN).
    """
    if len(prices) < 2:
        raise ValueError("Not enough data")

    daily_returns = ((prices / prices.shift(1)) - 1).dropna()
    # The length check above counts NaN rows too; without this guard an
    # all-NaN return series would silently yield NaN mu/sigma.
    if len(daily_returns) == 0:
        raise ValueError("Not enough data")

    mu = np.mean(daily_returns)
    sigma = np.std(daily_returns)
    # Use the last *valid* price so a trailing NaN row cannot become the
    # simulation start price. At least two valid prices exist at this point.
    last_price = float(prices.dropna().iloc[-1])

    return {
        "mu": mu,
        "sigma": sigma,
        "last_price": last_price,
        "annual_vol": sigma * np.sqrt(252),
        "annual_drift": mu * 252
    }

def format_brownian_output_pure(sim_df: pd.DataFrame, ticker: str, params: BrownianParams) -> str:
    """Render the simulation outcome as a short multi-line text report.

    Uses the last row of ``sim_df`` (the final simulated prices) to compute
    the 5th and 95th percentiles and the mean, and reads 'annual_vol' and
    'annual_drift' from ``params``.
    """
    terminal_prices = sim_df.iloc[-1]
    lower_bound = np.percentile(terminal_prices, 5)
    upper_bound = np.percentile(terminal_prices, 95)
    average_price = np.mean(terminal_prices)

    report_lines = [
        f"Brownian Motion Analysis for {ticker}:",
        "--- TECHNICAL PARAMETERS ---",
        f"Annualized Volatility: {params['annual_vol']:.2%}",
        f"Annualized Drift: {params['annual_drift']:.2%}",
        f"Confidence Interval (90%): ${lower_bound:.2f} - ${upper_bound:.2f}",
        f"Mean Target: ${average_price:.2f}",
    ]
    return "\n".join(report_lines)

def prepare_prophet_data_pure(data: pd.DataFrame) -> pd.DataFrame:
    """Pure Logic: Rename columns for Prophet.

    Args:
        data: Price data indexed by date. A DataFrame or a Series (as
            produced by the fetch step) is accepted.

    Returns:
        A DataFrame with exactly two columns: 'ds' (timezone-naive
        timestamps) and 'y' (the 'Close' column if present, otherwise the
        first data column).

    Raises:
        KeyError: If ``data`` has no data column to use as 'y'.
    """
    # A Series becomes a one-column frame so both input kinds share one path.
    frame = data.to_frame() if isinstance(data, pd.Series) else data
    df = frame.reset_index()

    if 'Date' in df.columns:
        df['ds'] = pd.to_datetime(df['Date']).dt.tz_localize(None)
    else:
        # After reset_index() df.index is a RangeIndex, so the dates have to
        # come from the ORIGINAL index (unnamed or differently named).
        df['ds'] = pd.DatetimeIndex(frame.index).tz_localize(None).to_numpy()

    if 'Close' in df.columns:
        df['y'] = df['Close']
    elif frame.shape[1] > 0:
        # Column 0 of df is the former index (the dates), so the fallback
        # must pick the first column of the original data instead.
        df['y'] = frame.iloc[:, 0].to_numpy()

    return df[['ds', 'y']]

def format_prophet_output(forecast: pd.DataFrame, ticker: str, days: int = 30) -> str:
    """Pure transformation of Prophet results to text (CSV format for token efficiency).

    Args:
        forecast: Frame with at least 'ds', 'yhat', 'yhat_lower' and
            'yhat_upper' columns.
        ticker: Symbol used in the report header.
        days: Number of trailing rows to include in the CSV table. Defaults
            to 30, which matches the default horizon of prophet_predict_io;
            pass the same value used for the forecast when it differs.

    Returns:
        A text block with a header line, a 'Trend: UP|DOWN' line and the
        last ``days`` rows as CSV (numbers rounded to 2 decimals).
    """
    future_data = forecast.tail(days)
    latest_pred = forecast.iloc[-1]['yhat']
    # NOTE(review): the trend compares the final prediction with the very
    # first row of `forecast`, not with the start of the trailing window —
    # confirm that this is the intended baseline.
    trend = "UP" if latest_pred > forecast.iloc[0]['yhat'] else "DOWN"

    # CSV instead of to_string() drops padding whitespace; rounding to two
    # decimals shortens the numbers further.
    columns = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
    table = future_data[columns].round(2).to_csv(index=False)

    return (f"ML Analysis for {ticker}\n"
            f"Trend: {trend}\n"
            f"Forecast Data (CSV):\n{table}")

In [40]:
from langchain.tools import tool

@tool
def brownianModel(TICKER: str):
    """
    Uses a Monadic Effect System (IO Monad) to model stock prediction.
    Architecture:
    1. Builds a pure description of the workflow (Pipeline).
    2. Safely executes side effects (Network -> Math -> Disk) via the Interpreter.
    """
    # Describe the workflow first (builder imported from pure_logic.py).
    pipeline = build_brownian_pipeline(TICKER)

    # Run it here; .attempt() is used so a failure comes back as an
    # Exception value rather than being raised into the agent loop.
    outcome = pipeline.attempt().unsafe_run()

    # Successful runs are passed through; failures are reported as text.
    if not isinstance(outcome, Exception):
        return outcome
    return f"Effect System Error: {str(outcome)}"

@tool
def mlModel(ticker: str):
    """
    Uses a Monadic Effect System to run Prophet ML predictions.
    Effects managed: Network I/O, Heavy Compute, Chart Rendering, Ledger Persistence.
    """
    # Build the pipeline description, then run it with errors captured as values.
    outcome = build_ml_pipeline(ticker).attempt().unsafe_run()

    # Successful runs are passed through; failures are reported as text.
    if not isinstance(outcome, Exception):
        return outcome
    return f"Effect System Error: {str(outcome)}"

@tool
def valyu_search_tool(query: str):
    """
    Effectful search wrapper.
    Effect: External API Call (Valyu).
    """
    result = build_search_pipeline(query).attempt().unsafe_run()

    # .attempt() hands failures back as Exception values. Report them as
    # text, exactly like brownianModel and mlModel do, instead of returning
    # a raw Exception object to the agent.
    if isinstance(result, Exception):
        return f"Effect System Error: {str(result)}"
    return result


In [41]:
# System prompt for the quant agent: an introduction followed by the
# numbered list of things the report has to contain, one per line.
trend_prompt = "\n".join([
    (
        "You are a Quantitative Analyst. Use the provided ML and Statistical tools to analyze the stock ticker provided. "
        "ONLY ENTER THE ABBREVIATION OF THE STOCK TO THE TOOLS. "
        "Your report must be detailed and data-heavy. You MUST include:"
    ),
    "1. The exact current price of the stock.",
    "2. The specific daily price targets for the next 30 days from the models.",
    "3. The median prediction and confidence intervals from the Brownian motion model.",
    "4. A clear statement of the trend direction (UP/DOWN/FLAT) based on the math.",
    "5. If a tool fails, explicitly state why (e.g., 'Not enough data').",
])

# Quant agent: the shared chat model plus the two forecasting tools.
trend_agent = create_agent(
    model,
    tools=[mlModel, brownianModel],
    system_prompt=SystemMessage(
        content=[
            {"type": "text", "text": trend_prompt},
            {"type": "text", "text": "stock markets"},
        ],
    ),
)

# System prompt for the research agent: an introduction followed by the
# numbered list of things the findings have to contain, one per line.
noise_prompt = "\n".join([
    (
        "You are a Market Researcher. Use the search tool to find recent news, sentiment, and macro factors affecting the stock. "
        "Do not just summarize; provide a detailed list of findings. You MUST include:"
    ),
    "1. Specific headlines, dates, and sources of the news you found.",
    "2. Direct quotes or key statistics from the search results.",
    "3. Any upcoming events (earnings dates, product launches).",
    "4. The overall market sentiment supported by specific evidence.",
])

# Research agent: the shared chat model plus the search tool.
noise_agent = create_agent(
    model,
    tools=[valyu_search_tool],
    system_prompt=SystemMessage(
        content=[
            {"type": "text", "text": noise_prompt},
            {"type": "text", "text": "stock markets"},
        ],
    ),
)


In [22]:
#result = trend_agent.invoke({"messages": [HumanMessage("analyze AMZN stock")]})

#ai_message = result["messages"][-1]


In [43]:
from pydantic import BaseModel, Field
from langgraph.types import Send
from langgraph.graph import StateGraph, START, END

# Schema handed to model.with_structured_output() in classify_query; the
# model's reply is parsed into this class.
# NOTE(review): the docstring and the Field description below are presumably
# part of the schema sent to the model — treat them as prompt text.
class ClassificationResult(BaseModel):  
    """Result of classifying a user query into agent-specific sub-questions."""
    # One entry per agent to run; each entry names the target agent
    # ("quant" or "research") and the sub-question for it.
    classifications: list[Classification] = Field(
        description="List of agents to invoke with their targeted sub-questions"
    )

def classify_query(state: RouterState) -> dict:
    """Ask the model to split the user's query into one task per agent.

    Reads ``state["query"]``, sends it together with the supervisor
    instructions to the structured-output model, and returns the parsed
    routing decisions under the ``"classifications"`` key.
    """
    router_llm = model.with_structured_output(ClassificationResult)

    # The instructions explicitly demand TWO tasks (quant + research).
    supervisor_instructions = """You are a Supervisor Agent. 
    When the user asks for a stock prediction, you MUST generate TWO separate instructions:
    
    1. One for the 'quant' agent to run the mathematical models (Brownian & Prophet).
    2. One for the 'research' agent to find news and sentiment.
    
    OUTPUT format:
    Return a list of TWO classifications.
    - Classification 1: source='quant', query='[Ticker Symbol]' (e.g., 'AMZN')
    - Classification 2: source='research', query='[Ticker Symbol] news and sentiment'
    """

    conversation = [
        {"role": "system", "content": supervisor_instructions},
        {"role": "user", "content": state["query"]},
    ]
    decision = router_llm.invoke(conversation)

    return {"classifications": decision.classifications}

def route_to_agents(state: RouterState) -> list[Send]:
    """Turn every stored classification into a ``Send`` to the agent it names.

    The ``Send`` target is the classification's ``source`` and the payload
    is a dict holding only that classification's ``query``.
    """
    dispatches = []
    for decision in state["classifications"]:
        target = decision["source"]
        payload = {"query": decision["query"]}
        dispatches.append(Send(target, payload))
    return dispatches


def run_trend_agent(state: RouterState):
    """Run the quant agent on ``state["query"]`` and wrap its last message as a result entry."""
    print("Executing Trend Agent")
    user_turn = {"role": "user", "content": state["query"]}
    agent_reply = trend_agent.invoke({"messages": [user_turn]})

    # Only the final message of the agent's conversation is reported.
    closing_message = agent_reply["messages"][-1]
    return {"results": [{"source": "quant", "result": closing_message.content}]}

def run_noise_agent(state: RouterState):
    """Run the research agent on ``state["query"]`` and wrap its last message as a result entry."""
    print("Executing Noise Agent")
    user_turn = {"role": "user", "content": state["query"]}
    agent_reply = noise_agent.invoke({"messages": [user_turn]})

    # Only the final message of the agent's conversation is reported.
    closing_message = agent_reply["messages"][-1]
    return {"results": [{"source": "research", "result": closing_message.content}]}

def synthesize_results(state: RouterState) -> dict:
    """Combine results from all agents into a comprehensive report.

    Args:
        state: Must contain 'query'. 'results' may be missing or empty, in
            which case a fixed "no results" answer is returned without
            calling the model.

    Returns:
        A dict with the single key 'final_answer' holding the report text.
    """
    # .get() rather than [...]: the key can be absent when no agent has
    # written a result yet, and that case should hit the same fallback.
    results = state.get("results")
    if not results:
        return {"final_answer": "No results found from any knowledge source."}

    # Agent reports are passed on in full, each wrapped in a labelled banner.
    formatted = [
        f"--- REPORT FROM {r['source'].upper()} DEPARTMENT ---\n{r['result']}\n------------------------------------------------"
        for r in results
    ]

    synthesis_prompt = f"""You are a Senior Investment Analyst compiling a comprehensive Due Diligence Report.
    The user asked: "{state['query']}"

    Your goal is to provide a "White Box" analysis—explaining NOT just the prediction, but HOW the math worked.

    STRICTLY FOLLOW THIS REPORT STRUCTURE:

    1. **Executive Summary**
       - A high-level verdict (Buy/Sell/Hold/Wait).

    2. **Methodology & Technical Deep-Dive (CRITICAL SECTION)**
       - Explain the logic behind the models.
       - **Brownian Motion:** Explicitly state the "Annualized Volatility" and "Drift" percentages found in the tool output. Explain what they mean.
       - **ML Model:** Mention the training data size and algorithm.
    
    3. **Quantitative Analysis (The Numbers)**
       - Detail the price targets.
       - **Range:** Quote the 90% Confidence Interval (Low/High).

    4. **Market Context (The News)**
       - Summarize the news headlines and sentiment.
       - CITE SOURCES.

    5. **Risk Factors & Conclusion**
       - Specific risks (e.g., "High volatility of X% increases downside risk").

    Do not shorten the content. USE THE TECHNICAL PARAMETERS (Sigma, Mu, CI) PROVIDED IN THE TEXT."""

    # The instructions go in the system turn, the agent reports in the user turn.
    synthesis_response = model.invoke([
        {"role": "system", "content": synthesis_prompt},
        {"role": "user", "content": "\n\n".join(formatted)}
    ])

    return {"final_answer": synthesis_response.content}







In [44]:
import os
import json

# --- 0. BUILD THE GRAPH ---
# `workflow` must exist before it is invoked; building it here keeps the
# notebook runnable from a fresh kernel (Restart & Run All).
# Topology: classify -> (quant, research via Send) -> synthesize.
# The node names "quant" and "research" have to match Classification.source,
# because route_to_agents uses that value as the Send target.
# NOTE(review): this wiring is reconstructed from the node functions defined
# above — confirm it matches the graph originally used.
graph_builder = StateGraph(RouterState)
graph_builder.add_node("classify", classify_query)
graph_builder.add_node("quant", run_trend_agent)
graph_builder.add_node("research", run_noise_agent)
graph_builder.add_node("synthesize", synthesize_results)
graph_builder.add_edge(START, "classify")
graph_builder.add_conditional_edges("classify", route_to_agents, ["quant", "research"])
graph_builder.add_edge("quant", "synthesize")
graph_builder.add_edge("research", "synthesize")
graph_builder.add_edge("synthesize", END)
workflow = graph_builder.compile()

# --- 1. THE USER REQUEST (Standard Agent Run) ---
query_text = "can you make predictions on Amazon stock?"

print(f"🤖 AGENT INVOKED: '{query_text}'")
result = workflow.invoke({
    "query": query_text
})

# --- 2. DISPLAY AGENT OUTPUT ---
print("\n" + "=" * 60)
print(f"Original Query: {result['query']}")
print("Classifications:")
for c in result["classifications"]:
    print(f"  -> {c['source']}: {c['query']}")
print("=" * 60 + "\n")

print("📝 FINAL REPORT:")
print(result["final_answer"])
print("\n" + "=" * 60 + "\n")

# --- 3. ARCHITECTURE VERIFICATION ---
# Checks for the files the tool pipelines are expected to leave behind.

print("🔍 SYSTEM AUDIT (Effectful Architecture Verification):")

# CHECK 1: The Ledger (Persistence Effect)
# The ledger is read as one JSON document per line; only the last one is shown.
ledger_file = "prediction_ledger.json"
if os.path.exists(ledger_file):
    with open(ledger_file, "r", encoding="utf-8") as f:
        # Blank lines (e.g. a trailing newline) are not entries.
        ledger_lines = [line for line in f if line.strip()]
    if not ledger_lines:
        print(f"❌ FAIL: Ledger file '{ledger_file}' is empty.")
    else:
        try:
            last_entry = json.loads(ledger_lines[-1])
            print(f"✅ PASS: Ledger updated. Last entry: {last_entry['ticker']} ({last_entry['model']})")
        except (json.JSONDecodeError, KeyError, TypeError) as exc:
            # A malformed entry is an audit failure, not a reason to crash the cell.
            print(f"❌ FAIL: Last entry of '{ledger_file}' could not be read: {exc!r}")
else:
    print(f"❌ FAIL: Ledger file '{ledger_file}' not found.")

# CHECK 2: The Chart (Visualization Effect)
# Note: the file name assumes the agent ran the ML tool for the ticker AMZN.
chart_file = "AMZN_forecast.png"
if os.path.exists(chart_file):
    print(f"✅ PASS: Chart generated at '{chart_file}'.")
else:
    print(f"⚠️ NOTE: Chart '{chart_file}' not found. (If the agent didn't run Prophet, this is normal).")

print("=" * 60)

🤖 AGENT INVOKED: 'can you make predictions on Amazon stock?'
Executing Trend AgentExecuting Noise Agent



16:16:21 - cmdstanpy - INFO - Chain [1] start processing
16:16:21 - cmdstanpy - INFO - Chain [1] done processing



Original Query: can you make predictions on Amazon stock?
Classifications:
  -> quant: AMZN
  -> research: AMZN news and sentiment

📝 FINAL REPORT:
**Amazon (AMZN) Stock – White Box Due Diligence Report**  
Date: February 7, 2026

---

## 1. Executive Summary

**Recommendation:** **HOLD** (with bullish lean for risk-tolerant investors)

Amazon currently faces a short-term sentiment overhang due to an EPS miss and a substantial 2026 capex increase, leading to immediate share price weakness and negative fund flows. However, technical analysis using Brownian motion projects a median rise to $214.32 within 30 days, with meaningful statistical support. Despite short-term volatility, fundamental outlook and long-term consensus remain positive, underpinned by strong AWS results and bullish analyst sentiment. Maintain a Hold position and monitor execution on AI and capex, with an eye to opportunistic buys on further weakness.

---

## 2. Methodology & Technical Deep-Dive (CRITICAL SECTIO