In [5]:
#install requirements
%pip install uv
!uv pip install --system langgraph langchain langchain-core langchain-openai langchain-valyu python-dotenv requests google-api-python-client google-auth-httplib2 google-auth-oauthlib


Note: you may need to restart the kernel to use updated packages.
[2mUsing Python 3.13.5 environment at: /opt/anaconda3[0m
[2mAudited [1m10 packages[0m [2min 130ms[0m[0m


In [None]:
#import libraries

import os
import base64
from typing import List, Optional
import getpass
from langchain.chat_models import init_chat_model

from langchain.agents import tool
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain.agents import create_openai_functions_agent
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from pathlib import Path
from langchain_core.messages import HumanMessage
from langchain.agents import AgentExecutor
from valyu import Valyu

import numpy as np
import pandas as pd
import yfinance as yf


os.environ["OPENAI_API_KEY"] = "FILL"
os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_API_KEY"] = "FILL"
os.environ["VALYU_API_KEY"] = "FILL"




model = init_chat_model("gpt-4.1")

In [None]:
#trend tools

@tool 
def brownianModel(TICKER, START_DATE, END_DATE, PRED_END_DATE):
    """ 
    Brownian model for stocks
    START_DATE: from which (past) date should we begin considering the Monte Carlo.
    END_DATE: from which (past) date are we ending the considering of its data (presumably as late as possible, given data is available)
    PRED_END_DATE: the (future) date which the client is willing to predict until.  
    """
    # main variables
    stock_name = TICKER
    start_date = START_DATE
    end_date = END_DATE
    pred_end_date = PRED_END_DATE
    scen_size = 10000

    # -----------------------------
    # Download and prepare data
    # -----------------------------

    prices = yf.download(tickers=stock_name, start=start_date, end=pred_end_date)
    prices = prices['Close']

    # ---- FIX 1: Robust price column selection ----
    #if isinstance(prices.columns, pd.MultiIndex):
        #if ('Adj Close', stock_name) in prices.columns:
            #prices = prices[('Adj Close', stock_name)]
       #else:
            #prices = prices[('Close', stock_name)]
    #else:
       # if 'Adj Close' in prices.columns:
            #prices = prices['Adj Close']
        #else:
            #prices = prices['Close']

    
    # Generate business days (weekdays only)
    future_dates = pd.bdate_range(start=pd.to_datetime(end_date) + pd.Timedelta(days=1),
                    end=pd.to_datetime(pred_end_date))

   
    train_set = prices.loc[:end_date]
    # Create DataFrame with a 'Date' column
    dframe = pd.DataFrame({'Prediction Date': future_dates})
    
    daily_returns = ((train_set / train_set.shift(1)) - 1)[1:]



    So = train_set.iloc[-1]
    dt = 1  # day

    n_of_wkdays = pd.date_range(
        start=pd.to_datetime(end_date) + pd.Timedelta('1 days'),
        end=pd.to_datetime(pred_end_date)
    ).to_series().map(lambda x: 1 if x.isoweekday() in range(1, 6) else 0).sum()

    T = n_of_wkdays
    N = int(T / dt)
    t = np.arange(1, N + 1)

    mu = np.mean(daily_returns)
    sigma = np.std(daily_returns)

    b = {str(scen): np.random.normal(0, 1, N) for scen in range(1, scen_size + 1)}
    W = {str(scen): b[str(scen)].cumsum() for scen in range(1, scen_size + 1)}

    drift = (mu - 0.5 * sigma ** 2) * t
    diffusion = {str(scen): sigma * W[str(scen)] for scen in range(1, scen_size + 1)}

    S = np.array([So * np.exp(drift + diffusion[str(scen)]) for scen in range(1, scen_size + 1)])
    S = np.hstack((np.array([[So] for _ in range(scen_size)]), S))


    S_max = [S[:, i].max() for i in range(0, N)]
    S_min = [S[:, i].min() for i in range(0, N)]
    S_pred = 0.5 * np.array(S_max) + 0.5 * np.array(S_min)

    # Standard Monte Carlo estimator: expected price (mean across simulations)
    #S_pred = np.median(S, axis=0)[1:]


    # Align prediction length with available real prices
    min_len = min(len(dframe['Prediction Date']), len(S_pred))

    final_df = pd.DataFrame({
        'pred': S_pred[:min_len]
    }, index=dframe['Prediction Date'])

    #mse = np.mean((final_df['pred'] - final_df['real']) ** 2)
    # Convert to string row by row as "Date: Price"

    result = '\n'.join(f"{date.date()}: {price}" for date, price in zip(final_df.index, final_df['pred']))
    return result


@tool
def mlModel():
    """ 
    RNN model for stock prediction
    """



#noise tools

@tool 
#query_general is the prompt given to Valyu, and it should be on general / industry specific news/articles instead of stoc specific.
#max_results refers to the number of sources that should be returned by the function. (Top N)
def generalInfo(query_general, max_results, START_DATE, END_DATE):
    """ 
    Information regarding the general market of the stock, specified by a query. 
    Fetch general stock market information from Valyu.ai API.
    Parameters:
        query (str): Natural language query about the market or stocks.
        max_results (int): Number of results to return/consider (default 10).
    Returns:
        list of dict: Each dict contains 'title', 'url', and 'snippet'.
    """

    # Read the API key from environment
    API_KEY = os.environ.get("VALYU_API_KEY")
    # Initialize the Valyu client
    valyu = Valyu(api_key=API_KEY)

    # ---------- NEWS SEARCH ----------
    news_response = valyu.search(
        query=query_general,
        search_type="news",
        max_num_results=max_results,
        relevance_threshold=0.7,
        max_price=0.0, #free content only
        start_date=START_DATE,
        end_date=END_DATE,
        excluded_sources=["reddit.com", "twitter.com", "x.com"],
        response_length="medium",
        fast_mode=False,
    )

    # ---------- PROPRIETARY SEARCH ----------
    proprietary_response = valyu.search(
        query=query_general,
        search_type="proprietary",
        max_num_results=max_results,
        relevance_threshold=0.7,
        max_price=0.0,
        start_date=START_DATE,
        end_date=END_DATE,
        response_length="medium",
        fast_mode=False,
    )

    # Combine results safely
    response = (
        (news_response.get("results") or []) +
        (proprietary_response.get("results") or [])
    )


    results_list = []

    for result in response.get('results', []):
        results_list.append({
            "title": result.get('title', 'No title'),
            "url": result.get('url', 'No URL'),
            "snippet": result.get('snippet') or result.get('content', 'No snippet')
        })

    #Turns result_list (a dictioanry) into a readable string.

    lines = []
    for r in enumerate(results_list, 1):
        lines.append(
            f"TITLE: {r['title']}\n"
            f"URL: {r['url']}\n"
            f"Summary: {r['snippet']}\n"
        )
    response_str = "\n".join(lines)
    return response_str



@tool 
def specificInfo(query_specific, max_results, START_DATE, END_DATE):
    """ 
    Information regarding the stock itself, specifically.
    """

    # Read the API key from environment
    API_KEY = os.environ.get("VALYU_API_KEY")
    # Initialize the Valyu client
    valyu = Valyu(api_key=API_KEY)

    # ---------- NEWS SEARCH ----------
    news_response = valyu.search(
        query=query_specific,
        search_type="news",
        max_num_results=max_results,
        relevance_threshold=0.7,
        max_price=0.0, #free content only
        start_date=START_DATE,
        end_date=END_DATE,
        excluded_sources=["reddit.com", "twitter.com", "x.com"],
        response_length="medium",
        fast_mode=False,
    )

    # ---------- PROPRIETARY SEARCH ----------
    proprietary_response = valyu.search(
        query=query_specific,
        search_type="proprietary",
        max_num_results=max_results,
        relevance_threshold=0.7,
        max_price=0.0,
        start_date=START_DATE,
        end_date=END_DATE,
        response_length="medium",
        fast_mode=False,
    )

    # Combine results safely
    response = (
        (news_response.get("results") or []) +
        (proprietary_response.get("results") or [])
    )


    results_list = []

    for result in response.get('results', []):
        results_list.append({
            "title": result.get('title', 'No title'),
            "url": result.get('url', 'No URL'),
            "snippet": result.get('snippet') or result.get('content', 'No snippet')
        })

    #Turns result_list (a dictioanry) into a readable string.

    lines = []
    for r in enumerate(results_list, 1):
        lines.append(
            f"TITLE: {r['title']}\n"
            f"URL: {r['url']}\n"
            f"Summary: {r['snippet']}\n"
        )
    response_str = "\n".join(lines)
    return response_str


In [8]:
#trend agent
trendAgent = create_openai_functions_agent(
    model,
    tools=[brownianModel, mlModel],
    prompt= FILL,
)

trendAgentExecuter = AgentExecutor(
    agent=trendAgent, 
    tools=[brownianModel, mlModel], 
    verbose=True
)


#noise agent
noiseAgent = create_openai_functions_agent(
    model,
    tools=[generalInfo, specificInfo],
    prompt= FILL,
)


noiseAgentExecuter = AgentExecutor(
    agent=noiseAgent, 
    tools=[generalInfo, specificInfo], 
    verbose=True
)


NameError: name 'FILL' is not defined

In [None]:
@tool 
def trendAgentTool():
    trendAgentExecuter.invoke()

@tool 
def noiseAgentTool():
    noiseAgentExecuter.invoke()

supAgent = create_openai_functions_agent(
    model,
    tools=[trendAgentTool, noiseAgentTool],
    prompt= FILL,
)