# Environment Setup

In [2]:
import os
from dotenv import load_dotenv

# Loading environment variables from .env
load_dotenv()

# Changing directory to main directory for easy data access.
# os.getenv returns None when the variable is unset, and os.chdir(None) would
# then fail with an opaque TypeError, so check first and name what is missing.
working_directory = os.getenv("WORKING_DIRECTORY")
if not working_directory:
    raise RuntimeError(
        "WORKING_DIRECTORY is not set; define it in .env or in the environment."
    )
os.chdir(working_directory)

# Checking the change (last expression, so the notebook displays the new cwd)
os.getcwd()

'D:\\Projects\\Stock Screener\\Stock-Screener-Agent'

In [3]:
from pathlib import Path

# Sanity check: after the chdir, a .git folder should be visible from here
git_marker = Path(".git")
print("Git folder exists:", git_marker.exists())

Git folder exists: True


# 0. Ticker Parsing

### Pseudocode to test the idea

In [4]:
from typing import Annotated 
from langgraph.graph import START, END, StateGraph
from langgraph.graph.message import add_messages 
from langgraph.checkpoint.memory import InMemorySaver 
from langchain_ollama import ChatOllama
from colorama import Fore 
from langgraph.prebuilt import ToolNode 

# Chat model accessed through Ollama (qwen2.5, 14b tag); tools are bound to it in a later cell
llm = ChatOllama(model='qwen2.5:14b')

In [5]:
from langchain.tools import tool 
import yfinance as yf 

@tool
def simple_screener(screen_type:str, offset:int)-> list[str]: 
    """Returns the ticker symbols of screened assets (stocks, funds, bonds) given popular criteria. 

    Args:
        screen_type: One of a default set of stock screener queries from yahoo finance. 
        aggressive_small_caps
        day_gainers
        day_losers
        growth_technology_stocks
        most_actives
        most_shorted_stocks
        small_cap_gainers
        undervalued_growth_stocks
        undervalued_large_caps
        conservative_foreign_funds
        high_yield_bond
        portfolio_anchors
        solid_large_growth_funds
        solid_midcap_growth_funds
        top_mutual_funds
        offset: the pagination start point

    Returns:
        A list of up to 5 ticker symbols for the assets that meet the criteria
    """
    # An unknown screen_type raises KeyError on this lookup.
    query = yf.PREDEFINED_SCREENER_QUERIES[screen_type]['query']
    # size=5 asks for five quotes per call; offset selects where the page starts.
    result = yf.screen(query, offset=offset, size=5)
    # Only the symbols are returned, not the full quote records.
    return [quote["symbol"] for quote in result["quotes"]]


In [20]:
# Screener key to inspect; used to index yf.PREDEFINED_SCREENER_QUERIES below
screen_type = "day_gainers"

# Run the predefined query directly (offset 0, size 20) to look at the raw response
query = yf.PREDEFINED_SCREENER_QUERIES[screen_type]['query']
result = yf.screen(query, offset=0, size=20) 
# Last expression: the notebook displays the whole response dict
result

{'start': 0,
 'count': 20,
 'total': 89,
 'quotes': [{'language': 'en-US',
   'region': 'US',
   'quoteType': 'EQUITY',
   'typeDisp': 'Equity',
   'quoteSourceName': 'Delayed Quote',
   'triggerable': False,
   'customPriceAlertConfidence': 'LOW',
   'currency': 'USD',
   'averageDailyVolume10Day': 871250,
   'corporateActions': [],
   'fiftyTwoWeekLowChange': 3.9499998,
   'fiftyTwoWeekLowChangePercent': 0.65289253,
   'fiftyTwoWeekRange': '6.05 - 10.38',
   'fiftyTwoWeekHighChange': -0.3800001,
   'fiftyTwoWeekHighChangePercent': -0.036608875,
   'fiftyTwoWeekChangePercent': 19.189512,
   'dividendDate': 1753747200,
   'earningsTimestamp': 1757044800,
   'earningsTimestampStart': 1757044800,
   'earningsTimestampEnd': 1757044800,
   'earningsCallTimestampStart': 1753875000,
   'earningsCallTimestampEnd': 1753875000,
   'isEarningsDateEstimate': False,
   'trailingAnnualDividendRate': 0.12,
   'trailingPE': 23.255814,
   'dividendRate': 0.14,
   'trailingAnnualDividendYield': 0.01237

In [15]:
# Register the screener as the only tool, bind it to the chat model,
# and build the graph node that will execute the model's tool calls
tools = [simple_screener]
llm_with_tools = llm.bind_tools(tools)
tool_node = ToolNode(tools)

In [None]:
class State(dict): 
    """Graph state passed between nodes; the conversation lives under "messages"."""
    # add_messages is attached as annotation metadata for the messages list.
    # NOTE(review): LangGraph examples usually declare state as a TypedDict —
    # confirm that a plain dict subclass is intended here.
    messages: Annotated[list, add_messages]

def chatbot(state:State): 
    """Send the conversation so far to the tool-aware model and return its reply.

    The message history is printed before the model is called. The reply is
    returned as a one-element list under the "messages" key.
    """
    history = state['messages']
    print(history)
    reply = llm_with_tools.invoke(history)
    return {"messages": [reply]}

def router(state:State): 
    """Choose the next step: "tools" if the latest message carries tool calls, else END."""
    newest = state['messages'][-1]
    # A missing attribute and an empty tool_calls value both count as "no tool requested"
    wants_tool = bool(getattr(newest, 'tool_calls', None))
    return "tools" if wants_tool else END


# Wire the graph: START -> chatbot, chatbot -> ("tools" or END) via router, tools -> chatbot
graph_builder = StateGraph(State)
graph_builder.add_node("chatbot", chatbot)
graph_builder.add_node("tools", tool_node)
graph_builder.add_edge(START, "chatbot")

# After the tool node runs, control goes back to the chatbot node
graph_builder.add_edge("tools", "chatbot")
# router's return value ("tools" or END) decides where to go after chatbot
graph_builder.add_conditional_edges("chatbot", router)

# Compile with an in-memory checkpointer
# NOTE(review): presumably conversation state is kept per thread_id passed at invoke time — confirm
memory = InMemorySaver() 
graph = graph_builder.compile(checkpointer=memory)

In [17]:
# Read a request from the user and run it through the compiled graph
prompt = input("🤖 Pass your prompt here: ")
user_turn = {"role": "user", "content": prompt}
run_config = {"configurable": {"thread_id": 1234}}
result = graph.invoke({"messages": [user_turn]}, config=run_config)

# Print only the text of the final message, in light yellow
final_reply = result['messages'][-1].content
print(Fore.LIGHTYELLOW_EX + final_reply + Fore.RESET)

[HumanMessage(content='top 5', additional_kwargs={}, response_metadata={}, id='926874d7-315b-453e-b4ba-0ca15fb522f4')]
[HumanMessage(content='top 5', additional_kwargs={}, response_metadata={}, id='926874d7-315b-453e-b4ba-0ca15fb522f4'), AIMessage(content="It seems like you're looking for the top 5 assets according to one of the screening criteria from Yahoo Finance. Could you please specify which type of screen (e.g., `day_gainers`, `most_actives`) and how many results you want beyond these five? If not specified, I'll default to fetching the top 5 small cap gainers as an example.\n\nWould you like to proceed with `small_cap_gainers`?\n", additional_kwargs={}, response_metadata={'model': 'qwen2.5:14b', 'created_at': '2025-10-17T12:04:41.4645785Z', 'done': True, 'done_reason': 'stop', 'total_duration': 6625389900, 'load_duration': 3323557500, 'prompt_eval_count': 324, 'prompt_eval_duration': 187178200, 'eval_count': 121, 'eval_duration': 2891514800, 'model_name': 'qwen2.5:14b'}, id='ru

In [12]:
# Read a request from the user and run it through the compiled graph
prompt = input("🤖 Pass your prompt here: ")
user_turn = {"role": "user", "content": prompt}
run_config = {"configurable": {"thread_id": 1234}}
result = graph.invoke({"messages": [user_turn]}, config=run_config)

# Print only the text of the final message, in light yellow
final_reply = result['messages'][-1].content
print(Fore.LIGHTYELLOW_EX + final_reply + Fore.RESET)

[HumanMessage(content='Top 5 gainers', additional_kwargs={}, response_metadata={}, id='9eb6f95d-ef0b-40b9-90a8-dc283eaba38b'), AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'qwen2.5:14b', 'created_at': '2025-10-17T11:28:35.9670911Z', 'done': True, 'done_reason': 'stop', 'total_duration': 35201784500, 'load_duration': 34021665300, 'prompt_eval_count': 326, 'prompt_eval_duration': 387404700, 'eval_count': 31, 'eval_duration': 686144000, 'model_name': 'qwen2.5:14b'}, id='run--eb81be83-2da0-4b8f-9821-eebe51aedea4-0', tool_calls=[{'name': 'simple_screener', 'args': {'offset': 0, 'screen_type': 'day_gainers'}, 'id': '597dd125-25e8-402c-9e9b-b62091825592', 'type': 'tool_call'}], usage_metadata={'input_tokens': 326, 'output_tokens': 31, 'total_tokens': 357}), ToolMessage(content="Stock Screener Results: [{'bid': 9.07, 'ask': 10.25, 'shortName': 'Ermenegildo Zegna N.V.', 'exchange': 'NYQ', 'fiftyTwoWeekHigh': 10.38, 'fiftyTwoWeekLow': 6.05, 'averageAnalystRating': '1.8

In [13]:
# Read a request from the user and run it through the compiled graph
prompt = input("🤖 Pass your prompt here: ")
user_turn = {"role": "user", "content": prompt}
run_config = {"configurable": {"thread_id": 1234}}
result = graph.invoke({"messages": [user_turn]}, config=run_config)

# Print only the text of the final message, in light yellow
final_reply = result['messages'][-1].content
print(Fore.LIGHTYELLOW_EX + final_reply + Fore.RESET)

[HumanMessage(content='Top 5 gainers', additional_kwargs={}, response_metadata={}, id='9eb6f95d-ef0b-40b9-90a8-dc283eaba38b'), AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'qwen2.5:14b', 'created_at': '2025-10-17T11:28:35.9670911Z', 'done': True, 'done_reason': 'stop', 'total_duration': 35201784500, 'load_duration': 34021665300, 'prompt_eval_count': 326, 'prompt_eval_duration': 387404700, 'eval_count': 31, 'eval_duration': 686144000, 'model_name': 'qwen2.5:14b'}, id='run--eb81be83-2da0-4b8f-9821-eebe51aedea4-0', tool_calls=[{'name': 'simple_screener', 'args': {'offset': 0, 'screen_type': 'day_gainers'}, 'id': '597dd125-25e8-402c-9e9b-b62091825592', 'type': 'tool_call'}], usage_metadata={'input_tokens': 326, 'output_tokens': 31, 'total_tokens': 357}), ToolMessage(content="Stock Screener Results: [{'bid': 9.07, 'ask': 10.25, 'shortName': 'Ermenegildo Zegna N.V.', 'exchange': 'NYQ', 'fiftyTwoWeekHigh': 10.38, 'fiftyTwoWeekLow': 6.05, 'averageAnalystRating': '1.8

### Ticker Resolver Agent

In [6]:
from langchain.tools import tool 


def simple_screener(screen_type:str, offset:int)-> list[str]: 
    """Returns the ticker symbols of screened assets (stocks, funds, bonds) given popular criteria. 

    Args:
        screen_type: One of a default set of stock screener queries from yahoo finance. 
        aggressive_small_caps
        day_gainers
        day_losers
        growth_technology_stocks
        most_actives
        most_shorted_stocks
        small_cap_gainers
        undervalued_growth_stocks
        undervalued_large_caps
        conservative_foreign_funds
        high_yield_bond
        portfolio_anchors
        solid_large_growth_funds
        solid_midcap_growth_funds
        top_mutual_funds
        offset: the pagination start point

    Returns:
        A list of up to 5 ticker symbols for the assets that meet the criteria
    """
    # NOTE: this plain function reuses the name of the @tool-decorated
    # simple_screener defined in an earlier cell and replaces it once this cell runs.
    # An unknown screen_type raises KeyError on this lookup.
    query = yf.PREDEFINED_SCREENER_QUERIES[screen_type]['query']
    # size=5 asks for five quotes per call; offset selects where the page starts.
    result = yf.screen(query, offset=offset, size=5)
    # Only the symbols are returned, not the full quote records.
    return [quote["symbol"] for quote in result["quotes"]]


In [7]:
# Case 1 – Direct Stock Input
# Match company names using a static lookup table
def direct_match(query, mapping):
    """Return the tickers whose company name or symbol is mentioned in ``query``.

    Matching is case-insensitive and on whole tokens, so a one-letter ticker
    such as "A" no longer matches every query containing that letter and
    "apple" no longer matches "pineapple".

    Args:
        query: Free-text user request.
        mapping: Dict of company name -> ticker symbol.

    Returns:
        Matching tickers in mapping order, each listed once. Empty list when
        nothing matches or ``query`` is empty.
    """
    import re  # local import: this cell runs before the notebook's own `import re`

    if not query:
        return []

    def mentioned(term):
        # An empty term would match any text, so it never counts as a mention.
        if not term:
            return False
        # Lookarounds instead of \b so terms containing punctuation
        # (e.g. "BRK.B") are still matched as a whole token.
        pattern = r"(?<!\w)" + re.escape(term) + r"(?!\w)"
        return re.search(pattern, query, flags=re.IGNORECASE) is not None

    tickers = []
    for name, ticker in mapping.items():
        # Several names may map to the same ticker; report it only once.
        if ticker not in tickers and (mentioned(name) or mentioned(ticker)):
            tickers.append(ticker)
    return tickers

In [41]:
# Case 2 - Intent Match
from sentence_transformers import SentenceTransformer, util
import re

# Embedding model used to compare user queries against the screener names
model = SentenceTransformer("all-MiniLM-L6-v2")

# Screener categories: the same keys listed in the simple_screener docstring
screeners = [
        "aggressive_small_caps",
        "day_gainers",
        "day_losers",
        "growth_technology_stocks",
        "most_actives",
        "most_shorted_stocks",
        "small_cap_gainers",
        "undervalued_growth_stocks",
        "undervalued_large_caps",
        "conservative_foreign_funds",
        "high_yield_bond",
        "portfolio_anchors",
        "solid_large_growth_funds",
        "solid_midcap_growth_funds",
        "top_mutual_funds",
]

# Embed the screener names once, normalized, so each query only needs one encode call
screeners_emb = model.encode(screeners, normalize_embeddings=True)

def classify_intent(query: str, default_limit: int=5, threshoold: float=0.2, max_limit:int=10):
    """Map a free-text query to the closest screener and a result count.

    Args:
        query: User request, e.g. "top 7 gainers".
        default_limit: Count used when the query contains no usable number.
        threshoold: Similarity score the best screener must exceed to be accepted
            (spelling kept so existing keyword callers keep working).
        max_limit: Upper bound on the returned count.

    Returns:
        Tuple ``(best_screener, limit, best_score)``. ``best_screener`` is None
        when ``best_score`` is at or below ``threshoold``; ``best_score`` is
        returned either way.
    """
    query_emb = model.encode(query, normalize_embeddings=True)

    # Similarity of the query against every screener name
    scores = util.cos_sim(query_emb, screeners_emb)[0].tolist()

    # Best match and score (first index wins on ties)
    best_score = max(scores)
    best_idx = scores.index(best_score)
    best_screener = None if best_score <= threshoold else screeners[best_idx]

    # Parse number from query (e.g. "top 7 gainers"); the first standalone integer wins
    size = re.search(r"\b\d+\b", query)
    requested = int(size.group()) if size else 0
    # A request for 0 results is treated like a missing number, not as an empty page
    limit = min(requested, max_limit) if requested > 0 else default_limit

    return best_screener, limit, best_score

In [42]:
# Step-by-step run of the same scoring that classify_intent performs, on a sample query
query = "Analyze AAPL and top 5 day gainers"
query_emb = model.encode(query, normalize_embeddings=True)

scores = util.cos_sim(query_emb, screeners_emb)[0].tolist()
best_idx = int(max(range(len(scores)), key=lambda i:scores[i]))

# The key-based argmax must agree with looking up the max score in the list
assert scores.index(max(scores)) == best_idx


In [43]:
# Look up the winning screener name and its similarity score
best_screener = screeners[best_idx]
best_score = scores[best_idx]
# Last expression: the notebook displays both as a tuple
best_screener, best_score

('day_gainers', 0.4644447863101959)

In [44]:
# Same sample query through classify_intent, with all other parameters left at their defaults
classify_intent(query=query)

('day_gainers', 5, 0.4644447863101959)

In [None]:
# Case 3 - Close Match (Embeddings)
def fuzzy_match(company_name, vector_db):
    """Placeholder for embedding-based company-name matching; not implemented yet.

    Currently ignores both arguments and returns None.
    """
    return None