In [38]:
#Final Project Script
#Agentic AI for financial analysis
#Model/code inspiration: https://python.langchain.com/docs/tutorials/agents/
#xAI integration: https://x.ai/api#capabilities
#Agent parameters/structure: https://langchain-ai.github.io/langgraph/reference/agents/ and
#https://api.python.langchain.com/en/latest/messages/langchain_core.messages.system.SystemMessage.html

In [None]:
%pip install -U langgraph langchain-tavily langgraph-checkpoint-sqlite
%pip install langchain-huggingface
%pip install -qU langchain-xai
%pip install yfinance
%pip install nltk
%pip install string
%pip install transformers
%pip install torch

In [72]:
#import packages
from langgraph.checkpoint.memory import MemorySaver
from langchain.chat_models import init_chat_model
from langgraph.prebuilt import create_react_agent
from langchain_core.messages import SystemMessage
from langchain_tavily import TavilySearch
from datetime import datetime, timedelta 
import yfinance as yf
import pandas as pd
import numpy as np
import getpass
import os

In [54]:
#API keys are not stored in this notebook; message me on Slack if you need them.
#Keys posted here get auto-revoked by GitHub, so they are entered at runtime instead.

In [73]:
#turn on LangSmith tracing, then prompt (without echo) for each service key
os.environ['LANGSMITH_TRACING'] = 'true'

#(environment variable, prompt text) pairs, asked in this order
key_prompts = (
    ('LANGSMITH_API_KEY', 'Enter the API key for LangSmith: '),
    ('TAVILY_API_KEY', 'Enter the API key for Tavily: '),
    ('XAI_API_KEY', 'Enter API key for xAI: '),
    ('FINNHUB_API_KEY', 'Enter API key for FinnHub: '),
    ('ALPHAVANTAGE_API_KEY', 'Enter API key for Alpha Vantage: '),
)
for env_var, prompt_text in key_prompts:
    os.environ[env_var] = getpass.getpass(prompt_text)

In [None]:
#create all necessary tools for agent/steps of project:

In [80]:
#create Tavily search tool (general)
#max_results=10 presumably caps the number of hits returned per query -- TODO confirm against the TavilySearch docs
search = TavilySearch(max_results=10)

In [None]:
#prompt-chaining tools below:
#def ingest_news (ingest)
#def preproc_news (preprocess)
#def classify_news (classify)
#def extract_news (extract)
#def summarize_news (summarize)
#APIs used: Finnhub, Alpha Vantage, Yahoo Finance

#Emphasis on packages/methods used in prior assignments 
#packages used: HuggingFace, NLTK

In [93]:
#import all packages and langchain integrations for prompt chaining 
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from nltk import pos_tag
import string
import langchain_huggingface
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
import torch
from collections import Counter


In [None]:
#downloads
#'punkt_tab' is the tokenizer data that current NLTK releases look up in word_tokenize;
#'punkt' is kept for older releases. A resource unknown to the installed NLTK is reported
#by nltk.download and does not stop the cell.
for resource in ('punkt', 'punkt_tab', 'stopwords', 'vader_lexicon'):
    nltk.download(resource)

In [60]:
#using yahoofinance to get news articles
#symbol string param, fixed number of reports to retrieve
def ingest_news(symbol,numart=10):
    """Fetch recent news items for a ticker and normalise them into report dicts.

    Args:
        symbol: Ticker symbol passed to ``yf.Ticker``.
        numart: Maximum number of news items to keep (slice of ``Ticker.news``).

    Returns:
        A list of dicts with the keys 'headline', 'article', 'time' and 'source'.
        'time' is a 'YYYY-MM-DD' string when the publication time can be parsed
        (the format summarize_news reads), otherwise the raw value or ''.
    """
    from datetime import timezone  #local import: the file-level import only brings in datetime/timedelta

    def first_present(mapping, *keys):
        #return the first non-empty value found under any of the candidate keys
        for key in keys:
            value = mapping.get(key)
            if value not in (None, ''):
                return value
        return ''

    def as_date(raw):
        #epoch seconds (older yfinance 'providerPublishTime') -> UTC calendar date
        if isinstance(raw, (int, float)):
            return datetime.fromtimestamp(raw, tz=timezone.utc).strftime('%Y-%m-%d')
        text = str(raw)
        try:
            #ISO timestamps start with the date, so the first 10 characters are enough
            return datetime.strptime(text[:10], '%Y-%m-%d').strftime('%Y-%m-%d')
        except ValueError:
            return text

    #guard against a missing/empty news payload
    raw_items = yf.Ticker(symbol).news or []
    reports = []
    for bit in raw_items[:numart]:
        #NOTE(review): newer yfinance releases appear to nest the article fields under
        #'content' while older ones keep them flat -- verify against the installed version
        nested = bit.get('content')
        fields = nested if isinstance(nested, dict) else bit
        provider = fields.get('provider')
        if isinstance(provider, dict):
            source = provider.get('displayName') or ''
        else:
            source = first_present(fields, 'publisher', 'source')
        reports.append({
            'headline': first_present(fields, 'title'),
            'article': first_present(fields, 'summary', 'description'),
            'time': as_date(first_present(
                fields, 'pubDate', 'displayTime', 'providerPublishTime', 'time_published')),
            'source': source
        })
    return reports

In [66]:
#using nltk to preprocess news reports for analysis
#list of report dicts param
def preproc_news(reports):
    """Tokenize, clean and stem the headline and article of every report.

    Args:
        reports: List of report dicts; 'headline' and 'article' are read and
            treated as '' when missing or None.

    Returns:
        A new list of shallow-copied report dicts, each extended with
        'preproc_headline' and 'preproc_article' (lists of stemmed tokens).
    """
    #built once per call; a distinct local name avoids shadowing the imported
    #`stopwords` corpus module (which made the original raise UnboundLocalError)
    stop_words = set(stopwords.words('english'))
    stemmer = PorterStemmer()

    def preproc_item(item):
        #lower-case, tokenize, drop punctuation and stopwords, then stem
        tokens = word_tokenize((item or '').lower())
        return [
            stemmer.stem(token)
            for token in tokens
            if token not in string.punctuation and token not in stop_words
        ]

    preproc_reports = []
    for report in reports:
        preproc_report = report.copy()
        #call the per-string helper here, not preproc_news itself
        preproc_report['preproc_headline'] = preproc_item(report.get('headline', ''))
        preproc_report['preproc_article'] = preproc_item(report.get('article', ''))
        preproc_reports.append(preproc_report)

    return preproc_reports

        

In [89]:
#using nltk to classify preprocessed reports
#list of report dict params
def classify_news(preproc_reports, threshold=0.5):
    """Label each report 'positive', 'negative' or 'neutral' with VADER.

    Args:
        preproc_reports: List of report dicts; 'preproc_headline' and
            'preproc_article' are read and may be token lists or plain strings.
        threshold: Absolute compound-score cut-off; scores >= threshold are
            'positive', scores <= -threshold are 'negative', the rest 'neutral'.
            NOTE(review): VADER's documentation suggests 0.05 -- confirm 0.5 is intended.

    Returns:
        A new list of shallow-copied report dicts with an added 'sentiment' key.
    """
    def as_text(value):
        #token lists must be joined; formatting a list directly would feed its
        #Python repr (brackets, quotes, commas) to the analyzer
        if isinstance(value, (list, tuple)):
            return ' '.join(str(token) for token in value)
        return str(value) if value else ''

    sentiment_analyzer = SentimentIntensityAnalyzer()
    classified_reports = []
    for report in preproc_reports:
        headline = as_text(report.get('preproc_headline', ''))
        article = as_text(report.get('preproc_article', ''))
        comb = f'{headline} {article}'.strip()
        compound = sentiment_analyzer.polarity_scores(comb)['compound']
        if compound >= threshold:
            sentiment = 'positive'
        elif compound <= -threshold:
            sentiment = 'negative'
        else:
            sentiment = 'neutral'
        classified_report = report.copy()
        classified_report['sentiment'] = sentiment
        classified_reports.append(classified_report)
    return classified_reports

In [None]:
#using huggingface NER to extract meaningful financial components of news reports
#list of report dict params
def extract_news(reports):
    """Run named-entity recognition over each report's headline and article.

    Args:
        reports: List of report dicts; 'headline' and 'article' are read and
            treated as '' when missing or None.

    Returns:
        A new list of shallow-copied report dicts, each with an added 'entis'
        key: a list of (word, entity_group) tuples for entities scoring above 0.5.
    """
    #nothing to tag, so skip loading the model
    if not reports:
        return []
    nerpretrain = 'dbmdz/bert-large-cased-finetuned-conll03-english'
    #the tokenizer is loaded from the checkpoint name; the original passed `nermodel`
    #before it was assigned, which raised UnboundLocalError
    tokenizer = AutoTokenizer.from_pretrained(nerpretrain)
    nermodel = AutoModelForTokenClassification.from_pretrained(nerpretrain)
    nerpipe = pipeline('ner', model=nermodel,tokenizer=tokenizer,aggregation_strategy='simple',device=-1)
    extra_reports = []
    for report in reports:
        #`or ''` keeps a missing field from becoming the literal text "None"
        headline = report.get('headline') or ''
        article = report.get('article') or ''
        comb = f'{headline} {article}'.strip()
        #do not call the pipeline on empty text
        rawent = nerpipe(comb) if comb else []
        entis = [(ent['word'],ent['entity_group']) for ent in rawent if ent['score'] > 0.5]
        extract_report = report.copy()
        extract_report['entis'] = entis
        extra_reports.append(extract_report)
    return extra_reports

In [None]:
#summarize the analysis gathered so far in the prompt-chain and report
def summarize_news(reports):
    """Run preprocess -> classify -> extract on the reports and aggregate the results.

    Args:
        reports: List of raw report dicts (as accepted by preproc_news).

    Returns:
        A dict with three keys:
        'sentiment_summary': total article count, percentage distribution per
            sentiment (all 0.0 when there are no articles) and raw counts.
        'entity_summary': total entity count and, per entity label, up to five
            (text, count) pairs ordered by frequency.
        'trends': {'monthly_sentiment': {YYYY-MM: Counter of sentiments}} built
            from reports whose 'time' starts with a YYYY-MM-DD date; {} if none do.
    """
    preproc_reports = preproc_news(reports)
    classified_reports = classify_news(preproc_reports)
    extracted_reports = extract_news(classified_reports)
    fintotal = len(extracted_reports)

    #sentiment summs
    sentiment_counts = Counter(report['sentiment'] for report in extracted_reports)

    def share(label):
        #percentage of articles with this label; guarded against an empty batch
        if fintotal == 0:
            return 0.0
        return round((sentiment_counts[label] / fintotal) * 100, 2)

    sentiment_summary = {
        'total_articles': fintotal,
        'distribution': {
            'positive': share('positive'),
            'negative': share('negative'),
            'neutral': share('neutral')
        },
        'counts': dict(sentiment_counts)
    }

    #extract sums
    all_ents = []
    for report in extracted_reports:
        #extract_news stores its result under 'entis'; 'entities' is accepted as a fallback
        all_ents.extend(report.get('entis') or report.get('entities') or [])
    #tuple keys keep entity text containing ':' intact (string keys split wrongly)
    entity_counter = Counter((entity_text, label) for entity_text, label in all_ents)
    entity_by_type = {}
    for (text, label), count in entity_counter.most_common():
        entity_by_type.setdefault(label, []).append((text, count))
    top_entities = {label: items[:5] for label, items in entity_by_type.items()}
    entity_summary = {
        'total_entities': len(all_ents),
        'top_by_type': top_entities
    }

    #overall sums
    monthly_summary = {}
    for report in extracted_reports:
        try:
            #only the leading date is needed, so full ISO timestamps are accepted too
            dt = datetime.strptime(str(report.get('time'))[:10], '%Y-%m-%d')
        except ValueError:
            #missing or unparseable time: leave this report out of the trend
            continue
        month_key = dt.strftime('%Y-%m')
        monthly_summary.setdefault(month_key, Counter())[report['sentiment']] += 1
    trends = {'monthly_sentiment': monthly_summary} if monthly_summary else {}

    return {
        'sentiment_summary': sentiment_summary,
        'entity_summary': entity_summary,
        'trends': trends,
    }


In [81]:
#define all tools for model/agent use
#the system prompt tells the agent to run the news pipeline, so the pipeline has to be
#registered as a tool; a list containing only `search` leaves it unreachable
from langchain_core.tools import tool


@tool
def analyze_stock_news(symbol: str, numart: int = 10) -> dict:
    """Fetch recent news for a stock ticker symbol and return sentiment, entity and monthly trend summaries.

    Runs ingest_news, then summarize_news (which preprocesses, classifies and
    extracts entities) on up to `numart` articles.
    """
    try:
        return summarize_news(ingest_news(symbol, numart))
    except Exception as exc:
        #report the failure to the model as data instead of aborting the chat loop
        return {'error': f'{type(exc).__name__}: {exc}'}


tools = [search, analyze_stock_news]

In [41]:
#define agent model for xai llm integration
#model name 'grok-2' served by the 'xai' provider; presumably authenticates with the
#XAI_API_KEY environment variable set earlier -- TODO confirm
model = init_chat_model('grok-2',model_provider='xai')

In [42]:
#create memory
#this MemorySaver is handed to create_react_agent as its checkpointer
memory = MemorySaver()

In [47]:
#create a prompt for the agent role in our project
#system message passed to create_react_agent as `prompt`; it describes the intended
#conversation flow (symbol -> news pipeline -> analyst specialty -> analysis -> feedback)
agentrole = SystemMessage(
    content='''You are an expert financial analysis agent. To start, ask user for stock symbol. Confirm the company, provide recent stock information. Gather 
    news with the ingest_news(), then preprocess with preproc_news(), classify with classify_news(), extract with extract_news() and summarize with summarize_news() 
    Allow room for user feedback, update at each step. 
    Next, ask user to specify your specialty as an analyst, 
    provide examples but accept all input (examples: earnings analyst, news analyst, market analyst, valuation, risk management, quant).
    Tailor new response to speciality selected, Use tools, update user with visual and text strategies/reports. 
    Generate analysis, ask for feedback on quality, refine response if necessary.''')


In [48]:
#create the agent
#combines the chat model, the tool list, the system prompt and the checkpointer
financeanalyst = create_react_agent(model,tools,prompt=agentrole,checkpointer=memory)
#every stream call made with this config uses thread id 'testingproj1'
config = {'configurable': {'thread_id': 'testingproj1'}}

In [55]:
#prompt user to start and run agent while loop until exiting
#words that end the session; compared case-insensitively
exit_words = {'exit', 'stop', 'cancel'}

print('Welcome to the USD-AAI Financial Advisor Agency!')
name = input('Please enter a username or nickname to begin: ')
userid = name + ': '
print(userid)
startagent = input('Your personal financial agent is ready to go! Confirm (Y) to begin or (N) to exit: ')
#accept 'y', 'Y' and surrounding whitespace as confirmation
if startagent.strip().upper() != 'Y':
    print('Cancelled Agent Request')
else:
    print('Starting, to end/cancel your agent, simply input "exit" "stop" or "cancel"')
    while True:
        myinput = input(f"{userid}").strip()
        #a blank line carries no request; ask again instead of sending an empty message
        if not myinput:
            continue
        if myinput.lower() in exit_words:
            print('Agent Disconnected')
            break
        #stream the agent's intermediate states and print the newest message of each
        for step in financeanalyst.stream(
            {"messages": [('user', myinput)]}, config, stream_mode='values'
        ):
            step['messages'][-1].pretty_print()

Welcome to the USD-AAI Financial Advisor Agency!
jbaxter25: 


"\nif start == 'Y':\n    while True: \n        humaninput = input()\n"

In [50]:
#start agent comms // prior loop
#earlier fixed three-prompt version of the conversation (name, symbol, confirmation)
#the whole cell is one bare triple-quoted string, so none of the code inside it runs
'''intro = input('Enter your name to activate the finance agent: ')
for step in financeanalyst.stream(
    {"messages": [('user', intro)]}, config, stream_mode='values'
):
    step['messages'][-1].pretty_print()
stocksymbol = input('Enter symbol here: $')
for step in financeanalyst.stream(
    {"messages": [('user', stocksymbol)]}, config, stream_mode='values'
):
    step['messages'][-1].pretty_print()
confirmation = input('Enter YES if yes, Enter NO if no: ')
for step in financeanalyst.stream(
    {"messages": [('user', confirmation)]}, config, stream_mode='values'
):
    step['messages'][-1].pretty_print()
    '''


Jack

Hello Jack! To proceed with the financial analysis of CLPT (ClearPoint Neuro, Inc.), please confirm if this is the correct company. If you have another company in mind, please provide the stock symbol or the company name.

CLPT

The stock symbol CLPT represents ClearPoint Neuro, Inc. Is this the company you would like to analyze?

YES

I am retrieving the latest financial data for ClearPoint Neuro, Inc. (CLPT).
Tool Calls:
  tavily_search (call_92252751)
 Call ID: call_92252751
  Args:
    query: ClearPoint Neuro, Inc. financial performance
    topic: finance
Name: tavily_search

{"query": "ClearPoint Neuro, Inc. financial performance", "follow_up_questions": null, "answer": null, "images": [], "results": [{"url": "https://finance.yahoo.com/news/clearpoint-neuro-reports-first-quarter-204000224.html", "title": "ClearPoint Neuro Reports First Quarter 2025 Results", "content": "Total revenue was $8.5 million and $7.6 million for the three months ended March 31, 2025 and 2024, respe