In [8]:
from langchain_openai import ChatOpenAI
from langchain.chains import create_extraction_chain
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.tools.render import format_tool_to_openai_function
from langchain_core.utils.function_calling import convert_to_openai_function
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
import sys
import os

In [3]:
import sys
import os
from pathlib import Path

# Make the parent of the current working directory importable (presumably so
# that the `config` import that follows can be resolved -- confirm against the
# project layout). The membership check keeps sys.path free of duplicate
# entries when this cell is executed more than once in the same kernel.
PROJECT_ROOT = str(Path.cwd().parent)
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

from config import LLM_MODEL


In [4]:
# Chat model used by both the extraction binding and the summary chain.
# Temperature is pinned to 0 and the model name is taken from the project config.
llm = ChatOpenAI(
    model=LLM_MODEL,
    temperature=0,
)

def _string_field(*allowed_values):
    """Return a JSON-schema string property, restricted to an enum when values are given."""
    field = {"type": "string"}
    if allowed_values:
        field["enum"] = list(allowed_values)
    return field


def _object_of(**fields):
    """Return a JSON-schema object whose properties are the given fields, in order."""
    return {"type": "object", "properties": fields}


def _array_of(**fields):
    """Return a JSON-schema array whose items are objects with the given fields."""
    return {"type": "array", "items": _object_of(**fields)}


# JSON schema describing the structured insights requested from the model.
# Four sections are arrays of objects; `market_sentiment` is a single object.
extraction_schema = {
    "title": "StockInsights",
    "description": "Extracted insights about a stock from financial news and reports",
    "type": "object",
    "properties": {
        "financial_metrics": _array_of(
            metric_name=_string_field(),
            value=_string_field(),
            comparison=_string_field(),
            sentiment=_string_field("positive", "negative", "neutral"),
        ),
        "analyst_opinions": _array_of(
            analyst_or_firm=_string_field(),
            rating=_string_field(),
            target_price=_string_field(),
            previous_rating=_string_field(),
            previous_target=_string_field(),
        ),
        "business_developments": _array_of(
            event_type=_string_field(),
            description=_string_field(),
            expected_impact=_string_field(),
            timeline=_string_field(),
        ),
        "market_sentiment": _object_of(
            overall_sentiment=_string_field("bullish", "bearish", "neutral"),
            trading_volume=_string_field(),
            price_movement=_string_field(),
            volatility=_string_field(),
        ),
        "risk_factors": _array_of(
            risk_type=_string_field(),
            description=_string_field(),
            severity=_string_field("high", "medium", "low"),
        ),
    },
}

In [9]:
# Wrap the JSON schema in an OpenAI function specification.
# `convert_to_openai_function` is used here in place of an earlier
# `format_tool_to_openai_function` attempt with the same arguments.
extraction_function = convert_to_openai_function(
    dict(
        name="extract_stock_insights",
        description="Extract structured insights from financial text about a stock",
        parameters=extraction_schema,
    )
)
extraction_function



{'name': 'extract_stock_insights',
 'description': 'Extract structured insights from financial text about a stock',
 'parameters': {'title': 'StockInsights',
  'description': 'Extracted insights about a stock from financial news and reports',
  'type': 'object',
  'properties': {'financial_metrics': {'type': 'array',
    'items': {'type': 'object',
     'properties': {'metric_name': {'type': 'string'},
      'value': {'type': 'string'},
      'comparison': {'type': 'string'},
      'sentiment': {'type': 'string',
       'enum': ['positive', 'negative', 'neutral']}}}},
   'analyst_opinions': {'type': 'array',
    'items': {'type': 'object',
     'properties': {'analyst_or_firm': {'type': 'string'},
      'rating': {'type': 'string'},
      'target_price': {'type': 'string'},
      'previous_rating': {'type': 'string'},
      'previous_target': {'type': 'string'}}}},
   'business_developments': {'type': 'array',
    'items': {'type': 'object',
     'properties': {'event_type': {'type': '

In [10]:
# Attach the function specification to the chat model and name it in
# `function_call`, so each request asks for that specific function.
function_binding = {
    "functions": [extraction_function],
    "function_call": {"name": "extract_stock_insights"},
}
extraction_llm = llm.bind(**function_binding)
extraction_llm


RunnableBinding(bound=ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x12c0d64d0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x12c110f10>, root_client=<openai.OpenAI object at 0x12c09c550>, root_async_client=<openai.AsyncOpenAI object at 0x12c0d6560>, model_name='gpt-4o-mini', temperature=0.0, model_kwargs={}, openai_api_key=SecretStr('**********')), kwargs={'functions': [{'name': 'extract_stock_insights', 'description': 'Extract structured insights from financial text about a stock', 'parameters': {'title': 'StockInsights', 'description': 'Extracted insights about a stock from financial news and reports', 'type': 'object', 'properties': {'financial_metrics': {'type': 'array', 'items': {'type': 'object', 'properties': {'metric_name': {'type': 'string'}, 'value': {'type': 'string'}, 'comparison': {'type': 'string'}, 'sentiment': {'type': 'string', 'enum': ['positive', 'negative', 'neutral']}}}}, 'analys

In [11]:
# Output parser piped after the function-calling model
# (see the `extraction_llm | parser` chain built inside `extract`).
parser = JsonOutputFunctionsParser()
parser

JsonOutputFunctionsParser()

In [13]:
# Prompt text for the free-form summary.
# Placeholders: {ticker}, {company_name}, {text}.
# Written as explicit per-line literals so that the 8-space indentation, the
# whitespace-only separator lines and the trailing space after "important"
# are visible in the source and cannot be silently stripped by an editor.
summary_template = (
    "\n"
    "        You are a financial analyst specializing in extracting key insights for stock investors.\n"
    "        \n"
    "        Analyze the following article about {ticker} ({company_name}) and extract the most important \n"
    "        information for a short-term investor (last 3 month horizon).\n"
    "        \n"
    "        Focus on:\n"
    "        1. Recent financial results\n"
    "        2. Analyst ratings and price targets\n"
    "        3. Business developments and news\n"
    "        4. Market sentiment and trading patterns\n"
    "        5. Key risk factors\n"
    "        \n"
    "        Article:\n"
    "        {text}\n"
    "        \n"
    "        Provide a concise summary of the key investment insights:\n"
    "        "
)

# Prompt object for the summary template, with its three placeholders
# declared explicitly.
summary_prompt = PromptTemplate(
    template=summary_template,
    input_variables=["ticker", "company_name", "text"],
)
summary_prompt

PromptTemplate(input_variables=['company_name', 'text', 'ticker'], input_types={}, partial_variables={}, template='\n        You are a financial analyst specializing in extracting key insights for stock investors.\n        \n        Analyze the following article about {ticker} ({company_name}) and extract the most important \n        information for a short-term investor (last 3 month horizon).\n        \n        Focus on:\n        1. Recent financial results\n        2. Analyst ratings and price targets\n        3. Business developments and news\n        4. Market sentiment and trading patterns\n        5. Key risk factors\n        \n        Article:\n        {text}\n        \n        Provide a concise summary of the key investment insights:\n        ')

In [15]:

# Summary pipeline: prompt -> chat model -> plain string.
# Reuses the `summary_prompt` object defined earlier in the notebook instead of
# building a second, identical PromptTemplate from `summary_template`, so there
# is a single prompt definition to maintain.
summary_chain = summary_prompt | llm | StrOutputParser()
summary_chain

PromptTemplate(input_variables=['company_name', 'text', 'ticker'], input_types={}, partial_variables={}, template='\n        You are a financial analyst specializing in extracting key insights for stock investors.\n        \n        Analyze the following article about {ticker} ({company_name}) and extract the most important \n        information for a short-term investor (last 3 month horizon).\n        \n        Focus on:\n        1. Recent financial results\n        2. Analyst ratings and price targets\n        3. Business developments and news\n        4. Market sentiment and trading patterns\n        5. Key risk factors\n        \n        Article:\n        {text}\n        \n        Provide a concise summary of the key investment insights:\n        ')
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x12c0d64d0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x12c110f10>, root_client=<openai.OpenAI obje

In [24]:
from agents.research import ResearchAgent
from agents.filtering import FilteringSystem

# Company under analysis, named once so the research and filtering calls
# always receive the same identifiers.
target_ticker = "MSFT"
target_company = "Microsoft Corporation"

research_agent = ResearchAgent()
filtering_system = FilteringSystem()

# Gather raw research first, then pass it through the filtering step.
research_results = research_agent.research(target_ticker, target_company)
filtered_results = filtering_system.filter(
    research_results,
    target_ticker,
    target_company,
)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `tavily_search_results_json` with `{'query': 'Microsoft Corporation MSFT recent quarterly results earnings analyst ratings business developments October 2023'}`


[0m[36;1m[1;3m[{'title': 'After-Hours Earnings Report for October 24, 2023 : MSFT, GOOGL ...', 'url': 'https://www.nasdaq.com/articles/after-hours-earnings-report-for-october-24-2023-:-msft-googl-goog-v-txn-cb-cni-wm-csgp', 'content': "The computer software company's consensus earnings per share forecast from the 16 analysts that follow the stock is $2.65. This value represents", 'score': 0.78154784}, {'title': 'Microsoft (MSFT) Earnings Dates, Call Summary & Reports - TipRanks', 'url': 'https://www.tipranks.com/stocks/msft/earnings', 'content': 'MSFT Earnings History ; Oct 24, 2023, 2024 (Q1). 2.65 / 2.99 ; Jul 25, 2023, 2023 (Q4). 2.55 / 2.69 ; Apr 25, 2023, 2023 (Q3). 2.24 / 2.45 ; Jan 24, 2023, 2023 (Q2).', 'score': 0.7668008}, {'title': 'Microsoft

In [29]:
def extract(article, ticker, company_name, max_chars=4000):
    """
    Extract structured insights and a prose summary from one article.

    Args:
        article (dict): Article content and metadata. The 'content' and 'url'
            keys are read; a missing or None 'content' is treated as empty,
            and a missing 'url' is reported as None.
        ticker (str): Stock ticker symbol
        company_name (str): Company name
        max_chars (int): Number of leading characters of the article that are
            sent to the model (default 4000, to avoid token limits).

    Returns:
        dict: 'url', 'structured_insights' and 'summary'. When the article has
        no usable text, or when a model call fails, the affected field falls
        back to an empty value ({} for insights, "" for the summary) instead
        of raising.
    """
    url = article.get("url")

    # `or ""` also covers an explicit None, which cannot be sliced.
    content = (article.get("content") or "")[:max_chars]

    if not content.strip():
        # Nothing to analyse: skip both model calls rather than asking the
        # model to summarise an empty article.
        return {
            "url": url,
            "structured_insights": {},
            "summary": "",
        }

    # Extract structured data
    chain = extraction_llm | parser

    try:
        structured_insights = chain.invoke(
            f"Extract insights about {ticker} ({company_name}) from this article: {content}"
        )
    except Exception as e:
        # Best effort: a failed extraction must not lose the article.
        print(f"Error in extraction: {str(e)}")
        structured_insights = {}

    # Generate summary, with the same best-effort policy as the extraction so
    # one failing article does not abort a whole batch.
    try:
        summary = summary_chain.invoke({
            "ticker": ticker,
            "company_name": company_name,
            "text": content
        })
    except Exception as e:
        print(f"Error in summary: {str(e)}")
        summary = ""

    return {
        "url": url,
        "structured_insights": structured_insights,
        "summary": summary
    }

In [26]:
# Inspect the filtering output; its 'ticker', 'company_name' and
# 'filtered_articles' keys are the ones unpacked in the following cell.
filtered_results

{'ticker': 'MSFT',
 'company_name': 'Microsoft Corporation',
 'filtered_articles': [{'url': 'https://www.nasdaq.com/articles/after-hours-earnings-report-for-october-24-2023-:-msft-googl-goog-v-txn-cb-cni-wm-csgp))',
   'content': "404 Page Not Found | Nasdaq\nSkip to main content\nNasdaq+\nWeekly Macro+\nScorecard\nMarket Activity\nU.S. Market Activity\n->\nStocks\nOptions\nETFs\nMutual Funds\nIndexes\nCryptocurrency\nCurrencies\nFixed Income\nTrading & Market Services\nNorth American Markets\nNasdaq-100 Index\nNasdaq-100 Index Options\nMarket Data\nEuropean Markets\n->\nShares\nIndexes\nFixed Income\nOptions & Futures\nETPs\nWarrants & Certificates\nFunds\nNews\nEuropean Commodities\nMarket Regulation\n->\nU.S. Regulation\nEuropean Regulation\nU.S. Market Quick Links\nReal-Time Quotes\nAfter-Hours Quotes\nPre-Market Quotes\nNasdaq-100\nSymbol Screener\nGlossary\nSymbol Change History\nIPO Performance\nOwnership Search\nDividend History\nU.S. Market Events\nEconomic Calendar\nEarnings\

In [27]:
# Unpack the three fields of the filtering output into notebook-level names.
ticker, company_name, filtered_articles = (
    filtered_results[field]
    for field in ("ticker", "company_name", "filtered_articles")
)

print(ticker, company_name, filtered_articles)


MSFT Microsoft Corporation [{'url': 'https://www.nasdaq.com/articles/after-hours-earnings-report-for-october-24-2023-:-msft-googl-goog-v-txn-cb-cni-wm-csgp))', 'content': "404 Page Not Found | Nasdaq\nSkip to main content\nNasdaq+\nWeekly Macro+\nScorecard\nMarket Activity\nU.S. Market Activity\n->\nStocks\nOptions\nETFs\nMutual Funds\nIndexes\nCryptocurrency\nCurrencies\nFixed Income\nTrading & Market Services\nNorth American Markets\nNasdaq-100 Index\nNasdaq-100 Index Options\nMarket Data\nEuropean Markets\n->\nShares\nIndexes\nFixed Income\nOptions & Futures\nETPs\nWarrants & Certificates\nFunds\nNews\nEuropean Commodities\nMarket Regulation\n->\nU.S. Regulation\nEuropean Regulation\nU.S. Market Quick Links\nReal-Time Quotes\nAfter-Hours Quotes\nPre-Market Quotes\nNasdaq-100\nSymbol Screener\nGlossary\nSymbol Change History\nIPO Performance\nOwnership Search\nDividend History\nU.S. Market Events\nEconomic Calendar\nEarnings\nIPO Calendar\nDividend Calendar\nSPO Calendar\nHoliday Sch

In [30]:
# Run `extract` over every filtered article, keeping the input order.
extracted_insights = [
    extract(article, ticker, company_name)
    for article in filtered_articles
]

In [31]:
# Inspect the per-article results; each entry carries the 'url',
# 'structured_insights' and 'summary' keys returned by `extract`.
extracted_insights

[{'url': 'https://www.nasdaq.com/articles/after-hours-earnings-report-for-october-24-2023-:-msft-googl-goog-v-txn-cb-cni-wm-csgp))',
  'structured_insights': {},
  'summary': 'The article provided does not contain specific information about Microsoft Corporation (MSFT) or any relevant financial data. However, based on the typical areas of interest for a short-term investor, here’s a general framework for analyzing MSFT if the relevant data were available:\n\n1. **Recent Financial Results**: Look for the latest quarterly earnings report, focusing on revenue growth, earnings per share (EPS), and any guidance provided for future quarters. Key metrics to consider would include cloud revenue growth, software sales, and overall profitability.\n\n2. **Analyst Ratings and Price Targets**: Review recent analyst upgrades or downgrades, along with any changes in price targets. A consensus rating (buy, hold, sell) and the average price target compared to the current stock price can indicate market

In [32]:
# Bundle the per-article insights with the company identifiers.
extraction_results = dict(
    ticker=ticker,
    company_name=company_name,
    extracted_insights=extracted_insights,
)

# Short report: article count, then the URL and summary of each article.
print(f"Extracted insights from {len(extraction_results['extracted_insights'])} articles")
for position, insight in enumerate(extraction_results['extracted_insights'], start=1):
    print(f"Article {position}: {insight['url']}")
    print(f"Summary: {insight['summary']}")
    print("---")

Extracted insights from 3 articles
Article 1: https://www.nasdaq.com/articles/after-hours-earnings-report-for-october-24-2023-:-msft-googl-goog-v-txn-cb-cni-wm-csgp))
Summary: The article provided does not contain specific information about Microsoft Corporation (MSFT) or any relevant financial data. However, based on the typical areas of interest for a short-term investor, here’s a general framework for analyzing MSFT if the relevant data were available:

1. **Recent Financial Results**: Look for the latest quarterly earnings report, focusing on revenue growth, earnings per share (EPS), and any guidance provided for future quarters. Key metrics to consider would include cloud revenue growth, software sales, and overall profitability.

2. **Analyst Ratings and Price Targets**: Review recent analyst upgrades or downgrades, along with any changes in price targets. A consensus rating (buy, hold, sell) and the average price target compared to the current stock price can indicate market sen