In [3]:
from langchain.chat_models import ChatOpenAI
from langchain.utilities import GoogleSerperAPIWrapper
from langchain.document_loaders import UnstructuredURLLoader
from langchain.schema import Document
from langchain.chains.summarize import load_summarize_chain

import yfinance as yf
import requests
import os, sys
# Enable absolute-path traversal from the notebooks folder to the src folder
# so that the project-level `config` module below can be imported.
parent_dir = os.path.abspath('..')
if parent_dir not in sys.path:
    sys.path.append(parent_dir)
    
import config as config

from dotenv import load_dotenv
load_dotenv()

# Sanity-check that the API keys were loaded. Only report whether each key is
# present: printing the values themselves would store the secrets in the
# notebook's cell output.
for _key_name in ('SERPER_API_KEY', 'OPENAI_API_KEY'):
    print(f"{_key_name}: {'set' if os.environ.get(_key_name) else 'MISSING'}")

# Cap applied to article text before it is summarized. The name refers to a
# token limit, but get_news_for_ticker uses it as a character count when
# slicing page_content.
GPT_3_5_TOKEN_LIMIT = 4 * 1024

def _probe_content_length(url, timeout=10):
    """Return the Content-Length advertised for *url*, or None if unavailable.

    The value is only used for diagnostics, so a failed HEAD request or a
    malformed header must not prevent the article from being summarized.
    A missing header is reported as 0.
    """
    try:
        response = requests.head(url, timeout=timeout)
        return int(response.headers.get('Content-Length', 0))
    except (requests.RequestException, ValueError):
        return None


def _truncate_first_document(docs, max_chars):
    """Truncate, in place, the first Document found in *docs* to *max_chars* characters.

    Elements that precede the first Document are reported and left untouched;
    elements after it are not inspected. Nothing happens if *docs* is not a list.
    """
    if not isinstance(docs, list):
        return
    for idx, element in enumerate(docs):
        print(f"Element {idx} is type: {type(element)}")
        if isinstance(element, Document):
            print(f"Element {idx} is a Document object with page_content length: {len(element.page_content)}")
            element.page_content = element.page_content[:max_chars]
            print(f"Truncated page_content to : {len(element.page_content)}")
            break
        print(f"Element {idx} is not a Document object\n{element}")


def _summarize_article(url):
    """Load the page at *url*, truncate its text and return an LLM summary."""
    # Informational only: the size is printed but does not gate the download.
    content_length = _probe_content_length(url)
    print(f"Content-Length: {'unknown' if content_length is None else content_length}")

    loader = UnstructuredURLLoader(urls=[url])
    data = loader.load()
    print(f"Data type: {type(data)}")

    # NOTE: the limit is named after tokens but is applied as a character cap.
    _truncate_first_document(data, GPT_3_5_TOKEN_LIMIT)

    # Initialize the ChatOpenAI module, load and run the summarize chain
    llm = ChatOpenAI(temperature=0, model='gpt-3.5-turbo', openai_api_key=config.get_api_key('openai'))
    chain = load_summarize_chain(llm, chain_type="map_reduce")
    return chain.run(data)


def get_news_for_ticker(ticker, n_search_restuls=3):
    """Search for recent news about *ticker* and print a summary of each hit.

    Args:
        ticker: Stock ticker symbol, e.g. 'AAPL'. The company's long name is
            looked up through yfinance and used in the search query.
        n_search_restuls: Maximum number of search results to summarize.
            (The misspelled name is kept so existing keyword callers still work.)

    Returns:
        None. Titles, links, summaries and any errors are printed.

    Errors are reported with print() rather than raised: a failure on one
    article does not stop the remaining ones, and a failure during the lookup
    or search stage ends the call after printing the exception.
    """
    # Fallback so the outer error handler can always name the company, even
    # when the yfinance lookup itself is what failed.
    company = ticker
    try:
        print(f"Getting news for {ticker}")
        company = yf.Ticker(ticker).info['longName']

        # Show the top X relevant news articles from the previous week using Google Serper API
        search = GoogleSerperAPIWrapper(type="news", tbs="qdr:w1", serper_api_key=config.get_api_key('serper'))
        search_query = f"financial news about {company} or {ticker} stock"
        result_dict = search.results(search_query)

        # A response without a 'news' key is treated the same as an empty one.
        news_items = result_dict.get('news')
        if not news_items:
            print(f"No search results for: {search_query}.")
            return

        # Summarize the top X news search results
        for _, item in zip(range(n_search_restuls), news_items):
            try:
                summary = _summarize_article(item['link'])
                print(f"Title: {item['title']}\n\nLink: {item['link']}\n\nSummary: {summary}")
            except Exception as e:
                print(f"Title: {item['title']}\n\nLink: {item['link']}")
                print(f"Exception: {e}")
                print(f"Exception summarizing news about {company} w/ticker {ticker}: {e}")
    except Exception as e:
        print(f"Exception: {e}")
        print(f"Exception searching for news about {company} w/ticker {ticker}: {e}")

# Example run: print summarized recent news for Apple.
ticker = "AAPL"
get_news_for_ticker(ticker=ticker)

[REDACTED: SERPER_API_KEY]
[REDACTED: OPENAI_API_KEY]
Getting news for AAPL
Content-Length: 0
Data type: <class 'list'>
Element 0 is type: <class 'langchain.schema.Document'>
Element 0 is a Document object with page_content length: 22644
Truncated page_content to : 4096
Title: Apple Inc. (NASDAQ:AAPL) Stock Position Raised by Main Street ...

Link: https://www.marketbeat.com/instant-alerts/nasdaq-aapl-sec-filing-2023-06-01/

Summary: Main Street Financial Solutions LLC has increased its stake in Apple Inc. by 13.4% in Q4, owning 136,730 shares worth $17.77m, making up approximately 2.8% of its holdings and the fourth largest position.
Content-Length: 0
Data type: <class 'list'>
Element 0 is type: <class 'langchain.schema.Document'>
Element 0 is a Document object with page_content length: 572
Truncated page_content to : 572
Title: Apple Stock: Faithful Await Debut Of Mixed-Reality Headset

Link: https://www.investors.com/news/technology/apple-stock