In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.utilities import GoogleSerperAPIWrapper
from langchain.document_loaders import UnstructuredURLLoader
from langchain.schema import Document
from langchain.chains.summarize import load_summarize_chain

import yfinance as yf
import os, sys
    
from dotenv import load_dotenv
load_dotenv()

# Confirm the API keys were picked up from the environment without echoing
# the secrets themselves: notebook output is saved alongside the code and is
# easily shared or committed by accident.
print(f"SERPER_API_KEY set: {bool(os.environ.get('SERPER_API_KEY'))}")
print(f"OPENAI_API_KEY set: {bool(os.environ.get('OPENAI_API_KEY'))}")

# Named after the gpt-3.5-turbo token limit, but applied as a *character* cap
# when article text is sliced before summarization.
GPT_3_5_TOKEN_LIMIT = 4096

def get_news_for_ticker(ticker, n_search_restuls=3):
    """Search the past week's news for a ticker and summarize the top hits.

    The company name is looked up with yfinance, a Google Serper news search
    is run for it, and each of the top results is downloaded and summarized
    with gpt-3.5-turbo through a ``map_reduce`` summarize chain.

    Args:
        ticker: Ticker symbol, e.g. ``'AAPL'``.
        n_search_restuls: Maximum number of search results to process.
            (The misspelled name is kept so keyword callers keep working.)

    Returns:
        A ``(ticker, news_results)`` tuple. ``news_results`` is a list of
        dicts with the keys ``'title'``, ``'link'`` and ``'summary'``. It is
        empty when the lookup or the search fails or returns nothing. An
        article that cannot be loaded or summarized still gets an entry,
        with the summary ``'Error while summarizing'``.
    """
    news_results = []
    # Bound before the try block so the outer except handler can always
    # format its message, even when the yfinance lookup itself fails.
    company = ticker
    try:
        print(f"Getting news for {ticker}")
        # Some instruments have no 'longName'; fall back to the symbol.
        company = yf.Ticker(ticker).info.get('longName') or ticker

        # TODO: try bing search or vanilla google - plus might be free vs Serper?
        # Show the top X relevant news articles from the previous week using Google Serper API
        search = GoogleSerperAPIWrapper(type="news", tbs="qdr:w1", serper_api_key=os.environ.get('SERPER_API_KEY'))
        search_query = f"financial news about {company} or {ticker}"
        print(f"Search query: {search_query}")

        # NOTE: this call has been seen to hang occasionally; no timeout or
        # retry is applied here.
        result_dict = search.results(search_query)
        print(f"Search results returned for {search_query}, {result_dict.keys()}")

        # A response without a 'news' key is treated the same as an empty one.
        news_items = result_dict.get('news') or []
        if not news_items:
            print(f"No search results for: {search_query}.")

        # Load URL data from the top X news search results
        for i, item in zip(range(n_search_restuls), news_items):
            try:
                print(f'processing news item {i} for company {company} and ticker {ticker} from link {item["link"]}')
                # TODO: appears to hang sometimes...
                loader = UnstructuredURLLoader(urls=[item['link']], continue_on_failure=False)
                data = loader.load()
                print(f'done processing news item {i} for company {company} and ticker {ticker} from link {item["link"]}')

                summary = "No summary available"
                if isinstance(data, list):
                    # Cap every loaded Document at GPT_3_5_TOKEN_LIMIT
                    # characters so each piece handed to the chain stays small.
                    # A separate index name keeps the article index `i` intact.
                    for j, element in enumerate(data):
                        print(f"Element {j} is type: {type(element)}")
                        if isinstance(element, Document):
                            element.page_content = element.page_content[:GPT_3_5_TOKEN_LIMIT]
                        else:
                            print(f"Element {j} is not a Document object\n{element}")

                    # Initialize the ChatOpenAI module, load and run the summarize chain
                    llm = ChatOpenAI(temperature=0, model='gpt-3.5-turbo', openai_api_key=os.environ.get('OPENAI_API_KEY'))
                    chain = load_summarize_chain(llm, chain_type="map_reduce")
                    summary = chain.run(data)

                news_results.append({'title': item['title'], 'link': item['link'], 'summary': summary})
            except Exception as e:
                # Best effort: keep the article in the results even when it
                # could not be fetched or summarized. `.get` is used so a
                # malformed search item cannot raise again inside the handler.
                news_results.append({'title': item.get('title'), 'link': item.get('link'), 'summary': 'Error while summarizing'})
                print(f"Exception summarizing news about {company} w/ticker {ticker}: {e}")

    except Exception as e:
        print(f"Exception searching for news about {company} w/ticker {ticker}: {e}")

    print(f"Completed getting news for {ticker}")
    return ticker, news_results

# Watchlist of symbols to fetch news for (equities, crypto pairs, and others).
tickers = [
    'AAPL', 'AMZN', 'NVDA', 'MMC', 'GOOG', 'MSFT',
    'BTC-USD', 'ETH-USD', 'XOM', 'BAC', 'V', 'GOLD',
]

# Fetch and print the news for each symbol in turn. The call's result is
# unpacked as a (ticker, news) pair, so get_news_for_ticker must return one.
for ticker in tickers:
    ticker, news = get_news_for_ticker(ticker)
    print(f"Ticker: {ticker}, news: {news}")

In [None]:
from langchain.document_loaders import UnstructuredURLLoader

# Stand-alone check of the URL loader against a single known article.
url = "https://www.nasdaq.com/articles/apple-denies-surveillance-claims-made-by-russias-fsb"

# A one-element URL list; continue_on_failure is switched off for this check.
loader = UnstructuredURLLoader(
    urls=[url],
    continue_on_failure=False,
)
data = loader.load()

print(f"found data: {data}")