In [None]:
from datetime import datetime
import os
# --- Run configuration ---
# Symbols to process and the date window, given as YYYY-MM-DD strings.
TICKERS = [
    "AAPL", "GOOG", "MSFT", "META",
    "TSLA", "NVDA", "AMZN", "NFLX",
]
START_DATE, END_DATE = "2025-01-01", "2025-06-30"
OUTPUT_DIR = "../data/articles"

# Alpha Vantage API key, read from the AV_TOKEN environment variable
# (a KeyError is raised here when the variable is not set).
API_KEY = os.environ["AV_TOKEN"]

# Create the output directory up front; an existing directory is fine.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Parsed datetime versions of the window boundaries.
run_date, end_date = (
    datetime.strptime(iso_day, "%Y-%m-%d") for iso_day in (START_DATE, END_DATE)
)
print(run_date)

In [None]:
import json
import os
import time
from datetime import datetime, timedelta
from bs4 import BeautifulSoup
from pprint import pprint
import requests
import yfinance as yf

# Historical price data for every configured ticker over the configured window,
# grouped per ticker and without automatic price adjustment.
hist_data = yf.download(
    TICKERS,
    start=START_DATE,
    end=END_DATE,
    group_by="ticker",
    auto_adjust=False,
)

# Look up each ticker's `.info` once and keep it keyed by symbol.
info_cache = {symbol: yf.Ticker(symbol).info for symbol in TICKERS}

def _sentiment_label(score):
    """Map a numeric sentiment score to its lowercase sentiment label.

    Bands: ``score <= -0.35`` bearish, ``-0.35 < score <= -0.15``
    somewhat-bearish, ``-0.15 < score < 0.15`` neutral,
    ``0.15 <= score < 0.35`` somewhat-bullish, anything else bullish.
    """
    if score <= -0.35:
        return "bearish"
    if score <= -0.15:
        return "somewhat-bearish"
    if score < 0.15:
        return "neutral"
    if score < 0.35:
        return "somewhat-bullish"
    return "bullish"


def fetch_news_sentiment(ticker, start_date, stop_date, limit=10000):
    """Fetch Alpha Vantage NEWS_SENTIMENT articles for a single ticker.

    Args:
        ticker: Symbol sent as the ``tickers`` query parameter.
        start_date: Object with ``strftime``; formatted as ``%Y%m%dT%H%M``
            and sent as ``time_from``.
        stop_date: Object with ``strftime``; formatted the same way and sent
            as ``time_to``.
        limit: Value sent as the ``limit`` query parameter. Defaults to the
            10000 that was previously hard-coded.
            NOTE(review): confirm the API honours a value this large for
            the key in use.

    Returns:
        A list with one dict per entry of the response's ``feed`` (keys:
        Ticker, Title, Published, Sentiment, Sentiment Score, Summary,
        Source, Source_domain, URL, Full_Article), or ``None`` when the
        response contains no ``feed`` key (the payload is printed).

    Raises:
        Whatever ``requests.get``, ``Response.raise_for_status`` or
        ``Response.json`` raise; nothing is caught here.
    """
    url = "https://www.alphavantage.co/query"
    print(f"Fetching news sentiment for {ticker}...")
    params = {
        "function": "NEWS_SENTIMENT",
        "tickers": ticker,
        "apikey": API_KEY,
        "limit": limit,
        "time_from": start_date.strftime("%Y%m%dT%H%M"),
        "time_to": stop_date.strftime("%Y%m%dT%H%M"),
        "sort": "EARLIEST"
    }

    response = requests.get(url, params=params, timeout=10)
    # Surface HTTP errors directly instead of as a confusing JSON decode failure.
    response.raise_for_status()
    data = response.json()

    if "feed" not in data:
        print(f"No news data found for {ticker}:", data)
        return None

    articles = []
    for article in data["feed"]:
        # Every field is read with .get so that one incomplete article cannot
        # raise and throw away the rest of the batch.
        raw_score = article.get("overall_sentiment_score", "")
        try:
            sentiment = _sentiment_label(float(raw_score)).capitalize()
        except (TypeError, ValueError):
            # Missing or non-numeric score: leave the label empty, in line
            # with the "" default used for the other absent fields.
            sentiment = ""

        articles.append({
            "Ticker": ticker,
            "Title": article.get("title", ""),
            # First eight characters of the timestamp, i.e. YYYYMMDD.
            "Published": article.get("time_published", "")[:8],
            "Sentiment": sentiment,
            "Sentiment Score": raw_score,
            "Summary": article.get("summary", ""),
            "Source": article.get("source", ""),
            "Source_domain": article.get("source_domain", ""),
            "URL": article.get("url", ""),
            # Placeholder: article bodies are not scraped by this function.
            "Full_Article": None
        })
    return articles

In [None]:
# Track, per ticker, the next start date that still has to be fetched.
ticker_next_run = {ticker: run_date for ticker in TICKERS}
try:
    for ticker in TICKERS:
        # Re-read the stored date on every pass. datetime objects are
        # immutable, so a copy taken once before the loop never advances and
        # the loop would not terminate.
        while ticker_next_run[ticker] <= end_date:
            window_start = ticker_next_run[ticker]
            print(f"Processing date: {window_start.strftime('%Y-%m-%d')}")
            # NOTE(review): every request still spans window_start..end_date,
            # so consecutive iterations overlap; confirm whether a one-day
            # window was intended.
            articles = fetch_news_sentiment(ticker, window_start, end_date)
            if articles is not None:
                # Print a count rather than the whole batch to keep output readable.
                print(f"Fetched {len(articles)} articles for {ticker}")
                file = ticker + "_" + window_start.strftime('%Y-%m-%d') + "_" + end_date.strftime('%Y-%m-%d')
                filename = os.path.join(OUTPUT_DIR, f"{file}.json")
                with open(filename, "w", encoding="utf-8") as f:
                    json.dump(articles, f, indent=2)
            # A failed fetch (None) is not written to disk, but the date is
            # advanced either way so the loop always makes progress.
            ticker_next_run[ticker] = window_start + timedelta(days=1)
except Exception as e:
    # Best-effort run: report where it stopped and leave ticker_next_run
    # holding the progress made so far.
    print(f"Stopped at {ticker} ({ticker_next_run[ticker]}): {type(e).__name__}: {e}")

In [None]:
# Pretty-print whatever `articles` currently holds, i.e. the value assigned by
# the most recent fetch_news_sentiment call in the loop cell.
pprint(articles)

In [None]:
# Manually move the stored date forward by one day for the ticker that the
# loop variable `ticker` last pointed at.
# NOTE(review): not idempotent; each re-run of this cell adds another day.
ticker_next_run[ticker] += timedelta(days=1)

In [None]:
# Show the stored next-run date for the ticker currently bound to `ticker`.
print(ticker_next_run[ticker])

In [None]:
import os
from alpha_vantage_pro import AlphaVantageClient, AlphaVantageClientAsync
# Window boundaries re-encoded as "%Y%m%dT%H%M" strings.
# NOTE: this rebinds run_date / end_date from datetime objects to str, so cells
# that call datetime methods on them must not be re-run after this one.
run_date, end_date = (
    datetime.strptime(iso_day, "%Y-%m-%d").strftime("%Y%m%dT%H%M")
    for iso_day in (START_DATE, END_DATE)
)
TICKERS = [
    "AAPL", "GOOG", "MSFT", "META",
    "TSLA", "NVDA", "AMZN", "NFLX",
]
print(run_date)

In [None]:
# Synchronous client, authenticated with the key from the AV_TOKEN environment variable.
client = AlphaVantageClient(api_key=os.environ['AV_TOKEN'])
# Presumably daily price data for AAPL; confirm the return type against alpha_vantage_pro.
df = client.get_daily("AAPL")
# News sentiment for all TICKERS between run_date and end_date (the "%Y%m%dT%H%M"
# strings built in the preceding cell). The trailing 10000 is presumably a
# result limit; TODO confirm against the client's signature.
news = client.get_news_sentiment(TICKERS, run_date, end_date, 10000)

In [None]:
# Print the result of news.count(), then display df as the cell's output.
print(news.count())
df

In [None]:
client = AlphaVantageClientAsync(api_key="YOUR_KEY")
df = await client.get_daily_async(TICKERS)
await client.close()

In [None]:
# Display df, which at this point holds the result of the async fetch cell.
df