In [2]:

import os

# Alpha Vantage credential, read from the environment so the secret is never
# stored in (or shared with) the notebook file. Set it before starting the
# kernel, e.g. `export ALPHAVANTAGE_API_KEY=...`.
API_KEY = os.environ.get("ALPHAVANTAGE_API_KEY", "")

if not API_KEY:
    # Warn instead of raising so the rest of the notebook can still be loaded;
    # any request made with an empty key will be rejected by the API.
    print("⚠️ ALPHAVANTAGE_API_KEY is not set; Alpha Vantage requests will fail.")


In [9]:
import requests
import time
import pandas as pd


In [5]:
# Smoke test: request the latest AAPL news sentiment and inspect the top-level
# keys of the JSON payload.
url = (
    "https://www.alphavantage.co/query?"
    "function=NEWS_SENTIMENT"
    "&tickers=AAPL"
    "&sort=LATEST"
    f"&apikey={API_KEY}"
)

# requests applies no timeout by default, so a stalled connection would hang
# the kernel indefinitely; bound the wait explicitly.
response = requests.get(url, timeout=30)
# Surface HTTP-level failures (4xx/5xx) here rather than as a confusing JSON
# decoding error on the next line.
response.raise_for_status()
data = response.json()

# Check API response
data.keys()

dict_keys(['items', 'sentiment_score_definition', 'relevance_score_definition', 'feed'])

In [6]:
# Flatten the API payload into one row per article that carries an AAPL
# ticker-level sentiment score.
#
# "feed" and "ticker_sentiment" are read with `.get` because the API can answer
# with a JSON body that has no "feed" key at all (e.g. an "Information"
# message); in that case the result is simply an empty frame instead of a
# KeyError.
sentiment_rows = [
    {
        # time_published looks like "20240307T143000"; the first 8 characters
        # are the calendar date.
        "date": pd.to_datetime(article["time_published"][:8], format="%Y%m%d"),
        "sentiment_score": float(ticker["ticker_sentiment_score"]),
    }
    for article in data.get("feed", [])
    for ticker in article.get("ticker_sentiment", [])
    if ticker.get("ticker") == "AAPL"
]

# Naming the columns explicitly keeps the schema stable even when no row matched.
sentiment_df = pd.DataFrame(sentiment_rows, columns=["date", "sentiment_score"])
sentiment_df.head()


Unnamed: 0,date,sentiment_score
0,2025-11-30,0.046304
1,2025-11-30,0.053888
2,2025-11-30,0.335438
3,2025-11-30,0.209085
4,2025-11-30,0.124185


In [19]:
def _to_alpha_time(dt: pd.Timestamp) -> str:
    """Convert pandas Timestamp to Alpha Vantage time format YYYYMMDDTHHMM."""
    return dt.strftime("%Y%m%dT%H%M")

In [28]:
def fetch_alpha_sentiment_aapl_via_topics(
    api_key: str,
    start_date: str = "2016-04-01",
    end_date: str | None = None,
    chunk_days: int = 30,
    limit: int = 1000,
    sleep_sec: int = 15,
):
    if end_date is None:
        end_ts = pd.Timestamp.utcnow().tz_localize(None).normalize()
    else:
        end_ts = pd.to_datetime(end_date).tz_localize(None)

    start_ts = pd.to_datetime(start_date).tz_localize(None)

    all_rows = []
    current_start = start_ts

    while current_start <= end_ts:
        current_end = min(current_start + pd.Timedelta(days=chunk_days - 1), end_ts)

        time_from = current_start.strftime("%Y%m%dT%H%M")
        time_to   = (current_end + pd.Timedelta(hours=23, minutes=59)).strftime("%Y%m%dT%H%M")

        print(f"Window: {current_start.date()} → {current_end.date()}")

        url = (
            "https://www.alphavantage.co/query?"
            "function=NEWS_SENTIMENT"
            "&topics=technology,earnings,financial_markets,ipo"
            f"&time_from={time_from}"
            f"&time_to={time_to}"
            "&sort=EARLIEST"
            f"&limit={limit}"
            f"&apikey={api_key}"
        )

        r = requests.get(url)
        data = r.json()

        if "feed" not in data:
            print("⚠️ API issue:", data)
            break

        feed = data["feed"]
        if not feed:
            print("No news in this window.")
        else:
            for article in feed:
                tp = article["time_published"]
                time_published = pd.to_datetime(tp, format="%Y%m%dT%H%M%S")

                for tk in article.get("ticker_sentiment", []):
                    if tk.get("ticker") == "AAPL":   # ✅ HARD FILTER TO AAPL ONLY
                        all_rows.append({
                            "time_published": time_published,
                            "date": time_published.normalize(),
                            "title": article.get("title", ""),
                            "sentiment_score": float(tk.get("ticker_sentiment_score", 0.0)),
                            "sentiment_label": tk.get("ticker_sentiment_label", ""),
                        })

        time.sleep(sleep_sec)
        current_start = current_end + pd.Timedelta(days=1)

    return pd.DataFrame(all_rows)


In [29]:
# Crawl AAPL sentiment from 2016-04-01 up to the function's default end date,
# one 30-day window per request with a 15 second pause between requests.
alpha_raw = fetch_alpha_sentiment_aapl_via_topics(
    API_KEY,
    "2016-04-01",
    sleep_sec=15,
    limit=1000,
    chunk_days=30,
)

# Report how many article-level rows came back, then preview the first few.
print("✅ Total AAPL sentiment rows:", len(alpha_raw))
alpha_raw.head(5)


Window: 2016-04-01 → 2016-04-30
⚠️ API issue: {'Information': 'No articles found. Please adjust the time range or refer to the API documentation https://www.alphavantage.co/documentation#newsapi and try again.'}
✅ Total AAPL sentiment rows: 0


In [12]:
# Average the article-level scores into a single sentiment value per day,
# ordered chronologically.
alpha_daily = (
    alpha_raw
    .groupby(by="date", as_index=False)["sentiment_score"]
    .agg("mean")
    .sort_values(by="date")
)

# Show both ends of the daily series.
(alpha_daily.head(5), alpha_daily.tail(5))


(        date  sentiment_score
 0 2010-01-01         0.224705
 1 2010-01-05         0.198539
 2 2010-01-06         0.137764
 3 2010-01-07         0.310485
 4 2010-01-21         0.100315,
         date  sentiment_score
 2 2010-01-06         0.137764
 3 2010-01-07         0.310485
 4 2010-01-21         0.100315
 5 2010-01-25         0.737967
 6 2010-01-26         0.439101)

In [14]:
# Write the article-level sentiment rows to a CSV file, omitting the index column.
alpha_raw.to_csv(path_or_buf="final_tft_dataset_alpha_sentiment.csv", index=False)

In [15]:
# Rebuild the per-day mean sentiment from the raw rows, sorted by date.
alpha_daily = (
    pd.DataFrame(alpha_raw)
    .groupby(by="date", as_index=False)["sentiment_score"]
    .agg("mean")
    .sort_values(by="date")
)

# Report the first and last day covered by the daily series.
print("Start:", alpha_daily["date"].min())
print("End  :", alpha_daily["date"].max())


Start: 2010-01-01 00:00:00
End  : 2010-01-26 00:00:00
