## Setting Up Alpaca API Access

__Before__ running the code, follow the steps below to set up your Alpaca API credentials for accessing financial news and stock price data.

### 1. Create an Alpaca API Account

Refer to the `Creating_an_Alpaca_API_Account.pdf` file in the `materials/` folder for instructions on creating your Alpaca account.

You will need to obtain:

- **API Key ID**
- **Secret Key**

These credentials are necessary for connecting to the Alpaca API and retrieving financial data.

### 2. Store Your Credentials in a `.env` File

Once you have your API credentials:

1. Go to the `materials/` folder in your project directory.
2. Create a new file named `.env` (e.g., `materials/.env`).
3. Add the following lines to the `.env` file, replacing the placeholders with your actual credentials:

    ```.env
    APCA_API_KEY_ID=your_alpaca_key_id_here
    APCA_API_SECRET_KEY=your_alpaca_secret_key_here
    ```

> **Important:** Keep your `.env` file secure. Do **not** upload it to public repositories. Make sure to add `.env` to your `.gitignore` file to prevent accidental exposure.

### Fetch financial news headlines and summaries

In [2]:
# import necessary libraries
import requests
import os
import csv
import json
from datetime import datetime, timedelta
from dotenv import load_dotenv

# load environment variables from .env file
load_dotenv()

# load Alpaca API credentials; the setup notes name the variables with
# underscores (APCA_API_KEY_ID / APCA_API_SECRET_KEY), so read those first
# and fall back to the hyphenated names for .env files that use them
API_KEY = os.getenv("APCA_API_KEY_ID") or os.getenv("APCA-API-KEY-ID")
API_SECRET = os.getenv("APCA_API_SECRET_KEY") or os.getenv("APCA-API-SECRET-KEY")

# define constants for API access
BASE_URL = "https://data.alpaca.markets/v1beta1/news"  # Alpaca news endpoint
SYMBOL = "NVDA"  # target stock symbol
LIMIT = 50  # max news articles per request


def fetch_news_for_date(date_str, timeout=30):
    """
    Fetch all news articles for a specific date, following pagination.
    Args:
        date_str (str): date in 'YYYY-MM-DD' format.
        timeout (float): seconds to wait for each HTTP response.
    Returns:
        list: a list of news articles for the specified date, or an empty
            list if any request for that date fails.
    """

    # query parameters covering the entire day; requests URL-encodes them
    params = {
        "symbols": SYMBOL,
        "start": f"{date_str}T00:00:00Z",
        "end": f"{date_str}T23:59:59Z",
        "limit": LIMIT,
    }

    # prepare request headers with Alpaca API credentials
    headers = {
        "accept": "application/json",
        "APCA-API-KEY-ID": API_KEY,
        "APCA-API-SECRET-KEY": API_SECRET
    }

    articles = []
    while True:
        # a timeout keeps one stalled connection from hanging the whole run
        try:
            response = requests.get(
                BASE_URL, headers=headers, params=params, timeout=timeout
            )
        except requests.RequestException as exc:
            print(f"Failed to fetch news for {date_str}: {exc}")
            return []

        # check for a successful response
        if response.status_code != 200:
            print(f"Failed to fetch news for {date_str}: {response.status_code}")
            return []

        payload = response.json()

        # "news" may be missing or null; treat both as "no articles"
        articles.extend(payload.get("news") or [])

        # a single response holds at most LIMIT articles; keep requesting
        # pages until the API stops returning a continuation token
        page_token = payload.get("next_page_token")
        if not page_token:
            return articles
        params["page_token"] = page_token


def save_news_to_csv(start_date_str, end_date_str, output_path="../data/nvda_news.csv"):
    """
    Compile news headlines and summaries from a date range and save them to a CSV file.

    One row is written per calendar day (including days with no news); the
    'content' column holds a JSON-encoded list of that day's headlines and
    summaries.
    Args:
        start_date_str (str): start date in 'YYYY-MM-DD' format.
        end_date_str (str): end date in 'YYYY-MM-DD' format (inclusive).
        output_path (str): path to save the output CSV.
    """

    # convert date strings to datetime objects
    start_date = datetime.strptime(start_date_str, "%Y-%m-%d")
    end_date = datetime.strptime(end_date_str, "%Y-%m-%d")

    # create the output folder up front so a missing directory is not
    # discovered only after every day has already been fetched
    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)

    all_news = []

    current_date = start_date
    while current_date <= end_date:
        date_str = current_date.strftime("%Y-%m-%d")
        print(f"Fetching news for {date_str}...")

        # fetch news for the current date
        daily_news = fetch_news_for_date(date_str)

        content_list = []
        for item in daily_news:
            # clean and collect headline, then summary; `or ""` guards
            # against a field that is present but null
            for field in ("headline", "summary"):
                text = (item.get(field) or "").replace("\n", " ").strip()
                if text:
                    content_list.append(text)

        # add date and content list (as JSON string) to result
        all_news.append({
            "date": date_str,
            "content": json.dumps(content_list)  # Store as JSON string for proper list format
        })

        # move to the next day
        current_date += timedelta(days=1)

    # write collected news data to CSV
    with open(output_path, mode="w", newline="", encoding="utf-8") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=["date", "content"])
        writer.writeheader()
        writer.writerows(all_news)

    print(f"Saved news for {len(all_news)} days to {output_path}")

# run the news aggregation for the chosen date range (both ends inclusive)
save_news_to_csv(start_date_str="2025-01-06", end_date_str="2025-02-28")

Fetching news for 2025-01-06...
Fetching news for 2025-01-07...
Fetching news for 2025-01-08...
Fetching news for 2025-01-09...
Fetching news for 2025-01-10...
Fetching news for 2025-01-11...
Fetching news for 2025-01-12...
Fetching news for 2025-01-13...
Fetching news for 2025-01-14...
Fetching news for 2025-01-15...
Fetching news for 2025-01-16...
Fetching news for 2025-01-17...
Fetching news for 2025-01-18...
Fetching news for 2025-01-19...
Fetching news for 2025-01-20...
Fetching news for 2025-01-21...
Fetching news for 2025-01-22...
Fetching news for 2025-01-23...
Fetching news for 2025-01-24...
Fetching news for 2025-01-25...
Fetching news for 2025-01-26...
Fetching news for 2025-01-27...
Fetching news for 2025-01-28...
Fetching news for 2025-01-29...
Fetching news for 2025-01-30...
Fetching news for 2025-01-31...
Fetching news for 2025-02-01...
Fetching news for 2025-02-02...
Fetching news for 2025-02-03...
Fetching news for 2025-02-04...
Fetching news for 2025-02-05...
Fetching

### Fetch stock open price data

In [5]:
# load environment variables from the .env file again before the price functions read them
# NOTE(review): with default arguments python-dotenv presumably leaves variables
# that are already set untouched, so this would not pick up edited values — confirm
load_dotenv()


def build_url(start_date, end_date, symbol="NVDA", timeframe="1D", limit=1000):
    """
    Assemble the request URL for Alpaca historical bar data.

    Values are inserted into the query string as given (no URL-encoding).
    Args:
        start_date (str): start date in 'YYYY-MM-DD' format.
        end_date (str): end date in 'YYYY-MM-DD' format.
        symbol (str): stock symbol.
        timeframe (str): time interval (e.g., '1D' for daily).
        limit (int): max number of records to fetch.
    Returns:
        str: a fully formatted API request URL.
    """

    # query parameters in the order they appear in the URL
    query_fields = (
        ("timeframe", timeframe),
        ("start", start_date),
        ("end", end_date),
        ("limit", limit),
    )
    query = "&".join(f"{key}={value}" for key, value in query_fields)

    endpoint = "https://data.alpaca.markets/v2/stocks/" + f"{symbol}/bars"
    return endpoint + "?" + query


def get_opening_prices(start_date, end_date):
    """
    Retrieve the daily opening prices for a given stock over a date range.

    The URL comes from build_url(), so its default symbol, timeframe and
    limit apply. Further pages are requested while the response carries a
    continuation token.
    Args:
        start_date (str): start date in 'YYYY-MM-DD' format.
        end_date (str): end date in 'YYYY-MM-DD' format.
    Returns:
        list: a list of dictionaries with date and opening price, or an
            empty list if any request fails or no bars are returned.
    """

    url = build_url(start_date, end_date)

    # the setup notes name the credentials with underscores; fall back to
    # the hyphenated names for .env files that use them
    headers = {
        "accept": "application/json",
        "APCA-API-KEY-ID": os.getenv("APCA_API_KEY_ID") or os.getenv("APCA-API-KEY-ID"),
        "APCA-API-SECRET-KEY": os.getenv("APCA_API_SECRET_KEY") or os.getenv("APCA-API-SECRET-KEY")
    }

    prices = []
    page_token = None
    while True:
        # requests appends the token to the query string already in `url`
        params = {"page_token": page_token} if page_token else None

        # a timeout keeps a stalled connection from hanging the notebook
        try:
            response = requests.get(url, headers=headers, params=params, timeout=30)
        except requests.RequestException as exc:
            print(f"Failed to fetch data: {exc}")
            return []

        if response.status_code != 200:
            print(f"Failed to fetch data: {response.status_code}")
            return []

        payload = response.json()

        # "bars" may be missing or null; treat both as "no bars"
        for bar in payload.get("bars") or []:
            prices.append({
                "date": bar["t"].split("T")[0],  # extract date part only from timestamp
                "open_price": bar["o"]
            })

        page_token = payload.get("next_page_token")
        if not page_token:
            return prices


def save_open_prices_to_csv(start_date, end_date, output_path="../data/nvda_open_prices.csv"):
    """
    Fetch opening prices for a stock and save them to a CSV file.

    Nothing is written when no price data is returned.
    Args:
        start_date (str): Start date in 'YYYY-MM-DD' format.
        end_date (str): End date in 'YYYY-MM-DD' format.
        output_path (str): File path for the output CSV.
    """

    data = get_opening_prices(start_date, end_date)

    if not data:
        print("No data found.")
        return

    # make sure the target folder exists so open() cannot fail on it
    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)

    # explicit encoding keeps the file identical across platforms and
    # matches how the news CSV is written
    with open(output_path, mode="w", newline="", encoding="utf-8") as csvfile:
        fieldnames = ["date", "open_price"]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)

    print(f"Saved {len(data)} entries to {output_path}")

# fetch and save the opening prices for the chosen date range
save_open_prices_to_csv(start_date="2025-01-06", end_date="2025-02-28")

Saved 37 entries to ../data/nvda_open_prices.csv
