<a href="https://colab.research.google.com/github/jadenfix/financial_cpp/blob/main/quant_data_mining.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# # Financial Data Fetching Notebook
# Inspired by the blog post: [Free Finance Datasets for Quants](https://dm13450.github.io/2023/11/25/Free-Finance-Datasets-for-Quants.html)

In [39]:
!pip install yfinance pandas pandas-datareader matplotlib sec-edgar-downloader requests --upgrade --quiet


# Import all necessary libraries for the script.


In [40]:
import yfinance as yf
import pandas as pd
import pandas_datareader.data as web
import datetime
import matplotlib.pyplot as plt
import os
from sec_edgar_downloader import Downloader
import requests # For direct API calls (e.g., Alpha Vantage)
import warnings
from google.colab import userdata # For Colab Secrets


load in data

In [41]:
try:
    # Recommended: Fetch from Colab Secrets
    ALPHA_VANTAGE_API_KEY = userdata.get('ALPHA_VANTAGE_API_KEY')
    if not ALPHA_VANTAGE_API_KEY: raise KeyError # Treat empty secret same as missing
    print("✅ Alpha Vantage API Key loaded from Colab Secrets.")
except KeyError:
    # Fallback if secret not set
    print("⚠️ Alpha Vantage API Key not found in Colab Secrets.")
    print("   Add a secret named 'ALPHA_VANTAGE_API_KEY' (key icon 🔑 left sidebar).")
    print("   Get a free key from: https://www.alphavantage.co/support/#api-key")
    print("   Using a placeholder - Alpha Vantage fetch will likely fail.")
    ALPHA_VANTAGE_API_KEY = "YOUR_API_KEY_PLACEHOLDER" # << REPLACE if not using Secrets


✅ Alpha Vantage API Key loaded from Colab Secrets.


paths

In [42]:
ALPHA_DOWNLOAD_PATH = "/content/alpha_vantage_data"

dates

In [43]:
start_date = datetime.datetime(2024, 1, 1)
end_date = datetime.datetime(2025, 4, 30)

In [44]:

def fetch_alpha_vantage_intraday_data(symbol, interval='5min', outputsize='compact', api_key=None):
    """
    Fetches intraday (minute-level) stock data from Alpha Vantage.

    Args:
        symbol (str): The stock ticker symbol (e.g., 'IBM', 'GOOGL').
        interval (str): The time interval between data points.
                        Valid values: '1min', '5min', '15min', '30min', '60min'.
        outputsize (str): 'compact' returns the latest 100 data points.
                          'full' returns up to 2 years of historical data for premium members,
                          limited history for free tier (typically 1-2 days).
        api_key (str): Your Alpha Vantage API key.

    Returns:
        pandas.DataFrame: DataFrame containing the intraday data, or None if an error occurs.
    """
    print(f"\n--- Fetching Alpha Vantage {interval} data for {symbol} ({outputsize}) ---")

    # Validate API Key
    if not api_key or api_key == "YOUR_API_KEY_PLACEHOLDER":
        print("❌ Error: Alpha Vantage API key not provided or is placeholder.")
        print("   Configure 'ALPHA_VANTAGE_API_KEY' in Colab Secrets or the config cell.")
        return None

    # Validate interval
    valid_intervals = ['1min', '5min', '15min', '30min', '60min']
    if interval not in valid_intervals:
        print(f"❌ Error: Invalid interval '{interval}'. Choose from: {valid_intervals}")
        return None

    # Construct the API URL
    # Documentation: https://www.alphavantage.co/documentation/#intraday
    url = (
        f'https://www.alphavantage.co/query?'
        f'function=TIME_SERIES_INTRADAY'
        f'&symbol={symbol}'
        f'&interval={interval}'
        f'&outputsize={outputsize}'
        f'&apikey={api_key}'
        # '&adjusted=true' # Default is true, uncomment to be explicit
        # '&extended_hours=true' # Default is true, uncomment to be explicit
    )

    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
        data = response.json()

        # --- Check for API Errors or Notes ---
        if "Error Message" in data:
            print(f"❌ Alpha Vantage API Error: {data['Error Message']}")
            return None
        if "Note" in data:
            print(f"ℹ️ Alpha Vantage API Note: {data['Note']}")
            # Often indicates free tier limit reached, but data might still be present

        # --- Process Data ---
        # The key in the JSON response is dynamic based on the interval
        time_series_key = f"Time Series ({interval})"

        if time_series_key in data:
            # Convert the nested dictionary to a Pandas DataFrame
            df = pd.DataFrame.from_dict(data[time_series_key], orient='index')
            df.index = pd.to_datetime(df.index) # Convert index to datetime
            df = df.astype(float) # Convert columns to numeric

            # Rename columns ('1. open' -> 'open', '4. close' -> 'close', etc.)
            df.rename(columns=lambda x: x.split('. ')[1].replace(' ', '_'), inplace=True)
            df.sort_index(inplace=True) # Ensure chronological order

            print(f"✅ Successfully fetched {len(df)} data points from Alpha Vantage.")
            print(df.head()) # Print first few rows

            # --- Simple Plot ---
            plt.figure(figsize=(12, 5)) # Create a new figure
            df['close'].plot(title=f'{symbol} Close Price ({interval} - Alpha Vantage)')
            plt.ylabel('Price')
            plt.xlabel('Time')
            plt.grid(True)
            # plt.show() # Display plot immediately if needed, otherwise wait for cell end

            return df
        else:
            print(f"❌ Could not find '{time_series_key}' in Alpha Vantage response.")
            print("   Check symbol, API key, and API limits.")
            print("   Response received:", data) # Print response for debugging
            return None

    except requests.exceptions.RequestException as e:
        print(f"❌ Network error fetching Alpha Vantage data: {e}")
        return None
    except Exception as e:
        print(f"❌ Error processing Alpha Vantage data: {e}")
        return None

In [58]:
ticker_symbol = 'APPL'   # Example: IBM, try others like 'GOOGL', 'MSFT', 'NVDA'
data_interval = '1min' # Options: '1min', '5min', '15min', '30min', '60min'
data_outputsize = 'full' # 'compact' (last 100 points) or 'full' (more history, subject to limits)
print(f"--- Starting Intraday Data Fetching ({datetime.datetime.now()}) ---")


--- Starting Intraday Data Fetching (2025-05-01 06:15:37.600419) ---


fetch data

In [59]:
APPL_data = fetch_alpha_vantage_intraday_data(
    symbol=ticker_symbol,
    interval=data_interval,
    outputsize=data_outputsize,
    api_key=ALPHA_VANTAGE_API_KEY
)

# --- Display Plot ---
if APPL_data is not None:
    print("\n--- Displaying Generated Plot ---")
    plt.tight_layout()
    plt.show() # Display the plot created by the function
    print("\n--- Data Fetching and Plotting Complete ---")
else:
    print("\n--- Data Fetching Failed ---")


--- Fetching Alpha Vantage 1min data for APPL (full) ---
❌ Alpha Vantage API Error: Invalid API call. Please retry or visit the documentation (https://www.alphavantage.co/documentation/) for TIME_SERIES_INTRADAY.

--- Data Fetching Failed ---


save to GD

In [57]:
import os
os.makedirs('/content/drive/MyDrive/quant data', exist_ok=True)
APPL_data.to_csv('/content/drive/MyDrive/quant data/APPL_data.csv', index=True)