1. Put your Polygon API key into a `secret.txt` file next to `download.ipynb`.
2. Change the list of tickers and set the timeframe (`timespan` and `multiplier`) in the cell below.

In [2]:
# Ticker symbols to download; edit this list as needed.
TICKERS = [
    'AAPL',
    'MSFT',
]

# Together these two settings define the timeframe of the requested aggregates.
# Allowed timespans: second, minute, hour, day, week, month, quarter, year.
multiplier = 1
timespan = 'minute'

In [3]:
!pip install polygon-api-client pandas python-dateutil

Collecting polygon-api-client
  Downloading polygon_api_client-1.14.6-py3-none-any.whl (44 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.2/44.2 KB[0m [31m641.5 kB/s[0m eta [36m0:00:00[0m31m1.3 MB/s[0m eta [36m0:00:01[0m
[?25hCollecting pandas
  Downloading pandas-2.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.3 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m45.2 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m36m0:00:01[0m
Collecting websockets<15.0,>=10.3
  Downloading websockets-14.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (169 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m169.3/169.3 KB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting urllib3<3.0.0,>=1.26.9
  Using cached urllib3-2.4.0-py3-none-any.whl (128 kB)
Collecting certifi<2026.0.0,>=2022.5.18
  Usi

Run the cell below to start the download:

In [4]:
import os
import pandas as pd
from polygon import RESTClient
from datetime import datetime, timedelta
import logging
from dateutil import tz
import time
import sys

# Logging: INFO-level messages go to stdout so they show up in the cell output.
# To keep a log file as well, add logging.FileHandler('polygon_download.log')
# to the handlers list.
logging.basicConfig(
    handlers=[logging.StreamHandler(sys.stdout)],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger()

# Read API key from a text file (secret.txt by default)
def read_api_key(file_path='secret.txt'):
    """Read the API key stored in a text file.

    Args:
        file_path: Path of the file holding the key. Defaults to 'secret.txt'.

    Returns:
        The file content with surrounding whitespace stripped, or None when
        the file cannot be read or contains nothing but whitespace. Problems
        are logged, not raised.
    """
    try:
        # 'utf-8-sig' also drops a leading byte-order mark, which some editors
        # add and which strip() would otherwise leave inside the key.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            api_key = f.read().strip()
    except Exception as e:
        logger.error(f"Error reading API key from {file_path}: {e}")
        return None

    if not api_key:
        # An empty file is a failure, not a successful read.
        logger.error(f"API key file {file_path} is empty")
        return None

    # Report the path that was actually read, not a hard-coded file name.
    logger.info(f"Successfully read API key from {file_path}")
    return api_key

# Build the Polygon REST client; stop right away when no key is available.
API_KEY = read_api_key()
if API_KEY:
    client = RESTClient(API_KEY)
else:
    logger.error("No API key provided. Exiting.")
    sys.exit(1)

# Download settings
OUTPUT_DIR = './stock_data/'  # CSV files are written here
START_DATE = '2003-01-01'  # first day of history to request
END_DATE = datetime.now().date().isoformat()  # today, as YYYY-MM-DD
CHUNK_DAYS = 365  # size of one download chunk in days (keeps memory use bounded)
RETRY_LIMIT = 3  # attempts per chunk before giving up
RETRY_DELAY = 5  # seconds to wait between attempts

# Create the output directory up front; an existing directory is not an error.
os.makedirs(OUTPUT_DIR, exist_ok=True)

def convert_to_et(timestamp_ms):
    """Convert a Unix timestamp in milliseconds (UTC) to an Eastern Time string.

    Args:
        timestamp_ms: Milliseconds since the Unix epoch, interpreted as UTC.

    Returns:
        The matching America/New_York wall-clock time formatted as
        'YYYY-MM-DD HH:MM:SS'. The string carries no UTC offset.
    """
    # Build the timezone-aware value in one step: datetime.utcfromtimestamp()
    # is deprecated since Python 3.12 and only returns a naive datetime.
    utc_time = datetime.fromtimestamp(timestamp_ms / 1000, tz=tz.tzutc())
    et_time = utc_time.astimezone(tz.gettz('America/New_York'))
    return et_time.strftime('%Y-%m-%d %H:%M:%S')

def fetch_aggs(ticker, start_date, end_date):
    """Fetch aggregate bars for one ticker over a date range, with retries.

    The bar size is taken from the module-level ``multiplier`` and
    ``timespan`` settings. Up to RETRY_LIMIT attempts are made, sleeping
    RETRY_DELAY seconds between them.

    Args:
        ticker: Ticker symbol passed to the API.
        start_date: Start of the range, passed to the API as ``from_``.
        end_date: End of the range, passed to the API as ``to``.

    Returns:
        A list with one dict per bar (keys: timestamp, open, high, low, close,
        volume, vwap, transactions, otc); the timestamp is the result of
        convert_to_et(). An empty list is returned when every attempt failed.
    """
    for attempt in range(1, RETRY_LIMIT + 1):
        # Start every attempt with a fresh list: a failure part-way through
        # the result stream must not leave rows behind that the retry would
        # then fetch and append a second time.
        data = []
        try:
            logger.info(f"Fetching data for {ticker} from {start_date} to {end_date}")
            for agg in client.list_aggs(
                ticker=ticker,
                multiplier=multiplier,
                timespan=timespan,
                from_=start_date,
                to=end_date,
                adjusted=True,
                sort='asc',
                limit=50000
            ):
                data.append({
                    'timestamp': convert_to_et(agg.timestamp),
                    'open': agg.open,
                    'high': agg.high,
                    'low': agg.low,
                    'close': agg.close,
                    'volume': agg.volume,
                    'vwap': agg.vwap,
                    'transactions': agg.transactions,
                    'otc': agg.otc
                })
                # Progress message every 50,000 rows.
                if len(data) % (50 * 1000) == 0:
                    logger.info(f"Retrieved {len(data)} records for {ticker}")
            logger.info(f"Retrieved {len(data)} records for {ticker}")
            return data
        except Exception as e:
            logger.warning(f"Attempt {attempt}/{RETRY_LIMIT} failed for {ticker}: {e}")
            if attempt < RETRY_LIMIT:
                time.sleep(RETRY_DELAY)

    # Reached only when no attempt succeeded (or RETRY_LIMIT is not positive).
    logger.error(f"Failed to fetch data for {ticker} after {RETRY_LIMIT} attempts")
    return []

def save_to_csv(ticker, data):
    """Write rows to <OUTPUT_DIR>/<ticker>.csv, appending when the file exists.

    Args:
        ticker: Symbol used to build the file name.
        data: List of row dicts; nothing is written when it is empty.

    The header row is written only when the file does not exist yet. Errors
    raised while writing are logged, not propagated.
    """
    if data:
        frame = pd.DataFrame(data)
        csv_path = os.path.join(OUTPUT_DIR, f"{ticker}.csv")
        logger.info(f"Saving to {csv_path}")
        try:
            # Mode 'a' creates a missing file, so only the header flag depends
            # on whether the file is already there.
            is_new_file = not os.path.exists(csv_path)
            frame.to_csv(csv_path, mode='a', header=is_new_file, index=False)
            logger.info(f"Saved {len(frame)} records to {csv_path}")
        except Exception as e:
            logger.error(f"Error saving CSV for {ticker}: {e}")
    else:
        logger.warning(f"No data to save for {ticker}")

def process_ticker(ticker, start_date, end_date):
    """Download and save one ticker's history in consecutive date chunks.

    Args:
        ticker: Ticker symbol to download.
        start_date: First day to fetch, formatted 'YYYY-MM-DD'.
        end_date: Last day to fetch, formatted 'YYYY-MM-DD'.

    Each chunk ends at most CHUNK_DAYS days after its first day. A chunk is
    fetched with fetch_aggs() and handed to save_to_csv() before the next one
    is requested.
    """
    start = datetime.strptime(start_date, '%Y-%m-%d')
    end = datetime.strptime(end_date, '%Y-%m-%d')
    current_start = start

    # "<=" rather than "<": every chunk begins the day after the previous one
    # ended, so the final chunk can begin exactly on `end`. A strict
    # comparison would skip that last day (and a one-day range entirely).
    while current_start <= end:
        current_end = min(current_start + timedelta(days=CHUNK_DAYS), end)
        data = fetch_aggs(ticker, current_start.strftime('%Y-%m-%d'), current_end.strftime('%Y-%m-%d'))
        save_to_csv(ticker, data)
        current_start = current_end + timedelta(days=1)

def main():
    """Download every ticker in TICKERS from START_DATE to END_DATE.

    Logs an error and does nothing else when TICKERS is empty.
    """
    if TICKERS:
        for symbol in TICKERS:
            logger.info(f"Processing ticker: {symbol}")
            process_ticker(symbol, START_DATE, END_DATE)
    else:
        logger.error("No tickers defined. Exiting.")

# Start the download when this cell (or script) is executed directly.
if __name__ == "__main__":
    main()

2025-06-10 19:22:37,249 - INFO - Successfully read API key from secret.txt
2025-06-10 19:22:37,251 - INFO - Processing ticker: AAPL
2025-06-10 19:22:37,251 - INFO - Fetching data for AAPL from 2003-01-01 to 2004-01-01
2025-06-10 19:22:39,381 - INFO - Retrieved 32143 records for AAPL
2025-06-10 19:22:39,418 - INFO - Saving to ./stock_data/AAPL.csv
2025-06-10 19:22:39,554 - INFO - Saved 32143 records to ./stock_data/AAPL.csv
2025-06-10 19:22:39,555 - INFO - Fetching data for AAPL from 2004-01-02 to 2005-01-01
2025-06-10 19:22:41,844 - INFO - Retrieved 50000 records for AAPL
2025-06-10 19:22:43,840 - INFO - Retrieved 100000 records for AAPL
2025-06-10 19:22:44,466 - INFO - Retrieved 109827 records for AAPL
2025-06-10 19:22:44,640 - INFO - Saving to ./stock_data/AAPL.csv
2025-06-10 19:22:45,134 - INFO - Saved 109827 records to ./stock_data/AAPL.csv
2025-06-10 19:22:45,137 - INFO - Fetching data for AAPL from 2005-01-02 to 2006-01-02
2025-06-10 19:22:47,782 - INFO - Retrieved 50000 records 