### Imports

In [25]:

import os
import time
import pandas as pd

from massive import RESTClient
from dotenv import load_dotenv
from datetime import datetime, date
from zoneinfo import ZoneInfo
from tqdm import tqdm

# Load variables from a local .env file (if any) into the process environment,
# then read the API key from there so it never has to be hardcoded here.
load_dotenv()
MASSIVE_API_KEY = os.environ.get("MASSIVE_API_KEY")

# Shared REST client used by the exploratory cells below.
client = RESTClient(api_key=MASSIVE_API_KEY)

In [72]:
# --- Output location -------------------------------------------------------
DIR = "data"  # folder that receives the per-ticker CSV files
os.makedirs(DIR, exist_ok=True)

# --- Aggregate-bar request settings (forwarded to client.list_aggs) --------
MULTIPLIER = 1
TIMESPAN = "day"
ADJUSTED = "true"
SORT = "asc"
LIMIT = 5000

# Optional "YYYY-MM-DD" start date; None means "use the earliest allowed date".
# Can only go back 2 years.
START = None

# --- Pacing ----------------------------------------------------------------
# Rate limit is 5 requests per minute, so wait 60 / 5 seconds between tickers.
SLEEP_TIME = 60 / 5


API reference:

- All tickers: https://massive.com/docs/rest/stocks/tickers/all-tickers
- Custom bars (aggregates): https://massive.com/docs/rest/stocks/aggregates/custom-bars

In [None]:
# Smoke test: request a few daily NVDA bars and print the raw Agg objects
# so the shape of the API response is visible before building on it.
aggs = list(
    client.list_aggs(
        "NVDA",
        1,
        "day",
        "2025-11-21",
        "2025-11-24",
        adjusted="true",
        sort="asc",
        limit=3,
    )
)

print(aggs)

[Agg(open=181.235, high=184.56, low=172.93, close=178.88, volume=346920745.0, vwap=179.3063, timestamp=1763701200000, transactions=5306174, otc=None)]


In [57]:
def today():
    """Return the current calendar date in the America/New_York time zone."""
    eastern = ZoneInfo("America/New_York")
    now_eastern = datetime.now(tz=eastern)
    return now_eastern.date()

def parse_date(string):
    """Parse a ``YYYY-MM-DD`` string into a ``datetime.date``.

    Raises:
        ValueError: if ``string`` does not match the ``%Y-%m-%d`` format.
    """
    parsed = datetime.strptime(string, "%Y-%m-%d")
    return parsed.date()

def clamp_date(end_date):
    """Return the date two calendar years before ``end_date``.

    This is the earliest start date the notebook will request.

    Args:
        end_date: a ``datetime.date`` (anything with ``year`` and ``replace``).

    Returns:
        The same month/day two years earlier. For Feb 29, which does not
        exist two years before any leap year, Mar 1 is returned so the result
        never reaches further back than two years.
    """
    target_year = end_date.year - 2
    try:
        return end_date.replace(year=target_year)
    except ValueError:
        # Only reachable for Feb 29: the target year has no such day.
        # Mar 1 is the first valid date that stays inside the 2-year window.
        return end_date.replace(year=target_year, month=3, day=1)

In [61]:
def convert(bars, tz="America/New_York"):
    """Turn a list of aggregate-bar objects into a tidy price DataFrame.

    Args:
        bars: iterable of objects whose attributes (read via ``vars``) include
            ``timestamp`` (epoch milliseconds), ``open``, ``high``, ``low``,
            ``close``, ``volume`` and ``vwap``.
        tz: time zone used to turn each timestamp into a calendar date.
            Defaults to US Eastern, the zone ``today()`` uses elsewhere in
            this notebook.

    Returns:
        DataFrame with columns ``date, open, high, low, close, volume, vwap``
        in chronological order and a fresh 0..n-1 index. An empty input yields
        an empty frame with the same columns.
    """
    cols = ["date", "open", "high", "low", "close", "volume", "vwap"]
    if not bars:
        return pd.DataFrame(columns=cols)

    df = pd.DataFrame([vars(bar) for bar in bars])

    # Timestamps are epoch milliseconds (UTC). Derive the date in the exchange
    # time zone rather than in UTC: a bar stamped between 00:00 UTC and local
    # midnight would otherwise be filed under the following day.
    stamps = pd.to_datetime(df["timestamp"], unit="ms", utc=True)
    df["date"] = stamps.dt.tz_convert(tz).dt.date

    # Order by the raw timestamp so bars sharing a date keep a deterministic,
    # chronological order.
    result = df.sort_values("timestamp")[cols].reset_index(drop=True)

    return result

In [65]:
def save_early(collected, filepath):
    """Write whatever bars were gathered before a failure to ``filepath``.

    Args:
        collected: list of bar objects accepted by ``convert``; may be empty.
        filepath: destination CSV path.

    Prints a short status line either way and returns ``None``. Nothing is
    written when ``collected`` is empty.
    """
    if collected:
        partial_frame = convert(collected)
        partial_frame.to_csv(filepath, index=False)
        print(f"Early save of ({len(partial_frame)}) rows at {filepath}")
    else:
        print("Nothing returned from API before error.")


def fetch_data(client, ticker, start_date, end_date):
    """Stream aggregate bars for ``ticker`` and return them as a list.

    Args:
        client: object exposing ``list_aggs`` (the REST client).
        ticker: symbol to request.
        start_date, end_date: date objects; sent to the API via ``isoformat()``.

    Returns:
        List of bar objects in the order the API yielded them.

    Raises:
        Exception: anything raised while iterating ``list_aggs`` is re-raised
            unchanged, with the bars received so far attached to it as
            ``exc.partial_bars`` so a caller can still save them.
    """
    aggs = []

    print(f"Grabbing {ticker} from {start_date.isoformat()} to {end_date.isoformat()}")

    try:
        with tqdm(desc="Fetching bars", unit="bars") as pbar:
            for bar in client.list_aggs(
                ticker, MULTIPLIER, TIMESPAN,
                start_date.isoformat(), end_date.isoformat(),
                adjusted=ADJUSTED, sort=SORT, limit=LIMIT
            ):
                aggs.append(bar)
                pbar.update(1)
    except Exception as exc:
        # Without this the partially filled list dies with this frame and the
        # caller has nothing left to salvage.
        exc.partial_bars = aggs
        raise

    return aggs


In [70]:
def query(ticker):
    """Download bars for ``ticker`` up to today and write them to a CSV in ``DIR``.

    The window ends today (US Eastern) and starts at ``START`` when set,
    otherwise at the earliest date allowed by ``clamp_date``. A ``START``
    older than that limit is reported and nothing is fetched.

    On success the data goes to ``{DIR}/{ticker}_full.csv``. On any exception
    the error is printed and whatever bars are available are handed to
    ``save_early`` for ``{DIR}/{ticker}_early.csv``. Returns ``None``.
    """
    end_date = today()
    allowed_start = clamp_date(end_date)
    start_date = parse_date(START) if START else allowed_start

    if start_date < allowed_start:
        print(f"Error: {start_date} older than 2-year limit {allowed_start}")
        return

    # Keyword form matches how the module-level client is constructed.
    api_client = RESTClient(api_key=MASSIVE_API_KEY)
    res = []

    try:
        res = fetch_data(api_client, ticker, start_date, end_date)

        df = convert(res)
        filename = f"{DIR}/{ticker}_full.csv"
        df.to_csv(filename, index=False)

    except Exception as exc:
        print(f"type: {type(exc).__name__}")
        print(f"error: {exc}")

        # `res` is only bound once the fetch returns, so after a mid-fetch
        # failure it is still empty. If the exception carries the bars that
        # were gathered before the failure (a `partial_bars` attribute),
        # save those instead; otherwise fall back to `res`.
        salvaged = getattr(exc, "partial_bars", None) or res

        filename = f"{DIR}/{ticker}_early.csv"
        save_early(salvaged, filename)

In [75]:
# Stocks: download each symbol in turn, pausing after every request so the
# next one (including the first request of the following cell) stays under
# the API rate limit.
tickers = ["AAPL", "NVDA", "XOM", "JPM", "TSLA", "AMZN"]
for ticker in tickers:
    query(ticker)
    print("Waiting for rate limit...")
    time.sleep(SLEEP_TIME)

Grabbing AMZN from 2023-11-24 to 2025-11-24


Fetching bars: 500bars [00:00, 988.21bars/s]


Waiting for rate limit...


In [76]:
# ETFs: same download loop as for the individual stocks, pausing after every
# request to stay under the API rate limit.
tickers = ["XLK", "SOXX", "XLE", "XLF", "ARKK", "XLY"]
for ticker in tickers:
    query(ticker)
    print("Waiting for rate limit...")
    time.sleep(SLEEP_TIME)

Grabbing XLK from 2023-11-24 to 2025-11-24


Fetching bars: 500bars [00:00, 1129.06bars/s]


Waiting for rate limit...
Grabbing SOXX from 2023-11-24 to 2025-11-24


Fetching bars: 500bars [00:00, 1115.86bars/s]


Waiting for rate limit...
Grabbing XLE from 2023-11-24 to 2025-11-24


Fetching bars: 500bars [00:00, 1075.84bars/s]


Waiting for rate limit...
Grabbing XLF from 2023-11-24 to 2025-11-24


Fetching bars: 500bars [00:00, 1129.05bars/s]


Waiting for rate limit...
Grabbing ARKK from 2023-11-24 to 2025-11-24


Fetching bars: 500bars [00:00, 1171.81bars/s]


Waiting for rate limit...
Grabbing XLY from 2023-11-24 to 2025-11-24


Fetching bars: 500bars [00:00, 1158.65bars/s]


Waiting for rate limit...
