### Imports

In [2]:

import os
import time
import pandas as pd

from massive import RESTClient
from dotenv import load_dotenv
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo
from tqdm import tqdm

# Load variables from a local .env file (if present) into the environment.
load_dotenv()

# API key for the Massive REST API; this is None when the variable is unset.
MASSIVE_API_KEY = os.environ.get("MASSIVE_API_KEY")

# Module-level client used by the exploratory request cell further down.
client = RESTClient(api_key=MASSIVE_API_KEY)

In [3]:
# --- Request parameters forwarded to client.list_aggs ----------------------
MULTIPLIER: int = 1
TIMESPAN: str = "hour"
ADJUSTED: str = "true"
SORT: str = "asc"
LIMIT: int = 50000

# Optional "YYYY-MM-DD" start date; None falls back to the earliest allowed day.
START = None   # Can only go back 2 years

# Directory that receives the generated CSV files.
DIR: str = "data"

# Seconds per request slot under the API rate limit.
REQUESTS_PER_MINUTE = 5   # Rate limit is 5/min
SLEEP_TIME: float = 60 / REQUESTS_PER_MINUTE

# Make sure the output directory exists before any cell writes to it.
os.makedirs(DIR, exist_ok=True)


Massive REST API reference:

- All tickers: https://massive.com/docs/rest/stocks/tickers/all-tickers
- Custom bars (aggregates): https://massive.com/docs/rest/stocks/aggregates/custom-bars

In [4]:
# Smoke test: request a handful of hourly XLE bars for a short window
# and print whatever the client returns.
aggs = list(
    client.list_aggs(
        "XLE",
        1,
        "hour",
        "2025-11-21",
        "2025-11-24",
        adjusted="true",
        sort="asc",
        limit=3,
    )
)

print(aggs)

[Agg(open=88.85, high=88.85, low=88.85, close=88.85, volume=1302, vwap=88.85, timestamp=1763715600000, transactions=21, otc=None)]


In [19]:
def today():
    """Return the current calendar date in the America/New_York time zone."""
    eastern = ZoneInfo("America/New_York")
    now_eastern = datetime.now(tz=eastern)
    return now_eastern.date()

def parse_date(string):
    """Parse a ``YYYY-MM-DD`` string into a ``datetime.date``.

    Raises:
        ValueError: propagated from ``datetime.strptime`` when ``string``
            does not match the ``%Y-%m-%d`` format.
    """
    parsed = datetime.strptime(string, "%Y-%m-%d")
    return parsed.date()

def clamp_date(end_date):
    """Return the date two calendar years before ``end_date``.

    The result is used as the earliest start date that may be requested.

    Feb 29 needs special handling: the year two years earlier is never a leap
    year, so ``date.replace`` would raise ``ValueError``. In that case Mar 1
    is returned, which keeps the result inside the two-year window instead of
    one day outside it.

    Args:
        end_date: a ``datetime.date`` (or any object with a compatible
            ``year`` attribute and ``replace`` method).

    Returns:
        The same month/day two years earlier, or Mar 1 of that year when
        ``end_date`` is Feb 29.
    """
    target_year = end_date.year - 2
    try:
        return end_date.replace(year=target_year)
    except ValueError:
        # Only reachable for Feb 29 (day does not exist in target_year);
        # any other invalid year re-raises from the call below.
        return end_date.replace(year=target_year, month=3, day=1)

In [31]:
def convert(bars):
    """Turn a list of bar objects into a tidy, time-ordered DataFrame.

    Each bar's attributes (read with ``vars``) become one row. Rows sharing a
    ``timestamp`` are collapsed to the first occurrence, the timestamp is
    converted with ``pd.to_datetime(..., unit="ms")``, and only the OHLCV/VWAP
    columns are kept, sorted by timestamp with a fresh 0..n-1 index.

    Args:
        bars: list of objects exposing timestamp/open/high/low/close/volume/
            vwap attributes; may be empty or None.

    Returns:
        DataFrame with columns timestamp, open, high, low, close, volume,
        vwap. An empty frame with those columns is returned for empty input.
    """
    columns = ["timestamp", "open", "high", "low", "close", "volume", "vwap"]
    if not bars:
        return pd.DataFrame(columns=columns)

    records = list(map(vars, bars))
    return (
        pd.DataFrame(records)
        .drop_duplicates(subset=["timestamp"], keep="first")
        .assign(timestamp=lambda frame: pd.to_datetime(frame["timestamp"], unit="ms"))
        .loc[:, columns]
        .sort_values("timestamp")
        .reset_index(drop=True)
    )

In [32]:
def save_early(collected, filepath):
    """Write whatever bars were collected before a failure to ``filepath``.

    Args:
        collected: list of bar objects gathered so far (may be empty).
        filepath: destination CSV path.

    Prints a short status line in both cases; nothing is written when
    ``collected`` is empty.
    """
    if collected:
        partial_frame = convert(collected)
        partial_frame.to_csv(filepath, index=False)
        print(f"Early save of ({len(partial_frame)}) rows at {filepath}")
    else:
        print("Nothing returned from API before error.")


def fetch_data(client, ticker, start_date, end_date, max_retries=5):
    """Download aggregate bars for ``ticker``, resuming after request errors.

    Bars are requested through ``client.list_aggs`` using the module-level
    MULTIPLIER, TIMESPAN, ADJUSTED, SORT and LIMIT settings. When a request
    fails, the function sleeps for ``SLEEP_TIME * 2`` seconds and retries,
    resuming from the calendar day (America/New_York) of the newest bar
    collected so far. Because the resume point is a whole day, bars of that
    day are fetched again, so the returned list can contain repeated
    timestamps.

    Args:
        client: object exposing ``list_aggs`` (for example a ``RESTClient``).
        ticker: symbol to download.
        start_date: first ``datetime.date`` to request.
        end_date: last ``datetime.date`` to request.
        max_retries: number of consecutive failures that brought no newer bar
            which are tolerated before the error is re-raised.

    Returns:
        List of bar objects in arrival order; empty if ``start_date`` is
        after ``end_date``.

    Raises:
        Exception: the last request error, once more than ``max_retries``
            consecutive attempts failed without making progress.
    """
    aggs = []
    current_start = start_date
    stalled_attempts = 0   # consecutive failures that produced no newer bar
    newest_seen = None     # timestamp (ms) of the newest bar at the last resume

    print(f"Grabbing {ticker} from {start_date.isoformat()} to {end_date.isoformat()}")
    with tqdm(desc="Fetching bars", unit="bars") as pbar:
        while current_start <= end_date:
            try:
                for bar in client.list_aggs(
                    ticker, MULTIPLIER, TIMESPAN,
                    current_start.isoformat(), end_date.isoformat(),
                    adjusted=ADJUSTED, sort=SORT, limit=LIMIT
                ):
                    aggs.append(bar)
                    pbar.update(1)

                break

            except Exception:
                # Nothing may have arrived yet, so never index an empty list.
                newest = aggs[-1].timestamp if aggs else None

                if newest is not None and (newest_seen is None or newest > newest_seen):
                    # Progress since the previous failure: move the resume
                    # point forward and reset the stall counter.
                    stalled_attempts = 0
                    newest_seen = newest
                    current_start = datetime.fromtimestamp(
                        newest / 1000, tz=ZoneInfo("America/New_York")
                    ).date()
                else:
                    stalled_attempts += 1
                    if stalled_attempts > max_retries:
                        # Persistent failure (bad key, bad ticker, outage):
                        # surface the real error instead of looping forever.
                        raise

                time.sleep(SLEEP_TIME * 2)

    return aggs



In [33]:
def query(ticker):
    """Download bars for ``ticker`` and write them to a CSV file under DIR.

    The window ends today (New York date) and starts at START when set,
    otherwise at the earliest day allowed by ``clamp_date``. A START older
    than that limit prints an error and returns without fetching.

    On success the data is written to ``{DIR}/{ticker}_full.csv``. If any step
    raises, the exception type and message are printed and ``save_early`` is
    called with ``{DIR}/{ticker}_early.csv``.

    NOTE: the bar list handed to ``save_early`` is only non-empty when the
    failure happens after ``fetch_data`` has returned (during conversion or
    writing); if ``fetch_data`` itself raises, the list is still empty.
    """
    last_day = today()
    earliest_allowed = clamp_date(last_day)
    if START:
        first_day = parse_date(START)
    else:
        first_day = earliest_allowed

    if first_day < earliest_allowed:
        print(f"Error: {first_day} older than 2-year limit {earliest_allowed}")
        return

    api = RESTClient(MASSIVE_API_KEY)
    bars = []

    try:
        bars = fetch_data(api, ticker, first_day, last_day)

        frame = convert(bars)
        frame.to_csv(f"{DIR}/{ticker}_full.csv", index=False)

    except Exception as exc:
        print(f"type: {type(exc).__name__}")
        print(f"error: {exc}")

        save_early(bars, f"{DIR}/{ticker}_early.csv")

In [34]:
# Stocks: download and save bars for each single-name equity ticker.
tickers = ["AAPL", "NVDA", "XOM", "JPM", "TSLA", "AMZN"]
for symbol in tickers:
    query(symbol)

Grabbing AAPL from 2023-11-26 to 2025-11-26


Fetching bars: 8019bars [01:16, 104.41bars/s] 


Grabbing NVDA from 2023-11-26 to 2025-11-26


Fetching bars: 8037bars [02:07, 63.00bars/s] 


Grabbing XOM from 2023-11-26 to 2025-11-26


Fetching bars: 7667bars [00:50, 150.62bars/s] 


Grabbing JPM from 2023-11-26 to 2025-11-26


Fetching bars: 6993bars [00:50, 137.60bars/s] 


Grabbing TSLA from 2023-11-26 to 2025-11-26


Fetching bars: 8030bars [02:06, 63.28bars/s]  


Grabbing AMZN from 2023-11-26 to 2025-11-26


Fetching bars: 8028bars [02:05, 63.85bars/s]  


In [None]:
# ETFs: download and save bars for each sector/thematic fund ticker.
tickers = ["XLK", "SOXX", "XLE", "XLF", "ARKK", "XLY"]
for symbol in tickers:
    query(symbol)

Grabbing XLF from 2023-11-26 to 2025-11-26


Fetching bars: 7881bars [00:26, 294.83bars/s] 


Grabbing ARKK from 2023-11-26 to 2025-11-26


Fetching bars: 7782bars [01:15, 102.80bars/s] 


Grabbing XLY from 2023-11-26 to 2025-11-26


Fetching bars: 5345bars [00:50, 105.10bars/s] 
