In [1]:
db_api = 'db-hkwKMi3t4jrFnfuA3w543YK3Jr3Dv'

In [None]:
# import databento as db
# client = db.Historical(db_api)

# df = client.timeseries.get_range(
#     dataset="DBEQ.BASIC",
#     schema="trades",
#     symbols=["SPY", "QQQ", "NVDA"],
#     start="2023-08-17T00:00:00",
#     end="2023-08-17T13:30:00",
# ).to_df(tz="America/New_York")

# print(df[["symbol", "price", "size"]])

### Things to ensure for Last Traded Price

1. Time should be in ET
2. Should work for pre-market and post-market as well
3. Should work for LEAST TRADED stocks
4. Should be adjusted prices


In [44]:
import databento as db
from datetime import datetime, timedelta
import pytz
import warnings
import pandas as pd
from pandas.tseries.offsets import BDay
import json

def get_last_trade(symbol, timestamp):
    """
    Get last valid trade for a symbol relative to timestamp using binary search
    
    Args:
        symbol (str): Stock symbol
        timestamp (datetime): Target timestamp to find last trade before
    Returns:
        dict: Trade information or None if no trade found
    """
    if timestamp.tzinfo is None:
        et_tz = pytz.timezone('US/Eastern')
        timestamp = et_tz.localize(timestamp)
    
    timestamp_utc = timestamp.astimezone(pytz.UTC)
    client = db.Historical(db_api)
    
    # Quick check for recent trades (15 seconds)
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            quick_df = client.timeseries.get_range(
                dataset="DBEQ.BASIC",
                schema="trades",
                symbols=[symbol],
                start=timestamp_utc - timedelta(seconds=15),
                end=timestamp_utc + timedelta(microseconds=1),
                limit=10
            ).to_df(tz="UTC")
            
        if not quick_df.empty:
            return process_trades(quick_df, timestamp_utc, "recent")
    except Exception as e:
        print(f"Quick check error: {e}")
    
    # Binary search approach
    windows = [
        timedelta(hours=1),
        timedelta(hours=2),
        timedelta(hours=4),
        timedelta(hours=8),
        timedelta(days=1),
        timedelta(days=2),
        timedelta(days=5)
    ]
    
    last_trade_df = None
    window_used = None
    
    # Find first window with trades
    for window in windows:
        try:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                df = client.timeseries.get_range(
                    dataset="DBEQ.BASIC",
                    schema="trades",
                    symbols=[symbol],
                    start=timestamp_utc - window,
                    end=timestamp_utc + timedelta(microseconds=1),
                    limit=100
                ).to_df(tz="UTC")
                
            if not df.empty:
                last_trade_df = df
                window_used = window
                break
                
        except Exception as e:
            print(f"Error with {window} window: {e}")
            continue
    
    if last_trade_df is None:
        print(f"No trades found for {symbol} in any window")
        return None
    
    # Process the trades
    quality = "delayed" if window_used <= timedelta(days=1) else "historical"
    return process_trades(last_trade_df, timestamp_utc, quality)

def process_trades(df, timestamp_utc, quality):
    """Process trade data and return formatted result"""
    
    # Filter invalid trades
    valid_df = df[
        (df['price'] > 0) &  # Valid price
        (~df['flags'].isin([130, 131]))  # Not cancelled/corrected
    ]
    
    if valid_df.empty:
        return None
    
    # Get most recent valid trade
    latest_trade = valid_df.sort_values('ts_event', ascending=False).iloc[0]
    
    # Convert to ET for display
    et_tz = pytz.timezone('US/Eastern')
    trade_time_et = latest_trade['ts_event'].tz_convert(et_tz)
    
    # Calculate age
    age_td = timestamp_utc - latest_trade['ts_event']
    minutes_old = age_td.total_seconds() / 60
    hours_old = minutes_old / 60
    trading_days_old = len(pd.date_range(latest_trade['ts_event'].date(), 
                                       timestamp_utc.date(), 
                                       freq=BDay()))
    
    # Determine session
    hour = trade_time_et.hour
    minute = trade_time_et.minute
    if (hour < 9) or (hour == 9 and minute < 30):
        session = 'pre_market'
    elif hour >= 16:
        session = 'post_market'
    else:
        session = 'regular'
    
    return {
        'symbol': latest_trade['symbol'],
        'price': float(latest_trade['price']),
        'size': int(latest_trade['size']),
        'trade_time': trade_time_et,
        'age': {
            'minutes': minutes_old,
            'hours': hours_old,
            'trading_days': trading_days_old
        },
        'conditions': latest_trade['flags'],
        'session': session,
        'trading_day': trade_time_et.date(),
        'data_quality': quality
    }

# Test
test_times = [
    datetime(2024, 3, 19, 14, 30),  # Regular
    datetime(2024, 3, 19, 8, 30),   # Pre
    datetime(2024, 3, 19, 16, 30)   # Post
]

for test_time in test_times:
    print(f"\nTesting at {test_time} ET")
    result = get_last_trade("SNDL", test_time)
    print(f"\nResult:")
    print(json.dumps(result, default=str, indent=2))


Testing at 2024-03-19 14:30:00 ET

Result:
{
  "symbol": "SNDL",
  "price": 1.485,
  "size": 85,
  "trade_time": "2024-03-19 14:25:30.512668227-04:00",
  "age": {
    "minutes": 4.491455516666666,
    "hours": 0.07485759194444444,
    "trading_days": 1
  },
  "conditions": "194",
  "session": "regular",
  "trading_day": "2024-03-19",
  "data_quality": "delayed"
}

Testing at 2024-03-19 08:30:00 ET

Result:
{
  "symbol": "SNDL",
  "price": 1.465,
  "size": 1,
  "trade_time": "2024-03-18 10:00:44.940867750-04:00",
  "age": {
    "minutes": 1349.2509855333333,
    "hours": 22.487516425555555,
    "trading_days": 2
  },
  "conditions": "194",
  "session": "regular",
  "trading_day": "2024-03-18",
  "data_quality": "delayed"
}

Testing at 2024-03-19 16:30:00 ET

Result:
{
  "symbol": "SNDL",
  "price": 1.48,
  "size": 754,
  "trade_time": "2024-03-19 15:59:57.005927271-04:00",
  "age": {
    "minutes": 30.0499012,
    "hours": 0.5008316866666667,
    "trading_days": 1
  },
  "conditions": 

In [None]:
df