### Backtesting NVDA Trading Strategy Using MACD and OBI Indicators
- This notebook uses NautilusTrader alongside Databento to import Nasdaq TotalView-ITCH market data
- The Python environment uses a modern Cython toolchain and is managed with uv


In [None]:
from dotenv import load_dotenv, find_dotenv
from pathlib import Path
import databento as db
import os

# Locate the project's .env file once and load it from that exact path, so the
# file that is loaded and the file used to derive PROJECT_ROOT can never differ.
# find_dotenv() returns an empty string when no .env file is found.
env_path_str = find_dotenv()
if env_path_str:
    # load_dotenv() reports whether any variable was set, so an existing but
    # empty .env yields False; the file location is still a valid project root.
    if load_dotenv(env_path_str):
        print(".env file loaded successfully.")
    else:
        print(f"Warning: {env_path_str} was found, but no variables were loaded from it.")
    PROJECT_ROOT = Path(env_path_str).parent
    print(f"Project root (derived from .env location): {PROJECT_ROOT.resolve()}")
else:
    print("Warning: .env file not found or could not be loaded. Using CWD as project root. Ensure .env is in the project root or parent directories.")
    PROJECT_ROOT = Path.cwd()  # Fallback if .env is not found

# Prepare a directory for the raw Databento DBN format data, relative to PROJECT_ROOT.
DATABENTO_DATA_DIR = PROJECT_ROOT / "databento_data"
print(f"DATABENTO_DATA_DIR is set to: {DATABENTO_DATA_DIR.resolve()}")
# parents=True keeps this cell re-runnable even if an intermediate directory is missing.
DATABENTO_DATA_DIR.mkdir(parents=True, exist_ok=True)

# Initialize the Databento historical client.
# With no explicit key it uses the DATABENTO_API_KEY environment variable
# (recommended best practice), which the .env file above is expected to provide.
client = db.Historical()

In [9]:
def mask_secret(value, visible=4):
    """Return a redacted form of a secret that is safe to leave in notebook output.

    Args:
        value: The secret string, or None/empty when it is not configured.
        visible: Number of leading characters to keep for identification.

    Returns:
        "<not set>" when value is None or empty; "****" when the secret is no
        longer than `visible` characters (nothing is revealed); otherwise the
        first `visible` characters followed by "****".
    """
    if not value:
        return "<not set>"
    if len(value) <= visible:
        return "****"
    return value[:visible] + "****"


# Confirm the key is configured without writing the credential itself into the
# saved notebook output (outputs are committed and shared with the notebook).
print(f"DATABENTO_API_KEY: {mask_secret(os.getenv('DATABENTO_API_KEY'))}")

DATABENTO_API_KEY: db-e****


In [None]:
# Fetch the publisher catalogue from the Databento metadata endpoint.
publishers = client.metadata.list_publishers()

# The list is long, so preview only its first five entries as the cell output.
publishers[0:5]

[{'publisher_id': 1,
  'dataset': 'GLBX.MDP3',
  'venue': 'GLBX',
  'description': 'CME Globex MDP 3.0'},
 {'publisher_id': 2,
  'dataset': 'XNAS.ITCH',
  'venue': 'XNAS',
  'description': 'Nasdaq TotalView-ITCH'},
 {'publisher_id': 3,
  'dataset': 'XBOS.ITCH',
  'venue': 'XBOS',
  'description': 'Nasdaq BX TotalView-ITCH'},
 {'publisher_id': 4,
  'dataset': 'XPSX.ITCH',
  'venue': 'XPSX',
  'description': 'Nasdaq PSX TotalView-ITCH'},
 {'publisher_id': 5,
  'dataset': 'BATS.PITCH',
  'venue': 'BATS',
  'description': 'Cboe BZX Depth'}]

In [13]:
# Request parameters used by the cells in this notebook.
schema = "mbo"
dataset = "XNAS.ITCH"  # listed by the publishers endpoint as "Nasdaq TotalView-ITCH"
venue = "XNAS"
symbol = "NVDA"

# Check the available date range. This lookup is per dataset, not per symbol,
# so the message names the dataset rather than implying NVDA-specific coverage.
available_range = client.metadata.get_dataset_range(dataset=dataset)
print(f"Available range for dataset {dataset}: {available_range}")

Available range for NVDA: {'start': '2018-05-01T00:00:00.000000000Z', 'end': '2025-06-14T00:00:00.000000000Z'}


In [15]:
# Request window: `start` is inclusive, `end` is exclusive.
start_date = "2025-01-01"
end_date = "2025-01-03"

# Ask Databento what this request would cost before downloading anything.
# The schema is whatever `schema` was set to above (not hourly bars).
cost = client.metadata.get_cost(
    dataset=dataset,
    schema=schema,
    symbols=[symbol],
    start=start_date,
    end=end_date,
)

# Show the quoted cost as the cell output.
cost

0.935332176089

In [16]:
# Download the data for the selected date range, reusing a local DBN file when
# one already exists so that re-running the notebook does not repeat (and
# re-bill) the same historical request.
# The file name encodes symbol, schema and date range, so changing any of them
# triggers a fresh download instead of silently reusing stale data.
dbn_path = DATABENTO_DATA_DIR / f"{symbol}_{schema}_{start_date}_{end_date}.dbn.zst"

if dbn_path.exists():
    # NOTE: an interrupted download can leave a partial file behind; delete
    # dbn_path and re-run this cell if loading fails.
    data = db.DBNStore.from_file(dbn_path)
else:
    data = client.timeseries.get_range(
        dataset=dataset,
        symbols=[symbol],
        schema=schema,     # same schema as the cost estimate (not hourly bars)
        start=start_date,  # including start
        end=end_date,      # excluding end
        path=dbn_path,     # stream the response to disk as well as returning it
    )

In [19]:
# Materialise the downloaded records as a pandas DataFrame.
df = data.to_df()

# Show where the Parquet copy will be written (this cell does not save it).
print(DATABENTO_DATA_DIR.joinpath(f"{symbol}_{schema}_data.parquet"))

# Preview the first rows as the cell output.
df.head()


databento_data/NVDA_mbo_data.parquet


Unnamed: 0_level_0,ts_event,rtype,publisher_id,instrument_id,action,side,price,size,channel_id,order_id,flags,ts_in_delta,sequence,symbol
ts_recv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2025-01-02 08:05:38.373775093+00:00,2025-01-02 08:05:38.373608337+00:00,160,2,11667,R,N,,0,0,0,8,0,0,NVDA
2025-01-02 09:00:00.017769278+00:00,2025-01-02 09:00:00.017585971+00:00,160,2,11667,A,A,140.97,100,0,1531,128,183307,282985,NVDA
2025-01-02 09:00:00.023755277+00:00,2025-01-02 09:00:00.023588055+00:00,160,2,11667,A,B,132.1,3000,0,2051,130,167222,283142,NVDA
2025-01-02 09:00:00.024020907+00:00,2025-01-02 09:00:00.023854397+00:00,160,2,11667,A,A,157.05,73,0,2087,130,166510,283157,NVDA
2025-01-02 09:00:00.093627349+00:00,2025-01-02 09:00:00.093445031+00:00,160,2,11667,A,B,125.0,20,0,5703,0,182318,284767,NVDA


In [22]:
# Save the DataFrame as Parquet inside DATABENTO_DATA_DIR.
target_path = DATABENTO_DATA_DIR / f"{symbol}_{schema}_data.parquet"
absolute_path = target_path.resolve()

print(f"Current working directory: {Path.cwd()}")
print(f"Attempting to save to absolute path: {absolute_path}")

# Make sure the destination directory exists even if it was removed after the
# setup cell ran.
target_path.parent.mkdir(parents=True, exist_ok=True)

try:
    df.to_parquet(target_path)
except Exception as e:
    # Report the failure, then re-raise: swallowing it would let later cells
    # run as if the file had been written.
    print(f"An error occurred during df.to_parquet: {e}")
    raise

# Defensive check that the write actually produced a file at the expected path.
if not absolute_path.is_file():
    raise FileNotFoundError(f"FAILURE: File NOT found after saving attempt: {absolute_path}")

print(f"SUCCESS: File successfully saved: {absolute_path}")
print(f"File size: {absolute_path.stat().st_size} bytes")

Current working directory: /home/david/repos/nautilusTrader/.venv/bin
DATABENTO_DATA_DIR is: databento_data
Symbol is: NVDA
Schema is: mbo
Attempting to save to relative path: databento_data/NVDA_mbo_data.parquet
Attempting to save to absolute path: /home/david/repos/nautilusTrader/.venv/bin/databento_data/NVDA_mbo_data.parquet
SUCCESS: File successfully saved: /home/david/repos/nautilusTrader/.venv/bin/databento_data/NVDA_mbo_data.parquet
File size: 305803650 bytes
SUCCESS: File successfully saved: /home/david/repos/nautilusTrader/.venv/bin/databento_data/NVDA_mbo_data.parquet
File size: 305803650 bytes
