# profiler.ipynb


In [1]:
# If needed, install dependencies (run once)
%pip -q install memory_profiler psutil pandas matplotlib

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Import necessary modules and classes
%run ./data_loader.ipynb
%run ./strategies.ipynb

First 10 rows:
MarketDataPoint(timestamp=datetime.datetime(2026, 1, 20, 0, 0), symbol='GLD', price=437.23)
MarketDataPoint(timestamp=datetime.datetime(2026, 1, 16, 0, 0), symbol='GLD', price=421.29)
MarketDataPoint(timestamp=datetime.datetime(2026, 1, 15, 0, 0), symbol='GLD', price=423.33)
MarketDataPoint(timestamp=datetime.datetime(2026, 1, 14, 0, 0), symbol='GLD', price=425.94)
MarketDataPoint(timestamp=datetime.datetime(2026, 1, 13, 0, 0), symbol='GLD', price=421.63)
MarketDataPoint(timestamp=datetime.datetime(2026, 1, 12, 0, 0), symbol='GLD', price=422.23)
MarketDataPoint(timestamp=datetime.datetime(2026, 1, 9, 0, 0), symbol='GLD', price=414.47)
MarketDataPoint(timestamp=datetime.datetime(2026, 1, 8, 0, 0), symbol='GLD', price=411.49)
MarketDataPoint(timestamp=datetime.datetime(2026, 1, 7, 0, 0), symbol='GLD', price=409.23)
MarketDataPoint(timestamp=datetime.datetime(2026, 1, 6, 0, 0), symbol='GLD', price=413.18)
{'NaiveMovingAverageStrategy': {'per_tick_time': 'O(i) (worst O(n)) d

In [3]:
import timeit
import cProfile
import pstats
import io
from memory_profiler import memory_usage
import pandas as pd

## Load data (GLD_market_data.csv)

In [10]:
# load_gld_market_data(...) is defined in data_loader.ipynb, which the %run cell above executes.
data = load_gld_market_data("GLD_market_data.csv")
# Sanity check: number of loaded ticks and the first tick.
(len(data), data[0])

(1016,
 MarketDataPoint(timestamp=datetime.datetime(2026, 1, 20, 0, 0), symbol='GLD', price=437.23))

## Define benchmark helpers

In [11]:
def run_strategy(strategy, ticks):
    """Feed every tick to ``strategy`` and count the signals it emits.

    Args:
        strategy: Object exposing ``generate_signals(tick)``; the value it
            returns must support ``len()``.
        ticks: Iterable of ticks, consumed once and in order.

    Returns:
        Total number of signals produced over all ticks (0 for no ticks).
    """
    total = 0
    for tick in ticks:
        # Only the count matters here; the signals themselves are discarded.
        emitted = strategy.generate_signals(tick)
        total += len(emitted)
    return total


def measure_time_timeit(strategy_factory, ticks, repeats=3):
    """Time a full pass over ``ticks`` and report the fastest of ``repeats`` runs.

    Every timed run builds a brand-new strategy through ``strategy_factory()``,
    so no state carries over between runs; the construction itself is part of
    what is timed.

    Args:
        strategy_factory: Zero-argument callable returning a fresh strategy.
        ticks: Ticks handed to ``run_strategy`` on every run.
        repeats: How many independent runs ``timeit.repeat`` performs.

    Returns:
        Duration in seconds of the quickest run.
    """
    def _single_pass():
        run_strategy(strategy_factory(), ticks)

    # number=1: each of the `repeats` measurements is exactly one pass.
    durations = timeit.repeat(_single_pass, number=1, repeat=repeats)
    return min(durations)


def measure_peak_memory(strategy_factory, ticks, interval=0.01):
    """Largest memory reading taken by memory_profiler during one pass, in MB.

    A fresh strategy is created via ``strategy_factory()`` and run over
    ``ticks`` while ``memory_usage`` samples memory every ``interval`` seconds.

    NOTE(review): the readings appear to be whole-process memory rather than
    the strategy's own allocations, so the kernel baseline dominates for small
    inputs - confirm before comparing strategies on this number.

    Args:
        strategy_factory: Zero-argument callable returning a fresh strategy.
        ticks: Ticks handed to ``run_strategy``.
        interval: Sampling period passed through to ``memory_usage``.

    Returns:
        The maximum sample, as a ``float``.
    """
    def _single_pass():
        run_strategy(strategy_factory(), ticks)

    # timeout=None: keep sampling until the pass has finished.
    readings = memory_usage((_single_pass,), interval=interval, timeout=None)
    peak = max(readings)
    return float(peak)


def measure_cprofile(strategy_factory, ticks, topn=20):
    """Profile one pass over ``ticks`` and return the hottest functions as text.

    Args:
        strategy_factory: Zero-argument callable returning a fresh strategy.
        ticks: Ticks handed to ``run_strategy`` while profiling is active.
        topn: Maximum number of entries printed in the report.

    Returns:
        The ``pstats`` report as a string: directory prefixes stripped,
        entries ordered by cumulative time, limited to ``topn`` rows.

    Raises:
        Whatever ``run_strategy`` raises; the profiler is switched off first.
    """
    pr = cProfile.Profile()
    # Built before profiling starts so construction cost stays out of the report.
    s = strategy_factory()

    pr.enable()
    try:
        run_strategy(s, ticks)
    finally:
        # Always switch the profiler off, even when the strategy raises.
        # In a long-lived kernel a profiler left enabled keeps adding overhead
        # to later cells and can block a later profiler from being enabled.
        pr.disable()

    buf = io.StringIO()
    stats = pstats.Stats(pr, stream=buf).strip_dirs().sort_stats("cumtime")
    stats.print_stats(topn)
    return buf.getvalue()

## Benchmark settings

In [12]:
# Input sizes we would like to benchmark, smallest to largest.
CANDIDATE_SIZES = [1_000, 10_000, 100_000]
# Keep only the sizes the loaded dataset can supply. If the dataset is smaller
# than every candidate, fall back to its full length so SIZES is never empty:
# the later max(SIZES) and the results-table sort both fail on an empty list.
SIZES = [s for s in CANDIDATE_SIZES if s <= len(data)] or [len(data)]
SIZES

[1000]

In [13]:
# Zero-argument factories: every measurement below builds its own fresh
# strategy instance, so no state is shared between measurements.
strategies = {
    "NaiveMovingAverageStrategy": lambda: NaiveMovingAverageStrategy(),
    "WindowedMovingAverageStrategy(k=10)": lambda: WindowedMovingAverageStrategy(window_size=10),
}

# results: one row per (strategy, size); cprofile_outputs: report text keyed the same way.
results, cprofile_outputs = [], {}

for n in SIZES:
    # Benchmark on the first n ticks of the loaded dataset.
    ticks = data[:n]
    for name, factory in strategies.items():
        # Wall-clock time, then peak memory, then a cProfile report - each on
        # its own independent run of the strategy.
        t = measure_time_timeit(factory, ticks, repeats=3)
        peak_mb = measure_peak_memory(factory, ticks, interval=0.01)
        prof = measure_cprofile(factory, ticks, topn=20)

        cprofile_outputs[(name, n)] = prof
        results.append(
            dict(strategy=name, n_ticks=n, time_seconds=t, peak_memory_mb=peak_mb)
        )

# Summary table, grouped by strategy and ordered by input size.
df = pd.DataFrame(results).sort_values(by=["strategy", "n_ticks"])
df

Unnamed: 0,strategy,n_ticks,time_seconds,peak_memory_mb
0,NaiveMovingAverageStrategy,1000,0.002246,76.671875
1,WindowedMovingAverageStrategy(k=10),1000,0.000675,60.59375


## Save results table

In [14]:
# Persist the summary table next to the notebook; the DataFrame index is not written.
df.to_csv(path_or_buf="benchmark_results.csv", index=False)
print("Saved benchmark_results.csv")

Saved benchmark_results.csv


## View cProfile hotspots (largest run for each strategy)

In [15]:
# Print the stored cProfile report of the largest benchmarked size for each strategy.
for name in strategies:
    n = max(SIZES)
    key = (name, n)
    if key not in cprofile_outputs:
        # No report was recorded for this (strategy, size) pair.
        continue
    print(f"\n=== cProfile: {name} (n={n}) ===")
    print(cprofile_outputs[key])


=== cProfile: NaiveMovingAverageStrategy (n=1000) ===
         6002 function calls in 0.003 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.003    0.003 171056832.py:1(run_strategy)
     1000    0.001    0.000    0.002    0.000 964951430.py:36(generate_signals)
     1000    0.001    0.000    0.001    0.000 {built-in method builtins.sum}
     1000    0.000    0.000    0.000    0.000 1687773101.py:24(moving_average_decision)
     1000    0.000    0.000    0.000    0.000 {method 'append' of 'list' objects}
     2000    0.000    0.000    0.000    0.000 {built-in method builtins.len}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}




=== cProfile: WindowedMovingAverageStrategy(k=10) (n=1000) ===
         6992 function calls in 0.002 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
  