# main.ipynb

One-click orchestration:
1. Load `GLD_market_data.csv`
2. Benchmark strategies on 1k/10k/100k ticks (timeit + memory_profiler + cProfile)
3. Save `benchmark_results.csv`
4. Plot runtime/memory scaling
5. Generate `complexity_report.md`


In [None]:
%pip -q install memory_profiler psutil pandas matplotlib

In [None]:
%run ./data_loader.ipynb
%run ./strategies.ipynb

In [None]:
import timeit
import cProfile
import pstats
import io
from memory_profiler import memory_usage
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

In [None]:
# Load the tick history once; the benchmark cells below slice prefixes of `data`.
DATA_PATH = "GLD_market_data.csv"
data = load_gld_market_data(DATA_PATH)
print("Rows:", len(data))
print("First row:", data[0])

In [None]:
def run_strategy(strategy, ticks):
    """Feed every tick to `strategy` and count the signals it produces.

    Args:
        strategy: object exposing `generate_signals(tick)`; the return value
            must support `len()`.
        ticks: iterable of ticks, consumed in order.

    Returns:
        Total number of signals across all ticks (0 for an empty iterable).
    """
    return sum(len(strategy.generate_signals(tick)) for tick in ticks)

def measure_time_timeit(strategy_factory, ticks, repeats=3):
    """Return the best wall-clock time of a full pass over `ticks`.

    Each repetition builds a fresh strategy via `strategy_factory()` and runs
    it through `run_strategy`, so construction is part of the timed region.

    Args:
        strategy_factory: zero-argument callable returning a new strategy.
        ticks: sequence of ticks handed to `run_strategy`.
        repeats: number of independent timing runs.

    Returns:
        The minimum of the `repeats` timings, as reported by `timeit`.
    """
    def _one_pass():
        strategy = strategy_factory()
        run_strategy(strategy, ticks)

    # number=1: each timing covers exactly one full pass, not an averaged loop.
    timings = timeit.Timer(_one_pass).repeat(repeat=repeats, number=1)
    return min(timings)

def measure_peak_memory(strategy_factory, ticks, interval=0.01):
    """Return the highest memory sample observed while running a strategy.

    A fresh strategy is built via `strategy_factory()` and run over `ticks`
    while `memory_profiler.memory_usage` samples every `interval` seconds.

    Args:
        strategy_factory: zero-argument callable returning a new strategy.
        ticks: sequence of ticks handed to `run_strategy`.
        interval: sampling period passed to `memory_usage`.

    Returns:
        The maximum sample as a float.
        NOTE(review): memory_profiler documents these samples as process memory
        in MiB, so the value presumably includes the kernel's baseline footprint
        rather than only the strategy's own allocations - confirm before
        comparing absolute numbers.
    """
    def _workload():
        strategy = strategy_factory()
        run_strategy(strategy, ticks)

    readings = memory_usage((_workload,), interval=interval, timeout=None)
    peak = max(readings)
    return float(peak)

def measure_cprofile(strategy_factory, ticks, topn=20):
    """Profile one full pass of a strategy over `ticks` and return the report text.

    Args:
        strategy_factory: zero-argument callable returning a new strategy.
        ticks: sequence of ticks handed to `run_strategy`.
        topn: number of entries to include in the printed statistics.

    Returns:
        The `pstats` report (directories stripped, sorted by cumulative time,
        limited to `topn` entries) as a single string.

    Raises:
        Whatever `run_strategy` raises; the profiler is always disabled first.
    """
    pr = cProfile.Profile()
    # Build the strategy before enabling the profiler so construction is not profiled.
    s = strategy_factory()
    pr.enable()
    try:
        run_strategy(s, ticks)
    finally:
        # Always switch the profiler off: a profiler left enabled after an
        # exception would skew later timing cells and can prevent another
        # profiler from being enabled on a re-run.
        pr.disable()
    buf = io.StringIO()
    pstats.Stats(pr, stream=buf).strip_dirs().sort_stats("cumtime").print_stats(topn)
    return buf.getvalue()

In [None]:
# Candidate input sizes; keep only those the loaded dataset can actually supply.
CANDIDATE_SIZES = [1_000, 10_000, 100_000]
SIZES = [s for s in CANDIDATE_SIZES if s <= len(data)]
if not SIZES:
    # The dataset is smaller than the smallest candidate. An empty SIZES would
    # make the benchmark cell build a column-less DataFrame (KeyError on
    # sort_values) and make max(SIZES) fail in the hotspot cell.
    if len(data) == 0:
        raise ValueError("No market data loaded; cannot choose benchmark sizes.")
    # Fall back to benchmarking the whole (small) dataset once.
    SIZES = [len(data)]
SIZES

In [None]:
# Display name -> zero-argument factory, so every measurement starts from a
# freshly constructed strategy instance.
strategies = {
    "NaiveMovingAverageStrategy": lambda: NaiveMovingAverageStrategy(),
    "WindowedMovingAverageStrategy(k=10)": lambda: WindowedMovingAverageStrategy(window_size=10),
}

rows = []           # one record per (strategy, input size)
cprofile_text = {}  # (strategy name, input size) -> cProfile report text

for size in SIZES:
    subset = data[:size]
    for label, make_strategy in strategies.items():
        # Dict values are evaluated in order: timing first, then memory.
        record = {
            "strategy": label,
            "n_ticks": size,
            "time_seconds": measure_time_timeit(make_strategy, subset, repeats=3),
            "peak_memory_mb": measure_peak_memory(make_strategy, subset, interval=0.01),
        }
        # Profiling runs last, as a separate pass, so it does not affect the numbers above.
        cprofile_text[(label, size)] = measure_cprofile(make_strategy, subset, topn=20)
        rows.append(record)

df = pd.DataFrame(rows).sort_values(["strategy", "n_ticks"])
df

In [None]:
# Persist the benchmark table (without the DataFrame index) next to the notebook.
results_csv = "benchmark_results.csv"
df.to_csv(results_csv, index=False)
print(f"Saved {results_csv}")

In [None]:
# Plots
runtime_plot = "runtime_vs_input.png"
memory_plot = "memory_vs_input.png"


def _save_scaling_plot(results, column, ylabel, title, path):
    """Plot `column` against input size, one line per strategy, and save to `path`.

    Args:
        results: benchmark DataFrame with "strategy" and "n_ticks" columns plus `column`.
        column: name of the metric column drawn on the y-axis.
        ylabel: y-axis label.
        title: figure title.
        path: output image file name.
    """
    fig, ax = plt.subplots()
    for strat, sub in results.groupby("strategy"):
        sub = sub.sort_values("n_ticks")
        ax.plot(sub["n_ticks"], sub[column], marker="o", label=strat)
    ax.set_xlabel("Input size (ticks)")
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    fig.tight_layout()
    fig.savefig(path)
    # Close the figure explicitly so it is released and not rendered inline again.
    plt.close(fig)
    print("Saved", path)


_save_scaling_plot(df, "time_seconds", "Runtime (seconds)", "Runtime vs Input Size", runtime_plot)
_save_scaling_plot(df, "peak_memory_mb", "Peak memory (MB)", "Peak Memory vs Input Size", memory_plot)

In [None]:
# Generate complexity_report.md
# Assembles the Markdown report from the benchmark table (`df`) and the plot
# file names (`runtime_plot`, `memory_plot`) produced by the earlier cells.
report_path = Path("complexity_report.md")

lines = []
lines.append("# Complexity Report: Runtime & Space Complexity in Financial Signal Processing\n\n")
lines.append("## Benchmark Table (timeit + memory_profiler)\n\n")
lines.append("| Strategy | Ticks | Runtime (s) | Peak Memory (MB) |\n")
lines.append("|---|---:|---:|---:|\n")
# One Markdown table row per (strategy, input size), in a stable order.
for row in df.sort_values(["strategy", "n_ticks"]).itertuples(index=False):
    lines.append(
        f"| {row.strategy} | {int(row.n_ticks)} | {row.time_seconds:.6f} | {row.peak_memory_mb:.2f} |\n"
    )

lines.append("\n## Scaling Plots\n\n")
lines.append(f"![Runtime vs Input Size]({runtime_plot})\n\n")
lines.append(f"![Peak Memory vs Input Size]({memory_plot})\n\n")

lines.append("## Theoretical Complexity Annotations\n\n")
# "\u00b2" is the superscript two; the escape keeps this source ASCII-only so the
# character cannot be mangled into mojibake by a wrong file encoding again.
lines.append("- **NaiveMovingAverageStrategy**: per-tick time **O(i)** (worst **O(n)**) due to summing full history; total **O(n\u00b2)**; space **O(n)**.\n")
lines.append("- **WindowedMovingAverageStrategy**: per-tick time **O(1)** using a fixed window and running sum; total **O(n)**; space **O(k)**.\n")

lines.append("\n## Narrative Comparison\n\n")
lines.append(
    "The naive strategy recomputes the moving average by summing the entire history each tick, "
    "which causes superlinear scaling and high memory usage as history grows. "
    "The windowed strategy keeps only the last k prices and updates a running sum, so it scales linearly "
    "with bounded memory and is appropriate for large tick streams.\n"
)

# Written explicitly as UTF-8 because the report contains a non-ASCII character.
report_path.write_text(''.join(lines), encoding='utf-8')
print("Generated", report_path)

## cProfile hotspots (largest input size)

The profiles below show where each strategy spends its time at the largest benchmarked input size; they are useful for the 'hotspots' part of the unit tests/report.

In [None]:
# Show the stored cProfile report of every strategy at the largest benchmarked size.
largest = max(SIZES)
for name in strategies:
    print(f"\n=== cProfile: {name} (n={largest}) ===")
    print(cprofile_text[(name, largest)])