In [1]:
# data_loader.py
import csv
from dataclasses import dataclass
from datetime import datetime
from typing import List, Iterator


# =========================
# Immutable Market Data Type
# =========================
@dataclass(frozen=True)
class MarketDataPoint:
    """
    A single read-only price observation.

    The dataclass is frozen, so assigning to a field after construction
    raises an error; equality, hashing and repr are generated from the
    three fields below.

    Space Complexity per object: O(1)
    (always exactly three attributes, regardless of dataset size)
    """
    # Moment the observation refers to.
    timestamp: datetime
    # Ticker the observation belongs to.
    symbol: str
    # Observed price.
    price: float


# =========================================
# Load FULL dataset into memory (List-based)
# =========================================
def load_gld_market_data(
    path: str = "GLD_market_data.csv",
    *,
    symbol: str = "GLD",
    date_format: str = "%m/%d/%Y",
) -> List[MarketDataPoint]:
    """
    Read a price CSV and return ALL ticks as a list held in memory.

    CSV Columns (assumed):
      - Timestamp (parsed with ``date_format``; default MM/DD/YYYY)
      - Price     (parsed with ``float``)

    Args:
      path: CSV file to read.
      symbol: Ticker stamped on every returned point (keyword-only).
      date_format: ``datetime.strptime`` format used for the Timestamp
        column (keyword-only).

    Returns:
      One MarketDataPoint per data row, in the order rows appear in the file.
      A file with no data rows yields an empty list.

    Raises:
      OSError: the file cannot be opened.
      KeyError: a row has no "Timestamp" or "Price" key (column missing
        from the header).
      ValueError: a Timestamp or Price value cannot be parsed; the message
        names the file and the line the reader had reached.

    Time Complexity:
      O(n)
      - One pass over n rows
      - Each parse operation is O(1) (bounded string length)

    Space Complexity:
      O(n)
      - Stores n MarketDataPoint objects in a Python list
    """
    market_data: List[MarketDataPoint] = []

    with open(path, "r", encoding="utf-8-sig", newline="") as file:
        reader = csv.DictReader(file)

        for row in reader:
            try:
                ts = datetime.strptime(row["Timestamp"], date_format)
                price = float(row["Price"])
            except ValueError as err:
                # Same exception type as before, but now says WHERE the bad
                # value is so a malformed file can be fixed without bisecting.
                raise ValueError(
                    f"{path}: cannot parse row at line {reader.line_num}: {err}"
                ) from err

            # amortized O(1) append
            market_data.append(
                MarketDataPoint(timestamp=ts, symbol=symbol, price=price)
            )

    return market_data


# =========================================
# Streaming version (Memory-Optimized)
# =========================================
def stream_gld_market_data(
    path: str = "GLD_market_data.csv",
    *,
    symbol: str = "GLD",
    date_format: str = "%m/%d/%Y",
) -> Iterator[MarketDataPoint]:
    """
    Generator that streams ticks from a price CSV one by one.

    CSV Columns (assumed):
      - Timestamp (parsed with ``date_format``; default MM/DD/YYYY)
      - Price     (parsed with ``float``)

    Because this is a generator, nothing happens at call time: the file is
    opened when iteration starts and is closed when the generator is
    exhausted, closed, or an error propagates out of it.

    Args:
      path: CSV file to read.
      symbol: Ticker stamped on every yielded point (keyword-only).
      date_format: ``datetime.strptime`` format used for the Timestamp
        column (keyword-only).

    Yields:
      One MarketDataPoint per data row, in file order.

    Raises (during iteration):
      OSError: the file cannot be opened.
      KeyError: a row has no "Timestamp" or "Price" key (column missing
        from the header).
      ValueError: a Timestamp or Price value cannot be parsed; the message
        names the file and the line the reader had reached.

    Time Complexity:
      O(n)
      - One pass over CSV

    Space Complexity:
      O(1) extra
      - Does NOT store full dataset
      - The generator itself holds only the row currently being yielded
    """
    with open(path, "r", encoding="utf-8-sig", newline="") as file:
        reader = csv.DictReader(file)

        for row in reader:
            try:
                ts = datetime.strptime(row["Timestamp"], date_format)
                price = float(row["Price"])
            except ValueError as err:
                # Same exception type as before, but now says WHERE the bad
                # value is so a malformed file can be fixed without bisecting.
                raise ValueError(
                    f"{path}: cannot parse row at line {reader.line_num}: {err}"
                ) from err

            yield MarketDataPoint(timestamp=ts, symbol=symbol, price=price)


# =========================
# Debug / Quick Test
# =========================
if __name__ == "__main__":
    # Smoke test: load the whole file, then show only the leading rows.
    preview = load_gld_market_data("GLD_market_data.csv")[:10]
    print("First 10 rows:")
    for point in preview:
        print(point)


First 10 rows:
MarketDataPoint(timestamp=datetime.datetime(2026, 1, 20, 0, 0), symbol='GLD', price=437.23)
MarketDataPoint(timestamp=datetime.datetime(2026, 1, 16, 0, 0), symbol='GLD', price=421.29)
MarketDataPoint(timestamp=datetime.datetime(2026, 1, 15, 0, 0), symbol='GLD', price=423.33)
MarketDataPoint(timestamp=datetime.datetime(2026, 1, 14, 0, 0), symbol='GLD', price=425.94)
MarketDataPoint(timestamp=datetime.datetime(2026, 1, 13, 0, 0), symbol='GLD', price=421.63)
MarketDataPoint(timestamp=datetime.datetime(2026, 1, 12, 0, 0), symbol='GLD', price=422.23)
MarketDataPoint(timestamp=datetime.datetime(2026, 1, 9, 0, 0), symbol='GLD', price=414.47)
MarketDataPoint(timestamp=datetime.datetime(2026, 1, 8, 0, 0), symbol='GLD', price=411.49)
MarketDataPoint(timestamp=datetime.datetime(2026, 1, 7, 0, 0), symbol='GLD', price=409.23)
MarketDataPoint(timestamp=datetime.datetime(2026, 1, 6, 0, 0), symbol='GLD', price=413.18)
