In [None]:
# streaming_history_utils.py
from pathlib import Path
from typing import Iterable, Optional, Union
import pandas as pd


def load_streaming_history(paths: Optional[Iterable[Union[str, Path]]] = None) -> pd.DataFrame:
    """
    Load streaming-history JSON files into a single DataFrame.

    Args:
        paths: Files to load. A single ``str``/``Path`` is treated as one
            file (not iterated character by character). When ``None``, the
            current working directory is searched for files matching
            ``StreamingHistory_music*.json``; exports that use a different
            naming scheme must be passed explicitly. Entries that do not
            exist on disk are skipped.

    Returns:
        The rows of every loaded file concatenated, with exact duplicate
        rows dropped and a fresh ``0..n-1`` index. When an ``endTime``
        column is present it is parsed with ``pd.to_datetime`` and the rows
        are sorted by it; otherwise the rows keep their file order.

    Raises:
        FileNotFoundError: If no existing file is left to load.
    """
    if paths is None:
        candidates = sorted(Path.cwd().glob("StreamingHistory_music*.json"))
    elif isinstance(paths, (str, Path)):
        # A bare string is itself iterable; wrap it so it is read as one path.
        candidates = [Path(paths)]
    else:
        candidates = [Path(p) for p in paths]

    existing = [p for p in candidates if p.exists()]
    if not existing:
        raise FileNotFoundError("No StreamingHistory_music*.json files found (or provided paths).")

    frames = []
    for p in existing:
        frame = pd.read_json(p)
        if "endTime" in frame.columns:
            frame["endTime"] = pd.to_datetime(frame["endTime"])
        frames.append(frame)

    combined = pd.concat(frames, ignore_index=True).drop_duplicates()
    # Files without 'endTime' are tolerated above, so only sort when the
    # column actually exists instead of failing with a KeyError here.
    if "endTime" in combined.columns:
        # mergesort is stable: rows sharing a timestamp keep their file order.
        combined = combined.sort_values("endTime", kind="mergesort")
    # Dropping duplicates and sorting leave gaps/shuffled labels in the index.
    return combined.reset_index(drop=True)


def history_for_month(year: int, month: int, paths: Optional[Iterable[Union[str, Path]]] = None) -> pd.DataFrame:
    """
    Select the listening-history rows whose 'endTime' falls in one calendar month.

    The history is loaded via ``load_streaming_history(paths)``; the matching
    rows are returned as an independent copy with a fresh 0-based index.
    Raises KeyError when the loaded data has no 'endTime' column.

    Example: history_for_month(2020, 8)
    """
    history = load_streaming_history(paths)
    if "endTime" not in history.columns:
        raise KeyError("Loaded data has no 'endTime' column.")

    stamps = history["endTime"].dt
    in_year = stamps.year == year
    in_month = stamps.month == month

    selected = history.loc[in_year & in_month].copy()
    return selected.reset_index(drop=True)


# Example usage in a notebook:
# from streaming_history_utils import history_for_month
# august_2020 = history_for_month(2020, 8)