# This code uses a .json file with file names and detected song intervals, then assembles either: 1) recording segments that contain song or 2) entire files that contain song.

## Both data outputs are saved in the original folder_path.
- folder_path is a path to a folder that contains all of the .wav file recordings.
- json_path is a path to a .json file output by a song detector code, which contains each .wav file name from folder_path and, if there was detected song, the time segments with detected song in them.

## Option 1: only generate spectrograms of time segments with detected song.
### This code uses the time segments in the .json file to concatenate the detected song into 1-minute .wav files, then saves those .wav files and generates .png images of their spectrograms.

In [1]:
from pathlib import Path
# Identity of the recording session; both values end up in the names of the
# exported clips.
animal_id = "USA5207"
recording_date = "2025-07-19"  # format: YYYY-MM-DD

# Folder holding the raw .wav recordings (absolute or relative path).
folder_path = Path("/Volumes/my_own_SSD/UO_stuff/nerve_transections/USA5207/33")

# Song-detector JSON listing the detected intervals (absolute or relative path).
json_path = Path(
    "/Volumes/my_own_SSD/UO_stuff/nerve_transections/USA5207/"
    "33_periodicity_only_detected_song_intervals_combined_segments.json"
)

In [2]:
#!/usr/bin/env python
# -*- coding: utf‑8 -*-
"""
Slice detected song intervals out of a folder of .wav files, concatenate them,
split into ≤ chunk_duration_sec chunks, and save the chunks with filenames that
embed `animal_id` and `recording_date`, e.g.

    USA5288_2025-04-08_detected_song_segment_3.wav
"""

import json
from pathlib import Path
from typing import Optional, List

import numpy as np
from scipy.io import wavfile

# ────────────────────────────────────────────────────────────────
# USER‑EDITABLE METADATA
# ────────────────────────────────────────────────────────────────
# animal_id      = "USA5207"     # shown in the output filenames
# recording_date = "2025-07-19"  # YYYY‑MM‑DD              ↑

# # absolute **or** relative path to the folder that holds the .wav files
# folder_path = Path(
#     "/Volumes/my_own_SSD/UO_stuff/nerve_transections/USA5207/33"
# )

# # absolute **or** relative path to the JSON file with detected intervals
# json_path = Path(
#     "/Volumes/my_own_SSD/UO_stuff/nerve_transections/USA5207/33_periodicity_only_detected_song_intervals_combined_segments.json"
# )
# ────────────────────────────────────────────────────────────────
# RUNTIME SETTINGS
# ────────────────────────────────────────────────────────────────
# Length, in seconds, that every exported clip is cut to.
chunk_duration_sec = 60
# Export the leftover audio that is shorter than one full chunk?
save_partial_final_chunk = True
# If so, zero-pad that short final chunk up to the full chunk length?
pad_partial_with_zeros = False

# ────────────────────────────────────────────────────────────────
# QUICK VALIDATION OF THE TWO CRITICAL PATHS
# ────────────────────────────────────────────────────────────────
# Fail early, with a message naming the offending path, before any audio is
# read.  The recordings location has to be a *directory*: the output folder is
# created inside it below, so a path that merely exists (e.g. a regular file)
# is rejected here instead of failing later inside mkdir().
if not folder_path.exists():
    raise FileNotFoundError(f"Audio folder not found:\n  {folder_path.resolve()}")
if not folder_path.is_dir():
    raise NotADirectoryError(
        f"Audio folder path is not a directory:\n  {folder_path.resolve()}"
    )

if not json_path.is_file():
    raise FileNotFoundError(f"JSON file not found:\n  {json_path.resolve()}")

# ────────────────────────────────────────────────────────────────
# OUTPUT FOLDER
# ────────────────────────────────────────────────────────────────
# Clips are written next to the recordings; re-running the cell reuses the
# folder if it already exists.
output_folder = folder_path / "detected_song_segments"
output_folder.mkdir(parents=True, exist_ok=True)

# ────────────────────────────────────────────────────────────────
# STEP 1 – load JSON with [start, end] pairs (seconds)
# ────────────────────────────────────────────────────────────────
# Maps each .wav file name to that file's list of [start, end] pairs (seconds).
# The codec is spelled with a plain ASCII hyphen ("utf-8"), its canonical
# name, rather than a typographic hyphen that only resolves through
# codec-name normalisation.
with open(json_path, "r", encoding="utf-8") as f:
    detected_intervals: dict[str, List[List[float]]] = json.load(f)

# ────────────────────────────────────────────────────────────────
# STEP 2 – extract every detected interval from every file
# ────────────────────────────────────────────────────────────────
def _to_mono_int16(samples: np.ndarray) -> np.ndarray:
    """Return *samples* as a 1-D ``int16`` array.

    The clips are written to disk as 16-bit PCM, so every snippet is brought
    to that format here, before snippets from different files are joined.
    Without this step a float recording (nominally in [-1.0, 1.0]) would be
    truncated to silence and a 32-bit recording would wrap around when it is
    finally cast to ``int16``.

    Args:
        samples: Audio shaped ``(n,)`` or ``(n, channels)`` in one of the
            dtypes ``scipy.io.wavfile.read`` returns (``uint8``, a signed
            integer type, or a floating-point type).

    Returns:
        The channel average rescaled to the ``int16`` range, rounded to the
        nearest integer and clipped to [-32768, 32767].
    """
    if samples.ndim == 1 and samples.dtype == np.int16:
        return samples  # already in the target format – avoid a copy

    source_dtype = samples.dtype
    scaled = samples.astype(np.float64)
    if np.issubdtype(source_dtype, np.floating):
        scaled *= 32767.0                      # full scale ±1.0 → ±32767
    elif source_dtype == np.uint8:
        scaled = (scaled - 128.0) * 256.0      # 8-bit PCM is unsigned, centred on 128
    elif np.issubdtype(source_dtype, np.signedinteger):
        # e.g. int32 → divide by 2**16; int16 → divide by 1
        scaled /= 2.0 ** (np.iinfo(source_dtype).bits - 16)

    if scaled.ndim > 1:
        scaled = scaled.mean(axis=1)           # stereo → mono
    return np.clip(np.rint(scaled), -32768, 32767).astype(np.int16)


# Collected snippets (all mono int16), in JSON order.
detected_segments: List[np.ndarray] = []
# Sample rate of the first readable file; every other file must match it.
sample_rate_reference: Optional[int] = None
# Files named in the JSON that do not exist in folder_path.
missing_files: list[str] = []

for file_name, intervals in detected_intervals.items():
    wav_path = folder_path / file_name
    if not wav_path.exists():
        missing_files.append(file_name)
        continue

    try:
        samplerate, data = wavfile.read(wav_path)
    except Exception as e:
        # Best effort: one unreadable recording must not abort the batch.
        print(f"⚠️  Error reading {file_name}: {e}")
        continue

    # use the first readable file as the reference sample rate
    if sample_rate_reference is None:
        sample_rate_reference = samplerate
    elif samplerate != sample_rate_reference:
        # Mixing rates would change the speed of the concatenated audio, so the
        # file is skipped and reported as what it is, not as a read error.
        print(f"⚠️  Skipping {file_name}: sample rate {samplerate} Hz does not "
              f"match the reference {sample_rate_reference} Hz")
        continue

    n_samples = data.shape[0]

    # pull every [start, end] snippet
    for interval in intervals or []:
        try:
            start_time, end_time = interval
            # Clamp to the recording: a negative start would otherwise wrap
            # around to the end of the file.
            start = max(0, int(start_time * samplerate))
            end   = min(n_samples, int(end_time * samplerate))
        except (TypeError, ValueError, OverflowError) as e:
            print(f"⚠️  Skipping malformed interval {interval!r} in {file_name}: {e}")
            continue

        if end <= start:
            print(f"⚠️  Skipping empty interval {interval!r} in {file_name}")
            continue

        # Convert only the snippet, not the whole recording, to limit memory use.
        detected_segments.append(_to_mono_int16(data[start:end]))

# Report any listed but missing files
if missing_files:
    print("\n⚠️  The following files were listed in the JSON but not found in"
          f" {folder_path}:\n  • " + "\n  • ".join(missing_files))

if not detected_segments:
    raise RuntimeError("No detected song segments were found for the paths "
                       "you provided. Double‑check the folder/JSON pairing.")

# ────────────────────────────────────────────────────────────────
# STEP 3 – concatenate & split into ≤ chunk_duration_sec chunks
# ────────────────────────────────────────────────────────────────
# Join every snippet end-to-end, then cut the result into fixed-length chunks.
concatenated = np.concatenate(detected_segments)

# range() and slicing need a whole number of samples; rounding keeps a
# fractional chunk_duration_sec (e.g. 30.5) usable instead of raising TypeError.
target_samples = int(round(chunk_duration_sec * sample_rate_reference))
if target_samples <= 0:
    raise ValueError(
        f"chunk_duration_sec must be positive, got {chunk_duration_sec!r}"
    )

song_segments = []

for chunk_start in range(0, len(concatenated), target_samples):
    chunk = concatenated[chunk_start : chunk_start + target_samples]

    if len(chunk) < target_samples:
        # Only the last chunk can be short; keep it only if the user asked to.
        if not save_partial_final_chunk:
            continue
        if pad_partial_with_zeros:
            chunk = np.pad(chunk, (0, target_samples - len(chunk)), mode="constant")

    song_segments.append(chunk)

# ────────────────────────────────────────────────────────────────
# STEP 4 – write each chunk to disk with descriptive filename
# ────────────────────────────────────────────────────────────────
# Export the chunks as numbered 16-bit .wav files.  A chunk shorter than the
# target duration gets "_partial" appended to its name.
for idx, segment in enumerate(song_segments, start=1):
    duration_sec = len(segment) / sample_rate_reference
    if duration_sec < chunk_duration_sec:
        suffix = "_partial"
    else:
        suffix = ""
    fname = f"{animal_id}_{recording_date}_detected_song_segment_{idx}{suffix}.wav"

    pcm16 = segment.astype(np.int16)
    wavfile.write(output_folder / fname, sample_rate_reference, pcm16)

    print(f"✅  Saved {fname}  ({duration_sec:.2f} s)")

print("\nDone!  Segments are in:", output_folder.resolve())


✅  Saved USA5207_2025-07-19_detected_song_segment_1.wav  (60.00 s)
✅  Saved USA5207_2025-07-19_detected_song_segment_2.wav  (60.00 s)
✅  Saved USA5207_2025-07-19_detected_song_segment_3.wav  (60.00 s)
✅  Saved USA5207_2025-07-19_detected_song_segment_4.wav  (60.00 s)
✅  Saved USA5207_2025-07-19_detected_song_segment_5.wav  (60.00 s)
✅  Saved USA5207_2025-07-19_detected_song_segment_6.wav  (60.00 s)
✅  Saved USA5207_2025-07-19_detected_song_segment_7.wav  (60.00 s)
✅  Saved USA5207_2025-07-19_detected_song_segment_8.wav  (60.00 s)
✅  Saved USA5207_2025-07-19_detected_song_segment_9.wav  (60.00 s)
✅  Saved USA5207_2025-07-19_detected_song_segment_10.wav  (60.00 s)
✅  Saved USA5207_2025-07-19_detected_song_segment_11.wav  (60.00 s)
✅  Saved USA5207_2025-07-19_detected_song_segment_12.wav  (60.00 s)
✅  Saved USA5207_2025-07-19_detected_song_segment_13.wav  (60.00 s)
✅  Saved USA5207_2025-07-19_detected_song_segment_14.wav  (60.00 s)
✅  Saved USA5207_2025-07-19_detected_song_segment_15.wav 

## Generate the spectrograms:

In [12]:
# #!/usr/bin/env python
# # -*- coding: utf-8 -*-
# """
# Render filtered spectrogram panels for every
# <parent>/detected_song_segments/*.wav **and** draw red dashed lines wherever
# the concatenated audio switches from one original .wav file to the next.

# Required inputs
# ---------------
# parent_folder : Path to the folder that also contains
#                 ├─ detected_song_segments/
#                 └─ <something>.json          (# same JSON you fed the exporter)

# The JSON must have the form::
#     {
#         "original_a.wav": [[s0,e0], [s1,e1], ...],
#         "original_b.wav": [[s2,e2], ...],
#         ...
#     }

# The script re‑creates the exporter’s concatenation logic to know where one
# file stops and the next one starts inside each 60‑s segment.
# """

# from __future__ import annotations

# import re
# from pathlib import Path
# from typing import Dict, List, Tuple

# import numpy as np
# import matplotlib.pyplot as plt
# from scipy.io import wavfile
# from scipy.signal import spectrogram, windows, ellip, filtfilt
# import json
# import tkinter as tk

# # ────────────────────────────────────────────────────────────────
# # USER CONFIG  (edit these three paths / constants)
# # ────────────────────────────────────────────────────────────────
# # parent_folder = Path(
# #     "/Volumes/my_own_SSD/UO_stuff/nerve_transections/USA5207/33"
# # )
# # json_path = "/Volumes/my_own_SSD/UO_stuff/nerve_transections/USA5207/33_periodicity_only_detected_song_intervals_combined_segments.json"

# segment_duration    = 10          # seconds per spectrogram panel
# panels_per_fig      = 6           # ⇒ 60 s max per detected segment
# low_cut, high_cut   = 500, 8000   # Hz filter before spectrogram
# cmap_choice         = "binary"

# # ════════════════════════════════════════════════════════════════
# # DERIVED PATHS
# # ════════════════════════════════════════════════════════════════
# segments_folder = folder_path / "detected_song_segments"
# png_folder      = segments_folder / "spectrograms"
# png_folder.mkdir(parents=True, exist_ok=True)

# # ════════════════════════════════════════════════════════════════
# # UTILS
# # ════════════════════════════════════════════════════════════════
# def get_screen_inches() -> Tuple[float, float]:
#     try:
#         root = tk.Tk(); root.withdraw()
#         w, h = root.winfo_screenwidth(), root.winfo_screenheight()
#         root.destroy()
#         return w / 100, h / 100
#     except Exception:
#         return 12, 8

# width_inches, height_inches = get_screen_inches()

# segment_idx_re = re.compile(r"_segment_(\d+)")

# # ════════════════════════════════════════════════════════════════
# # STEP 1 – build boundary map  {segment_number: [times (s), …]}
# #         Each time marks EITHER the start OR the end of a slice
# # ════════════════════════════════════════════════════════════════
# with open(json_path, "r", encoding="utf-8") as f:
#     detected_intervals: Dict[str, List[List[float]]] = json.load(f)

# # grab the sample‑rate once from any segment file
# any_seg = next(segments_folder.glob("*.wav"), None)
# if any_seg is None:
#     raise FileNotFoundError("No *.wav in detected_song_segments/")
# sr, _ = wavfile.read(any_seg)

# SEG_LEN_S   = panels_per_fig * segment_duration          # 60 s
# SEG_LEN_SMP = SEG_LEN_S * sr

# boundary_map: Dict[int, List[float]] = {}
# seg_idx            = 1
# pos_in_seg_samples = 0
# boundaries: List[float] = []

# for src_name, intervals in detected_intervals.items():
#     for start_t, end_t in intervals:
#         slice_len_smp = int(round((end_t - start_t) * sr))
#         remaining = slice_len_smp

#         while remaining > 0:
#             space_left = SEG_LEN_SMP - pos_in_seg_samples
#             take       = min(remaining, space_left)

#             # ─── record start & end of the chunk that will be copied ───
#             slice_start_time = pos_in_seg_samples              / sr
#             slice_end_time   = (pos_in_seg_samples + take)     / sr
#             boundaries.extend([slice_start_time, slice_end_time])

#             # ─── update counters ──────────────────────────────────────
#             pos_in_seg_samples += take
#             remaining          -= take

#             # ─── segment filled? commit & start a fresh one ───────────
#             if pos_in_seg_samples == SEG_LEN_SMP:
#                 # drop potential duplicates & keep ascending order
#                 boundary_map[seg_idx] = sorted(set(boundaries))
#                 seg_idx            += 1
#                 pos_in_seg_samples  = 0
#                 boundaries          = []

# # commit the (possibly shorter) final segment
# if boundaries:
#     boundary_map[seg_idx] = sorted(set(boundaries))


# # ════════════════════════════════════════════════════════════════
# # STEP 2 – process ONE .wav
# # ════════════════════════════════════════════════════════════════
# def process_wav_file(wav_path: Path, boundaries: List[float]):
#     base_name = wav_path.stem
#     sr, data  = wavfile.read(wav_path)
#     if data.ndim > 1:
#         data = data.mean(axis=1)
#     if np.issubdtype(data.dtype, np.integer):
#         data = data.astype(np.float32)

#     nyq = sr / 2
#     b, a = ellip(5, 0.2, 40, [low_cut / nyq, high_cut / nyq], btype="band")
#     data = filtfilt(b, a, data)

#     total_secs   = data.size / sr
#     samples_per_panel = int(segment_duration * sr)
#     n_panels     = min(panels_per_fig, int(np.ceil(total_secs / segment_duration)))

#     fig, axs = plt.subplots(n_panels, 1,
#                             figsize=(width_inches, height_inches),
#                             sharex=True, gridspec_kw={'hspace': 0.0})
#     axs = [axs] if n_panels == 1 else axs

#     for i in range(n_panels):
#         start_samp = i * samples_per_panel
#         seg = data[start_samp : start_samp + samples_per_panel]
#         if seg.size < samples_per_panel:
#             seg = np.pad(seg, (0, samples_per_panel - seg.size))

#         f, t, Sxx = spectrogram(
#             seg, fs=sr,
#             window=windows.gaussian(2048, std=2048/8),
#             nperseg=2048, noverlap=2048 - 119
#         )
#         S_log  = 10 * np.log10(Sxx + np.finfo(float).eps)
#         S_log  = np.clip(S_log, a_min=3, a_max=None)
#         S_norm = (S_log - S_log.min()) / (S_log.ptp() or 1.0)
#         S_norm **= 0.7

#         axs[i].imshow(S_norm, aspect='auto', origin='lower',
#                       extent=[0, segment_duration, f.min(), f.max()],
#                       cmap=cmap_choice)
#         axs[i].set_ylim(0, 11000)
#         axs[i].set_ylabel("Freq [Hz]")

#         # Draw the red dashed boundaries that fall inside this 10‑s panel
#         panel_t0 = i * segment_duration
#         panel_t1 = panel_t0 + segment_duration
#         for b_t in boundaries:
#             if panel_t0 < b_t < panel_t1:
#                 axs[i].axvline(b_t - panel_t0, color='red',
#                                linestyle='--', linewidth=1)

#         if i == n_panels - 1:
#             axs[i].set_xlabel("Time [s]")
#             axs[i].set_xticks(np.linspace(0, segment_duration, 5))

#     fig.suptitle(f"{base_name}  –  Spectrogram ({low_cut}-{high_cut} Hz)",
#                  fontsize=14)
#     fig.tight_layout()

#     out_png = png_folder / f"{base_name}_spectrogram.png"
#     fig.savefig(out_png, dpi=300)
#     plt.close(fig)
#     print("✅", out_png.name)

# # ════════════════════════════════════════════════════════════════
# # STEP 3 – batch over segments
# # ════════════════════════════════════════════════════════════════
# wav_files = sorted(segments_folder.glob("*.wav"))
# if not wav_files:
#     raise FileNotFoundError("No .wav files found in detected_song_segments/")

# print(f"📂 Rendering spectrograms for {len(wav_files)} files …\n")

# for wav_file in wav_files:
#     m = segment_idx_re.search(wav_file.stem)
#     if not m:
#         print("⚠️  Could not parse segment index from", wav_file.name)
#         continue
#     seg_idx = int(m.group(1))
#     boundaries = boundary_map.get(seg_idx, [])
#     process_wav_file(wav_file, boundaries)

# print("\nDone!  PNGs are in:", png_folder.resolve())

In [6]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Render filtered spectrogram panels for every
<parent>/detected_song_segments/*.wav and save a PNG for each clip.
(NO red dashed boundaries are drawn.)
"""

from __future__ import annotations

import re
from pathlib import Path
from typing import Dict, List, Tuple

import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
from scipy.signal import spectrogram, windows, ellip, filtfilt
import json
import tkinter as tk

# ────────────────────────────────────────────────────────────────
# USER CONFIG –– EDIT these three items
# ────────────────────────────────────────────────────────────────
# Folder with the raw recordings; the exported clips are looked up in its
# "detected_song_segments" sub-folder.
folder_path = Path("/Volumes/my_own_SSD/UO_stuff/nerve_transections/USA5207/33")

# Detector output that lists the song intervals of each recording.
json_path = Path(
    "/Volumes/my_own_SSD/UO_stuff/nerve_transections/USA5207"
    "/33_periodicity_only_detected_song_intervals_combined_segments.json"
)

# Figure layout: `panels_per_fig` stacked panels of `segment_duration` seconds
# each, i.e. 6 × 10 s = 60 s per figure.
segment_duration = 10
panels_per_fig = 6

# Band-pass edges in Hz, applied before the spectrogram is computed.
low_cut = 500
high_cut = 8000

# Colormap name passed to imshow for the spectrogram images.
cmap_choice = "binary"

# ════════════════════════════════════════════════════════════════
# DERIVED PATHS
# ════════════════════════════════════════════════════════════════
# Clips are read from <folder_path>/detected_song_segments; their PNGs are
# written to a "spectrograms" folder created inside it.
segments_folder = folder_path.joinpath("detected_song_segments")
png_folder = segments_folder.joinpath("spectrograms")
png_folder.mkdir(parents=True, exist_ok=True)

# ════════════════════════════════════════════════════════════════
# UTILS
# ════════════════════════════════════════════════════════════════
def get_screen_inches() -> Tuple[float, float]:
    """Return a (width, height) figure size in inches based on the screen.

    The screen's pixel dimensions, as reported by a temporary hidden Tk root
    window, are each divided by 100.  If any step raises, the fixed size
    ``(12, 8)`` is returned instead.
    """
    fallback = (12, 8)
    try:
        window = tk.Tk()
        window.withdraw()  # keep the helper window off-screen
        px_wide = window.winfo_screenwidth()
        px_high = window.winfo_screenheight()
        window.destroy()
        size = (px_wide / 100, px_high / 100)
    except Exception:
        return fallback
    return size

# Figure size in inches, queried once at cell start and used as the
# `figsize` of every spectrogram figure.
width_inches, height_inches = get_screen_inches()
# Captures the numeric index that follows "_segment_" in a clip name,
# e.g. the "12" in "..._detected_song_segment_12".
segment_idx_re = re.compile(r"_segment_(\d+)")

# ════════════════════════════════════════════════════════════════
# BUILD boundary map  {segment_number: [times (s), …]}
#   (We still compute it for completeness, but boundaries are no
#   longer drawn.)
# ════════════════════════════════════════════════════════════════
# Replays the concatenate-then-split arithmetic on the detector intervals to
# work out, for each 60-s output clip, the times (seconds from the start of
# that clip) at which a copied slice begins or ends.  The result is stored in
# `boundary_map`, keyed by 1-based clip number.
# NOTE(review): nothing in the visible remainder of this cell reads
# `boundary_map`; it is kept only "for completeness".
with open(json_path, "r", encoding="utf-8") as f:
    detected_intervals: Dict[str, List[List[float]]] = json.load(f)

# The sample rate is taken from whichever clip glob() yields first.
# NOTE(review): assumes every exported clip shares that sample rate — confirm.
any_seg = next(segments_folder.glob("*.wav"), None)
if any_seg is None:
    raise FileNotFoundError("No *.wav in detected_song_segments/")
sr, _ = wavfile.read(any_seg)

SEG_LEN_S   = panels_per_fig * segment_duration          # 60 s
SEG_LEN_SMP = SEG_LEN_S * sr                             # same length in samples

boundary_map: Dict[int, List[float]] = {}
seg_idx            = 1      # clip currently being filled (1-based)
pos_in_seg_samples = 0      # samples already placed in that clip
boundaries: List[float] = []  # boundary times collected for that clip

for src_name, intervals in detected_intervals.items():
    for start_t, end_t in intervals:
        # NOTE(review): the exporter cell slices data[int(start*sr):int(end*sr)]
        # and skips files that are missing on disk, whereas this loop uses
        # round((end - start) * sr) for every JSON entry, so the positions
        # computed here can drift from the real clip contents — confirm before
        # drawing boundaries again.
        slice_len_smp = int(round((end_t - start_t) * sr))
        remaining = slice_len_smp

        # A slice that does not fit in the current clip spills into the next
        # one, so it is consumed piece by piece.
        while remaining > 0:
            space_left = SEG_LEN_SMP - pos_in_seg_samples
            take       = min(remaining, space_left)

            # record start & end of the chunk that will be copied
            slice_start_time = pos_in_seg_samples              / sr
            slice_end_time   = (pos_in_seg_samples + take)     / sr
            boundaries.extend([slice_start_time, slice_end_time])

            pos_in_seg_samples += take
            remaining          -= take

            # Clip is full: store its boundaries (de-duplicated, ascending)
            # and start filling the next clip from position 0.
            if pos_in_seg_samples == SEG_LEN_SMP:
                boundary_map[seg_idx] = sorted(set(boundaries))
                seg_idx            += 1
                pos_in_seg_samples  = 0
                boundaries          = []

# Store the boundaries of the final, partially filled clip, if any.
if boundaries:
    boundary_map[seg_idx] = sorted(set(boundaries))

# ════════════════════════════════════════════════════════════════
# PROCESS ONE .wav  (no red dashed lines drawn)
# ════════════════════════════════════════════════════════════════
def process_wav_file(wav_path: Path):
    """Render one clip as stacked spectrogram panels and save it as a PNG.

    The clip is converted to mono, band-pass filtered between ``low_cut`` and
    ``high_cut`` Hz, and cut into consecutive ``segment_duration``-second
    panels (at most ``panels_per_fig``).  Each panel's log-power spectrogram
    is floored at 3 dB, normalised to [0, 1] and raised to the power 0.7
    before being drawn.  The figure is written to ``png_folder`` as
    ``<clip stem>_spectrogram.png``.

    A clip that is empty or too short for the zero-phase filter is skipped
    with a warning so that it does not abort the whole batch.

    Args:
        wav_path: Path of the .wav clip to render.
    """
    base_name = wav_path.stem
    sr, data = wavfile.read(wav_path)
    if data.ndim > 1:
        data = data.mean(axis=1)  # stereo → mono
    if np.issubdtype(data.dtype, np.integer):
        data = data.astype(np.float32)

    # 5th-order elliptic band-pass: 0.2 dB pass-band ripple, 40 dB stop-band
    # attenuation, edges given as fractions of the Nyquist frequency.
    nyq = sr / 2
    b, a = ellip(5, 0.2, 40, [low_cut / nyq, high_cut / nyq], btype="band")

    # filtfilt pads both ends with 3 * max(len(a), len(b)) samples by default
    # and rejects signals that are not longer than that padding.
    pad_len = 3 * max(len(a), len(b))
    if data.size <= pad_len + 1:
        print(f"⚠️  Skipping {wav_path.name}: {data.size} samples is too short to filter")
        return
    data = filtfilt(b, a, data)

    total_secs        = data.size / sr
    samples_per_panel = int(segment_duration * sr)
    n_panels          = min(panels_per_fig, int(np.ceil(total_secs / segment_duration)))

    fig, axs = plt.subplots(
        n_panels, 1, figsize=(width_inches, height_inches),
        sharex=True, gridspec_kw={'hspace': 0.0}
    )
    try:
        # subplots() returns a bare Axes (not an array) for a single panel
        axs = [axs] if n_panels == 1 else axs

        for i in range(n_panels):
            start_samp = i * samples_per_panel
            seg = data[start_samp : start_samp + samples_per_panel]
            if seg.size < samples_per_panel:
                # zero-pad the last panel so every panel spans the same time axis
                seg = np.pad(seg, (0, samples_per_panel - seg.size))

            f, _, Sxx = spectrogram(
                seg, fs=sr,
                window=windows.gaussian(2048, std=2048/8),
                nperseg=2048, noverlap=2048 - 119
            )
            S_log  = 10 * np.log10(Sxx + np.finfo(float).eps)  # eps avoids log10(0)
            S_log  = np.clip(S_log, a_min=3, a_max=None)
            # np.ptp() is available on every NumPy release; the ndarray.ptp()
            # method was removed in NumPy 2.0.  `or 1.0` guards a flat panel.
            S_norm = (S_log - S_log.min()) / (np.ptp(S_log) or 1.0)
            S_norm **= 0.7

            axs[i].imshow(
                S_norm, aspect='auto', origin='lower',
                extent=[0, segment_duration, f.min(), f.max()],
                cmap=cmap_choice
            )
            axs[i].set_ylim(0, 11000)
            axs[i].set_ylabel("Freq [Hz]")

            if i == n_panels - 1:
                axs[i].set_xlabel("Time [s]")
                axs[i].set_xticks(np.linspace(0, segment_duration, 5))

        fig.suptitle(f"{base_name}  –  Spectrogram ({low_cut}-{high_cut} Hz)", fontsize=14)
        fig.tight_layout()

        out_png = png_folder / f"{base_name}_spectrogram.png"
        fig.savefig(out_png, dpi=300)
    finally:
        # Release the figure even if drawing or saving fails; otherwise
        # figures accumulate in pyplot across the batch.
        plt.close(fig)
    print("✅", out_png.name)

# ════════════════════════════════════════════════════════════════
# BATCH over segments
# ════════════════════════════════════════════════════════════════
def _segment_order(path: Path) -> tuple:
    """Sort key: clips by numeric segment index, then any unnumbered files by name."""
    m = segment_idx_re.search(path.stem)
    if m:
        return (0, int(m.group(1)), path.name)
    return (1, 0, path.name)


# Path.glob() also yields hidden dot-files (for example the "._*" sidecar
# files some systems create on external volumes); those are not audio, so
# they are left out.  Sorting by the numeric index gives 1, 2, …, 10 instead
# of the lexicographic 1, 10, 11, …, 2.
wav_files = sorted(
    (p for p in segments_folder.glob("*.wav") if not p.name.startswith(".")),
    key=_segment_order,
)
if not wav_files:
    raise FileNotFoundError("No .wav files found in detected_song_segments/")

print(f"📂 Rendering spectrograms for {len(wav_files)} files …\n")

for wav_file in wav_files:
    process_wav_file(wav_file)

print("\nDone!  PNGs are in:", png_folder.resolve())


📂 Rendering spectrograms for 43 files …

✅ USA5207_2025-07-19_detected_song_segment_1_spectrogram.png
✅ USA5207_2025-07-19_detected_song_segment_10_spectrogram.png
✅ USA5207_2025-07-19_detected_song_segment_11_spectrogram.png
✅ USA5207_2025-07-19_detected_song_segment_12_spectrogram.png
✅ USA5207_2025-07-19_detected_song_segment_13_spectrogram.png
✅ USA5207_2025-07-19_detected_song_segment_14_spectrogram.png
✅ USA5207_2025-07-19_detected_song_segment_15_spectrogram.png
✅ USA5207_2025-07-19_detected_song_segment_16_spectrogram.png
✅ USA5207_2025-07-19_detected_song_segment_17_spectrogram.png
✅ USA5207_2025-07-19_detected_song_segment_18_spectrogram.png
✅ USA5207_2025-07-19_detected_song_segment_19_spectrogram.png
✅ USA5207_2025-07-19_detected_song_segment_2_spectrogram.png
✅ USA5207_2025-07-19_detected_song_segment_20_spectrogram.png
✅ USA5207_2025-07-19_detected_song_segment_21_spectrogram.png
✅ USA5207_2025-07-19_detected_song_segment_22_spectrogram.png
✅ USA5207_2025-07-19_detected_s

# Option 2: This portion combines all .wav files that contain song into 1-minute recordings, then generates spectrograms of them.

In [7]:
# import os, json
# import numpy as np
# from scipy.io import wavfile
# from pathlib import Path   # (unused but often handy)

# # ╔══════════════════════════════════════════════════════════════════════╗
# # CONFIG ── update these two paths only
# # ╚══════════════════════════════════════════════════════════════════════╝
# # folder_path = '/your/recordings/folder'
# # json_path   = '/your/detector_output.json'

# # ── derived paths ──────────────────────────────────────────────────────
# output_folder       = os.path.join(folder_path, 'detected_song_files_full_recordings')
# os.makedirs(output_folder, exist_ok=True)
# metadata_output_path = os.path.join(output_folder, 'segment_metadata.json')

# # ╔══════════════════════════════════════════════════════════════════════╗
# # LOAD detector JSON  →  {wav_file : [[start,end], …]}
# # ╚══════════════════════════════════════════════════════════════════════╝
# with open(json_path) as f:
#     detected_intervals = json.load(f)

# # process files in deterministic order
# file_names = sorted(detected_intervals.keys())

# # ╔══════════════════════════════════════════════════════════════════════╗
# # READ all wavs into memory (could be streamed if very large files)
# # ╚══════════════════════════════════════════════════════════════════════╝
# audio_queue            = []            # [(file_name, data)]
# sample_rate_reference  = None

# for fn in file_names:
#     wav_path = os.path.join(folder_path, fn)
#     if not os.path.exists(wav_path):
#         print(f"⚠️  Missing file: {fn}")
#         continue
#     try:
#         sr, data = wavfile.read(wav_path)
#         if sample_rate_reference is None:
#             sample_rate_reference = sr
#         elif sr != sample_rate_reference:
#             raise ValueError(f"Sample‑rate mismatch in {fn} ({sr} vs {sample_rate_reference})")
#         if data.ndim > 1:                       # stereo → mono
#             data = data.mean(axis=1)
#         audio_queue.append((fn, data.astype(np.float32)))
#     except Exception as e:
#         print(f"⚠️  Error reading {fn}: {e}")

# if not audio_queue:
#     raise RuntimeError("No audio files were loaded successfully.")

# # ╔══════════════════════════════════════════════════════════════════════╗
# # CHUNKING variables & helpers
# # ╚══════════════════════════════════════════════════════════════════════╝
# minute_samples      = 60 * sample_rate_reference
# leftover            = np.array([], dtype=np.float32)
# leftover_provenance = []      # list of dicts: {"source_file", "range_start", "range_end"}
# segment_metadata    = {}
# chunk_count         = 0

# def get_song_segments_within_range(source_file, range_start, range_end):
#     """Return detector intervals (sec) that overlap [range_start, range_end) in sample coords,
#        expressed relative to the *start of that range*."""
#     out = []
#     for t_start, t_end in detected_intervals.get(source_file, []):
#         s_start = int(t_start * sample_rate_reference)
#         s_end   = int(t_end   * sample_rate_reference)
#         ov_start = max(s_start, range_start)
#         ov_end   = min(s_end,   range_end)
#         if ov_start < ov_end:
#             out.append([
#                 round((ov_start - range_start) / sample_rate_reference, 3),
#                 round((ov_end   - range_start) / sample_rate_reference, 3)
#             ])
#     return out

# def finalize_chunk(chunk_data, provenance, song_segments, idx):
#     fname = f'detected_song_minute_{idx+1}.wav'
#     fpath = os.path.join(output_folder, fname)
#     wavfile.write(fpath, sample_rate_reference, chunk_data.astype(np.int16))
#     segment_metadata[fname] = {
#         "source_files": provenance,
#         "song_segments_in_chunk": song_segments
#     }
#     print(f"✅  Saved {fname}")

# # ╔══════════════════════════════════════════════════════════════════════╗
# # MAIN loop – stitch files, slice 60‑s chunks, preserve provenance
# # ╚══════════════════════════════════════════════════════════════════════╝
# for file_name, data in audio_queue:

#     # 1) build new "combined" buffer = leftover + current file
#     combined         = np.concatenate([leftover, data])
#     combined_ranges  = []          # [{"source_file", "range_start", "range_end"}]
#     offset           = 0

#     # carry forward *all* leftover provenance
#     for prov in leftover_provenance:
#         dur = prov["range_end"] - prov["range_start"]
#         combined_ranges.append({
#             "source_file": prov["source_file"],
#             "range_start": offset,
#             "range_end":   offset + dur
#         })
#         offset += dur

#     # append current file’s span
#     combined_ranges.append({
#         "source_file": file_name,
#         "range_start": offset,
#         "range_end":   offset + len(data)
#     })

#     # 2) slice full‑minute chunks
#     cursor = 0
#     while cursor + minute_samples <= len(combined):
#         chunk               = combined[cursor:cursor + minute_samples]
#         chunk_provenance    = []
#         chunk_song_segments = []

#         chunk_start = cursor
#         chunk_end   = cursor + minute_samples

#         for rng in combined_ranges:
#             src_start, src_end = rng["range_start"], rng["range_end"]
#             ov_start = max(chunk_start, src_start)
#             ov_end   = min(chunk_end,   src_end)
#             if ov_start < ov_end:   # overlap exists
#                 # provenance entry
#                 rel_start_sec = (ov_start - chunk_start) / sample_rate_reference
#                 rel_end_sec   = (ov_end   - chunk_start) / sample_rate_reference
#                 chunk_provenance.append({
#                     "source_file": rng["source_file"],
#                     "time_range_in_chunk_seconds": [round(rel_start_sec,3),
#                                                    round(rel_end_sec,3)]
#                 })
#                 # song segments from this slice
#                 slice_rel_start = ov_start - src_start
#                 slice_rel_end   = ov_end   - src_start
#                 for seg in get_song_segments_within_range(
#                         rng["source_file"], slice_rel_start, slice_rel_end):
#                     chunk_song_segments.append([
#                         round(seg[0] + rel_start_sec, 3),
#                         round(seg[1] + rel_start_sec, 3)
#                     ])

#         finalize_chunk(chunk, chunk_provenance, chunk_song_segments, chunk_count)
#         chunk_count += 1
#         cursor      += minute_samples

#     # 3) whatever is left < 60 s → carry to next iteration
#     leftover = combined[cursor:]
#     leftover_provenance = []
#     if len(leftover) > 0:
#         for rng in combined_ranges:
#             if rng["range_end"] > cursor:                      # part survives
#                 leftover_provenance.append({
#                     "source_file": rng["source_file"],
#                     "range_start": max(0,   rng["range_start"] - cursor),
#                     "range_end":   rng["range_end"] - cursor
#                 })

# # ╔══════════════════════════════════════════════════════════════════════╗
# # FINAL (possibly padded) chunk made from leftover audio
# # ╚══════════════════════════════════════════════════════════════════════╝
# if len(leftover) > 0:
#     padded = np.pad(leftover, (0, minute_samples - len(leftover)), mode="constant")
#     final_prov  = []
#     final_segs  = []
#     for rng in leftover_provenance:
#         p_start_sec = rng["range_start"] / sample_rate_reference
#         p_end_sec   = rng["range_end"]   / sample_rate_reference
#         final_prov.append({
#             "source_file": rng["source_file"],
#             "time_range_in_chunk_seconds": [round(p_start_sec,3), round(p_end_sec,3)]
#         })
#         for seg in get_song_segments_within_range(
#                 rng["source_file"], 0, rng["range_end"] - rng["range_start"]):
#             final_segs.append([
#                 round(seg[0] + p_start_sec, 3),
#                 round(seg[1] + p_start_sec, 3)
#             ])
#     finalize_chunk(padded, final_prov, final_segs, chunk_count)

# # ╔══════════════════════════════════════════════════════════════════════╗
# # WRITE master metadata file
# # ╚══════════════════════════════════════════════════════════════════════╝
# with open(metadata_output_path, "w") as f:
#     json.dump(segment_metadata, f, indent=2)

# print(f"📄  Metadata saved to: {metadata_output_path}")


In [8]:
#!/usr/bin/env python
# -*- coding: utf‑8 -*-

"""
Split one or more long .wav recordings into consecutive 60‑second clips
and keep track of (1) which original file(s) contributed to each clip and
(2) where song was detected inside each clip.

The output clips are named:
    <animal_id>_<recording_date>_detected_song_segment_<N>.wav
and a JSON file `segment_metadata.json` is written alongside them.
"""

import os, json
import numpy as np
from scipy.io import wavfile
from pathlib import Path   # (unused here, but handy for future tweaks)

# ╔══════════════════════════════════════════════════════════════════════╗
# CONFIG –– these four values are set in the first notebook cell; shown here (commented out) for reference
# ╚══════════════════════════════════════════════════════════════════════╝
# folder_path   = "/Volumes/my_own_SSD/UO_stuff/nerve_transections/USA5207/33"        # directory with raw .wav files
# json_path     = "/Volumes/my_own_SSD/UO_stuff/nerve_transections/USA5207/33_periodicity_only_detected_song_intervals_combined_segments.json"      # detector output (song intervals)
# animal_id     = 'USA5207'                          # e.g. 'USA5207'
# recording_date = '2025-07-19'                      # e.g. 'YYYY‑MM‑DD'

# ── derived paths ──────────────────────────────────────────────────────
# Output clips and their metadata file live in a sub-folder of the recordings folder.
output_folder        = os.path.join(folder_path, 'detected_song_files_full_recordings')
metadata_output_path = os.path.join(output_folder, 'segment_metadata.json')
os.makedirs(output_folder, exist_ok=True)

# ╔══════════════════════════════════════════════════════════════════════╗
# LOAD detector JSON  →  {wav_file : [[start,end], …]}
# ╚══════════════════════════════════════════════════════════════════════╝
with open(json_path, 'r') as f:
    detected_intervals = json.load(f)

# Sort the file names so clips are always assembled in the same order.
file_names = list(detected_intervals.keys())
file_names.sort()

# ╔══════════════════════════════════════════════════════════════════════╗
# READ all wavs into memory (could be streamed if very large files)
# ╚══════════════════════════════════════════════════════════════════════╝
sample_rate_reference = None    # sample rate of the first file that loads; all others must match
audio_queue           = []      # (file_name, mono float32 samples) in processing order

for fn in file_names:
    wav_path = os.path.join(folder_path, fn)
    if os.path.exists(wav_path):
        try:
            sr, data = wavfile.read(wav_path)

            # A file whose rate differs from the reference is rejected; the
            # error is reported by the handler below and the file is skipped.
            if sample_rate_reference is not None and sr != sample_rate_reference:
                raise ValueError(f"Sample‑rate mismatch in {fn} ({sr} vs {sample_rate_reference})")
            if sample_rate_reference is None:
                sample_rate_reference = sr

            # Multi-channel audio is averaged down to a single channel.
            data = data.mean(axis=1) if data.ndim > 1 else data

            audio_queue.append((fn, data.astype(np.float32)))
        except Exception as e:
            print(f"⚠️  Error reading {fn}: {e}")
    else:
        print(f"⚠️  Missing file: {fn}")

# Nothing usable was loaded → stop here rather than write empty output.
if len(audio_queue) == 0:
    raise RuntimeError("No audio files were loaded successfully.")

# ╔══════════════════════════════════════════════════════════════════════╗
# CHUNKING variables & helpers
# ╚══════════════════════════════════════════════════════════════════════╝
chunk_count         = 0       # number of clips written so far (0-based index of the next clip)
segment_metadata    = {}      # clip file name → provenance + song segments
leftover_provenance = []      # list of dicts describing which source file each leftover span came from
leftover            = np.zeros(0, dtype=np.float32)   # audio not yet written to a clip
minute_samples      = sample_rate_reference * 60      # samples in one 60-second clip

def get_song_segments_within_range(source_file, range_start, range_end,
                                   *, intervals=None, sample_rate=None):
    """Return the detected-song intervals of one file that fall inside a sample range.

    Each detector interval (in seconds) is converted to samples, clipped to
    the half-open range ``[range_start, range_end)`` and, when anything is
    left, reported in seconds *relative to* ``range_start``.

    Parameters
    ----------
    source_file : str
        Key into the detector mapping (the .wav file name).
    range_start, range_end : int
        Sample indices within ``source_file`` delimiting the range of interest.
    intervals : dict, optional
        Mapping ``{file_name: [[start_sec, end_sec], ...]}``. Defaults to the
        module-level ``detected_intervals``.
    sample_rate : int, optional
        Samples per second used for the conversion. Defaults to the
        module-level ``sample_rate_reference``.

    Returns
    -------
    list of [float, float]
        ``[start_sec, end_sec]`` pairs rounded to 3 decimals; empty when the
        file has no entry or no interval overlaps the range.
    """
    # Fall back to the notebook-level state only when no override is given,
    # so existing three-argument calls behave exactly as before.
    if intervals is None:
        intervals = detected_intervals
    if sample_rate is None:
        sample_rate = sample_rate_reference

    out = []
    for t_start, t_end in intervals.get(source_file, []):
        s_start = int(t_start * sample_rate)
        s_end   = int(t_end   * sample_rate)
        ov_start = max(s_start, range_start)
        ov_end   = min(s_end,   range_end)
        if ov_start < ov_end:   # keep only intervals that actually overlap
            out.append([
                round((ov_start - range_start) / sample_rate, 3),
                round((ov_end   - range_start) / sample_rate, 3)
            ])
    return out

def finalize_chunk(chunk_data, provenance, song_segments, idx,
                   animal_id, recording_date):
    """Save one clip as a 16-bit .wav and record its metadata.

    The clip is written into ``output_folder`` at ``sample_rate_reference``
    and named ``<animal_id>_<recording_date>_detected_song_segment_<idx+1>.wav``.
    Its provenance and song segments are stored in ``segment_metadata`` under
    that file name.
    """
    clip_number = idx + 1
    clip_name = f"{animal_id}_{recording_date}_detected_song_segment_{clip_number}.wav"

    # Limit samples to the int16 range before the cast so out-of-range
    # values saturate instead of wrapping around.
    limited = np.clip(chunk_data, -32768, 32767)
    wavfile.write(
        os.path.join(output_folder, clip_name),
        sample_rate_reference,
        limited.astype(np.int16),
    )

    entry = {}
    entry["source_files"] = provenance
    entry["song_segments_in_chunk"] = song_segments
    segment_metadata[clip_name] = entry

    print(f"✅  Saved {clip_name}")

# ╔══════════════════════════════════════════════════════════════════════╗
# MAIN loop –– stitch files, slice 60‑s chunks, preserve provenance
# ╚══════════════════════════════════════════════════════════════════════╝
for file_name, data in audio_queue:

    # 1) Build the working buffer: audio carried over from earlier files,
    #    followed by the whole of the current file.
    combined        = np.concatenate([leftover, data])
    combined_ranges = []   # [{"source_file", "range_start", "range_end", "source_offset"}]
    offset          = 0

    # Re-register every carried-over span at its position in `combined`.
    # "source_offset" is the sample index *inside the source file* at which
    # the span begins. It is needed because carried-over audio is the tail of
    # its file: without it the detector intervals would be looked up from the
    # start of the file and the reported song segments would be wrong.
    for prov in leftover_provenance:
        dur = prov["range_end"] - prov["range_start"]
        combined_ranges.append({
            "source_file":   prov["source_file"],
            "range_start":   offset,
            "range_end":     offset + dur,
            "source_offset": prov["source_offset"],
        })
        offset += dur

    # The current file contributes all of its samples, starting at its sample 0.
    combined_ranges.append({
        "source_file":   file_name,
        "range_start":   offset,
        "range_end":     offset + len(data),
        "source_offset": 0,
    })

    # 2) Slice off as many full 60-second chunks as the buffer holds.
    cursor = 0
    while cursor + minute_samples <= len(combined):
        chunk               = combined[cursor:cursor + minute_samples]
        chunk_provenance    = []
        chunk_song_segments = []

        chunk_start = cursor
        chunk_end   = cursor + minute_samples

        for rng in combined_ranges:
            src_start, src_end = rng["range_start"], rng["range_end"]
            ov_start = max(chunk_start, src_start)
            ov_end   = min(chunk_end,   src_end)
            if ov_start >= ov_end:   # this source does not reach into the chunk
                continue

            # Where this source sits inside the chunk (seconds).
            rel_start_sec = (ov_start - chunk_start) / sample_rate_reference
            rel_end_sec   = (ov_end   - chunk_start) / sample_rate_reference
            chunk_provenance.append({
                "source_file": rng["source_file"],
                "time_range_in_chunk_seconds": [
                    round(rel_start_sec, 3),
                    round(rel_end_sec,   3)
                ]
            })

            # The same overlap expressed in source-file sample coordinates,
            # which is what the detector intervals refer to.
            file_start = rng["source_offset"] + (ov_start - src_start)
            file_end   = rng["source_offset"] + (ov_end   - src_start)
            for seg in get_song_segments_within_range(
                    rng["source_file"], file_start, file_end):
                chunk_song_segments.append([
                    round(seg[0] + rel_start_sec, 3),
                    round(seg[1] + rel_start_sec, 3)
                ])

        finalize_chunk(chunk, chunk_provenance, chunk_song_segments,
                       chunk_count, animal_id, recording_date)
        chunk_count += 1
        cursor      += minute_samples

    # 3) Whatever is left (< 60 s) is carried to the next iteration.
    leftover = combined[cursor:]
    leftover_provenance = []
    if len(leftover) > 0:
        for rng in combined_ranges:
            if rng["range_end"] > cursor:      # part of this span survives
                # Samples of this span already written to earlier chunks.
                consumed = max(0, cursor - rng["range_start"])
                leftover_provenance.append({
                    "source_file":   rng["source_file"],
                    "range_start":   max(0, rng["range_start"] - cursor),
                    "range_end":     rng["range_end"] - cursor,
                    "source_offset": rng["source_offset"] + consumed,
                })

# ╔══════════════════════════════════════════════════════════════════════╗
# FINAL (possibly padded) chunk made from leftover audio
# ╚══════════════════════════════════════════════════════════════════════╝
if len(leftover) > 0:
    # Zero-pad the remaining audio up to a full 60-second clip.
    padded = np.pad(
        leftover, (0, minute_samples - len(leftover)),
        mode="constant"
    )

    final_prov, final_segs = [], []
    for rng in leftover_provenance:
        p_start_sec = rng["range_start"] / sample_rate_reference
        p_end_sec   = rng["range_end"]   / sample_rate_reference
        final_prov.append({
            "source_file": rng["source_file"],
            "time_range_in_chunk_seconds": [
                round(p_start_sec, 3),
                round(p_end_sec,   3)
            ]
        })

        # Look the span up at its true position inside the source file.
        file_start = rng["source_offset"]
        file_end   = file_start + (rng["range_end"] - rng["range_start"])
        for seg in get_song_segments_within_range(
                rng["source_file"], file_start, file_end):
            final_segs.append([
                round(seg[0] + p_start_sec, 3),
                round(seg[1] + p_start_sec, 3)
            ])

    finalize_chunk(padded, final_prov, final_segs,
                   chunk_count, animal_id, recording_date)

# ╔══════════════════════════════════════════════════════════════════════╗
# WRITE master metadata file
# ╚══════════════════════════════════════════════════════════════════════╝
# Serialise the whole clip → metadata mapping first, then write it in one go.
metadata_text = json.dumps(segment_metadata, indent=2)
with open(metadata_output_path, 'w') as f:
    f.write(metadata_text)

print(f"📄  Metadata saved to: {metadata_output_path}")


✅  Saved USA5207_2025-07-19_detected_song_segment_1.wav
✅  Saved USA5207_2025-07-19_detected_song_segment_2.wav
✅  Saved USA5207_2025-07-19_detected_song_segment_3.wav
✅  Saved USA5207_2025-07-19_detected_song_segment_4.wav
✅  Saved USA5207_2025-07-19_detected_song_segment_5.wav
✅  Saved USA5207_2025-07-19_detected_song_segment_6.wav
✅  Saved USA5207_2025-07-19_detected_song_segment_7.wav
✅  Saved USA5207_2025-07-19_detected_song_segment_8.wav
✅  Saved USA5207_2025-07-19_detected_song_segment_9.wav
✅  Saved USA5207_2025-07-19_detected_song_segment_10.wav
✅  Saved USA5207_2025-07-19_detected_song_segment_11.wav
✅  Saved USA5207_2025-07-19_detected_song_segment_12.wav
✅  Saved USA5207_2025-07-19_detected_song_segment_13.wav
✅  Saved USA5207_2025-07-19_detected_song_segment_14.wav
✅  Saved USA5207_2025-07-19_detected_song_segment_15.wav
✅  Saved USA5207_2025-07-19_detected_song_segment_16.wav
✅  Saved USA5207_2025-07-19_detected_song_segment_17.wav
✅  Saved USA5207_2025-07-19_detected_son

## Generate the spectrogram

In [13]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Batch‑render spectrograms for detected‑song minute clips and (re)name the
clips themselves with a clearer convention:

    <animal_id>_<recording_date>_detected_song_file_minute_<n>.wav
"""

import os, json
from pathlib import Path
from typing import List, Tuple

import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
from scipy.signal import spectrogram, windows, ellip, filtfilt
import tkinter as tk

# ────────────────────────────────────────────────────────────────
# variables `animal_id`, `recording_date`, `folder_path`, `json_path`
# were defined in a previous notebook cell
# ────────────────────────────────────────────────────────────────

# ────────────────────────────────────────────────────────────────
# PATHS  –– built with pathlib's `/` operator, so `folder_path` must be a Path
# ────────────────────────────────────────────────────────────────
# Clips are read from the folder written by the chunking cell; PNGs go in a
# "spectrograms" sub-folder of it.
base_folder     = folder_path.joinpath("detected_song_files_full_recordings")
spectrogram_out = base_folder.joinpath("spectrograms")
spectrogram_out.mkdir(exist_ok=True)

# The metadata JSON is optional: without it the spectrograms are simply
# rendered with no annotations.
metadata_json = base_folder.joinpath("segment_metadata.json")
segment_metadata = (
    json.loads(metadata_json.read_text(encoding="utf-8"))
    if metadata_json.is_file()
    else {}
)

# ────────────────────────────────────────────────────────────────
# Figure‑size helper
# ────────────────────────────────────────────────────────────────
def get_screen_inches() -> Tuple[float, float]:
    """Return a (width, height) figure size derived from the screen resolution.

    The screen size in pixels is queried through a hidden Tk root window and
    divided by 100. If anything goes wrong while querying it, ``(12, 8)`` is
    returned instead.
    """
    fallback = (12, 8)
    try:
        root = tk.Tk()
        root.withdraw()                      # never show the helper window
        width_px = root.winfo_screenwidth()
        height_px = root.winfo_screenheight()
        root.destroy()
    except Exception:
        return fallback
    return width_px / 100, height_px / 100

width_inches, height_inches = get_screen_inches()

# ────────────────────────────────────────────────────────────────
# Spectrogram rendering for one clip
# ────────────────────────────────────────────────────────────────
def render_clip(
    wav_path: Path,
    segment_duration: int = 10,
    panels_per_fig: int = 6,
    low_cut: int = 500,
    high_cut: int = 8000,
) -> None:
    """Render one clip as stacked spectrogram panels and save the figure as a PNG.

    The clip is read from ``wav_path``, averaged to one channel if it has
    several, band-pass filtered between ``low_cut`` and ``high_cut`` Hz
    (``ellip`` design applied forwards and backwards with ``filtfilt``) and
    drawn as up to ``panels_per_fig`` panels of ``segment_duration`` seconds
    each. Audio beyond ``panels_per_fig * segment_duration`` seconds is not
    drawn.

    Annotations come from the module-level ``segment_metadata`` entry keyed by
    the clip's file name, when there is one:

    * ``song_segments_in_chunk`` intervals are shaded yellow;
    * the end time of every ``source_files`` entry is marked with a dashed
      red line.

    The figure is saved to ``spectrogram_out`` as ``<stem>_spectrogram.png``.

    Args:
        wav_path: Path of the .wav clip to render.
        segment_duration: Length of each panel in seconds.
        panels_per_fig: Maximum number of panels in the figure.
        low_cut: Lower band-pass edge in Hz.
        high_cut: Upper band-pass edge in Hz.
    """
    base_name = wav_path.stem
    sr, data  = wavfile.read(wav_path)
    if data.ndim > 1:
        data = data.mean(axis=1)
    if np.issubdtype(data.dtype, np.integer):
        data = data.astype(np.float32)

    # band‑pass (edges are normalised to the Nyquist frequency)
    nyq = sr / 2
    filt_b, filt_a = ellip(5, 0.2, 40, [low_cut / nyq, high_cut / nyq], btype="band")
    data = filtfilt(filt_b, filt_a, data)

    total_secs        = data.size / sr
    samples_per_panel = int(segment_duration * sr)
    n_panels          = min(panels_per_fig, int(np.ceil(total_secs / segment_duration)))

    # annotations (all times are seconds from the start of the clip)
    clip_meta      = segment_metadata.get(wav_path.name, {})
    song_intervals = clip_meta.get("song_segments_in_chunk", [])
    boundary_lines = [
        src["time_range_in_chunk_seconds"][1]
        for src in clip_meta.get("source_files", [])
        if "time_range_in_chunk_seconds" in src
    ]

    fig, axs = plt.subplots(
        n_panels, 1, figsize=(width_inches, height_inches),
        sharex=True, gridspec_kw={"hspace": 0.0}
    )
    axs = [axs] if n_panels == 1 else axs

    # The analysis window is identical for every panel, so build it once.
    window = windows.gaussian(2048, std=2048 / 8)

    for idx in range(n_panels):
        start_sample = idx * samples_per_panel
        panel = data[start_sample : start_sample + samples_per_panel]
        if panel.size < samples_per_panel:                 # zero‑pad last panel
            panel = np.pad(panel, (0, samples_per_panel - panel.size))

        f, _t, Sxx = spectrogram(
            panel, fs=sr,
            window=window,
            nperseg=2048, noverlap=2048 - 119,
        )
        # Log power, floored at 3 dB, scaled to [0, 1] per panel, then
        # raised to the power 0.7.
        S_log  = 10 * np.log10(Sxx + np.finfo(float).eps)
        S_log  = np.clip(S_log, a_min=3, a_max=None)
        # np.ptp(...) rather than the ndarray.ptp() method, which NumPy 2.0
        # removed; `or 1` avoids dividing by zero on a flat panel.
        S_norm = (S_log - S_log.min()) / (np.ptp(S_log) or 1)
        S_norm **= 0.7

        axs[idx].imshow(
            S_norm, aspect="auto", origin="lower",
            extent=[0, segment_duration, f.min(), f.max()],
            cmap="binary",
        )
        axs[idx].set_ylim(0, 11_000)
        axs[idx].set_ylabel("Freq [Hz]")

        # yellow song spans, clipped to this panel's time window [p0, p1)
        p0, p1 = idx * segment_duration, (idx + 1) * segment_duration
        for s0, s1 in song_intervals:
            if s0 < p1 and s1 > p0:
                x0, x1 = max(0, s0 - p0), min(segment_duration, s1 - p0)
                axs[idx].axvspan(x0, x1, color="yellow", alpha=0.1)

        # dashed red line where one source file ends inside this panel
        for boundary in boundary_lines:
            if p0 < boundary < p1:
                axs[idx].axvline(boundary - p0, color="red", linestyle="--", linewidth=1.2)

        if idx == n_panels - 1:
            axs[idx].set_xlabel("Time [s]")
            axs[idx].set_xticks(np.linspace(0, segment_duration, 5))

    fig.suptitle(f"{base_name} – Spectrogram ({low_cut}-{high_cut} Hz)", fontsize=14)
    fig.tight_layout()

    out_png = spectrogram_out / f"{base_name}_spectrogram.png"
    fig.savefig(out_png, dpi=300)
    plt.close(fig)
    print(f"✅ spectrogram → {out_png.name}")

# ────────────────────────────────────────────────────────────────
# Batch rename clips, update metadata map, and render
# ────────────────────────────────────────────────────────────────
def _clip_sort_key(path: Path):
    """Sort clips by their trailing number (…_2 before …_10), then by name."""
    tail = path.stem.rsplit("_", 1)[-1]
    if tail.isascii() and tail.isdigit():
        return (0, int(tail), path.name)
    return (1, 0, path.name)   # names without a trailing number go last


# A plain alphabetical sort would put "…_10.wav" before "…_2.wav" and the
# renumbering below would then scramble the chronological order of the clips.
wav_files: List[Path] = sorted(base_folder.glob("*.wav"), key=_clip_sort_key)
if not wav_files:
    raise FileNotFoundError(f"No .wav clips found in {base_folder}")

print(f"\n📂 Found {len(wav_files)} clips – renaming & rendering …\n")

# Pass 1: rename the clips and mirror their metadata under the new names.
clips_to_render: List[Path] = []
metadata_changed = False
try:
    for idx, old_path in enumerate(wav_files, start=1):
        new_name = f"{animal_id}_{recording_date}_detected_song_file_minute_{idx}.wav"
        new_path = old_path.with_name(new_name)

        if old_path.name != new_name:           # rename on disk
            # Path.rename() silently replaces an existing file on POSIX;
            # refuse instead of destroying another clip.
            if new_path.exists():
                raise FileExistsError(
                    f"Cannot rename {old_path.name} → {new_name}: target already exists"
                )
            old_path.rename(new_path)

        # keep metadata mapping in sync
        if old_path.name in segment_metadata and new_name not in segment_metadata:
            segment_metadata[new_name] = segment_metadata[old_path.name]
            metadata_changed = True

        clips_to_render.append(new_path)
finally:
    # Save the new keys so a later run, which only sees the renamed files,
    # can still find their annotations.
    if metadata_changed:
        with metadata_json.open("w", encoding="utf-8") as f:
            json.dump(segment_metadata, f, indent=2)

# Pass 2: render every clip under its final name.
for clip_path in clips_to_render:
    render_clip(clip_path)

print("\nDone!  Spectrograms in:", spectrogram_out.resolve())



📂 Found 60 clips – renaming & rendering …

✅ spectrogram → USA5207_2025-07-19_detected_song_file_minute_1_spectrogram.png
✅ spectrogram → USA5207_2025-07-19_detected_song_file_minute_2_spectrogram.png
✅ spectrogram → USA5207_2025-07-19_detected_song_file_minute_3_spectrogram.png
✅ spectrogram → USA5207_2025-07-19_detected_song_file_minute_4_spectrogram.png
✅ spectrogram → USA5207_2025-07-19_detected_song_file_minute_5_spectrogram.png
✅ spectrogram → USA5207_2025-07-19_detected_song_file_minute_6_spectrogram.png
✅ spectrogram → USA5207_2025-07-19_detected_song_file_minute_7_spectrogram.png
✅ spectrogram → USA5207_2025-07-19_detected_song_file_minute_8_spectrogram.png
✅ spectrogram → USA5207_2025-07-19_detected_song_file_minute_9_spectrogram.png
✅ spectrogram → USA5207_2025-07-19_detected_song_file_minute_10_spectrogram.png
✅ spectrogram → USA5207_2025-07-19_detected_song_file_minute_11_spectrogram.png
✅ spectrogram → USA5207_2025-07-19_detected_song_file_minute_12_spectrogram.png
✅ spe

### Trying out different color scales for better song visibility:

In [None]:
# import numpy as np
# import os
# from pathlib import Path
# from scipy.io import wavfile
# from scipy.signal import spectrogram, windows, ellip, filtfilt
# import matplotlib.pyplot as plt
# import tkinter as tk
# import json

# # ── Utility: approximate screen size in inches (assumes 100 dpi) ─────────────
# def get_screen_resolution():
#     root = tk.Tk(); root.withdraw()
#     w_px, h_px = root.winfo_screenwidth(), root.winfo_screenheight()
#     root.destroy()
#     return w_px / 100, h_px / 100

# width_inches, height_inches = get_screen_resolution()

# # ── Load your segment metadata (if you have one) ─────────────────────────────
# # For example:
# # with open("path/to/segment_metadata.json") as f:
# #     segment_metadata = json.load(f)
# segment_metadata = {}  # replace with your actual metadata dict

# def process_wav_file(file_path, spectrogram_folder,
#                      segment_duration=10,
#                      low_cut=500, high_cut=8000):
#     try:
#         base_name = Path(file_path).stem
#         sr, data = wavfile.read(file_path)
#         if data.ndim > 1:
#             data = data.mean(axis=1)

#         # === Band‑pass filter ===
#         nyq = sr / 2
#         b, a = ellip(5, 0.2, 40,
#                      [low_cut/nyq, high_cut/nyq],
#                      btype='band')
#         data = filtfilt(b, a, data)

#         # === Setup segments ===
#         total_secs = data.size / sr
#         seg_samps = int(segment_duration * sr)
#         nseg = min(6, int(np.ceil(total_secs / segment_duration)))
#         out_path = Path(spectrogram_folder) / f"{base_name}_spectrogram.png"

#         fig, axs = plt.subplots(nseg, 1,
#                                 figsize=(width_inches, height_inches),
#                                 sharex=True,
#                                 gridspec_kw={'hspace': 0})
#         if nseg == 1:
#             axs = [axs]

#         # retrieve any song intervals / boundaries
#         key = f"{base_name}.wav"
#         song_intervals = segment_metadata.get(key, {}).get("song_segments_in_chunk", [])
#         boundaries = [src["time_range_in_chunk_seconds"][1]
#                       for src in segment_metadata.get(key, {}).get("source_files", [])
#                       if "time_range_in_chunk_seconds" in src]

#         for i in range(nseg):
#             start = i * seg_samps
#             end = start + seg_samps
#             chunk = np.zeros(seg_samps, dtype=data.dtype)
#             if start < data.size:
#                 chunk[:max(0, min(seg_samps, data.size - start))] = data[start:end]

#             # ── compute spectrogram ──────────────────────────────────────────
#             f, t, Sxx = spectrogram(
#                 chunk,
#                 fs=sr,
#                 window=windows.gaussian(2048, std=2048/8),
#                 nperseg=2048,
#                 noverlap=2048 - 119
#             )

#             # ── convert to dB and clamp dynamic range ────────────────────────
#             Sxx_dB = 10 * np.log10(Sxx + np.finfo(float).eps)
#             vmax = Sxx_dB.max()
#             vmin = vmax - 60   # adjust dynamic range (e.g. top 60 dB)
#             axs[i].imshow(
#                 Sxx_dB,
#                 aspect='auto',
#                 origin='lower',
#                 extent=[0, segment_duration, f.min(), f.max()],
#                 cmap='gray_r',
#                 vmin=vmin,
#                 vmax=vmax
#             )
#             axs[i].set_ylim(0, 11000)

#             # ── yellow highlights ───────────────────────────────────────────
#             panel_off = i * segment_duration
#             for s0, s1 in song_intervals:
#                 if s0 < panel_off + segment_duration and s1 > panel_off:
#                     x0 = max(0, s0 - panel_off)
#                     x1 = min(segment_duration, s1 - panel_off)
#                     axs[i].axvspan(x0, x1, color='yellow', alpha=0.1)

#             # ── red boundaries ───────────────────────────────────────────────
#             for btime in boundaries:
#                 if panel_off < btime < panel_off + segment_duration:
#                     axs[i].axvline(btime - panel_off,
#                                    color='red',
#                                    linestyle='--',
#                                    linewidth=1.2)

#             axs[i].set_ylabel('Freq [Hz]')
#             if i == nseg - 1:
#                 axs[i].set_xlabel('Time [sec]')
#                 axs[i].set_xticks(np.linspace(0, segment_duration, 5))

#         fig.suptitle(f'{base_name} – Spectrogram (Filtered {low_cut}-{high_cut} Hz)', fontsize=14)
#         fig.tight_layout()
#         fig.savefig(out_path, dpi=300)
#         plt.close(fig)
#         print(f"✅ Saved: {out_path}")

#     except Exception as e:
#         print(f"❌ Error processing {file_path} – {e}")

# def batch_process_folder(folder_path, segment_duration=10):
#     spectrogram_folder = Path(folder_path) / "spectrograms"
#     spectrogram_folder.mkdir(parents=True, exist_ok=True)

#     wavs = list(Path(folder_path).glob("*.wav"))
#     if not wavs:
#         print("No .wav files found.")
#         return

#     print(f"\n📂 Processing {len(wavs)} files in {folder_path}\n")
#     for wf in wavs:
#         process_wav_file(str(wf), str(spectrogram_folder),
#                          segment_duration=segment_duration)

# # === USER CONFIGURE & RUN ===
# if __name__ == "__main__":
#     # point this to your folder of .wav files
#     #folder = "/path/to/your/wav_folder"
#     batch_process_folder(spectrogram_output_folder, segment_duration=10)



📂 Processing 2 files in /Users/mirandahulsey-vincent/Documents/allPythonCode/BYOD_class/data_inputs/USA5288_testing_pipeline/detected_song_files_full_recordings

✅ Saved: /Users/mirandahulsey-vincent/Documents/allPythonCode/BYOD_class/data_inputs/USA5288_testing_pipeline/detected_song_files_full_recordings/spectrograms/detected_song_minute_2_spectrogram.png
✅ Saved: /Users/mirandahulsey-vincent/Documents/allPythonCode/BYOD_class/data_inputs/USA5288_testing_pipeline/detected_song_files_full_recordings/spectrograms/detected_song_minute_1_spectrogram.png
