# Segemtation And Saving with Tonic Frequency

In [1]:
import os
import librosa
import torch
import numpy as np
import soundfile as sf
import torchcrepe

# === Paths ===
INPUT_DIR = r"C:\College\Subjects\Sem 6\Speech Processing\Project\Dataset\Augmented"
OUTPUT_DIR = r"C:\College\Subjects\Sem 6\Speech Processing\Project\Dataset\Segment"

SEGMENT_DURATION = 10  # seconds
OVERLAP = 0.5          # 50%
SR = 16000             # Sampling rate
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"  # Use GPU if available

if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

# === Helper Functions ===

def estimate_tonic_torchcrepe(audio, sr=16000, device="cuda"):
    """Estimate tonic using TorchCREPE (fast + GPU) and histogram."""
    if len(audio.shape) > 1:
        audio = librosa.to_mono(audio)

    # TorchCREPE expects shape (1, n_samples), float32
    audio_tensor = torch.tensor(audio, dtype=torch.float32).unsqueeze(0).to(device)

    # Predict pitch and periodicity
    pitch, periodicity = torchcrepe.predict(
        audio_tensor,
        sr,
        hop_length=160,        # 10ms resolution (160 samples @ 16kHz)
        fmin=50.0,
        fmax=1000.0,
        model='full',
        batch_size=128,
        return_periodicity=True,
        device=device
    )

    # Keep confident frames
    confident_pitch = pitch[periodicity > 0.5].cpu().numpy().flatten()
    if len(confident_pitch) == 0:
        return 0.0

    # Histogram mode estimation
    hist, bin_edges = np.histogram(confident_pitch, bins=np.linspace(50, 1000, 500))
    tonic_hz = bin_edges[np.argmax(hist)]
    return round(tonic_hz, 2)

def segment_audio(y, segment_duration, overlap, sr=16000):
    hop_len = int(sr * segment_duration * (1 - overlap))
    segment_len = int(sr * segment_duration)
    segments = []

    for i in range(0, len(y) - segment_len + 1, hop_len):
        seg = y[i:i + segment_len]
        segments.append(seg)

    return segments

# === Main Processing Loop ===

for raga_folder in os.listdir(INPUT_DIR):
    raga_path = os.path.join(INPUT_DIR, raga_folder)
    if not os.path.isdir(raga_path):
        continue

    raga_output_path = os.path.join(OUTPUT_DIR, raga_folder)
    os.makedirs(raga_output_path, exist_ok=True)

    for file in os.listdir(raga_path):
        if not file.endswith(".wav"):
            continue

        file_path = os.path.join(raga_path, file)
        raga_name = raga_folder

        print(f" Processing: {file} (Raaga: {raga_name})")

        try:
            # Load audio
            audio, _ = librosa.load(file_path, sr=SR, mono=True)

            # Estimate tonic using TorchCREPE
            tonic_hz = estimate_tonic_torchcrepe(audio, sr=SR, device=DEVICE)

            # Segment audio
            segments = segment_audio(audio, segment_duration=SEGMENT_DURATION, overlap=OVERLAP, sr=SR)

            # Save segments with base filename to avoid overwrite
            base_name = os.path.splitext(file)[0]
            for idx, seg in enumerate(segments):
                out_filename = f"{base_name}_tonic={tonic_hz}_seg{idx}.wav"
                out_path = os.path.join(raga_output_path, out_filename)
                sf.write(out_path, seg, SR)

            print(f" Saved {len(segments)} segments (Tonic = {tonic_hz} Hz)")

        except Exception as e:
            print(f" Error processing {file}: {e}")

print("\n All files processed with tonic estimated using TorchCREPE (GPU-accelerated)!")


 Processing: Neelambari_2_trimmed.wav (Raaga: Neelambari)


  torch.load(file, map_location=device))


 Saved 48 segments (Tonic = 171.84 Hz)
 Processing: Neelambari_2_trimmed_noise.wav (Raaga: Neelambari)
 Saved 48 segments (Tonic = 171.84 Hz)
 Processing: Neelambari_2_trimmed_pitch.wav (Raaga: Neelambari)
 Saved 48 segments (Tonic = 194.69 Hz)
 Processing: Neelambari_2_trimmed_stretch.wav (Raaga: Neelambari)
 Saved 44 segments (Tonic = 173.75 Hz)
 Processing: Neelambari_3_trimmed.wav (Raaga: Neelambari)
 Saved 114 segments (Tonic = 244.19 Hz)
 Processing: Neelambari_3_trimmed_noise.wav (Raaga: Neelambari)
 Saved 114 segments (Tonic = 244.19 Hz)
 Processing: Neelambari_3_trimmed_pitch.wav (Raaga: Neelambari)
 Saved 114 segments (Tonic = 272.75 Hz)
 Processing: Neelambari_3_trimmed_stretch.wav (Raaga: Neelambari)
 Saved 103 segments (Tonic = 244.19 Hz)
 Processing: Neelambari_4_trimmed.wav (Raaga: Neelambari)
 Saved 47 segments (Tonic = 206.11 Hz)
 Processing: Neelambari_4_trimmed_noise.wav (Raaga: Neelambari)
 Saved 47 segments (Tonic = 206.11 Hz)
 Processing: Neelambari_4_trimmed_pitc

# Markdown

In [3]:
import os
import librosa
import shutil

In [7]:

# Path to your segmented dataset
SEGMENTED_PATH = r"C:\College\Subjects\Sem 6\Speech Processing\Project\Dataset\Segment"
TARGET_PATH = r"C:\College\Subjects\Sem 6\Speech Processing\Project\Dataset\Balanced"

# Ensure target path exists
if not os.path.exists(TARGET_PATH):
    os.makedirs(TARGET_PATH)

# Step 1: Collect total duration for each raaga folder
raaga_durations = {}

for raaga_folder in os.listdir(SEGMENTED_PATH):
    raaga_path = os.path.join(SEGMENTED_PATH, raaga_folder)
    total_duration = 0

    if os.path.isdir(raaga_path):
        for file in os.listdir(raaga_path):
            if file.endswith(".wav"):
                file_path = os.path.join(raaga_path, file)
                y, sr = librosa.load(file_path, sr=None)
                duration = librosa.get_duration(y=y, sr=sr)
                total_duration += duration

        raaga_durations[raaga_folder] = total_duration
        print(f"{raaga_folder} has {total_duration:.2f} seconds of audio.")

# Step 2: Find the minimum total duration
min_duration = min(raaga_durations.values())
print(f"\nTarget duration for all raagas: {min_duration:.2f} seconds\n")

# Step 3: Trim other raaga folders to match the minimum duration
for raaga_folder, total_duration in raaga_durations.items():
    raaga_path = os.path.join(SEGMENTED_PATH, raaga_folder)
    target_raaga_path = os.path.join(TARGET_PATH, raaga_folder)

    if not os.path.exists(target_raaga_path):
        os.makedirs(target_raaga_path)

    current_duration = 0

    for file in sorted(os.listdir(raaga_path)):
        if file.endswith(".wav"):
            file_path = os.path.join(raaga_path, file)
            y, sr = librosa.load(file_path, sr=None)
            duration = librosa.get_duration(y=y, sr=sr)

            if current_duration + duration <= min_duration:
                shutil.copy(file_path, os.path.join(target_raaga_path, file))
                current_duration += duration
            else:
                break  # Stop once we've reached the target duration

    print(f"Copied {current_duration:.2f} seconds for {raaga_folder}")

print("\n All raaga folders now contain equal total duration.")


Aanda bhairavi has 23540.00 seconds of audio.
Darbari Kanada has 24640.00 seconds of audio.
Hamsadhwani has 23450.00 seconds of audio.
Kalyani has 24340.00 seconds of audio.
Kharaharapriya has 23440.00 seconds of audio.
Mayamalawagowla has 26880.00 seconds of audio.
Mohanam has 24860.00 seconds of audio.
Neelambari has 23700.00 seconds of audio.
Shankarabharanam has 30990.00 seconds of audio.
Thodi has 26450.00 seconds of audio.

Target duration for all raagas: 23440.00 seconds

Copied 23440.00 seconds for Aanda bhairavi
Copied 23440.00 seconds for Darbari Kanada
Copied 23440.00 seconds for Hamsadhwani
Copied 23440.00 seconds for Kalyani
Copied 23440.00 seconds for Kharaharapriya
Copied 23440.00 seconds for Mayamalawagowla
Copied 23440.00 seconds for Mohanam
Copied 23440.00 seconds for Neelambari
Copied 23440.00 seconds for Shankarabharanam
Copied 23440.00 seconds for Thodi

 All raaga folders now contain equal total duration.
