# Feature Extraction

In [1]:
import os
import librosa
import torch
import numpy as np
import pandas as pd
import re
import crepe  
import soundfile as sf  

In [3]:

# === Paths ===
# Root folder holding one sub-folder of .wav segments per raga.
INPUT_DIR = r"C:\College\Subjects\Sem 6\Speech Processing\Project\Dataset\Balanced"
# Root folder that receives the extracted feature matrices.
OUTPUT_DIR = r"C:\College\Subjects\Sem 6\Speech Processing\Project\Dataset\Features"
# Sample rate (Hz) that audio is resampled to on load and handed to CREPE.
SR = 16000

# exist_ok=True avoids the check-then-create race and matches how the
# per-raga output folders are created further down.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# === Helper Functions ===

def hz_to_cents(pitch_hz, tonic_hz):
    """Express a pitch in cents relative to a reference tonic.

    An octave spans 1200 cents, so the result is 1200 times the base-2
    logarithm of the ratio ``pitch_hz / tonic_hz``. The arithmetic is
    element-wise, so scalars and NumPy arrays both work.
    """
    ratio_to_tonic = pitch_hz / tonic_hz
    octaves_from_tonic = np.log2(ratio_to_tonic)
    return 1200 * octaves_from_tonic

def estimate_tonic_crepe(audio, sr=16000):
    """Estimate the tonic as the peak of a histogram of CREPE pitches.

    CREPE is run on ``audio`` with Viterbi smoothing and ``step_size=10``
    (milliseconds, per CREPE's documentation). Frames with confidence above
    0.5 are binned into a histogram over 50-1000 Hz and the centre of the
    most populated bin is taken as the tonic.

    Args:
        audio: Audio samples, passed unchanged to ``crepe.predict``.
        sr: Sample rate of ``audio`` in Hz.

    Returns:
        The estimated tonic in Hz as a ``float``. Falls back to ``1.0``
        when no confident frame falls inside the histogram range, so that
        later ``log2(pitch / tonic)`` computations never divide by zero.
    """
    _, frequency, confidence, _ = crepe.predict(
        audio, sr, viterbi=True, step_size=10
    )
    freq_confident = frequency[confidence > 0.5]
    if len(freq_confident) == 0:
        return 1.0  # Avoid log(0) errors later

    hist, bins = np.histogram(freq_confident, bins=np.linspace(50, 1000, 500))
    if not hist.any():
        # Every confident frame lies outside 50-1000 Hz. argmax over an
        # all-zero histogram would silently report 50 Hz as the tonic, so
        # use the same neutral fallback as the "no confident frames" case.
        return 1.0

    peak = int(np.argmax(hist))
    # Report the centre of the winning bin; its left edge alone would bias
    # the estimate low by up to one bin width.
    return float(0.5 * (bins[peak] + bins[peak + 1]))

def extract_pitch_features(audio, sr=16000, tonic=None):
    """Build a tonic-normalised pitch feature matrix from audio using CREPE.

    The pitch track is converted to cents relative to ``tonic``. Frames with
    confidence below 0.5 are treated as missing and filled by interpolation,
    and first and second differences are appended as extra columns.

    Args:
        audio: Audio samples, passed unchanged to ``crepe.predict``.
        sr: Sample rate of ``audio`` in Hz.
        tonic: Reference frequency in Hz. When ``None`` it is estimated
            from ``audio`` with ``estimate_tonic_crepe`` (this costs an
            additional CREPE pass).

    Returns:
        A tuple ``(time, features)``. ``time`` is the frame-time array
        returned by ``crepe.predict``. ``features`` has shape
        ``(n_frames, 3)`` with columns pitch in cents, velocity and
        acceleration.

    Raises:
        ValueError: If ``tonic`` is not strictly positive.
    """
    if tonic is None:
        # The default used to crash inside hz_to_cents; estimate instead.
        tonic = estimate_tonic_crepe(audio, sr=sr)
    if not tonic > 0:
        # A zero or negative reference would silently yield inf/NaN cents.
        raise ValueError(f"tonic must be a positive frequency in Hz, got {tonic!r}")

    time, frequency, confidence, _ = crepe.predict(
        audio, sr, viterbi=True, step_size=10
    )

    # Filter low-confidence frames
    voiced_hz = np.where(confidence < 0.5, np.nan, frequency)
    pitch_cents = hz_to_cents(voiced_hz, tonic)

    # Interpolate missing (nan) values; bfill/ffill cover gaps at the ends.
    # The .bfill()/.ffill() methods replace the deprecated
    # fillna(method=...) form.
    pitch_cents = pd.Series(pitch_cents).interpolate().bfill().ffill().values

    # Derivatives (velocity, acceleration)
    velocity = np.gradient(pitch_cents)
    acceleration = np.gradient(velocity)

    # Stack into matrix: one row per frame, one column per feature
    features = np.stack([pitch_cents, velocity, acceleration], axis=1)
    return time, features

# === Main Processing Loop ===

# Tonic encoded in the file name. Accepts decimal ("tonic=129.96") as well
# as integer ("tonic=130") values; compiled once instead of per file.
_TONIC_IN_NAME = re.compile(r'tonic=(\d+(?:\.\d+)?)')

# Walk every raga folder under INPUT_DIR, extract pitch features for each
# .wav segment and save them as <name>_features.npy in a mirrored folder
# under OUTPUT_DIR. Sorting only makes the processing order reproducible.
for raga_folder in sorted(os.listdir(INPUT_DIR)):
    raga_path = os.path.join(INPUT_DIR, raga_folder)
    if not os.path.isdir(raga_path):
        continue

    output_raga_path = os.path.join(OUTPUT_DIR, raga_folder)
    os.makedirs(output_raga_path, exist_ok=True)

    for file in sorted(os.listdir(raga_path)):
        # Compare case-insensitively so ".WAV" files are not skipped.
        if not file.lower().endswith(".wav"):
            continue

        file_path = os.path.join(raga_path, file)
        print(f" Processing: {file}")

        # A failure on one file is reported and must not abort the batch.
        try:
            # Load audio
            audio, _ = librosa.load(file_path, sr=SR, mono=True)

            # === Tonic Handling ===
            tonic_match = _TONIC_IN_NAME.search(file)
            tonic_hz = float(tonic_match.group(1)) if tonic_match else 0.0
            if tonic_hz > 0:
                print(f" Tonic found in filename: {tonic_hz} Hz")
            else:
                # No usable tonic in the name (missing, or zero, which
                # would divide by zero later): estimate it from the audio.
                tonic_hz = estimate_tonic_crepe(audio, sr=SR)
                print(f" Estimated tonic: {tonic_hz:.2f} Hz")

            # === Feature Extraction ===
            time, feature_matrix = extract_pitch_features(audio, sr=SR, tonic=tonic_hz)

            # === Save Features ===
            out_name = os.path.splitext(file)[0] + "_features.npy"
            np.save(os.path.join(output_raga_path, out_name), feature_matrix)

            print(f" Saved features: {feature_matrix.shape} → {out_name}")

        except Exception as e:
            print(f" Error processing {file}: {e}")

print("\n All pitch-based features extracted and saved :)")


 Processing: Thodi_5_trimmed_noise_tonic=129.96_seg0.wav
 Tonic found in filename: 129.96 Hz
 Saved features: (1001, 3) → Thodi_5_trimmed_noise_tonic=129.96_seg0_features.npy
 Processing: Thodi_5_trimmed_noise_tonic=129.96_seg1.wav
 Tonic found in filename: 129.96 Hz
 Saved features: (1001, 3) → Thodi_5_trimmed_noise_tonic=129.96_seg1_features.npy
 Processing: Thodi_5_trimmed_noise_tonic=129.96_seg10.wav
 Tonic found in filename: 129.96 Hz
 Saved features: (1001, 3) → Thodi_5_trimmed_noise_tonic=129.96_seg10_features.npy
 Processing: Thodi_5_trimmed_noise_tonic=129.96_seg100.wav
 Tonic found in filename: 129.96 Hz
 Saved features: (1001, 3) → Thodi_5_trimmed_noise_tonic=129.96_seg100_features.npy
 Processing: Thodi_5_trimmed_noise_tonic=129.96_seg101.wav
 Tonic found in filename: 129.96 Hz
 Saved features: (1001, 3) → Thodi_5_trimmed_noise_tonic=129.96_seg101_features.npy
 Processing: Thodi_5_trimmed_noise_tonic=129.96_seg102.wav
 Tonic found in filename: 129.96 Hz
 Saved features: (10