In [3]:
import os
import numpy as np
import librosa
import soundfile as sf

# ── User parameters ────────────────────────────────────────────────────────────
# Audio / feature settings
SAMPLE_RATE = 48000   # UMIK-1 sample rate
N_MFCC = 13           # number of coefficients
N_FFT = 2048          # STFT window size
HOP_LENGTH = 512      # STFT hop length

# Input / output locations
SEGMENTED_FOLDER = 'SegmentedAudio'          # where your 1 s .wav segments live
OUTPUT_FOLDER = 'MFCC_Features_Calibrated'   # where to save .npy files
CALIBRATION_FILE = 'calibration_file.txt'    # two-column: freq(Hz) gain(dB)
# ───────────────────────────────────────────────────────────────────────────────

# Make sure the output root exists before any features are written.
os.makedirs(OUTPUT_FOLDER, exist_ok=True)


def load_calibration(cal_file, n_fft, sr):
    """Read freq/gain pairs and interpolate them onto the STFT bin frequencies.

    Every usable line of ``cal_file`` must start with two whitespace-separated
    numbers: frequency (Hz) and gain (dB). Blank lines, lines starting with
    ``#`` and lines whose first two fields are not both numeric (e.g. a text
    header) are skipped.

    Parameters
    ----------
    cal_file : str
        Path to the text calibration file.
    n_fft : int
        STFT window size; the result has ``n_fft // 2 + 1`` entries.
    sr : int or float
        Sample rate in Hz.

    Returns
    -------
    numpy.ndarray
        Linear amplitude gain, ``10 ** (gain_dB / 20)``, for each STFT bin.
        Bins outside the frequency range of the file take the gain of the
        nearest table entry (``np.interp`` end-point behavior).

    Raises
    ------
    ValueError
        If the file contains no usable frequency/gain pair.
    """
    freqs, gains_db = [], []
    with open(cal_file, 'r') as fh:
        for line in fh:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            parts = line.split()
            try:
                f_hz = float(parts[0])
                g_db = float(parts[1])
            except (ValueError, IndexError):
                # Non-numeric header line, or a line with fewer than two fields.
                continue
            freqs.append(f_hz)
            gains_db.append(g_db)

    if not freqs:
        raise ValueError(f"No frequency/gain pairs found in {cal_file!r}")

    # np.interp does not check that its x-coordinates are increasing and
    # returns meaningless values otherwise, so sort the table first.
    order = np.argsort(freqs, kind='stable')
    freqs_sorted = np.asarray(freqs, dtype=float)[order]
    gains_sorted_db = np.asarray(gains_db, dtype=float)[order]

    # FFT bin center frequencies (also correct when n_fft is odd)
    bin_freqs = np.fft.rfftfreq(n_fft, d=1.0 / sr)
    # Interpolate (dB) then convert to linear
    gains_interp_db = np.interp(bin_freqs, freqs_sorted, gains_sorted_db)
    return 10.0 ** (gains_interp_db / 20.0)


def apply_calibration(y, gain_lin, n_fft, hop_length):
    """STFT → apply linear gain to each frequency bin → iSTFT.

    Parameters
    ----------
    y : numpy.ndarray
        Time-domain audio signal.
    gain_lin : numpy.ndarray
        1-D array of linear gains, one per frequency bin starting at bin 0.
    n_fft : int
        STFT window size.
    hop_length : int
        STFT hop length.

    Returns
    -------
    numpy.ndarray
        Calibrated time-domain signal with the same number of samples as ``y``.
    """
    D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length)
    # Only magnitude correction; preserve phase (real gain on complex bins)
    D[:gain_lin.shape[0], :] *= gain_lin[:, np.newaxis]
    # Pin the output length: without `length`, the reconstructed signal can be
    # shorter than the input, silently dropping the tail of the segment.
    return librosa.istft(D, hop_length=hop_length, length=y.shape[-1])


# Pre-compute calibration gains once; they are the same for every segment
cal_gain = load_calibration(CALIBRATION_FILE, N_FFT, SAMPLE_RATE)

# Process each 1 s segment, mirroring the sub-folder layout of SEGMENTED_FOLDER
# (sorted so the processing order is reproducible across runs/platforms)
for subdir in sorted(os.listdir(SEGMENTED_FOLDER)):
    src_dir = os.path.join(SEGMENTED_FOLDER, subdir)
    if not os.path.isdir(src_dir):
        continue

    dst_dir = os.path.join(OUTPUT_FOLDER, subdir)
    os.makedirs(dst_dir, exist_ok=True)

    for fname in sorted(os.listdir(src_dir)):
        if not fname.lower().endswith('.wav'):
            continue

        # 1) Load (resampled to SAMPLE_RATE if the file differs)
        path_in = os.path.join(src_dir, fname)
        y, sr = librosa.load(path_in, sr=SAMPLE_RATE)

        # 2) Calibrate
        y_cal = apply_calibration(y, cal_gain, N_FFT, HOP_LENGTH)

        # 3) MFCC extraction
        mfcc = librosa.feature.mfcc(
            y=y_cal,
            sr=sr,
            n_mfcc=N_MFCC,
            n_fft=N_FFT,
            hop_length=HOP_LENGTH
        )

        # 4) Save
        # splitext handles any extension casing ('.WAV', '.Wav') and only
        # touches the final suffix; str.replace('.wav', ...) did neither and
        # produced names such as 'X.WAV.npy'.
        out_name = os.path.splitext(fname)[0] + '.npy'
        np.save(os.path.join(dst_dir, out_name), mfcc)

print("✔️  Calibration-aware MFCC extraction complete.")


✔️  Calibration-aware MFCC extraction complete.


In [2]:
import os
import numpy as np
import librosa
import matplotlib.pyplot as plt

# ── User parameters ────────────────────────────────────────────────────────────
# Audio / feature settings
SAMPLE_RATE = 48000   # UMIK-1 sample rate
N_MFCC = 13           # number of MFCC coefficients
N_FFT = 2048          # STFT window size
HOP_LENGTH = 512      # STFT hop length

# Input / output locations
SEGMENTED_FOLDER = 'SegmentedAudio'        # folder with your 1 s .wav files
OUTPUT_IMG_DIR = 'MFCC_Images'             # where to save the .png files
CALIBRATION_FILE = 'calibration_file.txt'  # mic calibration: freq(Hz) gain(dB)
# ───────────────────────────────────────────────────────────────────────────────

# Make sure the image output root exists before any figure is saved.
os.makedirs(OUTPUT_IMG_DIR, exist_ok=True)

def load_calibration(cal_file, n_fft, sr):
    """Read freq/gain pairs and interpolate to STFT bins, return linear gain array.

    Every usable line of ``cal_file`` must start with two whitespace-separated
    numbers: frequency (Hz) and gain (dB). Blank lines, lines starting with
    ``#`` and lines whose first two fields are not both numeric are skipped.

    Parameters
    ----------
    cal_file : str
        Path to the text calibration file.
    n_fft : int
        STFT window size; the result has ``n_fft // 2 + 1`` entries.
    sr : int or float
        Sample rate in Hz.

    Returns
    -------
    numpy.ndarray
        Linear amplitude gain, ``10 ** (gain_dB / 20)``, for each STFT bin.
        Bins outside the frequency range of the file take the gain of the
        nearest table entry (``np.interp`` end-point behavior).

    Raises
    ------
    ValueError
        If the file contains no usable frequency/gain pair.
    """
    table = []
    with open(cal_file, 'r') as fh:
        for line in fh:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            parts = line.split()
            try:
                # Parse both fields before storing, so a half-valid line can
                # never leave the frequency and gain columns out of step.
                table.append((float(parts[0]), float(parts[1])))
            except (ValueError, IndexError):
                # Only parse failures are ignored (text header or a line with
                # fewer than two fields); any other error now propagates.
                continue

    if not table:
        raise ValueError(f"No frequency/gain pairs found in {cal_file!r}")

    # np.interp does not check that its x-coordinates are increasing and
    # returns meaningless values otherwise, so sort the table by frequency.
    table.sort(key=lambda pair: pair[0])
    freqs = np.array([pair[0] for pair in table], dtype=float)
    gains_db = np.array([pair[1] for pair in table], dtype=float)

    # STFT bin center freqs (also correct when n_fft is odd)
    bin_freqs = np.fft.rfftfreq(n_fft, d=1.0 / sr)
    gains_interp = np.interp(bin_freqs, freqs, gains_db)
    return 10.0 ** (gains_interp / 20.0)

def apply_calibration(y, gain_lin, n_fft, hop_length):
    """Apply the calibration in freq-domain and return time-signal.

    Parameters
    ----------
    y : numpy.ndarray
        Time-domain audio signal.
    gain_lin : numpy.ndarray
        1-D array of linear gains, one per frequency bin starting at bin 0.
    n_fft : int
        STFT window size.
    hop_length : int
        STFT hop length.

    Returns
    -------
    numpy.ndarray
        Calibrated time-domain signal with the same number of samples as ``y``.
    """
    D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length)
    # Real-valued gain on complex bins: scales magnitude, leaves phase alone.
    D[:gain_lin.shape[0], :] *= gain_lin[:, np.newaxis]
    # Pin the output length: without `length`, the reconstructed signal can be
    # shorter than the input, silently dropping the tail of the segment.
    return librosa.istft(D, hop_length=hop_length, length=y.shape[-1])

# Precompute calibration gains once; they are the same for every segment
cal_gain = load_calibration(CALIBRATION_FILE, N_FFT, SAMPLE_RATE)

# Walk through all segments and save MFCC images.
# The sub-folder layout of SEGMENTED_FOLDER is mirrored under OUTPUT_IMG_DIR:
# writing everything into one flat folder made segments that share a file name
# in different sub-folders overwrite each other's PNG.
for root, _dirs, files in os.walk(SEGMENTED_FOLDER):
    wav_files = [name for name in files if name.lower().endswith('.wav')]
    if not wav_files:
        continue

    # normpath collapses the trailing '.' produced for the top-level folder
    rel_dir = os.path.relpath(root, SEGMENTED_FOLDER)
    img_dir = os.path.normpath(os.path.join(OUTPUT_IMG_DIR, rel_dir))
    os.makedirs(img_dir, exist_ok=True)

    for fname in wav_files:
        # 1) load audio
        path_in = os.path.join(root, fname)
        y, sr = librosa.load(path_in, sr=SAMPLE_RATE)

        # 2) apply mic calibration
        y_cal = apply_calibration(y, cal_gain, N_FFT, HOP_LENGTH)

        # 3) compute MFCCs
        mfcc = librosa.feature.mfcc(
            y=y_cal,
            sr=sr,
            n_mfcc=N_MFCC,
            n_fft=N_FFT,
            hop_length=HOP_LENGTH
        )

        # 4) plot & save as PNG (explicit fig/ax instead of pyplot global state)
        fig, ax = plt.subplots(figsize=(4, 3))
        ax.imshow(mfcc, aspect='auto', origin='lower')
        ax.set_title(fname)
        ax.set_xlabel("Frame")
        ax.set_ylabel("MFCC Coeff")
        fig.tight_layout()

        out_fname = os.path.splitext(fname)[0] + '.png'
        out_path = os.path.join(img_dir, out_fname)
        fig.savefig(out_path)
        # Close explicitly so figures do not accumulate in memory over the loop
        plt.close(fig)

print("✅ All MFCC images saved in", OUTPUT_IMG_DIR)


✅ All MFCC images saved in MFCC_Images
