In [None]:
import IPython.display as ipd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import noisereduce as nr
import numpy as np
import scipy.signal as signal
from scipy import ndimage

In [None]:
# Root folder of the ICBHI recordings. Every clip below lives in this folder,
# so relocating the dataset only requires editing this one line.
DATA_DIR = "/home/AIoT04/Datasets/icbhi_dataset"

# One example recording per noise/artifact type studied in this notebook.
ls_with_heartbeat_path = f"{DATA_DIR}/101_1b1_Al_sc_Meditron.wav"
cough_with_voice_path = f"{DATA_DIR}/218_1b1_Pl_sc_Meditron.wav"
medical_device_sound_path = f"{DATA_DIR}/207_2b3_Ar_mc_AKGC417L.wav"
clock_like_sound_path = f"{DATA_DIR}/107_2b3_Lr_mc_AKGC417L.wav"
whistle_like_sound_path = f"{DATA_DIR}/107_2b4_Ll_mc_AKGC417L.wav"
horn_like_sound_path = f"{DATA_DIR}/147_2b3_Lr_mc_AKGC417L.wav"
stationary_noise_path = f"{DATA_DIR}/204_2b5_Ar_mc_AKGC417L.wav"
mic_peak_path = f"{DATA_DIR}/213_1p5_Tc_mc_AKGC417L.wav"
miss_classified_crackles_path = f"{DATA_DIR}/213_2p2_Pr_mc_AKGC417L.wav"

In [None]:
# Working sample rate in Hz for every clip loaded and played back below.
# 16 kHz matches the `sr=16000` default of ICBHINoiseRobustPreprocessor and its
# 160-sample hop; the previous value (160000) had one zero too many.
SAMPLE_RATE = 16000

In [None]:
def _load_waveform(path):
    """Load ``path`` with librosa at ``SAMPLE_RATE`` and return only the samples."""
    waveform, _sample_rate = librosa.load(path, sr=SAMPLE_RATE)
    return waveform


# One waveform per example clip, all resampled to the shared SAMPLE_RATE.
ls_with_heartbeat = _load_waveform(ls_with_heartbeat_path)
cough_with_voice = _load_waveform(cough_with_voice_path)
medical_device_sound = _load_waveform(medical_device_sound_path)
clock_like_sound = _load_waveform(clock_like_sound_path)
whistle_like_sound = _load_waveform(whistle_like_sound_path)
horn_like_sound = _load_waveform(horn_like_sound_path)
stationary_noise = _load_waveform(stationary_noise_path)
mic_peak = _load_waveform(mic_peak_path)
miss_classified_crackles = _load_waveform(miss_classified_crackles_path)

## Stationary noise removal

In [None]:
# Listen to the untouched stationary-noise clip before any filtering.
ipd.Audio(data=stationary_noise, rate=SAMPLE_RATE)

In [None]:
def remove_stationary_noise(audio, threshold_db=3, frame_length=2048, floor_db=-40):
    """
    Reduce stationary noise with power-domain spectral subtraction.

    Strategy:
    - Compute the STFT power spectrogram
    - Estimate a per-frequency noise profile from the quietest frames
      (10th percentile of each frequency bin over time)
    - Subtract the (margin-scaled) noise power from every bin and apply the
      resulting gain to the complex STFT, so the phase is left untouched
    - Intended for: white noise, buzz, background hum

    The subtraction is done on linear power. Subtracting dB values would be a
    division in linear units, which whitens the spectrum and rescales the
    signal instead of removing noise.

    Args:
        audio: 1-D waveform
        threshold_db: margin in dB above the noise profile. Bins whose power
            does not exceed the noise profile by more than this margin are
            treated as noise and reduced to the floor; larger values denoise
            more aggressively
        frame_length: FFT frame length in samples (hop is frame_length // 4)
        floor_db: maximum attenuation in dB applied to any bin (values above
            0 are treated as 0). A non-zero floor keeps a little of the
            original signal in gated bins, which limits "musical noise"

    Returns:
        Noise-reduced waveform with the same number of samples as ``audio``
    """
    audio = np.asarray(audio)
    if audio.size == 0:
        return audio.copy()

    hop_length = max(1, frame_length // 4)
    noise_percentile = 10  # quietest 10% of frames define the noise profile

    # STFT and linear power
    D = librosa.stft(audio, n_fft=frame_length, hop_length=hop_length)
    power = np.abs(D) ** 2

    # Per-frequency noise power, raised by the gate margin
    noise_power = np.percentile(power, noise_percentile, axis=1, keepdims=True)
    gate_power = noise_power * 10.0 ** (threshold_db / 10.0)

    # Spectral-subtraction gain: gain^2 = 1 - gate_power / power, limited to
    # [floor, 1]. Bins with zero power get an infinite ratio and therefore the
    # floor gain, without triggering a divide-by-zero warning.
    ratio = np.divide(gate_power, power,
                      out=np.full_like(power, np.inf), where=power > 0)
    min_gain_sq = min(1.0, 10.0 ** (floor_db / 10.0))
    gain = np.sqrt(np.clip(1.0 - ratio, min_gain_sq, 1.0))

    # Reconstruct; `length` pins the output to the input's sample count
    audio_denoised = librosa.istft(D * gain, hop_length=hop_length,
                                   length=len(audio))

    return audio_denoised

In [None]:
# Run the hand-written filter on the stationary-noise clip with a short
# 512-sample analysis frame, then play the result for comparison.
staionary_denoised = remove_stationary_noise(audio=stationary_noise, frame_length=512)
ipd.Audio(data=staionary_denoised, rate=SAMPLE_RATE)

### Using the `noisereduce` library

In [None]:
# Baseline: stationary noise reduction with the third-party `noisereduce`
# package (imported as `nr` in the notebook's import cell).
# NOTE(review): this runs on `mic_peak`, while the hand-written filter above is
# demonstrated on `stationary_noise` - confirm the clip choice is intentional.
reduced_noise = nr.reduce_noise(y=mic_peak, sr=SAMPLE_RATE, stationary=True)
ipd.Audio(reduced_noise, rate=SAMPLE_RATE)

In [None]:
class ICBHINoiseRobustPreprocessor:
    """
    Noise-robust preprocessing pipeline for ICBHI 2017 respiratory recordings.

    Stages, each usable on its own or chained by ``preprocess``:
        1. Heart sound removal      (zero-phase Butterworth high-pass)
        2. Stationary noise removal (power-domain spectral subtraction)
        3. Cough detection/removal  (frame-energy bursts)
        4. Speech/bird masking      (spectral-shape anomaly score)

    Attributes:
        sr: sample rate in Hz that every waveform passed in is assumed to use
        n_mels: number of mel bands (stored only; no method of this class
            reads it)
        n_fft: frame length in samples for cough framing, noise removal and
            the diagnostic spectrograms
        hop_length: hop in samples shared by cough detection, cough removal
            and the diagnostic spectrograms
    """

    def __init__(self, sr=16000, n_mels=128, n_fft=512, hop_length=160):
        self.sr = sr
        self.n_mels = n_mels
        self.n_fft = n_fft
        self.hop_length = hop_length

    # ============================================================
    # 1. HEART SOUND REMOVAL (High-pass filter)
    # ============================================================
    def remove_heart_sounds(self, audio, cutoff_hz=2000, order=4):
        """
        Suppress heart sounds with a zero-phase Butterworth high-pass filter.

        NOTE(review): the 2000 Hz default mirrors the value ``preprocess``
        passes. Heart sounds are usually described as sitting below a few
        hundred Hz, and lung sounds extend well below 2000 Hz, so this cutoff
        probably removes respiratory content as well - confirm it is intended.

        Args:
            audio: 1-D waveform sampled at ``self.sr``
            cutoff_hz: high-pass cutoff frequency in Hz
            order: Butterworth order (applied forward and backward, so the
                effective roll-off is twice as steep)

        Returns:
            Filtered waveform, same length as ``audio``

        Raises:
            ValueError: if ``cutoff_hz`` is not strictly between 0 and Nyquist
        """
        audio = np.asarray(audio)
        nyquist = self.sr / 2.0
        if not 0 < cutoff_hz < nyquist:
            raise ValueError(
                f"cutoff_hz must lie in (0, {nyquist}) Hz, got {cutoff_hz}"
            )
        if audio.size < 2:
            # Nothing to filter; forward-backward filtering needs >= 2 samples
            return audio.copy()

        sos = signal.butter(order, cutoff_hz, btype='highpass',
                            fs=self.sr, output='sos')
        # sosfiltfilt needs len(audio) > padlen; shrink the padding for very
        # short clips instead of raising.
        padlen = min(3 * (2 * sos.shape[0] + 1), audio.size - 1)
        filtered = signal.sosfiltfilt(sos, audio, padlen=padlen)

        # Keep floating inputs in their original precision and hand back a
        # C-contiguous array (forward-backward filtering may return a
        # reversed view).
        out_dtype = audio.dtype if np.issubdtype(audio.dtype, np.floating) else None
        return np.ascontiguousarray(filtered, dtype=out_dtype)

    # ============================================================
    # 2. STATIONARY NOISE REMOVAL (Spectral Subtraction)
    # ============================================================
    def remove_stationary_noise(self, audio, threshold_db=3, frame_length=None,
                                floor_db=-40):
        """
        Reduce stationary noise with power-domain spectral subtraction.

        A per-frequency noise profile is taken as the 10th percentile of each
        bin's power over time. The profile, raised by ``threshold_db``, is
        subtracted from every bin's power and the resulting gain is applied to
        the complex STFT (phase untouched).

        Args:
            audio: 1-D waveform
            threshold_db: margin in dB above the noise profile below which a
                bin is treated as noise
            frame_length: FFT frame length in samples; defaults to
                ``self.n_fft``. The hop is ``frame_length // 4``
            floor_db: maximum attenuation in dB applied to any bin (values
                above 0 are treated as 0)

        Returns:
            Noise-reduced waveform with the same number of samples as ``audio``
        """
        audio = np.asarray(audio)
        if audio.size == 0:
            return audio.copy()

        n_fft = self.n_fft if frame_length is None else frame_length
        hop_length = max(1, n_fft // 4)

        D = librosa.stft(audio, n_fft=n_fft, hop_length=hop_length)
        power = np.abs(D) ** 2

        noise_power = np.percentile(power, 10, axis=1, keepdims=True)
        gate_power = noise_power * 10.0 ** (threshold_db / 10.0)

        # gain^2 = 1 - gate_power / power, limited to [floor, 1]; zero-power
        # bins get an infinite ratio and therefore the floor gain.
        ratio = np.divide(gate_power, power,
                          out=np.full_like(power, np.inf), where=power > 0)
        min_gain_sq = min(1.0, 10.0 ** (floor_db / 10.0))
        gain = np.sqrt(np.clip(1.0 - ratio, min_gain_sq, 1.0))

        return librosa.istft(D * gain, hop_length=hop_length, length=len(audio))

    # ============================================================
    # 3. COUGH DETECTION & REMOVAL
    # ============================================================
    @staticmethod
    def _mask_to_segments(mask):
        """Turn a boolean frame mask into a list of (start, end) runs, end exclusive."""
        # Padding with False on both sides guarantees every run has both a
        # rising and a falling edge, even when the mask starts or ends True.
        padded = np.concatenate(([False], np.asarray(mask, dtype=bool), [False]))
        edges = np.diff(padded.astype(np.int8))
        starts = np.flatnonzero(edges == 1)
        ends = np.flatnonzero(edges == -1)
        return [(int(start), int(end)) for start, end in zip(starts, ends)]

    def detect_coughs(self, audio, threshold_percentile=85, min_duration=0.1):
        """
        Detect cough-like bursts from short-time frame energy.

        Frames of ``self.n_fft`` samples, hopped by ``self.hop_length``, are
        flagged when their energy exceeds the given percentile of all frame
        energies in the clip. Because the threshold is a percentile of the
        clip itself, roughly ``100 - threshold_percentile`` percent of frames
        are flagged before smoothing, whether or not a cough is present.

        Args:
            audio: 1-D waveform
            threshold_percentile: percentile of frame energy used as threshold
            min_duration: time scale in seconds for the morphological
                smoothing (gaps shorter than about half of it are closed,
                bursts shorter than about a quarter of it are dropped)

        Returns:
            cough_mask: boolean array, one entry per frame (True = cough)
            cough_segments: list of (start_frame, end_frame) tuples with
                ``end_frame`` exclusive; frame ``k`` starts at sample
                ``k * self.hop_length``
        """
        audio = np.ascontiguousarray(audio)
        frame_length = self.n_fft
        hop_length = self.hop_length

        # Too short to form a single frame: nothing can be detected
        if audio.size < frame_length:
            return np.zeros(0, dtype=bool), []

        # Frame energy, z-scored over the clip
        frames = librosa.util.frame(audio, frame_length=frame_length,
                                    hop_length=hop_length)
        energy = np.sqrt(np.sum(frames ** 2, axis=0))
        energy_normalized = (energy - np.mean(energy)) / (np.std(energy) + 1e-8)

        # Threshold
        threshold = np.percentile(energy_normalized, threshold_percentile)
        cough_frames = energy_normalized > threshold

        # Post-processing: bridge short gaps, then drop isolated peaks.
        # Structuring elements are at least one frame long, because an empty
        # structure is not a valid argument.
        min_frames = int(min_duration * self.sr / hop_length)
        closing_len = max(1, min_frames // 2)
        opening_len = max(1, min_frames // 4)
        cough_mask = ndimage.binary_closing(cough_frames,
                                            structure=np.ones(closing_len))
        cough_mask = ndimage.binary_opening(cough_mask,
                                            structure=np.ones(opening_len))

        return cough_mask, self._mask_to_segments(cough_mask)

    def remove_coughs(self, audio, cough_segments, method='interpolate'):
        """
        Remove detected cough segments from a waveform.

        Methods:
        - 'silence': replace the segment with zeros
        - 'interpolate': replace the segment with a straight line between the
          sample just before it and the sample just after it (0.0 is used
          where the segment touches the start or end of the clip)
        - 'mute': scale the segment's amplitude to 30% of its original value

        Args:
            audio: 1-D waveform (not modified; a copy is returned)
            cough_segments: iterable of (start_frame, end_frame), end
                exclusive, in units of ``self.hop_length`` samples
            method: one of 'silence', 'interpolate', 'mute'

        Returns:
            Cough-reduced copy of ``audio``

        Raises:
            ValueError: if ``method`` is not one of the supported names
        """
        if method not in ('silence', 'interpolate', 'mute'):
            raise ValueError(
                f"Unknown method {method!r}; expected 'silence', 'interpolate' or 'mute'"
            )

        audio_processed = np.array(audio, copy=True)
        n_samples = len(audio_processed)
        hop_length = self.hop_length

        for start_frame, end_frame in cough_segments:
            # Clamp to the clip and skip empty or reversed segments
            start_sample = max(0, int(start_frame) * hop_length)
            end_sample = min(n_samples, int(end_frame) * hop_length)
            if end_sample <= start_sample:
                continue

            if method == 'silence':
                audio_processed[start_sample:end_sample] = 0

            elif method == 'interpolate':
                start_val = audio_processed[start_sample - 1] if start_sample > 0 else 0.0
                end_val = audio_processed[end_sample] if end_sample < n_samples else 0.0
                audio_processed[start_sample:end_sample] = np.linspace(
                    start_val, end_val, end_sample - start_sample)

            else:  # 'mute'
                audio_processed[start_sample:end_sample] *= 0.3

        return audio_processed

    # ============================================================
    # 4. SPEECH & BIRD DETECTION (Spectral Shape Analysis)
    # ============================================================
    @staticmethod
    def _zscore(values):
        """Standardise ``values`` to zero mean and (approximately) unit variance."""
        return (values - np.mean(values)) / (np.std(values) + 1e-8)

    def detect_anomalous_sounds(self, audio, window_size=2048, hop_length=512):
        """
        Flag speech/bird-like frames from spectral-shape features.

        Three per-frame features are z-scored and combined into one score:
        spectral centroid (+), zero-crossing rate (+) and spectral flatness
        (-). Frames whose score is above the clip's 75th percentile are
        flagged, so about a quarter of the frames are always marked.

        Args:
            audio: 1-D waveform
            window_size: analysis window / FFT length in samples
            hop_length: hop length in samples

        Returns:
            anomaly_mask: boolean array, one entry per STFT frame
            anomaly_score: z-scored combined feature score per frame
        """
        audio = np.asarray(audio)
        if audio.size == 0:
            return np.zeros(0, dtype=bool), np.zeros(0)

        # The librosa feature functions take a *magnitude* spectrogram via S=;
        # spectral_flatness raises it to `power` itself, so passing a power
        # spectrogram would apply the exponent twice.
        D = librosa.stft(audio, n_fft=window_size, hop_length=hop_length)
        magnitude = np.abs(D)

        # Feature 1: Spectral Centroid (high = speech/birds)
        centroid = librosa.feature.spectral_centroid(S=magnitude, sr=self.sr)[0]

        # Feature 2: Spectral Flatness (low = tonal speech, high = noisy lung sounds)
        flatness = librosa.feature.spectral_flatness(S=magnitude, power=2.0)[0]

        # Feature 3: Zero Crossing Rate, framed like the STFT
        zcr = librosa.feature.zero_crossing_rate(audio, frame_length=window_size,
                                                 hop_length=hop_length)[0]

        # Guard against a one-frame disagreement between the framings
        n_frames = min(len(centroid), len(flatness), len(zcr))
        centroid_norm = self._zscore(centroid[:n_frames])
        flatness_norm = self._zscore(flatness[:n_frames])
        zcr_norm = self._zscore(zcr[:n_frames])

        # Combine features
        # Speech/Birds: high centroid + high zcr + low flatness
        anomaly_score = self._zscore((centroid_norm + zcr_norm - flatness_norm) / 3)

        # Threshold
        threshold = np.percentile(anomaly_score, 75)
        anomaly_mask = anomaly_score > threshold

        return anomaly_mask, anomaly_score

    def apply_spectral_masking(self, audio, anomaly_mask, hop_length=512,
                               method='soft_mask', attenuation_db=-20, n_fft=None):
        """
        Attenuate or remove the STFT frames flagged in ``anomaly_mask``.

        Args:
            audio: 1-D waveform
            anomaly_mask: boolean mask from ``detect_anomalous_sounds``, one
                entry per STFT frame computed with the same ``hop_length``
            hop_length: hop length in samples
            method: 'soft_mask' (attenuate) or 'hard_mask' (zero out)
            attenuation_db: gain in dB applied to flagged frames by 'soft_mask'
            n_fft: FFT length in samples; defaults to ``4 * hop_length`` so
                neighbouring frames overlap by 75% and per-frame gain changes
                are cross-faded instead of producing hard steps

        Returns:
            Masked waveform with the same number of samples as ``audio``

        Raises:
            ValueError: on an unknown ``method`` or when the mask length does
                not match the number of STFT frames
        """
        if method not in ('soft_mask', 'hard_mask'):
            raise ValueError(
                f"Unknown method {method!r}; expected 'soft_mask' or 'hard_mask'"
            )
        if n_fft is None:
            n_fft = 4 * hop_length

        D = librosa.stft(audio, n_fft=n_fft, hop_length=hop_length)

        mask = np.asarray(anomaly_mask, dtype=bool)
        if mask.shape != (D.shape[-1],):
            raise ValueError(
                f"anomaly_mask has shape {mask.shape} but the STFT has "
                f"{D.shape[-1]} frames; compute the mask with the same hop_length"
            )

        # Scaling the complex STFT scales the magnitude and keeps the phase
        gain = 10 ** (attenuation_db / 20) if method == 'soft_mask' else 0.0
        D[:, mask] *= gain

        return librosa.istft(D, hop_length=hop_length, length=len(audio))

    # ============================================================
    # 5. COMPLETE PIPELINE
    # ============================================================
    def preprocess(self, audio, apply_heart_removal=True,
                   apply_noise_removal=True, apply_cough_removal=True,
                   apply_anomaly_masking=True, verbose=True):
        """
        Run the complete preprocessing pipeline.

        Order of stages:
        1. Heart sound removal
        2. Stationary noise removal
        3. Cough detection & removal
        4. Speech/bird anomaly masking

        Args:
            audio: 1-D waveform sampled at ``self.sr`` (not modified)
            apply_heart_removal: enable stage 1
            apply_noise_removal: enable stage 2
            apply_cough_removal: enable stage 3
            apply_anomaly_masking: enable stage 4
            verbose: print one progress line per executed stage

        Returns:
            Processed waveform
        """
        audio_processed = np.array(audio, copy=True)

        if verbose:
            print("=" * 60)
            print("ICBHI PREPROCESSING PIPELINE")
            print("=" * 60)

        # Step 1: Heart sound removal
        # NOTE(review): 2000 Hz is far above the usual heart-sound band and
        # will also remove lung-sound content - confirm this cutoff.
        if apply_heart_removal:
            audio_processed = self.remove_heart_sounds(audio_processed, cutoff_hz=2000)
            if verbose:
                print("✓ Heart sound removal (HP filter @ 2000Hz)")

        # Step 2: Stationary noise removal (gated bins attenuated by at most 40 dB)
        if apply_noise_removal:
            audio_processed = self.remove_stationary_noise(audio_processed,
                                                           floor_db=-40)
            if verbose:
                print("✓ Stationary noise removal (spectral subtraction)")

        # Step 3: Cough detection and removal
        if apply_cough_removal:
            cough_mask, cough_segments = self.detect_coughs(audio_processed)
            audio_processed = self.remove_coughs(audio_processed, cough_segments,
                                                 method='interpolate')
            if verbose:
                print(f"✓ Cough removal ({len(cough_segments)} segments detected)")

        # Step 4: Speech/bird anomaly masking
        if apply_anomaly_masking:
            anomaly_mask, scores = self.detect_anomalous_sounds(audio_processed)
            audio_processed = self.apply_spectral_masking(audio_processed,
                                                          anomaly_mask,
                                                          method='soft_mask',
                                                          attenuation_db=-20)
            if verbose:
                n_flagged = len(anomaly_mask)
                pct_anomalous = 100 * np.sum(anomaly_mask) / n_flagged if n_flagged else 0.0
                print(f"✓ Anomaly masking ({pct_anomalous:.1f}% flagged)")

        if verbose:
            print("=" * 60)

        return audio_processed

    # ============================================================
    # 6. VISUALIZATION & QUALITY CONTROL
    # ============================================================
    def plot_preprocessing_stages(self, audio, title=""):
        """
        Plot waveform and spectrogram after each cumulative pipeline stage.

        Each row shows the signal after all stages up to and including the
        named one, using the same settings as ``preprocess``; the last row is
        the output of ``preprocess`` itself. The figure is saved to
        ``preprocessing_stages_<title>.png`` in the working directory and then
        shown.

        Args:
            audio: 1-D waveform sampled at ``self.sr``
            title: suffix used in the output file name
        """
        after_heart = self.remove_heart_sounds(audio)
        after_noise = self.remove_stationary_noise(after_heart)
        _, cough_segments = self.detect_coughs(after_noise)
        after_cough = self.remove_coughs(after_noise, cough_segments)

        stages = [
            ("Original", audio),
            ("After Heart Removal", after_heart),
            ("After Noise Removal", after_noise),
            ("After Cough Removal", after_cough),
            ("Final Output", self.preprocess(audio, verbose=False)),
        ]

        fig, axes = plt.subplots(len(stages), 2, figsize=(14, 12))
        last_idx = len(stages) - 1

        for idx, (stage_name, audio_stage) in enumerate(stages):
            audio_stage = np.asarray(audio_stage)

            # Waveform
            ax = axes[idx, 0]
            t = np.arange(len(audio_stage)) / self.sr
            ax.plot(t, audio_stage, linewidth=0.5, color='steelblue')
            ax.set_title(f"{stage_name} - Waveform", fontweight='bold')
            ax.set_ylabel('Amplitude')
            if idx == last_idx:
                ax.set_xlabel('Time (s)')

            # Spectrogram: the STFT hop must equal the hop handed to specshow,
            # otherwise the time axis is mis-scaled.
            ax = axes[idx, 1]
            D = librosa.stft(audio_stage, n_fft=self.n_fft, hop_length=self.hop_length)
            S_db = librosa.power_to_db(np.abs(D) ** 2, ref=np.max)
            img = librosa.display.specshow(S_db, sr=self.sr, hop_length=self.hop_length,
                                           ax=ax, x_axis='time' if idx == last_idx else None,
                                           y_axis='hz')
            ax.set_title(f"{stage_name} - Spectrogram", fontweight='bold')
            if idx == 0:
                fig.colorbar(img, ax=ax, format='%+2.0f dB')

        fig.tight_layout()
        fig.savefig(f'preprocessing_stages_{title}.png', dpi=300, bbox_inches='tight')
        plt.show()