In [None]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pywt
import soundfile as sf
import speech_recognition as sr
from scipy.signal import wiener

In [None]:
def speech_intelligibility_score(audio_path):
    """Score an audio file by how much text Google speech recognition extracts.

    Args:
        audio_path: Path of the audio file handed to ``sr.AudioFile``.

    Returns:
        int: The number of characters in the recognised transcript; ``0``
        when the recogniser raises ``sr.UnknownValueError``; ``-1`` when it
        raises ``sr.RequestError`` or when any other exception occurs while
        reading or transcribing the file (the error is printed).
    """
    engine = sr.Recognizer()

    try:
        # Read the complete file into an AudioData object.
        with sr.AudioFile(audio_path) as clip:
            captured = engine.record(clip)

        try:
            text = engine.recognize_google(captured)
        except sr.UnknownValueError:
            # The recogniser could not make out any speech.
            return 0
        except sr.RequestError:
            # The recognition service could not be reached.
            return -1

        # A longer transcript is treated as a better score.
        return len(text)
    except Exception as e:
        print(f"Error processing audio: {e}")
        return -1

def _spectral_subtraction(y, sample_rate):
    """Subtract a per-frequency-bin noise floor from the STFT magnitude."""
    D = librosa.stft(y)
    magnitude = np.abs(D)
    # The 10th percentile over time of each bin is taken as its noise floor.
    noise_floor = np.percentile(magnitude, 10, axis=1)
    reduced_magnitude = np.maximum(magnitude - noise_floor[:, np.newaxis], 0)
    # length=len(y) keeps the output sample-aligned with the input.
    return librosa.istft(reduced_magnitude * np.exp(1j * np.angle(D)), length=len(y))


def _wiener_filtering(y, sample_rate):
    """Apply SciPy's Wiener filter with a 5-sample window."""
    return wiener(y, 5)


def _soft_thresholding(y, sample_rate):
    """Soft-threshold the STFT magnitude at 1.5x its global median."""
    D = librosa.stft(y)
    magnitude = np.abs(D)
    threshold = np.median(magnitude) * 1.5
    # Soft thresholding shrinks every bin by the threshold and clamps at zero;
    # merely zeroing the bins below the threshold would be hard thresholding.
    reduced_magnitude = np.maximum(magnitude - threshold, 0)
    return librosa.istft(reduced_magnitude * np.exp(1j * np.angle(D)), length=len(y))


def _wavelet_denoising(y, sample_rate):
    """Denoise with a level-5 'db4' decomposition and the universal threshold."""
    coeffs = pywt.wavedec(y, 'db4', level=5)

    # Universal threshold: sigma * sqrt(2 ln N). Sigma is estimated from the
    # finest detail coefficients via the median absolute deviation; without it
    # the threshold ignores the signal's scale and wipes out all detail.
    sigma = np.median(np.abs(coeffs[-1])) / 0.6745
    threshold = sigma * np.sqrt(2 * np.log(len(y)))

    if threshold > 0:
        # Keep the approximation coefficients, shrink every detail band.
        coeffs = [coeffs[0]] + [
            pywt.threshold(detail, threshold, mode='soft') for detail in coeffs[1:]
        ]
    # else: a zero threshold means there is nothing to shrink, and passing 0 to
    # pywt.threshold would turn zero-valued coefficients into NaN.

    # waverec may return one extra sample for odd-length input; trim it.
    return pywt.waverec(coeffs, 'db4')[:len(y)]


def _rolling_window_noise_reduction(y, sample_rate, window_size=0.025):
    """Zero samples below 1.5x the median magnitude of their window.

    ``window_size`` is the window duration in seconds.
    """
    # Clamp to one sample so range() never receives a zero step.
    window_length = max(1, int(window_size * sample_rate))
    cleaned_signal = np.zeros_like(y)

    for start in range(0, len(y), window_length):
        # The last window may be shorter; it is gated like the others instead
        # of being left as silence.
        window = y[start:start + window_length]
        noise_threshold = np.median(np.abs(window)) * 1.5
        cleaned_signal[start:start + window_length] = np.where(
            np.abs(window) > noise_threshold, window, 0
        )

    return cleaned_signal


def _plot_spectrogram_panel(rows, cols, position, signal, sample_rate, title):
    """Draw the dB spectrogram of ``signal`` into one cell of the current figure."""
    plt.subplot(rows, cols, position)
    spectrum = librosa.stft(signal)
    librosa.display.specshow(librosa.amplitude_to_db(np.abs(spectrum), ref=np.max),
                             sr=sample_rate, y_axis='hz', x_axis='time')
    plt.colorbar(format='%+2.0f dB')
    plt.title(title)


def advanced_noise_analysis(input_path):
    """Run several noise-reduction techniques and keep the most intelligible result.

    The audio at ``input_path`` is loaded at its native sampling rate and
    processed by five techniques (spectral subtraction, Wiener filter, soft
    thresholding, wavelet denoising, rolling-window gating). Each result is
    written to ``'<technique name>_noise_reduced.wav'`` in the working
    directory and scored with ``speech_intelligibility_score``; these files are
    left on disk. The highest-scoring result is also written to
    ``'best_noise_reduced.wav'`` and a spectrogram comparison is saved to
    ``'noise_reduction_comparison.png'``.

    Args:
        input_path: Path of the audio file to denoise.

    Returns:
        tuple: ``(best_audio, reduced_audios)`` where ``best_audio`` is the
        processed signal of the best-scoring technique and ``reduced_audios``
        maps every technique name to its processed signal.

    Raises:
        ValueError: If the loaded file contains no samples.
    """
    # Named sample_rate (not sr) so the speech_recognition alias is not shadowed.
    y, sample_rate = librosa.load(input_path, sr=None)
    if y.size == 0:
        raise ValueError(f"No audio samples found in {input_path!r}")

    techniques = {
        'Spectral Subtraction': _spectral_subtraction,
        'Wiener Filter': _wiener_filtering,
        'Soft Thresholding': _soft_thresholding,
        'Wavelet Denoising': _wavelet_denoising,
        'Rolling Window Noise Reduction': _rolling_window_noise_reduction
    }
    reduced_audios = {name: technique(y, sample_rate) for name, technique in techniques.items()}

    # Score each technique through the file-based recogniser.
    technique_scores = {}
    for name, processed_y in reduced_audios.items():
        temp_path = f'{name}_noise_reduced.wav'
        sf.write(temp_path, processed_y, sample_rate)
        technique_scores[name] = speech_intelligibility_score(temp_path)

    # Select and save the best technique.
    best_technique = max(technique_scores, key=technique_scores.get)
    best_audio = reduced_audios[best_technique]
    sf.write('best_noise_reduced.wav', best_audio, sample_rate)

    # Visualization: original, best result, then every technique. The grid is
    # sized from the panel count so no subplot index falls outside it.
    panels = [
        ('Original Spectrogram', y),
        (f'Best Method: {best_technique}', best_audio),
    ]
    panels.extend(
        (f'{name} (Score: {technique_scores[name]})', processed_y)
        for name, processed_y in reduced_audios.items()
    )
    cols = 3
    rows = (len(panels) + cols - 1) // cols  # ceiling division
    fig = plt.figure(figsize=(15, 5 * rows))
    for position, (title, signal) in enumerate(panels, start=1):
        _plot_spectrogram_panel(rows, cols, position, signal, sample_rate, title)

    plt.tight_layout()
    plt.savefig('noise_reduction_comparison.png')
    plt.close(fig)

    print("Technique Scores:", technique_scores)
    print(f"Best Technique: {best_technique}")
    if technique_scores[best_technique] <= 0:
        # Scores of 0 / -1 mean no transcript or a failed request, so the
        # ranking carries no information.
        print("Warning: no technique produced a transcript; the selected technique is arbitrary.")

    return best_audio, reduced_audios

In [None]:
def plot_two_audios(original_path, comparison_path):
    """Show the spectrograms of two audio files side by side.

    Both files are loaded at their native sampling rate. The left panel is
    titled 'Original Spectrogram' and the right panel 'ML Denoised
    Spectrogram'; the figure is displayed with ``plt.show()``.

    Args:
        original_path: Path of the audio file drawn in the left panel.
        comparison_path: Path of the audio file drawn in the right panel.

    Raises:
        ValueError: If the two files have different sampling rates.
    """
    first_signal, first_rate = librosa.load(original_path, sr=None)
    second_signal, second_rate = librosa.load(comparison_path, sr=None)

    if first_rate != second_rate:
        raise ValueError("Sampling rates of the original and comparison audio must be the same.")

    # (title, STFT, sampling rate) for each panel, in left-to-right order.
    panels = (
        ('Original Spectrogram', librosa.stft(first_signal), first_rate),
        ('ML Denoised Spectrogram', librosa.stft(second_signal), second_rate),
    )

    plt.figure(figsize=(15, 5))
    for slot, (heading, spectrum, rate) in enumerate(panels, start=1):
        plt.subplot(1, 2, slot)
        # Magnitude in dB relative to the panel's own peak.
        level_db = librosa.amplitude_to_db(np.abs(spectrum), ref=np.max)
        librosa.display.specshow(level_db, sr=rate, y_axis='hz', x_axis='time')
        plt.colorbar(format='%+2.0f dB')
        plt.title(heading)

    plt.tight_layout()
    plt.show()
