# Noise Reduction with Spectral Subtraction (Gradio demonstration)

Author- Aryan Gupta
Date- 12/06/2025

## Exploring Gradio App
```bash
pip install gradio
```

### Getting Started

In [9]:
# importing library
import gradio as gr

# defining function
def greet(name):
    return f"Hello, {name}!"

def hello():
    return "Hello, World!"

# defining gradio wrapper
iface = gr.Interface(fn=greet, inputs="text", outputs="text")
# iface = gr.Interface(fn=hello, inputs=None, outputs="text", title="Hello", description="Just says Hello! to the world")
iface.launch()

* Running on local URL:  http://127.0.0.1:7864
* To create a public link, set `share=True` in `launch()`.




### Custom Components

In [27]:
def hello(name):
    return "Hello, World!"

iface1 = gr.Interface(
    fn=hello,
    inputs=[
            # gr.Microphone(format='wav', sources=['upload', 'microphone'], interactive=True, show_download_button=True, show_share_button=True, recording=True),
            gr.Audio(sources=['microphone','upload'], show_download_button=True, format='wav')
            ],
    outputs="text",
    theme='earneleh/paris'
)
iface1.launch()

theme_schema%400.0.1.json:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


* Running on local URL:  http://127.0.0.1:7880
* To create a public link, set `share=True` in `launch()`.




## Creating the required app

In [2]:
# Import required libraries
import os
import numpy as np
np.complex = complex
# np.complex is a function, and complex is a class used by librosa to do the same thing, i.e, create complex numbers.
# It is just to keep librosa happy

import librosa        # audio I/O & processing
import librosa.display  # plotting
import soundfile as sf  # reading/writing WAV files
import matplotlib.pyplot as plt
from scipy.signal import stft, istft  # STFT operations

import gradio as gr # to create the app

print('Libraries imported successfully!')

Libraries imported successfully!


In [3]:
# sampling rate (in Hz)
sr = 44100

# STFT parameters
frame_len = 2048 # Number of samples that is analyzed as one data point. 2048 samples = 0.05 seconds at 44.1 kHz sampling rate
hop_len = 512    # how many samples you move forward after analyzing each frame. 512 samples = 0.0116 seconds at 44.1 kHz sampling rate

In [10]:
def extract_noise_profile(y, sr, duration=0.5):
    """
    Estimate an average magnitude spectrum from the first duration seconds of y.
    """
    n = int(sr * duration)
    noise = y[:n]
    _, _, Zxx = stft(noise, fs=sr, nperseg=frame_len,
                     noverlap=frame_len-hop_len, boundary=None)
    return np.mean(np.abs(Zxx), axis=1)


def spectral_subtract(y, sr, noise_spec):
    """
    Subtract noise_spec from the full signal in magnitude domain, then invert.
    """
    _, _, Zxx = stft(y, fs=sr, nperseg=frame_len,
                     noverlap=frame_len-hop_len, boundary=None)
    mag, phase = np.abs(Zxx), np.angle(Zxx)
    sub_mag = np.maximum(mag - noise_spec[:, None], 0.0)
    _, y_rec = istft(sub_mag * np.exp(1j * phase), fs=sr,
                     nperseg=frame_len, noverlap=frame_len-hop_len,
                     boundary=None)
    # Ensure same length
    y_rec = librosa.util.fix_length(y_rec, size=len(y))
    return y_rec

In [17]:
def enhance_audio(noisy_input):
    if isinstance(noisy_input, tuple):
        sr, y_noisy = noisy_input
        # If dtype is int16, convert to float in [-1,1]:
        if y_noisy.dtype.kind == 'i':
            y_noisy = y_noisy.astype(np.float32) / np.iinfo(y_noisy.dtype).max
    else:
        # assume it's a filepath
        y_noisy, sr = librosa.load(noisy_input, sr=None)

# Estimate noise profile
    noise_spec = extract_noise_profile(y_noisy, sr, duration=0.5)

# Apply spectral subtraction
    y_denoised = spectral_subtract(y_noisy, sr, noise_spec)
    # print('Denoising complete!')
    
    
    # Dummy data insertion
    margin_ms = 5  # milliseconds to replace at start
    n_dummy = int(margin_ms * 1e-3 * sr)

    # print(f'Dummy samples at start: {n_dummy}')

    # Overwrite first n_dummy samples with zeros
    y_denoised[:n_dummy] = 0
    # print(f'First {n_dummy} samples replaced with zeros for plotting.')
    
    return sr , y_denoised
    # sf.write(output_path, y_denoised, sr)
    # print(f"Denoised audio saved to: {output_path}")

In [None]:
plt.figure(figsize=(14, 8))

# Waveforms
plt.subplot(2, 2, 1)
librosa.display.waveshow(y_noisy, sr=sr, alpha=0.6)
plt.title('Original Noisy Waveform')

plt.subplot(2, 2, 2)
librosa.display.waveshow(y_denoised, sr=sr, color='r', alpha=0.6)
plt.title('Denoised Waveform (dummy start)')

# Spectrograms
D_noisy = librosa.amplitude_to_db(np.abs(librosa.stft(y_noisy, n_fft=frame_len, hop_length=hop_len)), ref=np.max)
D_denoised = librosa.amplitude_to_db(np.abs(librosa.stft(y_denoised, n_fft=frame_len, hop_length=hop_len)), ref=np.max)

plt.subplot(2, 2, 3)
librosa.display.specshow(D_noisy, sr=sr, hop_length=hop_len, x_axis='time', y_axis='log')
plt.colorbar(format='%+2.0f dB')
plt.title('Noisy Spectrogram')

plt.subplot(2, 2, 4)
librosa.display.specshow(D_denoised, sr=sr, hop_length=hop_len, x_axis='time', y_axis='log')
plt.colorbar(format='%+2.0f dB')
plt.title('Denoised Spectrogram')

plt.tight_layout()
plt.show()

In [None]:
iface1 = gr.Interface(
    fn=enhance_audio,
    inputs=[
            # gr.Microphone(format='wav', sources=['upload', 'microphone'], interactive=True, show_download_button=True, show_share_button=True, recording=True),
            gr.Audio(sources=['microphone','upload'], show_download_button=True, format='wav')
            ],
    outputs=gr.Audio(type="numpy"),
    theme='earneleh/paris',
    examples=['./noisy_sample.wav']
)
iface1.launch(
    # debug=True
    )

* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.


  _, y_rec = istft(sub_mag * np.exp(1j * phase), fs=sr,


Keyboard interruption in main thread... closing server.


