# DC offset and Signal-to-Noise Ratio (SNR) estimation

In [30]:
import sys
sys.path.append('../')

import torch
import csv

from pathlib import Path

from src.data.egfxset import load_egfxset
from src.data.springset import load_springset
from src.utils.checkpoints import select_device

In [31]:
# Project folders, given relative to the notebook's working directory.
DATA_DIR = Path('../data/raw/')  # passed to the dataset loaders
MODELS_DIR = Path('../models/')  # not referenced by the cells shown in this notebook
RESULTS_DIR = Path('../data/features/')  # the per-dataset DC/SNR CSV files are written here
PLOTS_DIR = Path('../docs/plots/')  # not referenced by the cells shown in this notebook

## DC offset

\begin{equation}
\text{DC Offset} = \frac{1}{N} \sum_{i=1}^{N} x_i
\end{equation}

In [32]:
def compute_dc_offset(signal):
    """
    Return the DC offset of an audio signal.

    The DC offset is the arithmetic mean taken over every element of
    `signal`, so the result is a 0-dim tensor regardless of the input shape.
    """
    dc_offset = signal.mean()
    return dc_offset

## Signal-to-Noise Ratio

\begin{equation}
\text{res} = \text{dry} - \text{wet}
\end{equation}

\begin{equation}
\text{SNR (dB)} = 10 \cdot \log_{10}\left(\frac{\sum_{i=1}^{N} \text{wet}_i^2}{\sum_{i=1}^{N} \text{res}_i^2 + \text{eps}}\right)
\end{equation}

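where $\text{wet}$ is the wet signal, $\text{res}$ is the residual signal, and $\text{eps}$ is a small number to avoid division by zero. Both the dry and the wet signal are mean-centred (their DC offset is removed) before the residual is computed, and the squared values are summed over all $N$ samples.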

In [33]:
def compute_snr(dry, wet, eps=1e-8):
    """
    Signal-to-Noise Ratio between a dry and a wet signal, in dB.

    Both tensors are mean-centred first so that a DC offset does not leak
    into the energies. The "signal" is the centred wet tensor and the "noise"
    is the residual `dry - wet`; energies are summed over all elements.
    `eps` is added to the noise energy to avoid a division by zero.

    Returns a plain Python float.
    """
    # remove any DC component from each signal independently
    dry_centered = dry - dry.mean()
    wet_centered = wet - wet.mean()

    residual = dry_centered - wet_centered

    # total energies (sum of squares over every element)
    signal_energy = (wet_centered ** 2).sum()
    noise_energy = (residual ** 2).sum() + eps

    snr_db = 10 * torch.log10(signal_energy / noise_energy)
    return snr_db.item()

In [34]:
# Device the dry/wet tensors are moved to before the metrics are computed ('cpu' is requested here).
device = select_device('cpu')

Using device: cpu


## EGFxSet

Select a dataset to work with. 


In [35]:
dataset = 'egfxset'

# batch_size=1 so the metrics are computed one loaded item at a time; only the
# first returned loader is used (presumably train_ratio=1.0 puts all loaded
# data into it; confirm against the loader implementations).
if dataset == 'egfxset':
    train_loader, _, _ = load_egfxset(DATA_DIR, batch_size=1, train_ratio=1.0, valid_ratio=0.0, test_ratio=0.0)
elif dataset == 'springset':
    train_loader, _, _ = load_springset(DATA_DIR, batch_size=1, train_ratio=1.0)
else:
    # Fail loudly instead of silently reusing a train_loader left over from an earlier cell
    raise ValueError(f"Unknown dataset {dataset!r}; expected 'egfxset' or 'springset'")

In [36]:
# Write the per-item DC offsets and SNR of the selected dataset to a CSV file
destination = RESULTS_DIR / f'{dataset}_dc_snr.csv'
# Create the output folder if needed so open() does not fail on a fresh checkout
destination.parent.mkdir(parents=True, exist_ok=True)

with open(destination, 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(["Index", "dry_DC", "wet_DC", "SNR"])

    for idx, (dry, wet) in enumerate(train_loader):
        # Move the tensors to the computation device
        dry = dry.to(device)
        wet = wet.to(device)

        # Compute metrics (converted to plain floats once, for both the CSV row and the log line)
        dc_offset_dry = compute_dc_offset(dry).item()
        dc_offset_wet = compute_dc_offset(wet).item()
        snr = compute_snr(dry, wet)

        writer.writerow([idx, dc_offset_dry, dc_offset_wet, snr])

        status = f"Sample {idx}: DC Offset (dry) = {dc_offset_dry:.3f}, DC Offset (wet) = {dc_offset_wet:.3f}, SNR = {snr:.3f} dB"
        # Pad to a fixed width: the line is redrawn in place with '\r', and a shorter
        # line would otherwise leave trailing characters of the previous one on screen
        print(status.ljust(100), end='\r')

# Report the real output path, after the file has been closed
print(f"\nAudio metrics saved to {destination}!")
        

Sample 13: DC Offset (dry) = -0.000, DC Offset (wet) = 0.000, SNR = 5.543 dBB

Sample 689: DC Offset (dry) = -0.000, DC Offset (wet) = -0.000, SNR = 1.156 dBB
Audio metrics saved to audio_metrics.csv!


## SpringSet

In [37]:
dataset = 'springset'

# batch_size=1 so the metrics are computed one loaded item at a time; only the
# first returned loader is used (presumably train_ratio=1.0 puts all loaded
# training data into it; confirm against the loader implementations).
if dataset == 'egfxset':
    train_loader, _, _ = load_egfxset(DATA_DIR, batch_size=1, train_ratio=1.0, valid_ratio=0.0, test_ratio=0.0)
elif dataset == 'springset':
    train_loader, _, _ = load_springset(DATA_DIR, batch_size=1, train_ratio=1.0)
else:
    # Fail loudly instead of silently reusing the train_loader left over from an earlier cell
    raise ValueError(f"Unknown dataset {dataset!r}; expected 'egfxset' or 'springset'")

Found 4 files in ../data/raw/springset
Using dry_train.h5 and wet_train.h5 for train split.


Found 4 files in ../data/raw/springset
Using dry_val_test.h5 and wet_val_test.h5 for test split.


In [38]:
# Write the per-item DC offsets and SNR of the selected dataset to a CSV file
destination = RESULTS_DIR / f'{dataset}_dc_snr.csv'
# Create the output folder if needed so open() does not fail on a fresh checkout
destination.parent.mkdir(parents=True, exist_ok=True)

with open(destination, 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(["Index", "dry_DC", "wet_DC", "SNR"])

    for idx, (dry, wet) in enumerate(train_loader):
        # Move the tensors to the computation device
        dry = dry.to(device)
        wet = wet.to(device)

        # Compute metrics (converted to plain floats once, for both the CSV row and the log line)
        dc_offset_dry = compute_dc_offset(dry).item()
        dc_offset_wet = compute_dc_offset(wet).item()
        snr = compute_snr(dry, wet)

        writer.writerow([idx, dc_offset_dry, dc_offset_wet, snr])

        status = f"Sample {idx}: DC Offset (dry) = {dc_offset_dry:.3f}, DC Offset (wet) = {dc_offset_wet:.3f}, SNR = {snr:.3f} dB"
        # Pad to a fixed width: the line is redrawn in place with '\r', and a shorter
        # line would otherwise leave trailing characters of the previous one on screen
        print(status.ljust(100), end='\r')

# Report the real output path, after the file has been closed
print(f"\nAudio metrics saved to {destination}!")

Sample 1121: DC Offset (dry) = 0.000, DC Offset (wet) = 0.000, SNR = -2.657 dBBB
Audio metrics saved to audio_metrics.csv!
