In [1]:
import os
import sys
import numpy as np
import pandas as pd

In [2]:
import librosa
import matplotlib.pyplot as plt

In [3]:
# Project root: the parent of the current working directory.
# NOTE(review): this assumes the kernel is started from a direct sub-folder of
# the project (e.g. a notebooks/ directory) — confirm.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))

# Make the project's `src` package importable. The membership check keeps
# sys.path free of duplicate entries when this cell is re-run in the same kernel.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# Import folder paths
from src.paths import RAW_AUDIO_DIR, PROCESSED_AUDIO_DIR


In [4]:
# Read the extracted-features table from the processed-audio directory.
csv_path = os.path.join(PROCESSED_AUDIO_DIR, "extracted_features.csv")
df = pd.read_csv(filepath_or_buffer=csv_path)


In [5]:
def compute_snr_db(audio_path, noise_duration=0.1, eps=1e-10):
    """Estimate a recording's signal-to-noise ratio in decibels.

    The mean power of the whole recording is treated as the signal and the
    mean power of its first ``noise_duration`` seconds as the noise floor.
    This is a heuristic: the result is only meaningful when the recording
    really starts with a quiet lead-in.

    Parameters
    ----------
    audio_path : str or path-like
        Audio file to analyse. It is loaded with ``librosa.load`` at its
        native sampling rate (``sr=None``).
    noise_duration : float, default 0.1
        Length, in seconds, of the leading segment used as the noise estimate.
    eps : float, default 1e-10
        Small positive constant. It is added to the noise power (so the ratio
        never divides by zero) and used as a floor for the signal power (so a
        completely silent file does not produce ``log10(0) = -inf``).

    Returns
    -------
    float
        Estimated SNR in dB, or NaN when the file contains no samples.

    Raises
    ------
    ValueError
        If ``noise_duration`` or ``eps`` is not strictly positive.
    """
    if noise_duration <= 0 or eps <= 0:
        raise ValueError("noise_duration and eps must be positive")

    y, sr = librosa.load(audio_path, sr=None)

    # An empty recording has no defined power; report NaN explicitly instead
    # of letting np.mean([]) emit a RuntimeWarning.
    if len(y) == 0:
        return float("nan")

    signal_power = np.mean(y ** 2)

    # Leading noise window: at least one sample (very low sampling rates would
    # otherwise yield an empty slice) and at most the whole recording.
    noise_len = max(1, min(len(y), int(sr * noise_duration)))
    noise_power = np.mean(y[:noise_len] ** 2) + eps  # avoid div by zero

    # Floor the signal power so an all-zero recording gives a finite value
    # rather than -inf, which would poison min/max statistics downstream.
    snr_db = 10 * np.log10(max(signal_power, eps) / noise_power)
    return snr_db


In [6]:
# One SNR estimate per recording listed in the feature table, in row order
snr_values = [
    compute_snr_db(os.path.join(RAW_AUDIO_DIR, filename))
    for filename in df['filename']
]

# Attach the estimates to the dataframe as a new column
df['snr_db'] = snr_values

df.head()


Unnamed: 0,filename,recording_length,rms_power,zero_crossing_rate,crest_factor,mfcc_1_mean,mfcc_1_std,mfcc_2_mean,mfcc_2_std,mfcc_3_mean,...,spectral_centroid,spectral_rolloff,spectral_spread,spectral_flatness,spectral_skewness,spectral_kurtosis,spectral_std,spectral_slope,spectral_decrease,snr_db
0,C01.mp3,10.34449,0.038089,0.135228,14.321193,-241.30396,91.666275,84.14268,21.002813,-4.462087,...,2704.818597,6107.294385,2786.48923,0.051419,16.852112,348.250119,21.021013,-0.001677,0.006595,17.210676
1,C02.mp3,8.646531,0.025775,0.125209,16.920517,-327.7023,119.82505,91.567764,26.206709,-8.425058,...,2444.247799,5257.334429,2619.219154,0.034675,8.877528,101.385738,6.324075,-0.00077,0.004322,17.624053
2,C03.mp3,13.322449,0.05313,0.140864,10.463289,-269.7161,159.61797,77.87096,23.231524,-21.224615,...,2723.267619,6019.074231,2751.604184,0.030861,5.078017,34.227901,15.07185,-0.002068,0.005871,27.620551
3,C04.mp3,12.617143,0.052493,0.102208,10.619624,-268.23877,134.48067,107.67993,19.424442,-31.156693,...,1856.466526,3648.927935,1945.712922,0.009132,5.999629,42.551798,17.076027,-0.002201,0.008966,23.899612
4,C05.mp3,10.13551,0.089877,0.078488,8.532025,-199.1724,93.77112,126.6597,25.368902,-24.815289,...,1683.891182,3190.338679,2068.88321,0.006417,5.443716,31.609686,38.163662,-0.004678,0.021986,7.898879


In [7]:
# Split the observed SNR range into three equal-width bands
snr_column = df['snr_db']
min_snr, max_snr = snr_column.min(), snr_column.max()
interval = (max_snr - min_snr) / 3

# Upper edges of the lowest and the middle band
threshold_poor = min_snr + interval
threshold_mild = threshold_poor + interval

# Classification function
def classify_snr(snr, poor_max=None, mild_max=None):
    """Label an SNR value as "Poor", "Mild" or "Strong".

    Parameters
    ----------
    snr : float
        SNR value in dB.
    poor_max : float, optional
        Inclusive upper bound of the "Poor" band. When omitted, the
        notebook-level ``threshold_poor`` is read at call time.
    mild_max : float, optional
        Inclusive upper bound of the "Mild" band. When omitted, the
        notebook-level ``threshold_mild`` is read at call time.

    Returns
    -------
    str
        "Poor" if ``snr <= poor_max``, "Mild" if ``snr <= mild_max``,
        otherwise "Strong".
    """
    # Fall back to the notebook globals so existing one-argument calls
    # (e.g. Series.apply(classify_snr)) keep working unchanged.
    if poor_max is None:
        poor_max = threshold_poor
    if mild_max is None:
        mild_max = threshold_mild

    if snr <= poor_max:
        return "Poor"
    if snr <= mild_max:
        return "Mild"
    # NOTE: NaN compares false against both bounds, so a NaN input also
    # ends up here.
    return "Strong"

# Label every recording by passing its SNR value through classify_snr
df['signal_strength'] = df['snr_db'].map(classify_snr)

df.head()


Unnamed: 0,filename,recording_length,rms_power,zero_crossing_rate,crest_factor,mfcc_1_mean,mfcc_1_std,mfcc_2_mean,mfcc_2_std,mfcc_3_mean,...,spectral_rolloff,spectral_spread,spectral_flatness,spectral_skewness,spectral_kurtosis,spectral_std,spectral_slope,spectral_decrease,snr_db,signal_strength
0,C01.mp3,10.34449,0.038089,0.135228,14.321193,-241.30396,91.666275,84.14268,21.002813,-4.462087,...,6107.294385,2786.48923,0.051419,16.852112,348.250119,21.021013,-0.001677,0.006595,17.210676,Mild
1,C02.mp3,8.646531,0.025775,0.125209,16.920517,-327.7023,119.82505,91.567764,26.206709,-8.425058,...,5257.334429,2619.219154,0.034675,8.877528,101.385738,6.324075,-0.00077,0.004322,17.624053,Mild
2,C03.mp3,13.322449,0.05313,0.140864,10.463289,-269.7161,159.61797,77.87096,23.231524,-21.224615,...,6019.074231,2751.604184,0.030861,5.078017,34.227901,15.07185,-0.002068,0.005871,27.620551,Strong
3,C04.mp3,12.617143,0.052493,0.102208,10.619624,-268.23877,134.48067,107.67993,19.424442,-31.156693,...,3648.927935,1945.712922,0.009132,5.999629,42.551798,17.076027,-0.002201,0.008966,23.899612,Strong
4,C05.mp3,10.13551,0.089877,0.078488,8.532025,-199.1724,93.77112,126.6597,25.368902,-24.815289,...,3190.338679,2068.88321,0.006417,5.443716,31.609686,38.163662,-0.004678,0.021986,7.898879,Poor


In [8]:
# Write the augmented table (index column omitted) to the processed-audio directory
output_csv = os.path.join(PROCESSED_AUDIO_DIR, "extracted_features_with_snr.csv")
df.to_csv(path_or_buf=output_csv, index=False)

print("Updated CSV saved at:", output_csv)


Updated CSV saved at: c:\Users\user\OneDrive\Desktop\portfolio\portfolio_projects\DRI_SWAASA\data\processed\audio\extracted_features_with_snr.csv


In [9]:
# SNR bar chart: one bar per recording, coloured by signal-strength label

# Paths
from src.paths import PROCESSED_AUDIO_DIR, PLOTS_DIR

# Dedicated sub-folder for SNR figures; created if it does not exist yet
SNR_PLOTS_DIR = os.path.join(PLOTS_DIR, "snr")
os.makedirs(SNR_PLOTS_DIR, exist_ok=True)

# Reload the table that carries the snr_db / signal_strength columns
csv_path = os.path.join(PROCESSED_AUDIO_DIR, "extracted_features_with_snr.csv")
df = pd.read_csv(csv_path)

# One bar colour per signal-strength category
color_map = {"Poor": "red", "Mild": "orange", "Strong": "green"}
colors = df['signal_strength'].map(color_map)

# Caption placed below the axes
desc = ("This SNR bar plot compares signal clarity across all recordings.\n"
        "Higher SNR values indicate cleaner cough audio with less background interference.\n"
        "Color coding highlights poor, mild, and strong signal-strength categories.")

# Draw through explicit Figure/Axes objects instead of pyplot's implicit state
fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(df['filename'], df['snr_db'], color=colors, edgecolor='black')
ax.set_xlabel("Audio File")
ax.set_ylabel("SNR (dB)")
ax.set_title("SNR Values per Audio File")
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)
ax.grid(axis='y', alpha=0.7)

# Reserve the bottom 13% of the figure for the caption
fig.text(0.5, 0.01, desc, ha='center', fontsize=9, wrap=True)
fig.tight_layout(rect=[0, 0.13, 1, 1])

# Save plot, then release the figure
out_path = os.path.join(SNR_PLOTS_DIR, "snr_bar_plot.png")
fig.savefig(out_path)
plt.close(fig)
print("SNR plot saved at:", out_path)

SNR plot saved at: c:\Users\user\OneDrive\Desktop\portfolio\portfolio_projects\DRI_SWAASA\plots\snr\snr_bar_plot.png
