In [1]:
import os
import sys
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import skew, kurtosis
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Ensure project root
# The notebook is expected to run from a sub-folder of the project, so the
# parent of the current working directory is treated as the project root.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))

# Guard the append so that re-running this cell does not keep stacking
# duplicate entries onto sys.path.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# Import paths (must come after the sys.path tweak above so `src` resolves)
from src.paths import (
    RAW_AUDIO_DIR,
    PLOTS_DIR,
    MFCC_INDIVIDUAL_FROM_AUDIO,
    SPECTRAL_INDIVIDUAL_FROM_AUDIO,
    SPECTRAL_FEATURES,
)

print("Raw Audio Directory:", RAW_AUDIO_DIR)
print("Plots Directory:", PLOTS_DIR)
print("MFCC Folders:", len(MFCC_INDIVIDUAL_FROM_AUDIO))
print("Spectral Feature Folders:", len(SPECTRAL_INDIVIDUAL_FROM_AUDIO))

Raw Audio Directory: c:\Users\iamsh\OneDrive\Desktop\CHI 2026\DRI_SWAASA-main\data\raw\audio
Plots Directory: c:\Users\iamsh\OneDrive\Desktop\CHI 2026\DRI_SWAASA-main\plots
MFCC Folders: 13
Spectral Feature Folders: 9


In [3]:
# Discover the audio recordings in the raw-audio folder.
# Matching is case-insensitive on the file extension, and the result is
# sorted by file name so later cells process files in a stable order.
audio_files = sorted(
    filter(
        lambda entry: entry.lower().endswith(('.wav', '.mp3', '.flac', '.ogg')),
        os.listdir(RAW_AUDIO_DIR),
    )
)

print(f"Found {len(audio_files)} audio files:")
for f in audio_files:
    print(f"  - {f}")

Found 10 audio files:
  - C01.mp3
  - C02.mp3
  - C03.mp3
  - C04.mp3
  - C05.mp3
  - C06.mp3
  - C07.mp3
  - C08.mp3
  - C09.mp3
  - C10.mp3


In [4]:
def extract_comprehensive_features(audio_path, sr=22050, n_mfcc=13):
    """Extract temporal, MFCC, spectral and chroma summary features from one audio file.

    Args:
        audio_path: Path of the audio file handed to ``librosa.load``.
        sr: Sample rate passed to ``librosa.load``; the rate returned by the
            loader is the one used for every subsequent computation.
        n_mfcc: Number of MFCC coefficients to compute.

    Returns:
        dict mapping feature name to a scalar value, or ``None`` when the file
        cannot be loaded or contains no samples. Keys, in insertion order:
        ``duration``, ``rms_energy``, ``zero_crossing_rate``,
        ``mfcc_{k}_mean/std/max/min`` for k = 1..n_mfcc, the nine
        ``spectral_*`` entries, ``crest_factor``, ``peak_frequency`` and
        ``chroma_{j}_mean`` for every chroma row.
    """
    try:
        y, sr = librosa.load(audio_path, sr=sr)
    except Exception as e:
        print(f"Error loading {audio_path}: {e}")
        return None

    # A zero-length signal would make the reductions below (np.max, np.polyfit,
    # ...) raise and abort the whole batch; report it like any other failure.
    if y.size == 0:
        print(f"Error loading {audio_path}: no audio samples")
        return None

    features = {}

    # Temporal features (frame-wise values averaged over the whole clip)
    features['duration'] = librosa.get_duration(y=y, sr=sr)
    features['rms_energy'] = np.mean(librosa.feature.rms(y=y)[0])
    features['zero_crossing_rate'] = np.mean(librosa.feature.zero_crossing_rate(y=y)[0])

    # MFCC features: four summary statistics per coefficient, numbered from 1
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    for i in range(n_mfcc):
        coeff = mfcc[i]
        features[f'mfcc_{i+1}_mean'] = np.mean(coeff)
        features[f'mfcc_{i+1}_std'] = np.std(coeff)
        features[f'mfcc_{i+1}_max'] = np.max(coeff)
        features[f'mfcc_{i+1}_min'] = np.min(coeff)

    # Spectral features
    # psd is the time-averaged squared STFT magnitude, one value per frequency bin
    spec = np.abs(librosa.stft(y))
    freqs = librosa.fft_frequencies(sr=sr)
    psd = np.mean(spec**2, axis=1)

    features['spectral_centroid'] = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
    features['spectral_rolloff'] = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
    features['spectral_spread'] = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
    features['spectral_flatness'] = np.mean(librosa.feature.spectral_flatness(y=y))
    # Skewness / kurtosis / std are taken over the psd values themselves
    features['spectral_skewness'] = skew(psd)
    features['spectral_kurtosis'] = kurtosis(psd)
    features['spectral_std'] = np.std(psd)
    # Slope of a first-degree least-squares fit of psd against frequency
    features['spectral_slope'] = np.polyfit(freqs, psd, 1)[0]
    # Mean difference to the first bin, scaled by frequency (epsilon avoids /0)
    features['spectral_decrease'] = np.mean((psd[1:] - psd[0]) / (freqs[1:] + 1e-8))

    # Additional features
    # Peak amplitude over RMS amplitude; epsilon protects against an all-zero signal
    features['crest_factor'] = np.max(np.abs(y)) / (np.sqrt(np.mean(y**2)) + 1e-8)
    features['peak_frequency'] = freqs[np.argmax(psd)]

    # Chroma features: iterate over the rows actually returned instead of
    # assuming a fixed count of 12
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    for i in range(chroma.shape[0]):
        features[f'chroma_{i}_mean'] = np.mean(chroma[i])

    return features

# Run the extractor over every discovered file.
# Only successful extractions are stored; a None result is reported as failed.
all_features = {}
for audio_file in audio_files:
    audio_path = os.path.join(RAW_AUDIO_DIR, audio_file)
    print(f"Processing: {audio_file}...", end=" ")
    feats = extract_comprehensive_features(audio_path)
    if feats is None:
        print("Failed")
        continue
    all_features[audio_file] = feats
    print("Done")

print(f"\nSuccessfully extracted features from {len(all_features)} files")

Processing: C01.mp3... Done
Processing: C02.mp3... Done
Processing: C03.mp3... Done
Processing: C04.mp3... Done
Processing: C05.mp3... Done
Processing: C06.mp3... Done
Processing: C07.mp3... Done
Processing: C08.mp3... Done
Processing: C09.mp3... Done
Processing: C10.mp3... Done

Successfully extracted features from 10 files


In [5]:
# Visualize MFCC Features - One plot per coefficient
print("Creating MFCC visualizations...")

# 13 coefficients, numbered from 1 to match the 'mfcc_{k}_mean' feature keys
for mfcc_idx in range(1, 14):
    mfcc_col = f'mfcc_{mfcc_idx}_mean'

    # Collect this coefficient from every file that actually carries it
    per_file = {
        name: feats[mfcc_col]
        for name, feats in all_features.items()
        if mfcc_col in feats
    }
    filenames = list(per_file)
    values = list(per_file.values())

    # Create bar plot (explicit figure/axes so nothing leaks between iterations)
    fig, ax = plt.subplots(figsize=(12, 5))
    bars = ax.bar(range(len(filenames)), values, color='skyblue', edgecolor='navy', alpha=0.7)

    # Add value labels at the tip of each bar. MFCC means are frequently
    # negative, so the label is anchored below the tip for negative bars
    # instead of being drawn inside the bar.
    for bar, val in zip(bars, values):
        ax.text(bar.get_x() + bar.get_width() / 2., bar.get_height(),
                f'{val:.2f}', ha='center',
                va='bottom' if val >= 0 else 'top', fontsize=8)

    ax.set_xlabel('Audio File', fontsize=11, fontweight='bold')
    ax.set_ylabel('MFCC Mean Value', fontsize=11, fontweight='bold')
    ax.set_title(f'MFCC Coefficient {mfcc_idx} - All Samples', fontsize=12, fontweight='bold')
    ax.set_xticks(range(len(filenames)))
    ax.set_xticklabels(filenames, rotation=45, ha='right', fontsize=9)
    fig.tight_layout()

    # Save to individual MFCC folder in plots_from_audio
    output_dir = MFCC_INDIVIDUAL_FROM_AUDIO[mfcc_idx - 1]
    os.makedirs(output_dir, exist_ok=True)  # savefig does not create missing folders
    output_path = os.path.join(output_dir, f'mfcc_{mfcc_idx}_all_samples.png')
    fig.savefig(output_path, dpi=100, bbox_inches='tight')
    plt.close(fig)  # release the figure; one is created per coefficient

    print(f"  Saved: plots_from_audio/mfcc/mfcc{mfcc_idx}/mfcc_{mfcc_idx}_all_samples.png")

print("MFCC visualizations complete!")

Creating MFCC visualizations...
  Saved: plots_from_audio/mfcc/mfcc1/mfcc_1_all_samples.png
  Saved: plots_from_audio/mfcc/mfcc2/mfcc_2_all_samples.png
  Saved: plots_from_audio/mfcc/mfcc3/mfcc_3_all_samples.png
  Saved: plots_from_audio/mfcc/mfcc4/mfcc_4_all_samples.png
  Saved: plots_from_audio/mfcc/mfcc5/mfcc_5_all_samples.png
  Saved: plots_from_audio/mfcc/mfcc6/mfcc_6_all_samples.png
  Saved: plots_from_audio/mfcc/mfcc7/mfcc_7_all_samples.png
  Saved: plots_from_audio/mfcc/mfcc8/mfcc_8_all_samples.png
  Saved: plots_from_audio/mfcc/mfcc9/mfcc_9_all_samples.png
  Saved: plots_from_audio/mfcc/mfcc10/mfcc_10_all_samples.png
  Saved: plots_from_audio/mfcc/mfcc11/mfcc_11_all_samples.png
  Saved: plots_from_audio/mfcc/mfcc12/mfcc_12_all_samples.png
  Saved: plots_from_audio/mfcc/mfcc13/mfcc_13_all_samples.png
MFCC visualizations complete!


In [6]:
# Visualize Spectral Features - One plot per feature
print("Creating Spectral Feature visualizations...")

for feat_idx, feature_name in enumerate(SPECTRAL_FEATURES):
    # Collect this feature from every file that actually carries it
    per_file = {
        name: feats[feature_name]
        for name, feats in all_features.items()
        if feature_name in feats
    }
    filenames = list(per_file)
    values = list(per_file.values())

    # Create bar plot (explicit figure/axes so nothing leaks between iterations)
    fig, ax = plt.subplots(figsize=(12, 5))
    bars = ax.bar(range(len(filenames)), values, color='lightcoral', edgecolor='darkred', alpha=0.7)

    # Add value labels at the tip of each bar.
    # - Small-magnitude features (e.g. flatness, slope) would all render as
    #   "0.00" with a fixed two-decimal format, so fall back to scientific
    #   notation below 0.01.
    # - Negative bars get their label below the tip rather than inside the bar.
    for bar, val in zip(bars, values):
        label = f'{val:.2f}' if (val == 0 or abs(val) >= 0.01) else f'{val:.2e}'
        ax.text(bar.get_x() + bar.get_width() / 2., bar.get_height(),
                label, ha='center',
                va='bottom' if val >= 0 else 'top', fontsize=8)

    ax.set_xlabel('Audio File', fontsize=11, fontweight='bold')
    ax.set_ylabel('Feature Value', fontsize=11, fontweight='bold')
    ax.set_title(f'{feature_name.replace("_", " ").title()} - All Samples', fontsize=12, fontweight='bold')
    ax.set_xticks(range(len(filenames)))
    ax.set_xticklabels(filenames, rotation=45, ha='right', fontsize=9)
    fig.tight_layout()

    # Save to individual spectral feature folder in plots_from_audio
    output_dir = SPECTRAL_INDIVIDUAL_FROM_AUDIO[feat_idx]
    os.makedirs(output_dir, exist_ok=True)  # savefig does not create missing folders
    output_file = f'{feature_name}_all_samples.png'
    output_path = os.path.join(output_dir, output_file)
    fig.savefig(output_path, dpi=100, bbox_inches='tight')
    plt.close(fig)  # release the figure; one is created per feature

    # Report the file name that was actually written
    print(f"  Saved: plots_from_audio/spectral/{feature_name}/{output_file}")

print("Spectral feature visualizations complete!")

Creating Spectral Feature visualizations...
  Saved: plots_from_audio/spectral/spectral_centroid/all_samples.png
  Saved: plots_from_audio/spectral/spectral_rolloff/all_samples.png
  Saved: plots_from_audio/spectral/spectral_spread/all_samples.png
  Saved: plots_from_audio/spectral/spectral_flatness/all_samples.png
  Saved: plots_from_audio/spectral/spectral_skewness/all_samples.png
  Saved: plots_from_audio/spectral/spectral_kurtosis/all_samples.png
  Saved: plots_from_audio/spectral/spectral_std/all_samples.png
  Saved: plots_from_audio/spectral/spectral_slope/all_samples.png
  Saved: plots_from_audio/spectral/spectral_decrease/all_samples.png
Spectral feature visualizations complete!


In [7]:
# Create individual audio file feature plots
print("Creating individual audio file visualizations...")

from src.paths import PLOTS_FROM_AUDIO_DIR

for audio_file, features in all_features.items():
    # One figure per audio file: MFCC means on top, spectral features below
    fig, axes = plt.subplots(2, 1, figsize=(14, 10))
    mfcc_ax, spectral_ax = axes

    # --- Top panel: mean of each of the 13 MFCCs (missing keys plot as 0) ---
    mfcc_cols = [f'mfcc_{i+1}_mean' for i in range(13)]
    mfcc_vals = []
    for col in mfcc_cols:
        mfcc_vals.append(features.get(col, 0))

    mfcc_ax.bar(range(1, 14), mfcc_vals, color='skyblue', edgecolor='navy', alpha=0.7)
    mfcc_ax.set_title(f'{audio_file} - MFCC Mean Features', fontsize=11, fontweight='bold')
    mfcc_ax.set_xlabel('MFCC Coefficient', fontsize=10, fontweight='bold')
    mfcc_ax.set_ylabel('Mean Value', fontsize=10, fontweight='bold')
    mfcc_ax.grid(axis='y', alpha=0.3)

    # --- Bottom panel: every spectral feature (missing keys plot as 0) ---
    spectral_positions = range(len(SPECTRAL_FEATURES))
    spectral_vals = []
    for feat in SPECTRAL_FEATURES:
        spectral_vals.append(features.get(feat, 0))

    spectral_ax.bar(spectral_positions, spectral_vals, color='lightcoral', edgecolor='darkred', alpha=0.7)
    spectral_ax.set_title(f'{audio_file} - Spectral Features', fontsize=11, fontweight='bold')
    spectral_ax.set_xlabel('Spectral Feature', fontsize=10, fontweight='bold')
    spectral_ax.set_ylabel('Value', fontsize=10, fontweight='bold')
    spectral_ax.set_xticks(spectral_positions)
    # Break the feature names at underscores so the tick labels stack vertically
    spectral_ax.set_xticklabels([f.replace('_', '\n') for f in SPECTRAL_FEATURES], fontsize=8)
    spectral_ax.grid(axis='y', alpha=0.3)

    plt.tight_layout()

    # Save to plots_from_audio root directory
    output_path = os.path.join(PLOTS_FROM_AUDIO_DIR, f'{audio_file}_all_features.png')
    plt.savefig(output_path, dpi=100, bbox_inches='tight')
    plt.close()

    print(f"  Saved: plots_from_audio/{audio_file}_all_features.png")

print("Individual file visualizations complete!")

Creating individual audio file visualizations...
  Saved: plots_from_audio/C01.mp3_all_features.png
  Saved: plots_from_audio/C02.mp3_all_features.png
  Saved: plots_from_audio/C03.mp3_all_features.png
  Saved: plots_from_audio/C04.mp3_all_features.png
  Saved: plots_from_audio/C05.mp3_all_features.png
  Saved: plots_from_audio/C06.mp3_all_features.png
  Saved: plots_from_audio/C07.mp3_all_features.png
  Saved: plots_from_audio/C08.mp3_all_features.png
  Saved: plots_from_audio/C09.mp3_all_features.png
  Saved: plots_from_audio/C10.mp3_all_features.png
Individual file visualizations complete!
