# Short-Time Fourier Transform

# Extract features to XLSX

In [10]:
import os
import numpy as np
import pandas as pd
from scipy.io import wavfile
from scipy.signal import stft

def extract_features(file_path):
    """
    Extract audio features from a preprocessed WAV file.
    Args:
        file_path (str): Path to the audio file.
    Returns:
        dict: Extracted features.
    """
    # Load the preprocessed audio file
    sample_rate, data = wavfile.read(file_path)

    # Compute RMS amplitude
    rms = np.sqrt(np.mean(data**2))

    # Compute spectral centroid
    frequencies, times, Zxx = stft(data, fs=sample_rate, nperseg=512)
    magnitude = np.abs(Zxx)
    spectral_centroid = np.sum(frequencies[:, None] * magnitude, axis=0) / np.sum(magnitude, axis=0)
    mean_spectral_centroid = np.mean(spectral_centroid)

    # Compute energy in frequency bands
    low_energy = np.sum(magnitude[frequencies < 300])
    mid_energy = np.sum(magnitude[(frequencies >= 300) & (frequencies < 2000)])
    high_energy = np.sum(magnitude[frequencies >= 2000])

    # Strip .wav extension from file name
    file_name = os.path.basename(file_path).replace('.wav', '')

    # Return extracted features
    return {
        "file": file_name,  # Stripped of .wav
        "rms": rms,
        "spectral_centroid": mean_spectral_centroid,
        "low_energy": low_energy,
        "mid_energy": mid_energy,
        "high_energy": high_energy,
    }

def process_features(input_directory, output_file):
    """
    Process all audio files in a directory to extract features and save to Excel.
    Args:
        input_directory (str): Directory containing preprocessed audio files.
        output_file (str): Path to save the extracted features as an Excel file.
    """
    feature_list = []
    for file_name in os.listdir(input_directory):
        if file_name.endswith(".wav"):
            file_path = os.path.join(input_directory, file_name)
            features = extract_features(file_path)
            feature_list.append(features)
    
    # Convert to DataFrame and save to Excel
    df_features = pd.DataFrame(feature_list)
    df_features.to_excel(output_file, index=False)
    print(f"Features saved to: {output_file}")

# Define input and output paths
input_directory = "/Users/jakob/Downloads/IDMT_Traffic/preprocessed_audio"
output_file = "/Users/jakob/Library/CloudStorage/OneDrive-student.kit.edu/Studium/02_Master/4. Semester/seminar/RTN-jakob/df_stft.xlsx"

# Process and save features
process_features(input_directory, output_file)

  spectral_centroid = np.sum(frequencies[:, None] * magnitude, axis=0) / np.sum(magnitude, axis=0)


Features saved to: /Users/jakob/Library/CloudStorage/OneDrive-student.kit.edu/Studium/02_Master/4. Semester/seminar/RTN-jakob/df_stft.xlsx


# Merge df_stft.xlsx with df_dataset.xlsx

In [1]:
import pandas as pd

# Load datasets
df_dataset = pd.read_excel("df_dataset.xlsx")
df_stft = pd.read_excel("df_stft.xlsx")

# Merge datasets on 'file' column (left join to retain all rows in df_dataset)
df_merged = pd.merge(df_dataset, df_stft, on="file", how="left")

# Save the merged dataset
df_merged.to_excel("df_dataset_merged.xlsx", index=False)
print("Merged dataset saved as df_dataset_merged.xlsx")

Merged dataset saved as df_dataset_merged.xlsx
