# Extracting Features from WAV Files

In [29]:
# Load required libraries
import sys
sys.path.append('/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages')
import librosa
import librosa.display
import soundfile
import IPython.display as ipd
import matplotlib.pyplot as plt
import numpy as np

In [78]:
# Load the sample WAV file into a sample array and its sample rate.
# NOTE(review): no `sr` argument is passed, so the library's default sample
# rate applies — confirm that this (and any resampling it implies) is intended.
audio_file = 'sample_audio/debussy.wav'
audio, sample_rate = librosa.load(audio_file)

In [79]:
# Default analysis window length (FRAME_SIZE) and step between consecutive
# windows (HOP_SIZE), both counted in samples. These two values are shared by
# the framed feature computations in this file.
FRAME_SIZE = 2048
HOP_SIZE = 512

## Time-Domain Features

### Estimated Tempo

In [161]:
# Estimate the global tempo (beats per minute) of the clip.
# librosa 0.10 moved the estimator to `librosa.feature.tempo` and deprecated
# `librosa.beat.tempo`; use the new location when it exists and fall back to
# the old one on earlier releases. The `or` keeps the fallback lazy, so the
# deprecated attribute is never touched when the new one is available.
_estimate_tempo = getattr(librosa.feature, 'tempo', None) or librosa.beat.tempo
bpm_audio = _estimate_tempo(y=audio, sr=sample_rate, hop_length=HOP_SIZE)

### Amplitude Envelope

In [80]:
def amplitude_envelope(signal, frame_size, hop_length):
    """Return the maximum sample value of every frame of ``signal``.

    Frames start every ``hop_length`` samples and span ``frame_size`` samples;
    frames at the end of the signal are shorter because slicing stops at the
    last sample. The raw (signed) sample values are compared, exactly as in
    the original implementation.

    Args:
        signal: 1-D sequence of samples.
        frame_size: Number of samples per frame.
        hop_length: Number of samples between the starts of adjacent frames.

    Returns:
        1-D ``numpy`` array with one maximum per frame (empty for an empty
        signal).
    """
    signal = np.asarray(signal)
    # np.max reduces each slice in native code; the builtin max() would walk
    # every sample through the Python interpreter.
    return np.array([
        np.max(signal[start:start + frame_size])
        for start in range(0, len(signal), hop_length)
    ])

In [81]:
# Frame-wise amplitude envelope of the clip and its summary statistics.
ae_audio = amplitude_envelope(signal=audio, frame_size=FRAME_SIZE, hop_length=HOP_SIZE)
ae_mean = ae_audio.mean()
ae_stddev = ae_audio.std()

### Root-Mean-Square Energy

In [82]:
# Frame-wise RMS energy (first row of the returned 2-D array) and its statistics.
# `y` is passed by keyword: librosa 0.10+ made this function's arguments
# keyword-only, so the positional form raises TypeError there; the keyword
# form works on older releases as well.
rms_audio = librosa.feature.rms(y=audio, frame_length=FRAME_SIZE, hop_length=HOP_SIZE)[0]
rms_mean = np.mean(rms_audio)
rms_stddev = np.std(rms_audio)

### Energy Entropy

In [166]:
def compute_energy(frame):
    """Return the mean squared magnitude of the samples in ``frame``.

    An empty frame carries no energy, so 0.0 is returned instead of dividing
    by a length of zero.
    """
    if len(frame) == 0:
        return 0.0
    return np.sum(np.abs(frame) ** 2) / len(frame)


def compute_frame_energy_entropy(frame, num_subframes=20):
    """Return the Shannon entropy (base 2) of the sub-frame energy distribution.

    The frame is cut into ``num_subframes`` equal-length sub-frames; the
    energy of each one is normalised by the total so the values form a
    probability distribution, whose entropy is returned.

    Edge cases handled explicitly:
      * A frame shorter than ``num_subframes`` uses one sample per sub-frame
        (previously the sub-frame size became 0 and ``range`` raised).
      * Samples left over when the length is not divisible by the number of
        sub-frames are dropped, so exactly ``num_subframes`` sub-frames are
        used (previously the remainder formed an extra, shorter sub-frame).
      * Sub-frames with zero energy contribute nothing (0 * log 0 is taken
        as 0) and an all-silent or empty frame yields 0.0 instead of NaN.

    Args:
        frame: 1-D sequence of samples.
        num_subframes: Requested number of sub-frames.

    Returns:
        The entropy as a float, between 0 and ``log2(num_subframes)``.
    """
    frame = np.asarray(frame)
    num_subframes = min(num_subframes, len(frame))
    if num_subframes < 1:
        return 0.0

    subframe_size = len(frame) // num_subframes
    used_length = subframe_size * num_subframes
    subframes = [
        frame[start:start + subframe_size]
        for start in range(0, used_length, subframe_size)
    ]

    energy = np.array([compute_energy(subframe) for subframe in subframes])
    total_energy = np.sum(energy)
    if total_energy == 0:
        return 0.0

    # Only non-zero probabilities enter the sum; log2(0) would produce NaN.
    probabilities = energy[energy > 0] / total_energy
    return -np.sum(probabilities * np.log2(probabilities))


def energy_entropy(signal, frame_length, hop_length):
    """Return the energy entropy of every frame of ``signal``.

    Frames start every ``hop_length`` samples and span ``frame_length``
    samples; trailing frames are shorter because slicing stops at the end of
    the signal.

    Returns:
        1-D ``numpy`` array with one entropy value per frame.
    """
    return np.array([
        compute_frame_energy_entropy(signal[start:start + frame_length])
        for start in range(0, len(signal), hop_length)
    ])

In [172]:
# Frame-wise energy entropy of the clip and its summary statistics.
ee_audio = energy_entropy(signal=audio, frame_length=FRAME_SIZE, hop_length=HOP_SIZE)
ee_mean = ee_audio.mean()
ee_stddev = ee_audio.std()

### Zero-Crossing Rate

In [83]:
# Frame-wise zero-crossing rate (first row of the returned 2-D array) and its
# summary statistics.
zcr_frames = librosa.feature.zero_crossing_rate(
    y=audio,
    frame_length=FRAME_SIZE,
    hop_length=HOP_SIZE,
)
zcr_audio = zcr_frames[0]
zcr_mean = zcr_audio.mean()
zcr_stddev = zcr_audio.std()

## Frequency-Domain Features

In [84]:
# Short-time Fourier transform of the clip, using the shared frame and hop
# sizes; the hand-written frequency-domain features below take this as input.
audio_spctgm = librosa.stft(
    y=audio,
    n_fft=FRAME_SIZE,
    hop_length=HOP_SIZE,
)

### Band-Energy Ratio

In [85]:
# Boundary between the "low" and "high" band for the band-energy ratio.
# It is compared against sample_rate / 2, so it uses the same unit as the
# sample rate (presumably Hz — confirm).
SPLIT_FREQ = 2000

In [175]:
def calculate_split_frequency_bin(spectrogram, split_frequency, sample_rate):
    """Return the index of the frequency bin that contains ``split_frequency``.

    The rows of ``spectrogram`` are assumed to be linearly spaced from 0 up
    to and including the Nyquist frequency (``sample_rate / 2``), which is the
    layout of a one-sided STFT. With both end points included, ``n`` rows are
    separated by ``n - 1`` intervals, so the bin spacing is
    ``nyquist / (n - 1)``. Dividing by ``n`` instead, as this function used
    to, underestimates the spacing and can place the split one bin too high.

    Args:
        spectrogram: 2-D array shaped ``(frequency_bins, frames)``.
        split_frequency: Boundary frequency, in the same unit as
            ``sample_rate``.
        sample_rate: Sampling rate of the analysed signal.

    Returns:
        The bin index as an ``int``. Returns 0 when the spectrogram has fewer
        than two frequency bins, because no spacing can be derived.
    """
    num_bins = spectrogram.shape[0]
    if num_bins < 2:
        return 0

    nyquist = sample_rate / 2
    frequency_delta_per_bin = nyquist / (num_bins - 1)
    return int(np.floor(split_frequency / frequency_delta_per_bin))


def to_power_spectrogram(spectrogram):
    """Return the element-wise squared magnitude of ``spectrogram``."""
    magnitude = np.abs(spectrogram)
    return magnitude ** 2


def calculate_ber_for_frame(frequencies_in_frame, split_frequency_bin):
    """Return the band-energy ratio of one frame.

    The ratio is the sum of the values below ``split_frequency_bin`` divided
    by the sum of the values from ``split_frequency_bin`` upwards.

    Args:
        frequencies_in_frame: 1-D array with one power value per frequency
            bin of the frame.
        split_frequency_bin: Index of the first bin of the high band.

    Returns:
        Low-band sum divided by high-band sum. When the high band sums to
        zero the division is left to NumPy, so the result is ``inf`` or
        ``nan`` rather than an exception.
    """
    sum_power_low_freq = np.sum(frequencies_in_frame[:split_frequency_bin])
    sum_power_high_freq = np.sum(frequencies_in_frame[split_frequency_bin:])
    return sum_power_low_freq / sum_power_high_freq

    
def band_energy_ratio(spectrogram, split_frequency, sample_rate):
    """Return the band-energy ratio of every frame of ``spectrogram``.

    The spectrogram is converted to power and transposed so that iterating
    over it yields one frame (all frequency bins) at a time; each frame is
    then reduced to its low-band / high-band ratio.

    Returns:
        1-D ``numpy`` array with one ratio per frame.
    """
    boundary_bin = calculate_split_frequency_bin(spectrogram, split_frequency, sample_rate)
    frames = to_power_spectrogram(spectrogram).T

    ratios = []
    for frame_power in frames:
        ratios.append(calculate_ber_for_frame(frame_power, boundary_bin))
    return np.array(ratios)

In [176]:
# Frame-wise band-energy ratio of the clip and its summary statistics.
ber_audio = band_energy_ratio(
    spectrogram=audio_spctgm,
    split_frequency=SPLIT_FREQ,
    sample_rate=sample_rate,
)
ber_mean = ber_audio.mean()
ber_stddev = ber_audio.std()

### Spectral Centroid

In [96]:
# Frame-wise spectral centroid (first row of the returned 2-D array) and its
# summary statistics.
# `y` is passed by keyword: librosa 0.10+ made this function's arguments
# keyword-only, so the positional form raises TypeError there.
sc_audio = librosa.feature.spectral_centroid(y=audio, sr=sample_rate, n_fft=FRAME_SIZE, hop_length=HOP_SIZE)[0]
sc_mean = np.mean(sc_audio)
sc_stddev = np.std(sc_audio)

### Bandwidth / Spectral Spread

In [99]:
# Frame-wise spectral bandwidth (first row of the returned 2-D array) and its
# summary statistics.
# `y` is passed by keyword: librosa 0.10+ made this function's arguments
# keyword-only, so the positional form raises TypeError there.
ss_audio = librosa.feature.spectral_bandwidth(y=audio, sr=sample_rate, n_fft=FRAME_SIZE, hop_length=HOP_SIZE)[0]
ss_mean = np.mean(ss_audio)
ss_stddev = np.std(ss_audio)

### Spectral Rolloff

In [101]:
# Frame-wise spectral roll-off (first row of the returned 2-D array) and its
# summary statistics.
# `y` is passed by keyword: librosa 0.10+ made this function's arguments
# keyword-only, so the positional form raises TypeError there.
sroll_audio = librosa.feature.spectral_rolloff(y=audio, sr=sample_rate, n_fft=FRAME_SIZE, hop_length=HOP_SIZE)[0]
sroll_mean = np.mean(sroll_audio)
sroll_stddev = np.std(sroll_audio)

### Spectral Flatness

In [155]:
# Frame-wise spectral flatness (first row of the returned 2-D array) and its
# summary statistics.
# `y` is passed by keyword: librosa 0.10+ made this function's arguments
# keyword-only, so the positional form raises TypeError there.
sflat_audio = librosa.feature.spectral_flatness(y=audio, n_fft=FRAME_SIZE, hop_length=HOP_SIZE)[0]
sflat_mean = np.mean(sflat_audio)
sflat_stddev = np.std(sflat_audio)

### Spectral Contrast

In [158]:
# Spectral contrast and its summary statistics.
# `y` is passed by keyword: librosa 0.10+ made this function's arguments
# keyword-only, so the positional form raises TypeError there.
# NOTE(review): `[0]` keeps only the first row of the result. Spectral
# contrast presumably returns one row per frequency band, in which case the
# statistics below describe the first band only — confirm this is intended
# (the MFCC and chroma cells keep every row and reduce along axis=1).
# The `sconstrast_*` spelling is kept because other cells may use these names.
sconstrast_audio = librosa.feature.spectral_contrast(y=audio, sr=sample_rate, n_fft=FRAME_SIZE, hop_length=HOP_SIZE)[0]
sconstrast_mean = np.mean(sconstrast_audio)
sconstrast_stddev = np.std(sconstrast_audio)

### Spectral Flux

Reference for the definition of spectral flux: https://www.sciencedirect.com/topics/engineering/spectral-flux

In [152]:
def normalize(spectra):
    """Scale ``spectra`` so that its elements sum to 1.

    A spectrum whose elements sum to zero (for example a silent frame) cannot
    be scaled; an all-zero array of the same shape is returned for it instead
    of the NaNs that a division by zero would produce.
    """
    spectra = np.asarray(spectra)
    total = np.sum(spectra)
    if total == 0:
        return np.zeros(spectra.shape, dtype=np.result_type(spectra.dtype, np.float64))
    return spectra / total


def compute_spect_flux_for_frame(prev_spectra_nmlzd, curr_spectra):
    """Return the flux between two consecutive frames.

    Args:
        prev_spectra_nmlzd: Previous frame's spectrum, already normalised.
        curr_spectra: Current frame's spectrum, not yet normalised.

    Returns:
        A tuple ``(flux, curr_spectra_nmlzd)``: the Euclidean norm of the
        difference between the two normalised spectra, and the normalised
        current spectrum so the caller can reuse it for the next frame.
    """
    curr_spectra_nmlzd = normalize(curr_spectra)
    frame_sf = np.linalg.norm(curr_spectra_nmlzd - prev_spectra_nmlzd)
    return frame_sf, curr_spectra_nmlzd


def spectral_flux(spectrogram):
    """Return the spectral flux between every pair of consecutive frames.

    The magnitude of the spectrogram is taken first, so a complex STFT can be
    passed in directly; normalising complex values by their complex sum would
    mix phase into the result. A spectrogram that already holds non-negative
    magnitudes is unaffected by this step.

    Args:
        spectrogram: 2-D array shaped ``(frequency_bins, frames)``, real or
            complex.

    Returns:
        1-D ``numpy`` array with ``frames - 1`` values; empty when the
        spectrogram has fewer than two frames.
    """
    magnitudes = np.abs(spectrogram)
    num_frames = magnitudes.shape[1]
    if num_frames == 0:
        return np.array([])

    prev_spectra_nmlzd = normalize(magnitudes[:, 0])

    sf = []
    for i in range(1, num_frames):
        # Each step hands back the normalised current frame, which becomes
        # the "previous" frame of the next step.
        frame_sf, prev_spectra_nmlzd = compute_spect_flux_for_frame(
            prev_spectra_nmlzd, magnitudes[:, i]
        )
        sf.append(frame_sf)

    return np.array(sf)

In [151]:
# Frame-to-frame spectral flux of the clip and its summary statistics.
sf_audio = spectral_flux(spectrogram=audio_spctgm)
sf_mean = sf_audio.mean()
sf_stddev = sf_audio.std()

## Mel-Frequency Cepstral Coefficients

In [111]:
# 13 MFCCs per frame; the statistics are taken along axis=1, giving one mean
# and one standard deviation per coefficient.
# `y` is passed by keyword: librosa 0.10+ made this function's arguments
# keyword-only, so the positional form raises TypeError there.
# The frame and hop sizes are passed explicitly so this cell uses the same
# framing constants as the other feature cells.
mfccs_audio = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=13,
                                   n_fft=FRAME_SIZE, hop_length=HOP_SIZE)
mfccs_mean = np.mean(mfccs_audio, axis=1)
mfccs_stddev = np.std(mfccs_audio, axis=1)

## Chroma Vector

In [119]:
# 12-bin chroma vector per frame; the statistics are taken along axis=1,
# giving one mean and one standard deviation per chroma bin.
chroma_audio = librosa.feature.chroma_stft(
    y=audio,
    sr=sample_rate,
    n_chroma=12,
    n_fft=FRAME_SIZE,
    hop_length=HOP_SIZE,
)
chroma_mean = chroma_audio.mean(axis=1)
chroma_stddev = chroma_audio.std(axis=1)