# Feature Extraction for All Test Data

In [None]:
import pandas as pd
import numpy as np
import librosa
import librosa.display
from IPython.display import Audio
import IPython
from tqdm import tqdm
import pickle
from datetime import datetime
import time
import os

In [None]:
test_boring_folder = A_PATH
test_engaging_folder = A_PATH

In [None]:
# Collect the .mp3 clips found in the boring folder, ordered by file name.
test_boring_files = sorted(
    entry for entry in os.listdir(test_boring_folder) if entry.endswith('.mp3')
)
len(test_boring_files)

In [None]:
# Collect the .mp3 clips found in the engaging folder, ordered by file name.
test_engaging_files = sorted(
    entry for entry in os.listdir(test_engaging_folder) if entry.endswith('.mp3')
)
len(test_engaging_files)

## Make a DataFrame

In [None]:
# Build one record per clip: engaging clips first, then boring ones.
records = []
label_sources = (
    ('engaging', test_engaging_folder, test_engaging_files),
    ('boring', test_boring_folder, test_boring_files),
)
for label_name, folder, file_names in label_sources:
    records.extend(
        {
            'name': 'Researcher',
            'file_name': file_name,
            'label': label_name,
            'path': folder + "/" + file_name,
        }
        for file_name in file_names
    )

In [None]:
all_test_data = pd.DataFrame(records)

In [None]:
all_test_data.shape

In [None]:
all_test_data.head()

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Bar chart of each label's share of the rows in all_test_data.
label_share = all_test_data['label'].value_counts() / len(all_test_data)

plt.figure(figsize=(9, 5))
label_share.plot(kind='bar')
#plt.title("Label Distribution in Data Set")

# Tick labels and axis labels share the same bold 12-pt font;
# the x tick labels are additionally rotated by 30 degrees.
bold_text = dict(fontsize=12, fontweight='bold')
plt.xticks(rotation=30, **bold_text)
plt.yticks(**bold_text)
plt.xlabel("", **bold_text)
plt.ylabel("Percentage of Label", **bold_text)

plt.tight_layout()


## Extract Waveform Data

In [None]:
%%time
# Load every clip listed in all_test_data as a waveform. A clip that cannot
# be loaded is stored as an empty list, so waveform_vectors keeps exactly one
# entry per DataFrame row.
waveform_vectors = []
for idx, row in tqdm(all_test_data.iterrows(), total=len(all_test_data)):
    try:
        path = row['path']
        # `sr` is intentionally left in the notebook namespace: a later cell
        # reads it as the sample rate.
        data, sr = librosa.load(path)
        waveform_vectors.append(data)
    except Exception as e:
        # The records carry 'file_name' (there is no 'file' key); using the
        # wrong key raised KeyError inside this handler and aborted the loop.
        print("Errors during processing the voice file: {} ({})".format(row['file_name'], e))
        waveform_vectors.append([])

In [None]:
len(waveform_vectors)

In [None]:
len(waveform_vectors[1])

## MFCC Feature Extraction
Mel-frequency cepstral coefficients (MFCCs) form a cepstral representation of the signal in which the frequency bands are spaced according to the mel scale rather than linearly.

In [None]:
def extract_mfcc_features(data, sample_rate=22050):
    """Return the MFCC matrix of a waveform averaged into a single vector.

    Args:
        data: Audio samples, passed to ``librosa.feature.mfcc`` as ``y``.
        sample_rate: Sampling rate passed as ``sr`` (defaults to 22050).

    Returns:
        The mean of the transposed MFCC matrix taken along axis 0.
    """
    coefficients = librosa.feature.mfcc(y=data, sr=sample_rate)
    return np.mean(coefficients.T, axis=0)

In [None]:
# One MFCC vector per waveform; np.nan marks empty waveforms and failures.
mfcc_vectors = []
for signal in tqdm(waveform_vectors):
    features = np.nan
    try:
        if len(signal) > 0:
            features = extract_mfcc_features(signal)
    except Exception as err:
        print(err)
    mfcc_vectors.append(features)

In [None]:
# Failed extractions are stored in mfcc_vectors as a scalar np.nan, which
# list() cannot iterate (TypeError). Keep those rows as None so they show up
# as missing values instead of crashing the assignment.
all_test_data['mfcc_feature'] = [
    list(vec) if np.ndim(vec) > 0 else None for vec in mfcc_vectors
]

In [None]:
all_test_data.isna().sum(0)

In [None]:
# Save the MFCC data to disk
save_data_path = "all_test_data_features.csv"

In [None]:
## Save the all_data with MFCC features
all_test_data.to_csv(save_data_path, index=None)

## Visualize the MFCC Features

In [None]:
from sklearn.manifold import TSNE
tsne = TSNE(n_components=2, random_state=0)

In [None]:
# Extract features and labels
mfcc_features = np.array(all_test_data['mfcc_feature'].tolist())
labels = all_test_data['label']

In [None]:
# Apply t-SNE
%time
tsne_results = tsne.fit_transform(mfcc_features)

In [None]:
import matplotlib.pyplot as plt

# Color every point according to its label.
color_map = {'engaging': 'red', 'boring': 'blue'}
colors = labels.map(color_map)

# Plot the first t-SNE component against the second.
plt.figure(figsize=(10, 8))
x_coords, y_coords = tsne_results[:, 0], tsne_results[:, 1]
scatter = plt.scatter(x_coords, y_coords, c=colors)

# Proxy markers, one per label, so the legend shows the color coding.
legend_labels = [
    plt.Line2D([0], [0], marker='o', color='w', markerfacecolor=face_color, markersize=10)
    for face_color in color_map.values()
]
plt.legend(legend_labels, color_map.keys())

plt.title('2D t-SNE of MFCC Features')
plt.xlabel('t-SNE 1')
plt.ylabel('t-SNE 2')
plt.show()

## Zero Crossing Rate
The rate of sign-changes of the signal during the duration of a particular frame.

In [None]:
all_test_data.shape

In [None]:
all_test_data.columns

In [None]:
len(waveform_vectors)

In [None]:
# Mean zero-crossing rate of each waveform. Every waveform contributes exactly
# one entry (None when it is empty or extraction fails), so the list keeps the
# same length and order as waveform_vectors.
zcrate_means = []
for data in tqdm(waveform_vectors):
    if len(data) > 0:
        try:
            azcrate_mean = np.mean(librosa.feature.zero_crossing_rate(data).T, axis=0)
            zcrate_means.append(azcrate_mean[0])
        except Exception as e:
            print(e)
            # Without this placeholder a failure shortened the list and
            # shifted every later value onto the wrong clip.
            zcrate_means.append(None)
    else:
        zcrate_means.append(None)

In [None]:
len(zcrate_means)

In [None]:
pd.isna(np.array(zcrate_means)).sum()

In [None]:
all_test_data['zcrate_mean'] = zcrate_means

In [None]:
all_test_data.head()

## Chroma stft
Compute a chromagram from a waveform or power spectrogram.

In [None]:
# Reuse the sampling rate left in `sr` by the waveform-loading loop above,
# i.e. the value returned by the last librosa.load call that succeeded.
# NOTE(review): `sr` is undefined if no clip loaded successfully — confirm
# at least one load succeeded before running this cell.
sample_rate = sr
sample_rate

In [None]:
# STFT-based chromagram of each waveform, averaged along axis 1.
# None marks empty waveforms and failed extractions.
chroma_stft_means = []
for signal in tqdm(waveform_vectors):
    chroma_mean = None
    if len(signal) > 0:
        try:
            magnitude = np.abs(librosa.stft(signal))
            chroma = librosa.feature.chroma_stft(S=magnitude, sr=sample_rate)
            chroma_mean = list(np.mean(chroma, axis=1))
        except Exception as err:
            print(err)
    chroma_stft_means.append(chroma_mean)

In [None]:
len(chroma_stft_means)

In [None]:
chroma_stft_means[10]

In [None]:
all_test_data['chroma_stft_mean'] = chroma_stft_means

In [None]:
all_test_data.head()

In [None]:
all_test_data.chroma_stft_mean.isna().sum()

In [None]:
# Save the data with features to disk
data_features_path = "all_test_data_features.csv"

In [None]:
all_test_data.to_csv(data_features_path, index=None)

## Melspectrogram
A Mel spectrogram is a representation of the power spectrum of a sound signal, where the frequencies are converted to the Mel scale. The Mel scale is designed to mimic the human ear's perception of sound, where each Mel unit corresponds to a perceived equal step in pitch. This makes the Mel spectrogram a powerful tool for audio analysis, particularly in speech and music processing.

In [None]:
# Mel spectrogram of each waveform, averaged along axis 1.
# None marks empty waveforms and failed extractions.
melspectrogram_means = []
for signal in tqdm(waveform_vectors):
    mel_mean = None
    if len(signal) > 0:
        try:
            mel = librosa.feature.melspectrogram(y=signal, sr=sample_rate)
            mel_mean = list(np.mean(mel, axis=1))
        except Exception as err:
            print(err)
    melspectrogram_means.append(mel_mean)

In [None]:
all_test_data['melspectrogram_mean'] = melspectrogram_means

In [None]:
all_test_data.shape

In [None]:
all_test_data.head()

## RMS Feature
Compute root-mean-square (RMS) value for each frame, either from the audio samples y or from a spectrogram S.

In [None]:
# Overall mean of the RMS values of each waveform.
# None marks empty waveforms and failed extractions.
rms_means = []
for signal in tqdm(waveform_vectors):
    rms_value = None
    if len(signal) > 0:
        try:
            rms_value = np.mean(librosa.feature.rms(y=signal))
        except Exception as err:
            print(err)
    rms_means.append(rms_value)

In [None]:
rms_means[:3]

In [None]:
all_test_data['rms_mean'] = rms_means

In [None]:
all_test_data.head()

## Chroma CQT
Compute a chromagram from the constant-Q transform of an audio signal.

In [None]:
# Constant-Q chromagram of each waveform, averaged along axis 1.
# None marks empty waveforms and failed extractions.
chroma_cqt_means = []
for signal in tqdm(waveform_vectors):
    chroma_mean = None
    if len(signal) > 0:
        try:
            chroma = librosa.feature.chroma_cqt(y=signal, sr=sample_rate)
            chroma_mean = list(np.mean(chroma, axis=1))
        except Exception as err:
            print(err)
    chroma_cqt_means.append(chroma_mean)

In [None]:
len(chroma_cqt_means)

In [None]:
chroma_cqt_means[2]

In [None]:
all_test_data['chroma_cqt_mean'] = chroma_cqt_means

In [None]:
all_test_data.head(2)

## Chroma cens
Compute the chroma variant “Chroma Energy Normalized” (CENS)

In [None]:
# CENS chromagram of each waveform, averaged along axis 1.
# None marks empty waveforms and failed extractions.
chroma_cens_means = []
for signal in tqdm(waveform_vectors):
    chroma_mean = None
    if len(signal) > 0:
        try:
            chroma = librosa.feature.chroma_cens(y=signal, sr=sample_rate)
            chroma_mean = list(np.mean(chroma, axis=1))
        except Exception as err:
            print(err)
    chroma_cens_means.append(chroma_mean)

In [None]:
len(chroma_cens_means)

In [None]:
chroma_cens_means[2]

In [None]:
all_test_data['chroma_cens_mean'] = chroma_cens_means

In [None]:
all_test_data.head(2)

## Variable-Q chromagram

This differs from CQT-based chroma by supporting non-equal temperament intervals.

In [None]:
# Variable-Q chromagram ('ji5' intervals) of each waveform, averaged along
# axis 1. None marks empty waveforms and failed extractions.
chroma_vqt_means = []
for signal in tqdm(waveform_vectors):
    chroma_mean = None
    if len(signal) > 0:
        try:
            chroma = librosa.feature.chroma_vqt(y=signal, intervals='ji5', sr=sample_rate)
            chroma_mean = list(np.mean(chroma, axis=1))
        except Exception as err:
            print(err)
    chroma_vqt_means.append(chroma_mean)

In [None]:
len(chroma_vqt_means)

In [None]:
chroma_vqt_means[2]

In [None]:
all_test_data['chroma_vqt_mean'] = chroma_vqt_means

In [None]:
all_test_data.head(2)

In [None]:
all_test_data.shape

In [None]:
all_test_data.to_csv(data_features_path, index=None)

## Compute the spectral centroid.

Each frame of a magnitude spectrogram is normalized and treated as a distribution over frequency bins, from which the mean (centroid) is extracted per frame.

In [None]:
# Overall mean of the spectral centroid of each waveform.
# None marks empty waveforms and failed extractions.
spcent_means = []
for signal in tqdm(waveform_vectors):
    centroid_mean = None
    if len(signal) > 0:
        try:
            centroid = librosa.feature.spectral_centroid(y=signal, sr=sample_rate)
            centroid_mean = np.mean(centroid)
        except Exception as err:
            print(err)
    spcent_means.append(centroid_mean)

In [None]:
len(spcent_means)

In [None]:
spcent_means[2]

In [None]:
all_test_data['spcent_mean'] = spcent_means

In [None]:
all_test_data.head(2)

## Compute the spectral bandwidth

Compute p’th-order spectral bandwidth.

In [None]:
# Overall mean of the spectral bandwidth of each waveform.
# None marks empty waveforms and failed extractions.
spband_means = []
for signal in tqdm(waveform_vectors):
    bandwidth_mean = None
    if len(signal) > 0:
        try:
            bandwidth = librosa.feature.spectral_bandwidth(y=signal, sr=sample_rate)
            bandwidth_mean = np.mean(bandwidth)
        except Exception as err:
            print(err)
    spband_means.append(bandwidth_mean)

In [None]:
len(spband_means)

In [None]:
spband_means[2]

In [None]:
all_test_data['spband_mean'] = spband_means

In [None]:
all_test_data.head(2)

## Compute the spectral contrast

Compute spectral contrast

Each frame of a spectrogram S is divided into sub-bands. For each sub-band, the energy contrast is estimated by comparing the mean energy in the top quantile (peak energy) to that of the bottom quantile (valley energy). High contrast values generally correspond to clear, narrow-band signals, while low contrast values correspond to broad-band noise.

Jiang, Dan-Ning, Lie Lu, Hong-Jiang Zhang, Jian-Hua Tao, and Lian-Hong Cai. “Music type classification by spectral contrast feature.” In Multimedia and Expo, 2002. ICME’02. Proceedings. 2002 IEEE International Conference on, vol. 1, pp. 113-116. IEEE, 2002.

In [None]:
# Spectral contrast of each waveform, averaged along axis 1.
# None marks empty waveforms and failed extractions.
spcontrast_means = []
for signal in tqdm(waveform_vectors):
    contrast_mean = None
    if len(signal) > 0:
        try:
            contrast = librosa.feature.spectral_contrast(y=signal, sr=sample_rate)
            contrast_mean = list(np.mean(contrast, axis=1))
        except Exception as err:
            print(err)
    spcontrast_means.append(contrast_mean)

In [None]:
len(spcontrast_means)

In [None]:
spcontrast_means[2]

In [None]:
all_test_data['spcontrast_mean'] = spcontrast_means

In [None]:
all_test_data.head(2)

## Compute the spectral flatness

Spectral flatness (or tonality coefficient) quantifies how noise-like a sound is, as opposed to tone-like. A high spectral flatness (closer to 1.0) indicates that the spectrum is similar to white noise. It is often converted to decibels.

In [None]:
# Overall mean of the spectral flatness of each waveform.
# None marks empty waveforms and failed extractions.
spflat_means = []
for signal in tqdm(waveform_vectors):
    flatness_mean = None
    if len(signal) > 0:
        try:
            flatness_mean = np.mean(librosa.feature.spectral_flatness(y=signal))
        except Exception as err:
            print(err)
    spflat_means.append(flatness_mean)

In [None]:
len(spflat_means)

In [None]:
spflat_means[1]

In [None]:
all_test_data['spflat_mean'] = spflat_means

In [None]:
all_test_data.head(2)

## Compute the spectral roll off

Compute roll-off frequency.

The roll-off frequency is defined for each frame as the center frequency for a spectrogram bin such that at least roll_percent (0.85 by default) of the energy of the spectrum in this frame is contained in this bin and the bins below. This can be used to, e.g., approximate the maximum (or minimum) frequency by setting roll_percent to a value close to 1 (or 0).

In [None]:
# Overall mean of the spectral roll-off frequency of each waveform.
# None marks empty waveforms and failed extractions.
sprolloff_means = []
for signal in tqdm(waveform_vectors):
    rolloff_mean = None
    if len(signal) > 0:
        try:
            rolloff = librosa.feature.spectral_rolloff(y=signal, sr=sample_rate)
            rolloff_mean = np.mean(rolloff)
        except Exception as err:
            print(err)
    sprolloff_means.append(rolloff_mean)

In [None]:
len(sprolloff_means)

In [None]:
sprolloff_means[2]

In [None]:
all_test_data['sprolloff_mean'] = sprolloff_means

In [None]:
all_test_data.head(2)

In [None]:
all_test_data.columns

In [None]:
all_test_data.to_csv(data_features_path, index=None)

In [None]:
# save to a backup
all_test_data.to_csv(OUTPUT_PATH, index=None)