# Classify Voice Clips on Combination of the following Features using Deep Neural Networks:
In this notebook, we will build classifiers to classify voice clips on the features that have been extracted. The following features are extracted:
1. Zero Crossing Rate : The rate of sign-changes of the signal during the duration of a particular frame.
2. Chroma STFT (Short-Time Fourier Transform): The chroma feature representation derived from the short-time Fourier transform of an audio signal. Chroma features, or chromagrams, represent the energy distribution among the twelve pitch classes (C, C#, D, ..., B) of the musical octave.
3. Mel Spectrogram: A Mel spectrogram is a representation of the power spectrum of a sound signal, where the frequencies are converted to the Mel scale. The Mel scale is designed to mimic the human ear's perception of sound, where each Mel unit corresponds to a perceived equal step in pitch.
4.  MFCC: Mel Frequency Cepstral Coefficients form a cepstral representation where the frequency bands are not linear but distributed according to the mel-scale.
5. RMS: root-mean-square (RMS) value for each frame, either from the audio samples or from a spectrogram.
6. Chroma CQT: Constant-Q chromagram
7. Chroma CENS: Chroma variant “Chroma Energy Normalized” (CENS)
8. Chroma VQT: Variable-Q chromagram
9. Spectral Centroid : The center of gravity of the spectrum.
10. Spectral Bandwidth: The p-th order spectral bandwidth of each frame.
11. Spectral Contrast :  Compute spectral contrast.
12. Spectral Flatness: Compute spectral flatness
13. Spectral Rolloff : The roll-off frequency is defined for each frame as the center frequency for a spectrogram bin such that at least roll_percent (0.85 by default) of the energy of the spectrum in this frame is contained in this bin and the bins below. This can be used to, e.g., approximate the maximum (or minimum) frequency by setting roll_percent to a value close to 1 (or 0).

## Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Load the Data and Features

In [None]:
data_features_path = A_PATH_TO_DATA_FEATURES

test_data_features_path = A_PATH_TO_TEST_DATA_FEATURES


In [None]:
all_data_features = pd.read_csv(data_features_path)

test_data_features = pd.read_csv(test_data_features_path)

In [None]:
all_data_features.shape, test_data_features.shape

In [None]:
all_data_features.columns

In [None]:
from itertools import combinations

def generate_subsets_min_length(lst, min_length):
    subsets = []
    for i in range(min_length, len(lst) + 1):
        subsets.extend(combinations(lst, i))
    return [list(subset) for subset in subsets]

In [None]:
features_list = ['zcrate_mean',
       'chroma_stft_mean', 'melspectrogram_mean', 'mfcc_feature', 'rms_mean',
       'chroma_cqt_mean', 'chroma_cens_mean', 'chroma_vqt_mean', 'spcent_mean',
       'spband_mean', 'spcontrast_mean', 'spflat_mean', 'sprolloff_mean']

In [None]:
len(features_list)

In [None]:
all_features_combs = generate_subsets_min_length(features_list, 2)

In [None]:
len(all_features_combs)

## Evaluate on Different Sets of Features

In [None]:
all_data_features.columns

In [None]:
import keras

In [None]:
from tensorflow import keras

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam

In [None]:
def create_cnn_model(n_features):

    model = Sequential()
    # 1st Convolutional Layer
    model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(n_features, 1)))
    model.add(MaxPooling1D(pool_size=2))

    # 2nd Convolutional Layer
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))

    # 3rd Convolutional Layer
    #model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
    #model.add(MaxPooling1D(pool_size=2))

    # Flatten the output of the last pooling layer
    model.add(Flatten())

    # Fully connected layer
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))

    # Output layer
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [None]:
def create_denseLayer_model(n_features):

    model = Sequential()
    model.add(Input(shape=(n_features,)))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [None]:
import ast

In [None]:
features_combs = generate_subsets_min_length(features_list, 2)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tqdm import tqdm
import ast
import time

## NN on 1 Feature

In [None]:
features_list_list = [[feature] for feature in features_list]
features_list_list

In [None]:
%%time

records = []

for features_comb in tqdm(features_list_list):

    columns_to_use = ['label']
    columns_to_use.extend(features_comb)

    data = all_data_features[columns_to_use]

    test_data = test_data_features[columns_to_use]

    data = data.copy()

    col_to_convert = []
    for acol in features_comb:
        if data[acol].dtype == 'object':
            col_to_convert.append(acol)

    # Create a list to hold the new DataFrames
    new_cols_df = []

    for col in col_to_convert:

        data[col] = data[col].apply(ast.literal_eval)

        ## Flatten the list to make individual columns for each individual elements

        # Create a DataFrame with the new columns
        new_col_df = pd.DataFrame(data[col].tolist(), index=data.index)
        new_col_df.columns = [f"{col}_{idx}" for idx in new_col_df.columns]

        # Append the new DataFrame to the list
        new_cols_df.append(new_col_df)

    # Concatenate the original DataFrame with the new columns
    new_data = pd.concat([data] + new_cols_df, axis=1)

    # Drop the original string columns
    new_data = new_data.drop(columns=col_to_convert)

    ### Do the same for test data

    test_data = test_data.copy()

    # Create a list to hold the new DataFrames
    new_test_cols_df = []

    for col in col_to_convert:

        test_data[col] = test_data[col].apply(ast.literal_eval)

        ## Flatten the list to make individual columns for each individual elements

        # Create a DataFrame with the new columns
        new_test_col_df = pd.DataFrame(test_data[col].tolist(), index=test_data.index)
        new_test_col_df.columns = [f"{col}_{idx}" for idx in new_test_col_df.columns]

        # Append the new DataFrame to the list
        new_test_cols_df.append(new_test_col_df)

    # Concatenate the original DataFrame with the new columns
    new_test_data = pd.concat([test_data] + new_test_cols_df, axis=1)

    # Drop the original string columns
    new_test_data = new_test_data.drop(columns=col_to_convert)


    ## make the train, test, and validation data sets

    X = new_data.drop(columns=['label']).values

    X_val = new_test_data.drop(columns=['label']).values

    y = np.array(new_data['label'].map({'engaging':0, 'boring':1}))

    y_val = np.array(new_test_data['label'].map({'engaging':0, 'boring':1}))

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # make train as the whole set
    X_train = X
    y_train = y

    # scale the features
    sc = StandardScaler()
    sc.fit(X_train)

    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)
    X_val_std = sc.transform(X_val)

    ## Classify by NN

    # use the n_features to switch between cnn and dense layers
    n_features = X_train_std.shape[1]

    model_name = "NN"

    try:
        if n_features < 20:

            #print("Use Dense Layers: {}".format(n_features))
            model = create_denseLayer_model(n_features)

            model_name = "Dense Layers"

        else:
            #print("Use CNN: {}".format(n_features))
            model = create_cnn_model(n_features)

            # make the right shape of the inputs
            X_train_std = X_train_std[..., None]  # Add a channel dimension
            X_test_std = X_test_std[..., None]    # Add a channel dimension

            X_val_std = X_val_std[..., None]      # Add a channel dimension

            model_name = "CNN"


        # Train the model
        model.fit(X_train_std, y_train, epochs=20, batch_size=32, validation_data=(X_test_std, y_test), verbose=0)

        # Make predictions
        y_pred_prob = model.predict(X_test_std)
        y_pred = (y_pred_prob > 0.5).astype("int32")  # Convert probabilities to binary outputs

        rec = {}
        rec['features'] = features_comb
        rec['model'] = model_name
        rec['evaluation_data'] = 'split from training'
        rec['accuracy'] = accuracy_score(y_test, y_pred)
        rec['precision_boring'] = precision_score(y_test, y_pred, pos_label=1)
        rec['recall_boring'] = recall_score(y_test, y_pred, pos_label=1)
        rec['f1_boring'] = f1_score(y_test, y_pred, pos_label=1)
        rec['precision_engaging'] = precision_score(y_test, y_pred, pos_label=0)
        rec['recall_engaging'] = recall_score(y_test, y_pred, pos_label=0)
        rec['f1_engaging'] = f1_score(y_test, y_pred, pos_label=0)
        records.append(rec)

        # Make predictions on individual set
        y_val_pred_prob = model.predict(X_val_std)
        y_val_pred = (y_val_pred_prob > 0.5).astype("int32")  # Convert probabilities to binary outputs

        rec = {}
        rec['features'] = features_comb
        rec['model'] = model_name
        rec['evaluation_data'] = 'individual set'
        rec['accuracy'] = accuracy_score(y_val, y_val_pred)
        rec['precision_boring'] = precision_score(y_val, y_val_pred, pos_label=1)
        rec['recall_boring'] = recall_score(y_val, y_val_pred, pos_label=1)
        rec['f1_boring'] = f1_score(y_val, y_val_pred, pos_label=1)
        rec['precision_engaging'] = precision_score(y_val, y_val_pred, pos_label=0)
        rec['recall_engaging'] = recall_score(y_val, y_val_pred, pos_label=0)
        rec['f1_engaging'] = f1_score(y_val, y_val_pred, pos_label=0)
        records.append(rec)

        if len(records) % 40 == 0:
            evals = pd.DataFrame(records)
            evals.to_csv("test_evaluation_nn_1_feature_results.csv", index=None)
    except Exception as e:
        print("There are issues for features: {}".format(features_comb))

evals = pd.DataFrame(records)
evals.to_csv("test_evaluation_nn_1_feature_results.csv", index=None)

In [None]:
evals

In [None]:
f1_boring_max = evals[evals.evaluation_data == 'individual set'].f1_boring.max()

In [None]:
evals[evals.f1_boring == f1_boring_max]

In [None]:
evals[evals.evaluation_data == 'individual set'].sort_values(by='f1_boring', ascending=False)

## NN on At Least 2 Features

In [None]:
len(features_combs[4400:])

In [None]:
%%time

records = []

for features_comb in tqdm(features_combs[4400:]):

    columns_to_use = ['label']
    columns_to_use.extend(features_comb)

    data = all_data_features[columns_to_use]

    test_data = test_data_features[columns_to_use]

    data = data.copy()

    col_to_convert = []
    for acol in features_comb:
        if data[acol].dtype == 'object':
            col_to_convert.append(acol)

    # Create a list to hold the new DataFrames
    new_cols_df = []

    for col in col_to_convert:

        data[col] = data[col].apply(ast.literal_eval)

        ## Flatten the list to make individual columns for each individual elements

        # Create a DataFrame with the new columns
        new_col_df = pd.DataFrame(data[col].tolist(), index=data.index)
        new_col_df.columns = [f"{col}_{idx}" for idx in new_col_df.columns]

        # Append the new DataFrame to the list
        new_cols_df.append(new_col_df)

    # Concatenate the original DataFrame with the new columns
    new_data = pd.concat([data] + new_cols_df, axis=1)

    # Drop the original string columns
    new_data = new_data.drop(columns=col_to_convert)

    ### Do the same for test data

    test_data = test_data.copy()

    # Create a list to hold the new DataFrames
    new_test_cols_df = []

    for col in col_to_convert:

        test_data[col] = test_data[col].apply(ast.literal_eval)

        ## Flatten the list to make individual columns for each individual elements

        # Create a DataFrame with the new columns
        new_test_col_df = pd.DataFrame(test_data[col].tolist(), index=test_data.index)
        new_test_col_df.columns = [f"{col}_{idx}" for idx in new_test_col_df.columns]

        # Append the new DataFrame to the list
        new_test_cols_df.append(new_test_col_df)

    # Concatenate the original DataFrame with the new columns
    new_test_data = pd.concat([test_data] + new_test_cols_df, axis=1)

    # Drop the original string columns
    new_test_data = new_test_data.drop(columns=col_to_convert)


    ## make the train, test, and validation data sets

    X = new_data.drop(columns=['label']).values

    X_val = new_test_data.drop(columns=['label']).values

    y = np.array(new_data['label'].map({'engaging':0, 'boring':1}))

    y_val = np.array(new_test_data['label'].map({'engaging':0, 'boring':1}))

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # make train as the whole set
    X_train = X
    y_train = y

    # scale the features
    sc = StandardScaler()
    sc.fit(X_train)

    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)
    X_val_std = sc.transform(X_val)

    ## Classify by NN

    # use the n_features to switch between cnn and dense layers
    n_features = X_train_std.shape[1]

    model_name = "NN"

    try:
        if n_features < 20:

            #print("Use Dense Layers: {}".format(n_features))
            model = create_denseLayer_model(n_features)

            model_name = "Dense Layers"

        else:
            #print("Use CNN: {}".format(n_features))
            model = create_cnn_model(n_features)

            # make the right shape of the inputs
            X_train_std = X_train_std[..., None]  # Add a channel dimension
            X_test_std = X_test_std[..., None]    # Add a channel dimension

            X_val_std = X_val_std[..., None]      # Add a channel dimension

            model_name = "CNN"


        # Train the model
        model.fit(X_train_std, y_train, epochs=20, batch_size=32, validation_data=(X_test_std, y_test), verbose=0)

        # Make predictions
        y_pred_prob = model.predict(X_test_std)
        y_pred = (y_pred_prob > 0.5).astype("int32")  # Convert probabilities to binary outputs

        rec = {}
        rec['features'] = features_comb
        rec['model'] = model_name
        rec['evaluation_data'] = 'split from training'
        rec['accuracy'] = accuracy_score(y_test, y_pred)
        rec['precision_boring'] = precision_score(y_test, y_pred, pos_label=1)
        rec['recall_boring'] = recall_score(y_test, y_pred, pos_label=1)
        rec['f1_boring'] = f1_score(y_test, y_pred, pos_label=1)
        rec['precision_engaging'] = precision_score(y_test, y_pred, pos_label=0)
        rec['recall_engaging'] = recall_score(y_test, y_pred, pos_label=0)
        rec['f1_engaging'] = f1_score(y_test, y_pred, pos_label=0)
        records.append(rec)

        # Make predictions on individual set
        y_val_pred_prob = model.predict(X_val_std)
        y_val_pred = (y_val_pred_prob > 0.5).astype("int32")  # Convert probabilities to binary outputs

        rec = {}
        rec['features'] = features_comb
        rec['model'] = model_name
        rec['evaluation_data'] = 'individual set'
        rec['accuracy'] = accuracy_score(y_val, y_val_pred)
        rec['precision_boring'] = precision_score(y_val, y_val_pred, pos_label=1)
        rec['recall_boring'] = recall_score(y_val, y_val_pred, pos_label=1)
        rec['f1_boring'] = f1_score(y_val, y_val_pred, pos_label=1)
        rec['precision_engaging'] = precision_score(y_val, y_val_pred, pos_label=0)
        rec['recall_engaging'] = recall_score(y_val, y_val_pred, pos_label=0)
        rec['f1_engaging'] = f1_score(y_val, y_val_pred, pos_label=0)
        records.append(rec)

        if len(records) % 40 == 0:
            evals = pd.DataFrame(records)
            evals.to_csv("test_evaluation_nn_4400_results.csv", index=None)
    except Exception as e:
        print("There are issues for features: {}".format(features_comb))

evals = pd.DataFrame(records)
evals.to_csv("test_evaluation_nn_4400_results.csv", index=None)

In [None]:
evals

In [None]:
f1_boring_max = evals[evals.evaluation_data == 'individual set'].f1_boring.max()

In [None]:
evals[evals.f1_boring == f1_boring_max]

In [None]:
evals[evals.evaluation_data == 'individual set'].sort_values(by='f1_boring', ascending=False)

## Merge all results

In [None]:
results_1 = pd.read_csv("test_evaluation_nn_1_results.csv")
results_2 = pd.read_csv("test_evaluation_nn_760_results.csv")
results_3 = pd.read_csv("test_evaluation_nn_4400_results.csv")

In [None]:
results_1.shape, results_2.shape, results_3.shape

In [None]:
results = pd.concat([results_1, results_2, results_3], ignore_index=True)

In [None]:
results.shape

In [None]:
results.head()

In [None]:
#results.to_csv("test_evaluation_nn_all_results.csv", index=None)

In [None]:
f1_boring_max = results[results.evaluation_data == 'individual set'].f1_boring.max()

In [None]:
max_boring_f1 = results[results.f1_boring == f1_boring_max]
max_boring_f1

In [None]:
max_boring_f1.features.tolist()