In [1]:
import pandas as pd
import numpy as np
import glob
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Bidirectional, LSTM, Dense, Flatten
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, f1_score, accuracy_score, precision_score, recall_score
from tensorflow.compat.v1.keras.layers import CuDNNLSTM

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Define the path to the directory containing the feature files
directory_path = '/content/drive/MyDrive/FYP_dataset/features'

# Get a list of all feature file paths in the directory
feature_files = glob.glob(directory_path + '/*.csv')

# Read the feature file into a DataFrame
df = pd.read_csv(feature_files[0])

list_of_column_names = list(df.columns.drop(['Arousal_Value', 'Valence_Value', 'frameTime']))

print(list_of_column_names)
print(len(list_of_column_names))

['F0final_sma_stddev', 'F0final_sma_amean', 'voicingFinalUnclipped_sma_stddev', 'voicingFinalUnclipped_sma_amean', 'jitterLocal_sma_stddev', 'jitterLocal_sma_amean', 'jitterDDP_sma_stddev', 'jitterDDP_sma_amean', 'shimmerLocal_sma_stddev', 'shimmerLocal_sma_amean', 'logHNR_sma_stddev', 'logHNR_sma_amean', 'audspec_lengthL1norm_sma_stddev', 'audspec_lengthL1norm_sma_amean', 'audspecRasta_lengthL1norm_sma_stddev', 'audspecRasta_lengthL1norm_sma_amean', 'pcm_RMSenergy_sma_stddev', 'pcm_RMSenergy_sma_amean', 'pcm_zcr_sma_stddev', 'pcm_zcr_sma_amean', 'audSpec_Rfilt_sma[0]_stddev', 'audSpec_Rfilt_sma[0]_amean', 'audSpec_Rfilt_sma[1]_stddev', 'audSpec_Rfilt_sma[1]_amean', 'audSpec_Rfilt_sma[2]_stddev', 'audSpec_Rfilt_sma[2]_amean', 'audSpec_Rfilt_sma[3]_stddev', 'audSpec_Rfilt_sma[3]_amean', 'audSpec_Rfilt_sma[4]_stddev', 'audSpec_Rfilt_sma[4]_amean', 'audSpec_Rfilt_sma[5]_stddev', 'audSpec_Rfilt_sma[5]_amean', 'audSpec_Rfilt_sma[6]_stddev', 'audSpec_Rfilt_sma[6]_amean', 'audSpec_Rfilt_sma[7

In [3]:
! pip install opensmile
import pandas as pd
import opensmile

# Initialize OpenSMILE
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.ComParE_2016,
    feature_level=opensmile.FeatureLevel.Functionals,
)

# Process the audio file
y = smile.process_file('/content/drive/MyDrive/FYP_dataset/CNN Features/abc.wav')

# Set Pandas options to display more rows and columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Print the extracted features
smile_features= set((y.keys()))
print(smile_features)

Collecting opensmile
  Downloading opensmile-2.4.2-py3-none-any.whl (4.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m51.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting audobject>=0.6.1 (from opensmile)
  Downloading audobject-0.7.9-py3-none-any.whl (24 kB)
Collecting audinterface>=0.7.0 (from opensmile)
  Downloading audinterface-1.1.0-py3-none-any.whl (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.3/66.3 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting audformat<2.0.0,>=1.0.1 (from audinterface>=0.7.0->opensmile)
  Downloading audformat-1.0.1-py3-none-any.whl (65 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.8/65.8 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting audiofile>=1.3.0 (from audinterface>=0.7.0->opensmile)
  Downloading audiofile-1.3.0-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m69.5 M

In [4]:
set_our_features=set(list_of_column_names)
print(set_our_features)

{'audSpec_Rfilt_sma_de[16]_stddev', 'pcm_fftMag_spectralRollOff90.0_sma_de_stddev', 'pcm_fftMag_mfcc_sma_de[9]_stddev', 'audSpec_Rfilt_sma[10]_amean', 'audSpec_Rfilt_sma[25]_amean', 'audSpec_Rfilt_sma_de[15]_stddev', 'audSpec_Rfilt_sma[17]_stddev', 'pcm_fftMag_mfcc_sma[12]_stddev', 'audSpec_Rfilt_sma_de[21]_amean', 'pcm_fftMag_spectralSlope_sma_amean', 'shimmerLocal_sma_stddev', 'audSpec_Rfilt_sma_de[23]_stddev', 'pcm_fftMag_mfcc_sma_de[10]_stddev', 'F0final_sma_amean', 'audspecRasta_lengthL1norm_sma_de_amean', 'audSpec_Rfilt_sma_de[24]_amean', 'audSpec_Rfilt_sma[24]_stddev', 'audSpec_Rfilt_sma[19]_amean', 'jitterLocal_sma_de_stddev', 'shimmerLocal_sma_de_amean', 'audSpec_Rfilt_sma_de[7]_stddev', 'audSpec_Rfilt_sma[2]_amean', 'audSpec_Rfilt_sma[17]_amean', 'audSpec_Rfilt_sma_de[10]_stddev', 'audSpec_Rfilt_sma[12]_stddev', 'audSpec_Rfilt_sma[3]_stddev', 'pcm_fftMag_spectralSkewness_sma_amean', 'jitterDDP_sma_stddev', 'pcm_RMSenergy_sma_stddev', 'audSpec_Rfilt_sma_de[6]_stddev', 'pcm_fft

In [5]:
dont_have_features_set=set_our_features-smile_features
print(dont_have_features_set)

dont_have_features_list=list(dont_have_features_set)
print(len(dont_have_features_list))

{'pcm_fftMag_mfcc_sma_de[9]_stddev', 'pcm_fftMag_mfcc_sma[12]_stddev', 'audSpec_Rfilt_sma_de[21]_amean', 'pcm_fftMag_mfcc_sma_de[10]_stddev', 'audspecRasta_lengthL1norm_sma_de_amean', 'audSpec_Rfilt_sma_de[24]_amean', 'pcm_fftMag_mfcc_sma_de[1]_amean', 'pcm_fftMag_mfcc_sma[2]_amean', 'pcm_fftMag_mfcc_sma_de[8]_amean', 'pcm_fftMag_mfcc_sma[10]_stddev', 'pcm_fftMag_mfcc_sma_de[14]_stddev', 'audSpec_Rfilt_sma_de[7]_amean', 'pcm_fftMag_mfcc_sma[1]_stddev', 'pcm_fftMag_spectralHarmonicity_sma_de_amean', 'pcm_fftMag_spectralFlux_sma_de_amean', 'pcm_fftMag_spectralKurtosis_sma_de_amean', 'pcm_fftMag_mfcc_sma_de[4]_stddev', 'pcm_fftMag_mfcc_sma_de[12]_stddev', 'pcm_fftMag_fband250-650_sma_de_amean', 'pcm_fftMag_mfcc_sma[14]_stddev', 'pcm_fftMag_mfcc_sma[7]_stddev', 'audspec_lengthL1norm_sma_de_amean', 'audSpec_Rfilt_sma_de[14]_amean', 'pcm_fftMag_mfcc_sma_de[5]_amean', 'audSpec_Rfilt_sma_de[8]_amean', 'pcm_fftMag_mfcc_sma[5]_amean', 'pcm_fftMag_spectralSlope_sma_de_amean', 'pcm_fftMag_spectral

In [6]:
smile_features_have_in_the_dataset=set_our_features - dont_have_features_set
print(smile_features_have_in_the_dataset)

smile_features_have_in_the_dataset=list(smile_features_have_in_the_dataset)
print(len(smile_features_have_in_the_dataset))

{'audSpec_Rfilt_sma_de[16]_stddev', 'pcm_fftMag_spectralRollOff90.0_sma_de_stddev', 'audSpec_Rfilt_sma[10]_amean', 'audSpec_Rfilt_sma[25]_amean', 'audSpec_Rfilt_sma_de[15]_stddev', 'audSpec_Rfilt_sma[17]_stddev', 'pcm_fftMag_spectralSlope_sma_amean', 'shimmerLocal_sma_stddev', 'audSpec_Rfilt_sma_de[23]_stddev', 'F0final_sma_amean', 'audSpec_Rfilt_sma[24]_stddev', 'audSpec_Rfilt_sma[19]_amean', 'jitterLocal_sma_de_stddev', 'shimmerLocal_sma_de_amean', 'audSpec_Rfilt_sma_de[7]_stddev', 'audSpec_Rfilt_sma[2]_amean', 'audSpec_Rfilt_sma[17]_amean', 'audSpec_Rfilt_sma_de[10]_stddev', 'audSpec_Rfilt_sma[12]_stddev', 'audSpec_Rfilt_sma[3]_stddev', 'pcm_fftMag_spectralSkewness_sma_amean', 'jitterDDP_sma_stddev', 'pcm_RMSenergy_sma_stddev', 'audSpec_Rfilt_sma_de[6]_stddev', 'shimmerLocal_sma_de_stddev', 'audspecRasta_lengthL1norm_sma_stddev', 'audSpec_Rfilt_sma[6]_stddev', 'pcm_fftMag_spectralFlux_sma_stddev', 'audSpec_Rfilt_sma_de[2]_stddev', 'audSpec_Rfilt_sma[6]_amean', 'audSpec_Rfilt_sma_de[

In [7]:
drop_list=[]
#drop_list=dont_have_features_list
for i in range(0,len(dont_have_features_list)):
  drop_list.append(dont_have_features_list[i])

In [8]:
drop_list.append('Arousal_Value')
drop_list.append('Valence_Value')
drop_list.append('frameTime')

print(drop_list)
print(len(drop_list))

['pcm_fftMag_mfcc_sma_de[9]_stddev', 'pcm_fftMag_mfcc_sma[12]_stddev', 'audSpec_Rfilt_sma_de[21]_amean', 'pcm_fftMag_mfcc_sma_de[10]_stddev', 'audspecRasta_lengthL1norm_sma_de_amean', 'audSpec_Rfilt_sma_de[24]_amean', 'pcm_fftMag_mfcc_sma_de[1]_amean', 'pcm_fftMag_mfcc_sma[2]_amean', 'pcm_fftMag_mfcc_sma_de[8]_amean', 'pcm_fftMag_mfcc_sma[10]_stddev', 'pcm_fftMag_mfcc_sma_de[14]_stddev', 'audSpec_Rfilt_sma_de[7]_amean', 'pcm_fftMag_mfcc_sma[1]_stddev', 'pcm_fftMag_spectralHarmonicity_sma_de_amean', 'pcm_fftMag_spectralFlux_sma_de_amean', 'pcm_fftMag_spectralKurtosis_sma_de_amean', 'pcm_fftMag_mfcc_sma_de[4]_stddev', 'pcm_fftMag_mfcc_sma_de[12]_stddev', 'pcm_fftMag_fband250-650_sma_de_amean', 'pcm_fftMag_mfcc_sma[14]_stddev', 'pcm_fftMag_mfcc_sma[7]_stddev', 'audspec_lengthL1norm_sma_de_amean', 'audSpec_Rfilt_sma_de[14]_amean', 'pcm_fftMag_mfcc_sma_de[5]_amean', 'audSpec_Rfilt_sma_de[8]_amean', 'pcm_fftMag_mfcc_sma[5]_amean', 'pcm_fftMag_spectralSlope_sma_de_amean', 'pcm_fftMag_spectral

In [9]:
print(len(feature_files))

1802


In [10]:

# Define lists to store the feature and target data
X = []
y_a = []
y_v = []

# Iterate over each feature file
for file in feature_files:
    # Read the feature file into a DataFrame
    df = pd.read_csv(file)

    # Extract the features and target variables
    features = df.drop(drop_list, axis=1).values
    arousal = df['Arousal_Value'].values
    valence = df['Valence_Value'].values

    # Append the data to the lists
    X.append(features)
    y_a.append(arousal)
    y_v.append(valence)

# Concatenate the feature and target arrays
X = np.concatenate(X)
y_a = np.concatenate(y_a)
y_v = np.concatenate(y_v)

# Scale the features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Reshape the input data for LSTM
X_reshaped = X_scaled.reshape(X_scaled.shape[0], X_scaled.shape[1], 1)

# Define the number of folds for cross-validation
num_folds = 5

# Initialize lists to store evaluation results
mse_v = []
mae_v = []
rmse_v = []
r2_v = []
f1_v = []
accuracy_v = []
precision_v = []
recall_v = []

# Perform tenfold cross-validation
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)
for train_index, test_index in kf.split(X_reshaped):
    X_train, X_val = X_reshaped[train_index], X_reshaped[test_index]
    y_v_train, y_v_val = y_v[train_index], y_v[test_index]

    # Split the validation set further into val and test sets
    val_size = len(X_val) // 2
    X_val, X_test = X_val[:val_size], X_val[val_size:]
    y_v_val, y_v_test = y_v_val[:val_size], y_v_val[val_size:]

    # Build the BiLSTM model for Valence
    model_v = Sequential()
    model_v.add(Bidirectional(CuDNNLSTM(64, return_sequences=True), input_shape=(X_train.shape[1], 1)))
    model_v.add(Flatten())

    # Add a DNN layer after the BiLSTM
    model_v.add(Dense(512, activation='relu'))

    model_v.add(Dense(1))
    model_v.compile(loss='mse', optimizer=Adam(learning_rate=0.001))

    # Train the Arousal model
    model_v.fit(X_train, y_v_train, validation_data=(X_val, y_v_val), epochs=25, batch_size=32)

    # Evaluate the model on the test set
    predictions_v = model_v.predict(X_test)

    # Reshape predictions_a to match the shape of y_a_test
    predictions_v = predictions_v.reshape(y_v_test.shape)

    # Calculate additional evaluation metrics
    mse_v.append(mean_squared_error(y_v_test, predictions_v))
    mae_v.append(mean_absolute_error(y_v_test, predictions_v))
    rmse_v.append(np.sqrt(mean_squared_error(y_v_test, predictions_v)))
    r2_v.append(r2_score(y_v_test, predictions_v))

    # Convert regression predictions to binary labels
    threshold = 0
    binary_predictions_v = (predictions_v >= threshold).astype(int)
    y_v_test_binary = (y_v_test >= threshold).astype(int)

    # Calculate F1-score, accuracy, precision, and recall for binary classification
    f1_v.append(f1_score(y_v_test_binary, binary_predictions_v))
    accuracy_v.append(accuracy_score(y_v_test_binary, binary_predictions_v))
    precision_v.append(precision_score(y_v_test_binary, binary_predictions_v))
    recall_v.append(recall_score(y_v_test_binary, binary_predictions_v))

# Calculate average performance across all folds
average_mse_v = np.mean(mse_v)
average_mae_v = np.mean(mae_v)
average_rmse_v = np.mean(rmse_v)
average_r2_v = np.mean(r2_v)
average_f1_v = np.mean(f1_v)
average_accuracy_v = np.mean(accuracy_v)
average_precision_v = np.mean(precision_v)
average_recall_v = np.mean(recall_v)

print(f'Average Valence MSE: {average_mse_v:.4f}')
print(f'Average Valence MAE: {average_mae_v:.4f}')
print(f'Average Valence RMSE: {average_rmse_v:.4f}')
print(f'Average Valence R2 Score: {average_r2_v:.4f}')
print(f'Average Valence F1 Score: {average_f1_v:.4f}')
print(f'Average Valence Accuracy: {average_accuracy_v:.4f}')
print(f'Average Valence Precision: {average_precision_v:.4f}')
print(f'Average Valence Recall: {average_recall_v:.4f}')

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoc

In [2]:
! pip install opensmile

Collecting opensmile
  Downloading opensmile-2.4.2-py3-none-any.whl (4.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting audobject>=0.6.1 (from opensmile)
  Downloading audobject-0.7.9-py3-none-any.whl (24 kB)
Collecting audinterface>=0.7.0 (from opensmile)
  Downloading audinterface-1.1.0-py3-none-any.whl (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.3/66.3 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting audformat<2.0.0,>=1.0.1 (from audinterface>=0.7.0->opensmile)
  Downloading audformat-1.0.1-py3-none-any.whl (65 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.8/65.8 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting audiofile>=1.3.0 (from audinterface>=0.7.0->opensmile)
  Downloading audiofile-1.3.0-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m41.3 M

In [3]:
import opensmile
import numpy as np
import librosa

# Initialize OpenSMILE
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.ComParE_2016,
    feature_level=opensmile.FeatureLevel.Functionals,
    #config_file_path="path_to_your_config_file.conf"
)

# Load the audio signal (replace with your own loading mechanism)
audio_path = '/content/drive/MyDrive/FYP_dataset/CNN Features/abc.wav'
#signal, sr= smile.process_file('/content/drive/MyDrive/FYP_dataset/CNN Features/abc.wav')

signal, sr = librosa.load(audio_path, sr=None)

# Segment the audio signal into 0.5-second segments
segment_duration = 0.5
samples_per_segment = int(segment_duration * sr)
timestamps = np.arange(0, len(signal) / sr, segment_duration)
segments = [signal[i:i + samples_per_segment] for i in range(0, len(signal), samples_per_segment)]

# Initialize a list to store features
features = []

# Extract features and timestamps for each segment
for timestamp, segment in zip(timestamps, segments):
    feature_dict = smile.process_signal(segment, sr)
    feature_dict['timestamp'] = timestamp  # Add the timestamp to the feature dictionary
    features.append(feature_dict)

  signal, sr = librosa.load(audio_path, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


FileNotFoundError: ignored