In [5]:
import glob
import warnings

import numpy as np
import opensmile
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential

# Attach the user's Google Drive to the Colab filesystem so the dataset
# paths used below (all rooted at /content/drive) can be read.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

# Directory on the mounted Drive that contains the feature CSV files.
directory_path = '/content/drive/MyDrive/FYP_dataset/features'

# Collect every CSV in that directory. glob returns paths in arbitrary,
# filesystem-dependent order, so sort them to make "the first file"
# reproducible between runs.
feature_files = sorted(glob.glob(directory_path + '/*.csv'))

# Fail with an explicit message instead of a bare IndexError on
# feature_files[0] when the directory is empty or the Drive is not mounted.
if not feature_files:
    raise FileNotFoundError(
        f"No feature CSV files found in {directory_path!r}"
    )

# Load the first feature file; it is used here only for its column names.
df = pd.read_csv(feature_files[0])

# Feature columns = every column except the two label columns and the
# frame timestamp. Index.drop raises KeyError if any of them is missing.
list_of_column_names = list(df.columns.drop(['Arousal_Value', 'Valence_Value', 'frameTime']))

print(list_of_column_names)
print(len(list_of_column_names))

# Configure openSMILE to compute the ComParE 2016 feature set at the
# "functionals" level.
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.ComParE_2016,
    feature_level=opensmile.FeatureLevel.Functionals,
)

# Audio recordings to analyse. Kept as a list so further files can be added:
# the standardisation and training steps below need more than one sample to
# be meaningful (see the warning further down).
audio_files = [
    '/content/drive/MyDrive/FYP_dataset/CNN Features/aaa.wav',
]

# Extract the features; `y` is a DataFrame whose columns are the openSMILE
# feature names.
# NOTE(review): at functionals level this is expected to yield one row per
# file -- confirm against the openSMILE documentation.
y = smile.process_files(audio_files)

# Convert features to a numpy array of shape (num_samples, num_features).
features = y.values

# StandardScaler standardises each column across the samples. With a single
# sample every column has zero variance, so the result is an all-zero array
# and everything trained on it afterwards carries no information.
if features.shape[0] < 2:
    warnings.warn(
        "Only {} sample(s) extracted; StandardScaler will output all zeros. "
        "Add more audio files to `audio_files`.".format(features.shape[0])
    )

# Normalize features (zero mean, unit variance per column).
scaler = StandardScaler()
normalized_features = scaler.fit_transform(features)

# Prepare data for the CNN: Conv1D expects a trailing channel axis, so
# reshape (num_samples, num_features) -> (num_samples, num_features, 1).
num_samples, num_features = normalized_features.shape
normalized_features = normalized_features.reshape(num_samples, num_features, 1)

# 1-D convolutional network that maps the feature vector back onto itself:
# one convolution + pooling stage, a 128-unit dense layer, and a linear
# output layer with as many units as there are input features.
model = Sequential([
    Conv1D(64, kernel_size=3, activation='relu', input_shape=(num_features, 1)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(num_features, activation='linear'),
])

# Mean squared error between the output and the target, optimised with Adam.
model.compile(optimizer='adam', loss='mean_squared_error')

# The input doubles as the training target, so the network learns to
# reproduce the normalized features.
model.fit(
    x=normalized_features,
    y=normalized_features,
    batch_size=32,
    epochs=10,
)

# Extract the learned representation of each sample: the activations of the
# second-to-last layer (the 128-unit dense layer) of the trained model.
dense_layer_model = Model(inputs=model.input, outputs=model.get_layer(index=-2).output)
selected_features = dense_layer_model.predict(normalized_features)

# Rank the *input* features.
# The columns of `selected_features` are hidden units of the dense layer, not
# openSMILE features, so sorting them can only ever yield indices 0..127 and
# must not be used to index `y.columns`. The model's final output has one
# column per input feature, so the ranking is computed there instead:
# features with the smallest mean squared reconstruction error come first.
# NOTE(review): reconstruction error is one possible relevance criterion --
# confirm it matches the intended feature-selection method.
reconstructed = model.predict(normalized_features)
flat_inputs = np.asarray(normalized_features).reshape(reconstructed.shape)
per_feature_error = np.mean((reconstructed - flat_inputs) ** 2, axis=0)

# Get the indices of the top 100 features (stable sort keeps ties in
# column order so the result is reproducible).
top_100_indices = np.argsort(per_feature_error, kind='stable')[:100]

# Get the names of the top 100 features
top_100_feature_names = y.columns[top_100_indices]

# Print the top 100 feature names
print("Top 100 Features:")
for name in top_100_feature_names:
    print(name)

# Reload the first feature CSV so its header can be compared with the
# selected feature names.
df = pd.read_csv(feature_files[0])

# Candidate names to look for (replace this with your actual top features).
top_features = top_100_feature_names

# Keep, in their original order, only the candidates that also appear as a
# column of the feature file.
csv_columns = set(df.columns)
common_features = [name for name in top_features if name in csv_columns]

print("Common Features:", common_features)
print("Count of Common Features:", len(common_features))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
['F0final_sma_stddev', 'F0final_sma_amean', 'voicingFinalUnclipped_sma_stddev', 'voicingFinalUnclipped_sma_amean', 'jitterLocal_sma_stddev', 'jitterLocal_sma_amean', 'jitterDDP_sma_stddev', 'jitterDDP_sma_amean', 'shimmerLocal_sma_stddev', 'shimmerLocal_sma_amean', 'logHNR_sma_stddev', 'logHNR_sma_amean', 'audspec_lengthL1norm_sma_stddev', 'audspec_lengthL1norm_sma_amean', 'audspecRasta_lengthL1norm_sma_stddev', 'audspecRasta_lengthL1norm_sma_amean', 'pcm_RMSenergy_sma_stddev', 'pcm_RMSenergy_sma_amean', 'pcm_zcr_sma_stddev', 'pcm_zcr_sma_amean', 'audSpec_Rfilt_sma[0]_stddev', 'audSpec_Rfilt_sma[0]_amean', 'audSpec_Rfilt_sma[1]_stddev', 'audSpec_Rfilt_sma[1]_amean', 'audSpec_Rfilt_sma[2]_stddev', 'audSpec_Rfilt_sma[2]_amean', 'audSpec_Rfilt_sma[3]_stddev', 'audSpec_Rfilt_sma[3]_amean', 'audSpec_Rfilt_sma[4]_stddev', 'audSpec_Rfilt_sma[4]_amean', 'audSpec_Rfil