In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow.keras.layers as layers
import tensorflow.keras.models as models
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, SimpleRNN, Dropout, GRU
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score, confusion_matrix, log_loss, mean_absolute_error, mean_squared_error, matthews_corrcoef
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from tensorflow.keras.optimizers import Adam
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

In [2]:
# Load your dataset
df = pd.read_csv(r"C:\Users\user\Desktop\Project\EXPERIMENTS\revised-data\audio_features_cp.csv")

# Define feature columns and target column
x_cols = ['mfcc_1', 'mfcc_2', 'mfcc_3', 'mfcc_4', 'mfcc_5', 'mfcc_6', 'mfcc_7', 'mfcc_8', 
           'mfcc_9', 'mfcc_10', 'mfcc_11', 'mfcc_12', 'mfcc_13', 'spectral_centroid', 
           'spectral_bandwidth', 'zero_crossing_rate', 'spectrogram_mean', 'spectrogram_median', 'spectrogram_variance']
y_cols = ['label']

# Normalize the feature data to range [0,1] using MinMaxScaler
SMM = MinMaxScaler(feature_range=(0, 1))
df[x_cols] = SMM.fit_transform(df[x_cols])

# Split data into features and labels
X = df[x_cols].values  # Convert to NumPy array
y = df[y_cols].values.ravel()  # Convert to 1D array

# Train-test split (20% test set)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train_rnn = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test_rnn = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

In [3]:
models = {
    'Logistic Regression': LogisticRegression(C=1.0, solver='lbfgs', max_iter=500, random_state=42),
    'Decision Tree': DecisionTreeClassifier(criterion='entropy', max_depth=10, min_samples_split=5, min_samples_leaf=2, random_state=42),
    'Random Forest': RandomForestClassifier(criterion='entropy', n_estimators=500, max_depth=10, min_samples_split=5, min_samples_leaf=2, random_state=42),
    'Naive Bayes': GaussianNB(),
    'SVM': SVC(C=1.0, kernel='rbf', gamma='scale', probability=True),
    'KNN': KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2),
    'Gradient Boosting': GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, min_samples_split=5, min_samples_leaf=2, random_state=42),
    'Neural Network': Sequential([
        Dense(128, input_shape=(X_train.shape[1],), activation='relu'),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(1, activation='sigmoid')
    ]),
    'CNN': Sequential([
        layers.InputLayer(input_shape=(19,)),
        layers.Reshape((19, 1, 1)),
        layers.Conv2D(32, kernel_size=(3, 1), activation='relu', padding='same'),
        layers.MaxPooling2D(pool_size=(2, 1), padding='same'),
        layers.Conv2D(64, kernel_size=(3, 1), activation='relu', padding='same'),
        layers.MaxPooling2D(pool_size=(2, 1), padding='same'),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(1, activation='sigmoid')
    ]),
    'RNN': Sequential([
        SimpleRNN(50, input_shape=(1, 19), activation='relu'),
        Dropout(0.3),
        Dense(1, activation='sigmoid')
    ]),
    'LSTM': Sequential([
        LSTM(128, input_shape=(X_train_rnn.shape[1], X_train_rnn.shape[2]), activation='relu', return_sequences=False),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(1, activation='sigmoid')
    ]),
    'GRU': Sequential([
        GRU(128, input_shape=(X_train_rnn.shape[1], X_train_rnn.shape[2]), activation='relu', return_sequences=False),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(1, activation='sigmoid')
    ])
}

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(**kwargs)


In [4]:
# Compile neural network models
for model_name, model in models.items():
    if model_name in ['Neural Network', 'CNN', 'RNN', 'LSTM', 'GRU']:
        model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

In [5]:
# Evaluation and Confusion Matrix DataFrames
eval_columns = ['model_name', 'accuracy', 'f1', 'precision', 'recall', 'roc_auc', 'log_loss_val', 'mae', 'mse', 'mcc']
conf_matrix_columns = ['model_name', 'true_negative', 'false_positive', 'false_negative', 'true_positive']
evaluation = pd.DataFrame(columns=eval_columns)
conf_matrix_df = pd.DataFrame(columns=conf_matrix_columns)

In [6]:
# Training and Evaluation Function
def evaluate_model(model, model_name, epochs=1, loops=1):
    for _ in range(loops):
        # Train the model
        if model_name in ['Neural Network', 'CNN']:
            model.fit(X_train, y_train, epochs=epochs, batch_size=32, verbose=0)
            y_pred_prob = model.predict(X_test)
        elif model_name in ['RNN', 'LSTM', 'GRU']:
            model.fit(X_train_rnn, y_train, epochs=epochs, batch_size=32, verbose=0)
            y_pred_prob = model.predict(X_test_rnn)
        else:
            model.fit(X_train, y_train)
            y_pred_prob = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else model.predict(X_test)
        
        # Calculate metrics
        y_pred = (y_pred_prob > 0.5).astype(int)
        metrics = {
            'accuracy': accuracy_score(y_test, y_pred),
            'f1': f1_score(y_test, y_pred),
            'precision': precision_score(y_test, y_pred),
            'recall': recall_score(y_test, y_pred),
            'roc_auc': roc_auc_score(y_test, y_pred),
            'log_loss_val': log_loss(y_test, y_pred),
            'mae': mean_absolute_error(y_test, y_pred),
            'mse': mean_squared_error(y_test, y_pred),
            'mcc': matthews_corrcoef(y_test, y_pred)
        }
        
        # Append evaluation metrics
        evaluation.loc[len(evaluation)] = [model_name] + list(metrics.values())
        
        # Confusion matrix
        conf_matrix = confusion_matrix(y_test, y_pred)
        tn, fp, fn, tp = conf_matrix.ravel()
        conf_matrix_df.loc[len(conf_matrix_df)] = [model_name, tn, fp, fn, tp]

In [30]:
# Execute the evaluation for each model
for model_name, model in models.items():
    evaluate_model(model, model_name, epochs=500, loops=100)

# Save results to CSV
evaluation.to_csv(r"C:\Users\user\Desktop\Project\EXPERIMENTS\models\New folder\evaluation\evaluated2.csv", index=False)
conf_matrix_df.to_csv(r"C:\Users\user\Desktop\Project\EXPERIMENTS\models\New folder\evaluation\confusion_matrix2.csv", index=False)

[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━

In [None]:
evaluation.head()

Unnamed: 0,model_name,accuracy,f1,precision,recall,roc_auc,log_loss_val,mae,mse,mcc
0,Logistic Regression,0.863636,0.864865,0.9,0.83237,0.86523,4.915044,0.136364,0.136364,0.729935
1,Logistic Regression,0.863636,0.864865,0.9,0.83237,0.86523,4.915044,0.136364,0.136364,0.729935
2,Logistic Regression,0.863636,0.864865,0.9,0.83237,0.86523,4.915044,0.136364,0.136364,0.729935
3,Logistic Regression,0.863636,0.864865,0.9,0.83237,0.86523,4.915044,0.136364,0.136364,0.729935
4,Logistic Regression,0.863636,0.864865,0.9,0.83237,0.86523,4.915044,0.136364,0.136364,0.729935


In [None]:
conf_matrix_df.head()

Unnamed: 0,model_name,true_negative,false_positive,false_negative,true_positive
0,Logistic Regression,282,32,58,288
1,Logistic Regression,282,32,58,288
2,Logistic Regression,282,32,58,288
3,Logistic Regression,282,32,58,288
4,Logistic Regression,282,32,58,288


In [None]:
evaluation.tail()

Unnamed: 0,model_name,accuracy,f1,precision,recall,roc_auc,log_loss_val,mae,mse,mcc
1795,GRU,0.960606,0.962536,0.95977,0.965318,0.960366,1.419901,0.039394,0.039394,0.92102
1796,GRU,0.954545,0.957143,0.946328,0.968208,0.953849,1.638348,0.045455,0.045455,0.909038
1797,GRU,0.959091,0.961263,0.954416,0.968208,0.958626,1.474513,0.040909,0.040909,0.918035
1798,GRU,0.956061,0.958273,0.954155,0.962428,0.955736,1.583736,0.043939,0.043939,0.911913
1799,GRU,0.966667,0.968481,0.960227,0.976879,0.966146,1.201455,0.033333,0.033333,0.933272


In [None]:
conf_matrix_df.tail()

Unnamed: 0,model_name,true_negative,false_positive,false_negative,true_positive
1795,GRU,300,14,12,334
1796,GRU,295,19,11,335
1797,GRU,298,16,11,335
1798,GRU,298,16,13,333
1799,GRU,300,14,8,338


In [24]:
#Only Use When Needed, else, All Data Will Be Deleted.


# evaluation=evaluation.drop(evaluation.index)
# conf_matrix_df=conf_matrix_df.drop(conf_matrix_df.index)