In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, SimpleRNN, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
from sklearn.neighbors import KNeighborsClassifier
from tensorflow.keras.optimizers import Adam
from sklearn.ensemble import GradientBoostingClassifier
import librosa

In [4]:

# Load your dataset
df = pd.read_csv(r"C:\Users\user\Desktop\Project\EXPERIMENTS\revised-data\audio_features_cp.csv")

# Define feature columns and target column
x_cols = ['mfcc_1', 'mfcc_2', 'mfcc_3', 'mfcc_4', 'mfcc_5', 'mfcc_6', 'mfcc_7', 'mfcc_8', 
           'mfcc_9', 'mfcc_10', 'mfcc_11', 'mfcc_12', 'mfcc_13', 'spectral_centroid', 
           'spectral_bandwidth', 'zero_crossing_rate' , 'spectrogram_mean' , 'spectrogram_median' , 'spectrogram_variance']
y_cols = ['label']

# Normalize the feature data to range [0,1] using MinMaxScaler
SMM = MinMaxScaler(feature_range=(0, 1))
df[x_cols] = SMM.fit_transform(df[x_cols])

# Split data into features and labels
X = df[x_cols].values  # Convert to NumPy array
y = df[y_cols].values.ravel()  # Convert to 1D array

# Train-test split (20% test set)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# # Reshape the data for RNN (samples, timesteps, features) where timesteps=1 since it's not a time series
# X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
# X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])



In [5]:
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

# Define the LSTM model
model = Sequential()
model.add(LSTM(128, input_shape=(X_train.shape[1], X_train.shape[2]), activation='relu', return_sequences=False))
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(1, activation='sigmoid'))  # For binary classification

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Summarize the model architecture
model.summary()

# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

  super().__init__(**kwargs)


Epoch 1/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - accuracy: 0.5604 - loss: 0.6881 - val_accuracy: 0.7939 - val_loss: 0.6439
Epoch 2/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.7430 - loss: 0.6190 - val_accuracy: 0.8061 - val_loss: 0.4844
Epoch 3/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.7724 - loss: 0.5251 - val_accuracy: 0.8409 - val_loss: 0.4006
Epoch 4/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8242 - loss: 0.4353 - val_accuracy: 0.8515 - val_loss: 0.3694
Epoch 5/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8187 - loss: 0.4297 - val_accuracy: 0.8394 - val_loss: 0.3729
Epoch 6/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8376 - loss: 0.4021 - val_accuracy: 0.8636 - val_loss: 0.3179
Epoch 7/50
[1m83/83[0m [32m━━━━━━━━━

In [6]:
### Evaluation Metrics
# Make predictions on the test set
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype("int32")

# Evaluate metrics
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

f1 = f1_score(y_test, y_pred)
print(f"F1 Score: {f1}")

conf_matrix = confusion_matrix(y_test, y_pred)
print(f"Confusion Matrix:\n{conf_matrix}")

class_report = classification_report(y_test, y_pred)
print(f"Classification Report:\n{class_report}")


[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step
Accuracy: 0.9287878787878788
F1 Score: 0.9309838472834068
Confusion Matrix:
[[296  18]
 [ 29 317]]
Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.94      0.93       314
           1       0.95      0.92      0.93       346

    accuracy                           0.93       660
   macro avg       0.93      0.93      0.93       660
weighted avg       0.93      0.93      0.93       660



In [12]:

### Prediction Function for New Audio
def extract_features(audio_file):
    # Load the audio file
    y, sr = librosa.load(audio_file, sr=None)
    
    # Extract MFCC (first 13 coefficients)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mfccs_mean = np.mean(mfccs.T, axis=0)
    
    # Extract Spectral Centroid
    spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
    
    # Extract Spectral Bandwidth
    spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
    
    # Extract Zero-Crossing Rate
    zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y=y))
    
    # Extract Spectrogram (STFT)
    spectrogram = np.abs(librosa.stft(y))
    spectrogram_mean = np.mean(spectrogram.T, axis=0)  # Get mean of spectrogram along time axis
    spectrogram_mean2 = np.mean(spectrogram_mean)
    spectrogram_median = np.median(spectrogram_mean)
    spectrogram_variance = np.var(spectrogram_mean)
    
    # Return the features as a list (order must match the training features)
    return [
        mfccs_mean[0], mfccs_mean[1], mfccs_mean[2], mfccs_mean[3], mfccs_mean[4],
        mfccs_mean[5], mfccs_mean[6], mfccs_mean[7], mfccs_mean[8], mfccs_mean[9],
        mfccs_mean[10], mfccs_mean[11], mfccs_mean[12],
        spectral_centroid, spectral_bandwidth, zero_crossing_rate,
        spectrogram_mean2, spectrogram_median, spectrogram_variance
    ]

# Function to predict new audio file
def pred(audio_file):
    # Extract features from the audio file
    features = extract_features(audio_file)
    
    # Convert the features to NumPy array and reshape for RNN
    # feature_array = np.array(features).reshape(1, 1, -1)  # Reshape for RNN (1 sample, 1 timestep, 19 features)
    
    # # Scale the features using the previously fitted scaler
    # feature_array = SMM.transform(feature_array.reshape(1, -1)).reshape(1, 1, -1)  # Scale and reshape back for RNN


    feature_array = np.array(features).reshape(1, -1)
    feature_array = SMM.transform(feature_array)
    feature_array = feature_array.reshape((feature_array.shape[0], 1, feature_array.shape[1]))
    
    # Predict using the trained model
    prediction_prob = model.predict(feature_array)
    prediction = (prediction_prob > 0.5).astype("int32")
    
    if prediction == 1:
        print("Screaming")
    else:
        print("Non_screaming")
    
    print(f"Prediction Probability: {prediction_prob}")



In [20]:
# Test the prediction on a new audio file
ipt = r"C:\Users\user\Desktop\Project\EXPERIMENTS\testaudio\9.wav"
pred(ipt)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
Screaming
Prediction Probability: [[0.91916984]]


