In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
from sklearn.neighbors import KNeighborsClassifier
from tensorflow.keras.optimizers import Adam
import librosa

In [2]:

# Load your dataset
df = pd.read_csv(r"C:\Users\user\Desktop\Project\EXPERIMENTS\revised-data\audio_features_cp.csv")

# Define feature columns and target column
x_cols = ['mfcc_1', 'mfcc_2', 'mfcc_3', 'mfcc_4', 'mfcc_5', 'mfcc_6', 'mfcc_7', 'mfcc_8', 
           'mfcc_9', 'mfcc_10', 'mfcc_11', 'mfcc_12', 'mfcc_13', 'spectral_centroid', 
           'spectral_bandwidth', 'zero_crossing_rate' , 'spectrogram_mean' , 'spectrogram_median' , 'spectrogram_variance']
y_cols = ['label']

# Normalize the feature data to range [0,1] using MinMaxScaler
SMM = MinMaxScaler(feature_range=(0, 1))
df[x_cols] = SMM.fit_transform(df[x_cols])

# Split data into features and labels
X = df[x_cols].values  # Convert to NumPy array
y = df[y_cols].values.ravel()  # Convert to 1D array

# Train-test split (20% test set)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)




In [3]:
# Initialize the ANN model
model = Sequential()

# Input layer and first hidden layer
model.add(Dense(128, input_shape=(X_train.shape[1],), activation='relu'))
model.add(Dropout(0.3))  # Dropout for regularization

# Second hidden layer
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))

# Output layer for binary classification
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Model summary
model.summary()

# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.5544 - loss: 0.6814 - val_accuracy: 0.8318 - val_loss: 0.5637
Epoch 2/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7491 - loss: 0.5655 - val_accuracy: 0.8348 - val_loss: 0.4443
Epoch 3/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7947 - loss: 0.4781 - val_accuracy: 0.8424 - val_loss: 0.3924
Epoch 4/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8112 - loss: 0.4624 - val_accuracy: 0.8545 - val_loss: 0.3663
Epoch 5/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.8407 - loss: 0.3993 - val_accuracy: 0.8621 - val_loss: 0.3377
Epoch 6/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8489 - loss: 0.3906 - val_accuracy: 0.8530 - val_loss: 0.3530
Epoch 7/50
[1m83/83[0m [32m━━━━━━━━━

In [5]:

# Predict on the test set
#y_pred = model.predict(X_test)
y_pred = (model.predict(X_test) > 0.5).astype("int32")
# Evaluate metrics
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

f1 = f1_score(y_test, y_pred)
print(f"F1 Score: {f1}")

conf_matrix = confusion_matrix(y_test, y_pred)
print(f"Confusion Matrix:\n{conf_matrix}")

class_report = classification_report(y_test, y_pred)
print(f"Classification Report:\n{class_report}")



[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Accuracy: 0.9196969696969697
F1 Score: 0.9195751138088012
Confusion Matrix:
[[304  10]
 [ 43 303]]
Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.97      0.92       314
           1       0.97      0.88      0.92       346

    accuracy                           0.92       660
   macro avg       0.92      0.92      0.92       660
weighted avg       0.92      0.92      0.92       660



In [11]:

### Prediction Function for New Audio
def extract_features(audio_file):
    # Load the audio file
    y, sr = librosa.load(audio_file, sr=None)
    
    # Extract MFCC (first 13 coefficients)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mfccs_mean = np.mean(mfccs.T, axis=0)
    
    # Extract Spectral Centroid
    spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
    
    # Extract Spectral Bandwidth
    spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
    
    # Extract Zero-Crossing Rate
    zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y=y))
    
    # Extract Spectrogram (STFT)
    spectrogram = np.abs(librosa.stft(y))
    spectrogram_mean = np.mean(spectrogram.T, axis=0)  # Get mean of spectrogram along time axis
    spectrogram_mean2 = np.mean(spectrogram_mean)
    spectrogram_median = np.median(spectrogram_mean)
    spectrogram_variance = np.var(spectrogram_mean)
    
    # Return the features as a list (order must match the training features)
    return [
        mfccs_mean[0], mfccs_mean[1], mfccs_mean[2], mfccs_mean[3], mfccs_mean[4],
        mfccs_mean[5], mfccs_mean[6], mfccs_mean[7], mfccs_mean[8], mfccs_mean[9],
        mfccs_mean[10], mfccs_mean[11], mfccs_mean[12],
        spectral_centroid, spectral_bandwidth, zero_crossing_rate,
        spectrogram_mean2, spectrogram_median, spectrogram_variance
    ]

# Function to predict new audio file
def pred(audio_file):
    # Extract features from the audio file
    features = extract_features(audio_file)
    
    # Convert the features to NumPy array and scale using MinMaxScaler
    feature_array = np.array(features).reshape(1, -1)  # Reshape to 2D array for KNN
    feature_array = SMM.transform(feature_array)  # Scale feature using trained MinMaxScaler
    
    # Predict using the trained KNN model
    prediction = model.predict(feature_array)
    
    if prediction >= 0.5:
        print("Screaming")
    else:
        print("Non_screaming")
    
    print(f"Prediction: {prediction}")

In [20]:
# Test the prediction on a new audio file
ipt = r"C:\Users\user\Desktop\Project\EXPERIMENTS\testaudio\9.wav"
pred(ipt)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
Screaming
Prediction: [[0.78680825]]


