In [1]:
import os
import librosa
import numpy as np

# Directories containing scream and non-scream audio files
scream_dir = r"C:\Users\user\Desktop\Project\files\scream"
non_scream_dir = r"C:\Users\user\Desktop\Project\files\nonscream"

# Function to extract MFCC features for each audio file
def extract_mfcc(audio_file):
    y, sr = librosa.load(audio_file)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    return mfcc

# Function to process all audio files in a directory and store their MFCCs
def process_directory(directory):
    mfcc_list = []
    for filename in os.listdir(directory):
        if filename.endswith(".wav"):  # Assuming your files are in WAV format
            audio_path = os.path.join(directory, filename)
            mfcc = extract_mfcc(audio_path)
            mfcc_list.append(mfcc)
    return mfcc_list

# Extract MFCCs from scream and non-scream directories
scream_mfccs = process_directory(scream_dir)
non_scream_mfccs = process_directory(non_scream_dir)


In [55]:
def pad_mfcc(mfcc, max_len=128):
    if mfcc.shape[1] < max_len:
        # Pad with zeros if too short
        pad_width = max_len - mfcc.shape[1]
        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')
    else:
        # Truncate if too long
        mfcc = mfcc[:, :max_len]
        mfcc = np.array(mfcc)
    return mfcc

# Apply padding/truncating to MFCCs
scream_mfccs = [pad_mfcc(mfcc) for mfcc in scream_mfccs]
non_scream_mfccs = [pad_mfcc(mfcc) for mfcc in non_scream_mfccs]


In [56]:
# Create labels
scream_labels = [1] * len(scream_mfccs)
non_scream_labels = [0] * len(non_scream_mfccs)

# Combine data and labels
X = np.array(scream_mfccs + non_scream_mfccs)
y = np.array(scream_labels + non_scream_labels)
import sklearn
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2)


In [151]:

#model creation
import tensorflow as tf
model = tf.keras.models.Sequential([
    # First convolutional layer
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(13, 128, 1), padding='same'),  # MFCC shape (13, 128) with 1 channel
    tf.keras.layers.MaxPooling2D(2, 2, padding='same'),
    
    # Second convolutional layer
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D(2, 2, padding='same'),
    
    # Third convolutional layer
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D(2, 2, padding='same'),
    tf.keras.layers.Flatten(),
    # Flatten the output and add Dense layers
    #tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')  # Binary classification: scream or non-scream
      # Assuming you want to reshape to 4096

])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)


Epoch 1/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 46ms/step - accuracy: 0.6643 - loss: 10.0997 - val_accuracy: 0.9381 - val_loss: 0.1525
Epoch 2/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 43ms/step - accuracy: 0.9450 - loss: 0.1643 - val_accuracy: 0.9301 - val_loss: 0.1366
Epoch 3/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 43ms/step - accuracy: 0.9545 - loss: 0.1154 - val_accuracy: 0.9581 - val_loss: 0.1070
Epoch 4/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 44ms/step - accuracy: 0.9602 - loss: 0.1050 - val_accuracy: 0.9082 - val_loss: 0.2157
Epoch 5/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 43ms/step - accuracy: 0.9628 - loss: 0.0907 - val_accuracy: 0.9661 - val_loss: 0.1011
Epoch 6/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 43ms/step - accuracy: 0.9781 - loss: 0.0612 - val_accuracy: 0.9601 - val_loss: 0.1189
Epoch 7/10
[1m63/63[0m [32m━━━

<keras.src.callbacks.history.History at 0x29f024c6600>

In [58]:
model.evaluate(X_test, y_test)

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.9343 - loss: 0.2647


[0.24131731688976288, 0.9376996755599976]

In [141]:
def reshape_mfcc(mfcc):
   
    reshaped_mfcc = mfcc.reshape( 13, 128, 1)
    return reshaped_mfcc

In [152]:
inp=r"C:\Users\user\Desktop\Project\files\testaudio\2.wav"
mfcc=extract_mfcc(inp)
test_mfcc=pad_mfcc(mfcc)
test_mfcc=reshape_mfcc(test_mfcc)
test_mfcc.shape

(13, 128, 1)

In [153]:
prediction = model.predict(test_mfcc)

ValueError: Exception encountered when calling Sequential.call().

[1mInput 0 of layer "dense_16" is incompatible with the layer: expected axis -1 of input shape to have value 4096, but received input with shape (13, 2048)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(13, 128, 1), dtype=float32)
  • training=False
  • mask=None