In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import librosa
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical

In [2]:
scream_dir = r"C:\Users\user\Desktop\Project\files\scream"
nonscream_dir = r"C:\Users\user\Desktop\Project\files\nonscream"

In [3]:
def extract_mfcc(audio_path, max_len=128):
    y, sr = librosa.load(audio_path, sr=None)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    
    # Pad or truncate to fixed length
    if mfcc.shape[1] < max_len:
        pad_width = max_len - mfcc.shape[1]
        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')
    else:
        mfcc = mfcc[:, :max_len]
    
    return mfcc

# Helper function to load and process audio data
def load_data():
    data = []
    labels = []

    # Process scream audio files
    for filename in os.listdir(scream_dir):
        if filename.endswith(".wav"):
            mfcc = extract_mfcc(os.path.join(scream_dir, filename))
            data.append(mfcc)
            labels.append(1)  # 1 for scream

    # Process non-scream audio files
    for filename in os.listdir(nonscream_dir):
        if filename.endswith(".wav"):
            mfcc = extract_mfcc(os.path.join(nonscream_dir, filename))
            data.append(mfcc)
            labels.append(0)  # 0 for non-scream

    # Convert to NumPy arrays
    data = np.array(data)
    labels = np.array(labels)

    # Add channel dimension (needed for CNN input)
    data = data[..., np.newaxis]  # Shape becomes (samples, 13, 128, 1)
    
    return data, labels

In [5]:
# Load and split the dataset
data, labels = load_data()

# Encode labels and split data
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

In [6]:
# Build the CNN model
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(13, 128, 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Function to compile the model (customize as per requirement)
def compile_model():
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:
compile_model()

In [8]:
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/20
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 24ms/step - accuracy: 0.7106 - loss: 4.5595 - val_accuracy: 0.9015 - val_loss: 0.2486
Epoch 2/20
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - accuracy: 0.8913 - loss: 0.2813 - val_accuracy: 0.9182 - val_loss: 0.1959
Epoch 3/20
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step - accuracy: 0.9275 - loss: 0.2124 - val_accuracy: 0.9212 - val_loss: 0.1981
Epoch 4/20
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - accuracy: 0.9423 - loss: 0.1581 - val_accuracy: 0.9227 - val_loss: 0.2015
Epoch 5/20
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - accuracy: 0.9497 - loss: 0.1373 - val_accuracy: 0.9303 - val_loss: 0.2005
Epoch 6/20
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 23ms/step - accuracy: 0.9576 - loss: 0.1206 - val_accuracy: 0.9242 - val_loss: 0.2144
Epoch 7/20
[1m83/83[0m [32m━━━━

<keras.src.callbacks.history.History at 0x28afac6ccb0>

In [10]:
def pred(audio_file):
    mfcc = extract_mfcc(audio_file)
    mfcc = np.expand_dims(mfcc, axis=0)  # Add batch dimension
    mfcc = mfcc[..., np.newaxis]  # Add channel dimension

    # Predict using the model
    prediction = model.predict(mfcc)

    if prediction >= 0.5:
        return "scream"
    else:
        return "non-scream"


In [24]:
file_to_predict = r"C:\Users\user\Desktop\Project\files\testaudio\4.wav"
prediction_result = pred(file_to_predict)
print("Prediction for the given audio file:", prediction_result)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
Prediction for the given audio file: non-scream
