In [3]:
import librosa
import pandas as pd
import numpy as np
import os

# Directories holding the labelled recordings, one folder per class.
scream_path = r"C:\Users\user\Desktop\Project\EXPERIMENTS\scream"
non_scream_path = r"C:\Users\user\Desktop\Project\EXPERIMENTS\nonscream"

# Collect the full path of every WAV file in each directory.
# - The extension test is case-insensitive so files named "*.WAV" are not
#   silently skipped.
# - os.listdir returns entries in arbitrary order; sorting the names keeps
#   the order of the collected paths reproducible from run to run.
scream_files = [
    os.path.join(scream_path, f)
    for f in sorted(os.listdir(scream_path))
    if f.lower().endswith('.wav')
]
non_scream_files = [
    os.path.join(non_scream_path, f)
    for f in sorted(os.listdir(non_scream_path))
    if f.lower().endswith('.wav')
]


# Function to extract features from an audio file
def extract_features(audio_file, n_mfcc=13):
    """Summarise one audio file as a flat dictionary of scalar features.

    Parameters
    ----------
    audio_file : str or path-like
        Location of the audio file; handed straight to ``librosa.load``.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute. Defaults to 13.

    Returns
    -------
    dict
        Keys, in insertion order:

        * ``mfcc_1`` .. ``mfcc_<n_mfcc>`` -- each MFCC coefficient averaged
          over all frames.
        * ``spectral_centroid``, ``spectral_bandwidth``,
          ``zero_crossing_rate`` -- mean of the respective librosa feature.
        * ``spectrogram_mean``, ``spectrogram_median``,
          ``spectrogram_variance`` -- mean, median and variance of the
          *time-averaged* STFT magnitude spectrum (one value per frequency
          bin), i.e. statistics of the averaged spectrum rather than of
          every individual spectrogram cell.
    """
    # sr=None keeps the file's native sampling rate instead of resampling.
    y, sr = librosa.load(audio_file, sr=None)

    # MFCCs come back as (coefficient, frame); average each one over time.
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    mfccs_mean = mfccs.mean(axis=1)

    # Compute the STFT magnitude once and share it with the spectral
    # features through their ``S`` argument, instead of letting each of them
    # recompute the same default-parameter STFT from ``y``.
    spectrogram = np.abs(librosa.stft(y))
    spectral_centroid = np.mean(
        librosa.feature.spectral_centroid(S=spectrogram, sr=sr)
    )
    spectral_bandwidth = np.mean(
        librosa.feature.spectral_bandwidth(S=spectrogram, sr=sr)
    )

    zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y=y))

    # Average magnitude per frequency bin; the three spectrogram statistics
    # below are all derived from this averaged spectrum.
    spectrum_mean = spectrogram.mean(axis=1)

    # Keys are 1-based: mfcc_1, mfcc_2, ...
    features = {
        'mfcc_{}'.format(index): value
        for index, value in enumerate(mfccs_mean, start=1)
    }
    features.update({
        'spectral_centroid': spectral_centroid,
        'spectral_bandwidth': spectral_bandwidth,
        'zero_crossing_rate': zero_crossing_rate,
        'spectrogram_mean': np.mean(spectrum_mean),
        'spectrogram_median': np.median(spectrum_mean),
        'spectrogram_variance': np.var(spectrum_mean),
    })
    return features



# One accumulator per class; each entry is the feature dict of one file.
data1 = []  # scream recordings, labelled 1
data2 = []  # non-scream recordings, labelled 0

# Walk both classes in turn (scream first), extract the features of every
# file and tag the resulting dict with the numeric class label.
for class_label, class_files, class_rows in (
    (1, scream_files, data1),
    (0, non_scream_files, data2),
):
    for audio_file in class_files:
        features = extract_features(audio_file)
        features.update(label=class_label)
        class_rows.append(features)

# One table per class
df1, df2 = pd.DataFrame(data1), pd.DataFrame(data2)

# Write each table to its own CSV file, without the index column
df1.to_csv('audio_features1.csv', index=False)
df2.to_csv('audio_features2.csv', index=False)

# Preview the first rows of both tables
print(df1.head())
print(df2.head())


       mfcc_1      mfcc_2      mfcc_3     mfcc_4     mfcc_5     mfcc_6  \
0 -281.972412  -27.002085  -48.788036  25.448553  21.694799  13.263804   
1 -386.166473   55.684887   -4.840861  19.661909  13.279716  11.665585   
2 -251.500336  -32.128223 -104.347481  -2.496558  34.573792  34.055550   
3 -220.438995  107.304871  -32.439606  11.230356  -7.062341  20.047394   
4 -250.408569  137.835953  -42.434673  -0.093357  -1.588117   3.683796   

      mfcc_7     mfcc_8    mfcc_9   mfcc_10    mfcc_11    mfcc_12    mfcc_13  \
0 -14.020556   7.747015  2.940870 -6.776509 -11.882449  13.568652 -10.740694   
1  -4.929646  -0.588488 -1.708870 -4.072778  -5.881234   6.675362  -3.699013   
2 -24.054390  15.440025  1.407326 -0.697393  -9.792743   2.449520   2.464021   
3   3.320949   1.145091 -2.769462  5.505599  -8.787787  -3.005107  -2.713224   
4   5.298687 -14.840858  5.707088  7.519821 -11.900642  -8.919777   2.564292   

   spectral_centroid  spectral_bandwidth  zero_crossing_rate  \
0        2