In [8]:
# Basic Libraries

import csv
import os
import shutil

import librosa
import librosa.display
import numpy as np
import pandas as pd
from pyAudioAnalysis.MidTermFeatures import directory_feature_extraction as dW
from sklearn.model_selection import train_test_split






# Class labels of the dataset; each one is also used as a sub-folder name
# when the audio files are sorted by category.
classes = [
    "children_playing",
    "drilling",
    "street_music",
    "siren",
    "gun_shot",
    "car_horn",
    "air_conditioner",
    "engine_idling",
    "dog_bark",
    "jackhammer",
]




In [3]:
# Load the UrbanSound8K metadata table. The path is assembled with
# os.path.join so the cell also runs on non-Windows systems.
df = pd.read_csv(os.path.join('UrbanSound8K', 'metadata', 'UrbanSound8K.csv'))
df

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
0,100032-3-0-0.wav,100032,0.000000,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.500000,62.500000,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.500000,64.500000,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.000000,67.000000,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.500000,72.500000,1,5,2,children_playing
...,...,...,...,...,...,...,...,...
8727,99812-1-2-0.wav,99812,159.522205,163.522205,2,7,1,car_horn
8728,99812-1-3-0.wav,99812,181.142431,183.284976,2,7,1,car_horn
8729,99812-1-4-0.wav,99812,242.691902,246.197885,2,7,1,car_horn
8730,99812-1-5-0.wav,99812,253.209850,255.741948,2,7,1,car_horn


# Function definitions

In [None]:
def seperate_folders(csv_file,folder_path):
    """Sort the audio files in ``folder_path`` into one sub-folder per class.

    The CSV is read with pandas and must provide the columns
    ``slice_file_name`` (file name inside ``folder_path``) and ``class``
    (name of the sub-folder the file belongs in). Files that are listed in
    the CSV but not present directly in ``folder_path`` are reported and
    skipped.

    Args:
        csv_file: Path of the metadata CSV describing the files.
        folder_path: Folder that currently holds the audio files; the
            per-class sub-folders are created inside it.

    Returns:
        None. Progress is reported with ``print``.
    """
    # Read CSV file into pandas DataFrame
    metadata = pd.read_csv(csv_file)

    # Only two columns are needed, so walk them in lockstep instead of
    # materialising a Series per row with iterrows().
    file_names = metadata['slice_file_name'].astype(str)
    for file_id, category in zip(file_names, metadata['class']):
        source = os.path.join(folder_path, file_id)

        if not os.path.exists(source):
            print(f"File {file_id} not found.")
            continue

        # exist_ok avoids the separate existence check before creating.
        category_folder = os.path.join(folder_path, category)
        os.makedirs(category_folder, exist_ok=True)

        # Move the WAV file to the corresponding category folder
        shutil.move(source, os.path.join(category_folder, file_id))
        print(f"Moved {file_id} to {category} folder.")

    print("Task completed.")

In [None]:
def move_wav_files(csv_file, destination_folder, root_dir=None):
    """Move every WAV file listed in ``csv_file`` into ``destination_folder``.

    Each immediate sub-folder of ``root_dir`` is scanned for ``.wav`` files.
    A file is moved when its name appears in the ``slice_file_name`` column
    of the CSV; all other files are left where they are.

    Args:
        csv_file: Path of a CSV file with a ``slice_file_name`` column.
        destination_folder: Folder receiving the files; created if missing.
        root_dir: Folder whose sub-folders hold the audio files. Defaults to
            ``UrbanSound8K/audio`` relative to the working directory.

    Returns:
        None. Each move is reported with ``print``.
    """
    if root_dir is None:
        # Built with os.path.join so the default is not tied to Windows separators.
        root_dir = os.path.join("UrbanSound8K", "audio")

    # Create the destination folder if it doesn't exist
    os.makedirs(destination_folder, exist_ok=True)

    # Only membership is ever tested, so a set of file names is sufficient.
    with open(csv_file, 'r', newline='') as file:
        wanted_files = {row['slice_file_name'] for row in csv.DictReader(file)}

    # Iterate through each folder in the root directory
    for folder_name in os.listdir(root_dir):
        folder_path = os.path.join(root_dir, folder_name)
        if not os.path.isdir(folder_path):
            continue

        for file_name in os.listdir(folder_path):
            if file_name.endswith('.wav') and file_name in wanted_files:
                shutil.move(os.path.join(folder_path, file_name), destination_folder)
                print(f"Moved {file_name} to {destination_folder}")
                        print(f"Moved {file_name} to {destination_folder}")

In [None]:
def parser_CNN(directory, classes, df, desired_frames=128):
    """Build a table of time-averaged mel-spectrogram features.

    Every file found in ``directory/<class_name>`` (for each name in
    ``classes``) is loaded with librosa and turned into a mel spectrogram.
    The spectrogram is zero-padded or truncated to ``desired_frames`` frames
    and then averaged over the time axis, giving one value per mel band.

    Note that padding happens before averaging, so clips shorter than
    ``desired_frames`` frames are averaged together with the zero padding.

    Args:
        directory: Folder containing one sub-folder per class.
        classes: Iterable of sub-folder names to process.
        df: Metadata frame with ``slice_file_name`` and ``class`` columns,
            used to look up the label of each file.
        desired_frames: Number of spectrogram frames kept per clip.

    Returns:
        DataFrame with one row per file: the averaged mel values in
        integer-named columns, followed by ``slice_file_name`` and ``Class``.
        ``Class`` is None for files that have no row in ``df``.
    """
    # Build the file -> label lookup once instead of scanning df per file.
    # setdefault keeps the first occurrence of a duplicated file name.
    label_by_file = {}
    for name, class_label in zip(df['slice_file_name'], df['class']):
        label_by_file.setdefault(name, class_label)

    feature = []
    label = []
    wav_name = []

    for class_name in classes:
        class_directory = os.path.join(directory, class_name)

        for file_name in os.listdir(class_directory):
            file_path = os.path.join(class_directory, file_name)

            # Load audio file
            X, sample_rate = librosa.load(file_path)

            # Extract mel spectrogram
            mels = librosa.feature.melspectrogram(y=X, sr=sample_rate)

            n_frames = mels.shape[1]
            if n_frames < desired_frames:
                # Too short: zero-pad on the right of the time axis.
                mels = np.pad(
                    mels,
                    pad_width=((0, 0), (0, desired_frames - n_frames)),
                    mode='constant',
                )
            elif n_frames > desired_frames:
                # Too long: keep only the leading frames.
                mels = mels[:, :desired_frames]

            # Take the mean along the time axis
            feature.append(np.mean(mels, axis=1))
            label.append(label_by_file.get(file_name))
            wav_name.append(file_name)

    # Convert lists to DataFrame
    feature_df = pd.DataFrame(data=feature)
    feature_df['slice_file_name'] = wav_name
    feature_df['Class'] = label

    return feature_df

    

In [5]:
# All files from children_playing beginning with 36429 need to be deleted,
# as do the files from drilling beginning with 19007.

def extract_features_NN(folder_path):
    """Extract pyAudioAnalysis mid-term features for every class folder.

    For each name in the module-level ``classes`` list, the folder
    ``Train_NN/<name>`` is passed to ``directory_feature_extraction``
    (imported as ``dW``) with the arguments ``1, 1, 0.1, 0.1``. The
    resulting feature rows are labelled with the class name and with the
    file name each row was computed from.

    Args:
        folder_path: Not used. The folders are always derived from
            ``classes`` under ``Train_NN``; the parameter is kept so that
            existing calls keep working.

    Returns:
        DataFrame with one row per analysed file: the feature columns named
        by the extractor, plus ``class`` and ``slice_file_name``. An empty
        DataFrame is returned when no folder yields any file.
    """
    per_class_frames = []

    for folder_name in classes:
        class_dir = os.path.join("Train_NN", folder_name)
        # dW returns (feature matrix, list of analysed file paths, feature names).
        features, wav_paths, feature_names = dW(class_dir, 1, 1, 0.1, 0.1)

        if len(wav_paths) == 0:
            # Nothing was analysed in this folder; there are no rows to add.
            continue

        # NOTE(review): with a single analysed file the extractor appears to
        # return a 1-D vector instead of a 1xN matrix -- atleast_2d covers both.
        class_df = pd.DataFrame(np.atleast_2d(features), columns=feature_names)
        class_df['class'] = folder_name
        # One file name per row, in the order the extractor reported the files.
        class_df['slice_file_name'] = [os.path.basename(path) for path in wav_paths]
        per_class_frames.append(class_df)

    if not per_class_frames:
        return pd.DataFrame()

    # Concatenate once at the end instead of growing a frame inside the loop.
    return pd.concat(per_class_frames, ignore_index=True)
        

In [None]:
def delete_files_with_prefix(folder, prefix):
    """Remove every entry of ``folder`` whose name starts with ``prefix``.

    Each deletion (or failure to delete) is reported with ``print``; a
    failure does not stop the remaining entries from being processed.

    Args:
        folder: Directory to scan (not recursive).
        prefix: Leading string a file name must have to be deleted.
    """
    matching_names = [name for name in os.listdir(folder) if name.startswith(prefix)]

    for name in matching_names:
        target = os.path.join(folder, name)
        try:
            os.remove(target)
        except Exception as e:
            print(f"Error deleting file {target}: {e}")
        else:
            print(f"Deleted file: {target}")

# Train Test Split

In [None]:
# Split the metadata table into stratified train/test subsets and save both.
# Columns are selected from `df` by name, so the cell no longer depends on an
# array (`np_array`) that is not defined anywhere in the notebook.

# Feature names
feature_names = ['slice_file_name', 'fsID', 'start', 'end', 'salience', 'fold', 'classID']

# Splitting features (X) and labels (y)
X = df[feature_names]
y = df['class']

# Splitting into training and testing sets with stratified sampling
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=18)

# Reconstructing DataFrames for training and testing sets. X_* and y_* share
# the same index, so the label column aligns row by row before the reset.
df_train = X_train.copy()
df_train['class'] = y_train
df_train = df_train.reset_index(drop=True)

df_test = X_test.copy()
df_test['class'] = y_test
df_test = df_test.reset_index(drop=True)

df_train.to_csv('train.csv', index=False)
df_test.to_csv('test.csv', index=False)



# Main

In [None]:
# Collect the audio files listed in each split's CSV into that split's folder.

# Training split
move_wav_files(csv_file="train.csv", destination_folder="Train_NN")

# Testing split
move_wav_files(csv_file="test.csv", destination_folder="Test_NN")

In [None]:
# Sort the training files into one sub-folder per class.
seperate_folders(csv_file="train.csv", folder_path="Train_NN")

In [None]:
# Compute the mel-spectrogram features of the training files and save them.
Extracted_for_CNN = parser_CNN(directory='Train_NN', classes=classes, df=df_train)
Extracted_for_CNN.to_csv('Features_CNN.csv', index=False)

In [None]:
# Some recordings have to be removed from the training set before the
# next feature-extraction step. Each folder is paired with the file-name
# prefix that identifies the recordings to drop.
children_playing_folder, children_playing_prefix = 'Train_NN/children_playing', '36429'
drilling_folder, drilling_prefix = 'Train_NN/drilling', '19007'
jackhammer_folder, jackhammer_prefix = 'Train_NN/jackhammer', '88466'

delete_files_with_prefix(folder=children_playing_folder, prefix=children_playing_prefix)
delete_files_with_prefix(folder=drilling_folder, prefix=drilling_prefix)
delete_files_with_prefix(folder=jackhammer_folder, prefix=jackhammer_prefix)


In [9]:
# Compute the pyAudioAnalysis features of the training files and save them.
Extracted_for_NN = extract_features_NN(folder_path=classes)
Extracted_for_NN.to_csv('Features_NN.csv', index=False)

Analyzing file 1 of 794: Train_NN\children_playing\100263-2-0-117.wav
Analyzing file 2 of 794: Train_NN\children_playing\100263-2-0-126.wav
Analyzing file 3 of 794: Train_NN\children_playing\100263-2-0-143.wav
Analyzing file 4 of 794: Train_NN\children_playing\100263-2-0-161.wav
Analyzing file 5 of 794: Train_NN\children_playing\100263-2-0-3.wav
Analyzing file 6 of 794: Train_NN\children_playing\100263-2-0-36.wav
Analyzing file 7 of 794: Train_NN\children_playing\101382-2-0-10.wav
Analyzing file 8 of 794: Train_NN\children_playing\101382-2-0-12.wav
Analyzing file 9 of 794: Train_NN\children_playing\101382-2-0-20.wav
Analyzing file 10 of 794: Train_NN\children_playing\101382-2-0-21.wav
Analyzing file 11 of 794: Train_NN\children_playing\101382-2-0-29.wav
Analyzing file 12 of 794: Train_NN\children_playing\101382-2-0-33.wav
Analyzing file 13 of 794: Train_NN\children_playing\101382-2-0-45.wav
Analyzing file 14 of 794: Train_NN\children_playing\104327-2-0-19.wav
Analyzing file 15 of 794: 