# Creating the dataset from:
    - donateacry-corpus: https://github.com/gveres/donateacry-corpus/
    - ESC-50: https://github.com/karolpiczak/ESC-50

## Note: the ESC-50 `crying_baby` clips must be excluded from the negatives

In [1]:
# !pip install matplotlib
# !pip install librosa

# donateacry cleaning and testing

In [1]:
import os
import IPython.display as ipd

import os
import numpy as np
import scipy
from scipy.io import wavfile
import scipy.fftpack as fft
from scipy.signal import get_window
import matplotlib.pyplot as plt
import pandas as pd  
import librosa as lb 
import librosa.display 

%matplotlib inline

In [8]:
# Root of the cleaned donateacry corpus; each sub-directory holds the clips of one cry category.
cry_data_path = "./data/donateacry_cleaned"
# Keep only directories so stray files (README.md, .DS_Store, ...) are never
# mistaken for a category and later handed to os.listdir().
cry_categories = [
    entry for entry in os.listdir(cry_data_path)
    if os.path.isdir(os.path.join(cry_data_path, entry))
]
cry_categories

['discomfort', 'tired', 'belly_pain', 'burping', 'hungry']

In [9]:
# Sample clip to inspect: the first entry os.listdir returns for the first category.
cry_testing_file = "/".join([
    cry_data_path,
    cry_categories[0],
    os.listdir(f"{cry_data_path}/{cry_categories[0]}")[0],
])
cry_testing_file

'./data/donateacry_cleaned/discomfort/10A40438-09AA-4A21-83B4-8119F03F7A11-1430925142-1.0-f-26-dc.wav'

In [5]:

# Render an inline audio player for the sample cry clip.
ipd.Audio(cry_testing_file)

In [6]:
# Read the sample clip and report its sample rate and its length in seconds.
sample_rate, audio = wavfile.read(cry_testing_file)
print(f"Sample rate: {sample_rate}Hz")
print(f"Audio duration: {len(audio) / sample_rate}s")

Sample rate: 8000Hz
Audio duration: 7.0s


## go through the directory and put all files in one dataframe

In [10]:
# One [filename, category] pair per file, gathered category by category,
# then wrapped in a two-column DataFrame.
cries_list = np.array([
    [fname, category]
    for category in cry_categories
    for fname in os.listdir(f"{cry_data_path}/{category}")
])
cries = pd.DataFrame(data=cries_list, columns=['filename', 'classification'])

In [11]:
# Preview the first rows of the filename/label table.
cries.head()

Unnamed: 0,filename,classification
0,10A40438-09AA-4A21-83B4-8119F03F7A11-143092514...,discomfort
1,d6cda191-4962-4308-9a36-46d5648a95ed-143108626...,discomfort
2,7b0e160e-0505-459e-8ecb-304d7afae9d2-143748697...,discomfort
3,1309B82C-F146-46F0-A723-45345AFA6EA8-143280169...,discomfort
4,999bf14b-e417-4b44-b746-9253f81efe38-143084501...,discomfort


In [9]:
# Show the distinct labels present in the table (a set, so the printed order is arbitrary).
print(set(cries['classification']))

{'hungry', 'discomfort', 'belly_pain', 'burping', 'tired'}


# ESC50 cleaning

In [3]:
# Load the ESC-50 metadata and keep every row whose category is not "crying_baby".
csvPath = "./data/esc_50/meta/esc50.csv"
metadata = pd.read_csv(csvPath)
clean_esc50 = metadata.loc[metadata['category'].ne("crying_baby")]
clean_esc50.head()

Unnamed: 0,filename,fold,target,category,esc10,src_file,take
0,1-100032-A-0.wav,1,0,dog,True,100032,A
1,1-100038-A-14.wav,1,14,chirping_birds,False,100038,A
2,1-100210-A-36.wav,1,36,vacuum_cleaner,False,100210,A
3,1-100210-B-36.wav,1,36,vacuum_cleaner,False,100210,B
4,1-101296-A-19.wav,1,19,thunderstorm,False,101296,A


In [11]:
# Row counts before and after dropping the crying_baby rows.
print(len(metadata), len(clean_esc50), sep="\n")

2000
1960


## Put ESC-50 into the common format (filename | classification)

In [4]:
# Keep only the filename column and label every remaining ESC-50 clip as 'negative'.
esc50_formatted = clean_esc50[['filename']].assign(classification='negative')
esc50_formatted.head()

Unnamed: 0,filename,classification
0,1-100032-A-0.wav,negative
1,1-100038-A-14.wav,negative
2,1-100210-A-36.wav,negative
3,1-100210-B-36.wav,negative
4,1-101296-A-19.wav,negative


## MFCCs

In [5]:
def extract_mfccs(audio_file_path, n_mfcc=10):
    """Return the time-averaged MFCC vector of one audio file.

    Parameters
    ----------
    audio_file_path : str
        Path of the audio file; passed unchanged to ``librosa.load``.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute. Defaults to 10, the value
        that used to be hard-coded in the function body.

    Returns
    -------
    numpy.ndarray
        The MFCC matrix averaged along axis 1, i.e. one mean value per
        coefficient.
    """
    # librosa.load is called with its default settings.
    y, sr = librosa.load(audio_file_path)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    return mfccs.mean(axis=1)

## cries

In [12]:
# Build the audio paths from the `cries` table so that row i of the MFCC
# features describes the same file as row i of `cries` (the two are later
# joined by position). Listing the directories a second time with
# os.listdir() is not guaranteed to return the files in the same order.
cry_data_path = "./data/donateacry_cleaned"
cry_audio_files = np.array([
    f"{cry_data_path}/{category}/{fname}"
    for fname, category in zip(cries['filename'], cries['classification'])
])

# One averaged MFCC vector per audio file, in the same order as cry_audio_files.
cry_mfcc_vectors = [extract_mfccs(audio_file_path) for audio_file_path in cry_audio_files]

In [13]:
# Wrap the cry MFCC vectors in a DataFrame with columns MFCC_1 .. MFCC_<n_mfcc>.
n_mfcc = 10
cries_mfcc_df = pd.DataFrame(
    data=cry_mfcc_vectors,
    columns=[f'MFCC_{i + 1}' for i in range(n_mfcc)],
)

cries_mfcc_df.head()


Unnamed: 0,MFCC_1,MFCC_2,MFCC_3,MFCC_4,MFCC_5,MFCC_6,MFCC_7,MFCC_8,MFCC_9,MFCC_10
0,-243.042267,147.078979,-74.544868,-11.935884,0.091409,-44.496132,1.685033,1.311653,-24.178078,3.441578
1,-337.197357,116.165741,-107.521187,-16.738218,15.354076,-40.475506,7.085871,17.918938,-23.558468,-0.820606
2,-340.153992,188.384033,-10.891422,9.568656,21.079254,-22.424561,-1.612709,6.264339,-13.77388,5.847576
3,-276.378754,155.994522,-78.46328,-4.101474,3.821284,-55.568825,0.179907,10.94126,-24.056501,-0.227951
4,-438.819702,139.433563,-4.925777,-31.982075,3.686296,8.685968,-4.593336,-10.453959,-9.564993,-2.140277


In [17]:
# Total number of values in the MFCC table vs. the number of audio files processed.
print(cries_mfcc_df.size)
print(len(cry_audio_files))

4570
457


## negatives

In [6]:
# Paths of every file found in the ESC-50 audio directory.
# NOTE(review): the directory is listed as-is, so the order is whatever
# os.listdir returns and the crying_baby clips are not filtered out here.
neg_data_path = "./data/esc_50/audio"
neg_audio_files = np.array([neg_data_path + "/" + fname for fname in os.listdir(neg_data_path)])

# Averaged MFCC vector of each file, kept in the same order as neg_audio_files.
neg_mfcc_vectors = [extract_mfccs(audio_file_path) for audio_file_path in neg_audio_files]

In [7]:
# Wrap the negative MFCC vectors in a DataFrame with columns MFCC_1 .. MFCC_<n_mfcc>.
n_mfcc = 10
neg_mfcc_df = pd.DataFrame(
    data=neg_mfcc_vectors,
    columns=[f'MFCC_{i + 1}' for i in range(n_mfcc)],
)

neg_mfcc_df.head()

Unnamed: 0,MFCC_1,MFCC_2,MFCC_3,MFCC_4,MFCC_5,MFCC_6,MFCC_7,MFCC_8,MFCC_9,MFCC_10
0,-309.087555,53.909866,-31.625099,31.483555,-28.693979,2.060634,-19.275547,3.829968,28.201851,7.022366
1,-245.058487,74.467178,-46.726074,31.950815,-1.11571,-7.448606,16.471819,-10.308746,-3.378905,16.06937
2,-604.188721,19.832817,5.069269,5.499698,3.818196,3.637636,2.66149,-0.243153,-2.932766,2.580324
3,81.821144,41.014061,-7.936208,15.689442,-11.899333,9.76838,-7.303794,-0.383247,-7.94295,-2.30728
4,-82.034332,175.983307,-51.190273,22.313408,-18.098845,3.639685,-6.337075,2.72805,-0.632731,-2.188714


# Concatenate dataframes

## Cries classification

In [14]:
# Put the filename/label columns next to the MFCC columns (rows are matched on the index).
cries_dataset = pd.concat(objs=[cries, cries_mfcc_df], axis="columns")
cries_dataset.head()

Unnamed: 0,filename,classification,MFCC_1,MFCC_2,MFCC_3,MFCC_4,MFCC_5,MFCC_6,MFCC_7,MFCC_8,MFCC_9,MFCC_10
0,10A40438-09AA-4A21-83B4-8119F03F7A11-143092514...,discomfort,-243.042267,147.078979,-74.544868,-11.935884,0.091409,-44.496132,1.685033,1.311653,-24.178078,3.441578
1,d6cda191-4962-4308-9a36-46d5648a95ed-143108626...,discomfort,-337.197357,116.165741,-107.521187,-16.738218,15.354076,-40.475506,7.085871,17.918938,-23.558468,-0.820606
2,7b0e160e-0505-459e-8ecb-304d7afae9d2-143748697...,discomfort,-340.153992,188.384033,-10.891422,9.568656,21.079254,-22.424561,-1.612709,6.264339,-13.77388,5.847576
3,1309B82C-F146-46F0-A723-45345AFA6EA8-143280169...,discomfort,-276.378754,155.994522,-78.46328,-4.101474,3.821284,-55.568825,0.179907,10.94126,-24.056501,-0.227951
4,999bf14b-e417-4b44-b746-9253f81efe38-143084501...,discomfort,-438.819702,139.433563,-4.925777,-31.982075,3.686296,8.685968,-4.593336,-10.453959,-9.564993,-2.140277


In [None]:
# Write the labelled cry features to CSV, leaving out the DataFrame index.
cries_dataset.to_csv('cries_classification.csv', index=False)

## Cry detection

In [15]:
# neg_mfcc_df rows follow neg_audio_files (directory listing order, index
# 0..N-1), while esc50_formatted follows the metadata CSV and has gaps in its
# index where the crying_baby rows were dropped. Joining the two by index can
# pair a filename with another file's MFCCs and leaves rows without a label,
# so attach the filename to every MFCC row and join on it instead.
neg_features = neg_mfcc_df.copy()
neg_features.insert(0, 'filename', [os.path.basename(path) for path in neg_audio_files])
# The inner join also discards MFCC rows of files that are not listed in
# esc50_formatted (e.g. the crying_baby clips).
neg_dataset = esc50_formatted.merge(neg_features, on='filename', how='inner')
# ignore_index gives the combined table a clean 0..n-1 index instead of
# repeating the index values of both inputs.
detection_dataset = pd.concat([neg_dataset, cries_dataset], ignore_index=True)

In [16]:
# Preview the first rows of the combined detection table.
detection_dataset.head()

Unnamed: 0,filename,classification,MFCC_1,MFCC_2,MFCC_3,MFCC_4,MFCC_5,MFCC_6,MFCC_7,MFCC_8,MFCC_9,MFCC_10
0,1-100032-A-0.wav,negative,-309.087555,53.909866,-31.625099,31.483555,-28.693979,2.060634,-19.275547,3.829968,28.201851,7.022366
1,1-100038-A-14.wav,negative,-245.058487,74.467178,-46.726074,31.950815,-1.11571,-7.448606,16.471819,-10.308746,-3.378905,16.06937
2,1-100210-A-36.wav,negative,-604.188721,19.832817,5.069269,5.499698,3.818196,3.637636,2.66149,-0.243153,-2.932766,2.580324
3,1-100210-B-36.wav,negative,81.821144,41.014061,-7.936208,15.689442,-11.899333,9.76838,-7.303794,-0.383247,-7.94295,-2.30728
4,1-101296-A-19.wav,negative,-82.034332,175.983307,-51.190273,22.313408,-18.098845,3.639685,-6.337075,2.72805,-0.632731,-2.188714


In [17]:
# Write the detection table (negatives followed by cries) to CSV, leaving out the DataFrame index.
detection_dataset.to_csv('cry_detection.csv', index=False)

## cries visualization

In [None]:
# One figure per cry category: every clip is drawn as a line across its MFCC columns.
for category in cry_categories:
    category_cries = cries_dataset.loc[cries_dataset['classification'] == category]
    # The first two columns are filename and classification; the rest are MFCCs.
    mfcc_columns = category_cries.columns[2:]

    fig, ax = plt.subplots(figsize=(10, 6))
    for _index, row in category_cries.iterrows():
        # .iloc makes the positional slice explicit; the per-clip labels only
        # show up if the legend call below is enabled.
        ax.plot(mfcc_columns, row.iloc[2:], label=row['filename'])

    ax.set_xlabel('MFCC Coefficient')
    ax.set_ylabel('MFCC Value')
    ax.set_title(f"{category} MFCCs")
    # ax.legend()
    ax.grid()
    ax.tick_params(axis='x', labelrotation=45)  # rotate x-axis labels for readability
    plt.show()


# Stitch ESC-50 cries and other sounds into one test clip

In [2]:
# Draw three crying_baby clips from the ESC-50 metadata for the stitched test audio.
csvPath = "./data/esc_50/meta/esc50.csv"
metadata = pd.read_csv(csvPath)
# A fixed random_state makes the selection reproducible across runs.
cry_esc50 = metadata[metadata['category'] == "crying_baby"].sample(3, random_state=42)
cry_esc50.head()

Unnamed: 0,filename,fold,target,category,esc10,src_file,take
995,3-151080-A-20.wav,3,20,crying_baby,True,151080,A
445,2-107351-A-20.wav,2,20,crying_baby,True,107351,A
1007,3-152007-A-20.wav,3,20,crying_baby,True,152007,A


In [3]:
# Draw five non-cry ESC-50 clips; the fixed random_state keeps the pick reproducible.
rando_esc50 = metadata[metadata['category'] != "crying_baby"].sample(5, random_state=42)
rando_esc50.head()

Unnamed: 0,filename,fold,target,category,esc10,src_file,take
1971,5-257642-A-39.wav,5,39,glass_breaking,False,257642,A
1504,4-197454-A-28.wav,4,28,snoring,False,197454,A
1454,4-187769-B-14.wav,4,14,chirping_birds,False,187769,B
1541,4-208021-A-1.wav,4,1,rooster,True,208021,A
1107,3-172179-A-31.wav,3,31,mouse_click,False,172179,A


In [4]:
# Combine both samples and shuffle the rows (sample(frac=1)); the fixed
# random_state makes the shuffled order reproducible across runs.
test_audio_df = (
    pd.concat([rando_esc50, cry_esc50], axis=0)
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

In [5]:
# Preview the shuffled mix of cry and non-cry clips.
test_audio_df.head()

Unnamed: 0,filename,fold,target,category,esc10,src_file,take
0,5-257642-A-39.wav,5,39,glass_breaking,False,257642,A
1,4-197454-A-28.wav,4,28,snoring,False,197454,A
2,3-172179-A-31.wav,3,31,mouse_click,False,172179,A
3,2-107351-A-20.wav,2,20,crying_baby,True,107351,A
4,4-187769-B-14.wav,4,14,chirping_birds,False,187769,B


In [12]:
# Filenames of the shuffled clips as a NumPy array.
files = test_audio_df['filename'].to_numpy()
print(files)

['5-257642-A-39.wav' '4-197454-A-28.wav' '3-172179-A-31.wav'
 '2-107351-A-20.wav' '4-187769-B-14.wav' '3-151080-A-20.wav'
 '4-208021-A-1.wav' '3-152007-A-20.wav']


In [7]:
# Directory that the names in `files` are resolved against when the clips are opened.
filepath = "./data/esc_50/audio"

In [13]:
import wave

# Concatenate the selected clips, in the order given by `files`, into one WAV file.
outfile = "sounds.wav"

if len(files) == 0:
    raise ValueError("no input files to stitch")

# Read every clip first; `with` closes each file even if reading fails.
data = []
for infile in files:
    with wave.open(f"{filepath}/{infile}", 'rb') as w:
        data.append([w.getparams(), w.readframes(w.getnframes())])

# Raw frames can only be appended to one another when every clip shares the
# same channel count, sample width and frame rate; otherwise the result
# would play back garbled.
reference = data[0][0]
reference_format = (reference.nchannels, reference.sampwidth, reference.framerate)
for infile, (clip_params, clip_frames) in zip(files, data):
    clip_format = (clip_params.nchannels, clip_params.sampwidth, clip_params.framerate)
    if clip_format != reference_format:
        raise ValueError(f"{infile} has a different audio format than {files[0]}")

# The header parameters come from the first clip; writeframes keeps the frame
# count in the header correct as more frames are appended.
with wave.open(outfile, 'wb') as output:
    output.setparams(reference)
    for clip_params, clip_frames in data:
        output.writeframes(clip_frames)