In [1]:
# One-time dataset setup: uncomment both lines to download and unpack the ESC-50 archive.
# NOTE(review): later cells read from ./ESC-50-master, presumably the directory
# this archive unpacks to; confirm before relying on it.
#!wget https://github.com/karoldvl/ESC-50/archive/master.zip
#!unzip -q master.zip

In [2]:
import pandas as pd

In [3]:
# Load the ESC-50 clip metadata; later cells read its filename, fold and
# category columns.
df = pd.read_csv(filepath_or_buffer='./ESC-50-master/meta/esc50.csv')
df

Unnamed: 0,filename,fold,target,category,esc10,src_file,take
0,1-100032-A-0.wav,1,0,dog,True,100032,A
1,1-100038-A-14.wav,1,14,chirping_birds,False,100038,A
2,1-100210-A-36.wav,1,36,vacuum_cleaner,False,100210,A
3,1-100210-B-36.wav,1,36,vacuum_cleaner,False,100210,B
4,1-101296-A-19.wav,1,19,thunderstorm,False,101296,A
...,...,...,...,...,...,...,...
1995,5-263831-B-6.wav,5,6,hen,False,263831,B
1996,5-263902-A-36.wav,5,36,vacuum_cleaner,False,263902,A
1997,5-51149-A-25.wav,5,25,footsteps,False,51149,A
1998,5-61635-A-8.wav,5,8,sheep,False,61635,A


In [4]:
# Maps each fine-grained ESC-50 category name to one of four coarse classes:
# 'insan', 'hayvan', 'arac' or 'arkaplan'.
# NOTE(review): 'crow' is mapped to 'arkaplan' while other birds ('hen',
# 'rooster', 'chirping_birds') are 'hayvan' -- confirm this is intentional and
# matches the labels the saved model was trained with before changing it.
category_to_class = {
    'dog': 'hayvan',
    'chirping_birds': 'hayvan',
    'vacuum_cleaner': 'arkaplan',
    'thunderstorm': 'arkaplan',
    'door_wood_knock': 'arkaplan',
    'can_opening': 'arkaplan',
    'crow': 'arkaplan',
    'clapping': 'insan',
    'fireworks': 'arkaplan',
    'chainsaw': 'arac',
    'airplane': 'arac',
    'mouse_click': 'arkaplan',
    'pouring_water': 'arkaplan',
    'train': 'arac',
    'sheep': 'hayvan',
    'water_drops': 'arkaplan',
    'church_bells': 'arkaplan',
    'clock_alarm': 'arkaplan',
    'keyboard_typing': 'arkaplan',
    'wind': 'arkaplan',
    'footsteps': 'insan',
    'frog': 'arkaplan',
    'cow': 'hayvan',
    'brushing_teeth': 'arkaplan',
    'car_horn': 'arac',
    'crackling_fire': 'arkaplan',
    'helicopter': 'arac',
    'drinking_sipping': 'arkaplan',
    'rain': 'arkaplan',
    'insects': 'arkaplan',
    'laughing': 'insan',
    'hen': 'hayvan',
    'engine': 'arac',
    'breathing': 'insan',
    'crying_baby': 'insan',
    'hand_saw': 'arac',
    'coughing': 'insan',
    'glass_breaking': 'arkaplan',
    'snoring': 'insan',
    'toilet_flush': 'arkaplan',
    'pig': 'hayvan',
    'washing_machine': 'arac',
    'clock_tick': 'arkaplan',
    'sneezing': 'insan',
    'rooster': 'hayvan',
    'sea_waves': 'arkaplan',
    'siren': 'arac',
    'cat': 'hayvan',
    'door_wood_creaks': 'arkaplan',
    'crickets': 'arkaplan',
}

# Series.replace leaves values that are not dictionary keys untouched, so
# re-running this cell on an already-mapped column is a no-op. Series.map
# would instead turn every already-mapped class name into NaN on a second run.
df['category'] = df['category'].replace(category_to_class)

# Fail loudly if any row ended up outside the four known classes (for example
# a category missing from the dictionary, or NaN left by an earlier run).
unmapped_categories = set(df['category'].unique()) - set(category_to_class.values())
assert not unmapped_categories, (
    f'categories without a class mapping: {sorted(map(str, unmapped_categories))}'
)

df

Unnamed: 0,filename,fold,target,category,esc10,src_file,take
0,1-100032-A-0.wav,1,0,hayvan,True,100032,A
1,1-100038-A-14.wav,1,14,hayvan,False,100038,A
2,1-100210-A-36.wav,1,36,arkaplan,False,100210,A
3,1-100210-B-36.wav,1,36,arkaplan,False,100210,B
4,1-101296-A-19.wav,1,19,arkaplan,False,101296,A
...,...,...,...,...,...,...,...
1995,5-263831-B-6.wav,5,6,hayvan,False,263831,B
1996,5-263902-A-36.wav,5,36,arkaplan,False,263902,A
1997,5-51149-A-25.wav,5,25,insan,False,51149,A
1998,5-61635-A-8.wav,5,8,hayvan,False,61635,A


In [5]:
#cnn_model.pb
import logging
import os

# Both quieting steps run before tensorflow is imported: Python logging at
# WARNING and below is disabled, and TF_CPP_MIN_LOG_LEVEL is set in the
# environment (presumably read by TensorFlow's native logging at import time).
logging.disable(logging.WARNING)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf

tf.get_logger().setLevel('INFO')

# Load the SavedModel stored in the local 'cnn_model' directory.
reloaded_model = tf.saved_model.load('cnn_model')

In [6]:
@tf.function
def load_wav_16k_mono(filename):
    """Read a WAV file and return it as a single-channel waveform resampled to 16 kHz.

    Args:
        filename: Path of the WAV file; passed straight to ``tf.io.read_file``.

    Returns:
        The decoded audio with its trailing channel axis squeezed away, after
        ``tfio.audio.resample`` from the file's own sample rate to 16000.

    Note:
        The body uses the module-level name ``tfio`` (tensorflow_io), which
        this notebook imports in a later cell; that import must have run
        before the first call.
    """
    raw_bytes = tf.io.read_file(filename)
    # desired_channels=1 requests a single channel from the decoder.
    waveform, source_rate = tf.audio.decode_wav(raw_bytes, desired_channels=1)
    waveform = tf.squeeze(waveform, axis=-1)
    source_rate = tf.cast(source_rate, dtype=tf.int64)
    return tfio.audio.resample(waveform, rate_in=source_rate, rate_out=16000)

In [7]:
#!pip install tensorflow-io[tensorflow-gpu]
import tensorflow_io as tfio

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Evaluate the reloaded model on every split of the dataset and print a
# confusion matrix plus accuracy for each one.

# Class names indexed by the argmax of the model output.
# NOTE(review): this order presumably has to match the label order used when
# 'cnn_model' was trained -- confirm against the training notebook.
my_classes = ['insan', 'hayvan', 'arac', 'arkaplan']

# Directory prefix of the audio clips referenced by the 'filename' column.
AUDIO_DIR = './ESC-50-master/audio/'

# Split on the 'fold' column: folds below 4 for training, fold 4 for
# validation, fold 5 for testing.
train_df = df[df['fold'] < 4]
val_df = df[df['fold'] == 4]
test_df = df[df['fold'] == 5]

# 'val' is deliberately processed last: actual_list / predicted_list stay
# bound after the loop and the following cell reads them.
for split_name, split_df in [('test', test_df),
                             ('train', train_df),
                             ('val', val_df)]:

    print(f'Infering for {split_name}')
    actual_list = []
    predicted_list = []

    # Only two columns are needed, so iterate over them directly instead of
    # materialising a Series per row with iterrows().
    for filename, actual in zip(split_df['filename'], split_df['category']):
        testing_wav_data = load_wav_16k_mono(AUDIO_DIR + filename)

        reloaded_results = reloaded_model(testing_wav_data)
        predicted = my_classes[tf.math.argmax(reloaded_results)]

        actual_list.append(actual)
        predicted_list.append(predicted)

    # labels=my_classes fixes the row/column order of the matrix and keeps it
    # 4x4 even when a class never occurs in a split; without it the order is
    # derived from whichever labels happen to be present.
    print(confusion_matrix(actual_list, predicted_list, labels=my_classes))
    print(accuracy_score(actual_list, predicted_list))
    


In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score

# actual_list / predicted_list are the lists left bound by the evaluation loop
# in the previous cell, i.e. those of the last split it processed ('val').
# The confusion matrix must be printed explicitly: a bare expression that is
# not the last line of a cell is computed and then discarded.
# Rows and columns follow the order of my_classes.
print(confusion_matrix(actual_list, predicted_list, labels=my_classes))
print(accuracy_score(actual_list, predicted_list))