In [None]:
# Download the "mmoreaux/audio-cats-and-dogs" dataset archive with the Kaggle CLI.
# NOTE(review): presumably requires Kaggle API credentials to be configured
# in this runtime beforehand — confirm.
!kaggle datasets download -d mmoreaux/audio-cats-and-dogs

In [None]:
import zipfile

# Unpack the downloaded archive into /content. The context manager closes the
# archive even if extraction fails part-way through.
with zipfile.ZipFile('/content/audio-cats-and-dogs.zip', 'r') as zip_ref:
    zip_ref.extractall('/content')

In [None]:
import tensorflow as tf
from tensorflow import keras
from keras.layers import Dense , Conv2D , MaxPooling2D , Flatten , BatchNormalization , Dropout
from keras.models import Sequential
import librosa.display, os
import matplotlib.pyplot as plt
import numpy as np

In [None]:

def create_spectrogram(audio_file, image_file):
    """Render the log-scaled mel spectrogram of an audio clip to an image file.

    Parameters
    ----------
    audio_file : str
        Path of the audio clip; it is read with ``librosa.load``.
    image_file : str
        Destination handed to ``Figure.savefig``.

    The audio is loaded and transformed before any figure is created, and the
    figure is closed in a ``finally`` clause, so a failing clip does not leave
    an open matplotlib figure behind.
    """
    y, sr = librosa.load(audio_file)
    ms = librosa.feature.melspectrogram(y=y, sr=sr)
    # Convert power to decibels relative to the loudest bin of this clip.
    log_ms = librosa.power_to_db(ms, ref=np.max)

    fig = plt.figure()
    try:
        ax = fig.add_subplot(1, 1, 1)
        # Let the axes fill the whole canvas so the image has no margins.
        fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
        # Draw on this figure's axes explicitly instead of the implicit
        # "current" axes.
        librosa.display.specshow(log_ms, sr=sr, ax=ax)
        fig.savefig(image_file)
    finally:
        plt.close(fig)

def create_pngs_from_wavs(input_path, output_path):
    """Convert every ``.wav`` file in ``input_path`` to a spectrogram PNG.

    Parameters
    ----------
    input_path : str
        Directory that is scanned (non-recursively) for audio clips.
    output_path : str
        Directory that receives one ``<stem>.png`` per clip; it is created
        if it does not exist yet.

    Entries whose extension is not ``.wav`` (compared case-insensitively) are
    skipped. Files are processed in sorted order.
    """
    os.makedirs(output_path, exist_ok=True)

    for file_name in sorted(os.listdir(input_path)):
        stem, extension = os.path.splitext(file_name)
        if extension.lower() != '.wav':
            # Not an audio clip (stray file or sub-directory): nothing to render.
            continue
        input_file = os.path.join(input_path, file_name)
        # Swap only the real extension; a ".wav" inside the stem is kept.
        output_file = os.path.join(output_path, stem + '.png')
        create_spectrogram(input_file, output_file)

In [None]:
# Write to the absolute location that image_dataset_from_directory reads
# ('/content/Spectrograms/train'), independent of the working directory.
create_pngs_from_wavs('/content/cats_dogs/train/cat', '/content/Spectrograms/train/cat')

In [None]:
# Write to the absolute location that image_dataset_from_directory reads
# ('/content/Spectrograms/train'), independent of the working directory.
create_pngs_from_wavs('/content/cats_dogs/train/dog', '/content/Spectrograms/train/dog')

In [None]:
# Training set: spectrogram images grouped in one sub-folder per class,
# delivered as 256x256 images in batches of 32 with integer labels.
train_ds = keras.utils.image_dataset_from_directory(
    '/content/Spectrograms/train',
    image_size=(256, 256),
    batch_size=32,
    label_mode='int',
    labels='inferred',
)

In [None]:
# The source folders of the test split are named differently from the training
# split: cat clips are read from test/cats and dog clips from test/test.
# NOTE(review): folder names kept exactly as in the original cell — confirm
# against the extracted archive.
# Outputs use the absolute location that the validation loader reads
# ('/content/Spectrograms/test'), independent of the working directory.
create_pngs_from_wavs('/content/cats_dogs/test/cats', '/content/Spectrograms/test/cat')
create_pngs_from_wavs('/content/cats_dogs/test/test', '/content/Spectrograms/test/dog')


In [None]:
# Validation set: built from the test-split spectrograms with the same image
# size, batch size and label encoding as the training set.
validation = keras.utils.image_dataset_from_directory(
    '/content/Spectrograms/test',
    image_size=(256, 256),
    batch_size=32,
    label_mode='int',
    labels='inferred',
)

In [None]:
# Small CNN for 256x256 RGB spectrograms: three convolutions (each followed by
# batch normalisation), two pooling/dropout steps, then a dense head that ends
# in a single sigmoid unit for the binary cat/dog decision.
model = Sequential([
    Conv2D(128, kernel_size=(3, 3), padding='valid', activation='relu',
           input_shape=(256, 256, 3)),
    BatchNormalization(),
    Conv2D(64, kernel_size=(3, 3), padding='valid', activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(0.2),
    Conv2D(32, kernel_size=(3, 3), padding='valid', activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(0.2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid'),
])


In [None]:
# Print the layer-by-layer overview of the CNN defined above.
model.summary()

In [None]:
# Binary cross-entropy pairs with the single sigmoid output unit of the model.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Stop training once val_loss has failed to improve for 3 consecutive epochs.
c = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    verbose=1,
    mode='auto',
    baseline=None,
    restore_best_weights=False
)
# The callback has to be handed to fit() to have any effect. With early
# stopping active, training can end before epoch 15, so his.history may hold
# fewer than 15 entries per metric.
his = model.fit(train_ds, epochs=15, validation_data=validation, callbacks=[c])



Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15


In [None]:
# Plot training vs. validation accuracy per epoch.
acc = his.history['accuracy']
val_acc = his.history['val_accuracy']
# Derive the x-axis from the recorded history so the plot stays valid when
# training ran for fewer (or more) than 15 epochs.
epochs = range(1, len(acc) + 1)

plt.plot(epochs, acc, '-', label='Training Accuracy')
plt.plot(epochs, val_acc, ':', label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.show()

In [None]:
# Save to the exact file that the following cell reads back
# ('/content/content.png'), independent of the working directory.
create_spectrogram("/content/cats_dogs/dog_barking_99.wav", '/content/content.png')

In [None]:
import cv2

img = cv2.imread('/content/content.png')
if img is None:
    # cv2.imread signals an unreadable/missing file by returning None.
    raise FileNotFoundError('/content/content.png')
# OpenCV decodes to BGR channel order; convert to RGB so the picture is shown
# with correct colours and matches the RGB images the model was trained on.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img)

In [None]:
# Resize to the network's 256x256 input, add a batch axis and classify:
# a sigmoid output of at least 0.5 is reported as 'dog', anything lower as 'cat'.
img = cv2.resize(img, (256, 256)).reshape(1, 256, 256, 3)
prob = model.predict(img)
print('dog' if prob >= 0.5 else 'cat')

In [None]:
# Save to the exact file that the following cell reads back
# ('/content/cat_voice.png'), independent of the working directory.
create_spectrogram("/content/cats_dogs/cat_129.wav", '/content/cat_voice.png')


In [None]:
img2 = cv2.imread('/content/cat_voice.png')
if img2 is None:
    # cv2.imread signals an unreadable/missing file by returning None.
    raise FileNotFoundError('/content/cat_voice.png')
# OpenCV decodes to BGR channel order; convert to RGB so the picture is shown
# with correct colours and matches the RGB images the model was trained on.
img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2RGB)
plt.imshow(img2)

In [None]:
# Resize to the network's 256x256 input, add a batch axis and classify:
# a sigmoid output of at least 0.5 is reported as 'dog', anything lower as 'cat'.
img2 = cv2.resize(img2, (256, 256)).reshape(1, 256, 256, 3)
prob = model.predict(img2)
print('dog' if prob >= 0.5 else 'cat')

In [None]:
# Save to the exact file that the following cell reads back
# ('/content/cat.png'), independent of the working directory.
create_spectrogram("/content/mixkit-sweet-kitty-meow-93.wav", "/content/cat.png")

In [None]:
im = cv2.imread('/content/cat.png')
if im is None:
    # cv2.imread signals an unreadable/missing file by returning None.
    raise FileNotFoundError('/content/cat.png')
# OpenCV decodes to BGR channel order; convert to RGB so the picture is shown
# with correct colours and matches the RGB images the model was trained on.
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
plt.imshow(im)

In [None]:
# Resize to the network's 256x256 input, add a batch axis and classify.
# The raw sigmoid output is printed next to the label; 0.5 and above is 'dog'.
im = cv2.resize(im, (256, 256)).reshape(1, 256, 256, 3)
prob = model.predict(im)
label = 'dog : ' if prob >= 0.5 else 'cat : '
print(label, prob)

In [None]:

# Save to the exact file that the following cell reads back
# ('/content/dog.png'), independent of the working directory.
create_spectrogram("/content/mixkit-happy-puppy-barks-741.wav", '/content/dog.png')

In [None]:
im = cv2.imread('/content/dog.png')
if im is None:
    # cv2.imread signals an unreadable/missing file by returning None.
    raise FileNotFoundError('/content/dog.png')
# OpenCV decodes to BGR channel order; convert to RGB so the picture is shown
# with correct colours and matches the RGB images the model was trained on.
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
plt.imshow(im)

In [None]:
# Resize to the network's 256x256 input, add a batch axis and classify.
# NOTE(review): this cell uses a 0.47 cut-off, whereas the earlier prediction
# cells use 0.5 — confirm the lower threshold is intentional.
im = cv2.resize(im, (256, 256)).reshape(1, 256, 256, 3)
prob = model.predict(im)
label = 'dog : ' if prob >= 0.47 else 'cat : '
print(label, prob)

In [None]:
# Save to the exact file that the following cell reads back
# ('/content/dog2.png'), independent of the working directory.
create_spectrogram("/content/mixkit-giant-dog-aggressive-growl-59.wav", '/content/dog2.png')


In [None]:

img = cv2.imread('/content/dog2.png')
if img is None:
    # cv2.imread signals an unreadable/missing file by returning None.
    raise FileNotFoundError('/content/dog2.png')
# OpenCV decodes to BGR channel order; convert to RGB so the picture is shown
# with correct colours and matches the RGB images the model was trained on.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

plt.imshow(img)
# Resize to the network's 256x256 input and add a batch axis.
img = cv2.resize(img, (256, 256))
img = img.reshape(1, 256, 256, 3)
prob = model.predict(img)
# NOTE(review): this cell uses a 0.47 cut-off, whereas the earlier prediction
# cells use 0.5 — confirm the lower threshold is intentional.
if(prob>=0.47):
  print('dog : ',prob)
else:
  print('cat : ',prob)

In [None]:
from tensorflow.keras.applications import VGG16

In [None]:
# VGG16 convolutional base without its classifier head. A concrete input shape
# is required here: without it the spatial dimensions stay undefined and a
# Flatten -> Dense head cannot be built on top. 256x256x3 matches the image
# size of the spectrogram datasets.
base = VGG16(include_top=False, input_shape=(256, 256, 3))

In [None]:
# Print the layer-by-layer overview of the VGG16 convolutional base.
base.summary()

In [None]:
# Transfer-learning variant: the VGG16 base followed by a dense head of
# 128 -> 64 -> 32 ReLU units (each with 30% dropout) and a single sigmoid unit.
# Note that this rebinds `model`, replacing the CNN defined earlier.
model = Sequential()
model.add(base)
model.add(Flatten())
for units in (128, 64, 32):
    model.add(Dense(units, activation='relu'))
    model.add(Dropout(0.3))
model.add(Dense(1, activation='sigmoid'))


model.summary()