In [None]:
# Mount Google Drive into the Colab runtime; the dataset paths used below
# all live under this mount point.
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from tqdm import tqdm
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow.keras.models as models
import tensorflow.keras.layers as layers

%matplotlib inline
%load_ext tensorboard

In [None]:
# Root of the unzipped dataset archive on the mounted Drive. Both paths below
# live under it, so the long prefix is written once instead of twice.
dataset_root="/content/drive/MyDrive/Datasets-Collab/Kaggle-sound_classification/archive.zip (Unzipped Files)/"

# Metadata table (read with pd.read_csv in the next cell).
csv_path=dataset_root+"esc50.csv"

# Directory holding the audio clips. The trailing slash matters: file names are
# appended to this string by plain concatenation further down.
audio_path=dataset_root+'audio/audio/44100/'

In [None]:
# Load the metadata CSV; later cells read its 'filename', 'target' and
# 'category' columns.
df_train = pd.read_csv(filepath_or_buffer=csv_path)

In [None]:
# Distinct category names found in the metadata.
category_column = df_train['category']
classes = category_column.unique()

In [None]:
# Lookup from category name to its position in `classes`.
# NOTE(review): this position is just the enumeration order of `classes`; it is
# not guaranteed to equal the CSV's `target` id - confirm before using it as a label.
class_dict = dict(zip(classes, range(len(classes))))

In [None]:
# Keep only the first row encountered for each target id, i.e. one example
# clip per class.
sample_df = df_train.drop_duplicates(subset=['target'], keep='first')

In [None]:
# Only these three columns are used by the per-class preview that follows.
preview_columns = ['filename', 'target', 'category']
sample_df = sample_df[preview_columns]

In [None]:
# Per-class previews, keyed by category name: the raw waveform, the dB-scaled
# mel spectrogram and the MFCC matrix of one example clip per class.
signals = {}
mel_spectrograms = {}
mfccs = {}

# iterrows() is a generator with no length, so tqdm can only show a bare
# iteration counter unless the total is passed explicitly.
for index,row in tqdm(sample_df.iterrows(),total=len(sample_df)):
  category=row['category']

  # extracting the signal (no sr argument is given, so librosa.load applies
  # its own default sampling-rate handling)
  signal,sr=librosa.load(audio_path + row['filename'])
  signals[category]=signal

  # extracting the mel spectrogram, then converting power to dB relative to
  # the spectrogram's own maximum
  mel_spec=librosa.feature.melspectrogram(y=signal,sr=sr,n_fft=2048,hop_length=512)
  mel_power=librosa.power_to_db(mel_spec,ref=np.max)
  mel_spectrograms[category]=mel_power

  # extracting the MFCCs (13 coefficients, DCT type 3 requested explicitly)
  mfcc=librosa.feature.mfcc(y=signal,sr=sr,n_mfcc=13,dct_type=3)
  mfccs[category]=mfcc

50it [00:49,  1.01it/s]


In [None]:
# Sanity check: number of categories for which a preview signal was stored.
len(signals)

50

In [None]:
# Peek at the category name stored third in the preview dictionary.
list(signals)[2]

'vacuum_cleaner'

In [None]:


def plot_signal(signal,name,cmap=None):
    """
    Plot the first ten entries of a feature dictionary on a 5x2 grid.

    Parameters
    ----------
    signal : dict
        Maps a label (used as the subplot title) to the data to draw: a 1-D
        sequence when ``name == 'signal'``, otherwise a 2-D array suitable
        for ``imshow``. Entries are drawn in dictionary order; entries beyond
        the tenth are ignored, and panels without data are left blank.
    name : str
        ``'signal'`` draws every entry as a line plot; any other value draws
        every entry as an image.
    cmap : optional
        Colormap forwarded to ``imshow``; unused for line plots.

    Returns
    -------
    None
        The figure is created through pyplot and left for the notebook
        backend to render.
    """
    fig , axes = plt.subplots(nrows=5 , ncols=2 , sharex =False ,sharey=True,
                             figsize=(40,20))
    fig.suptitle('Time series',size=15)

    draw_as_line = (name=='signal')

    # Hide the axis decorations on every panel up front, so panels that end up
    # without data (fewer than ten entries) are blank rather than showing ticks.
    for ax in axes.flat:
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

    # axes.flat walks the grid row by row; zip stops at whichever runs out
    # first, so short dictionaries no longer raise IndexError.
    for ax, (label, values) in zip(axes.flat, signal.items()):
        ax.set_title(label)
        if draw_as_line:
            ax.plot(values)
        else:
            ax.imshow(values, cmap=cmap,interpolation='nearest')



In [None]:
# Any name other than 'signal' selects the imshow branch of plot_signal, which
# is what a 2-D MFCC matrix needs. Use an explicit label instead of a string
# that reads like a misspelling of 'signal'.
plot_signal(mfccs,'mfcc')

In [None]:
# Show the first two metadata rows as a quick look at the columns.
df_train.head(n=2)

In [None]:
# Build the training set: for every clip, take 3 random 2-second crops and
# store the 13-coefficient MFCC matrix of each crop together with the clip's
# integer target.
X=[]
Y=[]

# Seeded generator so the random crop offsets (and therefore the whole
# dataset) are identical on every run.
crop_rng=np.random.default_rng(2020)

# total= gives tqdm a real progress bar; iterrows() alone has no length.
for index,row in tqdm(df_train.iterrows(),total=len(df_train)):
  signal,sr=librosa.load(audio_path+row['filename'])
  # Crop length in samples (2 seconds at the loaded sampling rate).
  crop_len=int(2*sr)
  for i in range(3):
    # Random start offset in [0, len(signal) - crop_len).
    # Assumes every clip is longer than 2 s; otherwise the upper bound is <= 0
    # and the draw raises ValueError.
    n=crop_rng.integers(0,len(signal)-crop_len)
    sign_=signal[n:n+crop_len]
    # The audio must be passed as y=...: recent librosa releases make the
    # arguments of feature.mfcc keyword-only (and the preview cell already
    # calls it this way).
    mfcc_=librosa.feature.mfcc(y=sign_,sr=sr,n_mfcc=13)
    X.append(mfcc_)
    Y.append(row['target'])



2000it [18:55,  1.76it/s]


In [None]:
# Stack the per-crop MFCC matrices and their integer targets into arrays.
X = np.array(X)
y = np.array(Y)

# One-hot encode the integer targets into 50 columns.
y = tf.keras.utils.to_categorical(y, num_classes=50)

# The convolutional model takes (n_samples, height, width, channels), while X
# is (n_samples, height, width) here, so add a trailing channel axis of size 1.
n_samples, height, width = X.shape[:3]
X = X.reshape(n_samples, height, width, 1)

In [None]:
# Hold out 20% of the samples for validation; the fixed random_state keeps the
# split repeatable.
# NOTE(review): the split is made per crop, and each clip contributes 3 crops,
# so crops of the same clip can land on both sides of the split - validation
# scores may be optimistic. Consider splitting by clip instead.
x_train, x_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2020,
)


In [None]:
# Shape of a single model input: (MFCC coefficients, time frames, channels).
# 13 matches n_mfcc used when building X and 1 is the channel axis added by
# the reshape; 87 is presumably the frame count of a 2-second crop - verify
# against X.shape[1:].
N_MFCC_ROWS, N_TIME_FRAMES, N_CHANNELS = 13, 87, 1
INPUTSHAPE = (N_MFCC_ROWS, N_TIME_FRAMES, N_CHANNELS)

In [None]:
# Small CNN over the MFCC "image": five unpadded 3x3 convolutions, global
# average pooling, then a dense head ending in a 50-way softmax.
model =  models.Sequential([
                          layers.Conv2D(64 , (3,3),activation = 'relu',padding='valid', input_shape = INPUTSHAPE),
                          layers.Conv2D(128, (3,3), activation='relu',padding='valid'),
                          layers.Conv2D(64, (3,3), activation='relu',padding='valid'),

                          layers.Conv2D(64, (3,3), activation='relu',padding='valid'),
                          layers.Conv2D(32, (3,3), activation='relu',padding='valid'),
                          # Collapses the spatial dimensions to one value per feature map.
                          layers.GlobalAveragePooling2D(),
                          layers.Dense(32 , activation = 'relu'),
                          # One output per class; matches num_classes=50 used for the one-hot targets.
                          layers.Dense(50 , activation = 'softmax')
])

# metrics is passed as a list: a bare string is rejected by newer Keras
# releases, while a list is accepted everywhere. The metric is still reported
# under the name 'acc'.
model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['acc'])


In [None]:
%mkdir "cpkt"
%mkdir "logs"
LOGDIR = "logs"
CPKT = "cpkt/"

callback_1 = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=3, verbose=0, mode='auto',
    baseline=None, restore_best_weights=False
)

#this checkpoint saves the best weights of model at every epoch
callback_2 = tf.keras.callbacks.ModelCheckpoint(
    CPKT, monitor='val_loss', verbose=0, save_best_only=True,
    save_weights_only=True, mode='auto', save_freq='epoch', options=None
)

#this is for tensorboard
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=LOGDIR)


mkdir: cannot create directory ‘cpkt’: File exists
mkdir: cannot create directory ‘logs’: File exists


In [None]:
# Train for at most 100 epochs, evaluating on the validation split after each
# one; the callbacks take care of early stopping, checkpointing and
# TensorBoard logging.
training_callbacks = [callback_1, callback_2, tensorboard_callback]
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    epochs=100,
    callbacks=training_callbacks,
)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
