In [17]:
import os
import numpy as np
from sklearn import preprocessing
import python_speech_features as mfcc
from scipy.io import wavfile

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers.normalization import BatchNormalization

from keras.optimizers import SGD
from keras.utils import np_utils
from sklearn import metrics 

In [46]:
dset_train_in = 'E:/yoda/Original Dset/Train/Indoor'
dset_train_out = 'E:/yoda/Original Dset/Train/Outdoor'
dset_test_in ='E:/yoda/Original Dset/Test/Indoor'
dset_test_out ='E:/yoda/Original Dset/Test/Outdoor'

#load npy filenames
x_train_filename = np.load('E:/yoda/attempts/x_train_filename.npy')
y_train = np.load('E:/yoda/attempts/y_train_labels.npy')

x_test_filename = np.load('E:/yoda/attempts/x_test_filename.npy')
y_test = np.load('E:/yoda/attempts/y_test_labels.npy')

num_rows = 224
num_columns = 224
num_channels = 3


## custom dataset generator classes

In [75]:

class Custom_Train_Generator(keras.utils.Sequence) :
    
    def __init__(self, image_filenames, labels, batch_size) :
        self.image_filenames = image_filenames
        self.labels = labels
        self.batch_size = batch_size
    
    def __len__(self) :
        return (np.ceil(len(self.image_filenames) / float(self.batch_size))).astype(np.int)
  
  
    def __getitem__(self, idx) :
        temp_x = []
    # loading the  chunk of filenames received in batch x,batch y
        batch_x = self.image_filenames[idx * self.batch_size : (idx+1) * self.batch_size]
        batch_y = self.labels[idx * self.batch_size : (idx+1) * self.batch_size]
        print(batch_x)   
    #  taking one filename at at time, finding the actual audio, producing mfcc, append to train_x
        
        for file_name in batch_x:
            if(file_name.startswith('air' or 'met' or 'sho')):
                sampling_rate, audio_signal =wavfile.read((os.path.join(dset_train_in, file_name)))
            else:
                sampling_rate, audio_signal =wavfile.read((os.path.join(dset_train_out, file_name)))
            mfcc_feature = mfcc.mfcc(audio_signal,sampling_rate, 0.01, 0.001,numcep=20,nfilt=30,nfft = 1200, appendEnergy = True)    
            mfcc_feature = preprocessing.scale(mfcc_feature)
            temp_x.append(mfcc_feature)
        temp_x = np.array(temp_x)
       # temp_x = temp_x.reshape(batch_x.shape[0], num_rows, num_columns, num_channels)
        
        return (temp_x, np.array(batch_y))

In [76]:

class Custom_Test_Generator(keras.utils.Sequence) :
    
    def __init__(self, image_filenames, labels, batch_size) :
        self.image_filenames = image_filenames
        self.labels = labels
        self.batch_size = batch_size
    
    def __len__(self) :
        return (np.ceil(len(self.image_filenames) / float(self.batch_size))).astype(np.int)
  
  
    def __getitem__(self, idx) :
        tempo = []
    # loading the  chunk of filenames received in batch x,batch y
        batch_x = self.image_filenames[idx * self.batch_size : (idx+1) * self.batch_size]
        batch_y = self.labels[idx * self.batch_size : (idx+1) * self.batch_size]
        print(batch_x)   
    #  taking one filename at at time, finding the actual audio, producing mfcc, append to train_x
        
        for file_name in batch_x:
            if(file_name.startswith('air' or 'met' or 'sho')):
                sampling_rate, audio_signal =wavfile.read((os.path.join(dset_train_in, file_name)))
            else:
                sampling_rate, audio_signal =wavfile.read((os.path.join(dset_train_out, file_name)))
            mfcc_feature = mfcc.mfcc(audio_signal,sampling_rate, 0.01, 0.001,numcep=20,nfilt=30,nfft = 1200, appendEnergy = True)    
            mfcc_feature = preprocessing.scale(mfcc_feature)
            tempo.append(mfcc_feature)
        tempo = np.array(tempo)
       # tempo = tempo.reshape(batch_x.shape[0], num_rows, num_columns, num_channels)
        
        return (tempo, np.array(batch_y))

In [77]:
batch_size = 32

training_batch_generator = Custom_Train_Generator(x_train_filename, y_train, batch_size)
val_batch_generator = Custom_Test_Generator(x_test_filename, y_test, batch_size)

In [82]:
# Construct model 
num_labels= 2

model = Sequential()

model.add(Conv2D(filters=16, kernel_size=3, input_shape=(num_rows, num_columns, num_channels), activation='relu'))
model.add(BatchNormalization(axis=3))
model.add(MaxPooling2D(pool_size=2))

model.add(Conv2D(filters=32, kernel_size=3, activation='relu'))
model.add(BatchNormalization(axis=3))
model.add(MaxPooling2D(pool_size=2))

model.add(Conv2D(filters=64, kernel_size=3, activation='relu'))
model.add(BatchNormalization(axis=3))
model.add(MaxPooling2D(pool_size=2))

model.add(Flatten())

model.add(Dense(256, activation = "relu")) #Fully connected layer
model.add(BatchNormalization())
model.add(Dropout(0.5))

model.add(Dense(60, activation = "relu")) #Fully connected layer
model.add(BatchNormalization())
model.add(Dropout(0.5))

model.add(Dense(num_labels, activation='softmax'))

In [83]:
# Compile the model
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='SGD')

In [84]:
model.summary()

# Calculate pre-training accuracy 
#score = model.evaluate(x_test, y_test, verbose=1)
#accuracy = 100*score[1]

#print("Pre-training accuracy: %.4f%%" % accuracy)

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_28 (Conv2D)           (None, 222, 222, 16)      448       
_________________________________________________________________
batch_normalization_27 (Batc (None, 222, 222, 16)      64        
_________________________________________________________________
max_pooling2d_27 (MaxPooling (None, 111, 111, 16)      0         
_________________________________________________________________
conv2d_29 (Conv2D)           (None, 109, 109, 32)      4640      
_________________________________________________________________
batch_normalization_28 (Batc (None, 109, 109, 32)      128       
_________________________________________________________________
max_pooling2d_28 (MaxPooling (None, 54, 54, 32)        0         
_________________________________________________________________
conv2d_30 (Conv2D)           (None, 52, 52, 64)      

## Training

In [85]:
from keras.callbacks import ModelCheckpoint 
from datetime import datetime 


epochs = 2
num_batch_size = 32

checkpointer = ModelCheckpoint(filepath='saved_models/best_weights.pt', 
                               verbose=1, save_best_only=True)
start = datetime.now()

model.fit_generator(generator= training_batch_generator,
                   epochs = epochs,
                   verbose = 1,
                   validation_data = val_batch_generator,
                   callbacks=[checkpointer], )


duration = datetime.now() - start
print("Training completed in time: ", duration)



['airport-barcelona-0-0-a_1.wav' 'airport-barcelona-1-69-a_1.wav'
 'airport-barcelona-1-69-a_2.wav' 'airport-barcelona-1-70-a_1.wav'
 'airport-barcelona-1-70-a_2.wav' 'airport-barcelona-1-71-a_1.wav'
 'airport-barcelona-1-71-a_2.wav' 'airport-barcelona-1-72-a_1.wav'
 'airport-barcelona-1-72-a_2.wav' 'airport-barcelona-1-73-a_1.wav'
 'airport-barcelona-1-73-a_2.wav' 'airport-barcelona-1-74-a_1.wav'
 'airport-barcelona-1-74-a_2.wav' 'airport-barcelona-1-75-a_1.wav'
 'airport-barcelona-1-75-a_2.wav' 'airport-barcelona-1-76-a_1.wav'
 'airport-barcelona-1-76-a_2.wav' 'airport-barcelona-1-77-a_1.wav'
 'airport-barcelona-1-77-a_2.wav' 'airport-barcelona-1-78-a_1.wav'
 'airport-barcelona-1-78-a_2.wav' 'airport-barcelona-1-79-a_1.wav'
 'airport-barcelona-1-79-a_2.wav' 'airport-barcelona-1-80-a_1.wav'
 'airport-barcelona-1-80-a_2.wav' 'airport-barcelona-1-81-a_1.wav'
 'airport-barcelona-1-81-a_2.wav' 'airport-barcelona-1-82-a_1.wav'
 'airport-barcelona-1-82-a_2.wav' 'airport-barcelona-1-83-a_1.w

ValueError: in user code:

    C:\Users\User\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\keras\engine\training.py:805 train_function  *
        return step_function(self, iterator)
    C:\Users\User\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\keras\engine\training.py:795 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    C:\Users\User\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\distribute\distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\Users\User\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\distribute\distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\Users\User\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\distribute\distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\Users\User\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\keras\engine\training.py:788 run_step  **
        outputs = model.train_step(data)
    C:\Users\User\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\keras\engine\training.py:754 train_step
        y_pred = self(x, training=True)
    C:\Users\User\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\keras\engine\base_layer.py:998 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    C:\Users\User\AppData\Roaming\Python\Python38\site-packages\tensorflow\python\keras\engine\input_spec.py:234 assert_input_compatibility
        raise ValueError('Input ' + str(input_index) + ' of layer ' +

    ValueError: Input 0 of layer sequential_10 is incompatible with the layer: : expected min_ndim=4, found ndim=3. Full shape received: (None, None, None)


In [None]:
 Evaluating the model on the training and testing set
score = model.evaluate(x_train, y_train, verbose=0)
print("Training Accuracy: ", score[1])

score = model.evaluate(x_test, y_test, verbose=0)
print("Testing Accuracy: ", score[1])