# Train the model (Urban Data set)
To train this model, you need to download the Urban Sound dataset from https://urbansounddataset.weebly.com/

> 
Then place the fold directories in the Sound-Data folder.
>
You can decide which folds are used as training data and which as test data.
>
***This file was created as a representation for a signal processing course project.***


In [0]:
#importing libraries
# feature extraction and data preprocessing
import librosa
import glob
import numpy as np
import os
# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
 
#Keras
import keras
 
import warnings
warnings.filterwarnings('ignore')


import scipy.io.wavfile as wf

import pyaudio
import wave

from keras import models
from keras import layers
import matplotlib.pyplot as plt
from scipy.io import wavfile as wav
from scipy.fftpack import fft

In [0]:

#functions used for feature extraction and reading file + setting labels

def extract_feature(file_name):
    """Compute spectral features for one audio file.

    Parameters
    ----------
    file_name : str
        Path of the audio file; it is handed to ``librosa.load``.

    Returns
    -------
    tuple
        ``(mfccs, stft, chroma, mel, contrast, tonnetz)``. Every item except
        ``stft`` is the mean of the transposed librosa feature matrix along
        axis 0, i.e. one value per feature coefficient. ``stft`` is the
        un-averaged magnitude spectrogram.
    """
    X, sample_rate = librosa.load(file_name)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    # The signal must be passed by keyword: recent librosa releases made the
    # arguments of melspectrogram keyword-only, so a positional X raises.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    # Tonnetz is computed on the harmonic component of the signal only.
    tonnetz = np.mean(
        librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,
        axis=0,
    )
    return mfccs, stft, chroma, mel, contrast, tonnetz

  
  
  
def parse_audio_files(parent_dir,sub_dirs,file_ext="*.wav"):
    """Build the feature matrix and label vector for a set of folders.

    Every file matching ``parent_dir/<sub_dir>/<file_ext>`` is passed to
    ``extract_feature``. The class label is the second ``-``-separated field
    of the file's base name.

    Parameters
    ----------
    parent_dir : str
        Directory that contains the sub-directories.
    sub_dirs : iterable of str
        Names of the sub-directories to scan.
    file_ext : str
        Glob pattern selecting the audio files inside each sub-directory.

    Returns
    -------
    (features, labels)
        ``features`` is a float array with 193 columns, one row per parsed
        file; ``labels`` is an integer array of the same length. Both are
        empty when no file could be parsed.
    """
    rows, labels = [], []
    for sub_dir in sub_dirs:
        for fn in glob.glob(os.path.join(parent_dir, sub_dir, file_ext)):
            try:
                mfccs, _stft, chroma, mel, contrast, tonnetz = extract_feature(fn)
            except Exception as e:
                # Best effort: an unreadable clip is reported and skipped.
                print ("Error encountered while parsing file: ", fn, e)
                continue
            try:
                # Use the base name so the result does not depend on the path
                # depth or on the OS path separator.
                label = int(os.path.basename(fn).split('-')[1])
            except (IndexError, ValueError):
                print("Could not read a class label from file name: ", fn)
                continue
            ext_features = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
            print(ext_features.shape)
            rows.append(ext_features)
            labels.append(label)
    # Stack once at the end; the empty float64 seed keeps the (0, 193) shape
    # and dtype when nothing was parsed.
    features = np.vstack([np.empty((0, 193))] + rows)
    return features, np.array(labels, dtype=int)

In [0]:

#doing one-hot encode on labels

def one_hot_encode(labels, n_classes=None):
    """Convert integer class labels into a one-hot matrix.

    Parameters
    ----------
    labels : array-like of int
        Class index of every sample.
    n_classes : int, optional
        Number of columns of the result. When omitted it is the larger of
        the number of distinct labels and ``max(labels) + 1``, so labels
        that skip some class ids still fit.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(labels), n_classes)`` holding 1 in the column
        of each sample's label and 0 elsewhere.
    """
    labels = np.asarray(labels, dtype=int)
    n_labels = len(labels)
    if n_classes is None:
        if n_labels:
            n_classes = max(len(np.unique(labels)), int(labels.max()) + 1)
        else:
            n_classes = 0
    encoded = np.zeros((n_labels, n_classes))
    encoded[np.arange(n_labels), labels] = 1
    return encoded

#defining directories for reading files

parent_dir = 'Sound-Data'
#train directory
tr_sub_dirs = ["fold1","fold3","fold5","fold6","fold7"]
#test directory
# This must be its own variable: assigning it to tr_sub_dirs would discard
# the training folds and leave ts_sub_dirs undefined.
ts_sub_dirs = ["new"]

tr_features, tr_labels = parse_audio_files(parent_dir,tr_sub_dirs)
ts_features, ts_labels = parse_audio_files(parent_dir,ts_sub_dirs)

# NOTE(review): the two label sets are encoded independently, so their
# one-hot widths only agree when both sets contain the same classes.
tr_labels = one_hot_encode(tr_labels)
ts_labels = one_hot_encode(ts_labels)


In [0]:
# Dense classifier: four ReLU hidden layers (512 -> 256 -> 128 -> 64) feeding
# a 10-unit softmax output. The input width is taken from the training matrix.
model = models.Sequential([
    layers.Dense(512, activation='relu', input_shape=(tr_features.shape[1],)),
    layers.Dense(256, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax'),
])

In [0]:
#compile and fit the model

# tr_labels is one-hot encoded, so the matching loss is
# categorical_crossentropy; the sparse variant expects integer class ids.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.fit(tr_features,
          tr_labels,
          epochs=15,
          batch_size=10)

In [0]:
# Score the trained network on the test features and labels. evaluate()
# returns the loss followed by the accuracy metric set at compile time.
test_loss, test_acc = model.evaluate(x=ts_features, y=ts_labels)

print("loss", test_loss)
print("acc", test_acc)

In [0]:
# Persist the trained network in two files: the architecture as JSON text
# and the weights in HDF5 format.
model_json = model.to_json()
with open("model.json", mode="w") as json_file:
    json_file.write(model_json)

model.save_weights("model.h5")
print("Saved model to disk")

# prediction part
From now on we don't need to train the model again.

We will just restore it and use model.predict.


In [0]:
import os
import pyaudio
import wave


In [0]:
# Recording the prediction file from user input (sound).
# In this model it is not used for prediction, because this kind of data cannot be produced by humans.

def audio_input(folder='db/test1/', record_seconds=2, output_filename="test.wav"):
    """Record a short clip from the audio input and save it as a WAV file.

    The regular files already present in ``folder`` are deleted first, so
    after a successful call the recording is the only file left there
    (sub-directories are not touched).

    Parameters
    ----------
    folder : str
        Directory that receives the recording; created when missing.
    record_seconds : int or float
        Length of the recording in seconds.
    output_filename : str
        File name of the WAV file written inside ``folder``.
    """
    os.makedirs(folder, exist_ok=True)
    for the_file in os.listdir(folder):
        file_path = os.path.join(folder, the_file)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
        except OSError as e:
            # Best effort: a file that cannot be removed is only reported.
            print(e)

    # audio specifications
    FORMAT = pyaudio.paInt16
    CHANNELS = 2
    RATE = 44100
    CHUNK = 1024

    audio = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still alive.
        sample_width = audio.get_sample_size(FORMAT)

        # start Recording
        stream = audio.open(format=FORMAT, channels=CHANNELS,
                            rate=RATE, input=True,
                            frames_per_buffer=CHUNK)
        try:
            print("recording...")
            n_chunks = int(RATE / CHUNK * record_seconds)
            frames = [stream.read(CHUNK) for _ in range(n_chunks)]
            print("finished recording")
        finally:
            # stop Recording, even when reading from the device failed
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()

    # writing the wav file into the target folder
    with wave.open(os.path.join(folder, output_filename), 'wb') as wave_file:
        wave_file.setnchannels(CHANNELS)
        wave_file.setsampwidth(sample_width)
        wave_file.setframerate(RATE)
        wave_file.writeframes(b''.join(frames))

In [0]:
#extracting features for prediction data
def extract_feature(file_name):
    """Compute the averaged spectral features of one audio file for prediction.

    NOTE: this redefinition replaces the ``extract_feature`` defined earlier
    in the file. It returns five values (no ``stft``), whereas the earlier
    one returns six, so ``parse_audio_files`` cannot be re-run after this
    definition has been executed.

    Parameters
    ----------
    file_name : str
        Path of the audio file; it is handed to ``librosa.load``.

    Returns
    -------
    tuple
        ``(mfccs, chroma, mel, contrast, tonnetz)``, each the mean of the
        transposed librosa feature matrix along axis 0.
    """
    X, sample_rate = librosa.load(file_name)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    # The signal must be passed by keyword: recent librosa releases made the
    # arguments of melspectrogram keyword-only, so a positional X raises.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    # Tonnetz is computed on the harmonic component of the signal only.
    tonnetz = np.mean(
        librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,
        axis=0,
    )
    return mfccs, chroma, mel, contrast, tonnetz

def parse_audio_files_pred(parent_dir,sub_dirs,file_ext="*.wav"):
    """Build the feature matrix for the clips to classify.

    Scans ``parent_dir/<sub_dir>/<file_ext>`` for every entry of
    ``sub_dirs`` and stacks one 193-column feature row per file found.
    No labels are produced. Returns an array of shape ``(n_files, 193)``.
    """
    stacked = np.empty((0, 193))
    for _, folder_name in enumerate(sub_dirs):
        pattern = os.path.join(parent_dir, folder_name, file_ext)
        for wav_path in glob.glob(pattern):
            mfccs, chroma, mel, contrast, tonnetz = extract_feature(wav_path)
            row = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
            stacked = np.vstack([stacked, row])
    return np.array(stacked)

In [0]:
# Clips to classify are read from Sound-Data/test/*.wav
parent_dir = 'Sound-Data'
pred_sub_dirs = ["test"]
pred_features = parse_audio_files_pred(parent_dir, pred_sub_dirs)

In [0]:
# load json and create model
with open('model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
# model_from_json is never imported by name in this file, so it is reached
# through the already-imported keras.models module.
loaded_model = models.model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("model.h5")
print("Loaded model from disk")

In [0]:
# Classify the clips with the restored model and print the index of the
# highest-scoring class for the first clip only.
pred_features = parse_audio_files_pred(parent_dir, pred_sub_dirs)
predictions = loaded_model.predict(pred_features)
result = predictions[0].argmax()
print(result)
