In [56]:
import pandas as pd
import numpy as np

pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV

from sklearn.preprocessing import MinMaxScaler

In [57]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPool2D, Dropout
from tensorflow.keras.utils import to_categorical 

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

In [58]:
import os
import librosa
import librosa.display
import glob 
import skimage

In [59]:
# Shape of one CNN input sample, passed as `input_shape` to the first Conv2D
# layer of both classifiers. 16 * 8 = 128 values per sample.
INPUT_HEIGHT, INPUT_WIDTH, INPUT_CHANNELS = 16, 8, 1
input_dim = (INPUT_HEIGHT, INPUT_WIDTH, INPUT_CHANNELS)

In [60]:
def build_audio_cnn(input_shape, n_classes=10):
    """Build the two-stage convolutional classifier used in this notebook.

    The same architecture is instantiated twice in this notebook (once for
    the environmental-sound labels and once for the genre labels), so it is
    expressed as a reusable builder instead of a hand-written layer stack.

    Parameters
    ----------
    input_shape : tuple
        Shape of a single input sample, forwarded to the first Conv2D layer.
    n_classes : int, default 10
        Number of units in the final softmax layer.

    Returns
    -------
    Sequential
        An un-compiled model with freshly initialised weights; trained
        weights are loaded separately.
    """
    return Sequential([
        # Conv stage 1: 64 filters, spatial size kept by "same" padding,
        # then halved by 2x2 max pooling.
        Conv2D(64, (3, 3), padding="same", activation="tanh", input_shape=input_shape),
        MaxPool2D(pool_size=(2, 2)),
        # Conv stage 2: 128 filters, pooled again.
        Conv2D(128, (3, 3), padding="same", activation="tanh"),
        MaxPool2D(pool_size=(2, 2)),
        Dropout(0.1),
        # Dense head producing one probability per class.
        Flatten(),
        Dense(1024, activation="tanh"),
        Dense(n_classes, activation="softmax"),
    ])


# Layer types, sizes and order are unchanged, so the stored weight file
# loaded in the next cell still matches this model.
model = build_audio_cnn(input_dim)

In [61]:
# Restore the trained parameters for the sound classifier from disk.
# The path is relative to the notebook's working directory.
SOUND_WEIGHTS_FILE = 'audify_weights.hdf5'
model.load_weights(SOUND_WEIGHTS_FILE)

In [62]:
# Human-readable class names. Position i in each list is used to label
# output unit i of the corresponding model's softmax layer.
# NOTE(review): presumably this order mirrors the label encoding used at
# training time -- confirm against the training notebook.
sound = [
    'air_conditioner',
    'car_horn',
    'children_playing',
    'dog_bark',
    'drilling',
    'engine_idling',
    'gun_shot',
    'jackhammer',
    'siren',
    'street_music',
]
genre_arr = [
    'blues',
    'classical',
    'country',
    'disco',
    'hiphop',
    'jazz',
    'metal',
    'pop',
    'reggae',
    'rock',
]

In [100]:
# Load the clip to classify and reduce it to one feature vector:
# the mel spectrogram averaged over its second axis (one mean per mel band).
# NOTE(review): 'fin.wav' is written by the mixing cell at the bottom of this
# notebook, so on a fresh top-to-bottom run the file must already exist.
filename = 'fin.wav'
x, sample_rate = librosa.load(filename, res_type='kaiser_fast')
mel_spectrogram = librosa.feature.melspectrogram(y=x, sr=sample_rate)
# The later reshape to (1, 16, 8, 1) needs exactly 128 values here.
mels_specific = np.mean(mel_spectrogram.T, axis=0)

In [101]:
# Turn the flat feature vector into a batch of one sample in the layout the
# CNN expects. The shape is derived from `input_dim` (the same tuple given to
# the first Conv2D layer) rather than repeated as literals, so the tensor and
# the model input stay in sync if `input_dim` is ever changed.
single_in = mels_specific.reshape((1, *input_dim))

In [102]:
# Run the sound classifier on the single prepared sample; the result holds
# one row with one softmax probability per entry of `sound`.
pred2 = model.predict(x=single_in)

In [103]:
# Show the raw probability row produced by the sound classifier.
pred2

array([[2.0756159e-04, 4.8424770e-18, 1.1443810e-02, 9.9558729e-13,
        3.3737899e-06, 2.5163612e-16, 3.8596877e-09, 9.8812824e-01,
        6.2156264e-13, 2.1697530e-04]], dtype=float32)

In [110]:
# List every sound class whose predicted probability exceeds 1e-4, in label
# order (not sorted by confidence). Useful for a mixed clip where more than
# one source may be present.
for label, score in zip(sound, pred2[0]):
    if score > 1e-4:
        print(label)

air_conditioner
children_playing
jackhammer
street_music


In [112]:
# Most likely sound class: index of the largest probability in the single
# prediction row, mapped back to its label.
sound[int(np.argmax(pred2[0]))]

'jackhammer'

In [34]:
# Genre classifier: same layer stack as the sound classifier (two
# conv + max-pool stages, light dropout, dense head with a 10-way softmax).
# Only the weights loaded afterwards differ.
model_genre = Sequential([
    Conv2D(64, (3, 3), padding="same", activation="tanh", input_shape=input_dim),
    MaxPool2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), padding="same", activation="tanh"),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.1),
    Flatten(),
    Dense(1024, activation="tanh"),
    Dense(10, activation="softmax"),
])

In [35]:
# Restore the trained parameters for the genre classifier from disk.
# The path is relative to the notebook's working directory.
GENRE_WEIGHTS_FILE = 'audify_weights_genre.hdf5'
model_genre.load_weights(GENRE_WEIGHTS_FILE)

In [36]:
# Score the same prepared sample (`single_in`) with the genre classifier;
# the result holds one softmax probability per entry of `genre_arr`.
genre_pred = model_genre.predict(x=single_in)

In [37]:
# Show the raw probability row produced by the genre classifier.
genre_pred

array([[1.0012066e-15, 1.3781706e-27, 1.6641731e-13, 5.2828644e-09,
        9.9959999e-01, 6.5010753e-23, 3.9941655e-04, 5.0569970e-13,
        6.0644061e-07, 3.0175482e-08]], dtype=float32)

In [38]:
# Most likely genre: index of the largest probability in the single
# prediction row, mapped back to its label.
genre_arr[int(np.argmax(genre_pred[0]))]

'hiphop'

In [97]:
# Load the two source clips that will be mixed into 'fin.wav'. Both are
# loaded with identical settings so their samples can be summed directly.
(x1, sample_rate1), (x2, sample_rate2) = (
    librosa.load(wav_path, res_type='kaiser_fast')
    for wav_path in ('t1.wav', 't2.wav')
)

In [98]:
def mix_signals(primary, secondary, secondary_gain=1.5):
    """Overlay two mono waveforms, boosting the second one.

    A plain ``primary + gain * secondary`` raises a broadcasting error when
    the clips differ in length. The shorter clip is therefore zero-padded
    (silence appended) so no audio from the longer clip is lost. For clips
    of equal length the result is identical to the plain sum.

    Parameters
    ----------
    primary, secondary : 1-D numpy arrays
        Waveforms to combine, expected to share one sample rate.
    secondary_gain : float, default 1.5
        Multiplier applied to ``secondary`` before summing.

    Returns
    -------
    numpy array whose length is that of the longer input.
    """
    target_len = max(len(primary), len(secondary))
    primary = np.pad(primary, (0, target_len - len(primary)), mode="constant")
    secondary = np.pad(secondary, (0, target_len - len(secondary)), mode="constant")
    return primary + secondary_gain * secondary


# Mix t1 with a 1.5x-boosted t2.
# NOTE(review): the sum is not peak-limited, so samples may fall outside
# [-1, 1] before the file is written -- confirm this is acceptable.
fin = mix_signals(x1, x2, secondary_gain=1.5)
fin

array([ 0.01814515,  0.0202877 ,  0.01789281, ..., -0.03620626,
        0.00335447,  0.00561816], dtype=float32)

In [99]:
import soundfile as sf

# Persist the mixed signal as 24-bit PCM at the sample rate of the first
# clip. This is the 'fin.wav' file that the feature-extraction cell near the
# top of the notebook reads back in.
sf.write('fin.wav', fin, sample_rate1, subtype='PCM_24')