# **Data Importation**

In [1]:
#Connect to Google Drive
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
!cp -r /content/gdrive/MyDrive/BD_P2M/* /content

^C


# **Library Importation**

In [None]:
from python_speech_features import mfcc
import scipy.io.wavfile as wav
import librosa
import scipy
import numpy as np
from tempfile import TemporaryFile
import os
import pickle
import random 
import operator
import math
import numpy as np

# **Dataset labels**

In [None]:
data = pd.read_csv('/content/UrbanSound8K.csv')

In [None]:
data = data[['slice_file_name','class']]

# **Data preprocessing**

In [None]:
#Get the distance between feature vectors and find neighbors
def getNeighbors(trainingSet, instance, k):
    distances = []
    for x in range (len(trainingSet)):
        dist = distance(trainingSet[x], instance, k )+ distance(instance, trainingSet[x], k)
        distances.append((trainingSet[x][2], dist))
    distances.sort(key=operator.itemgetter(1))
    #Get the first k neighbors
    neighbors = []
    for x in range(k):
        neighbors.append(distances[x][0])
    return neighbors

In [None]:
#Identify the nearest neighbors
def nearestClass(neighbors):
    classVote = {}
    for x in range(len(neighbors)):
        response = neighbors[x]
        if response in classVote:
            classVote[response]+=1 
        else:
            classVote[response]=1
    sorter = sorted(classVote.items(), key = operator.itemgetter(1), reverse=True)
    return sorter[0][0]

In [None]:
#model evaluation
def getAccuracy(testSet, predictions):
    correct = 0 
    for x in range (len(testSet)):
        if testSet[x][-1]==predictions[x]:
            correct+=1
    return 1.0*correct/len(testSet)

In [None]:
#Extract features from the dataset and dump these features into a binary .dat file “my.dat”
directory = "__path_to_dataset__"
f= open("my.dat" ,'wb')
i=0

for folder in os.listdir(directory):
    i+=1
    if i==11 :
        break   
    for file in os.listdir(directory+folder):  
        (rate,sig) = wav.read(directory+folder+"/"+file)
        mfcc_feat = mfcc(sig,rate ,winlen=0.020, appendEnergy = False)
        covariance = np.cov(np.matrix.transpose(mfcc_feat))
        mean_matrix = mfcc_feat.mean(0)
        feature = (mean_matrix , covariance , i)
        pickle.dump(feature , f)

f.close()

In [None]:
#Train and test split on the dataset
dataset = []
def loadDataset(filename , split , trSet , teSet):
    with open("my.dat" , 'rb') as f:
        while True:
            try:
                dataset.append(pickle.load(f))
            except EOFError:
                f.close()
                break  

    for x in range(len(dataset)):
        if random.random() <split :      
            trSet.append(dataset[x])
        else:
            teSet.append(dataset[x])  

trainingSet = []
testSet = []
loadDataset("my.dat" , 0.66, trainingSet, testSet)

In [None]:
#Make prediction using KNN and get the accuracy on test data.
leng = len(testSet)
predictions = []
for x in range (leng):
    predictions.append(nearestClass(getNeighbors(trainingSet ,testSet[x] , 5))) 

accuracy1 = getAccuracy(testSet , predictions)
print(accuracy1)

# **Using CSV file (result of MFCC algorithm)**

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import lightgbm as lgb

In [2]:
df=pd.read_csv('/content/UrbanSound8Keditedv.csv')

In [3]:
print(df.shape)

(4893, 28)


In [4]:
df.head(3)

Unnamed: 0,filename,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,mfcc5,mfcc6,mfcc7,mfcc8,mfcc9,mfcc10,mfcc11,mfc00c12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,189982-0-0-35.wav,0.681211,0.075784,1631.96649,2140.378404,3528.955925,0.055393,-142.56395,125.077187,-10.216515,27.029228,1.33996,18.702974,-6.793038,20.499489,-5.225411,12.160578,-3.948158,5.572355,-2.568972,4.670658,-4.946015,2.168482,3.094648,9.242079,0.71285,5.941217,air_conditioner
1,189982-0-0-28.wav,0.688358,0.075386,1633.404942,2145.214079,3508.169538,0.054292,-141.233398,125.261086,-10.129498,26.120665,1.658286,18.237345,-5.919254,21.346272,-5.087957,11.822664,-4.099065,5.694098,-3.267993,4.600896,-4.898541,1.334499,1.803144,8.87776,0.773029,6.707254,air_conditioner
2,189982-0-0-20.wav,0.607842,0.085357,1747.500105,2173.11644,3748.45768,0.070058,-114.247055,121.277725,-14.160892,21.470415,3.910751,19.410278,-9.138346,17.81506,1.377123,9.119953,-8.878295,-2.979844,-4.846424,5.002172,-4.928491,-1.010129,7.178382,13.637387,2.225455,7.108963,air_conditioner


In [5]:
df.label.value_counts()

air_conditioner     1038
children_playing     566
drilling             550
engine_idling        510
dog_bark             465
street_music         449
car_horn             438
jackhammer           359
siren                312
gun_shot             206
Name: label, dtype: int64

In [6]:
y = df['label']
x = df.drop(['filename','label'],axis=1)

In [7]:
le = LabelEncoder()
yn = le.fit_transform(y)

In [8]:
x_tr,x_val,y_tr,y_val = train_test_split(x,yn,test_size=0.15,random_state=42)

In [9]:
trn_data = lgb.Dataset(x_tr, label=y_tr)
val_data = lgb.Dataset(x_val, label=y_val)

In [16]:
weight = 10
param = { 'num_leaves': 30,
    'max_depth': 6,     
    'boost': 'gbdt', #
    'num_classes' : 10,
    'learning_rate': 0.01,
    'n_estimators':4000,
    'colsample_bytree':0.85,
    'metric':'multi_logloss',
    'scale_pos_weight': round(weight, 1),   
    'objective': 'multiclass',
    'random_state': 42,
    'seed':1222,   
}

In [17]:
lgb_model = lgb.train(param, trn_data, valid_sets = [trn_data, val_data], verbose_eval=200,early_stopping_rounds=200)



Training until validation scores don't improve for 200 rounds.
[200]	training's multi_logloss: 0.603691	valid_1's multi_logloss: 0.8469
[400]	training's multi_logloss: 0.259799	valid_1's multi_logloss: 0.545029
[600]	training's multi_logloss: 0.134751	valid_1's multi_logloss: 0.42425
[800]	training's multi_logloss: 0.0700285	valid_1's multi_logloss: 0.350201
[1000]	training's multi_logloss: 0.0360533	valid_1's multi_logloss: 0.302415
[1200]	training's multi_logloss: 0.0179148	valid_1's multi_logloss: 0.270328
[1400]	training's multi_logloss: 0.00912996	valid_1's multi_logloss: 0.251789
[1600]	training's multi_logloss: 0.00456738	valid_1's multi_logloss: 0.239515
[1800]	training's multi_logloss: 0.00230965	valid_1's multi_logloss: 0.232125
[2000]	training's multi_logloss: 0.00120324	valid_1's multi_logloss: 0.22677
[2200]	training's multi_logloss: 0.000627418	valid_1's multi_logloss: 0.224071
[2400]	training's multi_logloss: 0.000325031	valid_1's multi_logloss: 0.22098
[2600]	training's

In [None]:
test = pd.read_csv('')

In [None]:
pred = lgb_model.predict(test,num_iteration=lgb_model.best_iteration)

# **Use of this model**

In [None]:
#we are going to use this model and apply it to an audio file which contains several mixed sounds
#Steps :
  #Load sound file
  #Decompose it into parts according to its content using the NMF algorithm
  #Predict the label of each part of sound
  #Print predicted sounds

In [None]:
data,rate = librosa.load('file.wav')

In [None]:
S = librosa.stft(data)
X,X_phase = librosa.magphase(S)

In [None]:
n_component = None #specify the number of components that the audio contains
W,H = librosa.decompose.decompose(X,n_component=n_component,sort=True)
print(W.shape)
print(H.shape)

In [None]:
l_sounds=[]
for n in range(n_component):
  Y = scipy.Outer(W[:,n],H[:,n])*X*X_phase
  y = librosa.istft(Y)
  #processing
  mfcc_feat = mfcc(y,rate ,winlen=0.020, appendEnergy = False)
  covariance = np.cov(np.matrix.transpose(mfcc_feat))
  mean_matrix = mfcc_feat.mean(0)
  feature = (mean_matrix , covariance , i)
  pred = lgb_model.predict(test,num_iteration=lgb_model.best_iteration)
  l_sounds.append(pred)

In [None]:
print('The sounds identified in this audio file')
print(l_sounds)