# Testing SVC model with new recordings

In [1]:
import pickle
import librosa
import librosa.display
import pandas as pd
import numpy as np

from os import listdir
from os.path import isfile, join
import csv

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [8]:
scaler = pickle.load(open('..\models\scaler', 'rb'))
svc = pickle.load(open('..\models\svc_model', 'rb'))

## audios aleatórios

In [3]:
# create dataframe header

header = 'filename rms spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'
header = header.split()

In [4]:
file = open('..\\data\\extracted_data\\test_aleatorio.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)

In [5]:
# extract features from audio data and save in csv file

for i in ['trem', 'pedra', 'aspirador']:
    for filename in os.listdir(f'../data/test_audio/{i}'):
        songname = f'../data/test_audio/{i}/{filename}'
        y, sr = librosa.load(songname, sr =44100)
        rms = librosa.feature.rms(y=y), 
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        to_append = f'{filename} {np.mean(rms)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'    
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {i}'
        file = open('..\\data\extracted_data\\test_aleatorio.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())

In [6]:
df = pd.read_csv('..\\data\\extracted_data\\test_aleatorio.csv')

In [7]:
d = df.drop(columns = ['filename', 'label'])

In [8]:
X = pd.DataFrame(scaler.transform(np.array(d, dtype = float)), index=d.index, columns=d.columns)

In [9]:
proba = svc.predict_proba(X)

for i in range(0, len(proba)):
    print('Probabilidades {}'.format(df.label[i]))
    for j in range(0, len(svc.classes_)):
        print('{}: {}%'.format(svc.classes_[j], round(proba[i][j]*100, 2)))

Probabilidades trem
flu: 22.16%
gel: 17.63%
pia: 7.02%
sax: 15.8%
tru: 18.69%
vio: 18.69%
Probabilidades pedra
flu: 20.7%
gel: 15.62%
pia: 6.43%
sax: 17.56%
tru: 19.51%
vio: 20.18%
Probabilidades aspirador
flu: 18.18%
gel: 12.49%
pia: 5.9%
sax: 19.61%
tru: 21.94%
vio: 21.89%


## IRMAS - test 1

In [10]:
# create dataframe header

header = 'rms spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
# header += ' label'
header = header.split()

In [11]:
file = open('..\\data\\extracted_data\\test_p1.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)

In [13]:
df_test1 = pd.DataFrame(columns=['filename', 'labels'])
for filename in os.listdir('../data/test_audio/IRMAS/Part1'):
    if filename[-3:] == 'wav':
        songname = f'../data/test_audio/IRMAS/Part1/{filename}'
        y, sr = librosa.load(songname, sr =44100)
        rms = librosa.feature.rms(y=y), 
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        to_append = f'{np.mean(rms)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {i}'
        file = open('..\\data\extracted_data\\test_p1.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())
    else:
        label = list()
        with open(f'../data/test_audio/IRMAS/Part1/{filename}') as fp:
            for line in fp:
                label.append(line.strip())
        new_row = [filename, label]
        df_test1.loc[len(df_test1)] = new_row

In [21]:
df_test1.to_csv('..\\data\\extracted_data\\p1_info.csv', index=False)

In [9]:
df1 = pd.read_csv('..\\data\\extracted_data\\test_p1.csv', index_col=False)

  df1 = pd.read_csv('..\\data\\extracted_data\\test_p1.csv', index_col=False)


In [10]:
df_test1 =  pd.read_csv('..\\data\\extracted_data\\p1_info.csv', index_col=False)

In [11]:
X1 = pd.DataFrame(scaler.transform(np.array(df1, dtype = float)), index=df1.index, columns=df1.columns)

In [12]:
df_pred1 = pd.DataFrame(columns=['predict', 'proba'])
preds1 = svc.predict(X1)
probas1 = svc.predict_proba(X1)
for i in range(0, len(X1)):
    new_row = [preds1[i], probas1[i].max()]
    df_pred1.loc[len(df_pred1)] = new_row

In [13]:
teste1 = pd.concat([df_test1, df_pred1], axis=1)

In [108]:
teste1[(teste1.proba < 0.5)]#[teste1.labels.str.find('tru') != -1].head(50)

Unnamed: 0,filename,labels,predict,proba
1,(02) dont kill the whale-11.txt,['gel'],gel,0.495181
2,(02) dont kill the whale-12.txt,"['gel', 'voi']",flu,0.471871
3,(02) dont kill the whale-13.txt,"['gel', 'voi']",gel,0.492756
4,(02) dont kill the whale-14.txt,"['gel', 'voi']",gel,0.476229
7,(02) dont kill the whale-3.txt,"['gel', 'voi']",sax,0.423218
...,...,...,...,...
791,1.1_Stranger On The Shore-1.txt,['cla'],vio,0.389638
793,1.1_Stranger On The Shore-12.txt,['cla'],sax,0.277142
795,1.1_Stranger On The Shore-3.txt,['cla'],vio,0.391526
798,1.1_Stranger On The Shore-9.txt,['cla'],tru,0.493196


In [103]:
proba = svc.predict_proba(X1.iloc[353:354])

for j in range(0, len(svc.classes_)):
    print('{}: {}%'.format(svc.classes_[j], round(proba[0][j]*100, 2)))

flu: 1.12%
gel: 8.44%
pia: 1.74%
sax: 76.27%
tru: 4.54%
vio: 7.88%


## IRMAS - test 2

In [32]:
# create dataframe header

header = 'rms spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
# header += ' label'
header = header.split()

In [35]:
file = open('..\\data\\extracted_data\\test_p2.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)

In [36]:
df_test2 = pd.DataFrame(columns=['filename', 'labels'])

for filename in os.listdir('../data/test_audio/IRMAS/IRTestingData-Part2'):
    if filename[-3:] == 'wav':
        songname = f'../data/test_audio/IRMAS/IRTestingData-Part2/{filename}'
        y, sr = librosa.load(songname, sr =44100)
        rms = librosa.feature.rms(y=y), 
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        to_append = f'{np.mean(rms)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {i}'
        file = open('..\\data\extracted_data\\test_p2.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())
    else:
        label = list()
        with open(f'../data/test_audio/IRMAS/IRTestingData-Part2/{filename}') as fp:
            for line in fp:
                label.append(line.strip())
        new_row = [filename, label]
        df_test2.loc[len(df_test2)] = new_row

In [27]:
df_test2.to_csv('..\\data\\extracted_data\\p2_info.csv', index=False)

In [22]:
df_test2 =  pd.read_csv('..\\data\\extracted_data\\p2_info.csv', index_col=False)

In [23]:
df2 = pd.read_csv('..\\data\\extracted_data\\test_p2.csv', index_col=False)
X2 = pd.DataFrame(scaler.transform(np.array(df2, dtype = float)), index=df2.index, columns=df2.columns)

  df2 = pd.read_csv('..\\data\\extracted_data\\test_p2.csv', index_col=False)


In [25]:
df_pred2 = pd.DataFrame(columns=['predict', 'proba'])
preds2 = svc.predict(X2)
probas2 = svc.predict_proba(X2)
for i in range(0, len(X2)):
    new_row = [preds2[i], probas2[i].max()]
    df_pred2.loc[len(df_pred2)] = new_row

In [26]:
teste2 = pd.concat([df_test2, df_pred2], axis=1)

In [122]:
teste2[(teste2.proba < 0.35) & (teste2.labels.str[2:-2] != teste2.predict)].tail(50)

Unnamed: 0,filename,labels,predict,proba
994,propaganda - dr. mabuse ('13th life' mix)-26.txt,['voi'],gel,0.276652
997,propaganda - dr. mabuse ('13th life' mix)-9.txt,['voi'],gel,0.312276
1013,radiohead - 01 - airbag-1.txt,"['cel', 'gel']",flu,0.21801
1014,radiohead - 01 - airbag-10.txt,['voi'],tru,0.290514
1015,radiohead - 01 - airbag-11.txt,['voi'],gel,0.23659
1019,radiohead - 01 - airbag-2.txt,"['cel', 'gel']",flu,0.217996
1022,radiohead - 01 - airbag-3.txt,"['cel', 'gel']",gel,0.290173
1023,radiohead - 01 - airbag-4.txt,"['gel', 'voi']",vio,0.312336
1024,radiohead - 01 - airbag-5.txt,"['gel', 'voi']",pia,0.236403
1025,radiohead - 01 - airbag-6.txt,"['gel', 'voi']",pia,0.349573


In [123]:
proba = svc.predict_proba(X2.iloc[1174:1175])

for j in range(0, len(svc.classes_)):
    print('{}: {}%'.format(svc.classes_[j], round(proba[0][j]*100, 2)))

flu: 28.54%
gel: 13.27%
pia: 8.33%
sax: 12.17%
tru: 4.59%
vio: 33.09%


In [116]:
proba = svc.predict_proba(X2.iloc[27:28])

for j in range(0, len(svc.classes_)):
    print('{}: {}%'.format(svc.classes_[j], round(proba[0][j]*100, 2)))

flu: 6.21%
gel: 13.94%
pia: 18.24%
sax: 17.36%
tru: 17.41%
vio: 26.83%


In [109]:
teste2[(teste2.proba < 0.5)]#[teste2.labels.str.find('tru') != -1].head(50)

Unnamed: 0,filename,labels,predict,proba
3,0050_10CC___I_M_NOT_IN_LOVE-15.txt,"['gac', 'org', 'pia', 'voi']",flu,0.475310
5,0050_10CC___I_M_NOT_IN_LOVE-19.txt,"['gac', 'org']",flu,0.490980
7,0050_10CC___I_M_NOT_IN_LOVE-21.txt,"['gac', 'org']",flu,0.294960
8,0050_10CC___I_M_NOT_IN_LOVE-23.txt,"['gac', 'org']",flu,0.488783
10,0050_10CC___I_M_NOT_IN_LOVE-4.txt,"['gac', 'org', 'voi']",flu,0.442177
...,...,...,...,...
1294,Zamfir - The Lonely Shepherd - 01 - The Lonely...,"['flu', 'gac']",vio,0.423599
1297,Zamfir - The Lonely Shepherd - 01 - The Lonely...,"['flu', 'gac']",flu,0.292593
1298,Zamfir - The Lonely Shepherd - 01 - The Lonely...,['flu'],flu,0.319631
1299,Zamfir - The Lonely Shepherd - 01 - The Lonely...,['flu'],flu,0.219396


In [107]:
proba = svc.predict_proba(X2.iloc[1198:1199])

for j in range(0, len(svc.classes_)):
    print('{}: {}%'.format(svc.classes_[j], round(proba[0][j]*100, 2)))

flu: 0.83%
gel: 1.06%
pia: 0.8%
sax: 8.84%
tru: 54.66%
vio: 33.82%


In [101]:
proba = svc.predict_proba(X2.iloc[426:427])

for j in range(0, len(svc.classes_)):
    print('{}: {}%'.format(svc.classes_[j], round(proba[0][j]*100, 2)))

flu: 1.04%
gel: 0.94%
pia: 94.61%
sax: 2.99%
tru: 0.16%
vio: 0.26%


In [96]:
proba = svc.predict_proba(X2.iloc[248:250])

for j in range(0, len(svc.classes_)):
    print('{}: {}%'.format(svc.classes_[j], round(proba[0][j]*100, 2)))

flu: 2.95%
gel: 78.19%
pia: 2.4%
sax: 14.19%
tru: 0.93%
vio: 1.35%


In [81]:
proba = svc.predict_proba(X2.iloc[689:690])

for j in range(0, len(svc.classes_)):
    print('{}: {}%'.format(svc.classes_[j], round(proba[0][j]*100, 2)))

flu: 60.12%
gel: 10.5%
pia: 2.16%
sax: 1.35%
tru: 21.26%
vio: 4.61%


## IRMAS - test 3

In [48]:
# create dataframe header

header = 'rms spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
# header += ' label'
header = header.split()

In [49]:
file = open('..\\data\\extracted_data\\test_p3.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)

In [50]:
df_test3 = pd.DataFrame(columns=['filename', 'labels'])
for filename in os.listdir('../data/test_audio/IRMAS/Part3'):
    if filename[-3:] == 'wav':
        songname = f'../data/test_audio/IRMAS/Part3/{filename}'
        y, sr = librosa.load(songname, sr =44100)
        rms = librosa.feature.rms(y=y), 
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        to_append = f'{np.mean(rms)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {i}'
        file = open('..\\data\extracted_data\\test_p3.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())
    else:
        label = list()
        with open(f'../data/test_audio/IRMAS/Part3/{filename}') as fp:
            for line in fp:
                label.append(line.strip())
        new_row = [filename, label]
        df_test3.loc[len(df_test3)] = new_row

In [51]:
df_test3.to_csv('..\\data\\extracted_data\\p3_info.csv', index=False)

In [28]:
df_test3 =  pd.read_csv('..\\data\\extracted_data\\p3_info.csv', index_col=False)

In [29]:
df3 = pd.read_csv('..\\data\\extracted_data\\test_p3.csv', index_col=False)
X3 = pd.DataFrame(scaler.transform(np.array(df3, dtype = float)), index=df3.index, columns=df3.columns)

  df3 = pd.read_csv('..\\data\\extracted_data\\test_p3.csv', index_col=False)


In [30]:
df_pred3 = pd.DataFrame(columns=['predict', 'proba'])
preds3 = svc.predict(X3)
probas3 = svc.predict_proba(X3)
for i in range(0, len(X3)):
    new_row = [preds3[i], probas3[i].max()]
    df_pred3.loc[len(df_pred3)] = new_row

In [31]:
teste3 = pd.concat([df_test3, df_pred3], axis=1)

In [125]:
teste3[(teste3.proba < 0.35) & (teste3.labels.str[2:-2] != teste3.predict)].head(50)

Unnamed: 0,filename,labels,predict,proba
44,02-the_mars_volta-the_widow-jrp-1.txt,"['gac', 'gel', 'voi']",vio,0.276341
59,02-the_mars_volta-the_widow-jrp-5.txt,"['gac', 'voi']",gel,0.23413
62,02-the_mars_volta-the_widow-jrp-9.txt,"['gel', 'tru', 'voi']",flu,0.312597
69,02-the_theory_of_everything_part_2-5.txt,"['pia', 'voi']",gel,0.306017
81,02. School Boy-2.txt,"['pia', 'tru']",sax,0.326557
84,02. School Boy-23.txt,"['pia', 'sax', 'tru']",tru,0.266913
100,02. Yesterdays-6.txt,['cla'],flu,0.272783
101,02. Yesterdays-7.txt,['cla'],pia,0.31672
107,"03 - robert fripp, brian eno - evening star-1...","['gac', 'gel', 'pia']",gel,0.322455
115,"03 - robert fripp, brian eno - evening star-2...","['gac', 'gel']",sax,0.335849


In [110]:
teste3[(teste3.proba < 0.5)]#[teste3.labels.str.find('tru') != -1].tail(50)

Unnamed: 0,filename,labels,predict,proba
5,02 bwv 1068 air on g string-14.txt,"['cel', 'vio']",vio,0.458808
16,02 entangled-10.txt,"['gac', 'gel']",flu,0.353298
17,02 entangled-11.txt,"['gac', 'gel', 'voi']",vio,0.440869
18,02 entangled-13.txt,"['gac', 'gel']",flu,0.391884
19,02 entangled-14.txt,"['gac', 'voi']",flu,0.409313
...,...,...,...,...
752,14 - Tomorrow Never Knows-7.txt,['voi'],gel,0.418429
759,"14-god is an astronaut - all is violent, all i...",['org'],flu,0.333970
762,"14-god is an astronaut - all is violent, all i...","['gel', 'org']",pia,0.458908
763,14. Boots Randolph - Yakety Sax-1.txt,"['gac', 'sax']",tru,0.424035


In [66]:
proba = svc.predict_proba(X3.iloc[0:1])

for j in range(0, len(svc.classes_)):
    print('{}: {}%'.format(svc.classes_[j], round(proba[0][j]*100, 2)))

flu: 0.03%
gel: 0.06%
pia: 0.17%
sax: 0.64%
tru: 1.41%
vio: 97.69%


In [61]:
X3.iloc[0:1]

Unnamed: 0,rms,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,mfcc5,...,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20
0,-0.98446,-0.239741,-0.164789,-0.332847,-0.016941,-0.580453,0.152736,-0.362284,-0.073827,0.686495,...,0.47091,-0.367686,-0.623932,-0.106091,0.344234,0.127928,0.151136,1.047313,0.410854,-0.222474


In [88]:
teste2[teste2.filename.str.find('head') != -1]

Unnamed: 0,filename,labels,predict,proba
1013,radiohead - 01 - airbag-1.txt,"['cel', 'gel']",flu,0.21801
1014,radiohead - 01 - airbag-10.txt,['voi'],tru,0.290514
1015,radiohead - 01 - airbag-11.txt,['voi'],gel,0.23659
1016,radiohead - 01 - airbag-14.txt,['gel'],tru,0.392138
1017,radiohead - 01 - airbag-15.txt,"['cel', 'gel', 'voi']",vio,0.506988
1018,radiohead - 01 - airbag-18.txt,"['gel', 'voi']",gel,0.598241
1019,radiohead - 01 - airbag-2.txt,"['cel', 'gel']",flu,0.217996
1020,radiohead - 01 - airbag-20.txt,"['cel', 'gel', 'voi']",gel,0.531276
1021,radiohead - 01 - airbag-21.txt,"['cel', 'gel']",gel,0.424543
1022,radiohead - 01 - airbag-3.txt,"['cel', 'gel']",gel,0.290173
