# Testing SVC model with new recordings

In [1]:
import pickle
import librosa
import librosa.display
import pandas as pd
import numpy as np

from os import listdir
from os.path import isfile, join
import csv

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
scaler = pickle.load(open('..\models\scaler', 'rb'))
svc = pickle.load(open('..\models\svc_model', 'rb'))

## audios aleatórios

In [3]:
# create dataframe header

header = 'filename rms spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'
header = header.split()

In [4]:
file = open('..\\data\\extracted_data\\test_aleatorio.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)

In [5]:
# extract features from audio data and save in csv file

for i in ['trem', 'pedra', 'aspirador']:
    for filename in os.listdir(f'../data/test_audio/{i}'):
        songname = f'../data/test_audio/{i}/{filename}'
        y, sr = librosa.load(songname, sr =44100)
        rms = librosa.feature.rms(y=y), 
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        to_append = f'{filename} {np.mean(rms)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'    
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {i}'
        file = open('..\\data\extracted_data\\test_aleatorio.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())

In [6]:
df = pd.read_csv('..\\data\\extracted_data\\test_aleatorio.csv')

In [7]:
d = df.drop(columns = ['filename', 'label'])

In [8]:
X = pd.DataFrame(scaler.transform(np.array(d, dtype = float)), index=d.index, columns=d.columns)

In [9]:
proba = svc.predict_proba(X)

for i in range(0, len(proba)):
    print('Probabilidades {}'.format(df.label[i]))
    for j in range(0, len(svc.classes_)):
        print('{}: {}%'.format(svc.classes_[j], round(proba[i][j]*100, 2)))

Probabilidades trem
flu: 22.16%
gel: 17.63%
pia: 7.02%
sax: 15.8%
tru: 18.69%
vio: 18.69%
Probabilidades pedra
flu: 20.7%
gel: 15.62%
pia: 6.43%
sax: 17.56%
tru: 19.51%
vio: 20.18%
Probabilidades aspirador
flu: 18.18%
gel: 12.49%
pia: 5.9%
sax: 19.61%
tru: 21.94%
vio: 21.89%


## IRMAS - test 1

In [10]:
# create dataframe header

header = 'rms spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
# header += ' label'
header = header.split()

In [11]:
file = open('..\\data\\extracted_data\\test_p1.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)

In [13]:
df_test1 = pd.DataFrame(columns=['filename', 'labels'])
for filename in os.listdir('../data/test_audio/IRMAS/Part1'):
    if filename[-3:] == 'wav':
        songname = f'../data/test_audio/IRMAS/Part1/{filename}'
        y, sr = librosa.load(songname, sr =44100)
        rms = librosa.feature.rms(y=y), 
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        to_append = f'{np.mean(rms)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {i}'
        file = open('..\\data\extracted_data\\test_p1.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())
    else:
        label = list()
        with open(f'../data/test_audio/IRMAS/Part1/{filename}') as fp:
            for line in fp:
                label.append(line.strip())
        new_row = [filename, label]
        df_test1.loc[len(df_test1)] = new_row

In [21]:
df_test1.to_csv('..\\data\\extracted_data\\p1_info.csv', index=False)

In [14]:
df1 = pd.read_csv('..\\data\\extracted_data\\test_p1.csv', index_col=False)

  df1 = pd.read_csv('..\\data\\extracted_data\\test_p1.csv', index_col=False)


In [15]:
X1 = pd.DataFrame(scaler.transform(np.array(df1, dtype = float)), index=df1.index, columns=df1.columns)

In [16]:
df_pred1 = pd.DataFrame(columns=['predict', 'proba'])
preds1 = svc.predict(X1)
probas1 = svc.predict_proba(X1)
for i in range(0, len(X1)):
    new_row = [preds1[i], probas1[i].max()]
    df_pred1.loc[len(df_pred1)] = new_row

In [17]:
teste1 = pd.concat([df_test1, df_pred1], axis=1)

In [20]:
teste1[teste1.proba > 0.5]

Unnamed: 0,filename,labels,predict,proba
0,(02) dont kill the whale-1.txt,[gel],sax,0.546729
5,(02) dont kill the whale-15.txt,"[gel, pia]",gel,0.860479
6,(02) dont kill the whale-2.txt,"[gel, voi]",gel,0.592850
8,(02) dont kill the whale-4.txt,[gel],gel,0.690951
10,(02) dont kill the whale-8.txt,[gel],gel,0.628022
...,...,...,...,...
801,[1] - 03 - Alexandre Lagoya - Canarios (Sanz)-...,[gac],pia,0.828793
802,[1] - 03 - Alexandre Lagoya - Canarios (Sanz)-...,[gac],pia,0.858018
803,[1] - 03 - Alexandre Lagoya - Canarios (Sanz)-...,[gac],pia,0.959204
804,[1] - 03 - Alexandre Lagoya - Canarios (Sanz)-...,[gac],sax,0.748439


## IRMAS - test 2

In [32]:
# create dataframe header

header = 'rms spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
# header += ' label'
header = header.split()

In [35]:
file = open('..\\data\\extracted_data\\test_p2.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)

In [36]:
df_test2 = pd.DataFrame(columns=['filename', 'labels'])

for filename in os.listdir('../data/test_audio/IRMAS/IRTestingData-Part2'):
    if filename[-3:] == 'wav':
        songname = f'../data/test_audio/IRMAS/IRTestingData-Part2/{filename}'
        y, sr = librosa.load(songname, sr =44100)
        rms = librosa.feature.rms(y=y), 
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        to_append = f'{np.mean(rms)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {i}'
        file = open('..\\data\extracted_data\\test_p2.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())
    else:
        label = list()
        with open(f'../data/test_audio/IRMAS/IRTestingData-Part2/{filename}') as fp:
            for line in fp:
                label.append(line.strip())
        new_row = [filename, label]
        df_test2.loc[len(df_test2)] = new_row

In [27]:
df_test2.to_csv('..\\data\\extracted_data\\p2_info.csv', index=False)

In [44]:
df2 = pd.read_csv('..\\data\\extracted_data\\test_p2.csv', index_col=False)
X3 = pd.DataFrame(scaler.transform(np.array(df2, dtype = float)), index=df2.index, columns=df2.columns)

  df2 = pd.read_csv('..\\data\\extracted_data\\test_p2.csv', index_col=False)


In [45]:
df_pred2 = pd.DataFrame(columns=['predict', 'proba'])
preds2 = svc.predict(X3)
probas2 = svc.predict_proba(X3)
for i in range(0, len(X3)):
    new_row = [preds2[i], probas2[i].max()]
    df_pred2.loc[len(df_pred2)] = new_row

In [46]:
teste2 = pd.concat([df_test2, df_pred2], axis=1)

In [47]:
teste2[teste2.proba > 0.5]

Unnamed: 0,filename,labels,predict,proba
0,0050_10CC___I_M_NOT_IN_LOVE-1.txt,"[gac, org]",gel,0.585035
1,0050_10CC___I_M_NOT_IN_LOVE-11.txt,[voi],sax,0.606247
2,0050_10CC___I_M_NOT_IN_LOVE-13.txt,"[gac, org, voi]",flu,0.523183
4,0050_10CC___I_M_NOT_IN_LOVE-17.txt,"[gac, org, voi]",flu,0.591750
6,0050_10CC___I_M_NOT_IN_LOVE-20.txt,"[gac, org]",flu,0.516993
...,...,...,...,...
1286,Yngwie Malmsteen - Gimme gimme gimme-9.txt,"[gel, voi]",gel,0.874389
1288,Zamfir - The Lonely Shepherd - 01 - The Lonely...,"[flu, gac, tru]",tru,0.775219
1289,Zamfir - The Lonely Shepherd - 01 - The Lonely...,"[flu, gac]",vio,0.505312
1295,Zamfir - The Lonely Shepherd - 01 - The Lonely...,[gac],vio,0.945330


## IRMAS - test 3

In [48]:
# create dataframe header

header = 'rms spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
# header += ' label'
header = header.split()

In [49]:
file = open('..\\data\\extracted_data\\test_p3.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)

In [50]:
df_test3 = pd.DataFrame(columns=['filename', 'labels'])
for filename in os.listdir('../data/test_audio/IRMAS/Part3'):
    if filename[-3:] == 'wav':
        songname = f'../data/test_audio/IRMAS/Part3/{filename}'
        y, sr = librosa.load(songname, sr =44100)
        rms = librosa.feature.rms(y=y), 
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        to_append = f'{np.mean(rms)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {i}'
        file = open('..\\data\extracted_data\\test_p3.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())
    else:
        label = list()
        with open(f'../data/test_audio/IRMAS/Part3/{filename}') as fp:
            for line in fp:
                label.append(line.strip())
        new_row = [filename, label]
        df_test3.loc[len(df_test3)] = new_row

In [51]:
df_test3.to_csv('..\\data\\extracted_data\\p3_info.csv', index=False)

In [52]:
df3 = pd.read_csv('..\\data\\extracted_data\\test_p3.csv', index_col=False)
X3 = pd.DataFrame(scaler.transform(np.array(df3, dtype = float)), index=df3.index, columns=df3.columns)

  df3 = pd.read_csv('..\\data\\extracted_data\\test_p3.csv', index_col=False)


In [53]:
df_pred3 = pd.DataFrame(columns=['predict', 'proba'])
preds3 = svc.predict(X3)
probas3 = svc.predict_proba(X3)
for i in range(0, len(X3)):
    new_row = [preds3[i], probas3[i].max()]
    df_pred3.loc[len(df_pred3)] = new_row

In [54]:
teste3 = pd.concat([df_test3, df_pred3], axis=1)

In [55]:
teste3[teste3.proba > 0.5]

Unnamed: 0,filename,labels,predict,proba
0,02 bwv 1068 air on g string-1.txt,"[cel, vio]",vio,0.976910
1,02 bwv 1068 air on g string-10.txt,"[cel, vio]",vio,0.889362
2,02 bwv 1068 air on g string-11.txt,"[cel, vio]",vio,0.662857
3,02 bwv 1068 air on g string-12.txt,"[cel, vio]",vio,0.800655
4,02 bwv 1068 air on g string-13.txt,"[cel, vio]",vio,0.839267
...,...,...,...,...
757,"14-god is an astronaut - all is violent, all i...",[org],gel,0.991870
758,"14-god is an astronaut - all is violent, all i...",[org],gel,0.998361
760,"14-god is an astronaut - all is violent, all i...","[gel, org]",tru,0.698028
761,"14-god is an astronaut - all is violent, all i...","[gel, org]",gel,0.648737
