In [45]:
#https://datashare.ed.ac.uk/handle/10283/1942

In [46]:
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
from PIL import Image
import pathlib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [47]:
# get wav paths
def get_wav_paths(speaker):
    speaker_path = "16000_pcm_speeches/" + speaker
    all_paths = [item for item in os.listdir(speaker_path)]
    return all_paths

In [48]:
nelson_mandela_paths = get_wav_paths("Nelson_Mandela")
margaret_thatcher_paths = get_wav_paths("Magaret_Tarcher")
benjamin_netanyau_paths = get_wav_paths("Benjamin_Netanyau")
jens_stoltenberg_paths = get_wav_paths( 'Jens_Stoltenberg')
julia_gillard_paths = get_wav_paths("Julia_Gillard")

noise1_paths = get_wav_paths("_background_noise_")
noise2_paths = get_wav_paths("other")

In [49]:
benjamin_netanyau_paths[0]

'0.wav'

In [50]:
data_dir_benjamin_netanyau = "16000_pcm_speeches/Benjamin_Netanyau/1035.wav"

In [51]:
x , sr = librosa.load(data_dir_benjamin_netanyau, sr=16000)

In [52]:
import IPython.display as ipd
ipd.Audio(data_dir_benjamin_netanyau)

In [53]:
x

array([-0.03967285, -0.06976318, -0.05905151, ..., -0.04055786,
       -0.03866577, -0.03411865], dtype=float32)

In [54]:
header = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'
header = header.split()

In [55]:
import csv

In [72]:
file = open('dataset.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)
speakers = 'Benjamin_Netanyau Jens_Stoltenberg Julia_Gillard Magaret_Tarcher Nelson_Mandela _background_noise_ other p279 p282 p286 p287 alex'.split()
#speakers = ''.split()
for g in speakers:
    for filename in os.listdir(f'16000_pcm_speeches/{g}'):
        songname = f'16000_pcm_speeches/{g}/{filename}'
        y, sr = librosa.load(songname, mono=True, duration=30)
        rmse = librosa.feature.rms(y=y)
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        to_append = f'{filename} {np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'    
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {g}'
        file = open('dataset.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())

In [125]:
data = pd.read_csv('dataset.csv')

In [126]:
data.head()

Unnamed: 0,filename,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,0.wav,0.591486,0.01679,2317.438562,1930.22209,4579.720792,0.130216,-331.937927,112.295609,-64.693413,...,-12.863947,0.522781,-19.345615,-0.095156,-17.73912,1.09163,-11.79953,-3.142961,-6.347281,Benjamin_Netanyau
1,1.wav,0.385372,0.179463,1567.691273,1400.762952,2986.997847,0.088368,-132.72049,144.912048,-68.599281,...,-8.662812,-5.107401,-19.555819,-6.843742,-14.14328,-1.355101,-9.291818,-11.322633,-6.07941,Benjamin_Netanyau
2,10.wav,0.393671,0.141835,1549.816351,1604.434023,3177.860329,0.061768,-203.742142,129.07048,-47.010952,...,-12.440643,-5.384173,-17.979897,-10.439045,-14.383365,0.69341,-10.107463,-6.366896,-3.922637,Benjamin_Netanyau
3,100.wav,0.621182,0.213655,2215.349723,1445.22462,3488.623602,0.167736,-39.58997,132.840714,-144.588333,...,-17.940376,0.849833,-23.430109,5.039536,-23.027075,1.768793,-15.551888,-0.176673,-8.083587,Benjamin_Netanyau
4,1000.wav,0.628766,0.234467,2216.784645,1368.799392,3282.589999,0.173173,-37.589828,130.614731,-152.456833,...,-18.116173,0.218127,-23.707592,4.97663,-23.603624,2.703873,-15.207455,-0.018577,-6.564107,Benjamin_Netanyau


In [127]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9119 entries, 0 to 9118
Data columns (total 28 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   filename            9119 non-null   object 
 1   chroma_stft         9119 non-null   float64
 2   rmse                9119 non-null   float64
 3   spectral_centroid   9119 non-null   float64
 4   spectral_bandwidth  9119 non-null   float64
 5   rolloff             9119 non-null   float64
 6   zero_crossing_rate  9119 non-null   float64
 7   mfcc1               9119 non-null   float64
 8   mfcc2               9119 non-null   float64
 9   mfcc3               9119 non-null   float64
 10  mfcc4               9119 non-null   float64
 11  mfcc5               9119 non-null   float64
 12  mfcc6               9119 non-null   float64
 13  mfcc7               9119 non-null   float64
 14  mfcc8               9119 non-null   float64
 15  mfcc9               9119 non-null   float64
 16  mfcc10

In [128]:
data = data.drop(['filename'],axis=1)#Encoding the Labels
genre_list = data.iloc[:, -1]

In [129]:
genre_list

0       Benjamin_Netanyau
1       Benjamin_Netanyau
2       Benjamin_Netanyau
3       Benjamin_Netanyau
4       Benjamin_Netanyau
              ...        
9114                 alex
9115                 alex
9116                 alex
9117                 alex
9118                 alex
Name: label, Length: 9119, dtype: object

In [130]:
encoder = LabelEncoder()
y = encoder.fit_transform(genre_list)#Scaling the Feature columns
scaler = StandardScaler()
X = scaler.fit_transform(np.array(data.iloc[:, :-1], dtype = float))#Dividing data into training and Testing set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [131]:
y

array([0, 0, 0, ..., 6, 6, 6])

In [132]:
X_test[0]

array([ 0.57148018, -1.06909993, -1.25072963, -0.94932675, -1.3160928 ,
       -0.98515723, -1.23786118,  0.46060991,  1.08721732, -0.95631351,
        0.99708767,  0.06768935,  0.9140612 , -1.12424627,  1.44763098,
        1.2207838 ,  3.19072046,  1.87308613,  1.59629371,  2.06776028,
        0.25327417,  1.52489716,  0.23203768,  1.30680297,  0.64225062,
        1.7024454 ])

In [133]:
y_test[0]

11

In [134]:
y_test[1]

2

In [135]:
from sklearn.ensemble import RandomForestClassifier

#Create a Gaussian Classifier
clf=RandomForestClassifier(n_estimators=100)
clf.fit(X_train,y_train)

y_pred=clf.predict(X_test)

In [136]:
from sklearn import metrics
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.9956140350877193


In [137]:
y_test

array([11,  2,  4, ...,  3,  1,  1])

In [138]:
y_pred

array([11,  2,  4, ...,  3,  1,  1])