In [1]:
import glob
import sys
import os
import shutil

import numpy as np
from sklearn import neighbors, datasets
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap


from matplotlib.pyplot import specgram
import matplotlib.style as ms
ms.use("seaborn-muted")
%matplotlib inline

import IPython.display

import librosa
import librosa.display

# import tensorflow as tf
from sklearn.metrics import precision_recall_fscore_support
import pickle


In [2]:
def load_files(filepaths):
    raw_sounds = []
    for filepath in filepaths:
        X,sr = librosa.load(filepath)
        raw_sounds.append(X)
    return raw_sounds

def extract_feature(file_name):
    print("extract_feature")
    X, sample_rate = librosa.load(file_name)
    print("loaded")
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
    mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X),
    sr=sample_rate).T,axis=0)
    print("returning")
    return mfccs,chroma,mel,contrast,tonnetz

def parse_audio_files(parent_dir,sub_dirs,file_ext="*.wav"):
    features, labels = np.empty((0,193)), np.empty(0) #the shape 193 comes from the shape of features combined
    for label, sub_dir in enumerate(sub_dirs):
        print(label, sub_dir)
        for fn in glob.glob(os.path.join(parent_dir, sub_dir, file_ext)):
            try:
                print(fn)
                mfccs, chroma, mel, contrast, tonnetz = extract_feature(fn)
            except Exception as e:
                print("Error encountered while parsing file: ", fn)
                continue
            ext_features = np.hstack([mfccs,chroma,mel,contrast,tonnetz])

            features = np.vstack([features,ext_features])
            labels = np.append(labels, fn.split('\\')[2].split('-')[1])
    return np.array(features), np.array(labels, dtype = np.int)

def one_hot_encode(labels):
    n_labels = len(labels)
    n_unique_labels = len(np.unique(labels))
    one_hot_encode = np.zeros((n_labels,n_unique_labels))
    one_hot_encode[np.arange(n_labels),labels] = 1
    return one_hot_encode

In [None]:
print("Starting Audio parsing")
parent_dir = 'TalkingData'
dirs = ["set1", "set2", "fold1", "fold2", "fold3"]
path = "TalkingData\set1\talking_111841-3-0-0.wav"
# X, sample_rate = librosa.load(path)
features, labels = parse_audio_files(parent_dir, dirs)

print("Done parsing audio")

In [None]:
f = open('./pickles/features.pckl', 'wb')
pickle.dump(features,f)
f.close()
fl = open('pickles/labels.pckl', 'wb')
pickle.dump(labels,fl)
fl.close()

In [3]:
f = open("./pickles/features.pckl", 'rb')
features = pickle.load(f)
f.close()


fl = open("./pickles/labels.pckl", "rb")
labels = pickle.load(fl)
fl.close()


In [4]:
print("starting")

# print(labels.shape)


labels = one_hot_encode(labels)

train_test_split = np.random.rand(len(features)) < 0.70
train_x = features[train_test_split]
train_y = labels[train_test_split]
test_x = features[~train_test_split]
test_y = labels[~train_test_split]

print("ending")

starting
ending


In [17]:
# setup data/model
n_neighbors = 15

# iris = datasets.load_iris()

# X = iris.data[:, :2]
X = train_x
y = train_y
# y = iris.target

h = 0.02

# Create color maps
# cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
# cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])

for weights in ['uniform', 'distance']:
    # we create an instance of Neighbours Classifier and fit the data.
    clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
    clf.fit(X, y)

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
#     x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
#     y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
#     xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
#                          np.arange(y_min, y_max, h))
#     print(xx.shape, yy.shape)
#     Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    
    pred_y = clf.predict(test_x)
    result = test_y==pred_y.all()
    print(sum(sum(result==True)) / (result.shape[1] * result.shape[0]))
    # Put the result into a color plot
#     Z = Z.reshape(xx.shape)
#     plt.figure()
# #     plt.pcolormesh(xx, yy, Z, cmap=cmap_light)

#     # Plot also the training points
#     plt.scatter(X[:, 0], X[:, 1], c=y,
#                 edgecolor='k', s=20)
#     plt.xlim(xx.min(), xx.max())
#     plt.ylim(yy.min(), yy.max())
#     plt.title("3-Class classification (k = %i, weights = '%s')"
#               % (n_neighbors, weights))

plt.show()


0.75
0.75
