In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization, InputLayer
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

In [20]:
import numpy as np
import librosa
import os
import sklearn.preprocessing

In [2]:
import os
root_logdir = os.path.join(os.curdir, "mfcc_cnn_2d")

def get_run_logdir():
    import time
    run_id = time.strftime("run_%Y_%m_%d-%H_%M_%S")
    return os.path.join(root_logdir, run_id)

In [3]:
# batch_size = 12
# target_size = (800,600)
# # n_training_images = 

# datagen = ImageDataGenerator(rescale=1./255)


# train_generator = datagen.flow_from_directory('../../data/cleaned_set_mfcc_pngs/cleaned_set_train',
#                                                     color_mode='rgb',
#                                                     target_size=target_size,
#                                                     batch_size=batch_size,
#                                                     class_mode="categorical")

# validation_generator = datagen.flow_from_directory('../../data/cleaned_set_mfcc_pngs/cleaned_set_test',
#                                                        color_mode='rgb',
#                                                        target_size=target_size,
#                                                        batch_size=batch_size,
#                                                        class_mode='categorical')


# labels = (train_generator.class_indices)
# labels = dict((v, k) for k,v in labels.items())
# labels

Found 299 images belonging to 3 classes.
Found 60 images belonging to 3 classes.


{0: 'english', 1: 'mandarin', 2: 'spanish'}

In [69]:
def get_paths_and_labels(audio_directory_path):
#     class_names = os.listdir(audio_directory_path)
    class_names = ['english', 'spanish', 'mandarin']
#     class_names.remove('.DS_Store')
    label_class_dict = {num:class_ for num, class_ in enumerate(class_names)}
    
    audio_paths = []
    labels = []

    for label, name in enumerate(class_names):
        dir_path = os.path.join(audio_directory_path, name)
        full_paths = [os.path.join(dir_path, filename) for filename in os.listdir(dir_path) if 'ipynb' not in filename and '.DS_Store' not in filename]
        audio_paths += full_paths
        labels += [label] * len(full_paths)
    
    return audio_paths, labels, label_class_dict

def path_to_mfcc(path):
    audio, sr = librosa.load(path, sr=None)
    mfcc = librosa.feature.mfcc(audio, sr, n_fft=512,
                                    n_mfcc=13, n_mels=40,
                                    hop_length=256)
#     mfcc = librosa.feature.delta(mfcc, order=2)
    mfccs = mfcc.copy()
    mfccs.resize((13, 6400), refcheck=False)
    mfccs = sklearn.preprocessing.scale(mfccs)
    return mfccs.flatten()

def create_dataset(audio_paths, labels):
    X = np.zeros((len(audio_paths), 13 * 6400))
    for idx, file in enumerate(audio_paths):
        X[idx] = path_to_mfcc(file)
    y = np.array([0 if x == 0 else 1 for x in labels])
    return X, y

In [70]:
train_path = '../../data/cleaned_set_44/cleaned_set_train'
test_path = '../../data/cleaned_set_44/cleaned_set_test'

In [71]:
train_audio_paths, train_labels, train_label_class_dict = get_paths_and_labels(train_path)

In [72]:
test_audio_paths, test_labels, test_label_class_dict = get_paths_and_labels(test_path)

In [73]:
train_X, train_y = create_dataset(train_audio_paths, train_labels)



In [74]:
test_X, test_y = create_dataset(test_audio_paths, test_labels)



In [76]:
from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier(n_neighbors=10)

In [77]:
model.fit(train_X, train_y)

KNeighborsClassifier(n_neighbors=10)

In [78]:
model.score(test_X, test_y)

0.9

In [82]:
test_y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [81]:
model.predict(test_X)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])