In [75]:
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
import os
import scipy.signal
import random
from sklearn.cluster import KMeans

test_data_file = {}

def select_test_files():
        for i in range(0 , 10):                  # we need to take 10% of the audio files for the training set
            rand_spkr = random.randint(1,10)     # choose a random speaker 1-10 
            rootdir = 'C:\\Users\\James Hoffman\\Desktop\\Speech_Recognition\\test_files\\' + str(rand_spkr)
            x = 0                                # simple counter 
            random_file = random.randint(0,500)  # 0-500 because of the 500 audio files per folder
            for filename in os.listdir(rootdir): 
                if(x == random_file):
                    test_data_file[filename] = rand_spkr  # adds these random test files 
                x += 1

def display_graph_for_data(data , sampling_rate):
    plt.figure(figsize=(12 , 4))
    librosa.display.waveplot(data, sr=sampling_rate)
    plt.show()

'''
     takes the location of an audio file and does the fourier transform,
     flattens that and returns it as a single vector
'''    
def transform(audio):
    # sr == sampling rate
    data, sampling_rate = librosa.load(audio, sr=4000)
    spectral = 10 * np.log10(np.abs(librosa.core.stft(data, n_fft=512, hop_length=80, window=scipy.signal.hanning)))
    return spectral


'''
     takes a single directory (one of the numbered one from like 1-30)
     and returns an np.array which contains the flattened data that can
     be fed into the kmeans algorithm
'''
def parse_directory(audio_directory , dir_num): 
    
    flat_audio = np.empty([0,0])         # this will be reassigned anyway and returned
    first_a = True                       # this is for starting the list of matrices
    x = 0                                # simple counter for the purpose of testing
    
    # this just iterates through the directory with all the folders in it
    for filename in os.listdir(audio_directory):
        
        # we don't want to use any of the test data, so if the file is in the test data then we just skip it 
        if filename in test_data_file.keys():
            if(dir_num == test_data_file[filename]):
                continue
        
        # this is because doing all 500 would take a really ong time
        x += 1
        if(x > 10):
            break
            
        # Each file is sent to the transform() function and has a matrix returned
        # All these results are placed next to eachother in a list to be sent to the Kmeans value
        if(first_a == True):
            flat_audio = transform(audio_directory + "\\" + filename)
            first_a = False
        else:
            append_audio = transform(audio_directory + "\\" + filename)
            flat_audio = np.hstack((flat_audio, append_audio))
            
    # we are returned a single list of transformed matrices that had the fourier transform done on them
    return flat_audio


'''
     takes a single directory (the one with all the data)
     and returns an np.array which has the all the np arrays that contain the
     vector for each of the numbered speakers in that directory
'''
def iterate_test_folders(rootdir):
    audio_vectors = []
    for subdir, dirs, files in os.walk(rootdir):
        for dir in dirs:
            print("iterating through directory: " , dir)
            p_d_ = parse_directory(rootdir + dir , dir)
            audio_vectors.append(p_d_)
            
    return audio_vectors

    
# select_test_files()

In [76]:
def get_vectors():
    rootdir = 'C:\\Users\\James Hoffman\\Desktop\\Speech_Recognition\\test_files\\'
    list_of_flattened_vectors = iterate_test_folders(rootdir)
    
    return list_of_flattened_vectors
    

In [77]:
from scipy.spatial import distance
from sklearn import decomposition
from sklearn import datasets

def main():
    print("Starting...")
    select_test_files()
    sample = get_vectors()
    kmeans = [0,0,0,0,0,0,0,0,0,0]
    for i in range(0,10):
        kmeans[i] = KMeans(n_clusters = 64, random_state=0).fit(sample[i])

    train_centers = get_centers(kmeans)
    test_centers = generate_test_kmeans_center()
    
    # this is the code that will be run as soon as we can get the dimensionalities to match
    
    for i in range(0, len(test_centers)):
        print("Test file Dimensions for Central Vector: " , test_centers[i].shape)
    
        # a = (centers[0][0])[:60].tolist()
        # b = kmeans_test_centers[0].tolist()
        # print(len(a))
        # print(len(b))
        
        # dst = distance.euclidean(a,b)
        # print(dst)
    

In [78]:
 def get_centers(kmeans):
        centers = []
        for i in range(0,10):
            centers.append(np.array(kmeans[i].cluster_centers_))
        return centers

In [79]:
def generate_test_kmeans_center():
    test_kmeans_centers = []
    
    # iterate through test data and gather the centers
    for file_n in test_data_file:
        folder = test_data_file[file_n]
        f_trans = transform('C:\\Users\\James Hoffman\\Desktop\\Speech_Recognition\\test_files\\'+ str(folder) +'\\' + str(file_n))
        kmeans_test = KMeans(n_clusters = 64, random_state=0).fit(f_trans)
        center = np.array(kmeans_test.cluster_centers_)
        test_kmeans_centers.append(center)
    return test_kmeans_centers

In [80]:
main() # start the program

Starting...
iterating through directory:  1
iterating through directory:  10
iterating through directory:  2
iterating through directory:  3
iterating through directory:  4
iterating through directory:  5
iterating through directory:  6
iterating through directory:  7
iterating through directory:  8
iterating through directory:  9
Test file Dimensions for Central Vector:  (64, 98)
Test file Dimensions for Central Vector:  (64, 114)
Test file Dimensions for Central Vector:  (64, 94)
Test file Dimensions for Central Vector:  (64, 107)
Test file Dimensions for Central Vector:  (64, 95)
Test file Dimensions for Central Vector:  (64, 101)
Test file Dimensions for Central Vector:  (64, 88)
Test file Dimensions for Central Vector:  (64, 96)
Test file Dimensions for Central Vector:  (64, 77)
Test file Dimensions for Central Vector:  (64, 91)
