In [66]:
# all packages needed in this exercise
import os
import struct
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

In [67]:
# necessary functions

def read_mfcc_file(file_path, num_coefficients=13):
    """Read a binary MFCC file and return its feature vectors.

    File layout: the first 4 bytes hold a big-endian integer giving the total
    number of data points in the file (writers/readers can compare it with the
    file size to check endianness). The remainder of the file is the data
    points themselves, each a 4-byte big-endian floating point number.

    Parameters
    ----------
    file_path : str
        Path of the binary file to read.
    num_coefficients : int, optional
        Number of consecutive data points grouped into one feature vector
        (default 13).

    Returns
    -------
    list of list of float
        The data points in file order, grouped into vectors of
        ``num_coefficients`` values. If the number of data points is not a
        multiple of ``num_coefficients`` the last vector is shorter.

    Raises
    ------
    struct.error
        If the header is shorter than 4 bytes or the payload length is not a
        multiple of 4 bytes.
    """
    with open(file_path, 'rb') as f:
        # The header is parsed so that a truncated file still fails early, but
        # its value is not needed: every float after the header is decoded.
        struct.unpack('>i', f.read(4))
        data_bytes = f.read()

    # Decode all floats with one unpack call instead of one call per value.
    num_floats = len(data_bytes) // 4
    data = struct.unpack('>%df' % num_floats, data_bytes)

    return [
        list(data[start:start + num_coefficients])
        for start in range(0, len(data), num_coefficients)
    ]

def extract_label(file_name):
    """Return the class label of a data file: the first character of its name.

    For example ``'2-BC.cep'`` yields ``'2'`` and ``'A-AL.cep'`` yields ``'A'``.
    Any leading directory components are ignored, so ``'data/CEP/A-AL.cep'``
    also yields ``'A'``.

    Raises
    ------
    IndexError
        If the file name is empty.
    """
    # Strip directories first so a path does not yield the directory's initial.
    return os.path.basename(file_name)[0]

def get_file_path(file_name, base_path=r'data/CEP'):
    """Return the path of a data file inside the data directory.

    Parameters
    ----------
    file_name : str
        Name of the data file, e.g. ``'2-BC.cep'``.
    base_path : str, optional
        Directory that contains the data files (default ``'data/CEP'``).

    Returns
    -------
    str
        ``base_path`` joined with ``file_name``.
    """
    return os.path.join(base_path, file_name)

def read_file_list(file_path):
    """Read the names of the files to be processed, one per line.

    Surrounding whitespace is stripped from every line. Lines that are empty
    after stripping (for example a trailing blank line) are skipped, because
    an empty name would later resolve to the data directory itself rather
    than to a data file.

    Parameters
    ----------
    file_path : str
        Path of the text file containing the list.

    Returns
    -------
    list of str
        The non-empty, stripped lines in file order.
    """
    with open(file_path, 'r') as f:
        stripped_lines = (line.strip() for line in f)
        return [name for name in stripped_lines if name]

def normalize(vector_list, means, stds):
    """Standardize every vector component-wise.

    Each vector is converted to a numpy array, then ``means`` is subtracted
    and the result is divided by ``stds``.

    Returns a list holding one normalized array per input vector, in the
    same order as ``vector_list``.
    """
    return [(np.asarray(entry) - means) / stds for entry in vector_list]

In [68]:
# load the names of the training files (one name per line in the list file)
training_files = read_file_list(r'data/Listen/trainingSarah.txt')
# trailing expression: display the loaded names as the cell output
training_files

['2-BC.cep',
 '2-CH.cep',
 '2-CR.cep',
 '2-DH.cep',
 '2-DK.cep',
 '2-FK.cep',
 '2-FL.cep',
 '2-FX.cep',
 '2-GS.cep',
 '2-IH.cep',
 '2-JB.cep',
 '2-JH.cep',
 '2-KG.cep',
 '2-MW.cep',
 '2-NH.cep',
 '2-RD.cep',
 'A-AL.cep',
 'A-BC.cep',
 'A-CH.cep',
 'A-CR.cep',
 'A-DH.cep',
 'A-DK.cep',
 'A-FK.cep',
 'A-FL.cep',
 'A-FX.cep',
 'A-GS.cep',
 'A-IH.cep',
 'A-JB.cep',
 'A-JH.cep',
 'A-KG.cep',
 'A-MW.cep',
 'A-NH.cep',
 'A-PL.cep',
 'A-RD.cep',
 'E-AL.cep',
 'E-AR.cep',
 'E-BC.cep',
 'E-CH.cep',
 'E-CR.cep',
 'E-DH.cep',
 'E-DK.cep',
 'E-FK.cep',
 'E-FL.cep',
 'E-FX.cep',
 'E-GS.cep',
 'E-IH.cep',
 'E-JB.cep',
 'E-JH.cep',
 'E-KG.cep',
 'E-MW.cep',
 'E-NH.cep',
 'E-PL.cep',
 'E-RD.cep',
 'F-AL.cep',
 'F-AR.cep',
 'F-BC.cep',
 'F-CH.cep',
 'F-CR.cep',
 'F-DH.cep',
 'F-DK.cep',
 'F-FK.cep',
 'F-FL.cep',
 'F-FX.cep',
 'F-GS.cep',
 'F-IH.cep',
 'F-JB.cep',
 'F-JH.cep',
 'F-KG.cep',
 'F-MW.cep',
 'F-PL.cep',
 'F-RD.cep',
 'I-AL.cep',
 'I-AR.cep',
 'I-BC.cep',
 'I-CH.cep',
 'I-CR.cep',
 'I-DH.cep',

In [69]:
# load the names of the test files (one name per line in the list file)
test_files = read_file_list(r'data/Listen/testCEP.txt')

In [70]:
# preprocessing data: one feature vector and one label per training file
X_train = []
y_train = []
for file_name in training_files:
    file_path = get_file_path(file_name)
    feature_vectors = read_mfcc_file(file_path)
    # the vector at the middle index represents the whole file
    middle_vector = feature_vectors[len(feature_vectors) // 2]
    X_train.append(middle_vector)
    y_train.append(extract_label(file_name))

X_train = np.array(X_train)

# calculate mean and standard deviation for each component
means = np.mean(X_train, axis=0)
stds = np.std(X_train, axis=0)
# a constant component has a standard deviation of 0; dividing by it would
# produce NaN/inf values, so such components are scaled by 1 instead
stds = np.where(stds == 0, 1.0, stds)

# normalize training data
X_train_normalized = normalize(X_train, means, stds)

In [71]:
# display the raw (un-normalized) training matrix: one row per training file
X_train

array([[ 1.02777605e+01,  1.02684605e+00, -3.25523376e-01, ...,
        -3.51733118e-01, -7.82287307e-03, -2.77433306e-01],
       [ 1.11097374e+01,  8.89535606e-01, -8.52679014e-01, ...,
        -3.77555162e-01, -1.79887846e-01, -1.58629939e-01],
       [ 1.13142805e+01,  5.35501540e-01, -7.32595026e-01, ...,
        -1.51305333e-01, -2.04772577e-01, -3.36928755e-01],
       ...,
       [ 1.43945427e+01,  2.58302063e-01, -3.25640500e-01, ...,
        -4.73457903e-01,  5.20278960e-02, -1.31161690e-01],
       [ 8.00675011e+00, -1.96256816e-01, -4.01968837e-01, ...,
        -3.52360219e-01, -2.59230822e-01, -1.39844179e-01],
       [ 9.35930538e+00,  8.31444189e-02, -5.89053571e-01, ...,
        -3.12904656e-01, -2.14223787e-01, -1.81437671e-01]])

In [72]:
# display the normalized training vectors (list of arrays returned by normalize)
X_train_normalized

[array([-0.07806298,  1.29021877, -0.44266499, -0.89594552,  1.26459961,
         0.89564533, -0.26210928, -0.95646459, -0.61405684, -0.06464031,
        -1.0796389 ,  0.8363518 , -0.53354426]),
 array([ 0.2255574 ,  1.13671407, -1.66559969,  0.20655238,  1.31810377,
        -0.39991047, -0.6523614 , -1.28491627,  0.15925374,  0.02359783,
        -1.22274752, -0.28333689,  0.14382248]),
 array([ 0.30020306,  0.7409256 , -1.38701996,  0.42862895,  0.48707743,
         0.55060499,  0.17245266, -2.56841978,  0.97585018, -1.39070793,
         0.03115404, -0.44527079, -0.87276223]),
 array([ 1.20418081,  0.49747343, -1.77883137,  0.60361371, -0.13572381,
         0.71331414, -0.63973585, -2.19446231, -1.62951673, -2.00541565,
         0.52084937, -0.82638968, -1.16503821]),
 array([ 0.88446846,  1.05115176, -0.67561063, -0.62636342,  0.95998728,
         1.71203654, -3.2640825 , -0.21101444,  1.15753932, -1.36976913,
         0.79382528, -1.44155591, -0.56476864]),
 array([ 2.79716156,  0.9

In [73]:
# train knn classifier
# n_neighbors=1: a prediction takes the label of the single closest training vector
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(X_train_normalized, y_train)

In [74]:
# prepare the test data: middle feature vector and label of every test file
test_vectors_per_file = [read_mfcc_file(get_file_path(name)) for name in test_files]
X_test = [vectors[len(vectors) // 2] for vectors in test_vectors_per_file]
y_true = [extract_label(name) for name in test_files]

# scale with the statistics computed on the training data
X_test_normalized = normalize(X_test, means, stds)

In [75]:
# display the raw (un-normalized) test vectors: one list of values per test file
X_test

[[9.875738143920898,
  0.19895634055137634,
  -0.08192585408687592,
  -0.13684335350990295,
  -0.6725253462791443,
  0.04542205110192299,
  0.006925483699887991,
  0.01757233217358589,
  -0.11003778874874115,
  0.01106294896453619,
  0.059756021946668625,
  -0.15778210759162903,
  -0.019802359864115715],
 [8.920675277709961,
  0.6811258792877197,
  -0.40849289298057556,
  -0.2309054136276245,
  -0.5755773186683655,
  -0.38231682777404785,
  0.16929376125335693,
  0.10655543953180313,
  -0.22407487034797668,
  -0.09524340182542801,
  -0.030092407017946243,
  0.1688658744096756,
  0.004339531529694796],
 [11.018540382385254,
  0.08147092163562775,
  -0.2794187366962433,
  1.4838966131210327,
  -0.4453357458114624,
  -0.6758968830108643,
  -0.5281093120574951,
  -0.15954548120498657,
  -0.11627715826034546,
  -0.5699493288993835,
  0.03608541563153267,
  -0.08712845295667648,
  -0.02041049301624298],
 [9.98066234588623,
  -0.4366571307182312,
  -0.12748616933822632,
  1.3663781881332397,


In [76]:
# display the normalized test vectors (list of arrays returned by normalize)
X_test_normalized

[array([-0.22477641,  0.36468862,  0.12245059, -1.21228998, -1.12862074,
         0.55648137,  0.58671171,  0.85684848,  0.00931194,  1.05623924,
         1.20087939, -0.1394869 ,  0.93535883]),
 array([-0.5733156 ,  0.90372472, -0.63514382, -1.39659984, -0.89268374,
        -1.09582884,  1.36680834,  1.30466049, -0.61095498,  0.44826445,
         0.70292947,  1.98612905,  1.07300572]),
 array([ 0.1922761 ,  0.2333471 , -0.33570803,  1.96346819, -0.57572209,
        -2.22989766, -1.98385713, -0.03450592, -0.02462504, -2.26661793,
         1.06969428,  0.32028185,  0.93189151]),
 array([-0.18648553, -0.34588843,  0.01675639,  1.73319676, -0.41873093,
         0.17113695,  0.95515387, -0.4734156 ,  1.25083006, -0.63938264,
        -1.11596706,  0.3370725 ,  0.19176821]),
 array([-0.8660189 ,  0.10332511,  1.22548559,  0.79267999,  0.65013047,
        -0.63759464,  1.39655193,  0.71645566,  0.68056671, -1.68250845,
         0.81382258,  0.60248421, -0.37638957]),
 array([-0.60722162, -0.1

In [77]:
# predict a label for every normalized test vector with the fitted classifier
y_pred = neigh.predict(X_test_normalized)
# trailing expression: display the predicted labels as the cell output
y_pred

array(['A', 'A', 'E', 'E', 'I', '2', 'I', 'M', 'F', 'F', 'F', 'F', 'F',
       'F', 'F', 'E', 'I', 'I', 'I', 'I', 'M', 'E', '2', 'N', 'M', 'U',
       'O', 'Y', 'N', 'N', 'N', 'N', 'U', 'O', 'O', 'O', 'O', 'U', 'O',
       'S', 'S', 'S', 'S', 'S', 'S', 'S', 'N', 'O', 'U', 'O', 'U', 'U',
       'U', 'Y', 'Y', 'Y', 'Y', 'Y'], dtype='<U1')

In [78]:
# accuracy = fraction of test files whose predicted label equals the true label
is_correct = np.array(y_true) == np.array(y_pred)
accuracy = np.mean(is_correct)
print(f'The sound recognition rate of test sample is {accuracy * 100:.2f}%')

The sound recognition rate of test sample is 72.41%
