In [None]:
import numpy as np
import csv
from scipy.io import wavfile as wav
from scipy.fftpack import fft
import librosa
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier

# Paths of the text files that list sample names (and labels, where present),
# plus the example submission and the file predictions are written to.
FileNames = {
    "train": "ml-fmi-23-2020//train.txt",
    "valid": "ml-fmi-23-2020//validation.txt",
    "test": "ml-fmi-23-2020//test.txt",
    "ex": "ml-fmi-23-2020//sample_submission.txt",
    "pred": "ml-fmi-23-2020//predictions.txt",
}

# Folder prefix for each split; an audio file name is appended directly to it,
# so every entry must keep its trailing separator.
AudioFolders = {
    "train": ".//ml-fmi-23-2020//audio//train//",
    "valid": "ml-fmi-23-2020//audio//validation//",
    "test": "ml-fmi-23-2020//audio//test//",
}

def readCsv (fileName, hasLables):
    """Read a comma-separated text file into a NumPy array of strings.

    Parameters
    ----------
    fileName : str
        Path of the file to read.
    hasLables : bool
        If true, every non-blank row must contain exactly two fields and the
        result is a 2 x N array (row 0 holds the first column, row 1 the
        second), so it can be unpacked as ``names, labels = readCsv(...)``.
        If false, all fields of all rows are returned as one flat 1-D array.

    Returns
    -------
    numpy.ndarray
        The parsed fields, laid out as described above.

    Raises
    ------
    ValueError
        If ``hasLables`` is true and a non-blank row does not have exactly
        two fields. Flattening and reshaping such input would silently pair
        names with the wrong labels.
    """
    rows = []
    # newline='' is the mode the csv module documents for files given to csv.reader.
    with open(fileName, "r", newline='') as csvfile:
        for rowNumber, row in enumerate(csv.reader(csvfile, delimiter=','), start=1):
            if not row:
                # csv.reader yields an empty list for a blank line; nothing to keep.
                continue
            if hasLables and len(row) != 2:
                raise ValueError(
                    "%s: row %d has %d field(s), expected 2" % (fileName, rowNumber, len(row))
                )
            rows.append(row)
    if hasLables:
        # The explicit reshape keeps the (2, 0) result shape for an empty file.
        return np.transpose(np.array(rows).reshape((len(rows), 2)))
    return np.array([field for row in rows for field in row])

def writeCsv (data, labels, fileName = FileNames['pred']):
    """Write a two-column CSV file with a ``name,label`` header.

    Parameters
    ----------
    data : sequence
        Values for the first ("name") column.
    labels : sequence
        Values for the second ("label") column, paired element-wise with ``data``.
    fileName : str, optional
        Destination path; defaults to ``FileNames['pred']``. The file is
        opened in write mode, so existing content is replaced.
    """
    with open(fileName, "w", newline='\n') as outFile:
        out = csv.writer(outFile, delimiter=',')
        out.writerow(['name', 'label'])
        # Stacking the two columns and transposing yields one (name, label) pair per row.
        out.writerows(np.transpose([data, labels]))

def getData (folderName, dataNames):
    """Build one flattened dB-scaled STFT feature vector per audio file.

    Parameters
    ----------
    folderName : str
        Key into ``AudioFolders`` selecting the folder prefix of the split.
    dataNames : iterable of str
        Audio file names, appended directly to that folder prefix.

    Returns
    -------
    numpy.ndarray
        Array built from the per-file vectors, in the order of ``dataNames``.
        NOTE(review): the rows only stack into a regular 2-D matrix if every
        clip yields an STFT of the same size - confirm all clips have equal length.
    """
    def spectrogramVector(name):
        # librosa.load returns (samples, sample_rate); only the samples are used.
        samples = librosa.load(AudioFolders[folderName] + name)[0]
        magnitude = abs(librosa.stft(samples))
        return librosa.amplitude_to_db(magnitude).reshape(-1)

    return np.array([spectrogramVector(name) for name in dataNames])

In [None]:
# Labelled splits unpack into (file names, labels); the test split has names only.
train_data_name, train_labels = readCsv(fileName=FileNames['train'], hasLables=True)
valid_data_name, valid_labels = readCsv(fileName=FileNames['valid'], hasLables=True)
test_data_name = readCsv(fileName=FileNames['test'], hasLables=False)

In [None]:
# Turn every listed audio file of each split into its feature vector.
train_data = getData(folderName='train', dataNames=train_data_name)
valid_data = getData(folderName='valid', dataNames=valid_data_name)
test_data = getData(folderName='test', dataNames=test_data_name)

In [None]:
# validate model
# Fit k-NN (k=15, L1 distance) on the training split and score it on the validation split.
knn = KNeighborsClassifier(15, metric='l1')
knn.fit(train_data, train_labels)
valid_pred_knn = knn.predict(valid_data)
# Indices of the validation samples whose prediction matches the true label.
good_knn = np.argwhere(valid_pred_knn==valid_labels).reshape(-1)
# Validation accuracy: fraction of correctly classified samples.
print (len(good_knn)/len(valid_pred_knn))
# scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are the true
# labels and columns the predictions. Passing the predictions first transposes it.
confusion_matrix(valid_labels, valid_pred_knn)

In [None]:
# predict tests
# Refit the same k-NN configuration on train + validation, then label the
# test split and write the predictions file.
knn = KNeighborsClassifier(n_neighbors=15, metric='l1')
knn.fit(
    np.concatenate([train_data, valid_data]),
    np.concatenate([train_labels, valid_labels]),
)
pred_knn = knn.predict(test_data)
writeCsv(data=test_data_name, labels=pred_knn)