In [1]:
# pip install python_speech_features
import numpy as np
import csv
from scipy.io import wavfile as wav
from scipy.fftpack import fft
from python_speech_features import mfcc, delta, logfbank
from random import randint
import librosa


# Text files of the dataset, keyed by role (train/valid/test listings,
# the example submission, and the predictions file this notebook writes).
FileNames = {
    "train": "ml-fmi-23-2020//train.txt",
    "valid": "ml-fmi-23-2020//validation.txt",
    "test": "ml-fmi-23-2020//test.txt",
    "ex": "ml-fmi-23-2020//sample_submission.txt",
    "pred": "ml-fmi-23-2020//predictions.txt",
}

# Folder prefix of the audio clips for each split; a clip's file name is
# appended directly to the prefix, so every entry ends with a separator.
AudioFolders = {
    "train": "ml-fmi-23-2020//audio//train//",
    "valid": "ml-fmi-23-2020//audio//validation//",
    "test": "ml-fmi-23-2020//audio//test//",
}

# Framing parameters for speech features.
# NOTE(review): none of these four values is referenced by the code visible in
# this notebook (only by the disabled feature extractors) - confirm they are
# still needed.
sr = 16000    # sample rate: 16 kHz
wlen = 0.025  # window length: 25 ms = 400 samples at 16 kHz
slen = 0.01   # step length: 10 ms = 160 samples at 16 kHz
nfft = 512

def readCsv (fileName, hasLables):
    """Read a comma-separated listing file into a NumPy array of strings.

    Parameters
    ----------
    fileName : str
        Path of the text file to read.
    hasLables : bool
        True when every row is a ``name,label`` pair; False when the file is
        a plain list of names.

    Returns
    -------
    numpy.ndarray
        With ``hasLables`` true: an array of shape ``(2, n_rows)`` whose first
        row holds the names and second row the labels, so it can be unpacked
        as ``names, labels = readCsv(...)``.
        Otherwise: a 1-D array with every field of the file in reading order.

    Raises
    ------
    ValueError
        If ``hasLables`` is true and a non-empty row does not contain exactly
        two fields. Without this check such a row would silently shift every
        following name/label pair by one position.
    """
    fields = []
    # newline='' is the mode the csv module documents for files passed to
    # csv.reader; it lets the reader handle every line-ending convention.
    with open(fileName, "r", newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        for row in reader:
            if not row:
                # Blank line: contributes no fields.
                continue
            if hasLables and len(row) != 2:
                raise ValueError(
                    "%s, line %d: expected 2 fields (name,label), got %d"
                    % (fileName, reader.line_num, len(row))
                )
            fields.extend(row)
    if hasLables:
        # (n_rows, 2) -> (2, n_rows): one row of names, one row of labels.
        return np.transpose(np.array(fields).reshape((len(fields)//2, 2)))
    return np.array(fields)

def writeCsv (data, labels, fileName = FileNames['pred']):
    """Write a two-column ``name,label`` CSV file.

    Parameters
    ----------
    data : sequence
        Values for the ``name`` column.
    labels : sequence
        Values for the ``label`` column, in the same order as ``data``.
    fileName : str, optional
        Output path; defaults to ``FileNames['pred']`` as it was when this
        function was defined.
    """
    # Stack the two columns and flip to one (name, label) pair per row.
    pairs = np.transpose([data, labels])
    with open(fileName, "w", newline='\n') as out:
        csvWriter = csv.writer(out, delimiter=',')
        csvWriter.writerow(['name', 'label'])  # header line
        csvWriter.writerows(pairs)

def getData (folderName, dataNames):
    """Load audio clips and return their dB-scaled magnitude spectrograms.

    Parameters
    ----------
    folderName : str
        Key into ``AudioFolders`` selecting the folder prefix of the clips.
    dataNames : iterable of str
        Clip file names; each is appended to the folder prefix.

    Returns
    -------
    numpy.ndarray
        ``np.array`` over the list of per-clip spectrograms, in the order of
        ``dataNames``.
    """
    spectrograms = []
    for clipName in dataNames:
        clipPath = AudioFolders[folderName] + clipName
        # NOTE(review): librosa.load is called without `sr`, so librosa's own
        # default sample rate applies, not the module-level `sr` - confirm
        # this is intended.
        samples = librosa.load(clipPath)[0]
        magnitude = np.abs(librosa.stft(samples))
        spectrograms.append(librosa.amplitude_to_db(magnitude))
        # Alternative features tried previously (disabled):
        # spectrograms.append(np.abs(fft(wav.read(clipPath)[1])))
        # spectrograms.append(mfcc(wav.read(clipPath)[1], numcep=12))

    return np.array(spectrograms)

In [2]:
# Clip names (and labels, where the split has them) for each split.
# The labelled listings unpack into a row of names and a row of labels.
train_data_name, train_labels = readCsv(fileName=FileNames['train'], hasLables=True)
valid_data_name, valid_labels = readCsv(fileName=FileNames['valid'], hasLables=True)
test_data_name = readCsv(fileName=FileNames['test'], hasLables=False)
# writeCsv(train_data, train_labels)

In [3]:
# Build the spectrogram features for every split and min-max scale them.
# This cell defines `train`, `valid` and `test`, which the following cells
# use, so it has to run on a fresh kernel.
train_data = getData('train', train_data_name)
valid_data = getData('valid', valid_data_name)
test_data  = getData('test',  test_data_name)

# Global extremes over all three splits, so every split shares one scale.
# NOTE(review): this includes the test split in the scaling statistics -
# confirm that is acceptable for this task.
mi = min([train_data.min(), valid_data.min(), test_data.min()])
ma = max([train_data.max(), valid_data.max(), test_data.max()])
print (mi, ma)

# Min-max scaling: (x - min) / (max - min).
train = (train_data - mi) / (ma-mi)
valid = (valid_data - mi) / (ma-mi)
test =  (test_data  - mi) / (ma-mi)

# Sanity check of the scaled range; separate names keep `mi`/`ma` meaning
# the raw extremes used for the scaling above.
scaled_mi = min([train.min(), valid.min(), test.min()])
scaled_ma = max([train.max(), valid.max(), test.max()])
print (scaled_mi, scaled_ma)
train.shape, valid.shape, test.shape

-57.697765 50.504
0.0 1.0


((8000, 1025, 44), (1000, 1025, 44), (3000, 1025, 44))

In [4]:
def _with_channel_axis(batch):
    """Reshape `batch` to its first three dimensions plus a trailing axis of length 1."""
    n_items, n_rows, n_cols = batch.shape[0], batch.shape[1], batch.shape[2]
    return batch.reshape(n_items, n_rows, n_cols, 1)

# Each split gets the same trailing axis of length 1.
train = _with_channel_axis(train)
valid = _with_channel_axis(valid)
test = _with_channel_axis(test)
train.shape, valid.shape, test.shape

((8000, 1025, 44, 1), (1000, 1025, 44, 1), (3000, 1025, 44, 1))

In [34]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPool2D, Flatten, Dense

# Two 'same'-padded 5x4 convolutions, one 5x4 max-pool, then a small dense
# head ending in a 2-unit softmax. The input shape is taken from one training
# sample.
model = Sequential([
    Conv2D(16, (5,4), activation='relu', strides=1, padding='same', input_shape=train[0].shape),
    Conv2D(16, (5,4), activation='relu', strides=1, padding='same'),
    MaxPool2D((5,4)),
    Flatten(),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(2, activation='softmax'),
])

model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_9 (Conv2D)            (None, 1025, 44, 16)      336       
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 1025, 44, 16)      5136      
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 205, 11, 16)       0         
_________________________________________________________________
flatten_4 (Flatten)          (None, 36080)             0         
_________________________________________________________________
dense_10 (Dense)             (None, 32)                1154592   
_________________________________________________________________
dense_11 (Dense)             (None, 16)                528       
_________________________________________________________________
dense_12 (Dense)             (None, 2)                

In [23]:
# NOTE(review): the output layer is a 2-unit softmax trained on one-hot
# targets; confirm 'binary_crossentropy' (rather than
# 'categorical_crossentropy') is the intended loss.
model.compile(
    optimizer='Adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [31]:
# One-hot targets: label '0' -> [1, 0]; every other label -> [0, 1].
train_targets = np.array([[0,1] if label != '0' else [1,0] for label in train_labels])
valid_targets = np.array([[0,1] if label != '0' else [1,0] for label in valid_labels])

model.fit(
    x=train,
    y=train_targets,
    epochs=1,  # first time 6-7 epochs and after increase by one (around 0.72)
    verbose=1,  # progress bar
    # validation_split = 0.1,
    validation_data=(valid, valid_targets),
    shuffle=True,
)

Train on 8000 samples, validate on 1000 samples
Epoch 1/1


<keras.callbacks.callbacks.History at 0x175f99abe08>

In [27]:
# load_model is not imported anywhere else in the visible notebook; without
# this import the cell raises NameError.
from keras.models import load_model

# Save the model, reload it from disk, and score the reloaded copy on the
# validation split.
model.save('3thirdmodel6')
loaded_model = load_model('3thirdmodel6')

# Predicted label is '0' when the first output exceeds the second, else '1'.
# A separate name keeps these validation predictions apart from the test
# predictions stored in `pred`.
valid_pred = np.array(['0' if a > b else '1' for a, b in loaded_model.predict(valid)])

# Number of validation clips whose predicted label matches the true label.
np.sum(valid_pred == valid_labels)

In [32]:
# Class scores for the test split, one (first, second) pair per clip.
test_scores = model.predict(test)
# Label '0' only when the first score is strictly greater; otherwise '1'.
pred = np.array(['1' if not (first > second) else '0' for first, second in test_scores])

In [33]:
# Write the test clip names with their predicted labels to the default
# predictions file.
writeCsv(data=test_data_name, labels=pred)

In [None]:
# # Disabled experiment: fit on the train and validation sets combined
# train_valid = []
# train_valid.extend (train)
# train_valid.extend (valid)
# train_valid = np.array(train_valid)
# train_valid_labels = []
# train_valid_labels.extend (train_labels)
# train_valid_labels.extend (valid_labels)
# train_valid_labels = np.array(train_valid_labels)
# model.fit(
#     x = train_valid, 
#     y = np.array([[1,0] if label == '0' else [0,1] for label in train_valid_labels]), 
#     epochs = 1, 
#     verbose = 1, # progress bar
#     # validation_split = 0.1,
#     # validation_data = (valid, np.array([[1,0] if label == '0' else [0,1] for label in valid_labels])),
#     shuffle = True
# )