In [None]:
import pandas as pd
import tensorflow as tf
from keras.layers import (
    Input, Dense, Conv2D, 
    BatchNormalization, MaxPooling2D, Dropout, 
    Flatten, Resizing, Input, BatchNormalization
)

In [None]:
# Checkpoint holding the trained SpeakerConv2D weights.
weights = './callbacks/checkpoints/speaker_conv2d_weights.h5'

# CSV inputs: the table of test clips and the sample submission file.
test_csv = './data/test.csv'
sample_sub_csv = './data/sample_submission.csv'

In [None]:
# Read both CSVs in one pass: `test` (its `file_path` column is iterated later
# to locate the WAV files) and `sample_sub` (the sample submission table).
test, sample_sub = map(pd.read_csv, (test_csv, sample_sub_csv))

In [None]:
def squeeze(audio):
    """Return ``audio`` with its last axis removed.

    Delegates to ``tf.squeeze(audio, axis=-1)``, which only succeeds when the
    last axis has size 1.
    """
    return tf.squeeze(audio, axis=-1)
    
def get_spectrogram(waveform, frame_length=255, frame_step=128):
    """Convert a waveform into a magnitude spectrogram with a trailing channel axis.

    Args:
        waveform: audio samples, passed straight to ``tf.signal.stft``.
        frame_length: STFT window length in samples (default 255).
        frame_step: hop between successive windows in samples (default 128).

    Returns:
        ``abs(STFT(waveform))`` with one extra axis of size 1 appended, so the
        result can be fed to 2-D convolution layers like a single-channel image.
    """
    stft = tf.signal.stft(waveform, frame_length=frame_length, frame_step=frame_step)
    # Keep only the magnitude; the phase of the complex STFT is discarded.
    magnitude = tf.abs(stft)
    # Append the channel axis expected by Conv2D-style layers.
    return magnitude[..., tf.newaxis]

In [None]:
# Decode every clip named in `test.file_path` (paths are relative to ./data/).
# desired_samples=16000 makes every decoded clip the same length, and
# desired_channels=-1 keeps whatever channel count the file was encoded with.
# decode_wav returns (audio, sample_rate); only the audio tensor ([0]) is kept.
data = [
    tf.audio.decode_wav(
        tf.io.read_file(f'./data/{wav_location}'),
        desired_channels=-1,
        desired_samples=16000,
    )[0]
    for wav_location in test.file_path
]
    

In [None]:
# Build the inference pipeline: one dataset element per decoded clip, with the
# channel axis squeezed away and then converted to a spectrogram.
# NOTE(review): this rebinds `test` from the DataFrame read from test_csv to a
# tf.data.Dataset, so the WAV-loading cell (which iterates `test.file_path`)
# cannot be re-run afterwards without re-reading the CSV first.
test = (
    tf.data.Dataset.from_tensor_slices(data)
    .map(squeeze, num_parallel_calls=tf.data.AUTOTUNE)
    .map(get_spectrogram, num_parallel_calls=tf.data.AUTOTUNE)
)

# Cell output: the repr of a one-element view of the pipeline.
test.take(1)

In [None]:
class SpeakerConv2D:
  """Factory for the 2-D convolutional speaker classifier."""

  @staticmethod
  def build(input_shape,
            n_labels,
            activation='relu',
            weights_path=None):
    """Assemble the Sequential network and optionally restore saved weights.

    Args:
      input_shape: shape of a single input example, forwarded to ``Input``.
      n_labels: number of units in the final softmax layer.
      activation: activation applied in both Conv2D layers and the hidden
        Dense layer (the output layer always uses softmax).
      weights_path: if not None, weights are loaded from this path after the
        layers have been added.

    Returns:
      The assembled model; it is not compiled here.
    """
    model = tf.keras.models.Sequential()

    model.add(Input(shape=input_shape))
    # Resize every input to a fixed 64x69 grid before the convolutions.
    model.add(Resizing(64, 69))

    # Two identical conv stages that differ only in filter count and kernel size.
    for filters, kernel_size in ((64, (8, 8)), (128, (4, 4))):
      model.add(Conv2D(filters, kernel_size=kernel_size, strides=(2, 2),
                       activation=activation))
      model.add(MaxPooling2D())
      model.add(BatchNormalization())
      model.add(Dropout(0.5))

    model.add(Flatten())

    model.add(Dense(128, activation=activation))
    model.add(Dense(n_labels, activation='softmax'))

    # If a weights path is supplied (indicating that the model was
    # pre-trained), load the weights.
    if weights_path is not None:
      model.load_weights(weights_path)

    return model

In [None]:
# Rebuild the network and restore the trained weights. The input shape
# (124, 129, 1) matches get_spectrogram's output for a 16000-sample clip with
# frame_length=255 / frame_step=128; 90 is the number of output classes.
test_model = SpeakerConv2D.build((124, 129, 1), 90, activation='relu', weights_path=weights)

# Model.predict treats every element of a tf.data.Dataset as one *batch*.
# `test` yields individual spectrograms, so it must be batched to give the
# model the leading batch axis it expects.
batch_size = 32
pred = test_model.predict(test.batch(batch_size).prefetch(tf.data.AUTOTUNE), verbose=1)