In [2]:
import tensorflow as tf
import numpy as np
from keras.models import Model
from keras.layers import Conv1D, MaxPooling1D, Dense, Lambda, concatenate, Input, GlobalMaxPooling1D, GlobalAveragePooling1D
import keras.backend as K




Using TensorFlow backend.


In [3]:
# --- Input geometry: one example is a (num_frames, num_freq_bins) matrix ---
num_frames = 599
num_freq_bins = 128

# --- Convolution hyper-parameters handed to AudioCNNModel further down ---
num_conv_filters_1 = 256
kernel_size = 4

# Uniform random values in [0, 1) with the input geometry above; used only
# as a stand-in example to push through the network.
dummy_data = np.random.random(size=(num_frames, num_freq_bins))

In [4]:
def l2_norm(x):
    """Collapse axis 1 of ``x`` to its L2 (Euclidean) norm.

    Squares every element, sums the squares along axis 1 and takes the
    square root of each sum, so the result has one axis fewer than ``x``.

    Args:
        x: backend tensor with at least two axes.

    Returns:
        Tensor holding ``sqrt(sum(x ** 2, axis=1))``.
    """
    sum_of_squares = K.sum(x ** 2, axis=1)
    return K.sqrt(sum_of_squares)

In [5]:
class AudioCNNModel():
    """1-D convolutional network over a (num_frames, num_freq_bins) input.

    The graph is three Conv1D -> MaxPooling1D stages, followed by three
    global reductions over the remaining step axis (max, mean, L2 norm) that
    are concatenated and fed through two 2048-unit ReLU dense layers and a
    final 50-unit dense layer. The assembled Keras model is exposed as
    ``self.net``; the input placeholder as ``self.model_input``.
    """

    def __init__(self, num_frames, num_freq_bins, num_conv_filters1, pool_size_1, kernel_size):
        """Build the network.

        Args:
            num_frames: length of the first (step) axis of one input example.
            num_freq_bins: length of the second (feature) axis of one example.
            num_conv_filters1: number of filters in the first Conv1D layer.
            pool_size_1: pool size of the first MaxPooling1D layer.
            kernel_size: kernel size shared by all three Conv1D layers.
        """
        # Keep the hyper-parameters on the instance for later inspection.
        self.num_frames = num_frames
        self.num_freq_bins = num_freq_bins
        self.num_conv_filters1 = num_conv_filters1
        self.pool_size1 = pool_size_1
        self.kernel_size = kernel_size
        self.model_input = Input(shape=(num_frames, num_freq_bins))

        # (filters, pool size, extra Conv1D kwargs) for each conv/pool stage.
        # Only the first stage is configurable; it also carries an explicit
        # input_shape. The later stages use fixed sizes.
        stage_specs = (
            (self.num_conv_filters1, self.pool_size1,
             {'input_shape': (self.num_frames, self.num_freq_bins)}),
            (256, 2, {}),
            (512, 2, {}),
        )
        features = self.model_input
        for n_filters, pool, extra_kwargs in stage_specs:
            features = Conv1D(filters=n_filters, kernel_size=self.kernel_size, **extra_kwargs)(features)
            features = MaxPooling1D(pool_size=pool)(features)

        # Temporal pooling: reduce the step axis three different ways
        # (max, mean, L2 norm) and join the results along the feature axis.
        pooled = [
            GlobalMaxPooling1D(data_format='channels_last')(features),
            GlobalAveragePooling1D(data_format='channels_last')(features),
            Lambda(lambda t: l2_norm(t))(features),
        ]
        hidden = concatenate(pooled)

        # Fully connected head: two ReLU layers, then the 50-unit output
        # (no activation specified on the last layer).
        for _ in range(2):
            hidden = Dense(2048, activation='relu')(hidden)
        latent_factors = Dense(50)(hidden)

        self.net = Model(inputs=self.model_input, outputs=latent_factors)
        
        


In [6]:
# Assemble the network from the notebook-level hyper-parameters; the first
# pooling layer uses a pool size of 4.
model = AudioCNNModel(
    num_frames=num_frames,
    num_freq_bins=num_freq_bins,
    num_conv_filters1=num_conv_filters_1,
    pool_size_1=4,
    kernel_size=kernel_size,
)
# Smoke test: prepend a batch axis of length 1 to the dummy example, run a
# forward pass and display the shape of the prediction.
model.net.predict(dummy_data[np.newaxis, ...]).shape

Instructions for updating:
Colocations handled automatically by placer.


(1, 50)

In [9]:
# Read the comma-separated numeric table from "mfcc.csv", skipping its first
# line (presumably a header row -- confirm against the file).
# The file is opened in a `with` block so the handle is closed as soon as
# loading finishes; the previous bare `open(...)` call never closed it.
with open("mfcc.csv", "rb") as mfcc_file:
    sample = np.loadtxt(mfcc_file, delimiter=",", skiprows=1)