In [86]:
from keras.layers import Input, Dense
from keras.models import Model
from keras.initializers import glorot_uniform  # Or your initializer of choice
from keras.datasets import mnist
import numpy as np
import random
import mir_utils as miru
from keras.callbacks import TensorBoard

input_dim=20000
# this is the size of our encoded representations
encoding_dim = 1000  #floats -> compression factor of input_dim/encoding_dim

# this is our input placeholder
input_img = Input(shape=(input_dim,))
# "encoded" is the encoded representation of the input
encoded = Dense(encoding_dim, activation='relu')(input_img)
# "decoded" is the lossy reconstruction of the input
decoded = Dense(input_dim, activation='sigmoid')(encoded)

# this model maps an input to its reconstruction
autoencoder = Model(input_img, decoded)

# this model maps an input to its encoded representation
encoder = Model(input_img, encoded)

# create a placeholder for an encoded (32-dimensional) input
encoded_input = Input(shape=(encoding_dim,))
# retrieve the last layer of the autoencoder model
decoder_layer = autoencoder.layers[-1]
# create the decoder model
decoder = Model(encoded_input, decoder_layer(encoded_input))

autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')



In [89]:
# x_train = x_train.astype('float32') / 255.
# x_test = x_test.astype('float32') / 255.
# x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
# x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))

a=miru.loadAudioSubset(20)
#takes an audio dict, desired length of samples and split percentage of test & train subsets
#returns x_train,x_test,y_train,y_test
def audioDictToNp(a,dur=10000,testFraction=0):
    X=[]
    y=[]
    for key,l in a.items():
            for i in l:
                if len(i)>dur:
                    y.append(key)
                    X.append(i[0:dur])
    X=np.asarray(X)
    y=np.asarray(y)
    if testFraction==0:
        return X,y,X,y
    else:
        from sklearn.model_selection import train_test_split
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=testFraction, random_state=42)
        return X_train, X_test, y_train, y_test
            
x_train, x_test, y_train, y_test=audioDictToNp(a,testFraction=0.1)

print("train,test shapes:",x_train.shape,x_test.shape)
Wsave = autoencoder.get_weights()
for a in Wsave:
    print(a.shape)


loading: /home/amir/mir/t-sne/samples
loading: /home/amir/mir/t-sne/samples/claps
loading: /home/amir/mir/t-sne/samples/snares
loading: /home/amir/mir/t-sne/samples/kicks
loading: /home/amir/mir/t-sne/samples/rims
loading: /home/amir/mir/t-sne/samples/sines
train,test shapes: (28, 10000) (4, 10000)
(20000, 1000)
(1000,)
(1000, 20000)
(20000,)
[None, 20000]
[None, 1000]
[None, 20000]
Model: "model_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         (None, 20000)             0         
_________________________________________________________________
dense_5 (Dense)              (None, 1000)              20001000  
_________________________________________________________________
dense_6 (Dense)              (None, 20000)             20020000  
Total params: 40,021,000
Trainable params: 40,021,000
Non-trainable params: 0
_________________________________________________________

In [80]:
#audio test to see if array works
import sounddevice as sd
sd.play(x_train[9],40000)

In [81]:
print (x_train.shape)
print (x_test.shape)

Wsave = autoencoder.get_weights()
for a in Wsave:
    print(a.shape)
for layer in autoencoder.layers:
     print(layer.get_output_at(0).get_shape().as_list())

print(autoencoder.summary())

(28, 10000)
(4, 10000)
(20000, 1000)
(1000,)
(1000, 20000)
(20000,)
[None, 20000]
[None, 1000]
[None, 20000]
Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 20000)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 1000)              20001000  
_________________________________________________________________
dense_2 (Dense)              (None, 20000)             20020000  
Total params: 40,021,000
Trainable params: 40,021,000
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
x_train=x_train[0:50]
x_test=x_train[0:10]
print (x_train.shape)
print (x_test.shape)
def train():
    initial_weights = autoencoder.get_weights()
    weights = [glorot_uniform(seed=random.randint(0, 1000))(w.shape) if w.ndim > 1 else w for w in autoencoder.get_weights()]
    autoencoder.set_weights(new_weights)

    autoencoder.fit(x_train, x_train,
                    epochs=1000,
                    batch_size=10,
                    shuffle=True,
                    validation_data=(x_test, x_test),
                    verbose=1,
                    callbacks=[TensorBoard(log_dir='/tmp/autoencoder')]
                   )
train()

In [None]:

# encode and decode some digits
# note that we take them from the *test* set
encoded_imgs = encoder.predict(x_test)
decoded_imgs = decoder.predict(encoded_imgs)

from keras.datasets import mnist
import numpy as np
# use Matplotlib (don't ask)
import matplotlib.pyplot as plt

n = 10  # how many digits we will display
plt.figure(figsize=(20, 4))
for i in range(n):
    # display original
    ax = plt.subplot(2, n, i + 1)
    plt.imshow(x_train[i].reshape(28, 28))
    plt.gray()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    # display reconstruction
    ax = plt.subplot(2, n, i + 1 + n)
    plt.imshow(decoded_imgs[i].reshape(28, 28))
    plt.gray()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.show()