In [72]:
import numpy as np

import tensorflow as tf
import tensorflow_probability as tfp

from tensorflow.keras import Model, Sequential
from tensorflow.keras.layers import Layer, BatchNormalization, Flatten, Reshape, Permute, Add, ReLU, Input, MaxPool1D, Conv1D, Conv2D, Dense, MaxPooling2D, GlobalMaxPooling2D, UpSampling2D, Concatenate
import tensorflow.keras.backend as K

from sklearn.decomposition import PCA

import sys
from pathlib import Path
import os

gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

### VGGish model TF2 implementation

In [64]:
class VGGish(Model):
    def __init__(self):
        super().__init__()
        self._name = 'VGGish'
        self.trainable = False
        
        # Block 1
        self.conv2d_1 = Conv2D(64, (3, 3), strides=(1, 1), activation='relu', padding='same')
        self.pool_1 = MaxPooling2D((2, 2), strides=(2, 2), padding='same')
        
        # Block 2
        self.conv2d_2 = Conv2D(128, (3, 3), strides=(1, 1), activation='relu', padding='same')
        self.pool_2 = MaxPooling2D((2, 2), strides=(2, 2), padding='same')

        # Block 3
        self.conv2d_3_1 = Conv2D(256, (3, 3), strides=(1, 1), activation='relu', padding='same')
        self.conv2d_3_2 = Conv2D(256, (3, 3), strides=(1, 1), activation='relu', padding='same')
        self.pool_3 = MaxPooling2D((2, 2), strides=(2, 2), padding='same')

        # Block 4
        self.conv2d_4_1 = Conv2D(512, (3, 3), strides=(1, 1), activation='relu', padding='same')
        self.conv2d_4_2 = Conv2D(512, (3, 3), strides=(1, 1), activation='relu', padding='same')
        self.pool_4 = MaxPooling2D((2, 2), strides=(2, 2), padding='same')
        
#         self.load_weights('../weights/vggish')
        
    def call(self, inputs, return_corr = True, encode_level = None):
        feats = {
            'block1': None,
            'block2': None,
            'block3': None,
            'block4': None
        }
        
        x = self.conv2d_1(inputs)
        x = self.pool_1(x)
        feats['block1'] = x if not return_corr else self.get_feat_corr(x)
        
        if encode_level == 1:
            return x, feats['block1']
        
        x = self.conv2d_2(x)
        x = self.pool_2(x)
        feats['block2'] = x if not return_corr else self.get_feat_corr(x)
        
        if encode_level == 2:
            return x, feats['block2']
        
        x = self.conv2d_3_1(x)
        x = self.conv2d_3_2(x)
        x = self.pool_3(x)
        feats['block3'] = x if not return_corr else self.get_feat_corr(x)
        
        if encode_level == 3:
            return x, feats['block3']
        
        x = self.conv2d_4_1(x)
        x = self.conv2d_4_2(x)
        x = self.pool_4(x)
        feats['block4'] = x if not return_corr else self.get_feat_corr(x)
                
        return x, feats
    
    def get_feat_corr(self, feat):
        feat = Reshape((-1, feat.shape[-1]))(feat)
        feat = Permute((2, 1))(feat)
        corr = tf.linalg.matmul(feat, feat, transpose_b=True) / (feat.shape[2] - 1)
        corr = tfp.math.fill_triangular_inverse(corr, upper=True)
        corr = corr / tf.reduce_max(tf.abs(feat))
        
        return corr
        
    def model(self):
        x = Input(shape=(960, 64, 1))
        return Model(inputs=[x], outputs=self.call(x))
    
    
# # convolutional operation parameters
# n_filters = 16
# kernels = [1, 3, 7]
# skips = []

# history_seq = Input(shape=(WINDOW_SIZE, 1))
# for kernel in kernels:
#     x = Conv1D(n_filters, (kernel, ), activation = 'relu', padding = 'same')(history_seq) 
#     x = MaxPooling1D()(x)
#     x = Conv1D(n_filters * 2, (kernel, ), activation = 'relu', padding = 'same')(x) 
#     skips.append(x)

# features = Add()(skips)
# features = BatchNormalization()(features)
# features = Flatten()(features)

# out = Dropout(0.1)(features)
# out = Dense(256, activation = 'relu')(features)
# out = Dropout(0.15)(out)
# out = Dense(256, activation = 'relu')(out)

vgg = VGGish()
_, feat = vgg(tf.random.uniform((1, 128, 128, 1)))
feat

{'block1': <tf.Tensor: shape=(1, 2080), dtype=float32, numpy=
 array([[3.5273850e-05, 7.6265947e-05, 2.8493220e-04, ..., 3.1446838e-03,
         5.9016519e-03, 1.5245323e-03]], dtype=float32)>,
 'block2': <tf.Tensor: shape=(1, 8256), dtype=float32, numpy=
 array([[0.06765261, 0.00130095, 0.08475295, ..., 0.        , 0.        ,
         0.        ]], dtype=float32)>,
 'block3': <tf.Tensor: shape=(1, 32896), dtype=float32, numpy=
 array([[1.6588690e-03, 1.6743097e-05, 3.4623828e-03, ..., 2.1995839e-03,
         1.5790742e-03, 4.5692816e-04]], dtype=float32)>,
 'block4': <tf.Tensor: shape=(1, 131328), dtype=float32, numpy=
 array([[0.        , 0.        , 0.        , ..., 0.01070831, 0.00795639,
         0.01157182]], dtype=float32)>}

In [67]:
pca = PCA(n_components=1)
# x = pca.fit_transform(feat['block4'].numpy()[0])
# pca.explained_variance_ratio_

In [69]:
feat['block1']

<tf.Tensor: shape=(1, 2080), dtype=float32, numpy=
array([[3.5273850e-05, 7.6265947e-05, 2.8493220e-04, ..., 3.1446838e-03,
        5.9016519e-03, 1.5245323e-03]], dtype=float32)>

In [59]:
x.shape

(512, 4)

In [9]:
class ExtractorCNNBlock(Model):
    def __init__(self, filters, kernel):
        super().__init__()
        self.conv1 = Conv1D(filters, (kernel,), activation='relu', padding='same')
        self.pool = MaxPooling1D()
        self.conv2 = Conv1D(filters * 2, (kernel,), activation='relu', padding='same')
    
    def call(self, inputs):
        x = self.conv1(inputs)
        x = self.pool(x)
        x = self.conv2(x)
        
        return x

class FeatExtractor(Model):
    def __init__(self):
        super().__init__()
        
        self.block1 = ExtractorCNNBlock(16, 1)
        self.block2 = ExtractorCNNBlock(16, 3)
        self.block3 = ExtractorCNNBlock(16, 5)
        self.block4 = ExtractorCNNBlock(16, 7)
        self.bn = BatchNormalization()
        self.flatten = Flatten()
        self.attention = Attention()
        
    def call(self, inputs, flatten = False):
        block1_enc = self.block1(inputs)
        block2_enc = self.block2(inputs)
        block3_enc = self.block3(inputs)
        block4_enc = self.block4(inputs)
        
        x = Add()([block1_enc, block2_enc, block3_enc, block4_enc])
        x = self.bn(x)
        
        att = self.attention([x, x])
        x = Concatenate()([x, att])
        
        if flatten:
            x = self.flatten(x)
            
        return x
        
class Sampler_Z(tfk.layers.Layer):
    
    def call(self, inputs):
        mu, rho = inputs
        sd = tf.math.log(1+tf.math.exp(rho))
        batch_size = tf.shape(mu)[0]
        dim_z = tf.shape(mu)[1]
        z_sample = mu + sd * tf.random.normal(shape=(batch_size, dim_z))
        
        return z_sample, sd
        
class FeaturesEncoder(Model):
    def __init__(self, dim_z, feature_extractor=None, name="encoder", **kwargs):
        super(Encoder_Z, self).__init__(name=name, **kwargs)
        self.dim_x = (28, 28, 1)
        self.dim_z = dim_z
        
        self.feature_extractor = feature_extractor or FeatExtractor()
        self.conv_layer_1 = tfkl.Conv1D(filters=32, kernel_size=3, strides=(2,2), padding='same', activation='relu')
        self.conv_layer_2 = tfkl.Conv1D(filters=64, kernel_size=3, strides=(2,2), padding='same', activation='relu')
        self.conv_layer_3 = tfkl.Conv1D(filters=128, kernel_size=3, strides=(2,2), padding='same', activation='relu')
        
        self.flatten_layer = tfkl.Flatten()
        self.dense_mean = tfkl.Dense(self.dim_z, activation=None, name='z_mean')
        self.dense_raw_stddev = tfkl.Dense(self.dim_z, activation=None, name='z_raw_stddev')
        self.sampler_z = Sampler_Z()
    
    # Functional
    def call(self, inputs, embedding=True):
        if embedding:
            z = self.feature_extractor(inputs)
        else:
            z = inputs
            
        z = self.conv_layer_1(z)
        z = self.conv_layer_2(z)
        z = self.flatten_layer(z)
        mu = self.dense_mean(z)
        rho = self.dense_raw_stddev(z)
        z_sample, sd = self.sampler_z((mu,rho))
        
        return z_sample, mu, sd
    
class FeaturesDecoder(tfk.layers.Layer):
    def __init__(self, dim_z, name="decoder", **kwargs):
        super(Decoder_X, self).__init__(name=name, **kwargs)
        self.dim_z = dim_z
        self.dense_z_input = tfkl.Dense(7*7*32, activation=None)
        self.reshape_layer = tfkl.Reshape((7,7,32))
        self.conv_transpose_layer_1 = tfkl.Conv1DTranspose(filters=64, kernel_size=3, strides=2, padding='same', activation='relu')
        self.conv_transpose_layer_2 = tfkl.Conv1DTranspose(filters=32, kernel_size=3, strides=2, padding='same', activation='relu')
        self.conv_transpose_layer_3 = tfkl.Conv1DTranspose(filters=1, kernel_size=3, strides=1, padding='same')
    
    # Functional
    def call(self, z):
        x_output = self.dense_z_input(z)
        x_output = self.reshape_layer(x_output)
        x_output = self.conv_transpose_layer_1(x_output)
        x_output = self.conv_transpose_layer_2(x_output)
        x_output = self.conv_transpose_layer_3(x_output)
        return x_output
        
class FeaturesMapper(Model):
    def __init__(self, dim_z, learning_rate, kl_weight=1, name="autoencoder", **kwargs):
        super(FeaturesMapper, self).__init__(name=name, **kwargs)
        self.dim_x = (28, 28, 1)
        self.dim_z = dim_z
        self.learning_rate = learning_rate
        self.encoder = Encoder_Z(dim_z=self.dim_z)
        self.decoder = Decoder_X(dim_z=self.dim_z)
        self.kl_weight = kl_weight
        
    # def encode_and_decode(self, x_input):
    def call(self, x_input):
        z_sample, mu, sd = self.encoder(x_input)
        x_recons_logits = self.decoder(z_sample)
        
        kl_divergence = - 0.5 * tf.math.reduce_sum(1+tf.math.log(
          tf.math.square(sd))-tf.math.square(mu)-tf.math.square(sd), axis=1)
        kl_divergence = tf.math.reduce_mean(kl_divergence)
        # self.add_loss(lambda: self.kl_weight * kl_divergence)
        self.add_loss(self.kl_weight * kl_divergence)
        
        return x_recons_logits

class IMuse(Model):
    def __init__(self):
        super().__init__()
        self._name = 'IMuse'
        
        self.encoder = VGGish()
        
        self.bloc1_decoder = Sequential([
            Input((64, 64)),
            Conv1D(128, 3, activation="relu", padding="same"),
            Conv1D(128, 5, activation="relu", padding="same"),
            
        ])
        
imuse = IMuse()
imuse(tf.zeros((1, 128, 60, 1)))

NotImplementedError: Exception encountered when calling layer "IMuse" (type IMuse).

Unimplemented `tf.keras.Model.call()`: if you intend to create a `Model` with the Functional API, please provide `inputs` and `outputs` arguments. Otherwise, subclass `Model` with an overridden `call()` method.

Call arguments received:
  • inputs=tf.Tensor(shape=(1, 128, 60, 1), dtype=float32)
  • training=None
  • mask=None

In [17]:
class ResnetIdentityBlock(tf.keras.Model):
    def __init__(self, kernel_size, filters):
        super(ResnetIdentityBlock, self).__init__(name='')
        filters1, filters2, filters3 = filters

        self.conv2a = tf.keras.layers.Conv2D(filters1, (1, 1))
        self.bn2a = tf.keras.layers.BatchNormalization()

        self.conv2b = tf.keras.layers.Conv2D(filters2, kernel_size, padding='same')
        self.bn2b = tf.keras.layers.BatchNormalization()

        self.conv2c = tf.keras.layers.Conv2D(filters3, (1, 1))
        self.bn2c = tf.keras.layers.BatchNormalization()

    def call(self, input_tensor, training=False):
        x = self.conv2a(input_tensor)
        x = self.bn2a(x, training=training)
        x = tf.nn.relu(x)

        x = self.conv2b(x)
        x = self.bn2b(x, training=training)
        x = tf.nn.relu(x)

        x = self.conv2c(x)
        x = self.bn2c(x, training=training)

        x += input_tensor
        return tf.nn.relu(x)


block = ResnetIdentityBlock(1, [1, 2, 3])

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_1 (Dense)             (None, 131072)            67239936  
                                                                 
Total params: 67,239,936
Trainable params: 67,239,936
Non-trainable params: 0
_________________________________________________________________
