In [76]:
import numpy as np

import tensorflow as tf
import tensorflow_probability as tfp

from tensorflow.keras import Model, Sequential
from tensorflow.keras.layers import Layer, Reshape, Permute, ReLU, Input, MaxPool1D, Conv1D, Conv2D, Dense, MaxPooling2D, GlobalMaxPooling2D, UpSampling2D, Concatenate
import tensorflow.keras.backend as K

import sys
from pathlib import Path
import os

# Ask TensorFlow to grow GPU memory on demand rather than claiming it all at start-up.
gpus = tf.config.list_physical_devices('GPU')
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Memory growth has to be configured before the GPUs are initialised; if this
    # cell runs after TensorFlow has already touched the devices, report it and
    # carry on instead of aborting the whole notebook.
    print(f'Could not enable GPU memory growth: {err}')

### VGGish model TF2 implementation

In [72]:
class VGGish(Model):
    """Frozen convolutional encoder (VGGish layout) that exposes per-block features.

    Four conv / max-pool blocks with 64, 128, 256 and 512 filters are applied in
    sequence. ``call`` returns the output of the last block together with a dict
    holding, for every block, either the pooled activations or their
    channel-by-channel Gram ("correlation") matrix.

    Args:
        weights_path: Checkpoint path handed to ``load_weights``. Defaults to
            ``'../weights/vggish'``; pass ``None`` to build the network without
            loading any weights.
    """

    def __init__(self, weights_path='../weights/vggish'):
        # Name the model through the documented constructor argument instead of
        # assigning the private ``_name`` attribute afterwards.
        super().__init__(name='VGGish')

        # Block 1
        self.conv2d_1 = Conv2D(64, (3, 3), strides=(1, 1), activation='relu', padding='same')
        self.pool_1 = MaxPooling2D((2, 2), strides=(2, 2), padding='same')

        # Block 2
        self.conv2d_2 = Conv2D(128, (3, 3), strides=(1, 1), activation='relu', padding='same')
        self.pool_2 = MaxPooling2D((2, 2), strides=(2, 2), padding='same')

        # Block 3
        self.conv2d_3_1 = Conv2D(256, (3, 3), strides=(1, 1), activation='relu', padding='same')
        self.conv2d_3_2 = Conv2D(256, (3, 3), strides=(1, 1), activation='relu', padding='same')
        self.pool_3 = MaxPooling2D((2, 2), strides=(2, 2), padding='same')

        # Block 4
        self.conv2d_4_1 = Conv2D(512, (3, 3), strides=(1, 1), activation='relu', padding='same')
        self.conv2d_4_2 = Conv2D(512, (3, 3), strides=(1, 1), activation='relu', padding='same')
        self.pool_4 = MaxPooling2D((2, 2), strides=(2, 2), padding='same')

        # Freeze the encoder only once every sub-layer is attached: the flag is
        # pushed down to the layers that exist at assignment time, so setting it
        # before they are created would leave the individual layers untouched.
        self.trainable = False

        if weights_path is not None:
            self.load_weights(weights_path)

    def call(self, inputs, return_corr = True):
        """Run the four blocks and collect one feature summary per block.

        Args:
            inputs: Batch fed to the first Conv2D layer (channels-last; ``model``
                uses ``(960, 64, 1)`` per example).
            return_corr: When true (default) each dict entry is the Gram matrix
                from ``get_feat_corr``; otherwise it is the pooled activation.

        Returns:
            Tuple ``(x, feats)`` where ``x`` is the pooled output of block 4 and
            ``feats`` maps ``'block1'`` .. ``'block4'`` to the per-block summary.
        """
        summarise = self.get_feat_corr if return_corr else (lambda t: t)
        feats = {}

        x = self.pool_1(self.conv2d_1(inputs))
        feats['block1'] = summarise(x)

        x = self.pool_2(self.conv2d_2(x))
        feats['block2'] = summarise(x)

        x = self.conv2d_3_1(x)
        x = self.conv2d_3_2(x)
        x = self.pool_3(x)
        feats['block3'] = summarise(x)

        x = self.conv2d_4_1(x)
        x = self.conv2d_4_2(x)
        x = self.pool_4(x)
        feats['block4'] = summarise(x)

        return x, feats

    def get_feat_corr(self, feat):
        """Return the channel Gram matrix of a ``(batch, H, W, C)`` feature map.

        The spatial axes are flattened to ``N = H * W`` positions and the result
        is ``F^T F / (N - 1)`` with shape ``(batch, C, C)``, further divided by
        the largest absolute activation found in ``feat``.

        Plain TensorFlow ops are used (no Keras layers are created per call) and
        ``N`` is read from the dynamic shape, so unknown spatial sizes work too.
        The channel count must be statically known.
        """
        channels = feat.shape[-1]
        # (batch, H, W, C) -> (batch, N, C); batch size is taken dynamically so
        # that -1 can absorb the flattened spatial axes.
        flat = tf.reshape(feat, (tf.shape(feat)[0], -1, channels))
        n_positions = tf.shape(flat)[1]

        # (batch, C, N) @ (batch, N, C) -> (batch, C, C)
        corr = tf.linalg.matmul(flat, flat, transpose_a=True)
        corr = corr / tf.cast(n_positions - 1, corr.dtype)

        # NOTE(review): the scale factor is the peak |activation| over the whole
        # batch, not the peak of the Gram matrix itself, so the result is neither
        # bounded to [-1, 1] nor independent of the other examples in the batch.
        # Kept as-is to preserve existing numbers -- confirm this is intended.
        corr = corr / tf.reduce_max(tf.abs(feat))

        return corr

    def model(self):
        """Wrap the network in a functional ``Model`` over a fixed ``(960, 64, 1)`` input."""
        x = Input(shape=(960, 64, 1))
        return Model(inputs=[x], outputs=self.call(x))
    
# # convolutional operation parameters
# n_filters = 16
# kernels = [3, 5, 7]
# skips = []

# history_seq = Input(shape=(WINDOW_SIZE, 1))
# for kernel in kernels:
#     x = Conv1D(n_filters, (kernel, ), activation = 'relu', padding = 'same')(history_seq) 
#     x = MaxPooling1D()(x)
#     x = Conv1D(n_filters * 2, (kernel, ), activation = 'relu', padding = 'same')(x) 
#     skips.append(x)

# features = Add()(skips)
# features = BatchNormalization()(features)
# features = Flatten()(features)

# out = Dropout(0.1)(features)
# out = Dense(256, activation = 'relu')(features)
# out = Dropout(0.15)(out)
# out = Dense(256, activation = 'relu')(out)

In [75]:
class IMuse(Model):
    """Pairs the VGGish encoder with Conv1D decoder stacks (work in progress).

    Only the decoder for the first encoder block is defined so far, and the
    class does not implement ``call`` yet.
    """

    def __init__(self):
        # Name the model through the documented constructor argument instead of
        # assigning the private ``_name`` attribute afterwards.
        super().__init__(name='IMuse')

        self.encoder = VGGish()

        # Takes (64, 64) inputs -- presumably the block-1 Gram matrix produced by
        # the encoder (64 channels); TODO confirm once ``call`` is written.
        self.bloc1_decoder = Sequential([
            Input((64, 64)),
            Conv1D(128, 3, activation="relu", padding="same"),
            Conv1D(128, 5, activation="relu", padding="same"),
        ])

In [17]:
# Scratch cell: computes x^T . x of the previous layer's output inside a Keras Lambda layer.
# NOTE(review): `Lambda` is not among the layers imported at the top of this notebook and
# `previousLayerOutput` is not defined in any cell visible here, so this line raises
# NameError on a fresh kernel -- import/define them or drop the cell.
covar = Lambda(lambda x: K.dot(K.transpose(x),x))(previousLayerOutput)

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_1 (Dense)             (None, 131072)            67239936  
                                                                 
Total params: 67,239,936
Trainable params: 67,239,936
Non-trainable params: 0
_________________________________________________________________


In [55]:
# Shape check for the batched product X . X^T: a (1, 128, 64) tensor of ones
# yields a (1, 128, 128) result whose entries all equal the inner dimension, 64.
x = tf.ones(shape=(1, 128, 64))
x @ tf.linalg.matrix_transpose(x)

<tf.Tensor: shape=(1, 128, 128), dtype=float32, numpy=
array([[[64., 64., 64., ..., 64., 64., 64.],
        [64., 64., 64., ..., 64., 64., 64.],
        [64., 64., 64., ..., 64., 64., 64.],
        ...,
        [64., 64., 64., ..., 64., 64., 64.],
        [64., 64., 64., ..., 64., 64., 64.],
        [64., 64., 64., ..., 64., 64., 64.]]], dtype=float32)>