In [1]:
import tensorflow as tf
import keras
from keras import layers
import os
from tensorflow.keras import backend as K
import matplotlib.pyplot as plt
import numpy as np
import pickle
import soundfile as sf

In [2]:
# this sampling layer is the bottleneck layer of variational autoencoder,
# it uses the output from two dense layers z_mean and z_log_var as input, 
# convert them into normal distribution and pass them to the decoder layer
 
class Sampling(layers.Layer):
    """Uses (mean, log_var) to sample z, the vector encoding a digit."""
 
    def call(self, inputs):
        mean, log_var = inputs
        batch = tf.shape(mean)[0]
        dim = tf.shape(mean)[1]
        epsilon = tf.random.normal(shape=(batch, dim))
        z = mean + tf.exp(0.5 * log_var) * epsilon
        print("Z: ", tf.reduce_min(z), "|", tf.reduce_max(z), 
              "__", "mean: ", tf.reduce_min(mean), "|", tf.reduce_max(mean), 
              "__", "log_var: ", tf.reduce_min(log_var), "|", tf.reduce_max(log_var),  
              "__", "epsilon: ", tf.reduce_min(epsilon), "|", tf.reduce_max(epsilon))
        return z
    


class VAE(keras.Model):
    def __init__(self, input_shape, conv_filters, conv_kernels, conv_strides,
                 latent_space_dim, **kwargs):
        super().__init__(**kwargs)
        self.encoder_input_shape = input_shape # [28, 28, 1]
        self.conv_filters = conv_filters # [2, 4, 8]
        self.conv_kernels = conv_kernels # [3, 5, 3]
        self.conv_strides = conv_strides # [1, 2, 2]
        self.latent_space_dim = latent_space_dim # 2
        self.reconstruction_loss_weight = 1000000

        self.encoder = None
        self.decoder = None

        self._num_conv_layers = len(conv_filters)
        self._shape_before_bottleneck = None
        self._model_input = None

        self._build_model()

        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")
 
    @property
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]
 
    def train_step(self, data):
        with tf.GradientTape() as tape:
            mean,log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(
                    keras.losses.binary_crossentropy(data, reconstruction),
                    axis=(1, 2),
                )
            )
            kl_loss = -0.5 * (1 + log_var - tf.square(mean) - tf.exp(log_var))
            kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
            total_loss = reconstruction_loss + kl_loss
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }
    
    def reconstruct(self, images):
        mean, log_var, z = self.encoder.predict(images)
        reconstructed_images = self.decoder.predict(z)
        return reconstructed_images, z
    
    def _build_model(self):
        ## Build encoder
        self._model_input = keras.Input(shape=self.encoder_input_shape) 
        # add conv layers 
        x = self._model_input
        for layer_index in range(self._num_conv_layers):
            x = layers.Conv2D(filters=self.conv_filters[layer_index],
                                kernel_size=self.conv_kernels[layer_index],
                                activation="relu",
                                strides=self.conv_strides[layer_index],
                                padding="same")(x)
            x = layers.BatchNormalization()(x)
        # create bottleneck
        self._shape_before_bottleneck = K.int_shape(x)[1:]
        x = layers.Flatten()(x)
        x = layers.Dense(16, activation="relu")(x)
        mean = layers.Dense(self.latent_space_dim, name="mean")(x)
        log_var = layers.Dense(self.latent_space_dim, name="log_var")(x)
        z = Sampling()([mean, log_var])
        
        self.encoder = keras.Model(self._model_input, [mean, log_var, z], name="encoder")
        self.encoder.summary()

        ## Build decoder
        latent_inputs = keras.Input(shape=(self.latent_space_dim,))
        num_neurons = np.prod(self._shape_before_bottleneck)
        x = layers.Dense(num_neurons, activation="relu")(latent_inputs) #64 * 16 * 64
        x = layers.Reshape(self._shape_before_bottleneck)(x) # use shape before bottleneck
        # add conv transpose layers
        for layer_index in reversed(range(1, self._num_conv_layers)):
            x = layers.Conv2DTranspose(filters=self.conv_filters[layer_index],
                                        kernel_size=self.conv_kernels[layer_index],
                                        activation="relu",
                                        strides=self.conv_strides[layer_index],
                                        padding="same")(x)
            x = layers.BatchNormalization()(x)
        decoder_outputs = layers.Conv2DTranspose(1, 
                                                kernel_size=self.conv_kernels[0],
                                                activation="sigmoid", 
                                                strides=self.conv_strides[0], 
                                                padding="same")(x)
        self.decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")
        self.decoder.summary()

    @classmethod
    def load(cls, save_folder="model_vae"):
        parameters_path = os.path.join(save_folder, "parameters.pkl")
        with open(parameters_path, "rb") as f:
            parameters = pickle.load(f)
        print(parameters)
        autoencoder = VAE(*parameters)
        weights_path = os.path.join(save_folder, "weights.h5")
        autoencoder.load_weights(weights_path)
        return autoencoder

    def save_parameters(self, save_folder):
        parameters = [
            self.encoder_input_shape,
            self.conv_filters,
            self.conv_kernels,
            self.conv_strides,
            self.latent_space_dim
        ]
        save_path = os.path.join(save_folder, "parameters.pkl")
        with open(save_path, "wb") as f:
            pickle.dump(parameters, f)


In [5]:
def load_fsdd(spectograms_path):
    x_train = []
    file_paths = []
    for root, _, file_names in os.walk(spectograms_path):
        for file_name in file_names:
            file_path = os.path.join(root, file_name)
            spectogram = np.load(file_path) # ex. (n_bins, n_frames) --> abbiamo bisogno di 3 dimensioni (3 channels)-> (n_bins, n_frames, 1)
            #x_train.append(spectogram)
            file_paths.append(file_path)
    #x_train = np.array(x_train)
    #x_train = x_train[..., np.newaxis] # -> (3000, 256, 64, 1)
    #x_train = tf.data.Dataset.from_tensor_slices(x_train)
    return x_train, file_paths


def plot_latent_space(vae, n=10, figsize=5):
    # display a n*n 2D manifold of images
    img_size = 28
    scale = 0.5
    figure = np.zeros((img_size * n, img_size * n))
    # linearly spaced coordinates corresponding to the 2D plot
    # of images classes in the latent space
    grid_x = np.linspace(-scale, scale, n)
    grid_y = np.linspace(-scale, scale, n)[::-1]
 
    for i, yi in enumerate(grid_y):
        for j, xi in enumerate(grid_x):
            sample = np.array([[xi, yi]])
            x_decoded = vae.decoder.predict(sample, verbose=0)
            images = x_decoded[0].reshape(img_size, img_size)
            figure[
                i * img_size : (i + 1) * img_size,
                j * img_size : (j + 1) * img_size,
            ] = images
 
    plt.figure(figsize=(figsize, figsize))
    start_range = img_size // 2
    end_range = n * img_size + start_range
    pixel_range = np.arange(start_range, end_range, img_size)
    sample_range_x = np.round(grid_x, 1)
    sample_range_y = np.round(grid_y, 1)
    plt.xticks(pixel_range, sample_range_x)
    plt.yticks(pixel_range, sample_range_y)
    plt.xlabel("z[0]")
    plt.ylabel("z[1]")
    plt.imshow(figure, cmap="Greys_r")
    plt.show()



In [6]:
# spectrogram input
SPECTOGRAMS_PATH = os.path.join("dataset", "fsdd", "spectrograms")
x_train, filepaths = load_fsdd(SPECTOGRAMS_PATH)
#with open(os.path.join('dataset', 'fsdd', 'pickled_spectrograms.npy'), 'wb') as f:
#    np.save(f, x_train)

with open(os.path.join('dataset', 'fsdd', 'pickled_filepaths.npy'), 'wb') as f:
    np.save(f, filepaths)

In [10]:
print(len(filepaths))

289187


In [6]:
with open(os.path.join('dataset', 'fsdd', 'pickled_spectrograms.npy'), 'rb') as f:
    x_train = np.load(f)

with open(os.path.join('dataset', 'fsdd', 'pickled_filepaths.npy'), 'rb') as f:
    filepaths = np.load(f)

In [8]:

nan_count = np.count_nonzero(np.isnan(x_train))

In [9]:
print(nan_count)

0


In [10]:
tf.config.run_functions_eagerly(True)

vae = VAE(
        input_shape=(256, 64, 1),
        conv_filters=(512, 256, 128, 64, 32),
        conv_kernels=(3, 3, 3, 3, 3),
        conv_strides=(2, 2, 2, 2, (2, 1)),
        latent_space_dim=128
    )
vae.compile(optimizer=keras.optimizers.Adam(clipnorm=1.0))
#vae.fit(fashion_mnist, epochs=10, batch_size=128)
vae.fit(x_train, epochs=1, batch_size=32)

WEIGHTS_Path = os.path.join("model", "trial.weights.h5")
vae.save_weights(WEIGHTS_Path)


Z:  Tensor("Min:0", shape=(), dtype=float32) | Tensor("Max:0", shape=(), dtype=float32) __ mean:  Tensor("Min_1:0", shape=(), dtype=float32) | Tensor("Max_1:0", shape=(), dtype=float32) __ log_var:  Tensor("Min_2:0", shape=(), dtype=float32) | Tensor("Max_2:0", shape=(), dtype=float32) __ epsilon:  Tensor("Min_3:0", shape=(), dtype=float32) | Tensor("Max_3:0", shape=(), dtype=float32)




Z:  tf.Tensor(-3.410419, shape=(), dtype=float32) | tf.Tensor(3.6750348, shape=(), dtype=float32) __ mean:  tf.Tensor(-0.0056325146, shape=(), dtype=float32) | tf.Tensor(0.00423389, shape=(), dtype=float32) __ log_var:  tf.Tensor(-0.0042618364, shape=(), dtype=float32) | tf.Tensor(0.0051070056, shape=(), dtype=float32) __ epsilon:  tf.Tensor(-3.412494, shape=(), dtype=float32) | tf.Tensor(3.6751635, shape=(), dtype=float32)
[1m   1/9038[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m15:26:28[0m 6s/step - kl_loss: 1.5351e-04 - loss: 11359.3027 - reconstruction_loss: 11359.3027Z:  tf.Tensor(-3.3367374, shape=(), dtype=float32) | tf.Tensor(3.6451888, shape=(), dtype=float32) __ mean:  tf.Tensor(-0.032550476, shape=(), dtype=float32) | tf.Tensor(0.03413359, shape=(), dtype=float32) __ log_var:  tf.Tensor(-0.0384688, shape=(), dtype=float32) | tf.Tensor(0.035835773, shape=(), dtype=float32) __ epsilon:  tf.Tensor(-3.3349082, shape=(), dtype=float32) | tf.Tensor(3.6511385, shape=(), dtype=float32)


In [11]:
import librosa

from preprocess import MinMaxNormaliser

class SoundGenerator:
    """SoundGenerator is responsible for generating audio spectograms.
    """

    def __init__(self, vae, hop_length):
        self.vae = vae
        self.hop_length = hop_length
        self._min_max_normalizer = MinMaxNormaliser(0, 1)

    def generate(self, spectrograms, min_max_values):
        generated_spectrograms = [] 
        latent_representations = []

        for spectrogram in spectrograms:
            generated_spectogram, latent_representation = self.vae.reconstruct(spectrogram)
            generated_spectrograms.append(generated_spectogram)
            latent_representations.append(latent_representation)

        signals = self.convert_spectograms_to_audio(generated_spectrograms, min_max_values)
        return signals, latent_representations

    def convert_spectograms_to_audio(self, spectograms, min_max_values):
        signals = []
        for spectogram, min_max_value in zip(spectograms, min_max_values):
            # reshape the log spectogram
            log_spectogram = spectogram[:, :, 0] 

            # apply denormalization
            denorm_log_spec = self._min_max_normalizer.denormalise(log_spectogram,
                                                                   min_max_value["min"],
                                                                   min_max_value["max"])

            # log spectogram -> spectogram
            spec = librosa.db_to_amplitude(denorm_log_spec)

            # apply Griffin-Lim
            signal = librosa.istft(spec, hop_length=self.hop_length)

            # append signal to "signals"
            signals.append(signal)

        return signals

In [12]:
def select_spectrograms(spectrograms,
                        file_paths,
                        min_max_values,
                        num_spectrograms=2):
    sampled_indexes = np.random.choice(range(len(spectrograms)), num_spectrograms)
    sampled_spectrogrmas = spectrograms[sampled_indexes]
    file_paths = [file_paths[index] for index in sampled_indexes]
    sampled_min_max_values = [min_max_values[file_path] for file_path in
                           file_paths]
    print(file_paths)
    print(sampled_min_max_values)
    return sampled_spectrogrmas, sampled_min_max_values


def save_signals(signals, save_dir, sample_rate=22050):
    for i, signal in enumerate(signals):
        save_path = os.path.join(save_dir, str(i) + ".wav")
        sf.write(save_path, signal, sample_rate)

In [13]:
HOP_LENGTH = 256
SAVE_DIR_ORIGINAL = os.path.join('dataset', 'generation', 'original')
SAVE_DIR_GENERATED = os.path.join('dataset', 'generation', 'generated')
MIN_MAX_VALUES_PATH = os.path.join('dataset', 'fsdd', 'min_max_values.pkl')

vae = VAE(
        input_shape=(256, 64, 1),
        conv_filters=(512, 256, 128, 64, 32),
        conv_kernels=(3, 3, 3, 3, 3),
        conv_strides=(2, 2, 2, 2, (2, 1)),
        latent_space_dim=128
    )

vae.load_weights(os.path.join("model", "trial.weights.h5"))
sound_generator = SoundGenerator(vae, HOP_LENGTH)

# load spectrograms + min max values
with open(MIN_MAX_VALUES_PATH, "rb") as f:
    min_max_values = pickle.load(f)


Z:  Tensor("Min:0", shape=(), dtype=float32) | Tensor("Max:0", shape=(), dtype=float32) __ mean:  Tensor("Min_1:0", shape=(), dtype=float32) | Tensor("Max_1:0", shape=(), dtype=float32) __ log_var:  Tensor("Min_2:0", shape=(), dtype=float32) | Tensor("Max_2:0", shape=(), dtype=float32) __ epsilon:  Tensor("Min_3:0", shape=(), dtype=float32) | Tensor("Max_3:0", shape=(), dtype=float32)


In [35]:
with open(os.path.join('dataset', 'fsdd', 'pickled_spectrograms.npy'), 'rb') as f:
    spectrograms = np.load(f)

print(spectrograms.shape)

with open(os.path.join('dataset', 'fsdd', 'pickled_filepaths.npy'), 'rb') as f:
    file_paths = np.load(f)

print(file_paths.shape)


(289187, 256, 64, 1)
(289187,)


In [36]:
# select random spectrograms to generate similar music
num_new_spectrograms = 5
sampled_indexes = np.random.choice(range(len(spectrograms)), num_new_spectrograms)
sampled_spectrograms = spectrograms[sampled_indexes]
file_paths = [file_paths[index] for index in sampled_indexes]
sampled_min_max_values = [min_max_values[file_path] for file_path in
                           file_paths]

In [52]:
print("min_max: ", list(min_max_values.keys())[:5])
print(min_max_values[list(min_max_values.keys())[1]])
print(type(min_max_values))

print("hi")

min_max:  ['dataset\\fsdd\\spectrograms\\bass_acoustic_000-024-025.wav.npy', 'dataset\\fsdd\\spectrograms\\bass_acoustic_000-024-050.wav.npy', 'dataset\\fsdd\\spectrograms\\bass_acoustic_000-024-075.wav.npy', 'dataset\\fsdd\\spectrograms\\bass_acoustic_000-024-100.wav.npy', 'dataset\\fsdd\\spectrograms\\bass_acoustic_000-024-127.wav.npy']
{'min': -39.44951, 'max': 40.55049}
<class 'dict'>
hi


In [58]:
# predict all samples
mean, log_var, z = vae.encoder.predict(sampled_spectrograms)
generated_spectrograms = vae.decoder.predict(z)
print(generated_spectrograms.shape)
print("predicted all samples")



_min_max_normalizer = MinMaxNormaliser(0, 1)

signals = []
for spectrogram, min_max_value in zip(generated_spectrograms, sampled_min_max_values):
    # reshape the log spectrogram
    log_spectogram = spectrogram[:, :, 0] # eliminating the third dimension
    print(min_max_value)
    # apply denormalization
    denorm_log_spec = _min_max_normalizer.denormalise(log_spectogram,
                                                            min_max_value["min"],
                                                            min_max_value["max"])

    # log spectrogram -> spectrogram
    spec = librosa.db_to_amplitude(denorm_log_spec)

    # apply Griffin-Lim
    signal = librosa.istft(spec, hop_length=HOP_LENGTH)

    # append signal to "signals"
    signals.append(signal)



print(len(signals))



# convert spectrogram samples to audio
original_signals = sound_generator.convert_spectograms_to_audio(sampled_spectrograms, sampled_min_max_values)

# save audio signals
save_signals(signals, SAVE_DIR_GENERATED)
save_signals(original_signals, SAVE_DIR_ORIGINAL)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 142ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step
(5, 256, 64, 1)
predicted all samples
{'min': -50.028675, 'max': 29.971325}
{'min': -50.50171, 'max': 29.498291}
{'min': -44.191456, 'max': 35.808544}
{'min': -42.937996, 'max': 37.062004}
{'min': -49.302788, 'max': 30.697214}
5


In [59]:
plot_latent_space(vae)

InvalidArgumentError: Graph execution error:

Detected at node decoder_1/dense_1_1/Relu defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "C:\Users\janas\AppData\Roaming\Python\Python312\site-packages\ipykernel_launcher.py", line 17, in <module>

  File "C:\Users\janas\AppData\Roaming\Python\Python312\site-packages\traitlets\config\application.py", line 1053, in launch_instance

  File "C:\Users\janas\AppData\Roaming\Python\Python312\site-packages\ipykernel\kernelapp.py", line 739, in start

  File "C:\Users\janas\AppData\Roaming\Python\Python312\site-packages\tornado\platform\asyncio.py", line 195, in start

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\asyncio\base_events.py", line 639, in run_forever

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\asyncio\base_events.py", line 1985, in _run_once

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\asyncio\events.py", line 88, in _run

  File "C:\Users\janas\AppData\Roaming\Python\Python312\site-packages\ipykernel\kernelbase.py", line 529, in dispatch_queue

  File "C:\Users\janas\AppData\Roaming\Python\Python312\site-packages\ipykernel\kernelbase.py", line 518, in process_one

  File "C:\Users\janas\AppData\Roaming\Python\Python312\site-packages\ipykernel\kernelbase.py", line 424, in dispatch_shell

  File "C:\Users\janas\AppData\Roaming\Python\Python312\site-packages\ipykernel\kernelbase.py", line 766, in execute_request

  File "C:\Users\janas\AppData\Roaming\Python\Python312\site-packages\ipykernel\ipkernel.py", line 429, in do_execute

  File "C:\Users\janas\AppData\Roaming\Python\Python312\site-packages\ipykernel\zmqshell.py", line 549, in run_cell

  File "C:\Users\janas\AppData\Roaming\Python\Python312\site-packages\IPython\core\interactiveshell.py", line 3046, in run_cell

  File "C:\Users\janas\AppData\Roaming\Python\Python312\site-packages\IPython\core\interactiveshell.py", line 3101, in _run_cell

  File "C:\Users\janas\AppData\Roaming\Python\Python312\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner

  File "C:\Users\janas\AppData\Roaming\Python\Python312\site-packages\IPython\core\interactiveshell.py", line 3306, in run_cell_async

  File "C:\Users\janas\AppData\Roaming\Python\Python312\site-packages\IPython\core\interactiveshell.py", line 3488, in run_ast_nodes

  File "C:\Users\janas\AppData\Roaming\Python\Python312\site-packages\IPython\core\interactiveshell.py", line 3548, in run_code

  File "C:\Users\janas\AppData\Local\Temp\ipykernel_17188\4083629366.py", line 1, in <module>

  File "C:\Users\janas\AppData\Local\Temp\ipykernel_17188\1763218103.py", line 29, in plot_latent_space

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\site-packages\keras\src\utils\traceback_utils.py", line 118, in error_handler

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 513, in predict

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 212, in one_step_on_data_distributed

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 201, in one_step_on_data

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 93, in predict_step

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\site-packages\keras\src\utils\traceback_utils.py", line 118, in error_handler

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\site-packages\keras\src\layers\layer.py", line 816, in __call__

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\site-packages\keras\src\utils\traceback_utils.py", line 118, in error_handler

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\site-packages\keras\src\ops\operation.py", line 42, in __call__

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\site-packages\keras\src\utils\traceback_utils.py", line 157, in error_handler

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\site-packages\keras\src\models\functional.py", line 188, in call

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\site-packages\keras\src\ops\function.py", line 153, in _run_through_graph

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\site-packages\keras\src\models\functional.py", line 572, in call

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\site-packages\keras\src\utils\traceback_utils.py", line 118, in error_handler

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\site-packages\keras\src\layers\layer.py", line 816, in __call__

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\site-packages\keras\src\utils\traceback_utils.py", line 118, in error_handler

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\site-packages\keras\src\ops\operation.py", line 42, in __call__

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\site-packages\keras\src\utils\traceback_utils.py", line 157, in error_handler

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\site-packages\keras\src\layers\core\dense.py", line 141, in call

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\site-packages\keras\src\activations\activations.py", line 47, in relu

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\site-packages\keras\src\activations\activations.py", line 99, in static_call

  File "c:\Users\janas\anaconda3\envs\musicmodel\Lib\site-packages\keras\src\backend\tensorflow\nn.py", line 16, in relu

Matrix size-incompatible: In[0]: [1,2], In[1]: [128,1024]
	 [[{{node decoder_1/dense_1_1/Relu}}]] [Op:__inference_one_step_on_data_distributed_2188]