<a href="https://colab.research.google.com/github/aetev/Learning-stuff-/blob/main/WGAN-GP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Colab setup: install the third-party packages that the import cells below rely on.
# NOTE(review): no versions are pinned, so a fresh runtime may resolve different releases.
!pip install tensorflow_addons
#!pip install pydub
!pip install librosa



In [None]:
# Mount Google Drive at /content/drive so files stored there can be read by path.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import tensorflow as tf
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import numpy as np
import tensorflow_addons as tfa
import os
from IPython.display import Audio


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [None]:
import librosa
import numpy as np

def downsample_array(arr, factor, axis):
    """Return ``arr`` reduced to every ``factor``-th entry along ``axis``.

    Entries at positions 0, factor, 2*factor, ... are kept; all other axes are
    left untouched. No filtering is applied before the samples are dropped.
    """
    kept_positions = np.arange(0, arr.shape[axis], factor)
    return arr.take(kept_positions, axis=axis)

def create_sliding_window(array, window_size, stride):
    """Cut a 1-D array into overlapping fixed-length windows.

    Args:
        array: 1-D NumPy array to window.
        window_size: Number of samples in each window.
        stride: Offset, in samples, between the starts of consecutive windows.

    Returns:
        A 2-D array with one window per row. For a positive ``stride`` it has
        ``(len(array) - window_size) // stride + 1`` rows of ``window_size``
        samples each. The rows are produced by ``sliding_window_view`` and a
        slice, so no sample data is copied here.
    """
    # Build the window starting at every sample, then keep each `stride`-th one.
    all_windows = np.lib.stride_tricks.sliding_window_view(array, (window_size,))
    return all_windows[::stride]

In [None]:
# Downsampling factor: one sample out of every `reduction` is kept per window.
reduction = 12
wav_file = '/content/drive/MyDrive/bass samples/NBKoanbandstuff.wav'
# sr=None asks librosa to keep the file's own sample rate instead of resampling.
audio, sr = librosa.load(wav_file, sr=None)
# Scale the waveform to unit standard deviation; audio_dev keeps the original scale.
audio_dev = np.std(audio)
audio = audio/audio_dev
# Overlapping windows of 44100 samples, one starting every 100 samples.
result_array = create_sliding_window(audio,44100,100)
# Append a trailing axis of size 1 -> (num_windows, 44100, 1).
result_array = np.expand_dims(result_array, axis=2)
# Keep every `reduction`-th sample along axis 1; 44100 / 12 leaves 3675 per window.
result_array = downsample_array(result_array,reduction,1)

In [None]:


# Flatten one training window (index 100) back into a 1-D waveform.
audio_data = result_array[100].ravel()
# Play it inline; the rate is divided by `reduction` because each window kept
# only every `reduction`-th sample.
Audio(data=audio_data, rate=sr/reduction)

In [None]:
#x_train = noise
# The windowed, downsampled audio is the set of real examples used for training.
y_train = result_array
# Shape of a single training example.
print(y_train[0].shape)

(3675, 1)


In [None]:
class ResNetBlock(layers.Layer):
    """Two-convolution 1-D residual block.

    Main path: Conv1D -> BatchNorm -> ReLU -> Conv1D -> BatchNorm. The result is
    added to a shortcut of the input and passed through a final ReLU.

    Args:
        filters: Number of output channels of both convolutions.
        kernel_size: Kernel width of both convolutions.
        strides: Stride of the first convolution. When it is not 1 the shortcut
            is a strided 1x1 convolution; otherwise the shortcut is the input
            itself.
        dilation_rate: Dilation of the first convolution only.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...),
            forwarded to the base class so the layer can be rebuilt from its
            config.
    """

    def __init__(self, filters, kernel_size=3, strides=1, dilation_rate=1, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.filters = filters
        self.kernel_size = kernel_size
        self.strides = strides
        self.dilation_rate = dilation_rate

        self.conv1 = layers.Conv1D(filters, kernel_size, strides=strides,
                                   dilation_rate=dilation_rate, padding='same')
        self.bn1 = layers.BatchNormalization()
        self.conv2 = layers.Conv1D(filters, kernel_size, padding='same')
        self.bn2 = layers.BatchNormalization()

        if strides != 1:
            # The main path changes the sequence length, so the shortcut must too.
            self.residual = layers.Conv1D(filters, 1, strides=strides)
        else:
            # Identity shortcut. NOTE(review): when the input channel count
            # differs from `filters`, the addition in call() only works through
            # broadcasting (e.g. a 1-channel input); there is no projection.
            self.residual = lambda x: x

    def call(self, inputs, training=False):
        """Apply the block; ``training`` is forwarded to both BatchNorm layers."""
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = tf.nn.relu(x)
        x = self.conv2(x)
        x = self.bn2(x, training=training)

        shortcut = self.residual(inputs)
        return tf.nn.relu(x + shortcut)

    def get_config(self):
        """Return the constructor arguments so Keras can serialize the layer."""
        config = super().get_config()
        config.update({
            'filters': self.filters,
            'kernel_size': self.kernel_size,
            'strides': self.strides,
            'dilation_rate': self.dilation_rate,
        })
        return config

In [None]:
class ResNetBlockup(layers.Layer):
    """Residual block built from transposed 1-D convolutions.

    Main path: Conv1DTranspose -> BatchNorm -> ReLU -> Conv1DTranspose ->
    BatchNorm. The result is added to a shortcut of the input and passed
    through a final ReLU.

    Args:
        filters: Number of output channels of both transposed convolutions.
        kernel_size: Kernel width of both transposed convolutions.
        strides: Stride of the first transposed convolution. When it is not 1
            the shortcut is a strided 1x1 transposed convolution; otherwise the
            shortcut is the input itself.
        dilation_rate: Dilation passed to the first transposed convolution only.
            NOTE(review): confirm that Conv1DTranspose accepts the
            strides/dilation_rate combination you intend to use.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...),
            forwarded to the base class so the layer can be rebuilt from its
            config.
    """

    def __init__(self, filters, kernel_size=3, strides=1, dilation_rate=1, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.filters = filters
        self.kernel_size = kernel_size
        self.strides = strides
        self.dilation_rate = dilation_rate

        self.conv1 = layers.Conv1DTranspose(filters, kernel_size, strides=strides,
                                            dilation_rate=dilation_rate, padding='same')
        self.bn1 = layers.BatchNormalization()
        self.conv2 = layers.Conv1DTranspose(filters, kernel_size, padding='same')
        self.bn2 = layers.BatchNormalization()

        if strides != 1:
            # The main path changes the sequence length, so the shortcut must too.
            self.residual = layers.Conv1DTranspose(filters, 1, strides=strides)
        else:
            # Identity shortcut. NOTE(review): when the input channel count
            # differs from `filters`, the addition in call() only works through
            # broadcasting; there is no projection.
            self.residual = lambda x: x

    def call(self, inputs, training=False):
        """Apply the block; ``training`` is forwarded to both BatchNorm layers."""
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = tf.nn.relu(x)
        x = self.conv2(x)
        x = self.bn2(x, training=training)

        shortcut = self.residual(inputs)
        return tf.nn.relu(x + shortcut)

    def get_config(self):
        """Return the constructor arguments so Keras can serialize the layer."""
        config = super().get_config()
        config.update({
            'filters': self.filters,
            'kernel_size': self.kernel_size,
            'strides': self.strides,
            'dilation_rate': self.dilation_rate,
        })
        return config

In [None]:
def build_discriminator():
    """Build the critic that maps a variable-length 1-channel series to one score.

    Architecture: input BatchNorm, six dilated ``ResNetBlock`` layers (64
    filters, kernel 4) with 20% dropout after all but the second block, global
    average pooling over time, a 64-unit ReLU layer, and a linear 1-unit output.

    Returns:
        An uncompiled ``tf.keras`` model with input shape ``(None, 1)`` per
        example and output shape ``(1,)`` per example.
    """
    input_series = layers.Input(shape=(None,1))

    # NOTE(review): batch normalization couples the examples in a batch, while
    # the gradient penalty is computed per example; the WGAN-GP paper advises
    # against it in the critic. Confirm whether that matters here.
    x = layers.BatchNormalization()(input_series)

    # (dilation_rate, followed_by_dropout) for each residual block, in order.
    block_specs = ((1, True), (2, False), (4, True), (8, True), (12, True), (24, True))
    for dilation_rate, use_dropout in block_specs:
        x = ResNetBlock(64, 4, 1, dilation_rate)(x)
        if use_dropout:
            x = layers.Dropout(0.2)(x)

    # Collapse the time axis so the score does not depend on sequence length.
    pooled_output = layers.GlobalAveragePooling1D()(x)

    hidden = layers.Dense(64, activation='relu')(pooled_output)

    # The output layer consumes the hidden layer. It was previously wired to
    # `pooled_output`, which left the 64-unit layer outside the model.
    score = layers.Dense(1, activation='linear')(hidden)

    return tf.keras.models.Model(inputs=input_series, outputs=score)

# Instantiate the discriminator and list its layers and parameter counts.
discriminator = build_discriminator()
discriminator.summary()


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None, 1)]         0         
                                                                 
 batch_normalization (BatchN  (None, None, 1)          4         
 ormalization)                                                   
                                                                 
 res_net_block (ResNetBlock)  (None, None, 64)         17280     
                                                                 
 dropout (Dropout)           (None, None, 64)          0         
                                                                 
 res_net_block_1 (ResNetBloc  (None, None, 64)         33408     
 k)                                                              
                                                                 
 res_net_block_2 (ResNetBloc  (None, None, 64)         33408 

In [None]:
def build_generator():
    """Build the generator: a length-preserving map from a 1-channel series to a 1-channel series.

    The input is batch-normalized, passed through six stride-1 ``ResNetBlock``
    layers (64 filters, kernel 4) with dilation rates 1, 2, 4, 6, 12 and 24,
    and projected back to a single channel by a 1x1 convolution.
    """
    noise_in = layers.Input(shape=(None,1))
    features = layers.BatchNormalization()(noise_in)

    # One residual block per dilation rate, applied in this order.
    for rate in (1, 2, 4, 6, 12, 24):
        features = ResNetBlock(64,4,strides=1,dilation_rate=rate)(features)

    waveform = layers.Conv1D(1,1)(features)
    return tf.keras.models.Model(inputs=noise_in, outputs=waveform)

# Instantiate the generator and list its layers and parameter counts.
generator = build_generator()
generator.summary()


Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, None, 1)]         0         
                                                                 
 batch_normalization_13 (Bat  (None, None, 1)          4         
 chNormalization)                                                
                                                                 
 res_net_block_6 (ResNetBloc  (None, None, 64)         17280     
 k)                                                              
                                                                 
 res_net_block_7 (ResNetBloc  (None, None, 64)         33408     
 k)                                                              
                                                                 
 res_net_block_8 (ResNetBloc  (None, None, 64)         33408     
 k)                                                        

In [None]:
# One Adam optimizer per network, each with learning rate 0.0004. They are
# applied by hand in train_step rather than through model.compile().
generator_optimizer = tf.keras.optimizers.Adam(0.0004)
discriminator_optimizer = tf.keras.optimizers.Adam(0.0004)

# Alternative kept for reference: plain SGD with learning rate 1e-4.
#generator_optimizer = tf.keras.optimizers.experimental.SGD(1e-4)
#discriminator_optimizer = tf.keras.optimizers.experimental.SGD(1e-4)

In [None]:
def gradient_penalty(real, fake, discriminator):
    """Gradient penalty on random interpolations between real and fake batches.

    Args:
        real: Batch of real examples, rank 3 with the batch on axis 0. May be a
            NumPy array or a tensor.
        fake: Batch of generated examples with the same shape as ``real``.
        discriminator: Callable model scoring a batch; called with
            ``training=True``.

    Returns:
        Scalar tensor: the batch mean of ``(||grad||_2 - 1) ** 2``, where the
        gradient of the score is taken with respect to each interpolated
        example and the norm runs over axes 1 and 2.
    """
    # Bring both batches to one tensor dtype so NumPy input mixes safely with
    # the generator output.
    fake = tf.convert_to_tensor(fake)
    real = tf.cast(real, fake.dtype)

    # Read the batch size at run time so this also works when the static batch
    # dimension is unknown (e.g. inside a traced function).
    batch_size = tf.shape(real)[0]
    # One mixing coefficient per example, broadcast over the other two axes.
    epsilon = tf.random.uniform([batch_size, 1, 1], 0.0, 1.0, dtype=fake.dtype)
    interpolated = epsilon * real + (1 - epsilon) * fake

    with tf.GradientTape() as tape:
        # `interpolated` is a plain tensor, so it must be watched explicitly.
        tape.watch(interpolated)
        pred = discriminator(interpolated, training=True)

    gradients = tape.gradient(pred, [interpolated])[0]
    # The small constant keeps sqrt differentiable when a gradient is exactly
    # zero; without it the penalty's own gradient becomes NaN.
    squared_norm = tf.reduce_sum(tf.square(gradients), axis=[1, 2])
    norm = tf.sqrt(squared_norm + 1e-12)
    return tf.reduce_mean((norm - 1.0) ** 2)

In [None]:
def discriminator_loss(real_output, fake_output, gradient_penalty):
    """Discriminator loss: mean fake score minus mean real score, plus the penalty term."""
    mean_fake_score = tf.reduce_mean(fake_output)
    mean_real_score = tf.reduce_mean(real_output)
    score_gap = mean_fake_score - mean_real_score
    return score_gap + gradient_penalty

def generator_loss(fake_output):
    """Generator loss: the negated mean discriminator score of the generated batch."""
    mean_fake_score = tf.reduce_mean(fake_output)
    return tf.negative(mean_fake_score)



In [None]:
def clip_discriminator_weights(discriminator, clip_value=0.01):
    """Clamp every trainable discriminator weight to ``[-clip_value, clip_value]`` in place.

    Only ``discriminator.trainable_variables`` are touched. Going through each
    layer's ``get_weights()`` would also clip non-trainable state such as
    BatchNormalization moving statistics.

    Args:
        discriminator: Model exposing ``trainable_variables``.
        clip_value: Positive bound applied symmetrically around zero.
    """
    for variable in discriminator.trainable_variables:
        # Reads the current value eagerly, so call this outside tf.function.
        variable.assign(np.clip(variable.numpy(), -clip_value, clip_value))

In [None]:
def print_img(generator_model):
    """Sample ten outputs from a two-input generator and show them in one row.

    The model is called with a batch of ten 100-dimensional noise vectors and
    ten integer labels (0-9), and each output is drawn with ``imshow``.

    NOTE(review): this expects a label-conditioned image generator; confirm it
    matches the model it is called with before using it.
    """
    sample_count = 10
    latent = tf.random.normal([sample_count, 100])
    class_ids = tf.constant([[idx % 10] for idx in range(sample_count)], dtype=tf.int32)
    generated_images = generator_model.predict([latent, class_ids])

    # One grayscale panel per sample, with the axes hidden.
    _, axes = plt.subplots(1, sample_count, figsize=(10, 10))
    for idx, axis in enumerate(axes):
        axis.imshow(generated_images[idx], cmap="gray")
        axis.axis("off")
    plt.show()

In [None]:
# Fixed noise sequence: batch of 1, 10000 steps, 1 channel.
noise = tf.random.normal(shape=(1,10000,1))

# Sanity check: run the generator once and print the shape of its output.
test = generator.predict(noise)
print(test.shape)

(1, 10000, 1)


In [None]:

#@tf.function
def train_step(target_audios):
  """Run one training step on a batch of real audio windows.

  Sequence, as written:
    1. Two discriminator updates, each on a freshly generated fake batch.
       The discriminator weights are snapshotted after the first update.
    2. One generator update against the discriminator as it stands after the
       second update.
    3. The discriminator is reset to the snapshot, so only the first update
       persists into the next call.

  NOTE(review): this looks like a simplified "unrolled" look-ahead for the
  generator; confirm the intent. The snapshot comes from get_weights(), so
  presumably the optimizer's internal state is not rolled back; verify.

  Reads the module-level generator, discriminator and both optimizers.
  Prints the last discriminator loss and the generator loss.
  """


  for i in range(2):

      # Get the shape of the target_audios tensor
      shape = tf.shape(target_audios)


      # Generate noise using tf.random.normal()
      noise = tf.random.normal((shape))
      # Generated outside the tape: only discriminator gradients are needed here.
      generated_audio = generator(noise, training=True)
      with tf.GradientTape() as disc_tape:

          real_output = discriminator(target_audios, training=True)
          fake_output = discriminator(generated_audio, training=True)
          # Computed inside disc_tape so the penalty contributes to the discriminator gradients.
          gp = gradient_penalty(target_audios, generated_audio, discriminator)
          disc_loss = discriminator_loss(real_output, fake_output, gp)

      gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)
      discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))
      #clip_discriminator_weights(discriminator)

      # Snapshot the discriminator after the first update only; restored below.
      if i ==0:
          weights = discriminator.get_weights()


  with tf.GradientTape() as gen_tape:
    # Fresh noise with the same shape as the real batch.
    noise = tf.random.normal(shape=(target_audios.shape))
    generated_audio = generator(noise, training=True)
    fake_output = discriminator(generated_audio, training=True)
    gen_loss = generator_loss(fake_output)

  gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
  generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))

  # Discard the second discriminator update by restoring the snapshot.
  discriminator.set_weights(weights)


  # disc_loss is the value from the second (discarded) discriminator update.
  tf.print("disc_loss",disc_loss,'gen_loss',gen_loss)


def train(generator, discriminator, epochs, batch_size):
    """Train for ``epochs`` passes over ``y_train`` and audition the generator every 5 epochs.

    Each epoch walks ``y_train`` in consecutive, non-shuffled batches of
    ``batch_size`` and calls ``train_step`` on each one; a trailing partial
    batch is skipped. After every fifth epoch the generator is run on the
    module-level ``noise`` and the result is played inline.

    Args:
        generator: Model used for the periodic audio preview.
        discriminator: Accepted for symmetry but not read here; ``train_step``
            uses the module-level models.
        epochs: Number of passes over ``y_train``.
        batch_size: Number of windows per ``train_step`` call.

    Also reads the module-level ``y_train``, ``noise``, ``sr`` and ``reduction``.
    """
    # Imported locally so the function does not depend on IPython having
    # injected `display` into builtins.
    from IPython.display import display

    batches_per_epoch = len(y_train) // batch_size
    for epoch in range(epochs):
        for batch in range(batches_per_epoch):
            start = batch * batch_size
            target_audios = y_train[start:start + batch_size]

            train_step(target_audios)

        if (epoch + 1) % 5 == 0:
            print(f"Epoch {epoch+1}/{epochs}")
            test = generator.predict(noise)
            audio_data = test.ravel()
            # A bare Audio(...) expression is rendered only when it is the last
            # expression of a cell; inside a function it must go through
            # display() or the player never appears.
            display(Audio(data=audio_data, rate=sr/reduction))


# Train the GAN
# NOTE(review): with this many epochs the run effectively continues until it is
# interrupted by hand.
EPOCHS = 2000000
BATCH_SIZE = 10
# NOTE(review): this value is not read by train() or train_step() as written;
# train_step hard-codes two discriminator updates.
num_unrolling_steps = 20  # Set the desired number of unrolling steps
train(generator, discriminator, EPOCHS, BATCH_SIZE)

disc_loss 0.484564185 gen_loss -0.471388817
disc_loss 0.148690462 gen_loss -0.726761937
disc_loss -0.0074070096 gen_loss -0.952836692
disc_loss -0.118462086 gen_loss -1.30932975
disc_loss -0.0877890587 gen_loss -1.69683862
disc_loss -0.184310317 gen_loss -1.97225308
disc_loss -0.107052982 gen_loss -2.16077805
disc_loss -0.23757112 gen_loss -2.29412985
disc_loss -0.218940973 gen_loss -2.48343921
disc_loss -0.493789554 gen_loss -2.4641366
disc_loss -0.582155943 gen_loss -2.44511199
disc_loss -0.801891923 gen_loss -2.36041
disc_loss -1.10971749 gen_loss -2.2431705
disc_loss -1.236238 gen_loss -2.38271976
disc_loss -1.39141726 gen_loss -2.299299
disc_loss -1.54254031 gen_loss -2.31800795
disc_loss -1.71450353 gen_loss -2.30476499
disc_loss -1.86034942 gen_loss -2.44656301
disc_loss -2.02116513 gen_loss -2.44130826
disc_loss -2.07382321 gen_loss -2.32121658
disc_loss -2.10767174 gen_loss -2.63949013
disc_loss -2.20857906 gen_loss -2.63222384
disc_loss -2.14267349 gen_loss -2.80276537
disc_l

KeyboardInterrupt: ignored

In [19]:
# Run the generator on the fixed noise sequence and flatten the result to 1-D.
test = generator.predict(noise)
audio_data = test.ravel()
# Play the generated audio inline, at the sample rate of the downsampled windows.
Audio(data=audio_data, rate=sr/reduction)

