<a href="https://colab.research.google.com/github/aetev/Learning-stuff-/blob/main/agcnn%202.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install tensorflow_addons
!pip install pydub



In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import os

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
from IPython.display import Audio, display
from pydub import AudioSegment
from pydub.utils import make_chunks
from tensorflow.keras import layers


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [4]:
import librosa
import numpy as np

def reduce_sample_rate(wav_file, reduction):
    """Load an audio file and resample it to ``native_rate // reduction``.

    Args:
        wav_file: Path of the audio file handed to ``librosa.load``.
        reduction: Positive divisor applied to the file's native sample rate
            (e.g. ``4`` turns 44100 Hz into 11025 Hz).

    Returns:
        A NumPy array holding the resampled signal.

    Raises:
        ValueError: If ``reduction`` is not positive, or if it is so large
            that the resulting target sample rate would fall below 1 Hz.
    """
    # Reject bad divisors up front, before any file I/O happens.
    if reduction <= 0:
        raise ValueError(f"reduction must be positive, got {reduction!r}")

    # sr=None keeps the file's native sample rate instead of librosa's default.
    audio, sr = librosa.load(wav_file, sr=None)

    target_sr = sr // reduction
    if target_sr < 1:
        raise ValueError(
            f"reduction={reduction!r} is too large for a {sr} Hz recording"
        )

    # Down-sample by the requested factor (not necessarily by half).
    reduced_audio = librosa.resample(audio, orig_sr=sr, target_sr=target_sr)

    # asarray avoids a needless copy when the result is already an ndarray.
    return np.asarray(reduced_audio)

def create_sliding_window(array, window_size, stride):
    """Cut a 1-D array into overlapping windows.

    Args:
        array: 1-D NumPy array to slice.
        window_size: Number of samples in every window.
        stride: Step, in samples, between the starts of consecutive windows.

    Returns:
        A 2-D view of shape ``(num_windows, window_size)`` where row ``k``
        starts at sample ``k * stride`` of ``array``.
    """
    # All windows at step 1; this is a view, so no data is copied here.
    sliding_windows = np.lib.stride_tricks.sliding_window_view(array, (window_size,))

    # Keep every `stride`-th window. (The original pre-computed a window count
    # that was never used; slicing already yields the right number of rows.)
    return sliding_windows[::stride]

In [5]:
# Down-sampling factor applied to the recording's native sample rate.
reduction = 4
wav_file = '/content/drive/MyDrive/bass samples/NBKoanbandstuff.wav'

# Native-rate load; `sr` is reused by the playback cells further down.
audio, sr = librosa.load(wav_file, sr=None)

# Down-sample -> 2500-sample windows every 100 samples -> add a channel axis.
result_array = np.expand_dims(
    create_sliding_window(reduce_sample_rate(wav_file, reduction), 2500, 100),
    axis=2,
)
print(result_array.shape)

(449, 2500, 1)


In [6]:
# Flatten one training window (index 100) back to a 1-D signal for playback.
audio_data = result_array[100].ravel()
# Play the audio within the Jupyter Notebook. The rate uses floor division so
# it matches the `sr // reduction` target that reduce_sample_rate resamples to.
Audio(data=audio_data, rate=sr // reduction)

In [7]:
# Training targets are the sliding windows built above.
# The input assignment below was left disabled by the author:
#x_train = noise
y_train = result_array
# Shape of a single training example.
print(np.shape(y_train[0]))

(2500, 1)


In [8]:
class ResNetBlock(layers.Layer):
    """1-D residual block: conv -> BN -> ReLU -> conv -> BN, plus a shortcut, then ReLU.

    Args:
        filters: Number of output channels of both convolutions.
        kernel_size: Kernel width of both convolutions.
        strides: Stride of the first convolution. When it is not 1 the
            shortcut becomes a strided 1x1 convolution so lengths still match.
        dilation_rate: Dilation of the first convolution only.
        **kwargs: Standard Keras ``Layer`` arguments (``name``, ``dtype``, ...).
    """

    def __init__(self, filters, kernel_size=3, strides=1, dilation_rate=1, **kwargs):
        # Forward Keras base-layer kwargs so the block can be named and
        # re-created from its config.
        super().__init__(**kwargs)

        # Remember the constructor arguments for get_config().
        self.filters = filters
        self.kernel_size = kernel_size
        self.strides = strides
        self.dilation_rate = dilation_rate

        self.conv1 = layers.Conv1D(filters, kernel_size, strides=strides,
                                   dilation_rate=dilation_rate, padding='same')
        self.bn1 = layers.BatchNormalization()
        self.conv2 = layers.Conv1D(filters, kernel_size, padding='same')
        self.bn2 = layers.BatchNormalization()

        if strides != 1:
            # Project the input so its length matches the strided main path.
            self.residual = layers.Conv1D(filters, 1, strides=strides)
        else:
            # NOTE(review): identity shortcut. If the input channel count
            # differs from `filters`, the addition in call() relies on
            # broadcasting, which only works for single-channel inputs (as
            # in the first block of the models in this notebook).
            self.residual = lambda x: x

    def call(self, inputs, training=False):
        """Run the block on ``inputs``; ``training`` is passed to the batch-norm layers."""
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = tf.nn.relu(x)
        x = self.conv2(x)
        x = self.bn2(x, training=training)

        shortcut = self.residual(inputs)

        x = x + shortcut
        return tf.nn.relu(x)

    def get_config(self):
        """Return the constructor arguments so Keras can serialize the layer."""
        config = super().get_config()
        config.update({
            'filters': self.filters,
            'kernel_size': self.kernel_size,
            'strides': self.strides,
            'dilation_rate': self.dilation_rate,
        })
        return config

In [9]:
def build_discriminator():
    """Build the discriminator: dilated residual stack -> global pooling -> dense head.

    Returns:
        A Keras ``Model`` mapping a ``(batch, time, 1)`` series to one sigmoid
        score per example.
    """
    input_series = layers.Input(shape=(None, 1))

    x = layers.BatchNormalization()(input_series)

    # (dilation_rate, followed_by_dropout) for each residual block.
    # NOTE(review): the second block has no dropout after it in the original
    # notebook; that is preserved here. Confirm whether it was intentional.
    block_specs = [
        (1, True),
        (2, False),
        (4, True),
        (8, True),
        (12, True),
        (24, True),
    ]
    for dilation_rate, use_dropout in block_specs:
        x = ResNetBlock(64, 3, 1, dilation_rate)(x)
        if use_dropout:
            x = layers.Dropout(0.2)(x)

    # Global pooling collapses the time axis, so any input length is accepted.
    pooled_output = layers.GlobalAveragePooling1D()(x)

    # Hidden dense layer.
    hidden_output = layers.Dense(64, activation='relu')(pooled_output)

    # Output layer. It is fed from the hidden layer: the original wired it to
    # `pooled_output`, which left the Dense(64) layer disconnected.
    dense_output = layers.Dense(1, activation='sigmoid')(hidden_output)

    model = tf.keras.models.Model(inputs=input_series, outputs=dense_output)
    return model

discriminator = build_discriminator()
discriminator.summary()


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None, 1)]         0         
                                                                 
 batch_normalization (BatchN  (None, None, 1)          4         
 ormalization)                                                   
                                                                 
 res_net_block (ResNetBlock)  (None, None, 64)         13120     
                                                                 
 dropout (Dropout)           (None, None, 64)          0         
                                                                 
 res_net_block_1 (ResNetBloc  (None, None, 64)         25216     
 k)                                                              
                                                                 
 res_net_block_2 (ResNetBloc  (None, None, 64)         25216 

In [10]:
def build_generator():
    """Build the generator: a stack of dilated residual blocks ending in a 1x1 conv.

    Returns:
        A Keras ``Model`` mapping a ``(batch, time, 1)`` series to an output
        of the same shape.
    """
    noise_in = layers.Input(shape=(None, 1))
    features = layers.BatchNormalization()(noise_in)

    # Dilation shrinks from 12 down to 1 across the five residual blocks.
    for rate in (12, 8, 4, 2, 1):
        features = ResNetBlock(64, dilation_rate=rate)(features)

    # 1x1 convolution collapses the 64 feature channels into one output channel.
    waveform = layers.Conv1D(1, 1)(features)

    return tf.keras.models.Model(inputs=noise_in, outputs=waveform)

generator = build_generator()
generator.summary()


Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, None, 1)]         0         
                                                                 
 batch_normalization_13 (Bat  (None, None, 1)          4         
 chNormalization)                                                
                                                                 
 res_net_block_6 (ResNetBloc  (None, None, 64)         13120     
 k)                                                              
                                                                 
 res_net_block_7 (ResNetBloc  (None, None, 64)         25216     
 k)                                                              
                                                                 
 res_net_block_8 (ResNetBloc  (None, None, 64)         25216     
 k)                                                        

In [11]:
# Compile models
generator_optimizer = tf.keras.optimizers.Adam(0.0004)
discriminator_optimizer = tf.keras.optimizers.Adam(0.0004)

#generator_optimizer = tf.keras.optimizers.experimental.SGD(1e-4)
#discriminator_optimizer = tf.keras.optimizers.experimental.SGD(1e-4)

In [12]:
def discriminator_loss(real_output, fake_output):
    """Binary cross-entropy loss for the discriminator.

    Real scores are compared against a target of 1 and fake scores against a
    target of 0; the two losses are summed.
    """
    bce = tf.keras.losses.BinaryCrossentropy()
    loss_on_real = bce(tf.ones_like(real_output), real_output)
    loss_on_fake = bce(tf.zeros_like(fake_output), fake_output)
    return loss_on_real + loss_on_fake

def generator_loss(fake_output):
    """Binary cross-entropy of the discriminator's fake scores against a target of 1."""
    bce = tf.keras.losses.BinaryCrossentropy()
    wanted_labels = tf.ones_like(fake_output)
    return bce(wanted_labels, fake_output)



In [13]:
def print_img(generator_model):
    """Plot ten samples from a label-conditioned image generator in one row.

    NOTE(review): this helper feeds ``[noise, labels]`` with noise of shape
    (10, 100) and draws the outputs with ``imshow``. The generator built in
    this notebook takes a single (batch, time, 1) input, and nothing in the
    visible notebook calls this function, so it looks like a leftover from an
    image GAN. Confirm before using it.
    """
    num_samples = 10
    latent = tf.random.normal([num_samples, 100])
    # One label per sample: 0..9.
    labels = tf.constant([[idx % 10] for idx in range(num_samples)], dtype=tf.int32)
    images = generator_model.predict([latent, labels])

    fig, axs = plt.subplots(1, num_samples, figsize=(10, 10))
    for axis, image in zip(axs, images):
        axis.imshow(image, cmap="gray")
        axis.axis("off")
    plt.show()

In [17]:
# Noise sequence of 10000 samples; `train` and the final playback cell reuse it.
noise = tf.random.normal((1, 10000, 1))

# Smoke test: run the generator once and print the output shape.
test = generator.predict(noise)
print(np.shape(test))

(1, 10000, 1)


In [15]:

# NOTE(review): presumably left un-compiled because get_weights()/set_weights()
# exchange NumPy arrays and need eager execution. Confirm before re-enabling.
#@tf.function
def train_step(target_audios, unroll_steps=5):
    """Run one GAN update with a look-ahead discriminator.

    The discriminator is updated ``unroll_steps`` times on fresh noise. The
    generator is then updated once against that looked-ahead discriminator.
    Finally the discriminator weights are rolled back to their values after
    the first of those updates.

    Args:
        target_audios: Batch of real examples. Its ``.shape`` is also used as
            the shape of the noise fed to the generator.
        unroll_steps: Number of discriminator updates before the generator
            update. Defaults to 5, the value previously hard-coded.

    Raises:
        ValueError: If ``unroll_steps`` is smaller than 1, since there would
            be no snapshot to restore and no discriminator loss to report.
    """
    if unroll_steps < 1:
        raise ValueError(f"unroll_steps must be >= 1, got {unroll_steps!r}")

    for step in range(unroll_steps):
        noise = tf.random.normal(shape=target_audios.shape)
        # The generator runs outside the tape: only the discriminator is
        # differentiated in this phase.
        generated_audio = generator(noise, training=True)
        with tf.GradientTape() as disc_tape:
            real_output = discriminator(target_audios, training=True)
            fake_output = discriminator(generated_audio, training=True)

            disc_loss = discriminator_loss(real_output, fake_output)

        gradients_of_discriminator = disc_tape.gradient(
            disc_loss, discriminator.trainable_variables)
        discriminator_optimizer.apply_gradients(
            zip(gradients_of_discriminator, discriminator.trainable_variables))

        if step == 0:
            # Snapshot after the first update; restored once the generator
            # has been trained against the looked-ahead discriminator.
            weights = discriminator.get_weights()

    with tf.GradientTape() as gen_tape:
        noise = tf.random.normal(shape=target_audios.shape)
        generated_audio = generator(noise, training=True)
        fake_output = discriminator(generated_audio, training=True)
        gen_loss = generator_loss(fake_output)

    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    generator_optimizer.apply_gradients(
        zip(gradients_of_generator, generator.trainable_variables))

    # Roll the discriminator back to the snapshot.
    # NOTE(review): only layer weights are restored; the optimizer's internal
    # state presumably keeps the effect of all look-ahead updates. Verify.
    discriminator.set_weights(weights)

    # disc_loss is the value from the last look-ahead step.
    tf.print("disc_loss", disc_loss, 'gen_loss', gen_loss)


def train(generator, discriminator, epochs, batch_size):
    """Train the GAN on ``y_train`` and preview the generator every 5 epochs.

    Args:
        generator: Model used for the periodic preview
            (``generator.predict(noise)``).
        discriminator: Accepted for symmetry but not used here; ``train_step``
            works on the module-level models.
        epochs: Number of passes over ``y_train``.
        batch_size: Examples per ``train_step`` call. A trailing partial batch
            is skipped because the batch count uses floor division.

    Reads the module-level names ``y_train``, ``noise``, ``sr`` and
    ``reduction``.
    """
    num_batches = len(y_train) // batch_size

    for epoch in range(epochs):
        for batch in range(num_batches):
            target_audios = y_train[batch * batch_size: (batch + 1) * batch_size]

            train_step(target_audios)

        if (epoch + 1) % 5 == 0:
            print(f"Epoch {epoch+1}/{epochs}")
            test = generator.predict(noise)
            audio_data = test.ravel()
            # A bare `Audio(...)` expression is only rendered when it is the
            # last line of a cell. Inside a function it has to be passed to
            # display() or nothing is shown. The rate matches the
            # `sr // reduction` target used when resampling.
            display(Audio(data=audio_data, rate=sr // reduction))


# Train the GAN
# Training configuration. With this many epochs the run is effectively
# open-ended and is stopped by interrupting the kernel.
EPOCHS = 2_000_000
BATCH_SIZE = 64
# NOTE(review): not read by `train` or `train_step` in this notebook.
num_unrolling_steps = 20  # Set the desired number of unrolling steps
train(generator, discriminator, epochs=EPOCHS, batch_size=BATCH_SIZE)

disc_loss 1.18114436 gen_loss 1.86107922
disc_loss 0.92131263 gen_loss 1.70045447
disc_loss 0.775848806 gen_loss 1.47319674
disc_loss 0.714042783 gen_loss 1.27256262
disc_loss 0.833691657 gen_loss 1.06044006
disc_loss 0.930337071 gen_loss 0.94697237
disc_loss 1.07309973 gen_loss 0.85146153
disc_loss 1.00525475 gen_loss 0.866589427
disc_loss 0.829957962 gen_loss 1.00494111


KeyboardInterrupt: ignored

In [18]:
            # Run the generator on the stored noise and flatten the output to 1-D.
            test = generator.predict(noise)
            audio_data = test.ravel()
            # Play the audio within the Jupyter Notebook. The rate uses floor
            # division so it matches the `sr // reduction` resampling target.
            Audio(data=audio_data, rate=sr // reduction)

