<a href="https://colab.research.google.com/github/aetev/Learning-stuff-/blob/main/agcnn%202.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Install tensorflow_addons and pydub, which the import cell below relies on.
!pip install tensorflow_addons
!pip install pydub



In [3]:
# Mount Google Drive at /content/drive so that files stored there
# (e.g. the audio sample folder read later in this notebook) are accessible.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
import tensorflow as tf
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import numpy as np
import tensorflow_addons as tfa
from pydub import AudioSegment
from pydub.utils import make_chunks
import os
from IPython.display import Audio


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [5]:
import os
import librosa
import numpy as np

# Folder on the mounted Drive that holds the WAV samples
folder_path = '/content/drive/MyDrive/bass samples'

# Length of every training chunk, in seconds. Consecutive chunks overlap by
# 50%. This is loop-invariant, so it is defined once instead of once per file.
chunk_length_seconds = .5

# Sample rate shared by all chunks. It is taken from the first file loaded and
# is reused later in the notebook for playback (Audio(..., rate=sr)).
sr = None

# Fixed-length chunks collected from all files
audio_chunks = []

# sorted() makes the chunk order (and therefore indices such as y_train[5])
# reproducible; os.listdir returns entries in arbitrary order.
for file_name in sorted(os.listdir(folder_path)):
    # Only WAV files are used
    if not file_name.endswith('.wav'):
        continue

    file_path = os.path.join(folder_path, file_name)

    # sr=None asks librosa to keep the file's native sample rate
    audio, file_sr = librosa.load(file_path, sr=None)

    # The chunk length in samples is derived from the sample rate, so files
    # with different rates would produce chunks of different lengths that
    # cannot be stacked into one array. Fail early with a clear message.
    if sr is None:
        sr = file_sr
    elif file_sr != sr:
        raise ValueError(
            f"{file_name} has sample rate {file_sr}, expected {sr}; "
            "resample the files to a common rate first"
        )

    # Chunk length in samples, and the hop between chunk starts (50% overlap)
    chunk_length = int(chunk_length_seconds * sr)
    overlap_length = chunk_length // 2

    # Step through the signal one hop at a time, stopping at the last start
    # position where a full-length chunk still fits. Every slice therefore
    # has exactly chunk_length samples; files shorter than one chunk
    # contribute nothing.
    for start_idx in range(0, len(audio) - chunk_length + 1, overlap_length):
        audio_chunks.append(audio[start_idx:start_idx + chunk_length])

if not audio_chunks:
    raise ValueError(f"No usable .wav chunks found in {folder_path!r}")

# Stack into (num_chunks, chunk_length) and add a trailing channel axis so the
# final shape is (num_chunks, chunk_length, 1)
audio_array = np.stack(audio_chunks)[..., np.newaxis]

# Uniform noise in [-1, 1) with the same shape as the audio array
noise = np.random.uniform(low=-1.0, high=1.0, size=audio_array.shape)




In [33]:
# Training pair: uniform noise as x, real audio chunks as y.
x_train, y_train = noise, audio_array
# Per-example shape, i.e. everything after the leading batch axis
print(y_train.shape[1:])

(22050, 1)


In [34]:


# Flatten one training chunk (index 5) to 1-D and play it back inline
audio_data = y_train[5].reshape(-1)
Audio(data=audio_data, rate=sr)

In [8]:
class ResNetBlock(layers.Layer):
    """1-D residual block: Conv1D -> BN -> ReLU -> Conv1D -> BN, plus shortcut, then ReLU.

    Args:
        filters: Number of output channels of both convolutions.
        kernel_size: Kernel width of both convolutions.
        strides: Stride of the first convolution. When it is not 1, the
            shortcut is a 1x1 Conv1D with the same stride; otherwise the
            shortcut is the unmodified input.
        dilation_rate: Dilation rate of the first convolution only.
        **kwargs: Forwarded to ``layers.Layer`` (e.g. ``name``).

    Note:
        With ``strides == 1`` the shortcut is the raw input. If the input's
        channel count differs from ``filters``, the addition in ``call`` relies
        on broadcasting, which only works when one side has a single channel,
        and the result then has the larger channel count.
    """

    def __init__(self, filters, kernel_size=3, strides=1, dilation_rate=1, **kwargs):
        super().__init__(**kwargs)

        # Kept so get_config() can describe the layer for serialisation
        self.filters = filters
        self.kernel_size = kernel_size
        self.strides = strides
        self.dilation_rate = dilation_rate

        self.conv1 = layers.Conv1D(filters, kernel_size, strides=strides,
                                   dilation_rate=dilation_rate, padding='same')
        self.bn1 = layers.BatchNormalization()
        self.conv2 = layers.Conv1D(filters, kernel_size, padding='same')
        self.bn2 = layers.BatchNormalization()

        if strides != 1:
            # Strided 1x1 projection so the shortcut matches the main branch
            self.residual = layers.Conv1D(filters, 1, strides=strides)
        else:
            # Identity shortcut
            self.residual = lambda x: x

    def call(self, inputs, training=False):
        """Apply the block; ``training`` is passed to both batch-norm layers."""
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = tf.nn.relu(x)
        x = self.conv2(x)
        x = self.bn2(x, training=training)

        r = self.residual(inputs)

        x = x + r
        return tf.nn.relu(x)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            'filters': self.filters,
            'kernel_size': self.kernel_size,
            'strides': self.strides,
            'dilation_rate': self.dilation_rate,
        })
        return config

In [9]:
def build_discriminator():
    """Build the discriminator: dilated residual stack -> global pooling -> MLP.

    The model accepts a variable-length, single-channel series of shape
    (batch, time, 1) and returns one sigmoid probability per example.

    Returns:
        tf.keras.Model mapping the input series to a (batch, 1) output.
    """
    input_series = layers.Input(shape=(None, 1))

    # Residual blocks with growing dilation (1, 2, 4, 8, 12, 24).
    x = ResNetBlock(64, 3, 1, 1)(input_series)
    x = layers.Dropout(0.2)(x)

    # This block is the only one that is not followed by dropout.
    x = ResNetBlock(64, 3, 1, 2)(x)

    x = ResNetBlock(64, 3, 1, 4)(x)
    x = layers.Dropout(0.2)(x)

    x = ResNetBlock(64, 3, 1, 8)(x)
    x = layers.Dropout(0.2)(x)

    x = ResNetBlock(64, 3, 1, 12)(x)
    x = layers.Dropout(0.2)(x)

    x = ResNetBlock(64, 3, 1, 24)(x)
    x = layers.Dropout(0.2)(x)

    # Average over the time axis so the head is independent of input length
    pooled_output = layers.GlobalAveragePooling1D()(x)

    # Hidden dense layer
    hidden_output = layers.Dense(64, activation='relu')(pooled_output)

    # Output layer. It must consume the hidden layer's output; feeding it
    # pooled_output would leave the Dense(64) layer disconnected from the model.
    dense_output = layers.Dense(1, activation='sigmoid')(hidden_output)

    model = tf.keras.models.Model(inputs=input_series, outputs=dense_output)
    return model

discriminator = build_discriminator()
discriminator.summary()


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None, 1)]         0         
                                                                 
 res_net_block (ResNetBlock)  (None, None, 64)         13120     
                                                                 
 dropout (Dropout)           (None, None, 64)          0         
                                                                 
 res_net_block_1 (ResNetBloc  (None, None, 64)         25216     
 k)                                                              
                                                                 
 res_net_block_2 (ResNetBloc  (None, None, 64)         25216     
 k)                                                              
                                                                 
 dropout_1 (Dropout)         (None, None, 64)          0     

In [10]:
def build_generator():
    """Build the generator: a stack of dilated residual blocks and a 1x1 conv.

    The model maps a variable-length, single-channel noise series of shape
    (batch, time, 1) to an output series of the same shape.

    Returns:
        tf.keras.Model mapping the noise series to the generated series.
    """
    input_series = layers.Input(shape=(None, 1))

    # Every block keeps 64 channels. A final block with filters=1 would have
    # its 64-channel identity shortcut broadcast onto a 1-channel branch
    # (strides=1 means no projection), so it would not be a real residual
    # block. Channel reduction is left to the 1x1 convolution below.
    x = input_series
    for dilation in (1, 2, 4, 8, 12):
        x = ResNetBlock(64, dilation_rate=dilation)(x)

    # Linear 1x1 convolution collapsing the 64 channels into one output channel
    x = layers.Conv1D(1, 1)(x)

    model = tf.keras.models.Model(inputs=input_series, outputs=x)
    return model

generator = build_generator()
generator.summary()


Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, None, 1)]         0         
                                                                 
 res_net_block_6 (ResNetBloc  (None, None, 64)         13120     
 k)                                                              
                                                                 
 res_net_block_7 (ResNetBloc  (None, None, 64)         25216     
 k)                                                              
                                                                 
 res_net_block_8 (ResNetBloc  (None, None, 64)         25216     
 k)                                                              
                                                                 
 res_net_block_9 (ResNetBloc  (None, None, 64)         25216     
 k)                                                        

In [11]:
# One Adam optimizer per network; the learning rates differ (4e-4 vs 1e-4)
generator_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0004)
discriminator_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

#generator_optimizer = tf.keras.optimizers.experimental.SGD(1e-4)
#discriminator_optimizer = tf.keras.optimizers.experimental.SGD(1e-4)

In [12]:
def discriminator_loss(real_output, fake_output):
    """Binary cross-entropy of the discriminator on real and generated data.

    Args:
        real_output: Discriminator predictions for real examples (target 1).
        fake_output: Discriminator predictions for generated examples (target 0).

    Returns:
        Sum of the two cross-entropy terms.
    """
    bce = tf.keras.losses.BinaryCrossentropy()
    loss_on_real = bce(tf.ones_like(real_output), real_output)
    loss_on_fake = bce(tf.zeros_like(fake_output), fake_output)
    return loss_on_real + loss_on_fake

def generator_loss(fake_output):
    """Binary cross-entropy of the discriminator's predictions against all-ones.

    Args:
        fake_output: Discriminator predictions for generated examples.

    Returns:
        Cross-entropy between a tensor of ones and ``fake_output``.
    """
    wanted_labels = tf.ones_like(fake_output)
    bce = tf.keras.losses.BinaryCrossentropy()
    return bce(wanted_labels, fake_output)



In [13]:
def print_img(generator_model, num_samples=10, num_timesteps=22050):
    """Plot waveforms sampled from the generator.

    The generator built in this notebook takes a single noise tensor of shape
    (batch, time, 1) and returns a tensor of the same shape, so the samples
    are drawn as line plots (one per row) rather than shown as images.

    Args:
        generator_model: Model mapping (batch, time, 1) noise to a series of
            the same shape.
        num_samples: Number of waveforms to generate and plot.
        num_timesteps: Length of each noise series. The default equals the
            training chunk length printed earlier in the notebook.
    """
    noise = tf.random.normal([num_samples, num_timesteps, 1])
    generated_audio = generator_model.predict(noise)

    # squeeze=False keeps axs two-dimensional even when num_samples == 1
    fig, axs = plt.subplots(num_samples, 1, figsize=(10, 10), squeeze=False)
    for i in range(num_samples):
        # Drop the channel axis to get a 1-D waveform
        axs[i, 0].plot(generated_audio[i, :, 0])
        axs[i, 0].axis("off")
    plt.show()

In [14]:
#tf.keras.backend.clear_session()

In [15]:

#@tf.function
def train_step(target_audios, unrolling_steps=5):
    """Run one GAN update on a batch of real audio.

    The discriminator is updated ``unrolling_steps`` times. Its weights are
    snapshotted after the first update, the generator is then updated once
    against the fully updated discriminator, and finally the discriminator is
    rolled back to the snapshot. This looks like a simplified form of
    unrolled-GAN training; only the weights are restored, not the optimizer
    state.

    Uses the module-level ``generator``, ``discriminator`` and both optimizers.

    Args:
        target_audios: Batch of real examples; noise with the same shape is
            drawn as generator input.
        unrolling_steps: Number of discriminator updates per call. Must be at
            least 1. The default of 5 matches the previously hard-coded value.

    Raises:
        ValueError: If ``unrolling_steps`` is smaller than 1.
    """
    if unrolling_steps < 1:
        # Without at least one discriminator step there is no snapshot to restore
        raise ValueError("unrolling_steps must be at least 1")

    weights = None
    for i in range(unrolling_steps):
        noise = tf.random.normal(shape=(target_audios.shape))
        # Generated outside the tape: only gradients with respect to the
        # discriminator's variables are requested below.
        generated_audio = generator(noise, training=True)
        with tf.GradientTape() as disc_tape:
            real_output = discriminator(target_audios, training=True)
            fake_output = discriminator(generated_audio, training=True)

            disc_loss = discriminator_loss(real_output, fake_output)

        gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)
        discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))

        if i == 0:
            # Snapshot after the first update; restored once the generator has stepped
            weights = discriminator.get_weights()

    with tf.GradientTape() as gen_tape:
        noise = tf.random.normal(shape=(target_audios.shape))
        generated_audio = generator(noise, training=True)
        fake_output = discriminator(generated_audio, training=True)
        gen_loss = generator_loss(fake_output)

    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))

    # Roll the discriminator back to its state after the first update
    discriminator.set_weights(weights)

    # disc_loss is the loss of the last discriminator update in the loop above
    tf.print("disc_loss",disc_loss,'gen_loss',gen_loss)


def train(generator, discriminator, epochs, batch_size):
    """Run ``train_step`` over consecutive batches of ``y_train`` for each epoch.

    The number of batches is derived from the module-level ``x_train``; a
    trailing partial batch is skipped. The ``generator`` and ``discriminator``
    arguments are not used in this body, since ``train_step`` is called with
    the audio batch only.

    Args:
        generator: Generator model (unused here).
        discriminator: Discriminator model (unused here).
        epochs: Number of passes over the data.
        batch_size: Number of examples per batch.
    """
    for epoch in range(epochs):
        batches_in_epoch = len(x_train) // batch_size
        for batch in range(batches_in_epoch):
            first = batch * batch_size
            last = first + batch_size
            target_audios = y_train[first:last]

            train_step(target_audios)
        '''
        # Output training progress
        if (epoch + 1) % 5 == 0:
            print(f"Epoch {epoch+1}/{epochs}")
            print_img(generator)
        '''

# Training configuration and launch
BATCH_SIZE = 10
EPOCHS = 2_000_000
num_unrolling_steps = 5  # desired number of unrolling steps; not passed to train()
train(generator, discriminator, epochs=EPOCHS, batch_size=BATCH_SIZE)

disc_loss 1.89095926 gen_loss 1.97783279
disc_loss 1.79918551 gen_loss 1.9129858
disc_loss 1.73747277 gen_loss 1.86220264
disc_loss 1.67793012 gen_loss 1.81083655
disc_loss 1.63129294 gen_loss 1.7642746
disc_loss 1.56295669 gen_loss 1.70907307
disc_loss 1.47635949 gen_loss 1.63406312
disc_loss 1.41999662 gen_loss 1.57596993
disc_loss 1.37719023 gen_loss 1.52103293
disc_loss 1.38012791 gen_loss 1.50364816
disc_loss 1.32490647 gen_loss 1.46194959
disc_loss 1.27559412 gen_loss 1.44660842


KeyboardInterrupt: ignored

In [36]:
# Run the generator on one long noise series (1 example, 100000 steps, 1 channel)
sample_shape = (1, 100000, 1)
noise = tf.random.normal(shape=sample_shape)
print(noise.shape)
test = generator.predict(noise)
print(test.shape)

(1, 100000, 1)
(1, 100000, 1)


In [37]:
# Flatten the generated series to 1-D and play it back inline
audio_data = test[0].reshape(-1)
Audio(data=audio_data, rate=sr)