<a href="https://colab.research.google.com/github/bukenalen30/calculator-app/blob/master/Style_Transfer_Music_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

STYLE TRANSFER MUSIC

In [1]:
# Install library yang diperlukan
!pip install librosa numpy matplotlib tensorflow

# Import library
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, Conv2DTranspose, ReLU, BatchNormalization, Add




Mengolah Audio menjadi Mel-Spectrogram

In [2]:
def load_audio(file_path, sr=22050):
    audio, _ = librosa.load(file_path, sr=sr)
    return audio

def audio_to_mel(audio, sr=22050, n_fft=2048, hop_length=512, n_mels=128):
    mel_spectrogram = librosa.feature.melspectrogram(audio, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)
    log_mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)
    return log_mel_spectrogram

def mel_to_audio(mel_spectrogram, sr=22050, n_iter=32, hop_length=512):
    mel_spectrogram = librosa.db_to_power(mel_spectrogram)
    return librosa.feature.inverse.mel_to_audio(mel_spectrogram, sr=sr, hop_length=hop_length, n_iter=n_iter)


Menampilkan Mel-Spectogram

In [3]:
def plot_mel_spectrogram(mel_spectrogram, title="Mel-Spectrogram"):
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(mel_spectrogram, sr=22050, hop_length=512, x_axis='time', y_axis='mel')
    plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.tight_layout()
    plt.show()


Arsitektur Generator (U-Net)

In [4]:
def build_generator(input_shape):
    inputs = Input(shape=input_shape)

    # Encoder
    x = Conv2D(64, (4, 4), strides=(2, 2), padding='same')(inputs)
    x = ReLU()(x)
    x = BatchNormalization()(x)

    x = Conv2D(128, (4, 4), strides=(2, 2), padding='same')(x)
    x = ReLU()(x)
    x = BatchNormalization()(x)

    # Decoder
    x = Conv2DTranspose(64, (4, 4), strides=(2, 2), padding='same')(x)
    x = ReLU()(x)
    x = BatchNormalization()(x)

    x = Conv2DTranspose(1, (4, 4), strides=(2, 2), padding='same')(x)

    return Model(inputs, x)


Arsitektur Discriminator (CNN)

In [5]:
def build_discriminator(input_shape):
    inputs = Input(shape=input_shape)

    x = Conv2D(64, (4, 4), strides=(2, 2), padding='same')(inputs)
    x = ReLU()(x)

    x = Conv2D(128, (4, 4), strides=(2, 2), padding='same')(x)
    x = ReLU()(x)

    x = Conv2D(1, (4, 4), strides=(1, 1), padding='same')(x)

    return Model(inputs, x)


Adversarial Loss dan Cycle Consistency Loss

In [6]:
def adversarial_loss(y_true, y_pred):
    return tf.keras.losses.BinaryCrossentropy()(y_true, y_pred)

def cycle_consistency_loss(real, reconstructed):
    return tf.reduce_mean(tf.abs(real - reconstructed))


Pelatihan Model

In [11]:
# Install library yang diperlukan
!pip install librosa numpy matplotlib tensorflow pandas #Added pandas

# Import library
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, Conv2DTranspose, ReLU, BatchNormalization, Add
import pandas as pd #Added import for pandas




In [15]:
# Hyperparameter
input_shape = (128, 128, 1)
batch_size = 16
epochs = 10

# Model
generator = build_generator(input_shape)
discriminator = build_discriminator(input_shape)

# Optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5)

# Training loop
for epoch in range(epochs):
    # Load the dataset using pandas before the loop
    dataset = pd.read_csv('dataset.csv')
    for batch in range(0, len(dataset), batch_size):
        # Load data batch
        real_audio_df = dataset[batch : batch + batch_size]

        # Replace 'audio_column_name' with the actual name of your column
        #  containing the audio file paths
        # Example: If your audio column is named 'audio_path', use:
        real_audio_list = real_audio_df['audio_path'].tolist() # Replace 'audio_path' with your column name

        # Preprocess each audio file to Mel-spectrogram
        real_audio = []
        for audio_file in real_audio_list:
            audio = load_audio(audio_file)
            mel_spec = audio_to_mel(audio)
            # Resize mel_spec to (128, 128) if necessary
            mel_spec = tf.image.resize(mel_spec[:, :, np.newaxis], (128, 128))
            real_audio.append(mel_spec)

        real_audio = np.array(real_audio)  # Convert to NumPy array

        # Generate fake audio
        fake_audio = generator(real_audio)

        # Train discriminator
        real_labels = tf.ones((batch_size, 1))
        fake_labels = tf.zeros((batch_size, 1))
        with tf.GradientTape() as tape:
            real_loss = adversarial_loss(real_labels, discriminator(real_audio))
            fake_loss = adversarial_loss(fake_labels, discriminator(fake_audio))
            d_loss = (real_loss + fake_loss) / 2
        grads = tape.gradient(d_loss, discriminator.trainable_weights)
        optimizer.apply_gradients(zip(grads, discriminator.trainable_weights))

        # Train generator
        with tf.GradientTape() as tape:
            g_loss = adversarial_loss(real_labels, discriminator(fake_audio)) + \
                     cycle_consistency_loss(real_audio, generator(fake_audio))
        grads = tape.gradient(g_loss, generator.trainable_weights)
        optimizer.apply_gradients(zip(grads, generator.trainable_weights))

    print(f"Epoch {epoch+1}/{epochs}, D Loss: {d_loss.numpy()}, G Loss: {g_loss.numpy()}")

KeyError: 'audio_path'

Evaluasi dan Rekonstruksi Audio

In [16]:
# Evaluasi hasil pada sampel
sample_audio = test_data[0:1]
sample_mel = audio_to_mel(sample_audio)
fake_mel = generator.predict(sample_mel)

# Rekonstruksi audio
reconstructed_audio = mel_to_audio(fake_mel)

# Simpan hasil
librosa.output.write_wav('output_audio.wav', reconstructed_audio, sr=22050)


NameError: name 'test_data' is not defined