Using input-output pairs to train an RNN model

In [1]:
import numpy as np

# Load the input-output pairs from the .npy file.
# Later cells read shape[1], shape[2] and shape[3] of this array (naming them
# time steps, frequency bins and channels), so it needs at least four axes.
# NOTE(review): axis 0 is presumably the sample axis, since that is the axis
# the batching code indexes - confirm against the code that wrote the file.
input_pairs_all = np.load('input_pairs.npy')

In [2]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [3]:
# Drop the trailing frame when axis 1 has odd length, so that the axis can be
# cut into two equally long halves (input half followed by target half).
if input_pairs_all.shape[1] % 2:
    input_pairs_all = input_pairs_all[:, :-1, :, :]

# Sequence length is half of axis 1; axes 2 and 3 give the per-step feature
# dimensions (frequency bins and channels).
num_time_steps, num_freq_bins, num_channels = (
    input_pairs_all.shape[1] // 2,
    input_pairs_all.shape[2],
    input_pairs_all.shape[3],
)

# RNN-based generative model: an LSTM that returns its output at every time
# step, followed by a Dense layer that maps each step back to the flattened
# (frequency bins * channels) feature size with a sigmoid activation.
model = Sequential([
    LSTM(
        256,
        input_shape=(num_time_steps, num_freq_bins * num_channels),
        return_sequences=True,
    ),
    Dense(num_freq_bins * num_channels, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='mse')

# Training hyperparameters: samples per training batch, and number of passes
# over the data set.
batch_size, num_epochs = 32, 50

# Create a generator for input-output pairs
def batch_generator(input_pairs, batch_size):
    """Yield shuffled (input, target) mini-batches from an array of pairs.

    Axis 1 of ``input_pairs`` holds an input sequence followed by its target
    sequence. Every sample is cut at the midpoint of that axis and all axes
    after it are flattened into a single feature axis. The split point and the
    feature size are derived from ``input_pairs`` itself instead of from
    notebook-level variables, so the function does not depend on which other
    cells have been executed.

    Args:
        input_pairs: NumPy array of shape (samples, 2 * steps, ...). When
            axis 1 has odd length the trailing step is ignored.
        batch_size: Positive number of samples per batch. The final batch may
            contain fewer samples.

    Yields:
        ``(input_batch, output_batch)`` tuples, each array having shape
        (batch, steps, features).

    Raises:
        ValueError: If ``batch_size`` is not positive or ``input_pairs`` has
            fewer than two axes. Because this is a generator, the error is
            raised when the first batch is requested.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")
    if input_pairs.ndim < 2:
        raise ValueError("input_pairs must have at least two axes (samples, time, ...)")

    # Midpoint of the time axis separates the input half from the target half.
    steps = input_pairs.shape[1] // 2
    # Everything after the time axis is flattened into one feature axis.
    feature_size = int(np.prod(input_pairs.shape[2:]))

    num_samples = len(input_pairs)
    indices = np.arange(num_samples)
    np.random.shuffle(indices)  # new random sample order on every call

    for start_idx in range(0, num_samples, batch_size):
        excerpt = indices[start_idx:start_idx + batch_size]
        batch = input_pairs[excerpt]

        # Split the batch into input and output parts. The batch dimension is
        # given explicitly so the reshape stays well-defined for empty axes.
        input_batch = batch[:, :steps].reshape(len(excerpt), steps, feature_size)
        output_batch = batch[:, steps:2 * steps].reshape(len(excerpt), steps, feature_size)

        yield input_batch, output_batch

# Train the model using the generator.
# A generator can be consumed only once, so a new one is created at the start
# of every epoch. Reusing a single generator object would supply batches during
# the first epoch only and leave every later epoch with nothing to train on.
# Creating it per epoch also reshuffles the sample order each time.
for epoch in range(num_epochs):
    input_pairs_generator = batch_generator(input_pairs_all, batch_size)
    for input_batch, output_batch in input_pairs_generator:
        model.train_on_batch(input_batch, output_batch)

In [6]:
# Seed the generation with the input half of the first pair, flattened to
# (1, time steps, features) to match the model's input layout.
seed_sequence = input_pairs_all[:1, :num_time_steps]
seed_sequence_reshaped = np.reshape(
    seed_sequence, (-1, num_time_steps, num_freq_bins * num_channels)
)

# Generate new music using the trained model: predict on the current window,
# keep only the last predicted frame, then slide the window forward by dropping
# its oldest frame and appending the new one.
generated_music2 = []
for _ in range(num_time_steps):  # the range() argument sets how many frames are generated
    predictions = model.predict(seed_sequence_reshaped)
    generated_music2.append(predictions[:, -1:, :])
    seed_sequence_reshaped = np.concatenate(
        (seed_sequence_reshaped[:, 1:, :], generated_music2[-1]), axis=1
    )

# Join the collected frames along the time axis into a single numpy array
generated_music2 = np.concatenate(generated_music2, axis=1)



Using the trained model to create new music

In [8]:
# Bounds used for min-max denormalization.
# NOTE(review): with 0.0 and 1.0 the mapping below leaves the values unchanged;
# confirm these match the bounds used when the training pairs were normalized.
min_value, max_value = 0.0, 1.0

# Denormalization - element-wise over the entire array: min + (max - min) * x
denormalized_music2 = min_value + (max_value - min_value) * generated_music2

In [10]:
from scipy.io import wavfile

# Set the sampling rate for the generated music (replace 44100 with your desired sampling rate)
sampling_rate = 44100


def _to_int16_pcm(samples):
    """Convert audio samples to 16-bit PCM, scaling at most once.

    Floating-point input is treated as lying in [-1.0, 1.0]: it is clipped to
    that range and multiplied by 32767. Integer input is treated as already
    scaled and is only cast, so re-running a cell cannot scale the data twice
    and overflow int16.
    """
    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.floating):
        samples = np.clip(samples, -1.0, 1.0) * 32767
    return samples.astype(np.int16)


# The generated array has a leading batch axis of length 1, while
# wavfile.write expects data shaped (Nsamples,) or (Nsamples, Nchannels).
# The batch axis is therefore dropped before writing. 'denormalized_music2'
# itself is left untouched so that later cells still receive the unscaled data.

# Multi-channel file: one row per generated time step.
# NOTE(review): the second axis is the flattened feature axis, so this is a
# stereo file only if that axis has length 2 - confirm the intended layout.
pcm_frames2 = _to_int16_pcm(denormalized_music2)[0]
wavfile.write('generated_music2.wav', sampling_rate, pcm_frames2)

# Mono file: the first feature of every time step, as a 1-D signal. It is
# converted from the original array, not from an already scaled copy.
denormalized_music_mono2 = _to_int16_pcm(denormalized_music2[0, :, 0])
wavfile.write('generated_music_mono2.wav', sampling_rate, denormalized_music_mono2)

In [11]:
import pyaudio

# Set the sampling rate for the generated music (replace 44100 with your desired sampling rate)
sampling_rate = 44100

# Convert to 16-bit PCM at most once. An earlier cell may already have turned
# the array into integers; multiplying those by 32767 again would overflow
# int16, so only floating-point data is clipped to [-1, 1] and scaled.
# The result goes into its own variable, which keeps this cell re-runnable.
playback_pcm2 = np.asarray(denormalized_music2)
if np.issubdtype(playback_pcm2.dtype, np.floating):
    playback_pcm2 = np.clip(playback_pcm2, -1.0, 1.0) * 32767
playback_pcm2 = playback_pcm2.astype(np.int16)

# Initialize PyAudio
p = pyaudio.PyAudio()
try:
    # Open an output stream.
    # NOTE(review): channels=2 treats consecutive int16 values as left/right
    # pairs; confirm this matches the layout of the generated array.
    stream = p.open(format=pyaudio.paInt16,
                    channels=2,  # Use 1 for mono or 2 for stereo
                    rate=sampling_rate,
                    output=True)
    try:
        # Play the audio
        stream.write(playback_pcm2.tobytes())
        stream.stop_stream()
    finally:
        # Release the stream even if playback fails
        stream.close()
finally:
    # Always release the PyAudio object
    p.terminate()