In [57]:
import librosa
import numpy as np
import pandas as pd
import tensorflow as tf
import os

def process_batch(audio, sr, n_mfcc, output_dir, batch_index):
    """
    Process a batch of audio, compute MFCCs, and save to disk.

    :param audio: The audio array.
    :param sr: Sample rate of the audio.
    :param n_mfcc: Number of MFCCs to compute.
    :param output_dir: Directory to save the output files.
    :param batch_index: The index of the current batch.
    """
    # Compute the MFCCs for the batch
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)

    # Convert MFCCs to DataFrame
    mfcc_df = pd.DataFrame(mfccs.T)

    # Save to disk
    output_file = os.path.join(output_dir, f'mfcc_batch_{batch_index}.csv')
    mfcc_df.to_csv(output_file, index=False)
    print(f"Saved batch {batch_index} to {output_file}")

def batch_process_audio_file(filename, sample_rate=48000, n_mfcc=13, batch_duration=10, output_dir='mfcc_batches'):
    """
    Process an audio file in batches and save MFCCs to disk.

    :param filename: Path to the audio file.
    :param sample_rate: Sample rate for processing the audio.
    :param n_mfcc: Number of MFCCs to compute.
    :param batch_duration: Duration of each batch in seconds.
    :param output_dir: Directory to save the output files.
    """
    # Ensure the output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Get the total duration of the audio file
    total_duration = librosa.get_duration(filename=filename)

    # Calculate the number of batches
    num_batches = int(np.ceil(total_duration / batch_duration))

    # Process each batch
    for i in range(num_batches):
        start_time = i * batch_duration
        end_time = min((i + 1) * batch_duration, total_duration)
        audio, sr = librosa.load(filename, sr=sample_rate, offset=start_time, duration=end_time - start_time)
        process_batch(audio, sr, n_mfcc, output_dir, i)

# Usage Example
audio_file = "./trainingdata.wav"
batch_process_audio_file(audio_file)


	This alias will be removed in version 1.0.
  total_duration = librosa.get_duration(filename=filename)


Saved batch 0 to mfcc_batches/mfcc_batch_0.csv
Saved batch 1 to mfcc_batches/mfcc_batch_1.csv
Saved batch 2 to mfcc_batches/mfcc_batch_2.csv
Saved batch 3 to mfcc_batches/mfcc_batch_3.csv
Saved batch 4 to mfcc_batches/mfcc_batch_4.csv
Saved batch 5 to mfcc_batches/mfcc_batch_5.csv
Saved batch 6 to mfcc_batches/mfcc_batch_6.csv
Saved batch 7 to mfcc_batches/mfcc_batch_7.csv
Saved batch 8 to mfcc_batches/mfcc_batch_8.csv
Saved batch 9 to mfcc_batches/mfcc_batch_9.csv
Saved batch 10 to mfcc_batches/mfcc_batch_10.csv
Saved batch 11 to mfcc_batches/mfcc_batch_11.csv
Saved batch 12 to mfcc_batches/mfcc_batch_12.csv
Saved batch 13 to mfcc_batches/mfcc_batch_13.csv
Saved batch 14 to mfcc_batches/mfcc_batch_14.csv
Saved batch 15 to mfcc_batches/mfcc_batch_15.csv
Saved batch 16 to mfcc_batches/mfcc_batch_16.csv
Saved batch 17 to mfcc_batches/mfcc_batch_17.csv
Saved batch 18 to mfcc_batches/mfcc_batch_18.csv
Saved batch 19 to mfcc_batches/mfcc_batch_19.csv
Saved batch 20 to mfcc_batches/mfcc_batc

In [58]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

def create_model(input_shape, output_timesteps, output_features):
    model = Sequential()
    model.add(LSTM(128, return_sequences=True, input_shape=input_shape))
    model.add(LSTM(128, return_sequences=True))  # Make sure LSTM returns a sequence
    model.add(Dense(output_features))  # Output layer for each timestep
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

input_shape = (None, 12)  # Assuming each input sequence has 13 MFCC features
output_timesteps = 10  # Number of timesteps to predict
output_features = 12  # 13 MFCC coefficients per timestep

model = create_model(input_shape, output_timesteps, output_features)
model.summary()


Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_11 (LSTM)              (None, None, 128)         72192     
                                                                 
 lstm_12 (LSTM)              (None, None, 128)         131584    
                                                                 
 dense_5 (Dense)             (None, None, 12)          1548      
                                                                 
Total params: 205324 (802.05 KB)
Trainable params: 205324 (802.05 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [59]:
def load_batch(batch_index, data_dir='./mfcc_batches', input_cols=13):
    """
    Load a batch of data where the target is the next MFCC in the sequence.
    """
    file_path = os.path.join(data_dir, f'mfcc_batch_{batch_index}.csv')
    df = pd.read_csv(file_path)

    # Assuming the last MFCC of each row is the target
    X = df.iloc[:, :-1].to_numpy()  # All columns except the last one
    Y = df.iloc[:, -1].to_numpy()   # Only the last column

    return X, Y

# # Example usage
# batch_index = 0
# X_batch, Y_batch = load_batch(batch_index)


In [60]:
import numpy as np
from tensorflow.keras.optimizers import Adam

# Assuming the function `load_batch(batch_index)` is defined to load each data batch
# For simplicity, this example assumes that each batch is already in the correct shape

print(X_batch.shape)
print(Y_batch.shape)

num_batches = 1001
batch_size = 32  # You can adjust this
learning_rate = 0.001
epochs = 10  # Number of epochs to train

# Initialize the optimizer
optimizer = Adam(learning_rate=learning_rate)

# Compile the model
model.compile(optimizer=optimizer, loss='mean_squared_error')

for epoch in range(epochs):
    print(f"Epoch {epoch+1}/{epochs}")
    for batch_index in range(num_batches):
        # Load the current batch
        X_batch, Y_batch = load_batch(batch_index)

        # Reshape X_batch to add the time step dimension
        X_batch = X_batch.reshape((X_batch.shape[0], 1, X_batch.shape[1]))

        # Perform training step
        loss = model.train_on_batch(X_batch, Y_batch)

        # Optionally, print the batch loss
        print(f"Batch {batch_index+1}/{num_batches} - Loss: {loss}")



(938, 1, 12)
(938,)
Epoch 1/10


ValueError: in user code:

    File "/Users/davidzagardo/anaconda3/envs/LSTM_music/lib/python3.8/site-packages/keras/src/engine/training.py", line 1338, in train_function  *
        return step_function(self, iterator)
    File "/Users/davidzagardo/anaconda3/envs/LSTM_music/lib/python3.8/site-packages/keras/src/engine/training.py", line 1322, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/davidzagardo/anaconda3/envs/LSTM_music/lib/python3.8/site-packages/keras/src/engine/training.py", line 1303, in run_step  **
        outputs = model.train_step(data)
    File "/Users/davidzagardo/anaconda3/envs/LSTM_music/lib/python3.8/site-packages/keras/src/engine/training.py", line 1081, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "/Users/davidzagardo/anaconda3/envs/LSTM_music/lib/python3.8/site-packages/keras/src/engine/training.py", line 1139, in compute_loss
        return self.compiled_loss(
    File "/Users/davidzagardo/anaconda3/envs/LSTM_music/lib/python3.8/site-packages/keras/src/engine/compile_utils.py", line 265, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "/Users/davidzagardo/anaconda3/envs/LSTM_music/lib/python3.8/site-packages/keras/src/losses.py", line 142, in __call__
        losses = call_fn(y_true, y_pred)
    File "/Users/davidzagardo/anaconda3/envs/LSTM_music/lib/python3.8/site-packages/keras/src/losses.py", line 268, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/Users/davidzagardo/anaconda3/envs/LSTM_music/lib/python3.8/site-packages/keras/src/losses.py", line 1608, in mean_squared_error
        return backend.mean(tf.math.squared_difference(y_pred, y_true), axis=-1)

    ValueError: Dimensions must be equal, but are 12 and 938 for '{{node mean_squared_error/SquaredDifference}} = SquaredDifference[T=DT_FLOAT](sequential_6/dense_5/BiasAdd, IteratorGetNext:1)' with input shapes: [938,1,12], [938].


In [48]:
import random

# Choose a random batch index
random_batch_index = random.randint(0, num_batches - 1)

# Load the batch
X_batch, _ = load_batch(random_batch_index)
X_batch = X_batch.reshape((X_batch.shape[0], 1, X_batch.shape[1]))

new_steps = 300
predicted_sequence = []

current_step = X_batch

for _ in range(new_steps):
    # Predict the next step
    next_step = model.predict(current_step)

    # Remove the unnecessary dimensions from next_step
    next_step_squeezed = next_step.squeeze()

    # Append the prediction to the sequence
    predicted_sequence.append(next_step_squeezed)

    # Update the current step: Remove the oldest timestep and add the new prediction
    current_step = np.roll(current_step, -1, axis=1)
    # Only update the last feature for each sample
    current_step[:, -1, -1] = next_step_squeezed  # Assuming the prediction is for the last feature


# Convert predicted_sequence to an array
predicted_sequence = np.array(predicted_sequence)




In [49]:
import librosa
import soundfile as sf

# Assuming predicted_sequence is a numpy array of shape (300, n_mfcc)
# where n_mfcc is the number of MFCC features per timestep

# Invert the MFCC to audio (you might need to adjust parameters based on your MFCC configuration)
predicted_audio = librosa.feature.inverse.mfcc_to_audio(predicted_sequence.T)

# Write the audio data to a WAV file
output_path = 'predicted_audio.wav'
sf.write(output_path, predicted_audio, samplerate=22050)  # Replace 22050 with your actual sample rate
