In [42]:
import numpy as np
import pandas as pd
from scipy.io import wavfile
import librosa
import os

def wav_to_csv(wav_filename, csv_filename):
    # Read the stereo WAV file
    sample_rate, data = wavfile.read(wav_filename)
    
    # Check if the audio is stereo
    if len(data.shape) != 2 or data.shape[1] != 2:
        raise ValueError("Input file is not a stereo WAV file.")

    # Convert to DataFrame and save to CSV
    df = pd.DataFrame(data, columns=['Left_Channel', 'Right_Channel'])
    df.to_csv(csv_filename, index=False)

def csv_to_wav(csv_filename, wav_filename):
    # Read the CSV file
    df = pd.read_csv(csv_filename)

    # # Debug: Print max and min values to check the range
    # print(f"Max value in DataFrame: {df.max().max()}")
    # print(f"Min value in DataFrame: {df.min().min()}")

    # Check if the CSV file has the correct columns
    if 'Left_Channel' not in df.columns or 'Right_Channel' not in df.columns:
        raise ValueError("CSV file does not contain the correct columns.")

    # Convert DataFrame back to numpy array
    data = df.to_numpy()

    # Debug: Check the data type
    print(f"Data type before casting: {data.dtype}")

    # Write the stereo data back to a WAV file
    wavfile.write(wav_filename, 48000, data.astype(np.int32))

def extract_and_save_features(wav_filename, mel_filename, mfcc_filename, sr=48000, n_mfcc=20, n_mels=128, downsample_factor=8):
    # Load the audio file
    audio, sample_rate = librosa.load(wav_filename, sr=sr)

    # Adjust hop length to match the downsampling factor
    hop_length = downsample_factor

    # Extract Mel spectrograms
    mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=sample_rate, n_mels=n_mels, hop_length=hop_length)
    mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)

    # Extract MFCCs
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc, hop_length=hop_length)

    # Save Mel spectrograms and MFCCs as CSV
    pd.DataFrame(mel_spectrogram_db).to_csv(mel_filename, index=False)
    pd.DataFrame(mfccs).to_csv(mfcc_filename, index=False)


def split_wav_to_csv_batches(wav_file, batch_count=1000):
    # Read the stereo WAV file
    sample_rate, data = wavfile.read(wav_file)

    # Calculate the number of samples per batch
    total_samples = data.shape[0]
    samples_per_batch = total_samples // batch_count

    # Directory for storing CSV files
    os.makedirs('./csv_batches', exist_ok=True)

    for i in range(batch_count):
        # Determine start and end indices for the current batch
        start = i * samples_per_batch
        end = (i + 1) * samples_per_batch if i < batch_count - 1 else total_samples

        # Extract the batch
        batch_data = data[start:end]

        # File names for the current batch
        batch_wav_filename = f'csv_batches/batch_{i}.wav'
        batch_csv_filename = f'csv_batches/batch_{i}.csv'
        batch_mel_filename = f'csv_batches/mel_batch_{i}.csv'
        batch_mfcc_filename = f'csv_batches/mfcc_batch_{i}.csv'

        # Write the batch to a WAV file
        wavfile.write(batch_wav_filename, sample_rate, batch_data)

        # Extract and save mel spectrograms and MFCCs
        extract_and_save_features(batch_wav_filename, batch_mel_filename, batch_mfcc_filename)

        # Convert WAV to CSV and save
        wav_to_csv(batch_wav_filename, batch_csv_filename)

        # Optionally remove the temporary WAV file
        os.remove(batch_wav_filename)

def convert_csv_batches_to_wav(batch_count=1000):
    # Directory for storing reconstructed WAV files
    os.makedirs('reconstructed_wav_batches', exist_ok=True)

    for i in range(batch_count):
        # File names for the current batch
        batch_csv_filename = f'csv_batches/batch_{i}.csv'
        reconstructed_wav_filename = f'reconstructed_wav_batches/reconstructed_{i}.wav'

        # Convert the CSV file back to a WAV file
        csv_to_wav(batch_csv_filename, reconstructed_wav_filename)



In [43]:
# Example Usage
split_wav_to_csv_batches('./trainingdata.wav')

  sample_rate, data = wavfile.read(wav_file)


In [57]:
def average_audio_data(audio_data, downsample_factor=8):
    # Calculate the new length after downsampling
    new_length = audio_data.shape[0] // downsample_factor
    downsampled_data = np.zeros((new_length, audio_data.shape[1]))

    for i in range(new_length):
        start = i * downsample_factor
        end = start + downsample_factor
        downsampled_data[i, :] = np.mean(audio_data[start:end, :], axis=0)

    # Adjust the length to match the expected time steps if necessary
    expected_length = 60021
    if downsampled_data.shape[0] != expected_length:
        downsampled_data = downsampled_data[:expected_length, :]

    return downsampled_data


def save_combined_data(combined_data, batch_number):
    # Create a directory to save the combined CSV files
    os.makedirs('./combined_batches', exist_ok=True)
    combined_filename = f'./combined_batches/combined_batch_{batch_number}.csv'

    # Save the combined data as a CSV file
    pd.DataFrame(combined_data).to_csv(combined_filename, index=False)

def process_batch(batch_number):
    # Load CSV files
    batch_audio = pd.read_csv(f'csv_batches/batch_{batch_number}.csv').values
    batch_mfcc = pd.read_csv(f'csv_batches/mfcc_batch_{batch_number}.csv').values.T
    batch_mel = pd.read_csv(f'csv_batches/mel_batch_{batch_number}.csv').values.T

    # Reshape raw audio data
    reshaped_audio = average_audio_data(batch_audio)

    # Correctly truncate MFCC and mel spectrogram data to match reshaped_audio's number of rows
    batch_mfcc = batch_mfcc[:reshaped_audio.shape[0], :]
    batch_mel = batch_mel[:reshaped_audio.shape[0], :]

    # Combine features
    combined_data = np.concatenate([reshaped_audio, batch_mfcc, batch_mel], axis=1)

    # Save the combined data to CSV
    save_combined_data(combined_data, batch_number)

# Process and save a single batch for debugging
process_batch(10)  # Example: processing the 10th batch

# Process and save all batches
for i in range(1000):
    process_batch(i)


In [6]:
def print_csv_shapes(batch_number):
    # Filenames for each feature
    batch_csv_filename = f'csv_batches/batch_{batch_number}.csv'
    batch_mfcc_filename = f'csv_batches/mfcc_batch_{batch_number}.csv'
    batch_mel_filename = f'csv_batches/mel_batch_{batch_number}.csv'
    combined_filename = f'combined_batches/combined_batch_{batch_number}.csv'

    # Load each CSV file
    batch_data = pd.read_csv(batch_csv_filename)
    mfcc_data = pd.read_csv(batch_mfcc_filename)
    mel_data = pd.read_csv(batch_mel_filename)
    combined_data = pd.read_csv(combined_filename)

    # Print the shape of each CSV file
    print(f'Shape of {batch_csv_filename}: {batch_data.shape}')
    print(f'Shape of {batch_mfcc_filename}: {mfcc_data.shape}')
    print(f'Shape of {batch_mel_filename}: {mel_data.shape}')
    print(f'Shape of {combined_filename}: {combined_data.shape}')

# Example usage
print_csv_shapes(10)


Shape of csv_batches/batch_10.csv: (480168, 2)
Shape of csv_batches/mfcc_batch_10.csv: (20, 60022)
Shape of csv_batches/mel_batch_10.csv: (128, 60022)
Shape of combined_batches/combined_batch_10.csv: (60021, 150)


In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam

def create_model(input_shape):
    model = Sequential()
    model.add(LSTM(64, return_sequences=True, input_shape=input_shape))
    model.add(LSTM(64, return_sequences=False))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(input_shape[1], activation='linear'))  # Output layer

    model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
    return model

# Example: Create a model with the expected input shape
model = create_model((60020, 150))
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 60020, 64)         55040     
                                                                 
 lstm_3 (LSTM)               (None, 64)                33024     
                                                                 
 dense_2 (Dense)             (None, 128)               8320      
                                                                 
 dense_3 (Dense)             (None, 150)               19350     
                                                                 
Total params: 115734 (452.09 KB)
Trainable params: 115734 (452.09 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [65]:
import numpy as np
import pandas as pd
from tensorflow.keras.utils import Sequence

class DataGenerator(Sequence):
    def __init__(self, batch_size, batch_count=1000, shuffle=True):
        self.batch_size = batch_size
        self.batch_count = batch_count
        self.shuffle = shuffle
        self.indices = np.arange(batch_count)
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __len__(self):
        return int(np.ceil(self.batch_count / self.batch_size))

    def __getitem__(self, index):
        batch_indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        X, Y = [], []
        for i in batch_indices:
            combined_data = pd.read_csv(f'combined_batches/combined_batch_{i}.csv').values
            X_data = combined_data[:-1, :]
            Y_data = combined_data[1:, :]
            X.append(X_data)
            Y.append(Y_data)
        return np.array(X), np.array(Y)

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)


In [8]:
import numpy as np
import pandas as pd

def load_single_batch(batch_number):
    combined_data = pd.read_csv(f'combined_batches/combined_batch_{batch_number}.csv').values
    return combined_data

# Load a single batch of data
batch_number = 10  # Change this to load a different batch
# Load a single batch of data
single_batch_data = load_single_batch(batch_number)

# Reshape X_train and Y_train for LSTM input
# LSTM expects data in the shape [samples, time steps, features]
X_train = single_batch_data[:-1].reshape(1, -1, 150)  # Shape: (1, 60020, 150)
Y_train = single_batch_data[1:].reshape(1, -1, 150)   # Shape: (1, 60020, 150)

print(f"Reshaped X_train: {X_train.shape}")
print(f"Reshaped Y_train: {Y_train.shape}")

# Continue with model training as before...


print(X_train.shape)
print(Y_train.shape)

# Assuming your model is already defined and compiled
# For example: model = create_model((60021, 150))

number_of_epochs = 10  # Set the number of epochs as needed

# Train the model on the single batch
for epoch in range(number_of_epochs):
    print(f"Epoch {epoch+1}/{number_of_epochs}")
    model.fit(X_train, Y_train, epochs=1, batch_size=32, verbose=1)



Reshaped X_train: (1, 60020, 150)
Reshaped Y_train: (1, 60020, 150)
(1, 60020, 150)
(1, 60020, 150)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [15]:
import numpy as np
import pandas as pd

def load_random_batch(batch_count=1000, time_steps=60020):
    random_batch_number = 11
    combined_data = pd.read_csv(f'combined_batches/combined_batch_{random_batch_number}.csv').values

    # Trim or pad the data to ensure it has exactly 60020 time steps
    current_length = combined_data.shape[0]

    if current_length > time_steps:
        # If the data is longer, trim it
        combined_data = combined_data[:time_steps, :]
    elif current_length < time_steps:
        # If the data is shorter, pad it with zeros
        padding = np.zeros((time_steps - current_length, combined_data.shape[1]))
        combined_data = np.vstack((combined_data, padding))

    return combined_data.reshape(1, time_steps, -1)  # Reshaping to match the training data format

# Load a random batch of data for prediction
random_batch_data = load_random_batch()

print(random_batch_data)


[[[ 3.59979584e+08  6.78667840e+07 -6.84922900e+01 ... -7.25929100e+01
   -7.45070500e+01 -7.65186540e+01]
  [ 2.95406496e+08  3.82903136e+08 -6.75606200e+01 ... -7.25551400e+01
   -7.44933400e+01 -7.65195850e+01]
  [ 2.60664096e+08  7.84899456e+08 -6.66505000e+01 ... -7.25194240e+01
   -7.44819600e+01 -7.65231200e+01]
  ...
  [ 1.04488608e+09  3.40938720e+08 -1.33552380e+02 ... -7.88751300e+01
   -7.86772300e+01 -7.88051150e+01]
  [ 5.59149216e+08  5.37217280e+08 -1.34522800e+02 ... -7.88675500e+01
   -7.86685000e+01 -7.87960740e+01]
  [ 4.44700576e+08  6.59750912e+08 -1.35512820e+02 ... -7.88625500e+01
   -7.86623800e+01 -7.87896400e+01]]]


In [13]:
# Make a prediction
predicted_output = model.predict(random_batch_data)

# predicted_output will have the shape (1, 150), representing the predicted next step




In [14]:
print(predicted_output)

[[-0.8013457  -1.6589247   0.07852541  0.3065125   0.07495898  0.104821
  -0.26749134  0.25741285 -0.49910665  0.2720446  -0.2438257   0.2886235
   0.21354641 -0.00984592 -0.66821295 -0.43160796  0.40313578 -0.16114543
   0.22072063 -0.5079994  -0.09167016 -0.28135818 -0.23720239 -0.05179059
  -0.37304986 -0.09808382 -0.2694115   0.05578225 -0.13453603  0.08233033
  -0.3115627  -0.0769795  -0.09596533 -0.07965457  0.0984325   0.33302024
  -0.38446507 -0.41877437 -0.27677858 -0.46309996 -0.4005256  -0.1042662
   0.04745586  0.20855042 -0.2276626  -0.40486294 -0.5613419  -0.4643444
  -0.3402691  -0.441715   -0.35474056  0.07680805 -0.09806741 -0.3202401
  -0.07370927 -0.24728718 -0.28474477 -0.5796748  -0.08196586 -0.29798445
  -0.6808846  -0.05143305 -0.2574043   0.09090462 -0.22952965 -0.3159813
   0.2576944  -0.6148954  -0.44725296 -0.09370957  0.06198024 -0.3163975
  -0.8531656  -0.3668598  -0.38336396 -0.5105525  -0.4562496  -0.08857801
   0.03722769 -0.59924155 -0.49182478  0.11617

In [17]:
def load_random_batch(batch_count=1000, time_steps=60020):
    random_batch_number = 10
    combined_data = pd.read_csv(f'combined_batches/combined_batch_{random_batch_number}.csv').values

    # Ensure the data has the correct number of time steps
    if combined_data.shape[0] > time_steps:
        combined_data = combined_data[:time_steps, :]
    elif combined_data.shape[0] < time_steps:
        padding = np.zeros((time_steps - combined_data.shape[0], combined_data.shape[1]))
        combined_data = np.vstack((combined_data, padding))

    return combined_data.reshape(1, time_steps, -1)  # Reshape for model input

# Load a random batch
random_batch_data = load_random_batch()

new_predictions = []

for i in range(60020):
    # Predict the next 150 features
    next_step_prediction = model.predict(random_batch_data)

    # Update the batch: Remove the first 150 features and add the new prediction
    updated_batch = np.roll(random_batch_data, -1, axis=1)
    updated_batch[0, -1, :] = next_step_prediction

    # Save the prediction for analysis
    new_predictions.append(next_step_prediction[0])

    # Prepare the batch for the next iteration
    random_batch_data = updated_batch

# Convert the list of predictions to a numpy array
new_predictions = np.array(new_predictions)





KeyboardInterrupt: 