In [3]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


Num GPUs Available:  1


In [4]:
import pretty_midi

sample_midi = './maestro_dataset/2018/MIDI-Unprocessed_Recital1-3_MID--AUDIO_03_R1_2018_wav--2.midi'
midi = pretty_midi.PrettyMIDI(sample_midi)

print("Number of Instruments:", len(midi.instruments))
print("Instrument names:", [instr.name for instr in midi.instruments])

Number of Instruments: 1
Instrument names: ['1 to 3']


In [5]:
import os
import pretty_midi
import numpy as np
from tqdm.notebook import tqdm


In [6]:
def midi_to_sequence(file_path, seq_length=100):
    """
    Converts a MIDI file into a sequence of pitches.
    Args:
        file_path (str): Path to the MIDI file.
        seq_length (int): Length of sequence to extract.
    Returns:
        numpy.array: Array of MIDI pitches.
    """
    try:
        midi = pretty_midi.PrettyMIDI(file_path)
        notes = []
        for instrument in midi.instruments:
            if not instrument.is_drum:  # Exclude drum tracks
                for note in instrument.notes:
                    notes.append(note.pitch)  # Extract pitch values
        # Ensure the sequence length matches the required length
        if len(notes) >= seq_length:
            return np.array(notes[:seq_length])
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None


In [7]:
def preprocess_maestro(folder_path, seq_length=100):
    """
    Processes all MIDI files in a folder and extracts sequences.
    Args:
        folder_path (str): Path to the folder containing MIDI files.
        seq_length (int): Length of each sequence.
    Returns:
        numpy.array: Array of sequences.
    """
    sequences = []
    for root, _, files in os.walk(folder_path):
        for file in tqdm(files, desc="Processing MIDI files"):
            if file.endswith(".midi") or file.endswith(".mid"):
                file_path = os.path.join(root, file)
                seq = midi_to_sequence(file_path, seq_length)
                if seq is not None:
                    sequences.append(seq)
    return np.array(sequences)


In [8]:
# Path to the extracted Maestro dataset
maestro_folder = './maestro_dataset'

# Preprocess the dataset to extract sequences
seq_length = 100  # Adjust sequence length as needed
sequences = preprocess_maestro(maestro_folder, seq_length=seq_length)

print(f"Processed {len(sequences)} sequences.")

Processing MIDI files:   0%|          | 0/5 [00:00<?, ?it/s]

Processing MIDI files:   0%|          | 0/127 [00:00<?, ?it/s]

Processing MIDI files:   0%|          | 0/105 [00:00<?, ?it/s]

Processing MIDI files:   0%|          | 0/129 [00:00<?, ?it/s]

Processing MIDI files:   0%|          | 0/147 [00:00<?, ?it/s]

Processing MIDI files:   0%|          | 0/115 [00:00<?, ?it/s]

Processing MIDI files:   0%|          | 0/125 [00:00<?, ?it/s]

Processing MIDI files:   0%|          | 0/140 [00:00<?, ?it/s]

Processing MIDI files:   0%|          | 0/93 [00:00<?, ?it/s]

Processing MIDI files:   0%|          | 0/163 [00:00<?, ?it/s]

Processing MIDI files:   0%|          | 0/132 [00:00<?, ?it/s]

Processed 1276 sequences.


In [2]:
from tqdm.notebook import tqdm
import time

for i in tqdm(range(100), desc="Testing Progress Bar"):
    time.sleep(0.1)


Testing Progress Bar:   0%|          | 0/100 [00:00<?, ?it/s]

In [9]:
# Check dataset shape
print("Dataset shape:", sequences.shape)

# Display a sample sequence
print("Sample sequence:", sequences[0])


Dataset shape: (1276, 100)
Sample sequence: [77 73 68 73 49 61 77 73 78 73 78 73 51 61 78 73 80 73 80 73 53 61 80 73
 82 73 82 73 54 61 82 73 80 73 80 73 61 80 53 73 78 77 75 51 77 60 78 75
 49 77 75 73 75 77 73 75 61 77 75 60 73 58 72 56 58 70 60 56 51 56 68 60
 80 56 61 56 61 70 56 80 61 56 63 56 63 56 72 80 63 56 65 56 65 56 73 65
 80 56 63 56]


In [10]:
import pickle

# Save sequences as a pickle file
with open("maestro_sequences.pkl", "wb") as f:
    pickle.dump(sequences, f)

print("Preprocessed data saved.")


Preprocessed data saved.


In [11]:
# Test preprocessing with a single file
sample_file = './maestro_dataset/2013/ORIG-MIDI_01_7_6_13_Group__MID--AUDIO_01_R1_2013_wav--1.midi'  # Replace with an actual MIDI file path
sample_sequence = midi_to_sequence(sample_file, seq_length=100)

print(f"Sample sequence length: {len(sample_sequence) if sample_sequence is not None else 'Error'}")
print(f"Sample sequence: {sample_sequence[:10] if sample_sequence is not None else 'Error'}")


Sample sequence length: 100
Sample sequence: [53 62 57 65 69 67 52 55 61 70]
