In [1]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Extract Music Files
zip_path = '/content/drive/My Drive/AAI511_ML/midi_classic_music.zip'
extract_path = '/content/midi_classic_music/'

# Unzip the file.
!unzip -q -n "{zip_path}" -d "{extract_path}"

import shutil

composers = ['Bach', 'Beethoven', 'Chopin', 'Mozart']

for item in os.listdir(extract_path):
    item_path = os.path.join(extract_path, item)
    if os.path.isfile(item_path) and (item.endswith('.mid') or item.endswith('.MID')):
        os.remove(item_path)
        # print(f"Remove file at root: {item}")

midi_classic_path = extract_path + 'midiclassics'

for item in os.listdir(midi_classic_path):
    item_path = os.path.join(midi_classic_path, item)
    if os.path.isfile(item_path) and (item.endswith('.mid') or item.endswith('.MID')):
        os.remove(item_path)
        # print(f"Remove file at midi_classic_path: {item}")

# remove rest of non-composer files.
for root, dirs, files in os.walk(extract_path, topdown=False):
    for dir_name in dirs:
        if dir_name in composers:
            continue  # Keep this directory

        full_path = os.path.join(root, dir_name)
        contains_desired = False

        for subroot, subdirs, subfiles in os.walk(full_path):
            if any(composer in subdirs for composer in composers):
                contains_desired = True
                break

        if not contains_desired:
            shutil.rmtree(full_path)
            # print(f"Removed: {full_path}") for debugging

Mounted at /content/drive


In [2]:
!pip install pypianoroll

Collecting pypianoroll
  Downloading pypianoroll-1.0.4-py3-none-any.whl.metadata (3.8 kB)
Collecting pretty-midi>=0.2.8 (from pypianoroll)
  Downloading pretty_midi-0.2.10.tar.gz (5.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m62.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting mido>=1.1.16 (from pretty-midi>=0.2.8->pypianoroll)
  Downloading mido-1.3.3-py3-none-any.whl.metadata (6.4 kB)
Downloading pypianoroll-1.0.4-py3-none-any.whl (26 kB)
Downloading mido-1.3.3-py3-none-any.whl (54 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.6/54.6 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: pretty-midi
  Building wheel for pretty-midi (setup.py) ... [?25l[?25hdone
  Created wheel for pretty-midi: filename=pretty_midi-0.2.10-py3-none-any.whl size=5592286 sha256=edc05a2c1306acc33a17136a2feca6395dd82ac63a45a7f4a9349f5607327824
  St

In [3]:
#
# A script for preprocessing MIDI files using the Piano Roll method.
#
# Major Improvements from the previous token-based approach:
# 1. Image-like Representation: Converts MIDI files into 2D piano roll arrays
#    (time vs. pitch), which is an ideal format for Convolutional Neural Networks (CNNs).
# 2. Simplified Pipeline: Eliminates the need for complex vocabulary building, tokenizing,
#    and sequence generation steps. The data is already numerical.
# 3. Direct Data Handling: Creates final training and validation sets directly using
#    a standard train-test split, ready for model training.
#

import os
from pathlib import Path
from multiprocessing import Pool, cpu_count
import numpy as np
import pypianoroll
from sklearn.model_selection import train_test_split

# --- Configuration ---
MIDI_ROOT_DIR = '/content/midi_classic_music/midiclassics/'
# The fixed number of timesteps for each piano roll. Shorter songs are padded, longer ones are truncated.
FIXED_TIMESTEPS = 5000
# The number of pitches in the MIDI standard.
N_PITCHES = 128
# Filename for the final preprocessed data
OUTPUT_NPZ_FILE = 'preprocessed_pianoroll_data.npz'

# --- Helper Functions ---

def get_songs_by_composer(root_dir):
    """Finds all MIDI files and groups them by composer from the directory structure."""
    songs_dict = {}
    root_path = Path(root_dir)
    for composer_path in root_path.iterdir():
        if composer_path.is_dir():
            composer_name = composer_path.name
            # Use .glob() to find all .mid files, including case variations like .MID
            songs_dict[composer_name] = list(composer_path.glob('**/*.[mM][iI][dD]'))
    return songs_dict

def midi_to_pianoroll(file_path):
    """
    Parses a MIDI file and converts it into a standardized piano roll numpy array.
    """
    try:
        # Load the MIDI file using pypianoroll
        multitrack = pypianoroll.read(str(file_path))

        # Merge all tracks into a single piano roll
        multitrack.blend('sum')
        pianoroll = multitrack.tracks[0].pianoroll

        # Binarize the roll: 1 if a note is played, 0 otherwise
        pianoroll[pianoroll > 0] = 1

        # Standardize the length (timesteps) of the piano roll
        n_timesteps = pianoroll.shape[0]
        if n_timesteps < FIXED_TIMESTEPS:
            # Pad with zeros if the song is too short
            padding = np.zeros((FIXED_TIMESTEPS - n_timesteps, N_PITCHES))
            standardized_pianoroll = np.vstack([pianoroll, padding])
        else:
            # Truncate if the song is too long
            standardized_pianoroll = pianoroll[:FIXED_TIMESTEPS, :]

        return standardized_pianoroll

    except Exception as e:
        # Print an error if a file fails to parse and return None
        print(f" - Could not parse {os.path.basename(file_path)}: {e}")
        return None

# --- Main Execution Block ---
if __name__ == '__main__':
    # Step 1: Discover files and create a map of composers to file paths
    # This logic remains the same, as it correctly finds the files.
    print("Step 1: Finding MIDI files and mapping composers...")
    songs_dict = get_songs_by_composer(MIDI_ROOT_DIR)
    all_composers = sorted(songs_dict.keys())
    composer_map = {name: i for i, name in enumerate(all_composers)}
    num_classes = len(all_composers)
    print(f"Found {num_classes} composers: {', '.join(all_composers)}")

    # Create a flat list of all file paths and a corresponding list of their integer labels
    all_files = []
    all_labels = []
    for composer, files in songs_dict.items():
        composer_id = composer_map[composer]
        for file in files:
            all_files.append(file)
            all_labels.append(composer_id)

    # Step 2: Split files into training and validation sets BEFORE processing
    # This is a crucial step to prevent data leakage. We split the file list, not the data arrays.
    print("\nStep 2: Splitting files into training and validation sets...")
    train_files, val_files, train_labels, val_labels = train_test_split(
        all_files,
        all_labels,
        test_size=0.2,
        random_state=42,
        stratify=all_labels  # Ensures composer distribution is similar in train/val sets
    )
    print(f"Training files: {len(train_files)}, Validation files: {len(val_files)}")

    # Step 3: Process MIDI files into piano rolls in parallel
    print(f"\nStep 3: Processing MIDI files with {cpu_count()} cores...")

    # Process training files
    print("Processing training files...")
    with Pool() as pool:
        X_train_list = pool.map(midi_to_pianoroll, train_files)

    # Process validation files
    print("Processing validation files...")
    with Pool() as pool:
        X_val_list = pool.map(midi_to_pianoroll, val_files)

    # Step 4: Final data cleaning and assembly
    # This step replaces the old vocabulary building and sequence creation.
    print("\nStep 4: Assembling final datasets...")

    # Filter out any files that failed to parse (returned None) and their corresponding labels
    y_train = np.array([label for i, label in enumerate(train_labels) if X_train_list[i] is not None])
    X_train = np.array([roll for roll in X_train_list if roll is not None])

    y_val = np.array([label for i, label in enumerate(val_labels) if X_val_list[i] is not None])
    X_val = np.array([roll for roll in X_val_list if roll is not None])

    print(f"Training set shape:   X={X_train.shape}, y={y_train.shape}")
    print(f"Validation set shape: X={X_val.shape}, y={y_val.shape}")

    print("Preprocessing complete. Data is ready for model training.")

Step 1: Finding MIDI files and mapping composers...
Found 4 composers: Bach, Beethoven, Chopin, Mozart

Step 2: Splitting files into training and validation sets...
Training files: 392, Validation files: 99

Step 3: Processing MIDI files with 2 cores...
Processing training files...




 - Could not parse Anhang 14-3.mid: Could not decode key with 3 flats and mode 255




Processing validation files...


  bpm = 60.0 / np.diff(beat_times_one_more)



Step 4: Assembling final datasets...
Training set shape:   X=(391, 5000, 128), y=(391,)
Validation set shape: X=(99, 5000, 128), y=(99,)
Preprocessing complete. Data is ready for model training.


In [4]:
# we had to go through several iterations to get this right!
save_path = '/content/drive/MyDrive/AAI511_ML/preprocessed_composer_data_vpiano_roll.npz'

print(f"\nSaving pre-processed data to {save_path}...")

np.savez_compressed(
        save_path,
        X_train=X_train,
        y_train=y_train,
        X_val=X_val,
        y_val=y_val
    )

# old method
# np.savez_compressed(
#     save_path,
#     network_input=network_input,
#     network_output=network_output,
#     n_vocab=np.array([n_vocab]), # Save n_vocab as a numpy array
#     composer_map=composer_map,   # Save the composer mapping dictionary
#     note_to_int=note_to_int      # Save the note mapping dictionary
# )

print("Data saved")


Saving pre-processed data to /content/drive/MyDrive/AAI511_ML/preprocessed_composer_data_vpiano_roll.npz...
Data saved


In [5]:
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Load the preprocessed data
print(f"Loading preprocessed data from: {save_path}")
with np.load(save_path, allow_pickle=True) as data:
    X_train = data['X_train']
    y_train = data['y_train']
    X_val = data['X_val']
    y_val = data['y_val']


def create_optimized_dataset(features, labels, batch_size=64, is_training=True):
    """Create an optimized dataset."""
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    if is_training:
        dataset = dataset.shuffle(len(features), reshuffle_each_iteration=True)

    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)
    return dataset

# Create the datasets
BATCH_SIZE = 128
train_dataset = create_optimized_dataset(X_train, y_train, batch_size=BATCH_SIZE)
validation_dataset = create_optimized_dataset(X_val, y_val, batch_size=BATCH_SIZE, is_training=False)

# update xtrain/ytrain

X_train = np.expand_dims(X_train, axis=-1)
X_val = np.expand_dims(X_val, axis=-1)

print("\nTensorFlow datasets created.")

Loading preprocessed data from: /content/drive/MyDrive/AAI511_ML/preprocessed_composer_data_vpiano_roll.npz

TensorFlow datasets created.


In [6]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

def create_cnn_model(input_shape, num_classes):
    """
    Creates a 2D CNN model for piano roll classification.
    """
    # The input shape will now include the channel dimension, e.g., (5000, 128, 1)
    inputs = Input(shape=input_shape)

    # --- Convolutional Block 1 ---
    # The first layer now takes 'inputs' directly
    x = Conv2D(32, kernel_size=(3, 3), activation='relu')(inputs)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.25)(x)

    # ... the rest of the function remains the same ...

    # --- Convolutional Block 2 ---
    x = Conv2D(64, kernel_size=(3, 3), activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.25)(x)

    # --- Convolutional Block 3 ---
    x = Conv2D(64, kernel_size=(3, 3), activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.25)(x)

    # --- Flatten and Classify ---
    x = Flatten()(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    return model

# Get model parameters from the data
input_shape = X_train.shape[1:]
num_classes = 4

# Build and compile the model
print("\nBuilding and compiling the model...")
model = create_cnn_model(input_shape=input_shape, num_classes=4) # 4 is the num composers

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), # adjusted learning rate after some attempts
    loss='sparse_categorical_crossentropy', # Use this for integer labels
    metrics=['accuracy']
)

model.summary()

# Add a callback to stop training early if performance plateaus
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',  # Monitor the validation loss
    patience=5,          # Stop after 5 epochs with no improvement
    restore_best_weights=True # Restore the best model weights found
)

print("\nStarting model training...")
history = model.fit(
    train_dataset,
    epochs=40, # Set a high number, early stopping will find the best epoch
    validation_data=validation_dataset,
    callbacks=[early_stopping]#,
    # verbose=0 # to keep code clean
)
print("\nModel training complete")


Building and compiling the model...



Starting model training...
Epoch 1/40


UnknownError: Graph execution error:

Detected at node StatefulPartitionedCall defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py", line 37, in <module>

  File "/usr/local/lib/python3.11/dist-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelapp.py", line 712, in start

  File "/usr/local/lib/python3.11/dist-packages/tornado/platform/asyncio.py", line 205, in start

  File "/usr/lib/python3.11/asyncio/base_events.py", line 608, in run_forever

  File "/usr/lib/python3.11/asyncio/base_events.py", line 1936, in _run_once

  File "/usr/lib/python3.11/asyncio/events.py", line 84, in _run

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 499, in process_one

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 730, in execute_request

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/ipkernel.py", line 383, in do_execute

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/zmqshell.py", line 528, in run_cell

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "/tmp/ipython-input-1738705406.py", line 64, in <cell line: 0>

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 377, in fit

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 220, in function

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 133, in multi_step_on_iterator

Failed to determine best cudnn convolution algorithm for:
%cudnn-conv-bias-activation.9 = (f32[128,32,4998,126]{3,2,1,0}, u8[0]{0}) custom-call(f32[128,1,5000,128]{3,2,1,0} %bitcast.12162, f32[32,1,3,3]{3,2,1,0} %bitcast.12169, f32[32]{0} %bitcast.12800), window={size=3x3}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBiasActivationForward", metadata={op_type="Conv2D" op_name="functional_1/conv2d_1/convolution" source_file="/usr/local/lib/python3.11/dist-packages/tensorflow/python/framework/ops.py" source_line=1200}, backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"conv_result_scale":1,"activation_mode":"kNone","side_input_scale":0,"leakyrelu_alpha":0},"force_earliest_schedule":false}

Original error: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 10334568448 bytes. [tf-allocator-allocation-error='']

To ignore this failure and try to use a fallback algorithm (which may have suboptimal performance), use XLA_FLAGS=--xla_gpu_strict_conv_algorithm_picker=false.  Please also file a bug for the root cause of failing autotuning.
	 [[{{node StatefulPartitionedCall}}]] [Op:__inference_multi_step_on_iterator_2291]