# Convert MIDI piece to PIG format

Some relevant constants:

In [16]:
import os
import pandas as pd

# RELEVANT CONSTANTS
PATH_TO_DATASET_FOLDER = './PianoFingeringDataset_v1.2/PianoFingeringDataset_v1.2/FingeringFiles'
FINGERING_TYPE_TO_ANALYZE = "1"    # there are 8 distinct fingerings done by 8 different people
directory_path = PATH_TO_DATASET_FOLDER

spelled_pitch_values = set()

for filename in os.listdir(directory_path):
    file_path = os.path.join(directory_path, filename)
    if os.path.isfile(file_path):
        df = pd.read_table(file_path, sep="\t", skiprows=1, names=["noteID", "onset_time", "offset_time", "spelled_pitch", "onset_velocity", "offset_velocity", "channel", "finger_number"])
        spelled_pitch_values.update(df['spelled_pitch'].unique())


spelled_pitch_values = sorted(spelled_pitch_values)    # convert "spelled pitch" field to a number: create the mapping in the first place
pitch_to_int_mapping = {p:i for i, p in enumerate(spelled_pitch_values)}

print(f"pitch_to_int_mapping: {pitch_to_int_mapping}")

print("\n")

# We need to do the same with the fingerings themselves, since the dataset includes fingerings like "-1_-2" to denote finger changes on the same note
fingering_map = set()

for filename in os.listdir(directory_path):
    file_path = os.path.join(directory_path, filename)
    if os.path.isfile(file_path):
        df = pd.read_table(file_path, sep="\t", skiprows=1, names=["noteID", "onset_time", "offset_time", "spelled_pitch", "onset_velocity", "offset_velocity", "channel", "finger_number"])
        fingering_map.update(df['finger_number'].unique())

finger_to_int_mapping = {p:i for i, p in enumerate(fingering_map)}    # convert "spelled pitch" field to a number: create the mapping in the first place
print(f"finger_to_int_mapping: {finger_to_int_mapping}")


pitch_to_int_mapping: {'A1': 0, 'A2': 1, 'A3': 2, 'A4': 3, 'A5': 4, 'A6': 5, 'B1': 6, 'B2': 7, 'B3': 8, 'B4': 9, 'B5': 10, 'B6': 11, 'Bb1': 12, 'Bb2': 13, 'Bb3': 14, 'Bb4': 15, 'Bb5': 16, 'Bb6': 17, 'C#1': 18, 'C#2': 19, 'C#3': 20, 'C#4': 21, 'C#5': 22, 'C#6': 23, 'C#7': 24, 'C1': 25, 'C2': 26, 'C3': 27, 'C4': 28, 'C5': 29, 'C6': 30, 'C7': 31, 'D1': 32, 'D2': 33, 'D3': 34, 'D4': 35, 'D5': 36, 'D6': 37, 'D7': 38, 'E1': 39, 'E2': 40, 'E3': 41, 'E4': 42, 'E5': 43, 'E6': 44, 'E7': 45, 'Eb1': 46, 'Eb2': 47, 'Eb3': 48, 'Eb4': 49, 'Eb5': 50, 'Eb6': 51, 'Eb7': 52, 'F#1': 53, 'F#2': 54, 'F#3': 55, 'F#4': 56, 'F#5': 57, 'F#6': 58, 'F#7': 59, 'F1': 60, 'F2': 61, 'F3': 62, 'F4': 63, 'F5': 64, 'F6': 65, 'F7': 66, 'G#1': 67, 'G#2': 68, 'G#3': 69, 'G#4': 70, 'G#5': 71, 'G#6': 72, 'G1': 73, 'G2': 74, 'G3': 75, 'G4': 76, 'G5': 77, 'G6': 78}


finger_to_int_mapping: {'-3_-1': 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, '-1': 6, '-4': 7, '-2': 8, '2_3': 9, '3_1': 10, '5_4': 11, '5_3': 12, '1_5': 13, '-5_-4': 14, '-

In [43]:
from music21 import converter, midi, note, chord
import pandas as pd

def midi_to_pig_format(midi_file, output_txt="output_pig_format.csv"):
    # Parse MIDI file
    score = converter.parse(midi_file)

    # Extract MIDI parts (assume first track is RH, second is LH if multi-track)
    parts = score.parts
    is_multitrack = len(parts) > 1

    if not is_multitrack:
        print("NOT MULTITRACK!")
        return

    note_data = []
    note_id = 0  # Sequential numbering

    for part_index, part in enumerate(parts):        
        # Assign hand based on track index (if multitrack, first track is RH (0), second is LH (1))
        hand = 0 if not is_multitrack or part_index == 0 else 1

        for element in part.flat.notes:
            if isinstance(element, note.Note):  # Single Note
                spelled_pitch = element.nameWithOctave
                onset_time = element.offset  # Start time in quarter notes
                offset_time = onset_time + element.quarterLength  # End time

                # MIDI velocity (MIDI files may not always contain explicit velocity info)
                onset_velocity = element.volume.velocity or 64  # Default to 64 if missing
                offset_velocity = onset_velocity  # No explicit release velocity in MIDI

                note_data.append([
                    note_id, onset_time, offset_time, spelled_pitch,
                    onset_velocity, offset_velocity, hand
                ])
                note_id += 1

            elif isinstance(element, chord.Chord):  # Chord (multiple notes at same time)
                for pitch in element.pitches:
                    spelled_pitch = pitch.nameWithOctave
                    onset_time = element.offset
                    offset_time = onset_time + element.quarterLength
                    onset_velocity = element.volume.velocity or 64
                    offset_velocity = onset_velocity

                    note_data.append([
                        note_id, onset_time, offset_time, spelled_pitch,
                        onset_velocity, offset_velocity, hand
                    ])
                    note_id += 1

    # Convert to DataFrame
    df = pd.DataFrame(note_data, columns=[
        "noteID", "onset_time", "offset_time", "spelled_pitch",
        "onset_velocity", "offset_velocity", "channel"
    ])

    # Sort by onset time
    df = df.sort_values(by="onset_time").reset_index(drop=True)

    # Save to CSV
    with open(output_txt, "w") as f:
        # Write header
        f.write("noteID onset_time offset_time spelled_pitch onset_velocity offset_velocity channel\n")
        # Write each row
        for _, row in df.iterrows():
            f.write(f"{row['noteID']} {row['onset_time']} {row['offset_time']} {row['spelled_pitch']} "
                    f"{row['onset_velocity']} {row['offset_velocity']} {row['channel']}\n")

    print(f"PIG-formatted text file saved as {output_txt}")

    return df



# CONVERTS MIDI FILE TO CSV
midi_file = "TESTY.mid"  # Replace with your MIDI file path
df_pig = midi_to_pig_format(midi_file, "output_pig_format.txt")


PIG-formatted text file saved as output_pig_format.txt


In [44]:
pd.read_csv("output_pig_format.txt")

Unnamed: 0,noteID onset_time offset_time spelled_pitch onset_velocity offset_velocity channel
0,395 0.0 3.75 C#3 80 80 1
1,397 0.0 3.75 A3 80 80 1
2,396 0.0 3.75 E3 80 80 1
3,0 0.5 1.0 C#5 80 80 0
4,1 1.0 1.5 C#5 80 80 0
...,...
681,393 287.5 288.0 A4 80 80 0
682,394 288.0 291.75 A4 80 80 0
683,683 288.0 291.75 C#3 80 80 1
684,684 288.0 291.75 E3 80 80 1


# Load the Relevant Model

In [49]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

directory_path = './PianoFingeringDataset_v1.2/PianoFingeringDataset_v1.2/FingeringFiles/'

all_sequences = []   # will hold raw DataFrames for each piece
all_piece_names = []

pitch_vocab = set()
finger_vocab = set()

FINGERING_TYPE_TO_ANALYZE = 1

# 1) Gather data by piece
for filename in os.listdir(directory_path):
    file_path = os.path.join(directory_path, filename)

    # Example: "001-01_fingering.txt" => fingering_label = "001-01", rest = "fingering.txt"
    fingering_label, _ = filename.split('_')  # e.g. "001-01"
    piece_id, fingering_type = fingering_label.split('-')

    # Check if this is a valid file for the desired fingering type
    if os.path.isfile(file_path) and FINGERING_TYPE_TO_ANALYZE == int(fingering_type):
    # if os.path.isfile(file_path):
        df = pd.read_table(
            file_path, sep="\t", skiprows=1,
            names=[
                "noteID", "onset_time", "offset_time", "spelled_pitch",
                "onset_velocity", "offset_velocity", "channel",
                "finger_number"
            ]
        )

        # Update global vocab sets
        pitch_vocab.update(df["spelled_pitch"].unique())
        finger_vocab.update(map(str, df["finger_number"].unique()))

        all_piece_names.append(filename)
        all_sequences.append(df)

# 2) Build the pitch & finger mappings
pitch_vocab = sorted(pitch_vocab)
pitch_to_idx = {p: i for i, p in enumerate(pitch_vocab)}

finger_vocab = sorted(finger_vocab)
finger_to_idx = {f: i for i, f in enumerate(finger_vocab)}

print(f"Total number of pieces found: {len(all_sequences)}")

# 3) Convert each piece’s DF into (X_list, y_list) with shape (num_notes, 6) + (num_notes,)
raw_encoded_sequences = []  # We'll store the unscaled version first

for df in all_sequences:
    X_list = []
    y_list = []

    for row in df.itertuples(index=False):
        spelled_pitch = row.spelled_pitch
        finger_str = str(row.finger_number)

        # Encode spelled pitch
        pitch_int = pitch_to_idx.get(spelled_pitch, 0)  # fallback 0 if unseen

        # Encode finger
        finger_int = finger_to_idx.get(finger_str, 0)   # fallback 0 if unseen

        # Construct feature row:
        # [pitch_int, onset_time, offset_time, onset_velocity, offset_velocity, channel]
        feature_row = [
            pitch_int,
            float(row.onset_time),
            float(row.offset_time),
            float(row.onset_velocity),
            float(row.offset_velocity),
            float(row.channel)
        ]
        X_list.append(feature_row)
        y_list.append(finger_int)

    raw_encoded_sequences.append((X_list, y_list))

# 3.5) Gather all numeric features for scaling
# We'll do a single pass over raw_encoded_sequences
numeric_data = []
for (X_list, _) in raw_encoded_sequences:
    for row in X_list:
        numeric_vals = row[1:]  # 5 numeric features => (onset_time, offset_time, onset_vel, offset_vel, channel)
        numeric_data.append(numeric_vals)

numeric_data = np.array(numeric_data, dtype=np.float32)
print("numeric_data shape:", numeric_data.shape)

# 3.6) Fit a StandardScaler on the numeric columns
scaler = StandardScaler()
scaler.fit(numeric_data)
print("Numeric scaler mean_:", scaler.mean_)
print("Numeric scaler var_ :", scaler.var_)

# 3.7) Apply the scaler to each piece
scaled_sequences = []
for (X_list, y_list) in raw_encoded_sequences:
    new_X_list = []
    for row in X_list:
        pitch_val = row[0]
        numeric_vals = row[1:]  # shape (5,)
        numeric_vals_arr = np.array(numeric_vals, dtype=np.float32).reshape(1, -1)
        scaled_vals = scaler.transform(numeric_vals_arr)[0]  # (5,)

        # Rebuild the row: pitch_val + scaled numeric
        new_row = [pitch_val] + scaled_vals.tolist()
        new_X_list.append(new_row)
    scaled_sequences.append((new_X_list, y_list))

# 4) Split entire set of sequences (by piece) into train/val/test
train_val_seqs, test_seqs = train_test_split(
    scaled_sequences, test_size=0.2, random_state=42
)
train_seqs, val_seqs = train_test_split(
    train_val_seqs, test_size=0.25, random_state=42
)

print(f"Total pieces: {len(scaled_sequences)}")
print(f"Train pieces: {len(train_seqs)}")
print(f"Val pieces:   {len(val_seqs)}")
print(f"Test pieces:  {len(test_seqs)}")

# Each item is now (X, y) with scaled numeric features


Total number of pieces found: 148
numeric_data shape: (45976, 5)
Numeric scaler mean_: [21.13948002 21.45820221 68.73305638 79.79739429  0.45321472]
Numeric scaler var_ : [3.97946937e+02 3.98278724e+02 3.89274639e+02 1.63700132e+01
 2.47811138e-01]
Total pieces: 148
Train pieces: 88
Val pieces:   30
Test pieces:  30


In [50]:
import torch
import torch.nn as nn

class MultiFeatureRNNTagger(nn.Module):
    def __init__(
        self,
        num_pitches,     # size of pitch vocab
        num_fingers,     # number of finger classes
        embed_dim=32,    # dimension for pitch embedding
        hidden_dim=64,   # dimension for RNN hidden layer
        numeric_dim=5,   # number of extra numeric features
    ):
        super().__init__()

        # 1. Embedding for pitch (categorical)
        self.embedding_pitch = nn.Embedding(num_pitches, embed_dim)

        # 2. A simple linear projection for numeric features => same embed_dim
        #    so we can concatenate them with pitch embeddings
        self.embedding_numeric = nn.Linear(numeric_dim, embed_dim)

        # 3. RNN: input_dim = embed_dim(pitch) + embed_dim(numeric) = 2*embed_dim
        self.rnn = nn.RNN(embed_dim * 2, hidden_dim, batch_first=True)

        # 4. Final linear layer to map hidden_dim => number of finger classes
        self.fc = nn.Linear(hidden_dim, num_fingers)

    def forward(self, x):
        """
        x shape: (batch_size, seq_len, 6)
          - x[..., 0]: pitch IDs (long)
          - x[..., 1:]: numeric features (float)
        returns logits: (batch_size, seq_len, num_fingers)
        """
        # Separate pitch from numeric
        pitch_ids = x[..., 0].long()         # shape => (batch_size, seq_len)
        numeric_feats = x[..., 1:].float()   # shape => (batch_size, seq_len, 5)

        # Embed pitch
        pitch_emb = self.embedding_pitch(pitch_ids)  # (batch_size, seq_len, embed_dim)

        # Project numeric
        numeric_emb = self.embedding_numeric(numeric_feats)  # (batch_size, seq_len, embed_dim)

        # Concatenate along last dimension => (batch_size, seq_len, 2*embed_dim)
        combined_emb = torch.cat((pitch_emb, numeric_emb), dim=2)

        # RNN => (batch_size, seq_len, hidden_dim)
        rnn_out, _ = self.rnn(combined_emb)

        # Final linear => (batch_size, seq_len, num_fingers)
        logits = self.fc(rnn_out)
        return logits


In [61]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score

def train_sequence_model(model, train_loader, val_loader, num_epochs=5, lr=1e-3):
    """
    Trains a token-level sequence model on multi-feature input X_seq.
    Expects X_seq.shape = (batch_size, seq_len, num_features)
           y_seq.shape = (batch_size, seq_len)
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()  # for token-level classification

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0

        for X_seq, y_seq in train_loader:
            # X_seq shape: (batch_size, seq_len, num_features)
            # y_seq shape: (batch_size, seq_len)

            X_seq = X_seq.to(device, non_blocking=True)
            y_seq = y_seq.to(device, non_blocking=True)

            optimizer.zero_grad()

            # Forward pass: model should output shape (batch_size, seq_len, num_fingers)
            logits = model(X_seq)

            # Reshape for CrossEntropy:
            # Flatten tokens => (batch_size * seq_len, num_fingers)
            logits_reshaped = logits.view(-1, logits.size(-1))
            y_seq_reshaped = y_seq.view(-1)

            loss = criterion(logits_reshaped, y_seq_reshaped)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        avg_loss = total_loss / len(train_loader)

        # Evaluate on validation set
        val_acc = evaluate_sequence_model(model, val_loader)
        print(f"Epoch {epoch+1}/{num_epochs} | Loss: {avg_loss:.4f} | ValAcc: {val_acc:.4f}")

def evaluate_sequence_model(model, data_loader):
    """
    Evaluates token-level accuracy. Expects same shapes as train_sequence_model.
    """
    device = next(model.parameters()).device
    model.eval()

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for X_seq, y_seq in data_loader:
            # X_seq: (batch_size, seq_len, num_features)
            # y_seq: (batch_size, seq_len)

            X_seq = X_seq.to(device, non_blocking=True)
            y_seq = y_seq.to(device, non_blocking=True)

            logits = model(X_seq)  # => (batch_size, seq_len, num_fingers)
            preds = torch.argmax(logits, dim=-1)  # => (batch_size, seq_len)

            all_preds.extend(preds.view(-1).cpu().numpy())
            all_labels.extend(y_seq.view(-1).cpu().numpy())

    return accuracy_score(all_labels, all_preds)


In [64]:

import torch
from torch.utils.data import Dataset, DataLoader

class FingeringDataset(Dataset):
    def __init__(self, sequences):
        """
        sequences: list of (X_list, y_list),
                   where X_list is shape (seq_len, 6)
                         y_list is shape (seq_len,)
        """
        self.sequences = sequences

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        X_list, y_list = self.sequences[idx]
        # Convert to Tensors
        # By now, X_list is already scaled
        X_tensor = torch.tensor(X_list, dtype=torch.float32)  # (seq_len, 6)
        y_tensor = torch.tensor(y_list, dtype=torch.long)     # (seq_len,)
        return X_tensor, y_tensor

# Create dataset objects
train_dataset = FingeringDataset(train_seqs)
val_dataset   = FingeringDataset(val_seqs)
test_dataset  = FingeringDataset(test_seqs)

# If your model can handle variable sequence lengths with e.g. a collate_fn,
# you can do batch_size > 1. Otherwise, keep batch_size=1.
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_loader   = DataLoader(val_dataset,   batch_size=1, shuffle=False)
test_loader  = DataLoader(test_dataset,  batch_size=1, shuffle=False)


# Create your model instance
rnn_model = MultiFeatureRNNTagger(
    num_pitches=len(pitch_vocab),
    num_fingers=len(finger_vocab),
    embed_dim=32,
    hidden_dim=64,
    numeric_dim=5
)

# Train as usual
train_sequence_model(rnn_model, train_loader, val_loader, num_epochs=10, lr=1e-3)
test_acc_rnn = evaluate_sequence_model(rnn_model, test_loader)
print(f"RNN Test Accuracy = {test_acc_rnn:.4f}")

Epoch 1/10 | Loss: 2.4655 | ValAcc: 0.3038
Epoch 2/10 | Loss: 1.6684 | ValAcc: 0.3596
Epoch 3/10 | Loss: 1.5610 | ValAcc: 0.3710
Epoch 4/10 | Loss: 1.5155 | ValAcc: 0.3867
Epoch 5/10 | Loss: 1.4763 | ValAcc: 0.3976
Epoch 6/10 | Loss: 1.4515 | ValAcc: 0.3996
Epoch 7/10 | Loss: 1.4261 | ValAcc: 0.3900
Epoch 8/10 | Loss: 1.4014 | ValAcc: 0.4269
Epoch 9/10 | Loss: 1.3721 | ValAcc: 0.4345
Epoch 10/10 | Loss: 1.3413 | ValAcc: 0.4445
RNN Test Accuracy = 0.4613


In [65]:
import os
import torch
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score


def predict_piece_fingerings_multi(
    model,
    file_path,
    pitch_to_idx,         # dict mapping spelled pitches -> int
    scaler=None,          # the numeric scaler fit during training
    finger_to_idx=None,   # dict mapping finger labels -> int (optional)
    idx_to_finger=None,   # reverse mapping to decode predictions (optional)
    device=None
):
    """
    Predict fingerings for a single piece using a trained multi-feature RNN/LSTM/Transformer model.
    Applies the same numeric scaling used during training if `scaler` is provided.

    Returns:
      - preds:        (seq_len,) integer array of predicted finger indices
      - y_true:       (seq_len,) integer array of actual finger indices (or None if finger_to_idx not given)
      - y_pred_labels: decoded predicted finger labels (list of str) or the raw preds
      - y_true_labels: decoded actual finger labels (list of str) or None
      - acc:          float, accuracy on this piece if y_true is available, else None
    """

    # 1. Load piece data
    df = pd.read_table(
        file_path,
        sep=" ",
        skiprows=1, #noteID onset_time offset_time spelled_pitch onset_velocity offset_velocity channel
        names=[
            "noteID",
            "onset_time",
            "offset_time",
            "spelled_pitch",
            "onset_velocity",
            "offset_velocity",
            "channel",
        ],
    )

    # 2. Encode spelled pitch -> int
    df["spelled_pitch_int"] = df["spelled_pitch"].map(pitch_to_idx).fillna(0).astype(int)

  
    # 4. Construct multi-feature array X of shape (seq_len, 6)
    #    X[..., 0] = spelled_pitch_int (categorical)
    #    X[..., 1] = onset_time
    #    X[..., 2] = offset_time
    #    X[..., 3] = onset_velocity
    #    X[..., 4] = offset_velocity
    #    X[..., 5] = channel
    seq_len = len(df)
    X_array = np.zeros((seq_len, 6), dtype=np.float32)
    print(df)

    # Fill in pitch as int (col 0).
    # We'll keep it as float32 for consistency, then convert to long in the model if needed.
    X_array[:, 0] = df["spelled_pitch_int"].astype(float)

    # Fill in numeric features (cols 1..5)
    X_array[:, 1] = df["onset_time"].astype(float)
    X_array[:, 2] = df["offset_time"].astype(float)
    X_array[:, 3] = df["onset_velocity"].astype(float)
    X_array[:, 4] = df["offset_velocity"].astype(float)
    X_array[:, 5] = df["channel"].astype(float)

    # 5. Apply the same scaler as training (if provided) to columns 1..5
    if scaler is not None:
        pitch_column = X_array[:, 0].copy()  # store pitch in a temp var
        numeric_vals = X_array[:, 1:]       # shape => (seq_len, 5)

        # Scale numeric features => shape (seq_len, 5)
        numeric_vals_scaled = scaler.transform(numeric_vals)

        # Rebuild X_array with pitch + scaled numeric
        X_array = np.concatenate([
            pitch_column.reshape(-1, 1),  # shape => (seq_len, 1)
            numeric_vals_scaled
        ], axis=1)  # => shape => (seq_len, 6)

    # 6. Convert to Torch tensor: shape => (1, seq_len, 6)
    X_tensor = torch.tensor(X_array, dtype=torch.float32).unsqueeze(0)

    # 7. Move model & input to device
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    X_tensor = X_tensor.to(device)
    model = model.to(device)
    model.eval()

    # 8. Inference: model output => (1, seq_len, num_fingers)
    with torch.no_grad():
        logits = model(X_tensor)
        preds = torch.argmax(logits, dim=-1).squeeze(0).cpu().numpy()  # => (seq_len,)

    # 9. Decode predictions (optional)
    y_pred_labels = preds

    # 10. Compute accuracy if y_true is available
    acc = 0

    return preds, 0, y_pred_labels, 0, 0


piece_file = "output_pig_format.txt"
file_path = piece_file

# Suppose your pitch_to_idx, finger_to_idx, idx_to_finger are defined
# and your model is a MultiFeatureTransformerTagger or MultiFeatureRNNTagger, etc.

# ***IMPORTANT***: Use the SAME scaler you used in training.
# e.g. from the step: `scaler = StandardScaler(); scaler.fit(...)`
# or you saved it in joblib/pickle.

rnn_preds, rnn_true, rnn_pred_labels, rnn_true_labels, rnn_acc = predict_piece_fingerings_multi(
    model=rnn_model,
    file_path=file_path,
    pitch_to_idx=pitch_to_idx,
    scaler=scaler,  # pass the trained scaler
    finger_to_idx=finger_to_idx,
    idx_to_finger=None
)

print(f"\n=== Multi-Feature RNN Predictions for {piece_file} ===")
print("Predicted fingerings (string labels):", rnn_pred_labels)


     noteID  onset_time  offset_time spelled_pitch  onset_velocity  \
0       395         0.0         3.75           C#3              80   
1       397         0.0         3.75            A3              80   
2       396         0.0         3.75            E3              80   
3         0         0.5         1.00           C#5              80   
4         1         1.0         1.50           C#5              80   
..      ...         ...          ...           ...             ...   
681     393       287.5       288.00            A4              80   
682     394       288.0       291.75            A4              80   
683     683       288.0       291.75           C#3              80   
684     684       288.0       291.75            E3              80   
685     685       288.0       291.75            A3              80   

     offset_velocity  channel  spelled_pitch_int  
0                 80        1                 20  
1                 80        1                  2  
2     

In [None]:
# PREDICTED LABELS INCLUDE 0s, WHICH I THINK ARE RESTS!

# Convert any MIDI into a musicXML that can be opened in MuseScore

Replace MIDI with the OG midi you used before the text file path

Then use the list of fingerings

In [72]:
from music21 import converter, note, chord, articulations

def add_fingerings_to_midi(midi_file, fingerings, output_xml="output.musicxml"):
    # Parse the MIDI file into a music21 stream
    score = converter.parse(midi_file)
    
    # Flatten the score to iterate over all notes/chords in order
    events = score.flat.getElementsByClass([note.Note, chord.Chord])
    fingering_index = 0

    # Iterate and attach fingerings
    for event in events:
        if fingering_index >= len(fingerings):
            # If there are more notes than fingerings, stop assigning
            break

        fingering_number = fingerings[fingering_index]

        if isinstance(event, note.Note):
            # Add fingering articulation to a single note
            event.articulations.append(articulations.Fingering(fingering_number))
        
        elif isinstance(event, chord.Chord):
            # Assign the fingering to the lowest note (as is common in MusicXML)
            event.notes[0].articulations.append(articulations.Fingering(fingering_number))

        fingering_index += 1

    # Optionally, warn if not all fingerings were used
    if fingering_index < len(fingerings):
        print(f"Warning: {len(fingerings) - fingering_index} unused fingerings.")
    elif fingering_index > len(fingerings):
        print(f"Warning: Some notes/chords have no assigned fingering.")

    # Write the score to a MusicXML file that MuseScore can open
    score.write("musicxml", fp=output_xml)
    print(f"MusicXML file saved as: {output_xml}")

# Example usage:
midi_file = "TESTY.mid"  # Replace with your MIDI file path
# Replace with your fingering list (each number corresponds to a note/chord in order)
fingerings = rnn_pred_labels
add_fingerings_to_midi(midi_file, fingerings, output_xml="output.musicxml")


MusicXML file saved as: output.musicxml


In [74]:
!musescore output.musicxml

QSocketNotifier: Can only be used with threads started with QThread
Loading soundfont: /usr/share/mscore-2.3/sound/sf3/MuseScore_General.sf3
Info at line 24 col 33: skipping 'instrument-abbreviation'
Info at line 36 col 33: skipping 'instrument-abbreviation'
qt.qpa.wayland: Wayland does not support QWindow::requestActivate()
Creating interface for ScoreView object
SoundFont(/usr/share/mscore-2.3/sound/sf3/MuseScore_General.sf3) Sample(Piano MF B1(L)) start(0) startloop(328404) endloop(478112) end(478118) smaller than SoundFont 2.04 spec chapter 7.10 recommendation
SoundFont(/usr/share/mscore-2.3/sound/sf3/MuseScore_General.sf3) Sample(Piano MF B1(R)) start(0) startloop(328404) endloop(478112) end(478118) smaller than SoundFont 2.04 spec chapter 7.10 recommendation
