<a href="https://colab.research.google.com/github/davebruzil/neuralCore-neural-network-based-midi-generator-/blob/main/Untitled1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [46]:
#@title Install dependencies
!pip install pretty_midi
!pip install tensorflow==2.11.0
!pip install music21

import os
import pretty_midi
import tensorflow as tf
import numpy as np
import glob
from music21 import converter, instrument, note, chord, stream

print(f"TensorFlow version: {tf.__version__}")

#@title Data preparation: convert MIDI to sequences of note pitches and durations

def midi_to_sequence(midi_file):
    """Convert MIDI to list of (pitch, duration) tuples."""
    midi_data = pretty_midi.PrettyMIDI(midi_file)
    sequence = []
    for instrument in midi_data.instruments:
        if instrument.is_drum:
            continue
        for note in instrument.notes:
            pitch = note.pitch
            duration = note.end - note.start
            sequence.append((pitch, duration))
    # Sort by start time (pretty_midi notes are already sorted, but just in case)
    sequence.sort(key=lambda x: x[0])
    return sequence

def load_dataset(midi_folder):
    """Load all MIDIs in folder and convert to sequences."""
    sequences = []
    files = glob.glob(midi_folder + "/*.mid")
    print(f"Found {len(files)} MIDI files")
    for f in files:
        seq = midi_to_sequence(f)
        if len(seq) > 0:
            sequences.append(seq)
    return sequences

#@title Create tokenization maps

# Define note pitch and duration vocabulary limits for tokenization

MIN_PITCH = 40  # E2 on guitar approx
MAX_PITCH = 88  # F6 approx
PITCH_VOCAB_SIZE = MAX_PITCH - MIN_PITCH + 1  # 49 pitches

DURATION_BINS = [0.125, 0.25, 0.5, 1.0, 2.0, 4.0]  # 32nd to whole notes
DURATION_VOCAB_SIZE = len(DURATION_BINS)

def pitch_to_token(pitch):
    """Convert pitch to token index."""
    return pitch - MIN_PITCH

def duration_to_token(duration):
    """Convert duration to nearest bin index."""
    diffs = [abs(duration - d) for d in DURATION_BINS]
    return diffs.index(min(diffs))

def tokenize_sequence(sequence):
    """Convert sequence of (pitch, duration) to flat token list."""
    tokens = []
    for pitch, duration in sequence:
        if pitch < MIN_PITCH or pitch > MAX_PITCH:
            continue
        p_tok = pitch_to_token(pitch)
        d_tok = duration_to_token(duration)
        tokens.append(p_tok)
        tokens.append(PITCH_VOCAB_SIZE + d_tok)  # Offset durations after pitches
    return tokens

#@title Prepare sequences for training (fixed-length sequences)

SEQ_LENGTH = 64  # tokens per input sequence

def create_single_token_training_data(token_lists):
    inputs = []
    targets = []
    for tokens in token_lists:
        if len(tokens) < SEQ_LENGTH + 1:
            continue
        for i in range(len(tokens) - SEQ_LENGTH):
            inp = tokens[i:i+SEQ_LENGTH]
            tgt = tokens[i+SEQ_LENGTH]  # only one token
            inputs.append(inp)
            targets.append(tgt)
    return np.array(inputs), np.array(targets)


inputs, targets = create_single_token_training_data(token_lists)

# Function to create sequence targets (fixed)
inputs = np.array(inputs)
targets = np.array(targets)



#@title Transformer model definition

class TransformerBlock(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([
            tf.keras.layers.Dense(ff_dim, activation="relu"),
            tf.keras.layers.Dense(embed_dim),
        ])
        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)

    def call(self, inputs, training=None):  # <-- training optional
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

class TokenAndPositionEmbedding(tf.keras.layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super(TokenAndPositionEmbedding, self).__init__()
        self.token_emb = tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = tf.keras.layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

def build_model(maxlen, vocab_size, embed_dim=128, num_heads=4, ff_dim=256):
    inputs = tf.keras.Input(shape=(maxlen,))
    embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
    x = embedding_layer(inputs)
    transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
    x = transformer_block(x)

    # REMOVE this line if it exists:
    # x = tf.keras.layers.GlobalAveragePooling1D()(x)

    # For next-token prediction, use only the last token's output
    x = x[:, -1, :]  # Take only the last token's embedding

    x = tf.keras.layers.Dropout(0.1)(x)
    x = tf.keras.layers.Dense(64, activation="relu")(x)
    x = tf.keras.layers.Dropout(0.1)(x)
    outputs = tf.keras.layers.Dense(vocab_size, activation="softmax")(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    return model



#@title Training & generation utilities

def train_model(model, inputs, targets, epochs=20, batch_size=64):
    model.fit(inputs, targets, epochs=epochs, batch_size=batch_size)

def sample_from_probs(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds + 1e-9) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

def generate_sequence(model, seed, length=100, temperature=1.0):
    output = seed.copy()
    for _ in range(length):
        input_seq = np.array(output[-SEQ_LENGTH:]).reshape(1, -1)
        preds = model.predict(input_seq, verbose=0)[0]
        next_token = sample_from_probs(preds, temperature)
        output.append(next_token)
    return output

#@title Convert token sequence back to MIDI

def tokens_to_midi(tokens, filename="output.mid"):
    midi = pretty_midi.PrettyMIDI()
    piano_program = pretty_midi.instrument_name_to_program('Electric Guitar (jazz)')  # close enough
    instrument_obj = pretty_midi.Instrument(program=piano_program)

    i = 0
    time = 0.0
    while i < len(tokens) - 1:
        pitch_token = tokens[i]
        dur_token = tokens[i + 1] - PITCH_VOCAB_SIZE  # duration token index
        pitch = pitch_token + MIN_PITCH
        duration = DURATION_BINS[dur_token]
        note = pretty_midi.Note(velocity=100, pitch=pitch, start=time, end=time + duration)
        instrument_obj.notes.append(note)
        time += duration
        i += 2

    midi.instruments.append(instrument_obj)
    midi.write(filename)
    print(f"MIDI written to {filename}")

#@title === RUN ALL BELOW ===

# Upload your MIDI files to folder '/content/midi_data' before running
# You can use the left sidebar Files tab in Colab or use this snippet:

import shutil

midi_dir = '/content/drive/MyDrive/midi_data_set'
os.makedirs(midi_dir, exist_ok=True)

model = build_model(
    maxlen=SEQ_LENGTH,
    vocab_size=TOTAL_VOCAB_SIZE,  # Change from vocab_size to TOTAL_VOCAB_SIZE
    embed_dim=128,
    num_heads=4,
    ff_dim=256
)

# You can upload files manually or download some test MIDIs here
# For example


[31mERROR: Could not find a version that satisfies the requirement tensorflow==2.11.0 (from versions: 2.12.0rc0, 2.12.0rc1, 2.12.0, 2.12.1, 2.13.0rc0, 2.13.0rc1, 2.13.0rc2, 2.13.0, 2.13.1, 2.14.0rc0, 2.14.0rc1, 2.14.0, 2.14.1, 2.15.0rc0, 2.15.0rc1, 2.15.0, 2.15.0.post1, 2.15.1, 2.16.0rc0, 2.16.1, 2.16.2, 2.17.0rc0, 2.17.0rc1, 2.17.0, 2.17.1, 2.18.0rc0, 2.18.0rc1, 2.18.0rc2, 2.18.0, 2.18.1, 2.19.0rc0, 2.19.0)[0m[31m
[0m[31mERROR: No matching distribution found for tensorflow==2.11.0[0m[31m
TensorFlow version: 2.18.0


SyntaxError: invalid syntax (<ipython-input-2-f188d5ac1197>, line 1)

In [47]:
# Rebuild the model with the fix
model = build_model(
    maxlen=SEQ_LENGTH,
    vocab_size=TOTAL_VOCAB_SIZE,
    embed_dim=128,
    num_heads=4,
    ff_dim=256
)

model.summary()  # Check the new architecture


In [11]:
import os

midi_dir = '/content/drive/MyDrive/midi_data_set'  # Adjust if your folder name/path is different

if not os.path.exists(midi_dir):
    print("MIDI folder not found!")
else:
    print(f"MIDI folder found at: {midi_dir}")




sequences = load_dataset(midi_dir)
print(f"Loaded {len(sequences)} sequences")

# Tokenize sequences
token_lists = [tokenize_sequence(seq) for seq in sequences]
print(f"Tokenized {len(token_lists)} sequences")

# Create training sequences (inputs and targets)
inputs, targets = create_training_sequences(token_lists)
print(f"Prepared {inputs.shape[0]} training samples")



MIDI folder found at: /content/drive/MyDrive/midi_data_set
Found 14 MIDI files
Loaded 14 sequences
Tokenized 14 sequences
Prepared 61942 training samples


In [19]:
model = build_model(maxlen=SEQ_LENGTH, vocab_size=PITCH_VOCAB_SIZE + DURATION_VOCAB_SIZE)
model.summary()



In [48]:
# Replace your existing model.fit line with:
split_idx = int(0.8 * len(inputs))
train_inputs = inputs[:split_idx]
train_targets = targets[:split_idx]
val_inputs = inputs[split_idx:]
val_targets = targets[split_idx:]

history = model.fit(
    train_inputs,
    train_targets,
    validation_data=(val_inputs, val_targets),
    epochs=50,
    batch_size=32,
    verbose=1
)


Epoch 1/50


ValueError: Exception encountered when calling Functional.call().

[1mInvalid input shape for input Tensor("data:0", shape=(32,), dtype=float32). Expected shape (None, 64), but input has incompatible shape (32,)[0m

Arguments received by Functional.call():
  • inputs=tf.Tensor(shape=(32,), dtype=float32)
  • training=True
  • mask=None

In [41]:
# Debug shapes and data
print("=== Debugging Info ===")
print(f"Train inputs shape: {train_inputs.shape}")
print(f"Train targets shape: {train_targets.shape}")
print(f"Model input shape: {model.input_shape}")
print(f"Model output shape: {model.output_shape}")
print(f"Train targets dtype: {train_targets.dtype}")
print(f"Unique values in targets: {np.unique(train_targets)}")
print(f"Max target value: {np.max(train_targets)}")
print(f"Min target value: {np.min(train_targets)}")
print(f"Total vocab size: {TOTAL_VOCAB_SIZE}")

=== Debugging Info ===
Train inputs shape: (49553, 64)
Train targets shape: (49553,)
Model input shape: (None, 64)
Model output shape: (None, 64, 55)
Train targets dtype: int64
Unique values in targets: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54]
Max target value: 54
Min target value: 0
Total vocab size: 55


In [8]:
# Load MIDI files and convert to token sequences


NameError: name 'load_dataset' is not defined

In [12]:
!pip install pretty_midi
!pip install tensorflow




In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [2]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [1]:
!pip install music21 pretty_midi


Collecting pretty_midi
  Downloading pretty_midi-0.2.10.tar.gz (5.6 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/5.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━[0m [32m4.0/5.6 MB[0m [31m121.4 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m5.6/5.6 MB[0m [31m117.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m68.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting mido>=1.1.16 (from pretty_midi)
  Downloading mido-1.3.3-py3-none-any.whl.metadata (6.4 kB)
Downloading mido-1.3.3-py3-none-any.whl (54 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.6/54.6 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: pretty_midi
  Building wheel for pretty_midi (setup.py)