# Simple Melody Music Transformer Maker (ver. 1.0)

***

Powered by tegridy-tools: https://github.com/asigalov61/tegridy-tools

***

WARNING: This complete implementation is a functioning model of Artificial Intelligence. Please exercise great humility, care, and respect. https://www.nscai.gov/

***

#### Project Los Angeles

#### Tegridy Code 2024

***

# GPU check

In [None]:
# Run nvidia-smi in a shell to report the GPUs visible to this machine.
!nvidia-smi

# Setup environment

In [None]:
# Shallow-clone (latest commit only) tegridy-tools into the current directory.
# NOTE(review): later cells %cd into /home/ubuntu/tegridy-tools/..., so this
# presumably has to be run from /home/ubuntu — confirm.
!git clone --depth 1 https://github.com/asigalov61/tegridy-tools

In [None]:
# Install/upgrade the Python packages imported by the notebook.
!pip3 install -U torch torchvision torchaudio
!pip install einops
!pip install torch-summary
# tqdm and matplotlib are imported below but their installs are left disabled here.
#!pip install tqdm
#!pip install matplotlib

In [None]:
# Same installs as the previous cell (plus tqdm), run through sudo.
# NOTE(review): presumably only one of the two install cells needs to be run — confirm.
!sudo pip install tqdm
!sudo pip3 install -U torch torchvision torchaudio
!sudo pip install einops
!sudo pip install torch-summary

In [None]:
# Load modules and make data dir

print('Loading modules...')

import os
import pickle
import random
import secrets
import tqdm
import math

import gc

!set USE_FLASH_ATTENTION=1
os.environ['USE_FLASH_ATTENTION'] = '1'

import torch
import torch.optim as optim

from torch.utils.data import DataLoader, Dataset

import matplotlib.pyplot as plt

from torchsummary import summary
from sklearn import metrics

%cd /home/ubuntu/tegridy-tools/tegridy-tools/

import TMIDIX

%cd /home/ubuntu/tegridy-tools/tegridy-tools/X-Transformer

from x_transformer_1_23_2 import *

torch.set_float32_matmul_precision('high')
torch.backends.cuda.matmul.allow_tf32 = True # allow tf32 on matmul
torch.backends.cudnn.allow_tf32 = True # allow tf32 on cudnn
torch.backends.cuda.enable_flash_sdp(True)
torch.backends.cuda.enable_cudnn_sdp(True)

!set USE_FLASH_ATTENTION=1

%cd /home/ubuntu/

if not os.path.exists('/home/ubuntu/INTS'):
    os.makedirs('/home/ubuntu/INTS')

import random

print('Done')

print('Torch version:', torch.__version__)

# Load training data

In [None]:
# Download the Clean-Melodies archive into /home/ubuntu/INTS, unpack it there,
# delete the zip, then return to /home/ubuntu.
%cd /home/ubuntu/INTS/
!wget https://github.com/asigalov61/Tegridy-MIDI-Dataset/raw/refs/heads/master/Clean-Melodies/Clean-Melodies-CC-BY-NC-SA.zip
!unzip Clean-Melodies-CC-BY-NC-SA.zip
!rm Clean-Melodies-CC-BY-NC-SA.zip
%cd /home/ubuntu/

# Load Training Data

In [None]:
# Unpack the three objects stored in the melodies pickle. The file is opened in
# a `with` block so the handle is closed as soon as loading finishes.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load archives obtained from a trusted source.
with open('INTS/clean_melodies.pickle', 'rb') as melodies_file:
    clean_melodies, clean_melodies_pitches, clean_melodies_delta_pitches = pickle.load(melodies_file)

In [None]:
# Show the second entry of the first melody and the length of the longest melody.
clean_melodies[0][1], max(map(len, clean_melodies))

In [None]:
#==========================================================================
# Build the training set. Every melody is emitted once per transpose value in
# range(-6, 6) as a flat token list:
#   384 (SOS), then per note [note[0], note[1] + 128, pitch + 256], then 385 (EOS)
# where the transposed pitch is clamped to 1..127.

banner = '=' * 70

print(banner)
print('Loading data files...')
print('Please wait...')
print(banner)

train_data = []

for melody in tqdm.tqdm(clean_melodies):

    for shift in range(-6, 6): # Pitches transpose value

        tokens = [384] # SOS

        tokens += [
            tok
            for ev in melody
            for tok in (ev[0],
                        ev[1] + 128,
                        min(127, max(1, ev[2] + shift)) + 256)
        ]

        tokens.append(385) # EOS

        train_data.append(tokens)
#==========================================================================

print('Done!')
print(banner)

# The batching code stacks sequences into one tensor, so they must all share a length.
seq_lengths = [len(seq) for seq in train_data]
print('All data is good:', max(seq_lengths) == min(seq_lengths))

print(banner)
print('Randomizing train data...')
random.shuffle(train_data)
print('Done!')
print(banner)
print('Total length of train data:', len(train_data))
print(banner)

In [None]:
# Token count of the first (shuffled) training sequence.
len(train_data[0])

In [None]:
# First 15 tokens of the first training sequence.
train_data[0][:15]

# Setup model

In [None]:
# Setup model

# --- sequence / vocabulary layout -------------------------------------------
SEQ_LEN = 194 # Model seq len
PAD_IDX = 386 # Model pad index

# --- training schedule ------------------------------------------------------
NUM_EPOCHS = 50

BATCH_SIZE = 512
GRADIENT_ACCUMULATE_EVERY = 1

# --- optimisation -----------------------------------------------------------
LEARNING_RATE = 1e-4
GRAD_CLIP = 1.5

# --- periodic actions (the training loop compares these to the batch index) --
PRINT_STATS_EVERY = 100
VALIDATE_EVERY  = 1000
GENERATE_EVERY  = 2500
SAVE_EVERY = 5000
GENERATE_LENGTH = 64

# --- model ------------------------------------------------------------------
# The vocabulary is PAD_IDX + 1 tokens, so the pad token is the last id.

decoder_stack = Decoder(
    dim = 1024,
    depth = 4,
    heads = 8,
    rotary_pos_emb = True,
    attn_flash = True,
)

token_model = TransformerWrapper(
    num_tokens = PAD_IDX+1,
    max_seq_len = SEQ_LEN,
    attn_layers = decoder_stack,
)

# Pad positions are passed as ignore_index, and pad_value is the same token.
model = AutoregressiveWrapper(token_model, ignore_index = PAD_IDX, pad_value=PAD_IDX)

model.cuda()

print('Done!')

summary(model)

# Dataloader

def get_train_data_batch(tdata, index, seq_len, batch_size, pad_idx):
    """Return batch number `index` of `tdata` as a CUDA LongTensor.

    Args:
        tdata: sequence of token lists (the training set).
        index: zero-based batch number; the batch covers items
            [index * batch_size, index * batch_size + batch_size).
        seq_len: accepted for interface compatibility; not used here.
        batch_size: number of sequences per batch.
        pad_idx: token used to right-pad sequences shorter than the longest
            sequence in the batch.

    Returns:
        A torch.LongTensor on the CUDA device with one row per sequence. A
        slice past the end of `tdata` yields an empty tensor.
    """

    start = index * batch_size
    batch = tdata[start:start + batch_size]

    # torch.LongTensor needs a rectangular input. Batches that are already
    # uniform are passed through untouched; ragged ones are right-padded.
    longest = max(map(len, batch), default=0)

    if any(len(seq) != longest for seq in batch):
        batch = [list(seq) + [pad_idx] * (longest - len(seq)) for seq in batch]

    return torch.LongTensor(batch).cuda()
        
# precision/optimizer/scaler

# dtype used by the autocast context that wraps every forward pass below.
dtype = torch.bfloat16

ctx = torch.amp.autocast(device_type='cuda', dtype=dtype)

# NOTE: this rebinds the name `optim`, which until this line referred to the
# `torch.optim` module alias created by the notebook's import cell.
optim = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Gradient scaler used by the training loop (scale -> unscale_ -> step -> update).
# NOTE(review): loss scaling is presumably unnecessary with bfloat16 autocast — confirm.
scaler = torch.amp.GradScaler('cuda')

# Train

In [None]:
# Train the model
#
# Per epoch: reshuffle train_data, then run NUM_BATCHES optimizer steps. Every
# step records the last micro-batch's loss/accuracy. On batch indices divisible
# by VALIDATE_EVERY / GENERATE_EVERY / SAVE_EVERY the loop also evaluates,
# samples a continuation to MIDI, and writes a checkpoint.
# NOTE(review): the "validation" batch is sliced from train_data, so it is not
# a held-out measurement.

train_losses = []
val_losses = []

train_accs = []
val_accs = []

nsteps = 0

rule = '=' * 70

for ep in range(NUM_EPOCHS):

    print(rule)
    print('Randomizing train data...')
    random.shuffle(train_data)
    print(rule)

    print(rule)
    print('Epoch #', ep)
    print(rule)

    NUM_BATCHES = len(train_data) // BATCH_SIZE // GRADIENT_ACCUMULATE_EVERY

    model.train()

    for i in tqdm.tqdm(range(NUM_BATCHES), mininterval=10., desc='Training'):

        # ---- one optimizer step (with optional gradient accumulation) ----
        optim.zero_grad()

        for j in range(GRADIENT_ACCUMULATE_EVERY):
            micro_index = (i*GRADIENT_ACCUMULATE_EVERY)+j
            with ctx:
                loss, acc = model(get_train_data_batch(train_data, micro_index, SEQ_LEN, BATCH_SIZE, PAD_IDX))
                loss = loss / GRADIENT_ACCUMULATE_EVERY
            scaler.scale(loss).backward()

        # Undo the 1/GRADIENT_ACCUMULATE_EVERY scaling for reporting.
        step_loss = loss.item() * GRADIENT_ACCUMULATE_EVERY
        step_acc = acc.item()

        if i % PRINT_STATS_EVERY == 0:
            print(f'Training loss: {step_loss}')
            print(f'Training acc: {step_acc}')

        train_losses.append(step_loss)
        train_accs.append(step_acc)

        # Gradients are unscaled first so clipping sees their true norm.
        scaler.unscale_(optim)
        torch.nn.utils.clip_grad_norm_(model.parameters(), GRAD_CLIP)
        scaler.step(optim)
        scaler.update()

        nsteps += 1

        # ---- periodic evaluation + live plots ----
        if i % VALIDATE_EVERY == 0:
            model.eval()
            with torch.no_grad(), ctx:
                val_loss, val_acc = model(get_train_data_batch(train_data, i, SEQ_LEN, BATCH_SIZE, PAD_IDX))

                print(f'Validation loss: {val_loss.item()}')
                print(f'Validation acc: {val_acc.item()}')

                val_losses.append(val_loss.item())
                val_accs.append(val_acc.item())

                curves = (
                    ('Plotting training loss graph...', train_losses),
                    ('Plotting training acc graph...', train_accs),
                    ('Plotting validation loss graph...', val_losses),
                    ('Plotting validation acc graph...', val_accs),
                )

                for caption, series in curves:
                    print(caption)
                    plt.plot(list(range(len(series))), series, 'b')
                    plt.show()
                    plt.close()
                    print('Done!')

            model.train()

        # ---- periodic sample generation, written out as MIDI ----
        if i % GENERATE_EVERY == 0:
            model.eval()

            # Prompt: the first GENERATE_LENGTH tokens of a random row of batch i.
            inp = random.choice(get_train_data_batch(train_data, i, SEQ_LEN, BATCH_SIZE, PAD_IDX))[:GENERATE_LENGTH]

            print(inp)

            with ctx:
                sample = model.generate(inp[None, ...], GENERATE_LENGTH)

            print(sample)

            data = sample.tolist()[0]

            print('Sample INTs', data[:15])

            if data:

                song_f = []

                abs_time = 0
                dur = 0
                vel = 90
                pitch = 60
                channel = 0

                # Token ranges: 0..127 advance time, 128..255 set the duration,
                # 256..383 set the pitch and emit a note.
                for tok in data:

                    if 0 <= tok < 128:
                        abs_time += tok

                    elif 128 <= tok < 256:
                        dur = tok-128

                    elif 256 <= tok < 384:
                        pitch = tok-256
                        song_f.append(['note', abs_time, dur, channel, pitch, vel, 0])

            detailed_stats = TMIDIX.Tegridy_ms_SONG_to_MIDI_Converter(
                song_f,
                output_signature = 'Simple Melody Music Transformer',
                output_file_name = '/home/ubuntu/Simple-Melody-Music-Transformer-Composition',
                track_name='Project Los Angeles',
                timings_multiplier=32
            )

            print('Done!')

            model.train()

        # ---- periodic checkpoint ----
        if i % SAVE_EVERY == 0:

            print('Saving model progress. Please wait...')

            last_loss = round(float(train_losses[-1]), 4)
            last_acc = round(float(train_accs[-1]), 4)
            ckpt_name = f'model_checkpoint_{nsteps}_steps_{last_loss}_loss_{last_acc}_acc.pth'

            print(ckpt_name)

            fname = '/home/ubuntu/' + ckpt_name

            torch.save(model.state_dict(), fname)

            data = [train_losses, train_accs, val_losses, val_accs]

            TMIDIX.Tegridy_Any_Pickle_File_Writer(data, '/home/ubuntu/losses_accs')

            print('Done!')

# Final Save

In [None]:
# Write the final checkpoint, the recorded loss/accuracy curves, and one PNG per curve.

print('Saving model progress. Please wait...')

last_loss = round(float(train_losses[-1]), 4)
last_acc = round(float(train_accs[-1]), 4)
ckpt_name = f'model_checkpoint_{nsteps}_steps_{last_loss}_loss_{last_acc}_acc.pth'

print(ckpt_name)

fname = '/home/ubuntu/' + ckpt_name

# Only the model weights are stored; the optimizer state is not saved.
torch.save(model.state_dict(), fname)

print('Done!')

data = [train_losses, train_accs, val_losses, val_accs]

TMIDIX.Tegridy_Any_Pickle_File_Writer(data, '/home/ubuntu/losses_accuracies')

# Save the training/validation loss and accuracy graphs

graph_files = (
    (train_losses, '/home/ubuntu/training_loss_graph.png'),
    (train_accs, '/home/ubuntu/training_acc_graph.png'),
    (val_losses, '/home/ubuntu/validation_loss_graph.png'),
    (val_accs, '/home/ubuntu/validation_acc_graph.png'),
)

for series, png_path in graph_files:
    plt.plot(list(range(len(series))), series, 'b')
    plt.savefig(png_path)
    plt.close()
    print('Done!')

# Eval

In [None]:
# Rebuild the network with the same hyper-parameters used for training and
# restore its weights from a saved checkpoint.

SEQ_LEN = 194 # Model seq len
PAD_IDX = 386 # Model pad index

model = TransformerWrapper(
    num_tokens = PAD_IDX+1,
    max_seq_len = SEQ_LEN,
    attn_layers = Decoder(dim = 1024, 
                          depth = 4, 
                          heads = 8, 
                          rotary_pos_emb = True,  
                          attn_flash = True
                         )
    )

model = AutoregressiveWrapper(model, ignore_index = PAD_IDX, pad_value=PAD_IDX)

print('=' * 70)
print('Loading model checkpoint...')

model_path = 'model_checkpoint_16956_steps_0.4015_loss_0.8613_acc.pth'

# map_location='cpu' restores the tensors on the CPU regardless of the device
# they were saved from (the model is moved to the GPU below), and
# weights_only=True restricts unpickling to tensors and plain containers
# instead of arbitrary pickled objects.
model.load_state_dict(torch.load(model_path, map_location='cpu', weights_only=True))

print('=' * 70)

model.cuda()
model.eval()

print('Done!')

summary(model)

# Same autocast configuration as in training.
dtype = torch.bfloat16

ctx = torch.amp.autocast(device_type='cuda', dtype=dtype)

In [None]:
# Convert a seed MIDI file into the model's token sequence `td`:
#   384 (SOS), then per note [e[1], e[2] + 128, e[4] + 256], then 385 (EOS).

midi_file = 'tegridy-tools/tegridy-tools/seed-melody.mid'

# Converting MIDI to ms score with MIDI.py module
raw_score = TMIDIX.midi2single_track_ms_score(midi_file)

escore_notes = TMIDIX.advanced_score_processor(raw_score, return_enhanced_score_notes=True)

# Defined up front so the report below never hits an undefined (or stale) `td`
# when the file yields no notes.
td = []

if escore_notes:
    
    escore_notes = TMIDIX.augment_enhanced_score_notes(escore_notes[0], timings_divider=32, legacy_timings=True)

    dscore = TMIDIX.delta_score_notes(escore_notes)

    if dscore:

        mel = [[e[1], e[2], e[4]] for e in dscore]

        td = [384] # SOS      
    
        for note in mel:

            # Each field owns a 128-wide token range (0..127, 128..255, 256..383).
            # Clamping keeps an out-of-range value from an arbitrary MIDI file
            # from spilling into the next range or past the vocabulary. The
            # pitch floor of 1 matches the training-data encoding.
            td.extend([max(0, min(127, note[0])),
                       max(0, min(127, note[1]))+128,
                       max(1, min(127, note[2]))+256])

        td.append(385) # EOS

print('Done!')
print('=' * 70)
print(len(td))
print('=' * 70)

In [None]:
# Token count of the seed sequence built from the MIDI file.
# (`td1` is not defined anywhere in this notebook; the sequence is named `td`.)
len(td)

In [None]:
# Build the prompt as a 2-D (batch, tokens) tensor. The cells below read the
# result as `y[0]` and treat it as a token list, and the training loop also
# passes a batched prompt (`inp[None, ...]`), so a 1-D prompt would not match.
if td:
    x = torch.tensor([td[:64]], dtype=torch.long, device='cuda') # Gen from loaded MIDI
else:
    x = torch.tensor([[384, 0]] * 1, dtype=torch.long, device='cuda') # Gen from embeddings

# run generation

# NOTE(review): no eos_token is passed, so generation presumably always runs
# for the full 192 tokens — confirm against the wrapper's generate().
with ctx:
    out = model.generate(x,
                         192,
                         temperature=0.9,
                         return_prime=True,
                         verbose=True)

y = out.tolist()

print('---------------')

In [None]:
# Show the first entry of the generation output.
print(y[0])

In [None]:
#@title Test INTs

# Decode the generated tokens back into notes and write them out as a MIDI file.

data = y[0]

print('Sample INTs', data[:15])

if data:

    song_f = []

    abs_time = 0
    dur = 0
    vel = 90
    pitch = 60
    channel = 0

    # Token ranges: 0..127 advance time, 128..255 set the duration,
    # 256..383 set the pitch and emit a note. Other tokens are skipped.
    for tok in data:

        if 0 <= tok < 128:
            abs_time += tok

        elif 128 <= tok < 256:
            dur = tok-128

        elif 256 <= tok < 384:
            pitch = tok-256
            song_f.append(['note', abs_time, dur, channel, pitch, vel, 0])

detailed_stats = TMIDIX.Tegridy_ms_SONG_to_MIDI_Converter(
    song_f,
    output_signature = 'Simple Melody Music Transformer',
    output_file_name = '/home/ubuntu/Simple-Melody-Music-Transformer-Composition',
    track_name='Project Los Angeles',
    timings_multiplier=32
)

print('Done!')

In [None]:
# Plot the pairwise cosine distances between all rows of the token-embedding
# matrix as a heat map and save it as a PNG.

tok_emb = model.net.token_emb.emb.weight.detach().cpu().tolist()

cos_sim = metrics.pairwise_distances(tok_emb, metric='cosine')

# Height/width ratio, used to size the colorbar.
im_ratio = cos_sim.shape[0] / cos_sim.shape[1]

plt.figure(figsize=(7, 7))
plt.imshow(cos_sim, cmap="inferno", interpolation="nearest")
plt.colorbar(fraction=0.046 * im_ratio, pad=0.04)

plt.xlabel("Position")
plt.ylabel("Position")

plt.tight_layout()
plt.plot()
plt.savefig("/home/ubuntu/Simple-Melody-Music-Transformer-Tokens-Embeddings-Plot.png", bbox_inches="tight")

# Congrats! You did it! :)