In [1]:
from trainer import Trainer, TrainerConfig
from mingpt_utils import set_seed
from model import GPT, GPTConfig
import torch
from utils import *

from torch.utils.tensorboard import SummaryWriter
from mingpt_utils import sample
torch.cuda.empty_cache()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
print("Available devices: ", torch.cuda.device_count())
print("torch version:", torch.__version__)
print("cudnn version:", torch.backends.cudnn.version())
print("cuda version:", torch.version.cuda)

Available devices:  2
torch version: 2.2.0a0+81ea7a4
cudnn version: 8907
cuda version: 12.3


In [3]:
max_length = 1024
id = 0
tokens = np.load('../data/formatted/tokens.npy', allow_pickle=True)
train = np.load('../data/shuffled/dataset_train.npy', allow_pickle=True)
test = np.load('../data/shuffled/dataset_test.npy', allow_pickle=True)
midi_train = np.load('../data/shuffled/midi_train.npy', allow_pickle=True)
midi_test = np.load('../data/shuffled/midi_test.npy', allow_pickle=True)

In [4]:
pad_tokens = 0
elements = 0
for song in train:
    for e in song:
        if e == '<pad>':
            pad_tokens += 1
        elements += 1

percentage = pad_tokens / elements * 100
print(f"Pad tokens: {pad_tokens}, Percentage: {percentage:.2f}%") 

Pad tokens: 16773876, Percentage: 37.86%


In [5]:
#Convert midi into dtype int
midi_train = midi_train.astype(int)
midi_test = midi_test.astype(int)

In [6]:
print(train.shape, test.shape, midi_train.shape, midi_test.shape)

dataset = TokenDatasetMidi(train, midi_train,  max_length, tokens)
validation = TokenDatasetMidi(test, midi_test, max_length, tokens)

(43272, 1024) (4800, 1024) (43272, 1024, 8) (4800, 1024, 8)
data has 43272 pieces, 198 unique tokens.
data has 4800 pieces, 198 unique tokens.


In [7]:
stoi = { tk:i for i,tk in enumerate(tokens) }
itos = { i:tk for i,tk in enumerate(tokens) }

print(stoi)

{'.': 0, '/': 1, '0.3997395833333333': 2, '0.4440104166666667': 3, '0.5': 4, '0.5703125': 5, '0.6666666666666666': 6, '0.75': 7, '0.7994791666666666': 8, '0.8880208333333334': 9, '1.0': 10, '1.1419270833333333': 11, '1.3333333333333333': 12, '1.5': 13, '1.5989583333333333': 14, '1.7135416666666667': 15, '128 Feel': 16, '2.0': 17, '2.25': 18, '2.3997395833333335': 19, '2.6666666666666665': 20, '3.0': 21, '4.0': 22, ':|': 23, '<end>': 24, '<pad>': 25, '<start>': 26, '<style>': 27, 'A': 28, 'A major': 29, 'A minor': 30, 'A#': 31, 'A##': 32, 'Ab': 33, 'Ab major': 34, 'Ab minor': 35, 'Abb': 36, 'Afoxé': 37, 'Afro': 38, 'B': 39, 'B major': 40, 'B minor': 41, 'B#': 42, 'B##': 43, 'Baião': 44, 'Ballad': 45, 'Bb': 46, 'Bb major': 47, 'Bb minor': 48, 'Bbb': 49, 'Blues': 50, 'Bolero': 51, 'Bolero-Cha': 52, 'Bossa': 53, 'C': 54, 'C major': 55, 'C minor': 56, 'C#': 57, 'C##': 58, 'Calypso': 59, 'Cb': 60, 'Cbb': 61, 'Cha Cha': 62, 'Chacarera': 63, 'Choro': 64, 'Country Ballad': 65, 'D': 66, 'D major

In [8]:
import wandb
#wandb.login()
wandb.init(
    # set the wandb project where this run will be logged
    project="music_gpt_new_voicing",
    
    # track hyperparameters and run metadata
    config={
    "learning_rate": 3e-5,
    "architecture": "Transformer - minGPT",
    "dataset": "chords from iRealPro",
    "epochs": 270,
    }
)

04/18/2024 09:05:31 - ERROR - wandb.jupyter -   Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdazzid[0m ([33mmusic_gpt[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [9]:
# import subprocess

# # Example command to list processes using GPU (this won't run here due to sandbox restrictions)
# output = subprocess.run(['nvidia-smi'], capture_output=True, text=True)
# print(output.stdout)

In [11]:
epochs = 270
embedding = 512
heads = 4
layers = 4
batch_size = 128
learning_rate = 3e-5
num_workers = 4
midi_vocab = 128
token_size = len(tokens)

mconf = GPTConfig(len(tokens), dataset.block_size, midi_vocab, n_layer=layers, n_head=heads, n_embd=embedding)
session_model = GPT(mconf)
MODEL_NAME = "../models/model_"+ "epochs->" + str(epochs) + "_heads->" + str(heads) + "_embd->" + str(embedding) + "_batch->" + str(batch_size) + "_new_midi_embeddings"
print(MODEL_NAME)

session_model = load_model(MODEL_NAME, session_model)

if (session_model == None):
    #mconf = GPTConfig(len(tokens), dataset.block_size, n_layer=layers, n_head=heads, n_embd=embbedings)
    session_model = GPT(mconf)
    tconf = TrainerConfig(max_epochs=epochs, 
                          batch_size=batch_size, 
                          learning_rate=learning_rate, 
                          num_workers=num_workers
                          )
    writer = SummaryWriter(log_dir='../runs/'+'logs') 
    trainer = Trainer(session_model, dataset, validation, tconf, writer)
    trainer.train()
    save_model(MODEL_NAME, session_model)
    # [optional] finish the wandb run, necessary in notebooks
    wandb.finish()

04/18/2024 09:06:35 - INFO - model -   number of parameters: 1.283021e+07


../models/model_epochs->270_heads->4_embd->512_batch->128_new_midi_embeddings


04/18/2024 09:06:36 - INFO - model -   number of parameters: 1.283021e+07
epoch 1 iter 338: train loss 2.08555. lr 3.000000e-05: 100%|██████████| 339/339 [02:42<00:00,  2.09it/s]
04/18/2024 09:09:19 - INFO - trainer -   epoch train loss: 2.879668


train loss: 2.8796677329195637


04/18/2024 09:09:27 - INFO - trainer -   test loss: 1.909220
epoch 2 iter 338: train loss 1.57121. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 09:12:08 - INFO - trainer -   epoch train loss: 1.865582


train loss: 1.8655815859459846


04/18/2024 09:12:14 - INFO - trainer -   test loss: 1.608145
epoch 3 iter 338: train loss 1.38269. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 09:14:55 - INFO - trainer -   epoch train loss: 1.589002


train loss: 1.5890019235357773


04/18/2024 09:15:02 - INFO - trainer -   test loss: 1.547545
epoch 4 iter 338: train loss 1.29671. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 09:17:43 - INFO - trainer -   epoch train loss: 1.475286


train loss: 1.475286076554155


04/18/2024 09:17:50 - INFO - trainer -   test loss: 1.564558
epoch 5 iter 338: train loss 1.21842. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.10it/s]
04/18/2024 09:20:32 - INFO - trainer -   epoch train loss: 1.404776


train loss: 1.4047755115503406


04/18/2024 09:20:38 - INFO - trainer -   test loss: 1.447742
epoch 6 iter 338: train loss 1.12696. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 09:23:19 - INFO - trainer -   epoch train loss: 1.334487


train loss: 1.33448720998117


04/18/2024 09:23:26 - INFO - trainer -   test loss: 1.381283
epoch 7 iter 338: train loss 1.09398. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 09:26:07 - INFO - trainer -   epoch train loss: 1.283754


train loss: 1.2837537643128791


04/18/2024 09:26:14 - INFO - trainer -   test loss: 1.313069
epoch 8 iter 338: train loss 1.06398. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 09:28:55 - INFO - trainer -   epoch train loss: 1.254079


train loss: 1.2540786515998277


04/18/2024 09:29:02 - INFO - trainer -   test loss: 1.273768
epoch 9 iter 338: train loss 1.01430. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 09:31:43 - INFO - trainer -   epoch train loss: 1.220625


train loss: 1.220624970475481


04/18/2024 09:31:50 - INFO - trainer -   test loss: 1.226836
epoch 10 iter 338: train loss 0.97980. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 09:34:31 - INFO - trainer -   epoch train loss: 1.188096


train loss: 1.1880959564253994


04/18/2024 09:34:38 - INFO - trainer -   test loss: 1.192800
epoch 11 iter 338: train loss 0.96818. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 09:37:18 - INFO - trainer -   epoch train loss: 1.161498


train loss: 1.1614975874754532


04/18/2024 09:37:25 - INFO - trainer -   test loss: 1.176975
epoch 12 iter 338: train loss 0.94697. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 09:40:06 - INFO - trainer -   epoch train loss: 1.137500


train loss: 1.1374997639023097


04/18/2024 09:40:13 - INFO - trainer -   test loss: 1.133290
epoch 13 iter 338: train loss 0.92461. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 09:42:54 - INFO - trainer -   epoch train loss: 1.112322


train loss: 1.1123219062093437


04/18/2024 09:43:01 - INFO - trainer -   test loss: 1.101093
epoch 14 iter 338: train loss 0.90416. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 09:45:42 - INFO - trainer -   epoch train loss: 1.084624


train loss: 1.0846240875643616


04/18/2024 09:45:49 - INFO - trainer -   test loss: 1.064536
epoch 15 iter 338: train loss 0.87931. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 09:48:29 - INFO - trainer -   epoch train loss: 1.054814


train loss: 1.0548140633422716


04/18/2024 09:48:36 - INFO - trainer -   test loss: 1.014189
epoch 16 iter 338: train loss 0.84083. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 09:51:17 - INFO - trainer -   epoch train loss: 1.021155


train loss: 1.0211549716009856


04/18/2024 09:51:24 - INFO - trainer -   test loss: 0.985637
epoch 17 iter 338: train loss 0.81470. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 09:54:05 - INFO - trainer -   epoch train loss: 0.987435


train loss: 0.9874346245706609


04/18/2024 09:54:12 - INFO - trainer -   test loss: 0.949778
epoch 18 iter 338: train loss 0.77435. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 09:56:53 - INFO - trainer -   epoch train loss: 0.956869


train loss: 0.9568686363971339


04/18/2024 09:56:59 - INFO - trainer -   test loss: 0.923403
epoch 19 iter 338: train loss 0.73971. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 09:59:40 - INFO - trainer -   epoch train loss: 0.928984


train loss: 0.9289836582884324


04/18/2024 09:59:47 - INFO - trainer -   test loss: 0.897391
epoch 20 iter 338: train loss 0.72011. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 10:02:28 - INFO - trainer -   epoch train loss: 0.907758


train loss: 0.9077580551833881


04/18/2024 10:02:35 - INFO - trainer -   test loss: 0.883291
epoch 21 iter 338: train loss 0.70147. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 10:05:16 - INFO - trainer -   epoch train loss: 0.891558


train loss: 0.8915582842531458


04/18/2024 10:05:23 - INFO - trainer -   test loss: 0.868341
epoch 22 iter 338: train loss 0.68608. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 10:08:03 - INFO - trainer -   epoch train loss: 0.877522


train loss: 0.87752181678395


04/18/2024 10:08:10 - INFO - trainer -   test loss: 0.856308
epoch 23 iter 338: train loss 0.67815. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 10:10:51 - INFO - trainer -   epoch train loss: 0.864637


train loss: 0.8646371600198887


04/18/2024 10:10:58 - INFO - trainer -   test loss: 0.844781
epoch 24 iter 338: train loss 0.66295. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 10:13:39 - INFO - trainer -   epoch train loss: 0.852824


train loss: 0.8528237356900465


04/18/2024 10:13:46 - INFO - trainer -   test loss: 0.833302
epoch 25 iter 338: train loss 0.64959. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 10:16:26 - INFO - trainer -   epoch train loss: 0.841823


train loss: 0.8418226094372505


04/18/2024 10:16:33 - INFO - trainer -   test loss: 0.823002
epoch 26 iter 338: train loss 0.64992. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 10:19:14 - INFO - trainer -   epoch train loss: 0.831713


train loss: 0.8317127575916526


04/18/2024 10:19:21 - INFO - trainer -   test loss: 0.811982
epoch 27 iter 338: train loss 0.62836. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 10:22:02 - INFO - trainer -   epoch train loss: 0.821319


train loss: 0.8213191603840628


04/18/2024 10:22:09 - INFO - trainer -   test loss: 0.801548
epoch 28 iter 338: train loss 0.62301. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 10:24:50 - INFO - trainer -   epoch train loss: 0.808871


train loss: 0.8088706276409745


04/18/2024 10:24:57 - INFO - trainer -   test loss: 0.782126
epoch 29 iter 338: train loss 0.60373. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 10:27:37 - INFO - trainer -   epoch train loss: 0.792324


train loss: 0.7923235615446153


04/18/2024 10:27:45 - INFO - trainer -   test loss: 0.758362
epoch 30 iter 338: train loss 0.58896. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 10:30:26 - INFO - trainer -   epoch train loss: 0.768044


train loss: 0.7680435764402767


04/18/2024 10:30:33 - INFO - trainer -   test loss: 0.722696
epoch 31 iter 338: train loss 0.57357. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.10it/s]
04/18/2024 10:33:14 - INFO - trainer -   epoch train loss: 0.742604


train loss: 0.7426037107948709


04/18/2024 10:33:21 - INFO - trainer -   test loss: 0.693898
epoch 32 iter 338: train loss 0.54728. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.10it/s]
04/18/2024 10:36:02 - INFO - trainer -   epoch train loss: 0.719349


train loss: 0.7193491402628851


04/18/2024 10:36:09 - INFO - trainer -   test loss: 0.658933
epoch 33 iter 338: train loss 0.52544. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.10it/s]
04/18/2024 10:38:51 - INFO - trainer -   epoch train loss: 0.682668


train loss: 0.682668452945079


04/18/2024 10:38:58 - INFO - trainer -   test loss: 0.572786
epoch 34 iter 338: train loss 0.46346. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.10it/s]
04/18/2024 10:41:40 - INFO - trainer -   epoch train loss: 0.612400


train loss: 0.6123997351940402


04/18/2024 10:41:46 - INFO - trainer -   test loss: 0.481804
epoch 35 iter 338: train loss 0.41990. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.10it/s]
04/18/2024 10:44:28 - INFO - trainer -   epoch train loss: 0.542275


train loss: 0.5422751679357174


04/18/2024 10:44:35 - INFO - trainer -   test loss: 0.411316
epoch 36 iter 338: train loss 0.39238. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 10:47:16 - INFO - trainer -   epoch train loss: 0.484219


train loss: 0.48421948436087214


04/18/2024 10:47:23 - INFO - trainer -   test loss: 0.371565
epoch 37 iter 338: train loss 0.35892. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.10it/s]
04/18/2024 10:50:04 - INFO - trainer -   epoch train loss: 0.442897


train loss: 0.442896536229986


04/18/2024 10:50:11 - INFO - trainer -   test loss: 0.345337
epoch 38 iter 338: train loss 0.33275. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.10it/s]
04/18/2024 10:52:53 - INFO - trainer -   epoch train loss: 0.410174


train loss: 0.4101738410185924


04/18/2024 10:53:00 - INFO - trainer -   test loss: 0.324895
epoch 39 iter 338: train loss 0.30004. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.11it/s]
04/18/2024 10:55:41 - INFO - trainer -   epoch train loss: 0.384688


train loss: 0.38468845219387066


04/18/2024 10:55:48 - INFO - trainer -   test loss: 0.311090
epoch 40 iter 338: train loss 0.27569. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.10it/s]
04/18/2024 10:58:29 - INFO - trainer -   epoch train loss: 0.364630


train loss: 0.3646297242964967


04/18/2024 10:58:36 - INFO - trainer -   test loss: 0.300343
epoch 41 iter 338: train loss 0.26213. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 11:01:17 - INFO - trainer -   epoch train loss: 0.348676


train loss: 0.3486758859987456


04/18/2024 11:01:24 - INFO - trainer -   test loss: 0.291915
epoch 42 iter 338: train loss 0.25776. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.10it/s]
04/18/2024 11:04:05 - INFO - trainer -   epoch train loss: 0.335141


train loss: 0.33514139240821905


04/18/2024 11:04:12 - INFO - trainer -   test loss: 0.285656
epoch 43 iter 338: train loss 0.24923. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 11:06:53 - INFO - trainer -   epoch train loss: 0.323667


train loss: 0.3236671628589827


04/18/2024 11:07:00 - INFO - trainer -   test loss: 0.279908
epoch 44 iter 338: train loss 0.23648. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.10it/s]
04/18/2024 11:09:42 - INFO - trainer -   epoch train loss: 0.313615


train loss: 0.31361494813345175


04/18/2024 11:09:49 - INFO - trainer -   test loss: 0.275066
epoch 45 iter 338: train loss 0.23528. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 11:12:30 - INFO - trainer -   epoch train loss: 0.304722


train loss: 0.3047218357039764


04/18/2024 11:12:37 - INFO - trainer -   test loss: 0.271548
epoch 46 iter 338: train loss 0.21756. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.10it/s]
04/18/2024 11:15:18 - INFO - trainer -   epoch train loss: 0.296976


train loss: 0.2969760425762441


04/18/2024 11:15:25 - INFO - trainer -   test loss: 0.267980
epoch 47 iter 338: train loss 0.22044. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.10it/s]
04/18/2024 11:18:07 - INFO - trainer -   epoch train loss: 0.290460


train loss: 0.29045981898420326


04/18/2024 11:18:14 - INFO - trainer -   test loss: 0.264823
epoch 48 iter 338: train loss 0.20910. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.10it/s]
04/18/2024 11:20:55 - INFO - trainer -   epoch train loss: 0.284422


train loss: 0.28442211506289367


04/18/2024 11:21:02 - INFO - trainer -   test loss: 0.262037
epoch 49 iter 338: train loss 0.19887. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 11:23:43 - INFO - trainer -   epoch train loss: 0.279073


train loss: 0.2790732948125991


04/18/2024 11:23:50 - INFO - trainer -   test loss: 0.259306
epoch 50 iter 338: train loss 0.19654. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.10it/s]
04/18/2024 11:26:32 - INFO - trainer -   epoch train loss: 0.274003


train loss: 0.27400327972782046


04/18/2024 11:26:39 - INFO - trainer -   test loss: 0.257474
epoch 51 iter 338: train loss 0.19746. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.11it/s]
04/18/2024 11:29:20 - INFO - trainer -   epoch train loss: 0.269659


train loss: 0.2696589649075252


04/18/2024 11:29:27 - INFO - trainer -   test loss: 0.254204
epoch 52 iter 338: train loss 0.18821. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 11:32:08 - INFO - trainer -   epoch train loss: 0.265301


train loss: 0.2653013121413622


04/18/2024 11:32:15 - INFO - trainer -   test loss: 0.252178
epoch 53 iter 338: train loss 0.18606. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 11:34:56 - INFO - trainer -   epoch train loss: 0.261432


train loss: 0.26143168734941513


04/18/2024 11:35:03 - INFO - trainer -   test loss: 0.249086
epoch 54 iter 338: train loss 0.18060. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.10it/s]
04/18/2024 11:37:45 - INFO - trainer -   epoch train loss: 0.257887


train loss: 0.257886881058195


04/18/2024 11:37:51 - INFO - trainer -   test loss: 0.247663
epoch 55 iter 338: train loss 0.18334. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 11:40:32 - INFO - trainer -   epoch train loss: 0.254337


train loss: 0.25433734178015616


04/18/2024 11:40:39 - INFO - trainer -   test loss: 0.246301
epoch 56 iter 338: train loss 0.17094. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 11:43:20 - INFO - trainer -   epoch train loss: 0.251150


train loss: 0.2511495347395759


04/18/2024 11:43:27 - INFO - trainer -   test loss: 0.244198
epoch 57 iter 338: train loss 0.16909. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 11:46:09 - INFO - trainer -   epoch train loss: 0.248042


train loss: 0.24804177401164287


04/18/2024 11:46:16 - INFO - trainer -   test loss: 0.244433
epoch 58 iter 338: train loss 0.16872. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 11:48:57 - INFO - trainer -   epoch train loss: 0.245238


train loss: 0.24523822154443173


04/18/2024 11:49:04 - INFO - trainer -   test loss: 0.241539
epoch 59 iter 338: train loss 0.16935. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 11:51:45 - INFO - trainer -   epoch train loss: 0.242345


train loss: 0.24234510505445586


04/18/2024 11:51:52 - INFO - trainer -   test loss: 0.241101
epoch 60 iter 338: train loss 0.16382. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 11:54:33 - INFO - trainer -   epoch train loss: 0.239720


train loss: 0.2397202569386952


04/18/2024 11:54:40 - INFO - trainer -   test loss: 0.239250
epoch 61 iter 338: train loss 0.15606. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.11it/s]
04/18/2024 11:57:21 - INFO - trainer -   epoch train loss: 0.237136


train loss: 0.23713559514477542


04/18/2024 11:57:28 - INFO - trainer -   test loss: 0.238726
epoch 62 iter 338: train loss 0.15835. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 12:00:09 - INFO - trainer -   epoch train loss: 0.234610


train loss: 0.2346095016836065


04/18/2024 12:00:16 - INFO - trainer -   test loss: 0.237304
epoch 63 iter 338: train loss 0.15313. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.10it/s]
04/18/2024 12:02:58 - INFO - trainer -   epoch train loss: 0.232198


train loss: 0.23219795903097565


04/18/2024 12:03:05 - INFO - trainer -   test loss: 0.236708
epoch 64 iter 338: train loss 0.15122. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 12:05:46 - INFO - trainer -   epoch train loss: 0.229752


train loss: 0.22975180221166583


04/18/2024 12:05:53 - INFO - trainer -   test loss: 0.236328
epoch 65 iter 338: train loss 0.15068. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 12:08:34 - INFO - trainer -   epoch train loss: 0.227491


train loss: 0.22749064744046305


04/18/2024 12:08:41 - INFO - trainer -   test loss: 0.235219
epoch 66 iter 338: train loss 0.14667. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 12:11:22 - INFO - trainer -   epoch train loss: 0.225186


train loss: 0.2251862329719341


04/18/2024 12:11:28 - INFO - trainer -   test loss: 0.234559
epoch 67 iter 338: train loss 0.14115. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.10it/s]
04/18/2024 12:14:10 - INFO - trainer -   epoch train loss: 0.223106


train loss: 0.2231062101667258


04/18/2024 12:14:17 - INFO - trainer -   test loss: 0.233396
epoch 68 iter 338: train loss 0.14516. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 12:16:58 - INFO - trainer -   epoch train loss: 0.220932


train loss: 0.22093218595756542


04/18/2024 12:17:05 - INFO - trainer -   test loss: 0.233212
epoch 69 iter 338: train loss 0.13671. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 12:19:46 - INFO - trainer -   epoch train loss: 0.218656


train loss: 0.21865648514753247


04/18/2024 12:19:53 - INFO - trainer -   test loss: 0.232990
epoch 70 iter 338: train loss 0.13541. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 12:22:34 - INFO - trainer -   epoch train loss: 0.216663


train loss: 0.2166628715387136


04/18/2024 12:22:41 - INFO - trainer -   test loss: 0.232660
epoch 71 iter 338: train loss 0.13412. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.10it/s]
04/18/2024 12:25:23 - INFO - trainer -   epoch train loss: 0.214530


train loss: 0.21452979664359473


04/18/2024 12:25:29 - INFO - trainer -   test loss: 0.232430
epoch 72 iter 338: train loss 0.13504. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 12:28:11 - INFO - trainer -   epoch train loss: 0.212571


train loss: 0.21257133087401545


04/18/2024 12:28:18 - INFO - trainer -   test loss: 0.231351
epoch 73 iter 338: train loss 0.13203. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 12:30:59 - INFO - trainer -   epoch train loss: 0.210503


train loss: 0.21050318271185445


04/18/2024 12:31:06 - INFO - trainer -   test loss: 0.231708
epoch 74 iter 338: train loss 0.12828. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.10it/s]
04/18/2024 12:33:47 - INFO - trainer -   epoch train loss: 0.208540


train loss: 0.20854031292386463


04/18/2024 12:33:54 - INFO - trainer -   test loss: 0.231332
epoch 75 iter 338: train loss 0.12538. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.10it/s]
04/18/2024 12:36:35 - INFO - trainer -   epoch train loss: 0.206599


train loss: 0.20659915931456913


04/18/2024 12:36:42 - INFO - trainer -   test loss: 0.231070
epoch 76 iter 338: train loss 0.12100. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 12:39:23 - INFO - trainer -   epoch train loss: 0.204646


train loss: 0.20464576558817102


04/18/2024 12:39:30 - INFO - trainer -   test loss: 0.231840
epoch 77 iter 338: train loss 0.12311. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 12:42:11 - INFO - trainer -   epoch train loss: 0.202638


train loss: 0.2026377609301809


04/18/2024 12:42:18 - INFO - trainer -   test loss: 0.231184
epoch 78 iter 338: train loss 0.11791. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 12:44:59 - INFO - trainer -   epoch train loss: 0.200685


train loss: 0.20068538347176745


04/18/2024 12:45:06 - INFO - trainer -   test loss: 0.231445
epoch 79 iter 338: train loss 0.11778. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 12:47:47 - INFO - trainer -   epoch train loss: 0.198781


train loss: 0.1987807805427408


04/18/2024 12:47:54 - INFO - trainer -   test loss: 0.231601
epoch 80 iter 338: train loss 0.11472. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 12:50:35 - INFO - trainer -   epoch train loss: 0.196897


train loss: 0.19689681521647096


04/18/2024 12:50:41 - INFO - trainer -   test loss: 0.231980
epoch 81 iter 338: train loss 0.11746. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 12:53:22 - INFO - trainer -   epoch train loss: 0.195037


train loss: 0.19503685221032055


04/18/2024 12:53:29 - INFO - trainer -   test loss: 0.232289
epoch 82 iter 338: train loss 0.11708. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 12:56:10 - INFO - trainer -   epoch train loss: 0.193204


train loss: 0.1932035354034739


04/18/2024 12:56:17 - INFO - trainer -   test loss: 0.233088
epoch 83 iter 338: train loss 0.10857. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 12:58:58 - INFO - trainer -   epoch train loss: 0.191341


train loss: 0.19134069768200934


04/18/2024 12:59:05 - INFO - trainer -   test loss: 0.232947
epoch 84 iter 338: train loss 0.10948. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 13:01:46 - INFO - trainer -   epoch train loss: 0.189301


train loss: 0.18930127610147526


04/18/2024 13:01:53 - INFO - trainer -   test loss: 0.233881
epoch 85 iter 338: train loss 0.10278. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 13:04:34 - INFO - trainer -   epoch train loss: 0.187581


train loss: 0.18758055099607568


04/18/2024 13:04:41 - INFO - trainer -   test loss: 0.234061
epoch 86 iter 338: train loss 0.10855. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 13:07:22 - INFO - trainer -   epoch train loss: 0.185725


train loss: 0.18572521143782456


04/18/2024 13:07:28 - INFO - trainer -   test loss: 0.234528
epoch 87 iter 338: train loss 0.10096. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 13:10:10 - INFO - trainer -   epoch train loss: 0.183909


train loss: 0.18390912224145767


04/18/2024 13:10:17 - INFO - trainer -   test loss: 0.234986
epoch 88 iter 338: train loss 0.10321. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 13:12:58 - INFO - trainer -   epoch train loss: 0.182078


train loss: 0.18207765045331292


04/18/2024 13:13:05 - INFO - trainer -   test loss: 0.235767
epoch 89 iter 338: train loss 0.09847. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 13:15:45 - INFO - trainer -   epoch train loss: 0.180185


train loss: 0.18018544689480182


04/18/2024 13:15:52 - INFO - trainer -   test loss: 0.236710
epoch 90 iter 338: train loss 0.09711. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 13:18:33 - INFO - trainer -   epoch train loss: 0.178350


train loss: 0.17835009955199418


04/18/2024 13:18:40 - INFO - trainer -   test loss: 0.238691
epoch 91 iter 338: train loss 0.09841. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 13:21:21 - INFO - trainer -   epoch train loss: 0.176623


train loss: 0.1766226905505214


04/18/2024 13:21:28 - INFO - trainer -   test loss: 0.238122
epoch 92 iter 338: train loss 0.09604. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.11it/s]
04/18/2024 13:24:10 - INFO - trainer -   epoch train loss: 0.174805


train loss: 0.1748046876538468


04/18/2024 13:24:17 - INFO - trainer -   test loss: 0.239304
epoch 93 iter 338: train loss 0.09308. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 13:26:58 - INFO - trainer -   epoch train loss: 0.173095


train loss: 0.17309484520932567


04/18/2024 13:27:05 - INFO - trainer -   test loss: 0.240776
epoch 94 iter 338: train loss 0.09109. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 13:29:45 - INFO - trainer -   epoch train loss: 0.171305


train loss: 0.1713054867400884


04/18/2024 13:29:52 - INFO - trainer -   test loss: 0.241263
epoch 95 iter 338: train loss 0.09282. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 13:32:33 - INFO - trainer -   epoch train loss: 0.169452


train loss: 0.16945195918941217


04/18/2024 13:32:40 - INFO - trainer -   test loss: 0.241127
epoch 96 iter 338: train loss 0.08830. lr 3.000000e-05: 100%|██████████| 339/339 [02:41<00:00,  2.10it/s]
04/18/2024 13:35:22 - INFO - trainer -   epoch train loss: 0.167844


train loss: 0.16784417176492797


04/18/2024 13:35:29 - INFO - trainer -   test loss: 0.242320
epoch 97 iter 338: train loss 0.08867. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 13:38:10 - INFO - trainer -   epoch train loss: 0.166065


train loss: 0.16606513300916087


04/18/2024 13:38:17 - INFO - trainer -   test loss: 0.243296
epoch 98 iter 338: train loss 0.08518. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 13:40:58 - INFO - trainer -   epoch train loss: 0.164277


train loss: 0.16427688830282133


04/18/2024 13:41:04 - INFO - trainer -   test loss: 0.245290
epoch 99 iter 338: train loss 0.08550. lr 3.000000e-05: 100%|██████████| 339/339 [02:40<00:00,  2.11it/s]
04/18/2024 13:43:46 - INFO - trainer -   epoch train loss: 0.162673


train loss: 0.16267312159844205


04/18/2024 13:43:52 - INFO - trainer -   test loss: 0.245850
epoch 100 iter 251: train loss 0.15759. lr 3.000000e-05:  74%|███████▍  | 252/339 [02:03<00:41,  2.10it/s]Exception in thread Thread-6:
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/local/lib/python3.10/dist-packages/tensorboard/summary/writer/event_file_writer.py", line 233, in run
    self._record_writer.write(data)
  File "/usr/local/lib/python3.10/dist-packages/tensorboard/summary/writer/record_writer.py", line 40, in write
    self._writer.write(header + header_crc + data + footer_crc)
  File "/usr/local/lib/python3.10/dist-packages/tensorboard/compat/tensorflow_stub/io/gfile.py", line 766, in write
    self.fs.append(self.filename, file_content, self.binary_mode)
  File "/usr/local/lib/python3.10/dist-packages/tensorboard/compat/tensorflow_stub/io/gfile.py", line 160, in append
    self._write(filename, file_content, "ab" if binar

KeyboardInterrupt: 

Error in callback <bound method _WandbInit._pause_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7f08cb3597e0>> (for post_run_cell), with arguments args (<ExecutionResult object at 7f0896b716c0, execution_count=11 error_before_exec=None error_in_exec= info=<ExecutionInfo object at 7f0896b717e0, raw_cell="epochs = 270
embedding = 512
heads = 4
layers = 4
.." store_history=True silent=False shell_futures=True cell_id=vscode-notebook-cell://attached-container%2B7b22636f6e7461696e65724e616d65223a222f64616c6d617a7a6f5f6368726f6d61666c6f77227d@ssh-remote%2B130.237.3.110/workspace/src/run.ipynb#X12sdnNjb2RlLXJlbW90ZQ%3D%3D> result=None>,),kwargs {}:


BrokenPipeError: [Errno 32] Broken pipe

: 