In [1]:
import torch
from torch.utils.data import DataLoader
from torch.optim import AdamW
from transformers import GPT2Tokenizer, get_linear_schedule_with_warmup
import random
import numpy as np
# For reproducibility
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import pandas as pd

df = pd.read_csv("data/lyrics_midi_data.csv")
df.head()

Unnamed: 0,lyrics,midi_path
0,In sleep he sang to me\nin dreams he came\ntha...,lmd-full_and_reddit_MIDI_dataset/sentenceWord_...
1,I have plans and schemes\nAnd I have hopes and...,lmd-full_and_reddit_MIDI_dataset/sentenceWord_...
2,I get up and nothing gets me You got\nit tough...,lmd-full_and_reddit_MIDI_dataset/sentenceWord_...
3,Man a hot like seven inches\nfrom the midday I...,lmd-full_and_reddit_MIDI_dataset/sentenceWord_...
4,We come from the land of the ice and snow\nfro...,lmd-full_and_reddit_MIDI_dataset/sentenceWord_...


In [3]:
from torch.utils.data import random_split
from pathlib import Path
from miditok import TSD, TokenizerConfig

# Load MIDI Tokenizer
config = TokenizerConfig(
    use_velocities=False,
    use_chords=False,
    use_rests=False,
    use_tempos=False,
    use_time_signatures=False,
)

midi_tokenizer = TSD(config)
midi_tokenizer

# Load GPT-2 tokenizer
lyrics_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
lyrics_tokenizer.add_special_tokens({'pad_token': '<|pad|>'})
lyrics_vocab_size = len(lyrics_tokenizer)

In [4]:
from src.data import randomize_midi_pitch, LyricsMidiDataset
from torch.utils.data import random_split

batch_size = 4

# Create dataset and dataloader
dataset = LyricsMidiDataset(df, lyrics_tokenizer, midi_tokenizer, max_length=512, root_dir='data', augment=True)
train_size = int(0.8 * len(dataset))
train_dataset, val_dataset = random_split(dataset, [train_size, len(dataset) - train_size])

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)

In [6]:
from src.model import LyricsGenerator
from src.utils import train, validate, save_checkpoint, load_checkpoint
# Instantiate model
model = LyricsGenerator(lyrics_tokenizer=lyrics_tokenizer, d_model=768, max_lyrics_length=512, max_midi_length=512)
model.to(device)
print("")

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`





In [8]:
# Train the model
epochs = 6
optimizer = AdamW(model.parameters(), lr=5e-5)
total_steps = len(train_dataloader) * epochs
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

train(model, train_dataloader, val_dataloader, optimizer, scheduler, epochs, device=device, lyrics_tokenizer=lyrics_tokenizer, save_every=3, midi_tokenizer=midi_tokenizer)

  0%|          | 0/2236 [00:00<?, ?it/s]

Epoch 1:   7%|▋         | 154/2236 [02:40<36:10,  1.04s/it, loss=5.63]


KeyboardInterrupt: 

In [1]:
import os
import argparse
import pandas as pd
import torch
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup
from src.model import LyricsGenerator
from src.utils import initialize_lyrics_tokenizer, initialize_midi_tokenizer, generate_lyrics, load_checkpoint
from src.data import prepare_dataloaders


def parse_arguments():
    parser = argparse.ArgumentParser(description="Generate lyrics conditioned on MIDI input.")
    parser.add_argument("--data_dir", type=str, default="data", help="Path to the lyrics_midi_dataset directory")
    parser.add_argument("--checkpoint", type=str, default="model_checkpoint/best_checkpoint.pth", help="Path to the trained model checkpoint")
    parser.add_argument("--midi_index", type=int, default=0, help="Index of the MIDI file in the dataset")
    parser.add_argument("--max_midi_length", type=int, default=512, help="Maximum length of MIDI token sequence")
    parser.add_argument("--max_lyrics_length", type=int, default=512, help="Maximum length of lyrics sequence")
    parser.add_argument("--max_length", type=int, default=512,
                        help="Maximum sequence length for tokenization")
    parser.add_argument("--num_beams", type=int, default=5, help="Number of beams for beam search")
    parser.add_argument("--input_text", type=str, default=None, help="Optional input text to condition lyrics generation")
    parser.add_argument("--num_warmup_steps", type=int, default=0,
                        help="number of warm up step")
    parser.add_argument("--batch_size", type=int, default=4,
                        help="Batch size for training and validation")
    parser.add_argument("--epochs", type=int, default=10,
                        help="Number of training epochs")
    return parser.parse_args()

def main():
    args = parse_arguments()

    dataset_path = os.path.join(args.data_dir, "lyrics_midi_data.csv")
    if not os.path.isfile(dataset_path):
        raise FileNotFoundError(f"Dataset file not found at {dataset_path}")
    df = pd.read_csv(dataset_path)

    if args.midi_index >= len(df):
        raise IndexError(f"MIDI index {args.midi_index} is out of range. Dataset has {len(df)} entries.")

    midi_path = os.path.join(args.data_dir, df["midi_path"][args.midi_index])

    # tokenizers
    lyrics_tokenizer = initialize_lyrics_tokenizer()
    midi_tokenizer = initialize_midi_tokenizer()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    train_dataloader, val_dataloader = prepare_dataloaders(
        df=df,
        lyrics_tokenizer=lyrics_tokenizer,
        midi_tokenizer=midi_tokenizer,
        max_length=args.max_length,
        root_dir=args.data_dir,
        batch_size=args.batch_size,
        )
    # model
    model = LyricsGenerator(
        lyrics_tokenizer=lyrics_tokenizer,
        d_model=768,
        max_lyrics_length=args.max_lyrics_length,
        max_midi_length=args.max_midi_length,
    )
    if not os.path.isfile(args.checkpoint):
        raise FileNotFoundError(f"Checkpoint file not found at {args.checkpoint}")
    optimizer = AdamW(model.parameters(), lr=1e-4)
    total_steps = len(train_dataloader) * args.epochs
    scheduler = get_linear_schedule_with_warmup(optimizer, num_training_steps=total_steps,
                    num_warmup_steps=args.num_warmup_steps)
    model, optimizer, scheduler, start_epoch = load_checkpoint(model, optimizer, scheduler, args.checkpoint, device)
    model.eval()

    # Generate
    generated_lyrics = generate_lyrics(
        model=model,
        midi_path=midi_path,
        lyrics_tokenizer=lyrics_tokenizer,
        midi_tokenizer=midi_tokenizer,
        max_midi_length=args.max_midi_length,
        max_lyrics_length=args.max_lyrics_length,
        num_beams=args.num_beams,
        input_text=args.input_text
    )

    print("\nGenerated Lyrics:")
    print(generated_lyrics)


if __name__ == "__main__":
    main()

  from .autonotebook import tqdm as notebook_tqdm
usage: ipykernel_launcher.py [-h] [--data_dir DATA_DIR]
                             [--checkpoint CHECKPOINT]
                             [--midi_index MIDI_INDEX]
                             [--max_midi_length MAX_MIDI_LENGTH]
                             [--max_lyrics_length MAX_LYRICS_LENGTH]
                             [--max_length MAX_LENGTH] [--num_beams NUM_BEAMS]
                             [--input_text INPUT_TEXT]
                             [--num_warmup_steps NUM_WARMUP_STEPS]
                             [--batch_size BATCH_SIZE] [--epochs EPOCHS]
ipykernel_launcher.py: error: unrecognized arguments: --f=/home/onyxia/.local/share/jupyter/runtime/kernel-v34f03e6a62b6dbdb6d79564c77063eab6e9113c35.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
