# FNet Model for Melody Generation
This model will be trained on melodies extracted from MIDI data.

In [1]:
from datasets import load_dataset
from os import listdir

text_dir = "./text_files/ghibli_dataset/augmented/"
# listdir returns entries in arbitrary order, so sort for a reproducible song
# order, and skip hidden entries (e.g. macOS ".DS_Store") that are not songs.
file_names = sorted(f for f in listdir(text_dir) if not f.startswith("."))

files = [text_dir + f for f in file_names]

# Load training data: one dataset per song file, so that later cells can
# iterate song by song through song["train"]["text"].
ghibli_dataset = [load_dataset("text", data_files=f) for f in files]
print(len(ghibli_dataset))

Using custom data configuration default-7f2f991de31a5bb7
Found cached dataset text (/Users/jonathan/.cache/huggingface/datasets/text/default-7f2f991de31a5bb7/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2)
100%|██████████| 1/1 [00:00<00:00, 75.62it/s]
Using custom data configuration default-2a3fabbf8920fe7d
Found cached dataset text (/Users/jonathan/.cache/huggingface/datasets/text/default-2a3fabbf8920fe7d/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2)
100%|██████████| 1/1 [00:00<00:00, 999.12it/s]
Using custom data configuration default-ebcbe56b131d7966
Found cached dataset text (/Users/jonathan/.cache/huggingface/datasets/text/default-ebcbe56b131d7966/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2)
100%|██████████| 1/1 [00:00<00:00, 678.47it/s]
Using custom data configuration default-5369bbf16a9c0635
Found cached dataset text (/Users/jonathan/.cache/huggingface/datasets/text/default-5369bbf16a9c0635/0.0.0/cb1e9b

402





In [2]:
from transformers import GPT2TokenizerFast

def get_training_corpus(dataset=None):
    """Yield every measure string of every song, in order.

    Args:
        dataset: Iterable of per-song datasets; each item must expose its
            lines as ``song["train"]["text"]``. When omitted, the
            notebook-level ``ghibli_dataset`` is used. It is looked up when
            iteration starts instead of being frozen when this function is
            defined, so re-loading the data is picked up without redefining
            the function.

    Yields:
        str: One measure (one entry of a song's ``text`` column) at a time.
    """
    if dataset is None:
        dataset = ghibli_dataset
    for song in dataset:
        yield from song["train"]["text"]

# Train tokenizer
# Start from the pretrained GPT-2 tokenizer, then learn a fresh vocabulary
# from the melody corpus.
pretrained_tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
tokenizer = pretrained_tokenizer.train_new_from_iterator(
    get_training_corpus(),
    vocab_size=9999,
)
# Persist the trained tokenizer; the returned file paths are the cell output.
tokenizer.save_pretrained("ghibli-tokenizer")

# Token dataset 







('ghibli-tokenizer/tokenizer_config.json',
 'ghibli-tokenizer/special_tokens_map.json',
 'ghibli-tokenizer/vocab.json',
 'ghibli-tokenizer/merges.txt',
 'ghibli-tokenizer/added_tokens.json',
 'ghibli-tokenizer/tokenizer.json')

In [3]:
import csv
import pandas as pd

from random import randint
from datasets import Dataset

# Create single training dataset
# Every output line is a window of `max_seq` consecutive measures of one song,
# joined by spaces; window starts advance by a random 1..max_skip measures.
single_dataset_file = "./text_files/ghibli_dataset_file.txt"
max_seq = 4
max_skip = 2

with open(single_dataset_file, "w", newline="", encoding="utf-8") as f:
    for song in ghibli_dataset:
        data = song["train"]["text"]
        ptr = 0
        # `<=` keeps the final full window, so the last measure of each song
        # is used too. `ptr` only grows, so once a window no longer fits no
        # later one can fit either and the loop may stop.
        while ptr + max_seq <= len(data):
            f.write(" ".join(data[ptr: ptr + max_seq]) + "\n")
            ptr += randint(1, max_skip)

In [4]:
full_train_dataset = load_dataset("text", data_files=single_dataset_file)
print(full_train_dataset)

def tokenize_function(example):
    """Tokenize the ``text`` column of a batch with the trained tokenizer.

    Args:
        example: Batch handed over by ``map(..., batched=True)``; its
            ``"text"`` entry holds the lines to encode.

    Returns:
        The tokenizer output for those lines, with truncation enabled.
    """
    return tokenizer(example["text"], truncation=True)

tokenized_datasets = full_train_dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(["text"])
print(tokenized_datasets)

# Fixed seed so the 80/20 train/test split is identical on every run.
tokenized_datasets = tokenized_datasets["train"].train_test_split(test_size=0.2, seed=42)
tokenized_datasets

Using custom data configuration default-97793ce76f89b22e


Downloading and preparing dataset text/default to /Users/jonathan/.cache/huggingface/datasets/text/default-97793ce76f89b22e/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2...


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 11397.57it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 955.86it/s]
                                                        

Dataset text downloaded and prepared to /Users/jonathan/.cache/huggingface/datasets/text/default-97793ce76f89b22e/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2. Subsequent calls will reuse this data.


100%|██████████| 1/1 [00:00<00:00, 778.89it/s]


DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 27598
    })
})


100%|██████████| 28/28 [00:03<00:00,  9.06ba/s]

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 27598
    })
})





DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 22078
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 5520
    })
})

In [5]:
import numpy as np
import evaluate

from transformers import GPT2LMHeadModel, GPT2Config, TrainingArguments, Trainer, DataCollatorForLanguageModeling

# Load the "accuracy" metric through the `evaluate` library.
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Compute next-token accuracy for a causal language model.

    Args:
        eval_pred: Pair ``(logits, labels)``. ``logits`` may be raw scores
            with one more axis than ``labels`` (the vocabulary axis, last),
            or token ids that were already arg-maxed and have the same shape
            as ``labels``. Label positions equal to ``-100`` are ignored.

    Returns:
        dict: ``{"accuracy": float}``, the fraction of non-ignored target
        tokens that were predicted correctly (``0.0`` if there are none).
    """
    logits, labels = eval_pred
    if isinstance(logits, tuple):
        # Some models return extra outputs next to the logits; keep the logits.
        logits = logits[0]
    logits = np.asarray(logits)
    labels = np.asarray(labels)

    if logits.ndim == labels.ndim + 1:
        predictions = np.argmax(logits, axis=-1)
    else:
        predictions = logits

    # The output at position t is the prediction for token t + 1, so compare
    # predictions[..., :-1] against labels[..., 1:].
    shifted_predictions = predictions[..., :-1]
    shifted_labels = labels[..., 1:]

    # Accuracy is computed with NumPy on the flattened, masked tokens; the
    # result has the same {"accuracy": float} shape as the accuracy metric.
    keep = shifted_labels != -100
    total = int(keep.sum())
    if total == 0:
        return {"accuracy": 0.0}
    correct = int((shifted_predictions[keep] == shifted_labels[keep]).sum())
    return {"accuracy": correct / total}


# GPT-2 tokenizers have no pad token; reuse the end-of-text token so the
# collator can pad batches.
tokenizer.pad_token = tokenizer.eos_token

# Size the embedding table from the tokenizer so every token id is in range,
# and point the special-token ids at the newly trained vocabulary (the
# GPT2Config defaults refer to the original GPT-2 vocabulary).
config = GPT2Config(
    vocab_size=len(tokenizer),
    n_positions=1024,
    bos_token_id=tokenizer.bos_token_id,
    eos_token_id=tokenizer.eos_token_id,
)
model = GPT2LMHeadModel(config=config)

# `label_names` is left at its default: the language-modeling collator adds a
# `labels` entry to each batch. The dataset only has `input_ids` and
# `attention_mask`, so requiring `token_type_ids` as a label meant the Trainer
# never found its labels during evaluation.
training_args = TrainingArguments(
    "trainer",
    evaluation_strategy="steps",
    eval_steps=500,
    load_best_model_at_end=True,
    num_train_epochs=2,
)
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
trainer = Trainer(
    model,
    training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

In [6]:
# Run training with the schedule configured in `training_args`
# (evaluation every `eval_steps` steps).
trainer.train()

***** Running training *****
  Num examples = 22078
  Num Epochs = 2
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 5520
  Number of trainable parameters = 93521664
  0%|          | 0/5520 [00:00<?, ?it/s]You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  9%|▉         | 500/5520 [1:23:16<9:26:38,  6.77s/it] ***** Running Evaluation *****
  Num examples = 5520
  Batch size = 8


{'loss': 2.1535, 'learning_rate': 4.547101449275363e-05, 'epoch': 0.18}




: 

: 

In [None]:
# from os import listdir
# from random import choice

# from song import Song


# num_of_songs = 15

# midi_dir = "../examples/ghibli_dataset"
# files = listdir(midi_dir)

# songs = []

# while num_of_songs > 0:
#     s = Song(midi_dir + "/" + choice(files))
#     if s.parsed:
#         songs.append(s)
#         num_of_songs-= 1
        
# total_notes = sum([s.num_notes for s in songs])
# print(f"Total number of notes: {total_notes}")

In [None]:
# Update
# Create scale strings
# from music21.stream import Part
# from music21.duration import Duration
# from music21.note import Note

# from instrument import Instrument

# base_scale = ['C', 'D', 'E', 'F', 'G', 'A', 'B']

# scales = []

# base_part = Part()
# for i, n in enumerate(base_scale):
#     # Limitation: each scale will have the same random attributes as the base
#     note = Note(n)
#     note.quarterLength = choice([1/2, 1/4, 1/8])
#     note.volume.velocity = choice(range(60,100))
#     note.offset = i
#     base_part.append(note)

# for i in range(12):
#     base_part = base_part.transpose(i)
#     scales.append(Instrument(base_part))
# print(len(scales))
# print(scales[0])

In [None]:
from random import randint
# Dataset augmentation
# Builds (input, output) text pairs: a random 1..MAX_IN_MEASURES measures
# followed by the next random 1..MAX_OUT_MEASURES measures of the same part.
# NOTE(review): `songs` is only built by the song-loading cell, which is
# currently commented out; it must be defined before this cell can run.
input_seq = []
output_seq = []

MAX_IN_MEASURES = 2
MAX_OUT_MEASURES = 2
STEP = 1

for song in songs:
    for part in song.parts:
        measures = part.measures
        for i in range(0, len(measures), STEP):
            in_measures = randint(1, MAX_IN_MEASURES)
            out_measures = randint(1, MAX_OUT_MEASURES)
            split = i + in_measures
            end = split + out_measures
            # `<=`: a pair that ends exactly on the last measure is valid.
            if end <= len(measures):
                input_seq.append(" ".join(str(measures[k]) for k in range(i, split)))
                output_seq.append(" ".join(str(measures[k]) for k in range(split, end)))

# Show one example pair only when at least one was produced.
if input_seq:
    print(input_seq[0])
    print(output_seq[0])
print(len(input_seq))
print(len(output_seq))

In [None]:
# Generate vocab
# vocab = []

# for i in range(16):
#     for j in range(16):  
#         vocab.append(f"{i+1}/{j+1}")
# for i in range(10000):
#     vocab.append(str(i))

# vocab = " ".join(vocab)


In [None]:
import torch

from transformers import FNetForNextSentencePrediction, FNetTokenizerFast, FNetConfig

# Tokenize the prompts and responses
# Both sides share the same settings: padded/truncated to 512 tokens and
# returned as PyTorch tensors.
encode_options = dict(
    add_special_tokens=True,
    return_tensors='pt',
    padding='max_length',
    truncation=True,
    max_length=512,
)
tokenizer = FNetTokenizerFast.from_pretrained("google/fnet-base")
encoded_input = tokenizer(input_seq, **encode_options)
encoded_output = tokenizer(output_seq, **encode_options)

In [None]:
# The embedding table must cover every id the tokenizer can emit. The
# pretrained "google/fnet-base" tokenizer loaded above has more than 9999
# entries, so size the model from the tokenizer instead of a fixed constant.
config = FNetConfig(
    vocab_size=len(tokenizer),
)
# Create an instance of the model
model = FNetForNextSentencePrediction(config)

In [None]:
from transformers import Trainer, TrainingArguments

# Training
# NOTE(review): `train_dataset` is not defined anywhere in this notebook, and
# `evaluation_strategy='steps'` additionally needs an `eval_dataset`. Both have
# to be built (e.g. from the tokenized input/output pairs plus a `labels`
# column) before this cell can run — TODO.
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)
# `nn` was never imported; use the already-imported `torch` instead.
# Trainer has no `loss_fn` argument (the model returns its own loss when a
# batch contains labels), so this object is kept only for manual use.
loss_fn = torch.nn.CrossEntropyLoss()

training_args = TrainingArguments(
    output_dir='./results',          # output directory
    evaluation_strategy='steps',     # evaluation strategy
    eval_steps=100,                  # number of steps between evaluations
    per_device_train_batch_size=32,  # batch size
    per_device_eval_batch_size=32,   # batch size for evaluation
    weight_decay=0.01,               # weight decay
    learning_rate=5e-5,              # learning rate
    num_train_epochs=5,              # number of training epochs
    logging_dir='./logs',            # directory to save logs
    logging_steps=100                # number of steps between logging events
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    # No `data_collator` here: the notebook-level one is the GPT-2
    # language-modeling collator, which replaces `labels` with token ids and
    # does not fit next-sentence prediction. The Trainer default is used.
    # `optimizers` must be an (optimizer, lr_scheduler) pair; None lets the
    # Trainer create its default scheduler.
    optimizers=(optimizer, None),
)

# Start training
trainer.train()