In [None]:
def preprocess_with_multiple_time_tokens(filepath, output_file):
    """
    Preprocess melody dataset to include alternating time tokens (T1, T2) after each pitch/rest.

    Args:
        filepath (str): Path to the input melody dataset.
        output_file (str): Path to save the processed dataset.
    """
    with open(filepath, 'r', encoding='utf-8') as file:
        melodies = file.readlines()

    processed_melodies = []

    for melody in melodies:
        # Remove spaces and ensure each character is treated individually
        notes = ''.join(melody.strip().split())

        # Pair each note/rest with alternating time tokens (T1 and T2)
        combined = []
        time_tokens = ['T1', 'T2']
        for i, note in enumerate(notes):
            combined.append(note)
            combined.append(time_tokens[i % len(time_tokens)])  # Alternate T1 and T2

        # Combine into a single line and append
        processed_melodies.append(' '.join(combined))

    # Save processed melodies
    with open(output_file, 'w', encoding='utf-8') as file:
        file.write('\n'.join(processed_melodies))

    print(f"Pitch+Multiple Time Tokens preprocessed data saved to {output_file}.")

# Example usage
preprocess_with_multiple_time_tokens('inputMelodiesAugmented.txt', 'processedMelodiesPitchTimeMultiple.txt')


Pitch+Multiple Time Tokens preprocessed data saved to processedMelodiesPitchTimeMultiple.txt.


In [None]:
# Read the processed dataset
with open('/content/processedMelodiesPitchTimeMultiple.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Create vocabulary for pitch and time tokens
tokens = sorted(list(set(text.split())))  # Split on spaces to handle multi-character tokens
stoi = {token: i for i, token in enumerate(tokens)}
itos = {i: token for i, token in enumerate(tokens)}
vocab_size = len(tokens)
print("Vocabulary size:", vocab_size)


Vocabulary size: 15


In [None]:
print(tokens)

['A', 'B', 'C', 'D', 'E', 'F', 'G', 'R', 'T1', 'T2', 'a', 'c', 'd', 'f', 'g']


In [None]:
# Tokenizer for multi-character tokens
def encode(s):
    tokens = s.split()  # Split on spaces to handle multi-character tokens
    return [stoi[token] for token in tokens]

def decode(l):
    return ' '.join([itos[i] for i in l])  # Join tokens with spaces


In [None]:
import torch
import torch.nn as nn
from torch.nn import functional as F

# hyperparameters
batch_size = 64 # how many independent sequences will we process in parallel?
block_size = 256 # what is the maximum context length for predictions?
max_iters = 5000
eval_interval = 500
learning_rate = 3e-4
device = 'cuda' if torch.cuda.is_available() else 'cpu'
eval_iters = 200
n_embd = 384
n_head = 6
n_layer = 6
dropout = 0.2
# ------------

torch.manual_seed(1337)



# Train and test splits
data = torch.tensor(encode(text), dtype=torch.long)
n = int(0.9*len(data)) # first 90% will be train, rest val
train_data = data[:n]
val_data = data[n:]

# data loading
def get_batch(split):
    # generate a small batch of data of inputs x and targets y
    data = train_data if split == 'train' else val_data
    ix = torch.randint(len(data) - block_size, (batch_size,))
    x = torch.stack([data[i:i+block_size] for i in ix])
    y = torch.stack([data[i+1:i+block_size+1] for i in ix])
    x, y = x.to(device), y.to(device)
    return x, y

@torch.no_grad()
def estimate_loss():
    out = {}
    model.eval()
    for split in ['train', 'val']:
        losses = torch.zeros(eval_iters)
        for k in range(eval_iters):
            X, Y = get_batch(split)
            logits, loss = model(X, Y)
            losses[k] = loss.item()
        out[split] = losses.mean()
    model.train()
    return out

class Head(nn.Module):
    """ one head of self-attention """

    def __init__(self, head_size):
        super().__init__()
        self.key = nn.Linear(n_embd, head_size, bias=False)
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)
        self.register_buffer('tril', torch.tril(torch.ones(block_size, block_size)))

        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        # input of size (batch, time-step, channels)
        # output of size (batch, time-step, head size)
        B,T,C = x.shape
        k = self.key(x)   # (B,T,hs)
        q = self.query(x) # (B,T,hs)
        # compute attention scores ("affinities")
        wei = q @ k.transpose(-2,-1) * k.shape[-1]**-0.5 # (B, T, hs) @ (B, hs, T) -> (B, T, T)
        wei = wei.masked_fill(self.tril[:T, :T] == 0, float('-inf')) # (B, T, T)
        wei = F.softmax(wei, dim=-1) # (B, T, T)
        wei = self.dropout(wei)
        # perform the weighted aggregation of the values
        v = self.value(x) # (B,T,hs)
        out = wei @ v # (B, T, T) @ (B, T, hs) -> (B, T, hs)
        return out

class MultiHeadAttention(nn.Module):
    """ multiple heads of self-attention in parallel """

    def __init__(self, num_heads, head_size):
        super().__init__()
        self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
        self.proj = nn.Linear(head_size * num_heads, n_embd)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        out = torch.cat([h(x) for h in self.heads], dim=-1)
        out = self.dropout(self.proj(out))
        return out

class FeedFoward(nn.Module):
    """ a simple linear layer followed by a non-linearity """

    def __init__(self, n_embd):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_embd, 4 * n_embd),
            nn.ReLU(),
            nn.Linear(4 * n_embd, n_embd),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        return self.net(x)

class Block(nn.Module):
    """ Transformer block: communication followed by computation """

    def __init__(self, n_embd, n_head):
        # n_embd: embedding dimension, n_head: the number of heads we'd like
        super().__init__()
        head_size = n_embd // n_head
        self.sa = MultiHeadAttention(n_head, head_size)
        self.ffwd = FeedFoward(n_embd)
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)

    def forward(self, x):
        x = x + self.sa(self.ln1(x))
        x = x + self.ffwd(self.ln2(x))
        return x

class GPTLanguageModel(nn.Module):

    def __init__(self):
        super().__init__()
        # each token directly reads off the logits for the next token from a lookup table
        self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
        self.position_embedding_table = nn.Embedding(block_size, n_embd)
        self.blocks = nn.Sequential(*[Block(n_embd, n_head=n_head) for _ in range(n_layer)])
        self.ln_f = nn.LayerNorm(n_embd) # final layer norm
        self.lm_head = nn.Linear(n_embd, vocab_size)

        # better init, not covered in the original GPT video, but important, will cover in followup video
        self.apply(self._init_weights)

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)

    def forward(self, idx, targets=None):
        B, T = idx.shape

        # idx and targets are both (B,T) tensor of integers
        tok_emb = self.token_embedding_table(idx) # (B,T,C)
        pos_emb = self.position_embedding_table(torch.arange(T, device=device)) # (T,C)
        x = tok_emb + pos_emb # (B,T,C)
        x = self.blocks(x) # (B,T,C)
        x = self.ln_f(x) # (B,T,C)
        logits = self.lm_head(x) # (B,T,vocab_size)

        if targets is None:
            loss = None
        else:
            B, T, C = logits.shape
            logits = logits.view(B*T, C)
            targets = targets.view(B*T)
            loss = F.cross_entropy(logits, targets)

        return logits, loss

    def generate(self, idx, max_new_tokens):
        # idx is (B, T) array of indices in the current context
        for _ in range(max_new_tokens):
            # crop idx to the last block_size tokens
            idx_cond = idx[:, -block_size:]
            # get the predictions
            logits, loss = self(idx_cond)
            # focus only on the last time step
            logits = logits[:, -1, :] # becomes (B, C)
            # apply softmax to get probabilities
            probs = F.softmax(logits, dim=-1) # (B, C)
            # sample from the distribution
            idx_next = torch.multinomial(probs, num_samples=1) # (B, 1)
            # append sampled index to the running sequence
            idx = torch.cat((idx, idx_next), dim=1) # (B, T+1)
        return idx

model = GPTLanguageModel()
m = model.to(device)
# print the number of parameters in the model
print(sum(p.numel() for p in m.parameters())/1e6, 'M parameters')

# create a PyTorch optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

for iter in range(max_iters):

    # every once in a while evaluate the loss on train and val sets
    if iter % eval_interval == 0 or iter == max_iters - 1:
        losses = estimate_loss()
        print(f"step {iter}: train loss {losses['train']:.4f}, val loss {losses['val']:.4f}")

    # sample a batch of data
    xb, yb = get_batch('train')

    # evaluate the loss
    logits, loss = model(xb, yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()

# generate from the model
context = torch.zeros((1, 1), dtype=torch.long, device=device)
#print(decode(m.generate(context, max_new_tokens=500)[0].tolist()))
#open('more.txt', 'w').write(decode(m.generate(context, max_new_tokens=10000)[0].tolist()))

10.750479 M parameters
step 0: train loss 2.8427, val loss 2.8242
step 500: train loss 0.8932, val loss 0.8434
step 1000: train loss 0.7929, val loss 0.7560
step 1500: train loss 0.7363, val loss 0.7117
step 2000: train loss 0.7094, val loss 0.6934
step 2500: train loss 0.6974, val loss 0.6897
step 3000: train loss 0.6667, val loss 0.6776
step 3500: train loss 0.6399, val loss 0.6736
step 4000: train loss 0.6170, val loss 0.6820
step 4500: train loss 0.5787, val loss 0.6927
step 4999: train loss 0.5396, val loss 0.7102


In [None]:
torch.save(model.state_dict(), 'gpt_melody_model_time_and_pitch.pth')

In [None]:
model = GPTLanguageModel()
model.load_state_dict(torch.load('gpt_melody_model_time_and_pitch.pth'))
model.to(device)

  model.load_state_dict(torch.load('gpt_melody_model_time_and_pitch.pth'))


GPTLanguageModel(
  (token_embedding_table): Embedding(15, 384)
  (position_embedding_table): Embedding(256, 384)
  (blocks): Sequential(
    (0): Block(
      (sa): MultiHeadAttention(
        (heads): ModuleList(
          (0-5): 6 x Head(
            (key): Linear(in_features=384, out_features=64, bias=False)
            (query): Linear(in_features=384, out_features=64, bias=False)
            (value): Linear(in_features=384, out_features=64, bias=False)
            (dropout): Dropout(p=0.2, inplace=False)
          )
        )
        (proj): Linear(in_features=384, out_features=384, bias=True)
        (dropout): Dropout(p=0.2, inplace=False)
      )
      (ffwd): FeedFoward(
        (net): Sequential(
          (0): Linear(in_features=384, out_features=1536, bias=True)
          (1): ReLU()
          (2): Linear(in_features=1536, out_features=384, bias=True)
          (3): Dropout(p=0.2, inplace=False)
        )
      )
      (ln1): LayerNorm((384,), eps=1e-05, elementwise_affine=

In [None]:
from torch.utils.data import Dataset, DataLoader
import torch

class MelodyDataset(Dataset):
    def __init__(self, data, block_size):
        """
        Args:
            data (torch.Tensor): Encoded dataset as a 1D tensor.
            block_size (int): The context length for GPT.
        """
        self.data = data
        self.block_size = block_size

    def __len__(self):
        return len(self.data) - self.block_size

    def __getitem__(self, idx):
        """
        Returns:
            x (torch.Tensor): Input sequence of length block_size.
            y (torch.Tensor): Target sequence (next token for each input token).
        """
        x = torch.tensor(self.data[idx:idx + self.block_size], dtype=torch.long)
        y = torch.tensor(self.data[idx + 1:idx + self.block_size + 1], dtype=torch.long)
        return x, y


In [None]:
# Define dataset and dataloader
block_size = 256  # Adjust based on model's context size
batch_size = 64   # Number of sequences in a batch
dataset = MelodyDataset(encode(text), block_size)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [None]:
context = torch.zeros((1, 1), dtype=torch.long, device=device)
generated_melody = decode(model.generate(context, max_new_tokens=500)[0].tolist())
print("Generated Melody:", generated_melody)

Generated Melody: A T2 f T1 R T2 A T1 f T2 A T1 B T2 A T1 f T2 B T1 A T2 f T1 R T2 D T1 D T2 D T1 A T2 c T1 B T2 A T1 G T2 G T1 a T2 C T1 C T2 B T1 D T2 G T1 G T2 D T1 C T2 a T1 G T2 a T1 C T2 D T1 D T2 C T1 a T2 C T1 D T2 F T1 D T2 C T1 a T2 D T1 F T2 R T1 a T2 G T1 D T2 D T1 F T2 D T1 D T2 R T1 A T2 C T1 B T2 A T1 G T2 G T1 F T2 E T1 G T2 F T1 E T2 R T1 B T2 G T1 R T2 C T1 B T2 D T1 C T2 B T1 G T2 G T1 A T2 C T1 D T2 E T1 D T2 E T1 D T2 R T1 a T2 G T1 D T2 c T1 C T2 B T1 C T2 B T1 D T2 G T1 G T2 D T1 C T2 B T1 D T2 C T1 B T2 D T1 C T2 B T1 D T2 C T1 B T2 D T1 C T2 B T1 R T2 G T1 F T2 E T1 G T2 F T1 E T2 A T1 C T2 R T1 D T2 C T1 D T2 D T1 E T2 D T1 C T2 B T1 G T2 G T1 F T2 G T1 G T2 F T1 E T2 D T1 D T2 C T1 B T2 G T1 G T2 A T1 C T2 D T1 E T2 D T1 E T2 D T1 E T2 D T1 R T2 E T1 D T2 C T1 B T2 G T1 C T2 B T1 A T2 C T1 B T2 D T1 E T2 D T1 F T2 D T1 D T2 B T1 C T2 B T1 G T2 E T1 E T2 C T1 A T2 B T1 D T2 E T1 D T2 E T1 D T2 G T1 B T2 C T1 B T2 G T1 C T2 B T1 A T2 C T1 B T2 D T1 G T2 G T1 D 

In [None]:
def calculate_perplexity(model, data_loader, device):
    model.eval()
    total_loss = 0
    count = 0
    criterion = torch.nn.CrossEntropyLoss()

    with torch.no_grad():
        for x, y in data_loader:
            x, y = x.to(device), y.to(device)  # Move tensors to the device
            logits = model(x)

            # If logits is a tuple, get the first element
            if isinstance(logits, tuple):
                logits = logits[0]

            # Compute the loss
            loss = criterion(logits.view(-1, logits.size(-1)), y.view(-1))
            total_loss += loss.item() * x.size(0)
            count += x.size(0)

    avg_loss = total_loss / count
    perplexity = torch.exp(torch.tensor(avg_loss))
    return perplexity.item()
perplexity = calculate_perplexity(model, data_loader, device)
print(f"Pitch-and-time Perplexity: {perplexity}")

Pitch-and-time Perplexity: 1.7426496744155884


In [None]:
import numpy as np
from collections import Counter

def calculate_rhythm_consistency(generated_sequences, train_sequences):
    """
    Calculate rhythm consistency by comparing the distribution of time tokens
    in generated sequences to the training dataset.

    Args:
        generated_sequences (list of str): List of generated sequences (pitch + time).
        train_sequences (list of str): List of training sequences (pitch + time).

    Returns:
        float: Rhythm consistency score (closer to 1 indicates higher consistency).
    """
    def extract_time_tokens(sequences):
        time_tokens = []
        for seq in sequences:
            tokens = seq.split()  # Split on spaces
            time_tokens.extend([token for token in tokens if token.startswith('T')])  # Collect time tokens
            print(time_tokens)
        return time_tokens

    # Extract time tokens from generated and training data
    generated_time_tokens = extract_time_tokens(generated_sequences)
    train_time_tokens = extract_time_tokens(train_sequences)

    # Count occurrences of each time token
    gen_dist = Counter(generated_time_tokens)
    train_dist = Counter(train_time_tokens)

    # Normalize counts to get probabilities
    gen_total = sum(gen_dist.values())
    train_total = sum(train_dist.values())
    gen_probs = {k: v / gen_total for k, v in gen_dist.items()}
    train_probs = {k: v / train_total for k, v in train_dist.items()}

    # Calculate the rhythm consistency score (cosine similarity)
    time_tokens = set(gen_probs.keys()).union(train_probs.keys())
    gen_vector = np.array([gen_probs.get(t, 0) for t in time_tokens])
    train_vector = np.array([train_probs.get(t, 0) for t in time_tokens])
    cosine_similarity = np.dot(gen_vector, train_vector) / (np.linalg.norm(gen_vector) * np.linalg.norm(train_vector))

    return cosine_similarity


In [None]:
# Example usage:

# Generated sequences
generated_sequences = [
    "A T1 G T2 F T1 F T2 D T1 E T2 C T1 R T2 D T1 F T2 D T1 F T2 G T1 F T2 E T1 R T2 D T1 F T2 D T1 F T2 G T1 F T2 G T1 A T2 R T1 C T2 D T1 D T2 D T1 D T2 D T1 D T2 D T1 D T2 D T1 C T2 R T1 A T2 A T1 A T2 A T1 A T2 A T1 a T2 C T1 a T2 a T1 A T2 a T1 D T2 D T1 D T2 D T1 C T2 D T1 C T2 R T1 c T2 c T1 c T2 c T1 D T2 C T1 C T2 C T1 A T2 A T1 C T2 A T1 D T2 C T1 R T2 C T1 A T2 A T1 A T2 A T1 G T2 G T1 G T2 F T1 G T2 G T1 F T2 d T1 D T2 C T1 G T2 D T1 D T2 D T1 D T2 F T1 D T2 F T1 G T2 F T1 E T2 R T1 C T2 D T1 F T2 D T1 F T2 G T1 F T2 G T1 F T2 E T1 R T2 D T1 F T2 D T1 F T2 R T1 D T2 F T1 G T2 F T1 G T2 A T1 R T2 C T1 a T2 A T1 R T2 A T1 G T2 F T1 F T2 G T1 F T2 E T1 d T2 D T1 C T2 B T1 A T2 G T1 F T2 A T1 a T2 D T1 D T2 D T1 D T2 D T1 C T2 R T1 C T2 D T1 F T2 D T1 F T2 D T1 F T2 C T1 B T2 C T1 A T2 R T1 C T2 A T1 A T2 G T1 F T2 G T1 G T2 F T1 G T2 G T1 F T2 D T1 D T2 D T1 D T2 D T1 F T2 G T1 F T2 E T1 R T2 C T1 C T2 D T1 F T2 D T1 R T2 F T1 G T2 F T1 E T2 R T1 D T2 F T1 D T2 F T1 G T2 F T1 G T2 A T1 R T2 C T1 G T2 F T1 R T2 D T1 F T2 D T1 F T2 G T1 F T2 E T1 R T2 C T1 C T2 D T1 F T2 D T1 F T2 R T1 F T2 G T1 F T2 G T1 F T2 E T1 R T2 D T1 F T2 D T1 F T2 C T1 R T2 C T1 D T2 F T1 D T2 F T1 C T2 B T1 C T2 A T1 R T2 C T1 A T2 G T1 F T2 G T1 F T2 E",

]

# Training sequences
train_sequences = [
    "R T1 G T2 D T1 D T2 C T1 C T2 a T1 G T2 D T1 F T2 F T1 D T2 F T1 R T2 D T1 D T2 C T1 R T2 a T1 G T2 D T1 F T2 R T1 D T2 C T1 C T2 a T1 G T2 R T1 F T2 F T1 f T2 R T1 a T2 a T1 G T2 a T1 C T2 D T1 R T2 G T1 D T2 D T1 C T2 C T1 a T2 a T1 G T2 G T1 D T2 F T1 R T2 D T1 D T2 C T1 C T2 a T1 G T2 G T1 D T2 F T1 R T2 D T1 C T2 C T1 a T2 G T1 G T2 F T1 F T2 f T1 R T2 a T1 G T2 a T1 C T2 D T1 R T2 G T1 G T2 D T1 D T2 C T1 R T2 D T1 D T2 C T1 R T2 a T1 a T2 a T1 C T2 R T1 D T2 D T1 C T2 R T1 D T2 C T1 a T2 a T1 C T2 C T1 a T2 R T1 D T2 D T1 C T2 a T1 C T2 C T1 a T2 G T1 R T2 a T1 a T2 C T1 a T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 a T2 C T1 D T2 D T1 C T2 R T1 D T2 D T1 C T2 R T1 a T2 a T1 a T2 C T1 R T2 D T1 d T2 D T1 C T2 a T1 G T2 F T1 F T2 D T1 C T2 a T1 R T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 a T2 a T1 C T2 a T1 R T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 G T2 D T1 D T2 C T1 C T2 a T1 G T2 D T1 F T2 F T1 D T2 F T1 R T2 D T1 D T2 C T1 R T2 a T1 G T2 D T1 F T2 R T1 D T2 C T1 C T2 a T1 G T2 R T1 F T2 F T1 f T2 R T1 a T2 a T1 G T2 a T1 C T2 D T1 R T2 G T1 D T2 D T1 C T2 C T1 a T2 a T1 G T2 G T1 D T2 F T1 R T2 D T1 D T2 C T1 C T2 a T1 G T2 G T1 D T2 F T1 R T2 D T1 C T2 C T1 a T2 G T1 G T2 F T1 F T2 f T1 R T2 a T1 G T2 a T1 C T2 D T1 R T2 G T1 G T2 D T1 D T2 C T1 R T2 D T1 D T2 C T1 R T2 a T1 a T2 a T1 C T2 R T1 D T2 D T1 C T2 R T1 D T2 C T1 a T2 a T1 C T2 C T1 a T2 R T1 D T2 D T1 C T2 a T1 C T2 C T1 a T2 G T1 R T2 a T1 a T2 C T1 a T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 a T2 C T1 D T2 D T1 C T2 R T1 D T2 D T1 C T2 R T1 a T2 a T1 a T2 C T1 R T2 D T1 d T2 D T1 C T2 a T1 G T2 F T1 F T2 D T1 C T2 a T1 R T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 a T2 a T1 C T2 a T1 R T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 G T2 G T1 D T2 D T1 C T2 R T1 D T2 D T1 C T2 R T1 a T2 a T1 a T2 C T1 R T2 D T1 D T2 C T1 R T2 D T1 C T2 a T1 a T2 C T1 C T2 a T1 R T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 a T2 a T1 C T2 a T1 D T2 D T1 C T2 a T1 C T2 C T1 a T2 G T1 R T2 a T1 C T2 D T1 D T2 C T1 R T2 D T1 D T2 C T1 R T2 a T1 a T2 a T1 C T2 R T1 D T2 d T1 D T2 C T1 a T2 G T1 F T2 F T1 D T2 C T1 a T2 R T1 D T2 D T1 C T2 a T1 C T2 C T1 a T2 G T1 R T2 a T1 a T2 C T1 a T2 R T1 D T2 D T1 C T2 a T1 C T2 C T1 a T2 G T1",

]

# Calculate rhythm consistency
rhythm_consistency_score = calculate_rhythm_consistency(generated_sequences, train_sequences)
print(f"Rhythm Consistency Score: {rhythm_consistency_score:.4f}")


Rhythm Consistency Score: 1.0000


In [None]:
import math
from collections import defaultdict
class TrigramModel:
    def __init__(self, vocab_size):
        self.vocab_size = vocab_size
        self.trigram_counts = defaultdict(lambda: defaultdict(int))
        self.bigram_counts = defaultdict(int)

    def train(self, sequences):
        # Count trigram occurrences
        for sequence in sequences:
            for i in range(len(sequence) - 2):
                prev_tokens = (sequence[i], sequence[i + 1])
                next_token = sequence[i + 2]
                self.trigram_counts[prev_tokens][next_token] += 1
                self.bigram_counts[prev_tokens] += 1

    def predict_next(self, prev_tokens):
        # Predict next token probabilities
        token_counts = self.trigram_counts[prev_tokens]
        total = self.bigram_counts[prev_tokens]
        if total == 0:
            return torch.ones(self.vocab_size) / self.vocab_size  # Uniform distribution
        probabilities = torch.zeros(self.vocab_size)
        for token, count in token_counts.items():
            probabilities[token] = count / total
        return probabilities

    def perplexity(self, data):
        log_prob = 0
        total_tokens = 0
        for sequence in data:
            for i in range(len(sequence) - 2):
                prev_tokens = (sequence[i], sequence[i + 1])
                next_token = sequence[i + 2]
                probabilities = self.predict_next(prev_tokens)
                log_prob += math.log(probabilities[next_token].item() + 1e-9)
                total_tokens += 1
        avg_log_prob = log_prob / total_tokens
        return math.exp(-avg_log_prob)

# Preprocess pitch + time data for Trigram Model
encoded_data = torch.tensor(encode(text), dtype=torch.long)
train_sequences = [encoded_data[i:i + 256] for i in range(0, len(encoded_data) - 256, 256)]

# Train Trigram Model
trigram_model = TrigramModel(vocab_size)
trigram_model.train(train_sequences)

# Evaluate Trigram Model Perplexity
train_perplexity = trigram_model.perplexity(train_sequences)
print(f"Trigram Model Train Perplexity: {train_perplexity:.4f}")


Trigram Model Train Perplexity: 15.0000


In [None]:
def preprocess_melody_with_time_tokens(generated_sequence):
    # Split the sequence into characters
    notes = list(generated_sequence)

    # Add spaces, but keep time tokens like T1 and T2 without spaces between T and its number
    processed_sequence = ''
    for note in notes:
        if note.startswith('T'):  # Time tokens
            processed_sequence += note
        else:
            processed_sequence += note + ' '

    return processed_sequence.strip()

# Example usage
generated_sequence = "CFEDCAdGdCaBGAT2cAfggEBECgEBDdcT1DfT1RT2gDaT1T2CaRCAgfRfADGgEEGT1cBcdgCBT2cDDERGGCaRGBGGECEET1AGRCGCaBFFdRaGgaAgfET1cARcfdAABRBaGFFEgCdfEDfffRFDRCT2AfCT2FEaT1CRDT2aT2cCT2CT2FCGffCcBDddfcBccgaFT1GfADT2CCcCfDFT1gaEBCaDCT1gFfACGT2T1DADEEDCcDAaT2T2aFCgcGaaT2dDCcEGCFEDfDgGfBdT1CDDCffT1Rd"

processed_sequence = preprocess_melody_with_time_tokens(generated_sequence)
print(processed_sequence)


C F E D C A d G d C a B G A T2 c A f g g E B E C g E B D d c T1 D f T1 R T2 g D a T1 T2 C a R C A g f R f A D G g E E G T1 c B c d g C B T2 c D D E R G G C a R G B G G E C E E T1 A G R C G C a B F F d R a G g a A g f E T1 c A R c f d A A B R B a G F F E g C d f E D f f f R F D R C T2 A f C T2 F E a T1 C R D T2 a T2 c C T2 C T2 F C G f f C c B D d d f c B c c g a F T1 G f A D T2 C C c C f D F T1 g a E B C a D C T1 g F f A C G T2 T1 D A D E E D C c D A a T2 T2 a F C g c G a a T2 d D C c E G C F E D f D g G f B d T1 C D D C f f T1 R d


In [None]:
# Generate sequences from the Bigram Model
def generate_trigram_sequence(model, length, start_token):
    sequence = [start_token]
    for _ in range(length - 1):
        prev_token = sequence[-1]
        next_token_probs = model.predict_next(prev_token)
        next_token = torch.multinomial(next_token_probs, num_samples=1).item()
        sequence.append(next_token)
    return sequence

# Example: Generate a pitch-only sequence
start_token = stoi['C']  # Start with 'C' (or any token in the dataset)
generated_sequence = generate_trigram_sequence(trigram_model, length=256, start_token=start_token)
generated_sequence_str = ''.join([itos[token] for token in generated_sequence])
print("Generated Sequence:", generated_sequence_str)
processed_trigram_sequence = preprocess_melody_with_time_tokens(generated_sequence_str)
print("Processed Trigram Sequence:", processed_trigram_sequence)

Generated Sequence: CGRFGET1GET2BBAADFcT1BT1T1BcRFRfBT2cFDDBffdgECccT2gdfDgACdCdRacEffRCADRAcdGgRAGRT1acEFDDGfBAdfCcaEcDT1T1cDT1cT1dRgERaDgT2gGGCdaDDaFaaDBABaGAGDfECdT1GT1cgBBFdDDcRFRgAFAECDfgT2GABT2GT1dGaRfT1cT2dFEGaBfgAGT1FEdT1ET1T1fgEFRgT1GRaGgGgdRRBcFaFgcGaT2ccgAcaABffCCgFT2daaRBDAT2CAccCcaT1aERGFT1dE
Processed Trigram Sequence: C G R F G E T1 G E T2 B B A A D F c T1 B T1 T1 B c R F R f B T2 c F D D B f f d g E C c c T2 g d f D g A C d C d R a c E f f R C A D R A c d G g R A G R T1 a c E F D D G f B A d f C c a E c D T1 T1 c D T1 c T1 d R g E R a D g T2 g G G C d a D D a F a a D B A B a G A G D f E C d T1 G T1 c g B B F d D D c R F R g A F A E C D f g T2 G A B T2 G T1 d G a R f T1 c T2 d F E G a B f g A G T1 F E d T1 E T1 T1 f g E F R g T1 G R a G g G g d R R B c F a F g c G a T2 c c g A c a A B f f C C g F T2 d a a R B D A T2 C A c c C c a T1 a E R G F T1 d E


In [None]:
# Example usage:

# Generated sequences
generated_sequences = [
   processed_trigram_sequence,

]

# Training sequences
train_sequences = [
    "R T1 G T2 D T1 D T2 C T1 C T2 a T1 G T2 D T1 F T2 F T1 D T2 F T1 R T2 D T1 D T2 C T1 R T2 a T1 G T2 D T1 F T2 R T1 D T2 C T1 C T2 a T1 G T2 R T1 F T2 F T1 f T2 R T1 a T2 a T1 G T2 a T1 C T2 D T1 R T2 G T1 D T2 D T1 C T2 C T1 a T2 a T1 G T2 G T1 D T2 F T1 R T2 D T1 D T2 C T1 C T2 a T1 G T2 G T1 D T2 F T1 R T2 D T1 C T2 C T1 a T2 G T1 G T2 F T1 F T2 f T1 R T2 a T1 G T2 a T1 C T2 D T1 R T2 G T1 G T2 D T1 D T2 C T1 R T2 D T1 D T2 C T1 R T2 a T1 a T2 a T1 C T2 R T1 D T2 D T1 C T2 R T1 D T2 C T1 a T2 a T1 C T2 C T1 a T2 R T1 D T2 D T1 C T2 a T1 C T2 C T1 a T2 G T1 R T2 a T1 a T2 C T1 a T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 a T2 C T1 D T2 D T1 C T2 R T1 D T2 D T1 C T2 R T1 a T2 a T1 a T2 C T1 R T2 D T1 d T2 D T1 C T2 a T1 G T2 F T1 F T2 D T1 C T2 a T1 R T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 a T2 a T1 C T2 a T1 R T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 G T2 D T1 D T2 C T1 C T2 a T1 G T2 D T1 F T2 F T1 D T2 F T1 R T2 D T1 D T2 C T1 R T2 a T1 G T2 D T1 F T2 R T1 D T2 C T1 C T2 a T1 G T2 R T1 F T2 F T1 f T2 R T1 a T2 a T1 G T2 a T1 C T2 D T1 R T2 G T1 D T2 D T1 C T2 C T1 a T2 a T1 G T2 G T1 D T2 F T1 R T2 D T1 D T2 C T1 C T2 a T1 G T2 G T1 D T2 F T1 R T2 D T1 C T2 C T1 a T2 G T1 G T2 F T1 F T2 f T1 R T2 a T1 G T2 a T1 C T2 D T1 R T2 G T1 G T2 D T1 D T2 C T1 R T2 D T1 D T2 C T1 R T2 a T1 a T2 a T1 C T2 R T1 D T2 D T1 C T2 R T1 D T2 C T1 a T2 a T1 C T2 C T1 a T2 R T1 D T2 D T1 C T2 a T1 C T2 C T1 a T2 G T1 R T2 a T1 a T2 C T1 a T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 a T2 C T1 D T2 D T1 C T2 R T1 D T2 D T1 C T2 R T1 a T2 a T1 a T2 C T1 R T2 D T1 d T2 D T1 C T2 a T1 G T2 F T1 F T2 D T1 C T2 a T1 R T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 a T2 a T1 C T2 a T1 R T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 G T2 G T1 D T2 D T1 C T2 R T1 D T2 D T1 C T2 R T1 a T2 a T1 a T2 C T1 R T2 D T1 D T2 C T1 R T2 D T1 C T2 a T1 a T2 C T1 C T2 a T1 R T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 a T2 a T1 C T2 a T1 D T2 D T1 C T2 a T1 C T2 C T1 a T2 G T1 R T2 a T1 C T2 D T1 D T2 C T1 R T2 D T1 D T2 C T1 R T2 a T1 a T2 a T1 C T2 R T1 D T2 d T1 D T2 C T1 a T2 G T1 F T2 F T1 D T2 C T1 a T2 R T1 D T2 D T1 C T2 a T1 C T2 C T1 a T2 G T1 R T2 a T1 a T2 C T1 a T2 R T1 D T2 D T1 C T2 a T1 C T2 C T1 a T2 G T1",

]

# Calculate rhythm consistency
print("time tokens")
rhythm_consistency_score = calculate_rhythm_consistency(generated_sequences, train_sequences)
print(f"Rhythm Consistency Score: {rhythm_consistency_score:.4f}")


time tokens
['T1', 'T2', 'T1', 'T1', 'T1', 'T2', 'T2', 'T1', 'T1', 'T1', 'T1', 'T1', 'T2', 'T1', 'T1', 'T2', 'T2', 'T1', 'T1', 'T2', 'T1', 'T1', 'T1', 'T1', 'T1', 'T2', 'T2', 'T2', 'T1', 'T1']
['T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T