In [None]:
def preprocess_with_time_tokens_after_r(filepath, output_file):
    """
    Preprocess melody dataset to include time tokens (T1, T2) after each note/rest.

    Args:
        filepath (str): Path to the input melody dataset.
        output_file (str): Path to save the processed dataset.
    """
    with open(filepath, 'r', encoding='utf-8') as file:
        melodies = file.readlines()

    processed_melodies = []

    for melody in melodies:
        # Remove spaces and ensure each character is treated individually
        notes = ''.join(melody.strip().split())

        # Pair each note/rest with alternating time tokens (T1, T2)
        combined = []
        time_tokens = ['T1', 'T2']
        i = 0
        while i < len(notes):
            if notes[i] == 'R':
                combined.append('R')
                # Add time token after 'R'
                if len(combined) % 2 == 0:
                    combined.append(time_tokens[0])  # Alternate time tokens
                else:
                    combined.append(time_tokens[1])
                i += 1  # Skip the next character
            else:
                note_pair = notes[i:i+2]
                combined.append(note_pair)
                if note_pair != 'R':  # Exclude rest 'R'
                    if len(combined) % 2 == 0:
                        combined.append(time_tokens[0])  # Alternate time tokens
                    else:
                        combined.append(time_tokens[1])
                i += 2  # Move to the next pair

        # Combine into a single line and append
        processed_melodies.append(' '.join(combined))

    # Save processed melodies
    with open(output_file, 'w', encoding='utf-8') as file:
        file.write('\n'.join(processed_melodies))

    print(f"Pitch+Time Tokens preprocessed data saved to {output_file}.")

# Example usage
preprocess_with_time_tokens_after_r('/content/augmentedOctaveExtensions_processed.txt', 'processedMelodiesWithTimeTokens.txt')


Pitch+Time Tokens preprocessed data saved to processedMelodiesWithTimeTokens.txt.


In [None]:
import torch
import torch.nn as nn
from torch.nn import functional as F
import re
# hyperparameters
batch_size = 64 # how many independent sequences will we process in parallel?
block_size = 256 # what is the maximum context length for predictions?
max_iters = 5000
eval_interval = 500
learning_rate = 3e-4
device = 'cuda' if torch.cuda.is_available() else 'cpu'
eval_iters = 200
n_embd = 384
n_head = 6
n_layer = 6
dropout = 0.2
# ------------

torch.manual_seed(1337)
pattern = r'([A-Za-z])(\d+)'

# Extract unique tokens (e.g., C4, D5, etc.)

# Open the new input file
with open('/content/processedMelodiesWithTimeTokens.txt', 'r', encoding='utf-8') as f:
    text = f.read()


chars = sorted(set(f"{ch}{num}" for ch, num in re.findall(pattern, text)))
chars.append('R')
vocab_size = len(chars)
print("unique vocabulary")
print(chars)
# Create a mapping from characters to integers
stoi = { ch:i for i,ch in enumerate(chars) }
itos = { i:ch for i,ch in enumerate(chars) }
# encode = lambda s: [stoi[c] for c in s] # encoder: take a string, output a list of integers
# decode = lambda l: ''.join([itos[i] for i in l]) # decoder: take a list of integers, output a string
# Updated encode function
encode = lambda s: [stoi['R'] if s == 'R' else stoi[f"{ch}{num}"] for ch, num in re.findall(pattern, s)]

# Updated decode function
decode = lambda l: ' '.join([itos[i] if isinstance(i, int) else 'R' for i in l])

# Train and test splits
data = torch.tensor(encode(text), dtype=torch.long)
n = int(0.9*len(data)) # first 90% will be train, rest val
train_data = data[:n]
val_data = data[n:]

# data loading
def get_batch(split):
    # generate a small batch of data of inputs x and targets y
    data = train_data if split == 'train' else val_data
    ix = torch.randint(len(data) - block_size, (batch_size,))
    x = torch.stack([data[i:i+block_size] for i in ix])
    y = torch.stack([data[i+1:i+block_size+1] for i in ix])
    x, y = x.to(device), y.to(device)
    return x, y

@torch.no_grad()
def estimate_loss():
    out = {}
    model.eval()
    for split in ['train', 'val']:
        losses = torch.zeros(eval_iters)
        for k in range(eval_iters):
            X, Y = get_batch(split)
            logits, loss = model(X, Y)
            losses[k] = loss.item()
        out[split] = losses.mean()
    model.train()
    return out

class Head(nn.Module):
    """ one head of self-attention """

    def __init__(self, head_size):
        super().__init__()
        self.key = nn.Linear(n_embd, head_size, bias=False)
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)
        self.register_buffer('tril', torch.tril(torch.ones(block_size, block_size)))

        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        B,T,C = x.shape
        k = self.key(x)   # (B,T,hs)
        q = self.query(x) # (B,T,hs)
        wei = q @ k.transpose(-2,-1) * k.shape[-1]**-0.5 # (B, T, hs) @ (B, hs, T) -> (B, T, T)
        wei = wei.masked_fill(self.tril[:T, :T] == 0, float('-inf')) # (B, T, T)
        wei = F.softmax(wei, dim=-1) # (B, T, T)
        wei = self.dropout(wei)
        v = self.value(x) # (B,T,hs)
        out = wei @ v # (B, T, T) @ (B, T, hs) -> (B, T, hs)
        return out

class MultiHeadAttention(nn.Module):
    """ multiple heads of self-attention in parallel """

    def __init__(self, num_heads, head_size):
        super().__init__()
        self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
        self.proj = nn.Linear(head_size * num_heads, n_embd)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        out = torch.cat([h(x) for h in self.heads], dim=-1)
        out = self.dropout(self.proj(out))
        return out

class FeedFoward(nn.Module):
    """ a simple linear layer followed by a non-linearity """

    def __init__(self, n_embd):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_embd, 4 * n_embd),
            nn.ReLU(),
            nn.Linear(4 * n_embd, n_embd),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        return self.net(x)

class Block(nn.Module):
    """ Transformer block: communication followed by computation """

    def __init__(self, n_embd, n_head):
        super().__init__()
        head_size = n_embd // n_head
        self.sa = MultiHeadAttention(n_head, head_size)
        self.ffwd = FeedFoward(n_embd)
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)

    def forward(self, x):
        x = x + self.sa(self.ln1(x))
        x = x + self.ffwd(self.ln2(x))
        return x

class GPTLanguageModel(nn.Module):

    def __init__(self):
        super().__init__()
        self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
        self.position_embedding_table = nn.Embedding(block_size, n_embd)
        self.blocks = nn.Sequential(*[Block(n_embd, n_head=n_head) for _ in range(n_layer)])
        self.ln_f = nn.LayerNorm(n_embd) # final layer norm
        self.lm_head = nn.Linear(n_embd, vocab_size)

        self.apply(self._init_weights)

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)

    def forward(self, idx, targets=None):
        B, T = idx.shape

        tok_emb = self.token_embedding_table(idx) # (B,T,C)
        pos_emb = self.position_embedding_table(torch.arange(T, device=device)) # (T,C)
        x = tok_emb + pos_emb # (B,T,C)
        x = self.blocks(x) # (B,T,C)
        x = self.ln_f(x) # (B,T,C)
        logits = self.lm_head(x) # (B,T,vocab_size)

        if targets is None:
            loss = None
        else:
            B, T, C = logits.shape
            logits = logits.view(B*T, C)
            targets = targets.view(B*T)
            loss = F.cross_entropy(logits, targets)

        return logits, loss

    def generate(self, idx, max_new_tokens):
        for _ in range(max_new_tokens):
            idx_cond = idx[:, -block_size:]
            logits, loss = self(idx_cond)
            logits = logits[:, -1, :]
            probs = F.softmax(logits, dim=-1)
            idx_next = torch.multinomial(probs, num_samples=1)
            idx = torch.cat((idx, idx_next), dim=1)
        return idx

model = GPTLanguageModel()
m = model.to(device)
print(sum(p.numel() for p in m.parameters())/1e6, 'M parameters')

optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

for iter in range(max_iters):

    if iter % eval_interval == 0 or iter == max_iters - 1:
        losses = estimate_loss()
        print(f"step {iter}: train loss {losses['train']:.4f}, val loss {losses['val']:.4f}")

    xb, yb = get_batch('train')

    logits, loss = model(xb, yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()

context = torch.zeros((1, 1), dtype=torch.long, device=device)
# print(decode(m.generate(context, max_new_tokens=500)[0].tolist()))


unique vocabulary
['A3', 'A4', 'A5', 'B3', 'B4', 'B5', 'C3', 'C4', 'C5', 'D3', 'D4', 'D5', 'E3', 'E4', 'E5', 'F3', 'F4', 'F5', 'G3', 'G4', 'G5', 'T2', 'a3', 'a4', 'a5', 'c3', 'c4', 'c5', 'd3', 'd4', 'd5', 'f3', 'f4', 'f5', 'g3', 'g4', 'g5', 'R']
10.768166 M parameters
step 0: train loss 3.6372, val loss 3.6395
step 500: train loss 0.7379, val loss 0.8801
step 1000: train loss 0.6986, val loss 0.8312
step 1500: train loss 0.6649, val loss 0.8050
step 2000: train loss 0.6281, val loss 0.7894
step 2500: train loss 0.5716, val loss 0.7601
step 3000: train loss 0.5079, val loss 0.7447
step 3500: train loss 0.3915, val loss 0.7317
step 4000: train loss 0.2875, val loss 0.6863
step 4500: train loss 0.2145, val loss 0.6150
step 4999: train loss 0.1740, val loss 0.5540


In [None]:
torch.save(model.state_dict(), 'gpt_melody_model_time_and_pitch_octaves.pth')

In [None]:
model = GPTLanguageModel()
model.load_state_dict(torch.load('gpt_melody_model_time_and_pitch_octaves.pth'))
model.to(device)

  model.load_state_dict(torch.load('gpt_melody_model_time_and_pitch_octaves.pth'))


GPTLanguageModel(
  (token_embedding_table): Embedding(38, 384)
  (position_embedding_table): Embedding(256, 384)
  (blocks): Sequential(
    (0): Block(
      (sa): MultiHeadAttention(
        (heads): ModuleList(
          (0-5): 6 x Head(
            (key): Linear(in_features=384, out_features=64, bias=False)
            (query): Linear(in_features=384, out_features=64, bias=False)
            (value): Linear(in_features=384, out_features=64, bias=False)
            (dropout): Dropout(p=0.2, inplace=False)
          )
        )
        (proj): Linear(in_features=384, out_features=384, bias=True)
        (dropout): Dropout(p=0.2, inplace=False)
      )
      (ffwd): FeedFoward(
        (net): Sequential(
          (0): Linear(in_features=384, out_features=1536, bias=True)
          (1): ReLU()
          (2): Linear(in_features=1536, out_features=384, bias=True)
          (3): Dropout(p=0.2, inplace=False)
        )
      )
      (ln1): LayerNorm((384,), eps=1e-05, elementwise_affine=

In [None]:
from torch.utils.data import Dataset, DataLoader
import torch

class MelodyDataset(Dataset):
    def __init__(self, data, block_size):
        """
        Args:
            data (torch.Tensor): Encoded dataset as a 1D tensor.
            block_size (int): The context length for GPT.
        """
        self.data = data
        self.block_size = block_size

    def __len__(self):
        return len(self.data) - self.block_size

    def __getitem__(self, idx):
        """
        Returns:
            x (torch.Tensor): Input sequence of length block_size.
            y (torch.Tensor): Target sequence (next token for each input token).
        """
        x = torch.tensor(self.data[idx:idx + self.block_size], dtype=torch.long)
        y = torch.tensor(self.data[idx + 1:idx + self.block_size + 1], dtype=torch.long)
        return x, y


In [None]:
# Define dataset and dataloader
block_size = 256  # Adjust based on model's context size
batch_size = 64   # Number of sequences in a batch
dataset = MelodyDataset(encode(text), block_size)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [None]:
#evaluate perplexity of the model here.
def calculate_perplexity(model, data_loader, device):
    model.eval()
    total_loss = 0
    count = 0
    criterion = torch.nn.CrossEntropyLoss()

    with torch.no_grad():
        for x, y in data_loader:
            x, y = x.to(device), y.to(device)  # Move tensors to the device
            logits = model(x)

            # If logits is a tuple, get the first element
            if isinstance(logits, tuple):
                logits = logits[0]

            # Compute the loss
            loss = criterion(logits.view(-1, logits.size(-1)), y.view(-1))
            total_loss += loss.item() * x.size(0)
            count += x.size(0)

    avg_loss = total_loss / count
    perplexity = torch.exp(torch.tensor(avg_loss))
    return perplexity.item()
perplexity = calculate_perplexity(model, data_loader, device)
print(f"Pitch-and-time Perplexity for expanded octave dataset: {perplexity}")

Pitch-and-time Perplexity for expanded octave dataset: 1.2367064952850342


In [None]:
context = torch.zeros((1, 1), dtype=torch.long, device=device)
generated_melody = decode(model.generate(context, max_new_tokens=500)[0].tolist())
print("Generated Melody:", generated_melody)

Generated Melody: A3 T2 T2 a3 T2 F3 T2 G3 T2 F3 T2 a3 T2 G3 T2 T2 F3 T2 G3 T2 a3 T2 C3 T2 a3 T2 a3 T2 G3 T2 T2 G3 T2 D3 T2 F3 T2 D3 T2 F3 T2 D3 T2 G3 T2 G3 T2 D3 T2 C3 T2 a3 T2 F3 T2 D3 T2 C3 T2 a3 T2 C3 T2 a3 T2 a3 T2 C3 T2 D3 T2 C3 T2 C3 T2 a3 T2 a3 T2 C3 T2 G3 T2 F3 T2 G3 T2 F3 T2 G3 T2 a3 T2 F3 T2 G3 T2 a3 T2 G3 T2 F3 T2 D3 T2 C3 T2 a3 T2 F3 T2 D3 T2 F3 T2 D3 T2 C3 T2 a3 T2 C3 T2 D3 T2 C3 T2 T2 D3 T2 D3 T2 D3 T2 C3 T2 C3 T2 a3 T2 a3 T2 a3 T2 C3 T2 D3 T2 D3 T2 C3 T2 D3 T2 D3 T2 C3 T2 C3 T2 a3 T2 a3 T2 C3 T2 D4 T2 D4 T2 C4 T2 C4 T2 a4 T2 a4 T2 C4 T2 D4 T2 D4 T2 C4 T2 C4 T2 a4 T2 a4 T2 C4 T2 G4 T2 F4 T2 T2 D4 T2 D4 T2 C4 T2 C4 T2 a4 T2 a4 T2 C4 T2 D4 T2 D4 T2 D4 T2 C4 T2 C4 T2 a4 T2 a4 T2 C4 T2 G4 T2 F4 T2 T2 D4 T2 D4 T2 C4 T2 C4 T2 a4 T2 a4 T2 C4 T2 D4 T2 D4 T2 C4 T2 C4 T2 a4 T2 a4 T2 G4 T2 d4 T2 D4 T2 C4 T2 a4 T2 T2 F4 T2 F4 T2 D4 T2 F4 T2 D4 T2 F4 T2 G4 T2 G4 T2 F4 T2 d4 T2 D4 T2 C4 T2 D4 T2 a4 T2 C4 T2 D4 T2 D4 T2 F4 T2 C4 T2 a4 T2 T2 a4 T2 C4 T2 D4 T2 D4 T2 C4 T2 C4 T2 C4 T2 a4 T

In [None]:
import numpy as np
from collections import Counter

def calculate_rhythm_consistency(generated_sequences, train_sequences):
    """
    Calculate rhythm consistency by comparing the distribution of time tokens
    in generated sequences to the training dataset.

    Args:
        generated_sequences (list of str): List of generated sequences (pitch + time).
        train_sequences (list of str): List of training sequences (pitch + time).

    Returns:
        float: Rhythm consistency score (closer to 1 indicates higher consistency).
    """
    def extract_time_tokens(sequences):
        time_tokens = []
        for seq in sequences:
            tokens = seq.split()  # Split on spaces
            time_tokens.extend([token for token in tokens if token.startswith('T')])  # Collect time tokens
            print(time_tokens)
        return time_tokens

    # Extract time tokens from generated and training data
    generated_time_tokens = extract_time_tokens(generated_sequences)
    train_time_tokens = extract_time_tokens(train_sequences)

    # Count occurrences of each time token
    gen_dist = Counter(generated_time_tokens)
    train_dist = Counter(train_time_tokens)

    # Normalize counts to get probabilities
    gen_total = sum(gen_dist.values())
    train_total = sum(train_dist.values())
    gen_probs = {k: v / gen_total for k, v in gen_dist.items()}
    train_probs = {k: v / train_total for k, v in train_dist.items()}

    # Calculate the rhythm consistency score (cosine similarity)
    time_tokens = set(gen_probs.keys()).union(train_probs.keys())
    gen_vector = np.array([gen_probs.get(t, 0) for t in time_tokens])
    train_vector = np.array([train_probs.get(t, 0) for t in time_tokens])
    cosine_similarity = np.dot(gen_vector, train_vector) / (np.linalg.norm(gen_vector) * np.linalg.norm(train_vector))

    return cosine_similarity


In [None]:
generated_sequences = [
  generated_melody,

]

# Training sequences
train_sequences = [
    "R T1 G T2 D T1 D T2 C T1 C T2 a T1 G T2 D T1 F T2 F T1 D T2 F T1 R T2 D T1 D T2 C T1 R T2 a T1 G T2 D T1 F T2 R T1 D T2 C T1 C T2 a T1 G T2 R T1 F T2 F T1 f T2 R T1 a T2 a T1 G T2 a T1 C T2 D T1 R T2 G T1 D T2 D T1 C T2 C T1 a T2 a T1 G T2 G T1 D T2 F T1 R T2 D T1 D T2 C T1 C T2 a T1 G T2 G T1 D T2 F T1 R T2 D T1 C T2 C T1 a T2 G T1 G T2 F T1 F T2 f T1 R T2 a T1 G T2 a T1 C T2 D T1 R T2 G T1 G T2 D T1 D T2 C T1 R T2 D T1 D T2 C T1 R T2 a T1 a T2 a T1 C T2 R T1 D T2 D T1 C T2 R T1 D T2 C T1 a T2 a T1 C T2 C T1 a T2 R T1 D T2 D T1 C T2 a T1 C T2 C T1 a T2 G T1 R T2 a T1 a T2 C T1 a T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 a T2 C T1 D T2 D T1 C T2 R T1 D T2 D T1 C T2 R T1 a T2 a T1 a T2 C T1 R T2 D T1 d T2 D T1 C T2 a T1 G T2 F T1 F T2 D T1 C T2 a T1 R T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 a T2 a T1 C T2 a T1 R T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 G T2 D T1 D T2 C T1 C T2 a T1 G T2 D T1 F T2 F T1 D T2 F T1 R T2 D T1 D T2 C T1 R T2 a T1 G T2 D T1 F T2 R T1 D T2 C T1 C T2 a T1 G T2 R T1 F T2 F T1 f T2 R T1 a T2 a T1 G T2 a T1 C T2 D T1 R T2 G T1 D T2 D T1 C T2 C T1 a T2 a T1 G T2 G T1 D T2 F T1 R T2 D T1 D T2 C T1 C T2 a T1 G T2 G T1 D T2 F T1 R T2 D T1 C T2 C T1 a T2 G T1 G T2 F T1 F T2 f T1 R T2 a T1 G T2 a T1 C T2 D T1 R T2 G T1 G T2 D T1 D T2 C T1 R T2 D T1 D T2 C T1 R T2 a T1 a T2 a T1 C T2 R T1 D T2 D T1 C T2 R T1 D T2 C T1 a T2 a T1 C T2 C T1 a T2 R T1 D T2 D T1 C T2 a T1 C T2 C T1 a T2 G T1 R T2 a T1 a T2 C T1 a T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 a T2 C T1 D T2 D T1 C T2 R T1 D T2 D T1 C T2 R T1 a T2 a T1 a T2 C T1 R T2 D T1 d T2 D T1 C T2 a T1 G T2 F T1 F T2 D T1 C T2 a T1 R T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 a T2 a T1 C T2 a T1 R T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 G T2 G T1 D T2 D T1 C T2 R T1 D T2 D T1 C T2 R T1 a T2 a T1 a T2 C T1 R T2 D T1 D T2 C T1 R T2 D T1 C T2 a T1 a T2 C T1 C T2 a T1 R T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 a T2 a T1 C T2 a T1 D T2 D T1 C T2 a T1 C T2 C T1 a T2 G T1 R T2 a T1 C T2 D T1 D T2 C T1 R T2 D T1 D T2 C T1 R T2 a T1 a T2 a T1 C T2 R T1 D T2 d T1 D T2 C T1 a T2 G T1 F T2 F T1 D T2 C T1 a T2 R T1 D T2 D T1 C T2 a T1 C T2 C T1 a T2 G T1 R T2 a T1 a T2 C T1 a T2 R T1 D T2 D T1 C T2 a T1 C T2 C T1 a T2 G T1",

]

# Calculate rhythm consistency
print("time tokens")
rhythm_consistency_score = calculate_rhythm_consistency(generated_sequences, train_sequences)
print(f"Rhythm Consistency Score: {rhythm_consistency_score:.4f}")

time tokens
['T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2

In [None]:
import math
from collections import defaultdict
class TrigramModel:
    def __init__(self, vocab_size):
        self.vocab_size = vocab_size
        self.trigram_counts = defaultdict(lambda: defaultdict(int))
        self.bigram_counts = defaultdict(int)

    def train(self, sequences):
        # Count trigram occurrences
        for sequence in sequences:
            for i in range(len(sequence) - 2):
                prev_tokens = (sequence[i], sequence[i + 1])
                next_token = sequence[i + 2]
                self.trigram_counts[prev_tokens][next_token] += 1
                self.bigram_counts[prev_tokens] += 1

    def predict_next(self, prev_tokens):
        # Predict next token probabilities
        token_counts = self.trigram_counts[prev_tokens]
        total = self.bigram_counts[prev_tokens]
        if total == 0:
            return torch.ones(self.vocab_size) / self.vocab_size  # Uniform distribution
        probabilities = torch.zeros(self.vocab_size)
        for token, count in token_counts.items():
            probabilities[token] = count / total
        return probabilities

    def perplexity(self, data):
        log_prob = 0
        total_tokens = 0
        for sequence in data:
            for i in range(len(sequence) - 2):
                prev_tokens = (sequence[i], sequence[i + 1])
                next_token = sequence[i + 2]
                probabilities = self.predict_next(prev_tokens)
                log_prob += math.log(probabilities[next_token].item() + 1e-9)
                total_tokens += 1
        avg_log_prob = log_prob / total_tokens
        return math.exp(-avg_log_prob)

# Preprocess pitch + time data for Trigram Model
encoded_data = torch.tensor(encode(text), dtype=torch.long)
train_sequences = [encoded_data[i:i + 256] for i in range(0, len(encoded_data) - 256, 256)]

# Train Trigram Model
trigram_model = TrigramModel(vocab_size)
trigram_model.train(train_sequences)

# Evaluate Trigram Model Perplexity
train_perplexity = trigram_model.perplexity(train_sequences)
print(f"Trigram Model Train Perplexity: {train_perplexity:.4f}")


Trigram Model Train Perplexity: 38.0000


In [None]:
def preprocess_melody_with_time_tokens(generated_sequence):
    # Split the sequence into characters
    notes = list(generated_sequence)

    # Add spaces, but keep time tokens like T1 and T2 without spaces between T and its number
    processed_sequence = ''
    for note in notes:
        if note.startswith('T') or note == 'R':  # Time tokens and 'R'
            processed_sequence += note
            if note == 'R':
                processed_sequence += ' '  # Add space after time tokens
        else:
            if note.isalpha() and (note.isupper() or note.islower()) :  # Check if it's a pitch character
                processed_sequence += note  # Keep single pitch characters
            else:
                processed_sequence += note + ' '  # Add space for non-pitch characters

    return processed_sequence.strip()





CFEDCAdGdCaBGAT2 cAfggEBECgEBDdcT1 DfT1 R T2 gDaT1 T2 CaR CAgfR fADGgEEGT1 cBcdgCBT2 cDDER GGCaR GBGGECEET1 AGR CGCaBFFdR aGgaAgfET1 cAR cfdAABR BaGFFEgCdfEDfffR FDR CT2 AfCT2 FEaT1 CR DT2 aT2 cCT2 CT2 FCGffCcBDddfcBccgaFT1 GfADT2 CCcCfDFT1 gaEBCaDCT1 gFfACGT2 T1 DADEEDCcDAaT2 T2 aFCgcGaaT2 dDCcEGCFEDfDgGfBdT1 CDDCffT1 R d


In [None]:
# Generate sequences from the Bigram Model
def generate_trigram_sequence(model, length, start_token):
    sequence = [start_token]
    for _ in range(length - 1):
        prev_token = sequence[-1]
        next_token_probs = model.predict_next(prev_token)
        next_token = torch.multinomial(next_token_probs, num_samples=1).item()
        sequence.append(next_token)
    return sequence

# Example: Generate a pitch-only sequence
start_token = stoi['C4']  # Start with 'C4'
generated_sequence = generate_trigram_sequence(trigram_model, length=256, start_token=start_token)
generated_sequence_str = ''.join([itos[token] for token in generated_sequence])
print("Generated Sequence:", generated_sequence_str)
processed_trigram_sequence = preprocess_melody_with_time_tokens(generated_sequence_str)
print("Processed Trigram Sequence:", processed_trigram_sequence)

Generated Sequence: C4D4g3F3B4a4a3g3E5C5c3C4RC4D4a5B3d3E5C4B4B3f3A5C4C5T2d4E4E5D4g3f4f5F4B4T2f3g3G4g4d4E4B3c4c4d3d3G4C4B3C5F5c4B3a4A3E4E4a4B3B4a3D5d5G5f4E3B3c4d4A3C3F3C5C3g3d4C3T2D3f4f3D5G4T2d3C3f4d5RB3E3G3E5d3f4c4f4D3d5f4a3B5g3F4G5F4g5E5B3d4a5E4F5G5f3B4C3g3d4F5D5c4F4E3c3E3f3C3g3C5C3d3E4f5a5c5d3A3E4a4c3E5G4c4g5B5a4a3B3D4A4g4g5a5G3g3A4F4C5C4f4d3c5g3B5c4c4G3A3d3E3f3a5A3B3D5D3a3A4d5c4F3E5F5g4d5A3d3D4C4g4C4d5c4G3C4E5c4F3a3c3D5d3C4F3D3C4f4B5G4C3G5E5G5A4T2C4E3D4T2G3Rf4a5f4a5B3B5F5B3f3E3G4B3c3d5F5a3A4d5G4c3d5C5G4A3c3RG4E3a4F4c3B3
Processed Trigram Sequence: C4 D4 g3 F3 B4 a4 a3 g3 E5 C5 c3 C4 R C4 D4 a5 B3 d3 E5 C4 B4 B3 f3 A5 C4 C5 T2 d4 E4 E5 D4 g3 f4 f5 F4 B4 T2 f3 g3 G4 g4 d4 E4 B3 c4 c4 d3 d3 G4 C4 B3 C5 F5 c4 B3 a4 A3 E4 E4 a4 B3 B4 a3 D5 d5 G5 f4 E3 B3 c4 d4 A3 C3 F3 C5 C3 g3 d4 C3 T2 D3 f4 f3 D5 G4 T2 d3 C3 f4 d5 R B3 E3 G3 E5 d3 f4 c4 f4 D3 d5 f4 a3 B5 g3 F4 G5 F4 g5 E5 B3 d4 a5 E4 F5 G5 f3 B4 C3 g3 d4 F5 D5 c4 F4 E3 c3 E3 f3 C3 g3 C5 C3 d3 E4 f5 a5 c5 d3 A3 E4 a4 c3 E5 G4 c4 g5 B5 a

In [None]:

# Generated sequences
generated_sequences = [
   processed_trigram_sequence,

]

# Training sequences
train_sequences = [
    "R T1 G T2 D T1 D T2 C T1 C T2 a T1 G T2 D T1 F T2 F T1 D T2 F T1 R T2 D T1 D T2 C T1 R T2 a T1 G T2 D T1 F T2 R T1 D T2 C T1 C T2 a T1 G T2 R T1 F T2 F T1 f T2 R T1 a T2 a T1 G T2 a T1 C T2 D T1 R T2 G T1 D T2 D T1 C T2 C T1 a T2 a T1 G T2 G T1 D T2 F T1 R T2 D T1 D T2 C T1 C T2 a T1 G T2 G T1 D T2 F T1 R T2 D T1 C T2 C T1 a T2 G T1 G T2 F T1 F T2 f T1 R T2 a T1 G T2 a T1 C T2 D T1 R T2 G T1 G T2 D T1 D T2 C T1 R T2 D T1 D T2 C T1 R T2 a T1 a T2 a T1 C T2 R T1 D T2 D T1 C T2 R T1 D T2 C T1 a T2 a T1 C T2 C T1 a T2 R T1 D T2 D T1 C T2 a T1 C T2 C T1 a T2 G T1 R T2 a T1 a T2 C T1 a T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 a T2 C T1 D T2 D T1 C T2 R T1 D T2 D T1 C T2 R T1 a T2 a T1 a T2 C T1 R T2 D T1 d T2 D T1 C T2 a T1 G T2 F T1 F T2 D T1 C T2 a T1 R T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 a T2 a T1 C T2 a T1 R T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 G T2 D T1 D T2 C T1 C T2 a T1 G T2 D T1 F T2 F T1 D T2 F T1 R T2 D T1 D T2 C T1 R T2 a T1 G T2 D T1 F T2 R T1 D T2 C T1 C T2 a T1 G T2 R T1 F T2 F T1 f T2 R T1 a T2 a T1 G T2 a T1 C T2 D T1 R T2 G T1 D T2 D T1 C T2 C T1 a T2 a T1 G T2 G T1 D T2 F T1 R T2 D T1 D T2 C T1 C T2 a T1 G T2 G T1 D T2 F T1 R T2 D T1 C T2 C T1 a T2 G T1 G T2 F T1 F T2 f T1 R T2 a T1 G T2 a T1 C T2 D T1 R T2 G T1 G T2 D T1 D T2 C T1 R T2 D T1 D T2 C T1 R T2 a T1 a T2 a T1 C T2 R T1 D T2 D T1 C T2 R T1 D T2 C T1 a T2 a T1 C T2 C T1 a T2 R T1 D T2 D T1 C T2 a T1 C T2 C T1 a T2 G T1 R T2 a T1 a T2 C T1 a T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 a T2 C T1 D T2 D T1 C T2 R T1 D T2 D T1 C T2 R T1 a T2 a T1 a T2 C T1 R T2 D T1 d T2 D T1 C T2 a T1 G T2 F T1 F T2 D T1 C T2 a T1 R T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 a T2 a T1 C T2 a T1 R T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 G T2 G T1 D T2 D T1 C T2 R T1 D T2 D T1 C T2 R T1 a T2 a T1 a T2 C T1 R T2 D T1 D T2 C T1 R T2 D T1 C T2 a T1 a T2 C T1 C T2 a T1 R T2 D T1 D T2 C T1 a T2 C T1 C T2 a T1 G T2 R T1 a T2 a T1 C T2 a T1 D T2 D T1 C T2 a T1 C T2 C T1 a T2 G T1 R T2 a T1 C T2 D T1 D T2 C T1 R T2 D T1 D T2 C T1 R T2 a T1 a T2 a T1 C T2 R T1 D T2 d T1 D T2 C T1 a T2 G T1 F T2 F T1 D T2 C T1 a T2 R T1 D T2 D T1 C T2 a T1 C T2 C T1 a T2 G T1 R T2 a T1 a T2 C T1 a T2 R T1 D T2 D T1 C T2 a T1 C T2 C T1 a T2 G T1",

]

# Calculate rhythm consistency
print("time tokens")
rhythm_consistency_score = calculate_rhythm_consistency(generated_sequences, train_sequences)
print(f"Rhythm Consistency Score: {rhythm_consistency_score:.4f}")

time tokens
['T2', 'T2', 'T2', 'T2', 'T2', 'T2']
['T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T1', 'T2', 'T