In [1]:
%pylab inline

%pylab is deprecated, use %matplotlib inline and import the required libraries.
Populating the interactive namespace from numpy and matplotlib


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.metrics.cluster import mutual_info_score
from sklearn.cluster import KMeans
from scipy.stats import entropy
import copy
import math
import os
import numpy as np
import matplotlib.pyplot as plt


In [3]:
class TransformerModel(nn.Module):
    def __init__(self, ntokens, emsize, nhead, d_hid, nlayers, dropout=0.5):
        super(TransformerModel, self).__init__()
        self.model_type = 'Transformer'
        self.src_mask = None
        self.pos_encoder = PositionalEncoding(emsize, dropout)
        encoder_layers = nn.TransformerEncoderLayer(emsize, nhead, d_hid, dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, nlayers)
        self.encoder = nn.Embedding(ntokens, emsize)
        self.emsize = emsize
        self.decoder = nn.Linear(emsize, ntokens)
        self.ntokens=ntokens
        self.init_weights()

    def _generate_square_subsequent_mask(self, sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

    def init_weights(self):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, src,verbose=False):
        if self.src_mask is None or self.src_mask.size(0) != len(src):
            device = src.device
            mask = self._generate_square_subsequent_mask(len(src)).to(device)
            self.src_mask = mask

        src = self.encoder(src) * math.sqrt(self.emsize)
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src, self.src_mask)
        self.store=output.detach().numpy().copy()
        if verbose:
            print(output.shape)
        output = self.decoder(output)
        return output

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * -(math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)

# Parameters
ntokens = 8  # size of vocabulary
emsize = 20  # embedding dimension
nhead = 4  # number of heads in the nn.MultiheadAttention
d_hid = 20  # dimension of the feedforward network model in nn.TransformerEncoder
nlayers = 2  # number of nn.TransformerEncoderLayer
dropout = 0.03  # dropout probability

# Initialize the model
model = TransformerModel(ntokens, emsize, nhead, d_hid, nlayers, dropout)
print(model)

TransformerModel(
  (pos_encoder): PositionalEncoding(
    (dropout): Dropout(p=0.03, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-1): 2 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=20, out_features=20, bias=True)
        )
        (linear1): Linear(in_features=20, out_features=20, bias=True)
        (dropout): Dropout(p=0.03, inplace=False)
        (linear2): Linear(in_features=20, out_features=20, bias=True)
        (norm1): LayerNorm((20,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((20,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.03, inplace=False)
        (dropout2): Dropout(p=0.03, inplace=False)
      )
    )
  )
  (encoder): Embedding(8, 20)
  (decoder): Linear(in_features=20, out_features=8, bias=True)
)




In [4]:
def getTrainingData(songStrings, nrOfSongs):
    notes = list("ABCDEFGHIJ")
    chord = [[0], [1], [2], [3], [4], [5], [6], [7],[8],[9]]
    source = []
    target = []
    for s in range(nrOfSongs):
        for i in range(40):
            sentence = []
            answer = []
            for j in range(4):
                sentence.append(chord[notes.index(songStrings[s][(i+j)%40])][0])
                answer.append(chord[notes.index(songStrings[s][(i+j+1)%40])][0])
            source.append(sentence)
            target.append(answer)
    return np.array(source), np.array(target)

In [5]:
# Data generation and preparation
songStrings = np.array([
   
    "ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ", # normal
    "JIHGFEDCBAJIHGFEDCBAJIHGFEDCBAJIHGFEDCBA", # reverse
    # "CCGGAAGFFEEDDCGGFFEEDGGFFEEDCCGGAAGFFEEDDC", # daisybells
    # "ABCDEFGHIEABCDEFGHIEABCDEFGHIEABCDEFGHIEAB", # 10th note E
    # "ABCDAFGHIJBCDEAGHIAFCDEFIHIABADEFGCIABCBEF", # 5th note different random
    # "ABCDAFGHIAABCDAFGHIAABCDAFGHIAABCDAFGHIAAB" # 5th note different

])

source, target = getTrainingData(songStrings, 2)

print(source.shape, target.shape)

(80, 4) (80, 4)


In [38]:

# Function to print all source and target pairs
def print_all_source_target_pairs(source, target):
    for i in range(len(source)):
        print(f"Source {i+1}: {source[i]}")
        print(f"Target {i+1}: {target[i]}")
        print()

# Print all source and target pairs
print_all_source_target_pairs(source, target)

Source 1: [0 1 2 3]
Target 1: [1 2 3 4]

Source 2: [1 2 3 4]
Target 2: [2 3 4 5]

Source 3: [2 3 4 5]
Target 3: [3 4 5 6]

Source 4: [3 4 5 6]
Target 4: [4 5 6 7]

Source 5: [4 5 6 7]
Target 5: [5 6 7 8]

Source 6: [5 6 7 8]
Target 6: [6 7 8 9]

Source 7: [6 7 8 9]
Target 7: [7 8 9 0]

Source 8: [7 8 9 0]
Target 8: [8 9 0 1]

Source 9: [8 9 0 1]
Target 9: [9 0 1 2]

Source 10: [9 0 1 2]
Target 10: [0 1 2 3]

Source 11: [0 1 2 3]
Target 11: [1 2 3 4]

Source 12: [1 2 3 4]
Target 12: [2 3 4 5]

Source 13: [2 3 4 5]
Target 13: [3 4 5 6]

Source 14: [3 4 5 6]
Target 14: [4 5 6 7]

Source 15: [4 5 6 7]
Target 15: [5 6 7 8]

Source 16: [5 6 7 8]
Target 16: [6 7 8 9]

Source 17: [6 7 8 9]
Target 17: [7 8 9 0]

Source 18: [7 8 9 0]
Target 18: [8 9 0 1]

Source 19: [8 9 0 1]
Target 19: [9 0 1 2]

Source 20: [9 0 1 2]
Target 20: [0 1 2 3]

Source 21: [0 1 2 3]
Target 21: [1 2 3 4]

Source 22: [1 2 3 4]
Target 22: [2 3 4 5]

Source 23: [2 3 4 5]
Target 23: [3 4 5 6]

Source 24: [3 4 5 6]
Target 2

In [58]:
len(source)

80

In [6]:
def predict(model, source):
    model.eval()  # Set the model to evaluation mode
    source = torch.tensor(source, dtype=torch.long)
    with torch.no_grad():  # No need to track gradients
        src = source.transpose(0, 1)  # Adjust for the expected input dimensions [sequence_length, batch_size]
        output = model(src)  # Compute the output
        predictions = output.argmax(dim=2)  # Get the index of the max log-probability
    return predictions.transpose(0, 1).numpy()  # Return predictions in original input format


In [7]:

# Training parameters
ntokens = 10
emsize = 20
nhead = 4
d_hid = 20
nlayers = 2
dropout = 0.03
learning_rate = 1e-3
num_epochs = 2000
num_models = 1  # Train 30 different models

songStrings = np.array([
   "ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ", # normal
    "JIHGFEDCBAJIHGFEDCBAJIHGFEDCBAJIHGFEDCBA", # reverse
])

# Train and save multiple models
for model_idx in range(num_models):
    model_dir = f"forward_prediction/model_{model_idx}"
    os.makedirs(model_dir, exist_ok=True)

    model = TransformerModel(ntokens, emsize, nhead, d_hid, nlayers, dropout)
    model.train()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    source, target = getTrainingData(songStrings, 2)
    source_tensor = torch.tensor(source, dtype=torch.long)
    target_tensor = torch.tensor(target, dtype=torch.long)

    for epoch in range(num_epochs):
        optimizer.zero_grad()

        src = source_tensor.transpose(0, 1)
        tgt = target_tensor.transpose(0, 1)

        output = model(src)
        loss = criterion(output.view(-1, model.ntokens), tgt.reshape(-1))
        loss.backward()
        optimizer.step()

    model_save_path = os.path.join(model_dir, 'model.pt')
    torch.save(model.state_dict(), model_save_path)



In [82]:
# Data generation and preparation
songStrings = np.array([
   
    "ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJAB", # normal
    "JIHGFEDCBAJIHGFEDCBAJIHGFEDCBAJIHGFEDCBAJI", # reverse
    # "CCGGAAGFFEEDDCGGFFEEDGGFFEEDCCGGAAGFFEEDDC", # daisybells
    # "ABCDEFGHIEABCDEFGHIEABCDEFGHIEABCDEFGHIEAB", # 10th note E
    # "ABCDAFGHIJBCDEAGHIAFCDEFIHIABADEFGCIABCBEF", # 5th note different random
    # "ABCDAFGHIAABCDAFGHIAABCDAFGHIAABCDAFGHIAAB" # 5th note different

])

source, target = getTrainingData(songStrings, 2)

print(source.shape, target.shape)

(80, 4) (80, 4)


In [8]:
def generate_predictions(model, source):
    """
    Generate and save predictions for the specified source sequences.
    
    Args:
    model (nn.Module): The trained model to use for predictions.
    source (np.ndarray): The source sequences to generate predictions from.
    save_path (str): The path to save the predictions file.
    """

    all_predictions = []

    # Predict and save predictions for source[1] to source[38]
    for i in range(0, 80):  # Loop from 0 to 38 inclusive
        current_source = np.array(source[i]).reshape(1, -1)
        current_predictions = predict(model, current_source)
        all_predictions.append(current_predictions)

    # Convert all_predictions to a numpy array for easier handling
    all_predictions = np.array(all_predictions)

    return all_predictions



In [9]:
def generate_and_save_predictions(model, source, save_path="predictions.npy"):
    """
    Generate and save predictions for the specified source sequences.
    
    Args:
    model (nn.Module): The trained model to use for predictions.
    source (np.ndarray): The source sequences to generate predictions from.
    save_path (str): The path to save the predictions file.
    """

    all_predictions = []

    # Predict for source[39] first
    source_39 = np.array(source[39]).reshape(1, -1)
    predictions_39 = predict(model, source_39)
    all_predictions.append(predictions_39)

    # Predict and save predictions for source[1] to source[38]
    for i in range(0, 39):  # Loop from 0 to 38 inclusive
        current_source = np.array(source[i]).reshape(1, -1)
        current_predictions = predict(model, current_source)
        all_predictions.append(current_predictions)

    # Predict for source[79] first
    source_79 = np.array(source[79]).reshape(1, -1)
    predictions_79 = predict(model, source_79)
    all_predictions.append(predictions_79)

    # Predict and save predictions for source[40] to source[78]
    for i in range(40, 79):  # Loop from 40 to 78 inclusive
        current_source = np.array(source[i]).reshape(1, -1)
        current_predictions = predict(model, current_source)
        all_predictions.append(current_predictions)

    # Convert all_predictions to a numpy array for easier handling
    all_predictions = np.array(all_predictions)

    # Save the predictions for use in the second model
    np.save(save_path, all_predictions)

    print("All predictions have been saved for further use.")

    return all_predictions



In [12]:
source

array([[[0, 1, 2, 3]],

       [[9, 2, 3, 4]],

       [[2, 3, 4, 5]],

       [[3, 4, 5, 6]],

       [[2, 5, 6, 7]],

       [[3, 6, 7, 8]],

       [[6, 7, 8, 9]],

       [[5, 8, 9, 0]],

       [[8, 9, 0, 1]],

       [[7, 0, 1, 2]],

       [[0, 1, 2, 3]],

       [[9, 2, 3, 4]],

       [[2, 3, 4, 5]],

       [[3, 4, 5, 6]],

       [[2, 5, 6, 7]],

       [[3, 6, 7, 8]],

       [[6, 7, 8, 9]],

       [[5, 8, 9, 0]],

       [[8, 9, 0, 1]],

       [[7, 0, 1, 2]],

       [[0, 1, 2, 3]],

       [[9, 2, 3, 4]],

       [[2, 3, 4, 5]],

       [[3, 4, 5, 6]],

       [[2, 5, 6, 7]],

       [[3, 6, 7, 8]],

       [[6, 7, 8, 9]],

       [[5, 8, 9, 0]],

       [[8, 9, 0, 1]],

       [[7, 0, 1, 2]],

       [[0, 1, 2, 3]],

       [[9, 2, 3, 4]],

       [[2, 3, 4, 5]],

       [[3, 4, 5, 6]],

       [[2, 5, 6, 7]],

       [[3, 6, 7, 8]],

       [[6, 7, 8, 9]],

       [[5, 8, 9, 0]],

       [[8, 9, 0, 1]],

       [[7, 0, 1, 2]],

       [[9, 8, 7, 6]],

       [[0, 7, 6

In [10]:

# Generate and save predictions
source_2 = generate_and_save_predictions(model, source, save_path="predictions.npy")
type(source_2)

All predictions have been saved for further use.


array([[[8, 1, 2, 3]],

       [[9, 2, 3, 4]],

       [[0, 3, 4, 5]],

       [[1, 4, 5, 6]],

       [[4, 5, 6, 7]],

       [[5, 6, 7, 8]],

       [[6, 7, 8, 9]],

       [[7, 8, 9, 0]],

       [[8, 9, 0, 1]],

       [[7, 0, 1, 2]],

       [[8, 1, 2, 3]],

       [[9, 2, 3, 4]],

       [[0, 3, 4, 5]],

       [[1, 4, 5, 6]],

       [[4, 5, 6, 7]],

       [[5, 6, 7, 8]],

       [[6, 7, 8, 9]],

       [[7, 8, 9, 0]],

       [[8, 9, 0, 1]],

       [[7, 0, 1, 2]],

       [[8, 1, 2, 3]],

       [[9, 2, 3, 4]],

       [[0, 3, 4, 5]],

       [[1, 4, 5, 6]],

       [[4, 5, 6, 7]],

       [[5, 6, 7, 8]],

       [[6, 7, 8, 9]],

       [[7, 8, 9, 0]],

       [[8, 9, 0, 1]],

       [[7, 0, 1, 2]],

       [[8, 1, 2, 3]],

       [[9, 2, 3, 4]],

       [[0, 3, 4, 5]],

       [[1, 4, 5, 6]],

       [[4, 5, 6, 7]],

       [[5, 6, 7, 8]],

       [[6, 7, 8, 9]],

       [[7, 8, 9, 0]],

       [[8, 9, 0, 1]],

       [[7, 0, 1, 2]],

       [[9, 8, 7, 6]],

       [[8, 7, 6

In [13]:
type(source_2)

NameError: name 'source_2' is not defined

In [11]:

# Generate and save predictions
pred = generate_predictions(model, source)
pred

array([[[9, 2, 3, 4]],

       [[0, 3, 4, 5]],

       [[1, 4, 5, 6]],

       [[4, 5, 6, 7]],

       [[5, 6, 7, 8]],

       [[6, 7, 8, 9]],

       [[7, 8, 9, 0]],

       [[8, 9, 0, 1]],

       [[7, 0, 1, 2]],

       [[8, 1, 2, 3]],

       [[9, 2, 3, 4]],

       [[0, 3, 4, 5]],

       [[1, 4, 5, 6]],

       [[4, 5, 6, 7]],

       [[5, 6, 7, 8]],

       [[6, 7, 8, 9]],

       [[7, 8, 9, 0]],

       [[8, 9, 0, 1]],

       [[7, 0, 1, 2]],

       [[8, 1, 2, 3]],

       [[9, 2, 3, 4]],

       [[0, 3, 4, 5]],

       [[1, 4, 5, 6]],

       [[4, 5, 6, 7]],

       [[5, 6, 7, 8]],

       [[6, 7, 8, 9]],

       [[7, 8, 9, 0]],

       [[8, 9, 0, 1]],

       [[7, 0, 1, 2]],

       [[8, 1, 2, 3]],

       [[9, 2, 3, 4]],

       [[0, 3, 4, 5]],

       [[1, 4, 5, 6]],

       [[4, 5, 6, 7]],

       [[5, 6, 7, 8]],

       [[6, 7, 8, 9]],

       [[7, 8, 9, 0]],

       [[8, 9, 0, 1]],

       [[7, 0, 1, 2]],

       [[8, 1, 2, 3]],

       [[8, 7, 6, 5]],

       [[7, 6, 5

In [93]:
len(pred)

80

In [21]:
def calculate_accuracy_from_saved_predictions(pred, target):
    """
    Calculate accuracy by comparing saved predictions with the target.
    
    Args:
    predictions_path (str): The path to the saved predictions file.
    target (np.ndarray): The true target data.
    
    Returns:
    float: The accuracy as a percentage.
    """
    # Load the predictions from the file
    all_predictions = pred
    
    # Flatten the predictions to match the target shape
    all_predictions = all_predictions.reshape(-1, all_predictions.shape[-1])
    
    # Convert predictions and target to tensors
    pred_tensor = torch.tensor(all_predictions, dtype=torch.long)
    target_tensor = torch.tensor(target, dtype=torch.long)
    
    # Calculate accuracy
    correct = (pred_tensor == target_tensor).sum().item()
    total = target_tensor.numel()
    accuracy = correct / total * 100  # Convert to percentage
    
    return accuracy



In [20]:

# Example usage:
# Load the original target data
source, target = getTrainingData(songStrings, 2)

# Ensure the target matches the number of predictions
target = target[:len(pred_2)]

# Calculate accuracy
accuracy = calculate_accuracy_from_saved_predictions(pred_2, target)
print(f"Accuracy: {accuracy:.2f}%")

Accuracy: 0.00%


In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Assuming the TransformerModel class is already defined and available

# Load the predictions from the first model
all_predictions = np.load("predictions.npy")

# Flatten the predictions to match the expected input format for training
all_predictions = all_predictions.reshape(-1, all_predictions.shape[-1])

# Load the original target data
_, target = getTrainingData(songStrings, 2)
target = target[:len(all_predictions)]  # Ensure the target matches the number of predictions

# Convert predictions and target to tensors
source_tensor = torch.tensor(all_predictions, dtype=torch.long)
target_tensor = torch.tensor(target, dtype=torch.long)

# Define the second model
model_2 = TransformerModel(ntokens, emsize, nhead, d_hid, nlayers, dropout)

# Set the model to training mode
model_2.train()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_2.parameters(), lr=learning_rate)

# Training loop
num_epochs = 2000
for epoch in range(num_epochs):
    optimizer.zero_grad()
    
    # Transpose the source and target tensors to match the model's expected input dimensions
    src = source_tensor.transpose(0, 1)
    tgt = target_tensor.transpose(0, 1)
    
    # Forward pass
    output = model_2(src)
    
    # Compute the loss
    loss = criterion(output.view(-1, model_2.ntokens), tgt.reshape(-1))
    
    # Backward pass
    loss.backward()
    
    # Update the model parameters
    optimizer.step()
    
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item()}")

print("Training of the second model completed.")




Epoch 0, Loss: 2.3110098838806152
Epoch 100, Loss: 1.0287226438522339
Epoch 200, Loss: 0.45679640769958496
Epoch 300, Loss: 0.3288688063621521
Epoch 400, Loss: 0.290780246257782
Epoch 500, Loss: 0.28136831521987915
Epoch 600, Loss: 0.2604230046272278
Epoch 700, Loss: 0.2678636610507965
Epoch 800, Loss: 0.2603604793548584
Epoch 900, Loss: 0.2513199746608734
Epoch 1000, Loss: 0.25223198533058167
Epoch 1100, Loss: 0.25624531507492065
Epoch 1200, Loss: 0.258869469165802
Epoch 1300, Loss: 0.2518995404243469
Epoch 1400, Loss: 0.26028168201446533
Epoch 1500, Loss: 0.24723899364471436
Epoch 1600, Loss: 0.25782880187034607
Epoch 1700, Loss: 0.24713537096977234
Epoch 1800, Loss: 0.2524046301841736
Epoch 1900, Loss: 0.2471456527709961
Training of the second model completed.


In [15]:

# Generate and save predictions
source_3 = generate_and_save_predictions(model_2, source_2, save_path="predictions.npy")
source_3

All predictions have been saved for further use.


array([[[4, 1, 2, 3]],

       [[5, 2, 3, 4]],

       [[8, 3, 4, 5]],

       [[9, 4, 5, 6]],

       [[4, 5, 6, 7]],

       [[5, 6, 7, 8]],

       [[6, 7, 8, 9]],

       [[3, 8, 9, 0]],

       [[4, 9, 0, 1]],

       [[5, 0, 1, 2]],

       [[4, 1, 2, 3]],

       [[5, 2, 3, 4]],

       [[8, 3, 4, 5]],

       [[9, 4, 5, 6]],

       [[4, 5, 6, 7]],

       [[5, 6, 7, 8]],

       [[6, 7, 8, 9]],

       [[3, 8, 9, 0]],

       [[4, 9, 0, 1]],

       [[5, 0, 1, 2]],

       [[4, 1, 2, 3]],

       [[5, 2, 3, 4]],

       [[8, 3, 4, 5]],

       [[9, 4, 5, 6]],

       [[4, 5, 6, 7]],

       [[5, 6, 7, 8]],

       [[6, 7, 8, 9]],

       [[3, 8, 9, 0]],

       [[4, 9, 0, 1]],

       [[5, 0, 1, 2]],

       [[4, 1, 2, 3]],

       [[5, 2, 3, 4]],

       [[8, 3, 4, 5]],

       [[9, 4, 5, 6]],

       [[4, 5, 6, 7]],

       [[5, 6, 7, 8]],

       [[6, 7, 8, 9]],

       [[3, 8, 9, 0]],

       [[4, 9, 0, 1]],

       [[5, 0, 1, 2]],

       [[9, 8, 7, 6]],

       [[8, 7, 6

In [19]:

# Generate and save predictions
pred_2 = generate_predictions(model_2, source_2)
pred_2

array([[[5, 2, 3, 4]],

       [[8, 3, 4, 5]],

       [[9, 4, 5, 6]],

       [[4, 5, 6, 7]],

       [[5, 6, 7, 8]],

       [[6, 7, 8, 9]],

       [[3, 8, 9, 0]],

       [[4, 9, 0, 1]],

       [[5, 0, 1, 2]],

       [[4, 1, 2, 3]],

       [[5, 2, 3, 4]],

       [[8, 3, 4, 5]],

       [[9, 4, 5, 6]],

       [[4, 5, 6, 7]],

       [[5, 6, 7, 8]],

       [[6, 7, 8, 9]],

       [[3, 8, 9, 0]],

       [[4, 9, 0, 1]],

       [[5, 0, 1, 2]],

       [[4, 1, 2, 3]],

       [[5, 2, 3, 4]],

       [[8, 3, 4, 5]],

       [[9, 4, 5, 6]],

       [[4, 5, 6, 7]],

       [[5, 6, 7, 8]],

       [[6, 7, 8, 9]],

       [[3, 8, 9, 0]],

       [[4, 9, 0, 1]],

       [[5, 0, 1, 2]],

       [[4, 1, 2, 3]],

       [[5, 2, 3, 4]],

       [[8, 3, 4, 5]],

       [[9, 4, 5, 6]],

       [[4, 5, 6, 7]],

       [[5, 6, 7, 8]],

       [[6, 7, 8, 9]],

       [[3, 8, 9, 0]],

       [[4, 9, 0, 1]],

       [[5, 0, 1, 2]],

       [[4, 1, 2, 3]],

       [[8, 7, 6, 5]],

       [[5, 6, 5

In [22]:

# Example usage:
# Load the original target data
source, target = getTrainingData(songStrings, 2)

# Ensure the target matches the number of predictions
target = target[:len(pred_2)]

# Calculate accuracy
accuracy = calculate_accuracy_from_saved_predictions(pred_2, target)
print(f"Accuracy: {accuracy:.2f}%")

Accuracy: 85.00%


In [19]:


def getTrainingData(songStrings, nrOfSongs):
    notes = list("ABCDEFGHIJ")
    chord = [[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]]
    source = []
    target = []
    for s in range(nrOfSongs):
        for i in range(40):  # Updated to 40 notes
            sentence = []
            answer = []
            for j in range(4):
                sentence.append(chord[notes.index(songStrings[s][(i+j)%40])][0])
                answer.append(chord[notes.index(songStrings[s][(i+j+1)%40])][0])
            source.append(sentence)
            target.append(answer)
    return np.array(source), np.array(target)

# def predict(model, source):
#     model.eval()  # Set the model to evaluation mode
#     source = torch.tensor(source, dtype=torch.long)
#     with torch.no_grad():  # No need to track gradients
#         src = source.transpose(0, 1)  # Adjust for the expected input dimensions [sequence_length, batch_size]
#         output = model(src)  # Compute the output
#         predictions = output.argmax(dim=2)  # Get the index of the max log-probability
#     return predictions.transpose(0, 1).numpy()  # Return predictions in original input format

# def predict(model, source):
#     model.eval()  # Set the model to evaluation mode
#     source = torch.tensor(source, dtype=torch.long).unsqueeze(1)  # Add batch dimension
#     with torch.no_grad():  # No need to track gradients
#         src = source.transpose(0, 1)  # Adjust for the expected input dimensions [sequence_length, batch_size]
#         output = model(src)  # Compute the output
#         predictions = output.argmax(dim=2)  # Get the index of the max log-probability
#     return predictions.transpose(0, 1).numpy()  # Return predictions in original input format
# def predict(model, source):
#     model.eval()  # Set the model to evaluation mode
#     source = torch.tensor(source, dtype=torch.long).unsqueeze(1)  # Add batch dimension
#     with torch.no_grad():  # No need to track gradients
#         src = source.transpose(0, 1)  # Adjust for the expected input dimensions [sequence_length, batch_size]
#         output = model(src)  # Compute the output
#         predictions = output.argmax(dim=2)  # Get the index of the max log-probability
#     return predictions.transpose(0, 1).numpy()  # Return predictions in original input format
def predict(model, source):
    model.eval()  # Set the model to evaluation mode
    source = torch.tensor(source, dtype=torch.long).unsqueeze(1)  # Add batch dimension
    with torch.no_grad():  # No need to track gradients
        src = source.transpose(0, 1)  # Adjust for the expected input dimensions [sequence_length, batch_size]
        output = model(src)  # Compute the output
        predictions = output.argmax(dim=2)  # Get the index of the max log-probability
    return predictions.transpose(0, 1).numpy()  # Return predictions in original input format

def generate_predictions(model, source):
    """
    Generate predictions for the specified source sequences.
    
    Args:
    model (nn.Module): The trained model to use for predictions.
    source (np.ndarray): The source sequences to generate predictions from.

    Returns:
    np.ndarray: The predictions.
    """
    all_predictions = []

    # Predict and save predictions for source[1] to source[38]
    for i in range(0, 80):  # Loop from 0 to 79 inclusive
        current_source = np.array(source[i]).reshape(1, -1)
        current_predictions = predict(model, current_source)
        all_predictions.append(current_predictions)

    # Convert all_predictions to a numpy array for easier handling
    all_predictions = np.array(all_predictions)

    return all_predictions

def generate_and_save_predictions(model, source):
    """
    Generate and save predictions for the specified source sequences.
    
    Args:
    model (nn.Module): The trained model to use for predictions.
    source (np.ndarray): The source sequences to generate predictions from.
    save_path (str): The path to save the predictions file.
    """
    all_predictions = []

    # Predict for source[39] first
    source_39 = np.array(source[39]).reshape(1, -1)
    predictions_39 = predict(model, source_39)
    all_predictions.append(predictions_39)

    # Predict and save predictions for source[1] to source[38]
    for i in range(0, 39):  # Loop from 0 to 38 inclusive
        current_source = np.array(source[i]).reshape(1, -1)
        current_predictions = predict(model, current_source)
        all_predictions.append(current_predictions)

    # Predict for source[79] first
    source_79 = np.array(source[79]).reshape(1, -1)
    predictions_79 = predict(model, source_79)
    all_predictions.append(predictions_79)

    # Predict and save predictions for source[40] to source[78]
    for i in range(40, 79):  # Loop from 40 to 78 inclusive
        current_source = np.array(source[i]).reshape(1, -1)
        current_predictions = predict(model, current_source)
        all_predictions.append(current_predictions)

    # Convert all_predictions to a numpy array for easier handling
    all_predictions = np.array(all_predictions)

    return all_predictions

def calculate_accuracy(pred, target):
    """
    Calculate accuracy by comparing saved predictions with the target.
    
    Args:
    predictions_path (str): The path to the saved predictions file.
    target (np.ndarray): The true target data.
    
    Returns:
    float: The accuracy as a percentage.
    """
    # Load the predictions from the file
    all_predictions = pred
    
    # Flatten the predictions to match the target shape
    all_predictions = all_predictions.reshape(-1, all_predictions.shape[-1])
    
    # Convert predictions and target to tensors
    pred_tensor = torch.tensor(all_predictions, dtype=torch.long)
    target_tensor = torch.tensor(target, dtype=torch.long)
    
    # Calculate accuracy
    correct = (pred_tensor == target_tensor).sum().item()
    total = target_tensor.numel()
    accuracy = correct / total * 100  # Convert to percentage
    
    return accuracy


# def train_model(model, source, target, num_epochs=2000, learning_rate=1e-3):
#     """
#     Train the model on the given source and target data.
    
#     Args:
#     model (nn.Module): The model to train.
#     source (np.ndarray): The source data.
#     target (np.ndarray): The target data.
#     num_epochs (int): The number of epochs to train.
#     learning_rate (float): The learning rate for the optimizer.
    
#     Returns:
#     nn.Module: The trained model.
#     """
#     model.train()
#     criterion = nn.CrossEntropyLoss()
#     optimizer = optim.Adam(model.parameters(), lr=learning_rate)

#     source_tensor = torch.tensor(source, dtype=torch.long)
#     target_tensor = torch.tensor(target, dtype=torch.long)

#     for epoch in range(num_epochs):
#         optimizer.zero_grad()

#         src = source_tensor.transpose(0, 1)
#         tgt = target_tensor.transpose(0, 1)

#         output = model(src)
#         loss = criterion(output.view(-1, model.ntokens), tgt.reshape(-1))
#         loss.backward()
#         optimizer.step()

#         if epoch % 100 == 0:
#             # Generate predictions for the entire dataset
#             pred = output.argmax(dim=2).transpose(0, 1).detach().numpy()
#             accuracy = calculate_accuracy(pred, target)
#             print(f"Epoch {epoch}, Loss: {loss.item()}, Accuracy: {accuracy:.2f}%")

#     return model
# def train_model(model, source, target, num_epochs=2000, learning_rate=1e-3):
#     model.train()
#     criterion = nn.CrossEntropyLoss()
#     optimizer = optim.Adam(model.parameters(), lr=learning_rate)

#     source_tensor = torch.tensor(source, dtype=torch.long)
#     target_tensor = torch.tensor(target, dtype=torch.long)

#     for epoch in range(num_epochs):
#         optimizer.zero_grad()
#         src = source_tensor.transpose(0, 1)
#         tgt = target_tensor.transpose(0, 1)

#         output = model(src)
#         loss = criterion(output.view(-1, model.ntokens), tgt.reshape(-1))
#         loss.backward()
#         optimizer.step()

#         if epoch % 100 == 0:
#             pred = output.argmax(dim=2).transpose(0, 1).detach().numpy()
#             accuracy = calculate_accuracy(pred, target)
#             print(f"Epoch {epoch}, Loss: {loss.item()}, Accuracy: {accuracy:.2f}%")

#     return model
# def train_model(model, source, target, num_epochs=2000, learning_rate=1e-3):
#     model.train()
#     criterion = nn.CrossEntropyLoss()
#     optimizer = optim.Adam(model.parameters(), lr=learning_rate)

#     source_tensor = torch.tensor(source, dtype=torch.long)
#     target_tensor = torch.tensor(target, dtype=torch.long)

#     for epoch in range(num_epochs):
#         optimizer.zero_grad()

#         # Ensure src and tgt are correctly shaped [sequence_length, batch_size]
#         src = source_tensor.transpose(0, 1)
#         tgt = target_tensor.transpose(0, 1)

#         # Debugging: Print shapes of src and tgt
#         print(f"Epoch {epoch}, src shape: {src.shape}, tgt shape: {tgt.shape}")

#         # Forward pass
#         output = model(src)

#         # Debugging: Print shape of output
#         print(f"Epoch {epoch}, output shape: {output.shape}")

#         # Compute the loss
#         loss = criterion(output.view(-1, model.ntokens), tgt.reshape(-1))
#         loss.backward()
#         optimizer.step()

#         if epoch % 100 == 0:
#             pred = output.argmax(dim=2).transpose(0, 1).detach().numpy()
#             accuracy = calculate_accuracy(pred, target)
#             print(f"Epoch {epoch}, Loss: {loss.item()}, Accuracy: {accuracy:.2f}%")

#     return model
def train_model(model, source, target, num_epochs=2000, learning_rate=1e-3):
    model.train()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    source_tensor = torch.tensor(source, dtype=torch.long)
    target_tensor = torch.tensor(target, dtype=torch.long)

    for epoch in range(num_epochs):
        optimizer.zero_grad()

        # Ensure src and tgt are correctly shaped [sequence_length, batch_size]
        src = source_tensor.transpose(0, 1)
        tgt = target_tensor.transpose(0, 1)

        # Debugging: Print shapes of src and tgt
        if epoch % 100 == 0:  # Print every 100 epochs
            print(f"Epoch {epoch}, src shape: {src.shape}, tgt shape: {tgt.shape}")

        # Forward pass
        output = model(src)

        # Debugging: Print shape of output
        if epoch % 100 == 0:  # Print every 100 epochs
            print(f"Epoch {epoch}, output shape: {output.shape}")

        # Compute the loss
        loss = criterion(output.view(-1, model.ntokens), tgt.reshape(-1))
        loss.backward()
        optimizer.step()

        if epoch % 100 == 0:
            pred = output.argmax(dim=2).transpose(0, 1).detach().numpy()
            accuracy = calculate_accuracy(pred, target)
            print(f"Epoch {epoch}, Loss: {loss.item()}, Accuracy: {accuracy:.2f}%")

    return model




In [21]:
# # Training parameters
ntokens = 10
emsize = 20
nhead = 4
d_hid = 20
nlayers = 2
dropout = 0.03
learning_rate = 1e-3
num_epochs = 2000
num_models = 5  # Number of models to train sequentially

songStrings = np.array([
   "ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ", # normal
    "JIHGFEDCBAJIHGFEDCBAJIHGFEDCBAJIHGFEDCBA", # reverse
])

source, target = getTrainingData(songStrings, 2)

for model_idx in range(num_models):
    print(f"Training model {model_idx + 1}")

    model = TransformerModel(ntokens, emsize, nhead, d_hid, nlayers, dropout)
    model = train_model(model, source, target, num_epochs=num_epochs, learning_rate=learning_rate)

    model_dir = f"forward_prediction/model_{model_idx}"
    os.makedirs(model_dir, exist_ok=True)
    model_save_path = os.path.join(model_dir, 'model.pt')
    torch.save(model.state_dict(), model_save_path)

    pred = generate_predictions(model, source)
    accuracy = calculate_accuracy(pred, target)
    print(f"Accuracy of model {model_idx + 1}: {accuracy:.2f}%")

    source = generate_and_save_predictions(model, source)

    source = source.reshape(-1, source.shape[-1])

print("Training of all models completed.")


Training model 1
Epoch 0, src shape: torch.Size([4, 80]), tgt shape: torch.Size([4, 80])
Epoch 0, output shape: torch.Size([4, 80, 10])
Epoch 0, Loss: 2.3171732425689697, Accuracy: 9.69%




Epoch 100, src shape: torch.Size([4, 80]), tgt shape: torch.Size([4, 80])
Epoch 100, output shape: torch.Size([4, 80, 10])
Epoch 100, Loss: 0.83387291431427, Accuracy: 82.81%
Epoch 200, src shape: torch.Size([4, 80]), tgt shape: torch.Size([4, 80])
Epoch 200, output shape: torch.Size([4, 80, 10])
Epoch 200, Loss: 0.32683008909225464, Accuracy: 84.69%
Epoch 300, src shape: torch.Size([4, 80]), tgt shape: torch.Size([4, 80])
Epoch 300, output shape: torch.Size([4, 80, 10])
Epoch 300, Loss: 0.2322976142168045, Accuracy: 87.19%
Epoch 400, src shape: torch.Size([4, 80]), tgt shape: torch.Size([4, 80])
Epoch 400, output shape: torch.Size([4, 80, 10])
Epoch 400, Loss: 0.20433561503887177, Accuracy: 88.12%
Epoch 500, src shape: torch.Size([4, 80]), tgt shape: torch.Size([4, 80])
Epoch 500, output shape: torch.Size([4, 80, 10])
Epoch 500, Loss: 0.19401873648166656, Accuracy: 88.44%
Epoch 600, src shape: torch.Size([4, 80]), tgt shape: torch.Size([4, 80])
Epoch 600, output shape: torch.Size([4, 

AssertionError: query should be unbatched 2D or batched 3D tensor but received 4-D query tensor