In [2]:
%pylab inline

%pylab is deprecated, use %matplotlib inline and import the required libraries.
Populating the interactive namespace from numpy and matplotlib


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.metrics.cluster import mutual_info_score
from sklearn.cluster import KMeans
from scipy.stats import entropy
import copy
import math
import os
import numpy as np
import matplotlib.pyplot as plt


In [4]:
class TransformerModel(nn.Module):
    """Causally masked Transformer encoder that emits next-token logits per position.

    Pipeline: token embedding scaled by ``sqrt(emsize)`` -> positional encoding ->
    ``nn.TransformerEncoder`` with a square subsequent mask -> linear projection
    to ``ntokens`` logits.

    The mask is sized from ``len(src)``, so ``forward`` treats the first axis
    of ``src`` as the sequence axis.

    Args:
        ntokens: vocabulary size (embedding rows and number of output logits).
        emsize: embedding / model dimension.
        nhead: number of attention heads passed to ``nn.TransformerEncoderLayer``.
        d_hid: feed-forward dimension passed to ``nn.TransformerEncoderLayer``.
        nlayers: number of stacked encoder layers.
        dropout: dropout probability for the positional encoding and encoder layers.

    Attributes:
        src_mask: cached attention mask; rebuilt when the sequence length or
            the input device changes.
        store: NumPy copy of the encoder output of the most recent ``forward``
            call, or ``None`` before the first call.
    """

    def __init__(self, ntokens, emsize, nhead, d_hid, nlayers, dropout=0.5):
        super(TransformerModel, self).__init__()
        self.model_type = 'Transformer'
        self.src_mask = None  # built lazily by forward()
        self.store = None  # filled by forward(); defined here so reads never raise AttributeError
        self.pos_encoder = PositionalEncoding(emsize, dropout)
        encoder_layers = nn.TransformerEncoderLayer(emsize, nhead, d_hid, dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, nlayers)
        self.encoder = nn.Embedding(ntokens, emsize)
        self.emsize = emsize
        self.decoder = nn.Linear(emsize, ntokens)
        self.ntokens = ntokens
        self.init_weights()

    def _generate_square_subsequent_mask(self, sz):
        """Return an ``sz x sz`` float mask: 0.0 on and below the diagonal, -inf above it."""
        return torch.triu(torch.ones(sz, sz) * float('-inf'), diagonal=1)

    def init_weights(self):
        """Re-initialise embedding and decoder weights uniformly in [-0.1, 0.1]; zero the decoder bias."""
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, src, verbose=False):
        """Compute logits for every position of ``src``.

        Args:
            src: integer token ids with the sequence on the first axis.
            verbose: when true, print the shape of the encoder output.

        Returns:
            Tensor of logits whose last dimension has size ``ntokens``.
        """
        seq_len = len(src)
        mask_is_stale = (
            self.src_mask is None
            or self.src_mask.size(0) != seq_len
            # A mask cached on another device would make the encoder call fail
            # after the model/input has been moved.
            or self.src_mask.device != src.device
        )
        if mask_is_stale:
            self.src_mask = self._generate_square_subsequent_mask(seq_len).to(src.device)

        embedded = self.encoder(src) * math.sqrt(self.emsize)
        embedded = self.pos_encoder(embedded)
        output = self.transformer_encoder(embedded, self.src_mask)
        # .cpu() is required before .numpy() for non-CPU tensors and is a no-op on CPU;
        # .copy() detaches the stored array from the tensor's memory.
        self.store = output.detach().cpu().numpy().copy()
        if verbose:
            print(output.shape)
        output = self.decoder(output)
        return output

class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position information to embeddings, then apply dropout.

    A table ``pe`` of shape ``(max_len, 1, d_model)`` is precomputed and
    registered as a buffer: even feature columns hold ``sin(position * div_term)``
    and odd columns hold ``cos(position * div_term)``.

    Args:
        d_model: size of the feature (last) dimension; may be even or odd.
        dropout: dropout probability applied after the addition.
        max_len: number of positions precomputed in the table.
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * -(math.log(10000.0) / d_model))
        angles = position * div_term  # (max_len, ceil(d_model / 2))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd column than even columns, so
        # the angle table is trimmed; for even d_model the slice is a no-op.
        pe[:, 0, 1::2] = torch.cos(angles[:, : d_model // 2])
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``dropout(x + pe[:x.size(0)])``.

        The table is sliced by ``x.size(0)``, so the first axis of ``x`` is
        treated as the position axis and the last axis must have size ``d_model``.
        """
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)

# Hyperparameters for a demo instance; in the cells shown here it is only
# printed to inspect the layer layout.
# NOTE: 8 is smaller than the 10-letter note alphabet used by getTrainingData;
# the training cells further down rebind ntokens to 10.
ntokens = 8  # vocabulary size (embedding rows / output logits)
emsize = 20  # width of each token embedding
nhead = 4  # attention heads in nn.MultiheadAttention
d_hid = 20  # feed-forward width inside each encoder layer
nlayers = 2  # number of stacked nn.TransformerEncoderLayer blocks
dropout = 0.03  # dropout probability

# Build the model with explicit keyword arguments and show its module tree
model = TransformerModel(
    ntokens=ntokens,
    emsize=emsize,
    nhead=nhead,
    d_hid=d_hid,
    nlayers=nlayers,
    dropout=dropout,
)
print(model)

TransformerModel(
  (pos_encoder): PositionalEncoding(
    (dropout): Dropout(p=0.03, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-1): 2 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=20, out_features=20, bias=True)
        )
        (linear1): Linear(in_features=20, out_features=20, bias=True)
        (dropout): Dropout(p=0.03, inplace=False)
        (linear2): Linear(in_features=20, out_features=20, bias=True)
        (norm1): LayerNorm((20,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((20,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.03, inplace=False)
        (dropout2): Dropout(p=0.03, inplace=False)
      )
    )
  )
  (encoder): Embedding(8, 20)
  (decoder): Linear(in_features=20, out_features=8, bias=True)
)




In [5]:
def getTrainingData(songStrings, nrOfSongs, windowSize=4, songLength=40):
    """Build sliding-window (input, next-step) training pairs from note strings.

    Each song is read as a circular sequence of its first ``songLength``
    characters. For every start position ``i`` the input row holds the notes
    at ``i .. i+windowSize-1`` and the target row holds the same window
    shifted one step forward, both wrapping around modulo ``songLength``.

    Args:
        songStrings: indexable collection of strings over the letters ``A``-``J``.
        nrOfSongs: how many songs to read, starting at index 0.
        windowSize: number of notes per row (default 4).
        songLength: number of leading characters of each song that are used,
            and the wrap-around period (default 40).

    Returns:
        Tuple ``(source, target)`` of NumPy integer arrays, each with
        ``nrOfSongs * songLength`` rows of ``windowSize`` note ids (A=0 ... J=9).

    Raises:
        IndexError: if ``nrOfSongs`` exceeds the number of songs or a song is
            shorter than ``songLength``.
        ValueError: if a song contains a character outside ``A``-``J``.
    """
    noteToId = {note: idx for idx, note in enumerate("ABCDEFGHIJ")}
    source = []
    target = []
    for s in range(nrOfSongs):
        song = songStrings[s]
        # Encode the song once instead of searching the alphabet for every
        # window element.
        ids = []
        for pos in range(songLength):
            note = song[pos]
            if note not in noteToId:
                raise ValueError(f"{note!r} is not a known note (expected one of A-J)")
            ids.append(noteToId[note])
        for i in range(songLength):
            source.append([ids[(i + j) % songLength] for j in range(windowSize)])
            target.append([ids[(i + j + 1) % songLength] for j in range(windowSize)])
    return np.array(source), np.array(target)

In [6]:
# Data generation and preparation
# Two songs are active: an ascending and a descending run over the notes A-J.
songStrings = np.array([
   
    "ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ", # normal
    "JIHGFEDCBAJIHGFEDCBAJIHGFEDCBAJIHGFEDCBA", # reverse
    # Disabled alternatives. Each of these is 42 characters long, whereas
    # getTrainingData reads song positions modulo 40.
    # "CCGGAAGFFEEDDCGGFFEEDGGFFEEDCCGGAAGFFEEDDC", # daisybells
    # "ABCDEFGHIEABCDEFGHIEABCDEFGHIEABCDEFGHIEAB", # 10th note E
    # "ABCDAFGHIJBCDEAGHIAFCDEFIHIABADEFGCIABCBEF", # 5th note different random
    # "ABCDAFGHIAABCDAFGHIAABCDAFGHIAABCDAFGHIAAB" # 5th note different

])

# The second argument is the number of songs to read from songStrings.
source, target = getTrainingData(songStrings, 2)

# Each song yields 40 windows of 4 notes, so both arrays are (80, 4).
print(source.shape, target.shape)

(80, 4) (80, 4)


In [7]:
def predict(model, source):
    """Return the highest-scoring token id for every position of every sequence.

    Args:
        model (nn.Module): called with token ids transposed to
            ``[sequence_length, batch_size]``; its output is reduced with
            ``argmax`` over dimension 2.
        source: integer token ids laid out as ``[batch_size, sequence_length]``.

    Returns:
        np.ndarray: predicted token ids in the same ``[batch, sequence]``
        layout as ``source``.

    Side effects:
        Puts ``model`` into evaluation mode and leaves it there.
    """
    model.eval()  # disable dropout for deterministic predictions
    # Build the input on the model's own device so a model that has been moved
    # off the CPU can still be queried; parameter-free models fall back to CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    tokens = torch.as_tensor(source, dtype=torch.long, device=device)
    with torch.no_grad():  # inference only, no gradients needed
        logits = model(tokens.transpose(0, 1))
        best = logits.argmax(dim=2)  # index of the largest score per position
    # .cpu() is a no-op on CPU and required before .numpy() elsewhere
    return best.transpose(0, 1).cpu().numpy()


In [8]:
def generate_predictions_for_current_model(model, source):
    """Run ``predict`` on every row of ``source``, one row at a time.

    Nothing is written to disk; the predictions are only returned.

    Args:
        model (nn.Module): the model handed to ``predict``.
        source (np.ndarray): source sequences, one per row. Any number of rows
            is accepted.

    Returns:
        np.ndarray: the per-row results of ``predict`` stacked along a new
        first axis, in row order. Each row is passed as a batch of one, so the
        result carries a singleton batch axis per row (shape ``(80, 1, 4)``
        for the 80x4 data used in this notebook).
    """
    all_predictions = [
        # reshape(1, -1) presents the row as a batch containing one sequence
        predict(model, np.array(row).reshape(1, -1))
        for row in source
    ]

    # Convert to a numpy array for easier handling
    return np.array(all_predictions)



In [9]:
def generate_source_for_next_model(model, source, songLength=40):
    """Predict every row of ``source`` and rotate the results by one row per song.

    ``source`` is treated as consecutive blocks of ``songLength`` rows. Within
    each block the prediction for the block's last row is emitted first,
    followed by the predictions for the remaining rows in order. Nothing is
    written to disk.

    Args:
        model (nn.Module): the model handed to ``predict``.
        source (np.ndarray): source sequences, one per row; the row count must
            be a positive multiple of ``songLength``.
        songLength (int): number of rows that belong to one song (default 40).

    Returns:
        np.ndarray: 2-D array with one row of predicted token ids per row of
        ``source``, in the rotated order described above.

    Raises:
        ValueError: if ``songLength`` is not positive or ``source`` does not
            hold a positive whole number of songs.
    """
    if songLength < 1:
        raise ValueError("songLength must be a positive integer")
    nrRows = len(source)
    if nrRows == 0 or nrRows % songLength != 0:
        raise ValueError(
            f"source has {nrRows} rows; expected a positive multiple of {songLength}"
        )

    all_predictions = []
    for start in range(0, nrRows, songLength):
        last = start + songLength - 1
        # Last row of the song first, then rows start .. last-1.
        for i in [last, *range(start, last)]:
            current_source = np.array(source[i]).reshape(1, -1)  # batch of one
            all_predictions.append(predict(model, current_source))

    # Stack, then drop the per-row singleton batch axis
    all_predictions = np.array(all_predictions)
    return all_predictions.reshape(-1, all_predictions.shape[-1])



In [10]:
def calculate_accuracy(pred, target):
    """Return the percentage of token positions where ``pred`` equals ``target``.

    Both inputs are converted to integer tensors and flattened to two
    dimensions (keeping the last axis), so predictions that carry an extra
    singleton batch axis can be compared directly with a 2-D target.

    Args:
        pred (np.ndarray): predicted token ids.
        target (np.ndarray): true token ids.

    Returns:
        float: the accuracy as a percentage.

    Raises:
        ValueError: if either input is empty or scalar, or if the flattened
            shapes differ. Mismatched shapes would otherwise be broadcast
            silently and yield a meaningless percentage.
    """
    pred_tensor = torch.tensor(pred, dtype=torch.long)
    target_tensor = torch.tensor(target, dtype=torch.long)
    if pred_tensor.dim() == 0 or target_tensor.dim() == 0:
        raise ValueError("pred and target must have at least one dimension")
    if pred_tensor.numel() == 0 or target_tensor.numel() == 0:
        raise ValueError("pred and target must not be empty")

    # Flatten everything except the last axis
    pred_tensor = pred_tensor.reshape(-1, pred_tensor.shape[-1])
    target_tensor = target_tensor.reshape(-1, target_tensor.shape[-1])
    if pred_tensor.shape != target_tensor.shape:
        raise ValueError(
            f"shape mismatch: pred {tuple(pred_tensor.shape)} vs target {tuple(target_tensor.shape)}"
        )

    # Calculate accuracy
    correct = (pred_tensor == target_tensor).sum().item()
    total = target_tensor.numel()
    accuracy = correct / total * 100  # Convert to percentage

    return accuracy



In [12]:

# Training parameters
ntokens = 10  # vocabulary size: one id per note A-J
emsize = 20  # embedding dimension
nhead = 4  # attention heads
d_hid = 20  # feed-forward dimension inside each encoder layer
nlayers = 2  # number of encoder layers
dropout = 0.03
learning_rate = 1e-3
num_epochs = 2000
# Number of training runs. Every run rebinds model_1 and rewrites the same
# checkpoint file, so only the last run survives.
num_models = 1

songStrings = np.array([
    "ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ",  # normal
    "JIHGFEDCBAJIHGFEDCBAJIHGFEDCBAJIHGFEDCBA",  # reverse
])

# Train model_1 on the original song windows (full-batch Adam) and save its weights
for model_idx in range(num_models):
    model_dir = "forward_prediction/model_1"
    os.makedirs(model_dir, exist_ok=True)

    model_1 = TransformerModel(ntokens, emsize, nhead, d_hid, nlayers, dropout)
    model_1.train()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model_1.parameters(), lr=learning_rate)

    source, target = getTrainingData(songStrings, 2)
    source_tensor = torch.tensor(source, dtype=torch.long)
    target_tensor = torch.tensor(target, dtype=torch.long)

    # The batch never changes, so the sequence-first views and the flattened
    # targets are computed once instead of on every epoch.
    src = source_tensor.transpose(0, 1)
    tgt = target_tensor.transpose(0, 1)
    tgt_flat = tgt.reshape(-1)

    for epoch in range(num_epochs):
        optimizer.zero_grad()

        output = model_1(src)
        # Flatten (seq, batch, ntokens) logits to (seq * batch, ntokens) for the loss
        loss = criterion(output.view(-1, model_1.ntokens), tgt_flat)
        loss.backward()
        optimizer.step()

    model_save_path = os.path.join(model_dir, 'model_1.pt')
    torch.save(model_1.state_dict(), model_save_path)





In [13]:

# Predict the next-note window for every training row with model_1.
# The predictions are kept in memory and displayed; nothing is written to disk.
pred_1 = generate_predictions_for_current_model(model_1, source)
pred_1

array([[[1, 2, 3, 4]],

       [[2, 3, 4, 5]],

       [[1, 4, 5, 6]],

       [[4, 5, 6, 7]],

       [[5, 6, 7, 8]],

       [[4, 7, 8, 9]],

       [[5, 8, 9, 0]],

       [[8, 9, 0, 1]],

       [[7, 0, 1, 2]],

       [[0, 1, 2, 3]],

       [[1, 2, 3, 4]],

       [[2, 3, 4, 5]],

       [[1, 4, 5, 6]],

       [[4, 5, 6, 7]],

       [[5, 6, 7, 8]],

       [[4, 7, 8, 9]],

       [[5, 8, 9, 0]],

       [[8, 9, 0, 1]],

       [[7, 0, 1, 2]],

       [[0, 1, 2, 3]],

       [[1, 2, 3, 4]],

       [[2, 3, 4, 5]],

       [[1, 4, 5, 6]],

       [[4, 5, 6, 7]],

       [[5, 6, 7, 8]],

       [[4, 7, 8, 9]],

       [[5, 8, 9, 0]],

       [[8, 9, 0, 1]],

       [[7, 0, 1, 2]],

       [[0, 1, 2, 3]],

       [[1, 2, 3, 4]],

       [[2, 3, 4, 5]],

       [[1, 4, 5, 6]],

       [[4, 5, 6, 7]],

       [[5, 6, 7, 8]],

       [[4, 7, 8, 9]],

       [[5, 8, 9, 0]],

       [[8, 9, 0, 1]],

       [[7, 0, 1, 2]],

       [[0, 1, 2, 3]],

       [[0, 7, 6, 5]],

       [[7, 6, 5

In [14]:
# Percentage of token positions where model_1's predictions equal the targets
accuracy = calculate_accuracy(pred_1, target)
print(f"Accuracy: {accuracy:.2f}%")

Accuracy: 87.50%


In [15]:

# Build source_2 from model_1's predictions, rotated by one row within each song.
# It is used below as the training input for model_2; nothing is written to disk.
source_2 = generate_source_for_next_model(model_1, source)


In [16]:
# Display the rotated predictions produced by model_1
source_2

array([[0, 1, 2, 3],
       [1, 2, 3, 4],
       [2, 3, 4, 5],
       [1, 4, 5, 6],
       [4, 5, 6, 7],
       [5, 6, 7, 8],
       [4, 7, 8, 9],
       [5, 8, 9, 0],
       [8, 9, 0, 1],
       [7, 0, 1, 2],
       [0, 1, 2, 3],
       [1, 2, 3, 4],
       [2, 3, 4, 5],
       [1, 4, 5, 6],
       [4, 5, 6, 7],
       [5, 6, 7, 8],
       [4, 7, 8, 9],
       [5, 8, 9, 0],
       [8, 9, 0, 1],
       [7, 0, 1, 2],
       [0, 1, 2, 3],
       [1, 2, 3, 4],
       [2, 3, 4, 5],
       [1, 4, 5, 6],
       [4, 5, 6, 7],
       [5, 6, 7, 8],
       [4, 7, 8, 9],
       [5, 8, 9, 0],
       [8, 9, 0, 1],
       [7, 0, 1, 2],
       [0, 1, 2, 3],
       [1, 2, 3, 4],
       [2, 3, 4, 5],
       [1, 4, 5, 6],
       [4, 5, 6, 7],
       [5, 6, 7, 8],
       [4, 7, 8, 9],
       [5, 8, 9, 0],
       [8, 9, 0, 1],
       [7, 0, 1, 2],
       [1, 8, 7, 6],
       [0, 7, 6, 5],
       [7, 6, 5, 4],
       [8, 5, 4, 3],
       [5, 4, 3, 2],
       [4, 3, 2, 1],
       [5, 2, 1, 0],
       [4, 1,

In [34]:
# # Reshape the array to remove the extra dimension
# source_2 = source_2.reshape(-1, source_2.shape[-1])

In [18]:

# Training parameters
ntokens = 10  # vocabulary size: one id per note A-J
emsize = 20  # embedding dimension
nhead = 4  # attention heads
d_hid = 20  # feed-forward dimension inside each encoder layer
nlayers = 2  # number of encoder layers
dropout = 0.03
learning_rate = 1e-3
num_epochs = 2000
# Number of training runs. Every run rebinds model_2 and rewrites the same
# checkpoint file, so only the last run survives.
num_models = 1

songStrings = np.array([
    "ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ",  # normal
    "JIHGFEDCBAJIHGFEDCBAJIHGFEDCBAJIHGFEDCBA",  # reverse
])

# Train model_2: its inputs are source_2 (model_1's rotated predictions),
# its targets are the true next-note windows of the songs.
for model_idx in range(num_models):
    # The checkpoint is written into the model_1 directory as model_2.pt
    model_dir = "forward_prediction/model_1"
    os.makedirs(model_dir, exist_ok=True)

    model_2 = TransformerModel(ntokens, emsize, nhead, d_hid, nlayers, dropout)
    model_2.train()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model_2.parameters(), lr=learning_rate)

    _, target = getTrainingData(songStrings, 2)
    source_tensor = torch.tensor(source_2, dtype=torch.long)
    target_tensor = torch.tensor(target, dtype=torch.long)

    # The batch never changes, so the sequence-first views and the flattened
    # targets are computed once instead of on every epoch.
    src = source_tensor.transpose(0, 1)
    tgt = target_tensor.transpose(0, 1)
    tgt_flat = tgt.reshape(-1)

    for epoch in range(num_epochs):
        optimizer.zero_grad()

        output = model_2(src)
        # Flatten (seq, batch, ntokens) logits to (seq * batch, ntokens) for the loss
        loss = criterion(output.view(-1, model_2.ntokens), tgt_flat)
        loss.backward()
        optimizer.step()

    model_save_path = os.path.join(model_dir, 'model_2.pt')
    torch.save(model_2.state_dict(), model_save_path)



In [19]:

# Predict the next-note window for every row of the original `source` with model_2.
# The predictions are kept in memory and displayed; nothing is written to disk.
# NOTE(review): model_2 was trained on source_2 but is evaluated here on the
# original `source` - confirm this is intended.
pred_2 = generate_predictions_for_current_model(model_2, source)
pred_2

array([[[1, 2, 3, 4]],

       [[8, 3, 4, 5]],

       [[3, 4, 5, 6]],

       [[2, 5, 6, 7]],

       [[7, 6, 7, 8]],

       [[2, 7, 8, 9]],

       [[5, 8, 9, 0]],

       [[0, 9, 0, 1]],

       [[5, 0, 1, 2]],

       [[6, 1, 2, 3]],

       [[1, 2, 3, 4]],

       [[8, 3, 4, 5]],

       [[3, 4, 5, 6]],

       [[2, 5, 6, 7]],

       [[7, 6, 7, 8]],

       [[2, 7, 8, 9]],

       [[5, 8, 9, 0]],

       [[0, 9, 0, 1]],

       [[5, 0, 1, 2]],

       [[6, 1, 2, 3]],

       [[1, 2, 3, 4]],

       [[8, 3, 4, 5]],

       [[3, 4, 5, 6]],

       [[2, 5, 6, 7]],

       [[7, 6, 7, 8]],

       [[2, 7, 8, 9]],

       [[5, 8, 9, 0]],

       [[0, 9, 0, 1]],

       [[5, 0, 1, 2]],

       [[6, 1, 2, 3]],

       [[1, 2, 3, 4]],

       [[8, 3, 4, 5]],

       [[3, 4, 5, 6]],

       [[2, 5, 6, 7]],

       [[7, 6, 7, 8]],

       [[2, 7, 8, 9]],

       [[5, 8, 9, 0]],

       [[0, 9, 0, 1]],

       [[5, 0, 1, 2]],

       [[6, 1, 2, 3]],

       [[6, 7, 6, 5]],

       [[5, 6, 5

In [20]:
# Percentage of token positions where model_2's predictions equal the targets
accuracy = calculate_accuracy(pred_2, target)
print(f"Accuracy: {accuracy:.2f}%")

Accuracy: 80.00%


In [21]:

# Build source_3 from model_2's predictions on the original `source`, rotated
# by one row within each song. Nothing is written to disk.
source_3 = generate_source_for_next_model(model_2, source)


In [22]:
# Display the rotated predictions produced by model_2
source_3

array([[6, 1, 2, 3],
       [1, 2, 3, 4],
       [8, 3, 4, 5],
       [3, 4, 5, 6],
       [2, 5, 6, 7],
       [7, 6, 7, 8],
       [2, 7, 8, 9],
       [5, 8, 9, 0],
       [0, 9, 0, 1],
       [5, 0, 1, 2],
       [6, 1, 2, 3],
       [1, 2, 3, 4],
       [8, 3, 4, 5],
       [3, 4, 5, 6],
       [2, 5, 6, 7],
       [7, 6, 7, 8],
       [2, 7, 8, 9],
       [5, 8, 9, 0],
       [0, 9, 0, 1],
       [5, 0, 1, 2],
       [6, 1, 2, 3],
       [1, 2, 3, 4],
       [8, 3, 4, 5],
       [3, 4, 5, 6],
       [2, 5, 6, 7],
       [7, 6, 7, 8],
       [2, 7, 8, 9],
       [5, 8, 9, 0],
       [0, 9, 0, 1],
       [5, 0, 1, 2],
       [6, 1, 2, 3],
       [1, 2, 3, 4],
       [8, 3, 4, 5],
       [3, 4, 5, 6],
       [2, 5, 6, 7],
       [7, 6, 7, 8],
       [2, 7, 8, 9],
       [5, 8, 9, 0],
       [0, 9, 0, 1],
       [5, 0, 1, 2],
       [1, 8, 7, 6],
       [6, 7, 6, 5],
       [5, 6, 5, 4],
       [0, 5, 4, 3],
       [5, 4, 3, 2],
       [2, 3, 2, 1],
       [7, 2, 1, 0],
       [2, 1,

In [24]:
import numpy as np
import torch

# Hand-made fixture for sanity-checking calculate_accuracy.
# 15 of the 20 entries agree, so the expected accuracy is 75%.
# NOTE: this rebinds the notebook-level name `target` set by the training cells.
target = np.array([
    [0, 1, 2, 3],
    [1, 2, 3, 4],
    [2, 3, 4, 5],
    [3, 4, 5, 6],
    [3, 4, 5, 6],
])

pred = np.array([
    [0, 1, 2, 3],  # all four match
    [1, 2, 3, 4],  # all four match
    [2, 3, 4, 5],  # all four match
    [3, 3, 5, 6],  # second entry is wrong
    [0, 1, 2, 3],  # every entry is wrong
])

def calculate_accuracy(pred, target):
    """Return the percentage of token positions where ``pred`` equals ``target``.

    Both inputs are converted to integer tensors and flattened to two
    dimensions (keeping the last axis), so predictions that carry an extra
    singleton batch axis can be compared directly with a 2-D target.

    Args:
        pred (np.ndarray): The predictions from the model.
        target (np.ndarray): The true target data.

    Returns:
        float: The accuracy as a percentage.

    Raises:
        ValueError: if either input is empty or scalar, or if the flattened
            shapes differ. Mismatched shapes would otherwise be broadcast
            silently and yield a meaningless percentage.
    """
    pred_tensor = torch.tensor(pred, dtype=torch.long)
    target_tensor = torch.tensor(target, dtype=torch.long)
    if pred_tensor.dim() == 0 or target_tensor.dim() == 0:
        raise ValueError("pred and target must have at least one dimension")
    if pred_tensor.numel() == 0 or target_tensor.numel() == 0:
        raise ValueError("pred and target must not be empty")

    # Flatten everything except the last axis
    pred_tensor = pred_tensor.reshape(-1, pred_tensor.shape[-1])
    target_tensor = target_tensor.reshape(-1, target_tensor.shape[-1])
    if pred_tensor.shape != target_tensor.shape:
        raise ValueError(
            f"shape mismatch: pred {tuple(pred_tensor.shape)} vs target {tuple(target_tensor.shape)}"
        )

    # Calculate accuracy
    correct = (pred_tensor == target_tensor).sum().item()
    total = target_tensor.numel()
    accuracy = correct / total * 100  # Convert to percentage

    return accuracy

# Score the mock predictions against the mock targets; 15 of the 20 entries match.
accuracy = calculate_accuracy(pred, target)
print(f"Accuracy: {accuracy:.2f}%")


Accuracy: 75.00%
