In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pickle
# load the word2vec model
from gensim.models import KeyedVectors
from tqdm import tqdm
#from torch.utils.tensorboard import SummaryWriter


In [3]:
class LyricDataset(Dataset):
    def __init__(self, lyrics_dict, midi_dict, word2vec):
        """
        lyrics_dict: Dictionary of {song: [list of tokens]}
        midi_dict: Dictionary of {song: midi_vector}
        get_word_embedding: Function to convert a word to its corresponding embedding
        """
        # get keys that are in both dictionaries
        common_keys = lyrics_dict.keys() & midi_dict.keys()

        self.samples = []
        for song in common_keys:
            midi_vector = torch.tensor(midi_dict[song][0], dtype=torch.float32)  # MIDI vector for the song
            lyrics = lyrics_dict[song]
            for i in range(len(lyrics) - 1):  # Exclude last word for which there is no next word
                current_word_embedding = torch.tensor(word2vec.get_vector(lyrics[i]), dtype=torch.float32)
                next_word_index = torch.tensor(lyrics[i + 1], dtype=torch.long)
                self.samples.append((current_word_embedding, midi_vector, next_word_index))
    
    def __len__(self):
        return len(self.samples)
    
    def __getitem__(self, idx):
        current_word_embedding, midi_vector, next_word_index = self.samples[idx]
        return current_word_embedding, midi_vector, next_word_index

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [5]:
# load lyrics_dict.pkl

with open('data/lyrics_dict.pkl', 'rb') as f:
    lyrics_dict = pickle.load(f)

# load midi_feature_vectors.pkl
with open('data/midi_embeddings.pkl', 'rb') as f:
    midi_embeddings = pickle.load(f)


In [6]:
# load word2vec model
word2vec = KeyedVectors.load_word2vec_format('models/GoogleNews-vectors-negative300.bin.gz', binary=True)

In [10]:
# load midi_autoencoder from torch
input = midi_embeddings['no woman no cry the fugees']
len(input[0])

1024

In [11]:
# Create a dataset and dataloader
lyric_dataset = LyricDataset(lyrics_dict, midi_embeddings, word2vec)
#create a dataloader
lyric_dataloader = DataLoader(lyric_dataset, batch_size=2, shuffle=False)

In [12]:
class NTPModel(nn.Module):
    def __init__(self, word_embedding_dim=300, midi_embedding_dim=1024, hidden_dim=512, num_layers=2, vocab_size=3000000):
        super(NTPModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # Define the LSTM layer
        self.lstm = nn.LSTM(input_size=word_embedding_dim + midi_embedding_dim,
                            hidden_size=hidden_dim,
                            num_layers=num_layers,
                            batch_first=True)
        
        # Define the output layer
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, word_embeddings, midi_embeddings):
        # Concatenate word embeddings and MIDI embeddings along the feature dimension
        combined_embeddings = torch.cat((word_embeddings.squeeze(), midi_embeddings.squeeze()), dim=-1)
        
        # LSTM layer
        lstm_out, _ = self.lstm(combined_embeddings)
        
        # Pass through the output layer
        predictions = self.fc(lstm_out)
        
        return predictions



In [13]:
import torch
import torch.nn as nn
import torch.optim as optim

# Assuming the model is already defined and instantiated
model = NTPModel()
model.to(device)  # Assuming you're using a GPU

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [14]:
def train(model, dataloader, epochs, device):
    writer = SummaryWriter()  # For logging to TensorBoard
    model.train()
    
    for epoch in tqdm(range(epochs), desc="Epochs"):
        total_loss = 0
        for batch_idx, (current_word_embedding, midi_vector, next_word_index) in enumerate(tqdm(dataloader, desc="Training")):
            current_word_embedding = current_word_embedding.to(device)
            midi_vector = midi_vector.to(device)
            next_word_index = next_word_index.to(device)

            optimizer.zero_grad()
            output = model(current_word_embedding, midi_vector)
            loss = criterion(output.view(-1, vocab_size), next_word_index.view(-1))
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

            # Log to TensorBoard
            writer.add_scalar('Loss/train', loss.item(), epoch * len(dataloader) + batch_idx)
        
        # Print loss every epoch
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {total_loss/len(dataloader)}')
        
    writer.flush()
    writer.close()



In [15]:
# train the model
train(model, lyric_dataloader, 1, device)

NameError: name 'SummaryWriter' is not defined