In [1]:
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from torch import Tensor
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from torchtext.legacy.data import Field, TabularDataset, BucketIterator,ReversibleField
import matplotlib.pyplot as plt
from ast import literal_eval
import remi_utils as utils
import pickle
source_folder = "solo_generation_dataset"
destination_folder = "solo_generation_weights"

In [2]:
event2word, word2event = pickle.load(open('dictionary.pkl', 'rb'))

In [3]:
if torch.cuda.is_available():  
    dev = "cuda:1" 
else:  
    dev = "cpu" 
print(dev)
device = torch.device(dev)
print(device)

cuda:1
cuda:1


In [4]:
df_train = pd.read_csv(source_folder + '/train.csv')
df_test = pd.read_csv(source_folder + '/test.csv')
df_valid = pd.read_csv(source_folder + '/val.csv')

In [5]:
train_intro = df_train['intro'].apply(literal_eval).apply(lambda x: np.array(x,dtype="float")).values
train_outro = df_train['outro'].apply(literal_eval).apply(lambda x: np.array(x,dtype="float")).values
train_solo = df_train['solo'].apply(literal_eval).apply(lambda x: np.array(x,dtype="float")).values
train_solo_piano = df_train['solo_piano'].apply(literal_eval).apply(lambda x: np.array(x,dtype="float")).values

test_intro = df_test['intro'].apply(literal_eval).apply(lambda x: np.array(x,dtype="float")).values
test_outro = df_test['outro'].apply(literal_eval).apply(lambda x: np.array(x,dtype="float")).values
test_solo = df_test['solo'].apply(literal_eval).apply(lambda x: np.array(x,dtype="float")).values
test_solo_piano = df_test['solo_piano'].apply(literal_eval).apply(lambda x: np.array(x,dtype="float")).values

valid_intro = df_valid['intro'].apply(literal_eval).apply(lambda x: np.array(x,dtype="float")).values
valid_outro = df_valid['outro'].apply(literal_eval).apply(lambda x: np.array(x,dtype="float")).values
valid_solo = df_valid['solo'].apply(literal_eval).apply(lambda x: np.array(x,dtype="float")).values
valid_solo_piano = df_valid['solo_piano'].apply(literal_eval).apply(lambda x: np.array(x,dtype="float")).values

In [6]:
train_intro = np.array([values for values in train_intro])
train_outro = np.array([values for values in train_outro])
train_solo = np.array([values for values in train_solo])
train_solo_piano = np.array([values for values in train_solo_piano])

test_intro = np.array([values for values in test_intro])
test_outro = np.array([values for values in test_outro])
test_solo = np.array([values for values in test_solo])
test_solo_piano = np.array([values for values in test_solo_piano])

valid_intro = np.array([values for values in valid_intro])
valid_outro = np.array([values for values in valid_outro])
valid_solo = np.array([values for values in valid_solo])
valid_solo_piano = np.array([values for values in valid_solo_piano])

In [7]:
trainset = TensorDataset( torch.from_numpy(train_intro).long(), torch.from_numpy(train_outro).long(), torch.from_numpy(train_solo).long(), torch.from_numpy(train_solo_piano).long() )

testset = TensorDataset( torch.from_numpy(test_intro).long(), torch.from_numpy(test_outro).long(), torch.from_numpy(test_solo).long(), torch.from_numpy(test_solo_piano).long() )

valset = TensorDataset( torch.from_numpy(valid_intro).long(), torch.from_numpy(valid_outro).long(), torch.from_numpy(valid_solo).long(), torch.from_numpy(valid_solo_piano).long() )

In [8]:
batch_size = 8
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=4)

testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=4)

valloader = torch.utils.data.DataLoader(valset, batch_size=batch_size,
                                         shuffle=True, num_workers=4)

In [9]:
for i, (a,_,_,_) in enumerate(trainloader):
    print(a.size())

torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size([8, 1637])
torch.Size

In [10]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

In [11]:
#https://pytorch.org/tutorials/beginner/torchtext_translation_tutorial.html
import random
from typing import Tuple

import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch import Tensor

torch.backends.cudnn.enabled=False

class Encoder(nn.Module):
    def __init__(self,
                 input_dim: int,
                 emb_dim: int,
                 enc_hid_dim: int,
                 dec_hid_dim: int,
                 dropout: float):
        super().__init__()

        self.input_dim = input_dim
        self.emb_dim = emb_dim
        self.enc_hid_dim = enc_hid_dim
        self.dec_hid_dim = dec_hid_dim
        self.dropout = dropout

        self.embedding = nn.Embedding(input_dim, emb_dim)

        self.rnn = nn.GRU(emb_dim, enc_hid_dim, num_layers = 2,bidirectional = True)

        self.fc = nn.Linear(enc_hid_dim * 2, dec_hid_dim)

        self.dropout = nn.Dropout(dropout)

    def forward(self,
                src: Tensor) -> Tuple[Tensor]:

        embedded = self.dropout(self.embedding(src))

        outputs, hidden = self.rnn(embedded)

        hidden = torch.tanh(self.fc(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1)))

        return outputs, hidden


class Attention(nn.Module):
    def __init__(self,
                 enc_hid_dim: int,
                 dec_hid_dim: int,
                 attn_dim: int):
        super().__init__()

        self.enc_hid_dim = enc_hid_dim
        self.dec_hid_dim = dec_hid_dim

        self.attn_in = (enc_hid_dim * 2) + dec_hid_dim

        self.attn = nn.Linear(self.attn_in, attn_dim)

    def forward(self,
                decoder_hidden: Tensor,
                encoder_outputs: Tensor) -> Tensor:

        src_len = encoder_outputs.shape[0]

        repeated_decoder_hidden = decoder_hidden.unsqueeze(1).repeat(1, src_len, 1)

        encoder_outputs = encoder_outputs.permute(1, 0, 2)

        energy = torch.tanh(self.attn(torch.cat((
            repeated_decoder_hidden,
            encoder_outputs),
            dim = 2)))

        attention = torch.sum(energy, dim=2)

        return F.softmax(attention, dim=1)


class Decoder(nn.Module):
    def __init__(self,
                 output_dim: int,
                 emb_dim: int,
                 enc_hid_dim: int,
                 dec_hid_dim: int,
                 dropout: int,
                 attention: nn.Module):
        super().__init__()

        self.emb_dim = emb_dim
        self.enc_hid_dim = enc_hid_dim
        self.dec_hid_dim = dec_hid_dim
        self.output_dim = output_dim
        self.dropout = dropout
        self.attention = attention

        self.embedding = nn.Embedding(output_dim, emb_dim)

        self.rnn = nn.GRU((enc_hid_dim * 2) + emb_dim, dec_hid_dim, num_layers = 1)
        
        self.out = nn.Linear(self.attention.attn_in + emb_dim, output_dim)

        self.dropout = nn.Dropout(dropout)


    def _weighted_encoder_rep(self,
                              decoder_hidden: Tensor,
                              encoder_outputs: Tensor) -> Tensor:

        a = self.attention(decoder_hidden, encoder_outputs)

        a = a.unsqueeze(1)

        encoder_outputs = encoder_outputs.permute(1, 0, 2)

        weighted_encoder_rep = torch.bmm(a, encoder_outputs)

        weighted_encoder_rep = weighted_encoder_rep.permute(1, 0, 2)

        return weighted_encoder_rep


    def forward(self,
                input: Tensor,
                decoder_hidden: Tensor,
                encoder_outputs: Tensor) -> Tuple[Tensor]:

        input = input.unsqueeze(0)

        embedded = self.dropout(self.embedding(input))

        weighted_encoder_rep = self._weighted_encoder_rep(decoder_hidden,
                                                          encoder_outputs)

        rnn_input = torch.cat((embedded, weighted_encoder_rep), dim = 2)

        output, decoder_hidden = self.rnn(rnn_input, decoder_hidden.unsqueeze(0))

        embedded = embedded.squeeze(0)
        output = output.squeeze(0)
        weighted_encoder_rep = weighted_encoder_rep.squeeze(0)

        output = self.out(torch.cat((output,
                                     weighted_encoder_rep,
                                     embedded), dim = 1))

        return output, decoder_hidden.squeeze(0)


class Seq2Seq(nn.Module):
    def __init__(self,
                 encoder: nn.Module,
                 decoder: nn.Module,
                 device: torch.device):
        super().__init__()

        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self,
                src: Tensor,
                trg: Tensor,
                teacher_forcing_ratio: float = 0.5) -> Tensor:

        batch_size = src.shape[1]
        max_len = trg.shape[0]
        trg_vocab_size = self.decoder.output_dim

        outputs = torch.zeros(max_len, batch_size, trg_vocab_size).to(self.device)

        encoder_outputs, hidden = self.encoder(src)

        # first input to the decoder is the <sos> token
        output = trg[0,:]

        for t in range(1, max_len):
            output, hidden = self.decoder(output, hidden, encoder_outputs)
            outputs[t] = output
            teacher_force = random.random() < teacher_forcing_ratio
            top1 = output.max(1)[1]
            output = (trg[t] if teacher_force else top1)

        return outputs


In [12]:


INPUT_DIM = len(word2event)+1
OUTPUT_DIM = len(word2event)+1
# ENC_EMB_DIM = 256
# DEC_EMB_DIM = 256
# ENC_HID_DIM = 512
# DEC_HID_DIM = 512
# ATTN_DIM = 64
# ENC_DROPOUT = 0.5
# DEC_DROPOUT = 0.5

ENC_EMB_DIM = 256
DEC_EMB_DIM = 256
ENC_HID_DIM = 512
DEC_HID_DIM = 512
ATTN_DIM = 256
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5

enc = Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, ENC_DROPOUT)

attn = Attention(ENC_HID_DIM, DEC_HID_DIM, ATTN_DIM)

dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, DEC_DROPOUT, attn)

model = Seq2Seq(enc, dec, device)
# model = nn.DataParallel(model.to(device))
model = model.to(device)

In [13]:
def init_weights(m: nn.Module):
    for name, param in m.named_parameters():
        if 'weight' in name:
            nn.init.normal_(param.data, mean=0, std=0.01)
        else:
            nn.init.constant_(param.data, 0)


model.apply(init_weights)

optimizer = optim.Adam(model.parameters())


def count_parameters(model: nn.Module):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 11,351,807 trainable parameters


In [14]:
PAD_IDX = 254

# criterion = nn.CrossEntropyLoss(ignore_index=PAD_IDX)
criterion = nn.CrossEntropyLoss()

In [15]:
import math
import time


def train(model: nn.Module,
          iterator: torch.utils.data.DataLoader,
          optimizer: optim.Optimizer,
          criterion: nn.Module,
          clip: float):

    model.train()

    epoch_loss = 0

    for _, (src, _,trg,_) in enumerate(iterator):
        src, trg = src.to(device), trg.to(device)

        optimizer.zero_grad()

        output = model(src, trg)

        output = output[1:].view(-1, output.shape[-1])
        trg = trg[1:].view(-1)

        loss = criterion(output, trg)

        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)

        optimizer.step()

        epoch_loss += loss.cpu().detach().item()

    return epoch_loss / len(iterator)


def evaluate(model: nn.Module,
             iterator: torch.utils.data.DataLoader,
             criterion: nn.Module):

    model.eval()

    epoch_loss = 0

    with torch.no_grad():

        for _, (src, _,trg,_) in enumerate(iterator):
            src, trg = src.to(device), trg.to(device)

            output = model(src, trg, 0) #turn off teacher forcing

            output = output[1:].view(-1, output.shape[-1])
            trg = trg[1:].view(-1)

            loss = criterion(output, trg)

            epoch_loss += loss.cpu().detach().item()

    return epoch_loss / len(iterator)


def epoch_time(start_time: int,
               end_time: int):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs


N_EPOCHS = 10
CLIP = 1

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()

    train_loss = train(model, trainloader, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, valloader, criterion)

    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')

test_loss = evaluate(model, testloader, criterion)

print(f'| Test Loss: {test_loss:.3f} | Test PPL: {math.exp(test_loss):7.3f} |')

Epoch: 01 | Time: 0m 19s
	Train Loss: 1.227 | Train PPL:   3.412
	 Val. Loss: 0.892 |  Val. PPL:   2.441
Epoch: 02 | Time: 0m 18s
	Train Loss: 0.839 | Train PPL:   2.315
	 Val. Loss: 0.801 |  Val. PPL:   2.228
Epoch: 03 | Time: 0m 18s
	Train Loss: 0.832 | Train PPL:   2.298
	 Val. Loss: 0.813 |  Val. PPL:   2.254
Epoch: 04 | Time: 0m 18s
	Train Loss: 0.802 | Train PPL:   2.230
	 Val. Loss: 0.771 |  Val. PPL:   2.161
Epoch: 05 | Time: 0m 18s
	Train Loss: 0.792 | Train PPL:   2.207
	 Val. Loss: 0.807 |  Val. PPL:   2.242
Epoch: 06 | Time: 0m 18s
	Train Loss: 0.787 | Train PPL:   2.197
	 Val. Loss: 0.807 |  Val. PPL:   2.240
Epoch: 07 | Time: 0m 18s
	Train Loss: 0.792 | Train PPL:   2.207
	 Val. Loss: 0.766 |  Val. PPL:   2.152
Epoch: 08 | Time: 0m 19s
	Train Loss: 0.773 | Train PPL:   2.167
	 Val. Loss: 0.762 |  Val. PPL:   2.143
Epoch: 09 | Time: 0m 19s
	Train Loss: 0.784 | Train PPL:   2.191
	 Val. Loss: 0.793 |  Val. PPL:   2.209
Epoch: 10 | Time: 0m 19s
	Train Loss: 0.789 | Train PPL

KeyboardInterrupt: 

In [16]:
model.eval()

with torch.no_grad():

    for _, (src, _,trg,_) in enumerate(trainloader):
        src, trg = src.to(device), trg.to(device)

        output = model(src, trg, 0) #turn off teacher forcing
        orig = src[4].cpu().detach().tolist()
        test = torch.argmax(output[1:], dim=2)[4].cpu().detach().tolist()
        #print(trg[1:].size())
        for i in range(len(torch.argmax(output[1:], dim=2))):
            midi_series = utils.remove_padding(torch.argmax(output[1:], dim=2)[i].tolist(),word2event)
            #print(midi_series)
            utils.write_midi(midi_series, word2event, "test" + str(i) + ".midi")
        
        print(output.size())
        print(trg.size())
        
        output = output[1:].view(-1, output.shape[-1])
        trg = trg[1:].view(-1)
        #print(output)

torch.Size([8, 1637, 255])
torch.Size([8, 1637])
torch.Size([8, 1637, 255])
torch.Size([8, 1637])
torch.Size([8, 1637, 255])
torch.Size([8, 1637])
torch.Size([8, 1637, 255])
torch.Size([8, 1637])
torch.Size([8, 1637, 255])
torch.Size([8, 1637])
torch.Size([8, 1637, 255])
torch.Size([8, 1637])
torch.Size([8, 1637, 255])
torch.Size([8, 1637])
torch.Size([8, 1637, 255])
torch.Size([8, 1637])
torch.Size([8, 1637, 255])
torch.Size([8, 1637])
torch.Size([8, 1637, 255])
torch.Size([8, 1637])
torch.Size([8, 1637, 255])
torch.Size([8, 1637])
torch.Size([8, 1637, 255])
torch.Size([8, 1637])
torch.Size([8, 1637, 255])
torch.Size([8, 1637])
torch.Size([8, 1637, 255])
torch.Size([8, 1637])
torch.Size([8, 1637, 255])
torch.Size([8, 1637])
torch.Size([8, 1637, 255])
torch.Size([8, 1637])
torch.Size([8, 1637, 255])
torch.Size([8, 1637])
torch.Size([8, 1637, 255])
torch.Size([8, 1637])
torch.Size([8, 1637, 255])
torch.Size([8, 1637])
torch.Size([8, 1637, 255])
torch.Size([8, 1637])
torch.Size([8, 1637,

In [None]:
print(utils.remove_padding(orig,word2event))

In [None]:
print(utils.remove_padding(test,word2event))

In [None]:
utils.write_midi(utils.remove_padding(test,word2event), word2event, "test.mid")

In [None]:
class LSTM(nn.Module):

    def __init__(self, dimension=64):
        super(LSTM, self).__init__()

        self.embedding = nn.Embedding(len(text_field.vocab), 300)
        self.dimension = dimension
        self.lstm = nn.LSTM(input_size=300,
                            hidden_size=dimension,
                            num_layers=1,
                            batch_first=True,
                            bidirectional=True)
        self.drop = nn.Dropout(p=0.5)

        self.fc = nn.Linear(2*dimension, 1)

    def forward(self, notes, notes_len):
        notes_emb = self.embedding(notes)

        packed_input = pack_padded_sequence(notes_emb, notes_len, batch_first=True, enforce_sorted=False)
        packed_output, _ = self.lstm(packed_input)
        output, _ = pad_packed_sequence(packed_output, batch_first=True)

        out_forward = output[range(len(output)), notes_len - 1, :self.dimension]
        out_reverse = output[:, 0, self.dimension:]
        out_reduced = torch.cat((out_forward, out_reverse), 1)
        notes_fea = self.drop(out_reduced)

        notes_fea = self.fc(notes_fea)
        notes_fea = torch.squeeze(notes_fea, 1)
        notes_out = torch.sigmoid(notes_fea)

        return notes_out

In [None]:
# Save and Load Functions https://towardsdatascience.com/lstm-text-classification-using-pytorch-2c6c657f8fc0

def save_checkpoint(save_path, model, optimizer, valid_loss):

    if save_path == None:
        return
    
    state_dict = {'model_state_dict': model.state_dict(),
                  'optimizer_state_dict': optimizer.state_dict(),
                  'valid_loss': valid_loss}
    
    torch.save(state_dict, save_path)
    print(f'Model saved to ==> {save_path}')


def load_checkpoint(load_path, model, optimizer):

    if load_path==None:
        return
    
    state_dict = torch.load(load_path, map_location=device)
    print(f'Model loaded from <== {load_path}')
    
    model.load_state_dict(state_dict['model_state_dict'])
    optimizer.load_state_dict(state_dict['optimizer_state_dict'])
    
    return state_dict['valid_loss']


def save_metrics(save_path, train_loss_list, valid_loss_list, global_steps_list):

    if save_path == None:
        return
    
    state_dict = {'train_loss_list': train_loss_list,
                  'valid_loss_list': valid_loss_list,
                  'global_steps_list': global_steps_list}
    
    torch.save(state_dict, save_path)
    print(f'Model saved to ==> {save_path}')


def load_metrics(load_path):

    if load_path==None:
        return
    
    state_dict = torch.load(load_path, map_location=device)
    print(f'Model loaded from <== {load_path}')
    
    return state_dict['train_loss_list'], state_dict['valid_loss_list'], state_dict['global_steps_list']


In [None]:
# Training Function

def train(model,
          optimizer,
          criterion = nn.BCELoss(),
          train_loader = train_iter,
          valid_loader = valid_iter,
          num_epochs = 10,
          eval_every = len(train_iter) // 2,
          file_path = destination_folder,
          best_valid_loss = float("Inf")):
    
    # initialize running values
    running_loss = 0.0
    valid_running_loss = 0.0
    global_step = 0
    train_loss_list = []
    valid_loss_list = []
    global_steps_list = []

    # training loop
    model.train()
    for epoch in range(num_epochs):
        total = 0
        total_correct = 0
        for (labels, (notes, notes_len)), _ in (train_loader):           
            labels = labels.to(device)
            notes = notes.to(device)
            notes_len = notes_len.cpu()
            output = model(notes.long(), notes_len.long())

            loss = criterion(output, labels.float())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            labels_max = labels.detach().cpu()
            output_max = torch.round(output.detach().cpu())

            for i in range(len(labels_max)):
                total+=1
                if labels_max[i] ==  output_max[i]:
                    total_correct += 1
            accuracy = accuracy_score(labels_max, output_max)
            
            # update running values
            running_loss += loss.item()
            global_step += 1

            # evaluation step
            if global_step % eval_every == 0:
                model.eval()
                with torch.no_grad():                    
                  # validation loop
                    for (labels, (notes, notes_len)), _ in (valid_loader):
                        labels = labels.to(device)
                        notes = notes.to(device)
                        notes_len = notes_len.cpu()
                        output = model(notes.long(), notes_len.long())
                        loss = criterion(output, labels.float())
                        valid_running_loss += loss.item()

                # evaluation
                average_train_loss = running_loss / eval_every
                average_valid_loss = valid_running_loss / len(valid_loader)
                train_loss_list.append(average_train_loss)
                valid_loss_list.append(average_valid_loss)
                global_steps_list.append(global_step)

                # resetting running values
                running_loss = 0.0                
                valid_running_loss = 0.0
                model.train()

                # print progress
                print('Epoch [{}/{}], Step [{}/{}], Train Loss: {:.4f}, Valid Loss: {:.4f}'
                      .format(epoch+1, num_epochs, global_step, num_epochs*len(train_loader),
                              average_train_loss, average_valid_loss))
                
                # checkpoint
                if best_valid_loss > average_valid_loss:
                    best_valid_loss = average_valid_loss
                    save_checkpoint(file_path + '/model.pt', model, optimizer, best_valid_loss)
                    save_metrics(file_path + '/metrics.pt', train_loss_list, valid_loss_list, global_steps_list)
        print("Epoch Accuracy: {}".format(total_correct/total))
    save_metrics(file_path + '/metrics.pt', train_loss_list, valid_loss_list, global_steps_list)
    print('Finished Training!')


model = LSTM().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

train(model=model, optimizer=optimizer, num_epochs=25)

In [None]:
# torch.backends.cudnn.enabled = False

In [None]:
train_loss_list, valid_loss_list, global_steps_list = load_metrics(destination_folder + '/metrics.pt')
plt.plot(global_steps_list, train_loss_list, label='Train')
plt.plot(global_steps_list, valid_loss_list, label='Valid')
plt.xlabel('Global Steps')
plt.ylabel('Loss')
plt.legend()
plt.show() 

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns

In [None]:
def evaluate(model, test_loader, version='title', threshold=0.5):
    y_pred = []
    y_true = []

    model.eval()
    with torch.no_grad():
        for (labels, (notes, notes_len)), _ in test_loader:           
            labels = labels.to(device)
            notes = notes.to(device)
            notes_len = notes_len.cpu()
            output = model(notes.long(), notes_len.long())

            output = (output > threshold).int()
            y_pred.extend(output.tolist())
            y_true.extend(labels.tolist())
    
    print('Classification Report:')
    print(classification_report(y_true, y_pred, labels=[1,0], digits=4))
    
    cm = confusion_matrix(y_true, y_pred, labels=[1,0])
    ax= plt.subplot()
    sns.heatmap(cm, annot=True, ax = ax, cmap='Blues', fmt="d")

    ax.set_title('Confusion Matrix')

    ax.set_xlabel('Predicted Labels')
    ax.set_ylabel('True Labels')

    ax.xaxis.set_ticklabels(['NON-SOLO', 'SOLO'])
    ax.yaxis.set_ticklabels(['NON-SOLO', 'SOLO'])
    
    
best_model = LSTM().to(device)
optimizer = optim.Adam(best_model.parameters(), lr=0.001)

load_checkpoint(destination_folder + '/model.pt', best_model, optimizer)
evaluate(best_model, test_iter)