In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from stilus.data.sets import MidiDataset
from torch.utils.data import DataLoader

In [20]:
class TransformerNet(nn.Module):

    def __init__(self):
        super(TransformerNet, self).__init__() # bs * 5 * 32
        self.encoder0 = nn.TransformerEncoderLayer(d_model=32, nhead=8, dim_feedforward=512)
        self.fc0 = nn.Linear(32 * 5, 5)

    def forward(self, x):
        x = self.encoder0(x)
        x = x.view(-1, self.num_flat_features(x))
        x = self.fc0(x)
        return x
    
    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


In [21]:
transformer_net = TransformerNet()
print(transformer_net)

TransformerNet(
  (encoder0): TransformerEncoderLayer(
    (self_attn): MultiheadAttention(
      (out_proj): Linear(in_features=32, out_features=32, bias=True)
    )
    (linear1): Linear(in_features=32, out_features=512, bias=True)
    (dropout): Dropout(p=0.1, inplace=False)
    (linear2): Linear(in_features=512, out_features=32, bias=True)
    (norm1): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
    (norm2): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
    (dropout1): Dropout(p=0.1, inplace=False)
    (dropout2): Dropout(p=0.1, inplace=False)
  )
  (fc0): Linear(in_features=160, out_features=5, bias=True)
)


In [22]:
input = torch.randn(128, 5, 32)
out = transformer_net(input)
print(input.shape)
print(out.shape)

torch.Size([128, 5, 32])
torch.Size([128, 5])


In [23]:
criterion = nn.L1Loss()
optimizer = optim.SGD(transformer_net.parameters(), lr=0.001, momentum=0.9)

In [24]:
midi_dataset = MidiDataset("training_data.npy")
dataloader = DataLoader(midi_dataset, batch_size=32, shuffle=True)

In [28]:
for epoch in range(20):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(dataloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[:,:,0:32], data[:,:,32]

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = transformer_net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 1000 == 999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 1000))
            running_loss = 0.0

print('Finished Training')

[1,  1000] loss: 0.147
[1,  2000] loss: 0.146
[1,  3000] loss: 0.151
[1,  4000] loss: 0.148
[1,  5000] loss: 0.148
[1,  6000] loss: 0.147
[1,  7000] loss: 0.149
[1,  8000] loss: 0.146
[2,  1000] loss: 0.146
[2,  2000] loss: 0.146
[2,  3000] loss: 0.146
[2,  4000] loss: 0.149
[2,  5000] loss: 0.148
[2,  6000] loss: 0.148
[2,  7000] loss: 0.148
[2,  8000] loss: 0.145
[3,  1000] loss: 0.148
[3,  2000] loss: 0.149
[3,  3000] loss: 0.146
[3,  4000] loss: 0.148
[3,  5000] loss: 0.143
[3,  6000] loss: 0.147
[3,  7000] loss: 0.147
[3,  8000] loss: 0.147
[4,  1000] loss: 0.152
[4,  2000] loss: 0.146
[4,  3000] loss: 0.148
[4,  4000] loss: 0.147
[4,  5000] loss: 0.147
[4,  6000] loss: 0.147
[4,  7000] loss: 0.144
[4,  8000] loss: 0.145
[5,  1000] loss: 0.147
[5,  2000] loss: 0.147
[5,  3000] loss: 0.147
[5,  4000] loss: 0.147
[5,  5000] loss: 0.146
[5,  6000] loss: 0.149
[5,  7000] loss: 0.148
[5,  8000] loss: 0.147
[6,  1000] loss: 0.146
[6,  2000] loss: 0.147
[6,  3000] loss: 0.149
[6,  4000] 

In [None]:
for i, data in enumerate(dataloader, 0):
    # get the inputs; data is a list of [inputs, labels]
    if i == 0:
        print( data.shape)
    inputs, labels = data[5:7,:,0:32], data[5:7,:,32]
    print("labels:", labels)
    print("pred:", conv_net(inputs))
    
    if i == 2:
        break

In [29]:
torch.save(conv_net.state_dict(), "models/60epochs_transformer_1.0.0.pth")

In [19]:
conv_net = TransformerNet()
conv_net.load_state_dict(torch.load("./models/60epochs_transformer_1.0.0.pth"))
conv_net.eval()

RuntimeError: Error(s) in loading state_dict for TransformerNet:
	size mismatch for fc0.weight: copying a param with shape torch.Size([5, 64]) from checkpoint, the shape in current model is torch.Size([5, 160]).

In [None]:
midi_test_dataset = MidiDataset("test_data.npy", midi_dataset.mean, midi_dataset.std)
test_dataloader = DataLoader(midi_test_dataset, batch_size=64, shuffle=False)

In [None]:
def std_tensor_to_int(pred):
    return ((pred * midi_dataset.std) + midi_dataset.mean).int()

In [None]:
for i, data in enumerate(test_dataloader, 0):
    # get the inputs; data is a list of [inputs, labels]
    print(data.shape)
    inputs, labels = data[1:64,:,0:32], data[1:64,:,32]
    #print(inputs)
    print("labels:",  std_tensor_to_int(labels))
    pred = conv_net(inputs)
    print("pred:",  std_tensor_to_int(pred))
   
    