In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
import sys

# Add the parent directory to the import path; presumably this is what lets
# the `virtuoso` package imported in the next cell resolve - confirm.
sys.path.append('../')

In [27]:
from virtuoso.train import prepare_dataloader
from virtuoso.parser import get_parser
from virtuoso import utils, model as modelzoo
from virtuoso.dataset import ScorePerformDataset, FeatureCollate
import random

In [28]:
# Command-line style options for this session, passed to the project's parser
# as an explicit argument list.
cli_args = [
    "--yml_path=../ymls/han_measnote.yml",
    "--data_path=../dataset_section_tempo",
    "--emotion_data_path=../dataset_emotion_section_tempo",
    "--device=cpu",
]
parser = get_parser()
args = parser.parse_args(args=cli_args)
# handle_args hands back the (re-bound) args together with the network
# parameters and a configs object used by the following cells.
args, net_params, configs = utils.handle_args(args)
# Device string given to utils.batch_to_device in the batch cells below.
device = 'cpu'

In [29]:
# Build the network from net_params; the checkpoint weights are loaded into it
# in the next cell.
model = modelzoo.make_model(net_params)

In [30]:
# Location of the trained checkpoint. The literal is split over several lines
# for readability only; adjacent string literals are concatenated by Python.
checkpoint_path = (
    '../../virtuosonet_checkpoints/'
    'yml_path=ymls/han_measnote.yml iters_per_checkpoint=300 '
    'delta_weight=10.0 delta_loss=True vel_balance_loss=True '
    'intermediate_loss=False_220108-152314/'
    'checkpoint_last.pt'
)
model = utils.load_weight(model, checkpoint_path)
# Switch to evaluation mode; as the cell's last expression this also displays
# the module structure.
model.eval()

=> loaded checkpoint '../../virtuosonet_checkpoints/yml_path=ymls/han_measnote.yml iters_per_checkpoint=300 delta_weight=10.0 delta_loss=True vel_balance_loss=True intermediate_loss=False_220108-152314/checkpoint_last.pt' (epoch 99)


VirtuosoNet(
  (score_encoder): HanEncoder(
    (note_fc): Sequential(
      (0): Linear(in_features=78, out_features=128, bias=True)
      (1): Dropout(p=0.2, inplace=False)
      (2): ReLU()
    )
    (lstm): LSTM(128, 128, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
    (voice_net): LSTM(128, 128, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
    (beat_attention): ContextAttention(
      (attention_net): Linear(in_features=512, out_features=512, bias=True)
    )
    (beat_rnn): LSTM(512, 128, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
    (measure_attention): ContextAttention(
      (attention_net): Linear(in_features=256, out_features=256, bias=True)
    )
    (measure_rnn): LSTM(256, 128, batch_first=True, bidirectional=True)
  )
  (performance_encoder): HanPerfEncoder(
    (performance_note_encoder): LSTM(64, 64, batch_first=True, bidirectional=True)
    (performance_measure_attention): ContextAttention(
      (attenti

In [76]:
from torch.utils.data import DataLoader

# Names of the hierarchy flags on `args`; the ones that are truthy are passed
# to the dataset as its hier_type.
hier_type = ['is_hier', 'in_hier', 'hier_beat', 'hier_meas', 'meas_note']
curr_type = [x for x in hier_type if getattr(args, x)]

train_set = ScorePerformDataset(
    args.data_path,
    type="train",
    len_slice=args.len_slice,
    len_graph_slice=args.len_graph_slice,
    graph_keys=args.graph_keys,
    hier_type=curr_type,
)


def make_train_loader(batch_size):
    """Return an unshuffled DataLoader over ``train_set`` for ``batch_size``.

    All loaders in this notebook share the dataset, the worker count and the
    pin-memory setting taken from ``args``; each one gets its own
    ``FeatureCollate`` instance. Only ``batch_size`` varies, so it is the only
    parameter.
    """
    return DataLoader(
        train_set,
        batch_size=batch_size,
        shuffle=False,
        num_workers=args.num_workers,
        pin_memory=args.pin_memory,
        collate_fn=FeatureCollate(),
    )


# The same data at three batch sizes; later cells compare model outputs
# between the 16-sample and the 5-sample batches.
train_loader = make_train_loader(16)
small_train_loader = make_train_loader(5)
tiny_train_loader = make_train_loader(1)


In [81]:
# Re-seed Python's RNG and refresh the dataset's slice info before drawing the
# batch, mirroring what the small-batch cell does.
# NOTE(review): presumably update_slice_info() picks slices via `random`, so
# the shared seed makes both cells slice the data identically - confirm.
random.seed(0)
train_set.update_slice_info()
# The loader is built with shuffle=False, so this is its first batch.
batch = next(iter(train_loader))
# NOTE(review): beat_y, meas_y, align_matched, pedal_status and edges are also
# assigned by the small-batch cell; whichever cell ran last owns these names.
batch_x, batch_y, beat_y, meas_y, note_locations, align_matched, pedal_status, edges = utils.batch_to_device(batch, device)

model.eval()
# outputs, perform_mu, perform_var, total_out_list = model(batch_x, batch_y, edges, note_locations)
# Only the score encoder is run here; the full forward pass above is disabled.
score_embedding = model.score_encoder(batch_x, edges, note_locations)

In [82]:
# Show the input shapes of the 16-sample and the 5-sample batch side by side.
# NOTE(review): small_batch_x is assigned in the cell that follows this one in
# the file (executed earlier, In [80]); on a fresh top-to-bottom run this line
# raises NameError unless that cell is run first.
batch_x.shape, small_batch_x.shape

(torch.Size([16, 818, 78]), torch.Size([5, 818, 78]))

In [80]:
# Same seeding and slice refresh as the large-batch cell, then take the first
# batch from the batch_size=5 loader.
# NOTE(review): presumably update_slice_info() picks slices via `random`, so
# the shared seed makes both cells slice the data identically - confirm.
random.seed(0)
train_set.update_slice_info()
small_batch = next(iter(small_train_loader))
# NOTE(review): beat_y, meas_y, align_matched, pedal_status and edges are not
# prefixed with small_ and therefore overwrite the values from the large batch.
small_batch_x, small_batch_y, beat_y, meas_y, small_note_locations, align_matched, pedal_status, edges = utils.batch_to_device(small_batch, device)
# small_outputs, small_perform_mu, small_perform_var, total_out_list = model(small_batch_x, batch_y, edges, note_locations)
model.eval()
# Only the score encoder is run here; the full forward pass above is disabled.
score_embedding_small = model.score_encoder(small_batch_x, edges, small_note_locations)

In [86]:
import torch.nn as nn
import torch
from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence
from virtuoso.model_utils import masking_half, make_higher_node, encode_with_net

def compare_output(model, batch_a, batch_b, note_locations_a, note_locations_b, y_a, y_b, target_id=0,
                   num_samples=4, num_notes=30):
  """Return the element-wise difference between the outputs for two batches.

  ``desired_computation`` is run once per batch, each time right after
  ``torch.manual_seed(0)``, so both runs start from the same torch RNG state.
  Only the leading ``num_samples`` entries of the first axis and the leading
  ``num_notes`` entries of the second axis are compared, which lets batches of
  different sizes be compared on the part they have in common.

  Args:
    model: passed through to ``desired_computation``.
    batch_a, batch_b: inputs of the two batches.
    note_locations_a, note_locations_b: note locations of the two batches.
    y_a, y_b: targets of the two batches.
    target_id: accepted for backward compatibility; currently unused.
    num_samples: how many leading entries of the first axis to compare.
    num_notes: how many leading entries of the second axis to compare.

  Returns:
    ``out_a[:num_samples, :num_notes] - out_b[:num_samples, :num_notes]``.
  """
  # The comparison never back-propagates, so skip building autograd graphs.
  with torch.no_grad():
    torch.manual_seed(0)
    out_a = desired_computation(model, batch_a, note_locations_a, y_a)
    torch.manual_seed(0)
    out_b = desired_computation(model, batch_b, note_locations_b, y_b)
  return out_a[:num_samples, :num_notes] - out_b[:num_samples, :num_notes]

def desired_computation(model, batch_x, note_locations, y):
  """Run encoder, performance encoder and decoder; return the decoder output.

  Pipeline: ``model.score_encoder`` (called with ``None`` for its edges
  argument), ``model.performance_encoder`` with ``return_z=False``,
  ``model.residual_info_selector`` and finally ``model.performance_decoder``.
  The shape of the decoder output is printed before it is returned.

  NOTE(review): ``edges`` is not a parameter. It is read from the notebook's
  global namespace, where both batch cells assign it, so the value depends on
  which of those cells ran last.
  """
  encoded_score = model.score_encoder(batch_x, None, note_locations)
  style_embedding, _mu, _var = model.performance_encoder(
    encoded_score, y, edges, note_locations, return_z=False)
  residual = model.residual_info_selector(batch_x, note_locations)

  # Earlier experiments stopped at style_vector_expandor / handle_style_vector /
  # run_measure_level of the decoder; the complete decoder is run instead.
  prediction, _alternative = model.performance_decoder(
    encoded_score, style_embedding, residual, edges, note_locations)
  print(prediction.shape)
  return prediction
  
# eval() returns the module itself, so rebinding only makes the mode explicit.
model = model.eval()
# Difference between the outputs for the 16-sample and the 5-sample batch.
com_result = compare_output(model, batch_x, small_batch_x, note_locations, small_note_locations, batch_y, small_batch_y)
print(com_result)
# Report the largest deviation in either direction: the maximum of the signed
# difference would hide a large negative deviation.
print(torch.max(torch.abs(com_result)))
# print((torch.abs(com_result)<1e-6).all())

torch.Size([16, 818, 11])
torch.Size([5, 818, 11])
tensor([[[ 7.4506e-08, -8.9407e-08,  2.9802e-08,  ..., -1.4901e-08,
           5.9605e-08,  1.1921e-07],
         [ 7.4506e-08,  5.9605e-08,  7.4506e-08,  ..., -2.9802e-08,
           0.0000e+00,  0.0000e+00],
         [ 7.4506e-08, -1.7881e-07,  4.4703e-08,  ...,  5.9605e-08,
           1.1921e-07,  1.1921e-07],
         ...,
         [ 2.9802e-08, -2.0862e-07, -1.0617e-07,  ...,  1.1921e-07,
          -2.3842e-07,  1.1921e-07],
         [ 2.9802e-08, -5.9605e-08, -6.5193e-08,  ...,  0.0000e+00,
           0.0000e+00,  0.0000e+00],
         [ 0.0000e+00, -1.1921e-07, -1.1874e-08,  ...,  5.9605e-08,
           0.0000e+00,  1.1921e-07]],

        [[-4.4703e-08,  0.0000e+00,  4.4703e-08,  ...,  0.0000e+00,
           5.9605e-08,  0.0000e+00],
         [-4.4703e-08, -8.9407e-08, -2.9802e-08,  ...,  5.9605e-08,
           5.9605e-08,  5.9605e-08],
         [-4.4703e-08,  5.9605e-08, -2.9802e-08,  ..., -5.9605e-08,
           1.7881e-07,  5

In [107]:
# Display the `lstm` submodule of the score encoder to inspect its settings.
model.score_encoder.lstm

LSTM(128, 128, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)

In [73]:
# Compare the shapes of the 'note' embeddings of the large and the small batch.
# NOTE(review): the recorded output (In [73]) shows a small batch of size 4 and
# length 814, which does not match the batch_size=5 loader defined above; it
# looks like it predates the current loader setup - re-run to refresh.
score_embedding['note'].shape, score_embedding_small['note'].shape

(torch.Size([16, 818, 512]), torch.Size([4, 814, 512]))

In [74]:
# Difference between the two 'note' embeddings for sample 0, restricted to the
# first 10 positions along the second axis.
score_embedding['note'][0][:10] - score_embedding_small['note'][0][:10] 

tensor([[ 0.0000e+00,  0.0000e+00, -1.1176e-08,  ..., -7.4506e-09,
         -1.1176e-08,  1.7695e-08],
        [ 0.0000e+00, -1.4901e-08, -1.4901e-08,  ...,  2.9802e-08,
         -1.4901e-08,  0.0000e+00],
        [ 4.4703e-08,  1.1176e-08,  7.4506e-09,  ...,  1.4901e-08,
          7.4506e-09, -1.8626e-08],
        ...,
        [-1.4901e-08,  2.9802e-08,  4.4703e-08,  ...,  1.4901e-08,
          1.7229e-08, -1.4901e-08],
        [-2.2352e-08, -5.9605e-08,  1.4901e-08,  ...,  1.4901e-08,
          2.5146e-08,  2.9802e-08],
        [ 1.6764e-08, -1.1921e-07,  2.9802e-08,  ...,  2.9802e-08,
          1.3039e-08,  0.0000e+00]], grad_fn=<SubBackward0>)

In [64]:
# Check that sample 0 of the large batch, cut to its first 814 rows, equals
# sample 0 of the small batch.
# NOTE(review): 814 is hard-coded; presumably it was the small batch's length
# when this cell ran (In [64]). With the shapes recorded at In [82] both
# batches have 818 rows, so this subtraction would no longer line up - confirm.
((batch_x[0,:814] - small_batch_x[0]) == 0).all()

tensor(True)

In [63]:
# Element-wise equality of the full-model outputs for sample 0.
# NOTE(review): `outputs` and `small_outputs` are only assigned in lines that
# are commented out in the batch cells above, so on a fresh kernel this cell
# raises NameError; 814 is hard-coded as in the input check above.
outputs[0,:814] == small_outputs[0]

tensor([[False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        ...,
        [ True,  True,  True,  ...,  True,  True,  True],
        [ True,  True,  True,  ...,  True,  True,  True],
        [ True,  True,  True,  ...,  True,  True,  True]])

IndexError: The shape of the mask [818] at index 0 does not match the shape of the indexed tensor [755, 128] at index 0