In [1]:
import pickle
import sys
import json
from pathlib import Path
sys.path.insert(1, str(Path.cwd().parent))
from tensor_hero.model import Transformer
from tensor_hero.inference import full_song_prediction
import torch
import os

# Input song folder, the saved model to use, and the name of the chart to generate
source_song = Path.cwd().parent / 'Experiments' / 'Test_Songs' / '1. Andy McKee - Ouray'
model_to_load = 'model13'
name_of_song = 'model13 - Ouray'
outfolder = Path.cwd() / 'generated_songs' / name_of_song

# Create the output folder, refusing to overwrite an existing song. mkdir itself
# reports the collision, so the check cannot be stripped by `python -O` (as an
# assert would be), and parents=True also creates 'generated_songs' on first run.
try:
    outfolder.mkdir(parents=True)
except FileExistsError:
    raise FileExistsError('ERROR: Song already exists, enter new name') from None
audio_file = source_song / 'separated.ogg'

# Read the hyperparameters that were saved alongside the model; they are used
# further down to rebuild a Transformer with the matching architecture.
model_path = Path.cwd().parent / 'model' / 'saved_models' / model_to_load
with (model_path / 'params.pkl').open('rb') as params_file:
    params = pickle.load(params_file)

# The parameters are stored in a dictionary; pretty-print it for inspection
print(json.dumps(params, indent=4))

# Pick the device the model skeleton and its weights will live on:
# the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'device = {device}')

# Pull the architecture hyperparameters out of the saved params dictionary
embedding_size = params['embedding_size']
trg_vocab_size = params['trg_vocab_size']
num_heads = params['num_heads']
num_encoder_layers = params['num_encoder_layers']
num_decoder_layers = params['num_decoder_layers']
# The stored 'forward_expansion' is a multiplier applied to the embedding size
forward_expansion = embedding_size * params['forward_expansion']
dropout = params['dropout']
max_len = params['max_trg_len']

# Build the (still untrained) model skeleton from those hyperparameters
model = Transformer(
    embedding_size,
    trg_vocab_size,
    num_heads,
    num_encoder_layers,
    num_decoder_layers,
    forward_expansion,
    dropout,
    max_len,
    device,
)
# Move the model onto the selected device (the GPU only when CUDA is available)
model = model.to(device)

# Load the trained weights into the model. map_location remaps tensors that were
# saved from a CUDA device onto the device selected earlier in this script, so
# the checkpoint also loads on a CPU-only machine.
weights_path = model_path / f'{model_to_load}.pt'
model.load_state_dict(torch.load(weights_path, map_location=device))

# Switch to evaluation mode so dropout is disabled while predicting
model.eval()

print(model)


# Placeholder metadata for the .chart file so that the result is actually playable
song_metadata = dict(
    Name=name_of_song,
    Artist='Forrest',  # Forrest is the honorary author of all of our output
    Charter='tensorhero',
    Offset=0,
    Resolution=192,
    Genre='electronic',
    MediaType='cd',
    MusicStream='song.ogg',
)

# Run the prediction over the whole audio file; the return value is not needed.
# NOTE(review): presumably the generated chart is written into outfolder — confirm
# against tensor_hero.inference.full_song_prediction.
prediction_kwargs = {
    'song_path': audio_file,
    'model': model,
    'device': device,
    'sos_idx': 432,
    'max_len': 500,
    'song_metadata': song_metadata,
    'outfolder': outfolder,
}
_ = full_song_prediction(**prediction_kwargs)

{
    "training_data": "train separated",
    "model_name": "model13",
    "optimizer": "Adam",
    "train_path": "X:\\Training Data\\Model 1 Training Separated\\train",
    "num_epochs": 500,
    "batch_size": 12,
    "shuffle": true,
    "num_workers": 4,
    "drop_last": true,
    "last_global_step": 1490580,
    "max_trg_len": 500,
    "max_src_len": 500,
    "trg_vocab_size": 435,
    "pad_idx": 434,
    "embedding_size": 512,
    "lr": 0.0001,
    "num_heads": 8,
    "num_encoder_layers": 2,
    "num_decoder_layers": 2,
    "dropout": 0.1,
    "forward_expansion": 4,
    "date": "27/01/2022 20:36:21",
    "LOAD": true,
    "model_file_name": "model13.pt",
    "model_outfile": "C:\\Users\\ewais\\Documents\\GitHub\\tensor-hero\\model\\saved_models\\model13\\model13.pt",
    "experiment_description": "Just getting a better model trained for experimentation"
}
device = cuda
Transformer(
  (src_position_embedding): Embedding(500, 512)
  (trg_position_embedding): Embedding(500, 512)
  

  0%|          | 0/500 [00:00<?, ?it/s]

predicting segment 0/35


  0%|          | 0/500 [00:01<?, ?it/s]
  1%|          | 3/500 [00:00<00:19, 25.64it/s]

predicting segment 1/35


  9%|▉         | 46/500 [00:01<00:17, 25.66it/s]
  1%|          | 3/500 [00:00<00:19, 25.86it/s]

predicting segment 2/35


 10%|█         | 50/500 [00:01<00:17, 26.18it/s]
  1%|          | 3/500 [00:00<00:17, 28.57it/s]

predicting segment 3/35


  2%|▏         | 10/500 [00:00<00:19, 25.06it/s]
  1%|          | 3/500 [00:00<00:17, 27.78it/s]

predicting segment 4/35


 10%|█         | 50/500 [00:01<00:17, 26.34it/s]
  1%|          | 3/500 [00:00<00:17, 28.04it/s]

predicting segment 5/35


 11%|█         | 56/500 [00:02<00:16, 26.13it/s]
  1%|          | 3/500 [00:00<00:17, 28.04it/s]

predicting segment 6/35


  9%|▉         | 46/500 [00:01<00:17, 26.51it/s]
  1%|          | 3/500 [00:00<00:19, 25.86it/s]

predicting segment 7/35


 10%|█         | 52/500 [00:01<00:17, 26.05it/s]
  1%|          | 3/500 [00:00<00:19, 25.64it/s]

predicting segment 8/35


 12%|█▏        | 58/500 [00:02<00:16, 26.17it/s]
  1%|          | 3/500 [00:00<00:17, 28.57it/s]

predicting segment 9/35


  7%|▋         | 36/500 [00:01<00:17, 26.34it/s]
  1%|          | 3/500 [00:00<00:19, 25.00it/s]

predicting segment 10/35


 11%|█         | 54/500 [00:02<00:18, 23.89it/s]
  1%|          | 3/500 [00:00<00:22, 22.22it/s]

predicting segment 11/35


  9%|▉         | 46/500 [00:01<00:19, 23.78it/s]
  1%|          | 3/500 [00:00<00:19, 25.00it/s]

predicting segment 12/35


 10%|█         | 52/500 [00:02<00:17, 25.05it/s]
  1%|          | 3/500 [00:00<00:19, 25.21it/s]

predicting segment 13/35


 13%|█▎        | 64/500 [00:02<00:17, 24.22it/s]
  1%|          | 3/500 [00:00<00:20, 24.39it/s]

predicting segment 14/35


 10%|█         | 50/500 [00:01<00:17, 25.25it/s]
  1%|          | 3/500 [00:00<00:18, 27.03it/s]

predicting segment 15/35


 10%|█         | 52/500 [00:02<00:17, 25.72it/s]
  1%|          | 3/500 [00:00<00:19, 25.86it/s]

predicting segment 16/35


  8%|▊         | 40/500 [00:01<00:17, 25.91it/s]
  1%|          | 3/500 [00:00<00:19, 25.64it/s]

predicting segment 17/35


  8%|▊         | 38/500 [00:01<00:17, 25.96it/s]
  1%|          | 3/500 [00:00<00:18, 27.03it/s]

predicting segment 18/35


 12%|█▏        | 62/500 [00:02<00:16, 26.12it/s]
  1%|          | 3/500 [00:00<00:18, 26.55it/s]

predicting segment 19/35


 10%|█         | 52/500 [00:02<00:17, 25.84it/s]
  1%|          | 3/500 [00:00<00:19, 26.09it/s]

predicting segment 20/35


 11%|█         | 54/500 [00:02<00:17, 25.87it/s]
  1%|          | 3/500 [00:00<00:18, 27.52it/s]

predicting segment 21/35


  9%|▉         | 44/500 [00:01<00:17, 25.69it/s]
  1%|          | 3/500 [00:00<00:18, 26.79it/s]

predicting segment 22/35


 14%|█▍        | 70/500 [00:02<00:16, 26.06it/s]
  1%|          | 3/500 [00:00<00:17, 28.30it/s]

predicting segment 23/35


 14%|█▎        | 68/500 [00:02<00:16, 26.43it/s]
  1%|          | 3/500 [00:00<00:18, 26.79it/s]

predicting segment 24/35


 10%|█         | 52/500 [00:02<00:17, 25.18it/s]
  1%|          | 3/500 [00:00<00:19, 25.21it/s]

predicting segment 25/35


 12%|█▏        | 58/500 [00:02<00:16, 26.19it/s]
  1%|          | 3/500 [00:00<00:18, 27.03it/s]

predicting segment 26/35


 11%|█         | 56/500 [00:02<00:16, 26.45it/s]
  1%|          | 3/500 [00:00<00:17, 28.30it/s]

predicting segment 27/35


 11%|█         | 54/500 [00:02<00:16, 26.35it/s]
  1%|          | 3/500 [00:00<00:18, 26.79it/s]

predicting segment 28/35


 13%|█▎        | 64/500 [00:02<00:16, 26.55it/s]
  1%|          | 3/500 [00:00<00:18, 26.55it/s]

predicting segment 29/35


 12%|█▏        | 58/500 [00:02<00:16, 26.21it/s]
  1%|          | 3/500 [00:00<00:18, 26.55it/s]

predicting segment 30/35


  8%|▊         | 42/500 [00:01<00:17, 26.01it/s]
  1%|          | 3/500 [00:00<00:17, 28.04it/s]

predicting segment 31/35


  6%|▋         | 32/500 [00:01<00:17, 26.23it/s]
  1%|          | 3/500 [00:00<00:18, 27.03it/s]

predicting segment 32/35


  6%|▋         | 32/500 [00:01<00:17, 26.34it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]

predicting segment 33/35
predicting segment 34/35



