In [3]:
import pickle
import sys
import json
from pathlib import Path
sys.path.insert(1, str(Path.cwd().parent))
from tensor_hero.model import Transformer
from tensor_hero.inference import full_song_prediction
import torch
import os

# Inputs: the song to run inference on and the name of the saved model to use
source_song = Path.cwd().parent / 'Experiments' / 'Test_Songs' / '1. Andy McKee - Ouray'
audio_file = source_song / 'separated.ogg'
model_to_load = 'colab_test_model_0'

# Output: every generated song gets its own folder under generated_songs/
name_of_song = 'colab_model_test_0'
outfolder = Path.cwd() / 'generated_songs' / name_of_song

# Refuse to reuse an existing song folder. This is an explicit raise rather
# than an assert so the message is still shown when Python runs with -O.
if outfolder.is_dir():
    raise FileExistsError('ERROR: Song already exists, enter new name')
# parents=True also creates generated_songs/ itself when it does not exist yet
outfolder.mkdir(parents=True)

# The saved model folder holds a pickled dictionary of parameters (params.pkl).
# Read it back and pretty-print it so the configuration is visible before the
# model is built from it.
# NOTE: pickle.load can execute arbitrary code, only open params files you trust.
model_path = Path.cwd().parent / 'model' / 'saved_models' / model_to_load
params_file = model_path / 'params.pkl'
with params_file.open('rb') as handle:
    params = pickle.load(handle)
print(json.dumps(params, indent=4))

# We use the saved parameters to define the skeleton of the model, then load
# the trained weights into it
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'device = {device}')

# Model hyperparameters are derived from the params dictionary
trg_vocab_size = params['trg_vocab_size']
embedding_size = params['embedding_size']
num_heads = params['num_heads']
num_encoder_layers = params['num_encoder_layers']
num_decoder_layers = params['num_decoder_layers']
dropout = params['dropout']
max_len = params['max_trg_len']
# params stores forward_expansion as a multiplier; the value handed to the
# model is embedding_size times that multiplier
forward_expansion = embedding_size * params['forward_expansion']

model = Transformer(
    embedding_size,
    trg_vocab_size,
    num_heads,
    num_encoder_layers,
    num_decoder_layers,
    forward_expansion,
    dropout,
    max_len,
    device,
).to(device)  # GPU when one is available, otherwise the model stays on the CPU

# Load the weights into the model. map_location remaps tensors that were saved
# from a CUDA run onto the current device, so the checkpoint also loads on a
# CPU-only machine.
weights_path = model_path / f'{model_to_load}.pt'
model.load_state_dict(torch.load(weights_path, map_location=device))

# This script only runs inference: evaluation mode turns dropout off
model.eval()

print(model)


# The .chart file needs a metadata header to be playable, so fill it with
# dummy values for now
song_metadata = {
    'Name': name_of_song,
    'Artist': 'Forrest',       # Forrest is the honorary author of all of our output
    'Charter': 'tensorhero',
    'Offset': 0,
    'Resolution': 192,
    'Genre': 'electronic',
    'MediaType': 'cd',
    'MusicStream': 'song.ogg',
}

# Run the model over the whole song and write the result into outfolder.
# max_len comes from the loaded params (max_trg_len) instead of a literal 500,
# so it stays in sync with whichever saved model was loaded.
# NOTE(review): sos_idx=432 is presumably the start-of-sequence token index for
# this model's vocabulary -- confirm against the training code.
_ = full_song_prediction(
    song_path=audio_file,
    model=model,
    device=device,
    sos_idx=432,
    max_len=max_len,
    song_metadata=song_metadata,
    outfolder=outfolder,
)

{
    "training_data": "train separated",
    "model_name": "colab_test_model_0",
    "optimizer": "Adam",
    "train_path": "X:\\Training Data\\training_ready\\train",
    "num_epochs": 500,
    "batch_size": 12,
    "shuffle": true,
    "num_workers": 4,
    "drop_last": true,
    "last_global_step": 12525,
    "max_trg_len": 500,
    "max_src_len": 500,
    "trg_vocab_size": 435,
    "pad_idx": 434,
    "embedding_size": 512,
    "lr": 0.0001,
    "num_heads": 8,
    "num_encoder_layers": 2,
    "num_decoder_layers": 2,
    "dropout": 0.1,
    "forward_expansion": 4,
    "date": "12/02/2022 20:37:37",
    "LOAD": false,
    "model_file_name": "colab_test_model_0.pt",
    "model_outfile": "C:\\Users\\ewais\\Documents\\GitHub\\tensor-hero\\model\\saved_models\\colab_test_model_0\\colab_test_model_0.pt",
    "experiment_description": "testing new colab dataloader"
}
device = cuda
Transformer(
  (src_position_embedding): Embedding(500, 512)
  (trg_position_embedding): Embedding(500, 512)

  0%|          | 0/500 [00:00<?, ?it/s]

predicting segment 0/35


  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]

predicting segment 1/35
predicting segment 2/35
predicting segment 3/35
predicting segment 4/35
predicting segment 5/35


  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]

predicting segment 6/35
predicting segment 7/35
predicting segment 8/35
predicting segment 9/35
predicting segment 10/35


  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]

predicting segment 11/35
predicting segment 12/35
predicting segment 13/35
predicting segment 14/35
predicting segment 15/35


  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]

predicting segment 16/35
predicting segment 17/35
predicting segment 18/35
predicting segment 19/35
predicting segment 20/35


  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]

predicting segment 21/35
predicting segment 22/35
predicting segment 23/35
predicting segment 24/35
predicting segment 25/35


  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]

predicting segment 26/35
predicting segment 27/35
predicting segment 28/35
predicting segment 29/35
predicting segment 30/35


  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]
  0%|          | 0/500 [00:00<?, ?it/s]

predicting segment 31/35
predicting segment 32/35
predicting segment 33/35
predicting segment 34/35



