Refactor the transformer model to produce 
1. (1) time (2) note plurality (3) motion
2. (1) note plurality (2) motion

First we'll try including the time

Create a vector representation of an output space detailing note categories as well as motions

Rewrite preprocess_transformer_data to process data in this fashion, perhaps automatically insert the contour arrays (or transformer representation of contour arrays)

note: let's replace "note category" with "note plurality"

In [2]:
from nltk.corpus import wordnet as wn

for ss in wn.synsets('multitude'):
    print(ss.name(), ss.lemma_names())



battalion.n.02 ['battalion', 'large_number', 'multitude', 'plurality', 'pack']
multitude.n.02 ['multitude', 'throng', 'concourse']
multitude.n.03 ['multitude', 'masses', 'mass', 'hoi_polloi', 'people', 'the_great_unwashed']


### Dataloader prototype

The dataloader will inheret from LazierDataset and ColabMemoryDataset   

In [14]:
import sys
from pathlib import Path
sys.path.insert(1, str(Path.cwd().parent.parent))
from tensor_hero.model import ColabMemoryDataset, LazierDataset, \
                              note_dirs_from_spec_dirs, check_notes_length
from tensor_hero.inference import __single_prediction_to_notes_array
from tensor_hero.preprocessing.data import encode_contour, notes_array_time_adjust
import os
from tqdm import tqdm
import torch
import numpy as np

def contour_vector_from_notes(notes, tbps):
    '''Captures original transformer output notes arrays and translates them to
    contour vectors

    Args:
        notes (1D numpy array): original transformer output formatted notes
        tbps (int): time bins per second represented in output array
    Returns:
        contour_vector (1D numpy array): transformer formatted contour array
            - [time, note plurality, motion, time, note plurality, motion, ...]
    '''
    notes_array = __single_prediction_to_notes_array(notes)

    # Reduce time bins per second from 100 to tbps
    notes_array, _ = notes_array_time_adjust(notes_array, time_bins_per_second=tbps)
    
    # Create contour
    contour = encode_contour(notes_array)
    
    # Convert to vector representation
    #      index         information
    #  0            | <sos> 
    #  1            | <eos> 
    #  2            | <pad> 
    #  3-15         | <note pluralities 0-13>
    #  16-24        | <motion [-4, 4]>
    #  25-(tbps+25) | <time bin 1-tbps>
    contour_vector = contour_to_transformer_output(contour, tbps)
    return contour_vector

def contour_to_transformer_output(contour, tbps):
    '''Generates transformer output version of contour array
    
    ~~~~ ARGUMENTS ~~~~
        contour (2D numpy array): contour array, note plurality is first row, motion
                                    is second row 
        tbps (int): time bins per second. Determines dimensionality of output_vector
    ~~~~ RETURNS ~~~~
        contour_vector (1D numpy array):
            [time, note plurality, motion, time, note plurality, motion, ...]

        The values of contour_vector are detailed below

            value           information
        ____________________________________
        0            | <sos> 
        1            | <eos> 
        2            | <pad>
        3-15         | <note pluralities    contour (_type_): _description_
        16-24        | <motion [-4, 4]>    tbps (_type_): _description_
        25-(tbps+24) | <time bin 1-tbps>
    '''
    # Find indices with note events and create empty vector for contour
    note_events = np.where(contour[0,:] > 0)[0].astype(int)
    contour_vector = np.zeros(shape=(2+note_events.shape[0]*3))
    
    # Populate contour_vector
    # 0 is already encoded as <sos>
    motion_idx = lambda motion: motion + 20     # motion in [-4, 4] -> [16, 24]
    time_idx = lambda time: time + 25           # time bin in [0,tbps*4] -> [25, tbps*4+24]
    np_idx = lambda note_p: note_p + 2          # note plurality in [1, 13] -> [3, 15]
    for idx, ne in enumerate(list(note_events)):
        contour_vector[1+(3*idx)] = time_idx(ne)
        contour_vector[2+(3*idx)] = np_idx(contour[0, ne])
        contour_vector[3+(3*idx)] = motion_idx(contour[1, ne])
    
    # Populate eos
    contour_vector[-1] = 1
    return contour_vector

class ContourMemoryDataset(ColabMemoryDataset):
    '''Implementation of ColabMemoryDataset but transforms output into contour_vectors
    '''
    def __init__(self, partition_path, max_src_len, max_trg_len, max_examples, 
                 pad_idx, CHECK_LENGTH=False, tbps=25):
        self.max_trg_len = max_trg_len
        self.max_src_len = max_src_len
        self.pad_idx = pad_idx
        self.tbps = 25
        
        # Construct list of spectrogram file paths and list of note file paths
        song_paths = [partition_path / x for x in os.listdir(partition_path)]
        specs_dirs = [x / 'spectrograms' for x in song_paths]
        specs_lists = []
        for dir_ in specs_dirs:
            for specs_dir, _, specs in os.walk(dir_):
                if not specs:
                    continue
                specs_lists.append([Path(specs_dir) / spec for spec in specs])
        specs_lists = [spec for spec_list in specs_lists for spec in spec_list]  # Flatten
        notes_lists = [note_dirs_from_spec_dirs(x) for x in specs_lists]
        
        # Construct dictionary where key:value is <path to spec>:<path to notes array>
        l = {}  # labels
        for i in range(len(specs_lists)):
            l[specs_lists[i]] = notes_lists[i]
            
        # Weed out bits of data that exceed the maximum length
        self.labels = {}        # holds spec paths as keys, note paths as values
        self.data_paths = []    # list of spec paths
        too_long = 0            # how many of the notes have more elements than max_trg_len
        if CHECK_LENGTH:
            print('Checking length of spectrograms and notes...')
            for x in tqdm(specs_lists):
                if check_notes_length(l[x], max_trg_len):
                    self.data_paths.append(x)
                    self.labels[x] = l[x]
                else:
                    too_long += 1
                print(f'{too_long} datapoints removed due to exceeding maximum length')
        else:
            self.data_paths = specs_lists
            self.labels = l
            print('Notes were not checked against max_trg_len')
        
        # Restrict max samples in Dataset to min(max_examples, num_samples)        
        self.num_samples = len(self.labels)  # This could be lower than max_samples
        self.max_examples = max_examples if max_examples > 0 else self.num_samples
        self.max_examples = min(self.max_examples, self.num_samples)
        del too_long, l, song_paths, specs_dirs, specs_lists, notes_lists
        
        # Create and empty data matrix
        spec = np.load(self.data_paths[0])  # Load single examples to get shape
        notes = np.load(self.labels[self.data_paths[0]])
        # Shape for self.specs = [max_examples, 512, max_src_len]
        # Shape for self.notes = [max_examples, max_trg_len]
        self.specs = np.empty(shape=(self.max_examples, spec.shape[0], max_src_len))
        self.notes = np.empty(shape=(self.max_examples, max_trg_len))
        
        # Populate data into memory
        for idx in tqdm(range(self.max_examples)):
            spec = self.pad_spec(np.load(self.data_paths[idx]))
            # Transform notes into contour_vectors
            # contour_vectors are formatted to be transformer output
            notes = np.load(self.labels[self.data_paths[idx]])
            notes = contour_vector_from_notes(notes, tbps)
            notes = self.pad_notes(notes)
            self.specs[idx,...] = spec      # Final data
            self.notes[idx,...] = notes     # Final data
        print(f'self.specs (shape = {self.specs.shape}) is taking up {sys.getsizeof(self.specs) / (1024**2):.2f} MB')
        print(f'self.notes (shape = {self.notes.shape}) is taking up {sys.getsizeof(self.notes) / (1024**2):.2f} MB')
        del spec, notes
       
# Define data loaders
train_path = Path.cwd().parent.parent / 'Training_Data' / 'training_ready' / 'train'
train_data = ContourMemoryDataset(partition_path = train_path, 
                                  max_src_len = 500, 
                                  max_trg_len= 250,
                                  max_examples = 200,
                                  pad_idx = 2)

dl_params = {
    'batch_size' : 8,
    'shuffle' : True,
    'num_workers' : 0,
    'drop_last' : True,
}
train_loader = torch.utils.data.DataLoader(train_data, **dl_params)

for batch_idx, batch in enumerate(train_loader):
    print(f'batch_idx: {batch_idx}')
    print(f'batch[0] shape: {batch[0].shape}')
    print(f'batch[1] shape: {batch[1].shape}')
    contour_vectors = batch[1]
    for i in range(contour_vectors.shape[0]):
        print(f'contour_vector {i}: {contour_vectors[i]}')
    print('\n')
        
    
    

 29%|██▉       | 58/200 [00:00<00:00, 563.11it/s]

Notes were not checked against max_trg_len


100%|██████████| 200/200 [00:00<00:00, 530.51it/s]


self.specs (shape = (200, 512, 500)) is taking up 390.63 MB
self.notes (shape = (200, 250)) is taking up 0.38 MB
batch_idx: 0
batch[0] shape: torch.Size([8, 512, 500])
batch[1] shape: torch.Size([8, 250])
contour_vector 0: tensor([ 0, 69, 14, 20,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  

In [31]:
from tensor_hero.preprocessing.data import decode_contour

def __contour_prediction_to_notes_array(prediction, tbps=25):
    '''
    Takes a single contour prediction from the transformer and translates it to a notes array
    of length 400.

    ~~~~ ARGUMENTS ~~~~
    -   prediction (numpy Array, shape=(<max_trg_len>,)):
            -   Prediction from the transformer, should be a single list of max indices
                from transformer prediction. 
            -   Expected to be formatted as
                [<sos>, time, note plurality, motion, etc., <eos>, <pad>, <pad>, etc.]
    -   tbps (int): time bins per second of predicted notes
        
    ~~~~ RETURNS ~~~~
    -   notes_array (Numpy Array, shape = (400,)):
            -   The prediction
    '''
    if type(prediction) == torch.Tensor:
        prediction = prediction.detach().cpu().numpy()
    #     value           information
    # ____________________________________
    # 0            | <sos> 
    # 1            | <eos> 
    # 2            | <pad>
    # 3-15         | <note pluralities>    
    # 16-24        | <motion [-4, 4]>    
    # 25-(tbps*4+24) | <time bin 1-tbps>

    note_vals = list(range(3, 16))            # Note pluralities
    time_vals = list(range(25, tbps*4+24))    # Corresponding to times
    motion_vals = list(range(16, 25))         # Motion in [-4,4]
    print(tbps*4 + 24)
    # Loop through the array 3 elements at a time
    pairs = []
    for i in range(prediction.shape[0]-2):
        pair = (prediction[i], prediction[i+1], prediction[i+2]) # Take predicted notes as couples
        if pair[0] in time_vals and pair[1] in note_vals and pair[2] in motion_vals:
            pairs.append(pair)  # Append if pair follows (time, note) pattern

    # Create contour from pairs
    expansion_factor = 100/tbps
    contour = np.zeros(shape=(2, 400))
    for pair in pairs:
        index = min(round((pair[0]-25)*expansion_factor), 400)
        contour[0, index] = pair[1]-2    # note plurality
        contour[1, index] = pair[2]-20   # motion
    
    # Create notes array from contour
    notes_array = decode_contour(contour)
     
    return notes_array

notes_array = __contour_prediction_to_notes_array(contour_vectors[7])
print(contour_vectors[7])
print(notes_array)

124
tensor([ 0, 81, 14, 20,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2, 