#### Colab prep

In [None]:
!pip install tensorflow-addons
!pip install mir_eval
!pip install hyperopt
!pip install guildai # restart after install

In [None]:
# colab prep
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!cp /content/drive/MyDrive/colab-handover/autochord/chordseq/* ./
!cp /content/drive/MyDrive/colab-handover/autochord/*.py ./
!cp /content/drive/MyDrive/colab-handover/autochord/*.csv ./
!ls

<hr style="border:1px solid gray">

In [1]:
import pandas as pd

# Where the song index CSV lives
base_dir = 'data/McGill-Billboard'
data_index = 'billboard-2.0-manychords.csv'
index_csv = f'{base_dir}/{data_index}'
#index_csv = f'{data_index}' # colab

# Read the index and key it by the `id` column
df_songs = pd.read_csv(index_csv).set_index('id')
len(df_songs)

719

In [2]:
df_songs.head(n=3)

Unnamed: 0_level_0,title,artist,no_chord_percent
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3,I Don't Mind,James Brown,0.049747
4,You've Got A Friend,"Roberta Flack,Donny Hathaway",0.05077
6,The Rose,Bette Midler,0.117244


In [3]:
# Hold out a fixed list of song ids for testing; df_dataset keeps every other song.
# `drop` raises a KeyError if any of these ids is not in the index.
test_ids = [1289, 736, 637, 270, 18] # songs to exclude for testing
df_dataset = df_songs.drop(index=test_ids)
len(df_dataset)

714

#### Splitting

In [5]:
import numpy as np
_SEED = 0

# Shuffle a copy of the dataset's song ids in place with a seeded generator,
# so the same order (and therefore the same CV splits) comes out on every run.
df_idxs = np.array(df_dataset.index.values)
rng = np.random.default_rng(_SEED)
rng.shuffle(df_idxs)

# peek at the first few shuffled ids
df_idxs[:10]

array([1167,    6,  986,  227,  743,  568,  107,  181,   27,  793])

In [6]:
from dataloader import ChromaSequenceDataset
# import dataloader
# from importlib import reload
# reload(dataloader)

# Label vocabulary and sequence length; both are part of the pre-computed file name
_LABEL_TYPE = 'majmin'
_SEQ_LEN = 128

# Load the pre-computed sequences instead of rebuilding them.
# NOTE(review): the .pkl extension suggests a pickle file -- only load files you trust.
pre_computed_seq = f'data/chordseq/{_LABEL_TYPE}_{_SEQ_LEN}.pkl'
#pre_computed_seq = f'{_LABEL_TYPE}_{_SEQ_LEN}.pkl' # colab
ds = ChromaSequenceDataset(pre_computed_sequence=pre_computed_seq)

Loaded sequence data.


In [7]:
for train_split, val_split in ds.get_next_cv_split(df_idxs):
    print(train_split.shape, val_split.shape)

((21936, 128, 24), (21936, 128)) ((3470, 128, 24), (3470, 128))
((21952, 128, 24), (21952, 128)) ((3454, 128, 24), (3454, 128))
((21892, 128, 24), (21892, 128)) ((3514, 128, 24), (3514, 128))
((21911, 128, 24), (21911, 128)) ((3495, 128, 24), (3495, 128))
((21588, 128, 24), (21588, 128)) ((3818, 128, 24), (3818, 128))


### Training loop

In [4]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import accuracy_score, f1_score, recall_score
import tensorflow as tf
from tensorflow.keras import layers
import tensorflow_addons as tfa

def K_plot_loss(history):
    '''
    Plot the training and validation CRF loss curves of a Keras fit.

    history: object returned by model.fit(); its `history` dict must hold
             the 'crf_loss' and 'val_crf_loss' series
    '''
    curves = history.history
    # train curve first, then validation, matching the legend order below
    for series_name in ('crf_loss', 'val_crf_loss'):
        plt.plot(curves[series_name])

    plt.title('model loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()

 The versions of TensorFlow you are currently using is 2.6.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [None]:
# optional class weights function
from sklearn.utils import class_weight

def get_chord_class_weights(labels, minimize_no_chord=True, no_chord_class=0):
    '''
    Compute "balanced" class weights for a set of chord labels.

    Every class c that occurs in `labels` gets the weight
        n_samples / (n_classes * count(c))
    which is the formula behind scikit-learn's 'balanced' class weights.
    It is computed directly with NumPy so the result does not depend on the
    call signature of sklearn's compute_class_weight (its `classes` and `y`
    arguments are keyword-only in current releases).

    labels: array-like of class ids, any shape (flattened here)
    minimize_no_chord: if True, replace the no-chord weight with 10% of the
                       smallest weight among the other classes
    no_chord_class: label value of the no-chord class

    Returns a dict mapping class label -> weight, keyed by the labels that
    actually occur (not by position), so a class missing from `labels`
    cannot shift the weights of the others. Empty input gives an empty dict.
    '''
    labels = np.ravel(labels)
    classes, counts = np.unique(labels, return_counts=True)
    class_weights = labels.size / (classes.size * counts.astype(np.float64))

    if minimize_no_chord:
        is_no_chord = classes == no_chord_class
        # only adjust when no-chord is present AND there is another class to compare with
        if is_no_chord.any() and not is_no_chord.all():
            class_weights[is_no_chord] = class_weights[~is_no_chord].min() / 10.0 # set to 10% of least weight

    return dict(zip(classes.tolist(), class_weights))

get_chord_class_weights(train_split.labels.flatten())

In [5]:
from dataloader import _CHROMA_FEAT_NAMES, _MAJMIN_CLASSES
# import model
# from importlib import reload
# reload(model)
from model import ModelWithCRFLoss
from tensorflow.keras.models import Model

_SEQ_LEN = 128

def init_bilstm_crf_model(base_linear_units=128, dropout=0.1, opt='adam', lr=1e-3, seq_len=None):
    '''
    Build and compile a BiLSTM + CRF sequence tagger.

    The network is Input(seq_len, n_chroma_feats) -> Bidirectional LSTM
    (outputs concatenated, one output per step) -> tensorflow-addons CRF
    layer with one unit per class, wrapped in ModelWithCRFLoss.

    base_linear_units: LSTM units per direction
    dropout: dropout rate of the LSTM
    opt: optimizer name understood by tf.keras.optimizers.get (e.g. 'adam'),
         or an optimizer instance (in which case `lr` is not applied)
    lr: learning rate used when `opt` is given by name
    seq_len: number of steps per input sequence; defaults to the
             module-level _SEQ_LEN

    Returns the compiled ModelWithCRFLoss instance.
    '''
    if seq_len is None:
        seq_len = _SEQ_LEN

    input_ph = tf.keras.Input(shape=(seq_len, len(_CHROMA_FEAT_NAMES),))
    lstm_out = layers.Bidirectional(
        layers.LSTM(units=base_linear_units, dropout=dropout,
                    return_sequences=True, stateful=False),
        merge_mode='concat')(input_ph)
    crf_out = tfa.layers.CRF(units=len(_MAJMIN_CLASSES))(lstm_out)
    model = Model(input_ph, crf_out)
    model = ModelWithCRFLoss(model, dtype='float64')

    # Honour the requested optimizer (it used to be ignored in favour of Adam)
    if isinstance(opt, str):
        optimizer = tf.keras.optimizers.get(
            {'class_name': opt, 'config': {'learning_rate': lr}})
    else:
        optimizer = opt
    model.compile(optimizer=optimizer, metrics=['accuracy'])

    return model

model = init_bilstm_crf_model()

  return py_builtins.overload_of(f)(*args)


In [6]:
model.base_model.summary()
model.base_model.output_shape

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 128, 24)]         0         
_________________________________________________________________
bidirectional (Bidirectional (None, 128, 256)          156672    
_________________________________________________________________
crf (CRF)                    [(None, 128), (None, 128, 7100      
Total params: 163,772
Trainable params: 163,772
Non-trainable params: 0
_________________________________________________________________


[(None, 128), (None, 128, 25), (None,), (25, 25)]

In [29]:
model.base_model.layers[2].output_shape

[(None, 128), (None, 128, 25), (None,), (25, 25)]

In [10]:
_SEED = 0
_EPOCHS = 2 #10
_BATCH_SIZE = 2 #512
_CKPT_PATH = 'models/chroma-seq-bilstm-crf-{cv}'

def sample_train_loop(ds, ref_idxs):
    '''
    Smoke-test the training pipeline on the first cross-validation split.

    Trains a default BiLSTM-CRF model for _EPOCHS epochs on the first 100
    training sequences, validates on the first 100 validation sequences,
    then prints the accuracy over the full validation split and plots the
    loss curves. Only the first split is run (the loop breaks after it).

    ds: dataset object providing get_next_cv_split(ref_idxs), which yields
        (train, val) pairs exposing .feats and .labels
    ref_idxs: song ids handed to get_next_cv_split
    '''
    # cross validation loop
    tf.random.set_seed(_SEED)
    for cv_ix, (train, val) in enumerate(ds.get_next_cv_split(ref_idxs)):
        print(f'----------- CV{cv_ix+1} -----------')
        # .take(100) keeps this a quick sample run
        train_dataset = tf.data.Dataset.from_tensor_slices((train.feats, train.labels)) \
                                       .take(100) \
                                       .shuffle(buffer_size=len(train), seed=_SEED, reshuffle_each_iteration=True) \
                                       .batch(_BATCH_SIZE)
        val_dataset = tf.data.Dataset.from_tensor_slices((val.feats, val.labels)) \
                                     .take(100) \
                                     .shuffle(buffer_size=len(val), seed=_SEED, reshuffle_each_iteration=True) \
                                     .batch(_BATCH_SIZE)

        print(f'Num train: {len(train)}, Num val: {len(val)}')
        assert(train.feats.shape[1:] == val.feats.shape[1:])
        print(f'Input features: {train.feats.shape[1:]}, Num classes: {len(_MAJMIN_CLASSES)}')

        model = init_bilstm_crf_model()
        history = model.fit(train_dataset, validation_data=val_dataset, epochs=_EPOCHS)
        # !!! class_weights NOT WORKING for some reason
        # class_weights = get_chord_class_weights(train.labels.flatten())
        # history = model.fit(train_dataset, validation_data=val_dataset, epochs=_EPOCHS,
        #                     class_weight=class_weights)

        # get acc
        preds, _, _, _ = model.predict(val.feats, batch_size=_BATCH_SIZE)
        true = val.labels.flatten()
        preds = preds.flatten()
        acc = accuracy_score(true, preds)
        # accuracy restricted to frames whose true label is not class 0
        non_zero = true != 0
        non_zero_acc = recall_score(true[non_zero], preds[non_zero], average='micro')
        print(f'Acc: {acc}, Non-zero: {non_zero_acc}')

        K_plot_loss(history)
        #model.save(_CKPT_PATH.format(cv=cv_ix))
        break

sample_train_loop(ds, df_idxs)

#### Inference speed check

In [15]:
import dataloader
import lazycats.np as catnp
import time
import numpy as np
from tqdm import tqdm

_LABEL_TYPE = 'majmin'
_CHROMA_NUM_FEATS = 24

# Time full-song inference of a freshly initialised (untrained) model on the test songs
inf_model = init_bilstm_crf_model(base_linear_units=128)
inf_times = []

for _id in tqdm(test_ids):
#for _id in test_ids:
    chroma_vectors, chord_labels = dataloader.get_chord_features_and_labels(_id, label_type=_LABEL_TYPE)
    assert(chroma_vectors.shape[-1] == _CHROMA_NUM_FEATS)
    chordseq_vectors = catnp.divide_to_subsequences(chroma_vectors, sub_len=_SEQ_LEN)

    #print(chordseq_vectors.shape)
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() is wall-clock time and can jump.
    # NOTE(review): the first song presumably also pays one-time setup cost
    # inside predict(), which would inflate the max/mean -- confirm.
    st = time.perf_counter()
    pred_labels, _, _, _ = inf_model.predict(chordseq_vectors, batch_size=32)
    inf_times.append(time.perf_counter() - st)
    #print(pred_labels.shape)

print(inf_times)
print(np.mean(inf_times))

100%|██████████| 5/5 [00:01<00:00,  3.11it/s]

[0.8955433368682861, 0.2515449523925781, 0.11009716987609863, 0.0739896297454834, 0.11784219741821289]
0.28980345726013185





| LSTM size | Avg. Inference Time (full song) | Max |
|--|--|--|
| 32 | 0.23s | 0.86s |
| 128 | 0.29s | 0.90s |
| 256 | 0.43s | 0.96s |
| 512 | 0.95s | 1.73s |
| 1024 | 1.86s | 3.71s |
| 2048 | 6.04s | 12.85s |

* Results are independent of input length, since the number of parameters stays the same.

#### Model saving

In [13]:
# Round-trip check, part 1: build a fresh (untrained) model, run one prediction
# and save it. `chordseq_vectors` is whatever an earlier cell left behind,
# so that cell must have been run first.
_CKPT_PATH = 'models/chroma-seq-bilstm-crf-test'
save_model = init_bilstm_crf_model(base_linear_units=512)
save_preds, _, _, _ = save_model.predict(chordseq_vectors, batch_size=32) # to get shape for serialization
save_model.save(_CKPT_PATH)

  "CRF decoding models have serialization issues in TF >=2.5 . Please see isse #2476"
  return py_builtins.overload_of(f)(*args)
  return py_builtins.overload_of(f)(*args)
  "CRF decoding models have serialization issues in TF >=2.5 . Please see isse #2476"


INFO:tensorflow:Assets written to: models/chroma-seq-bilstm-crf-test/assets


INFO:tensorflow:Assets written to: models/chroma-seq-bilstm-crf-test/assets


In [14]:
!ls {_CKPT_PATH}

assets	keras_metadata.pb  saved_model.pb  variables


In [15]:
save_model.summary()

Model: "model_with_crf_loss_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
model_6 (Functional)         [(None, 128), (None, 128, 2225852   
Total params: 2,225,852
Trainable params: 2,225,852
Non-trainable params: 0
_________________________________________________________________


In [None]:
from tensorflow import keras
load_model = keras.models.load_model('models/chroma-seq-bilstm-crf-test')

In [17]:
load_model.summary()

Model: "model_with_crf_loss_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
model_6 (Functional)         [(None, 128), (None, 128, 2225852   
Total params: 2,225,852
Trainable params: 2,225,852
Non-trainable params: 0
_________________________________________________________________


In [46]:
load_preds, _, _, _ = load_model.predict(chordseq_vectors, batch_size=32)





In [47]:
assert(np.all(save_preds == load_preds))

#### Automated tuning & tracking

In [None]:
# colab
!cp -R /content/drive/MyDrive/colab-handover/autochord/guild-env-colab ./

In [11]:
from IPython.display import display

# Directory where guild keeps its runs (the Colab location is in the trailing comment)
GUILD_HOME = 'guild-env/chroma-seq-bilstm-crf' # "guild-env-colab/chroma-seq-bilstm-crf"
# Set to True to permanently delete every recorded run when this cell executes
DELETE_RUNS_ON_INIT = False
import guild.ipy as guild
guild.set_guild_home(GUILD_HOME)

if DELETE_RUNS_ON_INIT:
    # permanent=True: the runs are removed for good
    deleted = guild.runs().delete(permanent=True)
    print("Deleted %i run(s)" % len(deleted))
else:
    # otherwise just show the first recorded runs
    display(guild.runs().head())

Unnamed: 0,run,operation,started,status,label


In [14]:
_EPOCHS = 5
_BATCH_SIZE = 64
_TRAIN = None
_VAL = None

# function for guild tracking
def hpset_trainloop(hd=256, dp=0.6, opt='adam', lr=0.001, bs=512, si=0):
    '''
    Train loop with a specific set of hyperparams

    The data is read from the module-level _TRAIN / _VAL globals, which the
    caller must set before each run. A fresh BiLSTM-CRF model is trained for
    _EPOCHS epochs and the tracked metrics are printed.

    hd: hidden dim base size
    dp: dropout rate
    opt: optimizer, lr: learning rate
    bs: batch size
    si: CV split index (not used in the computation; presumably kept as a
        parameter so every tracked run records which split it used)

    Printed metrics:
      BE: best epoch (1-based), i.e. the one with the lowest validation CRF loss
      BL: validation CRF loss at that epoch
      TL: training CRF loss at that epoch
      VA: validation accuracy of the model after the last epoch

    Returns the validation accuracy.
    Raises Exception("Missing data!") when the data globals are unset or empty.
    '''
    tf.random.set_seed(_SEED)
    train = _TRAIN
    val = _VAL
    # Explicit None / length checks: relying on the truthiness of the dataset
    # objects conflates "not set" with whatever bool() means for their type.
    if train is None or val is None or len(train) == 0 or len(val) == 0:
        raise Exception("Missing data!")

    # NOTE(review): .take(100) restricts fitting to the first 100 sequences of
    # each split -- looks like a leftover from the sample loop; confirm before
    # trusting the tuning results.
    train_dataset = tf.data.Dataset.from_tensor_slices((train.feats, train.labels)) \
                                   .take(100) \
                                   .shuffle(buffer_size=len(train), seed=_SEED, reshuffle_each_iteration=True) \
                                   .batch(bs)
    val_dataset = tf.data.Dataset.from_tensor_slices((val.feats, val.labels)) \
                                 .take(100) \
                                 .shuffle(buffer_size=len(val), seed=_SEED, reshuffle_each_iteration=True) \
                                 .batch(bs)

    assert(train.feats.shape[-1] == val.feats.shape[-1])

    model = init_bilstm_crf_model(base_linear_units=hd, dropout=dp, opt=opt, lr=lr)
    history = model.fit(train_dataset, validation_data=val_dataset, epochs=_EPOCHS, verbose=0)

    # get acc (over the full validation split, not just the 100 used during fit)
    preds, _, _, _ = model.predict(val.feats, batch_size=_BATCH_SIZE)
    acc = accuracy_score(val.labels.flatten(), preds.flatten())

    val_losses = history.history['val_crf_loss']
    best_epoch = np.argmin(val_losses)
    best_loss = val_losses[best_epoch]
    train_loss = history.history['crf_loss'][best_epoch]

    # output metrics
    print(f"BE: {best_epoch+1}")
    print(f"BL: {best_loss}")
    print(f"TL: {train_loss}")
    print(f"VA: {acc}")

    return acc

In [None]:
# tuning loop
from hyperopt import hp, tpe, fmin

_REF_IDXS = df_idxs

def tuning_loop(hparams):
    '''
    Objective function for hyperopt's fmin: lower is better.

    For each CV split the data is published through the _TRAIN / _VAL
    globals and hpset_trainloop is executed as a tracked guild run.
    A split scoring below 0.5 accuracy aborts immediately with the worst
    loss (1.0). Currently only the first split is evaluated.

    hparams: dict with keys 'base_hidden_dim', 'drop_rate', 'opt', 'lr'
             and 'batch_size'

    Returns 1 - mean validation accuracy over the evaluated splits.
    '''
    global _TRAIN, _VAL

    print(hparams)

    acc_total = 0.0
    folds_done = 0
    for fold_ix, (fold_train, fold_val) in enumerate(ds.get_next_cv_split(_REF_IDXS)):
        # hpset_trainloop reads its data from these globals
        _TRAIN, _VAL = fold_train, fold_val
        _run, fold_acc = guild.run(
            hpset_trainloop,
            hd=int(hparams['base_hidden_dim']),
            dp=hparams['drop_rate'],
            opt=hparams['opt'],
            lr=hparams['lr'],
            bs=int(hparams['batch_size']),
            si=fold_ix,
        )
        folds_done += 1

        # if hyperparams fail miserably on one split,
        # no need to check other splits
        if fold_acc < 0.5:
            return 1.0

        acc_total += fold_acc

        # limit to one split so more configurations can be tried
        if fold_ix == 0:
            break

    # fmin minimises, so turn the mean accuracy into a loss
    return 1 - acc_total / folds_done

# Search space: every hyperparameter is a categorical choice.
# 'opt' and 'batch_size' have a single option, so they are effectively fixed.
hparams = {
    'base_hidden_dim': hp.choice('base_hidden_dim', [256, 512]),
    'drop_rate': hp.choice('drop_rate', [0.5, 0.3, 0.1]),
    'opt': hp.choice('opt', ['adam']),
    'lr': hp.choice('lr', [1e-3, 3e-4, 1e-4]),
    'batch_size': hp.choice('batch_size', [64]),
}

# Minimise tuning_loop with TPE over 6 evaluations.
# NOTE(review): for hp.choice parameters `best` presumably holds the index of
# the chosen option rather than its value (see hyperopt.space_eval) -- confirm.
best = fmin(tuning_loop, hparams, algo=tpe.suggest, max_evals=6)
print(best)

In [None]:
# colab
!cp -R guild-env-colab /content/drive/MyDrive/colab-handover/autochord/

In [18]:
# Gather all recorded guild runs into one comparison frame
runs = guild.runs()
df_exps = runs.compare()

# Columns to show: the hpset_trainloop parameters (bs, dp, hd, lr, opt, si)
# followed by the metrics it prints (BE, TL, BL, VA)
_COMPARE_COLS = ['bs','dp','hd','lr','opt','si','BE','TL','BL','VA']
comps = df_exps[_COMPARE_COLS]
#comps[comps.VA > 0.7]
comps[:15]

Unnamed: 0,bs,dp,hd,lr,opt,si,BE,VA
9,256,0.6,256,0.0003,adam,0,2.0,0.5203
12,512,0.3,256,0.0003,adam,2,2.0,0.52078
17,512,0.3,512,0.0003,adam,2,1.0,0.52138
36,256,0.5,256,0.0001,adam,3,2.0,0.52239
37,256,0.5,256,0.0001,adam,2,1.0,0.52428
39,256,0.5,256,0.0001,adam,0,2.0,0.52686
51,256,0.5,256,0.0003,adam,0,1.0,0.5205
53,256,0.5,256,0.0001,adam,0,2.0,0.52666


#### Saved model - Inference test

In [11]:
from tensorflow import keras

#_CKPT_PATH = 'models/chroma-seq-bilstm-crf-0-base'
_CKPT_PATH = 'models/chroma-seq-bilstm-crf-0'
load_model = keras.models.load_model(_CKPT_PATH)

In [16]:
load_model.summary()

Model: "model_with_crf_loss_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
model_11 (Functional)        [(None, 64), (None, 64, 2 2225852   
Total params: 2,225,852
Trainable params: 2,225,852
Non-trainable params: 0
_________________________________________________________________


In [14]:
load_preds, _, _, _ = load_model.predict(chordseq_vectors, batch_size=32) # to get shape for serialization

In [15]:
load_preds

array([[ 0,  0,  0, ...,  1,  1,  1],
       [ 8,  8,  8, ..., 13, 13, 13],
       [13, 13, 13, ..., 13, 13, 13],
       ...,
       [ 1,  1,  1, ...,  1,  1,  1],
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0]], dtype=int32)

#### Save base model only

In [14]:
load_model.base_model.summary()

Model: "model_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_12 (InputLayer)        [(None, 64, 24)]          0         
_________________________________________________________________
bidirectional_11 (Bidirectio (None, 64, 1024)          2199552   
_________________________________________________________________
crf_11 (Addons>CRF)          [(None, 64), (None, 64, 2 26300     
Total params: 2,225,852
Trainable params: 2,225,852
Non-trainable params: 0
_________________________________________________________________


In [None]:
load_model.base_model.save('models/chroma-seq-bilstm-crf-0-base')

#### Save graph

In [25]:
from tensorflow import keras

_CKPT_PATH = 'models/chroma-seq-bilstm-crf-0-base'
model = keras.models.load_model(_CKPT_PATH)



In [27]:
model.compile()

In [29]:
# from https://ksingh7.medium.com/part-iii-convert-keras-model-to-tensorflow-frozen-graph-model-a6aa6b1aaeee
import tensorflow as tf
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2

# Convert Keras model to ConcreteFunction
full_model = tf.function(lambda x: model(x))
full_model = full_model.get_concrete_function(
    x=tf.TensorSpec(model.inputs[0].shape, model.inputs[0].dtype))

# Get frozen ConcreteFunction
frozen_func = convert_variables_to_constants_v2(full_model)
frozen_func.graph.as_graph_def()

# inspect the layers operations inside your frozen graph definition
# and see the name of its input and output tensors
layers = [op.name for op in frozen_func.graph.get_operations()]

In [30]:
layers

['x',
 'model_11/crf_11/rnn/while/body/_23/model_11/crf_11/rnn/while/Tile/multiples',
 'unused_control_flow_input',
 'model_11/crf_11/rnn/while/body/_23/model_11/crf_11/rnn/while/Tile',
 'model_11/crf_11/rnn/while/body/_23/model_11/crf_11/rnn/while/crf_decode_forward_rnn_cell/ArgMax/dimension',
 'unused_control_flow_input_1',
 'model_11/crf_11/rnn/while/body/_23/model_11/crf_11/rnn/while/crf_decode_forward_rnn_cell/ArgMax',
 'model_11/crf_11/rnn/while/body/_23/model_11/crf_11/rnn/while/crf_decode_forward_rnn_cell/Cast',
 'model_11/crf_11/rnn/crf_decode_forward_rnn_cell/ExpandDims/dim',
 'unused_control_flow_input_2',
 'model_11/crf_11/rnn/crf_decode_forward_rnn_cell/ExpandDims',
 'unused_control_flow_input_3',
 'model_11/crf_11/rnn/crf_decode_forward_rnn_cell/add',
 'model_11/crf_11/rnn/crf_decode_forward_rnn_cell/ArgMax/dimension',
 'model_11/crf_11/rnn/crf_decode_forward_rnn_cell/ArgMax',
 'model_11/crf_11/rnn/crf_decode_forward_rnn_cell/Cast',
 'model_11/crf_11/rnn/zeros_like',
 'mo

In [33]:
# Save frozen graph from frozen ConcreteFunction to hard drive
# serialize the frozen graph and its text representation to disk.
# Binary version (as_text=False), written to models/graph/
tf.io.write_graph(graph_or_graph_def=frozen_func.graph,
                  logdir="models/graph/",
                  name="chroma-seq-bilstm-crf-0-base.pb",
                  as_text=False)

#Optional
# Text version of the same graph (as_text=True)
tf.io.write_graph(graph_or_graph_def=frozen_func.graph,
                  logdir="models/graph/",
                  name="chroma-seq-bilstm-crf-0-base.pbtxt",
                  as_text=True)

# show the architecture of the model that was just exported
model.summary()

Model: "model_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_12 (InputLayer)        [(None, 64, 24)]          0         
_________________________________________________________________
bidirectional_11 (Bidirectio (None, 64, 1024)          2199552   
_________________________________________________________________
crf_11 (CRF)                 [(None, 64), (None, 64, 2 26300     
Total params: 2,225,852
Trainable params: 2,225,852
Non-trainable params: 0
_________________________________________________________________


#### Scratch