# Feature Extraction and Unsupervised Learning for Generated tracks with Tensorboard

Similar to [`feature_extraction_and_tensorboard.ipynb`](feature_extraction_and_tensorboard.ipynb) but intended to be used with midi files generated by magenta experiments inside of `../../models/custom`.

## Imports

In [7]:
import sys, os, time, csv
sys.path.append('../python')

import utils
from multiprocessing import Pool as ThreadPool
import numpy as np
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector

## Find paths to generated midi files

In [8]:
models_dir = '../../models/custom'
midi_paths = []
for dirs in os.listdir(models_dir):
    path = os.path.join(models_dir, dirs, 'generated')
    if os.path.exists(path):
        for f in os.listdir(path):
            midi_paths.append((os.path.join(path, f), dirs))

## Extract Features

In [9]:
num_threads = 8
pool = ThreadPool(num_threads)

start_time = time.time()
extracted_features = pool.map(utils.extract_features, [p[0] for p in midi_paths])
print('Finished in {:.2f} seconds'.format(time.time() - start_time))

Finished in 17.76 seconds


## Remove unnecessary features

In [10]:
vec = []
for f in extracted_features:
    if f is not None:
        arr = f[0][0]
        arr.pop(0) # first element is an empty string
        arr.pop(-1) # last element is an empty string
        vec.append(np.array(arr))
embeddings = np.asarray(vec)

## Tensorboard Labels/Metadata
Create a `metadata.tsv` file to assosciate msd track metadata to data points in tensorboard. Here we label data points by their path and experiment name.

In [11]:
LOG_DIR='../../data/generated_logdir'
with open(os.path.join(LOG_DIR, 'metadata.tsv'), 'w') as f:
    writer = csv.writer(f, delimiter='\t')
    writer.writerow(['path', 'experiment'])
    for i, feat in enumerate(extracted_features):
        writer.writerow(midi_paths[i])

## Tensorboard Embeddings
Expose the embeddings to be used tensorboard.

In [12]:
sess = tf.Session()

# create embeddings var
emb = tf.Variable(embeddings, name='embeddings')

# embedding projector
summary_writer = tf.train.SummaryWriter(LOG_DIR)
config = projector.ProjectorConfig()
embedding = config.embeddings.add()
embedding.tensor_name = emb.name
embedding.metadata_path = os.path.join(LOG_DIR, 'metadata.tsv')
projector.visualize_embeddings(summary_writer, config)

# init and run the session
init = tf.global_variables_initializer()
sess.run(init)

# save checkpoint
saver = tf.train.Saver()
saver.save(sess, os.path.join(LOG_DIR, "model.ckpt"), 0)

Instructions for updating:
Please switch to tf.summary.FileWriter. The interface and behavior is the same; this is just a rename.


'../../data/generated_logdir/model.ckpt-0'