<a href="https://colab.research.google.com/github/Kristin33/Composer-Clustering/blob/master/auto_encoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# get the dataset from git
!git clone https://github.com/Kristin33/Composer-Clustering

%cd Composer-Clustering

Cloning into 'Composer-Clustering'...
remote: Enumerating objects: 12, done.[K
remote: Counting objects: 100% (12/12), done.[K
remote: Compressing objects: 100% (10/10), done.[K
remote: Total 16326 (delta 5), reused 3 (delta 2), pack-reused 16314[K
Receiving objects: 100% (16326/16326), 138.14 MiB | 23.83 MiB/s, done.
Resolving deltas: 100% (1178/1178), done.
Checking out files: 100% (15217/15217), done.
/content/Composer-Clustering


In [0]:
import os, sys
import pretty_midi
import librosa, librosa.display
import matplotlib.pyplot as plt 

import numpy as np 
import pandas as pd 

# Fetch Data Functions

In [0]:
"""
===========================================================
Get the piano roll encoding of midi files 
===========================================================

Attempt of using autoencoders to encode the pianoroll representation
into a lower dimension latent representation. 

currently, the input of the piano roll is of dimension 1280000

"""

def get_piano_roll_matrix(midi_data, start_pitch, end_pitch, fs=50, draw=False):
    # roll = midi_data.get_piano_roll(fs)[start_pitch:end_pitch]
    matrix = midi_data.get_piano_roll(fs)[:, :10000]
    # print(matrix[:, 30:40])
    # print(matrix.shape)

    if draw: 
      librosa.display.specshow(matrix,
            hop_length=1, sr=fs, x_axis='time', y_axis='cqt_note',
            fmin=pretty_midi.note_number_to_hz(start_pitch))
    return np.array(matrix).flatten()

# input: an array of 
def get_test_data():
    

    data_dir = "New_Data_Selection/"
    bach_dir = "bach_lute_(c)contributors-kunstderfuge/"
    scriabin_dir = "scriabin_(c)contributors-kunstderfuge/"
    byrd_dir = "byrd_(c)contributors-kunstderfuge/"
    faure_dir = "faure_(c)contributors-kunstderfuge/"
    buxtehude_dir = "buxtehude_(c)contributors-kunstderfuge/"
    beethoven_dir = "beethoven_iii_(c)contributors-kunstderfuge/"

    test_comps = [scriabin_dir, buxtehude_dir, byrd_dir] 

    comp_data, comp_label = [], []

    for idx, test_comp in enumerate(test_comps):
      tmp_data, tmp_label = [], []
      for filename in os.listdir(data_dir + test_comp):
          if ".mid" in filename or ".MID" in filename:
              print(filename)
              midi_data = pretty_midi.PrettyMIDI(data_dir + test_comp + filename)
              l = midi_data.get_end_time()
              # scale the sampling frequency by the length of data, so the picture is 
              # of the same size 128 * 10000
              fs = 50 * (10000/(l * 50 - 1))
              roll = []
              roll.append(get_piano_roll_matrix(midi_data,36,108,fs=fs,draw=False))
              tmp = tmp_data + roll
              if (len(np.array(tmp).shape) != 2):
                continue
              tmp_data.append(roll[0])
              print(np.array(tmp_data).shape)
              tmp_label.append(idx)
      comp_data.extend(tmp_data)
      comp_label.extend(tmp_label)


    data = np.array(comp_data)
    labels = np.array(comp_label)

    return data, labels


def get_train_data():
    
    data_dir = "New_Data_Selection/"
    bach_dir = "bach_concertos_(c)contributors-kunstderfuge/"
    beethoven_dir = "beethoven_i_(c)contributors-kunstderfuge/"

    bach_data, bach_label = [], []
    beethoven_data, beethoven_label = [], []

    for filename in os.listdir(data_dir + beethoven_dir):
        if ".mid" in filename or ".MID" in filename:
            print(filename)
            midi_data = pretty_midi.PrettyMIDI(data_dir + beethoven_dir + filename)
            l = midi_data.get_end_time()
            # scale the sampling frequency by the length of data, so the picture is 
            # of the same size
            fs = 50 * (10000/(l * 50 - 1))
            roll = []
            roll.append(get_piano_roll_matrix(midi_data,36,108,fs=fs,draw=False))
            tmp = beethoven_data + roll
            if (len(np.array(tmp).shape) != 2):
              continue
            beethoven_data.append(roll[0])
            print(np.array(beethoven_data).shape)
            beethoven_label.append(1)

    for filename in os.listdir(data_dir + bach_dir):
        if ".mid" in filename:
            print(filename)
            midi_data = pretty_midi.PrettyMIDI(data_dir + bach_dir + filename)
            l = midi_data.get_end_time()
            # scale the sampling frequency by the length of data, so the picture is 
            # of the same size
            fs = 50 * (10000/(l * 50 - 1))
            roll = []
            roll.append(get_piano_roll_matrix(midi_data,36,108,fs=fs,draw=False))
            tmp = bach_data + roll
            if (len(np.array(tmp).shape) != 2):
              continue
            bach_data.append(roll[0])
            print(np.array(bach_data).shape)
            bach_label.append(0)

    data = np.array(bach_data + beethoven_data)
    labels = np.array(bach_label + beethoven_label)

    return data, labels



In [0]:
"""
===========================================================
Auto encoders
===========================================================

Attempt of using autoencoders to encode the pianoroll representation
into a lower dimension latent representation. 

currently, the input of the piano roll is of dimension 1280000


"""

import seaborn as sns
import warnings

warnings.filterwarnings('ignore')

# from __future__ import print_function
from keras.models import Model
from keras.layers import Dense, Input
from keras.datasets import mnist
from keras.regularizers import l1
from keras.optimizers import Adam




Using TensorFlow backend.


In [0]:
(x_test, y_test) = get_test_data()
print(x_test.shape)
x_test = x_test.astype('float32') / 255.0
x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))

etude_2_1_(c)dery.mid
(1, 1280000)
etude_8_12_(c)lefeldt.mid
(2, 1280000)
poeme_satanique_36_(c)lefeldt.mid
(3, 1280000)
etude_56_4_(c)dery.mid
(4, 1280000)
mazurka_3_6_(c)dery.mid
(5, 1280000)
prelude_16_4_(c)lefeldt.mid
(6, 1280000)
prelude_39_3_(c)lefeldt.mid
(7, 1280000)
prelude_48_3_(c)lefeldt.mid
(8, 1280000)
etude_8_12_(c)dery.mid
(9, 1280000)
etude_8_1_(c)lefeldt.mid
(10, 1280000)
etude_8_2_(c)lefeldt.mid
(11, 1280000)
etude_8_4_(c)dery.mid
(12, 1280000)
etude_8_6_(c)dery.mid
(13, 1280000)
piece_59_1_(c)lefeldt.mid
(14, 1280000)
prelude_56_1_(c)lefeldt.mid
(15, 1280000)
etude_65_2_(c)dery.mid
(16, 1280000)
prelude_22_1_(c)lefeldt.mid
(17, 1280000)
prelude_11_5_(c)lefeldt.mid
(18, 1280000)
etude_8_8_(c)dery.mid
(19, 1280000)
sonate_5_53_(c)lefeldt.mid
(20, 1280000)
prelude_51_2_(c)lefeldt.mid
(21, 1280000)
poeme_69_2_(c)lefeldt.mid
(22, 1280000)
prelude_74_4_(c)lefeldt.mid
(23, 1280000)
poeme-nocturne_61_(c)lefeldt.mid
(24, 1280000)
etude_8_3_(c)dery.mid
(25, 1280000)
etude_42_6

In [0]:
# (x_train, y_train), (x_test, y_test) = mnist.load_data()
(x_train, y_train) = get_train_data()


x_train = x_train.astype('float32') / 255.0

x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))


print(x_train.shape)
print(x_test.shape)

hess236-03_(c)unheard-beethoven.mid
(1, 1280000)
hess326_(c)unheard-beethoven.mid
(2, 1280000)
hess243-3_(c)unheard-beethoven.mid
(3, 1280000)
hess236-11_(c)unheard-beethoven.mid
(4, 1280000)
hess236-10_(c)unheard-beethoven.mid
(5, 1280000)
hess250_(c)unheard-beethoven.mid
(6, 1280000)
hess237-2 _(c)unheard-beethoven.mid
(7, 1280000)
hess239-1_(c)unheard-beethoven.mid
(8, 1280000)
hess244-2_(c)unheard-beethoven.mid
(9, 1280000)
letzer_musikalischer_gedanke_woo62_1_(c)prengle.mid
(10, 1280000)
hess235_(c)unheard-beethoven.mid
(11, 1280000)
hess236-07_(c)unheard-beethoven.mid
(12, 1280000)
hess236-09_(c)unheard-beethoven.mid
(13, 1280000)
hess233_(c)unheard-beethoven.mid
(14, 1280000)
hess240_(c)unheard-beethoven.mid
(15, 1280000)
hess236-06_(c)unheard-beethoven.mid
(16, 1280000)
hess244-1_(c)unheard-beethoven.mid
(17, 1280000)
hess234c_(c)unheard-beethoven.mid
(18, 1280000)
diabelli_variationen_1_(c)lefeldt.mid
(19, 1280000)
adagio_lamentoso_on_b-a-c-h_(c)prengel.mid
hess236-01_(c)unhea

In [0]:
#@title
'''
===========================================================
Utility Functions
'''

def plot_autoencoder_outputs(autoencoder, n, dims):
    decoded_imgs = autoencoder.predict(x_test)

    # number of example digits to show
    n = 5
    plt.figure(figsize=(10, 4.5))
    for i in range(n):
        # plot original image
        ax = plt.subplot(2, n, i + 1)
        plt.imshow(x_test[i].reshape(*dims))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        if i == n/2:
            ax.set_title('Original Images')

        # plot reconstruction 
        ax = plt.subplot(2, n, i + 1 + n)
        plt.imshow(decoded_imgs[i].reshape(*dims))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        if i == n/2:
            ax.set_title('Reconstructed Images')
    plt.show()

def plot_loss(history):
    historydf = pd.DataFrame(history.history, index=history.epoch)
    plt.figure(figsize=(8, 6))
    historydf.plot(ylim=(0, historydf.values.max()))
    plt.title('Loss: %.3f' % history.history['loss'][-1])
    

def plot_compare_histories(history_list, name_list, plot_accuracy=True):
    dflist = []
    min_epoch = len(history_list[0].epoch)
    losses = []
    for history in history_list:
        h = {key: val for key, val in history.history.items() if not key.startswith('val_')}
        dflist.append(pd.DataFrame(h, index=history.epoch))
        min_epoch = min(min_epoch, len(history.epoch))
        losses.append(h['loss'][-1])

    historydf = pd.concat(dflist, axis=1)

    metrics = dflist[0].columns
    idx = pd.MultiIndex.from_product([name_list, metrics], names=['model', 'metric'])
    historydf.columns = idx
    
    plt.figure(figsize=(6, 8))

    ax = plt.subplot(211)
    historydf.xs('loss', axis=1, level='metric').plot(ylim=(0,1), ax=ax)
    plt.title("Training Loss: " + ' vs '.join([str(round(x, 3)) for x in losses]))
    
    if plot_accuracy:
        ax = plt.subplot(212)
        historydf.xs('acc', axis=1, level='metric').plot(ylim=(0,1), ax=ax)
        plt.title("Accuracy")
        plt.xlabel("Epochs")
    
    plt.xlim(0, min_epoch-1)
    plt.tight_layout()






# Construct Model and Train

In [0]:
# 724 mnist 28 * 28 
input_size = 1280000
hidden_size = 128
code_size = 32

input_img = Input(shape=(input_size,))
hidden_1 = Dense(hidden_size, activation='relu')(input_img)
code = Dense(code_size, activation='relu')(hidden_1)
hidden_2 = Dense(hidden_size, activation='relu')(code)
output_img = Dense(input_size, activation='sigmoid')(hidden_2)

autoencoder = Model(input_img, output_img)
autoencoder.compile(optimizer='adam', loss='binary_crossentropy')
autoencoder_train = autoencoder.fit(x_train, x_train, epochs=4)




In [0]:
autoencoder_half = Model(input_img, code)
pred = autoencoder_half.predict(x_test)
print(pred.shape)

In [0]:
data = pred
labels = y_test

np.random.seed(42)

# X_digits, y_digits = load_digits(return_X_y=True)
# print("X_digits shape: {}".format(X_digits.shape))
# print("y_digits shape: {}".format(y_digits.shape))
# data = scale(X_digits)
print("data shape: {}".format(data.shape))

n_samples, n_features = data.shape
n_digits = len(np.unique(labels))
# n_digits = len(np.unique(y_digits))
# labels = y_digits

sample_size = 300

print("n_digits: %d, \t n_samples %d, \t n_features %d"
      % (n_digits, n_samples, n_features))


In [0]:
from time import time

from sklearn import metrics
from sklearn.cluster import KMeans
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale



# Run KMeans

In [0]:
print(82 * '_')
print('init\t\ttime\tinertia\thomo\tcompl\tv-meas\tARI\tAMI\tsilhouette')


def bench_k_means(estimator, name, data):
    t0 = time()
    estimator.fit(data)
    print(labels)
    print(estimator.labels_)
    print('%-9s\t%.2fs\t%i\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f'
          % (name, (time() - t0), estimator.inertia_,
             metrics.homogeneity_score(labels, estimator.labels_),
             metrics.completeness_score(labels, estimator.labels_),
             metrics.v_measure_score(labels, estimator.labels_),
             metrics.adjusted_rand_score(labels, estimator.labels_),
             metrics.adjusted_mutual_info_score(labels,  estimator.labels_),
             metrics.silhouette_score(data, estimator.labels_,
                                      metric='euclidean',
                                      sample_size=sample_size)))

bench_k_means(KMeans(init='k-means++', n_clusters=n_digits, n_init=10),
              name="k-means++", data=data)

bench_k_means(KMeans(init='random', n_clusters=n_digits, n_init=10),
              name="random", data=data)

# in this case the seeding of the centers is deterministic, hence we run the
# kmeans algorithm only once with n_init=1
pca = PCA(n_components=n_digits).fit(data)
bench_k_means(KMeans(init=pca.components_, n_clusters=n_digits, n_init=1),
              name="PCA-based",
              data=data)
print(82 * '_')


In [0]:
from google.colab import drive
drive.mount('/content/drive')