<a href="https://colab.research.google.com/github/Zhachory1/MusicNST/blob/master/ComposerClassifierWorkingCopy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title Install Dependencies
# Installs the MIDI parsing packages used by the data-loading helpers.
# NOTE(review): the imports cell also needs magenta, tensorflow and keras, none
# of which are installed here — confirm the runtime already provides them.
!pip install py-midi pretty_midi



In [2]:
#@Title Import Data from GitHub 
!cd MusicNST/ && git pull
!git clone https://github.com/Zhachory1/MusicNST.git

Already up to date.
fatal: destination path 'MusicNST' already exists and is not an empty directory.


In [3]:
#@title Imports
import os
import fnmatch
import numpy as np
import time
import argparse
import warnings
import midi
import pretty_midi
import pandas as pd
import collections
import random
import tensorflow as tf

# Note sequence to piano roll
import magenta.music.pianoroll_encoder_decoder as pianoroll_ed
import magenta.music.sequences_lib as seq_lib
import magenta.music as mm
import magenta.models.music_vae.data as data

from keras import optimizers
from keras import backend, losses
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense, Lambda, BatchNormalization, Reshape, Dropout
from keras.layers.convolutional import Convolution2D, AveragePooling2D, MaxPooling2D, Convolution1D
from keras import backend as K
from keras.utils.data_utils import get_file
from keras.utils.layer_utils import convert_all_kernels_in_model  


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.



Using TensorFlow backend.


In [0]:
#@title Load Data

################################################################################
#                                 Constants 
################################################################################
# Number of MIDI files per training batch; the training cells also pass it to
# get_model as the fixed batch dimension.
BATCH_SIZE = 20
# Number of batches the training cells draw per pass over train_filenames.
NUM_ITER = 25
# Dataframe column names. The filename, composer and split columns are read
# from the MAESTRO csv.
DF_FNAME = 'midi_filename'
DF_FEATURES = 'midi_features'  # only referenced from commented-out code in this view
DF_COMPOSER = 'canonical_composer'
DF_SPLIT = "split"
# Column built in the data-loading section: integer index of each kept composer.
DF_COMPOSER_INDEX = 'composer_idx'
# Directory prefix prepended to each csv filename before the MIDI file is read.
PATH_PREFIX = './MusicNST/midi_files/maestro-v1.0.0/'
# Number of piano-roll frames kept per file (longer rolls are cut, shorter ones
# are zero-padded); also used as a Dense layer width in get_model.
MAX_DIM = 500
# NOTE(review): not referenced anywhere in the visible code — confirm it is
# still needed.
COUNT_CUTTING = 0
################################################################################
#                    Helper Functions to manipulate input data. 
################################################################################

def midi_to_pianoroll(midi_filename):
  """Converts one MIDI file into a single piano-roll feature array.

  The file is parsed with pretty_midi, converted to a NoteSequence and rendered
  as a piano roll at 10 frames per second over the pitch range
  [data.MIN_MIDI_PITCH, data.MAX_MIDI_PITCH]. The seven arrays of the resulting
  piano roll (active, weights, onsets, onset_velocities, active_velocities,
  offsets, control_changes) are joined along their last axis.

  Args:
    midi_filename: string, path of the MIDI file to read.
  Returns:
    np.array - the seven piano-roll arrays concatenated along the last axis.
    The training cells expect 896 features per frame (presumably
    7 arrays x 128 pitches - confirm).
  """
  midi_file = pretty_midi.PrettyMIDI(midi_filename)
  note_seq = mm.midi_to_sequence_proto(midi_file)
  pnt = seq_lib.sequence_to_pianoroll(
      note_seq, 10, data.MIN_MIDI_PITCH, data.MAX_MIDI_PITCH)
  # The arrays are concatenated as-is; the previous code also built an
  # expanded copy of each one that was never used.
  feature_arrays = [pnt.active, pnt.weights, pnt.onsets,
                    pnt.onset_velocities, pnt.active_velocities,
                    pnt.offsets, pnt.control_changes]
  return np.concatenate(feature_arrays, axis=-1)

def pianoroll_to_notes(pianoroll, opt_midi_file_name=""):
  """Converts a piano roll back into a NoteSequence.

  Args:
    pianoroll: piano-roll frames to convert (10 frames per second, matching
      midi_to_pianoroll).
    opt_midi_file_name: string, optional. When non-empty, the sequence is also
      handed to `download` under this filename.
  Returns:
    The NoteSequence built from the piano roll.
  """
  # Fixed: the body used the undefined name `piano_roll` instead of the
  # `pianoroll` parameter, so every call raised NameError.
  note_seq = seq_lib.pianoroll_to_note_sequence(pianoroll, 10, 0)
  if opt_midi_file_name != "":
    # NOTE(review): `download` is not defined or imported in the visible part
    # of this notebook - confirm where it comes from before relying on it.
    download(note_seq, opt_midi_file_name)
  return note_seq

def load_midi_data_from_midi_files(filenames):
  """Loads the piano roll of every given MIDI file.

  Args:
    filenames: list of strings, midi filenames relative to PATH_PREFIX.
  Returns:
    collections.OrderedDict mapping each filename to the array returned by
    midi_to_pianoroll, in the order the filenames were given. A filename that
    appears more than once is stored only once.
  """
  tensor_dict = collections.OrderedDict()
  for mn in filenames:
    tensor_dict[mn] = midi_to_pianoroll(PATH_PREFIX + mn)
  return tensor_dict

################################################################################
#                           Data Loading
################################################################################

df = pd.read_csv("MusicNST/midi_files/maestro-v1.0.0/maestro-v1.0.0.csv", 
                 usecols=[DF_COMPOSER,DF_SPLIT,DF_FNAME])

# Restrict the task to the n composers with the most rows in the csv. To train
# on every composer instead, use df[DF_COMPOSER].unique() here.
n=4
unique_composers = df[DF_COMPOSER].value_counts().head(n).index.tolist()
NUM_UNIQUE_COMPOSERS = len(unique_composers)

# Lookup table: composer name -> integer class index.
df_comp = pd.DataFrame(data={
    DF_COMPOSER_INDEX: range(len(unique_composers))},
                      index=unique_composers)

# The inner join attaches the class index and drops rows of every composer
# that is not among the kept ones.
df = df.join(df_comp, on=DF_COMPOSER, how='inner')
df = df[[DF_SPLIT, DF_FNAME, DF_COMPOSER_INDEX]]

# Make test/train/validation split from our csv file will contain our golden
# ie the composer and the midi_filename
train_split = df.loc[df[DF_SPLIT]=="train"]
train_split = train_split[[DF_COMPOSER_INDEX,DF_FNAME]]

# Holds all of the train filenames. One shuffle is enough; the second call
# that used to follow added nothing.
train_filenames = train_split[DF_FNAME].tolist()
random.shuffle(train_filenames)

validation_split = df.loc[df[DF_SPLIT]=="validation"]
validation_split = validation_split[[DF_COMPOSER_INDEX,DF_FNAME]]

test_split = df.loc[df[DF_SPLIT]=="test"]
test_split = test_split[[DF_COMPOSER_INDEX,DF_FNAME]]




In [0]:
#@title Model

def get_model(shape):
  """Creates and returns the composer classifier.

  The network applies a stack of same-padded 1-D convolutions with dropout,
  reduces to a single filter, squeezes that filter axis away and finishes with
  dense layers ending in a softmax over NUM_UNIQUE_COMPOSERS classes.

  Args:
    shape: full input shape including the batch dimension; the training cells
      pass [BATCH_SIZE, MAX_DIM, 896].
  Returns:
    An uncompiled keras Model.
  """
  ip = Input(batch_shape=shape)
  layer_2 = Convolution1D(filters=256, kernel_size=128, activation='elu', padding='same')(ip)
  layer_2 = Dropout(rate=0.1)(layer_2)
  layer_2 = Convolution1D(filters=128, kernel_size=64, activation='elu', padding='same')(layer_2)
  layer_2 = Dropout(rate=0.1)(layer_2)
  layer_2 = Convolution1D(filters=64, kernel_size=32, activation='elu', padding='same')(layer_2)
  # A single filter leaves a trailing axis of size 1, which the Lambda removes
  # so the dense layers see one value per time step.
  layer_2 = Convolution1D(filters=1, kernel_size=4, activation='elu', padding='same')(layer_2)
  layer_2 = Lambda(lambda x: backend.squeeze(x, axis=-1))(layer_2)
  dense_1 = Dense(units=MAX_DIM, activation='elu')(layer_2)
  # Integer division: `MAX_DIM/2` is a float under Python 3, and Dense expects
  # an integer number of units.
  dense_1 = Dense(units=MAX_DIM // 2, activation='elu')(dense_1)
  dense_1 = Dense(units=NUM_UNIQUE_COMPOSERS, activation='elu')(dense_1)
  dense_1 = Dense(units=NUM_UNIQUE_COMPOSERS, activation='softmax')(dense_1)
  return Model(ip, dense_1)

def calculate_loss_per_batch_get_gradients(filenames, su):  
  """Builds one training batch (features and one-hot labels) from MIDI files.

  Despite its name, this function computes neither a loss nor gradients; it
  only prepares the arrays that the training cells pass to train_on_batch.

  Args:
    filenames: list of strings, midi filenames relative to PATH_PREFIX.
    su: pandas dataframe with DF_FNAME and DF_COMPOSER_INDEX columns, used to
      look up the composer index of each file.
  Returns:
    (training_data, labels). training_data stacks the piano rolls along a new
    leading axis after each one is cut or zero-padded to MAX_DIM frames.
    labels is the matching one-hot array with NUM_UNIQUE_COMPOSERS columns.
  Raises:
    ValueError: if `filenames` is empty, since no batch can be built.
    IndexError: if a filename has no row in `su`.
  """
  midi_data_dict = load_midi_data_from_midi_files(filenames)
  if not midi_data_dict:
    # np.concatenate would raise ValueError on an empty list anyway; fail
    # early with a message that names the actual problem.
    raise ValueError("Cannot build a batch from an empty list of filenames.")

  labels_list = []
  tensors_list = []
  cut = 0
  for key, value in midi_data_dict.items():
    labels_list.append(su.loc[su[DF_FNAME]==key][DF_COMPOSER_INDEX].values[0])
    num_frames = value.shape[0]
    if num_frames > MAX_DIM:
      # Keep only the first MAX_DIM frames of long pieces.
      cut = cut + 1
      value = value[:MAX_DIM,:]
    elif num_frames < MAX_DIM:
      # Zero-pad short pieces at the end of the time axis.
      value = np.pad(value, ((0,MAX_DIM - num_frames),(0,0)), 'constant')
    tensors_list.append(np.expand_dims(value, axis=0))
  # Fixed: the count was passed as a second print argument, so the literal
  # "%d" was printed instead of being filled in.
  print("Num cuts in batch %d" % cut)
  # One hot labels 
  labels = np.eye(NUM_UNIQUE_COMPOSERS)[labels_list]
  training_data = np.concatenate(tensors_list, axis=0)
  return training_data, labels

# calculate_loss_per_batch(train_filenames[0:3], train_split)

In [27]:
#@title Training Loop
# 896 is the per-frame feature width expected from midi_to_pianoroll
# (presumably 7 piano-roll arrays x 128 pitches - confirm).
model_250 = get_model([BATCH_SIZE, MAX_DIM, 896])
model_250.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
for j in range(3):
  for i in range(NUM_ITER):
    batch_files = train_filenames[i * BATCH_SIZE:(i+1) * BATCH_SIZE]
    if len(batch_files) < BATCH_SIZE:
      # The model input has a fixed batch dimension of BATCH_SIZE, so a short
      # or empty final slice cannot be trained on; end this epoch instead.
      break
    # Fixed: the values were passed as extra print arguments, so the literal
    # format string was printed.
    print("At iteration %d, epoch %d" % (i, j))
    training_data, labels = calculate_loss_per_batch_get_gradients(batch_files, train_split)
    print(model_250.train_on_batch(training_data, labels))
    # Free the batch before the next one is loaded.
    del training_data, labels
  # Present the files in a different order on the next epoch.
  random.shuffle(train_filenames)

At iteration and epoch %d% , %d% 0 0
Num cuts in batch %d 20
[2.946462, 0.15]
At iteration and epoch %d% , %d% 1 0
Num cuts in batch %d 20
[1.465884, 0.3]
At iteration and epoch %d% , %d% 2 0
Num cuts in batch %d 20
[1.4142858, 0.25]
At iteration and epoch %d% , %d% 3 0
Num cuts in batch %d 20
[1.4521363, 0.25]
At iteration and epoch %d% , %d% 4 0
Num cuts in batch %d 20
[1.3502523, 0.3]
At iteration and epoch %d% , %d% 5 0
Num cuts in batch %d 20
[1.6555021, 0.1]
At iteration and epoch %d% , %d% 6 0
Num cuts in batch %d 20
[1.5060873, 0.3]
At iteration and epoch %d% , %d% 7 0
Num cuts in batch %d 20
[1.5817376, 0.1]
At iteration and epoch %d% , %d% 8 0
Num cuts in batch %d 20
[1.5891314, 0.3]
At iteration and epoch %d% , %d% 9 0
Num cuts in batch %d 20
[1.3966458, 0.15]
At iteration and epoch %d% , %d% 10 0
Num cuts in batch %d 20
[1.5074703, 0.3]
At iteration and epoch %d% , %d% 11 0
Num cuts in batch %d 20
[1.4623995, 0.05]
At iteration and epoch %d% , %d% 12 0
Num cuts in batch %d 

In [10]:
#@title Training Loop
# 896 is the per-frame feature width expected from midi_to_pianoroll
# (presumably 7 piano-roll arrays x 128 pitches - confirm).
model_one = get_model([BATCH_SIZE, MAX_DIM, 896])
model_one.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
for j in range(3):
  for i in range(NUM_ITER):
    batch_files = train_filenames[i * BATCH_SIZE:(i+1) * BATCH_SIZE]
    if len(batch_files) < BATCH_SIZE:
      # The model input has a fixed batch dimension of BATCH_SIZE, so a short
      # or empty final slice cannot be trained on; end this epoch instead.
      break
    # Fixed: the values were passed as extra print arguments, so the literal
    # format string was printed.
    print("At iteration %d, epoch %d" % (i, j))
    training_data, labels = calculate_loss_per_batch_get_gradients(batch_files, train_split)
    print(model_one.train_on_batch(training_data, labels))
    # Free the batch before the next one is loaded.
    del training_data, labels

At iteration and epoch %d% , %d% 0 0
Num cuts in batch %d 20
[1.6855037, 0.25]
At iteration and epoch %d% , %d% 1 0
Num cuts in batch %d 20
[1.5805612, 0.15]
At iteration and epoch %d% , %d% 2 0
Num cuts in batch %d 20
[1.4634602, 0.25]
At iteration and epoch %d% , %d% 3 0
Num cuts in batch %d 20
[1.4661157, 0.25]
At iteration and epoch %d% , %d% 4 0
Num cuts in batch %d 20
[1.4119016, 0.1]
At iteration and epoch %d% , %d% 5 0
Num cuts in batch %d 20
[1.409792, 0.15]
At iteration and epoch %d% , %d% 6 0
Num cuts in batch %d 20
[1.5550728, 0.25]
At iteration and epoch %d% , %d% 7 0
Num cuts in batch %d 20
[1.4067249, 0.25]
At iteration and epoch %d% , %d% 8 0
Num cuts in batch %d 20
[1.4270213, 0.2]
At iteration and epoch %d% , %d% 9 0
Num cuts in batch %d 20
[1.2976172, 0.55]
At iteration and epoch %d% , %d% 10 0
Num cuts in batch %d 20
[1.374302, 0.4]
At iteration and epoch %d% , %d% 11 0
Num cuts in batch %d 20
[1.51947, 0.15]
At iteration and epoch %d% , %d% 12 0
Num cuts in batch %

InvalidArgumentError: ignored

In [6]:
#@title Training Loop
# 896 is the per-frame feature width expected from midi_to_pianoroll
# (presumably 7 piano-roll arrays x 128 pitches - confirm).
model = get_model([BATCH_SIZE, MAX_DIM, 896])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
for j in range(3):
  for i in range(NUM_ITER):
    batch_files = train_filenames[i * BATCH_SIZE:(i+1) * BATCH_SIZE]
    if len(batch_files) < BATCH_SIZE:
      # The model input has a fixed batch dimension of BATCH_SIZE, so a short
      # or empty final slice cannot be trained on; end this epoch instead.
      break
    # Fixed: the values were passed as extra print arguments, so the literal
    # format string was printed.
    print("At iteration %d, epoch %d" % (i, j))
    training_data, labels = calculate_loss_per_batch_get_gradients(batch_files, train_split)
    print(model.train_on_batch(training_data, labels))
    # Free the batch before the next one is loaded.
    del training_data, labels

  
  

Instructions for updating:
Colocations handled automatically by placer.
At iteration and epoch %d% , %d% 0 0
Num cuts in batch %d 20
Instructions for updating:
Use tf.cast instead.
[2.3025174, 0.15]
At iteration and epoch %d% , %d% 1 0
Num cuts in batch %d 20
[1.9169124, 0.3]
At iteration and epoch %d% , %d% 2 0
Num cuts in batch %d 20
[1.7785218, 0.2]
At iteration and epoch %d% , %d% 3 0
Num cuts in batch %d 20
[2.0761747, 0.2]
At iteration and epoch %d% , %d% 4 0
Num cuts in batch %d 19
[1.8056618, 0.2]
At iteration and epoch %d% , %d% 5 0
Num cuts in batch %d 20
[1.7365942, 0.1]
At iteration and epoch %d% , %d% 6 0
Num cuts in batch %d 20
[1.403935, 0.25]
At iteration and epoch %d% , %d% 7 0
Num cuts in batch %d 19
[1.717499, 0.15]
At iteration and epoch %d% , %d% 8 0
Num cuts in batch %d 20
[1.5800993, 0.25]
At iteration and epoch %d% , %d% 9 0
Num cuts in batch %d 20
[1.6374344, 0.2]
At iteration and epoch %d% , %d% 10 0
Num cuts in batch %d 20
[1.7233845, 0.2]
At iteration and ep

InvalidArgumentError: ignored

In [7]:
# Continues training the `model` object created by an earlier training cell;
# that cell must have been run first.
for j in range(3):
  for i in range(NUM_ITER):
    batch_files = train_filenames[i * BATCH_SIZE:(i+1) * BATCH_SIZE]
    if len(batch_files) < BATCH_SIZE:
      # The model input has a fixed batch dimension of BATCH_SIZE, so a short
      # or empty final slice cannot be trained on; end this epoch instead.
      break
    # Fixed: the values were passed as extra print arguments, so the literal
    # format string was printed.
    print("At iteration %d, epoch %d" % (i, j))
    training_data, labels = calculate_loss_per_batch_get_gradients(batch_files, train_split)
    print(model.train_on_batch(training_data, labels))
    # Free the batch before the next one is loaded.
    del training_data, labels

At iteration and epoch %d% , %d% 0 0
Num cuts in batch %d 20
[1.4331439, 0.2]
At iteration and epoch %d% , %d% 1 0
Num cuts in batch %d 20
[1.398399, 0.2]
At iteration and epoch %d% , %d% 2 0
Num cuts in batch %d 20
[1.4157279, 0.15]
At iteration and epoch %d% , %d% 3 0
Num cuts in batch %d 20
[1.4213254, 0.25]
At iteration and epoch %d% , %d% 4 0
Num cuts in batch %d 19
[1.3730273, 0.35]
At iteration and epoch %d% , %d% 5 0
Num cuts in batch %d 20
[1.4121124, 0.3]
At iteration and epoch %d% , %d% 6 0
Num cuts in batch %d 20
[1.3319993, 0.4]
At iteration and epoch %d% , %d% 7 0
Num cuts in batch %d 19
[1.369064, 0.4]
At iteration and epoch %d% , %d% 8 0
Num cuts in batch %d 20
[1.3323095, 0.4]
At iteration and epoch %d% , %d% 9 0
Num cuts in batch %d 20
[1.458204, 0.25]
At iteration and epoch %d% , %d% 10 0
Num cuts in batch %d 20
[1.5126954, 0.25]
At iteration and epoch %d% , %d% 11 0
Num cuts in batch %d 19
[1.4544828, 0.35]
At iteration and epoch %d% , %d% 12 0
Num cuts in batch %d 

InvalidArgumentError: ignored

In [0]:
#@title Training Loop
# Same single-pass loop as the other training cells, but optimised with SGD
# (Nesterov momentum) and evaluated on each batch right after training on it.
# 896 is the per-frame feature width expected from midi_to_pianoroll
# (presumably 7 piano-roll arrays x 128 pitches - confirm).
model = get_model([BATCH_SIZE, MAX_DIM, 896])
sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
for i in range(NUM_ITER):
  batch_files = train_filenames[i * BATCH_SIZE:(i+1) * BATCH_SIZE]
  if len(batch_files) < BATCH_SIZE:
    # The model input has a fixed batch dimension of BATCH_SIZE, so a short or
    # empty final slice cannot be trained on; stop here.
    break
  # Fixed: the index was passed as a second print argument, so the literal
  # "%d" was printed.
  print("At iteration %d" % i)
  training_data, labels = calculate_loss_per_batch_get_gradients(batch_files, train_split)
  print(model.train_on_batch(training_data, labels))
  
  # evaluate the model on the batch it was just trained on, so this measures
  # fit to that batch rather than generalisation
  scores = model.evaluate(training_data, labels)
  print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
  del training_data, labels