In [31]:
import numpy as np
import _pickle as pickle
import glob
from collections import OrderedDict
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from music21 import note, chord, instrument, converter
#import matplotlib.pyplot as plt
#%matplotlib inline 

In [3]:
# Set to True for debugging: statements guarded by `if debug` in later cells
# (e.g. the ad-hoc prepare_data(notes, n_vocab) call) are then executed too.
debug = True

In [None]:
# Entry point that chains the individual stages of the notebook.
def train_network():
    """Run the full pipeline: load notes, build arrays, create the model, train.

    Relies on ``get_notes``, ``get_model`` and ``train``, which are not defined
    in the cells visible here, and on the ``prepare_data(notes, n_vocab)``
    variant of ``prepare_data``. Returns nothing.
    """
    # Load the note tokens together with the vocabulary size.
    notes, n_vocab = get_notes(quick=False)

    # Only the input/output arrays are needed below; the mapped notes are dropped.
    _, network_input, network_output = prepare_data(notes, n_vocab)

    # The model is built from the per-sample shape, i.e. without the leading axis.
    sample_shape = network_input.shape[1:]
    model = get_model(sample_shape, n_vocab)

    train(model, network_input, network_output)
#--------------------------------------------------------------------
# Uncomment after running all cells.
#train_network()

In [95]:
def _encode_pitches(pitches, note_names, octaves, n_slots=3):
    """Encode up to ``n_slots`` pitches as an (n_slots, n_columns) 0/1 int32 array.

    Column order is ``['empty'] + note_names + octaves``. A used slot gets a 1
    in its pitch-name column and in its octave column; an unused slot gets a 1
    in the 'empty' column only. Each call builds a fresh array, so nothing set
    for one element can leak into the next.
    """
    columns = ['empty'] + list(note_names) + list(octaves)
    column_of = {key: position for position, key in enumerate(columns)}
    encoded = np.zeros([n_slots, len(columns)], dtype=np.int32)
    for slot in range(n_slots):
        if slot < len(pitches):
            name = str(pitches[slot].name)
            octave = str(pitches[slot].octave)
            assert name in note_names and octave in octaves, (name, octave)
            encoded[slot, column_of[name]] = 1
            encoded[slot, column_of[octave]] = 1
        else:
            # Only slots without a pitch are flagged as empty.
            encoded[slot, column_of['empty']] = 1
    return encoded


def prepare_data(input_folder='./music/Mozart/', output_folder='./data/Mozart/'):
    """Convert every ``*.mid`` file in ``input_folder`` to a pickled array.

    For each file an int32 array of shape (n_elements, 3, 19) is written to
    ``output_folder`` as ``<file stem>.pkl``. Axis 1 holds three pitch slots:
    a single note fills slot 0, a chord fills one slot per pitch (at most
    three), and the remaining slots are flagged 'empty'. Rows belonging to
    elements that are neither ``note.Note`` nor ``chord.Chord`` stay all-zero.

    Parameters
    ----------
    input_folder : str
        Folder searched for ``*.mid`` files (with or without trailing slash).
    output_folder : str
        Folder receiving the ``.pkl`` files; created if it does not exist.

    Returns
    -------
    list
        The full ``pitches`` tuple of every chord that had more than three
        pitches (only the first three of those are encoded).

    Raises
    ------
    AssertionError
        If a pitch name or octave is outside the supported vocabulary.

    Note: a later cell defines ``prepare_data(notes, n_vocab)``; running that
    cell rebinds the name ``prepare_data``.
    """
    import os  # local import: the notebook's import cell does not provide os

    note_names = ['A', 'B', 'B-', 'C', 'C#', 'D', 'E', 'E-', 'F', 'F#', 'G', 'G#']
    octaves = ['1', '2', '3', '4', '5', '6']
    n_columns = 1 + len(note_names) + len(octaves)
    bigchords = []

    os.makedirs(output_folder, exist_ok=True)

    # Sorted so that progress output and `bigchords` order are reproducible.
    midi_files = sorted(glob.glob(os.path.join(input_folder, '*.mid')))
    for index, file in enumerate(midi_files):
        if index % 10 == 0 : print(index,end='')
        print('.',end='')
        midi = converter.parse(file)
        parts = instrument.partitionByInstrument(midi)

        if parts :
            piano_notes = list(parts.parts[0].recurse())
        else :
            print('\n*caution no parts are found.')
            # `parts` is falsy here, so the notes must come from the parsed
            # score itself; flatten() exists on newer music21, .flat on older.
            flat_score = midi.flatten() if hasattr(midi, 'flatten') else midi.flat
            piano_notes = list(flat_score.notes)

        # array to hold the data
        data = np.zeros([len(piano_notes), 3, n_columns], dtype=np.int32)
        for elidx, element in enumerate(piano_notes):
            if isinstance(element, note.Note):
                # A Note exposes .name / .octave itself, so it is its own single "pitch".
                data[elidx] = _encode_pitches([element], note_names, octaves)
            elif isinstance(element, chord.Chord):
                pitches = element.pitches
                if len(pitches) > 3 :
                    # Remember oversized chords, then keep only the first three pitches.
                    bigchords.append(pitches)
                    pitches = pitches[:3]
                data[elidx] = _encode_pitches(pitches, note_names, octaves)

        # save data to disk
        stem = os.path.splitext(os.path.basename(file))[0]
        fn = os.path.join(output_folder, stem + '.pkl')
        with open(fn, 'wb') as f:
            pickle.dump(data, f)
    return bigchords
#----------------------------------------------------------------------
# Run the MIDI-to-pickle conversion. The return value is the list of chords
# that had more than three pitches (their full pitch tuples).
bigchords = prepare_data()

0..........10..........

122

In [None]:
def prepare_data(notes, n_vocab, sequence_length=100):
    ''' Create input sequences and one-hot output notes for training.

    Every window of ``sequence_length`` consecutive label-encoded notes is one
    input sample, and the note right after the window is its target.

    Parameters
    ----------
    notes : sequence
        Note tokens; they are label-encoded with ``LabelEncoder``.
    n_vocab : int
        Divisor used to scale the integer labels of the input.
    sequence_length : int, default 100
        Number of notes per input window.

    Returns
    -------
    mapped_notes : the label-encoded ``notes``.
    NetworkInput : float array, shape (n_patterns, sequence_length, 1),
        labels divided by ``n_vocab``.
    NetworkOutput : one-hot array with one row per pattern.

    Raises
    ------
    ValueError
        If ``n_vocab`` is not positive or ``notes`` has no more than
        ``sequence_length`` items (no pattern can be built).

    Note: this definition reuses the name of the earlier zero-argument
    ``prepare_data`` cell; whichever cell ran last is the one that is bound.
    '''
    if n_vocab <= 0:
        raise ValueError('n_vocab must be positive, got %r' % (n_vocab,))
    if len(notes) <= sequence_length:
        raise ValueError(
            'need more than sequence_length=%d notes to build a pattern, got %d'
            % (sequence_length, len(notes)))

    # create a mapping to the notes
    mapper = LabelEncoder()
    mapped_notes = mapper.fit_transform(notes)

    n_patterns = len(mapped_notes) - sequence_length
    windows = [mapped_notes[start : start+sequence_length] for start in range(n_patterns)]
    NetworkInput = np.reshape(windows, (n_patterns, sequence_length, 1))
    NetworkInput = NetworkInput / float(n_vocab)

    # Target of window i is the note at i + sequence_length.
    targets = np.reshape(mapped_notes[sequence_length:], (-1,1))

    # Newer scikit-learn names the dense-output switch `sparse_output`;
    # older releases only know `sparse`.
    try:
        hotencoder = OneHotEncoder(sparse_output=False)
    except TypeError:
        hotencoder = OneHotEncoder(sparse=False)
    # Fit on all labels so every class gets a column even if it is never a target.
    hotencoder.fit(mapped_notes.reshape(-1,1))
    NetworkOutput = hotencoder.transform(targets)

    # save the mapper and hotencoder to disk for prediction.
    #with open('./data/mapper.pkl','wb') as f:
    #    pickle.dump(mapper, f)
    #with open('./data/hotencoder.pkl','wb') as f:
    #    pickle.dump(hotencoder, f)

    print('Input shape = ',NetworkInput.shape, '\nOutput shape = ', NetworkOutput.shape)
    return mapped_notes, NetworkInput, NetworkOutput
#---------------------------------------
# Debug-only smoke run of prepare_data.
# NOTE(review): `notes` and `n_vocab` are not assigned at top level in the
# cells shown here, so this line relies on them already existing in the kernel.
if debug : mapped_notes, NetworkInput, NetworkOutput = prepare_data(notes, n_vocab)

In [None]:
#with open('./checkpoints/1/loss_stack.pkl','wb') as f:
#    pickle.dump(loss_stack, f)

In [None]:
#plt.plot(range(len(loss_stack)),loss_stack)
#plt.show()

In [None]:
# Display `notes`.
# NOTE(review): `notes` is not assigned in any cell shown here — confirm where it comes from.
notes

In [None]:
# Recursive iterator over the first instrument part.
# NOTE(review): at top level `parts` is only assigned by a cell further down,
# so this cell depends on execution order.
piano = parts.parts[0].recurse()

In [None]:
# Gather every distinct pitch name that occurs in `piano`, whether it appears
# as a single note or as a member of a chord.
names = set()
for p in piano :
    if isinstance(p, chord.Chord):
        names.update(member.name for member in p.pitches)
    if isinstance(p, note.Note):
        names.add(p.name)

In [52]:
# Scratch cell: encode one MIDI file and record which chord sizes and octaves occur.
# NOTE(review): `element_info` (the OrderedDict slot template) is not defined in
# this cell; it must already exist in the kernel when the cell runs.
lenoct = set()    # distinct chord sizes (number of pitches per chord)
octaves = set()   # distinct octave numbers seen; NOTE: rebinds the name `octaves`
for index,file in enumerate([glob.glob('./music/Mozart/*.mid')[2]]):

    if index % 10 == 0 : print(index,end='')
    print('.',end='')
    midi = converter.parse(file)
    parts = instrument.partitionByInstrument(midi)

    if parts :
        piano_notes = parts.parts[0].recurse()
    else :
        print('\n*caution no parts are found.')
        # `parts` is falsy here, so read the notes from the parsed score itself;
        # flatten() exists on newer music21, .flat on older releases.
        flat_score = midi.flatten() if hasattr(midi, 'flatten') else midi.flat
        piano_notes = flat_score.notes

    # One (3 slots x template width) grid per element, allocated here so the
    # assignment below does not depend on a `data` left over from another cell.
    data = np.zeros([len(piano_notes), 3, len(element_info)], dtype=np.int32)

    # prepare notes
    notes_to_parse = []
    for elidx, element in enumerate(piano_notes):
        # Three independent copies of the template, one per pitch slot.
        elinf = [ element_info.copy() for i in range(3)]
        if isinstance(element, note.Note):
            elinf[0][str(element.name)] = 1
            elinf[0][str(element.octave)] = 1
            elinf[1]['empty'] = 1
            elinf[2]['empty'] = 1
            notes_to_parse.append(elinf)
            octaves.add(element.octave)
            # list(...) is required: a dict view cannot be assigned to an array row.
            data[elidx,0] = list(elinf[0].values())
        elif isinstance(element, chord.Chord):
            lenoct.add(len(element.pitches))
            assert len(element.pitches) <= 3
            for idx,e in enumerate(element.pitches) :
                elinf[idx][str(e.name)] = 1
                elinf[idx][str(e.octave)] = 1
                octaves.add(e.octave)
            # Flag only the slots that received no pitch.
            for i in range(len(element.pitches), 3):
                elinf[i]['empty'] = 1
            notes_to_parse.append(elinf)
        # Any other element type is ignored.
            

0.

In [53]:
# Distinct chord sizes (number of pitches per chord) collected by the loop above.
lenoct

{2, 3}

In [51]:
# Per-element encodings collected above: one list of three slot dicts per note/chord.
notes_to_parse

[[OrderedDict([('empty', 0),
               ('A', 0),
               ('B', 0),
               ('B-', 0),
               ('C', 0),
               ('C#', 1),
               ('D', 0),
               ('E', 0),
               ('E-', 0),
               ('F', 0),
               ('F#', 0),
               ('G', 0),
               ('G#', 0),
               ('1', 0),
               ('2', 0),
               ('3', 0),
               ('4', 1),
               ('5', 0),
               ('6', 0)]),
  OrderedDict([('empty', 1),
               ('A', 0),
               ('B', 0),
               ('B-', 0),
               ('C', 0),
               ('C#', 0),
               ('D', 0),
               ('E', 0),
               ('E-', 0),
               ('F', 0),
               ('F#', 0),
               ('G', 0),
               ('G#', 0),
               ('1', 0),
               ('2', 0),
               ('3', 0),
               ('4', 0),
               ('5', 0),
               ('6', 0)]),
  OrderedDict([('empty', 1),

In [44]:
# Template for one encoded slot: the 'empty' flag first, then one key per
# pitch name, then one key per octave digit, all initialised to 0.
note_names = [
    'A', 'B', 'B-', 'C', 'C#', 'D',
    'E', 'E-', 'F', 'F#', 'G', 'G#',
]
octaves = ['1', '2', '3', '4', '5', '6']
element_info = OrderedDict.fromkeys(['empty', *note_names, *octaves], 0)

TypeError: list indices must be integers or slices, not tuple

In [68]:
# One all-zero int32 grid of 3 slots x 19 columns per element of `piano_notes`.
data = np.zeros([len(piano_notes), 3, 19], dtype=np.int32)

In [70]:
# Write slot 0 of the current `elinf` into row 0 of `data`; the values come out
# in the dict's key order. `elinf` is whatever the encoding loop assigned last.
data[0,0] = list(elinf[0].values())

In [71]:
# Inspect the 3-slot grid stored for element 0.
data[0]

array([[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
      dtype=int32)