In [1]:
# Harmony Team
# Nov. 10 - 2022

In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle 
import json
import os
from pathlib import Path
from IPython.display import Image, Audio
from music21 import note , chord , stream , instrument , converter   
import mido
# from midi2audio import FluidSynth          # to convert midi to wav file

In [10]:
# Open the sample song with mido as well (low-level MIDI access).
# NOTE(review): `mid` is not referenced again in the cells that follow here - confirm it is still needed.
mid = mido.MidiFile('dataset/midi_songs/rufus.mid')

In [7]:
# Parse the sample MIDI file into a music21 stream object.
# The cell output shows the type of the parsed result (a Score).
midi = converter.parse('dataset/midi_songs/rufus.mid')
type(midi)

music21.stream.base.Score

In [14]:
#midi.show('text')

In [15]:
# Flatten the score into a single stream and keep only its notes and chords.
# flatten() replaces the `.flat` property, which is deprecated since music21 v7.
notes_to_parse = midi.flatten().notes
print(len(notes_to_parse))

507


In [29]:
# Preview the first 15 elements with their offsets; elements that share an
# offset (e.g. the first two, both at 0.0) start at the same position in the piece.
for element in notes_to_parse[:15]:
    print(element , element.offset)   # Offset refers to where the note is located in the piece

<music21.note.Note E> 0.0
<music21.note.Note E> 0.0
<music21.note.Note E> 1.5
<music21.note.Note E> 1.75
<music21.note.Note E> 2.0
<music21.note.Note E> 2.0
<music21.note.Note F> 2.5
<music21.note.Note E-> 2.5
<music21.note.Note E> 3.0
<music21.note.Note D> 3.0
<music21.note.Note G> 3.5
<music21.note.Note C#> 3.5
<music21.note.Note G#> 4.0
<music21.note.Note B> 4.0
<music21.note.Note E> 5.0


In [34]:
str(notes_to_parse[0].pitch)

'E4'

In [39]:
notes_to_parse[50].normalOrder

[2, 5]

In [40]:
# Pitch refers to the frequency of the sound, i.e. how high or low a particular note is.
# A pitch is named with one of the letters A-G plus an octave number (e.g. 'E4');
# the letter alone does not rank pitches from highest to lowest.
notes_to_parse[0].pitch , str(notes_to_parse[0].pitch)

(<music21.pitch.Pitch E4>, 'E4')

In [41]:
# Turn every element of the sample song into a string token.
notes_demo = []

for element in notes_to_parse:
    if isinstance(element, note.Note):
        # A single note is represented by its pitch name, e.g. 'E4'
        token = str(element.pitch)
    elif isinstance(element, chord.Chord):
        # A chord is represented by its normal-order numbers joined with '+', e.g. '2+5'
        token = '+'.join(map(str, element.normalOrder))
    else:
        # Anything that is neither a Note nor a Chord is ignored
        continue
    notes_demo.append(token)

In [42]:
len(notes_demo)

507

In [43]:
print(notes_demo[32:50])

['B4', 'B2', 'E2', 'E4', 'E2', 'G#4', 'B4', '2+4', 'G#3', '2+5', 'A3', 'C#5', '2+6', 'B3', '6+11', 'B3', '2+7', 'B3']


In [44]:
# Collect the notes and chords of every MIDI file in dataset/midi_songs as string tokens
notes = []
p = Path("dataset/midi_songs")

# glob() yields files in an arbitrary, filesystem-dependent order. Sorting the
# paths keeps the token sequence (and every training window later cut from it)
# identical from run to run and from machine to machine.
for file in sorted(p.glob("*.mid")):
    midi = converter.parse(file)

    # flatten() replaces the `.flat` property, which is deprecated since music21 v7
    elements_to_parse = midi.flatten().notes

    for element in elements_to_parse:

        # if the element is a Note, store its pitch name (e.g. 'E4')
        if isinstance(element , note.Note):
            notes.append(str(element.pitch))

        # if the element is a Chord, join its normal-order numbers with + (e.g. '2+5')
        elif isinstance(element , chord.Chord):
            notes.append("+".join(str(n) for n in element.normalOrder))



In [45]:
len(notes)

60764

In [51]:
# Persist the extracted token list to ./dataset/notes in pickle format.
# open(..., "wb") does not create directories, so ./dataset must already exist.
with open("./dataset/notes" , "wb") as file:
    pickle.dump(notes , file)

In [52]:
# Reload the token list from ./dataset/notes.
# NOTE: unpickling can execute arbitrary code - only load a file that this
# notebook wrote itself, never one obtained from an untrusted source.
with open("./dataset/notes" , "rb") as file:
    notes = pickle.load(file)

In [54]:
print("Total notes: " , len(notes))
print("Unique notes: " , len(set(notes)))

Total notes:  60764
Unique notes:  398


In [55]:
# Vocabulary size: the number of distinct note/chord tokens in the corpus
n_vocab = len(set(notes))

In [56]:
n_vocab

398

## PREPARE SEQUENTIAL DATA FOR LSTM

In [57]:
# Vocabulary: every distinct token, sorted so that each token always gets the same index
pitchnames = sorted(set(notes))

# token -> integer index
note_to_int = {element: idx for idx, element in enumerate(pitchnames)}

# integer index -> token (the inverse lookup)
int_to_note = dict(enumerate(pitchnames))

# The mapping must cover the whole vocabulary
assert len(note_to_int) == n_vocab

In [58]:
# pitchnames, note_to_int and int_to_note are built in the previous cell; this
# cell used to rebuild them verbatim. Instead, verify that the two lookups
# cover the whole vocabulary and are exact inverses of each other.
assert len(note_to_int) == len(int_to_note) == n_vocab
assert all(int_to_note[idx] == element for element , idx in note_to_int.items())

In [59]:
# How many elements LSTM input should consider
sequence_len = 100

In [60]:
# Encode the whole corpus once, then slide a window of sequence_len tokens over it:
# each window is one input sample and the token right after it is its target.
encoded_notes = [note_to_int[n] for n in notes]

network_input = []     # input sequences (sequence_len integer codes each)
network_output = []    # integer code of the token that follows each sequence

for start in range(len(encoded_notes) - sequence_len):
    stop = start + sequence_len
    network_input.append(encoded_notes[start:stop])
    network_output.append(encoded_notes[stop])

In [71]:
network_output[0]

390

In [72]:
len(network_input) , len(network_output)

(60664, 60664)

In [73]:
len(network_input) , len(network_output)

(60664, 60664)

In [74]:
np.asarray(network_input).shape

(60664, 100)

In [84]:
# Give the input the 3-D layout the LSTM layers consume by appending a
# trailing axis of size 1: (input_samples, sequence_len, 1).
# The values are still raw integer codes here; scaling happens in a later cell.
normalised_network_input = np.expand_dims(np.asarray(network_input), axis=-1)
print(normalised_network_input.shape)

(60664, 100, 1)


In [86]:
normalised_network_input[0][:10]

array([[382],
       [382],
       [333],
       [382],
       [395],
       [389],
       [383],
       [382],
       [395],
       [394]])

In [87]:
# Scale the integer codes into [0, 1) by dividing by the vocabulary size.
# The array is rebuilt from network_input instead of being divided in place,
# so re-running this cell cannot shrink the values a second time.
normalised_network_input = np.expand_dims(np.asarray(network_input), axis=-1) / float(n_vocab)

In [88]:
normalised_network_input[0][:10]

array([[0.95979899],
       [0.95979899],
       [0.83668342],
       [0.95979899],
       [0.99246231],
       [0.97738693],
       [0.96231156],
       [0.95979899],
       [0.99246231],
       [0.98994975]])

In [89]:
# The network targets are class indices, so encode them as one-hot vectors
from tensorflow.keras.utils import to_categorical

# Re-derive the integer targets from `notes` (the token following each window)
# rather than reading network_output, which this cell overwrites: a re-run would
# otherwise one-hot encode an already one-hot matrix.
target_ids = [note_to_int[n] for n in notes[sequence_len:]]

# Fix the width to n_vocab. Without num_classes the width is inferred as
# max(label) + 1, which is too small whenever the highest-indexed token never
# occurs as a target.
network_output = to_categorical(target_ids , num_classes=n_vocab)

In [92]:
print(normalised_network_input.shape)
print(network_output.shape)

(60664, 100, 1)
(60664, 398)


In [93]:
print(normalised_network_input.shape)
print(network_output.shape)

(60664, 100, 1)
(60664, 398)


#### DEFINE MODEL ARCHITECTURE