<a href="https://colab.research.google.com/github/brandonso994/AttnLSTMMusicGeneration/blob/main/Extract_Midi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from music21 import converter, instrument, note, chord, stream, volume
from fractions import Fraction
import matplotlib.pyplot as plt
import glob
import numpy as np
import nltk
import pandas as pd
import pickle



In [None]:
# Mount Google Drive at /content/drive so files under
# '/content/drive/My Drive/...' are reachable from this notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
def flatten(array):
  """Concatenate the items of every sub-sequence of ``array`` into one list.

  Only one level of nesting is removed; item order is preserved.
  """
  new_array = []
  for sub_array in array:
    new_array.extend(sub_array)
  return new_array

# Experimental function to find melody instrument in polyphonic MIDI file
def find_melody_instrument(midi):
    """Guess which instrument carries the melody of a parsed MIDI score.

    The score is partitioned by instrument and the part containing the most
    distinct notes/chords is selected. Parts whose instrument name contains
    'Bass' or 'Percussion', or whose instrument name cannot be read, are
    never selected.

    Args:
        midi: A music21 stream, e.g. the result of ``converter.parse``.

    Returns:
        The ``instrumentName`` of the selected part, or ``None`` when no
        part qualifies.
    """
    # Partition once and reuse the result (the partitioning is not free).
    # NOTE(review): some music21 versions appear to return None when no
    # instrument is found - handled defensively here; confirm against the
    # installed version.
    partitioned = instrument.partitionByInstrument(midi)
    if partitioned is None:
        return None

    invalid_instr = ('Bass', 'Percussion')

    # Track the part with the most unique notes seen so far
    max_unique_notes = 0
    melody_instrument = None

    for part in partitioned:
        # Count the number of unique notes in each part; a chord counts as
        # one entry keyed by its sorted, dot-joined pitch names.
        unique_notes = set()
        for element in part.recurse():
            if isinstance(element, note.Note):
                unique_notes.add(element.pitch)
            elif isinstance(element, chord.Chord):
                chord_notes = sorted(str(n) for n in element.pitches)
                unique_notes.add('.'.join(chord_notes))

        num_unique_notes = len(unique_notes)
        if num_unique_notes <= max_unique_notes:
            continue

        # Best effort: a part whose instrument cannot be inspected is skipped
        # rather than aborting the whole search.
        try:
            instrument_name = part.getInstrument().instrumentName
        except Exception:
            continue
        if not isinstance(instrument_name, str):
            continue
        if any(x in instrument_name for x in invalid_instr):
            continue

        # Save instrument with most unique notes as the melody instrument
        max_unique_notes = num_unique_notes
        melody_instrument = instrument_name

    return melody_instrument

def get_notes(file):
    """Extract per-event features from the first instrument of a MIDI file.

    The file is parsed with music21 and partitioned by instrument; the first
    part is used. If that fails, all notes of the flattened score are used
    instead. Only ``note.Note`` and ``chord.Chord`` events with a duration of
    at most 12 quarter lengths are recorded, and every returned list gets
    exactly one entry per recorded event, so the lists stay index-aligned.

    To select a melody part instead of the first part, see
    ``find_melody_instrument``.

    Args:
        file: Path of the MIDI file, as accepted by ``converter.parse``.

    Returns:
        A 10-tuple ``(notes, simple_notes, pitches, simple_pitches,
        durations, velocities, duration_offset, offsets,
        simple_notes_offsets, instrument_name)`` where

        * ``pitches`` holds pitch names (chords: sorted names joined by '.'),
        * ``simple_pitches`` holds pitch names (chords: ``normalOrder``
          values joined by '.'),
        * ``notes`` / ``simple_notes`` are ``"<pitch> <duration>"`` strings,
        * ``durations`` are quarter lengths (fractions rounded to 2 places),
        * ``offsets`` are offset differences to the previously recorded
          event, rounded to 2 places,
        * ``duration_offset`` are ``"<duration>:<offset diff>"`` strings,
        * ``simple_notes_offsets`` are
          ``"<simple pitch> <duration>:<offset diff>"`` strings,
        * ``velocities`` are the events' ``volume.velocity`` values,
        * ``instrument_name`` is ``"First Instrument"`` or ``"Default"``.
    """
    simple_pitches = []
    pitches = []
    durations = []
    velocities = []
    notes = []
    simple_notes = []
    duration_offset = []
    offsets = []
    simple_notes_offsets = []

    midi = converter.parse(file)

    # Take first instrument in file; fall back to every note of the
    # flattened score when partitioning yields nothing usable.
    try:
        s2 = instrument.partitionByInstrument(midi)
        notes_to_parse = s2.parts[0].recurse().notes
        instrument_name = "First Instrument"
    except Exception:
        notes_to_parse = midi.flat.notes
        instrument_name = "Default"

    start_offset = 0
    for element in notes_to_parse:
        is_note = isinstance(element, note.Note)
        if not is_note and not isinstance(element, chord.Chord):
            # Anything that is neither a Note nor a Chord has no pitch entry;
            # recording only its timing would desynchronise the lists.
            continue

        duration = element.duration.quarterLength
        if isinstance(duration, Fraction):
            duration = round(float(duration), 2)

        # Ignore exceptionally long durations
        if duration > 12:
            continue

        # Offset relative to the previously recorded event
        temp_offset = round(float(element.offset), 2)
        offset_diff = round(float(temp_offset - start_offset), 2)
        start_offset = temp_offset

        if is_note:
            pitch_name = str(element.pitch)
            simple_name = pitch_name
        else:
            pitch_name = '.'.join(sorted(str(n) for n in element.pitches))
            simple_name = '.'.join(str(n) for n in element.normalOrder)

        duration_text = str(duration)
        timing_text = duration_text + ":" + str(offset_diff)

        durations.append(duration)
        offsets.append(offset_diff)
        duration_offset.append(timing_text)
        pitches.append(pitch_name)
        simple_pitches.append(simple_name)
        notes.append(pitch_name + " " + duration_text)
        simple_notes.append(simple_name + " " + duration_text)
        simple_notes_offsets.append(simple_name + " " + timing_text)
        velocities.append(element.volume.velocity)

    # Remove silence at beginning of files. No current code path appends
    # 'Rest', so this only matters if rest handling is added to the loop.
    feature_lists = (pitches, durations, velocities, notes, simple_notes,
                     simple_pitches, duration_offset, offsets,
                     simple_notes_offsets)
    while pitches and pitches[0] == 'Rest':
        for feature in feature_lists:
            feature.pop(0)

    return notes, simple_notes, pitches, simple_pitches, durations, velocities,duration_offset, offsets, simple_notes_offsets, instrument_name

def _dump_feature_pickles(path_prefix, features):
  """Pickle every ``{file_name: data}`` entry to ``path_prefix + file_name``."""
  for file_name, data in features.items():
    with open(path_prefix + file_name, 'wb') as f:
      pickle.dump(data, f)


# Save all extracted features from folder containing MIDI files
def note_result(folder_name, save_drive=False, save_file=True):
  """Extract features from every MIDI file in the current working directory.

  Files matching ``*.mid`` (any letter case) in the working directory are
  passed to ``get_notes``; files for which it raises are reported and
  skipped. The per-file feature lists are collected, optionally pickled,
  and returned.

  Args:
    folder_name: Sub-folder of '/content/drive/My Drive/MRP/Pickle/' that
      receives the pickles when ``save_drive`` is true. It does not affect
      where MIDI files are searched.
    save_drive: When true, write 'simple_pitches.pkl',
      'duration_offsets.pkl' and 'simple_notes_offsets.pkl' to the Drive
      folder described above. The folder must already exist.
    save_file: When true, write the same three pickles to the current
      working directory.

  Returns:
    A 9-tuple of lists with one entry per parsed file: ``(notes,
    simple_notes, pitches, simple_pitches, durations, velocities, offsets,
    duration_offsets, simple_notes_offsets)``.
  """
  pitches = []
  simple_pitches = []
  notes = []
  simple_notes = []
  durations = []
  velocities = []
  instrument_names = []
  duration_offsets = []
  offsets = []
  simple_notes_offsets = []

  files_parsed = 0
  for file in glob.glob("*.[mM][iI][dD]"):
    try:
      (temp_note, temp_simple_note, temp_pitch, temp_simple_pitch,
       temp_duration, temp_velocity, temp_duration_offset, temp_offset,
       temp_simple_note_offset, instrument_name) = get_notes(file)
    except Exception as exc:
      # Best effort: one unreadable file must not abort the whole batch,
      # but show the real cause since it is not always "no notes".
      print("No notes in file: ", str(file))
      print("  reason: ", repr(exc))
      continue
    instrument_names.append(instrument_name)
    notes.append(temp_note)
    simple_notes.append(temp_simple_note)
    pitches.append(temp_pitch)
    simple_pitches.append(temp_simple_pitch)
    durations.append(temp_duration)
    velocities.append(temp_velocity)
    duration_offsets.append(temp_duration_offset)
    offsets.append(temp_offset)
    simple_notes_offsets.append(temp_simple_note_offset)
    files_parsed = files_parsed + 1
    print(str(file), " ",instrument_name)

  print("Files parsed: ", files_parsed)

  # Features that get pickled; add further entries (e.g. 'notes.pkl': notes)
  # here to save more optional features/combinations of features.
  saved_features = {
    'simple_pitches.pkl': simple_pitches,
    'duration_offsets.pkl': duration_offsets,
    'simple_notes_offsets.pkl': simple_notes_offsets,
  }

  if save_file:
    _dump_feature_pickles('', saved_features)

  # Drive output is controlled by its own flag so that local-only runs do
  # not fail when Drive is not mounted.
  if save_drive:
    _dump_feature_pickles('/content/drive/My Drive/MRP/Pickle/' + folder_name + '/', saved_features)

  return notes, simple_notes, pitches, simple_pitches, durations, velocities, offsets, duration_offsets, simple_notes_offsets

In [None]:
# Run the extraction and keep the per-file feature lists as notebook globals
# (note that the durations are bound to the name `duration`).
notes, simple_notes, pitches, simple_pitches, duration, velocities, offsets,duration_offsets, simple_notes_offsets = note_result("input_pickle", save_drive=False, save_file=True)

preprocessed_chpn_op10_e01_format0.mid   First Instrument
Files parsed:  1


In [None]:
# Summarise the extracted features: total event count, the distinct offset
# differences, vocabulary sizes, and a preview of the first 60 events.
all_pitches = flatten(pitches)
print(len(all_pitches))
# print(len(flatten(simple_pitches)))
# print(len(flatten(duration)))
# print(len(flatten(velocities)))

all_simple_pitches = flatten(simple_pitches)
all_duration_offsets = flatten(duration_offsets)

# Vocabulary sizes (number of distinct tokens per feature)
pitch_set_len = len(set(all_simple_pitches))
duration_offset_set_len = len(set(all_duration_offsets))
simple_note_offset_len = len(set(flatten(simple_notes_offsets)))

print(sorted(set(flatten(offsets))))

for vocabulary_size in (pitch_set_len, duration_offset_set_len, simple_note_offset_len):
  print(vocabulary_size)

for preview_source in (all_pitches, all_simple_pitches, all_duration_offsets):
  print(preview_source[0:60])

1311
[0.0, 0.25, 1.0]
75
12
80
['C2', 'C3', 'C3', 'G3', 'C4', 'E4', 'C4', 'G4', 'C5', 'E5', 'C5', 'G5', 'C6', 'E6', 'C6', 'G6', 'C7', 'E7', 'C3', 'C3', 'C7', 'G6', 'C6', 'E6', 'C6', 'G5', 'C5', 'E5', 'C5', 'G4', 'C4', 'E4', 'C4', 'G3', 'C3', 'F1.F2', 'C3', 'A3', 'C4', 'F4', 'C4', 'A4', 'C5', 'F5', 'C5', 'A5', 'C6', 'F6', 'C6', 'A6', 'C7', 'E7', 'F#1.F#2', 'C7', 'A6', 'C6', 'E6', 'C6', 'A5', 'C5']
['C2', 'C3', 'C3', 'G3', 'C4', 'E4', 'C4', 'G4', 'C5', 'E5', 'C5', 'G5', 'C6', 'E6', 'C6', 'G6', 'C7', 'E7', 'C3', 'C3', 'C7', 'G6', 'C6', 'E6', 'C6', 'G5', 'C5', 'E5', 'C5', 'G4', 'C4', 'E4', 'C4', 'G3', 'C3', '5', 'C3', 'A3', 'C4', 'F4', 'C4', 'A4', 'C5', 'F5', 'C5', 'A5', 'C6', 'F6', 'C6', 'A6', 'C7', 'E7', '6', 'C7', 'A6', 'C6', 'E6', 'C6', 'A5', 'C5']
['4.0:0.0', '0.25:0.0', '3.75:0.25', '0.25:0.25', '0.25:0.25', '0.25:0.25', '0.25:0.25', '0.25:0.25', '0.25:0.25', '0.25:0.25', '0.25:0.25', '0.25:0.25', '0.25:0.25', '0.25:0.25', '0.25:0.25', '0.25:0.25', '0.25:0.25', '0.25:0.25', '4.0:0.0'