# Midi Chord to List Extraction
The notebook below cleans a directory of MIDI files and converts them into tensor form. The resulting tensor has the following shape:

**(midi_progression_batches, time_steps, music_chords)**

The **`chordExtractor._chordDecider`** method uses a tick threshold to decide where a chord ends in a given progression; each detected chord becomes one entry along the time_steps axis.

## Imports

In [None]:
#imports
!pip install mido
!pip install chorder
!pip install miditoolkit
import mido
import chorder
import miditoolkit

import numpy as np
np.set_printoptions(threshold=np.inf)
import tensorflow as tf
import torch


import os
import shutil
import copy



## Chord Extractor Class

In [None]:
#class for code extraction
class chordExtractor:
  """
  Group the note_on messages of a MIDI file into chords.

  The file is read with the mido library. On construction the first track
  is re-timed from delta ticks to cumulative ticks and its note_on messages
  are grouped into chords with a tick threshold. Each chord can then be
  written to its own .mid file or reduced to a list of note numbers.

  Only ``tracks[0]`` of the input file is processed.

  Attributes set by __init__:
    filePath        path given by the caller
    midOriginal     mido.MidiFile loaded from filePath
    midTimeUpdated  mido.MidiFile whose note messages carry cumulative ticks
    completeNotes   list of chords; each chord is a list of note_on messages

  Attribute set by extractNotesFromCompleteNotes:
    final_note_list list of chords; each chord is a list of note numbers
  """

  #midi file as input
  def __init__(self, midiFilePath,thresh_add=100):
    """
    midiFilePath: path of the .mid file to load
    thresh_add:   width, in ticks, of the window that groups notes into a chord
    """
    self.filePath = midiFilePath
    self.midOriginal = mido.MidiFile(self.filePath)
    self.midTimeUpdated = self._timeConverter()
    self.completeNotes = self._chordDecider(thresh_add)

  def __str__(self):
    return (f"filepath: {self.filePath}")

  def _timeConverter(self):
    """
    Build a copy of the first track whose note_on/note_off messages carry the
    cumulative tick count instead of the delta time.

    Every message (meta messages included) advances the running clock. All
    other messages are appended unchanged, so they keep their delta time and
    are the same objects as in self.midOriginal.

    returns: a new mido.MidiFile holding the single re-timed track
    """
    #keep the original resolution so tick values keep their meaning
    mid = mido.MidiFile(ticks_per_beat=self.midOriginal.ticks_per_beat)
    mid_track = mido.MidiTrack()

    elapsed = 0
    for message in self.midOriginal.tracks[0]:
      elapsed += message.time
      if message.type in ("note_on", "note_off"):
        #copy() keeps channel, note and velocity; only the time is replaced
        mid_track.append(message.copy(time=elapsed))
      else:
        mid_track.append(message)

    mid.tracks.append(mid_track)
    return mid

  @staticmethod
  def print_tracks(MidiFile):
    """
    prints all messages on MidiFile.tracks[0] in order

    Static so that it works both as chordExtractor.print_tracks(mid) and as
    instance.print_tracks(mid).
    """
    for message in MidiFile.tracks[0]:
      print(message)

  def get_midTimeUpdated(self):
    """
    gives the copy of the original file with linearized (cumulative tick) time
    """
    return self.midTimeUpdated

  def _chordDecider(self,thresh_add = 100):
    """
    Group the note_on messages of self.midTimeUpdated.tracks[0] into chords.

    A chord starts at its first note_on; every following note_on whose
    cumulative time is less than (first note time + thresh_add) belongs to
    the same chord. The first note_on outside that window starts the next
    chord and becomes its time reference.

    thresh_add: window width in ticks
    returns:    list of chords, each a list of note_on messages; empty list
                when the track contains no note_on message
    """
    #NOTE(review): note_on messages with velocity 0 are not filtered out and
    #are therefore treated as note starts -- confirm the input files never
    #use them as note-offs.
    complete_notes = []
    local_notes = []
    timeReference = None

    for message in self.midTimeUpdated.tracks[0]:
      if message.type != "note_on":
        continue
      if timeReference is None:
        timeReference = message.time

      if message.time < timeReference + thresh_add:
        local_notes.append(message)
      else:
        complete_notes.append(local_notes)
        #the note that closed the previous chord opens the next one and
        #anchors its threshold window
        local_notes = [message]
        timeReference = message.time

    #append last set of notes for last chord in progression
    if local_notes:
      complete_notes.append(local_notes)

    return complete_notes

  def generate_chordFiles(self,saveDir):
    """
    Save every chord of self.completeNotes as its own file
    saveDir/chord<N>.mid (N counts up from 0).

    Each file holds a program_change (program 12), the chord's note_on
    messages with delta time 32, then one note_off per note with delta
    time 64. The messages stored in self.completeNotes are not modified.

    saveDir: folder the files are written to
    """
    for chordNum, chord in enumerate(self.completeNotes):
      print("\n")
      save_mid = mido.MidiFile()
      chord_track = mido.MidiTrack()
      save_mid.tracks.append(chord_track)
      chord_track.append(mido.Message('program_change', program=12, time=0))
      print("\n")
      for note in chord:
        #write a re-timed copy so the stored cumulative times survive
        chord_track.append(note.copy(time=32))
      #create off notes, for testing purposes
      for baseNote in chord:
        chord_track.append(mido.Message("note_off",
                                channel=baseNote.channel,
                                note=baseNote.note,
                                velocity=baseNote.velocity,
                                time=64))

      save_mid.save(os.path.join(saveDir, "chord{}.mid".format(chordNum)))

    print("Midi Conversions Complete\nFiles at:\n{}".format(saveDir))

  #COMPLETE NOTE EXTRACTION FUNCTIONS
  def extractNotesFromCompleteNotes(self,message_chord_list, printNotes = False, printNumChords = False, printOutput = True):
    """
    Reduce a list of chords (lists of messages, see self.completeNotes) to a
    list of chords holding only the note numbers.

    message_chord_list: list of lists of messages exposing a .note attribute
    printNotes:         print every message while it is processed
    printNumChords:     print how many chords were passed in
    printOutput:        print the resulting list of note numbers

    The result is stored on the instance as self.final_note_list; nothing is
    returned.
    """
    if printNumChords:
      print("Number of Chords in File: {}\n".format(len(message_chord_list)))

    final_list = []
    for chord in message_chord_list:
      if printNotes:
        for note in chord:
          print(note)
      final_list.append([note.note for note in chord])

    if printOutput:
      print("\nNotes have been extracted with output showing:\n{}".format(final_list))

    self.final_note_list = final_list

  #PRINT FUNCTIONS FOR ABOVE VARIABLES & FUNCTIONS
  def _printTrackMessages(self, midiFile, trackNum, label, noteOnOnly = False):
    """
    Shared printer: prints the messages of midiFile.tracks[trackNum]
    (only the note_on ones when noteOnOnly is set) followed by a
    "<label> COMPLETE" footer.
    """
    for message in midiFile.tracks[trackNum]:
      if not noteOnOnly or message.type == "note_on":
        print(message)
    print("\n{} COMPLETE\n".format(label))

  def print_original_messages(self,trackNum = 0):
    """prints every message of track trackNum of the original file"""
    self._printTrackMessages(self.midOriginal, trackNum, "print_original_messages")

  def print_original_noteOn(self, trackNum = 0):
    """prints the note_on messages of track trackNum of the original file"""
    self._printTrackMessages(self.midOriginal, trackNum, "print_original_noteOn", noteOnOnly = True)

  def print_midTimeUpdated_messages(self, trackNum = 0):
    """prints every message of track trackNum of the re-timed file"""
    self._printTrackMessages(self.midTimeUpdated, trackNum, "print_midTimeUpdated_messages")

  def print_midTimeUpdated_noteOn(self,trackNum = 0):
    """prints the note_on messages of track trackNum of the re-timed file"""
    self._printTrackMessages(self.midTimeUpdated, trackNum, "print_midTimeUpdated_noteOn", noteOnOnly = True)
#############################END OF CLASS##################################################

## Chord Extractor Example

In [None]:
###########################################################################
# One progression file from the Essential MIDI dataset, used as a demo input.
file_path = "/content/drive/Shareddrives/Senior Design - Audio Project/MIDI Datasets/Datasets/AllDataSetsCombined/Essential_Midi_ProgressionsOnly/Major_Chord Progressions_Ab/G#_Ab_1-2-5-5.mid"
# Build the extractor; the constructor loads the file and groups its chords.
cls = chordExtractor(midiFilePath=file_path)

# Inspect the extractor: first every message of the original track ...
cls.print_original_messages(trackNum=0)

# ... then only the note_on messages of the re-timed track ...
cls.print_midTimeUpdated_noteOn(trackNum=0)

# ... and finally the chords as grouped lists of note_on messages.
print(cls.completeNotes)

<meta message channel_prefix channel=0 time=0>
<meta message track_name name='G#_Ab_1-2-5-5' time=0>
<meta message instrument_name name='Inst 13' time=0>
<meta message time_signature numerator=4 denominator=4 clocks_per_click=24 notated_32nd_notes_per_beat=8 time=0>
<meta message key_signature key='C' time=0>
<meta message smpte_offset frame_rate=24 hours=33 minutes=0 seconds=0 frames=0 sub_frames=0 time=0>
<meta message set_tempo tempo=500000 time=0>
note_on channel=0 note=56 velocity=113 time=0
note_on channel=0 note=60 velocity=119 time=0
note_on channel=0 note=63 velocity=107 time=0
note_off channel=0 note=56 velocity=64 time=1920
note_off channel=0 note=60 velocity=64 time=0
note_off channel=0 note=63 velocity=64 time=0
note_on channel=0 note=58 velocity=113 time=0
note_on channel=0 note=61 velocity=119 time=0
note_on channel=0 note=65 velocity=107 time=0
note_off channel=0 note=58 velocity=64 time=1920
note_off channel=0 note=61 velocity=64 time=0
note_off channel=0 note=65 veloc

In [None]:
###########################################DO NOT USE UNLESS NEEDED ON PURPOSE########################################
# Chord export:
# writes every chord of the loaded progression to its own .mid file with fixed time steps.
# A later version is meant to move this into a dedicated class for inferencing.
save_path = "/content/drive/Shareddrives/Senior Design - Audio Project/MIDI Datasets/midi_file_test"

cls.generate_chordFiles(saveDir=save_path)
##################################################################################################################

In [None]:
# Reduce the grouped chords to plain note numbers (stored on cls.final_note_list),
# printing each message and the chord count along the way.
cls.extractNotesFromCompleteNotes(
    message_chord_list=cls.completeNotes,
    printNotes=True,
    printNumChords=True,
)

# Blank separator, then dump the grouped note_on messages chord by chord.
print("\n")
for chord in cls.completeNotes:
  for note in chord:
    print(note)

Number of Chords in File: 4

note_on channel=0 note=56 velocity=113 time=0
note_on channel=0 note=60 velocity=119 time=0
note_on channel=0 note=63 velocity=107 time=0
note_on channel=0 note=58 velocity=113 time=1920
note_on channel=0 note=61 velocity=119 time=1920
note_on channel=0 note=65 velocity=107 time=1920
note_on channel=0 note=63 velocity=113 time=3840
note_on channel=0 note=67 velocity=119 time=3840
note_on channel=0 note=70 velocity=107 time=3840
note_on channel=0 note=63 velocity=113 time=5760
note_on channel=0 note=67 velocity=119 time=5760
note_on channel=0 note=70 velocity=107 time=5760

Notes have been extracted with output showing:
[[56, 60, 63], [58, 61, 65], [63, 67, 70], [63, 67, 70]]


note_on channel=0 note=56 velocity=113 time=0
note_on channel=0 note=60 velocity=119 time=0
note_on channel=0 note=63 velocity=107 time=0
note_on channel=0 note=58 velocity=113 time=1920
note_on channel=0 note=61 velocity=119 time=1920
note_on channel=0 note=65 velocity=107 time=1920


# Dataset Manipulation

The class above handles a single MIDI file; now we must create one instance of it for each file in our dataset and add the extracted data to a tensor that we can use for training. **This technique will also be used for single conversions when inferencing.**

## midi_dir class

Use this class hand in hand with `chordExtractor` to create a chord dataset from a directory of .mid files.

In [None]:
class midi_dir(chordExtractor):
  """
  Build a chord dataset for LSTM training from a directory of .mid files.

  Every .mid file below the directory is parsed with chordExtractor and
  reduced to a list of chords, each chord being a list of note numbers.
  The collected lists can then be turned into a numpy array / tensor and
  saved to disk.

  The files must have simple timing so that chordExtractor can separate
  the chords, i.e. chords must not be layered but completely separate.

  Attributes:
    midiDir       root directory given by the caller
    finalArray    list of progressions (set by midDirToTensor)
    array         numpy version of finalArray (set by npArrayDataset)
    tensorFormat  tensor version of finalArray (set by tensorDataset)
  """
  def __init__(self, midiDirPath):
    #chordExtractor.__init__ is not called: it needs the path of a single
    #MIDI file, which this directory wrapper does not have
    self.midiDir = midiDirPath

  def midDirToTensor(self):
    """
    Walk self.midiDir and convert every file ending in '.mid' into a list of
    chords (lists of note numbers), then run the dataset filter with three
    notes per chord.

    Directories and files are visited in sorted order so the row order of
    the dataset is reproducible.

    returns: self.finalArray, the filtered list of progressions
    """
    finalArray = []
    for subdir, dirs, files in os.walk(self.midiDir):
      dirs.sort()  #in-place sort steers os.walk's traversal order
      for filename in sorted(files):
        if not filename.endswith('.mid'):
          continue
        read_path = os.path.join(subdir, filename)
        self.midiProgression = chordExtractor(read_path)
        self.midiProgression.extractNotesFromCompleteNotes(self.midiProgression.completeNotes)
        finalArray.append(self.midiProgression.final_note_list)

    #must be created before filter, since filter reads this attribute
    self.finalArray = finalArray

    #run filter
    self._arrayFilter_EssentialMidiDataset(progressionLength = 3)

    return self.finalArray

  def npArrayDataset(self,printShape = True):
    """
    Convert self.finalArray to a numpy array, store it on self.array and
    return it.

    printShape: print the shape of the resulting array

    NOTE(review): progressions with differing chord counts are not filtered
    out anywhere in this class -- confirm the dataset never contains them.
    """
    self.array = np.array(self.finalArray)

    if printShape:
      print("Output np.array Shape:\n{}\n".format(np.shape(self.array)))
    return self.array

  def tensorDataset(self, returnDataset = True):
    """
    Convert the dataset to a tensor with tf.convert_to_tensor and store it on
    self.tensorFormat.

    returnDataset: when set, print the tensor shape and return the tensor;
                   otherwise nothing is printed and None is returned
    """
    outputArray = self.npArrayDataset(printShape = False)
    self.tensorFormat = tf.convert_to_tensor(outputArray)

    if returnDataset:
      print("Output TensorDataset Shape:\n{}\n".format(self.tensorFormat.shape))
      return self.tensorFormat

  #TOOLS FOR CHECKING DATASET CREATION
  def print_total_files(self):
    """
    Print the number of files in every directory below self.midiDir,
    followed by the total over all directories.
    """
    count = 0
    for subdir, dirs, files in os.walk(self.midiDir):
      print(len(files))
      count += len(files)
    print("Total files: {}".format(count))

  def saveTensorDataset(self,dirPath = "./",fileName = "tensor.pt"):
    """
    Saves self.tensorFormat (created by tensorDataset) with torch.save
    dirPath default:  ./
    fileName default: tensor.pt

    NOTE(review): self.tensorFormat comes from tf.convert_to_tensor, so a
    TensorFlow object is handed to torch.save here -- confirm this is the
    intended on-disk format.
    """
    complete_path = os.path.join(dirPath, fileName)
    torch.save(self.tensorFormat, complete_path)
  #################FILTERS####################################################
  def _arrayFilter_EssentialMidiDataset(self, progressionLength = 3):
    """
    Dataset filter for the essentialMidiDataset pack.

    Chords holding more than progressionLength notes have adjacent repeated
    notes removed until the chord has progressionLength notes or no adjacent
    repeat is left. Progressions that are empty, or that still contain a
    chord whose size is not progressionLength, are then deleted from
    self.finalArray.

    progressionLength: required number of notes per chord (despite the name,
                       it is compared against the length of each chord)
    """
    cleanList = []

    for chord_progression in self.finalArray:
      for chord in chord_progression:
        noteIndex = 1
        while len(chord) > progressionLength and noteIndex < len(chord):
          if chord[noteIndex] == chord[noteIndex - 1]:
            #drop one copy of the repeated note and re-check the same slot
            chord.remove(chord[noteIndex])
          else:
            noteIndex += 1

      if chord_progression and all(len(chord) == progressionLength for chord in chord_progression):
        cleanList.append(chord_progression)

    self.finalArray = cleanList
##################################END OF CLASS########################################################################

## Create Tensor for Training

In [None]:
# Root folder of the progression dataset that is converted below.
dirPath = "/content/drive/Shareddrives/Senior Design - Audio Project/MIDI Datasets/Datasets/AllDataSetsCombined/Essential_Midi_ProgressionsOnly"
# Alternative: restrict the run to a single sub-folder.
#dirPath = "/content/drive/Shareddrives/Senior Design - Audio Project/MIDI Datasets/Datasets/AllDataSetsCombined/Essential_Midi_ProgressionsOnly/Major_Chord Progressions_ A"

# Wrap the directory ...
midiCls = midi_dir(midiDirPath=dirPath)
# ... and pull the chords (as note-number lists) out of every .mid file in it.
outputList = midiCls.midDirToTensor()

In [None]:
# Convert the extracted lists to a numpy array (prints its shape) ...
outputNPArray = midiCls.npArrayDataset(printShape=True)

# ... and then to a tensor (prints its shape as well).
print("TENSOR INFO")
outputTensor = midiCls.tensorDataset(returnDataset=True)

Output np.array Shape:
(1416, 4, 3)

TENSOR INFO
Output TensorDataset Shape:
(1416, 4, 3)



Save Torch File & Open

In [None]:
#################BE CAREFUL NOT TO OVERWRITE THE LAST FILE###############################
# Target folder and file name for the serialized dataset tensor.
save_path = r"/content/drive/Shareddrives/Senior Design - Audio Project/MIDI Datasets/Datasets/AllDataSetsCombined/Essential_Midi_ProgressionsOnly"
file_name = r"tensor_essentialDataset.pt"

midiCls.saveTensorDataset(fileName=file_name, dirPath=save_path)

In [None]:
# Reload the saved dataset and check its shape.
# NOTE(review): presumably this is the file written by midi_dir.saveTensorDataset,
# which passes a tf.convert_to_tensor result to torch.save -- confirm that
# torch.load can still read it with the torch version in use.
tensorLocation = "/content/drive/Shareddrives/Senior Design - Audio Project/MIDI Datasets/Datasets/AllDataSetsCombined/Essential_Midi_ProgressionsOnly/tensor_essentialDataset.pt"
x = torch.load(tensorLocation)

print(x.shape)

(1416, 4, 3)
