# Abstract

The purpose of this notebook is to take our music dataset and convert it into a forest model that can be used with various random generators for chord recommendation.

# Imports

Basic Imports for Development Purposes

In [None]:
import numpy as np
np.set_printoptions(threshold=np.inf)
import tensorflow as tf
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.autograd as autograd
import collections
from sklearn.preprocessing import MinMaxScaler
from random import randint
import random

import pickle

print(torch.__version__)
import time

import os
import shutil
import copy

!pip install mido
import mido

1.8.1+cu101
Collecting mido
[?25l  Downloading https://files.pythonhosted.org/packages/b5/6d/e18a5b59ff086e1cd61d7fbf943d86c5f593a4e68bfc60215ab74210b22b/mido-1.2.10-py2.py3-none-any.whl (51kB)
[K     |████████████████████████████████| 51kB 4.7MB/s 
[?25hInstalling collected packages: mido
Successfully installed mido-1.2.10


# Load Dataset
Our dataset is made up of 1,416 four-chord progressions, where each chord has 3 notes.

In [None]:
# MUST CHANGE FOR GITHUB REPO
# Hard-coded absolute path to the serialized dataset on a mounted shared drive.
# NOTE(review): torch.load unpickles the file, so only load files from a trusted source.
tensorLocation = "/content/drive/Shareddrives/Senior Design - Audio Project/MIDI Datasets/Datasets/AllDataSetsCombined/Essential_Midi_ProgressionsOnly/tensor_essentialDataset.pt"
dataset = torch.load(tensorLocation)

# Recorded output for this cell is (1416, 4, 3): progressions x chords x notes
print(dataset.shape)

(1416, 4, 3)


Midi Note Chart

![](https://newt.phys.unsw.edu.au/jw/graphics/notes.GIF)  

Min: 41 == F2

Max: 76 == E5

Add or subtract 12 to move up or down one octave.

In [None]:
# Report the smallest and largest values stored anywhere in the dataset
print("Min: {}, Max: {}".format(np.amin(dataset),np.amax(dataset)))

Min: 41, Max: 76


# Useful Functions

The functions below do the following:  


1.   Convert between dataset and .midi format
2.   Open a pre-existing lookup table of known chords



In [None]:
from mido import Message, MidiFile, MidiTrack
import copy

def listToMidi(chord_list,
               dir_path = './',
               file_name = "new_song.mid",
               program = 12,
               dt = 256,
               veloON = 64,
               veloOFF = 127,
               printOut = True,
               saveMidiFile=True,
               completePath = False):
  """
  Converts a list of chords (each an iterable of midi note values) into a
  single-track midi file.

  chord_list:   iterable of chords; every chord is an iterable of midi note numbers
  dir_path:     directory used when saving via dir_path/file_name
  file_name:    file name used when saving via dir_path/file_name
  program:      instrument number sent in the initial program_change message
  dt:           delta time given to the first note_on and the first note_off
                of every chord (the remaining notes of the chord use time 0)
  veloON:       velocity of the note_on messages
  veloOFF:      velocity of the note_off messages
  printOut:     when True, print every message of the created file
  saveMidiFile: when True (and completePath is False) save to dir_path/file_name
  completePath: when not False, save to exactly this path instead;
                this branch runs regardless of saveMidiFile

  Returns None.
  """
  mid = MidiFile()
  track = MidiTrack()
  mid.tracks.append(track)

  #initialize instrument type (uses the caller supplied program number)
  track.append(Message('program_change', program=program, time=0))

  #each chord is switched on as a whole and then switched off as a whole
  for chord in chord_list:
    for event, velocity in (('note_on', veloON), ('note_off', veloOFF)):
      for position, note in enumerate(chord):
        #only the first note of the group carries the delta time,
        #the others follow with time 0 so the notes sound together
        delta = dt if position == 0 else 0
        track.append(Message(event, note=note, velocity=velocity, time=delta))

  #print created track
  if printOut == True:
    for msg in mid:
      print(msg)

  #save midi file
  if saveMidiFile == True and completePath == False:
    complete_path = os.path.join(dir_path,file_name)
    mid.save(complete_path)
    print("\nfile saved @\n{}".format(complete_path))
  elif completePath != False:
    mid.save(completePath)
##########################################################
##########################################################
def midiExtend(file_path,dupNum=2,save_path = False):
  """
  Writes a new midi file in which the content of every track of the source
  file is repeated dupNum times.

  file_path: path of the midi file to extend
  dupNum:    how many times each track's messages are written back to back
  save_path: full path of the output file; when left as False the file is
             saved as "midi_extended<dupNum>.mid" next to the source file

  Returns None.
  """
  mid = MidiFile(file_path)

  #keep the source timing resolution so the tick based delta times keep their meaning
  mid_final = MidiFile(ticks_per_beat=mid.ticks_per_beat)

  #Work on mid.tracks: iterating the MidiFile object itself yields messages
  #whose time is converted to seconds, and those cannot be saved again.
  #NOTE(review): tracks of unequal length drift apart when repeated - confirm
  #this is acceptable for multi-track input.
  for track in mid.tracks:
    extended_track = MidiTrack()
    for _ in range(dupNum):
      for msg in track:
        extended_track.append(msg.copy())
    mid_final.tracks.append(extended_track)

  if save_path:
    final_path = save_path
  else:
    #create new file name
    full_name = "midi_extended" + str(dupNum) + ".mid"
    final_path = os.path.join(os.path.dirname(file_path),full_name)

  mid_final.save(final_path)
#########################################################################
#########################################################################
def datasetEmbedder(dataset,inputLength=3):
  """
  Splits every progression into an (input, target) tuple.

  dataset: indexable collection of progressions (total_progs, progression length, notes in chord)
  inputLength: # of leading chords used as the input

  returns: list of tuples (first inputLength chords, chord at position inputLength)
  """
  assert len(dataset[0]) > inputLength, "inputLength needs to be less than progression length"

  #the assert above guarantees that a target chord exists at index inputLength
  grams = []
  for progression in dataset:
    context = progression[:inputLength]
    target = progression[inputLength]
    grams.append((context,target))

  return grams

def decoder(code=0):
  """
  Turns an encoded chord back into a list of integer note values.

  code: key into the ix_to_chord lookup table (defined at notebook level)
  returns: list of ints parsed from the bracketed chord string
  """
  chord_text = ix_to_chord[code]

  #brackets become spaces so only the space separated numbers remain
  for bracket in ("[", "]"):
    chord_text = chord_text.replace(bracket," ")

  final_chord = []
  for token in chord_text.split(" "):
    if token != "":
      final_chord.append(int(token))
  return final_chord

#######################Lookup Table Functions###################################
def createLookup(dataset):
  """
  Builds the chord vocabulary and the encode/decode lookup tables.

  dataset: iterable of progressions; every chord must offer a .numpy() method
           because the string form of that array is used as the chord "word"

  returns: (vocab, word_to_ix, ix_to_chord)
    vocab:       set of unique chord strings
    word_to_ix:  chord string -> index (encode)
    ix_to_chord: index -> chord string (decode)
  """
  #create chords as a singular word
  vocab = set()
  for prog in dataset:
    for chord in prog:
      vocab.add(str(chord.numpy())) #convert chord to string
  print("# of Chords in Vocab: {}".format(len(vocab)))

  #Indices are assigned in sorted order. Set iteration order of strings can
  #differ between interpreter runs, which would give a different encoding
  #every time the table is rebuilt.
  ordered_vocab = sorted(vocab)

  #create lookup table
  word_to_ix = {word:i for i,word in enumerate(ordered_vocab)} #encode
  ix_to_chord = {i:word for i,word in enumerate(ordered_vocab)} #decode

  return vocab, word_to_ix, ix_to_chord

def saveLookUp(encode_dict,decode_dict,dirPath):
  """
  Pickles both lookup tables into dirPath.

  encode_dict: word_to_ix, written to lookUp_encode.txt
  decode_dict: ix_to_chord, written to lookUp_decode.txt
  dirPath: directory that receives both files
  """
  #(file name, table) pairs, written in this order
  targets = (
    (r"lookUp_encode.txt", encode_dict),
    (r"lookUp_decode.txt", decode_dict),
  )

  #save files
  for file_name, table in targets:
    with open(os.path.join(dirPath,file_name), "wb") as lookup_file:
      pickle.dump(table, lookup_file)

  print("Files Saved @:\n{}".format(dirPath))

def loadLookUp(dirPath):
  """
  reverse of saveLookUp(): reads both pickled lookup tables from dirPath

  dirPath: directory containing lookUp_encode.txt and lookUp_decode.txt
  returns: (encode_dict, decode_dict, vocab) where vocab is the encode dict itself
  """
  #NOTE(review): pickle.load can run arbitrary code, only point this at trusted files
  loaded = []
  for file_name in (r"lookUp_encode.txt", r"lookUp_decode.txt"):
    with open(os.path.join(dirPath,file_name), "rb") as lookup_file:
      loaded.append(pickle.load(lookup_file))
  encode_dict, decode_dict = loaded

  assert len(encode_dict) == len(decode_dict), "encode and decode must be same length"

  #the encode dict doubles as vocab, used for the len(vocab) for embed dimensions
  return encode_dict, decode_dict, encode_dict
########################################################################
########################################################################

# Create Dataset AND Load lookup Table

Loads a pre-existing lookup table that has been used across all "Chord Recommendation Models"

In [None]:
#####################EMBEDDING LOADER####################
CONTEXT_SIZE = 3 #decides how long the input is for next recommendation
# NOTE(review): EMBEDDING_DIM is not referenced anywhere else in the visible part of this notebook
EMBEDDING_DIM = 100 #how many dimensions we want inside the linear model to have

###separates progressions into input and target as a tuple###
# each entry is (first CONTEXT_SIZE chords, chord at index CONTEXT_SIZE)
quadgrams = datasetEmbedder(dataset,inputLength=CONTEXT_SIZE)

In [None]:
#Lookup Table(s) Location
# Hard-coded directory on a mounted shared drive; it must contain
# lookUp_encode.txt and lookUp_decode.txt, the files loadLookUp() opens.
saveDir = r"/content/drive/Shareddrives/Senior Design - Audio Project/MIDI Datasets/Models/NLP Models/lookup_tables"

###load Lookup Table(s)###
# loadLookUp returns (encode dict, decode dict, vocab); vocab is the encode dict itself
word_to_ix, ix_to_chord,vocab = loadLookUp(saveDir)


print("Encoder\n{}".format(word_to_ix))
print("Decoder\n{}".format(ix_to_chord))
print("Number of Chord Options: {}".format(len(word_to_ix)))
print("\nThe above encoder and decoder should be inverses of eachother.\nIf you choose to create your own lookup table make sure they are the same")

Encoder
{'[53 56 60]': 0, '[46 49 53]': 1, '[45 48 52]': 2, '[47 50 53]': 3, '[54 57 61]': 4, '[58 62 65]': 5, '[61 64 67]': 6, '[70 73 76]': 7, '[68 71 74]': 8, '[65 68 71]': 9, '[57 61 64]': 10, '[59 63 66]': 11, '[61 65 68]': 12, '[50 54 57]': 13, '[45 49 52]': 14, '[53 57 60]': 15, '[47 50 54]': 16, '[55 58 62]': 17, '[61 64 68]': 18, '[51 54 57]': 19, '[60 63 67]': 20, '[59 62 66]': 21, '[48 52 55]': 22, '[64 67 70]': 23, '[66 70 73]': 24, '[54 58 61]': 25, '[49 52 55]': 26, '[67 70 74]': 27, '[43 46 50]': 28, '[57 60 64]': 29, '[54 57 60]': 30, '[65 68 72]': 31, '[48 51 55]': 32, '[62 66 69]': 33, '[63 67 70]': 34, '[66 69 73]': 35, '[60 63 66]': 36, '[50 53 56]': 37, '[41 44 48]': 38, '[64 67 71]': 39, '[64 68 71]': 40, '[68 72 75]': 41, '[62 65 69]': 42, '[58 61 65]': 43, '[44 47 51]': 44, '[52 55 59]': 45, '[48 51 54]': 46, '[42 45 49]': 47, '[67 70 73]': 48, '[66 69 72]': 49, '[53 56 59]': 50, '[49 53 56]': 51, '[49 52 56]': 52, '[68 71 75]': 53, '[55 58 61]': 54, '[51 54 58]

# Tree Creator

1. Create a list of possible chord options
2. Create our independent trees (roots)
3. Create a class that builds a tree per start chord  
    This involves the following:
      1. Search through each layer and bind the possible next chords, as a list, to the input chord
4. Generate MIDI file
---  
5. Create the random forest model via a class, with each layer expressed as dictionary lookups

In [None]:
#make copy of dataset and convert to lists form
#shallow-copy the dataset, then turn it into nested python lists
dataset_copy = copy.copy(dataset)
data = dataset_copy.numpy().tolist()

## Step 1 --> See how many chords are in dataset

This step isn't needed for any of the below classes but it is a sanity check that your lookup table was created from the same dataset

In [None]:
#extract all possible chords in list form
#decode every entry of the lookup table back into a list of notes
independent_chords = [decoder(code=key) for key in ix_to_chord]

assert len(independent_chords) == len(ix_to_chord),"independent chords does NOT match your lookup table length"
print("# of Chords in Dataset: {}".format(len(independent_chords)))

# of Chords in Dataset: 77


In [None]:
#prints first chord in each progression
# index 0 on the second axis selects the opening chord of every progression
for chord in dataset[:,0,:]:
  print(chord.numpy())

In [None]:
#prints every chord of the first progression
for chord in dataset[0,:,:]:
  print(chord)

tf.Tensor([55 59 62], shape=(3,), dtype=int64)
tf.Tensor([60 64 67], shape=(3,), dtype=int64)
tf.Tensor([62 66 69], shape=(3,), dtype=int64)
tf.Tensor([64 67 71], shape=(3,), dtype=int64)


## Step 2 - Create Independent Tree Starting Points

In [None]:
#collect every distinct opening chord, in order of first appearance
tree_bases = []
for first_chord in dataset[:,0,:]:
  start_chord = first_chord.numpy()

  #first pass: nothing to compare against yet
  if not tree_bases:
    print("first pass")
    tree_bases.append(list(start_chord))
    continue

  ##check if chord already exists##
  already_known = any((known_chord == start_chord).all() for known_chord in tree_bases)
  if not already_known:
    tree_bases.append(list(start_chord))


print("number of independent starting chords (roots): {}".format(len(tree_bases)))
print(tree_bases)

first pass
number of independent starting chords (roots): 40
[[55, 59, 62], [64, 67, 71], [59, 62, 66], [57, 60, 64], [62, 66, 69], [60, 63, 67], [56, 60, 63], [65, 68, 72], [63, 67, 70], [58, 61, 65], [59, 63, 66], [68, 71, 75], [63, 66, 70], [61, 64, 68], [66, 70, 73], [52, 56, 59], [56, 59, 63], [54, 57, 61], [50, 54, 57], [52, 55, 59], [57, 61, 64], [53, 57, 60], [60, 64, 67], [62, 65, 69], [55, 58, 62], [49, 53, 56], [51, 54, 58], [53, 56, 60], [48, 52, 55], [50, 53, 57], [58, 62, 65], [67, 70, 74], [65, 69, 72], [66, 69, 73], [64, 68, 71], [51, 55, 58], [54, 58, 61], [61, 65, 68], [48, 51, 55], [49, 52, 56]]


## Step 3 - Create Single Tree Class

In [None]:
class tree:
  """
  Lookup structure for all progressions that start with one given chord.

  tree_base: the start chord (root) of the tree
  data:      the progressions the tree was built from
  layer1:    dict, str(2nd chord) -> list of distinct 3rd chords that follow it
  layer2:    dict, str(3rd chord) -> list of distinct 4th chords that follow it
  Only progressions whose first chord equals tree_base contribute to the layers.
  """
  def __init__(self,first_chord,data):
    """
    first_chord: root chord of the tree
    data: indexable collection of progressions; each progression needs
          at least 4 chords because layer2 reads index 3
    """
    #keep the data so updateTree() rebuilds from the same progressions
    #instead of depending on a notebook level variable
    self.data = data
    self.updateTree(first_chord)

  def updateTree(self,first_chord):
    """
    Re-roots the tree at first_chord and rebuilds both layers from the
    data handed to the constructor.
    """
    self.tree_base = first_chord
    self.layer1 = self._layer_creator(self.data,layer_index=1)
    self.layer2 = self._layer_creator(self.data,layer_index=2)

  def _treelocations(self,dataset,tree_base):
    """
    returns the indices of all progressions whose first chord equals tree_base
    """
    return [i for i, prog in enumerate(dataset) if prog[0] == tree_base]

  def _layer_creator(self,dataset,layer_index):
    """
    Builds the transition dictionary of one layer.

    layer_index: position of the "current" chord inside a progression;
                 the chord at layer_index+1 is stored as a possible follower
    returns: dict, str(current chord) -> list of followers without duplicates,
             in order of first appearance
    """
    layer_dict = {}
    for ix in self._treelocations(dataset,tree_base=self.tree_base):
      current_chord = dataset[ix][layer_index]
      next_chord = dataset[ix][layer_index+1]
      followers = layer_dict.setdefault(str(current_chord),[])
      #create non_duplicate chords for layer
      if next_chord not in followers:
        followers.append(next_chord)
    return layer_dict

  def printInfo(self):
    """prints the root chord and both layer dictionaries"""
    print("tree_base: {}\nlayer_1 dictionary:\n{}\nlayer_2 dictionary:\n{}".format(self.tree_base,self.layer1,self.layer2))

###############################################################################################################################
class forest:
  """
  Collection of start chords (roots) that keeps one tree loaded at a time
  and draws random 4-chord progressions from the loaded tree.

  first_chord_list: list of all valid start chords
  dataset:          progressions used to build the trees
  current_root:     start chord of the currently loaded tree
  current_tree:     tree instance built for current_root
  """
  def __init__(self,first_chord_list,data):
    """
    first_chord_list: non-empty list of start chords; the tree of the first
                      entry is loaded right away
    data: progressions handed to the tree class
    """
    self.first_chord_list = first_chord_list
    self.dataset = data

    #the root always matches the tree that is actually loaded, so a progression
    #generated before any update_current_tree() call starts with a real chord
    self.current_root = self.first_chord_list[0]

    #initialize sub-instance for single tree for class protection
    self.current_tree = tree(self.current_root,self.dataset)

  def update_current_tree(self,start_chord):
    """
    update current tree from start_chord_location

    Prints a message and leaves the tree untouched when start_chord is
    already loaded or is not one of the known start chords.
    """
    if start_chord == self.current_root:
      print("Tree Already Loaded")
    elif start_chord in self.first_chord_list:
      self.current_root = start_chord
      self.current_tree.updateTree(self.current_root)
    else:
      print("chord not found in dataset")

  def _random_key_selector(self,dictionary):
    """
    returns a random key from a dictionary
    """
    return random.choice(list(dictionary.keys()))

  @staticmethod
  def _chord_from_key(key):
    """
    converts a layer dictionary key such as "[57, 60, 64]" into [57, 60, 64]
    """
    cleaned = key.replace("["," ").replace("]"," ").replace(","," ")
    return [int(token) for token in cleaned.split()]

  def random_chord_progression(self):
    """
    Draws a random 4-chord progression from the loaded tree.

    chord 1: the current root
    chord 2: random key of layer1
    chord 3: random follower of chord 2 in layer1
    chord 4: random follower of chord 3 in layer2

    returns: list of 4 chords, each a new list of note values
    """
    chord_1 = self.current_root
    chord_2 = self._random_key_selector(self.current_tree.layer1)
    chord_3 = random.choice(self.current_tree.layer1[str(chord_2)])
    chord_4 = random.choice(self.current_tree.layer2[str(chord_3)])

    final_prog = []
    for chord in (chord_1,chord_2,chord_3,chord_4):
      if isinstance(chord,str):
        #dictionary keys are stringified chords; parse them directly so no
        #external lookup table is required
        final_prog.append(self._chord_from_key(chord))
      else:
        #copy so callers cannot modify the chords stored in the tree
        final_prog.append(list(chord))
    return final_prog



In [None]:
#tree class initialization
t = tree(tree_bases[1],data)
t.printInfo()

tree_base: [64, 67, 71]
layer_1 dictionary:
{'[62, 66, 69]': [[60, 64, 67], [55, 59, 62]], '[60, 64, 67]': [[55, 59, 62]], '[55, 59, 62]': [[62, 66, 69]], '[57, 60, 64]': [[62, 66, 69]]}
layer_2 dictionary:
{'[60, 64, 67]': [[59, 62, 66]], '[55, 59, 62]': [[62, 66, 69], [60, 64, 67], [64, 67, 71]], '[62, 66, 69]': [[60, 64, 67], [55, 59, 62]]}


forest model

In [None]:
#create the forest from every start chord and the list-form dataset
chordForest = forest(tree_bases,data)

In [None]:
# for start_chord in tree_bases:
start_chord = tree_bases[21]
chordForest.update_current_tree(start_chord)

In [None]:
#draw one random progression from the currently loaded tree and display it
random_prog = chordForest.random_chord_progression()
random_prog

[[53, 57, 60], [57, 60, 64], [62, 65, 69], [58, 62, 65]]

Debug Code:

Used to check output and variables inside the above classes

In [None]:
#show the layer1 dictionary of the currently loaded tree
chordForest.current_tree.layer1

{'[53, 57, 60]': [[48, 52, 55]],
 '[54, 57, 60]': [[64, 67, 71], [59, 62, 66], [62, 66, 69]],
 '[55, 59, 62]': [[64, 67, 71], [60, 64, 67], [59, 62, 66]],
 '[57, 60, 64]': [[59, 62, 66], [62, 66, 69], [60, 64, 67]],
 '[59, 62, 66]': [[57, 60, 64], [64, 67, 71]],
 '[60, 64, 67]': [[62, 66, 69], [64, 67, 71], [55, 59, 62], [57, 60, 64]],
 '[62, 66, 69]': [[59, 62, 66], [64, 67, 71], [60, 64, 67], [57, 60, 64]],
 '[64, 67, 71]': [[57, 60, 64], [60, 64, 67], [55, 59, 62], [59, 62, 66]]}

In [None]:
#print each progression with its dataset index
#print each progression of the list-form dataset together with its index
for ix, prog in enumerate(data):
  print("Index: {}\n{}\n".format(ix,prog))

## Step 4 - Generate Midi File  

Uses listToMidi() to convert the forest output into MIDI files.

In [None]:
start="midi_"
end = ".mid"

#generate 10 random progressions and save them as midi_0.mid ... midi_9.mid
#(the stray `complete_file` expression that stood here raised a NameError on a
#fresh kernel because the name is only assigned inside the loop)
for i in range(10):
  complete_file = start + str(i) + end

  random_prog = chordForest.random_chord_progression()
  listToMidi(random_prog,printOut=False,file_name=complete_file)


file saved @
./midi_0.mid

file saved @
./midi_1.mid

file saved @
./midi_2.mid

file saved @
./midi_3.mid

file saved @
./midi_4.mid

file saved @
./midi_5.mid

file saved @
./midi_6.mid

file saved @
./midi_7.mid

file saved @
./midi_8.mid

file saved @
./midi_9.mid


# Test Code

## Min,Max Note value from Dataset

In [None]:
#check what the smallest note value is, and what the largest note value is
#flatten the dataset into one dimension and report its extreme note values
data_numpy = dataset.numpy()
print(data_numpy.shape)
d1_data = data_numpy.flatten()
print(d1_data.shape)

#largest note value first, then the smallest
print(d1_data.max())
print(d1_data.min())

(1416, 4, 3)
(16992,)
76
41
