# Music Generation - Siddharth Shah, Ian Pompliano

Artificial Neural Networks and Deep Learning Final Project (Fall 2023)

**Part 1: Scrape MIDI files.**

In [3]:
import requests
import os
from bs4 import BeautifulSoup
from music21 import *

In [4]:
# list of composers
composers = ["albeniz", "bach", "balak", "beeth", "borodin", "brahms", "burgm",
             "chopin", "clementi", "debuss", "godowsky", "grana", "grieg", "haydn",
             "liszt", "mendelssohn", "moszkowski", "mozart", "muss", "rach", "ravel",
             "schub", "schum", "sinding", "tschai"]

baseURL = 'http://www.piano-midi.de/'

# create a directory to save downloaded MIDI files
if not os.path.exists('midiFiles'):
    os.makedirs('midiFiles')

for composer in composers:
    # create URL using respective composer
    url = baseURL + composer + '.htm'
    response = requests.get(url)

    soup = BeautifulSoup(response.content, 'html.parser')

    # Find MIDI links on the composer's page
    midiLinks = soup.find_all('a', href=True)

    for link in midiLinks:
        # removes duplicate MIDI files (special format0)
        if link['href'].endswith('.mid') and '_format0' not in link['href']:
            midiURL = baseURL + link['href']
            fileName = f"midiFiles/{composer}_{link['href'].split('/')[-1]}"

            # download MIDI file and save it to directory
            with open(fileName, 'wb') as midiFile:
                midiResponse = requests.get(midiURL)
                midiFile.write(midiResponse.content)

**Part 2: Organize notes in each MIDI file. Create list of sublists containing notes for each song**

In [5]:
# helper function to get the notes in a given MIDI file
def getNotes(file):
    # initalize list of notes to return
    notes = []
    pick = file.recurse()

    for element in pick:
        # if element is note, add to list of notes
        if isinstance(element, note.Note):
            notes.append(str(element.pitch))
        # if element is chord, add highest pitch (generally belongs to melody) to list of notes.
        elif isinstance(element, chord.Chord):
            highestPitch = max(element.pitches)
            notes.append(str(highestPitch))
    return notes

In [6]:
# initialize list that will hold sub-lists of notes for each song
allNotes = []

# retrieve paths of MIDI files
midiFiles = [os.path.join('midiFiles', file) for file in os.listdir('midiFiles') if file.endswith('.mid')]

# append allNotes with note data for each song
for path in midiFiles:
    midi = converter.parse(path)
    notes = getNotes(midi)
    allNotes.append(notes)









**Part 2.5: Visualize songs from corpus on sheet music**

In [22]:
# import necessary libraries
import IPython
from IPython.display import Image

In [23]:
# helper functions
def show(music):
    display(Image(str(music.write("lily.png"))))
    
def chords_n_notes(Snippet):
    Melody = []
    offset = 0 #Incremental
    for i in Snippet:
        #If it is chord
        if ("." in i or i.isdigit()):
            chord_notes = i.split(".") #Seperating the notes in chord
            notes = [] 
            for j in chord_notes:
                inst_note=int(j)
                note_snip = note.Note(inst_note)            
                notes.append(note_snip)
                chord_snip = chord.Chord(notes)
                chord_snip.offset = offset
                Melody.append(chord_snip)
        # pattern is a note
        else: 
            note_snip = note.Note(int(i))
            note_snip.offset = offset
            Melody.append(note_snip)
        # increase offset each iteration so that notes do not stack
        offset += 1
    Melody_midi = stream.Stream(Melody)   
    return Melody_midi

In [24]:
# sample first 50 notes from first song in corpus
sample_notes = allNotes[0][:50]

# execute helper functions for sheet music display
melody_snippet = chords_n_notes(sample_notes)
show(melody_snippet)

AttributeError: 'str' object has no attribute 'Note'

**Part 3: Map notes for each song to strings with normalized characters**


In [7]:
# function maps individual notes to specific chars for sequencing.
# note that, for example, both C#2 and D-2 map to '#'. This is because
# these two notes are the same pitch, but may be represented differently
# in musical notation. D- indicates D flat.
def mapNote(note):
    noteMapping = {
        'C2': '@', 'C#2': '#', 'D-2': '#', 'D2': '$', 'D#2': '%', 'E-2': '%', 'E2': '^', 'F2': '&', 'F#2': '*',
        'G-2': '*', 'G2': '(', 'G#2': ')', 'A-2': ')', 'A2': '_', 'A#2': '+', 'B-2': '+', 'B2': '-',
        
        'C3': 'q', 'C#3': 'w', 'D-3': 'w', 'D3': 'e', 'D#3': 'r', 'E-3': 'r', 'E3': 't', 'F3': 'y', 'F#3': 'u',
        'G-3': 'u', 'G3': 'i', 'G#3': 'o', 'A-3': 'o', 'A3': 'p', 'A#3': '[', 'B-3': '[', 'B3': ']',
        
        'C4': 'a', 'C#4': 's', 'D-4': 's', 'D4': 'd', 'D#4': 'f', 'E-4': 'f', 'E4': 'g', 'F4': 'h', 'F#4': 'j',
        'G-4': 'j', 'G4': 'k', 'G#4': 'l', 'A-4': 'l', 'A4': ';', 'A#4': '?', 'B-4': '?', 'B4': 'z',
        
        'C5': 'x', 'C#5': 'c', 'D-5': 'c', 'D5': 'v', 'D#5': 'b', 'E-5': 'b', 'E5': 'n', 'F5': 'm', 'F#5': ',',
        'G-5': ',', 'G5': '.', 'G#5': '/', 'A-5': '/', 'A5': 'Q', 'A#5': 'W', 'B-5': 'W', 'B5': 'E',
        
        'C6': 'R', 'C#6': 'T', 'D-6': 'T', 'D6': 'Y', 'D#6': 'U', 'E-6': 'U', 'E6': 'I', 'F6': 'O', 'F#6': 'P',
        'G-6': 'P', 'G6': '{', 'G#6': '}', 'A-6': '}', 'A6': '|', 'A#6': 'A', 'B-6': 'A', 'B6': 'S',
        
        'C7': 'D', 'C#7': 'F', 'D-7': 'F', 'D7': 'G', 'D#7': 'H', 'E-7': 'H', 'E7': 'J', 'F7': 'K', 'F#7': 'L',
        'G-7': 'L', 'G7': ':', 'G#7': '"', 'A-7': '"', 'A7': 'Z', 'A#7': 'X', 'B-7': 'X', 'B7': 'C',
        
        'C8': 'V'
    }

    return noteMapping.get(note, note)

In [9]:
# initialize list of strings where each char in string is a mapped note. Each string represents one song
allNotesMapped = []

# iterate through all songs in allNotes list
for i in range(len(allNotes)):
    buildString = ""
    
    # iterate through raw note data for each song, map note to respective char, add char to song string
    for j in range(len(allNotes[i])):
        buildString += mapNote(allNotes[i][j])
    
    # append list of strings with built string
    allNotesMapped.append(buildString)

**Optional: Pickle library to store and reload allNotesMapped**

In [13]:
# pickle and Download allNotesMapped for future reference
import pickle as pkl

with open('allNotesMapped.pkl', 'wb') as file:
    pkl.dump(allNotesMapped, file)

In [None]:
# pickle to reload allNotesMapped
import pickle as pkl

# unpickle the data structure from the file
with open('allNotesMapped.pkl', 'rb') as file:
    allNotesMapped = pkl.load(file)

**Part 4: Build the Network**

In [31]:
import numpy as np
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import tensorflow.keras.backend as K
from tensorflow.keras.optimizers import Adamax

In [27]:
# allNotesMapped -- array of music strings containing notes mapped to arbitrary characters
# oraganize data structure into array of arrays
all_notes = [list(song) for song in allNotesMapped]

# split corpus into labels and targets
length = 40 # arbitrarily choose input sequences of length 40
features = []
targets = []

for song in all_notes: 
    for i in range(0, len(song) - length, 1):
        features.append(song[i:i + length])
        targets.append(song[i + length])

In [30]:
# display summary statistics
num_datapoints = len(targets)
print(f"num sequences of sample music: {len(features)}")
print(targets[:60])

num sequences of sample music: 527548
['m', 'O', 'E', 'T', '|', 'Y', 'G', 'p', '[', 'g', 'p', '[', 'h', 'a', 's', 'k', 'd', 'f', 'x', 'd', 'f', '?', 'g', 'h', 'v', 'k', 'l', 'm', 'l', ';', 'n', ';', '?', 'n', ';', '?', 'm', 'x', 'c', '.', 'v', 'b', 'R', 'v', 'b', 'W', 'n', 'm', 'O', 'E', 'T', '|', 'Y', 'G', 'Q', 'W', 'O', 'Q', 'W', 'Q']


In [44]:
noteMapping = {
        'C2': '@', 'C#2': '#', 'D-2': '#', 'D2': '$', 'D#2': '%', 'E-2': '%', 'E2': '^', 'F2': '&', 'F#2': '*',
        'G-2': '*', 'G2': '(', 'G#2': ')', 'A-2': ')', 'A2': '_', 'A#2': '+', 'B-2': '+', 'B2': '-',
        
        'C3': 'q', 'C#3': 'w', 'D-3': 'w', 'D3': 'e', 'D#3': 'r', 'E-3': 'r', 'E3': 't', 'F3': 'y', 'F#3': 'u',
        'G-3': 'u', 'G3': 'i', 'G#3': 'o', 'A-3': 'o', 'A3': 'p', 'A#3': '[', 'B-3': '[', 'B3': ']',
        
        'C4': 'a', 'C#4': 's', 'D-4': 's', 'D4': 'd', 'D#4': 'f', 'E-4': 'f', 'E4': 'g', 'F4': 'h', 'F#4': 'j',
        'G-4': 'j', 'G4': 'k', 'G#4': 'l', 'A-4': 'l', 'A4': ';', 'A#4': '?', 'B-4': '?', 'B4': 'z',
        
        'C5': 'x', 'C#5': 'c', 'D-5': 'c', 'D5': 'v', 'D#5': 'b', 'E-5': 'b', 'E5': 'n', 'F5': 'm', 'F#5': ',',
        'G-5': ',', 'G5': '.', 'G#5': '/', 'A-5': '/', 'A5': 'Q', 'A#5': 'W', 'B-5': 'W', 'B5': 'E',
        
        'C6': 'R', 'C#6': 'T', 'D-6': 'T', 'D6': 'Y', 'D#6': 'U', 'E-6': 'U', 'E6': 'I', 'F6': 'O', 'F#6': 'P',
        'G-6': 'P', 'G6': '{', 'G#6': '}', 'A-6': '}', 'A6': '|', 'A#6': 'A', 'B-6': 'A', 'B6': 'S',
        
        'C7': 'D', 'C#7': 'F', 'D-7': 'F', 'D7': 'G', 'D#7': 'H', 'E-7': 'H', 'E7': 'J', 'F7': 'K', 'F#7': 'L',
        'G-7': 'L', 'G7': ':', 'G#7': '"', 'A-7': '"', 'A7': 'Z', 'A#7': 'X', 'B-7': 'X', 'B7': 'C',
        
        'C8': 'V'
}

In [45]:
for key, value in noteMapping.items():
    print(value)
    print(int(value))
    print()

@


ValueError: invalid literal for int() with base 10: '@'

In [42]:
# reshape X and LATER normalize
X = (np.reshape(features, (num_datapoints, length, 1))) # / float(L_symb)

# one hot encode the output variable
#y = np.array(targets)
y = tf.keras.utils.to_categorical([int(sym) for sym in targets], num_classes=len(set(targets)))

ValueError: invalid literal for int() with base 10: 'm'

In [39]:
# Split labels and targets into training and seed data
X_train, X_seed, y_train, y_seed = train_test_split(X, y, test_size=0.2, random_state=42)

In [40]:
# Initialize the Model
model = Sequential()

# Additional layers
model.add(LSTM(512, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dropout(0.4))
model.add(LSTM(256))
model.add(Dense(256))
model.add(Dropout(0.4))
model.add(Dense(y.shape[1], activation='softmax'))

# Compile the model for training  
opt = Adamax(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=opt)

IndexError: tuple index out of range

In [None]:
# Display Model Summary               
model.summary()

In [None]:
# Train the Model
history = model.fit(X_train, y_train, batch_size=256, epochs=200)

In [None]:
# Plot learning via loss function
history_df = pd.DataFrame(history.history)
fig = plt.figure(figsize=(15,4), facecolor="#97BACB")
fig.suptitle("Learning Plot of Model for Loss")
pl=sns.lineplot(data=history_df["loss"],color="#444160")
pl.set(ylabel ="Training Loss")
pl.set(xlabel ="Epochs")

**Part 5: Generate sample music from input seed**

**Part 6: Reverse map output to musical notation and form MIDI file**

In [22]:
# function that will convert char in produced sequence string back to note notation.
# note that here, we omit flats, because we can represent all flat pitches as sharps as well
# i.e. A flat is the same as G#, Db is the same as C#
def reverseMap(char):
    notes = {
        '@': 'C2', '#': 'C#2', '$': 'D2', '%': 'D#2', '^': 'E2', '&': 'F2', '*': 'F#2',
        '(': 'G2', ')': 'G#2', '_': 'A2', '+': 'A#2', '-': 'B2',
        
        'q': 'C3', 'w': 'C#3', 'e': 'D3', 'r': 'D#3', 't': 'E3', 'y': 'F3', 'u': 'F#3',
        'i': 'G3', 'o': 'G#3', 'p': 'A3', '[': 'A#3', ']': 'B3',
        
        'a': 'C4', 's': 'C#4', 'd': 'D4', 'f': 'D#4', 'g': 'E4', 'h': 'F4', 'j': 'F#4',
        'k': 'G4', 'l': 'G#4', ';': 'A4', '?': 'A#4', 'z': 'B4',
        
        'x': 'C5', 'c': 'C#5', 'v': 'D5', 'b': 'D#5', 'n': 'E5', 'm': 'F5', ',': 'F#5',
        '.': 'G5', '/': 'G#5', 'Q': 'A5', 'W': 'A#5', 'E': 'B5',
        
        'R': 'C6', 'T': 'C#6', 'Y': 'D6', 'U': 'D#6', 'I': 'E6', 'O': 'F6', 'P': 'F#6',
        '{': 'G6', '}': 'G#6', '|': 'A6', 'A': 'A#6', 'S': 'B6',
        
        'D': 'C7', 'F': 'C#7', 'G': 'D7', 'H': 'D#7', 'J': 'E7', 'K': 'F7', 'L': 'F#7',
        ':': 'G7', '"': 'G#7', 'Z': 'A7', 'X': 'A#7', 'C': 'B7',
        
        'V': 'C8'
    }
    return notes.get(char, char)

In [31]:
# convert output string chars back to music notation
notes = []
for char in #outputString:
    notes.append(reverseMap(char))

# create MIDI file
noteStream = stream.Stream()

for n in notes:
    noteObj = note.Note(n)
    noteStream.append(noteObj)

noteStream.append(tempo.MetronomeMark(number=120))
midiFile = noteStream.write('midi', fp='test.mid')

SyntaxError: invalid syntax (964265150.py, line 3)