# Notebook 2: Markov Models

#### Dependencies


In [1]:
import markov
from midiutil import MIDIFile
import pandas as pd
from music21 import midi
import numpy as np
from collections import Counter
from chord_dictionary import *

### Text Generation

Below, we'll build a markov chain using `alice_oz.txt`. 

In [2]:
with open("assets/alice_oz.txt", 'r') as f:
    text = f.readline()
    
splitText = text.split()

In [3]:
c = markov.Chain()
c.build(splitText)

Observe how the generated text changes when the number of words to output is adjusted.

In [15]:
num_words = 12
' '.join(c.generate(nwords=num_words))

'Alice was beginning to get very tired of sitting by her sister'

------

### Audio Generation

We can also create a markov chain that learns from the harmonies of Bach chorales and generate audio using the chain. Below is a preview of the dataset.

In [16]:
df = pd.read_csv('bach_choral_set_dataset.csv')
df.head()

Unnamed: 0,choral_ID,event_number,pitch_1,pitch_2,pitch_3,pitch_4,pitch_5,pitch_6,pitch_7,pitch_8,pitch_9,pitch_10,pitch_11,pitch_12,bass,meter,chord_label
0,000106b_,1,YES,NO,NO,NO,NO,YES,NO,NO,NO,YES,NO,NO,F,3,F_M
1,000106b_,2,YES,NO,NO,NO,YES,NO,NO,YES,NO,NO,NO,NO,E,5,C_M
2,000106b_,3,YES,NO,NO,NO,YES,NO,NO,YES,NO,NO,NO,NO,E,2,C_M
3,000106b_,4,YES,NO,NO,NO,NO,YES,NO,NO,NO,YES,NO,NO,F,3,F_M
4,000106b_,5,YES,NO,NO,NO,NO,YES,NO,NO,NO,YES,NO,NO,F,2,F_M


We will first train a markov model using the first choral in the dataset, `000106b_`. 

In [17]:
bigrams={}
grouped = df.groupby(df.choral_ID)
chords = grouped.get_group('000106b_')['chord_label'].values
ngrams = (zip(*[chords[i:] for i in range(2)]))
bigrams['chords'] = [" ".join(ngram) for ngram in ngrams]

bass = grouped.get_group('000106b_')['bass'].values
ngrams = (zip(*[bass[i:] for i in range(2)]))
bigrams['bass'] = [" ".join(ngram) for ngram in ngrams]

meter = grouped.get_group('000106b_')['meter'].values.astype(str)
ngrams = (zip(*[meter[i:] for i in range(2)]))
bigrams['meter'] = [" ".join(ngram) for ngram in ngrams]

In [20]:
def predict_next_state(chord, bigrams):
    """Predict next chord based on current state."""
    # create list of bigrams starting with current chord
    bigrams_with_current_chord = [bigram for bigram in bigrams if bigram.split(' ')[0]==chord]
    
    # count appearance of each bigram
    count_appearance = dict(Counter(bigrams_with_current_chord))
    
    # convert apperance into probabilities
    for ngram in count_appearance.keys():
        count_appearance[ngram] = count_appearance[ngram]/len(bigrams_with_current_chord)
        
    # create list of possible options for the next chord
    options = [key.split(' ')[1] for key in count_appearance.keys()]
    # create  list of probability distribution
    probabilities = list(count_appearance.values())
    # return random prediction
    return np.random.choice(options, p=probabilities)

def generate_sequence(chord, data, length:int=30):
    """Generate sequence of defined length."""
    # create list to store future chords
    chords = []
    for n in range(length):
        # append next chord for the list
        chords.append(predict_next_state(chord, data))
        # use last chord in sequence to predict next chord
        chord = chords[-1]
    return chords

chords = generate_sequence('F_M', bigrams['chords'])
bass = generate_sequence('F', bigrams['bass'])
meter = generate_sequence('3', bigrams['meter'])

print(chords)
print(bass)
print(meter)

['F_M', 'C_M', 'C_M', 'C_M', 'C_M', 'C_M', 'C_M', 'C_M', 'D_m7', 'D_m7', 'D_m7', 'D_m7', 'D_m7', 'D_m7', 'D_m7', 'D_m7', 'D_m7', 'D_m7', 'G_M', 'C_M', 'C_M', 'F_M', 'G_m', 'D_m', 'F_M', 'F_M', 'C_M', 'F_M', 'D_m', 'D_m']
['D', 'F', 'F', 'E', 'E', 'F', 'F', 'G', 'A', 'A', 'A', 'D', 'E', 'E', 'F', 'F', 'Bb', 'C', 'C', 'C', 'D', 'D', 'E', 'E', 'E', 'F', 'F', 'F', 'F', 'E']
['2', '1', '2', '3', '2', '4', '2', '3', '2', '1', '4', '2', '1', '4', '2', '1', '3', '5', '2', '1', '2', '3', '2', '3', '1', '2', '1', '3', '2', '3']


In [21]:
# Takes in a list of the pitch values from 1-12 and returns the converted MIDI note numbers
def get_midi_notes(pitch_values):
    a = []
    for i in pitch_values:
        a.append(midi_note_converter[i])
    return a

# Convert the pitch values to their midi note numbers
#for i in pitch_values:
#    pitch_values[i] = get_midi_notes(pitch_values[i])

# Sanity check to see if the generated chord sequence (and pitch values) were properly converted
def midi_numbers_list(chord_sequence):
    a = []
    for i in chord_sequence:
        a.append(pitch_values[i])
    return a

midi_numbers_final = midi_numbers_list(chords)
midi_numbers_final[:5]

[['pitch_1', 'pitch_6', 'pitch_10'],
 ['pitch_1', 'pitch_5', 'pitch_8'],
 ['pitch_1', 'pitch_5', 'pitch_8'],
 ['pitch_1', 'pitch_5', 'pitch_8'],
 ['pitch_1', 'pitch_5', 'pitch_8']]

In [22]:
# starting with C3
lower_octave_bass_converter = {'C': 48, 
                            'C#': 49,
                            'D': 50,
                            'D#': 51,
                            'Eb': 51,
                            'E': 52,
                            'F': 53,
                            'F#': 54,
                            'G': 55,
                            'G#': 56,
                            'A': 57,
                            'Bb': 58,
                            'B': 59                     
}

In [23]:
track = 0
channel = 0
time = 0 
duration = 1
tempo = 60
volume = 100

MIDI = MIDIFile(1)
MIDI.addTempo(track, time, tempo)

t = 0
for i in range(len(midi_numbers_final)):
    chord_sequence = midi_numbers_final[i]
    duration = int(meter[i])
    
    for c in chord_sequence:
        note = midi_note_converter[c]
        MIDI.addNote(track, channel, note, t, duration, volume)
        
    bass_note = bass[i]
    MIDI.addNote(track, channel, lower_octave_bass_converter[bass_note], t, duration, volume)
    t += duration
    
with open("generated_000106b_.mid", 'wb') as output_file:
    MIDI.writeFile(output_file)
    
mf = midi.MidiFile()
mf.open('generated_000106b_.mid')
mf.read()
mf.close()
s = midi.translate.midiFileToStream(mf)
s.show('midi')

StreamException: failed to find TimeSignature in meterStream; cannot process Measures

------

Now, we'll train on the entire dataset of chorales, rather than just one.

In [25]:
chord = markov.Chain(NPREF=4)
meter = markov.Chain(NPREF=4)
bass = markov.Chain(NPREF=4)

# training on all unique chorales
for chorale in df['choral_ID'].unique():
    d = df[df['choral_ID'] == chorale]
    chord.build(d['chord_label'])

    bass.build(d['bass'])

    chorale_meter_str = np.array(d['meter'], dtype='str')
    meter.build(chorale_meter_str)
    
generated_chords = chord.generate(16)
print(len(generated_chords))
generated_bass = bass.generate(16)
print(len(generated_bass))
generated_meter = meter.generate(16)
print(len(generated_meter))

8
8
8


In [26]:
# Takes in a list of the pitch values from 1-12 and returns the converted MIDI note numbers
def get_midi_notes(pitch_values):
    a = []
    for i in pitch_values:
        a.append(midi_note_converter[i])
    return a

# Convert the pitch values to their midi note numbers
#for i in pitch_values:
#    pitch_values[i] = get_midi_notes(pitch_values[i])

# Sanity check to see if the generated chord sequence (and pitch values) were properly converted
def midi_numbers_list(chord_sequence):
    a = []
    for i in chord_sequence:
        a.append(pitch_values[i])
    return a

midi_numbers_final = midi_numbers_list(generated_chords)
midi_numbers_final[:5]
print(len(midi_numbers_final))

KeyError: 'N'

In [27]:
track = 0
channel = 0
time = 0 
duration = 1
tempo = 60
volume = 100

MIDI = MIDIFile(2)
MIDI.addTempo(track, time, tempo)

t = 0
for i in range(len(midi_numbers_final)):
    chord_sequence = midi_numbers_final[i]
    duration = int(generated_meter[i])
    
    for c in chord_sequence:
        note = midi_note_converter[c]
        MIDI.addNote(track, channel, note, t, duration, volume)
        
    bass_note = generated_bass[i]
    MIDI.addNote(track, channel, lower_octave_bass_converter[bass_note], t, duration, volume)

    t += duration
    
with open("generated_all_midi.mid", 'wb') as output_file:
    MIDI.writeFile(output_file)
    
mf = midi.MidiFile()
mf.open('generated_all_midi.mid')
mf.read()
mf.close()
s = midi.translate.midiFileToStream(mf)
s.show('midi')

ValueError: invalid literal for int() with base 10: 'N'

-----
