In [2]:
import collections
import numpy as np

with open("/Users/joelchang/Desktop/Downloads/alice_oz.txt", 'r') as f:
    text = f.readline()
splitText = text.split()

NPREF = 2
NONWORD = "\n"
MAXGEN = 200

class Prefix:
    
    def __init__(self, n, string):
        self.__multiplier = 31
        self.p = collections.deque()
        
        for i in range(n):
            self.p.append(string)
        
    def __hash__(self):
        h = 0
        for i in range(len(self.p)):
            h = self.__multiplier * h + hash(self.p[i])
        return h
    
    def __eq__(self, other):
        for i in range(len(self.p)):
            if (self.p[i] != other.p[i]):
                return False
        return True
    
    def clone(self):
        copyP = self.p.copy()
        copy = Prefix(NPREF, NONWORD)
        copy.p = copyP
        return copy
    
class Chain:
    def __init__(self, NPREF=2, NONWORD = "\n", MAXGEN = 200):
        # map<Prefix, vector<string>>
        self.statetab = {}
        self.NPREF = NPREF
        self.NONWORD = NONWORD
        self.MAXGEN = MAXGEN
        
        self.prefix = Prefix(self.NPREF, self.NONWORD)

    def add(self, string):
        suf =  self.statetab.get(self.prefix)
        if not suf:
            suf = []
            self.statetab[self.prefix.clone()] = suf
            
        suf.append(string)
        self.prefix.p.popleft()
        self.prefix.p.append(string)
        

    def build(self, inStream):
        for i in inStream:
            self.add(i)
        self.add(self.NONWORD)

    def generate(self, nwords):
        chain = []
        new = Prefix(self.NPREF, self.NONWORD)
        for i in range(0, nwords):
            s = self.statetab.get(new)
            
            if not s:
                return "No state"

            r = np.random.randint(0, 9999999999) % len(s)
            word = s[r]

            if word == self.NONWORD:
                break;
            chain.append(word)
            new.p.popleft()
            new.p.append(word)
        return chain

In [3]:
c = Chain()
c.build(splitText)

In [4]:
' '.join(c.generate(30))

''

-----

In [5]:
import pandas as pd
df = pd.read_csv('/Users/joelchang/Desktop/Downloads/bach_choral_set_dataset.csv')

In [6]:
df['choral_ID'].unique()

array(['000106b_', '000206b_', '000306b_', '000408b_', '000507b_',
       '000606b_', '000707b_', '000907b_', '001007b_', '001106b_',
       '001207b_', '001306b_', '001606b_', '001707b_', '001805b_',
       '001907ch', '002406bs', '002506b_', '002806b_', '002908ch',
       '003006b_', '003109b_', '003206b_', '003306b_', '003608b2',
       '003806b_', '003907b_', '003907bv', '004006b_', '004008b_',
       '005708b_', '012006b_', '012106b_', '012206b_', '012306b_',
       '012406b_', '012506b_', '012606b_', '012606bv', '012705b_',
       '012805b_', '013506b_', '013705ch', '013906b_', '014007b_',
       '014403b_', '014406b_', '014500ba', '014505b_', '014505bv',
       '014608b_', '014806b_', '014806bv', '015105b_', '015301b_',
       '015305b_', '015309b_', '015403b_', '015408b_', '015505b_'],
      dtype=object)

These three chains are trained on each chorale (entire dataset).

In [7]:
chord = Chain()
meter = Chain()
bass = Chain()
for chorale in df['choral_ID'].unique():
    d = df[df['choral_ID'] == chorale]
    chord.build(d['chord_label'])
    
    bass.build(d['bass'])
    
    chorale_meter_str = np.array(d['meter'], dtype='str')
    meter.build(chorale_meter_str)
    
print(' '.join(chord.generate(10)))
print(' '.join(bass.generate(10)))
print(' '.join(meter.generate(10)))

F_M BbM BbM F_M F_M7 BbM BbM F_M F_M G_m
F D E E B F# F# D E E
3 2 5 3 4 3 5


-------

Install music21, listen to the first MIDI file `jsb/000106b.mid` (the miniplayer doesn't show up in jupyterlab). 

In [8]:
#!pip install music21
from music21 import midi

path = "/Users/joelchang/Desktop/Downloads/notebooks_nb2_jsb_000106b_.midi"
mf = midi.MidiFile()
mf.open(path) # path='abc.midi'
mf.read()
mf.close()
s = midi.translate.midiFileToStream(mf)
s.show('midi')

Creating a markov chain for the chord, bass, and meter.

In [9]:
chord = Chain()
meter = Chain()
bass = Chain()

# train on one chorale first
d = df[df['choral_ID'] == '000106b_']

# build chord markov chain
chord.build(d['chord_label'])

# build bass markov model
bass.build(d['bass'])

# convert meter to strings, build meter markov model
chorale_meter_str = np.array(d['meter'], dtype='str')
meter.build(chorale_meter_str)

In [10]:
chords_generated = chord.generate(20)
print(chords_generated)
bass_generated = bass.generate(20)
print(bass_generated)
meter_generated = meter.generate(20)
print(meter_generated)

['F_M', 'C_M', 'C_M', 'F_M', 'F_M', 'BbM', 'BbM', 'F_M', 'F_M', 'C_M', 'C_M', 'F_M', 'C_M4', 'C_M4', 'C_M4', 'C_M4', 'C_M4', 'C_M', 'F_M', 'F_M']
['F', 'E', 'E', 'F', 'F', 'F', 'Bb', 'Bb', 'A', 'C', 'C', 'C', 'Bb', 'Bb', 'Bb', 'C', 'C', 'C', 'C', 'F']
['3', '5', '2', '3', '2', '5', '2', '3', '2', '5', '2', '1', '3', '4', '3', '5', '2', '1', '3', '2']


In [11]:
# middle C
bass_converter = {'C': 60, 
                 'D': 62,
                 'E': 64,
                 'F': 65,
                 'G': 67,
                 'A': 69,
                 'B': 71
                 }
# starting with C2
lower_octave_bass_converter = {'C': 36, 
                             'D': 38,
                             'E': 40,
                             'F': 41,
                             'G': 43,
                             'A': 45,
                             'B': 47
}

In [12]:
bass_midi_notes = []
for i in bass_generated:
    bass_midi_notes.append(lower_octave_bass_converter[i])
bass_midi_notes

KeyError: 'Bb'

------

**the code below can be ignored, this is mainly testing if the underlying data structure is the same after adding more chords from a new chorale to a pre-existing chain.**

In [13]:
first = df[df['choral_ID'] == '000106b_']
first.head(10)

Unnamed: 0,choral_ID,event_number,pitch_1,pitch_2,pitch_3,pitch_4,pitch_5,pitch_6,pitch_7,pitch_8,pitch_9,pitch_10,pitch_11,pitch_12,bass,meter,chord_label
0,000106b_,1,YES,NO,NO,NO,NO,YES,NO,NO,NO,YES,NO,NO,F,3,F_M
1,000106b_,2,YES,NO,NO,NO,YES,NO,NO,YES,NO,NO,NO,NO,E,5,C_M
2,000106b_,3,YES,NO,NO,NO,YES,NO,NO,YES,NO,NO,NO,NO,E,2,C_M
3,000106b_,4,YES,NO,NO,NO,NO,YES,NO,NO,NO,YES,NO,NO,F,3,F_M
4,000106b_,5,YES,NO,NO,NO,NO,YES,NO,NO,NO,YES,NO,NO,F,2,F_M
5,000106b_,6,NO,NO,YES,NO,NO,YES,NO,NO,NO,YES,NO,NO,D,4,D_m
6,000106b_,7,NO,NO,YES,NO,NO,YES,NO,NO,NO,YES,NO,NO,D,2,D_m
7,000106b_,8,YES,NO,NO,NO,NO,YES,NO,NO,NO,YES,NO,NO,A,3,F_M
8,000106b_,9,YES,NO,NO,NO,NO,YES,NO,NO,NO,YES,NO,NO,A,2,F_M
9,000106b_,10,NO,NO,YES,NO,NO,YES,NO,NO,NO,NO,YES,NO,Bb,5,BbM


In [64]:
df[df['chord_label'] == "F_m7"]

Unnamed: 0,choral_ID,event_number,pitch_1,pitch_2,pitch_3,pitch_4,pitch_5,pitch_6,pitch_7,pitch_8,pitch_9,pitch_10,pitch_11,pitch_12,bass,meter,chord_label
1414,001805b_,84,NO,NO,NO,YES,NO,YES,NO,NO,YES,NO,NO,NO,F,3,F_m7
1415,001805b_,85,NO,NO,YES,NO,NO,YES,NO,NO,YES,NO,NO,NO,F,2,F_m7
2904,004008b_,13,YES,NO,NO,YES,NO,YES,NO,NO,YES,NO,NO,NO,Eb,3,F_m7
4371,014007b_,42,YES,NO,NO,YES,NO,YES,NO,NO,YES,NO,NO,NO,F,5,F_m7
4372,014007b_,43,NO,NO,NO,YES,NO,YES,NO,YES,NO,NO,YES,NO,F,2,F_m7
5052,014806bv,6,YES,NO,NO,NO,NO,YES,NO,NO,YES,NO,NO,NO,F,4,F_m7
5053,014806bv,7,YES,NO,NO,NO,YES,YES,NO,NO,YES,NO,NO,NO,F,2,F_m7


In [28]:
chord_labels = df.chord_label.unique()

In [62]:
def get_pitch_numbers(chord_label):
    return df.columns[(df[df['chord_label'] == chord_label].values=="YES")[0]].tolist()

chord_label_to_pitch_numbers = {}

for i in range(len(chord_labels)):
    chord_label_to_pitch_numbers[chord_labels[i]] = get_pitch_numbers(chord_labels[i])

In [63]:
chord_label_to_pitch_numbers

{'F_M': ['pitch_1', 'pitch_6', 'pitch_10'],
 'C_M': ['pitch_1', 'pitch_5', 'pitch_8'],
 'D_m': ['pitch_3', 'pitch_6', 'pitch_10'],
 'BbM': ['pitch_3', 'pitch_6', 'pitch_11'],
 'C_M7': ['pitch_5', 'pitch_8', 'pitch_10', 'pitch_11'],
 'D_m7': ['pitch_1', 'pitch_3', 'pitch_6', 'pitch_10'],
 'G_M': ['pitch_3', 'pitch_8', 'pitch_12'],
 'A_m': ['pitch_1', 'pitch_5', 'pitch_10'],
 'C_M4': ['pitch_1', 'pitch_6', 'pitch_8'],
 'G_m': ['pitch_3', 'pitch_8', 'pitch_11'],
 'G_M7': ['pitch_3', 'pitch_6', 'pitch_8', 'pitch_12'],
 'D_M': ['pitch_3', 'pitch_7', 'pitch_10'],
 'F#d': ['pitch_1', 'pitch_7', 'pitch_10'],
 'AbM': ['pitch_1', 'pitch_4', 'pitch_9'],
 'C#d7': ['pitch_2', 'pitch_5', 'pitch_8', 'pitch_11'],
 'D_M7': ['pitch_1', 'pitch_3', 'pitch_7', 'pitch_10'],
 'A_M': ['pitch_2', 'pitch_5', 'pitch_10'],
 'EbM': ['pitch_4', 'pitch_8', 'pitch_11'],
 'F_M7': ['pitch_1', 'pitch_4', 'pitch_8', 'pitch_10'],
 'C_m': ['pitch_1', 'pitch_4', 'pitch_8'],
 'A_M4': ['pitch_3', 'pitch_5', 'pitch_10'],
 'F#m

In [65]:
df.bass.unique()

array(['F', 'E', 'D', 'A', 'Bb', 'C', 'G', 'B', 'F#', 'C#', 'Eb', 'D#',
       'G#', 'A#', 'Ab', 'Db'], dtype=object)

In [14]:
bach_chain = Chain()
bach_chain.build(first['chord_label'])

In [15]:
f1 = bach_chain.statetab

In [16]:
second = df[df['choral_ID'] == '000306b_']
bach_chain.build(second['chord_label'])

In [17]:
f2 = bach_chain.statetab

In [18]:
f1 == f2

True

In [34]:
df

Unnamed: 0,choral_ID,event_number,pitch_1,pitch_2,pitch_3,pitch_4,pitch_5,pitch_6,pitch_7,pitch_8,pitch_9,pitch_10,pitch_11,pitch_12,bass,meter,chord_label
0,000106b_,1,YES,NO,NO,NO,NO,YES,NO,NO,NO,YES,NO,NO,F,3,F_M
1,000106b_,2,YES,NO,NO,NO,YES,NO,NO,YES,NO,NO,NO,NO,E,5,C_M
2,000106b_,3,YES,NO,NO,NO,YES,NO,NO,YES,NO,NO,NO,NO,E,2,C_M
3,000106b_,4,YES,NO,NO,NO,NO,YES,NO,NO,NO,YES,NO,NO,F,3,F_M
4,000106b_,5,YES,NO,NO,NO,NO,YES,NO,NO,NO,YES,NO,NO,F,2,F_M
5,000106b_,6,NO,NO,YES,NO,NO,YES,NO,NO,NO,YES,NO,NO,D,4,D_m
6,000106b_,7,NO,NO,YES,NO,NO,YES,NO,NO,NO,YES,NO,NO,D,2,D_m
7,000106b_,8,YES,NO,NO,NO,NO,YES,NO,NO,NO,YES,NO,NO,A,3,F_M
8,000106b_,9,YES,NO,NO,NO,NO,YES,NO,NO,NO,YES,NO,NO,A,2,F_M
9,000106b_,10,NO,NO,YES,NO,NO,YES,NO,NO,NO,NO,YES,NO,Bb,5,BbM
