In [22]:
from collections import Counter, defaultdict
from itertools import izip_longest
from sklearn.cluster import KMeans
from sklearn.preprocessing import normalize
from itertools import groupby
import pandas as pd
import copy
import numpy as np
import sys

In [23]:
from music21 import *

In [24]:
# Parse the MIDI transcription. `converter` (like `meter`, `tempo`, `stream`,
# `note` and `chord` below) is presumably provided by `from music21 import *`.
oscar = converter.parse('../midi/Oscar-Peterson-2.mid')

# Get the first part
singlepart = oscar[0]
# Keep the first time signature and the first metronome mark found directly in
# that part; both are written to the text dump in later cells.
timesig = singlepart.getElementsByClass(meter.TimeSignature)[0]
mmark = singlepart.getElementsByClass(tempo.MetronomeMark)[0]

# Flat accumulators shared by all voices of the part.
allnotes = []
allchords = []

# NOTE(review): `ix` is never used, so the enumerate() is not needed.
for ix, voice in enumerate(singlepart.getElementsByClass(stream.Voice)):
    # NOTE(review): this filters to note.Note and then takes `.notes` of the
    # result; the second step looks redundant -- confirm against music21 docs.
    notes = voice.getElementsByClass(note.Note).notes
    chords = voice.getElementsByClass(chord.Chord)
    # Append element by element so both lists stay flat across voices.
    for i in notes:
        allnotes.append(i)
    for i in chords:
        allchords.append(i)

In [41]:
# Open the text dump for writing. The handle is deliberately left open: the
# following cells keep writing to it and the last one closes it.
# NOTE(review): no `with` block, so an exception in any of those cells leaves
# the file open and possibly unflushed.
oscar2notes = open('../midi/oscar2notes.txt', 'w')
# First line of the file: the metronome mark's number.
oscar2notes.write(str(mmark.number) + '\n')
print mmark.number

176.0


In [42]:
# Second line of the file: the time signature as "<numerator> / <denominator>".
oscar2notes.write("%s / %s" % (timesig.numerator, timesig.denominator) + '\n')

In [43]:
# Third line of the file: the column header for the note rows that follow.
# The reader cell skips the first two lines (skiprows=2) and uses this one as
# the CSV header.
oscar2notes.write("Note/Rest,Octave,Len,Offset" + "\n")

In [44]:
# One row per collected note: name, octave, quarter length, offset (as float).
# NOTE(review): rows are separated by ", " while the header uses "," only, so
# every column after the first carries a leading space in the file.
for i in allnotes:
    oscar2notes.write("%s, %s, %s, %s" % (i.name, i.octave, i.quarterLength, float(i.offset)) + "\n")
# Closes the handle opened in the earlier cell, flushing the dump to disk.
oscar2notes.close()

# 2: N-grams

## generation

In [64]:
def ngram_generation(iterable, grams):
    '''
    Generate the n-gram vocabulary of a sequence.

    Yields every run of `grams` consecutive items as a tuple, in order of
    appearance (duplicates included).

    Parameters
    ----------
    iterable : any iterable (list, tuple, string, generator, ...)
        The items to window over. It is materialized once, so one-shot
        iterables are supported.
    grams : int
        Window size. Must be >= 0.

    Yields
    ------
    tuple
        Each full window of length `grams`. When `grams` exceeds the number
        of items nothing is yielded (no truncated windows). With
        `grams == 0` one empty tuple is yielded per item; `calculate_prob`
        uses that count as the denominator when called with `grams == 1`.

    Raises
    ------
    ValueError
        If `grams` is negative. As this is a generator, the error surfaces
        on the first iteration, not at call time.
    '''
    if grams < 0:
        raise ValueError("grams must be non-negative, got %r" % (grams,))

    # Materialize once: len() and slicing are needed below.
    items = tuple(iterable)

    # len - grams + 1 full windows exist; the min() caps the grams == 0 case
    # at one empty tuple per item, and max() turns "too few items" into
    # "no windows" instead of yielding truncated tuples.
    window_count = max(0, min(len(items), len(items) - grams + 1))
    for start in range(window_count):
        yield items[start:start + grams]

In [65]:
# Reload the dump written above. skiprows=2 drops the tempo and time-signature
# lines so that the "Note/Rest,Octave,Len,Offset" line becomes the header.
oscar = (
    pd.read_csv('../midi/oscar2notes.txt', skiprows=2)
    .sort_values(by="Offset")
)
# Number the rows 1..N in offset order before any row is filtered out.
oscar.index = xrange(1, len(oscar) + 1)
# Keep only notes in octave 4 and above.
oscar = oscar.loc[oscar["Octave"] >= 4]


In [66]:
# Note symbols such as "D5": name and octave concatenated, in row order,
# preceded by three "start" padding tokens.
possiblenotes = ["start"] * 3 + [
    "%s%s" % (row[1]["Note/Rest"], row[1]["Octave"])
    for row in oscar.iterrows()
]

In [69]:
# Preview the bigrams (grams=2) of the padded note sequence, one tuple per line.
# NOTE(review): this prints every bigram of the piece; slicing the generator
# (e.g. itertools.islice) would keep the saved output short.
t = ngram_generation(possiblenotes, 2)
for item in t:
    print item

('start', 'start')
('start', 'start')
('start', 'D5')
('D5', 'E4')
('E4', 'C#5')
('C#5', 'A5')
('A5', 'A4')
('A4', 'F4')
('F4', 'F5')
('F5', 'D5')
('D5', 'D5')
('D5', 'F5')
('F5', 'B-4')
('B-4', 'G4')
('G4', 'E-4')
('E-4', 'G4')
('G4', 'A4')
('A4', 'B-5')
('B-5', 'F4')
('F4', 'B-5')
('B-5', 'G#5')
('G#5', 'D6')
('D6', 'B5')
('B5', 'D5')
('D5', 'B-5')
('B-5', 'G#5')
('G#5', 'D5')
('D5', 'C6')
('C6', 'B-5')
('B-5', 'C5')
('C5', 'C4')
('C4', 'E4')
('E4', 'C5')
('C5', 'F5')
('F5', 'F5')
('F5', 'G5')
('G5', 'G5')
('G5', 'A5')
('A5', 'A5')
('A5', 'B-5')
('B-5', 'C#6')
('C#6', 'D6')
('D6', 'D6')
('D6', 'C6')
('C6', 'D6')
('D6', 'C6')
('C6', 'F5')
('F5', 'B-5')
('B-5', 'C6')
('C6', 'G#5')
('G#5', 'A5')
('A5', 'F5')
('F5', 'C5')
('C5', 'B-4')
('B-4', 'G#4')
('G#4', 'A4')
('A4', 'D4')
('D4', 'F4')
('F4', 'D5')
('D5', 'F5')
('F5', 'A5')
('A5', 'D5')
('D5', 'F5')
('F5', 'A5')
('A5', 'A5')
('A5', 'F5')
('F5', 'D5')
('D5', 'D5')
('D5', 'F5')
('F5', 'A5')
('A5', 'G5')
('G5', 'A5')
('A5', 'G5')
('G5',

## probability calculation

In [76]:
def calculate_prob(possiblenotes, grams):
    '''
    Estimate conditional next-note probabilities from n-gram counts.

    For every n-gram (c_1, ..., c_{n-1}, x) in `possiblenotes` the estimate is
    count(c_1..c_{n-1}, x) / count(c_1..c_{n-1}).

    Parameters
    ----------
    possiblenotes : sequence
        The note symbols, in order. It is passed to `ngram_generation` twice,
        so it must be re-iterable.
    grams : int
        Order of the model (2 = bigram, 3 = trigram, ...).

    Returns
    -------
    nprobs : defaultdict(int)
        Maps (next_note, context_tuple) -> probability.
    lower_nexts_prob : defaultdict(list)
        Maps context_tuple -> list of (next_note, probability) pairs.
        The final context of the sequence is counted in the denominator but
        has no successor, so the probabilities of a context need not sum to 1.
    '''
    # Count the n-grams and their (n-1)-gram contexts.
    # (The original called the misspelled `ngram_generatione` here, which
    # raised NameError on every call.)
    upper_gram_freqs = Counter(ngram_generation(possiblenotes, grams))
    lower_gram_freqs = Counter(ngram_generation(possiblenotes, grams - 1))

    nprobs = defaultdict(int)
    lower_nexts_prob = defaultdict(list)

    for ngram, freq in upper_gram_freqs.items():
        lower_nexts = ngram[:-1]   # the context: everything but the last item
        curr_char = ngram[-1]      # the item that follows the context

        # float() keeps this a true division under Python 2 as well.
        curr_prob = float(freq) / lower_gram_freqs[lower_nexts]

        nprobs[(curr_char, lower_nexts)] = curr_prob
        lower_nexts_prob[lower_nexts].append((curr_char, curr_prob))

    return nprobs, lower_nexts_prob

In [77]:
# Normalize an iterable
def normalization(iterable, normalize_to=1):
    '''
    Rescale numeric values so that they sum to `normalize_to`.

    Parameters
    ----------
    iterable : iterable of numbers or numeric strings
        Each item is converted with float(). One-shot iterables such as
        generators are supported because the values are materialized first.
    normalize_to : number, optional
        Target sum of the result (default 1).

    Returns
    -------
    list of float
        The rescaled values in input order; an empty list for empty input.

    Raises
    ------
    ZeroDivisionError
        If the input is non-empty and its values sum to zero.
    '''
    # Convert once up front: the original walked `iterable` twice, so a
    # generator was exhausted by the summing pass and the result was [].
    values = [float(item) for item in iterable]

    value_sum = 0
    for value in values:
        value_sum += value

    return [value / value_sum * normalize_to for value in values]

In [79]:
# Yield the next note for a given n-gram model.
def yieldNext(lower_nexts_prob, *args):
    '''
    Randomly pick the next note for a context, weighted by its probability.

    Parameters
    ----------
    lower_nexts_prob : mapping
        Maps a context tuple to a list of (next_note, probability) pairs, as
        returned second by `calculate_prob`.
    *args
        The context items, in order; together they form the lookup tuple.

    Returns
    -------
    The chosen `next_note`, exactly as stored in the model.

    Raises
    ------
    IndexError
        If the context has no recorded continuation. The exception type is
        kept from the original implementation, where numpy raised it on the
        empty lookup result.
    '''
    lookup = tuple(args)

    # .get() instead of [] so that probing an unseen context does not insert
    # an empty entry into a defaultdict model.
    candidates = lower_nexts_prob.get(lookup)
    if not candidates:
        raise IndexError("no continuation recorded for context %r" % (lookup,))

    # Split the pairs in plain Python. Packing them into a single np.array
    # would coerce the probabilities to strings next to the note names.
    nextnotes = [pair[0] for pair in candidates]
    probabilities = [pair[1] for pair in candidates]

    # normalization: the stored probabilities of a context need not sum to 1.
    probabilities = normalization(probabilities)

    # The chance of being chosen depends on its probability. Drawing an index
    # rather than the note itself returns the stored object unchanged.
    chosen = np.random.choice(len(nextnotes), p=probabilities)
    return nextnotes[chosen]

## offset generation