In [19]:
import pandas as pd
from collections import Counter, defaultdict

In [45]:
# Iterate over a sequence in overlapping chunks of size n. Return tuples (hashable, so usable as dict keys).
def chunks(iterable, n):
    """Yield every run of ``n`` consecutive items of ``iterable`` as a tuple.

    Windows overlap and advance one item at a time, so
    ``chunks("abcd", 2)`` yields ``('a', 'b')``, ``('b', 'c')``, ``('c', 'd')``.

    Args:
        iterable: Items to window over. Any iterable is accepted; it is
            materialized once, so one-shot iterators work as well as lists.
        n: Window size; must be at least 1.

    Yields:
        Tuples of exactly ``n`` items. Nothing is yielded when the input
        holds fewer than ``n`` items.

    Raises:
        ValueError: If ``n`` is smaller than 1 (raised when iteration starts,
            because this is a generator).
    """
    if n < 1:
        raise ValueError("n must be at least 1, got %r" % (n,))
    items = tuple(iterable)
    # range() is empty whenever len(items) < n, so a short input can never
    # produce truncated windows.
    for start in range(len(items) - n + 1):
        yield items[start:start + n]
        
# Build the conditional probability tables.
def condProbTables(ngramfreqs, nngramfreqs):
    """Turn n-gram and (n-1)-gram counts into conditional probabilities.

    For every n-gram, the probability of its last item given the items
    before it is ``count(ngram) / count(prefix)``, where ``prefix`` is the
    n-gram without its last item.

    Args:
        ngramfreqs: Mapping from n-gram tuple to its count.
        nngramfreqs: Mapping from (n-1)-gram tuple to its count. It is
            indexed with every prefix found in ``ngramfreqs``.

    Returns:
        A pair ``(nprobs, prevnngramnexts)``:
        ``nprobs`` is a ``defaultdict(int)`` mapping ``(last item, prefix)``
        to the conditional probability;
        ``prevnngramnexts`` is a ``defaultdict(list)`` mapping each prefix to
        its ``(last item, probability)`` pairs, in ``ngramfreqs`` iteration
        order.

    Raises:
        ZeroDivisionError: If a prefix has a count of zero in ``nngramfreqs``.
    """
    nprobs = defaultdict(int)
    prevnngramnexts = defaultdict(list)
    for ngram, freq in ngramfreqs.items():
        prefix, currchar = ngram[:-1], ngram[-1]
        # Compute the ratio once and store it in both tables.
        prob = float(freq) / nngramfreqs[prefix]
        nprobs[(currchar, prefix)] = prob
        # defaultdict(list) creates the list on first access, so new and
        # already-seen prefixes are handled by the same statement.
        prevnngramnexts[prefix].append((currchar, prob))
    return nprobs, prevnngramnexts

""" 2. Generate the offsets using simple frequency probabilities. """

# Iterate over iterable in overlapping groups of n consecutive items.
def grouper(n, iterable, fillvalue=None):
    """Yield each run of ``n`` consecutive items of ``iterable`` as a tuple.

    Groups overlap and advance one item at a time; with ``n=2`` this yields
    the neighbouring pairs ``(x0, x1)``, ``(x1, x2)``, ...

    Args:
        n: Group size; must be at least 1.
        iterable: Items to group. Any iterable is accepted; it is
            materialized once, so one-shot iterators work as well as lists.
        fillvalue: Accepted for backward compatibility and ignored; groups
            are never padded.

    Yields:
        Tuples of exactly ``n`` items. Nothing is yielded when the input
        holds fewer than ``n`` items.

    Raises:
        ValueError: If ``n`` is smaller than 1 (raised when iteration starts,
            because this is a generator).
    """
    if n < 1:
        raise ValueError("n must be at least 1, got %r" % (n,))
    items = tuple(iterable)
    # range() is empty whenever len(items) < n, so short inputs yield nothing.
    for start in range(len(items) - n + 1):
        yield items[start:start + n]
        
# Snap a value to the nearest 1/n of a unit (quarters by default).
def my_round(x, n=4):
    """Round ``x`` to the nearest multiple of ``1/n``.

    Args:
        x: Value to round.
        n: Number of steps per unit; the default of 4 rounds to quarters.

    Returns:
        ``round(x * n) / n``.
    """
    steps = round(x * n)
    return steps / n

# Normalize an iterable so its values sum to normalizeTo.
def normList(L, normalizeTo=1):
    """Scale the values of ``L`` so that they sum to ``normalizeTo``.

    Args:
        L: Iterable of numbers (anything ``float()`` accepts). One-shot
            iterators are supported.
        normalizeTo: Target sum of the returned values.

    Returns:
        A list of floats, one per input value, each equal to
        ``value / sum(values) * normalizeTo``. An empty input gives ``[]``.

    Raises:
        ZeroDivisionError: If the input is non-empty and its values sum to 0.
    """
    # Materialize once: the values are needed twice (for the total and for
    # the scaling pass), and a one-shot iterator would be exhausted after the
    # first pass, silently producing an empty result.
    values = [float(item) for item in L]
    total = sum(values)
    return [value / total * normalizeTo for value in values]

In [25]:
# Load the note table (the first two lines of the file are skipped) and sort it by the "Offset" column.
oscar = pd.read_csv('oscar2notes.txt', skiprows=2).sort_values("Offset")
# Renumber the rows 1..N in sorted order.
oscar.index = range(1, len(oscar) + 1)
# Keep only the rows whose octave is 4 or higher.
oscar = oscar[oscar["Octave"] >= 4]
oscar.head()

Unnamed: 0,Note/Rest,Octave,Len,Offset
2,D,5,0.75,12.666667
3,E,4,0.666667,14.0
4,C#,5,0.875,14.0
5,A,5,0.25,15.0
6,F,4,3.125,16.0


In [29]:
# One label per row: the "Note/Rest" value followed by the octave (e.g. "D5"), in table order.
# Three leading "start" sentinels are prepended, presumably so the first real
# note still has a full preceding context for n-grams up to size 4.
possiblenotes = ["start"] * 3 + [
    "%s%s" % (row["Note/Rest"], row["Octave"]) for _ix, row in oscar.iterrows()
]

In [32]:
# Count how often each 2-, 3- and 4-note window occurs in the note sequence.
bigramfreqs = defaultdict(int)
trigramfreqs = defaultdict(int)
quadgramfreqs = defaultdict(int)
for table, size in ((bigramfreqs, 2), (trigramfreqs, 3), (quadgramfreqs, 4)):
    for gram in chunks(possiblenotes, size):
        table[gram] += 1

In [34]:
# Encode n-gram probabilities: trigram counts conditioned on their leading bigram.
triprobs, prevbigramnexts = condProbTables(
    ngramfreqs=trigramfreqs,
    nngramfreqs=bigramfreqs,
)
# Quadgram variant, currently disabled:
# quadprobs, prevtrigramnexts = condProbTables(quadgramfreqs, trigramfreqs)

In [43]:
# Tally the gaps between consecutive "Offset" values.
offsets = defaultdict(int)
offset_values = [float(value) for value in oscar["Offset"]]
for earlier, later in grouper(2, offset_values):
    # Gaps are snapped to the nearest quarter (my_round's default n=4), not to integers.
    gap = my_round(later - earlier)
    if gap > 4:
        continue  # can't have gaps > 4
    offsets[gap] += 1

# Parallel lists: np.random.choice() needs the candidates and their weights separately.
offset_poss = list(offsets.keys())     # possible offsets
offset_probs = list(offsets.values())  # raw count for each of those offsets

In [46]:
# Prune the offset candidates, then normalize the surviving counts into probabilities.
# (Original intent, per the author's note: keep the number of possible offsets < 32
# for np.random.choice().)
# An offset is dropped when it is both rare (count < 5) and short (offset < 2),
# or when it is not strictly positive.
# Survivors are filtered into new lists instead of being deleted during iteration:
# deleting from a list while enumerating it skips the element after each deletion.
# NOTE: this cell overwrites offset_poss / offset_probs, so re-run the cell that
# builds them first; normalized values are all < 5 and would be pruned as "rare".
offset_kept = [
    (poss, count)
    for poss, count in zip(offset_poss, offset_probs)
    if poss > 0 and not (count < 5 and poss < 2)
]
offset_poss = [poss for poss, _count in offset_kept]
offset_probs = normList([count for _poss, count in offset_kept])