In [50]:
from music21 import *
import os
import random

## Load dataset

In [10]:
dataset_dir = './dataset'
dataset = []
for filename in os.listdir(dataset_dir):
    if filename.endswith('.xml') or filename.endswith('.musicxml'):
        c = converter.parse(dataset_dir + '/' + filename)
        dataset.append(c)
print(len(dataset))

In [26]:
# transpose each piece to the same key (C or Cm)
def transpose_to_C(s):
    key = s.analyze('key')
    i = interval.Interval(key.tonic, pitch.Pitch('C'))
    return s.transpose(i)

dataset_transposed = []
for s in dataset:
    try:
        dataset_transposed.append(transpose_to_C(s))
    except:
        pass

print(len(dataset_transposed))

418


## Feature extract

## Generate chord prog

In [46]:
def hash_rhythm(rhythm):
    '''
    Input: list (e.g. [{measure: 0, beat: 0}, {measure: 1, beat: 3}, {measure: 2, beat: 0}])
    Output: string (e.g. 'M1B0-M1B3-M2B0')
    '''
    hashes = []
    for r in rhythm:
        hashes.append(
            'M' + str(r['measure']) + 'B' + str(r['beat'])
        )
    return '-'.join(hashes)

hash_rhythm([{'measure': 0, 'beat': 0}, {'measure': 1, 'beat': 3}, {'measure': 2, 'beat': 0}])

'M0B0-M1B3-M2B0'

In [47]:
def unhash_rhythm(rhythm_hash):
    '''
    Input: a hashed rhythm
    Output: the unhashed rhythm
    '''
    unhashed = []
    for subhash in rhythm_hash.split('-'):
        m, b = subhash[1:].split('B')
        unhashed.append({
            'measure': m,
            'beat': b
        })
    return unhashed
        
unhash_rhythm('M0B0-M1B3-M2B0')

[{'measure': '0', 'beat': '0'},
 {'measure': '1', 'beat': '3'},
 {'measure': '2', 'beat': '0'}]

In [100]:
def generate_progression(pairs):
    '''
    Inputs:
        list of tuples (progression, rhythm) where progression is a tuple of chord names (e.g. ('ii', 'V', 'I'))
            and rhythm is defined above (unhashed)
    Outputs:
        a tuple (progression, rhythm, rhythm_choosing_func)
    '''
    progs = []
    rhythms_given_prog_len = {}
    for prog, rhythm in pairs:
        progs.append(prog)
        if len(prog) not in rhythms_given_prog_len:
            rhythms_given_prog_len[len(prog)] = []
        rhythms_given_prog_len[len(prog)].append(rhythm)
        
    def choose_rhythm(prog):
        return random.choice(rhythms_given_prog_len[len(prog)])
    
    prog = random.choice(progs)
    rhythm = choose_rhythm(prog)
    return prog, rhythm, choose_rhythm

generate_progression([
    (('ii', 'V', 'I'), [{'measure': 0, 'beat': 0}, {'measure': 1, 'beat': 3}, {'measure': 2, 'beat': 0}]),
    (('ii', 'V', 'I'), [{'measure': 0, 'beat': 0}, {'measure': 1, 'beat': 0}, {'measure': 2, 'beat': 0}]),
    (('I', 'V'), [{'measure': 0, 'beat': 0}, {'measure': 2, 'beat': 2}]),
    (('I', 'V7'), [{'measure': 0, 'beat': 0}, {'measure': 2, 'beat': 0}]),
])

(('ii', 'V', 'I'),
 [{'measure': 0, 'beat': 0},
  {'measure': 1, 'beat': 0},
  {'measure': 2, 'beat': 0}],
 <function __main__.generate_progression.<locals>.choose_rhythm(prog)>)

## Generate melody