In [1]:
import numpy as np
import pandas as pd
import re, os, random

# Part 2: Grammar Generation
This stage of the pipeline generates "s-expressions", which encode little snippets of style from the training data. These expressions are then passed along to create the model.
## Note and Chord Functionality
### Note Definition and Functions
We define these as helper functions for later on.
Note that a note (ahaha) is a string whose first character is a capital letter from A-G, followed by an optional '#' or 'b' for sharp/flat, and then a single octave digit from 0-7 (for example `Bb4`).

In [10]:
# The twelve pitch classes in ascending half-step order starting at C.
# Flat spellings are the canonical form used as dictionary keys.
notes = 'C Db D Eb E F Gb G Ab A Bb B'.split(' ')
# Pitch-class name -> half-step index (0-11), and the inverse lookup.
note_to_num = {name: index for index, name in enumerate(notes)}
num_to_note = {index: name for name, index in note_to_num.items()}
# Sharp spellings mapped onto the equivalent canonical flat spelling.
same_note = {'A#':'Bb', 'C#':'Db', 'D#':'Eb', 'F#': 'Gb', 'G#':'Ab'}
print(note_to_num)
print(num_to_note)

{'C': 0, 'Db': 1, 'D': 2, 'Eb': 3, 'E': 4, 'F': 5, 'Gb': 6, 'G': 7, 'Ab': 8, 'A': 9, 'Bb': 10, 'B': 11}
{0: 'C', 1: 'Db', 2: 'D', 3: 'Eb', 4: 'E', 5: 'F', 6: 'Gb', 7: 'G', 8: 'Ab', 9: 'A', 10: 'Bb', 11: 'B'}


In [None]:
def split_note(note):
    """Validate a note string and split it into (pitch class, octave).

    A valid note is a letter A-G, an optional '#' or 'b', and one octave
    digit 0-7. Sharp spellings listed in ``same_note`` are converted to
    their flat equivalents; every other spelling is returned unchanged.

    Returns a ``(pitch_name, octave)`` tuple where ``octave`` is an int.
    Raises ``AssertionError`` when the string does not match the format.
    """
    assert re.fullmatch('[A-G](#|b)?[0-7]', note), 'Note not formatted correctly.'
    pitch = note[:-1]
    octave = int(note[-1])
    return same_note.get(pitch, pitch), octave

def shift_note(note, amount):
    """Transpose ``note`` by ``amount`` half-steps (negative shifts downwards).

    ``note`` is a note string accepted by ``split_note`` (e.g. ``'Bb4'``).
    Returns the transposed note as a string using the canonical flat
    spellings from ``num_to_note``.

    The octave is adjusted by however many octave boundaries the shift
    crosses, so shifts larger than twelve half-steps are handled. The
    resulting octave is not range-checked.
    """
    pitch, octave = split_note(note)
    # divmod floors towards negative infinity, so a downward shift below C
    # yields a negative octave delta and a pitch index still in 0-11.
    octave_delta, pitch_num = divmod(note_to_num[pitch] + amount, 12)
    return num_to_note[pitch_num] + str(octave + octave_delta)

def find_note_dist(note_or_chord1, note2, chord=False):
    """Signed distance in half-steps from ``note_or_chord1`` up to ``note2``.

    The result is positive when ``note2`` lies above the reference, zero
    when they coincide, and negative when it lies below.

    With ``chord`` false, both arguments are full note strings and the
    octave difference is included. With ``chord`` true, ``note_or_chord1``
    is used directly as a key of ``note_to_num`` (a bare pitch-class name)
    and the octave of ``note2`` is ignored, so the result lies in -11..11.
    """
    upper_pitch, upper_octave = split_note(note2)
    if chord:
        lower_pitch = note_or_chord1
        octave_gap = 0
    else:
        lower_pitch, lower_octave = split_note(note_or_chord1)
        octave_gap = (upper_octave - lower_octave) * 12
    return octave_gap + note_to_num[upper_pitch] - note_to_num[lower_pitch]

### Chord Dictionary and Type Categorization
Defines the half step numbers for each category for each chord type.

In [None]:
# For each chord type, the half-step offsets from the chord root that fall
# into each note category. Categories are looked up in insertion order
# ("C" before "L"), so an offset listed under both is reported as "C".
chord_dictionary = {
    "major": {"C": [0, 4, 7], "L": [2, 4, 6, 11]},
    # Minor triad: root, minor third (3), perfect fifth (7). This was
    # previously [0, 4, 8], which is the augmented triad.
    "minor": {"C": [0, 3, 7], "L": [2, 3, 5, 7, 10]},
    "augmented": {"C": [0, 4, 8], "L": []},
    "diminished": {"C": [0, 3, 6] ,  "L": []},
    "half-diminished": {"C": [0, 3, 6, 10], "L": []},
    "dominant-seventh": {"C": [0, 4, 7, 10], "L": [] }
}

In [None]:
def find_chord_type(chord):
    """Classify a chord symbol by the first marker substring it contains.

    Markers are tested in priority order, so "m7b5" wins over the plain
    "m" it also contains. A symbol containing none of the markers is
    classified as "major".

    NOTE(review): this function never returns "augmented", although
    ``chord_dictionary`` has an entry for it. The "j7" marker maps to
    "dominant-seventh" and a plain "7" chord falls through to "major" --
    confirm both against the chord notation used by the training data.
    """
    marker_to_type = (
        ("m7b5", "half-diminished"),
        ("j7", "dominant-seventh"),
        ("o", "diminished"),
        ("m", "minor"),
    )
    for marker, chord_type in marker_to_type:
        if marker in chord:
            return chord_type
    return "major"

## Generating S-expressions
This requires a function to find the maximum and minimum slope between two notes, as well as a function to categorize a note given the chord it is played over.
### Finding Slope Bounds

In [None]:
def find_slope_bounds(lst):
    """Return the smallest and largest jump between consecutive notes.

    ``lst`` is a sequence of note strings. Each adjacent pair is measured
    with ``find_note_dist`` (signed half-steps, positive = upwards).

    Returns the string ``"<min_jump> <max_jump>"``. Both bounds start at 0,
    so the minimum is never positive and the maximum never negative; an
    empty or single-note sequence yields ``"0 0"``.
    """
    min_jump, max_jump = 0, 0
    for i in range(len(lst) - 1):
        # Measure each interval once and fold it into both bounds.
        jump = find_note_dist(lst[i], lst[i + 1])
        max_jump = max(max_jump, jump)
        min_jump = min(min_jump, jump)
    return str(min_jump) + ' ' + str(max_jump)

### Categorizing a Note

In [None]:
def categorize_note(note, chord, nextchord):
    """Return the category letter of ``note`` played over ``chord``.

    The chord root (letter plus optional '#'/'b') is taken from the start
    of the chord symbol and converted to the spelling used by
    ``note_to_num``. The interval from the root up to the note is reduced
    to 0-11 half-steps and looked up in ``chord_dictionary`` for the chord's
    type; the first category whose interval list contains it is returned.

    When no category matches, returns "X" if ``chord == nextchord`` and "A"
    otherwise.

    NOTE(review): ``create_s_exp`` passes a value it calls ``last_chord``
    for ``nextchord`` -- confirm which neighbouring chord is intended.
    """
    root_match = re.match('([A-G])([#b]?)', chord)
    if root_match is None:
        # Unrecognised symbol: keep the previous behaviour and let the
        # lookup in find_note_dist raise for an unknown root.
        root = chord[0]
    else:
        letter, accidental = root_match.groups()
        half_step = {'#': 1, 'b': -1, '': 0}[accidental]
        # Go through the numeric index so any spelling ends up as a key of
        # note_to_num.
        root = num_to_note[(note_to_num[letter] + half_step) % 12]
    chord_type = find_chord_type(chord)
    # find_note_dist returns a signed difference in -11..11 in chord mode;
    # the interval tables hold offsets above the root, hence the modulo.
    dist = find_note_dist(root, note, True) % 12
    for category, interval_list in chord_dictionary[chord_type].items():
        if dist in interval_list:
            return category
    if chord == nextchord:
        return "X"
    else:
        return "A"

### Generation of Expression String
S-expressions are in the form `"min_slope max_slope category|start_time|duration ..."`, where `category` is the letter that `categorize_note` returns for each note.

**This is the main function that will be used from this section (calls the other two functions above)**

In [None]:
def create_s_exp(notes):
    """Build the s-expression string for a sequence of notes.

    ``notes`` is a list of tuples
    ``(note_string, start_time, duration, chord, last_chord)``.

    Returns ``"min_slope max_slope category|start_time|duration ..."``:
    the slope bounds from ``find_slope_bounds``, then one term per note.
    Each term carries the category from ``categorize_note``, the fractional
    part of the start time, and the duration, both to three decimals.
    Every term is followed by a single space, so the result ends in a space
    whenever ``notes`` is non-empty.
    """
    terms = []
    notes_only = []
    for note, start, duration, chord, last_chord in notes:
        category = categorize_note(note, chord, last_chord)
        # Field order is start then duration, matching the documented
        # format and consonance(), which reads the third field as duration.
        terms.append('%s|%.3f|%.3f ' % (category, start % 1, duration))
        notes_only.append(note)
    return find_slope_bounds(notes_only) + ' ' + ''.join(terms)

## Featurizing S-expressions
We currently use 6 different functions to featurize our s-expressions.

### Location of the First Note
The fraction of how far into the measure the first note starts.

In [None]:
def loc_first(notes):
    """Return the fractional part of the first note's start time.

    ``notes`` is a non-empty sequence of note tuples whose second element
    is the start time.
    """
    first_start = notes[0][1]
    return first_start % 1

### Total Duration of Rests
$1 - \sum\limits_{note\in measure} \text{duration}(note)$

In [None]:
def tot_rests(notes):
    """Return the part of a unit-length measure not covered by notes.

    ``notes`` is a sequence of note tuples laid out as
    ``(note_string, start_time, duration, ...)``. The result is
    ``1 - sum(duration)``, so an empty sequence gives 1.
    """
    # Index 2 is the duration; index 1 is the start time.
    return 1 - sum(note[2] for note in notes)

### Average Maximum Slope and Order of Contour
The average magnitude of each rising/falling run and how many times the slope changes direction, respectively.

In [None]:
def slope_process(notes):
    """Summarise the melodic contour of a sequence of notes.

    ``notes`` is a sequence of tuples whose first element is a note string.
    Consecutive notes are compared with ``find_note_dist``. The melody is
    split into runs of same-direction movement; for each run the largest
    single jump (absolute half-steps) is recorded.

    Returns ``(mean_of_run_maxima, direction_changes)``. Repeated pitches
    neither extend nor end a run. The direction of the first run is taken
    from the first non-zero jump, so a melody that starts by descending is
    not charged a direction change. An empty sequence returns ``(0.0, 0)``;
    a sequence with no movement returns a mean of 0 and 0 changes.
    """
    if len(notes) == 0:
        return 0.0, 0
    slopes = []
    direction = 0  # +1 rising, -1 falling, 0 until the first real jump
    curr_max = 0
    direction_changes = 0
    last_note = notes[0][0]
    for term in notes[1:]:
        note = term[0]
        dist = find_note_dist(last_note, note)
        last_note = note
        if dist == 0:
            continue
        step = 1 if dist > 0 else -1
        if direction != 0 and step != direction:
            # The contour turned: close the current run and start a new one.
            slopes.append(curr_max)
            curr_max = 0
            direction_changes += 1
        direction = step
        curr_max = max(curr_max, abs(dist))
    slopes.append(curr_max)
    return np.mean(slopes), direction_changes


def avg_max_slope(notes):
    """Return the mean of the per-run maximum jumps (see ``slope_process``)."""
    return slope_process(notes)[0]


def order_contour(notes):
    """Return the number of direction changes (see ``slope_process``)."""
    return slope_process(notes)[1]

### Consonance
Depends on the category of the notes - a higher value corresponds to more chord tones.

In [None]:
def consonance(s_exp):
    """Return the duration-weighted consonance score of an s-expression.

    ``s_exp`` has the form
    ``"min_slope max_slope category|start_time|duration ..."``. The two
    leading slope bounds are skipped. For every remaining term whose
    category has an entry in ``weights``, ``weight * duration`` is added to
    the total; terms with an unknown category are ignored.

    Returns the total as a float (0.0 when there are no terms).
    """
    weights = {'R': 0.1, 'C': 0.8, 'L': 0.4, 'X': 0.1, 'A': 0.6, 'H': 0.6}
    total = 0.0
    # Splitting on any whitespace keeps the last term whether or not the
    # string ends with a trailing space, and tolerates repeated spaces.
    for term in s_exp.split()[2:]:
        note_info = term.split('|')
        category = note_info[0]
        if category in weights:
            total += weights[category] * float(note_info[2])
    return total

### Using all of the above
**This is the main function that will be used from this section (calls the other five functions above)**

In [None]:
def featurize(args):
    """Compute the feature dictionary for one measure.

    ``args`` is an indexable pair: ``args[0]`` is the list of note tuples
    and ``args[1]`` is the s-expression string built from them.

    Returns a dict mapping the feature's position ("0".."5") to its value,
    in the order: note count, first-note location, total rests, average
    maximum slope, order of contour, consonance.
    """
    # Each entry pairs a feature function with the index of the element of
    # ``args`` it is applied to.
    extractors = (
        (len, 0),
        (loc_first, 0),
        (tot_rests, 0),
        (avg_max_slope, 0),
        (order_contour, 0),
        (consonance, 1),
    )
    return {
        str(position): extractor(args[source])
        for position, (extractor, source) in enumerate(extractors)
    }