In [25]:
# Import libraries
import numpy as np
import music21 as m21
import random
import os
import pickle
from collections import Counter
from typing import Union, List, Tuple
from fractions import Fraction

from muprocdurham.ngram import bigram_matrix_from_model, show_bigram_matrix

# Prepare files

In [26]:
# Billboard Data: CoCoPops-main/Billboard/Data/
# RollingStone: CoCoPops-main/RollingStone/Humdrum/

# Set folder paths
rollingstone_path = 'CoCoPops-main/RollingStone/Humdrum/'
billboard_path = 'CoCoPops-main/Billboard/Data/'

files_in_rollingstone = os.listdir(rollingstone_path)
files_in_billboard = os.listdir(billboard_path)

# List of files in corpus for processing
rollingstone_file_list = []
billboard_file_list = []

# Add every .hum file into processing list
# Rollingstone
for file_name in files_in_rollingstone:
    full_path = os.path.join(rollingstone_path, file_name)
    if os.path.isfile(full_path) and file_name.endswith('.hum'):
        rollingstone_file_list.append(file_name)

# Billboard
for file_name in files_in_billboard:
    full_path = os.path.join(billboard_path, file_name)
    if os.path.isfile(full_path) and file_name.endswith('.hum'):
        billboard_file_list.append(file_name)

In [3]:
# Important to note that some songs in Rollingstone have empty note sequences, and all their quarterLengths are 1
# Only billboard song notes have varying quarterLengths
print(len(rollingstone_file_list))
print(len(billboard_file_list))

200
214


In [5]:
rs_ql_counter = Counter()

for songname in rollingstone_file_list:
    
    # Parse song
    song = m21.converter.parse(rollingstone_path + songname, format="humdrum")
    # Count and sum quarterLengths
    rs_ql_counter += sample_duration_from_piece(song)
                    
print(rs_ql_counter)

Counter({1.0: 59607})


# Basic: Count Probability Model

In [4]:
def sample_pitch_from_piece(s: m21.stream.Stream) -> Counter:
    """
    Create a sample distribution of pitches (MIDI) from a given piece.
    """
    nts = s.recurse().getElementsByClass(m21.note.Note)
    midis = [n.pitch.midi for n in nts]
    return Counter(midis)

def sample_duration_from_piece(s: m21.stream.Stream) -> Counter:
    """
    Create a sample distribution of durations (quarterLength) from a given piece.
    """
    nts = s.recurse().getElementsByClass(m21.note.Note)
    qls = [n.quarterLength for n in nts]
    return Counter(qls)

def make_random_sequence(
    options: Union[List, Tuple],
    weights: Union[List, Tuple],
    length: int = 20
) -> m21.stream.Part:
    """
    Make a pseudorandom sequence of some data given
    a list of `options` (e.g., midi notes or durations) and
    another list of the same length with the corresponding `weights`.
    """
    return random.choices(
        options,
        weights=weights,  # Note: alternatively `cum_weights` for cumulative.
        k=length
    )

# This function ensures that the randomly selected
# quarterLengths sum to the given limit in real time rather than follow a fixed note quantity
def make_random_sequence_qls(
    options: Union[List, Tuple],
    weights: Union[List, Tuple],
    duration: int
) -> m21.stream.Part:
    """
    Make a pseudorandom sequence of some data given
    a list of `options` (e.g., midi notes or durations) and
    another list of the same length with the corresponding `weights`.
    """
    
    sequence = []

    while sum(sequence) < duration:
        sequence += random.choices(options, weights=weights)
    
    return sequence

def make_random_melody(
    midis: List,
    qls: List,
    filename: str
) -> m21.stream.Part:
    """
    Create a stream from a list of pitches (`MIDI`) and durations (`qls`).
    """

    l = len(midis)
    assert l == len(qls)

    p = m21.stream.Part()
    for i in range(l):
        n = m21.note.Note(midis[i])
        n.quarterLength = qls[i]
        p.append(n)

    p.write("midi", filename + ".midi")

In [5]:
# Sum of Pitch Count for Billboard Songs
# Empty Counter
bb_p_counter = Counter()

for songname in billboard_file_list:
    
    # Parse song
    song = m21.converter.parse(billboard_path + songname, format="humdrum")
    
    # Count and sum pitches
    bb_p_counter += sample_pitch_from_piece(song)

print(bb_p_counter)

Counter({62: 9012, 64: 7757, 59: 6203, 60: 5895, 57: 5655, 66: 4970, 67: 4436, 65: 4370, 61: 4117, 69: 3733, 55: 3641, 63: 3191, 58: 2909, 54: 2366, 68: 2333, 52: 1948, 70: 1771, 56: 1699, 71: 1615, 72: 1311, 53: 1301, 50: 797, 74: 725, 73: 683, 49: 638, 51: 594, 48: 488, 46: 369, 47: 359, 44: 317, 75: 232, 76: 182, 45: 161, 42: 105, 43: 76, 77: 61, 79: 55, 78: 40, 41: 25, 80: 17, 81: 16, 36: 15, 82: 9, 40: 8, 38: 8, 37: 7, 83: 1, 39: 1})


In [6]:
# Save Billboard Combined PCPs in a pickle file
with open("pickles/billboard_p", "wb") as file:
    pickle.dump(bb_p_counter, file)

In [7]:
# Sum of quarterLengths for Billboard Songs
# Empty Counter
bb_ql_counter = Counter()

for songname in billboard_file_list:
    
    # Parse song
    song = m21.converter.parse(billboard_path + songname, format="humdrum")
    
    # Count and sum quarterLengths
    bb_ql_counter += sample_duration_from_piece(song)

print(bb_ql_counter)

Counter({0.5: 46491, 1.0: 15615, 0.25: 12807, 1.5: 2918, 2.0: 1944, 0.75: 1888, 0.0: 1798, 4.0: 603, Fraction(2, 3): 568, 3.0: 524, Fraction(1, 3): 514, 0.125: 239, 0.375: 124, Fraction(1, 6): 93, 3.5: 25, 6.0: 25, 1.75: 20, Fraction(4, 3): 14, Fraction(2, 5): 10, 0.0625: 1, 0.1875: 1})


In [8]:
# Save Billboard Combined QLs in a pickle file
with open("pickles/billboard_ql", "wb") as file:
    pickle.dump(bb_ql_counter, file)

In [5]:
# Skip here.
# Load pickle files

with open('pickles/billboard_p', 'rb') as file:
    billboard_p_saved = pickle.load(file)

with open('pickles/billboard_ql', 'rb') as file:
    billboard_ql_saved = pickle.load(file)

In [6]:
billboard_p_saved

Counter({61: 4117,
         62: 9012,
         64: 7757,
         66: 4970,
         59: 6203,
         68: 2333,
         69: 3733,
         57: 5655,
         71: 1615,
         73: 683,
         65: 4370,
         60: 5895,
         72: 1311,
         70: 1771,
         55: 3641,
         63: 3191,
         53: 1301,
         58: 2909,
         67: 4436,
         56: 1699,
         52: 1948,
         54: 2366,
         77: 61,
         74: 725,
         79: 55,
         50: 797,
         48: 488,
         46: 369,
         75: 232,
         47: 359,
         43: 76,
         51: 594,
         45: 161,
         44: 317,
         49: 638,
         42: 105,
         76: 182,
         37: 7,
         41: 25,
         40: 8,
         82: 9,
         81: 16,
         78: 40,
         83: 1,
         80: 17,
         38: 8,
         36: 15,
         39: 1})

In [7]:
billboard_ql_saved

Counter({0.25: 12807,
         0.5: 46491,
         2.0: 1944,
         1.0: 15615,
         1.5: 2918,
         0.75: 1888,
         4.0: 603,
         0.0: 1798,
         Fraction(1, 6): 93,
         Fraction(1, 3): 514,
         3.0: 524,
         0.125: 239,
         0.375: 124,
         3.5: 25,
         Fraction(2, 3): 568,
         Fraction(2, 5): 10,
         1.75: 20,
         0.0625: 1,
         0.1875: 1,
         Fraction(4, 3): 14,
         6.0: 25})

In [8]:
# From my own experimentation, in the default music21 tempo, a quarterLength of 2 corresponds to 1 second in real time
# Therefore for a 60-second piece, my desired total quarterLength should be 120.
duration = 120

# Generate a random sequence of quarterLengths
billboard_random_duration = make_random_sequence_qls(list(billboard_ql_saved.keys()), list(billboard_ql_saved.values()), duration=duration)

# Check how many "notes" I need to generate and associate with the quarterLengths
l = len(billboard_random_duration)

# Generate random sequence of MIDI notes
billboard_random_pitch = make_random_sequence(list(billboard_p_saved.keys()), list(billboard_p_saved.values()), length=l)

# Save as "basic.midi"
make_random_melody(billboard_random_pitch, billboard_random_duration, filename="basic")

# Intermediate & Advanced: Sequential n-gram model

In [10]:
# Extracts the sequence of MIDI notes from a given score
def pitch_sequence(score, ignore_chords=True, min_time=None, max_time=None):
    pitches = []
    # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
    # traverse all elements in the score
    for element in score.flat:
        if isinstance(element, m21.note.Note) and element.tie != m21.tie.Tie("stop"):
            # check for min/max time
            if min_time is not None and element.offset < min_time:
                continue
            if max_time is not None and element.offset >= max_time:
                continue
            # get MIDI pitch for single notes
            pitches.append(element.pitch.midi)
        elif isinstance(element, m21.chord.Chord):
            # ignore chords or raise error
            if ignore_chords:
                continue
            raise RuntimeError(f"Input contains chords {element} at {element.offset}")
        else:
            # ignore anything else
            continue
    # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    return np.asarray(pitches)

# The simple n-gram implementation
class NGramModel:

    def __init__(self, n, prior_counts=0, alphabet=()):
        self.n = n                        # order of the n-gram model
        self.counts = {}                  # dict with counts for the individual n-grams
        self.prior_counts = prior_counts  # prior counts
        self.alphabet = set(alphabet)     # alphabet of symbols

    def fill_alphabet(self):
        """Fill gaps in integer alphabet"""
        for a in list(range(min(self.alphabet), max(self.alphabet) + 1)):
            self.alphabet.add(a)
    
    def check_n_gram(self, n_gram):
        n_gram = tuple(n_gram)
        assert len(n_gram) == self.n, f"n-gram must have length n={self.n}, but {n_gram} has length {len(n_gram)}"
        return n_gram

    def add(self, n_gram):
        """Add an *n*-gram by initialising or incrementing its count."""
        n_gram = self.check_n_gram(n_gram)
        assert len(n_gram) == self.n, \
            f"n-gram has wrong length, expected {self.n}, got {len(n_gram)}"
        self.alphabet |= set(n_gram)
        # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
        try:
            self.counts[n_gram] += 1
        except KeyError:
            self.counts[n_gram] = 1
        # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

    def add_sequence(self, sequence):
        """Add all *n*-grams in the sequence."""
        # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
        for start in range(0, len(sequence) - self.n + 1):
            n_gram = sequence[start:start + self.n]
            self.add(n_gram)
        # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

    def c(self, n_gram):
        """Return counts for this *n*-gram."""
        n_gram = self.check_n_gram(n_gram)
        # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
        try:
            return self.counts[n_gram] + self.prior_counts
        except KeyError:
            return self.prior_counts
        # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

    def p(self, n_gram):
        """Return probability of the last element in the *n*-gram conditional on the first ``n-1`` elements."""
        n_gram = self.check_n_gram(n_gram)
        # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
        norm = sum([self.c(n_gram[:-1] + (a,)) for a in self.alphabet])
        if norm == 0:
            return 1 / len(self.alphabet)
        return self.c(n_gram) / norm
        # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

# The n-gram implementation with smoothing
class SmoothingNGramModel:

    def __init__(self, n, prior_counts=0, alphabet=()):
        self._prior_counts = prior_counts
        self.n_gram_models = {n_: NGramModel(n=n_, prior_counts=prior_counts, alphabet=alphabet) for n_ in range(1, n + 1)}
    
    @property
    def prior_counts(self):
        return self._prior_counts
    
    @prior_counts.setter
    def prior_counts(self, value):
        self._prior_counts = value
        for model in self.n_gram_models.values():
            model.prior_counts = value
    
    @property
    def alphabet(self):
        return set().union(*[m.alphabet for m in self.n_gram_models.values()])

    def fill_alphabet(self):
        for model in self.n_gram_models.values():
            model.fill_alphabet()

    def add_sequence(self, sequence):
        for model in self.n_gram_models.values():
            model.add_sequence(sequence)

    def p(self, n_gram):
        context = n_gram[:-1]
        event = n_gram[-1]
        n = len(n_gram)
        w = self.weight(context)
        # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
        n_gram_prediction = self.n_gram_models[n].p(n_gram)
        if n == 1:
            # stop recursion
            return n_gram_prediction
        else:
            # recurse and smooth
            return w * n_gram_prediction + (1 - w) * self.p(n_gram[1:])
        # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

    # this is giving the weight a value of 0.5 before recursing to smaller n
    def weight(self, context):
        return 0.5

In [11]:
# Extracts the sequence of quarterLengths from a given score
# Additionally, performs some binning to condense rarely-occuring quarterLengths to the nearest significant length
def ql_sequence(score, ignore_chords=True, min_time=None, max_time=None):
    qls = []
    # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
    # traverse all elements in the score
    for element in score.flat:
        if isinstance(element, m21.note.Note) and element.tie != m21.tie.Tie("stop"):
            # check for min/max time
            if min_time is not None and element.offset < min_time:
                continue
            if max_time is not None and element.offset >= max_time:
                continue
            # get quarterLength for single notes
            if element.quarterLength == Fraction(2,3):
                qls.append(0.75)
            elif element.quarterLength in (Fraction(1,3), Fraction(1,6), 0.375, 0.125, 0.0625, 0.1875):
                qls.append(0.25)
            elif element.quarterLength == 1.75:
                qls.append(2)
            elif element.quarterLength == 3.5:
                qls.append(3)
            elif element.quarterLength == Fraction(4, 3):
                qls.append(1.5)
            elif element.quarterLength == Fraction(2, 5):
                qls.append(0.5)
            else:
                qls.append(element.quarterLength)
        elif isinstance(element, m21.chord.Chord):
            # ignore chords or raise error
            if ignore_chords:
                continue
            raise RuntimeError(f"Input contains chords {element} at {element.offset}")
        else:
            # ignore anything else
            continue
    # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    return np.asarray(qls)

In [12]:
# For the sake of reusing the n-gram code rather than reinventing the wheel,
# this function maps the possible binned quarterLengths to a set of n-gram compatible integers
# 1 corresponds to quarterLength 0.25, and so on.
def ql_sequence_to_ints(ql):
    int_ql = []
    for i in range(len(ql)):
        if ql[i] == 0.25:
            int_ql.append(1)
        elif ql[i] == 0.5:
            int_ql.append(2)
        elif ql[i] == 0.75:
            int_ql.append(3)
        elif ql[i] == 1:
            int_ql.append(4)
        elif ql[i] == 1.5:
            int_ql.append(5)
        elif ql[i] == 2:
            int_ql.append(6)
        elif ql[i] == 3:
            int_ql.append(7)
        elif ql[i] == 4:
            int_ql.append(8)
        elif ql[i] == 6:
            int_ql.append(9)
    return int_ql

In [13]:
# Here is an example of the binning of a song's quarterLength sequence
song = m21.converter.parse(billboard_path + 'TheBeatles_Help_1965.varms.hum', format="humdrum")

ql_example = ql_sequence(song, ignore_chords=True)
ql_example

array([1.  , 0.5 , 1.  , 0.5 , 0.5 , 1.  , 1.  , 1.  , 0.75, 0.75, 0.5 ,
       1.  , 1.5 , 0.5 , 0.5 , 1.  , 0.5 , 0.5 , 0.5 , 1.  , 0.5 , 1.5 ,
       1.5 , 0.5 , 1.  , 0.5 , 1.5 , 1.  , 1.  , 0.5 , 1.  , 1.5 , 1.  ,
       0.5 , 1.  , 1.  , 0.5 , 0.5 , 0.5 , 0.5 , 0.5 , 0.5 , 1.  , 0.5 ,
       0.5 , 1.  , 1.  , 1.5 , 1.  , 1.  , 0.5 , 1.  , 0.5 , 1.  , 1.  ,
       1.  , 1.  , 0.5 , 1.  , 1.5 , 1.  , 1.  , 1.  , 1.  , 2.  , 0.5 ,
       0.5 , 0.5 , 0.5 , 1.5 , 0.5 , 0.5 , 0.5 , 1.  , 0.5 , 1.  , 1.  ,
       1.  , 0.5 , 0.5 , 1.  , 1.  , 0.5 , 1.  , 0.5 , 1.  , 1.  , 1.  ,
       1.  , 2.  , 0.5 , 1.5 , 0.5 , 1.  , 0.5 , 1.  , 1.  , 0.5 , 0.5 ,
       1.  , 0.5 , 0.5 , 1.5 , 2.  , 0.5 , 1.5 , 1.  , 1.  , 0.5 , 0.5 ,
       1.  , 1.  , 1.  , 1.  , 0.5 , 0.5 , 1.  , 1.  , 1.  , 1.  , 1.  ,
       1.  , 1.  , 2.  , 2.  , 0.5 , 0.5 , 1.5 , 1.5 , 1.  , 1.  , 1.  ,
       1.  , 4.  , 4.  , 0.5 , 0.5 , 1.5 , 1.5 , 0.5 , 0.5 , 1.5 , 1.5 ,
       1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 0.5 , 1.

In [14]:
# Prepare for the combination of n-grams by finding the total alphabet range
# To prepare our total transition table, we need to know the range of MIDI numbers across all 214 Billboard songs
midi_min = 100
midi_max = 0
counter = 0

# Loop through all 214 Billboard songs
for songname in billboard_file_list:

    # Parse song
    song = m21.converter.parse(billboard_path + songname, format="humdrum")
    counter += 1
    
    # Generate pitch sequence
    s = pitch_sequence(song, ignore_chords=True)

    current_min = min(s)
    current_max = max(s)

    if midi_min > current_min:
        midi_min = current_min

    if midi_max < current_max:
        midi_max = current_max
        
        # Midi Range
        #midi_range = list(range(matrix[1][0], matrix[1][1] + 1))

print("The smallest MIDI number in this set of songs is: " + str(midi_min))
print("The biggest MIDI number in this set of songs is: " + str(midi_max))
print("Parsed through " + str(counter) + " songs." )

The smallest MIDI number in this set of songs is: 36
The biggest MIDI number in this set of songs is: 83
Parsed through 214 songs.


In [24]:
# Based on what we found above:
smallest_midi = 36
biggest_midi = 83
pitch_transition_range = biggest_midi - smallest_midi + 1
quarterLength_transition_range = 9

# Build an empty transition table for MIDI 36 to 83
p_transition = np.zeros((pitch_transition_range, pitch_transition_range))

# Build an empty transition table for my 9 possible quarterLengths
q_transition = np.zeros((quarterLength_transition_range, quarterLength_transition_range))

# Since the smallest MIDI is 36, our offset for computing ranges is 36
offset = smallest_midi

# For the 214 Billboard Songs
for songname in billboard_file_list:

    # Parse Song
    song = m21.converter.parse(billboard_path + songname, format="humdrum")

    # Generate pitch and quarterLength sequences
    s = pitch_sequence(song, ignore_chords=True)
    q = ql_sequence_to_ints(ql_sequence(song, ignore_chords=True))


    #####################################################################################################
    # PITCH N-GRAM HERE
    #
    # Generate n-gram model
    pitch_n_gram_model = SmoothingNGramModel(n=2)
    pitch_n_gram_model.add_sequence(s)
    pitch_n_gram_model.fill_alphabet()
    
    # Set a prior count
    pitch_n_gram_model.prior_counts = 1
    
    # Generate bigram matrix
    p_matrix = bigram_matrix_from_model(pitch_n_gram_model)
    
    # Compute the min/max MIDI range of the current song's bigram matrix
    p_matrix_min = p_matrix[1][0]
    p_matrix_max = p_matrix[1][1]

    # The global transition table looks at the transition of note X to note Y
    # so the first layer of array indexing looks at the range of starting notes
    # for example, if the bigram matrix ranges from 40 to 45, 40 - 36 = 4, 45 - 36 = 9
    # so the bigram matrix maps to cells 4 to 9 of my transition table, that's px_index = 4 and py_index = 9
    px_index = p_matrix_min - offset
    px_limit = p_matrix_max - offset

    # Intialise counter so we can index into the bigram matrix
    p_counter_x = 0
    
    # Seek the current starting note
    while px_index < px_limit:

        # For every starting note X, we have 49 possible transitions to note Y
        # so we set the next layer of array indexing for the range of transition notes
        py_index = p_matrix_min - offset
        py_limit = p_matrix_max - offset

        # Intialise another counter for transition notes
        p_counter_y = 0
        
        # Seek the current transition note
        while py_index < py_limit:

            # Here, we take the probability of X to Y in the bigram matrix and add it into the 
            # corresponding location in the global transition table (for Arithmetic Mean)
            p_transition[px_index][py_index] += p_matrix[0][p_counter_x][p_counter_y]

            # Move on to the next note in the range and update indexing counter
            py_index += 1
            p_counter_y += 1

        # Move on to the next note in the range and update indexing counter
        px_index += 1
        p_counter_x += 1

    ###################################################################################################
    # QUARTERLENGTH N-GRAM HERE
    #
    # Generate n-gram model
    ql_n_gram_model = SmoothingNGramModel(n=2)
    ql_n_gram_model.add_sequence(q)
    ql_n_gram_model.fill_alphabet()
    
    # Set a prior count
    ql_n_gram_model.prior_counts = 1
    
    # Generate bigram matrix
    q_matrix = bigram_matrix_from_model(ql_n_gram_model)
    
    # Compute the min/max QL range of the current song's bigram matrix
    q_matrix_min = q_matrix[1][0]
    q_matrix_max = q_matrix[1][1]

    # The global transition table looks at the transition of note X to note Y
    # so the first layer of array indexing looks at the range of starting notes
    qx_index = q_matrix_min - 1
    qx_limit = q_matrix_max - 1

    # Intialise counter so we can index into the bigram matrix
    q_counter_x = 0
    
    # Seek the current starting note
    while qx_index < qx_limit:

        # For every starting note X, we have 49 possible transitions to note Y
        # so we set the next layer of array indexing for the range of transition notes
        qy_index = q_matrix_min - 1
        qy_limit = q_matrix_max - 1

        # Intialise another counter for transition notes
        q_counter_y = 0
        
        # Seek the current transition note
        while qy_index < qy_limit:

            # Here, we take the probability of X to Y in the bigram matrix and add it into the 
            # corresponding location in the global transition table (for Arithmetic Mean)
            q_transition[qx_index][qy_index] += q_matrix[0][q_counter_x][q_counter_y]

            # Move on to the next note in the range and update indexing counter
            qy_index += 1
            q_counter_y += 1

        # Move on to the next note in the range and update indexing counter
        qx_index += 1
        q_counter_x += 1

print("Transition table for Billboard combined.")

Transition table for Billboard combined.


In [26]:
# Save the transition tables into text files
np.savetxt("csv/billboard_pitch.csv", p_transition, fmt="%f", delimiter=",")
np.savetxt("csv/billboard_quarterlength.csv", q_transition, fmt="%f", delimiter=",")

In [13]:
bb_pitch = np.loadtxt("csv/billboard_pitch.csv", delimiter = ",")
bb_qls = np.loadtxt("csv/billboard_quarterlength.csv", delimiter = ",")

In [14]:
# In the video, I forgot to specify that this is obviously only one row of my entire transition table
# In this case bb_pitch[0] contains the transition probabilities starting from the 0th note, which is MIDI number 36
bb_pitch[0]

array([0.049751, 0.013793, 0.014989, 0.012596, 0.012596, 0.0114  ,
       0.0114  , 0.041305, 0.014989, 0.0114  , 0.014989, 0.016185,
       0.080778, 0.022166, 0.0114  , 0.0114  , 0.0114  , 0.013793,
       0.0114  , 0.013793, 0.0114  , 0.02097 , 0.0114  , 0.018577,
       0.125818, 0.017381, 0.082048, 0.042501, 0.181476, 0.02695 ,
       0.0114  , 0.023362, 0.0114  , 0.      , 0.      , 0.      ,
       0.      , 0.      , 0.      , 0.      , 0.      , 0.      ,
       0.      , 0.      , 0.      , 0.      , 0.      , 0.      ])

In [15]:
bb_qls[0]

array([51.792121, 68.348341, 11.938268, 22.968766,  6.813792,  3.379487,
        1.974043,  0.098183,  0.      ])

# Generation from Billboard n-gram transition tables

In [16]:
# Similar to the modified version I created for the PCP probability model
# These functions keep track of the latest element in the sequence and adjust the weights of the next random selection
# By indexing the correct row of the transition table 

def sequential_random_qls(
    options: Union[List, Tuple],
    weights: Union[List, Tuple],
    duration: int,
    seed: List
) -> m21.stream.Part:
    """
    Make a pseudorandom sequence of some data given
    a list of `options` (e.g., midi notes or durations) and
    another list of the same length with the corresponding `weights`.
    """
    # Set the seed note(s) as the starting sequence
    sequence = seed

    # While the ql sequence is not at max length
    while sum(sequence) < duration:
        # current note is the last note in the sequence
        current = sequence[-1]

        # get index of the current note, e.g. the index of MIDI note 36 will be 0
        # therefore we would look at the transitions for transition[0]
        index = options.index(current)
        nextQL = random.choices(options, weights=weights[index])
        sequence += nextQL
        #print(sequence)
    
    return sequence

def sequential_random_midis(
    options: Union[List, Tuple],
    weights: Union[List, Tuple],
    length: int,
    seed: List
) -> m21.stream.Part:
    """
    Make a pseudorandom sequence of some data given
    a list of `options` (e.g., midi notes or durations) and
    another list of the same length with the corresponding `weights`.
    """
    # Set the seed note(s) as the starting sequence
    sequence = seed

    # While the midi sequence is not at max length
    while len(sequence) < length:
        # current note is the last note in the sequence
        current = sequence[-1]

        # get index of the current note, e.g. the index of MIDI note 36 will be 0
        # therefore we would look at the transitions for transition[0]
        index = options.index(current)
        nextNote = random.choices(options, weights=weights[index])
        sequence += nextNote
        #print(sequence)

    return sequence

In [17]:
# Load the saved checkpoints for billboard transition tables
bb_pitch = np.loadtxt("csv/billboard_pitch.csv", delimiter = ",")
bb_qls = np.loadtxt("csv/billboard_quarterlength.csv", delimiter = ",")

# Basic quarterLength Counter probabilities
with open('pickles/billboard_ql', 'rb') as file:
    billboard_ql_saved = pickle.load(file)

In [18]:
# Prepare constants and options
smallest_midi = 36
biggest_midi = 83

# Set the range of possible MIDI notes for selection
midi_range = list(range(smallest_midi, biggest_midi + 1))

# Set the range of possible quarterLengths
ql_range = [0.25, 0.5, 0.75, 1, 1.5, 2.0, 3, 4, 6]

In [18]:
# We find that the most common starting note is MIDI note 62
first_midis = []

for songname in billboard_file_list:
    # Parse Song
    song = m21.converter.parse(billboard_path + songname, format="humdrum")
    s = pitch_sequence(song, ignore_chords=True)
    first_midis.append(s[0])

Counter(first_midis)

Counter({61: 10,
         65: 14,
         62: 26,
         58: 5,
         69: 3,
         52: 8,
         67: 10,
         54: 9,
         47: 2,
         53: 8,
         51: 3,
         49: 2,
         59: 24,
         48: 3,
         70: 3,
         64: 12,
         57: 8,
         66: 11,
         50: 8,
         55: 10,
         60: 8,
         68: 6,
         74: 4,
         63: 5,
         73: 1,
         46: 1,
         44: 1,
         56: 5,
         45: 1,
         76: 1,
         38: 1,
         71: 1})

In [31]:
# We find that the most common starting quarterLength is 0.5
first_qls = []

for songname in billboard_file_list:
    # Parse Song
    song = m21.converter.parse(billboard_path + songname, format="humdrum")
    q = ql_sequence(song, ignore_chords=True)
    first_qls.append(q[0])

Counter(first_qls)

Counter({0.25: 30,
         0.5: 113,
         1.5: 10,
         1.0: 40,
         0.75: 4,
         4.0: 1,
         2.0: 5,
         0.0: 10,
         3.0: 1})

In [19]:
# Intermediate
# Generate a song with only n-grams for MIDI notes, the quarterLengths use the basic counter probabilities
duration = 120
starting_pitch = [62]

# Generate QL sequence with counter probabilities
quarterLength_generated = make_random_sequence_qls(list(billboard_ql_saved.keys()), list(billboard_ql_saved.values()), duration=duration)

# Define l based on length of QL sequence
l = len(quarterLength_generated)

# Generate MIDI number sequence
pitches_generated = sequential_random_midis(midi_range, bb_pitch, length=l, seed=starting_pitch)

# Save as "inter.midi"
make_random_melody(pitches_generated, quarterLength_generated, filename="inter")

In [20]:
# Generate a song two n-grams, one for notes and one for quarterLengths
duration = 120
starting_pitch = [62]
starting_ql = [0.5]

# Generate QL sequence with the QL transition table
quarterLength_generated = sequential_random_qls(ql_range, bb_qls, duration=duration, seed=starting_ql)

# Define l based on length of QL sequence
l = len(quarterLength_generated)

# Generate MIDI number sequence
pitches_generated = sequential_random_midis(midi_range, bb_pitch, length=l, seed=starting_pitch)

# Save as "adv.midi"
make_random_melody(pitches_generated, quarterLength_generated, filename="adv")

# Selecting source pieces for listening test

In [20]:
# EarthWindAndFire_September_1979.hum
# TheBeatles_Help_1965.varms.hum

In [35]:
# Given some duration, decide how far into the song's note sequence to cut the song
def cut_song(sequence, duration):
    sum = 0
    for i in range(len(sequence)):
        sum += sequence[i]
        if sum > duration:
            return i

In [36]:
# Generate source piece A
song1 = m21.converter.parse(billboard_path + 'EarthWindAndFire_September_1979.hum', format="humdrum")

s1 = pitch_sequence(song1, ignore_chords=True)
q1 = ql_sequence(song1, ignore_chords=True)
limit1 = cut_song(q1, 120) # Find out the cut-off point for 1 minute
cut_s1 = s1[:limit1] # slice the lists accordingly
cut_q1 = q1[:limit1]

make_random_melody(cut_s1, cut_q1, filename="source_A")

In [37]:
# Generate source piece B
song2 = m21.converter.parse(billboard_path + 'TheBeatles_Help_1965.varms.hum', format="humdrum")

s2 = pitch_sequence(song2, ignore_chords=True)
q2 = ql_sequence(song2, ignore_chords=True)
limit2 = cut_song(q2, 120)
cut_s2 = s2[:limit2]
cut_q2 = q2[:limit2]

make_random_melody(cut_s2, cut_q2, filename="source_B")

# PCP Entropy Calculation

In [21]:
# Set folder paths
billboard_path = 'CoCoPops-main/Billboard/Data/'
eval_path = 'Evaluation/'

files_in_billboard = os.listdir(billboard_path)
files_in_eval = os.listdir(eval_path)
files_in_eval

['adv_A.midi',
 'adv_B.midi',
 'basic_A.midi',
 'basic_B.midi',
 'inter_A.midi',
 'inter_B.midi',
 'source_A.midi',
 'source_B.midi']

In [22]:
# Give a file path and the file name, function returns the entropy of the PCP of the song as float
def get_entropy(path, filename):
    
    # Empty numpy array for my pitch class profile
    pcp_array = np.zeros(12)

    # Parse song and use music21 built-in pitch analysis function
    song = m21.converter.parse(path + filename, format="midi")
    pcp = m21.analysis.pitchAnalysis.pitchAttributeCount(song, 'pitchClass')
    
    # music21's pitch analysis returns a Counter dictionary, and we use the key-value pairs to fill my numpy array
    for i in pcp.keys():
        pcp_array[i] = pcp[i]

    # Now we need to normalise the array by dividing each pitch class count by the total no. of pitches in the PCP
    pitch_count = np.sum(pcp_array)
    normalised_pcp = pcp_array / pitch_count

    # Calculate the entropy of the song. 
    # Note that we add 1e-10 to avoid edge cases where pcp_array[i] == 0, which would lead to log(0) which is undefined
    entropy = -np.sum(normalised_pcp * np.log2(normalised_pcp + 1e-10))
    return entropy



# Same function but for humdrum instead of MIDIs
def get_entropy_hum(path, filename):
    # Empty numpy array for my pitch class profile
    pcp_array = np.zeros(12)
    # Parse song and use music21 built-in pitch analysis function
    song = m21.converter.parse(path + filename, format="humdrum")
    pcp = m21.analysis.pitchAnalysis.pitchAttributeCount(song, 'pitchClass')
    # music21's pitch analysis returns a Counter dictionary, and we use the key-value pairs to fill my numpy array
    for i in pcp.keys():
        pcp_array[i] = pcp[i]
    # Now we need to normalise the array by dividing each pitch class count by the total no. of pitches in the PCP
    pitch_count = np.sum(pcp_array)
    normalised_pcp = pcp_array / pitch_count
    # Calculate the entropy of the song. 
    # Note that we add 1e-10 to avoid edge cases where pcp_array[i] == 0, which would lead to log(0) which is undefined
    entropy = -np.sum(normalised_pcp * np.log2(normalised_pcp + 1e-10))
    return entropy

In [23]:
# Entropy of 1-min song snippets
for file in files_in_eval:
    print(file + ": " + str(get_entropy(eval_path, file)))

adv_A.midi: 3.4556477856843655
adv_B.midi: 3.4940298275565524
basic_A.midi: 3.356913583037312
basic_B.midi: 3.522610328357428
inter_A.midi: 3.5024065653709777
inter_B.midi: 3.4987928334886544
source_A.midi: 2.4313360232101155
source_B.midi: 2.6308309936271237


In [24]:
# Mean Entropy of Billboard Songs

sum = 0.0
counter = 0

for file in billboard_file_list:
    counter += 1
    sum += get_entropy_hum(billboard_path, file)

mean_entropy = sum / counter

print("Checked " + str(counter) + " songs.")
print("Mean PCP Entropy: " + str(mean_entropy))

Checked 214 songs.
Mean PCP Entropy: 2.6110516102525945
