In [1]:
from sklearn.cluster import KMeans
import numpy as np
import pandas as pd
import random

# Part 3: Clustering, Modeling, and Solo Generation
## Retrieving S-expressions

In [None]:
# Input CSV locations, relative to the notebook's working directory.
# features_path: table that is clustered with KMeans further down.
# labels_path: table holding the 'song_id', 'song_index' and 'exp' columns read further down.
features_path = 'test_files/s_exp_features_test.csv'
labels_path = 'test_files/s_exp_test.csv'

In [None]:
# Load the s-expression label table; the first CSV column becomes the DataFrame index.
s_exp_labels = pd.read_csv(labels_path, index_col=0)
s_exp_labels.head()  # preview the first rows

In [None]:
# Load the s-expression feature table; the first CSV column becomes the DataFrame index.
s_exp_features = pd.read_csv(features_path, index_col = 0)
s_exp_features.head()  # preview the first rows

## K-Means
### Raw Clustering

In [None]:
# k_size is the number of clusters. It is set arbitrarily to one cluster per four
# s-expressions; max(1, ...) keeps it valid for KMeans when there are fewer than four rows.
k_size = max(1, len(s_exp_features) // 4)
print(k_size)
# A fixed random_state makes the cluster assignments (and everything derived from them)
# reproducible across kernel restarts.
kmeans = KMeans(n_clusters=k_size, random_state=0).fit(s_exp_features)

In [None]:
# Cluster index assigned to each row of s_exp_features, in row order.
kmeans.labels_

### Creating Node Objects

In [None]:
class Node:
    """One state of the Markov chain.

    Attributes:
        node_num: identifier of this node.
        s_exp: list of s-exp-ids that belong to this node.
        cpt: maps from node_num to conditional probability.
    """

    def __init__(self, node_num):
        """Create an empty node identified by `node_num`."""
        self.node_num = node_num
        self.s_exp = list()
        self.cpt = dict()

    def add_exp(self, s):
        """Add the s-exp-id `s` to the end of this node's member list."""
        self.s_exp.extend((s,))

In [None]:
# One node per cluster; a node's position in this list equals its node_num.
node_objects = [Node(i) for i in range(k_size)]

# kmeans.labels_[i] is the cluster of the i-th clustered row, so the row position i is
# what gets stored as the s-expression id.
# NOTE(review): other cells look these ids up with s_exp_labels.loc, which assumes the
# label table's index values equal row positions -- confirm against the CSVs.
for i, label in enumerate(kmeans.labels_):
    node_objects[label].add_exp(i)

## Markov Chaining
### Generating CPTs

In [None]:
# Build each node's conditional probability table (CPT) in a single indexed pass instead
# of comparing every pair of s-expressions.
#
# A transition outer -> inner is counted whenever an s-expression of `inner` sits in the
# same song as an s-expression of `outer`, exactly one song_index later.

# Look up each s-expression's (song_id, song_index) once, and index the node numbers
# found at every position so successors can be fetched directly.
position_of = {}
node_nums_at_position = {}
for node in node_objects:
    for s_exp_id in node.s_exp:
        position = (
            s_exp_labels.loc[s_exp_id, 'song_id'],
            s_exp_labels.loc[s_exp_id, 'song_index'],
        )
        position_of[s_exp_id] = position
        node_nums_at_position.setdefault(position, []).append(node.node_num)

for outer_node in node_objects:
    # Every node gets an entry (in node order) even if it is never reached from here.
    transition_counts = {inner_node.node_num: 0.0 for inner_node in node_objects}
    outer_node_count = 0
    for s_exp_outer in outer_node.s_exp:
        song_id, song_index = position_of[s_exp_outer]
        for next_node_num in node_nums_at_position.get((song_id, song_index + 1), ()):
            transition_counts[next_node_num] += 1
            outer_node_count += 1
    # Creates the probability of going to the next node, not s-exp.
    # A node with no observed successor keeps an all-zero table.
    if outer_node_count:
        transition_counts = {k: (v / outer_node_count) for k, v in transition_counts.items()}
    outer_node.cpt = transition_counts

### Generating a Probabilistic Sequence of S-expressions

In [None]:
def weighted_random_by_dct(dct):
    """Pick a key of `dct` at random, weighted by its value.

    Args:
        dct: mapping from key to probability. The values are expected to sum to
            (approximately) 1.

    Returns:
        The selected key, or None when `dct` is empty or contains no positive
        probability at all.
    """
    rand_val = random.random()  # random value in [0, 1)
    total = 0
    last_positive_key = None
    for k, v in dct.items():
        total += v
        if v > 0:
            last_positive_key = k
        # Strict comparison: a zero-probability key can never be chosen, not even
        # when rand_val is exactly 0.0.
        if rand_val < total:
            return k
    # Floating-point round-off can leave the running total slightly below rand_val;
    # in that case fall back to the last key that carries any probability.
    return last_positive_key

In [None]:
def sequence_s_expressions(n):
    """Generate a random walk of `n` s-expression ids through the Markov chain.

    A start node is chosen uniformly at random. Each step then moves to a next node
    drawn from the current node's CPT and emits one random s-expression id from that
    node. The start node itself emits nothing.

    Args:
        n: length of the sequence to generate.

    Returns:
        A list of `n` s-expression ids.

    Raises:
        IndexError: if there are no nodes, or no node holds any s-expression when a
            restart is needed.
    """
    s_exp_ids = []
    # random.choice cannot overshoot the list, unlike int(random.uniform(0, len)).
    next_node = random.choice(node_objects)
    # Restart candidates: only nodes that have at least one s-expression to emit.
    populated_nodes = [node for node in node_objects if node.s_exp]
    for _ in range(n):
        # Get the next node based on the current node's CPT.
        next_node_num = weighted_random_by_dct(next_node.cpt)
        if next_node_num is None:
            # Dead end: the current node has no usable outgoing transition, so jump
            # to a random populated node instead of failing.
            next_node = random.choice(populated_nodes)
        else:
            next_node = node_objects[next_node_num]
        # Emit a random s-expression belonging to that node.
        s_exp_ids.append(random.choice(next_node.s_exp))
    return s_exp_ids

## Producing Notes
### Selecting Notes from S-expressions

In [None]:
# Dummy placeholder functions: each ignores its arguments and returns a fixed value.
def possible_notes(a, b):
    """Placeholder: always returns the one-element list ['G']."""
    return ['G']


def select_note(a, b, c, d):
    """Placeholder: always returns 'C'."""
    return 'C'

### Putting it all together

In [None]:
def produce_notes(num_of_measures, list_of_chords):
    """Generate a table of notes, one s-expression per measure.

    Args:
        num_of_measures: number of measures to generate.
        list_of_chords: one chord per measure; its length should equal
            `num_of_measures`.

    Returns:
        A DataFrame with columns "note_name", "start_time" and "duration", one row
        per generated note. "start_time" is the term's start value plus the measure
        number; "duration" is kept as the string found in the s-expression.
    """
    s_exp_ids = sequence_s_expressions(num_of_measures)
    curr_note = "Bb4"  # figure out how to start the curr_note initially
    # Rows are collected in a list and turned into a DataFrame once at the end;
    # DataFrame.append no longer exists in pandas 2.x and was quadratic in a loop.
    note_rows = []
    for i in range(num_of_measures):  # i refers to measure number
        s_exp = s_exp_labels.loc[s_exp_ids[i], 'exp']  # ith s-exp, which is a string
        split_list = s_exp.split(' ')
        # The first two tokens are the slope bounds, passed on as strings.
        min_slope, max_slope = split_list[0], split_list[1]
        # The last token is skipped.
        # NOTE(review): presumably a trailing delimiter -- confirm against the data.
        for term in split_list[2:-1]:
            elements = term.split("|")
            category, start, duration = elements[0], elements[1], elements[2]
            poss_notes_list = possible_notes(list_of_chords[i], category)
            selected_note = select_note(poss_notes_list, curr_note, min_slope, max_slope)
            note_rows.append({
                'note_name': selected_note,
                'start_time': float(start) + i,
                'duration': duration,
            })
            curr_note = selected_note
    return pd.DataFrame(note_rows, columns=["note_name", "start_time", "duration"])

In [None]:
# Generate two measures, one chord per measure, and display the resulting note table.
notes_df = produce_notes(num_of_measures=2, list_of_chords=['Bb', 'A'])
notes_df