Necessary imports

In [12]:
import sys
import os
import random
sys.path.insert(0, '../../dev')
import os
import numpy as np
import pandas as pd
from song_classification_v2 import *

# musicautobot
from musicautobot.numpy_encode import *
from musicautobot.config import *
from musicautobot.music_transformer import *
from musicautobot.utils.midifile import *
from musicautobot.utils.file_processing import process_all

import music21 as m21
from music21 import *

In [4]:
# Read in songID lookup table
df = pd.read_csv("./songname_to_msid.txt", delimiter="   ", header=None, engine='python')
df.columns = ['Artist', 'Song', 'Id']

In [10]:
folders_path = '../numpy_data_collections'
folders = os.listdir(folders_path)
print('folders: ', folders)

count = 0

everything = [] # list of all songs with key classifications
collection = [] # list of at most 100 songs with key classifications

# Initialize new dataframe and csv file to store all data
new_df = pd.DataFrame(collection, columns=['Artist', 'Song', 'Id', 'Aard_key', 'Aard_r', 'KS_key', 'KS_r', 'Trevor_key'])
new_df.to_csv('classified_songs_V3.csv', sep='\t', index=False, header=True)

mf = music21.midi.MidiFile()
score = music21.stream.Score()
vocab = MusicVocab.create()
for sub_folder in folders:
    # Skip this folder; I think it only contains duplicates
    if sub_folder == 'numpy_0_to_10000':
        continue

    files = os.listdir(folders_path + '/' + sub_folder)
    print(sub_folder)

    for file in files:
        count += 1

        filepath = folders_path + '/' + sub_folder + '/' + file
        print(f"filepath: {filepath}")
        
        # Load numpy tensor from file
        npy = np.load(filepath)
        # Create music item from numpy tensor
        musIt = MusicItem.from_npenc(npy, vocab)
        # Generate m21 score from music item
        mf = m21.midi.translate.streamToMidiFile(musIt.to_stream())

        # identify the non-percussion tracks
        tracks = [t for t in mf.tracks if not any([e.channel == 10 for e in t.events])]  
        # translate the non-percussion tracks to a m21 stream
        score = music21.midi.translate.midiTracksToStreams(tracks,                                            
            # inputM21=score, # I don't think we actually want this line in here...
            forceSource=False, 
            quantizePost=False, # not sure why this parameter
            ticksPerQuarter=mf.ticksPerQuarterNote, # not sure why this parameter
            quarterLengthDivisors=(4,3), # not sure why this parameter
        )
        
        # Analyze the key of the m21 stream with the Aarden-Essen algorithm (their default)
        aardenKey = score.analyze('Aarden')
        aard_tonic = aardenKey.tonic.name
        aard_mode = aardenKey.mode
        aard_r = aardenKey.correlationCoefficient

        # Analyze the key of the m21 stream with the Krumhansl-Schmuckler algorithm
        ksKey = score.analyze('Krumhansl')
        ks_tonic = ksKey.tonic.name
        ks_mode = ksKey.mode
        ks_r = ksKey.correlationCoefficient

        # Analyze the key of the MusicItem with Trevor's algorithm
        trevKey, occ_scores = calculate_score(musIt)
        # Reformat Trevor's keys to match m21's keys
        trevKey = trevKey.replace('_', ' ')
        trevKey = trevKey.replace('b', '-')

        print(f"\tAarden\t\tclassification: {aard_tonic} {aard_mode}\t r = {aard_r}")
        print(f"\tKrumhansl\tclassification: {ks_tonic} {ks_mode}\t r = {ks_r}")
        print(f"\tTrevor\t\tclassification: {trevKey}")


        # get artist and song
        row = df.loc[df['Id'] == file[:-4]]
        #if row != None:
        if file[:-4] in df.values:
            artist = row['Artist'].values[0]
            song = row['Song'].values[0]
            id = row['Id'].values[0]

        else:
            artist = 'N/A'
            song = 'N/A'
            id = row['Id']

        # insert row into collection (artist, song, id)
        collection.append([artist, song, id, aard_tonic + " " + aard_mode, aard_r, ks_tonic + " " + ks_mode, ks_r, trevKey])
        if count % 100 == 0:
            print(f"\t{count} files processed")
            # Convert collection to dataframe, and append to CSV file
            new_df = pd.DataFrame(collection, columns=['Artist', 'Song', 'Id', 'Aard_key', 'Aard_r', 'KS_key', 'KS_r', 'Trevor_key'])
            new_df.to_csv('classified_songs_V3.csv', sep='\t', index=False, mode='a')
            everything += collection
            collection = []

# Convert collection to dataframe, and append to CSV file
print(f"\t{count} files processed")
new_df = pd.DataFrame(collection, columns=['Artist', 'Song', 'Id', 'Aard_key', 'Aard_r', 'KS_key', 'KS_r', 'Trevor_key'])
new_df.to_csv('classified_songs_V3.csv', sep='\t', index=False, mode='a', header=False)
everything += collection
collection = []

folders:  ['numpy_path_0_to_200', 'numpy_path_1200_to_2400', 'numpy_path_200_to_400', 'numpy_path_2400_to_4800', 'numpy_path_400_to_600', 'numpy_path_4800_to_6000', 'numpy_path_6000_to_7200', 'numpy_path_600_to_1200', 'numpy_path_7200_to_8400', 'numpy_path_8400_to_10000']
numpy_path_0_to_200
filepath: ../numpy_data_collections/numpy_path_0_to_200/TRAANZE128F148BF55.npy
	Aarden		classification: A major	 r = 0.9020059010224748
	Krumhansl	classification: A major	 r = 0.9627136720132123
	Trevor		classification: F# minor
filepath: ../numpy_data_collections/numpy_path_0_to_200/TRADCVS128F932D857.npy
	Aarden		classification: D major	 r = 0.8764505234672945
	Krumhansl	classification: D major	 r = 0.9905044846116654
	Trevor		classification: F# minor
filepath: ../numpy_data_collections/numpy_path_0_to_200/TRAIFLV12903CCFECD.npy
	Aarden		classification: D major	 r = 0.8117698343329706
	Krumhansl	classification: A major	 r = 0.937730249889782
	Trevor		classification: E minor
filepath: ../numpy_dat

In [66]:
v3 = pd.read_csv('./classified_songs_V3.csv', delimiter='\t', header=1)

total = len(v3)
allMatch = v3.apply(lambda x: x.Aard_key == x.KS_key == x.Trevor_key, axis =1).value_counts()[True]
trev_diff = v3.apply(lambda x: x.Aard_key == x.KS_key != x.Trevor_key, axis =1).value_counts()[True]
aard_diff = v3.apply(lambda x: x.Aard_key != x.KS_key == x.Trevor_key, axis =1).value_counts()[True]
KS_diff = v3.apply(lambda x: x.Aard_key == x.Trevor_key != x.KS_key, axis =1).value_counts()[True]
all_diff = v3.apply(lambda x: x.Aard_key != x.KS_key != x.Trevor_key != x.Aard_key, axis =1).value_counts()[True]

print("Total: ", total)
print("All match: ", allMatch, " ", allMatch/total * 100, "%")
print("Trevor diff: ", trev_diff, " ", trev_diff/total * 100, "%")
print("Aard diff: ", aard_diff, " ", aard_diff/total * 100, "%")
print("KS diff: ", KS_diff, " ", KS_diff/total * 100, "%")
print("All diff: ", all_diff, " ", all_diff/total * 100, "%")

print(f"There is a majority consensus on total - all_diff = total - {all_diff} = {total - all_diff} = {(total - all_diff) / total * 100:.2f}%" )
v3['Key'] = v3.apply(lambda x: x.Trevor_key if not x.Aard_key == x.KS_key else x.Aard_key, axis =1)

Total:  9919
All match:  2724   27.462445811069664 %
Trevor diff:  3816   38.47162012299627 %
Aard diff:  1188   11.977013811876198 %
KS diff:  529   5.333198911180562 %
All diff:  1662   16.755721342877305 %
There is a majority consensus on total - all_diff = total - 1662 = 8257 = 83.24%


In [69]:
v3.to_csv('./classified_songs_v4.csv', sep = '\t', index = False, columns=['Artist', 'Song', 'Id', 'Aard_key', 'Aard_r', 'KS_key', 'KS_r', 'Trevor_key', 'Key'])

In [68]:
len(v3[v3['Key'] == v3['Trevor_key']])/total

0.6152837987700372

In [65]:
((total - trev_diff)/total)

0.6152837987700372