In [3]:
# import librosa
# import sklearn
import os
import re
# from sklearn.ensemble import RandomForestClassifier
from torchvggish import vggish, vggish_input
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
import numpy as np

In [4]:
MUSIC_DIR="music/music-wav"
EMBEDDING_MODEL = vggish()
EMBEDDING_MODEL.eval()
FEATURE_LENGTH = 128 # TODO: search way to do this automatically, instead of by trial-and-error

In [5]:
def readFolder2Embedding(path):

    directory = os.fsencode(path)
    songs = os.listdir(directory)

    # vggish output embedding has length 128
    data = np.zeros((len(songs), FEATURE_LENGTH*128))
    labels = []

    for i, song in enumerate(songs):
        print(f"Learning Embedding for {song}")
        embedding = EMBEDDING_MODEL.forward(
            vggish_input.wavfile_to_examples(path+"/"+song.decode("utf-8")))
        # normalize length, convert to numpy array and flatten the feature array
        converted_embedding = embedding.detach().numpy()[
            :FEATURE_LENGTH, :].flatten()
        data[i, :] = converted_embedding
        # get label/category name from directory name
        labels.append(re.match(r".*cat_(.*)", path)[1])
    return data, labels

In [6]:
def readIn():
    # directories = sorted(os.listdir(TRAIN_MUSIC_DIR))
    directories = sorted(os.listdir(MUSIC_DIR))
    data = np.empty((0, FEATURE_LENGTH*128))
    label = []
    for directory in directories:
        dir_data, dir_label = readFolder2Embedding(
            MUSIC_DIR+"/"+directory)
            # TRAIN_MUSIC_DIR+"/"+directory)
        data = np.concatenate((data, dir_data), axis=0)
        label += dir_label
    print("All data read in successfully!")
    return data, label

In [7]:
def classify(classifier, filepath):
    print("Predicting label for" + filepath + "...")
    embedding = EMBEDDING_MODEL.forward(vggish_input.wavfile_to_examples(filepath))
    converted_embedding = embedding.detach().numpy()[
        :FEATURE_LENGTH, :].flatten()
    return classifier.predict(converted_embedding.reshape(1, -1))

In [8]:
def classifyBatch(classifier, directory_path):
    directory = os.fsencode(directory_path)
    songs = os.listdir(directory)
    res=[]
    for song in songs:
        embedding = EMBEDDING_MODEL.forward(vggish_input.wavfile_to_examples(directory_path+"/"+song.decode("utf-8")))
        converted_embedding = embedding.detach().numpy()[:FEATURE_LENGTH, :].flatten()
        predicted_class=classifier.predict(converted_embedding.reshape(1, -1))
        res.append({"song":song, "predicted_class": predicted_class[0]})
    return res

In [9]:
def test(classifier, data, labels):
    correct=0
    for i,sample in enumerate(data):
        predicted_class=classifier.predict(sample.reshape(1, -1))
        if predicted_class == labels[i]:
            correct+=1
    return correct/len(data)

In [10]:
data, label = readIn()

Learning Embedding for b'A Wake in Providence - We Are Eternity.wav'
Learning Embedding for b'Assemble the Chariots - Empress.wav'
Learning Embedding for b'Aversions Crown - Erebus.wav'
Learning Embedding for b'Aversions Crown - Odium.wav'
Learning Embedding for b'Bodysnatcher - E.D.A..wav'
Learning Embedding for b'Bodysnatcher - Glass Prison.wav'
Learning Embedding for b'Brand of Sacrifice - Animal.wav'
Learning Embedding for b'Brand of Sacrifice - Dynasty.wav'
Learning Embedding for b'Brand of Sacrifice - Exodus.wav'
Learning Embedding for b'Brand of Sacrifice, Will Ramos - Lifeblood.wav'
Learning Embedding for b'Chelsea Grin - The Isnis.wav'
Learning Embedding for b'Chelsea Grin - The Mind of God.wav'
Learning Embedding for b'Chelsea Grin - Yhorm the Giant.wav'
Learning Embedding for b'Chelsea Grin, Filth - Orc March.wav'
Learning Embedding for b'Disembodied Tyrant - Omenkiller.wav'
Learning Embedding for b'Disembodied Tyrant - Ov Brimstone And Fire - Redux.wav'
Learning Embedding f

In [11]:
x_train, x_test, y_train, y_test = train_test_split(data, label, test_size=0.33, random_state=42)

classifier=DecisionTreeClassifier(max_depth=5)
classifier.fit(x_train, y_train)
print("Training successful")

Training successful


In [12]:
print("Accuracy:"+ str(test(classifier,x_test, y_test)))

TypeError: can only concatenate str (not "float") to str