In [62]:
import os
import re
from torchvggish import vggish, vggish_input
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np

In [24]:
# Root directory that holds one sub-folder per music category.
MUSIC_DIR="music/sampled/"
# VGGish network used to turn audio into embeddings (presumably with
# pretrained weights -- confirm against the torchvggish defaults).
EMBEDDING_MODEL = vggish()
# Inference mode; assumes vggish() returns a torch.nn.Module.
EMBEDDING_MODEL.eval()
# Number of embedding frames kept per song; longer songs are cut to this.
SONG_LENGTH = 30 # embedding frames kept per song
# Number of songs per category (row count of each category's feature matrix).
SAMPLES_PER_CAT=60

In [25]:
def readFolder2Embedding(path):
    """Embed a random sample of songs from one category folder.

    Draws ``SAMPLES_PER_CAT`` distinct files from ``path`` (without
    replacement), runs each one through ``EMBEDDING_MODEL`` and stores the
    first ``SONG_LENGTH`` embedding frames, flattened, as one row of the
    result. Songs that yield fewer than ``SONG_LENGTH`` frames are zero-padded
    on the right so that every row has the same width.

    Args:
        path: Directory holding the audio files of one category. The path
            must contain ``cat_<label>``; the text after the last ``cat_``
            is used as the label.

    Returns:
        Tuple ``(data, labels)`` where ``data`` is an ndarray of shape
        ``(SAMPLES_PER_CAT, SONG_LENGTH * 128)`` and ``labels`` is a list
        with the category label repeated once per row.

    Raises:
        ValueError: If ``path`` contains no ``cat_`` marker, or (raised by
            ``np.random.choice``) if the folder holds fewer than
            ``SAMPLES_PER_CAT`` entries.
    """
    # Resolve the label first: it is the same for every song, and a badly
    # named folder should fail before any expensive embedding work starts.
    match = re.match(r".*cat_(.*)", path)
    if match is None:
        raise ValueError(
            f"Cannot derive a category label from {path!r}: "
            "expected 'cat_<label>' in the path")
    label = match[1]

    # Sample size comes from the shared constant so it always agrees with
    # the number of rows allocated below.
    songs = np.random.choice(
        os.listdir(path), size=SAMPLES_PER_CAT, replace=False)

    # vggish output embedding has length 128
    data = np.zeros((len(songs), SONG_LENGTH * 128))

    for i, song in enumerate(songs):
        print(f"Learning Embedding for {song}")
        embedding = EMBEDDING_MODEL.forward(
            vggish_input.wavfile_to_examples(os.path.join(path, song)))
        # normalize length, convert to numpy array and flatten the features
        flat = embedding.detach().numpy()[:SONG_LENGTH, :].flatten()
        # Short songs fill only the leading part of the row; the rest keeps
        # the zeros it was initialised with.
        data[i, :flat.size] = flat
    return data, [label] * len(songs)

In [26]:
def readIn():
    """Build the feature matrix and label list for every category folder.

    Walks the sub-directories of ``MUSIC_DIR`` in sorted order, embeds each
    one with ``readFolder2Embedding`` and stacks the per-category results.
    Entries of ``MUSIC_DIR`` that are not directories are skipped.

    Returns:
        Tuple ``(data, label)`` where ``data`` is an ndarray with
        ``SONG_LENGTH * 128`` columns and one row per embedded song, and
        ``label`` is the list of category labels in the same row order.
        With no category folders, ``data`` has shape
        ``(0, SONG_LENGTH * 128)`` and ``label`` is empty.
    """
    # Seed the list with an empty block so the result keeps the right width
    # (and np.concatenate has something to work on) when no folder is found.
    chunks = [np.empty((0, SONG_LENGTH * 128))]
    label = []
    for directory in sorted(os.listdir(MUSIC_DIR)):
        # os.path.join works whether or not MUSIC_DIR ends with a separator.
        folder = os.path.join(MUSIC_DIR, directory)
        if not os.path.isdir(folder):
            # Stray files (e.g. .DS_Store) are not categories.
            continue
        dir_data, dir_label = readFolder2Embedding(folder)
        chunks.append(dir_data)
        label += dir_label
    # Stack once at the end instead of re-copying the array per folder.
    data = np.concatenate(chunks, axis=0)
    print("All data read in successfully!")
    return data, label

In [27]:
def classify(classifier, filepath):
    """Predict the category of a single audio file.

    The file is embedded with ``EMBEDDING_MODEL``, cut to the first
    ``SONG_LENGTH`` frames and flattened. Clips that yield fewer frames are
    zero-padded on the right so the feature vector always has
    ``SONG_LENGTH * 128`` entries.

    Args:
        classifier: Fitted estimator exposing ``predict``.
        filepath: Path of the wav file to classify.

    Returns:
        Whatever ``classifier.predict`` returns for the single-row input.
    """
    print("Predicting label for " + filepath + "...")
    embedding = EMBEDDING_MODEL.forward(
        vggish_input.wavfile_to_examples(filepath))
    flat = embedding.detach().numpy()[:SONG_LENGTH, :].flatten()
    # Fixed-width row: a short clip fills only the leading part, the rest
    # stays zero, so the classifier always sees the same feature count.
    features = np.zeros((1, SONG_LENGTH * 128), dtype=flat.dtype)
    features[0, :flat.size] = flat
    return classifier.predict(features)

In [70]:
def test(classifier, data, labels):
    predictions=[]
    for sample in data:
        predictions.append(classifier.predict(sample.reshape(1, -1)))
    return accuracy_score(labels,predictions)

In [29]:
# Embed the sampled songs of every category; this runs the embedding model
# once per song, so it is the slow step of the notebook.
data, label = readIn()

Learning Embedding for My Chemical Romance - Welcome to the Black Parade.wav
Learning Embedding for blink-182 - Bottom Of The Ocean.wav
Learning Embedding for My Chemical Romance - Sleep.wav
Learning Embedding for Foo Fighters - I Am A River.wav
Learning Embedding for Bakkushan - Nur Die Nacht.wav
Learning Embedding for blink-182 - Down.wav
Learning Embedding for Kraftklub - Wie ich.wav
Learning Embedding for Sum 41 - Fat Lip.wav
Learning Embedding for AWOLNATION - Not Your Fault.wav
Learning Embedding for Fall Out Boy - Sugar, We_re Goin Down.wav
Learning Embedding for blink-182 - First Date.wav
Learning Embedding for Bakkushan - Deine Helden sind tot.wav
Learning Embedding for Foo Fighters - Long Road To Ruin.wav
Learning Embedding for Fall Out Boy - XO.wav
Learning Embedding for Royal Blood - Little Monster.wav
Learning Embedding for blink-182 - ANTHEM PART 3.wav
Learning Embedding for blink-182 - I Miss You.wav
Learning Embedding for Linkin Park - In the End.wav
Learning Embedding 

In [60]:
# Fixed seed so the split and the fitted tree are the same on every run;
# without it the reported accuracy changes from execution to execution.
RANDOM_STATE = 42

# Hold out a third of the songs for evaluation; stratifying on the label
# keeps the category proportions the same in both parts.
x_train, x_test, y_train, y_test = train_test_split(
    data, label, test_size=0.33, stratify=label, random_state=RANDOM_STATE)

classifier = DecisionTreeClassifier(max_depth=20, random_state=RANDOM_STATE)
classifier.fit(x_train, y_train)
print("Training successful")

Training successful


In [71]:
# Score the fitted tree on the held-out songs and report it.
print(f"Accuracy:{test(classifier, x_test, y_test)}")

Accuracy:0.875
