In [2]:
import os
import re
from torchvggish import vggish, vggish_input
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
import numpy as np

In [96]:
# Root folder that readIn() lists; each entry in it is handed to
# readFolder2Embedding() as one category directory.
MUSIC_DIR="music/sampled/"
# Shared VGGish model used by every embedding call in this notebook.
# NOTE(review): presumably a torch.nn.Module, so eval() switches it to
# inference mode — confirm against the installed torchvggish version.
EMBEDDING_MODEL = vggish()
EMBEDDING_MODEL.eval()
# Number of embedding rows kept per song (embeddings are sliced to
# [:SONG_LENGTH, :] before flattening), so every feature vector has
# SONG_LENGTH*128 entries.
SONG_LENGTH = 30 # length of samples

In [6]:
def readFolder2Embedding(path):
    """Embed the songs of one category folder into fixed-length feature rows.

    Every song is run through ``EMBEDDING_MODEL``; the first ``SONG_LENGTH``
    embedding frames are kept and flattened into one row of length
    ``SONG_LENGTH*128``. Songs that yield fewer frames are zero-padded at the
    end instead of aborting the whole read-in.

    Args:
        path: Directory containing the song files. The category label is the
            text following ``cat_`` in this path.

    Returns:
        Tuple ``(data, labels)``: ``data`` is a float array with exactly one
        row per embedded song (no unused padding rows) and ``labels`` is a
        list holding the category name once per row.

    Raises:
        ValueError: If the folder contains songs but ``path`` has no
            ``cat_<label>`` part to derive the label from.
    """
    max_samples_per_category = 60
    # vggish output embedding has length 128
    feature_length = SONG_LENGTH * 128

    # os.listdir order is arbitrary; sorting makes the cut-off reproducible.
    # TODO: random choice
    songs = sorted(os.listdir(path))[:max_samples_per_category]
    if not songs:
        return np.zeros((0, feature_length)), []

    # get label/category name from directory name (same for every song)
    category = re.match(r".*cat_(.*)", path)
    if category is None:
        raise ValueError(
            f"Cannot derive a category label from {path!r}: expected 'cat_<label>'")

    # Size the matrix by the songs actually present so no all-zero rows
    # without a matching label are returned.
    data = np.zeros((len(songs), feature_length))
    for row, song in enumerate(songs):
        print(f"Learning Embedding for {song}")
        embedding = EMBEDDING_MODEL.forward(
            vggish_input.wavfile_to_examples(os.path.join(path, song)))
        # NOTE(review): atleast_2d guards against a 1-D result for clips that
        # produce a single frame — confirm whether the model can return one.
        frames = np.atleast_2d(embedding.detach().numpy())[:SONG_LENGTH, :]
        flat = frames.flatten()
        # Short songs fill only the leading part of the row; the rest stays 0.
        data[row, :flat.size] = flat
    return data, [category[1]] * len(songs)

In [94]:
def readIn():
    """Read every category folder below ``MUSIC_DIR`` into one data set.

    Directory entries are visited in sorted order and each one is passed to
    ``readFolder2Embedding``. Entries that are not directories are skipped.

    Returns:
        Tuple ``(data, label)``: ``data`` is a float array with
        ``SONG_LENGTH*128`` columns and one row per song, ``label`` is the
        list of category names aligned with the rows of ``data``.
    """
    feature_blocks = []
    label = []
    for directory in sorted(os.listdir(MUSIC_DIR)):
        category_path = os.path.join(MUSIC_DIR, directory)
        # Stray files (e.g. OS metadata) cannot be listed as a song folder.
        if not os.path.isdir(category_path):
            continue
        dir_data, dir_label = readFolder2Embedding(category_path)
        # Keep rows and labels aligned even if the folder reader returned
        # more (unused) rows than labels.
        feature_blocks.append(dir_data[:len(dir_label)])
        label += dir_label

    # Concatenate once at the end instead of re-copying the array per folder.
    if feature_blocks:
        data = np.concatenate(feature_blocks, axis=0)
    else:
        data = np.empty((0, SONG_LENGTH*128))
    print("All data read in successfully!")
    return data, label

In [4]:
def classify(classifier, filepath):
    """Predict the category of a single audio file.

    The file is embedded with ``EMBEDDING_MODEL``, cut to the first
    ``SONG_LENGTH`` frames and flattened. Clips with fewer frames are
    zero-padded so the feature vector always has ``SONG_LENGTH*128`` entries,
    the width of the rows built by ``readFolder2Embedding``.

    Args:
        classifier: Fitted estimator exposing ``predict``.
        filepath: Path of the wav file to classify.

    Returns:
        Whatever ``classifier.predict`` returns for the single feature row.
    """
    print(f"Predicting label for {filepath}...")
    embedding = EMBEDDING_MODEL.forward(vggish_input.wavfile_to_examples(filepath))
    # NOTE(review): atleast_2d guards against a 1-D result for clips that
    # produce a single frame — confirm whether the model can return one.
    frames = np.atleast_2d(embedding.detach().numpy())[:SONG_LENGTH, :]
    flat = frames.flatten()
    features = np.zeros((1, SONG_LENGTH * 128))
    # Short clips fill only the leading part; the remainder stays zero.
    features[0, :flat.size] = flat
    return classifier.predict(features)

In [14]:
def test(classifier, data, labels):
    correct=0
    for i,sample in enumerate(data):
        predicted_class=classifier.predict(sample.reshape(1, -1))
        if predicted_class == labels[i]:
            correct+=1
    return correct/len(data)

In [97]:
# Embed every song below MUSIC_DIR; prints one progress line per song.
# `data` holds one flattened embedding per row, `label` the matching categories.
data, label = readIn()

Learning Embedding for AWOLNATION - Kill Your Heroes.wav
Learning Embedding for AWOLNATION - Not Your Fault.wav
Learning Embedding for Bakkushan - Deine Helden sind tot.wav
Learning Embedding for Bakkushan - Nur Die Nacht.wav
Learning Embedding for Billy Talent - Devil in a Midnight Mass.wav
Learning Embedding for Billy Talent - Devil on My Shoulder.wav
Learning Embedding for Billy Talent - Fallen Leaves.wav
Learning Embedding for Billy Talent - Red Flag.wav
Learning Embedding for Billy Talent - Try Honesty.wav
Learning Embedding for blink-182 - After Midnight.wav
Learning Embedding for blink-182 - All The Small Things.wav
Learning Embedding for blink-182 - ANTHEM PART 3.wav
Learning Embedding for blink-182 - BAD NEWS.wav
Learning Embedding for blink-182 - Bottom Of The Ocean.wav
Learning Embedding for blink-182 - Down.wav
Learning Embedding for blink-182 - First Date.wav
Learning Embedding for blink-182 - Going Away To College.wav
Learning Embedding for blink-182 - I Miss You.wav
Lear

In [91]:
# Fixed seed so both the train/test split and the fitted tree (and therefore
# the accuracy reported afterwards) are reproducible across kernel restarts.
RANDOM_SEED = 42

# Hold out a third of the songs for evaluation.
x_train, x_test, y_train, y_test = train_test_split(
    data, label, test_size=0.33, random_state=RANDOM_SEED)

classifier = DecisionTreeClassifier(max_depth=20, random_state=RANDOM_SEED)
classifier.fit(x_train, y_train)
print("Training successful")

Training successful


In [90]:
# Report the share of held-out songs whose category was predicted correctly.
print(f"Accuracy:{test(classifier, x_test, y_test)}")

Accuracy:0.7
