In [10]:
import os
import re
from torchvggish import vggish, vggish_input
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
import numpy as np

In [13]:
MUSIC_DIR="music/sampled/"
EMBEDDING_MODEL = vggish()
EMBEDDING_MODEL.eval()
SONG_LENGTH = 30 # length of samples
SAMPLES_PER_CAT=60

In [27]:
def readFolder2Embedding(path):

    songs = os.listdir(path)

    songs= np.random.choice(songs,size=60,replace=False)

    # vggish output embedding has length 128
    data = np.zeros((SAMPLES_PER_CAT, SONG_LENGTH*128))
    labels = []

    for i, song in enumerate(songs):
        print(f"Learning Embedding for {song}")
        embedding = EMBEDDING_MODEL.forward(
            vggish_input.wavfile_to_examples(path+"/"+song))
        # normalize length, convert to numpy array and flatten the feature array
        converted_embedding = embedding.detach().numpy()[
            :SONG_LENGTH, :].flatten()
        data[i, :] = converted_embedding
        # get label/category name from directory name
        labels.append(re.match(r".*cat_(.*)", path)[1])
    return data, labels

In [4]:
def readIn():
    directories = sorted(os.listdir(MUSIC_DIR))
    data = np.empty((0, SONG_LENGTH*128))
    label = []
    for directory in directories:
        dir_data, dir_label = readFolder2Embedding(
            MUSIC_DIR+directory)
        data = np.concatenate((data, dir_data), axis=0)
        label += dir_label
    print("All data read in successfully!")
    return data, label

In [5]:
def classify(classifier, filepath):
    print("Predicting label for" + filepath + "...")
    embedding = EMBEDDING_MODEL.forward(vggish_input.wavfile_to_examples(filepath))
    converted_embedding = embedding.detach().numpy()[
        :SONG_LENGTH, :].flatten()
    return classifier.predict(converted_embedding.reshape(1, -1))

In [6]:
def test(classifier, data, labels):
    correct=0
    for i,sample in enumerate(data):
        predicted_class=classifier.predict(sample.reshape(1, -1))
        if predicted_class == labels[i]:
            correct+=1
    return correct/len(data)

In [14]:
data, label = readIn()

['Kraftklub - Unsere Fans.wav' 'blink-182 - After Midnight.wav'
 'The All-American Rejects - Gives You Hell.wav'
 'The All-American Rejects - Dirty Little Secret.wav'
 'blink-182 - Going Away To College.wav' 'Box Car Racer - Tiny Voices.wav'
 'Billy Talent - Fallen Leaves.wav' 'Billy Talent - Red Flag.wav'
 'Linkin Park - CASTLE OF GLASS.wav'
 'Kraftklub - Chemie Chemie Ya - Geil und Gestört Edit.wav'
 'Box Car Racer - There Is.wav' 'Foo Fighters - I Am A River.wav'
 'Sum 41 - Dear Father.wav' 'AWOLNATION - Kill Your Heroes.wav'
 'blink-182 - All The Small Things.wav' 'blink-182 - First Date.wav'
 'Sum 41 - Still Waiting.wav' 'blink-182 - Stay Together For The Kids.wav'
 'blink-182 - Up All Night.wav' 'Royal Blood - Little Monster.wav'
 'blink-182 - San Diego.wav' 'Oasis - Stand By Me.wav'
 'Billy Talent - Devil on My Shoulder.wav'
 'My Chemical Romance - Famous Last Words.wav'
 'Fall Out Boy - Sugar, We_re Goin Down.wav'
 'The All-American Rejects - Move Along.wav'
 'Kraftklub - Schüs

In [25]:
x_train, x_test, y_train, y_test = train_test_split(data, label, test_size=0.33)

classifier=DecisionTreeClassifier(max_depth=20)
classifier.fit(x_train, y_train)
print("Training successful")

Training successful


In [26]:
print("Accuracy:"+ str(test(classifier,x_test, y_test)))

Accuracy:0.9
