In [160]:
%matplotlib inline
from scipy.io import wavfile
from scipy import signal
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn import preprocessing

# Preprocessing songs with power spectral density

In [207]:
def processSong(path, generic=None, file=False):
    """Turn a WAV file into a matrix of voice-band spectrogram slices.

    The first channel of the recording (or the only one, for mono files) is
    passed to ``scipy.signal.spectrogram`` with a window of up to 2**16
    samples. Each time slice is cut down to the voice band by ``voiceFilter``
    and divided by the window length; slices whose peak value is below 5 are
    dropped.

    Parameters
    ----------
    path : str
        Path of the WAV file to read.
    generic : str, optional
        File name used under ``mats/`` when ``file`` is true (``np.save``
        appends ``.npy`` if it is missing). Required in that case.
    file : bool, optional
        If true, save the matrix with ``np.save`` and return ``None``;
        otherwise return the matrix.

    Returns
    -------
    numpy.ndarray or None
        One row per kept time slice, one column per voice-band frequency bin.

    Raises
    ------
    ValueError
        If ``file`` is true but ``generic`` was not given.
    """
    # Fail before the expensive spectrogram rather than at save time.
    if file and generic is None:
        raise ValueError("generic must be given when file=True")

    fs, data = wavfile.read(path)
    # wavfile.read returns a 1-D array for mono files and (samples, channels)
    # otherwise; only the first channel is analysed.
    sig = data[:, 0] if data.ndim > 1 else data
    # Short recordings cannot fill a 2**16-sample window.
    window = min(2**16, len(sig))

    f, t, signal_freq = signal.spectrogram(sig, fs=fs, nperseg=window, nfft=window)
    print(signal_freq.shape)
    output = []
    # Filtering
    for i in range(signal_freq.shape[1]):
        signal_chunk_freq = voiceFilter(signal_freq[:, i], fs, window) / window
        # An empty band (very short window) has no peak to compare; a quiet
        # band means no voice activity in this segment. Skip both.
        if signal_chunk_freq.size == 0 or np.max(signal_chunk_freq) < 5:
            continue
        ## voice frequencies are active in this segment
        output.append(signal_chunk_freq)
    output = np.array(output)
    if file:
        # np.save does not create missing directories.
        os.makedirs("mats", exist_ok=True)
        np.save("mats/" + generic, output)
    else:
        return output
def voiceFilter(signal_freq, fs, window, minimum=85, maximum=1000):
    """Return the part of a spectrum that lies between two frequencies.

    Parameters
    ----------
    signal_freq : sequence
        Spectrum values indexed by FFT bin.
    fs : number
        Sample rate used to compute the spectrum.
    window : int
        FFT length used to compute the spectrum.
    minimum, maximum : number, optional
        Lower (inclusive) and upper (exclusive) band edges in Hz. The
        defaults, 85 and 1000, are the band this notebook treats as voice.

    Returns
    -------
    The slice ``signal_freq[min_bin:max_bin]``, where each edge is the
    frequency converted to a bin and rounded to the nearest integer.
    """
    # np.round returns a float, and float slice bounds raise TypeError on
    # current NumPy, so convert to int before slicing.
    min_bin = int(np.round(freqToBin(minimum, fs, window)))
    max_bin = int(np.round(freqToBin(maximum, fs, window)))
    return signal_freq[min_bin:max_bin]
def binToFreq(bin_num, fs, window):
    """Return the frequency that corresponds to FFT bin ``bin_num``.

    Consecutive bins are ``fs / window`` apart, where ``fs`` is the sample
    rate and ``window`` the FFT length.
    """
    hz_per_bin = fs / window
    return hz_per_bin * bin_num
def freqToBin(freq, fs, window):
    """Return the (fractional) FFT bin position of frequency ``freq``.

    Consecutive bins are ``fs / window`` apart, where ``fs`` is the sample
    rate and ``window`` the FFT length. The result is not rounded.
    """
    hz_per_bin = fs / window
    return freq / hz_per_bin

In [208]:
# Convert every WAV file in songs/ into a feature matrix saved under mats/,
# named after the source file.
directory = "songs"
for filename in os.listdir(directory):
    if not filename.endswith(".wav"):
        continue
    path = os.path.join(directory, filename)
    processSong(path, generic=filename, file=True)

(32769, 172)
(32769, 193)
(32769, 150)
(32769, 152)
(32769, 194)
(32769, 203)
(32769, 186)
(32769, 169)
(32769, 227)
(32769, 133)


In [209]:
# Artists to tell apart. A name's position in this list is its integer class
# label, and saved feature files are matched to an artist by file-name prefix.
categories = [
    "Beyonce",
    "Drake",
    "Coldplay",
]

In [210]:
# Pool the saved per-song feature matrices into one data set: one row per
# segment in `data`, with the artist's index in `categories` in `labels`.
directory = "mats"
data = []
labels = []
for filename in os.listdir(directory):
    if not filename.endswith(".npy"):
        continue
    # The label is the index of the artist whose name prefixes the file name
    # (if several match, the last one wins).
    label = -1
    for i, category in enumerate(categories):
        if filename.startswith(category):
            label = i
    if label == -1:
        # A file from an unknown artist would otherwise be trained as class
        # -1, which categories[-1] later reports as the last artist.
        print("Skipping " + filename + ": no matching category")
        continue
    path = os.path.join(directory, filename)
    mat = np.load(path)
    data.extend(mat)
    labels.extend([label] * len(mat))
data = np.array(data)
labels = np.array(labels)

# PCA dimensionality reduction

In [211]:
# Rows are the kept spectrogram segments pooled over all songs; columns are
# the voice-band frequency bins.
print(data.shape)

(1205, 1360)


In [212]:
# Project every segment onto its first 200 principal components. `pca` is kept
# so that new songs can be projected the same way later.
# NOTE: this overwrites `data`, so re-running the cell fits on the already
# reduced features. The PCA is also fitted before the train/validation split,
# so the validation segments influence the projection.
pca = PCA(n_components=200).fit(data)
data = pca.transform(data)

In [213]:
# After the PCA projection each segment is described by 200 components.
print(data.shape)

(1205, 200)


# Decision Tree/Random Forest

In [214]:
# Shuffle all segments, then keep the first 80% for training and hold out the
# remaining 20% for validation.
# NOTE: the shuffle is over segments, not songs, so segments of one song can
# land on both sides of the split.
random = np.arange(len(data))
np.random.shuffle(random)
training_data = data[random]
training_labels = labels[random]

# The split point must be an integer: a float slice bound raises TypeError on
# current NumPy.
flip = int(0.8 * len(training_data))
validation_data = training_data[flip:]
validation_labels = training_labels[flip:]

training_data = training_data[:flip]
training_labels = training_labels[:flip]

In [215]:
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier

In [216]:
# Random forest with the library's default hyper-parameters, trained on the
# training part of the split. No random_state is set, so the fitted model
# differs from run to run.
clf = RandomForestClassifier()
clf.fit(training_data, training_labels)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

# Test

## Validation data

In [217]:
# Accuracy on the held-out segments: the share of predictions that equal the
# true label.
predicted_labels = clf.predict(validation_data)
correct = sum(
    1
    for real, predicted in zip(validation_labels, predicted_labels)
    if real == predicted
)
print(correct/len(predicted_labels))

0.8796680497925311


## Entire songs

In [218]:
from collections import defaultdict

In [219]:
def predict(song, pca, clf):
    """Classify every segment of a song and tally the predicted artists.

    Parameters
    ----------
    song : str
        Path of the WAV file to classify.
    pca : fitted transformer
        Object whose ``transform`` projects raw segment features into the
        space the classifier was trained on.
    clf : fitted classifier
        Object whose ``predict`` returns one index into ``categories`` per
        segment.

    Returns
    -------
    collections.defaultdict
        Maps artist name to the number of segments predicted as that artist.
        Artists that were never predicted have no entry.
    """
    features = pca.transform(processSong(song))
    predictions = clf.predict(features)
    print(predictions)
    counts = defaultdict(int)
    for label in predictions:
        artist = categories[label]
        counts[artist] = counts[artist] + 1
    return counts

Drake - Hold on, we're going home

In [220]:
# Per-segment predictions for a Drake track that is read from test/.
# In the recorded run 151 of 157 segments were labelled Beyonce and only 1
# Drake, so the counts point to the wrong artist for this song.
drake = predict("test/drake-hold-on-we27re-going-home.wav", pca, clf)
print(drake)

(32769, 174)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 2 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 2 0 0
 0 0 0 0 0 0 0 0 0]
defaultdict(<class 'int'>, {'Coldplay': 5, 'Beyonce': 151, 'Drake': 1})


In [221]:
# Per-segment predictions for a Beyonce track that is read from test/.
# In the recorded run Beyonce is the most frequent label (73 of 132
# segments), with Coldplay a fairly close second (54).
beyonce = predict("test/beyonce-partition.wav", pca, clf)
print(beyonce)

(32769, 150)
[0 1 0 1 0 2 2 2 0 2 0 2 2 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 0 0 0
 2 0 2 0 2 0 2 0 2 2 2 0 2 0 2 2 0 0 2 0 2 2 2 2 2 0 2 0 0 0 2 0 2 2 2 0 0
 2 0 0 2 2 2 0 0 2 0 0 0 2 2 2 2 0 2 0 2 0 2 0 2 0 2 0 2 2 0 2 0 2 0 0 0 0
 0 0 0 0 0 0 0 0 0 2 2 0 2 0 2 1 2 0 2 1 0]
defaultdict(<class 'int'>, {'Coldplay': 54, 'Beyonce': 73, 'Drake': 5})


In [222]:
# Per-segment predictions for a Coldplay track that is read from test/.
# In the recorded run 204 of 231 segments were labelled Coldplay.
coldplay = predict("test/Coldplay_-_The_Scientist.wav", pca, clf)
print(coldplay)

(32769, 237)
[2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 0 2 2 2 2 2 2 2 2 2 0 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 0
 1 2 2 1 2 2 2 2 2 2 2 2 2 2 0 2 2 2 1 2 2 2 2 2 0 2 0 2 2 1 2 2 2 2 0 2 2
 0 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 0 2 0 0 2 0 2 2 2 2 1 2 2 2 2 2 2 2 2 2
 2 2 0 2 2 2 2 2 2 2 0 2 2 0 2 2 2 2 2 2 0 2 2 2 2 2 0 2 2 2 1 2 1 2 2 2 0
 2 2 2 1 2 2 2 2 2]
defaultdict(<class 'int'>, {'Coldplay': 204, 'Beyonce': 18, 'Drake': 9})
