In [90]:
%matplotlib inline
from scipy.io import wavfile
from scipy import signal
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn import preprocessing

# Pre processing songs with Power Spectral Density

In [91]:
def processSong(path, generic=None, file=False):
    fs, data = wavfile.read(path)
    window = 2**16
    sig = data[:,0]
    window = min(window, len(sig))
    
    f, t, signal_freq = signal.spectrogram(sig, fs=fs, nperseg=window, nfft=window)
    print(signal_freq.shape)
    output = []
    # Filtering
    for i in range(signal_freq.shape[1]):
        signal_chunk_freq = signal_freq[:,i]
        signal_chunk_freq = voiceFilter(signal_chunk_freq, fs, window)/window
        if np.max(signal_chunk_freq) < 10:
            continue
        ## voice frequencies are active in this segment
        output.append(signal_chunk_freq)
    output = np.array(output)
    if file == True:
        np.save("mats/" + generic, output)
    else:
        return output
def voiceFilter(signal_freq, fs, window, minimum=50, maximum=1000):
    min_bin = np.round(freqToBin(minimum, fs, window))
    max_bin = np.round(freqToBin(maximum, fs, window))
    return signal_freq[min_bin:max_bin]
def binToFreq(bin_num, fs, window):
    return fs/window * bin_num
def freqToBin(freq, fs, window):
    return freq / (fs/window)

In [92]:
directory = "songs"
for filename in os.listdir(directory):
    if filename.endswith(".wav"):
        path = os.path.join(directory, filename)
        processSong(path, generic=filename, file=True)

(32769, 172)
(32769, 193)
(32769, 150)
(32769, 152)
(32769, 194)
(32769, 203)
(32769, 186)
(32769, 169)
(32769, 227)
(32769, 133)




In [93]:
categories = ["Beyonce", "Drake", "Coldplay"]

In [94]:
directory = "mats"
data = []
labels = []
for filename in os.listdir(directory):
    if filename.endswith(".npy"):
        path = os.path.join(directory, filename)
        mat = np.load(path)
        label = -1
        for i in range(len(categories)):
            category = categories[i]
            if filename.startswith(category):
                label = i           
        for point in mat:
            data.append(point)
            labels.append(label)
data = np.array(data)
labels = np.array(labels)

# PCA dimensionality reduction

In [95]:
print(data.shape)

(1187, 1412)


In [96]:
data = preprocessing.scale(data)
pca = PCA(n_components=200)
pca.fit(data)
data = pca.transform(data)

In [97]:
print(data.shape)

(1187, 200)


# Decision Tree/Random Forest/Adaboost

In [98]:
random = np.arange(len(data))
np.random.shuffle(random)
training_data = data[random]
training_labels = labels[random]

flip = 0.8 * len(training_data)
validation_data = training_data[flip:]
validation_labels = training_labels[flip:]

training_data = training_data[:flip]
training_labels = training_labels[:flip]



In [99]:
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier

In [100]:
clf = MLPClassifier(hidden_layer_sizes=(400,200), alpha=0.001)
clf.fit(training_data, training_labels)

MLPClassifier(activation='relu', alpha=0.001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(400, 200), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

# Test

## Validation data

In [101]:
predicted_labels = clf.predict(validation_data)
correct = 0
for real, predicted in zip(validation_labels, predicted_labels):
    if np.array_equal(real, predicted):
        correct += 1
print(correct/len(predicted_labels))

0.9705882352941176


## Entire songs

In [102]:
from collections import defaultdict

In [103]:
def predict(song, pca, clf):
    song = processSong(song)
    song_data = pca.transform(song)
    predictions = clf.predict(song_data)
    print(predictions)
    counts = defaultdict(int)
    for prediction in predictions:
        counts[categories[prediction]] += 1
    return counts

Drake - Hold on, we're going home

In [104]:
drake = predict("test/drake-hold-on-we27re-going-home.wav", pca, clf)
print(drake)

(32769, 174)
[1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 2 2 2 1 1 1 1 1 1 1 1 2 1 0 1 2 2 2 1 1 1 1
 1 1 1 1 2 1 0 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 2 2 2 1 1 1 1 1 1 1 1 2 1 0 1
 2 2 2 1 1 1 1 1 1 1 1 2 1 0 1 2 2 2 1 1 0 1 1 1 1 1 2 1 1 1 2 2 2 1 1 0 0
 1 1 1 1 2 0 1 2 2 0 1 1 1 0 1 1 1 1 0 1 0 1 2 2 2 1 1 1 0 0 2 1 1 2 0 0 2
 2 2 2 1 1 1 1 1 1 1 1 2 1 1 1]
defaultdict(<class 'int'>, {'Drake': 108, 'Coldplay': 38, 'Beyonce': 17})




In [105]:
beyonce = predict("test/beyonce-partition.wav", pca, clf)
print(beyonce)

(32769, 150)
[0 2 2 2 2 2 0 2 0 2 0 2 2 0 0 2 0 1 1 1 1 1 1 1 1 1 0 0 0 0 1 0 1 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 2 0 2 1
 1 1 1 0 0 0 2 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 1 1 1 1 1 2
 1 1 1 1 2 0 2 0 1 0 1 0 1 0 1 0]
defaultdict(<class 'int'>, {'Drake': 36, 'Coldplay': 16, 'Beyonce': 75})




In [106]:
coldplay = predict("test/coldplay-yellow.wav", pca, clf)
print(coldplay)

(32769, 206)
[0 2 2 2 0 2 1 0 2 0 1 2 2 2 2 0 1 0 1 1 0 2 2 1 2 2 2 2 2 0 2 2 0 0 0 0 1
 2 0 2 2 2 0 2 2 0 0 0 0 2 2 0 2 2 2 2 0 2 0 0 0 0 0 0 1 1 0 2 0 2 2 2 2 2
 2 0 2 2 2 0 0 0 2 2 0 2 2 2 0 2 2 2 0 1 0 1 2 2 0 2 2 2 1 2 2 2 0 2 0 0 0
 0 0 0 2 0 0 2 2 2 2 0 0 0 0 0 1 1 1 0 2 2 0 2 0 2 2 2 0 2 2 2 2 0 1 0 2 2
 2 2 1 2 2 2 2 2 1 1 1 0 0 1 2 2 2 2 1 0 2 0 2 2 2 0 0 0 0 0 1 2 0 2 2 2 2
 2 1 0 0 1 2 2 2 2 0 0 2 2 2 0]
defaultdict(<class 'int'>, {'Drake': 25, 'Coldplay': 103, 'Beyonce': 72})


