In [1]:
import librosa
from librosa.feature import melspectrogram
import librosa.display as libd

from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import matplotlib
matplotlib.use("TkAgg")
import matplotlib.pyplot as plt

import pickle
import pandas as pd
import numpy as np
import keras
from keras.layers import Conv1D
from keras.layers import Conv2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.models import Sequential


import os
from ipywidgets import IntProgress, HTML, HBox
from IPython.display import display

# Fixed seed: seeds NumPy's global RNG here and is reused further down as the
# `random_state` of KFold and train_test_split.
seed = 2205
np.random.seed(seed)
# Sample rate (22.05 kHz) passed as `sr` to the librosa.load calls below.
sampling_rate = 22050

Using TensorFlow backend.


In [10]:
def get_spectrogram(wav_filename, show=False):
    """
    Load a whole audio file and return its mel spectrogram.

    Like in the github article, windows of 2048 samples are used with a hop
    of 1024 samples between frames (so consecutive frames overlap by 1024
    samples). The sampling frequency is `sampling_rate` (22.05 kHz).

    Parameters
    ----------
    wav_filename : str
        Path of the audio file to load.
    show : bool, optional
        When True, print the waveform and spectrogram shapes and plot the
        spectrogram on a dB scale.

    Returns
    -------
    numpy.ndarray
        The array returned by `melspectrogram` for the loaded waveform.
    """
    y, sr = librosa.load(wav_filename, sr=sampling_rate)
    # Pass y/sr by keyword: recent librosa releases reject them positionally,
    # and older releases accept the keyword form as well.
    M = melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=1024)

    if show:
        print(wav_filename)
        print(" - y.shape", y.shape)
        print(" - M.shape", M.shape, '\n')

        plt.figure(figsize=(12, 8))
        # Convert power to dB relative to the loudest bin before plotting.
        libd.specshow(librosa.power_to_db(M, ref=np.max),
                      y_axis='mel', fmax=None,
                      x_axis='time')
        plt.colorbar(format='%+2.0f dB')
        plt.title('Mel spectrogram')
        plt.tight_layout()
        plt.show(block=False)

    return M

In [83]:
# audio, sr = librosa.load('data/0_gHGYEljjY2M.wav', sr=sampling_rate)
# Sanity check: load a 3-second excerpt starting 25 s into one file and
# inspect its length (3 s at 22050 samples/s gives 66150 samples).
audio, sr = librosa.load('data/2_oScZuIp6ce8.wav', sr=sampling_rate, offset=25, duration=3)
audio.shape

(66150,)

In [2]:
# Read the song metadata and keep the year / filename of the valid rows only.
raw_data = pd.read_csv('data.csv')
data = raw_data.loc[raw_data['status'] == 'valid', ['year', 'filename']]
# Round every release year down to the start of its decade.
decades = np.array((data['year'] // 10) * 10)

# Map decades to consecutive integer classes, then one-hot encode them.
encoder = LabelEncoder()
classes = encoder.fit_transform(decades)
labels = keras.utils.to_categorical(classes)
labels.shape

(7293, 10)

In [3]:
# Rebuild the `status` / `filename` columns from the files present on disk:
# start with every row marked invalid, then flag each row that has a .wav file.
new_raw = raw_data.copy()
new_raw['status'] = 'invalid'

# List the directory once so the progress-bar maximum and the loop below
# always describe the same set of files.
filenames = os.listdir('data')
file_count = len(filenames)
print("files", file_count)
progress = IntProgress(min=0, max=file_count)  # instantiate the bar
label = HTML()
box = HBox(children=[progress, label])
display(box)

for filename in filenames:
    progress.value += 1
    label.value = u'{name}: {index} / {size}'.format(
        name='files loaded',
        index=progress.value,
        size=file_count
    )
    if filename.endswith(".wav"):
        # The integer before the first underscore selects the row to update.
        index = int(filename.split('_')[0])
        new_raw.at[index, 'status'] = 'valid'
        new_raw.at[index, 'filename'] = filename
# Show only the first rows instead of dumping the whole frame.
new_raw.head(10)

files 7312


HBox(children=(IntProgress(value=0, max=7312), HTML(value='')))

Unnamed: 0.1,Unnamed: 0,idx,artist_name,title,release,year,filename,status,replacement_link,yt_query,yt_title,yt_link
0,0,0,Casual,I Didn't Mean To,Fear Itself,1994,0_gHGYEljjY2M.wav,valid,,Casual I Didn't Mean To,Casual - I Didn't Mean To [1994],https://www.youtube.com/watch?v=gHGYEljjY2M
1,1,1,The Box Tops,Soul Deep,Dimensions,1969,1_6VFEIfh2m-g.wav,valid,,The Box Tops Soul Deep,Soul Deep The Box Tops {Stereo},https://www.youtube.com/watch?v=6VFEIfh2m-g
2,2,2,Sonora Santanera,Amor De Cabaret,Las Numero 1 De La Sonora Santanera,2007,2_oScZuIp6ce8.wav,valid,,Sonora Santanera Amor De Cabaret,Amor De Cabaret - La Sonora Santanera,https://www.youtube.com/watch?v=oScZuIp6ce8
3,3,3,Adam Ant,Something Girls,Friend Or Foe,1982,3_dISqPMADs00.wav,valid,,Adam Ant Something Girls,adam ant- something girls.mpg,https://www.youtube.com/watch?v=dISqPMADs00
4,4,4,Gob,Face the Ashes,Muertos Vivos,2007,4_kvZiH1k_5_s.wav,valid,,Gob Face the Ashes,Gob - Face The Ashes,https://www.youtube.com/watch?v=kvZiH1k_5_s
5,5,5,Jeff And Sheri Easter,The Moon And I (Ordinary Day Album Version),Ordinary Day,0,,invalid,,,,
6,6,6,Rated R,Keepin It Real (Skit),Da Ghetto Psychic,2003,,invalid,,,,
7,7,7,Tweeterfriendly Music,Drop of Rain,Gin & Phonic,2003,7_PrSAS9lf2eA.wav,valid,,Tweeterfriendly Music Drop of Rain,Drop of Rain,https://www.youtube.com/watch?v=PrSAS9lf2eA
8,8,8,Planet P Project,Pink World,Pink World,1984,8_BSJK6YKLmEQ.wav,valid,,Planet P Project Pink World,Pink World,https://www.youtube.com/watch?v=BSJK6YKLmEQ
9,9,9,Clp,Insatiable (Instrumental Version),Superinstrumental,2008,9_YTC8xsagZgY.wav,valid,,Clp Insatiable (Instrumental Version),Insatiable (Instrumental),https://www.youtube.com/watch?v=YTC8xsagZgY


In [4]:
def get_time_series(filename, offset=25, duration=3):
    """
    Load a short excerpt of one audio file from the `data` directory.

    Parameters
    ----------
    filename : str
        File name relative to the `data` directory.
    offset : float, optional
        Start of the excerpt in seconds (default 25).
    duration : float, optional
        Length of the excerpt in seconds (default 3).

    Returns
    -------
    numpy.ndarray
        The waveform returned by `librosa.load` at `sampling_rate`.
    """
    # The sample rate returned by librosa is not needed here.
    audio, _ = librosa.load(os.path.join('data', filename), sr=sampling_rate,
                            offset=offset, duration=duration)
    return audio

# Load the excerpt for every valid file, keeping `data`'s index on the result.
audio = data['filename'].apply(get_time_series)

In [5]:
# Cache the loaded waveforms on disk; the context manager closes the file
# handle even if pickling fails.
with open("audio.p", "wb") as audio_file:
    pickle.dump(audio, audio_file)

In [3]:
# Restore the cached waveforms. NOTE: unpickling can execute arbitrary code,
# so only load cache files produced by this notebook.
with open("audio.p", "rb") as audio_file:
    audio = pickle.load(audio_file)

In [5]:
# Turn the collection of waveforms into one fixed-width table, one row per clip.
n = len(audio)
# Number of leading samples kept from each clip.
# NOTE(review): this is half the number of clips, not half of a clip's
# length -- confirm that this truncation is intended.
m = n // 2
progress = IntProgress(min=0, max=n)  # instantiate the bar
label = HTML()
box = HBox(children=[progress, label])
display(box)

rows = []
for row in audio:
    # Stop at the first clip that is too short to be truncated to m samples.
    if len(row) < m:
        print("ERROR", len(row))
        break
    rows.append(pd.Series(row[:m]))
    progress.value += 1
    label.value = u'{name}: {index} / {size}'.format(
        name='rows converted', index=progress.value, size=n)

X = pd.DataFrame(rows)

HBox(children=(IntProgress(value=0, max=7293), HTML(value='')))

In [6]:
# Persist the truncated waveform table so later cells can reload it.
X.to_csv("audio.csv")

In [7]:
# Reload the cached table (first CSV column is the index) and append a
# trailing axis of size 1 so each clip becomes a 2-D (timesteps, 1) input.
X = np.expand_dims(pd.read_csv("audio.csv", index_col=0).values, axis=2)
X.shape

(7293, 3646, 1)

In [10]:
# Dimensions taken from the prepared data.
depth = X.shape[1]
num_classes = labels.shape[1]


def baseline_model():
    """Build and compile the 1-D convolutional baseline classifier.

    The network is a single strided Conv1D layer, flattened into a dense
    hidden layer and a softmax output with `num_classes` units. The input
    shape is read from the global `X` at call time.
    """
    model = Sequential()
    model.add(Conv1D(filters=128, kernel_size=256, strides=128,
                     padding='valid', activation='relu',
                     input_shape=X.shape[1:]))
    model.add(Flatten())
    model.add(Dense(units=64, activation='relu'))
    model.add(Dense(units=num_classes, activation='softmax'))

    model.compile(optimizer='adam', loss='categorical_crossentropy',
                  metrics=['acc'])
    return model


# scikit-learn compatible wrapper, so the model can be cross-validated.
estimator = KerasClassifier(build_fn=baseline_model, batch_size=5,
                            epochs=20, verbose=1)

In [11]:
# Shuffled 4-fold cross-validation of the estimator on the full data set.
kfold = KFold(n_splits=4, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, labels, cv=kfold)
# Report mean and standard deviation of the per-fold scores as percentages.
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20


Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Baseline: 41.60% (1.17%)


In [12]:
# Split off 20% of the samples and train the estimator on the remaining 80%.
# NOTE(review): X_test / y_test are not used in the cells visible here.
X_train, X_test, y_train, y_test = train_test_split(X, labels, train_size=0.8, random_state=seed)
estimator.fit(X_train, y_train)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fad18338470>

In [13]:
# Save the fitted estimator; the context manager closes the file handle
# even if pickling fails.
with open("time_series_model.p", "wb") as model_file:
    pickle.dump(estimator, model_file)

In [None]:
# Restore the saved estimator. NOTE: unpickling can execute arbitrary code,
# so only load model files produced by this notebook.
with open("time_series_model.p", "rb") as model_file:
    estimator = pickle.load(model_file)

In [39]:
# Sanity check: shape of the predicted probability matrix for all of X.
estimator.predict_proba(X).shape



(7293, 10)

In [41]:
# Start the prediction table from the `idx` column of the valid songs.
pred = pd.DataFrame(raw_data[raw_data['status'] == 'valid']['idx'])
# Constant zero column labelled '1910'.
pred['1910'] = 0
# Add one probability column per class known to the encoder. Taking the
# labels from `encoder.classes_` avoids hard-coding the number of classes.
pred = pred.join(pd.DataFrame(estimator.predict_proba(X), index=pred.index,
                              columns=encoder.classes_))
pred.shape



(7293, 12)

In [42]:
# Write the time-series model's prediction table to disk.
pred.to_csv("time_series_pred.csv")

In [57]:
def get_freq_domain(filename, offset=20, duration=10):
    """
    Compute the mel spectrogram of an excerpt of one file in `data`.

    Like in the github article, windows of 2048 samples are used with a hop
    of 1024 samples between frames (so consecutive frames overlap by 1024
    samples). The sampling frequency is `sampling_rate` (22.05 kHz).

    Parameters
    ----------
    filename : str
        File name relative to the `data` directory.
    offset : float, optional
        Start of the excerpt in seconds (default 20).
    duration : float, optional
        Length of the excerpt in seconds (default 10).

    Returns
    -------
    numpy.ndarray
        The array returned by `melspectrogram` for the excerpt.
    """
    y, sr = librosa.load(os.path.join('data', filename), sr=sampling_rate,
                         offset=offset, duration=duration)
    # Pass y/sr by keyword: recent librosa releases reject them positionally,
    # and older releases accept the keyword form as well.
    M = melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=1024)
    return M

In [None]:
# Compute the spectrogram of every valid file, with a progress widget.
n = len(data)
progress = IntProgress(min=0, max=n)  # instantiate the bar
label = HTML()
box = HBox(children=[progress, label])
display(box)

rows = []
for filename in data['filename']:
    rows.append(get_freq_domain(filename))
    progress.value += 1
    label.value = u'{name}: {index} / {size}'.format(
        name='rows converted',
        index=progress.value,
        size=n)
# Cache the spectrograms; the context manager closes the file handle even
# if pickling fails.
with open("raw_freq.p", "wb") as freq_file:
    pickle.dump(rows, freq_file)

In [44]:
# Restore the cached spectrograms. NOTE: unpickling can execute arbitrary
# code, so only load cache files produced by this notebook.
with open("raw_freq.p", "rb") as freq_file:
    rows = pickle.load(freq_file)

In [45]:
# Stack the per-file spectrograms into one array and insert a size-1 axis
# at position 3.
X = np.expand_dims(np.array(rows), axis=3)
X.shape

(7293, 128, 216, 1)

In [11]:
# Number of output units, taken from the one-hot label matrix.
num_classes = labels.shape[1]


def baseline_model():
    """Build and compile the 2-D convolutional baseline classifier.

    The network is a single strided Conv2D layer, flattened into a dense
    hidden layer and a softmax output with `num_classes` units. The input
    shape is read from the global `X` at call time.
    """
    model = Sequential()
    model.add(Conv2D(filters=64, kernel_size=(4, 6), strides=(2, 3),
                     padding='valid', activation='relu',
                     input_shape=X.shape[1:]))
    model.add(Flatten())
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(units=num_classes, activation='softmax'))

    model.compile(optimizer='adam', loss='categorical_crossentropy',
                  metrics=['acc'])
    return model


# scikit-learn compatible wrapper, so the model can be cross-validated.
estimator = KerasClassifier(build_fn=baseline_model, batch_size=5,
                            epochs=20, verbose=1)

In [12]:
# Shuffled 4-fold cross-validation of the estimator on the full data set.
kfold = KFold(n_splits=4, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, labels, cv=kfold)
# Report mean and standard deviation of the per-fold scores as percentages.
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Baseline: 47.59% (4.15%)


In [43]:
# Split off 20% of the samples and train the estimator on the remaining 80%.
# NOTE(review): X_test / y_test are not used in the cells visible here.
X_train, X_test, y_train, y_test = train_test_split(X, labels, train_size=0.8, random_state=seed)
estimator.fit(X_train, y_train)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fc8d21ab898>

In [44]:
# Save the fitted estimator; the context manager closes the file handle
# even if pickling fails.
with open("freq_domain_model.p", "wb") as model_file:
    pickle.dump(estimator, model_file)

In [43]:
# Restore the saved estimator. NOTE: unpickling can execute arbitrary code,
# so only load model files produced by this notebook.
with open("freq_domain_model.p", "rb") as model_file:
    estimator = pickle.load(model_file)

In [46]:
# Start the prediction table from the `idx` column of the valid songs.
pred = pd.DataFrame(raw_data[raw_data['status'] == 'valid']['idx'])
# Constant zero column labelled '1910'.
pred['1910'] = 0
# Add one probability column per class known to the encoder. Taking the
# labels from `encoder.classes_` avoids hard-coding the number of classes.
pred = pred.join(pd.DataFrame(estimator.predict_proba(X), index=pred.index,
                              columns=encoder.classes_))
pred.shape



(7293, 12)

In [47]:
# Write the frequency-domain model's prediction table to disk.
pred.to_csv("freq_domain_pred.csv")