In [13]:
from keras.layers import Input, Dense
from keras.models import Model
from keras.layers import Dense, Dropout, Flatten
from keras.layers.convolutional import Convolution2D
from keras.layers.convolutional import MaxPooling2D, ZeroPadding2D
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import ELU
from keras.utils.data_utils import get_file
from keras.layers import Input, Dense
import time
import numpy as np
from keras import backend as K
import audio_processor as ap
import pdb


##
def sort_result(tags, preds):
    """Rank tags by their predicted score, best first.

    Args:
        tags: iterable of tag names.
        preds: iterable of numeric scores, aligned position-by-position
            with `tags`.

    Returns:
        A list of ``(tag, score)`` tuples ordered by descending score. Each
        score is rendered as a string using the ``'%5.3f'`` format. Pairs
        with equal scores keep their input order.
    """
    ranked = list(zip(tags, preds))
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    formatted = []
    for tag, score in ranked:
        formatted.append((tag, '%5.3f' % score))
    return formatted

def librosa_exists():
    """Tell whether the ``librosa`` package can be imported.

    Returns:
        True if ``__import__('librosa')`` succeeds, False if it raises
        ImportError. Any other exception propagates to the caller.
    """
    try:
        __import__('librosa')
    except ImportError:
        available = False
    else:
        available = True
    return available

In [14]:
    # Demo tracks to tag. `audio_paths` and `melgram_paths` are parallel
    # lists: entry i of each names the same track (.mp3 audio vs .npy array).
    audio_paths = [
        'data/bensound-cute.mp3',
        'data/bensound-actionable.mp3',
        'data/bensound-dubstep.mp3',
        'data/bensound-thejazzpiano.mp3',
    ]
    melgram_paths = [
        'data/bensound-cute.npy',
        'data/bensound-actionable.npy',
        'data/bensound-dubstep.npy',
        'data/bensound-thejazzpiano.npy',
    ]

    # The 50 tag labels, five per row. Order matters: the reporting loop pairs
    # this list positionally with each row of model predictions.
    tags = [
        'rock', 'pop', 'alternative', 'indie', 'electronic',
        'female vocalists', 'dance', '00s', 'alternative rock', 'jazz',
        'beautiful', 'metal', 'chillout', 'male vocalists', 'classic rock',
        'soul', 'indie rock', 'Mellow', 'electronica', '80s',
        'folk', '90s', 'chill', 'instrumental', 'punk',
        'oldies', 'blues', 'hard rock', 'ambient', 'acoustic',
        'experimental', 'female vocalist', 'guitar', 'Hip-Hop', '70s',
        'party', 'country', 'easy listening', 'sexy', 'catchy',
        'funk', 'electro', 'heavy metal', 'Progressive rock', '60s',
        'rnb', 'indie pop', 'sad', 'House', 'happy',
    ]

    # Prepare the network input: one mel-spectrogram per track, stacked along
    # axis 0. Starting from a zero-length array fixes the trailing dimensions
    # to (1, 96, 1366); every loaded array must match them to concatenate.
    melgrams = np.zeros((0, 1, 96, 1366))

    # BUG FIX: the original tested `librosa_exists` (the function object,
    # which is always truthy), so the cached-.npy fallback below could never
    # run. The function has to be called.
    if librosa_exists():
        # librosa is importable: derive each array from the audio file.
        for audio_path in audio_paths:
            melgram = ap.compute_melgram(audio_path)
            melgrams = np.concatenate((melgrams, melgram), axis=0)
    else:
        # librosa is missing: fall back to the arrays already saved on disk.
        for melgram_path in melgram_paths:
            melgram = np.load(melgram_path)
            melgrams = np.concatenate((melgrams, melgram), axis=0)
            
    # Model options, kept as plain variables.
    # TH_WEIGHTS_PATH is not referenced by any other visible code.
    # NOTE(review): it is a GitHub `blob` page URL -- confirm it serves the
    # raw .h5 bytes before handing it to a downloader.
    TH_WEIGHTS_PATH = 'https://github.com/keunwoochoi/music-auto_tagging-keras/blob/master/data/music_tagger_cnn_weights_theano.h5'
    weights = 'msd'
    input_tensor = None
    include_top = True

    # Only `None` and 'msd' are accepted weight settings.
    if weights not in {'msd', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `msd` '
                         '(pre-training on Million Song Dataset).')

    # The input shape and the axis indices both depend on the backend's
    # dimension ordering, so they are chosen together.
    if K.image_dim_ordering() == 'th':
        input_shape = (1, 96, 1366)
        channel_axis, freq_axis, time_axis = 1, 2, 3
    else:
        input_shape = (96, 1366, 1)
        channel_axis, freq_axis, time_axis = 3, 1, 2

    # Build the input placeholder, or reuse/wrap a tensor supplied by the user.
    if input_tensor is None:
        melgram_input = Input(shape=input_shape)
    elif K.is_keras_tensor(input_tensor):
        melgram_input = input_tensor
    else:
        melgram_input = Input(tensor=input_tensor, shape=input_shape)
        
    # Batch-normalise the input along the frequency axis first.
    x = BatchNormalization(axis=freq_axis, name='bn_0_freq')(melgram_input)

    # Five stages with the same layout:
    #   Convolution2D -> BatchNormalization (channel axis) -> ELU -> MaxPooling2D
    # Only the filter count, pooling window and layer names vary, so they are
    # listed in a table and applied in order.
    conv_stages = [
        # (filters, pool window, conv name, bn name, pool name)
        (32, (2, 4), 'conv1', 'bn1', 'pool1'),
        (64, (2, 4), 'conv2', 'bn2', 'pool2'),
        (64, (2, 4), 'conv3', 'bn3', 'pool3'),
        (64, (3, 5), 'conv4', 'bn4', 'pool4'),
        (32, (4, 4), 'conv5', 'bn5', 'pool5'),
    ]
    for n_filters, pool_window, conv_name, bn_name, pool_name in conv_stages:
        x = Convolution2D(n_filters, 3, 3, border_mode='same', name=conv_name)(x)
        x = BatchNormalization(axis=channel_axis, mode=0, name=bn_name)(x)
        x = ELU()(x)
        x = MaxPooling2D(pool_size=pool_window, name=pool_name)(x)

    # Flatten the feature maps; with `include_top` a 50-unit sigmoid layer
    # (one unit per entry of `tags`) is appended.
    x = Flatten()(x)
    if include_top:
        x = Dense(50, activation='sigmoid', name='output')(x)
    model = Model(melgram_input, x)
    print(model)
   
    
   # if weights is None:
   #   return model    
   # else: 
        # Load input
      #  if K.image_dim_ordering() == 'tf':
         #   raise RuntimeError("Please set image_dim_ordering == 'th'."
            #                   "You can set it at ~/.keras/keras.json")
           # model.load_weights('data/music_tagger_cnn_weights_%s.h5' % K._BACKEND,
                 #          by_name=True)
            

            # predict the tags like this
    print('Predicting...')
    start = time.time()
    pred_tags = model.predict(melgrams)
    # print like this...
  #  print "Prediction is done. It took %d seconds." % (time.time()-start)
    print('Printing top-10 tags for each track...')
    
    
    for song_idx, audio_path in enumerate(audio_paths):
        sorted_result = sort_result(tags, pred_tags[song_idx, :].tolist())
        print(audio_path)
        print(sorted_result[:5])
        print(sorted_result[5:10])
        print(' ')



<keras.engine.training.Model object at 0x000001C62BF627F0>
Predicting...
Printing top-10 tags for each track...
data/bensound-cute.mp3
[('indie', '1.000'), ('female vocalists', '0.999'), ('Mellow', '0.994'), ('electro', '0.986'), ('acoustic', '0.977')]
[('Hip-Hop', '0.973'), ('beautiful', '0.966'), ('electronic', '0.938'), ('alternative', '0.906'), ('funk', '0.902')]
 
data/bensound-actionable.mp3
[('indie', '1.000'), ('female vocalists', '0.998'), ('Mellow', '0.997'), ('beautiful', '0.987'), ('Hip-Hop', '0.986')]
[('alternative', '0.978'), ('electro', '0.975'), ('acoustic', '0.965'), ('electronic', '0.949'), ('funk', '0.936')]
 
data/bensound-dubstep.mp3
[('indie', '1.000'), ('female vocalists', '1.000'), ('Mellow', '0.997'), ('electro', '0.992'), ('acoustic', '0.987')]
[('beautiful', '0.986'), ('Hip-Hop', '0.982'), ('funk', '0.982'), ('electronic', '0.973'), ('punk', '0.956')]
 
data/bensound-thejazzpiano.mp3
[('indie', '0.999'), ('female vocalists', '0.998'), ('beautiful', '0.980'),

In [2]:
# Sanity check: report whether librosa can be imported in this kernel.
# NOTE(review): this cell's execution count (2) is lower than that of the
# cells above it; it only works once the cell defining librosa_exists has run.
librosa_exists()

True