# Import Packages

In [1]:
import keras
import librosa
import os
import random
import requests
import spotipy
import time

import numpy as np

from src.obtain.spotify_metadata import generate_token, download_playlist_metadata


Using TensorFlow backend.


# Feature Engineering Functions (Move to `src`)

In [6]:
# Directory holding the preview clips; build_dataset looks for (and downloads
# to) "<song_id>.mp3" inside it. The path is relative to the working directory.
sample_mp3_dir = 'data/raw/mp3s'


def download_preview_mp3(url, filepath):
    """Download a preview clip to ``filepath`` unless a file is already there.

    Parameters
    ----------
    url : str
        Address of the preview MP3.
    filepath : str
        Destination path. If a file already exists at this path the function
        returns immediately without making any request.

    Raises
    ------
    ValueError
        If ``url`` is empty or ``None`` (presumably a track without a
        preview -- verify against the metadata source).
    requests.HTTPError
        If the server answers with an error status code.
    """
    if os.path.isfile(filepath):
        return
    if not url:
        raise ValueError("No preview URL available for %r" % (filepath,))

    # Random pause of up to half a second before each request
    # (presumably to avoid hammering the server).
    time.sleep(random.random()/2)
    # Bounded timeout so one stalled connection cannot hang a whole batch run.
    preview = requests.get(url, allow_redirects=True, timeout=30)
    # Fail loudly instead of saving an error page as an .mp3: the isfile()
    # guard above would otherwise treat that bad file as a valid cache entry.
    preview.raise_for_status()

    target_dir = os.path.dirname(filepath)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    # Context manager guarantees the handle is flushed and closed.
    with open(filepath, 'wb') as mp3_file:
        mp3_file.write(preview.content)
    

def extract_features(file, timeseries_length = 1294, hop_length = 512,
                     n_mfcc=13, n_chroma=12, n_bands=6, win_length=192):
    """Load an audio file and compute five librosa feature sets from it.

    Parameters
    ----------
    file : str
        Path of the audio file handed to ``librosa.load``.
    timeseries_length : int
        Not used here; kept so existing positional callers keep working.
    hop_length : int
        Hop length passed to every feature extractor, so that all features
        are computed on the same frame grid.
    n_mfcc, n_chroma, n_bands : int
        Forwarded to ``mfcc``, ``chroma_stft`` and ``spectral_contrast``.
    win_length : int
        Forwarded to ``tempogram``.

    Returns
    -------
    dict
        Keys ``'mfcc'``, ``'spectral_center'``, ``'chroma'``,
        ``'spectral_contrast'`` and ``'tempogram'``; each value is whatever
        the corresponding ``librosa.feature`` function returns.
    """
    y, sr = librosa.load(file)

    mfcc = librosa.feature.mfcc(
        y=y, sr=sr, hop_length=hop_length, n_mfcc=n_mfcc)
    spectral_center = librosa.feature.spectral_centroid(
        y=y, sr=sr, hop_length=hop_length)
    chroma = librosa.feature.chroma_stft(
        y=y, sr=sr, hop_length=hop_length, n_chroma=n_chroma)
    spectral_contrast = librosa.feature.spectral_contrast(
        y=y, sr=sr, hop_length=hop_length, n_bands=n_bands)
    # hop_length must be forwarded here too: without it the tempogram falls
    # back to librosa's default hop and, for any non-default hop_length, ends
    # up with a different number of frames than the other features.
    tempogram = librosa.feature.tempogram(
        y=y, sr=sr, hop_length=hop_length, win_length=win_length)

    return {'mfcc':mfcc, 'spectral_center':spectral_center,
           'chroma':chroma, 'spectral_contrast':spectral_contrast,
           'tempogram':tempogram}
    
    
    
def build_dataset(song_ids, sample_urls,
                  timeseries_length = 1294, hop_length = 512,
                  n_mfcc=13, n_chroma=12, n_bands=6, win_length=192):
    """Assemble one zero-padded feature tensor for a list of songs.

    For every id in ``song_ids`` the clip ``<sample_mp3_dir>/<id>.mp3`` is
    used; if that file is absent it is first fetched from
    ``sample_urls[id]``. The features returned by ``extract_features`` are
    transposed and written side by side along the last axis in the order
    mfcc, spectral_center, chroma, spectral_contrast, tempogram.

    Returns an array of shape ``(len(song_ids), timeseries_length, width)``
    with dtype float64, where ``width`` is
    ``n_mfcc + 1 + n_chroma + (n_bands + 1) + win_length``. Clips with fewer
    frames than ``timeseries_length`` keep zeros in the remaining rows;
    longer clips are cut off at ``timeseries_length`` frames.
    """
    # (feature key, number of columns it occupies), in output column order.
    layout = (
        ('mfcc', n_mfcc),
        ('spectral_center', 1),
        ('chroma', n_chroma),
        ('spectral_contrast', n_bands + 1),
        ('tempogram', win_length),
    )
    total_width = sum(width for _, width in layout)
    dataset = np.zeros((len(song_ids), timeseries_length, total_width),
                       dtype=np.float64)

    for row, track_id in enumerate(song_ids):
        mp3_path = os.path.join(sample_mp3_dir, track_id + ".mp3")
        if not os.path.isfile(mp3_path):
            download_preview_mp3(sample_urls[track_id], mp3_path)

        extracted = extract_features(mp3_path, timeseries_length, hop_length,
                                     n_mfcc, n_chroma, n_bands, win_length)
        # The frame count is taken from the mfcc matrix and capped at the
        # requested series length.
        n_frames = min(timeseries_length, extracted['mfcc'].shape[1])

        left = 0
        for key, width in layout:
            right = left + width
            dataset[row, :n_frames, left:right] = extracted[key].T[0:n_frames, :]
            left = right

    return dataset

# Get User Inputs, Connect to Spotify

In [None]:
user_id = "spotify:user:djconxn"
playlist_uri = "spotify:playlist:7k88cMXb6KrA9Zf4g7IOry"

# Take the bare username from the last segment of the user URI instead of
# hard-coding it a second time, so the two values cannot drift apart.
username = user_id.rsplit(":", 1)[-1]

token = generate_token(username=username)
sp = spotipy.Spotify(auth=token)

# Download Data, Extract Features

In [7]:
metadata = download_playlist_metadata(sp, user_id, playlist_uri)

songs = metadata['id'].tolist()
# Map each song id to its preview clip URL.
urls = dict(zip(metadata['id'], metadata['preview_mp3']))

features_path = "data/processed/test_playlist.npy"
# Create the output folder up front: np.save raises FileNotFoundError when
# the directory is missing, and that would only surface after the slow
# feature extraction has already finished.
os.makedirs(os.path.dirname(features_path), exist_ok=True)

data = build_dataset(songs, urls)
np.save(features_path, data)

## TO DO: Work With Songs, Not Just Playlists

# Classify Songs

In [8]:
# Load the saved classifier and score every song's feature matrix.
model = keras.models.load_model("zouk_classifier.model")
metadata['preds'] = model.predict(data)

# Show the songs scoring above 0.75, ordered by title.
(
    metadata
    .loc[metadata['preds'] > 0.75, ['title', 'artist', 'preds']]
    .sort_values('title')
)

Unnamed: 0,title,artist,preds
55,Be Like You,Evergreen,0.841624
54,Be The One,Cupidon,0.85911
6,Beauty Beats,Beats Antique,0.843142
69,Been,Boom Forest,0.85625
73,Blindspot,Huntar,0.75107
18,Bonfire,Woodes,0.857491
0,Breezeblocks (Alt-J Remix),Ryan Helsing,0.764064
14,Calling Out for Love,Whilk & Misky,0.770796
39,Can't Hide,Whethan,0.859902
58,Cowards,Raleigh Ritchie,0.815111
