# An example of generating playlists by multilabel learning with audio features

In [1]:
%matplotlib inline

import os, sys, time
import pickle as pkl
import numpy as np
import pandas as pd
import sklearn as sk
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
sys.path.append('src')
import hdf5_getters as h5getters

In [3]:
# Locations of the input data, all relative to `data_dir`.
data_dir = 'data'
# Pickled playlists, each one a list of song IDs.
faotm = os.path.join(data_dir, 'aotm-2011/aotm-2011-subset.pkl')
# Pickled dict: song ID -> list of track IDs.
fmap  = os.path.join(data_dir, 'aotm-2011/songID2TrackID.pkl')
# Output file: pickled dict of song ID -> feature vector (written by the last cell).
ffeatures = os.path.join(data_dir, 'aotm-2011/songID2Features.pkl')
# Directory holding one `<trackID>.h5` file per track.
msd_h5dir = os.path.join(data_dir, 'msd/files')

## Data loading

Load playlists.

In [4]:
# Read the pickled playlists; the `with` block closes the file handle as soon
# as loading finishes instead of leaving it open until garbage collection.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open(faotm, 'rb') as fin:
    playlists_aotm = pkl.load(fin)

In [5]:
# Report how many playlists were loaded.
print('#Playlists: %d' % len(playlists_aotm))

#Playlists: 95298


In [6]:
# Peek at one playlist: it is a list of song IDs.
playlists_aotm[0]

['SOFDPDC12A58A7D198',
 'SOPIEQP12A8C13F268',
 'SOKMCJK12A6D4F6105',
 'SOGTGJR12A6310E08D',
 'SOLTBYJ12A6310F2BB',
 'SOBOXXN12A6D4FA1A2',
 'SOUQUFO12B0B80778E']

In [7]:
# Sorted list of the distinct song IDs appearing in any playlist.
song_set = sorted(set().union(*playlists_aotm))

In [8]:
# Report the number of distinct songs across all playlists.
print('#Songs: %d' % len(song_set))

#Songs: 119466


In [9]:
# Number of songs in each playlist, in playlist order.
lengths = list(map(len, playlists_aotm))
#plt.hist(lengths, bins=20)
print('Average playlist length: %.1f' % np.mean(lengths))

Average playlist length: 9.4


`Song_id --> Song_name` mapping.

In [10]:
#songID2Name = {s[1]: s[0] for p in playlists_aotm for s in p['playlist']}

Load the `song_id` --> `track_id` mapping; a song may correspond to multiple tracks.

In [11]:
# Read the pickled song ID -> track IDs mapping; the `with` block closes the
# file handle as soon as loading finishes.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open(fmap, 'rb') as fin:
    song2TrackID = pkl.load(fin)

In [12]:
# Number of songs that have a track mapping.
len(song2TrackID)

119466

In [13]:
# Preview the first ten (song ID -> track IDs) entries of the mapping.
dict(list(song2TrackID.items())[:10])

{'SOAAAFI12A6D4F9C66': ['TRZEXLQ128F1491D17'],
 'SOAAAMT12AB018C9C4': ['TRYIOYF12903CD4E73'],
 'SOAAASR12AB018A516': ['TRRTOHC12903CDD2EA'],
 'SOAABHX12AAF3B40E7': ['TRZARKN128F92DE096'],
 'SOAABLG12A6D4F73D2': ['TRFUCDA128F1455C96'],
 'SOAABMP12A6D4F7633': ['TRQVPBD128F1458060'],
 'SOAABMR12A6D4F70E4': ['TRVMASA128F149BB53'],
 'SOAABQL12A67020E76': ['TRJOVXI128E0791CFA'],
 'SOAABRB12A58A792A3': ['TRZGOQN128F935F425'],
 'SOAACBE12A6D4F7A54': ['TRHRWOM128F1466747']}

## Load audio features

Load the audio features for a given `songID`. If a song has more than one `trackID`, simply use the first available track.

In [14]:
# Open a single track file and explore what the `hdf5_getters` module exposes.
# The commented-out lines are kept as a catalogue of the available getters;
# uncomment one to inspect it. The try/finally guarantees the HDF5 file is
# closed even if one of the getters raises.
# NOTE(review): "SS" is annotated below as "song specific"; "Y" presumably marks
# the fields chosen as features -- confirm with the author.
trackID = 'TRQVPBD128F1458060'
h5 = h5getters.open_h5_file_read(os.path.join(msd_h5dir, trackID + '.h5'))
try:
    print(h5getters.get_num_songs(h5))
    #print(h5getters.get_artist_mbid(h5))
    #print(h5getters.get_artist_mbtags(h5))  # SS: song specific
    #print(h5getters.get_artist_mbtags_count(h5)) # SS array
    #print(h5getters.get_artist_name(h5))
    #print(h5getters.get_artist_playmeid(h5))
    #print(h5getters.get_artist_terms(h5).shape) # SS, Y, text
    #print(h5getters.get_artist_terms_freq(h5).shape) # SS, Y, numerical
    #print(h5getters.get_artist_terms_weight(h5).shape) # SS, Y, numerical
    #print(h5getters.get_audio_md5(h5))
    #print(h5getters.get_bars_confidence(h5).shape) # SS, Y, numerical
    #print(h5getters.get_bars_start(h5).shape) # SS, Y, numerical
    #print(h5getters.get_beats_confidence(h5).shape) # SS, Y, numerical
    #print(h5getters.get_beats_start(h5).shape) # SS, Y, numerical
    #print(h5getters.get_danceability(h5)) # Y
    #print(h5getters.get_duration(h5)) # Y, seconds
    #print(h5getters.get_end_of_fade_in(h5)) # Y, seconds
    #print(h5getters.get_energy(h5)) # Y
    #print(h5getters.get_key(h5)) # Y
    #print(h5getters.get_key_confidence(h5)) # Y
    #print(h5getters.get_loudness(h5)) # Y
    #print(h5getters.get_mode(h5)) # Y
    #print(h5getters.get_mode_confidence(h5)) # Y
    #print(h5getters.get_release(h5)) # album name
    #print(h5getters.get_release_7digitalid(h5))
    #print(h5getters.get_sections_confidence(h5).shape) # SS, Y, numerical
    #print(h5getters.get_sections_start(h5).shape) # SS, Y, numerical
    #print(h5getters.get_segments_confidence(h5).shape) # SS, Y, numerical
    #print(h5getters.get_segments_loudness_max(h5).shape) # SS, Y, numerical
    #print(h5getters.get_segments_loudness_max_time(h5).shape) # SS, Y, numerical
    #print(h5getters.get_segments_loudness_start(h5).shape) # SS, Y, numerical
    #print(h5getters.get_segments_pitches(h5).shape) # SS, Y, numerical, matrix with 12 cols
    #print(h5getters.get_segments_start(h5).shape) # SS, Y, numerical
    #print(h5getters.get_segments_timbre(h5).shape) # SS, Y, numerical, matrix with 12 cols
    #print(h5getters.get_similar_artists(h5).shape) # artist IDs
    #print(h5getters.get_song_hotttnesss(h5)) # Y
    #print(h5getters.get_song_id(h5)) # song ID
    #print(h5getters.get_start_of_fade_out(h5)) # Y, seconds
    #print(h5getters.get_tatums_confidence(h5).shape) # SS, Y, numerical
    #print(h5getters.get_tatums_start(h5).shape) # SS, Y, numerical
    #print(h5getters.get_tempo(h5)) # Y
    #print(h5getters.get_time_signature(h5)) # Y, usual number of beats per bar
    #print(h5getters.get_time_signature_confidence(h5)) # Y
    #print(h5getters.get_title(h5)) # song title
    #print(h5getters.get_track_7digitalid(h5))
    #print(h5getters.get_track_id(h5))
    #print(h5getters.get_year(h5)) # Y, year of release

    #print('age:', time.gmtime().tm_year - h5getters.get_year(h5))
finally:
    h5.close()

1


In [130]:
def _mean_var(values):
    """Return ``[mean, variance]`` of a 1-D array, or ``[0, 0]`` when it is empty.

    Guarding the empty case avoids NumPy's "mean of empty slice" warnings and
    the NaN values they produce.
    """
    if len(values) == 0:
        return [0, 0]
    return [np.mean(values), np.var(values)]


def _columnwise_mean_var(matrix):
    """Return the per-column means followed by the per-column variances as a flat list."""
    return np.mean(matrix, axis=0).tolist() + np.var(matrix, axis=0).tolist()


def _safe_ratio(part, whole):
    """Return ``part / whole``, or 0 when ``whole`` is not positive (avoids inf/NaN)."""
    return part / whole if whole > 0 else 0


def extract_msd_track_features(ftrack):
    """Build a fixed-length numeric feature vector from one track's HDF5 file.

    Variable-length per-track arrays (bars, beats, sections, segments, tatums)
    are summarised by their mean and variance; an empty array contributes two
    zeros. The pitch and timbre matrices are summarised column by column.
    Scalar fields are appended unchanged. The order of the features is fixed
    by the order of the statements below.

    Parameters
    ----------
    ftrack : str
        Path to an existing track file ending in ``.h5`` or ``.H5``.

    Returns
    -------
    numpy.ndarray
        1-D array of features with NaN/inf replaced by finite numbers
        (91 values when the pitch and timbre matrices have 12 columns each).

    Raises
    ------
    AssertionError
        If ``ftrack`` does not exist or does not have an ``.h5`` extension.
    """
    assert os.path.exists(ftrack)
    assert ftrack.endswith('.h5') or ftrack.endswith('.H5')

    features = []
    h5 = h5getters.open_h5_file_read(ftrack)
    try:
        # Artist terms (text, frequency, weight) were considered as features
        # but are left out; the frequency/weight arrays can be empty.

        # Bars and beats (each array can be empty).
        features += _mean_var(h5getters.get_bars_confidence(h5))
        features += _mean_var(h5getters.get_bars_start(h5))
        features += _mean_var(h5getters.get_beats_confidence(h5))
        features += _mean_var(h5getters.get_beats_start(h5))

        features.append(h5getters.get_danceability(h5))

        duration = h5getters.get_duration(h5)  # seconds
        features.append(duration)

        # Fade-in end, both absolute (seconds) and relative to the duration.
        end_of_fade_in = h5getters.get_end_of_fade_in(h5)
        features.append(end_of_fade_in)
        features.append(_safe_ratio(end_of_fade_in, duration))

        features.append(h5getters.get_energy(h5))
        features.append(h5getters.get_key(h5))
        features.append(h5getters.get_key_confidence(h5))
        features.append(h5getters.get_loudness(h5))
        features.append(h5getters.get_mode(h5))
        features.append(h5getters.get_mode_confidence(h5))

        # Sections (each array can be empty).
        features += _mean_var(h5getters.get_sections_confidence(h5))
        features += _mean_var(h5getters.get_sections_start(h5))

        # Segments: 1-D summaries, then the pitch matrix, start times and
        # the timbre matrix, in that order.
        features += _mean_var(h5getters.get_segments_confidence(h5))
        features += _mean_var(h5getters.get_segments_loudness_max(h5))
        features += _mean_var(h5getters.get_segments_loudness_max_time(h5))
        features += _mean_var(h5getters.get_segments_loudness_start(h5))
        features += _columnwise_mean_var(h5getters.get_segments_pitches(h5))
        features += _mean_var(h5getters.get_segments_start(h5))
        features += _columnwise_mean_var(h5getters.get_segments_timbre(h5))

        # Can be NaN; the final nan_to_num call maps it to 0.
        features.append(h5getters.get_song_hotttnesss(h5))

        # Fade-out start, both absolute (seconds) and relative to the duration.
        start_of_fade_out = h5getters.get_start_of_fade_out(h5)
        features.append(start_of_fade_out)
        features.append(_safe_ratio(start_of_fade_out, duration))

        # Tatums (each array can be empty).
        features += _mean_var(h5getters.get_tatums_confidence(h5))
        features += _mean_var(h5getters.get_tatums_start(h5))

        features.append(h5getters.get_tempo(h5))
        features.append(h5getters.get_time_signature(h5))  # usual number of beats per bar
        features.append(h5getters.get_time_signature_confidence(h5))

        # Age in years relative to the current calendar year, so this value
        # depends on when the notebook is run.
        # NOTE(review): if the dataset stores an unknown year as 0, the age
        # equals the current year -- confirm and handle if needed.
        year_of_release = h5getters.get_year(h5)
        features.append(time.gmtime().tm_year - year_of_release)
    finally:
        # Always release the HDF5 handle, even if a getter raised.
        h5.close()

    return np.nan_to_num(np.asarray(features), copy=False)

In [60]:
# Sanity check: extract the features of a single track and inspect the
# length of the resulting vector. Alternative track IDs are kept below.
#trackID = 'TRQVPBD128F1458060'
#trackID = 'TRZARKN128F92DE096'
trackID = 'TRZEXLQ128F1491D17'
extract_msd_track_features(os.path.join(msd_h5dir, trackID + '.h5')).shape

(91,)

In [61]:
def gen_aotm2011_song_features(songID, msd_h5dir = msd_h5dir, song2TrackID = song2TrackID):
    """Return the feature vector of the first track of `songID` found on disk.

    The track IDs mapped to `songID` are tried in order; the first one whose
    `<trackID>.h5` file exists under `msd_h5dir` is passed to
    `extract_msd_track_features`. Returns None when no track file exists.
    `songID` must be a key of `song2TrackID` (checked with an assertion).
    """
    assert(songID in song2TrackID)

    # Lazily build the candidate file paths so they are checked one at a time.
    candidate_paths = (os.path.join(msd_h5dir, tid + '.h5') for tid in song2TrackID[songID])
    for h5_path in candidate_paths:
        if os.path.exists(h5_path):
            return extract_msd_track_features(h5_path)

    # no track available
    return None

In [135]:
# Try the song-level extractor on one song; alternative choices are kept below.
#songID = 'SOFDPDC12A58A7D198'
#songID = 'SOKMCJK12A6D4F6105'
#songID = 'SOGTGJR12A6310E08D'
#songID = song_set[139]
#songID = song_set[443]
songID = song_set[518]
gen_aotm2011_song_features(songID)

array([  1.48666667e-01,   1.74568889e-02,   2.21179167e+00,
         1.95335714e+00,   5.40800000e-01,   4.32625600e-02,
         2.75852750e+00,   2.48118284e+00,   0.00000000e+00,
         5.77261000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   6.00000000e+00,   4.35000000e-01,
        -1.02700000e+01,   0.00000000e+00,   4.80000000e-01,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   5.32551724e-01,   1.12608040e-01,
        -1.13642414e+01,   2.65759901e+00,   4.72613793e-02,
         8.62155108e-04,  -2.09690345e+01,   1.07815580e+02,
         6.47344828e-01,   7.44103448e-01,   4.66517241e-01,
         4.76551724e-01,   6.35827586e-01,   5.37758621e-01,
         6.06379310e-01,   6.02137931e-01,   5.65103448e-01,
         5.75034483e-01,   4.95862069e-01,   2.78551724e-01,
         7.98858121e-02,   8.14030583e-02,   4.17908704e-02,
         4.37474887e-02,   4.24103496e-02,   4.94388038e-02,
         1.00368580e-01,

In [132]:
# Extract a feature vector for every song in `song_set`, keyed by song ID.
# A progress line is rewritten in place after every 10th song.
#ffeatures = os.path.join(data_dir, 'features.pkl')
song2Feature = {}
cnt = 0  # remains 0 when song_set is empty
for cnt, songID in enumerate(song_set, start=1):
    if not cnt % 10:
        sys.stdout.write('\r%d / %d' % (cnt, len(song_set)))
        sys.stdout.flush()

    features = gen_aotm2011_song_features(songID)
    # Every song is expected to have at least one track file on disk.
    assert features is not None
    song2Feature[songID] = features

119460 / 119466

In [133]:
# Number of songs for which a feature vector was stored.
len(song2Feature)

119466

In [134]:
# Persist the song ID -> feature vector mapping. The `with` block flushes and
# closes the file on exit, so the pickle is completely written to disk.
with open(ffeatures, 'wb') as fout:
    pkl.dump(song2Feature, fout)