In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from tqdm import tqdm_notebook as tqdm
import json
import pandas as pd
import os
import h5py
from ast import literal_eval
import re
import pickle
import collections
import warnings
warnings.filterwarnings('ignore')

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.kernel_approximation import RBFSampler

In [2]:
# Only the 10 OpenMIC classes listed in this class map are considered.
with open('class-map-10.json', mode='r') as class_map_file:
    class_map = json.load(class_map_file)
    
# Class alignment table: OpenMIC class name (key) -> IRMAS tag(s) (value).
# 'guitar' is the only entry that maps to a list of two IRMAS tags.
class_align = dict(
    cello='cel',
    clarinet='cla',
    flute='flu',
    guitar=['gac', 'gel'],
    organ='org',
    piano='pia',
    saxophone='sax',
    trumpet='tru',
    violin='vio',
    voice='voi',
)

In [3]:
# IRMAS genre tags as they appear inside the clip keys: country-folk ([cou_fol]), classical ([cla]),
# pop-rock ([pop_roc]), latin-soul ([lat_sou]) and jazz-blues ([jaz_blu]).
# aligned_genre is the common genre vocabulary onto which both datasets are mapped;
# one LDA is fitted per entry, in this order.
aligned_genre = ['pop_roc', 'jazz_blue', 'classical', 'country_folk', 'latin_soul']

In [4]:
# Open the embedding store read-only; the handle stays open because later
# cells read their features and keys directly from it.
embeddings = h5py.File('embeddings.h5', mode='r')


def printname(name):
    """Print one member path of the HDF5 file (callback for ``visit``)."""
    print(name)


# List every group / dataset path contained in the file.
embeddings.visit(printname)

irmas
irmas/openl3
irmas/openl3/features
irmas/openl3/keys
irmas/vggish
irmas/vggish/features
irmas/vggish/keys
irmas/yamnet
irmas/yamnet/features
irmas/yamnet/keys
openmic
openmic/openl3
openmic/openl3/features
openmic/openl3/keys
openmic/vggish
openmic/vggish/features
openmic/vggish/keys
openmic/yamnet
openmic/yamnet/features
openmic/yamnet/keys


# OpenL3

In [5]:
# Embedding processed in this section; it selects the HDF5 groups that are read
# and is part of the name of the pickle file written at the end of the section.
embedding_name = 'openl3'

## irmas

In [6]:
# Frame-level embeddings and, for every frame, the key of the clip it belongs to.
feature = np.array(embeddings['irmas'][embedding_name]['features'])
keys_ori = np.array(embeddings['irmas'][embedding_name]['keys'])
print(feature.shape, keys_ori.shape)

# Sorted unique clip keys, the clip index of every frame, and the number of frames per clip.
key_clip, frame_clip_idx, frames_per_clip = np.unique(
    keys_ori, return_inverse=True, return_counts=True)
print(key_clip.shape)

# Group the frame rows by clip with one stable sort instead of comparing every
# frame key against every clip key (O(frames * clips)). The stable sort keeps the
# frames of a clip in their original order, so each mean is taken over the same
# rows, in the same order, as a per-clip boolean mask would select.
frame_order = np.argsort(np.ravel(frame_clip_idx), kind='stable')
clip_ends = np.cumsum(frames_per_clip)
clip_starts = clip_ends - frames_per_clip

feature_clip = []

for start, end in tqdm(zip(clip_starts, clip_ends), total=len(key_clip)):
    # Clip-level embedding = mean over the frames of the clip.
    feature_clip.append(np.mean(feature[frame_order[start:end], :], axis=0))

feature_clip = np.array(feature_clip)
print(feature_clip.shape, key_clip.shape)

(167625, 512) (167625,)
(6705,)


  0%|          | 0/6705 [00:00<?, ?it/s]

(6705, 512) (6705,)


In [7]:
# Each split file holds one clip key per row in a single column.
# `read_csv(..., squeeze=True)` was deprecated in pandas 1.4 and removed in 2.0,
# so the single column is squeezed to a Series explicitly.
key_train = list(pd.read_csv('irmas_train.csv', header=None).squeeze('columns'))
key_test = list(pd.read_csv('irmas_test.csv', header=None).squeeze('columns'))

# Drop the first two characters and the last one of every stored key
# (presumably a "b'...'" wrapper around the actual key - TODO confirm against the CSV).
key_train = np.array([k[2:-1] for k in key_train])
key_test = np.array([k[2:-1] for k in key_test])

In [8]:
# Go through all clip keys and store each row number in either idx_train or idx_test.
# Membership is tested against sets (constant-time lookups); `in` on the NumPy
# key arrays rescans the whole array for every clip.
train_keys = set(map(str, key_train))
test_keys = set(map(str, key_test))

idx_train, idx_test = [], []

for k, clip_key in enumerate(key_clip):
    clip_key = str(clip_key)
    if clip_key in train_keys:
        idx_train.append(k)
    elif clip_key in test_keys:
        idx_test.append(k)
    else:
        # This should never happen, but better safe than sorry.
        raise RuntimeError('Unknown sample key={}! Abort!'.format(key_clip[k]))

# cast the idx_* lists to numpy arrays so they can be used for fancy indexing
idx_train = np.asarray(idx_train)
idx_test = np.asarray(idx_test)

In [9]:
# From here on key_train holds the training clip keys in key_clip order
# (this replaces the key list read from the split file).
key_train = np.array(key_clip[idx_train])

# The genre tag is the text between the last '[' and the last ']' of each key.
key_train_genre = []
for clip_key in key_train:
    tag_start = clip_key.rindex('[') + 1
    tag_end = clip_key.rindex(']')
    key_train_genre.append(clip_key[tag_start:tag_end])

In [10]:
# Show the distinct genre tags found in the IRMAS training keys.
set(key_train_genre)

{'cla', 'cou_fol', 'jaz_blu', 'lat_sou', 'pop_roc'}

In [11]:
# align genre: rename the IRMAS tags to the names used in aligned_genre.
# Tags without an entry (e.g. 'pop_roc') are kept unchanged.
irmas_genre_rename = {
    'jaz_blu': 'jazz_blue',
    'cla': 'classical',
    'cou_fol': 'country_folk',
    'lat_sou': 'latin_soul',
}
key_train_genre = [irmas_genre_rename.get(item, item) for item in key_train_genre]

In [12]:
# Number of IRMAS training clips per aligned genre.
collections.Counter(key_train_genre)

Counter({'pop_roc': 1853,
         'classical': 1240,
         'jazz_blue': 1539,
         'country_folk': 365,
         'latin_soul': 42})

In [13]:
# IRMAS training set: clip-level features, dataset label 0 for every clip,
# and the aligned genre of every clip.
X_train_ir = feature_clip[idx_train]
Y_ir = np.zeros(X_train_ir.shape[0])
Y_genre_ir = np.asarray(key_train_genre)

print(X_train_ir.shape, Y_ir.shape, Y_genre_ir.shape)

(5039, 512) (5039,) (5039,)


## openmic

In [14]:
# Frame-level embeddings and, for every frame, the key of the clip it belongs to.
feature = np.array(embeddings['openmic'][embedding_name]['features'])
keys = np.array(embeddings['openmic'][embedding_name]['keys'])
print(feature.shape, keys.shape)

# Sorted unique clip keys, the clip index of every frame, and the number of frames per clip.
key_clip, frame_clip_idx, frames_per_clip = np.unique(
    keys, return_inverse=True, return_counts=True)

# Group the frame rows by clip with one stable sort instead of comparing every
# frame key against every clip key (O(frames * clips)). The stable sort keeps the
# frames of a clip in their original order, so each mean is taken over the same
# rows, in the same order, as a per-clip boolean mask would select.
frame_order = np.argsort(np.ravel(frame_clip_idx), kind='stable')
clip_ends = np.cumsum(frames_per_clip)
clip_starts = clip_ends - frames_per_clip

X = []

for start, end in tqdm(zip(clip_starts, clip_ends), total=len(key_clip)):
    # Clip-level embedding = mean over the frames of the clip.
    X.append(np.mean(feature[frame_order[start:end], :], axis=0))

X = np.array(X)
print(X.shape, key_clip.shape)

key_clip = np.array(key_clip)
key_clip

(1900000, 512) (1900000,)


  0%|          | 0/20000 [00:00<?, ?it/s]

(20000, 512) (20000,)


array(['000046_3840', '000135_483840', '000139_119040', ...,
       '155307_211200', '155310_372480', '155311_453120'], dtype=object)

In [15]:
# train-test split
# `read_csv(..., squeeze=True)` was deprecated in pandas 1.4 and removed in 2.0,
# so the single column is squeezed to a Series explicitly.
split_train = pd.read_csv('openmic2018_train.csv', header=None).squeeze('columns')
split_test = pd.read_csv('openmic2018_test.csv', header=None).squeeze('columns')

print('# Train: {},  # Test: {}'.format(len(split_train), len(split_test)))

train_set = set(split_train)
test_set = set(split_test)

# Row numbers (positions in key_clip) of the training and test clips.
idx_train, idx_test = [], []

for idx, n in enumerate(key_clip):
    if n in train_set:
        idx_train.append(idx)
    elif n in test_set:
        idx_test.append(idx)
    else:
        # `n` is the key itself, not a position: report it directly. Indexing
        # key_clip with it would raise an IndexError and hide this message.
        raise RuntimeError('Unknown sample key={}! Abort!'.format(n))

idx_train = np.asarray(idx_train)
idx_test = np.asarray(idx_test)

# Train: 14915,  # Test: 5085


In [16]:
# key-label map using the information from the dataset source
# NOTE(review): absolute, machine-specific path; the commented-out relative
# path below is the portable alternative.
data_root = '/import/c4dm-datasets/openmic-2018/openmic-2018/'
# Replaced the above by a local symbolic link within the github repo
# data_root = 'openmic-2018/'

meta = pd.read_csv(data_root + 'openmic-2018-metadata.csv')
# idx_train holds row positions within key_clip (the sorted unique clip keys).
# Using it to index the metadata assumes the CSV rows are in that same order
# - TODO confirm, or look the rows up by key instead.
train_genre_meta = list(meta['track_genres'][idx_train])
# Number of training clips for which a genre entry was collected.
len(train_genre_meta)

14915

In [17]:
# Reduce the genre annotation of every training clip to a single genre title.
key_genre_om = []

for raw_genres in tqdm(train_genre_meta):
    if not isinstance(raw_genres, str):
        # Non-string entries (presumably missing annotations - TODO confirm) become 'other'.
        key_genre_om.append('other')
        continue
    # The string is parsed as a Python literal; only the title of its first entry is kept.
    first_genre = literal_eval(raw_genres)[0]
    key_genre_om.append(first_genre['genre_title'])

  0%|          | 0/14915 [00:00<?, ?it/s]

In [18]:
def list_match(A, B):
    """Return True when A and B share at least one string, ignoring case.

    Both arguments are iterables of ``str``; the comparison is done on the
    lower-cased elements and requires whole elements to be equal.
    """
    lowered_a = {str.lower(entry) for entry in A}
    lowered_b = {str.lower(entry) for entry in B}
    return not lowered_a.isdisjoint(lowered_b)

# Split every aligned genre name into its letter-only tokens once, up front.
aligned_genre_tokens = [(genre, re.split('[^a-zA-Z]', genre)) for genre in aligned_genre]

key_genre_om_align = []

for item in key_genre_om:
    key_genre_om_item = re.split('[^a-zA-Z]', item)
    # Titles that share no token with any aligned genre stay 'other'.
    genre_match = 'other'
    for genre, genre_item in aligned_genre_tokens:
        # Every aligned genre is tested, so the last matching one wins.
        # NOTE(review): tokens must be equal as whole words (case-insensitive), so a
        # title token 'rock' does not match 'roc' and 'blues' does not match 'blue'
        # - confirm this is the intended matching rule.
        if list_match(key_genre_om_item, genre_item):
            genre_match = genre
    key_genre_om_align.append(genre_match)

key_genre_om = key_genre_om_align

In [19]:
# Number of OpenMIC training clips per aligned genre ('other' = no aligned genre matched).
collections.Counter(key_genre_om)

Counter({'pop_roc': 826,
         'other': 11335,
         'country_folk': 1053,
         'jazz_blue': 778,
         'latin_soul': 76,
         'classical': 847})

In [20]:
# OpenMIC training set: clip-level features, dataset label 1 for every clip,
# and the aligned genre of every clip.
X_train_om = X[idx_train, :]
Y_om = np.ones(X_train_om.shape[0])
Y_genre_om = np.asarray(key_genre_om)

print(X_train_om.shape, Y_om.shape, Y_genre_om.shape)

(14915, 512) (14915,) (14915,)


In [21]:
# Stack both datasets, IRMAS rows first and OpenMIC rows second, keeping the
# features, the dataset labels and the genres aligned row by row.
X_train = np.concatenate((X_train_ir, X_train_om), axis=0)
Y = np.concatenate((Y_ir, Y_om))
Y_genre = np.concatenate((Y_genre_ir, Y_genre_om))

print(X_train.shape, Y.shape, Y_genre.shape)

(19954, 512) (19954,) (19954,)


In [22]:
# Number of training clips per aligned genre over both datasets.
collections.Counter(Y_genre)

Counter({'pop_roc': 2679,
         'classical': 2087,
         'jazz_blue': 2317,
         'country_folk': 1418,
         'latin_soul': 118,
         'other': 11335})

## kernelize & LDA

In [24]:
# kernelize embedding: map the features through random Fourier features of an
# RBF kernel, keeping the output dimensionality equal to the input one.
# NOTE: X_train is overwritten, so running this cell twice kernelizes the
# already kernelized features.
n_embedding_dims = X_train.shape[1]
feature_map_fourier = RBFSampler(gamma=1, n_components=n_embedding_dims, random_state=0)
feature_map_fourier.fit(X_train, Y)
X_train = feature_map_fourier.transform(X_train)
X_train.shape

(19954, 512)

In [25]:
# loop for each genre: fit one LDA that separates the two datasets
# (Y is 0 for IRMAS rows and 1 for OpenMIC rows) within that genre.
coef_name = 'LDA_coef_' + embedding_name
coef_per_genre = []

for genre in aligned_genre:
    genre_mask = Y_genre == genre
    X_train_sub, Y_sub = X_train[genre_mask], Y[genre_mask]
    print(X_train_sub.shape, Y_sub.shape, collections.Counter(Y_sub))

    LDA = LinearDiscriminantAnalysis(solver='eigen', shrinkage='auto')
    LDA.fit(X_train_sub, Y_sub)

    coef_per_genre.append(LDA.coef_.copy())

# Publish the stacked coefficients (one row per genre, in aligned_genre order)
# under a module-level name derived from the embedding name.
globals()[coef_name] = np.squeeze(np.array(coef_per_genre))
print(globals()[coef_name].shape)

with open('kernelize_LDA_' + embedding_name + '_coef_genre.pickle', 'wb') as fdesc:
    pickle.dump(globals()[coef_name], fdesc)

(2679, 512) (2679,) Counter({0.0: 1853, 1.0: 826})
(2317, 512) (2317,) Counter({0.0: 1539, 1.0: 778})
(2087, 512) (2087,) Counter({0.0: 1240, 1.0: 847})
(1418, 512) (1418,) Counter({1.0: 1053, 0.0: 365})
(118, 512) (118,) Counter({1.0: 76, 0.0: 42})
(5, 512)


# VGGish

In [26]:
# Embedding processed in this section; it selects the HDF5 groups that are read
# and is part of the name of the pickle file written at the end of the section.
embedding_name = 'vggish'

## irmas

In [27]:
# Frame-level embeddings and, for every frame, the key of the clip it belongs to.
feature = np.array(embeddings['irmas'][embedding_name]['features'])
keys_ori = np.array(embeddings['irmas'][embedding_name]['keys'])
print(feature.shape, keys_ori.shape)

# Sorted unique clip keys, the clip index of every frame, and the number of frames per clip.
key_clip, frame_clip_idx, frames_per_clip = np.unique(
    keys_ori, return_inverse=True, return_counts=True)
print(key_clip.shape)

# Group the frame rows by clip with one stable sort instead of comparing every
# frame key against every clip key (O(frames * clips)). The stable sort keeps the
# frames of a clip in their original order, so each mean is taken over the same
# rows, in the same order, as a per-clip boolean mask would select.
frame_order = np.argsort(np.ravel(frame_clip_idx), kind='stable')
clip_ends = np.cumsum(frames_per_clip)
clip_starts = clip_ends - frames_per_clip

feature_clip = []

for start, end in tqdm(zip(clip_starts, clip_ends), total=len(key_clip)):
    # Clip-level embedding = mean over the frames of the clip.
    feature_clip.append(np.mean(feature[frame_order[start:end], :], axis=0))

feature_clip = np.array(feature_clip)
print(feature_clip.shape, key_clip.shape)

(13410, 128) (13410,)
(6705,)


  0%|          | 0/6705 [00:00<?, ?it/s]

(6705, 128) (6705,)


In [28]:
# Re-read the IRMAS training keys (one key per row, single column).
# `read_csv(..., squeeze=True)` was deprecated in pandas 1.4 and removed in 2.0,
# so the single column is squeezed to a Series explicitly.
key_train = list(pd.read_csv('irmas_train.csv', header=None).squeeze('columns'))
# Drop the first two characters and the last one of every stored key
# (presumably a "b'...'" wrapper around the actual key - TODO confirm against the CSV).
key_train = np.array([k[2:-1] for k in key_train])

In [29]:
# Go through all clip keys and store each row number in either idx_train or idx_test.
# key_test is not re-read in this section; it must already exist from an earlier cell.
# Membership is tested against sets (constant-time lookups); `in` on the NumPy
# key arrays rescans the whole array for every clip.
train_keys = set(map(str, key_train))
test_keys = set(map(str, key_test))

idx_train, idx_test = [], []

for k, clip_key in enumerate(key_clip):
    clip_key = str(clip_key)
    if clip_key in train_keys:
        idx_train.append(k)
    elif clip_key in test_keys:
        idx_test.append(k)
    else:
        # This should never happen, but better safe than sorry.
        raise RuntimeError('Unknown sample key={}! Abort!'.format(key_clip[k]))

# cast the idx_* lists to numpy arrays so they can be used for fancy indexing
idx_train = np.asarray(idx_train)
idx_test = np.asarray(idx_test)

In [30]:
# Training rows of the clip-level IRMAS features for this embedding.
# Y_ir and Y_genre_ir are not recomputed here; they must already exist from an earlier cell.
X_train_ir = feature_clip[idx_train]
print(X_train_ir.shape, Y_ir.shape, Y_genre_ir.shape)

(5039, 128) (5039,) (5039,)


## openmic

In [31]:
# Frame-level embeddings and, for every frame, the key of the clip it belongs to.
feature = np.array(embeddings['openmic'][embedding_name]['features'])
keys = np.array(embeddings['openmic'][embedding_name]['keys'])
print(feature.shape, keys.shape)

# Sorted unique clip keys, the clip index of every frame, and the number of frames per clip.
key_clip, frame_clip_idx, frames_per_clip = np.unique(
    keys, return_inverse=True, return_counts=True)

# Group the frame rows by clip with one stable sort instead of comparing every
# frame key against every clip key (O(frames * clips)). The stable sort keeps the
# frames of a clip in their original order, so each mean is taken over the same
# rows, in the same order, as a per-clip boolean mask would select.
frame_order = np.argsort(np.ravel(frame_clip_idx), kind='stable')
clip_ends = np.cumsum(frames_per_clip)
clip_starts = clip_ends - frames_per_clip

X = []

for start, end in tqdm(zip(clip_starts, clip_ends), total=len(key_clip)):
    # Clip-level embedding = mean over the frames of the clip.
    X.append(np.mean(feature[frame_order[start:end], :], axis=0))

X = np.array(X)
print(X.shape, key_clip.shape)

key_clip = np.array(key_clip)
key_clip

(180000, 128) (180000,)


  0%|          | 0/20000 [00:00<?, ?it/s]

(20000, 128) (20000,)


array(['000046_3840', '000135_483840', '000139_119040', ...,
       '155307_211200', '155310_372480', '155311_453120'], dtype=object)

In [32]:
# train-test split
# `read_csv(..., squeeze=True)` was deprecated in pandas 1.4 and removed in 2.0,
# so the single column is squeezed to a Series explicitly.
split_train = pd.read_csv('openmic2018_train.csv', header=None).squeeze('columns')
split_test = pd.read_csv('openmic2018_test.csv', header=None).squeeze('columns')

print('# Train: {},  # Test: {}'.format(len(split_train), len(split_test)))

train_set = set(split_train)
test_set = set(split_test)

# Row numbers (positions in key_clip) of the training and test clips.
idx_train, idx_test = [], []

for idx, n in enumerate(key_clip):
    if n in train_set:
        idx_train.append(idx)
    elif n in test_set:
        idx_test.append(idx)
    else:
        # `n` is the key itself, not a position: report it directly. Indexing
        # key_clip with it would raise an IndexError and hide this message.
        raise RuntimeError('Unknown sample key={}! Abort!'.format(n))

idx_train = np.asarray(idx_train)
idx_test = np.asarray(idx_test)

# Train: 14915,  # Test: 5085


In [33]:
# Training rows of the clip-level OpenMIC features for this embedding.
# Y_om and Y_genre_om are not recomputed here; they must already exist from an earlier cell.
X_train_om = X[idx_train, :]
print(X_train_om.shape, Y_om.shape, Y_genre_om.shape)

(14915, 128) (14915,) (14915,)


In [34]:
# Stack the features of both datasets, IRMAS rows first and OpenMIC rows second.
# Y and Y_genre are not rebuilt here; they must already exist from an earlier cell.
X_train = np.concatenate((X_train_ir, X_train_om), axis=0)
print(X_train.shape, Y.shape, Y_genre.shape)

(19954, 128) (19954,) (19954,)


## kernelize & LDA

In [35]:
# kernelize embedding: map the features through random Fourier features of an
# RBF kernel, keeping the output dimensionality equal to the input one.
# NOTE: X_train is overwritten, so running this cell twice kernelizes the
# already kernelized features.
n_embedding_dims = X_train.shape[1]
feature_map_fourier = RBFSampler(gamma=1, n_components=n_embedding_dims, random_state=0)
feature_map_fourier.fit(X_train, Y)
X_train = feature_map_fourier.transform(X_train)
X_train.shape

(19954, 128)

In [36]:
# loop for each genre: fit one LDA that separates the rows by their dataset
# label Y within that genre, and collect its coefficients.
coef_name = 'LDA_coef_' + embedding_name
coef_per_genre = []

for genre in aligned_genre:
    genre_mask = Y_genre == genre
    X_train_sub, Y_sub = X_train[genre_mask], Y[genre_mask]
    print(X_train_sub.shape, Y_sub.shape)

    LDA = LinearDiscriminantAnalysis(solver='eigen', shrinkage='auto')
    LDA.fit(X_train_sub, Y_sub)

    coef_per_genre.append(LDA.coef_.copy())

# Publish the stacked coefficients (one row per genre, in aligned_genre order)
# under a module-level name derived from the embedding name.
globals()[coef_name] = np.squeeze(np.array(coef_per_genre))
# A bare `.shape` expression in the middle of a cell displays nothing, so the
# shape is printed explicitly.
print(globals()[coef_name].shape)

with open('kernelize_LDA_' + embedding_name + '_coef_genre.pickle', 'wb') as fdesc:
    pickle.dump(globals()[coef_name], fdesc)

(2679, 128) (2679,)
(2317, 128) (2317,)
(2087, 128) (2087,)
(1418, 128) (1418,)
(118, 128) (118,)


# YAMnet

In [37]:
# Embedding processed in this section; it selects the HDF5 groups that are read
# and is part of the name of the pickle file written at the end of the section.
embedding_name = 'yamnet'

## irmas

In [38]:
# Frame-level embeddings and, for every frame, the key of the clip it belongs to.
feature = np.array(embeddings['irmas'][embedding_name]['features'])
keys_ori = np.array(embeddings['irmas'][embedding_name]['keys'])
print(feature.shape, keys_ori.shape)

# Sorted unique clip keys, the clip index of every frame, and the number of frames per clip.
key_clip, frame_clip_idx, frames_per_clip = np.unique(
    keys_ori, return_inverse=True, return_counts=True)
print(key_clip.shape)

# Group the frame rows by clip with one stable sort instead of comparing every
# frame key against every clip key (O(frames * clips)). The stable sort keeps the
# frames of a clip in their original order, so each mean is taken over the same
# rows, in the same order, as a per-clip boolean mask would select.
frame_order = np.argsort(np.ravel(frame_clip_idx), kind='stable')
clip_ends = np.cumsum(frames_per_clip)
clip_starts = clip_ends - frames_per_clip

feature_clip = []

for start, end in tqdm(zip(clip_starts, clip_ends), total=len(key_clip)):
    # Clip-level embedding = mean over the frames of the clip.
    feature_clip.append(np.mean(feature[frame_order[start:end], :], axis=0))

feature_clip = np.array(feature_clip)
print(feature_clip.shape, key_clip.shape)

(33525, 1024) (33525,)
(6705,)


  0%|          | 0/6705 [00:00<?, ?it/s]

(6705, 1024) (6705,)


In [39]:
# Re-read the IRMAS training keys (one key per row, single column).
# `read_csv(..., squeeze=True)` was deprecated in pandas 1.4 and removed in 2.0,
# so the single column is squeezed to a Series explicitly.
key_train = list(pd.read_csv('irmas_train.csv', header=None).squeeze('columns'))
# Drop the first two characters and the last one of every stored key
# (presumably a "b'...'" wrapper around the actual key - TODO confirm against the CSV).
key_train = np.array([k[2:-1] for k in key_train])

In [40]:
# Go through all clip keys and store each row number in either idx_train or idx_test.
# key_test is not re-read in this section; it must already exist from an earlier cell.
# Membership is tested against sets (constant-time lookups); `in` on the NumPy
# key arrays rescans the whole array for every clip.
train_keys = set(map(str, key_train))
test_keys = set(map(str, key_test))

idx_train, idx_test = [], []

for k, clip_key in enumerate(key_clip):
    clip_key = str(clip_key)
    if clip_key in train_keys:
        idx_train.append(k)
    elif clip_key in test_keys:
        idx_test.append(k)
    else:
        # This should never happen, but better safe than sorry.
        raise RuntimeError('Unknown sample key={}! Abort!'.format(key_clip[k]))

# cast the idx_* lists to numpy arrays so they can be used for fancy indexing
idx_train = np.asarray(idx_train)
idx_test = np.asarray(idx_test)

In [41]:
# Training rows of the clip-level IRMAS features for this embedding.
# Y_ir and Y_genre_ir are not recomputed here; they must already exist from an earlier cell.
X_train_ir = feature_clip[idx_train]
print(X_train_ir.shape, Y_ir.shape, Y_genre_ir.shape)

(5039, 1024) (5039,) (5039,)


## openmic

In [42]:
# Frame-level embeddings and, for every frame, the key of the clip it belongs to.
feature = np.array(embeddings['openmic'][embedding_name]['features'])
keys = np.array(embeddings['openmic'][embedding_name]['keys'])
print(feature.shape, keys.shape)

# Sorted unique clip keys, the clip index of every frame, and the number of frames per clip.
key_clip, frame_clip_idx, frames_per_clip = np.unique(
    keys, return_inverse=True, return_counts=True)

# Group the frame rows by clip with one stable sort instead of comparing every
# frame key against every clip key (O(frames * clips)). The stable sort keeps the
# frames of a clip in their original order, so each mean is taken over the same
# rows, in the same order, as a per-clip boolean mask would select.
frame_order = np.argsort(np.ravel(frame_clip_idx), kind='stable')
clip_ends = np.cumsum(frames_per_clip)
clip_starts = clip_ends - frames_per_clip

X = []

for start, end in tqdm(zip(clip_starts, clip_ends), total=len(key_clip)):
    # Clip-level embedding = mean over the frames of the clip.
    X.append(np.mean(feature[frame_order[start:end], :], axis=0))

X = np.array(X)
print(X.shape, key_clip.shape)

key_clip = np.array(key_clip)

# train-test split
# `read_csv(..., squeeze=True)` was deprecated in pandas 1.4 and removed in 2.0,
# so the single column is squeezed to a Series explicitly.
split_train = pd.read_csv('openmic2018_train.csv', header=None).squeeze('columns')
split_test = pd.read_csv('openmic2018_test.csv', header=None).squeeze('columns')

print('# Train: {},  # Test: {}'.format(len(split_train), len(split_test)))

train_set = set(split_train)
test_set = set(split_test)

# Row numbers (positions in key_clip) of the training and test clips.
idx_train, idx_test = [], []

for idx, n in enumerate(key_clip):
    if n in train_set:
        idx_train.append(idx)
    elif n in test_set:
        idx_test.append(idx)
    else:
        # `n` is the key itself, not a position: report it directly. Indexing
        # key_clip with it would raise an IndexError and hide this message.
        raise RuntimeError('Unknown sample key={}! Abort!'.format(n))

idx_train = np.asarray(idx_train)
idx_test = np.asarray(idx_test)

(380000, 1024) (380000,)


  0%|          | 0/20000 [00:00<?, ?it/s]

(20000, 1024) (20000,)
# Train: 14915,  # Test: 5085


In [43]:
# train-test split (computes the same idx_train / idx_test as the split at the
# end of the preceding cell)
# `read_csv(..., squeeze=True)` was deprecated in pandas 1.4 and removed in 2.0,
# so the single column is squeezed to a Series explicitly.
split_train = pd.read_csv('openmic2018_train.csv', header=None).squeeze('columns')
split_test = pd.read_csv('openmic2018_test.csv', header=None).squeeze('columns')

print('# Train: {},  # Test: {}'.format(len(split_train), len(split_test)))

train_set = set(split_train)
test_set = set(split_test)

# Row numbers (positions in key_clip) of the training and test clips.
idx_train, idx_test = [], []

for idx, n in enumerate(key_clip):
    if n in train_set:
        idx_train.append(idx)
    elif n in test_set:
        idx_test.append(idx)
    else:
        # `n` is the key itself, not a position: report it directly. Indexing
        # key_clip with it would raise an IndexError and hide this message.
        raise RuntimeError('Unknown sample key={}! Abort!'.format(n))

idx_train = np.asarray(idx_train)
idx_test = np.asarray(idx_test)

# Train: 14915,  # Test: 5085


In [44]:
# Training rows of the clip-level OpenMIC features for this embedding.
# Y_om and Y_genre_om are not recomputed here; they must already exist from an earlier cell.
X_train_om = X[idx_train, :]
print(X_train_om.shape, Y_om.shape, Y_genre_om.shape)

(14915, 1024) (14915,) (14915,)


In [45]:
# Stack the features of both datasets, IRMAS rows first and OpenMIC rows second.
# Y and Y_genre are not rebuilt here; they must already exist from an earlier cell.
X_train = np.concatenate((X_train_ir, X_train_om), axis=0)
print(X_train.shape, Y.shape, Y_genre.shape)

(19954, 1024) (19954,) (19954,)


## kernelize & LDA

In [46]:
# kernelize embedding: map the features through random Fourier features of an
# RBF kernel, keeping the output dimensionality equal to the input one.
# NOTE: X_train is overwritten, so running this cell twice kernelizes the
# already kernelized features.
n_embedding_dims = X_train.shape[1]
feature_map_fourier = RBFSampler(gamma=1, n_components=n_embedding_dims, random_state=0)
feature_map_fourier.fit(X_train, Y)
X_train = feature_map_fourier.transform(X_train)
X_train.shape

(19954, 1024)

In [47]:
# loop for each genre: fit one LDA that separates the rows by their dataset
# label Y within that genre, and collect its coefficients.
coef_name = 'LDA_coef_' + embedding_name
coef_per_genre = []

for genre in aligned_genre:
    genre_mask = Y_genre == genre
    X_train_sub, Y_sub = X_train[genre_mask], Y[genre_mask]
    print(X_train_sub.shape, Y_sub.shape)

    LDA = LinearDiscriminantAnalysis(solver='eigen', shrinkage='auto')
    LDA.fit(X_train_sub, Y_sub)

    coef_per_genre.append(LDA.coef_.copy())

# Publish the stacked coefficients (one row per genre, in aligned_genre order)
# under a module-level name derived from the embedding name.
globals()[coef_name] = np.squeeze(np.array(coef_per_genre))
# A bare `.shape` expression in the middle of a cell displays nothing, so the
# shape is printed explicitly.
print(globals()[coef_name].shape)

with open('kernelize_LDA_' + embedding_name + '_coef_genre.pickle', 'wb') as fdesc:
    pickle.dump(globals()[coef_name], fdesc)

(2679, 1024) (2679,)
(2317, 1024) (2317,)
(2087, 1024) (2087,)
(1418, 1024) (1418,)
(118, 1024) (118,)
