In [1]:
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegressionCV
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [2]:
path="/home/ftamagnan/dataset/"
name="total_metadata_training.npz"
data=dict(np.load(path+name))

data["random"]= np.random.rand(6729,3)


data['vae_embeddings']=data['vae_embeddings'][:,0:32]
for key in data.keys():
    print(data[key].shape,key)

(6729, 1) bpm
(6729, 3) random
(6729, 9) drums_pitches_used
(6729, 32) vae_embeddings
(6729, 36) velocity_metadata
(6729, 1) offbeat_notes
(6729, 2) dataset
(6729, 12) genre
(6729, 2, 1) fills


In [3]:
def feature_selection(scaler=True,cv=True,list_list_label=[],penalty='l2',stats=True):

    
    
    
    for list_label in list_list_label:
            list_x=[]
            for key in data.keys():
                if key in list_label:
                    list_x.append(data[key])
            print(x.shape for x in list_x)
            X=np.concatenate(list_x,axis=1)
            y=data['fills'][:,1].reshape(-1)
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)
            if scaler:
                scaler = StandardScaler()
                scaler.fit(X_train)
                X_train=scaler.transform(X_train)
                X_test=scaler.transform(X_test)
            
            if cv:
                clf = LogisticRegressionCV(cv=2, random_state=0,
                                   multi_class='ovr',penalty=penalty,solver='liblinear',max_iter=300,n_jobs=-1).fit(X_train, y_train)
            else:
                clf = LogisticRegression(random_state=0,C=100000000).fit(X_train, y_train)

            y_pred=clf.predict(X_test)
            tn, fp, fn, tp=confusion_matrix(y_test, y_pred).ravel()
            if stats:
                print("__________Features used : "+str(list_label)+"_______")
                print("tn,fp,fn,tp = ",tn,fp,fn,tp)
                print("Accuracy = ",(tp+tn)/(tn+fp+fn+tp))
                print("Recall = ",(tp)/(fn+tp))
                print("Precision = ",(tp)/(fp+tp))

    return clf



# 1.Feature selection

In [4]:
list_list_label=[['vae_embeddings','offbeat_notes','drums_pitches_used','velocity_metadata'],
               ['offbeat_notes','drums_pitches_used','velocity_metadata'],
               ['vae_embeddings'],
               ['offbeat_notes'],
               ['drums_pitches_used'],
               ['velocity_metadata'],
                 ['drums_pitches_used','velocity_metadata'],
                 ['random']
]

clf=feature_selection(scaler=True,cv=True,list_list_label=list_list_label)

<generator object feature_selection.<locals>.<genexpr> at 0x7f1ae6678888>
__________Features used : ['vae_embeddings', 'offbeat_notes', 'drums_pitches_used', 'velocity_metadata']_______
tn,fp,fn,tp =  1582 33 67 337
Accuracy =  0.9504705299653293
Recall =  0.8341584158415841
Precision =  0.9108108108108108
<generator object feature_selection.<locals>.<genexpr> at 0x7f1ae6678888>
__________Features used : ['offbeat_notes', 'drums_pitches_used', 'velocity_metadata']_______
tn,fp,fn,tp =  1567 48 67 337
Accuracy =  0.9430411094601288
Recall =  0.8341584158415841
Precision =  0.8753246753246753
<generator object feature_selection.<locals>.<genexpr> at 0x7f1ae6678888>
__________Features used : ['vae_embeddings']_______
tn,fp,fn,tp =  1598 17 381 23
Accuracy =  0.8028727092620109
Recall =  0.05693069306930693
Precision =  0.575
<generator object feature_selection.<locals>.<genexpr> at 0x7f1ae6678888>
__________Features used : ['offbeat_notes']_______
tn,fp,fn,tp =  1614 1 404 0
Accuracy =  0



In [5]:
list_list_label=[['vae_embeddings','offbeat_notes','velocity_metadata','drums_pitches_used']]
             

clf_l1=feature_selection(scaler=True,cv=True,list_list_label=list_list_label,penalty='l1',stats=False)
clf_l2=feature_selection(scaler=True,cv=True,list_list_label=list_list_label,penalty='l2',stats=False)

<generator object feature_selection.<locals>.<genexpr> at 0x7f1ae6699e08>
<generator object feature_selection.<locals>.<genexpr> at 0x7f1b403b2d00>


In [6]:
name_pitches = ['bass drum','snare drum','closed hi-hat','open hi-hat','low tom','mid tom','high tom','crash cymbal','ride cymbal']
name_features=['min_velocity','max_velocity','std_velocity','max_velocity']

In [7]:
def stats_weights(clf):
    coef=clf.coef_
    coef=coef.reshape(-1)
    print("------VAE EMBEDDINGS-------")
    print(coef[0:32])

    print("------OFFBEATS NOTES-------")
    print(coef[32])

    print("------VELOCITY METRICS-------")
    for i,pitch in enumerate(name_pitches):
        for j,metric in enumerate(name_features):
            print(metric+' of '+pitch,coef[33+i+j])
        
    print("------PITCHES USED-------")
    for i,pitch in enumerate(name_pitches):
        print('use of '+pitch,coef[33+36+i])

# 2. Magnitude of weights with L1 reg

In [8]:
stats_weights(clf_l2)

------VAE EMBEDDINGS-------
[-1.01873577 -1.18969263  0.70587622 -0.18344703  0.58140306 -2.85148953
 -3.04416831 -0.0173172  -1.8305815   0.51281467  0.50442477  0.50443441
  0.7441349  -0.70086569 -1.41113827 -0.20620589 -0.90768445  0.22066152
  0.52801773 -0.05157782  1.11896297 -0.02349166 -0.52808708 -0.23113492
 -0.34061621  0.14853199 -0.40996908 -0.18390271  0.06022144  0.15693987
 -0.22455539 -0.43929968]
------OFFBEATS NOTES-------
0.007825148661606806
------VELOCITY METRICS-------
min_velocity of bass drum -0.5685986489702152
max_velocity of bass drum 0.08846585942092912
std_velocity of bass drum -0.10044688400476565
max_velocity of bass drum -0.6165812396612288
min_velocity of snare drum 0.08846585942092912
max_velocity of snare drum -0.10044688400476565
std_velocity of snare drum -0.6165812396612288
max_velocity of snare drum 0.09199187929430476
min_velocity of closed hi-hat -0.10044688400476565
max_velocity of closed hi-hat -0.6165812396612288
std_velocity of closed hi-h

# 3.Magnitude of weights with L2 reg

In [9]:
stats_weights(clf_l2)

------VAE EMBEDDINGS-------
[-1.01873577 -1.18969263  0.70587622 -0.18344703  0.58140306 -2.85148953
 -3.04416831 -0.0173172  -1.8305815   0.51281467  0.50442477  0.50443441
  0.7441349  -0.70086569 -1.41113827 -0.20620589 -0.90768445  0.22066152
  0.52801773 -0.05157782  1.11896297 -0.02349166 -0.52808708 -0.23113492
 -0.34061621  0.14853199 -0.40996908 -0.18390271  0.06022144  0.15693987
 -0.22455539 -0.43929968]
------OFFBEATS NOTES-------
0.007825148661606806
------VELOCITY METRICS-------
min_velocity of bass drum -0.5685986489702152
max_velocity of bass drum 0.08846585942092912
std_velocity of bass drum -0.10044688400476565
max_velocity of bass drum -0.6165812396612288
min_velocity of snare drum 0.08846585942092912
max_velocity of snare drum -0.10044688400476565
std_velocity of snare drum -0.6165812396612288
max_velocity of snare drum 0.09199187929430476
min_velocity of closed hi-hat -0.10044688400476565
max_velocity of closed hi-hat -0.6165812396612288
std_velocity of closed hi-h

In [17]:
from sklearn.externals import joblib
s = joblib.dump(clf_l2,'clf_fills.pkl')
