In [1]:
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegressionCV
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [2]:
path="/home/ftamagnan/dataset/"
name="total_metadata_training.npz"
data=dict(np.load(path+name))

data["random"]= np.random.rand(6729,3)


data['vae_embeddings']=data['vae_embeddings'][:,0:32]
for key in data.keys():
    print(data[key].shape,key)

(6729, 1) offbeat_notes
(6729, 9) drums_pitches_used
(6729, 3) random
(6729, 1) bpm
(6729, 144) reduced_drums
(6729, 144) reduced_drums_velocity
(6729, 9) count
(6729, 12) genre
(6729, 32) vae_embeddings
(6729, 2, 1) fills
(6729, 27) velocity_metadata
(6729, 2) dataset


In [3]:
def feature_selection(scaler=True,cv=True,list_list_label=[],penalty='l2',stats=True):

    
    
    
    for list_label in list_list_label:
            list_x=[]
            for key in data.keys():
                if key in list_label:
                    list_x.append(data[key])
            print(x.shape for x in list_x)
            X=np.concatenate(list_x,axis=1)
            y=data['fills'][:,1].reshape(-1)
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)
            if scaler:
                scaler = StandardScaler()
                scaler.fit(X_train)
                X_train=scaler.transform(X_train)
                X_test=scaler.transform(X_test)
            
            if cv:
                clf = LogisticRegressionCV(cv=2, random_state=0,
                                   multi_class='ovr',penalty=penalty,solver='liblinear',max_iter=300,n_jobs=-1).fit(X_train, y_train)
            else:
                clf = LogisticRegression(random_state=0,C=100000000).fit(X_train, y_train)

#             y_pred=clf.predict(X_test)
            y_pred=(clf.predict_proba(X_test)>0.5)*1
            y_pred=y_pred[:,1]
            tn, fp, fn, tp=confusion_matrix(y_test, y_pred).ravel()
            if stats:
                print("__________Features used : "+str(list_label)+"_______")
                print("tn,fp,fn,tp = ",tn,fp,fn,tp)
                print("Accuracy = ",(tp+tn)/(tn+fp+fn+tp))
                print("Recall = ",(tp)/(fn+tp))
                print("Precision = ",(tp)/(fp+tp))
                p=(tp)/(fp+tp)
                r=(tp)/(fn+tp)
                print("F1 score =",2*p*r/(p+r))

    return clf,scaler



# 1.Feature selection

In [4]:
list_list_label=[['vae_embeddings','drums_pitches_used','reduced_drums','velocity_metadata'],
               ['offbeat_notes','drums_pitches_used','velocity_metadata'],
               ['vae_embeddings'],
               ['offbeat_notes'],
               ['drums_pitches_used'],
               ['velocity_metadata'],
                 ['drums_pitches_used','velocity_metadata','vae_embeddings'],
                 ['vae_embeddings','velocity_metadata'],
                 ['random']
]

clf,scaler=feature_selection(scaler=True,cv=True,list_list_label=list_list_label)

<generator object feature_selection.<locals>.<genexpr> at 0x7fabefa82a40>
__________Features used : ['vae_embeddings', 'drums_pitches_used', 'reduced_drums', 'velocity_metadata']_______
tn,fp,fn,tp =  1579 36 52 352
Accuracy =  0.9564140663694899
Recall =  0.8712871287128713
Precision =  0.9072164948453608
F1 score = 0.8888888888888888
<generator object feature_selection.<locals>.<genexpr> at 0x7fabefa82a40>
__________Features used : ['offbeat_notes', 'drums_pitches_used', 'velocity_metadata']_______
tn,fp,fn,tp =  1558 57 72 332
Accuracy =  0.9361069836552749
Recall =  0.8217821782178217
Precision =  0.8534704370179949
F1 score = 0.8373266078184111
<generator object feature_selection.<locals>.<genexpr> at 0x7fabefa82a40>
__________Features used : ['vae_embeddings']_______
tn,fp,fn,tp =  1598 17 381 23
Accuracy =  0.8028727092620109
Recall =  0.05693069306930693
Precision =  0.575
F1 score = 0.1036036036036036
<generator object feature_selection.<locals>.<genexpr> at 0x7fabefa82a40>
__



__________Features used : ['drums_pitches_used']_______
tn,fp,fn,tp =  1525 90 171 233
Accuracy =  0.8707280832095097
Recall =  0.5767326732673267
Precision =  0.7213622291021672
F1 score = 0.640990371389271
<generator object feature_selection.<locals>.<genexpr> at 0x7fabefa82a40>
__________Features used : ['velocity_metadata']_______
tn,fp,fn,tp =  1535 80 85 319
Accuracy =  0.9182763744427934
Recall =  0.7896039603960396
Precision =  0.7994987468671679
F1 score = 0.7945205479452055
<generator object feature_selection.<locals>.<genexpr> at 0x7fabefa82a40>
__________Features used : ['drums_pitches_used', 'velocity_metadata', 'vae_embeddings']_______
tn,fp,fn,tp =  1579 36 70 334
Accuracy =  0.9474987617632491
Recall =  0.8267326732673267
Precision =  0.9027027027027027
F1 score = 0.8630490956072351
<generator object feature_selection.<locals>.<genexpr> at 0x7fabefa82a40>
__________Features used : ['vae_embeddings', 'velocity_metadata']_______
tn,fp,fn,tp =  1575 40 76 328
Accuracy =  0



In [5]:
list_list_label=[['vae_embeddings','velocity_metadata']]
             

clf_l1,scaler=feature_selection(scaler=True,cv=True,list_list_label=list_list_label,penalty='l1',stats=False)
clf_l2,scaler=feature_selection(scaler=True,cv=True,list_list_label=list_list_label,penalty='l2',stats=False)

<generator object feature_selection.<locals>.<genexpr> at 0x7fabefa82af0>
<generator object feature_selection.<locals>.<genexpr> at 0x7fabefa82c50>


In [6]:
name_pitches = ['bass drum','snare drum','closed hi-hat','open hi-hat','low tom','mid tom','high tom','crash cymbal','ride cymbal']
name_features=['max_velocity','std_velocity','max_velocity']

In [9]:
def stats_weights(clf):
    coef=clf.coef_
    coef=coef.reshape(-1)
    print("------VAE EMBEDDINGS-------")
    print(coef[0:32])

#     print("------OFFBEATS NOTES-------")
#     print(coef[32])

    print("------VELOCITY METRICS-------")
    for i,pitch in enumerate(name_pitches):
        for j,metric in enumerate(name_features):
            print(metric+' of '+pitch,coef[32+i+j])
        
    print("------PITCHES USED-------")
    for i,pitch in enumerate(name_pitches):
        print('use of '+pitch,coef[33+3*9+i])

# 2. Magnitude of weights with L1 reg

In [11]:
stats_weights(clf_l1)

------VAE EMBEDDINGS-------
[ 0.9801162   0.65158922  0.58326857  1.32651732 -0.24319789 -1.88863305
 -0.09644014 -0.54634354  0.22567227  0.81888318 -0.03951306  1.26262003
 -0.0636314  -0.88569507 -0.44897394 -0.84277649  0.21091747 -0.84717056
 -0.2100739   0.36210882  0.21965828 -0.08105558 -0.05166368  0.42060268
 -0.60239727  0.65543686 -0.20817299 -1.47509021 -0.01119262  0.02910354
  0.53153782  0.56184091]
------VELOCITY METRICS-------
max_velocity of bass drum 1.6179717759322259
std_velocity of bass drum 0.09954478149722493
max_velocity of bass drum 1.5461519856104988
max_velocity of snare drum 0.09954478149722493
std_velocity of snare drum 1.5461519856104988
max_velocity of snare drum 0.7224091137998008
max_velocity of closed hi-hat 1.5461519856104988
std_velocity of closed hi-hat 0.7224091137998008
max_velocity of closed hi-hat 0.12628133123583177
max_velocity of open hi-hat 0.7224091137998008
std_velocity of open hi-hat 0.12628133123583177
max_velocity of open hi-hat 1.597

IndexError: index 60 is out of bounds for axis 0 with size 59

# 3.Magnitude of weights with L2 reg

In [None]:
stats_weights(clf_l2)

In [None]:
from sklearn.externals import joblib
s = joblib.dump(clf_l2,'clf_fills.pkl')


# 4. Evaluate Generated model

In [None]:
list_list_label=[['vae_embeddings','velocity_metadata']]


clf,scaler=feature_selection(scaler=True,cv=True,list_list_label=list_list_label)
from sklearn.externals import joblib
s = joblib.dump(clf,'clf_fills.pkl')
s = joblib.dump(scaler,'scaler.pkl')

