In [1]:
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegressionCV
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [2]:
path="/home/ftamagnan/dataset/"
name="total_metadata_training.npz"
data=dict(np.load(path+name))

data["random"]= np.random.rand(6729,3)


data['vae_embeddings']=data['vae_embeddings'][:,0:32]
for key in data.keys():
    print(data[key].shape,key)

(6729, 9) count
(6729, 9) drums_pitches_used
(6729, 32) vae_embeddings
(6729, 3) random
(6729, 12) genre
(6729, 1) bpm
(6729, 2, 1) fills
(6729, 144) reduced_drums
(6729, 1) offbeat_notes
(6729, 144) reduced_drums_velocity
(6729, 2) dataset
(6729, 27) velocity_metadata


In [3]:
def feature_selection(scaler=True,cv=True,list_list_label=[],penalty='l2',stats=True):

    
    
    
    for list_label in list_list_label:
            list_x=[]
            for key in data.keys():
                if key in list_label:
                    list_x.append(data[key])
            print(x.shape for x in list_x)
            X=np.concatenate(list_x,axis=1)
            y=data['fills'][:,1].reshape(-1)
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)
            if scaler:
                scaler = StandardScaler()
                scaler.fit(X_train)
                X_train=scaler.transform(X_train)
                X_test=scaler.transform(X_test)
            
            if cv:
                clf = LogisticRegressionCV(cv=2, random_state=0,
                                   multi_class='ovr',penalty=penalty,solver='liblinear',max_iter=300,n_jobs=-1).fit(X_train, y_train)
            else:
                clf = LogisticRegression(random_state=0,C=100000000).fit(X_train, y_train)

#             y_pred=clf.predict(X_test)
            y_pred=(clf.predict_proba(X_test)>0.6)*1
            y_pred=y_pred[:,1]
            tn, fp, fn, tp=confusion_matrix(y_test, y_pred).ravel()
            if stats:
                print("__________Features used : "+str(list_label)+"_______")
                print("tn,fp,fn,tp = ",tn,fp,fn,tp)
                print("Accuracy = ",(tp+tn)/(tn+fp+fn+tp))
                print("Recall = ",(tp)/(fn+tp))
                print("Precision = ",(tp)/(fp+tp))

    return clf,scaler



# 1.Feature selection

In [4]:
list_list_label=[['vae_embeddings','drums_pitches_used','reduced_drums','velocity_metadata'],
               ['offbeat_notes','drums_pitches_used','velocity_metadata'],
               ['vae_embeddings'],
               ['offbeat_notes'],
               ['drums_pitches_used'],
               ['velocity_metadata'],
                 ['drums_pitches_used','velocity_metadata','vae_embeddings'],
                 ['vae_embeddings','velocity_metadata'],
                 ['random']
]

clf,scaler=feature_selection(scaler=True,cv=True,list_list_label=list_list_label)

<generator object feature_selection.<locals>.<genexpr> at 0x7f7d0fe0ea40>
__________Features used : ['vae_embeddings', 'drums_pitches_used', 'reduced_drums', 'velocity_metadata']_______
tn,fp,fn,tp =  1588 27 61 343
Accuracy =  0.9564140663694899
Recall =  0.849009900990099
Precision =  0.927027027027027
<generator object feature_selection.<locals>.<genexpr> at 0x7f7d0fe0ea40>
__________Features used : ['offbeat_notes', 'drums_pitches_used', 'velocity_metadata']_______
tn,fp,fn,tp =  1574 41 84 320
Accuracy =  0.9380881624566617
Recall =  0.7920792079207921
Precision =  0.8864265927977839
<generator object feature_selection.<locals>.<genexpr> at 0x7f7d0fe0ea40>
__________Features used : ['vae_embeddings']_______
tn,fp,fn,tp =  1610 5 394 10
Accuracy =  0.8023774145616642
Recall =  0.024752475247524754
Precision =  0.6666666666666666
<generator object feature_selection.<locals>.<genexpr> at 0x7f7d0fe0ea40>
__________Features used : ['offbeat_notes']_______
tn,fp,fn,tp =  1615 0 404 0
Ac



__________Features used : ['drums_pitches_used']_______
tn,fp,fn,tp =  1553 62 206 198
Accuracy =  0.8672610203070827
Recall =  0.4900990099009901
Precision =  0.7615384615384615
<generator object feature_selection.<locals>.<genexpr> at 0x7f7d0fe0ea40>
__________Features used : ['velocity_metadata']_______
tn,fp,fn,tp =  1559 56 104 300
Accuracy =  0.9207528479445269
Recall =  0.7425742574257426
Precision =  0.8426966292134831
<generator object feature_selection.<locals>.<genexpr> at 0x7f7d0fe0ea40>
__________Features used : ['drums_pitches_used', 'velocity_metadata', 'vae_embeddings']_______
tn,fp,fn,tp =  1591 24 77 327
Accuracy =  0.9499752352649826
Recall =  0.8094059405940595
Precision =  0.9316239316239316
<generator object feature_selection.<locals>.<genexpr> at 0x7f7d0fe0ea40>
__________Features used : ['vae_embeddings', 'velocity_metadata']_______
tn,fp,fn,tp =  1589 26 83 321
Accuracy =  0.946012877662209
Recall =  0.7945544554455446
Precision =  0.9250720461095101
<generator

In [5]:
list_list_label=[['vae_embeddings','velocity_metadata']]
             

clf_l1,scaler=feature_selection(scaler=True,cv=True,list_list_label=list_list_label,penalty='l1',stats=False)
clf_l2,scaler=feature_selection(scaler=True,cv=True,list_list_label=list_list_label,penalty='l2',stats=False)

<generator object feature_selection.<locals>.<genexpr> at 0x7f7d0fe0eb48>
<generator object feature_selection.<locals>.<genexpr> at 0x7f7d9c1a10a0>


In [6]:
name_pitches = ['bass drum','snare drum','closed hi-hat','open hi-hat','low tom','mid tom','high tom','crash cymbal','ride cymbal']
name_features=['max_velocity','std_velocity','max_velocity']

In [7]:
def stats_weights(clf):
    coef=clf.coef_
    coef=coef.reshape(-1)
    print("------VAE EMBEDDINGS-------")
    print(coef[0:32])

    print("------OFFBEATS NOTES-------")
    print(coef[32])

    print("------VELOCITY METRICS-------")
    for i,pitch in enumerate(name_pitches):
        for j,metric in enumerate(name_features):
            print(metric+' of '+pitch,coef[33+i+j])
        
    print("------PITCHES USED-------")
    for i,pitch in enumerate(name_pitches):
        print('use of '+pitch,coef[33+3*9+i])

# 2. Magnitude of weights with L1 reg

In [8]:
stats_weights(clf_l2)

------VAE EMBEDDINGS-------
[ 9.44409352e-01  5.37854706e-01  5.86329858e-01  1.16193352e+00
 -2.40195396e-01 -1.49251666e+00 -9.22299517e-02 -5.53691717e-01
  2.04523609e-01  8.23259496e-01 -4.87350359e-02  9.35174289e-01
 -6.31758596e-02 -8.02527275e-01 -4.35322327e-01 -8.49084974e-01
  2.14137337e-01 -8.08603293e-01 -1.95306890e-01  3.22814108e-01
  2.36533445e-01 -4.38454938e-02 -1.72444907e-02  3.83865420e-01
 -5.58345778e-01  6.20259814e-01 -2.03502907e-01 -1.32928558e+00
  7.32809391e-04  4.11162152e-02  5.19003089e-01  5.79665610e-01]
------OFFBEATS NOTES-------
1.5726094006788367
------VELOCITY METRICS-------
max_velocity of bass drum 0.1109787656557349
std_velocity of bass drum 1.3246243665510857
max_velocity of bass drum 0.5946310238840214
max_velocity of snare drum 1.3246243665510857
std_velocity of snare drum 0.5946310238840214
max_velocity of snare drum 0.12938476106078367
max_velocity of closed hi-hat 0.5946310238840214
std_velocity of closed hi-hat 0.12938476106078367
m

IndexError: index 60 is out of bounds for axis 0 with size 59

# 3.Magnitude of weights with L2 reg

In [9]:
stats_weights(clf_l2)

------VAE EMBEDDINGS-------
[ 9.44409352e-01  5.37854706e-01  5.86329858e-01  1.16193352e+00
 -2.40195396e-01 -1.49251666e+00 -9.22299517e-02 -5.53691717e-01
  2.04523609e-01  8.23259496e-01 -4.87350359e-02  9.35174289e-01
 -6.31758596e-02 -8.02527275e-01 -4.35322327e-01 -8.49084974e-01
  2.14137337e-01 -8.08603293e-01 -1.95306890e-01  3.22814108e-01
  2.36533445e-01 -4.38454938e-02 -1.72444907e-02  3.83865420e-01
 -5.58345778e-01  6.20259814e-01 -2.03502907e-01 -1.32928558e+00
  7.32809391e-04  4.11162152e-02  5.19003089e-01  5.79665610e-01]
------OFFBEATS NOTES-------
1.5726094006788367
------VELOCITY METRICS-------
max_velocity of bass drum 0.1109787656557349
std_velocity of bass drum 1.3246243665510857
max_velocity of bass drum 0.5946310238840214
max_velocity of snare drum 1.3246243665510857
std_velocity of snare drum 0.5946310238840214
max_velocity of snare drum 0.12938476106078367
max_velocity of closed hi-hat 0.5946310238840214
std_velocity of closed hi-hat 0.12938476106078367
m

IndexError: index 60 is out of bounds for axis 0 with size 59

In [None]:
from sklearn.externals import joblib
s = joblib.dump(clf_l2,'clf_fills.pkl')


# 4. Evaluate Generated model

In [10]:
list_list_label=[['vae_embeddings','velocity_metadata']]


clf,scaler=feature_selection(scaler=True,cv=True,list_list_label=list_list_label)
from sklearn.externals import joblib
s = joblib.dump(clf,'clf_fills.pkl')
s = joblib.dump(scaler,'scaler.pkl')



<generator object feature_selection.<locals>.<genexpr> at 0x7f7d0f6f4fc0>
__________Features used : ['vae_embeddings', 'velocity_metadata']_______
tn,fp,fn,tp =  1589 26 83 321
Accuracy =  0.946012877662209
Recall =  0.7945544554455446
Precision =  0.9250720461095101
