In [1]:
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegressionCV
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [2]:
path="/home/ftamagnan/dataset/"
name="total_metadata_training.npz"
data=dict(np.load(path+name))

data["random"]= np.random.rand(6729,3)


data['vae_embeddings']=data['vae_embeddings'][:,0:32]
for key in data.keys():
    print(data[key].shape,key)

(6729, 3) random
(6729, 12) genre
(6729, 2, 1) fills
(6729, 9) drums_pitches_used
(6729, 27) velocity_metadata
(6729, 1) bpm
(6729, 32) vae_embeddings
(6729, 1) offbeat_notes
(6729, 2) dataset


In [3]:
def feature_selection(scaler=True,cv=True,list_list_label=[],penalty='l2',stats=True):

    
    
    
    for list_label in list_list_label:
            list_x=[]
            for key in data.keys():
                if key in list_label:
                    list_x.append(data[key])
            print(x.shape for x in list_x)
            X=np.concatenate(list_x,axis=1)
            y=data['fills'][:,1].reshape(-1)
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)
            if scaler:
                scaler = StandardScaler()
                scaler.fit(X_train)
                X_train=scaler.transform(X_train)
                X_test=scaler.transform(X_test)
            
            if cv:
                clf = LogisticRegressionCV(cv=2, random_state=0,
                                   multi_class='ovr',penalty=penalty,solver='liblinear',max_iter=300,n_jobs=-1).fit(X_train, y_train)
            else:
                clf = LogisticRegression(random_state=0,C=100000000).fit(X_train, y_train)

#             y_pred=clf.predict(X_test)
            y_pred=(clf.predict_proba(X_test)>0.5)*1
            y_pred=y_pred[:,1]
            tn, fp, fn, tp=confusion_matrix(y_test, y_pred).ravel()
            if stats:
                print("__________Features used : "+str(list_label)+"_______")
                print("tn,fp,fn,tp = ",tn,fp,fn,tp)
                print("Accuracy = ",(tp+tn)/(tn+fp+fn+tp))
                print("Recall = ",(tp)/(fn+tp))
                print("Precision = ",(tp)/(fp+tp))

    return clf



# 1.Feature selection

In [4]:
list_list_label=[['vae_embeddings','offbeat_notes','drums_pitches_used','velocity_metadata'],
               ['offbeat_notes','drums_pitches_used','velocity_metadata'],
               ['vae_embeddings'],
               ['offbeat_notes'],
               ['drums_pitches_used'],
               ['velocity_metadata'],
                 ['drums_pitches_used','velocity_metadata','vae_embeddings'],
                 ['vae_embeddings','velocity_metadata'],
                 ['random']
]

clf=feature_selection(scaler=True,cv=True,list_list_label=list_list_label)

<generator object feature_selection.<locals>.<genexpr> at 0x7f4019a8b5c8>
__________Features used : ['vae_embeddings', 'offbeat_notes', 'drums_pitches_used', 'velocity_metadata']_______
tn,fp,fn,tp =  1578 37 68 336
Accuracy =  0.9479940564635958
Recall =  0.8316831683168316
Precision =  0.900804289544236
<generator object feature_selection.<locals>.<genexpr> at 0x7f4019a8b5c8>
__________Features used : ['offbeat_notes', 'drums_pitches_used', 'velocity_metadata']_______
tn,fp,fn,tp =  1558 57 72 332
Accuracy =  0.9361069836552749
Recall =  0.8217821782178217
Precision =  0.8534704370179949
<generator object feature_selection.<locals>.<genexpr> at 0x7f4019a8b5c8>
__________Features used : ['vae_embeddings']_______
tn,fp,fn,tp =  1598 17 381 23
Accuracy =  0.8028727092620109
Recall =  0.05693069306930693
Precision =  0.575
<generator object feature_selection.<locals>.<genexpr> at 0x7f4019a8b5c8>
__________Features used : ['offbeat_notes']_______
tn,fp,fn,tp =  1614 1 404 0
Accuracy =  0.



In [5]:
list_list_label=[['vae_embeddings','offbeat_notes','velocity_metadata','drums_pitches_used']]
             

clf_l1=feature_selection(scaler=True,cv=True,list_list_label=list_list_label,penalty='l1',stats=False)
clf_l2=feature_selection(scaler=True,cv=True,list_list_label=list_list_label,penalty='l2',stats=False)

<generator object feature_selection.<locals>.<genexpr> at 0x7f4019aacd58>
<generator object feature_selection.<locals>.<genexpr> at 0x7f40758ca728>


In [6]:
name_pitches = ['bass drum','snare drum','closed hi-hat','open hi-hat','low tom','mid tom','high tom','crash cymbal','ride cymbal']
name_features=['min_velocity','max_velocity','std_velocity','max_velocity']

In [7]:
def stats_weights(clf):
    coef=clf.coef_
    coef=coef.reshape(-1)
    print("------VAE EMBEDDINGS-------")
    print(coef[0:32])

    print("------OFFBEATS NOTES-------")
    print(coef[32])

    print("------VELOCITY METRICS-------")
    for i,pitch in enumerate(name_pitches):
        for j,metric in enumerate(name_features):
            print(metric+' of '+pitch,coef[33+i+j])
        
    print("------PITCHES USED-------")
    for i,pitch in enumerate(name_pitches):
        print('use of '+pitch,coef[33+36+i])

# 2. Magnitude of weights with L1 reg

In [8]:
stats_weights(clf_l2)

------VAE EMBEDDINGS-------
[-1.19913428 -1.27333359  0.59356504 -0.55011543  1.70979162 -3.31472844
 -4.51148881 -0.30450725 -3.09179385  2.76696085  1.13993118  0.89662331
  1.04266921 -1.89674702  5.09189333  5.40363718  0.49869981  1.95776004
 -1.46794113  0.21723554 -2.01971986 -0.59546707  0.40198004 -0.89754297
 -0.71811299  0.33737595  1.41754014 -0.49743002  2.35541024  0.21393959
 -0.21493416  1.15940458]
------OFFBEATS NOTES-------
0.08099926814400074
------VELOCITY METRICS-------
min_velocity of bass drum -0.03623687436579776
max_velocity of bass drum -0.188287761462922
std_velocity of bass drum -1.5434867911034689
max_velocity of bass drum 0.5826175679325705
min_velocity of snare drum -0.188287761462922
max_velocity of snare drum -1.5434867911034689
std_velocity of snare drum 0.5826175679325705
max_velocity of snare drum 0.8993529007692185
min_velocity of closed hi-hat -1.5434867911034689
max_velocity of closed hi-hat 0.5826175679325705
std_velocity of closed hi-hat 0.8993

IndexError: index 69 is out of bounds for axis 0 with size 69

# 3.Magnitude of weights with L2 reg

In [None]:
stats_weights(clf_l2)

In [None]:
from sklearn.externals import joblib
s = joblib.dump(clf_l2,'clf_fills.pkl')


# 4. Evaluate Generated model

In [None]:
list_list_label=[['velocity_metadata']]
             
clf_l2=feature_selection(scaler=True,cv=True,list_list_label=list_list_label,penalty='l2',stats=False)

tr=np.load('/home/ftamagnan/dataset/all.npy')
print(tr.shape)
tr=np.concatenate((tr[:,:16,:],tr[:,16:,:]))
print(tr.shape)
max_axis=np.max(tr,axis=1)
# std_axis=np.std(tr,axis=1,dtype=np.float64)
# mean_axis=np.mean(tr,axis=1,dtype=np.float64)

# velocity_metadata=np.concatenate([max_axis,std_axis,mean_axis],axis=1)
y_pred=(clf_l2.predict_proba(velocity_metadata))*1
y_pred=y_pred[:,1]
print(y_pred.shape)

a=y_pred[:1000]
b=y_pred[1000:]
(b-a).sum()/1000

