In [1]:
import pandas as pd 
import numpy as np
import mlflow

## Leitura dos Dados

In [2]:
# Alternative input kept for reference (the "mel_spec" arrays); not loaded:
# X = np.load("X-mel_spec.npy")
# y = np.load("y-mel_spec.npy")

# Load the feature array and its labels from the "PCP_FREQ2" .npy files.
X = np.load("X_PCP_FREQ2.npy")
y = np.load("y_PCP_FREQ2.npy")

In [3]:
# Show the dimensions of the loaded feature array.
X.shape

(1128, 12, 1216)

In [3]:
def get_label(filepath, metadata=None):
    """Return the chord recorded in the metadata table for one audio file.

    Parameters
    ----------
    filepath : str
        Path written with '/' separators. Its second component
        (``filepath.split('/')[1]``) is matched against the ``Filename``
        column of the table.
    metadata : pandas.DataFrame, optional
        Table with ``Filename`` and ``Acorde`` columns. When omitted,
        ``metadata.csv`` is read from the working directory. Pass a preloaded
        frame to avoid re-reading the CSV on every call.

    Returns
    -------
    The ``Acorde`` value of the first row whose ``Filename`` matches.

    Raises
    ------
    IndexError
        If ``filepath`` has no second component, or if no row matches.
    """
    if metadata is None:
        metadata = pd.read_csv('metadata.csv', sep=',')

    filename = filepath.split('/')[1]
    matches = metadata.loc[metadata['Filename'] == filename, 'Acorde']
    if matches.empty:
        # Same exception type as the original `.values[0]` on an empty
        # selection, but with a message that names the missing file.
        raise IndexError(f"no metadata row with Filename == {filename!r}")

    return matches.values[0]

# Read the metadata table that carries the `most_freq` indicator column and
# keep only the rows where that indicator equals 1.
df = pd.read_csv('metadata_final.csv', sep=',')
df_chords = df.loc[df['most_freq'].eq(1)]

## Divisão Treino e Teste

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the samples for testing; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

## Métricas

In [5]:
from sklearn.metrics import f1_score, accuracy_score
from sklearn.metrics import classification_report


def metrics(model, X_test, y_test):
    """Print macro F1, accuracy and the per-class report for a fitted model.

    Parameters
    ----------
    model : object
        Estimator exposing ``predict``.
    X_test : array-like
        Features passed to ``model.predict``.
    y_test : array-like
        True labels that the predictions are compared against.

    Returns
    -------
    None
        The scores are only printed.
    """
    preds = model.predict(X_test)

    macro_f1 = f1_score(y_test, preds, average='macro')
    print('F1-Score: ', macro_f1)

    accuracy = accuracy_score(y_test, preds)
    print('Acurácia: ', accuracy)

    report = classification_report(y_test, preds)
    print(report)



## Random Forest

In [7]:
# Turn scikit-learn autologging off (original note: "Don't run for tuning").
# NOTE(review): presumably meant to keep the hyper-parameter searches below
# from being logged to MLflow — confirm the intent.
mlflow.sklearn.autolog(disable = True)

In [6]:
# Flatten each sample's two trailing axes into a single feature vector so the
# arrays are 2-D (samples x features).
X_rf = np.reshape(X, (X.shape[0], X.shape[1] * X.shape[2]))
X_train_rf = np.reshape(X_train, (X_train.shape[0], X_train.shape[1] * X_train.shape[2]))
X_test_rf = np.reshape(X_test, (X_test.shape[0], X_test.shape[1] * X_test.shape[2]))

In [39]:
# Show the dimensions of the label array.
y.shape

(1128,)

### Tuning

In [8]:
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingRandomSearchCV
from sklearn.ensemble import RandomForestClassifier

In [41]:



# Random-forest hyper-parameter search with successive halving. The budgeted
# resource is the number of trees (`n_estimators`), capped at 30.
clf = RandomForestClassifier(max_depth=5, random_state=0)

param_grid = {'max_depth': [3, 5, 10],
              'min_samples_split': [2, 5, 10]}

# Fit on the training split only: searching on the full X_rf/y would let the
# held-out test rows influence the selected hyper-parameters. random_state
# makes the candidate sampling repeatable.
sh = HalvingRandomSearchCV(clf, param_grid, cv=5,
                           factor=2, resource='n_estimators',
                           max_resources=30,
                           random_state=0).fit(X_train_rf, y_train)
sh.best_estimator_





KeyboardInterrupt: 

In [9]:

def rf_model():
    """Fit a random forest on the flattened training split inside an MLflow run.

    Reads the notebook globals ``X_train_rf``, ``y_train``, ``X_test_rf`` and
    ``y_test``. Prints the evaluation report through ``metrics`` and logs the
    macro F1 and the accuracy on the test split as MLflow metrics.

    Returns
    -------
    RandomForestClassifier
        The fitted estimator.
    """
    mlflow.autolog(exclusive=False)

    with mlflow.start_run():
        mlflow.log_param("example_param", "example_value")

        forest = RandomForestClassifier(max_depth=10, n_estimators=8, random_state=0)
        forest.fit(X_train_rf, y_train)
        metrics(forest, X_test_rf, y_test)

        y_pred = forest.predict(X_test_rf)
        macro_f1 = f1_score(y_test, y_pred, average='macro')
        mlflow.log_metric("test f1-score", macro_f1)
        test_accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", test_accuracy)

        return forest


model = rf_model()

F1-Score:  0.6066906963985931
Acurácia:  0.6192660550458715
              precision    recall  f1-score   support

      A#:maj       0.50      0.68      0.58        31
       A:maj       0.61      0.68      0.64        25
       B:maj       0.60      0.41      0.49        29
      C#:maj       0.72      0.76      0.74        41
       C:maj       0.72      0.68      0.70        38
      D#:maj       0.57      0.56      0.56        36
       D:maj       0.63      0.47      0.54        36
       E:maj       0.47      0.47      0.47        36
      F#:maj       0.56      0.40      0.47        35
       F:maj       0.62      0.80      0.70        45
      G#:maj       0.67      0.67      0.67        46
       G:maj       0.70      0.74      0.72        38

    accuracy                           0.62       436
   macro avg       0.61      0.61      0.61       436
weighted avg       0.62      0.62      0.61       436



In [10]:
# Persist the object currently bound to `model` with joblib.dump; the target
# file name indicates this is meant to be the random forest from rf_model().
from joblib import dump  

dump(model , 'dashboard/models/rf_model1.joblib')



['dashboard/models/rf_model1.joblib']

In [19]:
# These two bare expressions are evaluated but not displayed: a cell only
# shows the value of its last expression.
X_test.shape
X_test_rf.shape

# Turn MLflow autologging off.
mlflow.autolog(disable=True)


In [32]:
import dashboard.preprocessing_pipeline as pp 

# Extract features for one recording.
# NOTE(review): assumes pp.PCP returns a 2-D array — confirm against
# dashboard.preprocessing_pipeline. np.asarray is a no-op for an ndarray.
chroma = np.asarray(pp.PCP('f_major_my.wav'))

# Bring the second axis to the 1216 columns of the training array (see the
# X.shape output above). The pad width is derived from the actual input
# instead of the hard-coded `1216-1093`, which was only right for a recording
# with exactly 1093 columns. Longer inputs are cropped rather than making
# np.pad fail on a negative width.
TARGET_WIDTH = 1216
n_missing = max(TARGET_WIDTH - chroma.shape[1], 0)
padded = np.pad(chroma[:, :TARGET_WIDTH], ((0, 0), (0, n_missing)))

# Flatten to a single row, the layout the flattened-feature models were fitted on.
chroma_rs = padded.reshape(1, padded.shape[0] * padded.shape[1])

In [34]:
# Predict the label for the padded, flattened recording prepared above.
model.predict(chroma_rs)

array(['F:maj'], dtype='<U6')

## KNeighborsClassifier

### Tuning

In [10]:
from sklearn.neighbors import KNeighborsClassifier

# Keep MLflow autologging off while searching.
mlflow.autolog(disable=True)
knn = KNeighborsClassifier()

param_grid = {'n_neighbors': [5, 20, 50],
              'algorithm': ['kd_tree', 'ball_tree', 'auto'],
              'weights': ['uniform', 'distance']}

# Successive-halving random search scored by macro F1. It is fitted on the
# training split only: searching on the full X_rf/y would let the held-out
# test rows influence the selected hyper-parameters.
sh = HalvingRandomSearchCV(knn, param_grid, cv=5,
                           factor=2, random_state=0,
                           scoring='f1_macro').fit(X_train_rf, y_train)
sh.best_estimator_

KeyboardInterrupt: 

In [11]:
from sklearn.neighbors import KNeighborsClassifier

def knn_model():
    """Fit a k-nearest-neighbours classifier on the flattened training split.

    Reads the notebook globals ``X_train_rf``, ``y_train``, ``X_test_rf`` and
    ``y_test``. Prints the evaluation report through ``metrics`` and logs the
    macro F1 and the accuracy on the test split as MLflow metrics.

    Returns
    -------
    KNeighborsClassifier
        The fitted estimator. The function previously returned None, so
        ``model = knn_model()`` left ``model`` unusable for ``predict``.
    """
    mlflow.autolog(exclusive=False)

    with mlflow.start_run():
        mlflow.log_param("example_param", "example_value")
        clf = KNeighborsClassifier(algorithm='kd_tree', weights='distance')
        clf.fit(X_train_rf, y_train)
        metrics(clf, X_test_rf, y_test)

        preds = clf.predict(X_test_rf)
        mlflow.log_metric("test f1-score", f1_score(y_test, preds, average='macro'))
        mlflow.log_metric("accuracy", accuracy_score(y_test, preds))

        return clf


knn_model()

F1-Score:  0.7131010429933702
Acurácia:  0.7201834862385321
              precision    recall  f1-score   support

      A#:maj       0.64      0.81      0.71        31
       A:maj       0.58      0.72      0.64        25
       B:maj       0.81      0.59      0.68        29
      C#:maj       0.70      0.68      0.69        41
       C:maj       0.85      0.92      0.89        38
      D#:maj       0.84      0.75      0.79        36
       D:maj       0.66      0.64      0.65        36
       E:maj       0.65      0.67      0.66        36
      F#:maj       0.59      0.49      0.53        35
       F:maj       0.69      0.82      0.75        45
      G#:maj       0.91      0.65      0.76        46
       G:maj       0.75      0.87      0.80        38

    accuracy                           0.72       436
   macro avg       0.72      0.72      0.71       436
weighted avg       0.73      0.72      0.72       436



## SVM

In [12]:
from sklearn import svm

def svm_model():
    """Fit a support-vector classifier on the flattened training split.

    Reads the notebook globals ``X_train_rf``, ``y_train``, ``X_test_rf`` and
    ``y_test``. Prints the evaluation report through ``metrics`` and logs the
    macro F1 and the accuracy on the test split as MLflow metrics.

    Returns
    -------
    sklearn.svm.SVC
        The fitted estimator, created with ``probability=True``.
    """
    mlflow.autolog(exclusive=False)

    with mlflow.start_run():
        mlflow.log_param("example_param", "example_value")

        svc = svm.SVC(decision_function_shape='ovo', probability=True)
        svc.fit(X_train_rf, y_train)
        metrics(svc, X_test_rf, y_test)

        y_pred = svc.predict(X_test_rf)
        macro_f1 = f1_score(y_test, y_pred, average='macro')
        mlflow.log_metric("test f1-score", macro_f1)
        test_accuracy = accuracy_score(y_test, y_pred)
        mlflow.log_metric("accuracy", test_accuracy)

        return svc


model = svm_model()

F1-Score:  0.7627064514272556
Acurácia:  0.7637614678899083
              precision    recall  f1-score   support

      A#:maj       0.80      0.90      0.85        31
       A:maj       0.69      0.80      0.74        25
       B:maj       0.81      0.72      0.76        29
      C#:maj       0.72      0.83      0.77        41
       C:maj       0.91      0.84      0.88        38
      D#:maj       0.89      0.67      0.76        36
       D:maj       0.76      0.81      0.78        36
       E:maj       0.56      0.75      0.64        36
      F#:maj       0.86      0.54      0.67        35
       F:maj       0.89      0.87      0.88        45
      G#:maj       0.78      0.61      0.68        46
       G:maj       0.65      0.84      0.74        38

    accuracy                           0.76       436
   macro avg       0.78      0.77      0.76       436
weighted avg       0.78      0.76      0.76       436



In [13]:
# Persist the object currently bound to `model` with joblib.dump. The target
# file name indicates the SVC; the previous comment said "rf model", carried
# over from the random-forest cell.
from joblib import dump  

dump(model , 'dashboard/models/svc_model2.joblib')

['dashboard/models/svc_model2.joblib']

In [30]:
# Evaluated but not displayed (it is not the last expression of the cell).
X_train_rf.shape
# Preview the first five rows of the flattened training features.
X_train_rf[:5,:]

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [46]:


# Class probabilities for the first five training rows; the trailing [:1]
# keeps only the first row (as a 1-row 2-D array).
pred = model.predict_proba(X_train_rf[:5,:])[:1]

# Bare expression: evaluated but not displayed, since it is not the last
# expression of the cell.
model.classes_


# Label each probability column with the corresponding class name.
df_pred = pd.DataFrame(pred, columns=model.classes_)

In [64]:
# Order the columns by the values in row 0, highest first, and keep the first
# three columns, i.e. the three most probable classes for that sample.
df_pred.sort_values(by = 0, axis = 1, ascending=False).iloc[:, :3]

Unnamed: 0,G:maj,C:maj,A#:maj
0,0.897827,0.020242,0.017657


## PCA

In [24]:
# Show the dimensions of the flattened feature matrix.
# NOTE(review): the recorded output reports 2160 rows while the X.shape output
# near the top reports 1128 samples — the cells were evidently run against
# different data or out of order; re-run the notebook top to bottom.
X_rf.shape

(2160, 14592)

In [13]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
# Standardise the flattened features, then project them onto 1000 principal
# components.
# NOTE(review): both transformers are fitted on every sample, before the
# train/test split in the following cell, so the test rows influence the
# fitted scaling and projection — consider fitting on the training rows only.
scaler = StandardScaler()
x = scaler.fit_transform(X_rf)

pca = PCA(n_components=1000)
principalComponents = pca.fit_transform(x)

2023/12/08 12:33:19 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'fab790a64cf94279881c327b95fdf6d5', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


In [14]:
from sklearn.neighbors import KNeighborsClassifier

# Split the PCA-projected features with the same test size and seed as before.
# NOTE: this rebinds the global X_train/X_test/y_train/y_test; X_train_rf and
# X_test_rf are not recomputed and still refer to the earlier split.
X_train, X_test, y_train, y_test = train_test_split(principalComponents, y, test_size=0.33, random_state=42)

def knn_model():
    """Fit a k-nearest-neighbours classifier on the current global split.

    Reads the notebook globals ``X_train``, ``y_train``, ``X_test`` and
    ``y_test`` (the PCA-projected split when this cell runs in order).
    Prints the evaluation report through ``metrics`` and logs the macro F1 and
    the accuracy on the test split as MLflow metrics.

    NOTE(review): this redefines the ``knn_model`` declared earlier in the
    notebook; the later definition silently replaces the earlier one.

    Returns
    -------
    KNeighborsClassifier
        The fitted estimator. The function previously returned None, so
        ``model = knn_model()`` left ``model`` unusable for ``predict``.
    """
    mlflow.autolog(exclusive=False)

    with mlflow.start_run():
        mlflow.log_param("example_param", "example_value")
        clf = KNeighborsClassifier(algorithm='kd_tree', weights='distance')
        clf.fit(X_train, y_train)
        metrics(clf, X_test, y_test)

        preds = clf.predict(X_test)
        mlflow.log_metric("test f1-score", f1_score(y_test, preds, average='macro'))
        mlflow.log_metric("accuracy", accuracy_score(y_test, preds))

        return clf


knn_model()



2023/12/08 12:33:28 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


F1-Score:  0.7468955324378183
Acurácia:  0.7533512064343163
              precision    recall  f1-score   support

      A#:maj       0.73      0.81      0.77        27
      C#:maj       0.84      0.69      0.76        39
       C:maj       0.72      0.84      0.77        37
      D#:maj       0.56      0.70      0.62        27
       D:maj       0.71      0.68      0.69        37
       E:maj       0.80      0.86      0.83        37
      F#:maj       0.78      0.55      0.64        33
       F:maj       0.69      0.88      0.77        50
      G#:maj       0.93      0.72      0.81        39
       G:maj       0.83      0.74      0.79        47

    accuracy                           0.75       373
   macro avg       0.76      0.75      0.75       373
weighted avg       0.77      0.75      0.75       373



In [20]:
# NOTE(review): the recorded run of this cell raised AttributeError because
# `model` was None at that point — presumably it had been reassigned from a
# call that returned nothing. Confirm, and make sure `model` holds a fitted
# estimator before running this.
model.predict(X_train_rf)

AttributeError: 'NoneType' object has no attribute 'predict'