In [1]:
import pandas as pd 
import numpy as np
import mlflow

In [2]:
# Load the feature array and the label array saved as .npy files.
# NOTE(review): file names suggest mel-spectrogram features — confirm.
X = np.load("X-mel_spec.npy")
y = np.load("y-mel_spec.npy")

In [3]:
def get_label(filepath, metadata_path='metadata.csv'):
    """Look up the chord label of an audio file in the metadata CSV.

    Parameters
    ----------
    filepath : str
        '/'-separated path; its second component (index 1) is used as the
        file name to look up.
    metadata_path : str, optional
        CSV file containing at least the columns ``Filename`` and ``Acorde``.
        Defaults to ``'metadata.csv'``.

    Returns
    -------
    The ``Acorde`` value of the first row whose ``Filename`` equals the
    extracted file name.

    Raises
    ------
    IndexError
        If ``filepath`` has no '/' separator, or if no metadata row matches
        the extracted file name.
    """
    # The CSV is read on every call; load it once outside if this is called
    # in a tight loop.
    metadata = pd.read_csv(metadata_path, sep=',')

    # NOTE(review): index 1 assumes paths shaped like "<folder>/<file>" —
    # confirm that deeper paths never reach this function.
    filename = filepath.split('/')[1]

    matches = metadata.loc[metadata['Filename'] == filename, 'Acorde']
    if matches.empty:
        # Same exception type as indexing an empty array, but says what failed.
        raise IndexError(
            f"no row with Filename == {filename!r} in {metadata_path!r}"
        )

    return matches.values[0]

# Metadata table that carries the `most_freq` indicator column.
df = pd.read_csv('metadata_final.csv', sep=',')
# Keep only the rows flagged with most_freq == 1.
df_chords = df.loc[df['most_freq'] == 1]

## Divisão Treino e Teste

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

## Métricas

In [7]:
from sklearn.metrics import f1_score, accuracy_score
from sklearn.metrics import classification_report


def metrics(model, X_test, y_test):
    """Print evaluation scores of ``model`` on a held-out set.

    Predicts labels for ``X_test`` with ``model.predict`` and prints, in
    order: the macro-averaged F1 score, the accuracy, and the full
    classification report, all computed against ``y_test``.

    Returns nothing; the results are only written to stdout.
    """
    predicted = model.predict(X_test)

    macro_f1 = f1_score(y_test, predicted, average='macro')
    print('F1-Score: ', macro_f1)

    acc = accuracy_score(y_test, predicted)
    print('Acurácia: ', acc)

    report = classification_report(y_test, predicted)
    print(report)



## Random Forest

In [6]:
# Turn on MLflow's scikit-learn autologging for the cells that follow.
mlflow.sklearn.autolog()



In [5]:
# Flatten axes 1 and 2 of every sample into one feature axis so the arrays
# are 2-D (n_samples, n_features). Assumes the inputs are 3-D.
X_rf = np.reshape(X, (X.shape[0], X.shape[1] * X.shape[2]))
X_train_rf = np.reshape(
    X_train, (X_train.shape[0], X_train.shape[1] * X_train.shape[2])
)
X_test_rf = np.reshape(
    X_test, (X_test.shape[0], X_test.shape[1] * X_test.shape[2])
)

### Tuning

In [6]:
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingRandomSearchCV
from sklearn.ensemble import RandomForestClassifier


# Base estimator for the search; max_depth here is overridden by the sampled
# candidates below.
clf = RandomForestClassifier(max_depth=5, random_state=0)

# Candidate values sampled by the randomized search.
param_grid = {'max_depth': [3, 5, 10],
              'min_samples_split': [2, 5, 10]}

# Successive halving where the budget is the number of trees (up to 30).
# random_state makes the candidate sampling reproducible.
sh = HalvingRandomSearchCV(clf, param_grid, cv=5,
                           factor=2, resource='n_estimators',
                           max_resources=30, random_state=0)

# Fit on the training split only: searching on the full X_rf / y would let
# the held-out test rows influence the hyperparameters that are later scored
# on that same test set.
sh.fit(X_train_rf, y_train)
sh.best_estimator_





In [11]:


def fit_evaluate(model, params=None):
    """Train ``model`` on the flattened training split and log test scores.

    Parameters
    ----------
    model : callable
        Estimator class (e.g. ``RandomForestClassifier``); it is instantiated
        inside the MLflow run as ``model(**params)``.
    params : dict, optional
        Keyword arguments for the estimator constructor. When omitted, uses
        ``max_depth=10, min_samples_split=5, n_estimators=8, random_state=0``.

    Reads the notebook-level ``X_train_rf``, ``y_train``, ``X_test_rf`` and
    ``y_test``. Prints the evaluation via ``metrics`` and logs the macro F1
    and accuracy on the test split to the MLflow run. Returns nothing.
    """
    if params is None:
        params = {'max_depth': 10, 'min_samples_split': 5,
                  'n_estimators': 8, 'random_state': 0}

    mlflow.autolog(exclusive=False)

    with mlflow.start_run():
        clf = model(**params)
        clf.fit(X_train_rf, y_train)
        metrics(clf, X_test_rf, y_test)

        # metrics() only prints, so predict again to log the scores explicitly.
        preds = clf.predict(X_test_rf)
        mlflow.log_metric("test f1-score", f1_score(y_test, preds, average='macro'))
        mlflow.log_metric("accuracy", accuracy_score(y_test, preds))


# Train and evaluate a random forest with fit_evaluate's built-in hyperparameters.
fit_evaluate(RandomForestClassifier)


2023/10/21 17:38:44 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


F1-Score:  0.4582312797480463
Acurácia:  0.4584450402144772
              precision    recall  f1-score   support

      A#:maj       0.36      0.48      0.41        27
      C#:maj       0.53      0.54      0.53        39
       C:maj       0.41      0.46      0.44        37
      D#:maj       0.40      0.44      0.42        27
       D:maj       0.50      0.35      0.41        37
       E:maj       0.65      0.65      0.65        37
      F#:maj       0.49      0.52      0.50        33
       F:maj       0.42      0.46      0.44        50
      G#:maj       0.44      0.44      0.44        39
       G:maj       0.41      0.30      0.35        47

    accuracy                           0.46       373
   macro avg       0.46      0.46      0.46       373
weighted avg       0.46      0.46      0.46       373

