In [None]:
!pip install resampy

Collecting resampy
  Downloading resampy-0.4.3-py3-none-any.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: resampy
Successfully installed resampy-0.4.3


In [None]:
import resampy
import numpy as np
import pandas as pd
import os
import fnmatch
import librosa
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score, roc_curve , auc

In [None]:
# Mount Google Drive in the Colab runtime so the project files can be read and written.
from google.colab import drive
drive.mount('/content/drive')

INPUT_DIR='/content/drive/MyDrive/TCC' # path to the project folder

Mounted at /content/drive


## FUNÇÃO DE EXTRAÇÃO DE CARACTERÍSTICAS

In [None]:
def extract_audio_features(music_folders, columns, genre_list):
    """Build a feature table from the audio files found in the given folders.

    Every file matching one of the patterns is loaded (first 5 seconds only)
    and summarised as: the mean of each MFCC row, the zero-crossing count, and
    the means of the spectral centroid, spectral rolloff and chroma STFT.

    Args:
        music_folders: Iterable of directory paths to scan. A trailing path
            separator is optional.
        columns: Column names for the resulting DataFrame, in the order
            MFCC means, zero crossings, spectral centroid, spectral rolloff,
            chroma STFT, class label, file name.
        genre_list: Iterable of ``fnmatch`` patterns (e.g. ``"normal*.wav"``).
            A pattern containing ``"normal"`` labels its files ``"normal"``;
            every other pattern labels them ``"abnormal"``.

    Returns:
        pandas.DataFrame with one row per matched file (empty when nothing
        matches).
    """
    features_list = []
    for folder in music_folders:
        # os.listdir returns entries in arbitrary order; sorting keeps the row
        # order (and any seeded split built on top of it) reproducible.
        folder_entries = sorted(os.listdir(folder))
        for genre in genre_list:
            label = "normal" if "normal" in genre else "abnormal"  # class label
            for file in fnmatch.filter(folder_entries, genre):
                # os.path.join works whether or not `folder` ends with a separator.
                signal, sr = librosa.load(os.path.join(folder, file), duration=5, res_type='kaiser_fast')
                features = [np.mean(coeff) for coeff in librosa.feature.mfcc(y=signal, sr=sr)]
                features.append(np.sum(librosa.zero_crossings(signal)))
                # Pass the real sampling rate, as the other spectral features do.
                features.append(np.mean(librosa.feature.spectral_centroid(y=signal, sr=sr)))
                features.append(np.mean(librosa.feature.spectral_rolloff(y=signal, sr=sr)))
                features.append(np.mean(librosa.feature.chroma_stft(y=signal, sr=sr)))
                features.append(label)
                features.append(file)
                features_list.append(features)
    return pd.DataFrame(features_list, columns=columns)

from sklearn.preprocessing import LabelEncoder

# Where to look for recordings and which filename patterns to pick up.
music_folders = [INPUT_DIR + "/set_b/"]
genre_list = ["normal*.wav", "extrastole*.wav", "murmur*.wav"]

# Column layout expected by extract_audio_features: 20 MFCC means, four
# summary features, then the class label and the source file name.
column_names = (
    ["mfcc_" + str(i) for i in range(20)]
    + ["zero_crossings", "spectral_centroid", "spectral_rolloff", "chroma_stft"]
    + ["genre", "file_name"]
)

music_df = extract_audio_features(music_folders, column_names, genre_list)

# Feature matrix: the first 24 columns (everything before the label and file name).
X = music_df.iloc[:, :24]

# Turn the string class labels into integer codes; `le` keeps the mapping.
le = LabelEncoder()
le.fit(music_df["genre"])
y = le.transform(music_df["genre"])




# CRIAÇÃO DOS FOLDS EM DRIVE


## **StandardScaler**

In [None]:
from sklearn.preprocessing import StandardScaler

base_dir = os.path.join(INPUT_DIR, "Folds")
# np.savez_compressed does not create missing directories.
os.makedirs(base_dir, exist_ok=True)

# 5-fold cross-validation; the fixed seed makes the fold membership repeatable.
kf = KFold(n_splits=5, shuffle=True, random_state=2612)

# Scaler fitted on the whole dataset. Kept only so `scaler` / `X_scaled` stay
# available to other cells; the fold files below do NOT use it, because fitting
# on all rows would leak test-fold statistics into the training data.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_values = np.asarray(X, dtype=float)

for fold, (train_index, test_index) in enumerate(kf.split(X_values)):
    # Fit the scaler on the training rows only, then apply it to both parts.
    fold_scaler = StandardScaler().fit(X_values[train_index])
    X_train = fold_scaler.transform(X_values[train_index])
    X_test = fold_scaler.transform(X_values[test_index])
    y_train, y_test = y[train_index], y[test_index]
    print(f"Fold {fold}: Train Mean = {np.mean(X_train, axis=0)}, Test Mean = {np.mean(X_test, axis=0)}")

    # Build the path of this fold's file
    fold_path = os.path.join(base_dir, f'fold_{fold}_data.npz')

    # Save the fold to disk
    np.savez_compressed(fold_path, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)

# Class balance per fold, computed from the real encoded labels instead of an
# assumed "first 320 normal, remaining 141 abnormal" layout.
# LabelEncoder assigns codes in sorted class order, so "abnormal" -> 0 and "normal" -> 1.
labels = np.asarray(y)
# Same KFold seed and same sample count, so these are the same splits as above.
for fold, (train_index, test_index) in enumerate(kf.split(labels)):
    train_labels, test_labels = labels[train_index], labels[test_index]
    print(f"Fold {fold}")
    print(f"Training normal: {np.sum(train_labels == 1)}, Training abnormal: {np.sum(train_labels == 0)}")
    print(f"Testing normal: {np.sum(test_labels == 1)}, Testing abnormal: {np.sum(test_labels == 0)}")

Fold 0: Train Mean = [ 0.00367976  0.02688045 -0.015133   -0.03208767 -0.02336793  0.01066751
 -0.01243531 -0.03373171 -0.02123646  0.01251501  0.00487582 -0.03036375
 -0.0232717   0.02642832  0.02864365 -0.02027773 -0.03482085 -0.03271456
 -0.01676704 -0.03853883  0.03402203  0.03304353  0.02523495 -0.0184632 ], Test Mean = [-0.01456077 -0.10636565  0.05988111  0.12697057  0.09246666 -0.0422112
  0.04920639  0.13347602  0.08403243 -0.04952175 -0.01929356  0.12014903
  0.09208588 -0.10457657 -0.11334261  0.08023874  0.13778571  0.12945114
  0.06634701  0.15249773 -0.13462483 -0.13075289 -0.09985441  0.07305867]
Fold 1: Train Mean = [ 0.03059354 -0.01023428  0.01174628  0.01203299 -0.00933061 -0.02125587
  0.00624184  0.02706653  0.01299882 -0.02429672 -0.02559883  0.00975919
  0.01747215 -0.00703134 -0.0062247   0.03435298  0.03516411  0.01156844
 -0.01729833  0.00844179 -0.00880949 -0.0086468  -0.00819968  0.02747846], Test Mean = [-0.12270669  0.04104836 -0.04711279 -0.04826274  0.03

#  FPR e TPR

## Geração das métricas

In [None]:
from sklearn.metrics import accuracy_score
def train_model(fold, base_dir, random_state=2612):
    """Train and evaluate a random forest on one saved cross-validation fold.

    Args:
        fold: Fold number; selects ``fold_{fold}_data.npz`` inside ``base_dir``.
        base_dir: Directory holding the fold archives. Each archive must
            contain the arrays ``X_train``, ``y_train``, ``X_test``, ``y_test``.
        random_state: Seed passed to ``RandomForestClassifier`` so repeated
            runs give the same metrics. Pass ``None`` for unseeded behaviour.

    Returns:
        Tuple ``(fpr, tpr, roc_auc, acc)``: the ROC curve points, the ROC AUC
        and the accuracy measured on the fold's test split.
    """
    fold_path = os.path.join(base_dir, f'fold_{fold}_data.npz')

    # Load the data; the context manager closes the archive once the arrays are read.
    with np.load(fold_path) as data:
        X_train = data['X_train']
        y_train = data['y_train']
        X_test = data['X_test']
        y_test = data['y_test']

    # Initialise and train the model
    forest = RandomForestClassifier(
        max_depth=14,
        max_features=4,
        min_samples_split=5,
        n_estimators=180,
        random_state=random_state,
    )
    forest.fit(X_train, y_train)

    # Probability of the second class in forest.classes_ (label 1 for 0/1 labels)
    y_pred_proba = forest.predict_proba(X_test)[:, 1]

    y_pred = forest.predict(X_test)

    # ROC AUC plus the fpr/tpr points needed to plot the ROC curve
    roc_auc = roc_auc_score(y_test, y_pred_proba)
    fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
    acc = accuracy_score(y_test, y_pred)  # accuracy on the test split
    print(f"Fold {fold}, accs: {acc}")
    print(f"Fold {fold}, roc_auc: {roc_auc}")
    return fpr, tpr, roc_auc, acc

# 'base_dir' has to be defined before train_model is called
base_dir = os.path.join(INPUT_DIR, "Folds")

# Train and evaluate one model per fold; each result is (fpr, tpr, roc_auc, acc)
results = [train_model(fold, base_dir) for fold in range(5)]
roc_auc_scores = [roc_auc for _fpr, _tpr, roc_auc, _acc in results]
accs = [acc for _fpr, _tpr, _roc_auc, acc in results]

Fold 0, accs: 0.7526881720430108
Fold 0, roc_auc: 0.665948275862069
Fold 1, accs: 0.8043478260869565
Fold 1, roc_auc: 0.7016967706622879
Fold 2, accs: 0.717391304347826
Fold 2, roc_auc: 0.62109375
Fold 3, accs: 0.7065217391304348
Fold 3, roc_auc: 0.5576923076923077
Fold 4, accs: 0.75
Fold 4, roc_auc: 0.6803503010399562


In [None]:
# Show the per-fold ROC AUC and accuracy values collected from train_model.
print("ROC AUC Scores:", roc_auc_scores)
print("ACCScores:", accs)

ROC AUC Scores: [0.665948275862069, 0.7016967706622879, 0.62109375, 0.5576923076923077, 0.6803503010399562]
ACCScores: [0.7526881720430108, 0.8043478260869565, 0.717391304347826, 0.7065217391304348, 0.75]


In [None]:
# Average each metric over the folds.
print("Mean ROC AUC:", np.asarray(roc_auc_scores).mean())
print("Mean ACC:", np.asarray(accs).mean())

Mean ROC AUC: 0.6714687713803491
Mean ACC: 0.7679055633473586
