In [1]:
import os
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeClassifier,plot_tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.tree import export_graphviz
import librosa
import numpy as np
import matplotlib.pyplot as plt
from joblib import dump
from joblib import load

In [2]:
def extract_features(audio_file, max_frames=1000):
    """Load an audio file and return its MFCC matrix with a fixed frame count.

    The MFCC matrix is padded with zeros or truncated along axis 1 so that
    it always has exactly ``max_frames`` columns.

    Args:
        audio_file: Path of the audio file handed to ``librosa.load``.
        max_frames: Number of columns (axis 1) the returned matrix must have.

    Returns:
        The padded/truncated MFCC array, or ``None`` when any step raised an
        exception (the error is printed instead of being propagated).
    """
    try:
        signal, sample_rate = librosa.load(audio_file, sr=None)
        coeffs = librosa.feature.mfcc(y=signal, sr=sample_rate)

        missing = max_frames - coeffs.shape[1]
        if missing > 0:
            # Too short: append `missing` constant (zero) columns on the right.
            return np.pad(coeffs, ((0, 0), (0, missing)), mode='constant')
        # Long enough: keep only the first `max_frames` columns.
        return coeffs[:, :max_frames]
    except Exception as e:
        print(f"Error processing {audio_file}: {e}")
        return None

In [3]:
def load_mapping_from_excel(excel_file):
    """Read an Excel workbook into a DataFrame via ``pd.read_excel``.

    Args:
        excel_file: Path (or any other input ``pd.read_excel`` accepts).

    Returns:
        Whatever ``pd.read_excel`` returns when called with default options.
    """
    return pd.read_excel(excel_file)

In [4]:
def train_decision_tree(features, labels, test_size=0.2, random_state=100):
    """Split the data, fit a decision tree, and return it with the held-out set.

    Args:
        features: Sequence of equal-length 1-D feature vectors.
        labels: Target values, one per entry of ``features``.
        test_size: Fraction of samples held out for testing (default 0.2).
        random_state: Seed used for BOTH the train/test split and the
            classifier. ``splitter='random'`` draws random split thresholds,
            so an unseeded classifier yields a different tree on every run.

    Returns:
        Tuple ``(clf, X_test, y_test)``: the fitted classifier and the
        held-out features/labels.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=test_size, random_state=random_state
    )
    clf = DecisionTreeClassifier(
        max_depth=80,
        criterion='log_loss',
        splitter='random',
        random_state=random_state,  # makes the randomly-split tree reproducible
    )
    clf.fit(X_train, y_train)

    return clf, X_test, y_test


In [5]:
audio_folder = "./operations"
# os.listdir returns entries in arbitrary order and also lists sub-directories.
# Sort the names so the feature order is reproducible across runs/machines, and
# keep regular files only so a stray folder is not fed to the audio loader.
# NOTE(review): later cells pair these features with the spreadsheet labels
# purely by position -- confirm the spreadsheet rows follow sorted file-name order.
audio_files = sorted(
    name
    for name in os.listdir(audio_folder)
    if os.path.isfile(os.path.join(audio_folder, name))
)
audio_features = [extract_features(os.path.join(audio_folder, file)) for file in audio_files]



In [6]:
excel_file = "operation_dataset.xlsx"
mapping_df = load_mapping_from_excel(excel_file)
max_frames = 1000
# The names in `audio_files` were listed from `audio_folder`, so load them from
# that same folder instead of a second hard-coded directory.
features = [
    extract_features(os.path.join(audio_folder, audio_file), max_frames)
    for audio_file in audio_files
]
labels = mapping_df["result"]

# Labels are matched to audio files by position only, so the counts must agree.
# NOTE(review): equal counts do not prove the ordering matches -- verify that the
# spreadsheet rows are in the same order as `audio_files`.
if len(labels) != len(audio_files):
    raise ValueError(
        f"{len(labels)} labels in {excel_file} but {len(audio_files)} audio files in {audio_folder}"
    )

In [7]:
# extract_features returns None for files it could not process; fail loudly and
# name those files rather than crashing with an AttributeError on `.flatten()`.
failed_files = [name for name, mfcc in zip(audio_files, audio_features) if mfcc is None]
if failed_files:
    raise ValueError(f"Feature extraction failed for: {failed_files}")

# One flat 1-D vector per clip (loop variable no longer shadows `features`).
features_flat = [mfcc.flatten() for mfcc in audio_features]
model, X_test, y_test = train_decision_tree(features_flat, labels)


In [8]:
# train_decision_tree returns only the held-out partition, so rebuild the
# training partition here with the same arguments it uses for its split
# (test_size=0.2, random_state=100). Scoring on `features_flat` as a whole would
# mix the test samples into the "training" accuracy.
split_parts = train_test_split(features_flat, labels, test_size=0.2, random_state=100)
X_train, y_train = split_parts[0], split_parts[2]

y_pred_train = model.predict(X_train)  # Predictions on the training set
accuracy_train = accuracy_score(y_train, y_pred_train)

# Predictions on the test set
y_pred_test = model.predict(X_test)
accuracy_test = accuracy_score(y_test, y_pred_test)

print("Training Accuracy:", accuracy_train)
print("Test Accuracy:", accuracy_test)

Training Accuracy: 1.0
Test Accuracy: 1.0


In [9]:
# Crude heuristic: any train/test accuracy gap is reported as possible overfitting.
print(
    "The model may be overfitting."
    if accuracy_train > accuracy_test
    else "The model does not seem to be overfitting."
)
    

The model does not seem to be overfitting.


In [10]:
# Write the fitted classifier to disk with joblib.
model_file = "decision_tree_model.joblib"
dump(value=model, filename=model_file)

print("Model saved successfully to", model_file)

Model saved successfully to decision_tree_model.joblib


In [11]:
# Read the persisted classifier back from disk.
# NOTE: joblib files are pickle-based -- only load files from a trusted source.
loaded_model = load(filename="decision_tree_model.joblib")

In [12]:
# Run the reloaded model on a single clip, using the same feature pipeline
# (extract -> flatten) as the training data.
# NOTE(review): this path is under ./operaciones while the training files are
# listed from ./operations -- confirm which folder is the intended one.
audio_file_test = ["./operaciones/2x9.wav"]
feature_test = [extract_features(clip_path, max_frames) for clip_path in audio_file_test]
features_flat_test = [clip_mfcc.flatten() for clip_mfcc in feature_test]
# Kept as the last expression so the notebook displays the prediction.
loaded_model.predict(features_flat_test)

array([20])