# Evaluation

In [3]:
import os
import numpy as np
import pandas as pd
from joblib import load
import librosa

In [4]:
# Project data folder, resolved relative to the current working directory;
# the serialized model file is read from here further down.
DATA_DIRECTORY = os.path.join("..", "data")

In [None]:
def load_data(file_path):
    """Read an audio file with librosa and report its basic properties.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        A ``(audio, sample_rate)`` tuple exactly as produced by
        ``librosa.load``.
    """
    print("Loading audio file:", file_path)

    # sr=None asks librosa to keep the file's own sampling rate rather than
    # resampling to its default rate.
    loaded = librosa.load(file_path, sr=None)
    audio, sample_rate = loaded

    n_samples = len(audio)
    print("Audio loaded. Sample rate:", sample_rate, "Length:", n_samples)
    return audio, sample_rate


def analyze_voice(audio, sample_rate):
    """Summarise an audio signal as one scalar per spectral feature family.

    Each librosa feature matrix is reduced with ``np.mean`` over *all* of its
    entries (no axis is given), so every feature contributes a single number.

    Args:
        audio: Audio samples, as returned by ``load_data``.
        sample_rate: Sampling rate passed to every librosa extractor.

    Returns:
        A dict with the keys ``"length"`` (``len(audio)``), ``"mfccs"``,
        ``"chroma"``, ``"mel"``, ``"contrast"`` and ``"tonnetz"``, or ``None``
        when any extraction step fails (the error is printed, not raised).
    """
    try:
        # Extractors run in a fixed order; dicts preserve insertion order, so
        # the result keys follow the same order.
        features = {
            "mfccs": librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40),
            "chroma": librosa.feature.chroma_stft(y=audio, sr=sample_rate),
            "mel": librosa.feature.melspectrogram(y=audio, sr=sample_rate),
            "contrast": librosa.feature.spectral_contrast(y=audio, sr=sample_rate),
            # Tonnetz is computed on the harmonic component of the signal.
            "tonnetz": librosa.feature.tonnetz(
                y=librosa.effects.harmonic(audio), sr=sample_rate
            ),
        }

        summary = {"length": len(audio)}
        # The former `x.ndim > 1` conditional had identical branches, so a
        # plain global mean is the exact same computation.
        summary.update({key: np.mean(values) for key, values in features.items()})
        return summary

    except Exception as e:
        # Best-effort by design: report the failure and let the caller decide
        # how to handle the missing result.
        print(f"Error analyzing audio data: {e}")
        return None

In [None]:
# Build one row of summary features per "<name>+<number>.wav" file found in
# the sub-folders of the training data directory.
# NOTE(review): TRAINING_DATA_DIRECTORY is not defined in any cell visible
# here -- confirm it is set before this cell runs.
columns = ["name", "number", "length", "sample_rate",
           "mfccs", "chroma", "mel", "contrast", "tonnetz"]

# Rows are gathered in a plain list and turned into a DataFrame once at the
# end; concatenating inside the loop recopies the whole frame every time.
rows = []

for subdirectory in os.listdir(TRAINING_DATA_DIRECTORY):
    subdirectory_path = os.path.join(TRAINING_DATA_DIRECTORY, subdirectory)
    if not os.path.isdir(subdirectory_path):
        continue

    print(f"Iterating files in {subdirectory_path}")

    for filename in os.listdir(subdirectory_path):
        if not filename.lower().endswith(".wav"):
            continue

        file_path = os.path.join(subdirectory_path, filename)
        try:
            # File names are expected to look like "<name>+<number>.wav";
            # anything else raises here and is reported below.
            name, number_with_extension = filename.split("+")
            number = int(number_with_extension.split(".")[0])

            print(f"Processing {filename}...")
            audio, sample_rate = load_data(file_path)
            voice_data = analyze_voice(audio, sample_rate)

            # analyze_voice reports its own error and returns None on failure;
            # skip the file instead of failing on a None subscript.
            if voice_data is None:
                print(f"Skipping {filename} in {subdirectory}: no features extracted")
                continue

            rows.append({
                "name": name.lower(),
                "number": number,
                "length": voice_data["length"],
                "sample_rate": sample_rate,
                "mfccs": voice_data["mfccs"],
                "chroma": voice_data["chroma"],
                "mel": voice_data["mel"],
                "contrast": voice_data["contrast"],
                "tonnetz": voice_data["tonnetz"],
            })

        except Exception as e:
            # One bad file must not abort the whole scan.
            print(f"Error processing {filename} in {subdirectory}: {e}")

# Passing `columns` keeps the schema (and column order) even when no file
# was processed successfully.
df = pd.DataFrame(rows, columns=columns)

In [6]:
# Load the previously saved model from the data directory (presumably a
# random-forest estimator, judging by the file name -- confirm).
# NOTE(review): joblib's load unpickles the file, so only point this at model
# files from a trusted source.
model_file_path = os.path.join(DATA_DIRECTORY, "random_forest_model.joblib")
model = load(model_file_path)