# Voice Analyzer

In [10]:
import os
import numpy as np
import pandas as pd
from joblib import load
from scipy.io import wavfile
import sounddevice as sd
import librosa

In [11]:
# Directory, relative to the working directory, from which the trained model is loaded.
DATA_DIRECTORY = os.path.join(os.pardir, "data")

# 1. User Input

In [12]:
def record_voice(filename, duration=5, fs=44100, channels=2):
    """
    Record voice for a given duration and save it as a .wav file.

    The call blocks until the recording has finished, then writes the
    captured samples to ``filename`` with ``scipy.io.wavfile.write``.

    :param filename: str, the name of the file to save the recording.
    :param duration: int or float, duration of the recording in seconds.
    :param fs: int, sampling rate.
    :param channels: int, number of input channels to capture. Defaults to 2,
        the value that used to be hard-coded.
    :raises ValueError: if ``duration * fs`` yields no frames to record, or if
        ``channels`` is smaller than 1.
    """
    frame_count = int(duration * fs)

    # Validate before touching the audio device so a bad argument fails with a
    # clear message instead of producing an empty or unusable recording.
    if frame_count <= 0:
        raise ValueError(
            f"duration * fs must give at least one frame, got duration={duration!r}, fs={fs!r}"
        )
    if channels < 1:
        raise ValueError(f"channels must be at least 1, got {channels!r}")

    print("Recording...")
    recording = sd.rec(frame_count, samplerate=fs, channels=channels)
    sd.wait()  # Wait until recording is finished
    print("Recording finished. Saving file...")
    wavfile.write(filename, fs, recording)  # Save as WAV file

In [13]:
# Capture a clip with the default duration and sampling rate; the file is
# written relative to the current working directory.
record_voice(filename='voice_recording.wav')

Recording...
Recording finished. Saving file...


# 2. Feature Extraction

In [14]:
def load_data(file_path):
    """
    Load an audio file with librosa and report its basic properties.

    :param file_path: path of the audio file to read.
    :return: tuple ``(audio, sample_rate)`` exactly as returned by
        ``librosa.load(file_path, sr=None)``.
    """
    print("Loading audio file:", file_path)

    # sr=None asks librosa to keep the file's own sampling rate rather than
    # resampling to its default.
    signal, native_rate = librosa.load(file_path, sr=None)

    print("Audio loaded. Sample rate:", native_rate, "Length:", len(signal))
    return signal, native_rate


def analyze_voice(audio, sample_rate):
    """
    Summarise an audio signal as a single-row DataFrame of averaged features.

    Five librosa features are computed (MFCCs with 40 coefficients, chroma,
    mel spectrogram, spectral contrast, and tonnetz of the harmonic
    component). Each feature array is collapsed to one scalar with
    ``np.mean`` over all of its elements.

    :param audio: audio samples, as returned by ``load_data``.
    :param sample_rate: sampling rate of ``audio``.
    :return: DataFrame with one row and the columns ``length``,
        ``sample_rate``, ``mfccs``, ``chroma``, ``mel``, ``contrast`` and
        ``tonnetz``; ``None`` if any step raised, in which case the error is
        printed instead of propagated.
    """
    try:
        # Evaluated in this order; dict insertion order also fixes the column
        # order of the resulting frame.
        feature_arrays = {
            "mfccs": librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40),
            "chroma": librosa.feature.chroma_stft(y=audio, sr=sample_rate),
            "mel": librosa.feature.melspectrogram(y=audio, sr=sample_rate),
            "contrast": librosa.feature.spectral_contrast(y=audio, sr=sample_rate),
            "tonnetz": librosa.feature.tonnetz(
                y=librosa.effects.harmonic(audio), sr=sample_rate
            ),
        }

        row = {"length": [len(audio)], "sample_rate": [sample_rate]}
        for feature_name, values in feature_arrays.items():
            # np.mean without an axis averages every element, whatever the
            # array's dimensionality.
            row[feature_name] = [np.mean(values)]

        return pd.DataFrame(row)

    except Exception as e:
        # Best-effort contract: report the problem and hand back None.
        print(f"Error analyzing audio data: {e}")
        return None

In [15]:
# Load the saved recording and reduce it to one row of summary features.
audio, sample_rate = load_data('voice_recording.wav')
voice_data = analyze_voice(audio, sample_rate)

# analyze_voice reports failures by printing and returning None. Stop here
# with a clear error instead of letting a later cell fail on a None value.
if voice_data is None:
    raise RuntimeError(
        "Feature extraction failed for 'voice_recording.wav'; see the error printed above."
    )

print(voice_data)

Loading audio file: voice_recording.wav
Audio loaded. Sample rate: 44100 Length: 220500
   length  sample_rate    mfccs    chroma       mel  contrast   tonnetz
0  220500        44100 -8.78126  0.422828  0.842698  20.74218  0.001552


In [16]:
# Only the averaged acoustic features are passed to the model; the two
# bookkeeping columns are removed first.
X_pred = voice_data.drop(["length", "sample_rate"], axis=1)

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code. Only load model files that come from a trusted source.
model_file_path = os.path.join(DATA_DIRECTORY, "model.joblib")
model = load(model_file_path)
print("Loaded model type:", model.__class__.__name__)

# Class labels; the printing cell below indexes the probability columns in
# this same order.
model_classes = model.classes_
predicted_label = model.predict(X_pred)
predicted_probability = model.predict_proba(X_pred)

Loaded model type: RandomForestClassifier




In [17]:
# One line per class: its label and the probability assigned to the single
# recording (row 0 of the probability matrix), shown as a percentage.
for class_index, class_name in enumerate(model_classes):
    percent = predicted_probability[0][class_index] * 100
    print(f"Figure:\t{class_name},\tProbability:\t{percent:2.1f}%")

Figure:	andrew_tate,	Probability:	3.2%
Figure:	donald_trump,	Probability:	5.4%
Figure:	kanye_west,	Probability:	20.0%
Figure:	kendrick_lamar,	Probability:	2.8%
Figure:	marge_simpson,	Probability:	4.7%
Figure:	morgan_freeman,	Probability:	45.0%
Figure:	spongebob,	Probability:	0.0%
Figure:	squidward_tentacles,	Probability:	18.9%
