In [None]:
import os
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [None]:
# A mel spectrogram represents audio in a visual (time-frequency) form.
# Feature extraction below uses the librosa library.

def extract_features(file_name, sr=22050, duration=5, offset=0.5, n_mfcc=40):
    """Summarise one audio file as a single 1-D feature vector.

    The clip is loaded with librosa, three frame-wise representations are
    computed (MFCCs, chromagram, mel-scaled spectrogram), each one is averaged
    over its time frames, and the three averages are concatenated.

    Parameters
    ----------
    file_name : str or path-like
        Audio file to load; passed straight to ``librosa.load``.
    sr : int, default 22050
        Target sample rate handed to ``librosa.load``.
    duration : float, default 5
        Maximum number of seconds to read from the file.
    offset : float, default 0.5
        Seconds to skip at the start of the file before reading.
    n_mfcc : int, default 40
        Number of MFCCs to compute.

    Returns
    -------
    numpy.ndarray or None
        The per-coefficient time averages, concatenated in the order
        ``[mfcc, chroma, mel]``. The chroma and mel lengths come from
        librosa's own defaults (presumably 12 and 128 -- confirm against the
        installed version). ``None`` is returned when loading or feature
        extraction fails; the error is printed rather than raised, so
        callers must filter out ``None`` results.
    """
    try:
        # NOTE(review): 'kaiser_fast' may need the optional `resampy` package
        # on recent librosa releases -- confirm for the target environment.
        audio, sample_rate = librosa.load(
            file_name,
            res_type='kaiser_fast',
            duration=duration,
            sr=sr,
            offset=offset,
        )

        # A clip that ends before `offset` leaves nothing to analyse. Fail
        # explicitly (through the shared handler below) instead of relying on
        # how each extractor happens to treat zero-length input.
        if audio.size == 0:
            raise ValueError("no audio samples left after applying offset/duration")

        # Mel-frequency cepstral coefficients, averaged over time frames.
        mfccs = np.mean(
            librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc).T, axis=0
        )
        # Chromagram (energy per pitch class), averaged over time frames.
        chroma = np.mean(
            librosa.feature.chroma_stft(y=audio, sr=sample_rate).T, axis=0
        )
        # Mel-scaled spectrogram, averaged over time frames.
        mel = np.mean(
            librosa.feature.melspectrogram(y=audio, sr=sample_rate).T, axis=0
        )

        return np.hstack([mfccs, chroma, mel])

    except Exception as e:
        # Deliberately best-effort: one unreadable file must not abort a
        # whole batch extraction. The caller sees None for this file.
        print(f"Error processing {file_name}: {e}")
        return None